[摘要]
[正文] 读文件回顾
[正文] xz解压缩过程
注意:请使用谷歌浏览器阅读(IE浏览器排版混乱)
【摘要】
本文可以作为另一篇博文: squashfs文件的读取过程 的后续。根据squashfs文件的读取过程 一文介绍,当从flash上获取了文件数据后,如果数据是压缩的还要经过解压缩,本文就为您介绍一下xz压缩文件的解压缩过程。
【正文】读文件回顾
1 首先回顾一下 squashfs文件的读取过程一文中介绍的读文件数据过程:
/*
 * Read one squashfs block starting at byte offset `index` of the filesystem
 * image and deliver its contents to the pages managed by `output`.
 *
 * sb:         super block; sb->s_fs_info supplies the squashfs_sb_info.
 * index:      byte offset of the block to read.
 * length:     non-zero for a data block (encodes the compressed flag and the
 *             on-disk size); zero for a metadata block, whose length is
 *             obtained through get_block_length().
 * next_index: if non-NULL, receives the byte offset following this block.
 * output:     page actor that receives the (decompressed or copied) data.
 *
 * Returns the value produced by squashfs_decompress() for compressed blocks,
 * or the on-disk length for uncompressed blocks; -ENOMEM if the buffer_head
 * array cannot be allocated; -EIO on any other failure.
 */
int squashfs_read_data(struct super_block *sb, u64 index, int length,u64 *next_index, struct squashfs_page_actor *output)
{
/* Per the original annotation, s_fs_info was assigned in squashfs_fill_super. */
struct squashfs_sb_info *msblk = sb->s_fs_info;
struct buffer_head **bh;
/*
 * offset is the position of `index` inside its device block.
 * (The original annotation notes devblksize = 1024 bytes in the author's setup.)
 */
int offset = index & ((1 << msblk->devblksize_log2) - 1);
/*
 * cur_index is the device block number that contains `index`
 * (devblksize_log2 = 10 in the author's setup).
 */
u64 cur_index = index >> msblk->devblksize_log2;
int bytes, compressed, b = 0, k = 0, avail, i;
/* One buffer_head slot per device block covered by output->length, plus one spare. */
bh = kcalloc(((output->length + msblk->devblksize - 1)
>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
if (bh == NULL)
return -ENOMEM;
if (length) {
/*
 * Datablock: read the data block contents.
 */
/* Start negative so the bytes before `offset` in the first device block are not counted. */
bytes = -offset;
compressed = SQUASHFS_COMPRESSED_BLOCK(length);
length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
if (next_index)
*next_index = index + length;
/* Original annotation: output->length = 512k, the block size given to mksquashfs (author's setup). */
TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
index, compressed ? "" : "un", length, output->length);
if (length < 0 || length > output->length ||
(index + length) > msblk->bytes_used)
goto read_failure;
/*
 * Collect one buffer_head per device block until `length` bytes are
 * covered; the first block is the one that contains `index`.
 */
for (b = 0; bytes < length; b++, cur_index++) {
/* Obtain the buffer_head for this device block. */
bh[b] = sb_getblk(sb, cur_index);
if (bh[b] == NULL)
goto block_release;
/* Each buffer accounts for one device block (1024 bytes in the author's setup). */
bytes += msblk->devblksize;
}
/*
 * Submit the reads. Call chains traced by the original author on an
 * MTD/NAND system (not verifiable from this function alone):
 * 1. submit: ll_rw_block->submit_bh->submit_bio->do_blktrans_request();
 * 2. service: mtd_blktrans_work->do_blktrans_request->mtdblock_tr->
 *    mtdblock_readsect->do_cached_read->(mtd_read->mtd->_read=part_read)
 *    ->nand_read()->nand_do_read_ops()->(chip->cmdfunc)
 */
ll_rw_block(READ, b, bh);
} else {
/*
 * Metadata block: read the device blocks that hold it.
 */
/* Presumably a 2-byte length header precedes the metadata; it must lie inside the image. */
if ((index + 2) > msblk->bytes_used)
goto read_failure;
/* get_block_length() returns the first buffer and updates cur_index, offset and length. */
bh[0] = get_block_length(sb, &cur_index, &offset, &length);
if (bh[0] == NULL)
goto read_failure;
b = 1;
bytes = msblk->devblksize - offset;
compressed = SQUASHFS_COMPRESSED(length);
length = SQUASHFS_COMPRESSED_SIZE(length);
if (next_index)
*next_index = index + length + 2;
TRACE("Block @ 0x%llx, %scompressed size %d\n", index,compressed ? "" : "un", length);
if (length < 0 || length > output->length ||
(index + length) > msblk->bytes_used)
goto block_release;
for (; bytes < length; b++) {
bh[b] = sb_getblk(sb, ++cur_index);
if (bh[b] == NULL)
goto block_release;
bytes += msblk->devblksize;
}
/* bh[0] came from get_block_length(), so only the remaining b - 1 buffers are submitted. */
ll_rw_block(READ, b - 1, bh + 1);
}
/* Wait for every buffer and fail if any of them is not up to date. */
for (i = 0; i < b; i++) {
wait_on_buffer(bh[i]);
if (!buffer_uptodate(bh[i]))
goto block_release;
}
if (compressed) {
/*
 * Decompress the data that was read.
 * msblk:  squashfs_sb_info for this super block;
 * bh:     buffer heads; b_data of each holds the bytes that were read;
 * b:      number of buffer heads covering the block;
 * offset: offset of the first byte inside bh[0]
 *         (offset = index & 0x3ff with 1024-byte device blocks);
 * length: number of on-disk bytes of this block;
 * output->length: 512k in the author's setup (mksquashfs block size).
 * On failure this jumps straight to read_failure without releasing bh
 * here — presumably the decompressor releases the buffers itself.
 */
length = squashfs_decompress(msblk, bh, b, offset, length,output);
if (length < 0)
goto read_failure;
} else {
/*
 * Block is uncompressed: copy it from the buffers into the output pages.
 */
int in, pg_offset = 0;
void *data = squashfs_first_page(output);
for (bytes = length; k < b; k++) {
/* Bytes to take from this buffer: what is left, capped by the rest of the device block. */
in = min(bytes, msblk->devblksize - offset);
bytes -= in;
while (in) {
/* Current output page is full: move on to the next one. */
if (pg_offset == PAGE_CACHE_SIZE) {
data = squashfs_next_page(output);
pg_offset = 0;
}
avail = min_t(int, in, PAGE_CACHE_SIZE -
pg_offset);
memcpy(data + pg_offset, bh[k]->b_data + offset,
avail);
in -= avail;
pg_offset += avail;
offset += avail;
}
/* Only the first buffer can start at a non-zero offset. */
offset = 0;
put_bh(bh[k]);
}
squashfs_finish_page(output);
}
kfree(bh);
return length;
/* Release every buffer from k onwards, then fall through to the common failure exit. */
block_release:
for (; k < b; k++)
put_bh(bh[k]);
read_failure:
ERROR("squashfs_read_data failed to read block 0x%llx\n",
(unsigned long long) index);
kfree(bh);
return -EIO;
}
其中 squashfs_read_data->squashfs_decompress 是解压缩过程。因为此时的squashfs文件系统使用xz方式压缩,所以解压缩时使用 squashfs_xz_uncompress。
【正文】xz解压缩过程
下面介绍下xz方式解压缩squashfs文件:squashfs_decompress->squashfs_xz_uncompress
/*
解压缩操作,将flash上读出的数据解压缩
msblk:超级块信息squashfs_sb_info ;
bh:buffer_head,bh->b_data中保存从flash上读取的数据;
b:表示读取的数据长度对应的逻辑块个数;
offset:表示读取的flash地址对应的逻辑块偏移地址,一个逻辑块为1024byte,offset=index&0x3ff
length:表示从flash上读取的数据长度
*/
static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
enum xz_ret xz_err;
int avail, total = 0, k = 0;
struct squashfs_xz *stream = strm;
/*
xz_dec->sequence=SEQ_STREAM_HEADER;解压类型
xz_dec->temp.pos=0;解压位置;
xz_dec->temp.size=STREAM_HEADER_SIZE;需要解压12byte的头;
*/
xz_dec_reset(stream->state);
stream->buf.in_pos = 0;
stream->buf.in_size = 0;
stream->buf.out_pos = 0;
stream->buf.out_size = PAGE_CACHE_SIZE;
stream->buf.out = squashfs_first_page(output);
/*
while循环中逐个逻辑块进行解压,msblk->devblksize=1024byte ;注意不一定一个循环就解压一个块,
如果一个循环内因为 xz_dec_run异常未解压完一个块内指定长度的数据,则循环解压这个块的数据,直到完成.
*/
do {
/* 分两种情况:读取的flash地址是逻辑块大小对齐的情况和非对齐情况 ; 对齐时:offset=0; 非对齐时:offset != 0;
第一次循环时:stream->buf.in_pos==stream->buf.in_size=0;
因为之后stream->buf.in_pos会在 xz_dec_run中更新,该值表示解压进度,即已经解压到了一个块内的哪个位置(这个块中需要解压的数据是否解压完,
读取地址可能非块大小对齐,所以需要解压的数据不一定是1024即一个块的大小,如果in_pos==in_size表示这个块内需要解压的数据解压完了);
stream->buf.in_size在此初始化为一个逻辑块内真正要解压的数据大小.用xz_buf->in_size和xz_dec_run处理后的xz_buf->in_pos来判断解压进度.
*/
if (stream->buf.in_pos == stream->buf.in_size && k < b) {
/*
此次计算一个块里需要解压的数据的大小(可能不需要解压一整个块),因为读取的地址范围可能非逻辑块对齐;
举例:假设读取flash分区内偏移位1020的地址,读取大小为10byte:则
要先读取第0个块的:1020-1024地址范围和第1个块的0-6地址范围;
*/
avail = min(length, msblk->devblksize - offset);
length -= avail;
/* 要解压的数据保存的地址*/
stream->buf.in = bh[k]->b_data + offset;
/* 一个块里要解压的数据大小 */
stream->buf.in_size = avail;
/* 一个块解压之前,解压进度当然是0 */
stream->buf.in_pos = 0;
offset = 0;
}
if (stream->buf.out_pos == stream->buf.out_size) {
stream->buf.out = squashfs_next_page(output);
if (stream->buf.out != NULL) {
stream->buf.out_pos = 0;
total += PAGE_CACHE_SIZE;
}
}
/*1 stream->buf=xz_buf:该结构里保存了读取到的一个逻辑块里的内容(xz_buf->in)和读取的大小(xz_buf->in_size),
因为读取长度可能非逻辑块对齐,所以读取大小也可能不是1024字节.
stream->buf.in_pos表示解压到了一个块的地址区间上的哪个位置,当一个块第一次解压时in_pos初始化为0,
只有当xz_dec_run里面全部解压完,即in_pos==in_size,时才会解压下一个逻辑块;如果xz_dec_run处理之后,
in_pos!=in_size表示该逻辑块未解压完,while循环处理中不会处理下一个块
2 stream->state=xz_dec第一次循环时使用前面xz_dec_reset()初始化的信息,表示SEQ_STREAM_HEADER;
*/
xz_err = xz_dec_run(stream->state, &stream->buf);
if (stream->buf.in_pos == stream->buf.in_size && k < b)
put_bh(bh[k++]);
} while (xz_err == XZ_OK);
squashfs_finish_page(output);
if (xz_err != XZ_STREAM_END || k < b)
goto out;
return total + stream->buf.out_pos;
out:
for (; k < b; k++)
put_bh(bh[k]);
return -EIO;
}
squashfs_xz_uncompress->xz_dec_run
XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
{
size_t in_start;
size_t out_start;
enum xz_ret ret;
if (DEC_IS_SINGLE(s->mode))
xz_dec_reset(s);
in_start = b->in_pos;
out_start = b->out_pos;
ret = dec_main(s, b);
if (DEC_IS_SINGLE(s->mode)) {
if (ret == XZ_OK)
ret = b->in_pos == b->in_size
? XZ_DATA_ERROR : XZ_BUF_ERROR;
if (ret != XZ_STREAM_END) {
b->in_pos = in_start;
b->out_pos = out_start;
}
} else if (ret == XZ_OK && in_start == b->in_pos
&& out_start == b->out_pos) {
if (s->allow_buf_error)
ret = XZ_BUF_ERROR;
s->allow_buf_error = true;
} else {
s->allow_buf_error = false;
}
return ret;
}
squashfs_xz_uncompress->xz_dec_run->dec_main
/*
 * Decoder state machine driven by s->sequence.
 * NOTE: this is an abridged excerpt; only the SEQ_STREAM_HEADER state is
 * shown and the remaining states are elided by the article.
 */
static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
{
while(true)
/* As noted for squashfs_xz_uncompress, the first run after a reset starts with s->sequence = SEQ_STREAM_HEADER (per the original annotation). */
switch(s->sequence){
/* Decode the stream header (STREAM_HEADER_SIZE = 12 bytes per the original annotation). */
case SEQ_STREAM_HEADER:
/*
 * Copy the header bytes into xz_dec->temp.buf; a false return presumably
 * means more input is needed, so XZ_OK is returned to the caller.
 */
if (!fill_temp(s, b))
return XZ_OK;
/*
* If dec_stream_header() returns
* XZ_UNSUPPORTED_CHECK, it is still possible
* to continue decoding if working in multi-call
* mode. Thus, update s->sequence before calling
* dec_stream_header().
*/
s->sequence = SEQ_BLOCK_START;
ret = dec_stream_header(s);
if (ret != XZ_OK)
return ret;
/* The remaining states are decoded in order and are not covered one by one here. */
case..
}
}
【总结】
本文以squashfs文件系统为例,简单介绍了下linux系统中xz的解压缩过程。撰写本文主要目的是作为另一篇博文: squashfs文件的读取过程 的后续.