| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (C) 2008 Oracle. All rights reserved. |
| */ |
| |
| #include <linux/kernel.h> |
| #include <linux/slab.h> |
| #include <linux/mm.h> |
| #include <linux/init.h> |
| #include <linux/err.h> |
| #include <linux/sched.h> |
| #include <linux/pagemap.h> |
| #include <linux/bio.h> |
| #include <linux/lzo.h> |
| #include <linux/refcount.h> |
| #include "messages.h" |
| #include "compression.h" |
| #include "ctree.h" |
| #include "super.h" |
| #include "btrfs_inode.h" |
| |
| #define LZO_LEN 4 |
| |
| /* |
| * Btrfs LZO compression format |
| * |
| * Regular and inlined LZO compressed data extents consist of: |
| * |
| * 1. Header |
| * Fixed size. LZO_LEN (4) bytes long, LE32. |
| * Records the total size (including the header) of compressed data. |
| * |
| * 2. Segment(s) |
| * Variable size. Each segment includes one segment header, followed by data |
| * payload. |
| * One regular LZO compressed extent can have one or more segments. |
| * For inlined LZO compressed extent, only one segment is allowed. |
| * One segment represents at most one sector of uncompressed data. |
| * |
| * 2.1 Segment header |
| * Fixed size. LZO_LEN (4) bytes long, LE32. |
| * Records the total size of the segment (not including the header). |
| * Segment header never crosses sector boundary, thus it's possible to |
| * have at most 3 padding zeros at the end of the sector. |
| * |
| * 2.2 Data Payload |
| * Variable size. The upper size limit is lzo1x_worst_compress(sectorsize), |
| * which is 4419 for a 4KiB sectorsize. |
| * |
| * Example with 4K sectorsize: |
| * Page 1: |
| * 0 0x2 0x4 0x6 0x8 0xa 0xc 0xe 0x10 |
| * 0x0000 | Header | SegHdr 01 | Data payload 01 ... | |
| * ... |
| * 0x0ff0 | SegHdr N | Data payload N ... |00| |
| * ^^ padding zeros |
| * Page 2: |
| * 0x1000 | SegHdr N+1| Data payload N+1 ... | |
| */ |
| |
| struct workspace { |
| void *mem; |
| void *buf; /* where decompressed data goes */ |
| void *cbuf; /* where compressed data goes */ |
| struct list_head list; |
| }; |
| |
| static u32 workspace_buf_length(const struct btrfs_fs_info *fs_info) |
| { |
| return lzo1x_worst_compress(fs_info->sectorsize); |
| } |
| static u32 workspace_cbuf_length(const struct btrfs_fs_info *fs_info) |
| { |
| return lzo1x_worst_compress(fs_info->sectorsize); |
| } |
| |
| void lzo_free_workspace(struct list_head *ws) |
| { |
| struct workspace *workspace = list_entry(ws, struct workspace, list); |
| |
| kvfree(workspace->buf); |
| kvfree(workspace->cbuf); |
| kvfree(workspace->mem); |
| kfree(workspace); |
| } |
| |
| struct list_head *lzo_alloc_workspace(struct btrfs_fs_info *fs_info) |
| { |
| struct workspace *workspace; |
| |
| workspace = kzalloc_obj(*workspace); |
| if (!workspace) |
| return ERR_PTR(-ENOMEM); |
| |
| workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN); |
| workspace->buf = kvmalloc(workspace_buf_length(fs_info), GFP_KERNEL | __GFP_NOWARN); |
| workspace->cbuf = kvmalloc(workspace_cbuf_length(fs_info), GFP_KERNEL | __GFP_NOWARN); |
| if (!workspace->mem || !workspace->buf || !workspace->cbuf) |
| goto fail; |
| |
| INIT_LIST_HEAD(&workspace->list); |
| |
| return &workspace->list; |
| fail: |
| lzo_free_workspace(&workspace->list); |
| return ERR_PTR(-ENOMEM); |
| } |
| |
| static inline void write_compress_length(char *buf, size_t len) |
| { |
| __le32 dlen; |
| |
| dlen = cpu_to_le32(len); |
| memcpy(buf, &dlen, LZO_LEN); |
| } |
| |
| static inline size_t read_compress_length(const char *buf) |
| { |
| __le32 dlen; |
| |
| memcpy(&dlen, buf, LZO_LEN); |
| return le32_to_cpu(dlen); |
| } |
| |
| /* |
| * Write data into @out_folio and queue it into @out_bio. |
| * |
| * Return 0 if everything is fine and @total_out will be increased. |
| * Return <0 for error. |
| * |
| * The @out_folio can be NULL after a full folio is queued. |
| * Thus the caller should check and allocate a new folio when needed. |
| */ |
| static int write_and_queue_folio(struct bio *out_bio, struct folio **out_folio, |
| u32 *total_out, u32 write_len) |
| { |
| const u32 fsize = folio_size(*out_folio); |
| const u32 foffset = offset_in_folio(*out_folio, *total_out); |
| |
| ASSERT(out_folio && *out_folio); |
| /* Should not cross folio boundary. */ |
| ASSERT(foffset + write_len <= fsize); |
| |
| /* We can not use bio_add_folio_nofail() which doesn't do any merge. */ |
| if (!bio_add_folio(out_bio, *out_folio, write_len, foffset)) { |
| /* |
| * We have allocated a bio that havs BTRFS_MAX_COMPRESSED_PAGES |
| * vecs, and all ranges inside the same folio should have been |
| * merged. If bio_add_folio() still failed, that means we have |
| * reached the bvec limits. |
| * |
| * This should only happen at the beginning of a folio, and |
| * caller is responsible for releasing the folio, since it's |
| * not yet queued into the bio. |
| */ |
| ASSERT(IS_ALIGNED(*total_out, fsize)); |
| return -E2BIG; |
| } |
| |
| *total_out += write_len; |
| /* |
| * The full folio has been filled and queued, reset @out_folio to NULL, |
| * so that error handling is fully handled by the bio. |
| */ |
| if (IS_ALIGNED(*total_out, fsize)) |
| *out_folio = NULL; |
| return 0; |
| } |
| |
| /* |
| * Copy compressed data to bio. |
| * |
| * @out_bio: The bio that will contain all the compressed data. |
| * @compressed_data: The compressed data of this segment. |
| * @compressed_size: The size of the compressed data. |
| * @out_folio: The current output folio, will be updated if a new |
| * folio is allocated. |
| * @total_out: The total bytes of current output. |
| * @max_out: The maximum size of the compressed data. |
| * |
| * Will do: |
| * |
| * - Write a segment header into the destination |
| * - Copy the compressed buffer into the destination |
| * - Make sure we have enough space in the last sector to fit a segment header |
| * If not, we will pad at most (LZO_LEN (4)) - 1 bytes of zeros. |
| * - If a full folio is filled, it will be queued into @out_bio, and @out_folio |
| * will be updated. |
| * |
| * Will allocate new pages when needed. |
| */ |
| static int copy_compressed_data_to_bio(struct btrfs_fs_info *fs_info, |
| struct bio *out_bio, |
| const char *compressed_data, |
| size_t compressed_size, |
| struct folio **out_folio, |
| u32 *total_out, u32 max_out) |
| { |
| const u32 sectorsize = fs_info->sectorsize; |
| const u32 sectorsize_bits = fs_info->sectorsize_bits; |
| const u32 fsize = btrfs_min_folio_size(fs_info); |
| const u32 old_size = out_bio->bi_iter.bi_size; |
| u32 copy_start; |
| u32 sector_bytes_left; |
| char *kaddr; |
| int ret; |
| |
| ASSERT(out_folio); |
| |
| /* There should be at least a lzo header queued. */ |
| ASSERT(old_size); |
| ASSERT(old_size == *total_out); |
| |
| /* |
| * We never allow a segment header crossing sector boundary, previous |
| * run should ensure we have enough space left inside the sector. |
| */ |
| ASSERT((old_size >> sectorsize_bits) == (old_size + LZO_LEN - 1) >> sectorsize_bits); |
| |
| if (!*out_folio) { |
| *out_folio = btrfs_alloc_compr_folio(fs_info); |
| if (!*out_folio) |
| return -ENOMEM; |
| } |
| |
| /* Write the segment header first. */ |
| kaddr = kmap_local_folio(*out_folio, offset_in_folio(*out_folio, *total_out)); |
| write_compress_length(kaddr, compressed_size); |
| kunmap_local(kaddr); |
| ret = write_and_queue_folio(out_bio, out_folio, total_out, LZO_LEN); |
| if (ret < 0) |
| return ret; |
| |
| copy_start = *total_out; |
| |
| /* Copy compressed data. */ |
| while (*total_out - copy_start < compressed_size) { |
| u32 copy_len = min_t(u32, sectorsize - *total_out % sectorsize, |
| copy_start + compressed_size - *total_out); |
| u32 foffset = *total_out & (fsize - 1); |
| |
| /* With the range copied, we're larger than the original range. */ |
| if (((*total_out + copy_len) >> sectorsize_bits) >= |
| max_out >> sectorsize_bits) |
| return -E2BIG; |
| |
| if (!*out_folio) { |
| *out_folio = btrfs_alloc_compr_folio(fs_info); |
| if (!*out_folio) |
| return -ENOMEM; |
| } |
| |
| kaddr = kmap_local_folio(*out_folio, foffset); |
| memcpy(kaddr, compressed_data + *total_out - copy_start, copy_len); |
| kunmap_local(kaddr); |
| ret = write_and_queue_folio(out_bio, out_folio, total_out, copy_len); |
| if (ret < 0) |
| return ret; |
| } |
| |
| /* |
| * Check if we can fit the next segment header into the remaining space |
| * of the sector. |
| */ |
| sector_bytes_left = round_up(*total_out, sectorsize) - *total_out; |
| if (sector_bytes_left >= LZO_LEN || sector_bytes_left == 0) |
| return 0; |
| |
| ASSERT(*out_folio); |
| |
| /* The remaining size is not enough, pad it with zeros */ |
| folio_zero_range(*out_folio, offset_in_folio(*out_folio, *total_out), sector_bytes_left); |
| return write_and_queue_folio(out_bio, out_folio, total_out, sector_bytes_left); |
| } |
| |
| int lzo_compress_bio(struct list_head *ws, struct compressed_bio *cb) |
| { |
| struct btrfs_inode *inode = cb->bbio.inode; |
| struct btrfs_fs_info *fs_info = inode->root->fs_info; |
| struct workspace *workspace = list_entry(ws, struct workspace, list); |
| struct bio *bio = &cb->bbio.bio; |
| const u64 start = cb->start; |
| const u32 len = cb->len; |
| const u32 sectorsize = fs_info->sectorsize; |
| const u32 min_folio_size = btrfs_min_folio_size(fs_info); |
| struct address_space *mapping = inode->vfs_inode.i_mapping; |
| struct folio *folio_in = NULL; |
| struct folio *folio_out = NULL; |
| char *sizes_ptr; |
| int ret = 0; |
| /* Points to the file offset of input data. */ |
| u64 cur_in = start; |
| /* Points to the current output byte. */ |
| u32 total_out = 0; |
| |
| ASSERT(bio->bi_iter.bi_size == 0); |
| ASSERT(len); |
| |
| folio_out = btrfs_alloc_compr_folio(fs_info); |
| if (!folio_out) |
| return -ENOMEM; |
| |
| /* Queue a segment header first. */ |
| ret = write_and_queue_folio(bio, &folio_out, &total_out, LZO_LEN); |
| /* The first header should not fail. */ |
| ASSERT(ret == 0); |
| |
| while (cur_in < start + len) { |
| char *data_in; |
| const u32 sectorsize_mask = sectorsize - 1; |
| u32 sector_off = (cur_in - start) & sectorsize_mask; |
| u32 in_len; |
| size_t out_len; |
| |
| /* Get the input page first. */ |
| if (!folio_in) { |
| ret = btrfs_compress_filemap_get_folio(mapping, cur_in, &folio_in); |
| if (ret < 0) |
| goto out; |
| } |
| |
| /* Compress at most one sector of data each time. */ |
| in_len = min_t(u32, start + len - cur_in, sectorsize - sector_off); |
| ASSERT(in_len); |
| data_in = kmap_local_folio(folio_in, offset_in_folio(folio_in, cur_in)); |
| ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, &out_len, |
| workspace->mem); |
| kunmap_local(data_in); |
| if (unlikely(ret < 0)) { |
| /* lzo1x_1_compress never fails. */ |
| ret = -EIO; |
| goto out; |
| } |
| |
| ret = copy_compressed_data_to_bio(fs_info, bio, workspace->cbuf, out_len, |
| &folio_out, &total_out, len); |
| if (ret < 0) |
| goto out; |
| |
| cur_in += in_len; |
| |
| /* |
| * Check if we're making it bigger after two sectors. And if |
| * it is so, give up. |
| */ |
| if (cur_in - start > sectorsize * 2 && cur_in - start < total_out) { |
| ret = -E2BIG; |
| goto out; |
| } |
| |
| /* Check if we have reached input folio boundary. */ |
| if (IS_ALIGNED(cur_in, min_folio_size)) { |
| folio_put(folio_in); |
| folio_in = NULL; |
| } |
| } |
| /* |
| * The last folio is already queued. Bio is responsible for freeing |
| * those folios now. |
| */ |
| folio_out = NULL; |
| |
| /* Store the size of all chunks of compressed data */ |
| sizes_ptr = kmap_local_folio(bio_first_folio_all(bio), 0); |
| write_compress_length(sizes_ptr, total_out); |
| kunmap_local(sizes_ptr); |
| out: |
| /* |
| * We can only free the folio that has no part queued into the bio. |
| * |
| * As any folio that is already queued into bio will be released by |
| * the endio function of bio. |
| */ |
| if (folio_out && IS_ALIGNED(total_out, min_folio_size)) { |
| btrfs_free_compr_folio(folio_out); |
| folio_out = NULL; |
| } |
| if (folio_in) |
| folio_put(folio_in); |
| return ret; |
| } |
| |
| static struct folio *get_current_folio(struct compressed_bio *cb, struct folio_iter *fi, |
| u32 *cur_folio_index, u32 cur_in) |
| { |
| struct btrfs_fs_info *fs_info = cb_to_fs_info(cb); |
| const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; |
| |
| ASSERT(cur_folio_index); |
| |
| /* Need to switch to the next folio. */ |
| if (cur_in >> min_folio_shift != *cur_folio_index) { |
| /* We can only do the switch one folio a time. */ |
| ASSERT(cur_in >> min_folio_shift == *cur_folio_index + 1); |
| |
| bio_next_folio(fi, &cb->bbio.bio); |
| (*cur_folio_index)++; |
| } |
| return fi->folio; |
| } |
| |
| /* |
| * Copy the compressed segment payload into @dest. |
| * |
| * For the payload there will be no padding, just need to do page switching. |
| */ |
| static void copy_compressed_segment(struct compressed_bio *cb, |
| struct folio_iter *fi, u32 *cur_folio_index, |
| char *dest, u32 len, u32 *cur_in) |
| { |
| u32 orig_in = *cur_in; |
| |
| while (*cur_in < orig_in + len) { |
| struct folio *cur_folio = get_current_folio(cb, fi, cur_folio_index, *cur_in); |
| u32 copy_len; |
| |
| ASSERT(cur_folio); |
| copy_len = min_t(u32, orig_in + len - *cur_in, |
| folio_size(cur_folio) - offset_in_folio(cur_folio, *cur_in)); |
| ASSERT(copy_len); |
| |
| memcpy_from_folio(dest + *cur_in - orig_in, cur_folio, |
| offset_in_folio(cur_folio, *cur_in), copy_len); |
| |
| *cur_in += copy_len; |
| } |
| } |
| |
| int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) |
| { |
| struct workspace *workspace = list_entry(ws, struct workspace, list); |
| struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info; |
| const u32 sectorsize = fs_info->sectorsize; |
| struct folio_iter fi; |
| char *kaddr; |
| int ret; |
| /* Compressed data length, can be unaligned */ |
| u32 len_in; |
| /* Offset inside the compressed data */ |
| u32 cur_in = 0; |
| /* Bytes decompressed so far */ |
| u32 cur_out = 0; |
| /* The current folio index number inside the bio. */ |
| u32 cur_folio_index = 0; |
| |
| bio_first_folio(&fi, &cb->bbio.bio, 0); |
| /* There must be a compressed folio and matches the sectorsize. */ |
| if (unlikely(!fi.folio)) |
| return -EINVAL; |
| ASSERT(folio_size(fi.folio) == btrfs_min_folio_size(fs_info)); |
| kaddr = kmap_local_folio(fi.folio, 0); |
| len_in = read_compress_length(kaddr); |
| kunmap_local(kaddr); |
| cur_in += LZO_LEN; |
| |
| /* |
| * LZO header length check |
| * |
| * The total length should not exceed the maximum extent length, |
| * and all sectors should be used. |
| * If this happens, it means the compressed extent is corrupted. |
| */ |
| if (unlikely(len_in > min_t(size_t, BTRFS_MAX_COMPRESSED, cb->compressed_len) || |
| round_up(len_in, sectorsize) < cb->compressed_len)) { |
| struct btrfs_inode *inode = cb->bbio.inode; |
| |
| btrfs_err(fs_info, |
| "lzo header invalid, root %llu inode %llu offset %llu lzo len %u compressed len %u", |
| btrfs_root_id(inode->root), btrfs_ino(inode), |
| cb->start, len_in, cb->compressed_len); |
| return -EUCLEAN; |
| } |
| |
| /* Go through each lzo segment */ |
| while (cur_in < len_in) { |
| struct folio *cur_folio; |
| /* Length of the compressed segment */ |
| u32 seg_len; |
| u32 sector_bytes_left; |
| size_t out_len = lzo1x_worst_compress(sectorsize); |
| |
| /* |
| * We should always have enough space for one segment header |
| * inside current sector. |
| */ |
| ASSERT(cur_in / sectorsize == |
| (cur_in + LZO_LEN - 1) / sectorsize); |
| cur_folio = get_current_folio(cb, &fi, &cur_folio_index, cur_in); |
| ASSERT(cur_folio); |
| kaddr = kmap_local_folio(cur_folio, 0); |
| seg_len = read_compress_length(kaddr + offset_in_folio(cur_folio, cur_in)); |
| kunmap_local(kaddr); |
| cur_in += LZO_LEN; |
| |
| if (unlikely(seg_len > workspace_cbuf_length(fs_info))) { |
| struct btrfs_inode *inode = cb->bbio.inode; |
| |
| /* |
| * seg_len shouldn't be larger than we have allocated |
| * for workspace->cbuf |
| */ |
| btrfs_err(fs_info, |
| "lzo segment too big, root %llu inode %llu offset %llu len %u", |
| btrfs_root_id(inode->root), btrfs_ino(inode), |
| cb->start, seg_len); |
| return -EIO; |
| } |
| |
| /* Copy the compressed segment payload into workspace */ |
| copy_compressed_segment(cb, &fi, &cur_folio_index, workspace->cbuf, |
| seg_len, &cur_in); |
| |
| /* Decompress the data */ |
| ret = lzo1x_decompress_safe(workspace->cbuf, seg_len, |
| workspace->buf, &out_len); |
| if (unlikely(ret != LZO_E_OK)) { |
| struct btrfs_inode *inode = cb->bbio.inode; |
| |
| btrfs_err(fs_info, |
| "lzo decompression failed, error %d root %llu inode %llu offset %llu", |
| ret, btrfs_root_id(inode->root), btrfs_ino(inode), |
| cb->start); |
| return -EIO; |
| } |
| |
| /* Copy the data into inode pages */ |
| ret = btrfs_decompress_buf2page(workspace->buf, out_len, cb, cur_out); |
| cur_out += out_len; |
| |
| /* All data read, exit */ |
| if (ret == 0) |
| return 0; |
| ret = 0; |
| |
| /* Check if the sector has enough space for a segment header */ |
| sector_bytes_left = sectorsize - (cur_in % sectorsize); |
| if (sector_bytes_left >= LZO_LEN) |
| continue; |
| |
| /* Skip the padding zeros */ |
| cur_in += sector_bytes_left; |
| } |
| |
| return 0; |
| } |
| |
| int lzo_decompress(struct list_head *ws, const u8 *data_in, |
| struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen, |
| size_t destlen) |
| { |
| struct workspace *workspace = list_entry(ws, struct workspace, list); |
| struct btrfs_fs_info *fs_info = folio_to_fs_info(dest_folio); |
| const u32 sectorsize = fs_info->sectorsize; |
| size_t in_len; |
| size_t out_len; |
| size_t max_segment_len = workspace_buf_length(fs_info); |
| int ret; |
| |
| if (unlikely(srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2)) |
| return -EUCLEAN; |
| |
| in_len = read_compress_length(data_in); |
| if (unlikely(in_len != srclen)) |
| return -EUCLEAN; |
| data_in += LZO_LEN; |
| |
| in_len = read_compress_length(data_in); |
| if (unlikely(in_len != srclen - LZO_LEN * 2)) |
| return -EUCLEAN; |
| data_in += LZO_LEN; |
| |
| out_len = sectorsize; |
| ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); |
| if (unlikely(ret != LZO_E_OK)) { |
| struct btrfs_inode *inode = folio_to_inode(dest_folio); |
| |
| btrfs_err(fs_info, |
| "lzo decompression failed, error %d root %llu inode %llu offset %llu", |
| ret, btrfs_root_id(inode->root), btrfs_ino(inode), |
| folio_pos(dest_folio)); |
| return -EIO; |
| } |
| |
| ASSERT(out_len <= sectorsize); |
| memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, out_len); |
| /* Early end, considered as an error. */ |
| if (unlikely(out_len < destlen)) { |
| folio_zero_range(dest_folio, dest_pgoff + out_len, destlen - out_len); |
| return -EIO; |
| } |
| |
| return 0; |
| } |
| |
| const struct btrfs_compress_levels btrfs_lzo_compress = { |
| .max_level = 1, |
| .default_level = 1, |
| }; |