| From stable+bounces-171741-greg=kroah.com@vger.kernel.org Tue Aug 19 05:18:57 2025 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Mon, 18 Aug 2025 23:18:42 -0400 |
| Subject: btrfs: send: use fallocate for hole punching with send stream v2 |
| To: stable@vger.kernel.org |
| Cc: Filipe Manana <fdmanana@suse.com>, Boris Burkov <boris@bur.io>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org> |
| Message-ID: <20250819031842.309897-1-sashal@kernel.org> |
| |
| From: Filipe Manana <fdmanana@suse.com> |
| |
| [ Upstream commit 005b0a0c24e1628313e951516b675109a92cacfe ] |
| |
| Currently holes are sent as writes full of zeroes, which results in |
| unnecessarily using disk space at the receiving end and increasing the |
| stream size. |
| |
| In some cases we avoid sending writes of zeroes, like during a full |
| send operation where we just skip writes for holes. |
| |
| But in other cases we still fill pre-existing holes with writes of zeroes, |
| as in this scenario: |
| |
| 1) We have a file with a hole in the range [2M, 3M), we snapshot the |
| subvolume and do a full send. The range [2M, 3M) stays as a hole at |
| the receiver since we skip sending write commands full of zeroes; |
| |
| 2) We punch a hole for the range [3M, 4M) in our file, so that now it |
| has a 2M hole in the range [2M, 4M), and snapshot the subvolume. |
| Now if we do an incremental send, we will send write commands full |
| of zeroes for the range [2M, 4M), removing the hole for [2M, 3M) at |
| the receiver. |
| |
| We could improve cases such as this last one by doing additional |
| comparisons of file extent items (or their absence) between the parent |
| and send snapshots, but that's a lot of code to add plus additional CPU |
| and IO costs. |
| |
| Send stream v2 already defines a fallocate command, and btrfs-progs has |
| implemented a callback to execute it ever since send stream v2 support |
| was added there, so update the kernel to use fallocate for punching |
| holes in v2+ streams. |
| |
| Test coverage is provided by btrfs/284 which is a version of btrfs/007 |
| that exercises send stream v2 instead of v1, using fsstress with random |
| operations and fssum to verify file contents. |
| |
| Link: https://github.com/kdave/btrfs-progs/issues/1001 |
| CC: stable@vger.kernel.org # 6.1+ |
| Reviewed-by: Boris Burkov <boris@bur.io> |
| Signed-off-by: Filipe Manana <fdmanana@suse.com> |
| Reviewed-by: David Sterba <dsterba@suse.com> |
| Signed-off-by: David Sterba <dsterba@suse.com> |
| [ Replaced get_cur_inode_path() with fs_path_alloc() and get_cur_path() ] |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| fs/btrfs/send.c | 39 +++++++++++++++++++++++++++++++++++++++ |
| 1 file changed, 39 insertions(+) |
| |
| --- a/fs/btrfs/send.c |
| +++ b/fs/btrfs/send.c |
| @@ -4,6 +4,7 @@ |
| */ |
| |
| #include <linux/bsearch.h> |
| +#include <linux/falloc.h> |
| #include <linux/fs.h> |
| #include <linux/file.h> |
| #include <linux/sort.h> |
| @@ -5231,6 +5232,36 @@ out: |
| return ret; |
| } |
| |
| +static int send_fallocate(struct send_ctx *sctx, u32 mode, u64 offset, u64 len) |
| +{ |
| + struct fs_path *p; |
| + int ret; |
| + |
| + p = fs_path_alloc(); |
| + if (!p) |
| + return -ENOMEM; |
| + |
| + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); |
| + if (ret < 0) |
| + goto out; |
| + |
| + ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE); |
| + if (ret < 0) |
| + goto out; |
| + |
| + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
| + TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_MODE, mode); |
| + TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); |
| + TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); |
| + |
| + ret = send_cmd(sctx); |
| + |
| +tlv_put_failure: |
| +out: |
| + fs_path_free(p); |
| + return ret; |
| +} |
| + |
| static int send_hole(struct send_ctx *sctx, u64 end) |
| { |
| struct fs_path *p = NULL; |
| @@ -5239,6 +5270,14 @@ static int send_hole(struct send_ctx *sc |
| int ret = 0; |
| |
| /* |
| + * Starting with send stream v2 we have fallocate and can use it to |
| + * punch holes instead of sending writes full of zeroes. |
| + */ |
| + if (proto_cmd_ok(sctx, BTRFS_SEND_C_FALLOCATE)) |
| + return send_fallocate(sctx, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, |
| + offset, end - offset); |
| + |
| + /* |
| * A hole that starts at EOF or beyond it. Since we do not yet support |
| * fallocate (for extent preallocation and hole punching), sending a |
| * write of zeroes starting at EOF or beyond would later require issuing |