|  | // SPDX-License-Identifier: GPL-2.0 | 
|  | /* | 
|  | * Copyright (C) 2023 Western Digital Corporation or its affiliates. | 
|  | */ | 
|  |  | 
|  | #include <linux/btrfs_tree.h> | 
|  | #include "ctree.h" | 
|  | #include "fs.h" | 
|  | #include "accessors.h" | 
|  | #include "transaction.h" | 
|  | #include "disk-io.h" | 
|  | #include "raid-stripe-tree.h" | 
|  | #include "volumes.h" | 
|  | #include "print-tree.h" | 
|  |  | 
|  | static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans, | 
|  | struct btrfs_path *path, | 
|  | const struct btrfs_key *oldkey, | 
|  | u64 newlen, u64 frontpad) | 
|  | { | 
|  | struct btrfs_root *stripe_root = trans->fs_info->stripe_root; | 
|  | struct btrfs_stripe_extent *extent, *newitem; | 
|  | struct extent_buffer *leaf; | 
|  | int slot; | 
|  | size_t item_size; | 
|  | struct btrfs_key newkey = { | 
|  | .objectid = oldkey->objectid + frontpad, | 
|  | .type = BTRFS_RAID_STRIPE_KEY, | 
|  | .offset = newlen, | 
|  | }; | 
|  | int ret; | 
|  |  | 
|  | ASSERT(newlen > 0); | 
|  | ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY); | 
|  |  | 
|  | leaf = path->nodes[0]; | 
|  | slot = path->slots[0]; | 
|  | item_size = btrfs_item_size(leaf, slot); | 
|  |  | 
|  | newitem = kzalloc(item_size, GFP_NOFS); | 
|  | if (!newitem) | 
|  | return -ENOMEM; | 
|  |  | 
|  | extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent); | 
|  |  | 
|  | for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) { | 
|  | struct btrfs_raid_stride *stride = &extent->strides[i]; | 
|  | u64 phys; | 
|  |  | 
|  | phys = btrfs_raid_stride_physical(leaf, stride) + frontpad; | 
|  | btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys); | 
|  | } | 
|  |  | 
|  | ret = btrfs_del_item(trans, stripe_root, path); | 
|  | if (ret) | 
|  | goto out; | 
|  |  | 
|  | btrfs_release_path(path); | 
|  | ret = btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size); | 
|  |  | 
|  | out: | 
|  | kfree(newitem); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length) | 
|  | { | 
|  | struct btrfs_fs_info *fs_info = trans->fs_info; | 
|  | struct btrfs_root *stripe_root = fs_info->stripe_root; | 
|  | BTRFS_PATH_AUTO_FREE(path); | 
|  | struct btrfs_key key; | 
|  | struct extent_buffer *leaf; | 
|  | u64 found_start; | 
|  | u64 found_end; | 
|  | u64 end = start + length; | 
|  | int slot; | 
|  | int ret; | 
|  |  | 
|  | if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE) || !stripe_root) | 
|  | return 0; | 
|  |  | 
|  | if (!btrfs_is_testing(fs_info)) { | 
|  | struct btrfs_chunk_map *map; | 
|  | bool use_rst; | 
|  |  | 
|  | map = btrfs_find_chunk_map(fs_info, start, length); | 
|  | if (!map) | 
|  | return -EINVAL; | 
|  | use_rst = btrfs_need_stripe_tree_update(fs_info, map->type); | 
|  | btrfs_free_chunk_map(map); | 
|  | if (!use_rst) | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | path = btrfs_alloc_path(); | 
|  | if (!path) | 
|  | return -ENOMEM; | 
|  |  | 
|  | while (1) { | 
|  | key.objectid = start; | 
|  | key.type = BTRFS_RAID_STRIPE_KEY; | 
|  | key.offset = 0; | 
|  |  | 
|  | ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1); | 
|  | if (ret < 0) | 
|  | break; | 
|  |  | 
|  | if (path->slots[0] == btrfs_header_nritems(path->nodes[0])) | 
|  | path->slots[0]--; | 
|  |  | 
|  | leaf = path->nodes[0]; | 
|  | slot = path->slots[0]; | 
|  | btrfs_item_key_to_cpu(leaf, &key, slot); | 
|  | found_start = key.objectid; | 
|  | found_end = found_start + key.offset; | 
|  | ret = 0; | 
|  |  | 
|  | /* | 
|  | * The stripe extent starts before the range we want to delete, | 
|  | * but the range spans more than one stripe extent: | 
|  | * | 
|  | * |--- RAID Stripe Extent ---||--- RAID Stripe Extent ---| | 
|  | *        |--- keep  ---|--- drop ---| | 
|  | * | 
|  | * This means we have to get the previous item, truncate its | 
|  | * length and then restart the search. | 
|  | */ | 
|  | if (found_start > start) { | 
|  | if (slot == 0) { | 
|  | ret = btrfs_previous_item(stripe_root, path, start, | 
|  | BTRFS_RAID_STRIPE_KEY); | 
|  | if (ret) { | 
|  | if (ret > 0) | 
|  | ret = -ENOENT; | 
|  | break; | 
|  | } | 
|  | } else { | 
|  | path->slots[0]--; | 
|  | } | 
|  |  | 
|  | leaf = path->nodes[0]; | 
|  | slot = path->slots[0]; | 
|  | btrfs_item_key_to_cpu(leaf, &key, slot); | 
|  | found_start = key.objectid; | 
|  | found_end = found_start + key.offset; | 
|  | ASSERT(found_start <= start); | 
|  | } | 
|  |  | 
|  | if (key.type != BTRFS_RAID_STRIPE_KEY) | 
|  | break; | 
|  |  | 
|  | /* That stripe ends before we start, we're done. */ | 
|  | if (found_end <= start) | 
|  | break; | 
|  |  | 
|  | trace_btrfs_raid_extent_delete(fs_info, start, end, | 
|  | found_start, found_end); | 
|  |  | 
|  | /* | 
|  | * The stripe extent starts before the range we want to delete | 
|  | * and ends after the range we want to delete, i.e. we're | 
|  | * punching a hole in the stripe extent: | 
|  | * | 
|  | *  |--- RAID Stripe Extent ---| | 
|  | *  | keep |--- drop ---| keep | | 
|  | * | 
|  | * This means we need to a) truncate the existing item and b) | 
|  | * create a second item for the remaining range. | 
|  | */ | 
|  | if (found_start < start && found_end > end) { | 
|  | size_t item_size; | 
|  | u64 diff_start = start - found_start; | 
|  | u64 diff_end = found_end - end; | 
|  | struct btrfs_stripe_extent *extent; | 
|  | struct btrfs_key newkey = { | 
|  | .objectid = end, | 
|  | .type = BTRFS_RAID_STRIPE_KEY, | 
|  | .offset = diff_end, | 
|  | }; | 
|  |  | 
|  | /* The "right" item. */ | 
|  | ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey); | 
|  | if (ret) | 
|  | break; | 
|  |  | 
|  | item_size = btrfs_item_size(leaf, path->slots[0]); | 
|  | extent = btrfs_item_ptr(leaf, path->slots[0], | 
|  | struct btrfs_stripe_extent); | 
|  |  | 
|  | for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) { | 
|  | struct btrfs_raid_stride *stride = &extent->strides[i]; | 
|  | u64 phys; | 
|  |  | 
|  | phys = btrfs_raid_stride_physical(leaf, stride); | 
|  | phys += diff_start + length; | 
|  | btrfs_set_raid_stride_physical(leaf, stride, phys); | 
|  | } | 
|  |  | 
|  | /* The "left" item. */ | 
|  | path->slots[0]--; | 
|  | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | 
|  | btrfs_partially_delete_raid_extent(trans, path, &key, | 
|  | diff_start, 0); | 
|  | break; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The stripe extent starts before the range we want to delete: | 
|  | * | 
|  | * |--- RAID Stripe Extent ---| | 
|  | * |--- keep  ---|--- drop ---| | 
|  | * | 
|  | * This means we have to duplicate the tree item, truncate the | 
|  | * length to the new size and then re-insert the item. | 
|  | */ | 
|  | if (found_start < start) { | 
|  | u64 diff_start = start - found_start; | 
|  |  | 
|  | btrfs_partially_delete_raid_extent(trans, path, &key, | 
|  | diff_start, 0); | 
|  |  | 
|  | start += (key.offset - diff_start); | 
|  | length -= (key.offset - diff_start); | 
|  | if (length == 0) | 
|  | break; | 
|  |  | 
|  | btrfs_release_path(path); | 
|  | continue; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The stripe extent ends after the range we want to delete: | 
|  | * | 
|  | * |--- RAID Stripe Extent ---| | 
|  | * |--- drop  ---|--- keep ---| | 
|  | * | 
|  | * This means we have to duplicate the tree item, truncate the | 
|  | * length to the new size and then re-insert the item. | 
|  | */ | 
|  | if (found_end > end) { | 
|  | u64 diff_end = found_end - end; | 
|  |  | 
|  | btrfs_partially_delete_raid_extent(trans, path, &key, | 
|  | key.offset - length, | 
|  | length); | 
|  | ASSERT(key.offset - diff_end == length); | 
|  | break; | 
|  | } | 
|  |  | 
|  | /* Finally we can delete the whole item, no more special cases. */ | 
|  | ret = btrfs_del_item(trans, stripe_root, path); | 
|  | if (ret) | 
|  | break; | 
|  |  | 
|  | start += key.offset; | 
|  | length -= key.offset; | 
|  | if (length == 0) | 
|  | break; | 
|  |  | 
|  | btrfs_release_path(path); | 
|  | } | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static int update_raid_extent_item(struct btrfs_trans_handle *trans, | 
|  | struct btrfs_key *key, | 
|  | struct btrfs_stripe_extent *stripe_extent, | 
|  | const size_t item_size) | 
|  | { | 
|  | BTRFS_PATH_AUTO_FREE(path); | 
|  | struct extent_buffer *leaf; | 
|  | int ret; | 
|  | int slot; | 
|  |  | 
|  | path = btrfs_alloc_path(); | 
|  | if (!path) | 
|  | return -ENOMEM; | 
|  |  | 
|  | ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path, | 
|  | 0, 1); | 
|  | if (ret) | 
|  | return (ret == 1 ? ret : -EINVAL); | 
|  |  | 
|  | leaf = path->nodes[0]; | 
|  | slot = path->slots[0]; | 
|  |  | 
|  | write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot), | 
|  | item_size); | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | EXPORT_FOR_TESTS | 
|  | int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans, | 
|  | struct btrfs_io_context *bioc) | 
|  | { | 
|  | struct btrfs_fs_info *fs_info = trans->fs_info; | 
|  | struct btrfs_key stripe_key; | 
|  | struct btrfs_root *stripe_root = fs_info->stripe_root; | 
|  | const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type); | 
|  | struct btrfs_stripe_extent *stripe_extent; | 
|  | const size_t item_size = struct_size(stripe_extent, strides, num_stripes); | 
|  | int ret; | 
|  |  | 
|  | stripe_extent = kzalloc(item_size, GFP_NOFS); | 
|  | if (!unlikely(stripe_extent)) { | 
|  | btrfs_abort_transaction(trans, -ENOMEM); | 
|  | btrfs_end_transaction(trans); | 
|  | return -ENOMEM; | 
|  | } | 
|  |  | 
|  | trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size, | 
|  | num_stripes); | 
|  | for (int i = 0; i < num_stripes; i++) { | 
|  | u64 devid = bioc->stripes[i].dev->devid; | 
|  | u64 physical = bioc->stripes[i].physical; | 
|  | struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i]; | 
|  |  | 
|  | btrfs_set_stack_raid_stride_devid(raid_stride, devid); | 
|  | btrfs_set_stack_raid_stride_physical(raid_stride, physical); | 
|  | } | 
|  |  | 
|  | stripe_key.objectid = bioc->logical; | 
|  | stripe_key.type = BTRFS_RAID_STRIPE_KEY; | 
|  | stripe_key.offset = bioc->size; | 
|  |  | 
|  | ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent, | 
|  | item_size); | 
|  | if (ret == -EEXIST) { | 
|  | ret = update_raid_extent_item(trans, &stripe_key, stripe_extent, | 
|  | item_size); | 
|  | if (ret) | 
|  | btrfs_abort_transaction(trans, ret); | 
|  | } else if (ret) { | 
|  | btrfs_abort_transaction(trans, ret); | 
|  | } | 
|  |  | 
|  | kfree(stripe_extent); | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans, | 
|  | struct btrfs_ordered_extent *ordered_extent) | 
|  | { | 
|  | struct btrfs_io_context *bioc; | 
|  | int ret; | 
|  |  | 
|  | if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE)) | 
|  | return 0; | 
|  |  | 
|  | list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) { | 
|  | ret = btrfs_insert_one_raid_extent(trans, bioc); | 
|  | if (ret) | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | while (!list_empty(&ordered_extent->bioc_list)) { | 
|  | bioc = list_first_entry(&ordered_extent->bioc_list, | 
|  | typeof(*bioc), rst_ordered_entry); | 
|  | list_del(&bioc->rst_ordered_entry); | 
|  | btrfs_put_bioc(bioc); | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, | 
|  | u64 logical, u64 *length, u64 map_type, | 
|  | u32 stripe_index, struct btrfs_io_stripe *stripe) | 
|  | { | 
|  | struct btrfs_root *stripe_root = fs_info->stripe_root; | 
|  | struct btrfs_stripe_extent *stripe_extent; | 
|  | struct btrfs_key stripe_key; | 
|  | struct btrfs_key found_key; | 
|  | BTRFS_PATH_AUTO_FREE(path); | 
|  | struct extent_buffer *leaf; | 
|  | const u64 end = logical + *length; | 
|  | int num_stripes; | 
|  | u64 offset; | 
|  | u64 found_logical; | 
|  | u64 found_length; | 
|  | u64 found_end; | 
|  | int slot; | 
|  | int ret; | 
|  |  | 
|  | stripe_key.objectid = logical; | 
|  | stripe_key.type = BTRFS_RAID_STRIPE_KEY; | 
|  | stripe_key.offset = 0; | 
|  |  | 
|  | path = btrfs_alloc_path(); | 
|  | if (!path) | 
|  | return -ENOMEM; | 
|  |  | 
|  | if (stripe->rst_search_commit_root) { | 
|  | path->skip_locking = 1; | 
|  | path->search_commit_root = 1; | 
|  | } | 
|  |  | 
|  | ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0); | 
|  | if (ret < 0) | 
|  | return ret; | 
|  | if (ret) { | 
|  | if (path->slots[0] != 0) | 
|  | path->slots[0]--; | 
|  | } | 
|  |  | 
|  | while (1) { | 
|  | leaf = path->nodes[0]; | 
|  | slot = path->slots[0]; | 
|  |  | 
|  | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 
|  | found_logical = found_key.objectid; | 
|  | found_length = found_key.offset; | 
|  | found_end = found_logical + found_length; | 
|  |  | 
|  | if (found_logical > end) { | 
|  | ret = -ENODATA; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | if (in_range(logical, found_logical, found_length)) | 
|  | break; | 
|  |  | 
|  | ret = btrfs_next_item(stripe_root, path); | 
|  | if (ret) | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | offset = logical - found_logical; | 
|  |  | 
|  | /* | 
|  | * If we have a logically contiguous, but physically non-continuous | 
|  | * range, we need to split the bio. Record the length after which we | 
|  | * must split the bio. | 
|  | */ | 
|  | if (end > found_end) | 
|  | *length -= end - found_end; | 
|  |  | 
|  | num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot)); | 
|  | stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent); | 
|  |  | 
|  | for (int i = 0; i < num_stripes; i++) { | 
|  | struct btrfs_raid_stride *stride = &stripe_extent->strides[i]; | 
|  | u64 devid = btrfs_raid_stride_devid(leaf, stride); | 
|  | u64 physical = btrfs_raid_stride_physical(leaf, stride); | 
|  |  | 
|  | if (devid != stripe->dev->devid) | 
|  | continue; | 
|  |  | 
|  | if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i) | 
|  | continue; | 
|  |  | 
|  | stripe->physical = physical + offset; | 
|  |  | 
|  | trace_btrfs_get_raid_extent_offset(fs_info, logical, *length, | 
|  | stripe->physical, devid); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* If we're here, we haven't found the requested devid in the stripe. */ | 
|  | ret = -ENODATA; | 
|  | out: | 
|  | if (ret > 0) | 
|  | ret = -ENODATA; | 
|  | if (ret && ret != -EIO && !stripe->rst_search_commit_root) { | 
|  | btrfs_debug(fs_info, | 
|  | "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s", | 
|  | logical, logical + *length, stripe->dev->devid, | 
|  | btrfs_bg_type_to_raid_name(map_type)); | 
|  | } | 
|  |  | 
|  | return ret; | 
|  | } |