| /* SPDX-License-Identifier: GPL-2.0 */ |
| /* |
| * Copyright (C) 2012 Fusion-io All rights reserved. |
| * Copyright (C) 2012 Intel Corp. All rights reserved. |
| */ |
| |
| #ifndef BTRFS_RAID56_H |
| #define BTRFS_RAID56_H |
| |
| #include <linux/types.h> |
| #include <linux/list.h> |
| #include <linux/spinlock.h> |
| #include <linux/bio.h> |
| #include <linux/refcount.h> |
| #include <linux/workqueue.h> |
| #include "volumes.h" |
| |
/* Only pointers to these types are used below, so forward declarations suffice. */
struct page;
struct btrfs_fs_info;
| |
/*
 * Kind of work a btrfs_raid_bio carries out; stored in
 * btrfs_raid_bio::operation.
 */
enum btrfs_rbio_ops {
	/* Write to the full stripe. */
	BTRFS_RBIO_WRITE,
	/* Rebuild for a read coming from higher up. */
	BTRFS_RBIO_READ_REBUILD,
	/* Scrub of a parity stripe. */
	BTRFS_RBIO_PARITY_SCRUB,
};
| |
| /* |
| * Overview of btrfs_raid_bio. |
| * |
| * One btrfs_raid_bio represents a full stripe of RAID56, including both data |
| * and P/Q stripes. For now, each data and P/Q stripe is of a fixed length (64K). |
| * |
| * One btrfs_raid_bio can have one or more bios from higher layer, covering |
| * part or all of the data stripes. |
| * |
| * [PAGES FROM HIGHER LAYER BIOS] |
| * Higher layer bios are in the btrfs_raid_bio::bio_list. |
| * |
| * Pages from the bio_list are represented like the following: |
| * |
| * bio_list: |<- Bio 1 ->| |<- Bio 2 ->| ... |
| * bio_paddrs: [0] [1] [2] [3] [4] [5] ... |
| * |
| * If there is a bio covering a sector (one btrfs fs block), the corresponding |
| * pointer in btrfs_raid_bio::bio_paddrs[] will point to the physical address |
| * (with the offset inside the page) of the corresponding bio. |
| * |
| * If there is no bio covering a sector, then btrfs_raid_bio::bio_paddrs[i] will |
| * be INVALID_PADDR. |
| * |
| * The length of each entry in bio_paddrs[] is a step (aka, min(sectorsize, PAGE_SIZE)). |
| * |
| * [PAGES FOR INTERNAL USAGES] |
| * Pages not covered by any bio or belonging to P/Q stripes are stored in |
| * btrfs_raid_bio::stripe_pages[] and stripe_paddrs[], like the following: |
| * |
| * stripe_pages: |<- Page 0 ->|<- Page 1 ->| ... |
| * stripe_paddrs: [0] [1] [2] [3] [4] ... |
| * |
| * stripe_pages[] array stores all the pages covering the full stripe, including |
| * data and P/Q pages. |
| * stripe_pages[0] is the first page of the first data stripe. |
| * stripe_pages[BTRFS_STRIPE_LEN / PAGE_SIZE] is the first page of the second |
| * data stripe. |
| * |
| * Some pointers inside stripe_pages[] can be NULL, e.g. for a full stripe write |
| * (the bio covers all data stripes) there is no need to allocate pages for |
| * data stripes (can grab from bio_paddrs[]). |
| * |
| * If the corresponding page of stripe_paddrs[i] is not allocated, the value of |
| * stripe_paddrs[i] will be INVALID_PADDR. |
| * |
| * The length of each entry in stripe_paddrs[] is a step. |
| * |
| * [LOCATING A SECTOR] |
| * To locate a sector for IO, we need the following info: |
| * |
| * - stripe_nr |
| * Starts from 0 (representing the first data stripe), ends at |
| * @nr_data (RAID5, P stripe) or @nr_data + 1 (RAID6, Q stripe). |
| * |
| * - sector_nr |
| * Starts from 0 (representing the first sector of the stripe), ends |
| * at BTRFS_STRIPE_LEN / sectorsize - 1. |
| * |
| * - step_nr |
| * A step is min(sector_size, PAGE_SIZE). |
| * |
| * Starts from 0 (representing the first step of the sector), ends |
| * at @sector_nsteps - 1. |
| * |
| * For most call sites they do not need to bother this parameter. |
| * It is for bs > ps support and only for vertical stripe related works. |
| * (e.g. RMW/recover) |
| * |
| * - from which array |
| * Whether grabbing from stripe_paddrs[] (aka, internal pages) or from the |
| * bio_paddrs[] (aka, from the higher layer bios). |
| * |
| * For IO, a physical address is returned, so that we can extract the page and |
| * the offset inside the page for IO. |
| * A special value INVALID_PADDR represents when the physical address is invalid, |
| * normally meaning there is no page allocated for the specified sector. |
| */ |
| struct btrfs_raid_bio { |
| struct btrfs_io_context *bioc; |
| |
| /* |
| * While we're doing RMW on a stripe we put it into a hash table so we |
| * can lock the stripe and merge more rbios into it. |
| */ |
| struct list_head hash_list; |
| |
| /* LRU list for the stripe cache */ |
| struct list_head stripe_cache; |
| |
| /* For scheduling work in the helper threads */ |
| struct work_struct work; |
| |
| /* |
| * bio_list and bio_list_lock are used to add more bios into the stripe |
| * in hopes of avoiding the full RMW |
| */ |
| struct bio_list bio_list; |
| spinlock_t bio_list_lock; |
| |
| /* |
| * Also protected by the bio_list_lock, the plug list is used by the |
| * plugging code to collect partial bios while plugged. The stripe |
| * locking code also uses it to hand off the stripe lock to the next |
| * pending IO. |
| */ |
| struct list_head plug_list; |
| |
| /* Flags that tell us if it is safe to merge with this bio. */ |
| unsigned long flags; |
| |
| /* |
| * Set if we're doing a parity rebuild for a read from higher up, which |
| * is handled differently from a parity rebuild as part of RMW. |
| */ |
| enum btrfs_rbio_ops operation; |
| |
| /* How many pages there are for the full stripe including P/Q */ |
| u16 nr_pages; |
| |
| /* How many sectors there are for the full stripe including P/Q */ |
| u16 nr_sectors; |
| |
| /* Number of data stripes (no p/q) */ |
| u8 nr_data; |
| |
| /* Number of all stripes (including P/Q) */ |
| u8 real_stripes; |
| |
| /* How many pages there are for each stripe */ |
| u8 stripe_npages; |
| |
| /* How many sectors there are for each stripe */ |
| u8 stripe_nsectors; |
| |
| /* |
| * How many steps there are for one sector. |
| * |
| * For bs > ps cases, it's sectorsize / PAGE_SIZE. |
| * For bs <= ps cases, it's always 1. |
| */ |
| u8 sector_nsteps; |
| |
| /* Stripe number that we're scrubbing */ |
| u8 scrubp; |
| |
| /* |
| * Size of all the bios in the bio_list. This helps us decide if the |
| * rbio maps to a full stripe or not. |
| */ |
| int bio_list_bytes; |
| |
| refcount_t refs; |
| |
| atomic_t stripes_pending; |
| |
| wait_queue_head_t io_wait; |
| |
| /* Bitmap to record which horizontal stripe has data */ |
| unsigned long dbitmap; |
| |
| /* Allocated with stripe_nsectors-many bits for finish_*() calls */ |
| unsigned long finish_pbitmap; |
| |
| /* |
| * These are two arrays of pointers. We allocate the rbio big enough |
| * to hold them both and setup their locations when the rbio is |
| * allocated. |
| */ |
| |
| /* |
| * Pointers to pages that we allocated for reading/writing stripes |
| * directly from the disk (including P/Q). |
| */ |
| struct page **stripe_pages; |
| |
| /* Pointers to the sectors in the bio_list, for faster lookup */ |
| phys_addr_t *bio_paddrs; |
| |
| /* Pointers to the sectors in the stripe_pages[]. */ |
| phys_addr_t *stripe_paddrs; |
| |
| /* Each set bit means the corresponding sector in stripe_sectors[] is uptodate. */ |
| unsigned long *stripe_uptodate_bitmap; |
| |
| /* Allocated with real_stripes-many pointers for finish_*() calls */ |
| void **finish_pointers; |
| |
| /* |
| * The bitmap recording where IO errors happened. |
| * Each bit is corresponding to one sector in either bio_sectors[] or |
| * stripe_sectors[] array. |
| */ |
| unsigned long *error_bitmap; |
| |
| /* |
| * Checksum buffer if the rbio is for data. The buffer should cover |
| * all data sectors (excluding P/Q sectors). |
| */ |
| u8 *csum_buf; |
| |
| /* |
| * Each bit represents if the corresponding sector has data csum found. |
| * Should only cover data sectors (excluding P/Q sectors). |
| */ |
| unsigned long *csum_bitmap; |
| }; |
| |
| /* |
| * For trace event usage only. Records useful debug info for each bio submitted |
| * by RAID56 to each physical device. |
| * |
| * No matter signed or not, (-1) is always the one indicating we can not grab |
| * the proper stripe number. |
| */ |
| struct raid56_bio_trace_info { |
| u64 devid; |
| |
| /* The offset inside the stripe. (<= STRIPE_LEN) */ |
| u32 offset; |
| |
| /* |
| * Stripe number. |
| * 0 is the first data stripe, and nr_data for P stripe, |
| * nr_data + 1 for Q stripe. |
| * >= real_stripes for |
| */ |
| u8 stripe_nr; |
| }; |
| |
| static inline int nr_data_stripes(const struct btrfs_chunk_map *map) |
| { |
| return map->num_stripes - btrfs_nr_parity_stripes(map->type); |
| } |
| |
| static inline int nr_bioc_data_stripes(const struct btrfs_io_context *bioc) |
| { |
| return bioc->num_stripes - btrfs_nr_parity_stripes(bioc->map_type); |
| } |
| |
/*
 * Sentinel stripe values denoting the P and Q parity stripes.  They are the
 * two largest u64 values.
 */
#define RAID5_P_STRIPE ((u64)-2)
#define RAID6_Q_STRIPE ((u64)-1)

/*
 * True (non-zero) if @x is one of the parity sentinels above.
 *
 * @x is converted to u64 and evaluated exactly once, so passing an expression
 * with side effects is safe.
 */
#define is_parity_stripe(x)						\
({									\
	const u64 _stripe = (x);					\
									\
	_stripe == RAID5_P_STRIPE || _stripe == RAID6_Q_STRIPE;	\
})
| |
| struct btrfs_device; |
| |
| void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, |
| int mirror_num); |
| void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc); |
| |
| struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio, |
| struct btrfs_io_context *bioc, |
| struct btrfs_device *scrub_dev, |
| unsigned long *dbitmap, int stripe_nsectors); |
| void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio); |
| |
| void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio, |
| struct folio **data_folios, u64 data_logical); |
| |
| int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); |
| void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info); |
| |
| #endif |