| From aad560b7f63b495f48a7232fd086c5913a676e6f Mon Sep 17 00:00:00 2001 |
| From: Boaz Harrosh <bharrosh@panasas.com> |
| Date: Thu, 21 Nov 2013 17:58:08 +0200 |
| Subject: ore: Fix wrong math in allocation of per device BIO |
| |
| From: Boaz Harrosh <bharrosh@panasas.com> |
| |
| commit aad560b7f63b495f48a7232fd086c5913a676e6f upstream. |
| |
| At IO preparation we calculate the max pages at each device and |
| allocate a BIO per device of that size. The calculation was wrong |
| on some unaligned corner cases offset/length combination and would |
| make prepare return with -ENOMEM. This would be bad for pnfs-objects |
| that would in that case IO through MDS. And fatal for exofs were it |
| would fail writes with EIO. |
| |
| Fix it by doing the proper math, that will work in all cases. (I |
| ran a test with all possible offset/length combinations this time |
| round). |
| |
| Also when reading we do not need to allocate for the parity units |
| since we jump over them. |
| |
| Also lower the max_io_length to take into account the parity pages |
| so not to allocate BIOs bigger than PAGE_SIZE |
| |
| Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| fs/exofs/ore.c | 37 +++++++++++++++++++++++++------------ |
| include/scsi/osd_ore.h | 1 + |
| 2 files changed, 26 insertions(+), 12 deletions(-) |
| |
| --- a/fs/exofs/ore.c |
| +++ b/fs/exofs/ore.c |
| @@ -103,7 +103,7 @@ int ore_verify_layout(unsigned total_com |
| |
| layout->max_io_length = |
| (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) * |
| - layout->group_width; |
| + (layout->group_width - layout->parity); |
| if (layout->parity) { |
| unsigned stripe_length = |
| (layout->group_width - layout->parity) * |
| @@ -286,7 +286,8 @@ int ore_get_rw_state(struct ore_layout |
| if (length) { |
| ore_calc_stripe_info(layout, offset, length, &ios->si); |
| ios->length = ios->si.length; |
| - ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE; |
| + ios->nr_pages = ((ios->offset & (PAGE_SIZE - 1)) + |
| + ios->length + PAGE_SIZE - 1) / PAGE_SIZE; |
| if (layout->parity) |
| _ore_post_alloc_raid_stuff(ios); |
| } |
| @@ -536,6 +537,7 @@ void ore_calc_stripe_info(struct ore_lay |
| u64 H = LmodS - G * T; |
| |
| u32 N = div_u64(H, U); |
| + u32 Nlast; |
| |
| /* "H - (N * U)" is just "H % U" so it's bound to u32 */ |
| u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width; |
| @@ -568,6 +570,10 @@ void ore_calc_stripe_info(struct ore_lay |
| si->length = T - H; |
| if (si->length > length) |
| si->length = length; |
| + |
| + Nlast = div_u64(H + si->length + U - 1, U); |
| + si->maxdevUnits = Nlast - N; |
| + |
| si->M = M; |
| } |
| EXPORT_SYMBOL(ore_calc_stripe_info); |
| @@ -583,13 +589,16 @@ int _ore_add_stripe_unit(struct ore_io_s |
| int ret; |
| |
| if (per_dev->bio == NULL) { |
| - unsigned pages_in_stripe = ios->layout->group_width * |
| - (ios->layout->stripe_unit / PAGE_SIZE); |
| - unsigned nr_pages = ios->nr_pages * ios->layout->group_width / |
| - (ios->layout->group_width - |
| - ios->layout->parity); |
| - unsigned bio_size = (nr_pages + pages_in_stripe) / |
| - ios->layout->group_width; |
| + unsigned bio_size; |
| + |
| + if (!ios->reading) { |
| + bio_size = ios->si.maxdevUnits; |
| + } else { |
| + bio_size = (ios->si.maxdevUnits + 1) * |
| + (ios->layout->group_width - ios->layout->parity) / |
| + ios->layout->group_width; |
| + } |
| + bio_size *= (ios->layout->stripe_unit / PAGE_SIZE); |
| |
| per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); |
| if (unlikely(!per_dev->bio)) { |
| @@ -609,8 +618,12 @@ int _ore_add_stripe_unit(struct ore_io_s |
| added_len = bio_add_pc_page(q, per_dev->bio, pages[pg], |
| pglen, pgbase); |
| if (unlikely(pglen != added_len)) { |
| - ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n", |
| - per_dev->bio->bi_vcnt); |
| + /* If bi_vcnt == bi_max then this is a SW BUG */ |
| + ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=0x%x " |
| + "bi_max=0x%x BIO_MAX=0x%x cur_len=0x%x\n", |
| + per_dev->bio->bi_vcnt, |
| + per_dev->bio->bi_max_vecs, |
| + BIO_MAX_PAGES_KMALLOC, cur_len); |
| ret = -ENOMEM; |
| goto out; |
| } |
| @@ -1098,7 +1111,7 @@ int ore_truncate(struct ore_layout *layo |
| size_attr->attr = g_attr_logical_length; |
| size_attr->attr.val_ptr = &size_attr->newsize; |
| |
| - ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", |
| + ORE_DBGMSG2("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", |
| _LLU(oc->comps->obj.id), _LLU(obj_size), i); |
| ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, |
| &size_attr->attr); |
| --- a/include/scsi/osd_ore.h |
| +++ b/include/scsi/osd_ore.h |
| @@ -102,6 +102,7 @@ struct ore_striping_info { |
| unsigned unit_off; |
| unsigned cur_pg; |
| unsigned cur_comp; |
| + unsigned maxdevUnits; |
| }; |
| |
| struct ore_io_state; |