| From 9638556a276125553549fdfe349c464481ec2f39 Mon Sep 17 00:00:00 2001 |
| From: Ilya Dryomov <ilya.dryomov@inktank.com> |
| Date: Tue, 10 Jun 2014 13:53:29 +0400 |
| Subject: rbd: handle parent_overlap on writes correctly |
| |
| From: Ilya Dryomov <ilya.dryomov@inktank.com> |
| |
| commit 9638556a276125553549fdfe349c464481ec2f39 upstream. |
| |
| The following check in rbd_img_obj_request_submit() |
| |
| rbd_dev->parent_overlap <= obj_request->img_offset |
| |
| allows the fall through to the non-layered write case even if both |
| parent_overlap and obj_request->img_offset belong to the same RADOS |
| object. This leads to data corruption, because the area to the left of |
| parent_overlap ends up unconditionally zero-filled instead of being |
| populated with parent data. Suppose we want to write 1M to offset 6M |
| of image bar, which is a clone of foo@snap; object_size is 4M, |
| parent_overlap is 5M: |
| |
| rbd_data.<id>.0000000000000001 |
|  ---------------------|----------------------|------------ |
| | should be copyup'ed | should be zeroed out | write ... |
|  ---------------------|----------------------|------------ |
| 4M                   5M                     6M |
|                parent_overlap     obj_request->img_offset |
| |
| 4..5M should be copyup'ed from foo, yet it is zero-filled, just like |
| 5..6M is. |
| |
| Given that the only striping mode kernel client currently supports is |
| chunking (i.e. stripe_unit == object_size, stripe_count == 1), round |
| parent_overlap up to the next object boundary for the purposes of the |
| overlap check. |
| |
| Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com> |
| Reviewed-by: Josh Durgin <josh.durgin@inktank.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| drivers/block/rbd.c | 10 +++++++++- |
| 1 file changed, 9 insertions(+), 1 deletion(-) |
| |
| --- a/drivers/block/rbd.c |
| +++ b/drivers/block/rbd.c |
| @@ -1366,6 +1366,14 @@ static bool obj_request_exists_test(stru |
| return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0; |
| } |
| |
| +static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request) |
| +{ |
| + struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; |
| + |
| + return obj_request->img_offset < |
| + round_up(rbd_dev->parent_overlap, rbd_obj_bytes(&rbd_dev->header)); |
| +} |
| + |
| static void rbd_obj_request_get(struct rbd_obj_request *obj_request) |
| { |
| dout("%s: obj %p (was %d)\n", __func__, obj_request, |
| @@ -2656,7 +2664,7 @@ static int rbd_img_obj_request_submit(st |
| */ |
| if (!img_request_write_test(img_request) || |
| !img_request_layered_test(img_request) || |
| - rbd_dev->parent_overlap <= obj_request->img_offset || |
| + !obj_request_overlaps_parent(obj_request) || |
| ((known = obj_request_known_test(obj_request)) && |
| obj_request_exists_test(obj_request))) { |
| |