| From 0f2d5be792b0466b06797f637cfbb0f64dbb408c Mon Sep 17 00:00:00 2001 |
| From: Alex Elder <elder@linaro.org> |
| Date: Sat, 26 Apr 2014 14:21:44 +0400 |
| Subject: rbd: use reference counts for image requests |
| |
| From: Alex Elder <elder@linaro.org> |
| |
| commit 0f2d5be792b0466b06797f637cfbb0f64dbb408c upstream. |
| |
| Each image request contains a reference count, but to date it has |
| not actually been used. (I think this was just an oversight.) A |
| recent report involving rbd failing an assertion shed light on why |
| and where we need to use these reference counts. |
| |
| Every OSD request associated with an object request uses |
| rbd_osd_req_callback() as its callback function. That function will |
| call a helper function (dependent on the type of OSD request) that |
| will set the object request's "done" flag if the object request if |
| appropriate. If that "done" flag is set, the object request is |
| passed to rbd_obj_request_complete(). |
| |
| In rbd_obj_request_complete(), requests are processed in sequential |
| order. So if an object request completes before one of its |
| predecessors in the image request, the completion is deferred. |
| Otherwise, if it's a completing object's "turn" to be completed, it |
| is passed to rbd_img_obj_end_request(), which records the result of |
| the operation, accumulates transferred bytes, and so on. Next, the |
| successor to this request is checked and if it is marked "done", |
| (deferred) completion processing is performed on that request, and |
| so on. If the last object request in an image request is completed, |
| rbd_img_request_complete() is called, which (typically) destroys |
| the image request. |
| |
| There is a race here, however. The instant an object request is |
| marked "done" it can be provided (by a thread handling completion of |
| one of its predecessor operations) to rbd_img_obj_end_request(), |
| which (for the last request) can then lead to the image request |
| getting torn down. And this can happen *before* that object has |
| itself entered rbd_img_obj_end_request(). As a result, once it |
| *does* enter that function, the image request (and even the object |
| request itself) may have been freed and become invalid. |
| |
| All that's necessary to avoid this is to properly count references |
| to the image requests. We tear down an image request's object |
| requests all at once--only when the entire image request has |
| completed. So there's no need for an image request to count |
| references for its object requests. However, we don't want an |
| image request to go away until the last of its object requests |
| has passed through rbd_img_obj_callback(). In other words, |
| we don't want rbd_img_request_complete() to necessarily |
| result in the image request being destroyed, because it may |
| get called before we've finished processing on all of its |
| object requests. |
| |
| So the fix is to add a reference to an image request for |
| each of its object requests. The reference can be viewed |
| as representing an object request that has not yet finished |
| its call to rbd_img_obj_callback(). That is emphasized by |
| getting the reference right after assigning that as the image |
| object's callback function. The corresponding release of that |
| reference is done at the end of rbd_img_obj_callback(), which |
| every image object request passes through exactly once. |
| |
| Signed-off-by: Alex Elder <elder@linaro.org> |
| Reviewed-by: Ilya Dryomov <ilya.dryomov@inktank.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| drivers/block/rbd.c | 9 +++++++++ |
| 1 file changed, 9 insertions(+) |
| |
| --- a/drivers/block/rbd.c |
| +++ b/drivers/block/rbd.c |
| @@ -1382,6 +1382,13 @@ static void rbd_obj_request_put(struct r |
| kref_put(&obj_request->kref, rbd_obj_request_destroy); |
| } |
| |
| +static void rbd_img_request_get(struct rbd_img_request *img_request) |
| +{ |
| + dout("%s: img %p (was %d)\n", __func__, img_request, |
| + atomic_read(&img_request->kref.refcount)); |
| + kref_get(&img_request->kref); |
| +} |
| + |
| static bool img_request_child_test(struct rbd_img_request *img_request); |
| static void rbd_parent_request_destroy(struct kref *kref); |
| static void rbd_img_request_destroy(struct kref *kref); |
| @@ -2128,6 +2135,7 @@ static void rbd_img_obj_callback(struct |
| img_request->next_completion = which; |
| out: |
| spin_unlock_irq(&img_request->completion_lock); |
| + rbd_img_request_put(img_request); |
| |
| if (!more) |
| rbd_img_request_complete(img_request); |
| @@ -2225,6 +2233,7 @@ static int rbd_img_request_fill(struct r |
| goto out_partial; |
| obj_request->osd_req = osd_req; |
| obj_request->callback = rbd_img_obj_callback; |
| + rbd_img_request_get(img_request); |
| |
| osd_req_op_extent_init(osd_req, 0, opcode, offset, length, |
| 0, 0); |