| From f5df068417ff22948c1fe457a1cb008d21bffa94 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Tue, 11 May 2021 23:22:35 +0800 |
| Subject: blk-mq: clear stale request in tags->rq[] before freeing one request |
| pool |
| |
| From: Ming Lei <ming.lei@redhat.com> |
| |
| [ Upstream commit bd63141d585bef14f4caf111f6d0e27fe2300ec6 ] |
| |
| refcount_inc_not_zero() in bt_tags_iter() still may read one freed |
| request. |
| |
| Fix the issue by the following approach: |
| |
| 1) hold a per-tags spinlock when reading ->rqs[tag] and calling |
| refcount_inc_not_zero in bt_tags_iter() |
| |
| 2) clearing stale request referred via ->rqs[tag] before freeing |
| request pool, the per-tags spinlock is held for clearing stale |
| ->rq[tag] |
| |
| So after we cleared stale requests, bt_tags_iter() won't observe |
| freed request any more, also the clearing will wait for pending |
| request reference. |
| |
| The idea of clearing ->rqs[] is borrowed from John Garry's previous |
| patch and one recent David's patch. |
| |
| Tested-by: John Garry <john.garry@huawei.com> |
| Reviewed-by: David Jeffery <djeffery@redhat.com> |
| Reviewed-by: Bart Van Assche <bvanassche@acm.org> |
| Signed-off-by: Ming Lei <ming.lei@redhat.com> |
| Link: https://lore.kernel.org/r/20210511152236.763464-4-ming.lei@redhat.com |
| Signed-off-by: Jens Axboe <axboe@kernel.dk> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| block/blk-mq-tag.c | 9 +++++++-- |
| block/blk-mq-tag.h | 6 ++++++ |
| block/blk-mq.c | 46 +++++++++++++++++++++++++++++++++++++++++----- |
| 3 files changed, 54 insertions(+), 7 deletions(-) |
| |
| diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c |
| index 6772c3728865..c4f2f6c123ae 100644 |
| --- a/block/blk-mq-tag.c |
| +++ b/block/blk-mq-tag.c |
| @@ -202,10 +202,14 @@ struct bt_iter_data { |
| static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags, |
| unsigned int bitnr) |
| { |
| - struct request *rq = tags->rqs[bitnr]; |
| + struct request *rq; |
| + unsigned long flags; |
| |
| + spin_lock_irqsave(&tags->lock, flags); |
| + rq = tags->rqs[bitnr]; |
| if (!rq || !refcount_inc_not_zero(&rq->ref)) |
| - return NULL; |
| + rq = NULL; |
| + spin_unlock_irqrestore(&tags->lock, flags); |
| return rq; |
| } |
| |
| @@ -538,6 +542,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, |
| |
| tags->nr_tags = total_tags; |
| tags->nr_reserved_tags = reserved_tags; |
| + spin_lock_init(&tags->lock); |
| |
| if (flags & BLK_MQ_F_TAG_HCTX_SHARED) |
| return tags; |
| diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h |
| index 7d3e6b333a4a..f887988e5ef6 100644 |
| --- a/block/blk-mq-tag.h |
| +++ b/block/blk-mq-tag.h |
| @@ -20,6 +20,12 @@ struct blk_mq_tags { |
| struct request **rqs; |
| struct request **static_rqs; |
| struct list_head page_list; |
| + |
| + /* |
| + * used to clear request reference in rqs[] before freeing one |
| + * request pool |
| + */ |
| + spinlock_t lock; |
| }; |
| |
| extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, |
| diff --git a/block/blk-mq.c b/block/blk-mq.c |
| index 50d3527a5d97..00d6ed2fe812 100644 |
| --- a/block/blk-mq.c |
| +++ b/block/blk-mq.c |
| @@ -2276,6 +2276,45 @@ queue_exit: |
| return BLK_QC_T_NONE; |
| } |
| |
| +static size_t order_to_size(unsigned int order) |
| +{ |
| + return (size_t)PAGE_SIZE << order; |
| +} |
| + |
| +/* called before freeing request pool in @tags */ |
| +static void blk_mq_clear_rq_mapping(struct blk_mq_tag_set *set, |
| + struct blk_mq_tags *tags, unsigned int hctx_idx) |
| +{ |
| + struct blk_mq_tags *drv_tags = set->tags[hctx_idx]; |
| + struct page *page; |
| + unsigned long flags; |
| + |
| + list_for_each_entry(page, &tags->page_list, lru) { |
| + unsigned long start = (unsigned long)page_address(page); |
| + unsigned long end = start + order_to_size(page->private); |
| + int i; |
| + |
| + for (i = 0; i < set->queue_depth; i++) { |
| + struct request *rq = drv_tags->rqs[i]; |
| + unsigned long rq_addr = (unsigned long)rq; |
| + |
| + if (rq_addr >= start && rq_addr < end) { |
| + WARN_ON_ONCE(refcount_read(&rq->ref) != 0); |
| + cmpxchg(&drv_tags->rqs[i], rq, NULL); |
| + } |
| + } |
| + } |
| + |
| + /* |
| + * Wait until all pending iteration is done. |
| + * |
| + * Request reference is cleared and it is guaranteed to be observed |
| + * after the ->lock is released. |
| + */ |
| + spin_lock_irqsave(&drv_tags->lock, flags); |
| + spin_unlock_irqrestore(&drv_tags->lock, flags); |
| +} |
| + |
| void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, |
| unsigned int hctx_idx) |
| { |
| @@ -2294,6 +2333,8 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, |
| } |
| } |
| |
| + blk_mq_clear_rq_mapping(set, tags, hctx_idx); |
| + |
| while (!list_empty(&tags->page_list)) { |
| page = list_first_entry(&tags->page_list, struct page, lru); |
| list_del_init(&page->lru); |
| @@ -2353,11 +2394,6 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, |
| return tags; |
| } |
| |
| -static size_t order_to_size(unsigned int order) |
| -{ |
| - return (size_t)PAGE_SIZE << order; |
| -} |
| - |
| static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, |
| unsigned int hctx_idx, int node) |
| { |
| -- |
| 2.30.2 |
| |