| From e6e1920c0b490f1e9c3552a151ab2cd95dea2a8e Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Fri, 26 Jun 2020 14:49:10 -0300 |
| Subject: RDMA/core: Fix bogus WARN_ON during ib_unregister_device_queued() |
| |
| From: Jason Gunthorpe <jgg@nvidia.com> |
| |
| [ Upstream commit 0cb42c0265837fafa2b4f302c8a7fed2631d7869 ] |
| |
| ib_unregister_device_queued() can only be used by drivers using the new |
| dealloc_driver callback flow, and it has a safety WARN_ON to ensure |
| drivers are using it properly. |
| |
| However, if unregistration races with registration, there is a special |
| destruction path that maintains the uniform error handling semantic of |
| 'caller does ib_dealloc_device() on failure'. This requires disabling the |
| dealloc_driver callback, which triggers the WARN_ON. |
| |
| Instead of using NULL to disable the callback, use a special function |
| pointer so the WARN_ON does not trigger. |
| |
| Fixes: d0899892edd0 ("RDMA/device: Provide APIs from the core code to help unregistration") |
| Link: https://lore.kernel.org/r/0-v1-a36d512e0a99+762-syz_dealloc_driver_jgg@nvidia.com |
| Reported-by: syzbot+4088ed905e4ae2b0e13b@syzkaller.appspotmail.com |
| Suggested-by: Hillf Danton <hdanton@sina.com> |
| Reviewed-by: Leon Romanovsky <leonro@mellanox.com> |
| Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| drivers/infiniband/core/device.c | 11 ++++++++--- |
| 1 file changed, 8 insertions(+), 3 deletions(-) |
| |
| diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c |
| index d0b3d35ad3e43..0fe3c3eb3dfd1 100644 |
| --- a/drivers/infiniband/core/device.c |
| +++ b/drivers/infiniband/core/device.c |
| @@ -1327,6 +1327,10 @@ static int enable_device_and_get(struct ib_device *device) |
| return ret; |
| } |
| |
| +static void prevent_dealloc_device(struct ib_device *ib_dev) |
| +{ |
| +} |
| + |
| /** |
| * ib_register_device - Register an IB device with IB core |
| * @device: Device to register |
| @@ -1396,11 +1400,11 @@ int ib_register_device(struct ib_device *device, const char *name) |
| * possibility for a parallel unregistration along with this |
| * error flow. Since we have a refcount here we know any |
| * parallel flow is stopped in disable_device and will see the |
| - * NULL pointers, causing the responsibility to |
| + * special dealloc_driver pointer, causing the responsibility to |
| * ib_dealloc_device() to revert back to this thread. |
| */ |
| dealloc_fn = device->ops.dealloc_driver; |
| - device->ops.dealloc_driver = NULL; |
| + device->ops.dealloc_driver = prevent_dealloc_device; |
| ib_device_put(device); |
| __ib_unregister_device(device); |
| device->ops.dealloc_driver = dealloc_fn; |
| @@ -1448,7 +1452,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev) |
| * Drivers using the new flow may not call ib_dealloc_device except |
| * in error unwind prior to registration success. |
| */ |
| - if (ib_dev->ops.dealloc_driver) { |
| + if (ib_dev->ops.dealloc_driver && |
| + ib_dev->ops.dealloc_driver != prevent_dealloc_device) { |
| WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1); |
| ib_dealloc_device(ib_dev); |
| } |
| -- |
| 2.25.1 |
| |