| From ba4fea74b6435053c660dfe1a1e2d1dfb84d0745 Mon Sep 17 00:00:00 2001 |
| From: Parav Pandit <parav@mellanox.com> |
| Date: Thu, 12 Dec 2019 13:30:22 +0200 |
| Subject: [PATCH] IB/core: Let IB core distribute cache update events |
| |
| commit 6b57cea9221b0247ad5111b348522625e489a8e4 upstream. |
| |
| Currently, when the low-level driver reports Pkey, GID, and port change
| events, they are delivered to the registered handlers in the order in
| which the handlers were registered.
| |
| The IB core and ULPs such as IPoIB are interested in GID, LID, and Pkey
| change events.
| |
| Since all GID queries done by ULPs are serviced by the IB core, and the
| IB core defers cache updates to a work queue, it is possible for other
| clients to see stale cache data when they handle their own events.
| |
| For example, the call tree below shows how ipoib can call
| rdma_query_gid() while the corresponding cache update is still sitting
| in the WQ:
| |
| mlx5_ib_handle_event()
|   ib_dispatch_event()
|     ib_cache_event()
|       queue_work() -> slow cache update
|
| [..]
| ipoib_event()
|   queue_work()
| [..]
| work handler
|   ipoib_ib_dev_flush_light()
|     __ipoib_ib_dev_flush()
|       ipoib_dev_addr_changed_valid()
|         rdma_query_gid() <- Returns old GID, cache not updated.
| |
| Move all the event dispatch to a work queue so that the cache update is |
| always done before any clients are notified. |
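|
| In outline (a condensed sketch derived from the diff below, not a
| literal call trace), the dispatch path after this change becomes:
|
| mlx5_ib_handle_event()
|   ib_dispatch_event()
|     queue_work() -> work handler
|       ib_cache_event_task()
|         ib_cache_update()            <- cache refreshed first
|         ib_dispatch_event_clients()  <- handlers see the updated cache
|
| Events that do not touch the cache go through the same queue_work()
| path but run ib_generic_event_task(), which only calls
| ib_dispatch_event_clients(). GID change events are the exception on
| the cache path: they are already notified per GID entry from
| dispatch_gid_change_event() during the cache update itself.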
| |
| Fixes: f35faa4ba956 ("IB/core: Simplify ib_query_gid to always refer to cache") |
| Link: https://lore.kernel.org/r/20191212113024.336702-3-leon@kernel.org |
| Signed-off-by: Parav Pandit <parav@mellanox.com> |
| Signed-off-by: Leon Romanovsky <leonro@mellanox.com> |
| Reviewed-by: Jason Gunthorpe <jgg@mellanox.com> |
| Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c |
| index 18e476b3ced0..c9ee829ce386 100644 |
| --- a/drivers/infiniband/core/cache.c |
| +++ b/drivers/infiniband/core/cache.c |
| @@ -51,9 +51,8 @@ struct ib_pkey_cache { |
| |
| struct ib_update_work { |
| struct work_struct work; |
| - struct ib_device *device; |
| - u8 port_num; |
| - bool enforce_security; |
| + struct ib_event event; |
| + bool enforce_security; |
| }; |
| |
| union ib_gid zgid; |
| @@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) |
| event.element.port_num = port; |
| event.event = IB_EVENT_GID_CHANGE; |
| |
| - ib_dispatch_event(&event); |
| + ib_dispatch_event_clients(&event); |
| } |
| |
| static const char * const gid_type_str[] = { |
| @@ -1386,9 +1385,8 @@ static int config_non_roce_gid_cache(struct ib_device *device, |
| return ret; |
| } |
| |
| -static void ib_cache_update(struct ib_device *device, |
| - u8 port, |
| - bool enforce_security) |
| +static int |
| +ib_cache_update(struct ib_device *device, u8 port, bool enforce_security) |
| { |
| struct ib_port_attr *tprops = NULL; |
| struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; |
| @@ -1396,11 +1394,11 @@ static void ib_cache_update(struct ib_device *device, |
| int ret; |
| |
| if (!rdma_is_port_valid(device, port)) |
| - return; |
| + return -EINVAL; |
| |
| tprops = kmalloc(sizeof *tprops, GFP_KERNEL); |
| if (!tprops) |
| - return; |
| + return -ENOMEM; |
| |
| ret = ib_query_port(device, port, tprops); |
| if (ret) { |
| @@ -1418,8 +1416,10 @@ static void ib_cache_update(struct ib_device *device, |
| pkey_cache = kmalloc(struct_size(pkey_cache, table, |
| tprops->pkey_tbl_len), |
| GFP_KERNEL); |
| - if (!pkey_cache) |
| + if (!pkey_cache) { |
| + ret = -ENOMEM; |
| goto err; |
| + } |
| |
| pkey_cache->table_len = tprops->pkey_tbl_len; |
| |
| @@ -1451,50 +1451,84 @@ static void ib_cache_update(struct ib_device *device, |
| |
| kfree(old_pkey_cache); |
| kfree(tprops); |
| - return; |
| + return 0; |
| |
| err: |
| kfree(pkey_cache); |
| kfree(tprops); |
| + return ret; |
| +} |
| + |
| +static void ib_cache_event_task(struct work_struct *_work) |
| +{ |
| + struct ib_update_work *work = |
| + container_of(_work, struct ib_update_work, work); |
| + int ret; |
| + |
| + /* Before distributing the cache update event, first sync |
| + * the cache. |
| + */ |
| + ret = ib_cache_update(work->event.device, work->event.element.port_num, |
| + work->enforce_security); |
| + |
| + /* GID event is notified already for individual GID entries by |
| + * dispatch_gid_change_event(). Hence, notify for the rest of the
| + * events. |
| + */ |
| + if (!ret && work->event.event != IB_EVENT_GID_CHANGE) |
| + ib_dispatch_event_clients(&work->event); |
| + |
| + kfree(work); |
| } |
| |
| -static void ib_cache_task(struct work_struct *_work) |
| +static void ib_generic_event_task(struct work_struct *_work) |
| { |
| struct ib_update_work *work = |
| container_of(_work, struct ib_update_work, work); |
| |
| - ib_cache_update(work->device, |
| - work->port_num, |
| - work->enforce_security); |
| + ib_dispatch_event_clients(&work->event); |
| kfree(work); |
| } |
| |
| -static void ib_cache_event(struct ib_event_handler *handler, |
| - struct ib_event *event) |
| +static bool is_cache_update_event(const struct ib_event *event) |
| +{ |
| + return (event->event == IB_EVENT_PORT_ERR || |
| + event->event == IB_EVENT_PORT_ACTIVE || |
| + event->event == IB_EVENT_LID_CHANGE || |
| + event->event == IB_EVENT_PKEY_CHANGE || |
| + event->event == IB_EVENT_CLIENT_REREGISTER || |
| + event->event == IB_EVENT_GID_CHANGE); |
| +} |
| + |
| +/** |
| + * ib_dispatch_event - Dispatch an asynchronous event |
| + * @event:Event to dispatch |
| + * |
| + * Low-level drivers must call ib_dispatch_event() to dispatch the |
| + * event to all registered event handlers when an asynchronous event |
| + * occurs. |
| + */ |
| +void ib_dispatch_event(const struct ib_event *event) |
| { |
| struct ib_update_work *work; |
| |
| - if (event->event == IB_EVENT_PORT_ERR || |
| - event->event == IB_EVENT_PORT_ACTIVE || |
| - event->event == IB_EVENT_LID_CHANGE || |
| - event->event == IB_EVENT_PKEY_CHANGE || |
| - event->event == IB_EVENT_CLIENT_REREGISTER || |
| - event->event == IB_EVENT_GID_CHANGE) { |
| - work = kmalloc(sizeof *work, GFP_ATOMIC); |
| - if (work) { |
| - INIT_WORK(&work->work, ib_cache_task); |
| - work->device = event->device; |
| - work->port_num = event->element.port_num; |
| - if (event->event == IB_EVENT_PKEY_CHANGE || |
| - event->event == IB_EVENT_GID_CHANGE) |
| - work->enforce_security = true; |
| - else |
| - work->enforce_security = false; |
| - |
| - queue_work(ib_wq, &work->work); |
| - } |
| - } |
| + work = kzalloc(sizeof(*work), GFP_ATOMIC); |
| + if (!work) |
| + return; |
| + |
| + if (is_cache_update_event(event)) |
| + INIT_WORK(&work->work, ib_cache_event_task); |
| + else |
| + INIT_WORK(&work->work, ib_generic_event_task); |
| + |
| + work->event = *event; |
| + if (event->event == IB_EVENT_PKEY_CHANGE || |
| + event->event == IB_EVENT_GID_CHANGE) |
| + work->enforce_security = true; |
| + |
| + queue_work(ib_wq, &work->work); |
| } |
| +EXPORT_SYMBOL(ib_dispatch_event); |
| |
| int ib_cache_setup_one(struct ib_device *device) |
| { |
| @@ -1510,9 +1544,6 @@ int ib_cache_setup_one(struct ib_device *device) |
| rdma_for_each_port (device, p) |
| ib_cache_update(device, p, true); |
| |
| - INIT_IB_EVENT_HANDLER(&device->cache.event_handler, |
| - device, ib_cache_event); |
| - ib_register_event_handler(&device->cache.event_handler); |
| return 0; |
| } |
| |
| @@ -1534,14 +1565,12 @@ void ib_cache_release_one(struct ib_device *device) |
| |
| void ib_cache_cleanup_one(struct ib_device *device) |
| { |
| - /* The cleanup function unregisters the event handler, |
| - * waits for all in-progress workqueue elements and cleans |
| - * up the GID cache. This function should be called after |
| - * the device was removed from the devices list and all |
| - * clients were removed, so the cache exists but is |
| + /* The cleanup function waits for all in-progress workqueue |
| + * elements and cleans up the GID cache. This function should be |
| + * called after the device was removed from the devices list and |
| + * all clients were removed, so the cache exists but is |
| * non-functional and shouldn't be updated anymore. |
| */ |
| - ib_unregister_event_handler(&device->cache.event_handler); |
| flush_workqueue(ib_wq); |
| gid_table_cleanup_one(device); |
| |
| diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h |
| index ff9e0d7fb4f3..b0227f955b96 100644 |
| --- a/drivers/infiniband/core/core_priv.h |
| +++ b/drivers/infiniband/core/core_priv.h |
| @@ -119,6 +119,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); |
| int ib_cache_setup_one(struct ib_device *device); |
| void ib_cache_cleanup_one(struct ib_device *device); |
| void ib_cache_release_one(struct ib_device *device); |
| +void ib_dispatch_event_clients(struct ib_event *event); |
| |
| #ifdef CONFIG_CGROUP_RDMA |
| void ib_device_register_rdmacg(struct ib_device *device); |
| diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c |
| index 862cda8d4f46..0423c64de820 100644 |
| --- a/drivers/infiniband/core/device.c |
| +++ b/drivers/infiniband/core/device.c |
| @@ -589,6 +589,7 @@ struct ib_device *_ib_alloc_device(size_t size) |
| |
| INIT_LIST_HEAD(&device->event_handler_list); |
| spin_lock_init(&device->event_handler_lock); |
| + init_rwsem(&device->event_handler_rwsem); |
| mutex_init(&device->unregistration_lock); |
| /* |
| * client_data needs to be alloc because we don't want our mark to be |
| @@ -1820,17 +1821,15 @@ EXPORT_SYMBOL(ib_set_client_data); |
| * |
| * ib_register_event_handler() registers an event handler that will be |
| * called back when asynchronous IB events occur (as defined in |
| - * chapter 11 of the InfiniBand Architecture Specification). This |
| - * callback may occur in interrupt context. |
| + * chapter 11 of the InfiniBand Architecture Specification). This |
| + * callback occurs in workqueue context. |
| */ |
| void ib_register_event_handler(struct ib_event_handler *event_handler) |
| { |
| - unsigned long flags; |
| - |
| - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); |
| + down_write(&event_handler->device->event_handler_rwsem); |
| list_add_tail(&event_handler->list, |
| &event_handler->device->event_handler_list); |
| - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); |
| + up_write(&event_handler->device->event_handler_rwsem); |
| } |
| EXPORT_SYMBOL(ib_register_event_handler); |
| |
| @@ -1843,35 +1842,23 @@ EXPORT_SYMBOL(ib_register_event_handler); |
| */ |
| void ib_unregister_event_handler(struct ib_event_handler *event_handler) |
| { |
| - unsigned long flags; |
| - |
| - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); |
| + down_write(&event_handler->device->event_handler_rwsem); |
| list_del(&event_handler->list); |
| - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); |
| + up_write(&event_handler->device->event_handler_rwsem); |
| } |
| EXPORT_SYMBOL(ib_unregister_event_handler); |
| |
| -/** |
| - * ib_dispatch_event - Dispatch an asynchronous event |
| - * @event:Event to dispatch |
| - * |
| - * Low-level drivers must call ib_dispatch_event() to dispatch the |
| - * event to all registered event handlers when an asynchronous event |
| - * occurs. |
| - */ |
| -void ib_dispatch_event(struct ib_event *event) |
| +void ib_dispatch_event_clients(struct ib_event *event) |
| { |
| - unsigned long flags; |
| struct ib_event_handler *handler; |
| |
| - spin_lock_irqsave(&event->device->event_handler_lock, flags); |
| + down_read(&event->device->event_handler_rwsem); |
| |
| list_for_each_entry(handler, &event->device->event_handler_list, list) |
| handler->handler(handler, event); |
| |
| - spin_unlock_irqrestore(&event->device->event_handler_lock, flags); |
| + up_read(&event->device->event_handler_rwsem); |
| } |
| -EXPORT_SYMBOL(ib_dispatch_event); |
| |
| /** |
| * ib_query_port - Query IB port attributes |
| diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h |
| index 07390b56dcc4..4a76ec877603 100644 |
| --- a/include/rdma/ib_verbs.h |
| +++ b/include/rdma/ib_verbs.h |
| @@ -2220,7 +2220,6 @@ struct ib_port_cache { |
| |
| struct ib_cache { |
| rwlock_t lock; |
| - struct ib_event_handler event_handler; |
| }; |
| |
| struct ib_port_immutable { |
| @@ -2622,7 +2621,11 @@ struct ib_device { |
| struct rcu_head rcu_head; |
| |
| struct list_head event_handler_list; |
| - spinlock_t event_handler_lock; |
| + /* Protects event_handler_list */ |
| + struct rw_semaphore event_handler_rwsem; |
| + |
| + /* Protects QP's event_handler calls and open_qp list */ |
| + spinlock_t event_handler_lock; |
| |
| struct rw_semaphore client_data_rwsem; |
| struct xarray client_data; |
| @@ -2927,7 +2930,7 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, |
| |
| void ib_register_event_handler(struct ib_event_handler *event_handler); |
| void ib_unregister_event_handler(struct ib_event_handler *event_handler); |
| -void ib_dispatch_event(struct ib_event *event); |
| +void ib_dispatch_event(const struct ib_event *event); |
| |
| int ib_query_port(struct ib_device *device, |
| u8 port_num, struct ib_port_attr *port_attr); |
| -- |
| 2.7.4 |
| |