| From 8aac0e2338916e273ccbd438a2b7a1e8c61749f5 Mon Sep 17 00:00:00 2001 |
| From: Dan Williams <dan.j.williams@intel.com> |
| Date: Wed, 17 Jul 2019 18:07:58 -0700 |
| Subject: libnvdimm/bus: Prevent duplicate device_unregister() calls |
| |
| From: Dan Williams <dan.j.williams@intel.com> |
| |
| commit 8aac0e2338916e273ccbd438a2b7a1e8c61749f5 upstream. |
| |
| A multithreaded namespace creation/destruction stress test currently |
| fails with signatures like the following: |
| |
| sysfs group 'power' not found for kobject 'dax1.1' |
| RIP: 0010:sysfs_remove_group+0x76/0x80 |
| Call Trace: |
| device_del+0x73/0x370 |
| device_unregister+0x16/0x50 |
| nd_async_device_unregister+0x1e/0x30 [libnvdimm] |
| async_run_entry_fn+0x39/0x160 |
| process_one_work+0x23c/0x5e0 |
| worker_thread+0x3c/0x390 |
| |
| BUG: kernel NULL pointer dereference, address: 0000000000000020 |
| RIP: 0010:klist_put+0x1b/0x6c |
| Call Trace: |
| klist_del+0xe/0x10 |
| device_del+0x8a/0x2c9 |
| ? __switch_to_asm+0x34/0x70 |
| ? __switch_to_asm+0x40/0x70 |
| device_unregister+0x44/0x4f |
| nd_async_device_unregister+0x22/0x2d [libnvdimm] |
| async_run_entry_fn+0x47/0x15a |
| process_one_work+0x1a2/0x2eb |
| worker_thread+0x1b8/0x26e |
| |
| Use the kill_device() helper to atomically resolve the race of multiple |
| threads issuing kill, device_unregister(), requests. |
| |
| Reported-by: Jane Chu <jane.chu@oracle.com> |
| Reported-by: Erwin Tsaur <erwin.tsaur@oracle.com> |
| Fixes: 4d88a97aa9e8 ("libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver...") |
| Cc: <stable@vger.kernel.org> |
| Link: https://github.com/pmem/ndctl/issues/96 |
| Tested-by: Tested-by: Jane Chu <jane.chu@oracle.com> |
| Link: https://lore.kernel.org/r/156341207846.292348.10435719262819764054.stgit@dwillia2-desk3.amr.corp.intel.com |
| Signed-off-by: Dan Williams <dan.j.williams@intel.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| drivers/nvdimm/bus.c | 25 +++++++++++++++++++++++++ |
| 1 file changed, 25 insertions(+) |
| |
| --- a/drivers/nvdimm/bus.c |
| +++ b/drivers/nvdimm/bus.c |
| @@ -547,13 +547,38 @@ EXPORT_SYMBOL(nd_device_register); |
| |
| void nd_device_unregister(struct device *dev, enum nd_async_mode mode) |
| { |
| + bool killed; |
| + |
| switch (mode) { |
| case ND_ASYNC: |
| + /* |
| + * In the async case this is being triggered with the |
| + * device lock held and the unregistration work needs to |
| + * be moved out of line iff this is thread has won the |
| + * race to schedule the deletion. |
| + */ |
| + if (!kill_device(dev)) |
| + return; |
| + |
| get_device(dev); |
| async_schedule_domain(nd_async_device_unregister, dev, |
| &nd_async_domain); |
| break; |
| case ND_SYNC: |
| + /* |
| + * In the sync case the device is being unregistered due |
| + * to a state change of the parent. Claim the kill state |
| + * to synchronize against other unregistration requests, |
| + * or otherwise let the async path handle it if the |
| + * unregistration was already queued. |
| + */ |
| + device_lock(dev); |
| + killed = kill_device(dev); |
| + device_unlock(dev); |
| + |
| + if (!killed) |
| + return; |
| + |
| nd_synchronize(); |
| device_unregister(dev); |
| break; |