| From 2995fa78e423d7193f3b57835f6c1c75006a0315 Mon Sep 17 00:00:00 2001 |
| From: Mikulas Patocka <mpatocka@redhat.com> |
| Date: Mon, 13 Jan 2014 19:37:54 -0500 |
| Subject: dm sysfs: fix a module unload race |
| |
| From: Mikulas Patocka <mpatocka@redhat.com> |
| |
| commit 2995fa78e423d7193f3b57835f6c1c75006a0315 upstream. |
| |
| This reverts commit be35f48610 ("dm: wait until embedded kobject is |
| released before destroying a device") and provides an improved fix. |
| |
| The kobject release code that calls the completion must be placed in a |
| non-module file; otherwise there is a module unload race (if the process |
| calling dm_kobject_release is preempted and the DM module is unloaded after |
| the completion is triggered, but before dm_kobject_release returns). |
| |
| To fix this race, this patch moves the completion code to dm-builtin.c |
| which is always compiled directly into the kernel if BLK_DEV_DM is |
| selected. |
| |
| The patch introduces a new dm_kobject_holder structure. Its purpose is |
| to keep the completion and kobject in one place, so that they can be |
| accessed from non-module code without the need to export the layout of |
| struct mapped_device to that code. |
| |
| Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> |
| Signed-off-by: Mike Snitzer <snitzer@redhat.com> |
| Cc: stable@vger.kernel.org |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| |
| --- |
| drivers/md/Kconfig | 4 ++++ |
| drivers/md/Makefile | 1 + |
| drivers/md/dm-builtin.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ |
| drivers/md/dm-sysfs.c | 5 ----- |
| drivers/md/dm.c | 26 +++++--------------------- |
| drivers/md/dm.h | 17 ++++++++++++++++- |
| 6 files changed, 74 insertions(+), 27 deletions(-) |
| |
| --- a/drivers/md/Kconfig |
| +++ b/drivers/md/Kconfig |
| @@ -176,8 +176,12 @@ config MD_FAULTY |
| |
| source "drivers/md/bcache/Kconfig" |
| |
| +config BLK_DEV_DM_BUILTIN |
| + boolean |
| + |
| config BLK_DEV_DM |
| tristate "Device mapper support" |
| + select BLK_DEV_DM_BUILTIN |
| ---help--- |
| Device-mapper is a low level volume manager. It works by allowing |
| people to specify mappings for ranges of logical sectors. Various |
| --- a/drivers/md/Makefile |
| +++ b/drivers/md/Makefile |
| @@ -32,6 +32,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o |
| obj-$(CONFIG_BCACHE) += bcache/ |
| obj-$(CONFIG_BLK_DEV_MD) += md-mod.o |
| obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o |
| +obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o |
| obj-$(CONFIG_DM_BUFIO) += dm-bufio.o |
| obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o |
| obj-$(CONFIG_DM_CRYPT) += dm-crypt.o |
| --- /dev/null |
| +++ b/drivers/md/dm-builtin.c |
| @@ -0,0 +1,48 @@ |
| +#include "dm.h" |
| + |
| +/* |
| + * The kobject release method must not be placed in the module itself, |
| + * otherwise we are subject to module unload races. |
| + * |
| + * The release method is called when the last reference to the kobject is |
| + * dropped. It may be called by any other kernel code that drops the last |
| + * reference. |
| + * |
| + * The release method suffers from a module unload race. We may prevent the |
| + * module from being unloaded at the start of the release method (using |
| + * increased module reference count or synchronizing against the release |
| + * method), however there is no way to prevent the module from being |
| + * unloaded at the end of the release method. |
| + * |
| + * If this code were placed in the dm module, the following race may |
| + * happen: |
| + * 1. Some other process takes a reference to dm kobject |
| + * 2. The user issues ioctl function to unload the dm device |
| + * 3. dm_sysfs_exit calls kobject_put, however the object is not released |
| + * because of the other reference taken at step 1 |
| + * 4. dm_sysfs_exit waits on the completion |
| + * 5. The other process that took the reference in step 1 drops it, |
| + * dm_kobject_release is called from this process |
| + * 6. dm_kobject_release calls complete() |
| + * 7. a reschedule happens before dm_kobject_release returns |
| + * 8. dm_sysfs_exit continues, the dm device is unloaded, module reference |
| + * count is decremented |
| + * 9. The user unloads the dm module |
| + * 10. The other process that was rescheduled in step 7 continues to run, |
| + * it is now executing code in unloaded module, so it crashes |
| + * |
| + * Note that if the process that takes the foreign reference to dm kobject |
| + * has a low priority and the system is sufficiently loaded with |
| + * higher-priority processes that prevent the low-priority process from |
| + * being scheduled long enough, this bug may really happen. |
| + * |
| + * In order to fix this module unload race, we place the release method |
| + * into helper code that is compiled directly into the kernel. |
| + */ |
| + |
| +void dm_kobject_release(struct kobject *kobj) |
| +{ |
| + complete(dm_get_completion_from_kobject(kobj)); |
| +} |
| + |
| +EXPORT_SYMBOL(dm_kobject_release); |
| --- a/drivers/md/dm-sysfs.c |
| +++ b/drivers/md/dm-sysfs.c |
| @@ -79,11 +79,6 @@ static const struct sysfs_ops dm_sysfs_o |
| .show = dm_attr_show, |
| }; |
| |
| -static void dm_kobject_release(struct kobject *kobj) |
| -{ |
| - complete(dm_get_completion_from_kobject(kobj)); |
| -} |
| - |
| /* |
| * dm kobject is embedded in mapped_device structure |
| * no need to define release function here |
| --- a/drivers/md/dm.c |
| +++ b/drivers/md/dm.c |
| @@ -194,11 +194,8 @@ struct mapped_device { |
| /* forced geometry settings */ |
| struct hd_geometry geometry; |
| |
| - /* sysfs handle */ |
| - struct kobject kobj; |
| - |
| - /* wait until the kobject is released */ |
| - struct completion kobj_completion; |
| + /* kobject and completion */ |
| + struct dm_kobject_holder kobj_holder; |
| |
| /* zero-length flush that will be cloned and submitted to targets */ |
| struct bio flush_bio; |
| @@ -2008,7 +2005,7 @@ static struct mapped_device *alloc_dev(i |
| init_waitqueue_head(&md->wait); |
| INIT_WORK(&md->work, dm_wq_work); |
| init_waitqueue_head(&md->eventq); |
| - init_completion(&md->kobj_completion); |
| + init_completion(&md->kobj_holder.completion); |
| |
| md->disk->major = _major; |
| md->disk->first_minor = minor; |
| @@ -2870,20 +2867,14 @@ struct gendisk *dm_disk(struct mapped_de |
| |
| struct kobject *dm_kobject(struct mapped_device *md) |
| { |
| - return &md->kobj; |
| + return &md->kobj_holder.kobj; |
| } |
| |
| -/* |
| - * struct mapped_device should not be exported outside of dm.c |
| - * so use this check to verify that kobj is part of md structure |
| - */ |
| struct mapped_device *dm_get_from_kobject(struct kobject *kobj) |
| { |
| struct mapped_device *md; |
| |
| - md = container_of(kobj, struct mapped_device, kobj); |
| - if (&md->kobj != kobj) |
| - return NULL; |
| + md = container_of(kobj, struct mapped_device, kobj_holder.kobj); |
| |
| if (test_bit(DMF_FREEING, &md->flags) || |
| dm_deleting_md(md)) |
| @@ -2893,13 +2884,6 @@ struct mapped_device *dm_get_from_kobjec |
| return md; |
| } |
| |
| -struct completion *dm_get_completion_from_kobject(struct kobject *kobj) |
| -{ |
| - struct mapped_device *md = container_of(kobj, struct mapped_device, kobj); |
| - |
| - return &md->kobj_completion; |
| -} |
| - |
| int dm_suspended_md(struct mapped_device *md) |
| { |
| return test_bit(DMF_SUSPENDED, &md->flags); |
| --- a/drivers/md/dm.h |
| +++ b/drivers/md/dm.h |
| @@ -16,6 +16,7 @@ |
| #include <linux/blkdev.h> |
| #include <linux/hdreg.h> |
| #include <linux/completion.h> |
| +#include <linux/kobject.h> |
| |
| #include "dm-stats.h" |
| |
| @@ -139,11 +140,25 @@ void dm_interface_exit(void); |
| /* |
| * sysfs interface |
| */ |
| +struct dm_kobject_holder { |
| + struct kobject kobj; |
| + struct completion completion; |
| +}; |
| + |
| +static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) |
| +{ |
| + return &container_of(kobj, struct dm_kobject_holder, kobj)->completion; |
| +} |
| + |
| int dm_sysfs_init(struct mapped_device *md); |
| void dm_sysfs_exit(struct mapped_device *md); |
| struct kobject *dm_kobject(struct mapped_device *md); |
| struct mapped_device *dm_get_from_kobject(struct kobject *kobj); |
| -struct completion *dm_get_completion_from_kobject(struct kobject *kobj); |
| + |
| +/* |
| + * The kobject helper |
| + */ |
| +void dm_kobject_release(struct kobject *kobj); |
| |
| /* |
| * Targets for linear and striped mappings |