| From aea948bb80b478ddc2448f7359d574387521a52d Mon Sep 17 00:00:00 2001 |
| From: Mahesh Salgaonkar <mahesh@linux.ibm.com> |
| Date: Tue, 6 Oct 2020 13:02:18 +0530 |
| Subject: powerpc/powernv/elog: Fix race while processing OPAL error log event. |
| |
| From: Mahesh Salgaonkar <mahesh@linux.ibm.com> |
| |
| commit aea948bb80b478ddc2448f7359d574387521a52d upstream. |
| |
| Every error log reported by OPAL is exported to userspace through a |
| sysfs interface and notified using kobject_uevent(). The userspace |
| daemon (opal_errd) then reads the error log and acknowledges the error |
| log is saved safely to disk. Once acknowledged the kernel removes the |
| respective sysfs file entry causing respective resources to be |
| released including kobject. |
| |
| However it's possible the userspace daemon may already be scanning |
| elog entries when a new sysfs elog entry is created by the kernel. |
| User daemon may read this new entry and ack it even before kernel can |
| notify userspace about it through kobject_uevent() call. If that |
| happens then we have a potential race between |
| elog_ack_store->kobject_put() and kobject_uevent which can lead to |
| use-after-free of a kernfs object resulting in a kernel crash. eg: |
| |
| BUG: Unable to handle kernel data access on read at 0x6b6b6b6b6b6b6bfb |
| Faulting instruction address: 0xc0000000008ff2a0 |
| Oops: Kernel access of bad area, sig: 11 [#1] |
| LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV |
| CPU: 27 PID: 805 Comm: irq/29-opal-elo Not tainted 5.9.0-rc2-gcc-8.2.0-00214-g6f56a67bcbb5-dirty #363 |
| ... |
| NIP kobject_uevent_env+0xa0/0x910 |
| LR elog_event+0x1f4/0x2d0 |
| Call Trace: |
| 0x5deadbeef0000122 (unreliable) |
| elog_event+0x1f4/0x2d0 |
| irq_thread_fn+0x4c/0xc0 |
| irq_thread+0x1c0/0x2b0 |
| kthread+0x1c4/0x1d0 |
| ret_from_kernel_thread+0x5c/0x6c |
| |
| This patch fixes this race by protecting the sysfs file |
| creation/notification by holding a reference count on kobject until we |
| safely send kobject_uevent(). |
| |
| The function create_elog_obj() returns the elog object which if used |
| by caller function will end up in use-after-free problem again. |
| However, the return value of create_elog_obj() function isn't being |
| used today and there is no need as well. Hence change it to return |
| void to make this fix complete. |
| |
| Fixes: 774fea1a38c6 ("powerpc/powernv: Read OPAL error log and export it through sysfs") |
| Cc: stable@vger.kernel.org # v3.15+ |
| Reported-by: Oliver O'Halloran <oohall@gmail.com> |
| Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com> |
| Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> |
| Reviewed-by: Oliver O'Halloran <oohall@gmail.com> |
| Reviewed-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com> |
| [mpe: Rework the logic to use a single return, reword comments, add oops] |
| Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> |
| Link: https://lore.kernel.org/r/20201006122051.190176-1-mpe@ellerman.id.au |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| arch/powerpc/platforms/powernv/opal-elog.c | 33 ++++++++++++++++++++++------- |
| 1 file changed, 26 insertions(+), 7 deletions(-) |
| |
| --- a/arch/powerpc/platforms/powernv/opal-elog.c |
| +++ b/arch/powerpc/platforms/powernv/opal-elog.c |
| @@ -179,14 +179,14 @@ static ssize_t raw_attr_read(struct file |
| return count; |
| } |
| |
| -static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) |
| +static void create_elog_obj(uint64_t id, size_t size, uint64_t type) |
| { |
| struct elog_obj *elog; |
| int rc; |
| |
| elog = kzalloc(sizeof(*elog), GFP_KERNEL); |
| if (!elog) |
| - return NULL; |
| + return; |
| |
| elog->kobj.kset = elog_kset; |
| |
| @@ -219,18 +219,37 @@ static struct elog_obj *create_elog_obj( |
| rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); |
| if (rc) { |
| kobject_put(&elog->kobj); |
| - return NULL; |
| + return; |
| } |
| |
| + /* |
| + * As soon as the sysfs file for this elog is created/activated there is |
| + * a chance the opal_errd daemon (or any userspace) might read and |
| + * acknowledge the elog before kobject_uevent() is called. If that |
| + * happens then there is a potential race between |
| + * elog_ack_store->kobject_put() and kobject_uevent() which leads to a |
| + * use-after-free of a kernfs object resulting in a kernel crash. |
| + * |
| + * To avoid that, we need to take a reference on behalf of the bin file, |
| + * so that our reference remains valid while we call kobject_uevent(). |
| + * We then drop our reference before exiting the function, leaving the |
| + * bin file to drop the last reference (if it hasn't already). |
| + */ |
| + |
| + /* Take a reference for the bin file */ |
| + kobject_get(&elog->kobj); |
| rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); |
| - if (rc) { |
| + if (rc == 0) { |
| + kobject_uevent(&elog->kobj, KOBJ_ADD); |
| + } else { |
| + /* Drop the reference taken for the bin file */ |
| kobject_put(&elog->kobj); |
| - return NULL; |
| } |
| |
| - kobject_uevent(&elog->kobj, KOBJ_ADD); |
| + /* Drop our reference */ |
| + kobject_put(&elog->kobj); |
| |
| - return elog; |
| + return; |
| } |
| |
| static irqreturn_t elog_event(int irq, void *data) |