| From 0254127ab977e70798707a7a2b757c9f3c971210 Mon Sep 17 00:00:00 2001 |
| From: Petr Pavlu <petr.pavlu@suse.com> |
| Date: Mon, 5 Dec 2022 11:35:57 +0100 |
| Subject: module: Don't wait for GOING modules |
| |
| From: Petr Pavlu <petr.pavlu@suse.com> |
| |
| commit 0254127ab977e70798707a7a2b757c9f3c971210 upstream. |
| |
| During a system boot, it can happen that the kernel receives a burst of |
| requests to insert the same module but loading it eventually fails |
| during its init call. For instance, udev can make a request to insert |
| a frequency module for each individual CPU when another frequency module |
| is already loaded which causes the init function of the new module to |
| return an error. |
| |
| Since commit 6e6de3dee51a ("kernel/module.c: Only return -EEXIST for |
| modules that have finished loading"), the kernel waits for modules in |
| MODULE_STATE_GOING state to finish unloading before making another |
| attempt to load the same module. |
| |
| This creates unnecessary work in the described scenario and delays the |
| boot. In the worst case, it can prevent udev from loading drivers for |
| other devices and might cause timeouts of services waiting on them and |
| subsequently a failed boot. |
| |
| This patch attempts a different solution for the problem 6e6de3dee51a |
| was trying to solve. Rather than waiting for the unloading to complete, |
| it returns a different error code (-EBUSY) for modules in the GOING |
| state. This should avoid the error situation that was described in |
| 6e6de3dee51a (user space attempting to load a dependent module because |
| the -EEXIST error code would suggest to user space that the first module |
| had been loaded successfully), while avoiding the delay situation too. |
| |
| This has been tested on linux-next since December 2022 and passes |
| all kmod selftests except test 0009 with module compression enabled |
| but it has been confirmed that this issue has existed and has gone |
| unnoticed since prior to this commit and can also be reproduced without |
| module compression with a simple usleep(5000000) on tools/modprobe.c [0]. |
| These failures are caused by hitting the kernel mod_concurrent_max and can |
| happen either due to a self inflicted kernel module auto-loead DoS somehow |
| or on a system with large CPU count and each CPU count incorrectly triggering |
| many module auto-loads. Both of those issues need to be fixed in-kernel. |
| |
| [0] https://lore.kernel.org/all/Y9A4fiobL6IHp%2F%2FP@bombadil.infradead.org/ |
| |
| Fixes: 6e6de3dee51a ("kernel/module.c: Only return -EEXIST for modules that have finished loading") |
| Co-developed-by: Martin Wilck <mwilck@suse.com> |
| Signed-off-by: Martin Wilck <mwilck@suse.com> |
| Signed-off-by: Petr Pavlu <petr.pavlu@suse.com> |
| Cc: stable@vger.kernel.org |
| Reviewed-by: Petr Mladek <pmladek@suse.com> |
| [mcgrof: enhance commit log with testing and kmod test result interpretation ] |
| Signed-off-by: Luis Chamberlain <mcgrof@kernel.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| kernel/module.c | 26 +++++++++++++++++++++----- |
| 1 file changed, 21 insertions(+), 5 deletions(-) |
| |
| --- a/kernel/module.c |
| +++ b/kernel/module.c |
| @@ -3654,7 +3654,8 @@ static bool finished_loading(const char |
| sched_annotate_sleep(); |
| mutex_lock(&module_mutex); |
| mod = find_module_all(name, strlen(name), true); |
| - ret = !mod || mod->state == MODULE_STATE_LIVE; |
| + ret = !mod || mod->state == MODULE_STATE_LIVE |
| + || mod->state == MODULE_STATE_GOING; |
| mutex_unlock(&module_mutex); |
| |
| return ret; |
| @@ -3820,20 +3821,35 @@ static int add_unformed_module(struct mo |
| |
| mod->state = MODULE_STATE_UNFORMED; |
| |
| -again: |
| mutex_lock(&module_mutex); |
| old = find_module_all(mod->name, strlen(mod->name), true); |
| if (old != NULL) { |
| - if (old->state != MODULE_STATE_LIVE) { |
| + if (old->state == MODULE_STATE_COMING |
| + || old->state == MODULE_STATE_UNFORMED) { |
| /* Wait in case it fails to load. */ |
| mutex_unlock(&module_mutex); |
| err = wait_event_interruptible(module_wq, |
| finished_loading(mod->name)); |
| if (err) |
| goto out_unlocked; |
| - goto again; |
| + |
| + /* The module might have gone in the meantime. */ |
| + mutex_lock(&module_mutex); |
| + old = find_module_all(mod->name, strlen(mod->name), |
| + true); |
| } |
| - err = -EEXIST; |
| + |
| + /* |
| + * We are here only when the same module was being loaded. Do |
| + * not try to load it again right now. It prevents long delays |
| + * caused by serialized module load failures. It might happen |
| + * when more devices of the same type trigger load of |
| + * a particular module. |
| + */ |
| + if (old && old->state == MODULE_STATE_LIVE) |
| + err = -EEXIST; |
| + else |
| + err = -EBUSY; |
| goto out; |
| } |
| mod_update_bounds(mod); |