| From 039c4b541a8da823781f3b2a7acb29170aa90bfa Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Wed, 23 Feb 2022 12:58:21 +1100 |
| Subject: powerpc/code-patching: Pre-map patch area |
| |
| From: Michael Ellerman <mpe@ellerman.id.au> |
| |
| [ Upstream commit 591b4b268435f00d2f0b81f786c2c7bd5ef66416 ] |
| |
| Paul reported a warning with DEBUG_ATOMIC_SLEEP=y: |
| |
| BUG: sleeping function called from invalid context at include/linux/sched/mm.h:256 |
| in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: swapper/0 |
| preempt_count: 0, expected: 0 |
| ... |
| Call Trace: |
| dump_stack_lvl+0xa0/0xec (unreliable) |
| __might_resched+0x2f4/0x310 |
| kmem_cache_alloc+0x220/0x4b0 |
| __pud_alloc+0x74/0x1d0 |
| hash__map_kernel_page+0x2cc/0x390 |
| do_patch_instruction+0x134/0x4a0 |
| arch_jump_label_transform+0x64/0x78 |
| __jump_label_update+0x148/0x180 |
| static_key_enable_cpuslocked+0xd0/0x120 |
| static_key_enable+0x30/0x50 |
| check_kvm_guest+0x60/0x88 |
| pSeries_smp_probe+0x54/0xb0 |
| smp_prepare_cpus+0x3e0/0x430 |
| kernel_init_freeable+0x20c/0x43c |
| kernel_init+0x30/0x1a0 |
| ret_from_kernel_thread+0x5c/0x64 |
| |
| Peter pointed out that this is because do_patch_instruction() has |
| disabled interrupts, but then map_patch_area() calls map_kernel_page() |
| then hash__map_kernel_page() which does a sleeping memory allocation. |
| |
| We only see the warning in KVM guests with SMT enabled, which is not |
| particularly common, or on other platforms if CONFIG_KPROBES is |
| disabled, also not common. The reason we don't see it in most |
| configurations is that another path that happens to have interrupts |
| enabled has allocated the required page tables for us, e.g. there's a |
| path in kprobes init that does that. That's just pure luck though. |
| |
| As Christophe suggested, the simplest solution is to do a dummy |
| map/unmap when we initialise the patching, so that any required page |
| table levels are pre-allocated before the first call to |
| do_patch_instruction(). This works because the unmap doesn't free any |
| page tables that were allocated by the map; it just clears the PTE, |
| leaving the page table levels there for the next map. |
| |
| Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> |
| Debugged-by: Peter Zijlstra <peterz@infradead.org> |
| Suggested-by: Christophe Leroy <christophe.leroy@csgroup.eu> |
| Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> |
| Link: https://lore.kernel.org/r/20220223015821.473097-1-mpe@ellerman.id.au |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| arch/powerpc/lib/code-patching.c | 14 ++++++++++++++ |
| 1 file changed, 14 insertions(+) |
| |
| diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c |
| index 906d43463366..00c68e7fb11e 100644 |
| --- a/arch/powerpc/lib/code-patching.c |
| +++ b/arch/powerpc/lib/code-patching.c |
| @@ -43,9 +43,14 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) |
| #ifdef CONFIG_STRICT_KERNEL_RWX |
| static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); |
| |
| +static int map_patch_area(void *addr, unsigned long text_poke_addr); |
| +static void unmap_patch_area(unsigned long addr); |
| + |
| static int text_area_cpu_up(unsigned int cpu) |
| { |
| struct vm_struct *area; |
| + unsigned long addr; |
| + int err; |
| |
| area = get_vm_area(PAGE_SIZE, VM_ALLOC); |
| if (!area) { |
| @@ -53,6 +58,15 @@ static int text_area_cpu_up(unsigned int cpu) |
| cpu); |
| return -1; |
| } |
| + |
| + // Map/unmap the area to ensure all page tables are pre-allocated |
| + addr = (unsigned long)area->addr; |
| + err = map_patch_area(empty_zero_page, addr); |
| + if (err) |
| + return err; |
| + |
| + unmap_patch_area(addr); |
| + |
| this_cpu_write(text_poke_area, area); |
| |
| return 0; |
| -- |
| 2.35.1 |
| |