| From d0f973aeefc48a4a335aa5b950355e458e9fc984 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Wed, 2 Mar 2022 08:40:32 -0800 |
| Subject: xen: delay xen_hvm_init_time_ops() if kdump is boot on vcpu>=32 |
| |
| From: Dongli Zhang <dongli.zhang@oracle.com> |
| |
| [ Upstream commit eed05744322da07dd7e419432dcedf3c2e017179 ] |
| |
| sched_clock() can be used very early since commit 857baa87b642 |
| ("sched/clock: Enable sched clock early"). In addition, with commit |
| 38669ba205d1 ("x86/xen/time: Output xen sched_clock time from 0"), the |
| kdump kernel in a Xen HVM guest may panic at a very early stage when |
| accessing &__this_cpu_read(xen_vcpu)->time, as shown below: |
| |
| setup_arch() |
| -> init_hypervisor_platform() |
| -> x86_init.hyper.init_platform = xen_hvm_guest_init() |
| -> xen_hvm_init_time_ops() |
| -> xen_clocksource_read() |
| -> src = &__this_cpu_read(xen_vcpu)->time; |
| |
| This is because Xen HVM supports at most MAX_VIRT_CPUS=32 'vcpu_info' |
| structures embedded inside 'shared_info' during the early stage, until |
| xen_vcpu_setup() is used to allocate/relocate the 'vcpu_info' for the |
| boot cpu at an arbitrary address. |
| |
| However, when a Xen HVM guest panics on vcpu >= 32, |
| xen_clocksource_read() on vcpu >= 32 would panic, because |
| xen_vcpu_info_reset(0) sets per_cpu(xen_vcpu, cpu) = NULL when |
| vcpu >= 32. |
| |
| This patch calls xen_hvm_init_time_ops() again later, in |
| xen_hvm_smp_prepare_boot_cpu(), after the 'vcpu_info' for the boot vcpu |
| is registered, so that the initialization that was delayed because the |
| boot vcpu is >= 32 can complete. |
| |
| This issue can be reproduced on purpose via the command below on the |
| guest side when kdump/kexec is enabled: |
| |
| "taskset -c 33 echo c > /proc/sysrq-trigger" |
| |
| The bugfix for PVM is not implemented due to the lack of a testing |
| environment. |
| |
| [boris: xen_hvm_init_time_ops() returns on errors instead of jumping to end] |
| |
| Cc: Joe Jin <joe.jin@oracle.com> |
| Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com> |
| Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> |
| Link: https://lore.kernel.org/r/20220302164032.14569-3-dongli.zhang@oracle.com |
| Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| arch/x86/xen/smp_hvm.c | 6 ++++++ |
| arch/x86/xen/time.c | 24 +++++++++++++++++++++++- |
| 2 files changed, 29 insertions(+), 1 deletion(-) |
| |
| diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c |
| index 6ff3c887e0b9..b70afdff419c 100644 |
| --- a/arch/x86/xen/smp_hvm.c |
| +++ b/arch/x86/xen/smp_hvm.c |
| @@ -19,6 +19,12 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) |
| */ |
| xen_vcpu_setup(0); |
| |
| + /* |
| + * Called again in case the kernel boots on vcpu >= MAX_VIRT_CPUS. |
| + * Refer to comments in xen_hvm_init_time_ops(). |
| + */ |
| + xen_hvm_init_time_ops(); |
| + |
| /* |
| * The alternative logic (which patches the unlock/lock) runs before |
| * the smp bootup up code is activated. Hence we need to set this up |
| diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c |
| index d9c945ee1100..9ef0a5cca96e 100644 |
| --- a/arch/x86/xen/time.c |
| +++ b/arch/x86/xen/time.c |
| @@ -558,6 +558,11 @@ static void xen_hvm_setup_cpu_clockevents(void) |
| |
| void __init xen_hvm_init_time_ops(void) |
| { |
| + static bool hvm_time_initialized; |
| + |
| + if (hvm_time_initialized) |
| + return; |
| + |
| /* |
| * vector callback is needed otherwise we cannot receive interrupts |
| * on cpu > 0 and at this point we don't know how many cpus are |
| @@ -567,7 +572,22 @@ void __init xen_hvm_init_time_ops(void) |
| return; |
| |
| if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { |
| - pr_info("Xen doesn't support pvclock on HVM, disable pv timer"); |
| + pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer"); |
| + return; |
| + } |
| + |
| + /* |
| + * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'. |
| + * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest |
| + * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec). Accessing |
| + * __this_cpu_read(xen_vcpu) via xen_clocksource_read() would panic. |
| + * |
| + * The xen_hvm_init_time_ops() should be called again later after |
| + * __this_cpu_read(xen_vcpu) is available. |
| + */ |
| + if (!__this_cpu_read(xen_vcpu)) { |
| + pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n", |
| + xen_vcpu_nr(0)); |
| return; |
| } |
| |
| @@ -577,6 +597,8 @@ void __init xen_hvm_init_time_ops(void) |
| x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; |
| |
| x86_platform.set_wallclock = xen_set_wallclock; |
| + |
| + hvm_time_initialized = true; |
| } |
| #endif |
| |
| -- |
| 2.35.1 |
| |