| From 8646e53633f314e4d746a988240d3b951a92f94a Mon Sep 17 00:00:00 2001 |
| From: Sean Christopherson <seanjc@google.com> |
| Date: Wed, 1 Sep 2021 13:30:26 -0700 |
| Subject: KVM: rseq: Update rseq when processing NOTIFY_RESUME on xfer to KVM guest |
| |
| From: Sean Christopherson <seanjc@google.com> |
| |
| commit 8646e53633f314e4d746a988240d3b951a92f94a upstream. |
| |
| Invoke rseq's NOTIFY_RESUME handler when processing the flag prior to |
| transferring to a KVM guest, which is roughly equivalent to an exit to |
| userspace and processes many of the same pending actions. While the task |
| cannot be in an rseq critical section as the KVM path is reachable only |
| via ioctl(KVM_RUN), the side effects that apply to rseq outside of a |
| critical section still apply, e.g. the current CPU needs to be updated if |
| the task is migrated. |
| |
| Clearing TIF_NOTIFY_RESUME without informing rseq can lead to segfaults |
| and other badness in userspace VMMs that use rseq in combination with KVM, |
| e.g. due to the CPU ID being stale after task migration. |
| |
| Fixes: 72c3c0fe54a3 ("x86/kvm: Use generic xfer to guest work function") |
| Reported-by: Peter Foley <pefoley@google.com> |
| Bisected-by: Doug Evans <dje@google.com> |
| Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> |
| Cc: Shakeel Butt <shakeelb@google.com> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Cc: stable@vger.kernel.org |
| Signed-off-by: Sean Christopherson <seanjc@google.com> |
| Message-Id: <20210901203030.1292304-2-seanjc@google.com> |
| Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
| [sean: Resolve benign conflict due to unrelated access_ok() check in 5.10] |
| Signed-off-by: Sean Christopherson <seanjc@google.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| kernel/entry/kvm.c | 4 +++- |
| kernel/rseq.c | 13 ++++++++++--- |
| 2 files changed, 13 insertions(+), 4 deletions(-) |
| |
| --- a/kernel/entry/kvm.c |
| +++ b/kernel/entry/kvm.c |
| @@ -16,8 +16,10 @@ static int xfer_to_guest_mode_work(struc |
| if (ti_work & _TIF_NEED_RESCHED) |
| schedule(); |
| |
| - if (ti_work & _TIF_NOTIFY_RESUME) |
| + if (ti_work & _TIF_NOTIFY_RESUME) { |
| tracehook_notify_resume(NULL); |
| + rseq_handle_notify_resume(NULL, NULL); |
| + } |
| |
| ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work); |
| if (ret) |
| --- a/kernel/rseq.c |
| +++ b/kernel/rseq.c |
| @@ -268,9 +268,16 @@ void __rseq_handle_notify_resume(struct |
| return; |
| if (unlikely(!access_ok(t->rseq, sizeof(*t->rseq)))) |
| goto error; |
| - ret = rseq_ip_fixup(regs); |
| - if (unlikely(ret < 0)) |
| - goto error; |
| + /* |
| + * regs is NULL if and only if the caller is in a syscall path. Skip |
| + * fixup and leave rseq_cs as is so that rseq_syscall() will detect and |
| + * kill a misbehaving userspace on debug kernels. |
| + */ |
| + if (regs) { |
| + ret = rseq_ip_fixup(regs); |
| + if (unlikely(ret < 0)) |
| + goto error; |
| + } |
| if (unlikely(rseq_update_cpu_id(t))) |
| goto error; |
| return; |