| From b6aa57c69cb26ea0160c51f7cf45f1af23542686 Mon Sep 17 00:00:00 2001 |
| From: Sean Christopherson <sean.j.christopherson@intel.com> |
| Date: Wed, 17 Apr 2019 10:15:34 -0700 |
| Subject: KVM: lapic: Convert guest TSC to host time domain if necessary |
| |
| From: Sean Christopherson <sean.j.christopherson@intel.com> |
| |
| commit b6aa57c69cb26ea0160c51f7cf45f1af23542686 upstream. |
| |
| To minimize the latency of timer interrupts as observed by the guest, |
| KVM adjusts the values it programs into the host timers to account for |
| the host's overhead of programming and handling the timer event. In |
| the event that the adjustments are too aggressive, i.e. the timer fires |
| earlier than the guest expects, KVM busy waits immediately prior to |
| entering the guest. |
| |
| Currently, KVM manually converts the delay from nanoseconds to clock |
| cycles. But, the conversion is done in the guest's time domain, while |
| the delay occurs in the host's time domain. This is perfectly ok when |
| the guest and host are using the same TSC ratio, but if the guest is |
| using a different ratio then the delay may not be accurate and could |
| wait too little or too long. |
| |
| When the guest is not using the host's ratio, convert the delay from |
| guest clock cycles to host nanoseconds and use ndelay() instead of |
| __delay() to provide more accurate timing. Because converting to |
| nanoseconds is relatively expensive, e.g. requires division and more |
| multiplication ops, continue using __delay() directly when guest and |
| host TSCs are running at the same ratio. |
| |
| Cc: Liran Alon <liran.alon@oracle.com> |
| Cc: Wanpeng Li <wanpengli@tencent.com> |
| Cc: stable@vger.kernel.org |
| Fixes: 3b8a5df6c4dc6 ("KVM: LAPIC: Tune lapic_timer_advance_ns automatically") |
| Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> |
| Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| arch/x86/kvm/lapic.c | 24 +++++++++++++++++++++--- |
| 1 file changed, 21 insertions(+), 3 deletions(-) |
| |
| --- a/arch/x86/kvm/lapic.c |
| +++ b/arch/x86/kvm/lapic.c |
| @@ -1478,6 +1478,26 @@ static bool lapic_timer_int_injected(str |
| return false; |
| } |
| |
+static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
+{
+ u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
+
+ /*
+ * Delay for guest_cycles, capped at timer_advance_ns. If the guest TSC
+ * runs at a different ratio than the host, convert the delay to
+ * nanoseconds via virtual_tsc_khz and use ndelay(). __delay() uses
+ * delay_tsc when the hardware has TSC, thus always for VMX hardware.
+ */
+ if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
+ __delay(min(guest_cycles,
+ nsec_to_cycles(vcpu, timer_advance_ns)));
+ } else {
+ u64 delay_ns = guest_cycles * 1000000ULL;
+ do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
+ ndelay(min_t(u32, delay_ns, timer_advance_ns));
+ }
+}
| + |
| void wait_lapic_expire(struct kvm_vcpu *vcpu) |
| { |
| struct kvm_lapic *apic = vcpu->arch.apic; |
| @@ -1498,10 +1518,8 @@ void wait_lapic_expire(struct kvm_vcpu * |
| guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); |
| trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); |
| |
| - /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ |
| if (guest_tsc < tsc_deadline) |
| - __delay(min(tsc_deadline - guest_tsc, |
| - nsec_to_cycles(vcpu, timer_advance_ns))); |
| + __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); |
| |
| if (!apic->lapic_timer.timer_advance_adjust_done) { |
| /* too early */ |