From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 19 Feb 2016 09:46:39 +0100
Subject: [PATCH 3/5] KVM: Use simple waitqueue for vcpu->wq

The problem:

On -rt, an emulated LAPIC timer instance has the following path:

1) hard interrupt
2) ksoftirqd is scheduled
3) ksoftirqd wakes up vcpu thread
4) vcpu thread is scheduled

This extra context switch introduces unnecessary latency in the
LAPIC path for a KVM guest.

The solution:

Allow waking up vcpu thread from hardirq context,
thus avoiding the need for ksoftirqd to be scheduled.

Normal waitqueues make use of spinlocks, which on -RT
are sleepable locks. Therefore, waking up a waitqueue
waiter involves locking a sleeping lock, which
is not allowed from hard interrupt context.
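
For illustration, a minimal sketch of the wait/wake pattern the patch
switches to. The swait calls are the actual simple-waitqueue API used
by this patch; the surrounding demo_vcpu structure and functions are
hypothetical, for the example only:

  #include <linux/sched.h>
  #include <linux/swait.h>

  struct demo_vcpu {                     /* hypothetical vcpu stand-in */
          struct swait_queue_head wq;    /* backed by a raw spinlock */
          bool runnable;
  };

  static void demo_vcpu_init(struct demo_vcpu *v)
  {
          v->runnable = false;
          init_swait_queue_head(&v->wq);
  }

  /* Waiter side: runs in process context (the vcpu thread). */
  static void demo_vcpu_block(struct demo_vcpu *v)
  {
          DECLARE_SWAITQUEUE(wait);

          for (;;) {
                  /* set task state, then re-check the condition */
                  prepare_to_swait(&v->wq, &wait, TASK_INTERRUPTIBLE);
                  if (v->runnable)
                          break;
                  schedule();
          }
          finish_swait(&v->wq, &wait);
  }

  /*
   * Waker side: legal from hard interrupt context (e.g. a hrtimer
   * callback), because swake_up() only takes a raw spinlock. The
   * store to ->runnable is ordered against the waiter by the
   * locking inside prepare_to_swait()/swake_up().
   */
  static void demo_vcpu_kick(struct demo_vcpu *v)
  {
          v->runnable = true;
          if (swait_active(&v->wq))
                  swake_up(&v->wq);
  }

Simple waitqueues deliberately drop features of the full waitqueue API
(custom wake callbacks, for instance), keeping the wakeup path short
enough to run under a raw spinlock; that is what makes the wakeup safe
from hardirq context on -RT.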

cyclictest command line:

This patch reduces the average latency in my tests from 14us to 11us.

Daniel writes:
Paolo asked for numbers from the kvm-unit-tests/tscdeadline_latency
benchmark on mainline. The test was run 1000 times on
tip/sched/core 4.4.0-rc8-01134-g0905f04:

  ./x86-run x86/tscdeadline_latency.flat -cpu host

with idle=poll.

The test does not deliver really stable numbers, though most of
them are smaller. Paolo writes:

"Anything above ~10000 cycles means that the host went to C1 or
lower---the number means more or less nothing in that case.

The mean shows an improvement indeed."

Before:

                min             max          mean           std
count   1000.000000     1000.000000   1000.000000   1000.000000
mean    5162.596000  2019270.084000   5824.491541  20681.645558
std       75.431231   622607.723969     89.575700   6492.272062
min     4466.000000    23928.000000   5537.926500    585.864966
25%     5163.000000  1613252.750000   5790.132275  16683.745433
50%     5175.000000  2281919.000000   5834.654000  23151.990026
75%     5190.000000  2382865.750000   5861.412950  24148.206168
max     5228.000000  4175158.000000   6254.827300  46481.048691

After:

                min             max          mean           std
count   1000.000000     1000.00000   1000.000000   1000.000000
mean    5143.511000  2076886.10300   5813.312474  21207.357565
std       77.668322   610413.09583     86.541500   6331.915127
min     4427.000000    25103.00000   5529.756600    559.187707
25%     5148.000000  1691272.75000   5784.889825  17473.518244
50%     5160.000000  2308328.50000   5832.025000  23464.837068
75%     5172.000000  2393037.75000   5853.177675  24223.969976
max     5222.000000  3922458.00000   6186.720500  42520.379830

[Patch was originally based on the swait implementation found in the -rt
tree. Daniel ported it to mainline's version and gathered the
benchmark numbers for the tscdeadline_latency test.]

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: linux-rt-users@vger.kernel.org
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1455871601-27484-4-git-send-email-wagi@monom.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arm/kvm/arm.c                  |  8 ++++----
 arch/arm/kvm/psci.c                 |  4 ++--
 arch/mips/kvm/mips.c                |  8 ++++----
 arch/powerpc/include/asm/kvm_host.h |  4 ++--
 arch/powerpc/kvm/book3s_hv.c        | 23 +++++++++++------------
 arch/s390/include/asm/kvm_host.h    |  2 +-
 arch/s390/kvm/interrupt.c           |  4 ++--
 arch/x86/kvm/lapic.c                |  6 +++---
 include/linux/kvm_host.h            |  5 +++--
 virt/kvm/async_pf.c                 |  4 ++--
 virt/kvm/kvm_main.c                 | 17 ++++++++---------
 11 files changed, 42 insertions(+), 43 deletions(-)

--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -498,18 +498,18 @@ static void kvm_arm_resume_guest(struct
 	struct kvm_vcpu *vcpu;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
-		wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+		struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
 
 		vcpu->arch.pause = false;
-		wake_up_interruptible(wq);
+		swake_up(wq);
 	}
 }
 
 static void vcpu_sleep(struct kvm_vcpu *vcpu)
 {
-	wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
 
-	wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+	swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
 				       (!vcpu->arch.pause)));
 }
 
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -70,7 +70,7 @@ static unsigned long kvm_psci_vcpu_on(st
 {
 	struct kvm *kvm = source_vcpu->kvm;
 	struct kvm_vcpu *vcpu = NULL;
-	wait_queue_head_t *wq;
+	struct swait_queue_head *wq;
 	unsigned long cpu_id;
 	unsigned long context_id;
 	phys_addr_t target_pc;
@@ -119,7 +119,7 @@ static unsigned long kvm_psci_vcpu_on(st
 	smp_mb();		/* Make sure the above is visible */
 
 	wq = kvm_arch_vcpu_wq(vcpu);
-	wake_up_interruptible(wq);
+	swake_up(wq);
 
 	return PSCI_RET_SUCCESS;
 }
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -445,8 +445,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_
 
 	dvcpu->arch.wait = 0;
 
-	if (waitqueue_active(&dvcpu->wq))
-		wake_up_interruptible(&dvcpu->wq);
+	if (swait_active(&dvcpu->wq))
+		swake_up(&dvcpu->wq);
 
 	return 0;
 }
@@ -1174,8 +1174,8 @@ static void kvm_mips_comparecount_func(u
 	kvm_mips_callbacks->queue_timer_int(vcpu);
 
 	vcpu->arch.wait = 0;
-	if (waitqueue_active(&vcpu->wq))
-		wake_up_interruptible(&vcpu->wq);
+	if (swait_active(&vcpu->wq))
+		swake_up(&vcpu->wq);
 }
 
 /* low level hrtimer wake routine */
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -286,7 +286,7 @@ struct kvmppc_vcore {
 	struct list_head runnable_threads;
 	struct list_head preempt_list;
 	spinlock_t lock;
-	wait_queue_head_t wq;
+	struct swait_queue_head wq;
 	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
 	u64 stolen_tb;
 	u64 preempt_tb;
@@ -626,7 +626,7 @@ struct kvm_vcpu_arch {
 	u8 prodded;
 	u32 last_inst;
 
-	wait_queue_head_t *wqp;
+	struct swait_queue_head *wqp;
 	struct kvmppc_vcore *vcore;
 	int ret;
 	int trap;
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -114,11 +114,11 @@ static bool kvmppc_ipi_thread(int cpu)
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
 	int cpu;
-	wait_queue_head_t *wqp;
+	struct swait_queue_head *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (waitqueue_active(wqp)) {
-		wake_up_interruptible(wqp);
+	if (swait_active(wqp)) {
+		swake_up(wqp);
 		++vcpu->stat.halt_wakeup;
 	}
 
@@ -707,8 +707,8 @@ int kvmppc_pseries_do_hcall(struct kvm_v
 		tvcpu->arch.prodded = 1;
 		smp_mb();
 		if (vcpu->arch.ceded) {
-			if (waitqueue_active(&vcpu->wq)) {
-				wake_up_interruptible(&vcpu->wq);
+			if (swait_active(&vcpu->wq)) {
+				swake_up(&vcpu->wq);
 				vcpu->stat.halt_wakeup++;
 			}
 		}
@@ -1447,7 +1447,7 @@ static struct kvmppc_vcore *kvmppc_vcore
 	INIT_LIST_HEAD(&vcore->runnable_threads);
 	spin_lock_init(&vcore->lock);
 	spin_lock_init(&vcore->stoltb_lock);
-	init_waitqueue_head(&vcore->wq);
+	init_swait_queue_head(&vcore->wq);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
 	vcore->first_vcpuid = core * threads_per_subcore;
@@ -2519,10 +2519,9 @@ static void kvmppc_vcore_blocked(struct
 {
 	struct kvm_vcpu *vcpu;
 	int do_sleep = 1;
+	DECLARE_SWAITQUEUE(wait);
 
-	DEFINE_WAIT(wait);
-
-	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
 
 	/*
 	 * Check one last time for pending exceptions and ceded state after
@@ -2536,7 +2535,7 @@ static void kvmppc_vcore_blocked(struct
 	}
 
 	if (!do_sleep) {
-		finish_wait(&vc->wq, &wait);
+		finish_swait(&vc->wq, &wait);
 		return;
 	}
 
@@ -2544,7 +2543,7 @@ static void kvmppc_vcore_blocked(struct
 	trace_kvmppc_vcore_blocked(vc, 0);
 	spin_unlock(&vc->lock);
 	schedule();
-	finish_wait(&vc->wq, &wait);
+	finish_swait(&vc->wq, &wait);
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
 	trace_kvmppc_vcore_blocked(vc, 1);
@@ -2600,7 +2599,7 @@ static int kvmppc_run_vcpu(struct kvm_ru
 			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
-			wake_up(&vc->wq);
+			swake_up(&vc->wq);
 		}
 
 	}
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -427,7 +427,7 @@ struct kvm_s390_irq_payload {
 struct kvm_s390_local_interrupt {
 	spinlock_t lock;
 	struct kvm_s390_float_interrupt *float_int;
-	wait_queue_head_t *wq;
+	struct swait_queue_head *wq;
 	atomic_t *cpuflags;
 	DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
 	struct kvm_s390_irq_payload irq;
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -868,13 +868,13 @@ int kvm_s390_handle_wait(struct kvm_vcpu
 
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 {
-	if (waitqueue_active(&vcpu->wq)) {
+	if (swait_active(&vcpu->wq)) {
 		/*
 		 * The vcpu gave up the cpu voluntarily, mark it as a good
 		 * yield-candidate.
 		 */
 		vcpu->preempted = true;
-		wake_up_interruptible(&vcpu->wq);
+		swake_up(&vcpu->wq);
 		vcpu->stat.halt_wakeup++;
 	}
 }
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1195,7 +1195,7 @@ static void apic_update_lvtt(struct kvm_
 static void apic_timer_expired(struct kvm_lapic *apic)
 {
 	struct kvm_vcpu *vcpu = apic->vcpu;
-	wait_queue_head_t *q = &vcpu->wq;
+	struct swait_queue_head *q = &vcpu->wq;
 	struct kvm_timer *ktimer = &apic->lapic_timer;
 
 	if (atomic_read(&apic->lapic_timer.pending))
@@ -1204,8 +1204,8 @@ static void apic_timer_expired(struct kv
 	atomic_inc(&apic->lapic_timer.pending);
 	kvm_set_pending_timer(vcpu);
 
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
+	if (swait_active(q))
+		swake_up(q);
 
 	if (apic_lvtt_tscdeadline(apic))
 		ktimer->expired_tscdeadline = ktimer->tscdeadline;
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -25,6 +25,7 @@
 #include <linux/irqflags.h>
 #include <linux/context_tracking.h>
 #include <linux/irqbypass.h>
+#include <linux/swait.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -243,7 +244,7 @@ struct kvm_vcpu {
 	int fpu_active;
 	int guest_fpu_loaded, guest_xcr0_loaded;
 	unsigned char fpu_counter;
-	wait_queue_head_t wq;
+	struct swait_queue_head wq;
 	struct pid *pid;
 	int sigset_active;
 	sigset_t sigset;
@@ -794,7 +795,7 @@ static inline bool kvm_arch_has_assigned
 }
 #endif
 
-static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_WQP
 	return vcpu->arch.wqp;
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -98,8 +98,8 @@ static void async_pf_execute(struct work
 	 * This memory barrier pairs with prepare_to_wait's set_current_state()
 	 */
 	smp_mb();
-	if (waitqueue_active(&vcpu->wq))
-		wake_up_interruptible(&vcpu->wq);
+	if (swait_active(&vcpu->wq))
+		swake_up(&vcpu->wq);
 
 	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -226,8 +226,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu,
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
 	vcpu->pid = NULL;
-	vcpu->halt_poll_ns = 0;
-	init_waitqueue_head(&vcpu->wq);
+	init_swait_queue_head(&vcpu->wq);
 	kvm_async_pf_vcpu_init(vcpu);
 
 	vcpu->pre_pcpu = -1;
@@ -2003,7 +2002,7 @@ static int kvm_vcpu_check_block(struct k
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
 	ktime_t start, cur;
-	DEFINE_WAIT(wait);
+	DECLARE_SWAITQUEUE(wait);
 	bool waited = false;
 	u64 block_ns;
 
@@ -2028,7 +2027,7 @@ void kvm_vcpu_block(struct kvm_vcp
 	kvm_arch_vcpu_blocking(vcpu);
 
 	for (;;) {
-		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 		if (kvm_vcpu_check_block(vcpu) < 0)
 			break;
@@ -2037,7 +2036,7 @@ void kvm_vcpu_block(struct kvm_vcp
 		schedule();
 	}
 
-	finish_wait(&vcpu->wq, &wait);
+	finish_swait(&vcpu->wq, &wait);
 	cur = ktime_get();
 
 	kvm_arch_vcpu_unblocking(vcpu);
@@ -2069,11 +2068,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu
 {
 	int me;
 	int cpu = vcpu->cpu;
-	wait_queue_head_t *wqp;
+	struct swait_queue_head *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (waitqueue_active(wqp)) {
-		wake_up_interruptible(wqp);
+	if (swait_active(wqp)) {
+		swake_up(wqp);
 		++vcpu->stat.halt_wakeup;
 	}
 
@@ -2174,7 +2173,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *m
 				continue;
 			if (vcpu == me)
 				continue;
-			if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
+			if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
 				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;