Merge tag 'stable/for-linus-4.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen bugfixes from David Vrabel:
"Xen regression and bug fixes for 4.0-rc1
- Fix two regressions introduced in 4.0-rc1 affecting PV/PVH guests
in certain configurations.
- Prevent pvscsi frontends bypassing backend checks.
- Allow privcmd hypercalls to be preempted even on kernel with
voluntary preemption. This fixes soft-lockups with long running
toolstack hypercalls (e.g., when creating/destroying large
domains)"
* tag 'stable/for-linus-4.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
x86/xen: Initialize cr4 shadow for 64-bit PV(H) guests
xen-scsiback: mark pvscsi frontend request consumed only after last read
x86/xen: allow privcmd hypercalls to be preempted
x86/xen: Make sure X2APIC_ENABLE bit of MSR_IA32_APICBASE is not set
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 000d419..31e2d5b 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -982,6 +982,9 @@
ENTRY(xen_do_upcall)
1: mov %esp, %eax
call xen_evtchn_do_upcall
+#ifndef CONFIG_PREEMPT
+ call xen_maybe_preempt_hcall
+#endif
jmp ret_from_intr
CFI_ENDPROC
ENDPROC(xen_hypervisor_callback)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index db13655..10074ad 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1208,6 +1208,9 @@
popq %rsp
CFI_DEF_CFA_REGISTER rsp
decl PER_CPU_VAR(irq_count)
+#ifndef CONFIG_PREEMPT
+ call xen_maybe_preempt_hcall
+#endif
jmp error_exit
CFI_ENDPROC
END(xen_do_hypervisor_callback)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bd8b845..5240f56 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1070,6 +1070,23 @@
BUG_ON(val);
}
#endif
+
+static u64 xen_read_msr_safe(unsigned int msr, int *err)
+{
+ u64 val;
+
+ val = native_read_msr_safe(msr, err);
+ switch (msr) {
+ case MSR_IA32_APICBASE:
+#ifdef CONFIG_X86_X2APIC
+ if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31))))
+#endif
+ val &= ~X2APIC_ENABLE;
+ break;
+ }
+ return val;
+}
+
static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
int ret;
@@ -1240,7 +1257,7 @@
.wbinvd = native_wbinvd,
- .read_msr = native_read_msr_safe,
+ .read_msr = xen_read_msr_safe,
.write_msr = xen_write_msr_safe,
.read_tsc = native_read_tsc,
@@ -1741,6 +1758,7 @@
#ifdef CONFIG_X86_32
i386_start_kernel();
#else
+ cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
x86_64_start_reservations((char *)__pa_symbol(&boot_params));
#endif
}
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 2140398..2ccd359 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -2,7 +2,7 @@
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
endif
obj-$(CONFIG_X86) += fallback.o
-obj-y += grant-table.o features.o balloon.o manage.o
+obj-y += grant-table.o features.o balloon.o manage.o preempt.o
obj-y += events/
obj-y += xenbus/
diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c
new file mode 100644
index 0000000..a1800c1
--- /dev/null
+++ b/drivers/xen/preempt.c
@@ -0,0 +1,44 @@
+/*
+ * Preemptible hypercalls
+ *
+ * Copyright (C) 2014 Citrix Systems R&D ltd.
+ *
+ * This source code is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <xen/xen-ops.h>
+
+#ifndef CONFIG_PREEMPT
+
+/*
+ * Some hypercalls issued by the toolstack can take many 10s of
+ * seconds. Allow tasks running hypercalls via the privcmd driver to
+ * be voluntarily preempted even if full kernel preemption is
+ * disabled.
+ *
+ * Such preemptible hypercalls are bracketed by
+ * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end()
+ * calls.
+ */
+
+DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
+EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
+
+asmlinkage __visible void xen_maybe_preempt_hcall(void)
+{
+ if (unlikely(__this_cpu_read(xen_in_preemptible_hcall)
+ && should_resched())) {
+ /*
+ * Clear flag as we may be rescheduled on a different
+ * cpu.
+ */
+ __this_cpu_write(xen_in_preemptible_hcall, false);
+ _cond_resched();
+ __this_cpu_write(xen_in_preemptible_hcall, true);
+ }
+}
+#endif /* CONFIG_PREEMPT */
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 569a13b..59ac71c 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -56,10 +56,12 @@
if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
return -EFAULT;
+ xen_preemptible_hcall_begin();
ret = privcmd_call(hypercall.op,
hypercall.arg[0], hypercall.arg[1],
hypercall.arg[2], hypercall.arg[3],
hypercall.arg[4]);
+ xen_preemptible_hcall_end();
return ret;
}
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 61653a0..9faca6a 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -709,12 +709,11 @@
static int scsiback_do_cmd_fn(struct vscsibk_info *info)
{
struct vscsiif_back_ring *ring = &info->ring;
- struct vscsiif_request *ring_req;
+ struct vscsiif_request ring_req;
struct vscsibk_pend *pending_req;
RING_IDX rc, rp;
int err, more_to_do;
uint32_t result;
- uint8_t act;
rc = ring->req_cons;
rp = ring->sring->req_prod;
@@ -735,11 +734,10 @@
if (!pending_req)
return 1;
- ring_req = RING_GET_REQUEST(ring, rc);
+ ring_req = *RING_GET_REQUEST(ring, rc);
ring->req_cons = ++rc;
- act = ring_req->act;
- err = prepare_pending_reqs(info, ring_req, pending_req);
+ err = prepare_pending_reqs(info, &ring_req, pending_req);
if (err) {
switch (err) {
case -ENODEV:
@@ -755,9 +753,9 @@
return 1;
}
- switch (act) {
+ switch (ring_req.act) {
case VSCSIIF_ACT_SCSI_CDB:
- if (scsiback_gnttab_data_map(ring_req, pending_req)) {
+ if (scsiback_gnttab_data_map(&ring_req, pending_req)) {
scsiback_fast_flush_area(pending_req);
scsiback_do_resp_with_sense(NULL,
DRIVER_ERROR << 24, 0, pending_req);
@@ -768,7 +766,7 @@
break;
case VSCSIIF_ACT_SCSI_ABORT:
scsiback_device_action(pending_req, TMR_ABORT_TASK,
- ring_req->ref_rqid);
+ ring_req.ref_rqid);
break;
case VSCSIIF_ACT_SCSI_RESET:
scsiback_device_action(pending_req, TMR_LUN_RESET, 0);
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 7491ee5..8333821 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -46,4 +46,30 @@
}
#endif
+#ifdef CONFIG_PREEMPT
+
+static inline void xen_preemptible_hcall_begin(void)
+{
+}
+
+static inline void xen_preemptible_hcall_end(void)
+{
+}
+
+#else
+
+DECLARE_PER_CPU(bool, xen_in_preemptible_hcall);
+
+static inline void xen_preemptible_hcall_begin(void)
+{
+ __this_cpu_write(xen_in_preemptible_hcall, true);
+}
+
+static inline void xen_preemptible_hcall_end(void)
+{
+ __this_cpu_write(xen_in_preemptible_hcall, false);
+}
+
+#endif /* CONFIG_PREEMPT */
+
#endif /* INCLUDE_XEN_OPS_H */