xen/pvh: bootup and setup (E820) related changes.
In the bootup code for PVH, cpuid traps to the hypervisor via vmexit, so
we don't need to use the emulated-prefix call. We also check for the
vector callback early on, as it is a required feature. PVH also runs at
the default kernel IOPL.
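As a rough sketch, the resulting CPUID path looks like this (condensed from
the enlighten.c hunk below; the helper name pvh_filtered_cpuid is only
illustrative):

	static void pvh_filtered_cpuid(unsigned int *ax, unsigned int *bx,
				       unsigned int *cx, unsigned int *dx)
	{
		if (xen_pvh_domain()) {
			/* CPUID vmexits to Xen, no emulated prefix needed. */
			native_cpuid(ax, bx, cx, dx);
			return;
		}
		/* Classic PV still uses the forced-emulation prefix. */
		asm(XEN_EMULATE_PREFIX "cpuid"
		    : "=a" (*ax), "=b" (*bx), "=c" (*cx), "=d" (*dx)
		    : "0" (*ax), "2" (*cx));
	}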
In setup.c, which deals with the E820 map, xen_add_extra_mem() can skip
updating the P2M since it is managed by Xen. PVH maps the entire IO
space, but only RAM pages need to be repopulated.
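A condensed sketch of the PVH side of the E820 handling (based on the
setup.c hunks below; pvh_map_pfn_range is an illustrative name, while
xen_set_clr_mmio_pvh_pte is the helper the patch actually calls):

	static void __init pvh_map_pfn_range(unsigned long start_pfn,
					     unsigned long end_pfn)
	{
		unsigned long pfn;

		/* Establish a 1:1 pfn->mfn mapping in the EPT/NPT for the
		 * whole range, including IO space beyond max_pfn_mapped;
		 * only the RAM portion is repopulated later. */
		for (pfn = start_pfn; pfn < end_pfn; pfn++)
			xen_set_clr_mmio_pvh_pte(pfn, pfn, 1, 1);
	}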
Finally, pure PV settings are moved to a separate function that is only
called for pure PV, i.e., PV with pvmmu.
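The resulting split looks roughly like this (condensed from the setup.c
hunk below; the common ACPI handling that follows is omitted):

	void __init xen_arch_setup(void)
	{
		xen_panic_handler_init();

		/* vm_assist flags and the event/failsafe callbacks only
		 * make sense for a pvmmu guest, so PVH skips them. */
		if (!xen_feature(XENFEAT_auto_translated_physmap))
			xen_pvmmu_arch_setup();

		/* remaining setup is shared by PV and PVH */
	}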
Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b679f86..bd8f718 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -45,6 +45,7 @@
#include <xen/hvm.h>
#include <xen/hvc-console.h>
#include <xen/acpi.h>
+#include <xen/features.h>
#include <asm/paravirt.h>
#include <asm/apic.h>
@@ -107,6 +108,9 @@
__read_mostly int xen_have_vector_callback;
EXPORT_SYMBOL_GPL(xen_have_vector_callback);
+#define xen_pvh_domain() (xen_pv_domain() && \
+ xen_feature(XENFEAT_auto_translated_physmap) && \
+ xen_have_vector_callback)
/*
* Point at some empty memory to start with. We map the real shared_info
* page as soon as fixmap is up and running.
@@ -219,8 +223,9 @@
struct xen_extraversion extra;
HYPERVISOR_xen_version(XENVER_extraversion, &extra);
- printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
- pv_info.name);
+ pr_info("Booting paravirtualized kernel %son %s\n",
+ xen_feature(XENFEAT_auto_translated_physmap) ?
+ "with PVH extensions " : "", pv_info.name);
printk(KERN_INFO "Xen version: %d.%d%s%s\n",
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
@@ -273,12 +278,15 @@
break;
}
- asm(XEN_EMULATE_PREFIX "cpuid"
- : "=a" (*ax),
- "=b" (*bx),
- "=c" (*cx),
- "=d" (*dx)
- : "0" (*ax), "2" (*cx));
+ if (xen_pvh_domain())
+ native_cpuid(ax, bx, cx, dx);
+ else
+ asm(XEN_EMULATE_PREFIX "cpuid"
+ : "=a" (*ax),
+ "=b" (*bx),
+ "=c" (*cx),
+ "=d" (*dx)
+ : "0" (*ax), "2" (*cx));
*bx &= maskebx;
*cx &= maskecx;
@@ -1055,6 +1063,10 @@
HYPERVISOR_shared_info =
(struct shared_info *)__va(xen_start_info->shared_info);
+ /* PVH TBD/FIXME: vcpu info placement in phase 2 */
+ if (xen_pvh_domain())
+ return;
+
#ifndef CONFIG_SMP
/* In UP this is as good a place as any to set up shared info */
xen_setup_vcpu_info_placement();
@@ -1292,6 +1304,11 @@
*/
static void __init xen_setup_stackprotector(void)
{
+ /* PVH TBD/FIXME: investigate setup_stack_canary_segment */
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ switch_to_new_gdt(0);
+ return;
+ }
pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
pv_cpu_ops.load_gdt = xen_load_gdt_boot;
@@ -1302,6 +1319,19 @@
pv_cpu_ops.load_gdt = xen_load_gdt;
}
+static void __init xen_pvh_early_guest_init(void)
+{
+ if (xen_feature(XENFEAT_hvm_callback_vector))
+ xen_have_vector_callback = 1;
+
+#ifdef CONFIG_X86_32
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ xen_raw_printk("ERROR: 32bit PVH guests are not supported\n");
+ BUG();
+ }
+#endif
+}
+
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
@@ -1313,13 +1343,18 @@
xen_domain_type = XEN_PV_DOMAIN;
+ xen_setup_features();
+ xen_pvh_early_guest_init();
xen_setup_machphys_mapping();
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
- pv_cpu_ops = xen_cpu_ops;
pv_apic_ops = xen_apic_ops;
+ if (xen_pvh_domain())
+ pv_cpu_ops.cpuid = xen_cpuid;
+ else
+ pv_cpu_ops = xen_cpu_ops;
x86_init.resources.memory_setup = xen_memory_setup;
x86_init.oem.arch_setup = xen_arch_setup;
@@ -1351,8 +1386,6 @@
/* Work out if we support NX */
x86_configure_nx();
- xen_setup_features();
-
/* Get mfn list */
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_build_dynamic_phys_to_machine();
@@ -1423,14 +1456,18 @@
/* set the limit of our address space */
xen_reserve_top();
- /* We used to do this in xen_arch_setup, but that is too late on AMD
- * were early_cpu_init (run before ->arch_setup()) calls early_amd_init
- * which pokes 0xcf8 port.
- */
- set_iopl.iopl = 1;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
- if (rc != 0)
- xen_raw_printk("physdev_op failed %d\n", rc);
+ /* PVH: runs at default kernel iopl of 0 */
+ if (!xen_pvh_domain()) {
+ /*
+ * We used to do this in xen_arch_setup, but that is too late
+ * on AMD where early_cpu_init (run before ->arch_setup()) calls
+ * early_amd_init, which pokes the 0xcf8 port.
+ */
+ set_iopl.iopl = 1;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+ if (rc != 0)
+ xen_raw_printk("physdev_op failed %d\n", rc);
+ }
#ifdef CONFIG_X86_32
/* set up basic CPUID stuff */
@@ -1497,6 +1534,8 @@
#endif
}
+/* Use a pfn in RAM; it may move to MMIO before kexec.
+ * This function is also called for PVH dom0. */
void __ref xen_hvm_init_shared_info(void)
{
int cpu;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8971a26..8cce47b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -27,6 +27,7 @@
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
+#include "mmu.h"
#include "xen-ops.h"
#include "vdso.h"
@@ -78,6 +79,9 @@
memblock_reserve(start, size);
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
+
xen_max_p2m_pfn = PFN_DOWN(start + size);
for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
@@ -100,6 +104,7 @@
.domid = DOMID_SELF
};
unsigned long len = 0;
+ int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
unsigned long pfn;
int ret;
@@ -113,7 +118,7 @@
continue;
frame = mfn;
} else {
- if (mfn != INVALID_P2M_ENTRY)
+ if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
continue;
frame = pfn;
}
@@ -230,6 +235,27 @@
*identity += set_phys_range_identity(start_pfn, end_pfn);
}
+/* For PVH, the pfns [0..MAX] are mapped to mfns in the EPT/NPT. The mfns
+ * are released back to the domain heap as part of this 1:1 mapping
+ * hypercall. Also, we map the entire IO space, i.e., beyond max_pfn_mapped.
+ */
+static void __init xen_pvh_identity_map_chunk(unsigned long start_pfn,
+ unsigned long end_pfn, unsigned long *released,
+ unsigned long *identity, unsigned long max_pfn)
+{
+ unsigned long pfn;
+ int numpfns = 1, add_mapping = 1;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++)
+ xen_set_clr_mmio_pvh_pte(pfn, pfn, numpfns, add_mapping);
+
+ if (start_pfn <= max_pfn) {
+ unsigned long end = min(max_pfn_mapped, end_pfn);
+ *released += end - start_pfn;
+ }
+ *identity += end_pfn - start_pfn;
+}
+
static unsigned long __init xen_set_identity_and_release(
const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
@@ -238,6 +264,7 @@
unsigned long identity = 0;
const struct e820entry *entry;
int i;
+ int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
/*
* Combine non-RAM regions and gaps until a RAM region (or the
@@ -259,11 +286,17 @@
if (entry->type == E820_RAM)
end_pfn = PFN_UP(entry->addr);
- if (start_pfn < end_pfn)
- xen_set_identity_and_release_chunk(
- start_pfn, end_pfn, nr_pages,
- &released, &identity);
-
+ if (start_pfn < end_pfn) {
+ if (xlated_phys) {
+ xen_pvh_identity_map_chunk(start_pfn,
+ end_pfn, &released, &identity,
+ nr_pages);
+ } else {
+ xen_set_identity_and_release_chunk(
+ start_pfn, end_pfn, nr_pages,
+ &released, &identity);
+ }
+ }
start = end;
}
}
@@ -526,16 +559,14 @@
#endif /* CONFIG_X86_64 */
}
-void __init xen_arch_setup(void)
+/* Non-auto-translated PV domain, i.e., not PVH. */
+static __init void xen_pvmmu_arch_setup(void)
{
- xen_panic_handler_init();
-
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_pae_extended_cr3);
+ HYPERVISOR_vm_assist(VMASST_CMD_enable,
+ VMASST_TYPE_pae_extended_cr3);
if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
@@ -543,6 +574,15 @@
xen_enable_sysenter();
xen_enable_syscall();
+}
+
+/* This function is not called for HVM domains. */
+void __init xen_arch_setup(void)
+{
+ xen_panic_handler_init();
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ xen_pvmmu_arch_setup();
#ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {