[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 3/7] xen/x86: support per-domain flag for xpti



Instead of switching XPTI globally on or off add a per-domain flag for
that purpose. This allows to modify the xpti boot parameter to support
running dom0 without Meltdown mitigations. Using "xpti=nodom0" as boot
parameter will achieve that.

Move the xpti boot parameter handling to xen/arch/x86/pv/domain.c as
it is pv-domain specific.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
V3:
- latch get_cpu_info() return value in variable (Jan Beulich)
- call always xpti_domain_init() for pv dom0 (Jan Beulich)
- add __init annotations (Jan Beulich)
- drop per domain XPTI message (Jan Beulich)
- document xpti=default support (Jan Beulich)
- move domain xpti flag into a padding hole (Jan Beulich)
---
 docs/misc/xen-command-line.markdown | 10 ++++-
 xen/arch/x86/domctl.c               |  4 ++
 xen/arch/x86/mm.c                   | 10 ++++-
 xen/arch/x86/pv/dom0_build.c        |  3 ++
 xen/arch/x86/pv/domain.c            | 80 ++++++++++++++++++++++++++++++++++++-
 xen/arch/x86/setup.c                | 20 +---------
 xen/arch/x86/smpboot.c              |  5 +--
 xen/arch/x86/x86_64/entry.S         |  2 +
 xen/include/asm-x86/current.h       |  3 +-
 xen/include/asm-x86/domain.h        |  3 ++
 xen/include/asm-x86/pv/domain.h     |  5 ++-
 11 files changed, 118 insertions(+), 27 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index b353352adf..79be9a6ba5 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1955,7 +1955,7 @@ clustered mode.  The default, given no hint from the 
**FADT**, is cluster
 mode.
 
 ### xpti
-> `= <boolean>`
+> `= default | nodom0 | <boolean>`
 
 > Default: `false` on AMD hardware
 > Default: `true` everywhere else
@@ -1963,6 +1963,14 @@ mode.
 Override default selection of whether to isolate 64-bit PV guest page
 tables.
 
+`true` activates page table isolation even on AMD hardware.
+
+`false` deactivates page table isolation on all systems.
+
+`default` sets the default behaviour.
+
+`nodom0` deactivates page table isolation for dom0.
+
 ### xsave
 > `= <boolean>`
 
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 8fbbf3aeb3..0704f398c7 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -24,6 +24,7 @@
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
 #include <asm/processor.h>
+#include <asm/pv/domain.h>
 #include <asm/acpi.h> /* for hvm_acpi_power_button */
 #include <xen/hypercall.h> /* for arch_do_domctl */
 #include <xsm/xsm.h>
@@ -610,6 +611,9 @@ long arch_do_domctl(
             ret = switch_compat(d);
         else
             ret = -EINVAL;
+
+        if ( ret == 0 )
+            xpti_domain_init(d);
         break;
 
     case XEN_DOMCTL_get_address_size:
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 8c944b33c9..e68ed474bf 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -507,14 +507,20 @@ void make_cr3(struct vcpu *v, mfn_t mfn)
 
 void write_ptbase(struct vcpu *v)
 {
-    if ( this_cpu(root_pgt) && is_pv_vcpu(v) && !is_pv_32bit_vcpu(v) )
+    struct cpu_info *cpu_info = get_cpu_info();
+
+    if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.xpti )
     {
-        get_cpu_info()->root_pgt_changed = true;
+        cpu_info->root_pgt_changed = true;
+        cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
         asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
     }
     else
     {
+        /* Make sure to clear xen_cr3 before pv_cr3; write_cr3() serializes. */
+        cpu_info->xen_cr3 = 0;
         write_cr3(v->arch.cr3);
+        cpu_info->pv_cr3 = 0;
     }
 }
 
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 0bd2f1bf90..77186c19bd 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -19,6 +19,7 @@
 #include <asm/dom0_build.h>
 #include <asm/guest.h>
 #include <asm/page.h>
+#include <asm/pv/domain.h>
 #include <asm/pv/mm.h>
 #include <asm/setup.h>
 
@@ -707,6 +708,8 @@ int __init dom0_construct_pv(struct domain *d,
             cpu = p->processor;
     }
 
+    xpti_domain_init(d);
+
     d->arch.paging.mode = 0;
 
     /* Set up CR3 value for write_ptbase */
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 2524326b74..266117e804 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -9,6 +9,8 @@
 #include <xen/lib.h>
 #include <xen/sched.h>
 
+#include <asm/cpufeature.h>
+#include <asm/msr-index.h>
 #include <asm/pv/domain.h>
 
 /* Override macros from asm/page.h to make them work with mfn_t */
@@ -17,6 +19,82 @@
 #undef page_to_mfn
 #define page_to_mfn(pg) _mfn(__page_to_mfn(pg))
 
+static __read_mostly enum {
+    XPTI_DEFAULT,
+    XPTI_ON,
+    XPTI_OFF,
+    XPTI_NODOM0
+} opt_xpti = XPTI_DEFAULT;
+
+static __init int parse_xpti(const char *s)
+{
+    int rc = 0;
+
+    switch ( parse_bool(s, NULL) )
+    {
+    case 0:
+        opt_xpti = XPTI_OFF;
+        break;
+    case 1:
+        opt_xpti = XPTI_ON;
+        break;
+    default:
+        if ( !strcmp(s, "default") )
+            opt_xpti = XPTI_DEFAULT;
+        else if ( !strcmp(s, "nodom0") )
+            opt_xpti = XPTI_NODOM0;
+        else
+            rc = -EINVAL;
+        break;
+    }
+
+    return rc;
+}
+custom_param("xpti", parse_xpti);
+
+void __init xpti_init(void)
+{
+    uint64_t caps = 0;
+
+    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+        caps = ARCH_CAPABILITIES_RDCL_NO;
+    else if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
+        rdmsrl(MSR_ARCH_CAPABILITIES, caps);
+
+    if ( opt_xpti != XPTI_ON && (caps & ARCH_CAPABILITIES_RDCL_NO) )
+        opt_xpti = XPTI_OFF;
+    else if ( opt_xpti == XPTI_DEFAULT )
+        opt_xpti = XPTI_ON;
+
+    if ( opt_xpti == XPTI_OFF )
+        setup_force_cpu_cap(X86_FEATURE_NO_XPTI);
+    else
+        setup_clear_cpu_cap(X86_FEATURE_NO_XPTI);
+}
+
+void xpti_domain_init(struct domain *d)
+{
+    if ( !is_pv_domain(d) || is_pv_32bit_domain(d) )
+        return;
+
+    switch ( opt_xpti )
+    {
+    case XPTI_OFF:
+        d->arch.pv_domain.xpti = false;
+        break;
+    case XPTI_ON:
+        d->arch.pv_domain.xpti = true;
+        break;
+    case XPTI_NODOM0:
+        d->arch.pv_domain.xpti = d->domain_id != 0 &&
+                                 d->domain_id != hardware_domid;
+        break;
+    default:
+        ASSERT_UNREACHABLE();
+        break;
+    }
+}
+
 static void noreturn continue_nonidle_domain(struct vcpu *v)
 {
     check_wakeup_from_wait();
@@ -264,7 +342,7 @@ void toggle_guest_mode(struct vcpu *v)
     }
     asm volatile ( "swapgs" );
 
-    _toggle_guest_pt(v, cpu_has_no_xpti);
+    _toggle_guest_pt(v, !v->domain->arch.pv_domain.xpti);
 }
 
 void toggle_guest_pt(struct vcpu *v)
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index d7aac724b0..801334b43f 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -53,6 +53,7 @@
 #include <asm/cpuid.h>
 #include <asm/spec_ctrl.h>
 #include <asm/guest.h>
+#include <asm/pv/domain.h>
 
 /* opt_nosmp: If true, secondary processors are ignored. */
 static bool __initdata opt_nosmp;
@@ -169,9 +170,6 @@ static int __init parse_smap_param(const char *s)
 }
 custom_param("smap", parse_smap_param);
 
-static int8_t __initdata opt_xpti = -1;
-boolean_param("xpti", opt_xpti);
-
 bool __read_mostly acpi_disabled;
 bool __initdata acpi_force;
 static char __initdata acpi_param[10] = "";
@@ -1546,21 +1544,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 
     cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS;
 
-    if ( opt_xpti < 0 )
-    {
-        uint64_t caps = 0;
-
-        if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
-            caps = ARCH_CAPABILITIES_RDCL_NO;
-        else if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
-            rdmsrl(MSR_ARCH_CAPABILITIES, caps);
-
-        opt_xpti = !(caps & ARCH_CAPABILITIES_RDCL_NO);
-    }
-    if ( opt_xpti )
-        setup_clear_cpu_cap(X86_FEATURE_NO_XPTI);
-    else
-        setup_force_cpu_cap(X86_FEATURE_NO_XPTI);
+    xpti_init();
 
     if ( cpu_has_fsgsbase )
         set_in_cr4(X86_CR4_FSGSBASE);
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index b0b72ca544..07624da32b 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -331,7 +331,7 @@ void start_secondary(void *unused)
     spin_debug_disable();
 
     get_cpu_info()->xen_cr3 = 0;
-    get_cpu_info()->pv_cr3 = this_cpu(root_pgt) ? __pa(this_cpu(root_pgt)) : 0;
+    get_cpu_info()->pv_cr3 = 0;
 
     load_system_tables();
 
@@ -1050,8 +1050,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
         panic("Error %d setting up PV root page table\n", rc);
     if ( per_cpu(root_pgt, 0) )
     {
-        get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
-
+        get_cpu_info()->pv_cr3 = 0;
         /*
          * All entry points which may need to switch page tables have to start
          * with interrupts off. Re-write what pv_trap_init() has put there.
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 18b79be539..2a06cd1a51 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -130,6 +130,8 @@ restore_all_guest:
         mov   VCPU_cr3(%rbx), %r9
         GET_STACK_END(dx)
         mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rax
+        test  %rax, %rax
+        jz    .Lrag_cr3_end
         cmpb  $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
         je    .Lrag_copy_done
         movb  $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
index f2491b4423..b2475783f8 100644
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -44,7 +44,8 @@ struct cpu_info {
     /*
      * Of the two following fields the latter is being set to the CR3 value
      * to be used on the given pCPU for loading whenever 64-bit PV guest
-     * context is being entered. The value never changes once set.
+     * context is being entered. A value of zero indicates no setting of CR3
+     * is to be performed.
      * The former is the value to restore when re-entering Xen, if any. IOW
      * its value being zero means there's nothing to restore. However, its
      * value can also be negative, indicating to the exit-to-Xen code that
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 47aadc2600..b97a8792e9 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -253,6 +253,9 @@ struct pv_domain
 
     atomic_t nr_l4_pages;
 
+    /* XPTI active? */
+    bool xpti;
+
     /* map_domain_page() mapping cache. */
     struct mapcache_domain mapcache;
 
diff --git a/xen/include/asm-x86/pv/domain.h b/xen/include/asm-x86/pv/domain.h
index 5e34176939..2213a8fb3d 100644
--- a/xen/include/asm-x86/pv/domain.h
+++ b/xen/include/asm-x86/pv/domain.h
@@ -27,6 +27,8 @@ void pv_vcpu_destroy(struct vcpu *v);
 int pv_vcpu_initialise(struct vcpu *v);
 void pv_domain_destroy(struct domain *d);
 int pv_domain_initialise(struct domain *d);
+void xpti_init(void);
+void xpti_domain_init(struct domain *d);
 
 #else  /* !CONFIG_PV */
 
@@ -36,7 +38,8 @@ static inline void pv_vcpu_destroy(struct vcpu *v) {}
 static inline int pv_vcpu_initialise(struct vcpu *v) { return -EOPNOTSUPP; }
 static inline void pv_domain_destroy(struct domain *d) {}
 static inline int pv_domain_initialise(struct domain *d) { return -EOPNOTSUPP; 
}
-
+static inline void xpti_init(void) {}
+static inline void xpti_domain_init(struct domain *d) {}
 #endif /* CONFIG_PV */
 
 void paravirt_ctxt_switch_from(struct vcpu *v);
-- 
2.13.6


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.