
[Xen-devel] [PATCH v2 6/6] xen/x86: use PCID feature for XPTI



Avoid flushing the complete TLB when switching %cr3 for the Meltdown
mitigation by using the PCID feature if it is available.

We use 4 PCID values for a 64-bit PV domain subject to XPTI:

- hypervisor active and guest in kernel mode
- guest active and in kernel mode
- hypervisor active and guest in user mode
- guest active and in user mode

The 2 hypervisor cases could possibly be merged, but for security
reasons this is left for another patch; the resulting PCID assignment
is sketched below.
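
For illustration only (not part of the patch): the PCID selection can be
viewed as a function of whether Xen's page tables are active and whether
the guest is in kernel mode, mirroring the PCID_PV_* constants and the
get_pv_pcid() macro added below. pv_pcid() here is a hypothetical
stand-alone helper:

    #include <stdbool.h>

    #define PCID_PV_PRIV  0x0001  /* guest in kernel mode */
    #define PCID_PV_USER  0x0002  /* guest in user mode */
    #define PCID_PV_XEN   0x0004  /* ORed in while Xen's page tables are active */

    /* Hypothetical helper mirroring get_pv_pcid() from this patch. */
    static unsigned int pv_pcid(bool xen_active, bool kernel_mode)
    {
        return (xen_active ? PCID_PV_XEN : 0) |
               (kernel_mode ? PCID_PV_PRIV : PCID_PV_USER);
    }

    /*
     * pv_pcid(true,  true)  == 5  hypervisor active, guest in kernel mode
     * pv_pcid(false, true)  == 1  guest active and in kernel mode
     * pv_pcid(true,  false) == 6  hypervisor active, guest in user mode
     * pv_pcid(false, false) == 2  guest active and in user mode
     */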

Add a pcid flag to struct pv_domain to make it possible to use PCID
without XPTI later.
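
Also for illustration: with the pcid flag set, make_cr3() below builds
the vcpu's %cr3 from the page table MFN, the PCID in the low 12 bits,
and X86_CR3_NOFLUSH (bit 63), which suppresses the implicit TLB flush
on the load. A minimal sketch, with a hypothetical make_pv_cr3() helper:

    #define PAGE_SHIFT       12
    #define X86_CR3_NOFLUSH  (1ULL << 63)  /* no TLB flush on this cr3 load */

    static unsigned long make_pv_cr3(unsigned long mfn, unsigned int pcid)
    {
        /* page table root | PCID (low 12 bits) | no-flush bit */
        return (mfn << PAGE_SHIFT) | pcid | X86_CR3_NOFLUSH;
    }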

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
 xen/arch/x86/cpu/mtrr/generic.c |  5 +++
 xen/arch/x86/domain_page.c      |  2 +-
 xen/arch/x86/flushtlb.c         | 74 +++++++++++++++++++++++------------------
 xen/arch/x86/mm.c               | 12 ++++++-
 xen/arch/x86/pv/domain.c        |  4 +++
 xen/arch/x86/setup.c            |  3 ++
 xen/include/asm-x86/domain.h    | 34 +++++++++++++------
 xen/include/asm-x86/x86-defns.h |  1 +
 8 files changed, 90 insertions(+), 45 deletions(-)

diff --git a/xen/arch/x86/cpu/mtrr/generic.c b/xen/arch/x86/cpu/mtrr/generic.c
index d705138100..84b9cd78df 100644
--- a/xen/arch/x86/cpu/mtrr/generic.c
+++ b/xen/arch/x86/cpu/mtrr/generic.c
@@ -5,6 +5,7 @@
 #include <xen/mm.h>
 #include <xen/stdbool.h>
 #include <asm/flushtlb.h>
+#include <asm/invpcid.h>
 #include <asm/io.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
@@ -417,6 +418,8 @@ static bool prepare_set(void)
        cr4 = read_cr4();
        if (cr4 & X86_CR4_PGE)
                write_cr4(cr4 & ~X86_CR4_PGE);
+       else if ( cpu_has_invpcid )
+               invpcid_flush_all();
        else
                asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
 
@@ -440,6 +443,8 @@ static void post_set(bool pge)
        /*  Reenable CR4.PGE (also flushes the TLB) */
        if (pge)
                write_cr4(read_cr4() | X86_CR4_PGE);
+       else if ( cpu_has_invpcid )
+               invpcid_flush_all();
        else
                asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
 
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index 3432a854dd..e4b7f74f34 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -51,7 +51,7 @@ static inline struct vcpu *mapcache_current_vcpu(void)
         if ( (v = idle_vcpu[smp_processor_id()]) == current )
             sync_local_execstate();
         /* We must now be running on the idle page table. */
-        ASSERT(read_cr3() == __pa(idle_pg_table));
+        ASSERT((read_cr3() & ~X86_CR3_PCIDMASK) == __pa(idle_pg_table));
     }
 
     return v;
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index 186d9099f6..a65fad00ed 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -75,39 +75,46 @@ static void post_flush(u32 t)
 static void do_flush_tlb(unsigned long cr3)
 {
     unsigned long cr4;
+    u32 t;
+
+    t = pre_flush();
 
     cr4 = read_cr4();
-    if ( cr4 & X86_CR4_PGE )
+
+    if ( cpu_has_invpcid )
     {
-        write_cr4(cr4 & ~X86_CR4_PGE);
         if ( cr3 )
             asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
-        else
-            barrier();
-        write_cr4(cr4);
+        if ( !cr3 || (cr3 & X86_CR3_NOFLUSH) || (cr4 & X86_CR4_PGE) )
+            invpcid_flush_all();
     }
     else
     {
-        if ( !cr3 )
+        /* PCID not possible here, as invpcid is required for PCID. */
+        if ( cr4 & X86_CR4_PGE )
+            write_cr4(cr4 & ~X86_CR4_PGE);
+        else if ( !cr3 )
             cr3 = read_cr3();
-        asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+        if ( cr3 )
+            asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+        else
+            barrier();
+        if ( cr4 & X86_CR4_PGE )
+            write_cr4(cr4);
     }
+
+    post_flush(t);
 }
 
 void write_cr3(unsigned long cr3)
 {
     unsigned long flags;
-    u32 t;
 
     /* This non-reentrant function is sometimes called in interrupt context. */
     local_irq_save(flags);
 
-    t = pre_flush();
-
     do_flush_tlb(cr3);
 
-    post_flush(t);
-
     local_irq_restore(flags);
 }
 
@@ -128,30 +135,33 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
     {
         if ( order == 0 )
         {
-            /*
-             * We don't INVLPG multi-page regions because the 2M/4M/1G
-             * region may not have been mapped with a superpage. Also there
-             * are various errata surrounding INVLPG usage on superpages, and
-             * a full flush is in any case not *that* expensive.
-             */
-            asm volatile ( "invlpg %0"
-                           : : "m" (*(const char *)(va)) : "memory" );
-        }
-        else
-        {
-            u32 t = pre_flush();
+            if ( read_cr3() & X86_CR3_PCIDMASK )
+            {
+                unsigned long addr = (unsigned long)va;
 
-            if ( !cpu_has_invpcid )
-                do_flush_tlb(0);
+                /*
+                 * Flush the addresses for all potential address spaces.
+                 */
+                invpcid_flush_one(PCID_PV_PRIV, addr);
+                invpcid_flush_one(PCID_PV_USER, addr);
+                invpcid_flush_one(PCID_PV_PRIV | PCID_PV_XEN, addr);
+                invpcid_flush_one(PCID_PV_USER | PCID_PV_XEN, addr);
+            }
             else
+            {
                 /*
-                 * Using invpcid to flush all mappings works
-                 * regardless of whether PCID is enabled or not.
-                 * It is faster than read-modify-write CR4.
+                 * We don't INVLPG multi-page regions because the 2M/4M/1G
+                 * region may not have been mapped with a superpage. Also there
+                 * are various errata surrounding INVLPG usage on superpages,
+                 * and a full flush is in any case not *that* expensive.
                  */
-                invpcid_flush_all();
-
-            post_flush(t);
+                asm volatile ( "invlpg %0"
+                               : : "m" (*(const char *)(va)) : "memory" );
+            }
+        }
+        else
+        {
+            do_flush_tlb(0);
         }
     }
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 2d8366a01c..82fbbe0a10 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -506,6 +506,8 @@ void free_shared_domheap_page(struct page_info *page)
 void make_cr3(struct vcpu *v, mfn_t mfn)
 {
     v->arch.cr3 = mfn_x(mfn) << PAGE_SHIFT;
+    if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.pcid )
+        v->arch.cr3 |= X86_CR3_NOFLUSH | get_pv_pcid(v, 1);
 }
 
 void write_ptbase(struct vcpu *v)
@@ -514,7 +516,15 @@ void write_ptbase(struct vcpu *v)
     {
         get_cpu_info()->root_pgt_changed = true;
         get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt));
-        asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
+        if ( v->domain->arch.pv_domain.pcid )
+        {
+            get_cpu_info()->pv_cr3 |= X86_CR3_NOFLUSH | get_pv_pcid(v, 0);
+            write_cr3(v->arch.cr3);
+        }
+        else
+        {
+            asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
+        }
     }
     else
     {
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 5f15c9e25b..37338b2a01 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -96,8 +96,12 @@ void xpti_domain_init(struct domain *d)
     }
 
     if ( d->arch.pv_domain.xpti )
+    {
+        d->arch.pv_domain.pcid = cpu_has_pcid && cpu_has_invpcid;
+
         printk("Enabling Xen Pagetable protection (XPTI) for Domain %d\n",
                d->domain_id);
+    }
 }
 
 static void noreturn continue_nonidle_domain(struct vcpu *v)
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 7c9fbfe04a..781f191e6e 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1547,6 +1547,9 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     if ( cpu_has_fsgsbase )
         set_in_cr4(X86_CR4_FSGSBASE);
 
+    if ( cpu_has_invpcid && cpu_has_pcid )
+        set_in_cr4(X86_CR4_PCIDE);
+
     init_speculation_mitigations();
 
     init_idle_domain();
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 316418a6fe..a2ca03583f 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -260,8 +260,20 @@ struct pv_domain
 
     /* XPTI active? */
     bool xpti;
+
+    /* Use PCID for the different address spaces? */
+    bool pcid;
 };
 
+/* PCID values for the address spaces: */
+#define PCID_PV_PRIV      0x0001
+#define PCID_PV_USER      0x0002
+#define PCID_PV_XEN       0x0004    /* To be ORed to above values. */
+
+#define get_pv_pcid(v, xen)                                              \
+    (((xen) ? PCID_PV_XEN : 0) |                                         \
+     (((v)->arch.flags & TF_kernel_mode) ? PCID_PV_PRIV : PCID_PV_USER))
+
 struct monitor_write_data {
     struct {
         unsigned int msr : 1;
@@ -615,18 +627,18 @@ void vcpu_show_registers(const struct vcpu *);
 unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
 
 /* Convert between guest-visible and real CR4 values. */
-#define pv_guest_cr4_to_real_cr4(v)                         \
-    (((v)->arch.pv_vcpu.ctrlreg[4]                          \
-      | (mmu_cr4_features                                   \
-         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |      \
-            X86_CR4_SMAP | X86_CR4_OSXSAVE |                \
-            X86_CR4_FSGSBASE))                              \
-      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
-     & ~(X86_CR4_DE |                                       \
+#define pv_guest_cr4_to_real_cr4(v)                             \
+    (((v)->arch.pv_vcpu.ctrlreg[4]                              \
+      | (mmu_cr4_features                                       \
+         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |          \
+            X86_CR4_SMAP | X86_CR4_OSXSAVE |                    \
+            X86_CR4_FSGSBASE | X86_CR4_PCIDE))                  \
+      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))             \
+     & ~(X86_CR4_DE |                                           \
          ((v)->domain->arch.pv_domain.xpti ? X86_CR4_PGE : 0)))
-#define real_cr4_to_pv_guest_cr4(c)                         \
-    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |      \
-             X86_CR4_OSXSAVE | X86_CR4_SMEP |               \
+#define real_cr4_to_pv_guest_cr4(c)                             \
+    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |          \
+             X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_PCIDE |   \
              X86_CR4_FSGSBASE | X86_CR4_SMAP))
 
 #define domain_max_vcpus(d) (is_hvm_domain(d) ? HVM_MAX_VCPUS : MAX_VIRT_CPUS)
diff --git a/xen/include/asm-x86/x86-defns.h b/xen/include/asm-x86/x86-defns.h
index 8598adef14..d007997f88 100644
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -46,6 +46,7 @@
  * Intel CPU flags in CR3
  */
 #define X86_CR3_NOFLUSH (_AC(1, ULL) << 63)
+#define X86_CR3_PCIDMASK _AC(0x0000000000000fff, ULL) /* Mask for PCID */
 
 /*
  * Intel CPU features in CR4
-- 
2.13.6

