[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v9 5/9] xen/x86: disable global pages for domains with XPTI active



Instead of flushing the TLB from global pages when switching address
spaces with XPTI being active just disable global pages via %cr4
completely when a domain subject to XPTI is active. This avoids the
need for extra TLB flushes as loading %cr3 will remove all TLB
entries.

In order to avoid states with cr3/cr4 having inconsistent values
(e.g. global pages being activated while cr3 already specifies a XPTI
address space) move loading of the new cr4 value to write_ptbase()
(actually to switch_cr3_cr4() called by write_ptbase()).

This requires to use switch_cr3_cr4() instead of write_ptbase() when
building dom0 in order to avoid setting cr4 with cr4.smap set.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
---
V7:
- use switch_cr3_cr4() in dom0_build.c

V6:
- don't call read_cr4() multiple times in switch_cr3_cr4()
  (Andrew Cooper)

V4:
- don't use mmu_cr4_features for setting new cr4 value (Jan Beulich)
- use simpler scheme for setting X86_CR4_PGE in
  pv_guest_cr4_to_real_cr4() (Jan Beulich)

V3:
- move cr4 loading for all domains from *_ctxt_switch_to() to
  write_cr3_cr4() called by write_ptbase() (Jan Beulich)
- rebase
---
 xen/arch/x86/domain.c          |  5 -----
 xen/arch/x86/flushtlb.c        | 17 ++++++++++++-----
 xen/arch/x86/mm.c              | 14 +++++++++++---
 xen/arch/x86/pv/dom0_build.c   |  6 +++---
 xen/arch/x86/x86_64/entry.S    | 10 ----------
 xen/common/efi/runtime.c       |  4 ++--
 xen/include/asm-x86/domain.h   |  3 ++-
 xen/include/asm-x86/flushtlb.h |  2 +-
 8 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 3d9c19d055..9b001a03ec 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1523,17 +1523,12 @@ void paravirt_ctxt_switch_from(struct vcpu *v)
 void paravirt_ctxt_switch_to(struct vcpu *v)
 {
     root_pgentry_t *root_pgt = this_cpu(root_pgt);
-    unsigned long cr4;
 
     if ( root_pgt )
         root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] =
             l4e_from_page(v->domain->arch.perdomain_l3_pg,
                           __PAGE_HYPERVISOR_RW);
 
-    cr4 = pv_guest_cr4_to_real_cr4(v);
-    if ( unlikely(cr4 != read_cr4()) )
-        write_cr4(cr4);
-
     if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) )
         activate_debugregs(v);
 
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index b6817f1817..59884e7989 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -90,20 +90,27 @@ static void do_tlb_flush(void)
     post_flush(t);
 }
 
-void switch_cr3(unsigned long cr3)
+void switch_cr3_cr4(unsigned long cr3, unsigned long cr4)
 {
-    unsigned long flags, cr4;
+    unsigned long flags, old_cr4;
     u32 t;
 
     /* This non-reentrant function is sometimes called in interrupt context. */
     local_irq_save(flags);
 
     t = pre_flush();
-    cr4 = read_cr4();
 
-    write_cr4(cr4 & ~X86_CR4_PGE);
+    old_cr4 = read_cr4();
+    if ( old_cr4 & X86_CR4_PGE )
+    {
+        old_cr4 = cr4 & ~X86_CR4_PGE;
+        write_cr4(old_cr4);
+    }
+
     write_cr3(cr3);
-    write_cr4(cr4);
+
+    if ( old_cr4 != cr4 )
+        write_cr4(cr4);
 
     post_flush(t);
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index b1e8133d92..13e13dfde7 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -503,20 +503,28 @@ void make_cr3(struct vcpu *v, mfn_t mfn)
 void write_ptbase(struct vcpu *v)
 {
     struct cpu_info *cpu_info = get_cpu_info();
+    unsigned long new_cr4;
+
+    new_cr4 = (is_pv_vcpu(v) && !is_idle_vcpu(v))
+              ? pv_guest_cr4_to_real_cr4(v)
+              : ((read_cr4() & ~X86_CR4_TSD) | X86_CR4_PGE);
 
     if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.xpti )
     {
         cpu_info->root_pgt_changed = true;
         cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
-        switch_cr3(v->arch.cr3);
+        switch_cr3_cr4(v->arch.cr3, new_cr4);
     }
     else
     {
-        /* Make sure to clear xen_cr3 before pv_cr3; switch_cr3() serializes. 
*/
+        /* Make sure to clear xen_cr3 before pv_cr3. */
         cpu_info->xen_cr3 = 0;
-        switch_cr3(v->arch.cr3);
+        /* switch_cr3_cr4() serializes. */
+        switch_cr3_cr4(v->arch.cr3, new_cr4);
         cpu_info->pv_cr3 = 0;
     }
+
+    ASSERT(is_pv_vcpu(v) || read_cr4() == mmu_cr4_features);
 }
 
 /*
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index d148395919..4465a059a8 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -717,7 +717,7 @@ int __init dom0_construct_pv(struct domain *d,
         update_cr3(v);
 
     /* We run on dom0's page tables for the final part of the build process. */
-    write_ptbase(v);
+    switch_cr3_cr4(v->arch.cr3, read_cr4());
     mapcache_override_current(v);
 
     /* Copy the OS image and free temporary buffer. */
@@ -738,7 +738,7 @@ int __init dom0_construct_pv(struct domain *d,
              (parms.virt_hypercall >= v_end) )
         {
             mapcache_override_current(NULL);
-            write_ptbase(current);
+            switch_cr3_cr4(current->arch.cr3, read_cr4());
             printk("Invalid HYPERCALL_PAGE field in ELF notes.\n");
             rc = -1;
             goto out;
@@ -871,7 +871,7 @@ int __init dom0_construct_pv(struct domain *d,
 
     /* Return to idle domain's page tables. */
     mapcache_override_current(NULL);
-    write_ptbase(current);
+    switch_cr3_cr4(current->arch.cr3, read_cr4());
 
     update_domain_wallclock_time(d);
 
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 732f834a09..197dc6e4de 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -153,13 +153,8 @@ restore_all_guest:
                 ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
         rep movsq
 .Lrag_copy_done:
-        mov   STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
-        mov   %rdi, %rsi
-        and   $~X86_CR4_PGE, %rdi
-        mov   %rdi, %cr4
         mov   %rax, %cr3
-        mov   %rsi, %cr4
 .Lrag_keep_cr3:
 
         /* Restore stashed SPEC_CTRL value. */
@@ -215,12 +210,7 @@ restore_all_xen:
          * so "g" will have to do.
          */
 UNLIKELY_START(g, exit_cr3)
-        mov   %cr4, %rdi
-        mov   %rdi, %rsi
-        and   $~X86_CR4_PGE, %rdi
-        mov   %rdi, %cr4
         mov   %rax, %cr3
-        mov   %rsi, %cr4
 UNLIKELY_END(exit_cr3)
 
         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
diff --git a/xen/common/efi/runtime.c b/xen/common/efi/runtime.c
index 4e5ddfef4f..070a70d784 100644
--- a/xen/common/efi/runtime.c
+++ b/xen/common/efi/runtime.c
@@ -111,7 +111,7 @@ struct efi_rs_state efi_rs_enter(void)
         lgdt(&gdt_desc);
     }
 
-    switch_cr3(virt_to_maddr(efi_l4_pgtable));
+    switch_cr3_cr4(virt_to_maddr(efi_l4_pgtable), read_cr4());
 
     return state;
 }
@@ -120,7 +120,7 @@ void efi_rs_leave(struct efi_rs_state *state)
 {
     if ( !state->cr3 )
         return;
-    switch_cr3(state->cr3);
+    switch_cr3_cr4(state->cr3, read_cr4());
     if ( is_pv_vcpu(current) && !is_idle_vcpu(current) )
     {
         struct desc_ptr gdt_desc = {
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index ed4199931a..b7894dc8c8 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -618,9 +618,10 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, 
unsigned long guest_cr4);
 #define pv_guest_cr4_to_real_cr4(v)                         \
     (((v)->arch.pv_vcpu.ctrlreg[4]                          \
       | (mmu_cr4_features                                   \
-         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |      \
+         & (X86_CR4_PSE | X86_CR4_SMEP |                    \
             X86_CR4_SMAP | X86_CR4_OSXSAVE |                \
             X86_CR4_FSGSBASE))                              \
+      | ((v)->domain->arch.pv_domain.xpti ? 0 : X86_CR4_PGE) \
       | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
      & ~X86_CR4_DE)
 #define real_cr4_to_pv_guest_cr4(c)                         \
diff --git a/xen/include/asm-x86/flushtlb.h b/xen/include/asm-x86/flushtlb.h
index fe48c89de7..ed5f45e806 100644
--- a/xen/include/asm-x86/flushtlb.h
+++ b/xen/include/asm-x86/flushtlb.h
@@ -84,7 +84,7 @@ static inline unsigned long read_cr3(void)
 }
 
 /* Write pagetable base and implicitly tick the tlbflush clock. */
-void switch_cr3(unsigned long cr3);
+void switch_cr3_cr4(unsigned long cr3, unsigned long cr4);
 
 /* flush_* flag fields: */
  /*
-- 
2.13.6


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.