[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging] x86/pv: Optimise the segment context switching paths



commit cbe69ba3e8e0499fac73cb91caebb8732e08f577
Author:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
AuthorDate: Tue Aug 11 16:05:06 2020 +0100
Commit:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Tue Sep 29 13:46:13 2020 +0100

    x86/pv: Optimise the segment context switching paths
    
    Opencode the fs/gs helpers, as the optimiser is unable to rearrange the logic
    down to a single X86_CR4_FSGSBASE check.  This removes several jumps and
    creates bigger basic blocks.
    
    In load_segments(), optimise GS base handling substantially.  The call to
    svm_load_segs() already needs gsb/gss the correct way around, so hoist the
    logic for the later path to use it as well.  Swapping the inputs in GPRs is
    far more efficient than using SWAPGS.
    
    Previously, there was optionally one SWAPGS from the user/kernel mode check,
    two SWAPGS's in write_gs_shadow() and two WRGSBASE's.  Updates to GS (4 or 5
    here) in quick succession stall all contemporary pipelines repeatedly.  (Intel
    Core/Xeon pipelines have segment register renaming[1], so can continue to
    speculatively execute with one GS update in flight.  Other pipelines cannot
    have two updates in flight concurrently, and must stall dispatch of the second
    until the first has retired.)
    
    Rewrite the logic to have exactly two WRGSBASEs and one SWAPGS, which removes
    two stalls on all contemporary processors.
    
    Although modest, the resulting delta is:
    
      add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-106 (-106)
      Function                                     old     new   delta
      paravirt_ctxt_switch_from                    235     198     -37
      context_switch                              3582    3513     -69
    
    in a common path.
    
    [1] https://software.intel.com/security-software-guidance/insights/deep-dive-intel-analysis-speculative-behavior-swapgs-and-segment-registers
    
    Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
---
 xen/arch/x86/domain.c | 59 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 5ce11cebf8..7e16d49bfd 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1537,6 +1537,7 @@ arch_do_vcpu_op(
 static void load_segments(struct vcpu *n)
 {
     struct cpu_user_regs *uregs = &n->arch.user_regs;
+    unsigned long gsb = 0, gss = 0;
     bool compat = is_pv_32bit_vcpu(n);
     bool all_segs_okay = true, fs_gs_done = false;
 
@@ -1556,18 +1557,25 @@ static void load_segments(struct vcpu *n)
                    : [ok] "+r" (all_segs_okay)          \
                    : [_val] "rm" (val) )
 
-#ifdef CONFIG_HVM
-    if ( cpu_has_svm && !compat && (uregs->fs | uregs->gs) <= 3 )
+    if ( !compat )
     {
-        unsigned long gsb = n->arch.flags & TF_kernel_mode
-            ? n->arch.pv.gs_base_kernel : n->arch.pv.gs_base_user;
-        unsigned long gss = n->arch.flags & TF_kernel_mode
-            ? n->arch.pv.gs_base_user : n->arch.pv.gs_base_kernel;
+        gsb = n->arch.pv.gs_base_kernel;
+        gss = n->arch.pv.gs_base_user;
 
-        fs_gs_done = svm_load_segs(n->arch.pv.ldt_ents, LDT_VIRT_START(n),
-                                   n->arch.pv.fs_base, gsb, gss);
-    }
+        /*
+         * Figure out which way around gsb/gss want to be.  gsb needs to be
+         * the active context, and gss needs to be the inactive context.
+         */
+        if ( !(n->arch.flags & TF_kernel_mode) )
+            SWAP(gsb, gss);
+
+#ifdef CONFIG_HVM
+        if ( cpu_has_svm && (uregs->fs | uregs->gs) <= 3 )
+            fs_gs_done = svm_load_segs(n->arch.pv.ldt_ents, LDT_VIRT_START(n),
+                                       n->arch.pv.fs_base, gsb, gss);
 #endif
+    }
+
     if ( !fs_gs_done )
     {
         load_LDT(n);
@@ -1581,13 +1589,19 @@ static void load_segments(struct vcpu *n)
 
     if ( !fs_gs_done && !compat )
     {
-        write_fs_base(n->arch.pv.fs_base);
-        write_gs_shadow(n->arch.pv.gs_base_kernel);
-        write_gs_base(n->arch.pv.gs_base_user);
-
-        /* If in kernel mode then switch the GS bases around. */
-        if ( (n->arch.flags & TF_kernel_mode) )
+        if ( read_cr4() & X86_CR4_FSGSBASE )
+        {
+            __wrgsbase(gss);
+            __wrfsbase(n->arch.pv.fs_base);
             asm volatile ( "swapgs" );
+            __wrgsbase(gsb);
+        }
+        else
+        {
+            wrmsrl(MSR_FS_BASE, n->arch.pv.fs_base);
+            wrmsrl(MSR_GS_BASE, gsb);
+            wrmsrl(MSR_SHADOW_GS_BASE, gss);
+        }
     }
 
     if ( unlikely(!all_segs_okay) )
@@ -1707,9 +1721,20 @@ static void save_segments(struct vcpu *v)
 
     if ( !is_pv_32bit_vcpu(v) )
     {
-        unsigned long gs_base = read_gs_base();
+        unsigned long fs_base, gs_base;
+
+        if ( read_cr4() & X86_CR4_FSGSBASE )
+        {
+            fs_base = __rdfsbase();
+            gs_base = __rdgsbase();
+        }
+        else
+        {
+            rdmsrl(MSR_FS_BASE, fs_base);
+            rdmsrl(MSR_GS_BASE, gs_base);
+        }
 
-        v->arch.pv.fs_base = read_fs_base();
+        v->arch.pv.fs_base = fs_base;
         if ( v->arch.flags & TF_kernel_mode )
             v->arch.pv.gs_base_kernel = gs_base;
         else
--
generated by git-patchbot for /home/xen/git/xen.git#staging



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.