[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2] x86/PV: fix/generalize guest nul selector handling



Segment bases (and limits) aren't being cleared by the loading of a nul
selector into a segment register on AMD CPUs. Therefore, if an
outgoing vCPU has a non-zero base in FS or GS and the subsequent
incoming vCPU has a non-zero but nul selector in the respective
register(s), the selector value(s) would be loaded without clearing the
segment base(s) in the hidden register portion.

Since the ABI states "zero" in its description of the fs and gs fields,
it is worth noting that the chosen approach to fix this alters the
written down ABI. I consider this preferrable over enforcing the
previously written down behavior, as nul selectors are far more likely
to be what was meant from the beginning.

The adjustments also eliminate an inconsistency between FS and GS
handling: Old code had an extra pointless (gs_base_user was always zero
when DIRTY_GS was set) conditional for GS. The old bitkeeper changeset
has no explanation for this asymmetry.

Inspired by Linux commit e137a4d8f4dd2e277e355495b6b2cb241a8693c3.

Additionally for DS and ES a flat selector is being loaded prior to the
loading of a nul one on AMD CPUs, just as a precautionary measure
(we're not currently aware of ways for a guest to deduce the base of a
segment register which has a nul selector loaded).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v2: Add DS/ES handling.

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1237,6 +1237,18 @@ arch_do_vcpu_op(
     return rc;
 }
 
+/*
+ * Loading a nul selector does not clear bases and limits on AMD CPUs. Be on
+ * the safe side and re-initialize both to flat segment values before loading
+ * a nul selector.
+ */
+#define preload_segment(seg, value) do {              \
+    if ( !((value) & ~3) &&                           \
+         boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) \
+        asm volatile ( "movl %k0, %%" #seg            \
+                       :: "r" (FLAT_USER_DS32) );     \
+} while ( false )
+
 #define loadsegment(seg,value) ({               \
     int __r = 1;                                \
     asm volatile (                              \
@@ -1275,36 +1287,40 @@ static void load_segments(struct vcpu *n
 
     /* Either selector != 0 ==> reload. */
     if ( unlikely((dirty_segment_mask & DIRTY_DS) | uregs->ds) )
+    {
+        preload_segment(ds, uregs->ds);
         all_segs_okay &= loadsegment(ds, uregs->ds);
+    }
 
     /* Either selector != 0 ==> reload. */
     if ( unlikely((dirty_segment_mask & DIRTY_ES) | uregs->es) )
+    {
+        preload_segment(es, uregs->es);
         all_segs_okay &= loadsegment(es, uregs->es);
+    }
 
-    /*
-     * Either selector != 0 ==> reload.
-     * Also reload to reset FS_BASE if it was non-zero.
-     */
-    if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
-                  uregs->fs) )
+    /* Either selector != 0 ==> reload. */
+    if ( unlikely((dirty_segment_mask & DIRTY_FS) | uregs->fs) )
+    {
         all_segs_okay &= loadsegment(fs, uregs->fs);
+        /* non-nul selector updates fs_base */
+        if ( uregs->fs & ~3 )
+            dirty_segment_mask &= ~DIRTY_FS_BASE;
+    }
 
-    /*
-     * Either selector != 0 ==> reload.
-     * Also reload to reset GS_BASE if it was non-zero.
-     */
-    if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
-                  uregs->gs) )
-    {
-        /* Reset GS_BASE with user %gs? */
-        if ( (dirty_segment_mask & DIRTY_GS) || !n->arch.pv_vcpu.gs_base_user )
-            all_segs_okay &= loadsegment(gs, uregs->gs);
+    /* Either selector != 0 ==> reload. */
+    if ( unlikely((dirty_segment_mask & DIRTY_GS) | uregs->gs) )
+    {
+        all_segs_okay &= loadsegment(gs, uregs->gs);
+        /* non-nul selector updates gs_base_user */
+        if ( uregs->gs & ~3 )
+            dirty_segment_mask &= ~DIRTY_GS_BASE_USER;
     }
 
     if ( !is_pv_32bit_vcpu(n) )
     {
         /* This can only be non-zero if selector is NULL. */
-        if ( n->arch.pv_vcpu.fs_base )
+        if ( n->arch.pv_vcpu.fs_base | (dirty_segment_mask & DIRTY_FS_BASE) )
             wrfsbase(n->arch.pv_vcpu.fs_base);
 
         /* Most kernels have non-zero GS base, so don't bother testing. */
@@ -1312,7 +1328,8 @@ static void load_segments(struct vcpu *n
         wrmsrl(MSR_SHADOW_GS_BASE, n->arch.pv_vcpu.gs_base_kernel);
 
         /* This can only be non-zero if selector is NULL. */
-        if ( n->arch.pv_vcpu.gs_base_user )
+        if ( n->arch.pv_vcpu.gs_base_user |
+             (dirty_segment_mask & DIRTY_GS_BASE_USER) )
             wrgsbase(n->arch.pv_vcpu.gs_base_user);
 
         /* If in kernel mode then switch the GS bases around. */
@@ -1447,22 +1464,22 @@ static void save_segments(struct vcpu *v
     if ( regs->fs || is_pv_32bit_vcpu(v) )
     {
         dirty_segment_mask |= DIRTY_FS;
-        v->arch.pv_vcpu.fs_base = 0; /* != 0 selector kills fs_base */
+        /* non-nul selector kills fs_base */
+        if ( regs->fs & ~3 )
+            v->arch.pv_vcpu.fs_base = 0;
     }
-    else if ( v->arch.pv_vcpu.fs_base )
-    {
+    if ( v->arch.pv_vcpu.fs_base )
         dirty_segment_mask |= DIRTY_FS_BASE;
-    }
 
     if ( regs->gs || is_pv_32bit_vcpu(v) )
     {
         dirty_segment_mask |= DIRTY_GS;
-        v->arch.pv_vcpu.gs_base_user = 0; /* != 0 selector kills gs_base_user 
*/
+        /* non-nul selector kills gs_base_user */
+        if ( regs->gs & ~3 )
+            v->arch.pv_vcpu.gs_base_user = 0;
     }
-    else if ( v->arch.pv_vcpu.gs_base_user )
-    {
+    if ( v->arch.pv_vcpu.gs_base_user )
         dirty_segment_mask |= DIRTY_GS_BASE_USER;
-    }
 
     this_cpu(dirty_segment_mask) = dirty_segment_mask;
 }
--- a/xen/include/public/arch-x86/xen-x86_64.h
+++ b/xen/include/public/arch-x86/xen-x86_64.h
@@ -203,8 +203,8 @@ struct cpu_user_regs {
     uint16_t ss, _pad2[3];
     uint16_t es, _pad3[3];
     uint16_t ds, _pad4[3];
-    uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base.     */
-    uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
+    uint16_t fs, _pad5[3]; /* Non-nul => takes precedence over fs_base.      */
+    uint16_t gs, _pad6[3]; /* Non-nul => takes precedence over gs_base_user. */
 };
 typedef struct cpu_user_regs cpu_user_regs_t;
 DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.