[Xen-devel] [PATCH v3 15/24] x86/hvm: Reposition the modification of raw segment data from the VMCB/VMCS



Intel VT-x and AMD SVM provide access to the full segment descriptor cache via
fields in the VMCB/VMCS.  However, the bits which are actually checked by
hardware and preserved across vmentry/exit are inconsistent, and the vendor
accessor functions perform inconsistent modifications to the raw values.

Convert {svm,vmx}_{get,set}_segment_register() into raw accessors, and alter
hvm_{get,set}_segment_register() to cook the values consistently.  This allows
the common emulation code to better rely on finding architecturally-expected
values.
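
As an illustration (not part of the patch itself), common emulation code can
now rely on the cooked guarantees, e.g. that a %tr read back through
hvm_get_segment_register() is always a present, busy TSS:

    struct segment_register tr;

    hvm_get_segment_register(current, x86_seg_tr, &tr);
    ASSERT(tr.attr.fields.p);           /* Cooked: unconditionally present. */
    ASSERT(tr.attr.fields.type & 0x2);  /* Cooked: busy bit always set. */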

While moving the code performing the cooking, fix the %ss.db quirk.  A NULL
selector is indicated by .p being clear, not the value of the .type field.
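
Concretely (excerpted from the hunks below), the %ss quirk handling changes
from

    if ( reg->attr.fields.type == 0 )   /* Wrongly keyed off .type. */
        reg->attr.fields.db = 0;

to

    /* SVM may retain %ss.DB when %ss is loaded with a NULL selector. */
    if ( !reg->attr.fields.p )
        reg->attr.fields.db = 0;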

This does cause some functional changes, because the modifications are now
applied uniformly.  As a side effect, this fixes latent bugs:
vmx_set_segment_register() didn't correctly fix up .G for segments, and the
GDTR/IDTR limits were fixed up inconsistently.
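
For example, the .G recalculation now lives in a single place in
hvm_set_segment_register() (excerpted from the hunk below), rather than being
open-coded for VT-x only:

    /* Set G to match the limit field.  VT-x cares, while SVM doesn't. */
    if ( reg->attr.fields.p )
        reg->attr.fields.g = !!(reg->limit >> 20);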

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
v2:
 * Clarify the change of the %ss.db quirk
 * Rework %tr typecheck logic
 * Swap a break for return following ASSERT_UNREACHABLE()
---
 xen/arch/x86/hvm/hvm.c        | 154 ++++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/svm/svm.c    |  20 +-----
 xen/arch/x86/hvm/vmx/vmx.c    |   6 +-
 xen/include/asm-x86/desc.h    |   6 ++
 xen/include/asm-x86/hvm/hvm.h |  17 ++---
 5 files changed, 167 insertions(+), 36 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ef83100..bdfd94e 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -6051,6 +6051,160 @@ void hvm_domain_soft_reset(struct domain *d)
 }
 
 /*
+ * Segment caches in VMCB/VMCS are inconsistent about which bits are checked,
+ * important, and preserved across vmentry/exit.  Cook the values to make them
+ * closer to what is architecturally expected from entries in the segment
+ * cache.
+ */
+void hvm_get_segment_register(struct vcpu *v, enum x86_segment seg,
+                              struct segment_register *reg)
+{
+    hvm_funcs.get_segment_register(v, seg, reg);
+
+    switch ( seg )
+    {
+    case x86_seg_ss:
+        /* SVM may retain %ss.DB when %ss is loaded with a NULL selector. */
+        if ( !reg->attr.fields.p )
+            reg->attr.fields.db = 0;
+        break;
+
+    case x86_seg_tr:
+        /*
+         * SVM doesn't track %tr.B. Architecturally, a loaded TSS segment will
+         * always be busy.
+         */
+        reg->attr.fields.type |= 0x2;
+
+        /*
+         * %cs and %tr are unconditionally present.  SVM ignores these present
+         * bits and will happily run without them set.
+         */
+    case x86_seg_cs:
+        reg->attr.fields.p = 1;
+        break;
+
+    case x86_seg_gdtr:
+    case x86_seg_idtr:
+        /*
+         * Treat GDTR/IDTR as being present system segments.  This avoids them
+         * needing special casing for segmentation checks.
+         */
+        reg->attr.bytes = 0x80;
+        break;
+
+    default: /* Avoid triggering -Werror=switch */
+        break;
+    }
+
+    if ( reg->attr.fields.p )
+    {
+        /*
+         * For segments which are present/usable, cook the system flag.  SVM
+         * ignores the S bit on all segments and will happily run with them in
+         * any state.
+         */
+        reg->attr.fields.s = is_x86_user_segment(seg);
+
+        /*
+         * SVM discards %cs.G on #VMEXIT.  Other user segments do have .G
+         * tracked, but Linux commit 80112c89ed87 "KVM: Synthesize G bit for
+         * all segments." indicates that this isn't necessarily the case when
+         * nested under ESXi.
+         *
+         * Unconditionally recalculate G.
+         */
+        reg->attr.fields.g = !!(reg->limit >> 20);
+
+        /*
+         * SVM doesn't track the Accessed flag.  It will always be set for
+         * usable user segments loaded into the descriptor cache.
+         */
+        if ( is_x86_user_segment(seg) )
+            reg->attr.fields.type |= 0x1;
+    }
+}
+
+void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
+                              struct segment_register *reg)
+{
+    /* Set G to match the limit field.  VT-x cares, while SVM doesn't. */
+    if ( reg->attr.fields.p )
+        reg->attr.fields.g = !!(reg->limit >> 20);
+
+    switch ( seg )
+    {
+    case x86_seg_cs:
+        ASSERT(reg->attr.fields.p);                  /* Usable. */
+        ASSERT(reg->attr.fields.s);                  /* User segment. */
+        ASSERT((reg->base >> 32) == 0);              /* Upper bits clear. */
+        break;
+
+    case x86_seg_ss:
+        if ( reg->attr.fields.p )
+        {
+            ASSERT(reg->attr.fields.s);              /* User segment. */
+            ASSERT(!(reg->attr.fields.type & 0x8));  /* Data segment. */
+            ASSERT(reg->attr.fields.type & 0x2);     /* Writeable. */
+            ASSERT((reg->base >> 32) == 0);          /* Upper bits clear. */
+        }
+        break;
+
+    case x86_seg_ds:
+    case x86_seg_es:
+    case x86_seg_fs:
+    case x86_seg_gs:
+        if ( reg->attr.fields.p )
+        {
+            ASSERT(reg->attr.fields.s);              /* User segment. */
+
+            if ( reg->attr.fields.type & 0x8 )
+                ASSERT(reg->attr.fields.type & 0x2); /* Readable. */
+
+            if ( seg == x86_seg_fs || seg == x86_seg_gs )
+                ASSERT(is_canonical_address(reg->base));
+            else
+                ASSERT((reg->base >> 32) == 0);      /* Upper bits clear. */
+        }
+        break;
+
+    case x86_seg_tr:
+        ASSERT(reg->attr.fields.p);                  /* Usable. */
+        ASSERT(!reg->attr.fields.s);                 /* System segment. */
+        ASSERT(!(reg->sel & 0x4));                   /* !TI. */
+        if ( reg->attr.fields.type == SYS_DESC_tss_busy )
+            ASSERT(is_canonical_address(reg->base));
+        else if ( reg->attr.fields.type == SYS_DESC_tss16_busy )
+            ASSERT((reg->base >> 32) == 0);
+        else
+            ASSERT(!"%tr typecheck failure");
+        break;
+
+    case x86_seg_ldtr:
+        if ( reg->attr.fields.p )
+        {
+            ASSERT(!reg->attr.fields.s);             /* System segment. */
+            ASSERT(!(reg->sel & 0x4));               /* !TI. */
+            ASSERT(reg->attr.fields.type == SYS_DESC_ldt);
+            ASSERT(is_canonical_address(reg->base));
+        }
+        break;
+
+    case x86_seg_gdtr:
+    case x86_seg_idtr:
+        ASSERT(is_canonical_address(reg->base));
+        ASSERT((reg->limit >> 16) == 0);             /* Upper bits clear. */
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        return;
+    }
+
+    hvm_funcs.set_segment_register(v, seg, reg);
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 65eeab7..2ea2c11 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -627,50 +627,34 @@ static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
     {
     case x86_seg_cs:
         memcpy(reg, &vmcb->cs, sizeof(*reg));
-        reg->attr.fields.p = 1;
-        reg->attr.fields.g = reg->limit > 0xFFFFF;
         break;
     case x86_seg_ds:
         memcpy(reg, &vmcb->ds, sizeof(*reg));
-        if ( reg->attr.fields.type != 0 )
-            reg->attr.fields.type |= 0x1;
         break;
     case x86_seg_es:
         memcpy(reg, &vmcb->es, sizeof(*reg));
-        if ( reg->attr.fields.type != 0 )
-            reg->attr.fields.type |= 0x1;
         break;
     case x86_seg_fs:
         svm_sync_vmcb(v);
         memcpy(reg, &vmcb->fs, sizeof(*reg));
-        if ( reg->attr.fields.type != 0 )
-            reg->attr.fields.type |= 0x1;
         break;
     case x86_seg_gs:
         svm_sync_vmcb(v);
         memcpy(reg, &vmcb->gs, sizeof(*reg));
-        if ( reg->attr.fields.type != 0 )
-            reg->attr.fields.type |= 0x1;
         break;
     case x86_seg_ss:
         memcpy(reg, &vmcb->ss, sizeof(*reg));
         reg->attr.fields.dpl = vmcb->_cpl;
-        if ( reg->attr.fields.type == 0 )
-            reg->attr.fields.db = 0;
         break;
     case x86_seg_tr:
         svm_sync_vmcb(v);
         memcpy(reg, &vmcb->tr, sizeof(*reg));
-        reg->attr.fields.p = 1;
-        reg->attr.fields.type |= 0x2;
         break;
     case x86_seg_gdtr:
         memcpy(reg, &vmcb->gdtr, sizeof(*reg));
-        reg->attr.bytes = 0x80;
         break;
     case x86_seg_idtr:
         memcpy(reg, &vmcb->idtr, sizeof(*reg));
-        reg->attr.bytes = 0x80;
         break;
     case x86_seg_ldtr:
         svm_sync_vmcb(v);
@@ -740,11 +724,11 @@ static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg,
         break;
     case x86_seg_gdtr:
         vmcb->gdtr.base = reg->base;
-        vmcb->gdtr.limit = (uint16_t)reg->limit;
+        vmcb->gdtr.limit = reg->limit;
         break;
     case x86_seg_idtr:
         vmcb->idtr.base = reg->base;
-        vmcb->idtr.limit = (uint16_t)reg->limit;
+        vmcb->idtr.limit = reg->limit;
         break;
     case x86_seg_ldtr:
         memcpy(&vmcb->ldtr, reg, sizeof(*reg));
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 377c789..004dad8 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1126,9 +1126,6 @@ static void vmx_set_segment_register(struct vcpu *v, enum x86_segment seg,
      */
     attr = (!(attr & (1u << 7)) << 16) | ((attr & 0xf00) << 4) | (attr & 0xff);
 
-    /* VMX has strict consistency requirement for flag G. */
-    attr |= !!(limit >> 20) << 15;
-
     vmx_vmcs_enter(v);
 
     switch ( seg )
@@ -1173,8 +1170,7 @@ static void vmx_set_segment_register(struct vcpu *v, enum x86_segment seg,
         __vmwrite(GUEST_TR_SELECTOR, sel);
         __vmwrite(GUEST_TR_LIMIT, limit);
         __vmwrite(GUEST_TR_BASE, base);
-        /* VMX checks that the the busy flag (bit 1) is set. */
-        __vmwrite(GUEST_TR_AR_BYTES, attr | 2);
+        __vmwrite(GUEST_TR_AR_BYTES, attr);
         break;
     case x86_seg_gdtr:
         __vmwrite(GUEST_GDTR_LIMIT, limit);
diff --git a/xen/include/asm-x86/desc.h b/xen/include/asm-x86/desc.h
index 0e2d97f..da924bf 100644
--- a/xen/include/asm-x86/desc.h
+++ b/xen/include/asm-x86/desc.h
@@ -89,7 +89,13 @@
 #ifndef __ASSEMBLY__
 
 /* System Descriptor types for GDT and IDT entries. */
+#define SYS_DESC_tss16_avail  1
 #define SYS_DESC_ldt          2
+#define SYS_DESC_tss16_busy   3
+#define SYS_DESC_call_gate16  4
+#define SYS_DESC_task_gate    5
+#define SYS_DESC_irq_gate16   6
+#define SYS_DESC_trap_gate16  7
 #define SYS_DESC_tss_avail    9
 #define SYS_DESC_tss_busy     11
 #define SYS_DESC_call_gate    12
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 51a64f7..b37b335 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -358,19 +358,10 @@ static inline void hvm_flush_guest_tlbs(void)
 void hvm_hypercall_page_initialise(struct domain *d,
                                    void *hypercall_page);
 
-static inline void
-hvm_get_segment_register(struct vcpu *v, enum x86_segment seg,
-                         struct segment_register *reg)
-{
-    hvm_funcs.get_segment_register(v, seg, reg);
-}
-
-static inline void
-hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
-                         struct segment_register *reg)
-{
-    hvm_funcs.set_segment_register(v, seg, reg);
-}
+void hvm_get_segment_register(struct vcpu *v, enum x86_segment seg,
+                              struct segment_register *reg);
+void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
+                              struct segment_register *reg);
 
 static inline unsigned long hvm_get_shadow_gs_base(struct vcpu *v)
 {
-- 
2.1.4

