[Xen-devel] [PATCH 08/15] x86/hvm: Reposition the modification of raw segment data from the VMCB/VMCS
Intel VT-x and AMD SVM provide access to the full segment descriptor
cache via fields in the VMCB/VMCS.  However, the bits which are actually
checked by hardware and preserved across vmentry/exit are inconsistent,
and the vendor accessor functions perform inconsistent modifications to
the raw values.

Convert {svm,vmx}_{get,set}_segment_register() into raw accessors, and
alter hvm_{get,set}_segment_register() to cook the values consistently.
This allows the common emulation code to better rely on finding
architecturally-expected values.

This does cause some functional changes because the modifications are
now applied uniformly.  As a side effect, it fixes latent bugs:
vmx_set_segment_register() didn't correctly fix up .G for segments, and
the GDTR/IDTR limits were fixed up inconsistently.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Jun Nakajima <jun.nakajima@xxxxxxxxx>
CC: Kevin Tian <kevin.tian@xxxxxxxxx>
CC: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
CC: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
 xen/arch/x86/hvm/hvm.c        | 151 ++++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/svm/svm.c    |  20 +-----
 xen/arch/x86/hvm/vmx/vmx.c    |   6 +-
 xen/include/asm-x86/desc.h    |   6 ++
 xen/include/asm-x86/hvm/hvm.h |  17 ++---
 5 files changed, 164 insertions(+), 36 deletions(-)
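
For reference, the "cooking" for user segments reduces to three
architectural rules.  A minimal standalone sketch of those rules,
using a simplified structure for illustration (not Xen's real
segment_register layout; field names here are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    /* Simplified stand-in for segment_register; illustrative only. */
    struct seg {
        uint32_t limit;
        struct { bool p, s, g; unsigned int type; } attr;
    };

    /* For a present user segment (%cs/%ss/%ds/%es/%fs/%gs): S is always
     * set, G is recalculated from the limit, and a segment loaded into
     * the descriptor cache is always Accessed. */
    static void cook_user_segment(struct seg *reg)
    {
        if ( !reg->attr.p )
            return;
        reg->attr.s = true;
        reg->attr.g = !!(reg->limit >> 20);
        reg->attr.type |= 0x1;
    }
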
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ef83100..804cd88 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -6051,6 +6051,157 @@ void hvm_domain_soft_reset(struct domain *d)
 }
 
 /*
+ * Segment caches in VMCB/VMCS are inconsistent about which bits are checked,
+ * important, and preserved across vmentry/exit.  Cook the values to make them
+ * closer to what is architecturally expected from entries in the segment
+ * cache.
+ */
+void hvm_get_segment_register(struct vcpu *v, enum x86_segment seg,
+                              struct segment_register *reg)
+{
+    hvm_funcs.get_segment_register(v, seg, reg);
+
+    switch ( seg )
+    {
+    case x86_seg_ss:
+        /* SVM may retain %ss.DB when %ss is loaded with a NULL selector. */
+        if ( !reg->attr.fields.p )
+            reg->attr.fields.db = 0;
+        break;
+
+    case x86_seg_tr:
+        /*
+         * SVM doesn't track %tr.B.  Architecturally, a loaded TSS segment will
+         * always be busy.
+         */
+        reg->attr.fields.type |= 0x2;
+
+        /*
+         * %cs and %tr are unconditionally present.  SVM ignores these present
+         * bits and will happily run without them set.
+         */
+    case x86_seg_cs:
+        reg->attr.fields.p = 1;
+        break;
+
+    case x86_seg_gdtr:
+    case x86_seg_idtr:
+        /*
+         * Treat GDTR/IDTR as being present system segments.  This avoids them
+         * needing special casing for segmentation checks.
+         */
+        reg->attr.bytes = 0x80;
+        break;
+
+    default: /* Avoid triggering -Werror=switch */
+        break;
+    }
+
+    if ( reg->attr.fields.p )
+    {
+        /*
+         * For segments which are present/usable, cook the system flag.  SVM
+         * ignores the S bit on all segments and will happily run with them in
+         * any state.
+         */
+        reg->attr.fields.s = is_x86_user_segment(seg);
+
+        /*
+         * SVM discards %cs.G on #VMEXIT.  Other user segments do have .G
+         * tracked, but Linux commit 80112c89ed87 "KVM: Synthesize G bit for
+         * all segments." indicates that this isn't necessarily the case when
+         * nested under ESXi.
+         *
+         * Unconditionally recalculate G.
+         */
+        reg->attr.fields.g = !!(reg->limit >> 20);
+
+        /*
+         * SVM doesn't track the Accessed flag.  It will always be set for
+         * usable user segments loaded into the descriptor cache.
+         */
+        if ( is_x86_user_segment(seg) )
+            reg->attr.fields.type |= 0x1;
+    }
+}
+
+void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
+                              struct segment_register *reg)
+{
+    /* Set G to match the limit field.  VT-x cares, while SVM doesn't. */
+    if ( reg->attr.fields.p )
+        reg->attr.fields.g = !!(reg->limit >> 20);
+
+    switch ( seg )
+    {
+    case x86_seg_cs:
+        ASSERT(reg->attr.fields.p);                  /* Usable. */
+        ASSERT(reg->attr.fields.s);                  /* User segment. */
+        ASSERT((reg->base >> 32) == 0);              /* Upper bits clear. */
+        break;
+
+    case x86_seg_ss:
+        if ( reg->attr.fields.p )
+        {
+            ASSERT(reg->attr.fields.s);              /* User segment. */
+            ASSERT(!(reg->attr.fields.type & 0x8));  /* Data segment. */
+            ASSERT(reg->attr.fields.type & 0x2);     /* Writeable. */
+            ASSERT((reg->base >> 32) == 0);          /* Upper bits clear. */
+        }
+        break;
+
+    case x86_seg_ds:
+    case x86_seg_es:
+    case x86_seg_fs:
+    case x86_seg_gs:
+        if ( reg->attr.fields.p )
+        {
+            ASSERT(reg->attr.fields.s);              /* User segment. */
+
+            if ( reg->attr.fields.type & 0x8 )
+                ASSERT(reg->attr.fields.type & 0x2); /* Readable. */
+
+            if ( seg == x86_seg_fs || seg == x86_seg_gs )
+                ASSERT(is_canonical_address(reg->base));
+            else
+                ASSERT((reg->base >> 32) == 0);      /* Upper bits clear. */
+        }
+        break;
+
+    case x86_seg_tr:
+        ASSERT(reg->attr.fields.p);                  /* Usable. */
+        ASSERT(!reg->attr.fields.s);                 /* System segment. */
+        ASSERT(!(reg->sel & 0x4));                   /* !TI. */
+        ASSERT(reg->attr.fields.type == SYS_DESC_tss16_busy ||
+               reg->attr.fields.type == SYS_DESC_tss_busy);
+        ASSERT(is_canonical_address(reg->base));
+        break;
+
+    case x86_seg_ldtr:
+        if ( reg->attr.fields.p )
+        {
+            ASSERT(!reg->attr.fields.s);             /* System segment. */
+            ASSERT(!(reg->sel & 0x4));               /* !TI. */
+            ASSERT(reg->attr.fields.type == SYS_DESC_ldt);
+            ASSERT(is_canonical_address(reg->base));
+        }
+        break;
+
+    case x86_seg_gdtr:
+    case x86_seg_idtr:
+        ASSERT(is_canonical_address(reg->base));
+        ASSERT((reg->limit >> 16) == 0);             /* Upper bits clear. */
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        break;
+    }
+
+    hvm_funcs.set_segment_register(v, seg, reg);
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index fb4fd0b..c3c759f 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -627,50 +627,34 @@ static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
     {
     case x86_seg_cs:
         memcpy(reg, &vmcb->cs, sizeof(*reg));
-        reg->attr.fields.p = 1;
-        reg->attr.fields.g = reg->limit > 0xFFFFF;
         break;
     case x86_seg_ds:
         memcpy(reg, &vmcb->ds, sizeof(*reg));
-        if ( reg->attr.fields.type != 0 )
-            reg->attr.fields.type |= 0x1;
         break;
     case x86_seg_es:
         memcpy(reg, &vmcb->es, sizeof(*reg));
-        if ( reg->attr.fields.type != 0 )
-            reg->attr.fields.type |= 0x1;
         break;
     case x86_seg_fs:
         svm_sync_vmcb(v);
         memcpy(reg, &vmcb->fs, sizeof(*reg));
-        if ( reg->attr.fields.type != 0 )
-            reg->attr.fields.type |= 0x1;
         break;
     case x86_seg_gs:
         svm_sync_vmcb(v);
         memcpy(reg, &vmcb->gs, sizeof(*reg));
-        if ( reg->attr.fields.type != 0 )
-            reg->attr.fields.type |= 0x1;
         break;
     case x86_seg_ss:
         memcpy(reg, &vmcb->ss, sizeof(*reg));
         reg->attr.fields.dpl = vmcb->_cpl;
-        if ( reg->attr.fields.type == 0 )
-            reg->attr.fields.db = 0;
         break;
     case x86_seg_tr:
         svm_sync_vmcb(v);
         memcpy(reg, &vmcb->tr, sizeof(*reg));
-        reg->attr.fields.p = 1;
-        reg->attr.fields.type |= 0x2;
         break;
     case x86_seg_gdtr:
         memcpy(reg, &vmcb->gdtr, sizeof(*reg));
-        reg->attr.bytes = 0x80;
         break;
     case x86_seg_idtr:
         memcpy(reg, &vmcb->idtr, sizeof(*reg));
-        reg->attr.bytes = 0x80;
         break;
     case x86_seg_ldtr:
         svm_sync_vmcb(v);
@@ -740,11 +724,11 @@ static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg,
         break;
     case x86_seg_gdtr:
         vmcb->gdtr.base = reg->base;
-        vmcb->gdtr.limit = (uint16_t)reg->limit;
+        vmcb->gdtr.limit = reg->limit;
         break;
     case x86_seg_idtr:
         vmcb->idtr.base = reg->base;
-        vmcb->idtr.limit = (uint16_t)reg->limit;
+        vmcb->idtr.limit = reg->limit;
         break;
     case x86_seg_ldtr:
         memcpy(&vmcb->ldtr, reg, sizeof(*reg));
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 29c6088..0fd4b5c 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1126,9 +1126,6 @@ static void vmx_set_segment_register(struct vcpu *v, enum x86_segment seg,
      */
     attr = (!(attr & (1u << 7)) << 16) | ((attr & 0xf00) << 4) | (attr & 0xff);
 
-    /* VMX has strict consistency requirement for flag G. */
-    attr |= !!(limit >> 20) << 15;
-
     vmx_vmcs_enter(v);
 
     switch ( seg )
@@ -1173,8 +1170,7 @@ static void vmx_set_segment_register(struct vcpu *v, enum x86_segment seg,
         __vmwrite(GUEST_TR_SELECTOR, sel);
         __vmwrite(GUEST_TR_LIMIT, limit);
         __vmwrite(GUEST_TR_BASE, base);
-        /* VMX checks that the the busy flag (bit 1) is set. */
-        __vmwrite(GUEST_TR_AR_BYTES, attr | 2);
+        __vmwrite(GUEST_TR_AR_BYTES, attr);
         break;
     case x86_seg_gdtr:
         __vmwrite(GUEST_GDTR_LIMIT, limit);
diff --git a/xen/include/asm-x86/desc.h b/xen/include/asm-x86/desc.h
index 0e2d97f..da924bf 100644
--- a/xen/include/asm-x86/desc.h
+++ b/xen/include/asm-x86/desc.h
@@ -89,7 +89,13 @@
 #ifndef __ASSEMBLY__
 
 /* System Descriptor types for GDT and IDT entries. */
+#define SYS_DESC_tss16_avail  1
 #define SYS_DESC_ldt          2
+#define SYS_DESC_tss16_busy   3
+#define SYS_DESC_call_gate16  4
+#define SYS_DESC_task_gate    5
+#define SYS_DESC_irq_gate16   6
+#define SYS_DESC_trap_gate16  7
 #define SYS_DESC_tss_avail    9
 #define SYS_DESC_tss_busy    11
 #define SYS_DESC_call_gate   12
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 51a64f7..b37b335 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -358,19 +358,10 @@ static inline void hvm_flush_guest_tlbs(void)
 
 void hvm_hypercall_page_initialise(struct domain *d, void *hypercall_page);
 
-static inline void
-hvm_get_segment_register(struct vcpu *v, enum x86_segment seg,
-                         struct segment_register *reg)
-{
-    hvm_funcs.get_segment_register(v, seg, reg);
-}
-
-static inline void
-hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
-                         struct segment_register *reg)
-{
-    hvm_funcs.set_segment_register(v, seg, reg);
-}
+void hvm_get_segment_register(struct vcpu *v, enum x86_segment seg,
+                              struct segment_register *reg);
+void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
+                              struct segment_register *reg);
 
 static inline unsigned long hvm_get_shadow_gs_base(struct vcpu *v)
 {
--
2.1.4
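
As an aside, the attribute repacking which the VMX side performs (the
"attr = ..." expression left in place in the vmx.c hunk above) can be
checked in isolation.  A standalone sketch, with illustrative sample
values chosen for this note:

    #include <stdint.h>
    #include <stdio.h>

    /* Repack a 12-bit SVM/Xen-style attr field into VMX access rights:
     * bits 0-7 stay in place, bits 8-11 (AVL/L/D.B/G) move to bits 12-15,
     * and bit 16 ("unusable") is derived from a clear P bit (bit 7). */
    static uint32_t attr_to_vmx_ar(uint32_t attr)
    {
        return (!(attr & (1u << 7)) << 16) |
               ((attr & 0xf00) << 4) | (attr & 0xff);
    }

    int main(void)
    {
        /* Present writable data segment with G and D/B set. */
        printf("%#x -> %#x\n", 0xc93u, attr_to_vmx_ar(0xc93u)); /* 0xc093 */
        /* A non-present segment gains the unusable bit. */
        printf("%#x -> %#x\n", 0u, attr_to_vmx_ar(0u));         /* 0x10000 */
        return 0;
    }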