[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v4 2/2] x86/Intel: virtualize support for cpuid faulting



On HVM guests, the cpuid triggers a vm exit, so we can check the emulated
faulting state in vmx_do_cpuid and hvmemul_cpuid. A new function,
hvm_check_cpuid_fault will check if cpuid faulting is enabled and the CPL > 0.
When it returns true, the cpuid handling functions will inject a GP(0). Notably
no hardware support for faulting on cpuid is necessary to emulate support with
an HVM guest.

On PV guests, hardware support is required so that userspace cpuid will trap
to Xen. Xen already enables cpuid faulting on supported CPUs for pv guests (that
aren't the control domain, see the comment in intel_ctxt_switch_levelling).
Every PV guest cpuid will trap via a GP(0) to emulate_privileged_op (via
do_general_protection). Once there we simply decline to emulate cpuid if the
CPL > 0 and faulting is enabled, leaving the GP(0) for the guest kernel to
handle.

Signed-off-by: Kyle Huey <khuey@xxxxxxxxxxxx>
---
 xen/arch/x86/hvm/emulate.c    | 19 +++++++++++++++++++
 xen/arch/x86/hvm/hvm.c        | 14 ++++++++++++++
 xen/arch/x86/hvm/vmx/vmx.c    | 20 ++++++++++++++++++--
 xen/arch/x86/traps.c          | 34 ++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/domain.h  |  3 +++
 xen/include/asm-x86/hvm/hvm.h |  1 +
 6 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 6ed7486..a713ff3 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1544,16 +1544,35 @@ static int hvmemul_wbinvd(
 
 static int hvmemul_cpuid(
     unsigned int *eax,
     unsigned int *ebx,
     unsigned int *ecx,
     unsigned int *edx,
     struct x86_emulate_ctxt *ctxt)
 {
+    /*
+     * x86_emulate uses this function to query CPU features for its own 
internal
+     * use. Make sure we're actually emulating CPUID before emulating CPUID
+     * faulting.
+     */
+    if ( ctxt->opcode == X86EMUL_OPC(0x0f, 0xa2) &&
+         hvm_check_cpuid_fault(current) ) {
+        struct hvm_emulate_ctxt *hvmemul_ctxt =
+            container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+
+        hvmemul_ctxt->exn_pending = 1;
+        hvmemul_ctxt->trap.vector = TRAP_gp_fault;
+        hvmemul_ctxt->trap.type = X86_EVENTTYPE_HW_EXCEPTION;
+        hvmemul_ctxt->trap.error_code = 0;
+        hvmemul_ctxt->trap.insn_len = 0;
+
+        return X86EMUL_EXCEPTION;
+    }
+
     hvm_funcs.cpuid_intercept(eax, ebx, ecx, edx);
     return X86EMUL_OKAY;
 }
 
 static int hvmemul_inject_hw_exception(
     uint8_t vector,
     int32_t error_code,
     struct x86_emulate_ctxt *ctxt)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 3c90ecd..cf81e64 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3675,16 +3675,30 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, 
unsigned int *ebx,
         hvm_cpuid(0x80000001, NULL, NULL, NULL, &_edx);
         *eax |= (_edx & cpufeat_mask(X86_FEATURE_LM) ? vaddr_bits : 32) << 8;
 
         *ebx &= hvm_featureset[FEATURESET_e8b];
         break;
     }
 }
 
+bool hvm_check_cpuid_fault(struct vcpu *v)
+{
+    struct segment_register sreg;
+
+    if ( !v->arch.cpuid_fault )
+        return false;
+
+    hvm_get_segment_register(v, x86_seg_ss, &sreg);
+    if ( sreg.attr.fields.dpl == 0 )
+        return false;
+
+    return true;
+}
+
 static uint64_t _hvm_rdtsc_intercept(void)
 {
     struct vcpu *curr = current;
 #if !defined(NDEBUG) || defined(CONFIG_PERF_COUNTERS)
     struct domain *currd = curr->domain;
 
     if ( currd->arch.vtsc )
         switch ( hvm_guest_x86_mode(curr) )
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index b9102ce..228c1b9 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2428,16 +2428,21 @@ static void vmx_cpuid_intercept(
     HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
 }
 
 static int vmx_do_cpuid(struct cpu_user_regs *regs)
 {
     unsigned int eax, ebx, ecx, edx;
     unsigned int leaf, subleaf;
 
+    if ( hvm_check_cpuid_fault(current) ) {
+        hvm_inject_hw_exception(TRAP_gp_fault, 0);
+        return 1;  /* Don't advance the guest IP! */
+    }
+
     eax = regs->eax;
     ebx = regs->ebx;
     ecx = regs->ecx;
     edx = regs->edx;
 
     leaf = regs->eax;
     subleaf = regs->ecx;
 
@@ -2694,19 +2699,23 @@ static int vmx_msr_read_intercept(unsigned int msr, 
uint64_t *msr_content)
     case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
     case MSR_IA32_PEBS_ENABLE:
     case MSR_IA32_DS_AREA:
         if ( vpmu_do_rdmsr(msr, msr_content) )
             goto gp_fault;
         break;
 
     case MSR_INTEL_PLATFORM_INFO:
-        if ( rdmsr_safe(MSR_INTEL_PLATFORM_INFO, *msr_content) )
-            goto gp_fault;
+        *msr_content = MSR_PLATFORM_INFO_CPUID_FAULTING;
+        break;
+
+    case MSR_INTEL_MISC_FEATURES_ENABLES:
         *msr_content = 0;
+        if ( current->arch.cpuid_fault )
+            *msr_content |= MSR_MISC_FEATURES_CPUID_FAULTING;
         break;
 
     default:
         if ( passive_domain_do_rdmsr(msr, msr_content) )
             goto done;
         switch ( long_mode_do_msr_read(msr, msr_content) )
         {
             case HNDL_unhandled:
@@ -2925,16 +2934,23 @@ static int vmx_msr_write_intercept(unsigned int msr, 
uint64_t msr_content)
         break;
 
     case MSR_INTEL_PLATFORM_INFO:
         if ( msr_content ||
              rdmsr_safe(MSR_INTEL_PLATFORM_INFO, msr_content) )
             goto gp_fault;
         break;
 
+    case MSR_INTEL_MISC_FEATURES_ENABLES:
+        if ( msr_content & ~MSR_MISC_FEATURES_CPUID_FAULTING )
+            goto gp_fault;
+        v->arch.cpuid_fault =
+            !!(msr_content & MSR_MISC_FEATURES_CPUID_FAULTING);
+        break;
+
     default:
         if ( passive_domain_do_wrmsr(msr, msr_content) )
             return X86EMUL_OKAY;
 
         if ( wrmsr_viridian_regs(msr, msr_content) ) 
             break;
 
         switch ( long_mode_do_msr_write(msr, msr_content) )
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 293ff8d..12322bd 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1315,16 +1315,24 @@ static int emulate_forced_invalid_op(struct 
cpu_user_regs *regs)
     /* We only emulate CPUID. */
     if ( ( rc = copy_from_user(instr, (char *)eip, sizeof(instr))) != 0 )
     {
         propagate_page_fault(eip + sizeof(instr) - rc, 0);
         return EXCRET_fault_fixed;
     }
     if ( memcmp(instr, "\xf\xa2", sizeof(instr)) )
         return 0;
+
+    /* If cpuid faulting is enabled and CPL>0 inject a #GP in place of #UD. */
+    if ( current->arch.cpuid_fault && !guest_kernel_mode(current, regs) ) {
+        regs->eip = eip;
+        do_guest_trap(TRAP_gp_fault, regs);
+        return EXCRET_fault_fixed;
+    }
+
     eip += sizeof(instr);
 
     pv_cpuid(regs);
 
     instruction_done(regs, eip, 0);
 
     trace_trap_one_addr(TRC_PV_FORCED_INVALID_OP, regs->eip);
 
@@ -2474,16 +2482,27 @@ static int priv_op_read_msr(unsigned int reg, uint64_t 
*val,
         *val = 0;
         return X86EMUL_OKAY;
 
     case MSR_INTEL_PLATFORM_INFO:
         if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
              rdmsr_safe(MSR_INTEL_PLATFORM_INFO, *val) )
             break;
         *val = 0;
+        if ( this_cpu(cpuid_faulting_enabled) )
+            *val |= MSR_PLATFORM_INFO_CPUID_FAULTING;
+        return X86EMUL_OKAY;
+
+    case MSR_INTEL_MISC_FEATURES_ENABLES:
+        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+             rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES, *val) )
+            break;
+        *val = 0;
+        if ( curr->arch.cpuid_fault )
+            *val |= MSR_MISC_FEATURES_CPUID_FAULTING;
         return X86EMUL_OKAY;
 
     case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
     case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3):
     case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
     case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
         if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
         {
@@ -2677,16 +2696,27 @@ static int priv_op_write_msr(unsigned int reg, uint64_t 
val,
         return X86EMUL_OKAY;
 
     case MSR_INTEL_PLATFORM_INFO:
         if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
              val || rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val) )
             break;
         return X86EMUL_OKAY;
 
+    case MSR_INTEL_MISC_FEATURES_ENABLES:
+        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+             (val & ~MSR_MISC_FEATURES_CPUID_FAULTING) ||
+             rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES, temp) )
+            break;
+        if ( (val & MSR_MISC_FEATURES_CPUID_FAULTING) &&
+             !this_cpu(cpuid_faulting_enabled) )
+            break;
+        curr->arch.cpuid_fault = !!(val & MSR_MISC_FEATURES_CPUID_FAULTING);
+        return X86EMUL_OKAY;
+
     case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
     case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3):
     case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
     case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
         if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
         {
             vpmu_msr = true;
     case MSR_AMD_FAM15H_EVNTSEL0...MSR_AMD_FAM15H_PERFCTR5:
@@ -3186,16 +3216,20 @@ static int emulate_privileged_op(struct cpu_user_regs 
*regs)
         if ( priv_op_read_msr(regs->_ecx, &val, NULL) != X86EMUL_OKAY )
             goto fail;
  rdmsr_writeback:
         regs->eax = (uint32_t)val;
         regs->edx = (uint32_t)(val >> 32);
         break;
 
     case 0xa2: /* CPUID */
+        /* If cpuid faulting is enabled and CPL>0 leave the #GP untouched. */
+        if ( v->arch.cpuid_fault && !guest_kernel_mode(v, regs) )
+            goto fail;
+
         pv_cpuid(regs);
         break;
 
     default:
         goto fail;
     }
 
 #undef wr_ad
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 5807a1f..27c20cc 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -552,16 +552,19 @@ struct arch_vcpu
      * However, processor should not be able to touch eXtended states before
      * it explicitly enables it via xcr0.
      */
     uint64_t xcr0_accum;
     /* This variable determines whether nonlazy extended state has been used,
      * and thus should be saved/restored. */
     bool_t nonlazy_xstate_used;
 
+    /* Has the guest enabled CPUID faulting? */
+    bool cpuid_fault;
+
     /*
      * The SMAP check policy when updating runstate_guest(v) and the
      * secondary system time.
      */
     smap_check_policy_t smap_check_policy;
 
     struct vmce vmce;
 
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index b1be6bd..5460d3a 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -409,16 +409,17 @@ bool hvm_set_guest_bndcfgs(struct vcpu *v, u64 val);
 #define has_viridian_apic_assist(d) \
     (is_viridian_domain(d) && (viridian_feature_mask(d) & HVMPV_apic_assist))
 
 void hvm_hypervisor_cpuid_leaf(uint32_t sub_idx,
                                uint32_t *eax, uint32_t *ebx,
                                uint32_t *ecx, uint32_t *edx);
 void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                                    unsigned int *ecx, unsigned int *edx);
+bool hvm_check_cpuid_fault(struct vcpu *v);
 void hvm_migrate_timers(struct vcpu *v);
 void hvm_do_resume(struct vcpu *v);
 void hvm_migrate_pirqs(struct vcpu *v);
 
 void hvm_inject_trap(const struct hvm_trap *trap);
 void hvm_inject_hw_exception(unsigned int trapnr, int errcode);
 void hvm_inject_page_fault(int errcode, unsigned long cr2);
 
-- 
2.10.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.