[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] vmx: last branch recording MSR emulation



# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1192630745 -3600
# Node ID 765600a13e4a05aa27c4c8810abf7882aad46406
# Parent  ca2984b17fcf134cd675248499e8ed90125774ba
vmx: last branch recording MSR emulation

This required adding infrastructure to make use of VMX's MSR save/
restore feature as well as making the MSR intercept bitmap per-vCPU.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/arch/x86/hvm/vmx/vmcs.c        |  154 +++++++++++++++++++++++++++++++++-
 xen/arch/x86/hvm/vmx/vmx.c         |  163 +++++++++++++++++++++++++++----------
 xen/include/asm-x86/hvm/vmx/vmcs.h |   19 ++++
 xen/include/asm-x86/msr-index.h    |   21 ++++
 4 files changed, 310 insertions(+), 47 deletions(-)

diff -r ca2984b17fcf -r 765600a13e4a xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Wed Oct 17 14:38:19 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Wed Oct 17 15:19:05 2007 +0100
@@ -413,9 +413,35 @@ static void vmx_set_host_env(struct vcpu
               (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
 }
 
+void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr)
+{
+    char *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
+
+    /* VMX MSR bitmap supported? */
+    if ( msr_bitmap == NULL )
+        return;
+
+    /*
+     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+     * have the write-low and read-high bitmap offsets the wrong way round.
+     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+     */
+    if ( msr <= 0x1fff )
+    {
+        __clear_bit(msr, msr_bitmap + 0x000); /* read-low */
+        __clear_bit(msr, msr_bitmap + 0x800); /* write-low */
+    }
+    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+    {
+        msr &= 0x1fff;
+        __clear_bit(msr, msr_bitmap + 0x400); /* read-high */
+        __clear_bit(msr, msr_bitmap + 0xc00); /* write-high */
+    }
+}
+
 #define GUEST_SEGMENT_LIMIT     0xffffffff
 
-static void construct_vmcs(struct vcpu *v)
+static int construct_vmcs(struct vcpu *v)
 {
     union vmcs_arbytes arbytes;
 
@@ -430,8 +456,24 @@ static void construct_vmcs(struct vcpu *
     if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
         __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
 
+    /* MSR access bitmap. */
     if ( cpu_has_vmx_msr_bitmap )
-        __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
+    {
+        char *msr_bitmap = alloc_xenheap_page();
+
+        if ( msr_bitmap == NULL )
+            return -ENOMEM;
+
+        memset(msr_bitmap, ~0, PAGE_SIZE);
+        v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
+        __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
+
+        vmx_disable_intercept_for_msr(v, MSR_FS_BASE);
+        vmx_disable_intercept_for_msr(v, MSR_GS_BASE);
+        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
+        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
+        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
+    }
 
     /* I/O access bitmap. */
     __vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
@@ -463,10 +505,8 @@ static void construct_vmcs(struct vcpu *
     __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
 
     /* MSR intercepts. */
-    __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
-    __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
+    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
     __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
-    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
     __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
 
     __vmwrite(VM_ENTRY_INTR_INFO, 0);
@@ -565,11 +605,108 @@ static void construct_vmcs(struct vcpu *
     paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
 
     vmx_vlapic_msr_changed(v);
+
+    return 0;
+}
+
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val)
+{
+    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+    const struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; i++ )
+    {
+        if ( msr_area[i].index == msr )
+        {
+            *val = msr_area[i].data;
+            return 0;
+        }
+    }
+
+    return -ESRCH;
+}
+
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val)
+{
+    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; i++ )
+    {
+        if ( msr_area[i].index == msr )
+        {
+            msr_area[i].data = val;
+            return 0;
+        }
+    }
+
+    return -ESRCH;
+}
+
+int vmx_add_guest_msr(struct vcpu *v, u32 msr)
+{
+    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; i++ )
+        if ( msr_area[i].index == msr )
+            return 0;
+
+    if ( msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
+        return -ENOSPC;
+
+    if ( msr_area == NULL )
+    {
+        if ( (msr_area = alloc_xenheap_page()) == NULL )
+            return -ENOMEM;
+        v->arch.hvm_vmx.msr_area = msr_area;
+        __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(msr_area));
+        __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+    }
+
+    msr_area[msr_count].index = msr;
+    msr_area[msr_count].mbz   = 0;
+    msr_area[msr_count].data  = 0;
+    v->arch.hvm_vmx.msr_count = ++msr_count;
+    __vmwrite(VM_EXIT_MSR_STORE_COUNT, msr_count);
+    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, msr_count);
+
+    return 0;
+}
+
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr)
+{
+    unsigned int i, msr_count = v->arch.hvm_vmx.host_msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.host_msr_area;
+
+    for ( i = 0; i < msr_count; i++ )
+        if ( msr_area[i].index == msr )
+            return 0;
+
+    if ( msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
+        return -ENOSPC;
+
+    if ( msr_area == NULL )
+    {
+        if ( (msr_area = alloc_xenheap_page()) == NULL )
+            return -ENOMEM;
+        v->arch.hvm_vmx.host_msr_area = msr_area;
+        __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+    }
+
+    msr_area[msr_count].index = msr;
+    msr_area[msr_count].mbz   = 0;
+    rdmsrl(msr, msr_area[msr_count].data);
+    v->arch.hvm_vmx.host_msr_count = ++msr_count;
+    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, msr_count);
+
+    return 0;
 }
 
 int vmx_create_vmcs(struct vcpu *v)
 {
     struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
+    int rc;
 
     if ( arch_vmx->vmcs == NULL )
     {
@@ -582,7 +719,12 @@ int vmx_create_vmcs(struct vcpu *v)
         arch_vmx->launched   = 0;
     }
 
-    construct_vmcs(v);
+    if ( (rc = construct_vmcs(v)) != 0 )
+    {
+        vmx_free_vmcs(arch_vmx->vmcs);
+        arch_vmx->vmcs = NULL;
+        return rc;
+    }
 
     return 0;
 }
diff -r ca2984b17fcf -r 765600a13e4a xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Wed Oct 17 14:38:19 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Wed Oct 17 15:19:05 2007 +0100
@@ -53,8 +53,6 @@
 
 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
 
-char *vmx_msr_bitmap;
-
 static void vmx_ctxt_switch_from(struct vcpu *v);
 static void vmx_ctxt_switch_to(struct vcpu *v);
 
@@ -1104,26 +1102,6 @@ static int vmx_event_pending(struct vcpu
 {
     ASSERT(v == current);
     return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
-}
-
-static void disable_intercept_for_msr(u32 msr)
-{
-    /*
-     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-     * have the write-low and read-high bitmap offsets the wrong way round.
-     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-     */
-    if ( msr <= 0x1fff )
-    {
-        __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
-        __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
-    }
-    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
-    {
-        msr &= 0x1fff;
-        __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
-        __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
-    }
 }
 
 static struct hvm_function_table vmx_function_table = {
@@ -1190,21 +1168,6 @@ void start_vmx(void)
     setup_vmcs_dump();
 
     hvm_enable(&vmx_function_table);
-
-    if ( cpu_has_vmx_msr_bitmap )
-    {
-        printk("VMX: MSR intercept bitmap enabled\n");
-        vmx_msr_bitmap = alloc_xenheap_page();
-        BUG_ON(vmx_msr_bitmap == NULL);
-        memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
-
-        disable_intercept_for_msr(MSR_FS_BASE);
-        disable_intercept_for_msr(MSR_GS_BASE);
-
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_CS);
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP);
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP);
-    }
 }
 
 /*
@@ -1302,10 +1265,12 @@ static void vmx_do_cpuid(struct cpu_user
                  bitmaskof(X86_FEATURE_EST)  |
                  bitmaskof(X86_FEATURE_TM2)  |
                  bitmaskof(X86_FEATURE_CID)  |
-                 bitmaskof(X86_FEATURE_PDCM));
+                 bitmaskof(X86_FEATURE_PDCM) |
+                 bitmaskof(X86_FEATURE_DSCPL));
         edx &= ~(bitmaskof(X86_FEATURE_HT)   |
                  bitmaskof(X86_FEATURE_ACPI) |
-                 bitmaskof(X86_FEATURE_ACC));
+                 bitmaskof(X86_FEATURE_ACC)  |
+                 bitmaskof(X86_FEATURE_DS));
         break;
 
     case 0x00000004:
@@ -2239,6 +2204,82 @@ static int vmx_cr_access(unsigned long e
     return 1;
 }
 
+static const struct lbr_info {
+    u32 base, count;
+} p4_lbr[] = {
+    { MSR_P4_LER_FROM_LIP,          1 },
+    { MSR_P4_LER_TO_LIP,            1 },
+    { MSR_P4_LASTBRANCH_TOS,        1 },
+    { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
+    { MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
+    { 0, 0 }
+}, c2_lbr[] = {
+    { MSR_IA32_LASTINTFROMIP,       1 },
+    { MSR_IA32_LASTINTTOIP,         1 },
+    { MSR_C2_LASTBRANCH_TOS,        1 },
+    { MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_C2_LASTBRANCH_FROM_TO },
+    { MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_C2_LASTBRANCH_FROM_TO },
+    { 0, 0 }
+#ifdef __i386__
+}, pm_lbr[] = {
+    { MSR_IA32_LASTINTFROMIP,       1 },
+    { MSR_IA32_LASTINTTOIP,         1 },
+    { MSR_PM_LASTBRANCH_TOS,        1 },
+    { MSR_PM_LASTBRANCH_0,          NUM_MSR_PM_LASTBRANCH },
+    { 0, 0 }
+#endif
+};
+
+static const struct lbr_info *last_branch_msr_get(void)
+{
+    switch ( boot_cpu_data.x86 )
+    {
+    case 6:
+        switch ( boot_cpu_data.x86_model )
+        {
+#ifdef __i386__
+        /* PentiumM */
+        case 9: case 13:
+        /* Core Solo/Duo */
+        case 14:
+            return pm_lbr;
+            break;
+#endif
+        /* Core2 Duo */
+        case 15:
+            return c2_lbr;
+            break;
+        }
+        break;
+
+    case 15:
+        switch ( boot_cpu_data.x86_model )
+        {
+        /* Pentium4/Xeon with em64t */
+        case 3: case 4: case 6:
+            return p4_lbr;
+            break;
+        }
+        break;
+    }
+
+    return NULL;
+}
+
+static int is_last_branch_msr(u32 ecx)
+{
+    const struct lbr_info *lbr = last_branch_msr_get();
+
+    if ( lbr == NULL )
+        return 0;
+
+    for ( ; lbr->count; lbr++ )
+        if ( (ecx >= lbr->base) && (ecx < (lbr->base + lbr->count)) )
+            return 1;
+
+    return 0;
+}
+
 static int vmx_do_msr_read(struct cpu_user_regs *regs)
 {
     u64 msr_content = 0;
@@ -2263,6 +2304,10 @@ static int vmx_do_msr_read(struct cpu_us
         break;
     case MSR_IA32_APICBASE:
         msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+        break;
+    case MSR_IA32_DEBUGCTLMSR:
+        if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0 )
+            msr_content = 0;
         break;
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
@@ -2288,6 +2333,15 @@ static int vmx_do_msr_read(struct cpu_us
                 goto done;
         }
 
+        if ( vmx_read_guest_msr(v, ecx, &msr_content) == 0 )
+            break;
+
+        if ( is_last_branch_msr(ecx) )
+        {
+            msr_content = 0;
+            break;
+        }
+
         if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
              rdmsr_safe(ecx, eax, edx) == 0 )
         {
@@ -2405,13 +2459,42 @@ static int vmx_do_msr_write(struct cpu_u
     case MSR_IA32_APICBASE:
         vlapic_msr_set(vcpu_vlapic(v), msr_content);
         break;
+    case MSR_IA32_DEBUGCTLMSR: {
+        int i, rc = 0;
+
+        if ( !msr_content || (msr_content & ~3) )
+            break;
+
+        if ( msr_content & 1 )
+        {
+            const struct lbr_info *lbr = last_branch_msr_get();
+            if ( lbr == NULL )
+                break;
+
+            for ( ; (rc == 0) && lbr->count; lbr++ )
+                for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
+                    if ( (rc = vmx_add_guest_msr(v, lbr->base + i)) == 0 )
+                        vmx_disable_intercept_for_msr(v, lbr->base + i);
+        }
+
+        if ( (rc < 0) ||
+             (vmx_add_guest_msr(v, ecx) < 0) ||
+             (vmx_add_host_load_msr(v, ecx) < 0) )
+            vmx_inject_hw_exception(v, TRAP_machine_check, 0);
+        else
+            vmx_write_guest_msr(v, ecx, msr_content);
+
+        break;
+    }
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
         switch ( long_mode_do_msr_write(regs) )
         {
             case HNDL_unhandled:
-                wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
+                if ( (vmx_write_guest_msr(v, ecx, msr_content) != 0) &&
+                     !is_last_branch_msr(ecx) )
+                    wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
                 break;
             case HNDL_exception_raised:
                 return 0;
diff -r ca2984b17fcf -r 765600a13e4a xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h        Wed Oct 17 14:38:19 2007 +0100
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h        Wed Oct 17 15:19:05 2007 +0100
@@ -33,6 +33,12 @@ struct vmcs_struct {
 struct vmcs_struct {
     u32 vmcs_revision_id;
     unsigned char data [0]; /* vmcs size is read from MSR */
+};
+
+struct vmx_msr_entry {
+    u32 index;
+    u32 mbz;
+    u64 data;
 };
 
 enum {
@@ -72,6 +78,12 @@ struct arch_vmx_struct {
     unsigned long        shadow_gs;
     unsigned long        cstar;
 #endif
+
+    char                *msr_bitmap;
+    unsigned int         msr_count;
+    struct vmx_msr_entry *msr_area;
+    unsigned int         host_msr_count;
+    struct vmx_msr_entry *host_msr_area;
 
     /* Following fields are all specific to vmxassist. */
     unsigned long        vmxassist_enabled:1;
@@ -131,7 +143,6 @@ extern bool_t cpu_has_vmx_ins_outs_instr
     (vmx_pin_based_exec_control & PIN_BASED_VIRTUAL_NMIS)
 #define cpu_has_vmx_msr_bitmap \
     (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
-extern char *vmx_msr_bitmap;
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define VMX_INTR_SHADOW_STI             0x00000001
@@ -268,6 +279,12 @@ enum vmcs_field {
     HOST_RIP                        = 0x00006c16,
 };
 
+void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr);
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val);
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val);
+int vmx_add_guest_msr(struct vcpu *v, u32 msr);
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr);
+
 #endif /* ASM_X86_HVM_VMX_VMCS_H__ */
 
 /*
diff -r ca2984b17fcf -r 765600a13e4a xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h   Wed Oct 17 14:38:19 2007 +0100
+++ b/xen/include/asm-x86/msr-index.h   Wed Oct 17 15:19:05 2007 +0100
@@ -323,6 +323,27 @@
 #define MSR_P4_U2L_ESCR0               0x000003b0
 #define MSR_P4_U2L_ESCR1               0x000003b1
 
+/* Netburst (P4) last-branch recording */
+#define MSR_P4_LER_FROM_LIP            0x000001d7
+#define MSR_P4_LER_TO_LIP              0x000001d8
+#define MSR_P4_LASTBRANCH_TOS          0x000001da
+#define MSR_P4_LASTBRANCH_0            0x000001db
+#define NUM_MSR_P4_LASTBRANCH          4
+#define MSR_P4_LASTBRANCH_0_FROM_LIP   0x00000680
+#define MSR_P4_LASTBRANCH_0_TO_LIP     0x000006c0
+#define NUM_MSR_P4_LASTBRANCH_FROM_TO  16
+
+/* Pentium M (and Core) last-branch recording */
+#define MSR_PM_LASTBRANCH_TOS          0x000001c9
+#define MSR_PM_LASTBRANCH_0            0x00000040
+#define NUM_MSR_PM_LASTBRANCH          8
+
+/* Core 2 last-branch recording */
+#define MSR_C2_LASTBRANCH_TOS          0x000001c9
+#define MSR_C2_LASTBRANCH_0_FROM_IP    0x00000040
+#define MSR_C2_LASTBRANCH_0_TO_IP      0x00000060
+#define NUM_MSR_C2_LASTBRANCH_FROM_TO  4
+
 /* Intel Core-based CPU performance counters */
 #define MSR_CORE_PERF_FIXED_CTR0       0x00000309
 #define MSR_CORE_PERF_FIXED_CTR1       0x0000030a

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.