
[Xen-devel] [PATCH] vmx: last branch recording MSR emulation



... to have feature parity with SVM.

This required adding infrastructure to make use of VMX's MSR save/
restore feature, as well as making the MSR intercept bitmap per-vCPU
(previously a single global bitmap was shared by all guests).
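
As a sketch of how the new pieces fit together (hypothetical helper
name; simplified from the MSR_IA32_DEBUGCTLMSR write path in the patch
below, with error handling trimmed):

    /* Sketch only: put an MSR on the guest save/restore list and, when
     * a per-vCPU intercept bitmap is available, let the guest access it
     * directly.  This mirrors what the DEBUGCTL write handler below
     * does for each last branch recording MSR. */
    static int example_passthrough_msr(struct vcpu *v, u32 msr)
    {
        int rc = vmx_add_guest_msr(v, msr); /* saved/loaded on VM exit/entry */

        if ( rc == 0 && cpu_has_vmx_msr_bitmap )
            vmx_disable_intercept_for_msr(v->arch.hvm_vmx.msr_bitmap, msr);

        return rc;
    }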

(Applies cleanly only on top of the previously sent SVM/EFER and
HVM/CPUID patches.)

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2007-08-08/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- 2007-08-08.orig/xen/arch/x86/hvm/vmx/vmcs.c 2007-08-06 15:08:41.000000000 +0200
+++ 2007-08-08/xen/arch/x86/hvm/vmx/vmcs.c      2007-08-08 11:46:40.000000000 +0200
@@ -163,6 +163,10 @@ static void vmx_init_vmcs_config(void)
 
     /* Require Write-Back (WB) memory type for VMCS accesses. */
     BUG_ON(((vmx_msr_high >> 18) & 15) != 6);
+
+    rdmsr(MSR_IA32_VMX_MISC, vmx_msr_low, vmx_msr_high);
+    /* 16-byte entries in 512-entry steps */
+    vmx_msr_max_order = ((vmx_msr_low >> 25) & 7) + 13 - PAGE_SHIFT;
 }
 
 static struct vmcs_struct *vmx_alloc_vmcs(void)
@@ -378,7 +382,7 @@ static void vmx_set_host_env(struct vcpu
 
 #define GUEST_SEGMENT_LIMIT     0xffffffff
 
-static void construct_vmcs(struct vcpu *v)
+static int construct_vmcs(struct vcpu *v)
 {
     unsigned long cr0, cr4;
     union vmcs_arbytes arbytes;
@@ -394,8 +398,22 @@ static void construct_vmcs(struct vcpu *
     if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
         __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
 
+    /* MSR access bitmap. */
     if ( cpu_has_vmx_msr_bitmap )
-        __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
+    {
+        char *msr_bitmap = alloc_xenheap_page();
+
+        if ( msr_bitmap == NULL )
+            return -ENOMEM;
+        memset(msr_bitmap, ~0, PAGE_SIZE);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP);
+        v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
+        __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
+    }
 
     /* I/O access bitmap. */
     __vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
@@ -427,10 +445,8 @@ static void construct_vmcs(struct vcpu *
     __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
 
     /* MSR intercepts. */
-    __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
-    __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
-    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
     __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
     __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
 
     __vmwrite(VM_ENTRY_INTR_INFO, 0);
@@ -537,6 +553,131 @@ static void construct_vmcs(struct vcpu *
     paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
 
     vmx_vlapic_msr_changed(v);
+
+    return 0;
+}
+
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val)
+{
+    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+    const struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if ( msr_area[i].index == msr )
+        {
+            *val = msr_area[i].data;
+            return 0;
+        }
+
+    return -ESRCH;
+}
+
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val)
+{
+    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if ( msr_area[i].index == msr )
+        {
+            msr_area[i].data = val;
+            return 0;
+        }
+
+    return -ESRCH;
+}
+
+int vmx_add_guest_msr(struct vcpu *v, u32 msr)
+{
+    unsigned int i, order;
+    unsigned int msr_count = v->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if ( msr_area[i].index == msr )
+            return 0;
+
+    order = get_order_from_bytes((msr_count + 1) * sizeof(*msr_area));
+    if ( order > vmx_msr_max_order )
+        return -ENOSPC;
+
+    if ( v->arch.hvm_vmx.msr_order < order )
+    {
+        if ( (msr_area = alloc_xenheap_pages(order)) == NULL )
+            return -ENOMEM;
+        if ( v->arch.hvm_vmx.msr_order )
+        {
+            memcpy(msr_area,
+                   v->arch.hvm_vmx.msr_area,
+                   msr_count * sizeof(*msr_area));
+            free_xenheap_pages(v->arch.hvm_vmx.msr_area,
+                               v->arch.hvm_vmx.msr_order);
+        }
+#ifdef __i386__
+        else
+        {
+            __vmwrite(VM_EXIT_MSR_STORE_ADDR_HIGH, 0);
+            __vmwrite(VM_ENTRY_MSR_LOAD_ADDR_HIGH, 0);
+        }
+#endif
+        v->arch.hvm_vmx.msr_area  = msr_area;
+        v->arch.hvm_vmx.msr_order = order;
+        __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(msr_area));
+        __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+    }
+
+    msr_area[msr_count].index = msr;
+    msr_area[msr_count].mbz   = 0;
+    msr_area[msr_count].data  = 0;
+    v->arch.hvm_vmx.msr_count = ++msr_count;
+    __vmwrite(VM_EXIT_MSR_STORE_COUNT, msr_count);
+    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, msr_count);
+
+    return 0;
+}
+
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr)
+{
+    unsigned int i, order;
+    unsigned int msr_count = v->arch.hvm_vmx.host_msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.host_msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if ( msr_area[i].index == msr )
+            return 0;
+
+    order = get_order_from_bytes((msr_count + 1) * sizeof(*msr_area));
+    if ( order > vmx_msr_max_order )
+        return -ENOSPC;
+
+    if ( v->arch.hvm_vmx.host_msr_order < order )
+    {
+        if ( (msr_area = alloc_xenheap_pages(order)) == NULL )
+            return -ENOMEM;
+        if ( v->arch.hvm_vmx.host_msr_order )
+        {
+            memcpy(msr_area,
+                   v->arch.hvm_vmx.host_msr_area,
+                   msr_count * sizeof(*msr_area));
+            free_xenheap_pages(v->arch.hvm_vmx.host_msr_area,
+                               v->arch.hvm_vmx.host_msr_order);
+        }
+#ifdef __i386__
+        else
+            __vmwrite(VM_EXIT_MSR_LOAD_ADDR_HIGH, 0);
+#endif
+        v->arch.hvm_vmx.host_msr_area  = msr_area;
+        v->arch.hvm_vmx.host_msr_order = order;
+        __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+    }
+
+    msr_area[msr_count].index = msr;
+    msr_area[msr_count].mbz   = 0;
+    rdmsrl(msr, msr_area[msr_count].data);
+    v->arch.hvm_vmx.host_msr_count = ++msr_count;
+    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, msr_count);
+
+    return 0;
 }
 
 int vmx_create_vmcs(struct vcpu *v)
Index: 2007-08-08/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- 2007-08-08.orig/xen/arch/x86/hvm/vmx/vmx.c  2007-08-08 11:45:25.000000000 +0200
+++ 2007-08-08/xen/arch/x86/hvm/vmx/vmx.c       2007-08-08 11:56:05.000000000 +0200
@@ -53,7 +53,7 @@
 
 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
 
-char *vmx_msr_bitmap;
+unsigned int vmx_msr_max_order = 0;
 
 static void vmx_ctxt_switch_from(struct vcpu *v);
 static void vmx_ctxt_switch_to(struct vcpu *v);
@@ -1170,26 +1170,6 @@ static int vmx_event_pending(struct vcpu
     return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
 }
 
-static void disable_intercept_for_msr(u32 msr)
-{
-    /*
-     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-     * have the write-low and read-high bitmap offsets the wrong way round.
-     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-     */
-    if ( msr <= 0x1fff )
-    {
-        __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
-        __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
-    }
-    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
-    {
-        msr &= 0x1fff;
-        __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
-        __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
-    }
-}
-
 static struct hvm_function_table vmx_function_table = {
     .name                 = "VMX",
     .domain_initialise    = vmx_domain_initialise,
@@ -1259,21 +1239,6 @@ void start_vmx(void)
     setup_vmcs_dump();
 
     hvm_enable(&vmx_function_table);
-
-    if ( cpu_has_vmx_msr_bitmap )
-    {
-        printk("VMX: MSR intercept bitmap enabled\n");
-        vmx_msr_bitmap = alloc_xenheap_page();
-        BUG_ON(vmx_msr_bitmap == NULL);
-        memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
-
-        disable_intercept_for_msr(MSR_FS_BASE);
-        disable_intercept_for_msr(MSR_GS_BASE);
-
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_CS);
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP);
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP);
-    }
 }
 
 /*
@@ -1380,7 +1345,10 @@ static void vmx_do_cpuid(struct cpu_user
                  bitmaskof(X86_FEATURE_ACC));
 
         /* Unsupported for virtualised CPUs. */
-        ecx &= ~(bitmaskof(X86_FEATURE_PDCM));
+        ecx &= ~(bitmaskof(X86_FEATURE_PDCM) |
+                 bitmaskof(X86_FEATURE_DSCPL));
+
+        edx &= ~bitmaskof(X86_FEATURE_DTES);
 
         break;
 
@@ -2572,6 +2540,82 @@ static int vmx_cr_access(unsigned long e
     return 1;
 }
 
+static const struct lbr_info {
+    u32 base, count;
+} p4_lbr[] = {
+    { MSR_P4_LER_FROM_LIP,          1 },
+    { MSR_P4_LER_TO_LIP,            1 },
+    { MSR_P4_LASTBRANCH_TOS,        1 },
+    { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
+    { MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
+    { 0, 0 }
+}, c2_lbr[] = {
+    { MSR_IA32_LASTINTFROMIP,       1 },
+    { MSR_IA32_LASTINTTOIP,         1 },
+    { MSR_P6_LASTBRANCH_TOS,        1 },
+    { MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_C2_LASTBRANCH_FROM_TO },
+    { MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_C2_LASTBRANCH_FROM_TO },
+    { 0, 0 }
+#ifdef __i386__
+}, pm_lbr[] = {
+    { MSR_IA32_LASTINTFROMIP,       1 },
+    { MSR_IA32_LASTINTTOIP,         1 },
+    { MSR_P6_LASTBRANCH_TOS,        1 },
+    { MSR_PM_LASTBRANCH_0,          NUM_MSR_PM_LASTBRANCH },
+    { 0, 0 }
+#endif
+};
+
+static const struct lbr_info *last_branch_msr_get(void)
+{
+    switch ( boot_cpu_data.x86 )
+    {
+    case 6:
+        switch ( boot_cpu_data.x86_model )
+        {
+#ifdef __i386__
+        /* Pentium M */
+        case 9: case 13:
+        /* Core Solo/Duo */
+        case 14:
+            return pm_lbr;
+            break;
+#endif
+        /* Core2 Duo */
+        case 15:
+            return c2_lbr;
+            break;
+        }
+        break;
+
+    case 15:
+        switch ( boot_cpu_data.x86_model )
+        {
+        /* Pentium4/Xeon with em64t */
+        case 3: case 4: case 6:
+            return p4_lbr;
+            break;
+        }
+        break;
+    }
+
+    return NULL;
+}
+
+static int last_branch_msr(u32 ecx)
+{
+    const struct lbr_info *lbr = last_branch_msr_get();
+
+    if ( lbr != NULL )
+    {
+        for ( ; lbr->count; ++lbr )
+            if ( ecx >= lbr->base && ecx < lbr->base + lbr->count )
+                return 1;
+    }
+
+    return 0;
+}
+
 static int vmx_do_msr_read(struct cpu_user_regs *regs)
 {
     u64 msr_content = 0;
@@ -2597,6 +2641,10 @@ static int vmx_do_msr_read(struct cpu_us
     case MSR_IA32_APICBASE:
         msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
         break;
+    case MSR_IA32_DEBUGCTLMSR:
+        if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0 )
+            msr_content = 0;
+        break;
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     case MSR_IA32_MCG_STATUS:
@@ -2610,6 +2658,15 @@ static int vmx_do_msr_read(struct cpu_us
         msr_content = 0;
         break;
     default:
+        if ( vmx_read_guest_msr(v, ecx, &msr_content) == 0 )
+            break;
+
+        if ( last_branch_msr(ecx) )
+        {
+            msr_content = 0;
+            break;
+        }
+
         switch ( long_mode_do_msr_read(regs) )
         {
             case HNDL_unhandled:
@@ -2736,13 +2793,50 @@ static int vmx_do_msr_write(struct cpu_u
     case MSR_IA32_APICBASE:
         vlapic_msr_set(vcpu_vlapic(v), msr_content);
         break;
+    case MSR_IA32_DEBUGCTLMSR:
+        if ( msr_content & ~3 )
+            break;
+        if ( msr_content )
+        {
+            int rc = 0;
+
+            if ( msr_content & 1 )
+            {
+                const struct lbr_info *lbr = last_branch_msr_get();
+
+                if ( lbr == NULL )
+                    break;
+                for ( ; rc == 0 && lbr->count; ++lbr )
+                {
+                    u32 i;
+
+                    for ( i = 0; rc == 0 && i < lbr->count; ++i )
+                    {
+                         rc = vmx_add_guest_msr(v, lbr->base + i);
+                         if ( rc == 0 && cpu_has_vmx_msr_bitmap )
+                             vmx_disable_intercept_for_msr(v->arch.hvm_vmx.msr_bitmap,
+                                                           lbr->base + i);
+                    }
+                }
+            }
+
+            if ( rc < 0 ||
+                 vmx_add_guest_msr(v, ecx) < 0 ||
+                 vmx_add_host_load_msr(v, ecx) < 0 )
+                vmx_inject_hw_exception(v, TRAP_machine_check, 0);
+            else
+                vmx_write_guest_msr(v, ecx, msr_content);
+        }
+        break;
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
         switch ( long_mode_do_msr_write(regs) )
         {
             case HNDL_unhandled:
-                wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
+                if ( vmx_write_guest_msr(v, ecx, msr_content) != 0 &&
+                     !last_branch_msr(ecx) )
+                    wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
                 break;
             case HNDL_exception_raised:
                 return 0;
Index: 2007-08-08/xen/include/asm-x86/hvm/vmx/vmcs.h
===================================================================
--- 2007-08-08.orig/xen/include/asm-x86/hvm/vmx/vmcs.h  2007-08-06 15:08:41.000000000 +0200
+++ 2007-08-08/xen/include/asm-x86/hvm/vmx/vmcs.h       2007-08-08 11:45:33.000000000 +0200
@@ -35,6 +35,13 @@ struct vmcs_struct {
     unsigned char data [0]; /* vmcs size is read from MSR */
 };
 
+struct vmx_msr_entry {
+    u32 index;
+    u32 mbz;
+    u64 data;
+};
+extern unsigned int vmx_msr_max_order;
+
 enum {
     VMX_INDEX_MSR_LSTAR = 0,
     VMX_INDEX_MSR_STAR,
@@ -79,6 +86,14 @@ struct arch_vmx_struct {
 #endif
     unsigned long        efer;
 
+    char                *msr_bitmap;
+    unsigned int         msr_order;
+    unsigned int         msr_count;
+    struct vmx_msr_entry *msr_area;
+    unsigned int         host_msr_order;
+    unsigned int         host_msr_count;
+    struct vmx_msr_entry *host_msr_area;
+
     /* Following fields are all specific to vmxassist. */
     unsigned long        vmxassist_enabled:1;
     unsigned long        irqbase_mode:1;
@@ -137,7 +152,6 @@ extern bool_t cpu_has_vmx_ins_outs_instr
     (vmx_pin_based_exec_control & PIN_BASED_VIRTUAL_NMIS)
 #define cpu_has_vmx_msr_bitmap \
     (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
-extern char *vmx_msr_bitmap;
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define VMX_INTR_SHADOW_STI             0x00000001
@@ -274,6 +288,31 @@ enum vmcs_field {
     HOST_RIP                        = 0x00006c16,
 };
 
+static inline void vmx_disable_intercept_for_msr(char *msr_bitmap, u32 msr)
+{
+    /*
+     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+     * have the write-low and read-high bitmap offsets the wrong way round.
+     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+     */
+    if ( msr <= 0x1fff )
+    {
+        __clear_bit(msr, msr_bitmap + 0x000); /* read-low */
+        __clear_bit(msr, msr_bitmap + 0x800); /* write-low */
+    }
+    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+    {
+        msr &= 0x1fff;
+        __clear_bit(msr, msr_bitmap + 0x400); /* read-high */
+        __clear_bit(msr, msr_bitmap + 0xc00); /* write-high */
+    }
+}
+
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val);
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val);
+int vmx_add_guest_msr(struct vcpu *v, u32 msr);
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr);
+
 #endif /* ASM_X86_HVM_VMX_VMCS_H__ */
 
 /*
Index: 2007-08-08/xen/include/asm-x86/msr.h
===================================================================
--- 2007-08-08.orig/xen/include/asm-x86/msr.h   2007-08-08 11:43:53.000000000 +0200
+++ 2007-08-08/xen/include/asm-x86/msr.h        2007-08-08 11:45:33.000000000 +0200
@@ -200,6 +200,13 @@ static inline void write_efer(__u64 val)
 #define MSR_P6_EVNTSEL0                        0x186
 #define MSR_P6_EVNTSEL1                        0x187
 
+#define MSR_P6_LASTBRANCH_TOS          0x1c9
+#define MSR_PM_LASTBRANCH_0            0x40
+#define NUM_MSR_PM_LASTBRANCH          8
+#define MSR_C2_LASTBRANCH_0_FROM_IP    0x40
+#define MSR_C2_LASTBRANCH_0_TO_IP      0x60
+#define NUM_MSR_C2_LASTBRANCH_FROM_TO  4
+
 #define MSR_IA32_PERF_STATUS           0x198
 #define MSR_IA32_PERF_CTL              0x199
 
@@ -223,6 +230,8 @@ static inline void write_efer(__u64 val)
 #define MSR_IA32_MC0_ADDR              0x402
 #define MSR_IA32_MC0_MISC              0x403
 
+#define MSR_IA32_DS_AREA               0x600
+
 /* K8 Machine Check MSRs */
 #define MSR_K8_MC1_CTL                 0x404
 #define MSR_K8_MC1_STATUS              0x405
@@ -333,6 +342,15 @@ static inline void write_efer(__u64 val)
 #define MSR_P4_U2L_ESCR0               0x3b0
 #define MSR_P4_U2L_ESCR1               0x3b1
 
+#define MSR_P4_LER_FROM_LIP            0x1d7
+#define MSR_P4_LER_TO_LIP              0x1d8
+#define MSR_P4_LASTBRANCH_TOS          0x1da
+#define MSR_P4_LASTBRANCH_0            0x1db
+#define NUM_MSR_P4_LASTBRANCH          4
+#define MSR_P4_LASTBRANCH_0_FROM_LIP   0x680
+#define MSR_P4_LASTBRANCH_0_TO_LIP     0x6c0
+#define NUM_MSR_P4_LASTBRANCH_FROM_TO  16
+
 #define MSR_K6_WHCR                    0xC0000082
 #define MSR_K6_UWCCR                   0xC0000085
 #define MSR_K6_EPMR                    0xC0000086
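
For reference, the vmx_msr_max_order value computed in
vmx_init_vmcs_config() follows from the SDM's definition of
IA32_VMX_MISC bits 27:25: a field value of N means the recommended
maximum MSR save/restore list length is 512 * (N + 1) entries, i.e.
(N + 1) * 8192 bytes at 16 bytes per struct vmx_msr_entry.  A
standalone illustration (plain C, not part of the patch) tabulating the
resulting allocation orders:

    #include <stdio.h>

    #define PAGE_SHIFT 12 /* 4K pages, as on x86 */

    /* Reproduces the computation from vmx_init_vmcs_config():
     * ((vmx_msr_low >> 25) & 7) + 13 - PAGE_SHIFT.  512 entries of
     * 16 bytes are 8192 == 1 << 13 bytes, hence the "+ 13". */
    int main(void)
    {
        unsigned int n;

        for ( n = 0; n < 8; ++n )
            printf("N=%u: up to %u entries, order %u (%u bytes)\n",
                   n, 512 * (n + 1), n + 13 - PAGE_SHIFT,
                   1u << (n + 13));
        return 0;
    }

Note that when 512 * (N + 1) is not a power of two the order-based
check is slightly generous (e.g. N = 2 permits an order-3, 2048-entry
allocation against a recommended maximum of 1536 entries).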


