[Xen-devel] [PATCH] vmx: last branch recording MSR emulation
.. to have feature parity with SVM. This required adding infrastructure to
make use of VMX' MSR save/restore feature as well as making the MSR
intercept bitmap per-VM.

(Applies cleanly only on top of the previously sent SVM/EFER and HVM/CPUID
patches.)

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2007-08-08/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- 2007-08-08.orig/xen/arch/x86/hvm/vmx/vmcs.c	2007-08-06 15:08:41.000000000 +0200
+++ 2007-08-08/xen/arch/x86/hvm/vmx/vmcs.c	2007-08-08 11:46:40.000000000 +0200
@@ -163,6 +163,10 @@ static void vmx_init_vmcs_config(void)
 
     /* Require Write-Back (WB) memory type for VMCS accesses. */
     BUG_ON(((vmx_msr_high >> 18) & 15) != 6);
+
+    rdmsr(MSR_IA32_VMX_MISC, vmx_msr_low, vmx_msr_high);
+    /* 16-byte entries in 512-entry steps */
+    vmx_msr_max_order = ((vmx_msr_low >> 25) & 7) + 13 - PAGE_SHIFT;
 }
 
 static struct vmcs_struct *vmx_alloc_vmcs(void)
@@ -378,7 +382,7 @@ static void vmx_set_host_env(struct vcpu
 
 #define GUEST_SEGMENT_LIMIT 0xffffffff
 
-static void construct_vmcs(struct vcpu *v)
+static int construct_vmcs(struct vcpu *v)
 {
     unsigned long cr0, cr4;
     union vmcs_arbytes arbytes;
@@ -394,8 +398,22 @@ static void construct_vmcs(struct vcpu *
     if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
         __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
 
+    /* MSR access bitmap. */
     if ( cpu_has_vmx_msr_bitmap )
-        __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
+    {
+        char *msr_bitmap = alloc_xenheap_page();
+
+        if ( msr_bitmap == NULL)
+            return -ENOMEM;
+        memset(msr_bitmap, ~0, PAGE_SIZE);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP);
+        v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
+        __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
+    }
 
     /* I/O access bitmap. */
     __vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
@@ -427,10 +445,8 @@ static void construct_vmcs(struct vcpu *
     __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
 
     /* MSR intercepts. */
-    __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
-    __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
-    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
     __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
     __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
 
     __vmwrite(VM_ENTRY_INTR_INFO, 0);
@@ -537,6 +553,131 @@ static void construct_vmcs(struct vcpu *
     paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
 
     vmx_vlapic_msr_changed(v);
+
+    return 0;
+}
+
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val)
+{
+    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+    const struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if (msr_area[i].index == msr)
+        {
+            *val = msr_area[i].data;
+            return 0;
+        }
+
+    return -ESRCH;
+}
+
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val)
+{
+    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if (msr_area[i].index == msr)
+        {
+            msr_area[i].data = val;
+            return 0;
+        }
+
+    return -ESRCH;
+}
+
+int vmx_add_guest_msr(struct vcpu *v, u32 msr)
+{
+    unsigned int i, order;
+    unsigned int msr_count = v->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if (msr_area[i].index == msr)
+            return 0;
+
+    order = get_order_from_bytes((msr_count + 1) * sizeof(*msr_area));
+    if ( order > vmx_msr_max_order )
+        return -ENOSPC;
+
+    if ( v->arch.hvm_vmx.msr_order < order )
+    {
+        if ( (msr_area = alloc_xenheap_pages(order)) == NULL )
+            return -ENOMEM;
+        if ( v->arch.hvm_vmx.msr_order )
+        {
+            memcpy(msr_area,
+                   v->arch.hvm_vmx.msr_area,
+                   msr_count * sizeof(*msr_area));
+            free_xenheap_pages(v->arch.hvm_vmx.msr_area,
+                               v->arch.hvm_vmx.msr_order);
+        }
+#ifdef __i386__
+        else
+        {
+            __vmwrite(VM_EXIT_MSR_STORE_ADDR_HIGH, 0);
+            __vmwrite(VM_ENTRY_MSR_LOAD_ADDR_HIGH, 0);
+        }
+#endif
+        v->arch.hvm_vmx.msr_area = msr_area;
+        v->arch.hvm_vmx.msr_order = order;
+        __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(msr_area));
+        __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+    }
+
+    msr_area[msr_count].index = msr;
+    msr_area[msr_count].mbz = 0;
+    msr_area[msr_count].data = 0;
+    v->arch.hvm_vmx.msr_count = ++msr_count;
+    __vmwrite(VM_EXIT_MSR_STORE_COUNT, msr_count);
+    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, msr_count);
+
+    return 0;
+}
+
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr)
+{
+    unsigned int i, order;
+    unsigned int msr_count = v->arch.hvm_vmx.host_msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.host_msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if (msr_area[i].index == msr)
+            return 0;
+
+    order = get_order_from_bytes((msr_count + 1) * sizeof(*msr_area));
+    if ( order > vmx_msr_max_order )
+        return -ENOSPC;
+
+    if ( v->arch.hvm_vmx.host_msr_order < order )
+    {
+        if ( (msr_area = alloc_xenheap_pages(order)) == NULL )
+            return -ENOMEM;
+        if ( v->arch.hvm_vmx.host_msr_order )
+        {
+            memcpy(msr_area,
+                   v->arch.hvm_vmx.host_msr_area,
+                   msr_count * sizeof(*msr_area));
+            free_xenheap_pages(v->arch.hvm_vmx.host_msr_area,
+                               v->arch.hvm_vmx.host_msr_order);
+        }
+#ifdef __i386__
+        else
+            __vmwrite(VM_EXIT_MSR_LOAD_ADDR_HIGH, 0);
+#endif
+        v->arch.hvm_vmx.host_msr_area = msr_area;
+        v->arch.hvm_vmx.host_msr_order = order;
+        __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+    }
+
+    msr_area[msr_count].index = msr;
+    msr_area[msr_count].mbz = 0;
+    rdmsrl(msr, msr_area[msr_count].data);
+    v->arch.hvm_vmx.host_msr_count = ++msr_count;
+    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, msr_count);
+
+    return 0;
 }
 
 int vmx_create_vmcs(struct vcpu *v)
Index: 2007-08-08/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- 2007-08-08.orig/xen/arch/x86/hvm/vmx/vmx.c	2007-08-08 11:45:25.000000000 +0200
+++ 2007-08-08/xen/arch/x86/hvm/vmx/vmx.c	2007-08-08 11:56:05.000000000 +0200
@@ -53,7 +53,7 @@ enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
 
-char *vmx_msr_bitmap;
+unsigned int vmx_msr_max_order = 0;
 
 static void vmx_ctxt_switch_from(struct vcpu *v);
 static void vmx_ctxt_switch_to(struct vcpu *v);
@@ -1170,26 +1170,6 @@ static int vmx_event_pending(struct vcpu
     return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
 }
 
-static void disable_intercept_for_msr(u32 msr)
-{
-    /*
-     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-     * have the write-low and read-high bitmap offsets the wrong way round.
-     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-     */
-    if ( msr <= 0x1fff )
-    {
-        __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
-        __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
-    }
-    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
-    {
-        msr &= 0x1fff;
-        __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
-        __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
-    }
-}
-
 static struct hvm_function_table vmx_function_table = {
     .name = "VMX",
     .domain_initialise = vmx_domain_initialise,
@@ -1259,21 +1239,6 @@ void start_vmx(void)
     setup_vmcs_dump();
 
     hvm_enable(&vmx_function_table);
-
-    if ( cpu_has_vmx_msr_bitmap )
-    {
-        printk("VMX: MSR intercept bitmap enabled\n");
-        vmx_msr_bitmap = alloc_xenheap_page();
-        BUG_ON(vmx_msr_bitmap == NULL);
-        memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
-
-        disable_intercept_for_msr(MSR_FS_BASE);
-        disable_intercept_for_msr(MSR_GS_BASE);
-
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_CS);
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP);
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP);
-    }
 }
 
 /*
@@ -1380,7 +1345,10 @@ static void vmx_do_cpuid(struct cpu_user
                     bitmaskof(X86_FEATURE_ACC));
 
         /* Unsupported for virtualised CPUs. */
-        ecx &= ~(bitmaskof(X86_FEATURE_PDCM));
+        ecx &= ~(bitmaskof(X86_FEATURE_PDCM) |
+                 bitmaskof(X86_FEATURE_DSCPL));
+
+        edx &= ~bitmaskof(X86_FEATURE_DTES);
 
         break;
 
@@ -2572,6 +2540,82 @@ static int vmx_cr_access(unsigned long e
     return 1;
 }
 
+static const struct lbr_info {
+    u32 base, count;
+} p4_lbr[] = {
+    { MSR_P4_LER_FROM_LIP, 1 },
+    { MSR_P4_LER_TO_LIP, 1 },
+    { MSR_P4_LASTBRANCH_TOS, 1 },
+    { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
+    { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
+    { 0, 0 }
+}, c2_lbr[] = {
+    { MSR_IA32_LASTINTFROMIP, 1 },
+    { MSR_IA32_LASTINTTOIP, 1 },
+    { MSR_P6_LASTBRANCH_TOS, 1 },
+    { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
+    { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
+    { 0, 0 }
+#ifdef __i386__
+}, pm_lbr[] = {
+    { MSR_IA32_LASTINTFROMIP, 1 },
+    { MSR_IA32_LASTINTTOIP, 1 },
+    { MSR_P6_LASTBRANCH_TOS, 1 },
+    { MSR_PM_LASTBRANCH_0, NUM_MSR_PM_LASTBRANCH },
+    { 0, 0 }
+#endif
+};
+
+static const struct lbr_info *last_branch_msr_get(void)
+{
+    switch ( boot_cpu_data.x86 )
+    {
+    case 6:
+        switch ( boot_cpu_data.x86_model )
+        {
+#ifdef __i386__
+        /* PentiumM */
+        case 9: case 13:
+        /* Core Solo/Duo */
+        case 14:
+            return pm_lbr;
+            break;
+#endif
+        /* Core2 Duo */
+        case 15:
+            return c2_lbr;
+            break;
+        }
+        break;
+
+    case 15:
+        switch ( boot_cpu_data.x86_model )
+        {
+        /* Pentium4/Xeon with em64t */
+        case 3: case 4: case 6:
+            return p4_lbr;
+            break;
+        }
+        break;
+    }
+
+    return NULL;
+}
+
+static int last_branch_msr(u32 ecx)
+{
+    const struct lbr_info *lbr = last_branch_msr_get();
+
+    if ( lbr != NULL )
+    {
+        for ( ; lbr->count; ++lbr )
+            if ( ecx >= lbr->base && ecx < lbr->base + lbr->count )
+                return 1;
+    }
+
+    return 0;
+}
+
 static int vmx_do_msr_read(struct cpu_user_regs *regs)
 {
     u64 msr_content = 0;
@@ -2597,6 +2641,10 @@ static int vmx_do_msr_read(struct cpu_us
     case MSR_IA32_APICBASE:
         msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
        break;
+    case MSR_IA32_DEBUGCTLMSR:
+        if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0)
+            msr_content = 0;
+        break;
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     case MSR_IA32_MCG_STATUS:
@@ -2610,6 +2658,15 @@ static int vmx_do_msr_read(struct cpu_us
         msr_content = 0;
         break;
     default:
+        if ( vmx_read_guest_msr(v, ecx, &msr_content) == 0)
+            break;
+
+        if ( last_branch_msr(ecx) )
+        {
+            msr_content = 0;
+            break;
+        }
+
         switch ( long_mode_do_msr_read(regs) )
         {
         case HNDL_unhandled:
@@ -2736,13 +2793,50 @@ static int vmx_do_msr_write(struct cpu_u
     case MSR_IA32_APICBASE:
         vlapic_msr_set(vcpu_vlapic(v), msr_content);
         break;
+    case MSR_IA32_DEBUGCTLMSR:
+        if ( msr_content & ~3 )
+            break;
+        if ( msr_content )
+        {
+            int rc = 0;
+
+            if ( msr_content & 1 )
+            {
+                const struct lbr_info *lbr = last_branch_msr_get();
+
+                if ( lbr == NULL )
+                    break;
+                for ( ; rc == 0 && lbr->count; ++lbr )
+                {
+                    u32 i;
+
+                    for ( i = 0; rc == 0 && i < lbr->count; ++i )
+                    {
+                        rc = vmx_add_guest_msr(v, lbr->base + i);
+                        if ( rc == 0 && cpu_has_vmx_msr_bitmap )
+                            vmx_disable_intercept_for_msr(v->arch.hvm_vmx.msr_bitmap,
+                                                          lbr->base + i);
+                    }
+                }
+            }
+
+            if ( rc < 0 ||
+                 vmx_add_guest_msr(v, ecx) < 0 ||
+                 vmx_add_host_load_msr(v, ecx) < 0)
+                vmx_inject_hw_exception(v, TRAP_machine_check, 0);
+            else
+                vmx_write_guest_msr(v, ecx, msr_content);
+        }
+        break;
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
         switch ( long_mode_do_msr_write(regs) )
         {
         case HNDL_unhandled:
-            wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
+            if ( vmx_write_guest_msr(v, ecx, msr_content) != 0 &&
+                 !last_branch_msr(ecx) )
+                wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
             break;
         case HNDL_exception_raised:
             return 0;
Index: 2007-08-08/xen/include/asm-x86/hvm/vmx/vmcs.h
===================================================================
--- 2007-08-08.orig/xen/include/asm-x86/hvm/vmx/vmcs.h	2007-08-06 15:08:41.000000000 +0200
+++ 2007-08-08/xen/include/asm-x86/hvm/vmx/vmcs.h	2007-08-08 11:45:33.000000000 +0200
@@ -35,6 +35,13 @@ struct vmcs_struct {
     unsigned char data [0]; /* vmcs size is read from MSR */
 };
 
+struct vmx_msr_entry {
+    u32 index;
+    u32 mbz;
+    u64 data;
+};
+extern unsigned int vmx_msr_max_order;
+
 enum {
     VMX_INDEX_MSR_LSTAR = 0,
     VMX_INDEX_MSR_STAR,
@@ -79,6 +86,14 @@ struct arch_vmx_struct {
 #endif
     unsigned long efer;
 
+    char *msr_bitmap;
+    unsigned int msr_order;
+    unsigned int msr_count;
+    struct vmx_msr_entry *msr_area;
+    unsigned int host_msr_order;
+    unsigned int host_msr_count;
+    struct vmx_msr_entry *host_msr_area;
+
     /* Following fields are all specific to vmxassist. */
    unsigned long vmxassist_enabled:1;
    unsigned long irqbase_mode:1;
@@ -137,7 +152,6 @@ extern bool_t cpu_has_vmx_ins_outs_instr
     (vmx_pin_based_exec_control & PIN_BASED_VIRTUAL_NMIS)
 #define cpu_has_vmx_msr_bitmap \
     (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
-extern char *vmx_msr_bitmap;
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define VMX_INTR_SHADOW_STI     0x00000001
@@ -274,6 +288,31 @@ enum vmcs_field {
     HOST_RIP = 0x00006c16,
 };
 
+static inline void vmx_disable_intercept_for_msr(char *msr_bitmap, u32 msr)
+{
+    /*
+     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+     * have the write-low and read-high bitmap offsets the wrong way round.
+     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+     */
+    if ( msr <= 0x1fff )
+    {
+        __clear_bit(msr, msr_bitmap + 0x000); /* read-low */
+        __clear_bit(msr, msr_bitmap + 0x800); /* write-low */
+    }
+    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+    {
+        msr &= 0x1fff;
+        __clear_bit(msr, msr_bitmap + 0x400); /* read-high */
+        __clear_bit(msr, msr_bitmap + 0xc00); /* write-high */
+    }
+}
+
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val);
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val);
+int vmx_add_guest_msr(struct vcpu *v, u32 msr);
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr);
+
 #endif /* ASM_X86_HVM_VMX_VMCS_H__ */
 
 /*
Index: 2007-08-08/xen/include/asm-x86/msr.h
===================================================================
--- 2007-08-08.orig/xen/include/asm-x86/msr.h	2007-08-08 11:43:53.000000000 +0200
+++ 2007-08-08/xen/include/asm-x86/msr.h	2007-08-08 11:45:33.000000000 +0200
@@ -200,6 +200,13 @@ static inline void write_efer(__u64 val)
 #define MSR_P6_EVNTSEL0                 0x186
 #define MSR_P6_EVNTSEL1                 0x187
 
+#define MSR_P6_LASTBRANCH_TOS           0x1c9
+#define MSR_PM_LASTBRANCH_0             0x40
+#define NUM_MSR_PM_LASTBRANCH           8
+#define MSR_C2_LASTBRANCH_0_FROM_IP     0x40
+#define MSR_C2_LASTBRANCH_0_TO_IP       0x60
+#define NUM_MSR_C2_LASTBRANCH_FROM_TO   4
+
 #define MSR_IA32_PERF_STATUS            0x198
 #define MSR_IA32_PERF_CTL               0x199
@@ -223,6 +230,8 @@ static inline void write_efer(__u64 val)
 #define MSR_IA32_MC0_ADDR               0x402
 #define MSR_IA32_MC0_MISC               0x403
 
+#define MSR_IA32_DS_AREA                0x600
+
 /* K8 Machine Check MSRs */
 #define MSR_K8_MC1_CTL                  0x404
 #define MSR_K8_MC1_STATUS               0x405
@@ -333,6 +342,15 @@ static inline void write_efer(__u64 val)
 #define MSR_P4_U2L_ESCR0                0x3b0
 #define MSR_P4_U2L_ESCR1                0x3b1
 
+#define MSR_P4_LER_FROM_LIP             0x1d7
+#define MSR_P4_LER_TO_LIP               0x1d8
+#define MSR_P4_LASTBRANCH_TOS           0x1da
+#define MSR_P4_LASTBRANCH_0             0x1db
+#define NUM_MSR_P4_LASTBRANCH           4
+#define MSR_P4_LASTBRANCH_0_FROM_LIP    0x680
+#define MSR_P4_LASTBRANCH_0_TO_LIP      0x6c0
+#define NUM_MSR_P4_LASTBRANCH_FROM_TO   16
+
 #define MSR_K6_WHCR                     0xC0000082
 #define MSR_K6_UWCCR                    0xC0000085
 #define MSR_K6_EPMR                     0xC0000086
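
For readers who want to poke at the data structure introduced above outside
of Xen, here is a minimal user-space sketch (illustrative only, not part of
the patch) of the 16-byte {index, mbz, data} save/restore entries that
vmx_add_guest_msr()/vmx_read_guest_msr() manage. The realloc()-based growth,
the helper names and the MSR number used in main() are stand-ins for the
hypervisor's page-granular allocation and VMCS writes.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Same layout as the vmx_msr_entry added to vmcs.h above. */
struct vmx_msr_entry {
    uint32_t index;
    uint32_t mbz;      /* must be zero */
    uint64_t data;
};

struct msr_state {
    struct vmx_msr_entry *area;
    unsigned int count;
};

/* Rough analogue of vmx_read_guest_msr(): linear scan of the area. */
static int read_guest_msr(const struct msr_state *s, uint32_t msr, uint64_t *val)
{
    unsigned int i;

    for ( i = 0; i < s->count; ++i )
        if ( s->area[i].index == msr )
        {
            *val = s->area[i].data;
            return 0;
        }
    return -1; /* -ESRCH in the hypervisor */
}

/* Rough analogue of vmx_add_guest_msr(): append a zeroed entry if absent. */
static int add_guest_msr(struct msr_state *s, uint32_t msr)
{
    uint64_t dummy;
    struct vmx_msr_entry *area;

    if ( read_guest_msr(s, msr, &dummy) == 0 )
        return 0; /* already tracked */

    area = realloc(s->area, (s->count + 1) * sizeof(*area));
    if ( area == NULL )
        return -1; /* -ENOMEM in the hypervisor */
    area[s->count].index = msr;
    area[s->count].mbz = 0;
    area[s->count].data = 0;
    s->area = area;
    ++s->count;
    /* The hypervisor additionally updates the VM_{EXIT,ENTRY} MSR
     * address/count VMCS fields at this point. */
    return 0;
}

int main(void)
{
    struct msr_state s = { NULL, 0 };
    const uint32_t msr_debugctl = 0x1d9; /* architectural IA32_DEBUGCTL index */
    uint64_t val;

    add_guest_msr(&s, msr_debugctl);
    if ( read_guest_msr(&s, msr_debugctl, &val) == 0 )
        printf("DEBUGCTL tracked, initial value %llu\n",
               (unsigned long long)val);
    free(s.area);
    return 0;
}

A plain linear scan is sufficient here because only a handful of MSRs (the
LBR stack plus DEBUGCTL itself) ever end up in the save/restore area.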