[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v2 7/9] x86/VPMU: Save/restore VPMU only when necessary
VPMU doesn't need to always be saved during context switch. If we are coming back to the same processor and no other VCPU has run here we can simply continue running. This is especially useful on Intel processors where Global Control MSR is stored in VMCS, thus not requiring us to stop the counters during save operation. On AMD we need to explicitly stop the counters but we don't need to save them. Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx> Reviewed-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx> Tested-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx> --- xen/arch/x86/domain.c | 14 ++++++-- xen/arch/x86/hvm/svm/svm.c | 2 -- xen/arch/x86/hvm/svm/vpmu.c | 52 +++++++++++++++++++++++---- xen/arch/x86/hvm/vmx/vmx.c | 2 -- xen/arch/x86/hvm/vmx/vpmu_core2.c | 25 +++++++++++-- xen/arch/x86/hvm/vpmu.c | 74 +++++++++++++++++++++++++++++++++++---- xen/include/asm-x86/hvm/vpmu.h | 14 +++++--- 7 files changed, 156 insertions(+), 27 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 0d7a40c..6f9cbed 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1535,8 +1535,14 @@ void context_switch(struct vcpu *prev, struct vcpu *next) if (prev != next) update_runstate_area(prev); - if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) ) - pt_save_timer(prev); + if ( is_hvm_vcpu(prev) ) + { + if (prev != next) + vpmu_save(prev); + + if ( !list_empty(&prev->arch.hvm_vcpu.tm_list) ) + pt_save_timer(prev); + } local_irq_disable(); @@ -1574,6 +1580,10 @@ void context_switch(struct vcpu *prev, struct vcpu *next) (next->domain->domain_id != 0)); } + if (is_hvm_vcpu(next) && (prev != next) ) + /* Must be done with interrupts enabled */ + vpmu_load(next); + context_saved(prev); if (prev != next) diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index 89e47b3..8123f37 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -860,7 +860,6 @@ static void svm_ctxt_switch_from(struct vcpu *v) 
svm_fpu_leave(v); svm_save_dr(v); - vpmu_save(v); svm_lwp_save(v); svm_tsc_ratio_save(v); @@ -901,7 +900,6 @@ static void svm_ctxt_switch_to(struct vcpu *v) svm_vmsave(per_cpu(root_vmcb, cpu)); svm_vmload(vmcb); vmcb->cleanbits.bytes = 0; - vpmu_load(v); svm_lwp_load(v); svm_tsc_ratio_load(v); diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c index f2f2d12..efe2b7c 100644 --- a/xen/arch/x86/hvm/svm/vpmu.c +++ b/xen/arch/x86/hvm/svm/vpmu.c @@ -188,6 +188,21 @@ static inline void context_restore(struct vcpu *v) static void amd_vpmu_restore(struct vcpu *v) { + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct amd_vpmu_context *ctxt = vpmu->context; + + vpmu_reset(vpmu, VPMU_FROZEN); + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + { + unsigned int i; + + for ( i = 0; i < num_counters; i++ ) + wrmsrl(ctrls[i], ctxt->ctrls[i]); + + return; + } + context_restore(v); } @@ -197,23 +212,40 @@ static inline void context_save(struct vcpu *v) struct vpmu_struct *vpmu = vcpu_vpmu(v); struct amd_vpmu_context *ctxt = vpmu->context; + /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */ for ( i = 0; i < num_counters; i++ ) - { - rdmsrl(ctrls[i], ctxt->ctrls[i]); - wrmsrl(ctrls[i], 0); rdmsrl(counters[i], ctxt->counters[i]); - } } -static void amd_vpmu_save(struct vcpu *v) +static int amd_vpmu_save(struct vcpu *v) { struct vpmu_struct *vpmu = vcpu_vpmu(v); struct amd_vpmu_context *ctx = vpmu->context; + unsigned int i; + + /* + * Stop the counters. If we came here via vpmu_save_force (i.e. + * when VPMU_CONTEXT_SAVE is set) counters are already stopped. 
+ */ + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) + { + vpmu_set(vpmu, VPMU_FROZEN); + + for ( i = 0; i < num_counters; i++ ) + wrmsrl(ctrls[i], 0); + + return 0; + } + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + return 0; context_save(v); if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && ctx->msr_bitmap_set ) amd_vpmu_unset_msr_bitmap(v); + + return 1; } static void context_update(unsigned int msr, u64 msr_content) @@ -258,6 +290,7 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) return 1; vpmu_set(vpmu, VPMU_RUNNING); apic_write(APIC_LVTPC, PMU_APIC_VECTOR); + vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR; if ( !((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set ) amd_vpmu_set_msr_bitmap(v); @@ -268,16 +301,19 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) ) { apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED); + vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED; vpmu_reset(vpmu, VPMU_RUNNING); if ( ((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set ) amd_vpmu_unset_msr_bitmap(v); release_pmu_ownship(PMU_OWNER_HVM); } - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) + || vpmu_is_set(vpmu, VPMU_FROZEN) ) { context_restore(v); vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + vpmu_reset(vpmu, VPMU_FROZEN); } /* Update vpmu context immediately */ @@ -293,10 +329,12 @@ static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) struct vcpu *v = current; struct vpmu_struct *vpmu = vcpu_vpmu(v); - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) + || vpmu_is_set(vpmu, VPMU_FROZEN) ) { context_restore(v); vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + vpmu_reset(vpmu, VPMU_FROZEN); } rdmsrl(msr, *msr_content); diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index a869ed4..e36dbcb 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ 
b/xen/arch/x86/hvm/vmx/vmx.c @@ -615,7 +615,6 @@ static void vmx_ctxt_switch_from(struct vcpu *v) vmx_save_guest_msrs(v); vmx_restore_host_msrs(); vmx_save_dr(v); - vpmu_save(v); } static void vmx_ctxt_switch_to(struct vcpu *v) @@ -640,7 +639,6 @@ static void vmx_ctxt_switch_to(struct vcpu *v) vmx_restore_guest_msrs(v); vmx_restore_dr(v); - vpmu_load(v); } diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c index 6195bfc..9f152b4 100644 --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c @@ -307,17 +307,23 @@ static inline void __core2_vpmu_save(struct vcpu *v) rdmsrl(MSR_IA32_PERFCTR0+i, core2_vpmu_cxt->arch_msr_pair[i].counter); } -static void core2_vpmu_save(struct vcpu *v) +static int core2_vpmu_save(struct vcpu *v) { struct vpmu_struct *vpmu = vcpu_vpmu(v); + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) + return 0; + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + return 0; + __core2_vpmu_save(v); /* Unset PMU MSR bitmap to trap lazy load. 
*/ if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && cpu_has_vmx_msr_bitmap ) core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); - return; + return 1; } static inline void __core2_vpmu_load(struct vcpu *v) @@ -338,6 +344,11 @@ static inline void __core2_vpmu_load(struct vcpu *v) static void core2_vpmu_load(struct vcpu *v) { + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + return; + __core2_vpmu_load(v); } @@ -539,9 +550,15 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) /* Setup LVTPC in local apic */ if ( vpmu_is_set(vpmu, VPMU_RUNNING) && is_vlapic_lvtpc_enabled(vcpu_vlapic(v)) ) + { apic_write_around(APIC_LVTPC, PMU_APIC_VECTOR); + vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR; + } else + { apic_write_around(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED); + vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED; + } core2_vpmu_save_msr_context(v, type, index, msr_content); if ( type != MSR_TYPE_GLOBAL ) @@ -616,6 +633,7 @@ static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) else return 0; } + return 1; } @@ -710,7 +728,8 @@ static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs) } /* HW sets the MASK bit when performance counter interrupt occurs*/ - apic_write_around(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + vpmu->hw_lapic_lvtpc = apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED; + apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc); return 1; } diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c index fa5329d..20a0841 100644 --- a/xen/arch/x86/hvm/vpmu.c +++ b/xen/arch/x86/hvm/vpmu.c @@ -18,7 +18,6 @@ * * Author: Haitao Shan <haitao.shan@xxxxxxxxx> */ - #include <xen/config.h> #include <xen/sched.h> #include <xen/xenoprof.h> @@ -42,6 +41,8 @@ static unsigned int __read_mostly opt_vpmu_enabled; static void parse_vpmu_param(char *s); custom_param("vpmu", parse_vpmu_param); +static DEFINE_PER_CPU(struct vcpu *, last_vcpu); + static void __init 
parse_vpmu_param(char *s) { switch ( parse_bool(s) ) @@ -121,30 +122,89 @@ void vpmu_do_cpuid(unsigned int input, vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx); } +static void vpmu_save_force(void *arg) +{ + struct vcpu *v = (struct vcpu *)arg; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + return; + + if ( vpmu->arch_vpmu_ops ) + (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v); + + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE); + + per_cpu(last_vcpu, smp_processor_id()) = NULL; +} + void vpmu_save(struct vcpu *v) { struct vpmu_struct *vpmu = vcpu_vpmu(v); + int pcpu = smp_processor_id(); if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED)) ) return; + vpmu->last_pcpu = pcpu; + per_cpu(last_vcpu, pcpu) = v; + if ( vpmu->arch_vpmu_ops ) - vpmu->arch_vpmu_ops->arch_vpmu_save(v); + if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) ) + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); - vpmu->hw_lapic_lvtpc = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED); - - vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); } void vpmu_load(struct vcpu *v) { struct vpmu_struct *vpmu = vcpu_vpmu(v); + int pcpu = smp_processor_id(); + struct vcpu *prev = NULL; + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + /* First time this VCPU is running here */ + if ( vpmu->last_pcpu != pcpu ) + { + /* + * Get the context from last pcpu that we ran on. Note that if another + * VCPU is running there it must have saved this VCPU's context before + * starting to run (see below). + * There should be no race since remote pcpu will disable interrupts + * before saving the context. 
+ */ + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + { + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + on_selected_cpus(cpumask_of(vpmu->last_pcpu), + vpmu_save_force, (void *)v, 1); + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); + } + } + + /* Prevent forced context save from remote CPU */ + local_irq_disable(); + + prev = per_cpu(last_vcpu, pcpu); + + if (prev != v && prev) { + vpmu = vcpu_vpmu(prev); + + /* Someone ran here before us */ + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + vpmu_save_force(prev); + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); + + vpmu = vcpu_vpmu(v); + } + + local_irq_enable(); /* Only when PMU is counting, we load PMU context immediately. */ - if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) && - vpmu_is_set(vpmu, VPMU_RUNNING)) ) + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) return; if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load ) diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h index 01be976..03b9462 100644 --- a/xen/include/asm-x86/hvm/vpmu.h +++ b/xen/include/asm-x86/hvm/vpmu.h @@ -52,7 +52,7 @@ struct arch_vpmu_ops { unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); void (*arch_vpmu_destroy)(struct vcpu *v); - void (*arch_vpmu_save)(struct vcpu *v); + int (*arch_vpmu_save)(struct vcpu *v); void (*arch_vpmu_load)(struct vcpu *v); void (*arch_vpmu_dump)(struct vcpu *v); }; @@ -62,17 +62,23 @@ int svm_vpmu_initialise(struct vcpu *, unsigned int flags); struct vpmu_struct { u32 flags; + u32 last_pcpu; u32 hw_lapic_lvtpc; void *context; struct arch_vpmu_ops *arch_vpmu_ops; }; +/* VPMU states */ #define VPMU_CONTEXT_ALLOCATED 0x1 #define VPMU_CONTEXT_LOADED 0x2 #define VPMU_RUNNING 0x4 -#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x8 -#define VPMU_CPU_HAS_DS 0x10 /* Has Debug Store */ -#define VPMU_CPU_HAS_BTS 0x20 /* Has Branch Trace Store */ +#define VPMU_CONTEXT_SAVE 0x8 /* Force context save */ +#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */ +#define 
VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20 + +/* VPMU features */ +#define VPMU_CPU_HAS_DS 0x100 /* Has Debug Store */ +#define VPMU_CPU_HAS_BTS 0x200 /* Has Branch Trace Store */ #define vpmu_set(_vpmu, _x) ((_vpmu)->flags |= (_x)) -- 1.8.1.2 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |