Re: [Xen-devel] [PATCH v11 for-xen-4.5 19/20] x86/VPMU: NMI-based VPMU support



On Mon, Sep 22, 2014 at 07:58:00PM -0400, Boris Ostrovsky wrote:
> Add support for using NMIs as PMU interrupts.

Could you explain what the benefits are vs using the regular vector?

And why would one want to use this?

> 
> Most of the processing is still performed by vpmu_do_interrupt(). However,
> since certain operations are not NMI-safe we defer them to a softirq that
> vpmu_do_interrupt() will schedule:
> * For PV guests that would be send_guest_vcpu_virq()
> * For HVM guests it's VLAPIC accesses and hvm_get_segment_register() (the
>   latter can be called in privileged profiling mode when the interrupted
>   guest is an HVM one).
> 
> With send_guest_vcpu_virq() and hvm_get_segment_register() for PV(H) and
> vlapic accesses for HVM moved to the softirq, the only routines/macros that
> vpmu_do_interrupt() calls in NMI mode are:
> * memcpy()
> * querying domain type (is_XX_domain())
> * guest_cpu_user_regs()
> * XLAT_cpu_user_regs()
> * raise_softirq()
> * vcpu_vpmu()
> * vpmu_ops->arch_vpmu_save()
> * vpmu_ops->do_interrupt() (in the future for PVH support)

I think you can drop the 'in the future for PVH support' part, as the
codebase up to this point already looks to have the right bits for PVH
support?

> 
> The latter two only access PMU MSRs with {rd,wr}msrl() (not the _safe versions
> which would not be NMI-safe).
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> ---
>  xen/arch/x86/hvm/svm/vpmu.c       |   3 +-
>  xen/arch/x86/hvm/vmx/vpmu_core2.c |   3 +-
>  xen/arch/x86/hvm/vpmu.c           | 184 ++++++++++++++++++++++++++++++--------
>  xen/include/asm-x86/hvm/vpmu.h    |   4 +-
>  xen/include/xen/softirq.h         |   1 +
>  5 files changed, 156 insertions(+), 39 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 055b21c..9db0559 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -169,7 +169,7 @@ static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
>      msr_bitmap_off(vpmu);
>  }
>  
> -static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
> +static int amd_vpmu_do_interrupt(const struct cpu_user_regs *regs)
>  {
>      return 1;
>  }
> @@ -224,6 +224,7 @@ static inline void context_save(struct vcpu *v)
>          rdmsrl(counters[i], counter_regs[i]);
>  }
>  
> +/* Must be NMI-safe */
>  static int amd_vpmu_save(struct vcpu *v)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> index ff0f890..37266ca 100644
> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -300,6 +300,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
>          rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
>  }
>  
> +/* Must be NMI-safe */
>  static int core2_vpmu_save(struct vcpu *v)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> @@ -707,7 +708,7 @@ static void core2_vpmu_dump(const struct vcpu *v)
>      }
>  }
>  
> -static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
> +static int core2_vpmu_do_interrupt(const struct cpu_user_regs *regs)
>  {
>      struct vcpu *v = current;
>      u64 msr_content;
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index 1e0575a..52f5ce8 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -34,6 +34,7 @@
>  #include <asm/hvm/svm/svm.h>
>  #include <asm/hvm/svm/vmcb.h>
>  #include <asm/apic.h>
> +#include <asm/nmi.h>
>  #include <public/pmu.h>
>  #include <xen/tasklet.h>
>  
> @@ -53,36 +54,57 @@ uint64_t __read_mostly vpmu_features = 0;
>  static void parse_vpmu_param(char *s);
>  custom_param("vpmu", parse_vpmu_param);
>  
> +static void pmu_softnmi(void);
> +
>  static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
> +static DEFINE_PER_CPU(struct vcpu *, sampled_vcpu);
> +
> +static uint32_t __read_mostly vpmu_interrupt_type = PMU_APIC_VECTOR;
>  
>  static void __init parse_vpmu_param(char *s)
>  {
> -    switch ( parse_bool(s) )
> -    {
> -    case 0:
> -        break;
> -    default:
> -        if ( !strcmp(s, "bts") )
> -            vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
> -        else if ( *s )
> +    char *ss;
> +
> +    vpmu_mode = XENPMU_MODE_SELF;
> +    if (*s == '\0')
> +        return;
> +
> +    do {
> +        ss = strchr(s, ',');
> +        if ( ss )
> +            *ss = '\0';
> +
> +        switch  ( parse_bool(s) )
>          {
> -            printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
> -            break;
> +        case 0:
> +            vpmu_mode = XENPMU_MODE_OFF;
> +            /* FALLTHROUGH */
> +        case 1:
> +            return;
> +        default:
> +            if ( !strcmp(s, "nmi") )
> +                vpmu_interrupt_type = APIC_DM_NMI;
> +            else if ( !strcmp(s, "bts") )
> +                vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
> +            else
> +            {
> +                printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
> +                vpmu_mode = XENPMU_MODE_OFF;
> +                return;
> +            }
>          }
> -        /* fall through */
> -    case 1:
> -        /* Default VPMU mode */
> -        vpmu_mode = XENPMU_MODE_SELF;
> -        break;
> -    }
> +
> +        s = ss + 1;
> +    } while ( ss );
>  }
>  
> +
>  void vpmu_lvtpc_update(uint32_t val)
>  {
>      struct vcpu *curr = current;
>      struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>  
> -    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
> +    vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | (val & APIC_LVT_MASKED);
>  
>      /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
>      if ( is_hvm_domain(curr->domain) ||
> @@ -90,6 +112,24 @@ void vpmu_lvtpc_update(uint32_t val)
>          apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>  }
>  
> +static void vpmu_send_interrupt(struct vcpu *v)
> +{
> +    struct vlapic *vlapic;
> +    u32 vlapic_lvtpc;
> +
> +    ASSERT( is_hvm_vcpu(v) );
> +
> +    vlapic = vcpu_vlapic(v);
> +    if ( !is_vlapic_lvtpc_enabled(vlapic) )
> +        return;
> +
> +    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
> +    if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
> +        vlapic_set_irq(vcpu_vlapic(v), vlapic_lvtpc & APIC_VECTOR_MASK, 0);
> +    else
> +        v->nmi_pending = 1;
> +}
> +
>  int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
>                  uint64_t supported, bool_t is_write)
>  {
> @@ -150,7 +190,8 @@ static struct vcpu *choose_hwdom_vcpu(void)
>      return v;
>  }
>  
> -int vpmu_do_interrupt(struct cpu_user_regs *regs)
> +/* This routine may be called in NMI context */
> +int vpmu_do_interrupt(const struct cpu_user_regs *regs)
>  {
>      struct vcpu *sampled = current, *sampling;
>      struct vpmu_struct *vpmu;
> @@ -231,8 +272,9 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
>                  if ( sampled->arch.flags & TF_kernel_mode )
>                      r->cs &= ~3;
>              }
> -            else
> +            else if ( !(vpmu_interrupt_type & APIC_DM_NMI) )
>              {
> +                /* Unsafe in NMI context, defer to softint later */
>                  struct segment_register seg_cs;
>  
>                  hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
> @@ -250,30 +292,30 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
>          vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
>          apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>  
> -        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
> +        if ( vpmu_interrupt_type & APIC_DM_NMI )
> +        {
> +            this_cpu(sampled_vcpu) = sampled;
> +            raise_softirq(PMU_SOFTIRQ);
> +        }
> +        else
> +            send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
>  
>          return 1;
>      }
>  
>      if ( vpmu->arch_vpmu_ops )
>      {
> -        struct vlapic *vlapic = vcpu_vlapic(sampling);
> -        u32 vlapic_lvtpc;
> -        unsigned char int_vec;
> -
>          if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) )
>              return 0;
>  
> -        if ( !is_vlapic_lvtpc_enabled(vlapic) )
> -            return 1;
> -
> -        vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
> -        int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
> -
> -        if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
> -            vlapic_set_irq(vcpu_vlapic(sampling), int_vec, 0);
> +        if ( vpmu_interrupt_type & APIC_DM_NMI )
> +        {
> +            this_cpu(sampled_vcpu) = sampled;
> +            raise_softirq(PMU_SOFTIRQ);
> +        }
>          else
> -            sampling->nmi_pending = 1;
> +            vpmu_send_interrupt(sampling);
> +
>          return 1;
>      }
>  
> @@ -306,6 +348,8 @@ static void vpmu_save_force(void *arg)
>      vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
>  
>      per_cpu(last_vcpu, smp_processor_id()) = NULL;
> +
> +    pmu_softnmi();
>  }
>  
>  void vpmu_save(struct vcpu *v)
> @@ -323,7 +367,10 @@ void vpmu_save(struct vcpu *v)
>          if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
>              vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>  
> -    apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
> +    apic_write(APIC_LVTPC, vpmu_interrupt_type | APIC_LVT_MASKED);
> +
> +    /* Make sure there are no outstanding PMU NMIs */

Could this comment be replicated across all of the callers of this function?
> +    pmu_softnmi();
>  }
>  
>  void vpmu_load(struct vcpu *v)
> @@ -377,6 +424,8 @@ void vpmu_load(struct vcpu *v)
>            (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) )
>          return;
>  
> +    pmu_softnmi();
> +
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
>      {
>          apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> @@ -399,7 +448,7 @@ void vpmu_initialise(struct vcpu *v)
>          vpmu_destroy(v);
>      vpmu_clear(vpmu);
>      vpmu->context = NULL;
> -    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
> +    vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | APIC_LVT_MASKED;
>  
>      switch ( vendor )
>      {
> @@ -435,11 +484,55 @@ void vpmu_destroy(struct vcpu *v)
>      }
>  }
>  
> +/* Process the softirq set by PMU NMI handler */
> +static void pmu_softnmi(void)
> +{
> +    struct vcpu *v, *sampled = this_cpu(sampled_vcpu);
> +
> +    if ( sampled == NULL )
> +        return;

Add a blank line here.
> +    this_cpu(sampled_vcpu) = NULL;

Andrew mentioned to me that each use of 'this_cpu' implies a call to
'smp_processor_id()'. Since we are doing this twice here, we could just
introduce 'unsigned int cpu = smp_processor_id()' once and then use
'per_cpu(sampled_vcpu, cpu)' in this code.
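
I.e. something along these lines (an untested sketch, just re-using the
names already introduced by this patch):

static void pmu_softnmi(void)
{
    unsigned int cpu = smp_processor_id();
    struct vcpu *v, *sampled = per_cpu(sampled_vcpu, cpu);

    /* Nothing was deferred from NMI context on this CPU. */
    if ( sampled == NULL )
        return;

    per_cpu(sampled_vcpu, cpu) = NULL;

    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
    {
        v = choose_hwdom_vcpu();
        if ( !v )
            return;
    }
    else
    {
        if ( is_hvm_domain(sampled->domain) )
        {
            vpmu_send_interrupt(sampled);
            return;
        }
        v = sampled;
    }

    /* We are no longer in NMI context, so this is safe to call. */
    if ( has_hvm_container_domain(sampled->domain) )
    {
        struct segment_register seg_cs;

        hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
        v->arch.vpmu.xenpmu_data->pmu.r.regs.cs = seg_cs.sel;
    }

    send_guest_vcpu_virq(v, VIRQ_XENPMU);
}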

> +
> +    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
> +         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
> +    {
> +            v = choose_hwdom_vcpu();
> +            if ( !v )
> +                return;
> +    }
> +    else
> +    {
> +        if ( is_hvm_domain(sampled->domain) )
> +        {
> +            vpmu_send_interrupt(sampled);
> +            return;
> +        }
> +        v = sampled;
> +    }
> +
> +    if ( has_hvm_container_domain(sampled->domain) )
> +    {
> +        struct segment_register seg_cs;
> +
> +        hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
> +        v->arch.vpmu.xenpmu_data->pmu.r.regs.cs = seg_cs.sel;
> +    }
> +
> +    send_guest_vcpu_virq(v, VIRQ_XENPMU);
> +}
> +
> +int pmu_nmi_interrupt(const struct cpu_user_regs *regs, int cpu)
> +{
> +    return vpmu_do_interrupt(regs);
> +}
> +
>  static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>  {
>      struct vcpu *v;
>      struct page_info *page;
>      uint64_t gfn = params->val;
> +    static bool_t __read_mostly pvpmu_init_done;
> +    static DEFINE_SPINLOCK(init_lock);
>  
>      if ( (params->vcpu >= d->max_vcpus) || (d->vcpu == NULL) ||
>           (d->vcpu[params->vcpu] == NULL) )
> @@ -463,6 +556,27 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>          return -EINVAL;
>      }
>  
> +    spin_lock(&init_lock);
> +
> +    if ( !pvpmu_init_done )
> +    {
> +        if ( reserve_lapic_nmi() != 0 )
> +        {
> +            spin_unlock(&init_lock);
> +            printk(XENLOG_G_ERR "Failed to reserve PMU NMI\n");
> +            put_page(page);
> +            return -EBUSY;
> +        }
> +
> +        set_nmi_callback(pmu_nmi_interrupt);
> +
> +        open_softirq(PMU_SOFTIRQ, pmu_softnmi);
> +
> +        pvpmu_init_done = 1;
> +    }
> +
> +    spin_unlock(&init_lock);
> +
>      vpmu_initialise(v);
>  
>      return 0;
> diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
> index a6e7934..7d86f64 100644
> --- a/xen/include/asm-x86/hvm/vpmu.h
> +++ b/xen/include/asm-x86/hvm/vpmu.h
> @@ -42,7 +42,7 @@ struct arch_vpmu_ops {
>      int (*do_wrmsr)(unsigned int msr, uint64_t msr_content,
>                      uint64_t supported);
>      int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
> -    int (*do_interrupt)(struct cpu_user_regs *regs);
> +    int (*do_interrupt)(const struct cpu_user_regs *regs);
>      void (*do_cpuid)(unsigned int input,
>                       unsigned int *eax, unsigned int *ebx,
>                       unsigned int *ecx, unsigned int *edx);
> @@ -98,7 +98,7 @@ static inline bool_t vpmu_are_all_set(const struct vpmu_struct *vpmu,
>  void vpmu_lvtpc_update(uint32_t val);
>  int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
>                  uint64_t supported, bool_t is_write);
> -int vpmu_do_interrupt(struct cpu_user_regs *regs);
> +int vpmu_do_interrupt(const struct cpu_user_regs *regs);
>  void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
>                                         unsigned int *ecx, unsigned int *edx);
>  void vpmu_initialise(struct vcpu *v);
> diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
> index 0c0d481..5829fa4 100644
> --- a/xen/include/xen/softirq.h
> +++ b/xen/include/xen/softirq.h
> @@ -8,6 +8,7 @@ enum {
>      NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ,
>      RCU_SOFTIRQ,
>      TASKLET_SOFTIRQ,
> +    PMU_SOFTIRQ,
>      NR_COMMON_SOFTIRQS
>  };
>  
> -- 
> 1.8.1.4

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel