|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v11 for-xen-4.5 19/20] x86/VPMU: NMI-based VPMU support
On Mon, Sep 22, 2014 at 07:58:00PM -0400, Boris Ostrovsky wrote:
> Add support for using NMIs as PMU interrupts.
Could you explain what the benefits are vs using the regular vector?
And why would one want to use this?
>
> Most of processing is still performed by vpmu_do_interrupt(). However, since
> certain operations are not NMI-safe we defer them to a softint that
> vpmu_do_interrupt()
> will schedule:
> * For PV guests that would be send_guest_vcpu_virq()
> * For HVM guests it's VLAPIC accesses and hvm_get_segment_register() (the
> latter
> can be called in privileged profiling mode when the interrupted guest is an
> HVM one).
>
> With send_guest_vcpu_virq() and hvm_get_segment_register() for PV(H) and
> vlapic
> accesses for HVM moved to softint, the only routines/macros that
> vpmu_do_interrupt()
> calls in NMI mode are:
> * memcpy()
> * querying domain type (is_XX_domain())
> * guest_cpu_user_regs()
> * XLAT_cpu_user_regs()
> * raise_softirq()
> * vcpu_vpmu()
> * vpmu_ops->arch_vpmu_save()
> * vpmu_ops->do_interrupt() (in the future for PVH support)
I think you can drop the 'in the future for PVH support' as the codebase until
now looks to have the right bits for PVH support?
>
> The latter two only access PMU MSRs with {rd,wr}msrl() (not the _safe versions
> which would not be NMI-safe).
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> ---
> xen/arch/x86/hvm/svm/vpmu.c | 3 +-
> xen/arch/x86/hvm/vmx/vpmu_core2.c | 3 +-
> xen/arch/x86/hvm/vpmu.c | 184
> ++++++++++++++++++++++++++++++--------
> xen/include/asm-x86/hvm/vpmu.h | 4 +-
> xen/include/xen/softirq.h | 1 +
> 5 files changed, 156 insertions(+), 39 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 055b21c..9db0559 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -169,7 +169,7 @@ static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
> msr_bitmap_off(vpmu);
> }
>
> -static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
> +static int amd_vpmu_do_interrupt(const struct cpu_user_regs *regs)
> {
> return 1;
> }
> @@ -224,6 +224,7 @@ static inline void context_save(struct vcpu *v)
> rdmsrl(counters[i], counter_regs[i]);
> }
>
> +/* Must be NMI-safe */
> static int amd_vpmu_save(struct vcpu *v)
> {
> struct vpmu_struct *vpmu = vcpu_vpmu(v);
> diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> index ff0f890..37266ca 100644
> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -300,6 +300,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
> rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
> }
>
> +/* Must be NMI-safe */
> static int core2_vpmu_save(struct vcpu *v)
> {
> struct vpmu_struct *vpmu = vcpu_vpmu(v);
> @@ -707,7 +708,7 @@ static void core2_vpmu_dump(const struct vcpu *v)
> }
> }
>
> -static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
> +static int core2_vpmu_do_interrupt(const struct cpu_user_regs *regs)
> {
> struct vcpu *v = current;
> u64 msr_content;
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index 1e0575a..52f5ce8 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -34,6 +34,7 @@
> #include <asm/hvm/svm/svm.h>
> #include <asm/hvm/svm/vmcb.h>
> #include <asm/apic.h>
> +#include <asm/nmi.h>
> #include <public/pmu.h>
> #include <xen/tasklet.h>
>
> @@ -53,36 +54,57 @@ uint64_t __read_mostly vpmu_features = 0;
> static void parse_vpmu_param(char *s);
> custom_param("vpmu", parse_vpmu_param);
>
> +static void pmu_softnmi(void);
> +
> static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
> +static DEFINE_PER_CPU(struct vcpu *, sampled_vcpu);
> +
> +static uint32_t __read_mostly vpmu_interrupt_type = PMU_APIC_VECTOR;
>
> static void __init parse_vpmu_param(char *s)
> {
> - switch ( parse_bool(s) )
> - {
> - case 0:
> - break;
> - default:
> - if ( !strcmp(s, "bts") )
> - vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
> - else if ( *s )
> + char *ss;
> +
> + vpmu_mode = XENPMU_MODE_SELF;
> + if (*s == '\0')
> + return;
> +
> + do {
> + ss = strchr(s, ',');
> + if ( ss )
> + *ss = '\0';
> +
> + switch ( parse_bool(s) )
> {
> - printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
> - break;
> + case 0:
> + vpmu_mode = XENPMU_MODE_OFF;
> + /* FALLTHROUGH */
> + case 1:
> + return;
> + default:
> + if ( !strcmp(s, "nmi") )
> + vpmu_interrupt_type = APIC_DM_NMI;
> + else if ( !strcmp(s, "bts") )
> + vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
> + else
> + {
> + printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
> + vpmu_mode = XENPMU_MODE_OFF;
> + return;
> + }
> }
> - /* fall through */
> - case 1:
> - /* Default VPMU mode */
> - vpmu_mode = XENPMU_MODE_SELF;
> - break;
> - }
> +
> + s = ss + 1;
> + } while ( ss );
> }
>
> +
> void vpmu_lvtpc_update(uint32_t val)
> {
> struct vcpu *curr = current;
> struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>
> - vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
> + vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | (val & APIC_LVT_MASKED);
>
> /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
> if ( is_hvm_domain(curr->domain) ||
> @@ -90,6 +112,24 @@ void vpmu_lvtpc_update(uint32_t val)
> apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> }
>
> +static void vpmu_send_interrupt(struct vcpu *v)
> +{
> + struct vlapic *vlapic;
> + u32 vlapic_lvtpc;
> +
> + ASSERT( is_hvm_vcpu(v) );
> +
> + vlapic = vcpu_vlapic(v);
> + if ( !is_vlapic_lvtpc_enabled(vlapic) )
> + return;
> +
> + vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
> + if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
> + vlapic_set_irq(vcpu_vlapic(v), vlapic_lvtpc & APIC_VECTOR_MASK, 0);
> + else
> + v->nmi_pending = 1;
> +}
> +
> int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
> uint64_t supported, bool_t is_write)
> {
> @@ -150,7 +190,8 @@ static struct vcpu *choose_hwdom_vcpu(void)
> return v;
> }
>
> -int vpmu_do_interrupt(struct cpu_user_regs *regs)
> +/* This routine may be called in NMI context */
> +int vpmu_do_interrupt(const struct cpu_user_regs *regs)
> {
> struct vcpu *sampled = current, *sampling;
> struct vpmu_struct *vpmu;
> @@ -231,8 +272,9 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
> if ( sampled->arch.flags & TF_kernel_mode )
> r->cs &= ~3;
> }
> - else
> + else if ( !(vpmu_interrupt_type & APIC_DM_NMI) )
> {
> + /* Unsafe in NMI context, defer to softint later */
> struct segment_register seg_cs;
>
> hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
> @@ -250,30 +292,30 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
> vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
> apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>
> - send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
> + if ( vpmu_interrupt_type & APIC_DM_NMI )
> + {
> + this_cpu(sampled_vcpu) = sampled;
> + raise_softirq(PMU_SOFTIRQ);
> + }
> + else
> + send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
>
> return 1;
> }
>
> if ( vpmu->arch_vpmu_ops )
> {
> - struct vlapic *vlapic = vcpu_vlapic(sampling);
> - u32 vlapic_lvtpc;
> - unsigned char int_vec;
> -
> if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) )
> return 0;
>
> - if ( !is_vlapic_lvtpc_enabled(vlapic) )
> - return 1;
> -
> - vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
> - int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
> -
> - if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
> - vlapic_set_irq(vcpu_vlapic(sampling), int_vec, 0);
> + if ( vpmu_interrupt_type & APIC_DM_NMI )
> + {
> + this_cpu(sampled_vcpu) = sampled;
> + raise_softirq(PMU_SOFTIRQ);
> + }
> else
> - sampling->nmi_pending = 1;
> + vpmu_send_interrupt(sampling);
> +
> return 1;
> }
>
> @@ -306,6 +348,8 @@ static void vpmu_save_force(void *arg)
> vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
>
> per_cpu(last_vcpu, smp_processor_id()) = NULL;
> +
> + pmu_softnmi();
> }
>
> void vpmu_save(struct vcpu *v)
> @@ -323,7 +367,10 @@ void vpmu_save(struct vcpu *v)
> if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
> vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>
> - apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
> + apic_write(APIC_LVTPC, vpmu_interrupt_type | APIC_LVT_MASKED);
> +
> + /* Make sure there are no outstanding PMU NMIs */
Could this comment be replicated across all of the callers
of this function?
> + pmu_softnmi();
> }
>
> void vpmu_load(struct vcpu *v)
> @@ -377,6 +424,8 @@ void vpmu_load(struct vcpu *v)
> (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) )
> return;
>
> + pmu_softnmi();
> +
> if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
> {
> apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> @@ -399,7 +448,7 @@ void vpmu_initialise(struct vcpu *v)
> vpmu_destroy(v);
> vpmu_clear(vpmu);
> vpmu->context = NULL;
> - vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
> + vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | APIC_LVT_MASKED;
>
> switch ( vendor )
> {
> @@ -435,11 +484,55 @@ void vpmu_destroy(struct vcpu *v)
> }
> }
>
> +/* Process the softirq set by PMU NMI handler */
> +static void pmu_softnmi(void)
> +{
> + struct vcpu *v, *sampled = this_cpu(sampled_vcpu);
> +
> + if ( sampled == NULL )
> + return;
Add a new line here.
> + this_cpu(sampled_vcpu) = NULL;
Andrew mentioned to me that doing a single 'this_cpu' means
using 'smp_processor_id()'. And since we are doing this twice, we could
just introduce 'unsigned int cpu = smp_processor_id()' and
then use 'per_cpu(sampled_vcpu, cpu)' in this code.
> +
> + if ( (vpmu_mode & XENPMU_MODE_ALL) ||
> + (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
> + {
> + v = choose_hwdom_vcpu();
> + if ( !v )
> + return;
> + }
> + else
> + {
> + if ( is_hvm_domain(sampled->domain) )
> + {
> + vpmu_send_interrupt(sampled);
> + return;
> + }
> + v = sampled;
> + }
> +
> + if ( has_hvm_container_domain(sampled->domain) )
> + {
> + struct segment_register seg_cs;
> +
> + hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
> + v->arch.vpmu.xenpmu_data->pmu.r.regs.cs = seg_cs.sel;
> + }
> +
> + send_guest_vcpu_virq(v, VIRQ_XENPMU);
> +}
> +
> +int pmu_nmi_interrupt(const struct cpu_user_regs *regs, int cpu)
> +{
> + return vpmu_do_interrupt(regs);
> +}
> +
> static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
> {
> struct vcpu *v;
> struct page_info *page;
> uint64_t gfn = params->val;
> + static bool_t __read_mostly pvpmu_init_done;
> + static DEFINE_SPINLOCK(init_lock);
>
> if ( (params->vcpu >= d->max_vcpus) || (d->vcpu == NULL) ||
> (d->vcpu[params->vcpu] == NULL) )
> @@ -463,6 +556,27 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t
> *params)
> return -EINVAL;
> }
>
> + spin_lock(&init_lock);
> +
> + if ( !pvpmu_init_done )
> + {
> + if ( reserve_lapic_nmi() != 0 )
> + {
> + spin_unlock(&init_lock);
> + printk(XENLOG_G_ERR "Failed to reserve PMU NMI\n");
> + put_page(page);
> + return -EBUSY;
> + }
> +
> + set_nmi_callback(pmu_nmi_interrupt);
> +
> + open_softirq(PMU_SOFTIRQ, pmu_softnmi);
> +
> + pvpmu_init_done = 1;
> + }
> +
> + spin_unlock(&init_lock);
> +
> vpmu_initialise(v);
>
> return 0;
> diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
> index a6e7934..7d86f64 100644
> --- a/xen/include/asm-x86/hvm/vpmu.h
> +++ b/xen/include/asm-x86/hvm/vpmu.h
> @@ -42,7 +42,7 @@ struct arch_vpmu_ops {
> int (*do_wrmsr)(unsigned int msr, uint64_t msr_content,
> uint64_t supported);
> int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
> - int (*do_interrupt)(struct cpu_user_regs *regs);
> + int (*do_interrupt)(const struct cpu_user_regs *regs);
> void (*do_cpuid)(unsigned int input,
> unsigned int *eax, unsigned int *ebx,
> unsigned int *ecx, unsigned int *edx);
> @@ -98,7 +98,7 @@ static inline bool_t vpmu_are_all_set(const struct
> vpmu_struct *vpmu,
> void vpmu_lvtpc_update(uint32_t val);
> int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
> uint64_t supported, bool_t is_write);
> -int vpmu_do_interrupt(struct cpu_user_regs *regs);
> +int vpmu_do_interrupt(const struct cpu_user_regs *regs);
> void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
> unsigned int *ecx, unsigned int *edx);
> void vpmu_initialise(struct vcpu *v);
> diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
> index 0c0d481..5829fa4 100644
> --- a/xen/include/xen/softirq.h
> +++ b/xen/include/xen/softirq.h
> @@ -8,6 +8,7 @@ enum {
> NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ,
> RCU_SOFTIRQ,
> TASKLET_SOFTIRQ,
> + PMU_SOFTIRQ,
> NR_COMMON_SOFTIRQS
> };
>
> --
> 1.8.1.4
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |