Re: [Xen-devel] [PATCH v11 for-xen-4.5 19/20] x86/VPMU: NMI-based VPMU support



On Mon, Sep 22, 2014 at 07:58:00PM -0400, Boris Ostrovsky wrote:
> Add support for using NMIs as PMU interrupts.

Could you explain what the benefits are vs using the regular vector?

And why would one want to use this?

> 
> Most of the processing is still performed by vpmu_do_interrupt(). However,
> since certain operations are not NMI-safe we defer them to a softirq that
> vpmu_do_interrupt() will schedule:
> * For PV guests that would be send_guest_vcpu_virq()
> * For HVM guests it's VLAPIC accesses and hvm_get_segment_register() (the
>   latter can be called in privileged profiling mode when the interrupted
>   guest is an HVM one).
> 
> With send_guest_vcpu_virq() and hvm_get_segment_register() for PV(H) and
> vlapic accesses for HVM moved to the softirq, the only routines/macros that
> vpmu_do_interrupt() calls in NMI mode are:
> * memcpy()
> * querying domain type (is_XX_domain())
> * guest_cpu_user_regs()
> * XLAT_cpu_user_regs()
> * raise_softirq()
> * vcpu_vpmu()
> * vpmu_ops->arch_vpmu_save()
> * vpmu_ops->do_interrupt() (in the future for PVH support)

I think you can drop the 'in the future for PVH support' part, as the
codebase up to this point already looks to have the right bits for PVH
support?

> 
> The latter two only access PMU MSRs with {rd,wr}msrl() (not the _safe versions
> which would not be NMI-safe).
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> ---
>  xen/arch/x86/hvm/svm/vpmu.c       |   3 +-
>  xen/arch/x86/hvm/vmx/vpmu_core2.c |   3 +-
>  xen/arch/x86/hvm/vpmu.c           | 184 ++++++++++++++++++++++++++++++--------
>  xen/include/asm-x86/hvm/vpmu.h    |   4 +-
>  xen/include/xen/softirq.h         |   1 +
>  5 files changed, 156 insertions(+), 39 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 055b21c..9db0559 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -169,7 +169,7 @@ static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
>      msr_bitmap_off(vpmu);
>  }
>  
> -static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
> +static int amd_vpmu_do_interrupt(const struct cpu_user_regs *regs)
>  {
>      return 1;
>  }
> @@ -224,6 +224,7 @@ static inline void context_save(struct vcpu *v)
>          rdmsrl(counters[i], counter_regs[i]);
>  }
>  
> +/* Must be NMI-safe */
>  static int amd_vpmu_save(struct vcpu *v)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> index ff0f890..37266ca 100644
> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -300,6 +300,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
>          rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
>  }
>  
> +/* Must be NMI-safe */
>  static int core2_vpmu_save(struct vcpu *v)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> @@ -707,7 +708,7 @@ static void core2_vpmu_dump(const struct vcpu *v)
>      }
>  }
>  
> -static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
> +static int core2_vpmu_do_interrupt(const struct cpu_user_regs *regs)
>  {
>      struct vcpu *v = current;
>      u64 msr_content;
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index 1e0575a..52f5ce8 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -34,6 +34,7 @@
>  #include <asm/hvm/svm/svm.h>
>  #include <asm/hvm/svm/vmcb.h>
>  #include <asm/apic.h>
> +#include <asm/nmi.h>
>  #include <public/pmu.h>
>  #include <xen/tasklet.h>
>  
> @@ -53,36 +54,57 @@ uint64_t __read_mostly vpmu_features = 0;
>  static void parse_vpmu_param(char *s);
>  custom_param("vpmu", parse_vpmu_param);
>  
> +static void pmu_softnmi(void);
> +
>  static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
> +static DEFINE_PER_CPU(struct vcpu *, sampled_vcpu);
> +
> +static uint32_t __read_mostly vpmu_interrupt_type = PMU_APIC_VECTOR;
>  
>  static void __init parse_vpmu_param(char *s)
>  {
> -    switch ( parse_bool(s) )
> -    {
> -    case 0:
> -        break;
> -    default:
> -        if ( !strcmp(s, "bts") )
> -            vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
> -        else if ( *s )
> +    char *ss;
> +
> +    vpmu_mode = XENPMU_MODE_SELF;
> +    if (*s == '\0')
> +        return;
> +
> +    do {
> +        ss = strchr(s, ',');
> +        if ( ss )
> +            *ss = '\0';
> +
> +        switch  ( parse_bool(s) )
>          {
> -            printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
> -            break;
> +        case 0:
> +            vpmu_mode = XENPMU_MODE_OFF;
> +            /* FALLTHROUGH */
> +        case 1:
> +            return;
> +        default:
> +            if ( !strcmp(s, "nmi") )
> +                vpmu_interrupt_type = APIC_DM_NMI;
> +            else if ( !strcmp(s, "bts") )
> +                vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
> +            else
> +            {
> +                printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
> +                vpmu_mode = XENPMU_MODE_OFF;
> +                return;
> +            }
>          }
> -        /* fall through */
> -    case 1:
> -        /* Default VPMU mode */
> -        vpmu_mode = XENPMU_MODE_SELF;
> -        break;
> -    }
> +
> +        s = ss + 1;
> +    } while ( ss );
>  }
>  
> +
>  void vpmu_lvtpc_update(uint32_t val)
>  {
>      struct vcpu *curr = current;
>      struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>  
> -    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
> +    vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | (val & APIC_LVT_MASKED);
>  
>      /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
>      if ( is_hvm_domain(curr->domain) ||
> @@ -90,6 +112,24 @@ void vpmu_lvtpc_update(uint32_t val)
>          apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>  }
>  
> +static void vpmu_send_interrupt(struct vcpu *v)
> +{
> +    struct vlapic *vlapic;
> +    u32 vlapic_lvtpc;
> +
> +    ASSERT( is_hvm_vcpu(v) );
> +
> +    vlapic = vcpu_vlapic(v);
> +    if ( !is_vlapic_lvtpc_enabled(vlapic) )
> +        return;
> +
> +    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
> +    if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
> +        vlapic_set_irq(vcpu_vlapic(v), vlapic_lvtpc & APIC_VECTOR_MASK, 0);
> +    else
> +        v->nmi_pending = 1;
> +}
> +
>  int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
>                  uint64_t supported, bool_t is_write)
>  {
> @@ -150,7 +190,8 @@ static struct vcpu *choose_hwdom_vcpu(void)
>      return v;
>  }
>  
> -int vpmu_do_interrupt(struct cpu_user_regs *regs)
> +/* This routine may be called in NMI context */
> +int vpmu_do_interrupt(const struct cpu_user_regs *regs)
>  {
>      struct vcpu *sampled = current, *sampling;
>      struct vpmu_struct *vpmu;
> @@ -231,8 +272,9 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
>                  if ( sampled->arch.flags & TF_kernel_mode )
>                      r->cs &= ~3;
>              }
> -            else
> +            else if ( !(vpmu_interrupt_type & APIC_DM_NMI) )
>              {
> +                /* Unsafe in NMI context, defer to softint later */
>                  struct segment_register seg_cs;
>  
>                  hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
> @@ -250,30 +292,30 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
>          vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
>          apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>  
> -        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
> +        if ( vpmu_interrupt_type & APIC_DM_NMI )
> +        {
> +            this_cpu(sampled_vcpu) = sampled;
> +            raise_softirq(PMU_SOFTIRQ);
> +        }
> +        else
> +            send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
>  
>          return 1;
>      }
>  
>      if ( vpmu->arch_vpmu_ops )
>      {
> -        struct vlapic *vlapic = vcpu_vlapic(sampling);
> -        u32 vlapic_lvtpc;
> -        unsigned char int_vec;
> -
>          if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) )
>              return 0;
>  
> -        if ( !is_vlapic_lvtpc_enabled(vlapic) )
> -            return 1;
> -
> -        vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
> -        int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
> -
> -        if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
> -            vlapic_set_irq(vcpu_vlapic(sampling), int_vec, 0);
> +        if ( vpmu_interrupt_type & APIC_DM_NMI )
> +        {
> +            this_cpu(sampled_vcpu) = sampled;
> +            raise_softirq(PMU_SOFTIRQ);
> +        }
>          else
> -            sampling->nmi_pending = 1;
> +            vpmu_send_interrupt(sampling);
> +
>          return 1;
>      }
>  
> @@ -306,6 +348,8 @@ static void vpmu_save_force(void *arg)
>      vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
>  
>      per_cpu(last_vcpu, smp_processor_id()) = NULL;
> +
> +    pmu_softnmi();
>  }
>  
>  void vpmu_save(struct vcpu *v)
> @@ -323,7 +367,10 @@ void vpmu_save(struct vcpu *v)
>          if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
>              vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>  
> -    apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
> +    apic_write(APIC_LVTPC, vpmu_interrupt_type | APIC_LVT_MASKED);
> +
> +    /* Make sure there are no outstanding PMU NMIs */

Could this comment be replicated across all of the callers of this function?
> +    pmu_softnmi();
>  }
>  
>  void vpmu_load(struct vcpu *v)
> @@ -377,6 +424,8 @@ void vpmu_load(struct vcpu *v)
>            (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) )
>          return;
>  
> +    pmu_softnmi();
> +
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
>      {
>          apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> @@ -399,7 +448,7 @@ void vpmu_initialise(struct vcpu *v)
>          vpmu_destroy(v);
>      vpmu_clear(vpmu);
>      vpmu->context = NULL;
> -    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
> +    vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | APIC_LVT_MASKED;
>  
>      switch ( vendor )
>      {
> @@ -435,11 +484,55 @@ void vpmu_destroy(struct vcpu *v)
>      }
>  }
>  
> +/* Process the softirq set by PMU NMI handler */
> +static void pmu_softnmi(void)
> +{
> +    struct vcpu *v, *sampled = this_cpu(sampled_vcpu);
> +
> +    if ( sampled == NULL )
> +        return;

Add a blank line here.
> +    this_cpu(sampled_vcpu) = NULL;

Andrew mentioned to me that each use of 'this_cpu' implies a call to
'smp_processor_id()'. Since we are doing this twice here, we could just
introduce 'unsigned int cpu = smp_processor_id()' once and then use
'per_cpu(sampled_vcpu, cpu)' in this code.
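
I.e. something along these lines (an untested sketch, just re-using the
names already introduced by this patch):

static void pmu_softnmi(void)
{
    unsigned int cpu = smp_processor_id();
    struct vcpu *v, *sampled = per_cpu(sampled_vcpu, cpu);

    /* Nothing was deferred from NMI context on this CPU. */
    if ( sampled == NULL )
        return;

    per_cpu(sampled_vcpu, cpu) = NULL;

    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
    {
        v = choose_hwdom_vcpu();
        if ( !v )
            return;
    }
    else
    {
        if ( is_hvm_domain(sampled->domain) )
        {
            vpmu_send_interrupt(sampled);
            return;
        }
        v = sampled;
    }

    /* We are no longer in NMI context, so this is safe to call. */
    if ( has_hvm_container_domain(sampled->domain) )
    {
        struct segment_register seg_cs;

        hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
        v->arch.vpmu.xenpmu_data->pmu.r.regs.cs = seg_cs.sel;
    }

    send_guest_vcpu_virq(v, VIRQ_XENPMU);
}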

> +
> +    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
> +         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
> +    {
> +            v = choose_hwdom_vcpu();
> +            if ( !v )
> +                return;
> +    }
> +    else
> +    {
> +        if ( is_hvm_domain(sampled->domain) )
> +        {
> +            vpmu_send_interrupt(sampled);
> +            return;
> +        }
> +        v = sampled;
> +    }
> +
> +    if ( has_hvm_container_domain(sampled->domain) )
> +    {
> +        struct segment_register seg_cs;
> +
> +        hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
> +        v->arch.vpmu.xenpmu_data->pmu.r.regs.cs = seg_cs.sel;
> +    }
> +
> +    send_guest_vcpu_virq(v, VIRQ_XENPMU);
> +}
> +
> +int pmu_nmi_interrupt(const struct cpu_user_regs *regs, int cpu)
> +{
> +    return vpmu_do_interrupt(regs);
> +}
> +
>  static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>  {
>      struct vcpu *v;
>      struct page_info *page;
>      uint64_t gfn = params->val;
> +    static bool_t __read_mostly pvpmu_init_done;
> +    static DEFINE_SPINLOCK(init_lock);
>  
>      if ( (params->vcpu >= d->max_vcpus) || (d->vcpu == NULL) ||
>           (d->vcpu[params->vcpu] == NULL) )
> @@ -463,6 +556,27 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>          return -EINVAL;
>      }
>  
> +    spin_lock(&init_lock);
> +
> +    if ( !pvpmu_init_done )
> +    {
> +        if ( reserve_lapic_nmi() != 0 )
> +        {
> +            spin_unlock(&init_lock);
> +            printk(XENLOG_G_ERR "Failed to reserve PMU NMI\n");
> +            put_page(page);
> +            return -EBUSY;
> +        }
> +
> +        set_nmi_callback(pmu_nmi_interrupt);
> +
> +        open_softirq(PMU_SOFTIRQ, pmu_softnmi);
> +
> +        pvpmu_init_done = 1;
> +    }
> +
> +    spin_unlock(&init_lock);
> +
>      vpmu_initialise(v);
>  
>      return 0;
> diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
> index a6e7934..7d86f64 100644
> --- a/xen/include/asm-x86/hvm/vpmu.h
> +++ b/xen/include/asm-x86/hvm/vpmu.h
> @@ -42,7 +42,7 @@ struct arch_vpmu_ops {
>      int (*do_wrmsr)(unsigned int msr, uint64_t msr_content,
>                      uint64_t supported);
>      int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
> -    int (*do_interrupt)(struct cpu_user_regs *regs);
> +    int (*do_interrupt)(const struct cpu_user_regs *regs);
>      void (*do_cpuid)(unsigned int input,
>                       unsigned int *eax, unsigned int *ebx,
>                       unsigned int *ecx, unsigned int *edx);
> @@ -98,7 +98,7 @@ static inline bool_t vpmu_are_all_set(const struct vpmu_struct *vpmu,
>  void vpmu_lvtpc_update(uint32_t val);
>  int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
>                  uint64_t supported, bool_t is_write);
> -int vpmu_do_interrupt(struct cpu_user_regs *regs);
> +int vpmu_do_interrupt(const struct cpu_user_regs *regs);
>  void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
>                                         unsigned int *ecx, unsigned int *edx);
>  void vpmu_initialise(struct vcpu *v);
> diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
> index 0c0d481..5829fa4 100644
> --- a/xen/include/xen/softirq.h
> +++ b/xen/include/xen/softirq.h
> @@ -8,6 +8,7 @@ enum {
>      NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ,
>      RCU_SOFTIRQ,
>      TASKLET_SOFTIRQ,
> +    PMU_SOFTIRQ,
>      NR_COMMON_SOFTIRQS
>  };
>  
> -- 
> 1.8.1.4

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel