
Re: [Xen-devel] [PATCH v6 17/19] x86/VPMU: NMI-based VPMU support



> From: Boris Ostrovsky [mailto:boris.ostrovsky@xxxxxxxxxx]
> Sent: Tuesday, May 13, 2014 8:54 AM
> 
> Add support for using NMIs as PMU interrupts.
> 
> Most of the processing is still performed by vpmu_do_interrupt(). However,
> since certain operations are not NMI-safe we defer them to a softint that
> vpmu_do_interrupt() will schedule:
> * For PV guests that would be send_guest_vcpu_virq()
> * For HVM guests it's VLAPIC accesses and hvm_get_segment_register() (the
>   latter can be called in privileged profiling mode when the interrupted
>   guest is an HVM one).
> 
> With send_guest_vcpu_virq() and hvm_get_segment_register() for PV(H) and
> vlapic accesses for HVM moved to softint, the only routines/macros that
> vpmu_do_interrupt() calls in NMI mode are:
> * memcpy()
> * querying domain type (is_XX_domain())
> * guest_cpu_user_regs()
> * XLAT_cpu_user_regs()
> * raise_softirq()
> * vcpu_vpmu()
> * vpmu_ops->arch_vpmu_save()
> * vpmu_ops->do_interrupt() (in the future for PVH support)
> 
> The latter two only access PMU MSRs with {rd,wr}msrl() (not the _safe
> versions, which would not be NMI-safe).
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> Reviewed-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>
> Tested-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>

The whole concept looks OK to me, except that you need to address several
other comments from Jan.
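
For anyone skimming the thread, the deferral pattern itself is simple to
state in isolation: the NMI handler does only NMI-safe work (a plain store
recording the sampled vCPU plus raising a softirq), and everything unsafe
runs later from the softirq handler. A minimal standalone sketch, with mock
types and stubs rather than the actual Xen code:

/*
 * Sketch of the defer-to-softirq pattern (mock types/stubs, not Xen code).
 */
#include <stddef.h>

struct vcpu;                               /* opaque for this sketch */

static struct vcpu *sampled;               /* per-CPU in the real patch */
static volatile int pmu_softirq_pending;   /* stands in for raise_softirq() */

static void deliver_sample(struct vcpu *v) /* stub for the non-NMI-safe part */
{
    (void)v;                               /* send_guest_vcpu_virq() etc. */
}

/* NMI context: no locks, no vlapic access, no virq delivery. */
static void pmu_nmi(struct vcpu *current_vcpu)
{
    sampled = current_vcpu;                /* plain store: NMI-safe */
    pmu_softirq_pending = 1;               /* raise_softirq(PMU_SOFTIRQ) here */
}

/* Ordinary context, interrupts enabled: safe to do the deferred work. */
static void pmu_softirq(void)
{
    struct vcpu *v = sampled;

    if ( v == NULL )
        return;
    sampled = NULL;
    deliver_sample(v);
}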

Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>

> ---
>  xen/arch/x86/hvm/svm/vpmu.c       |   1 +
>  xen/arch/x86/hvm/vmx/vpmu_core2.c |   1 +
>  xen/arch/x86/hvm/vpmu.c           | 183 +++++++++++++++++++++++++++++++-------
> 
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 42c3530..8711e86 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -185,6 +185,7 @@ static inline void context_load(struct vcpu *v)
>      }
>  }
> 
> +/* Must be NMI-safe */
>  static void amd_vpmu_load(struct vcpu *v)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> index 8182dc3..c06b305 100644
> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -303,6 +303,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
>          rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
>  }
> 
> +/* Must be NMI-safe */
>  static int core2_vpmu_save(struct vcpu *v)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index 7cb2231..f73ebbb 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -36,6 +36,7 @@
>  #include <asm/hvm/svm/svm.h>
>  #include <asm/hvm/svm/vmcb.h>
>  #include <asm/apic.h>
> +#include <asm/nmi.h>
>  #include <public/pmu.h>
> 
>  /*
> @@ -48,34 +49,60 @@ uint64_t __read_mostly vpmu_features = 0;
>  static void parse_vpmu_param(char *s);
>  custom_param("vpmu", parse_vpmu_param);
> 
> +static void pmu_softnmi(void);
> +
>  static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
> +static DEFINE_PER_CPU(struct vcpu *, sampled_vcpu);
> +
> +static uint32_t __read_mostly vpmu_interrupt_type = PMU_APIC_VECTOR;
> 
>  static void __init parse_vpmu_param(char *s)
>  {
> -    switch ( parse_bool(s) )
> -    {
> -    case 0:
> -        break;
> -    default:
> -        if ( !strcmp(s, "bts") )
> -            vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
> -        else if ( *s )
> +    char *ss;
> +
> +    vpmu_mode = XENPMU_MODE_ON;
> +    if ( *s == '\0' )
> +        return;
> +
> +    do {
> +        ss = strchr(s, ',');
> +        if ( ss )
> +            *ss = '\0';
> +
> +        switch ( parse_bool(s) )
>          {
> -            printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
> +        case 0:
> +            vpmu_mode = XENPMU_MODE_OFF;
> +            return;
> +        case -1:
> +            if ( !strcmp(s, "nmi") )
> +                vpmu_interrupt_type = APIC_DM_NMI;
> +            else if ( !strcmp(s, "bts") )
> +                vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
> +            else if ( !strcmp(s, "priv") )
> +            {
> +                vpmu_mode &= ~XENPMU_MODE_ON;
> +                vpmu_mode |= XENPMU_MODE_PRIV;
> +            }
> +            else
> +            {
> +                printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
> +                vpmu_mode = XENPMU_MODE_OFF;
> +                return;
> +            }
> +        default:
>              break;
>          }
> -        /* fall through */
> -    case 1:
> -        vpmu_mode = XENPMU_MODE_ON;
> -        break;
> -    }
> +
> +        s = ss + 1;
> +    } while ( ss );
>  }
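
A side note for anyone wanting to test this: if I read the loop right, the
option now takes either a boolean or a comma-separated list of flags, so
(hypothetical command line fragments):

    vpmu=priv,nmi   # privileged profiling, samples delivered via NMI
    vpmu=bts        # default mode plus Intel BTS
    vpmu=0          # vpmu disabled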
> 
>  void vpmu_lvtpc_update(uint32_t val)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(current);
> 
> -    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
> +    vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | (val & APIC_LVT_MASKED);
> 
>      /* Postpone APIC updates for PV guests if PMU interrupt is pending */
>      if ( !is_pv_domain(current->domain) ||
> @@ -84,6 +111,27 @@ void vpmu_lvtpc_update(uint32_t val)
>          apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>  }
> 
> +static void vpmu_send_nmi(struct vcpu *v)
> +{
> +    struct vlapic *vlapic;
> +    u32 vlapic_lvtpc;
> +    unsigned char int_vec;
> +
> +    ASSERT( is_hvm_vcpu(v) );
> +
> +    vlapic = vcpu_vlapic(v);
> +    if ( !is_vlapic_lvtpc_enabled(vlapic) )
> +        return;
> +
> +    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
> +    int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
> +
> +    if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
> +        vlapic_set_irq(vcpu_vlapic(v), int_vec, 0);
> +    else
> +        v->nmi_pending = 1;
> +}
> +
>  int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, uint8_t rw)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(current);
> @@ -125,6 +173,7 @@ int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, uint8_t rw)
>      return 0;
>  }
> 
> +/* This routine may be called in NMI context */
>  int vpmu_do_interrupt(struct cpu_user_regs *regs)
>  {
>      struct vcpu *v = current;
> @@ -209,9 +258,13 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
>              memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs,
>                     gregs, sizeof(struct cpu_user_regs));
> 
> -            hvm_get_segment_register(current, x86_seg_cs, &cs);
> -            gregs = &v->arch.vpmu.xenpmu_data->pmu.r.regs;
> -            gregs->cs = cs.attr.fields.dpl;
> +            /* This is unsafe in NMI context, we'll do it in softint handler */
> +            if ( !(vpmu_interrupt_type & APIC_DM_NMI) )
> +            {
> +                hvm_get_segment_register(current, x86_seg_cs, &cs);
> +                gregs = &v->arch.vpmu.xenpmu_data->pmu.r.regs;
> +                gregs->cs = cs.attr.fields.dpl;
> +            }
>          }
> 
>          v->arch.vpmu.xenpmu_data->domain_id = current->domain->domain_id;
> @@ -222,30 +275,30 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
>          apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
>          vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
> 
> -        send_guest_vcpu_virq(v, VIRQ_XENPMU);
> +        if ( vpmu_interrupt_type & APIC_DM_NMI )
> +        {
> +            per_cpu(sampled_vcpu, smp_processor_id()) = current;
> +            raise_softirq(PMU_SOFTIRQ);
> +        }
> +        else
> +            send_guest_vcpu_virq(v, VIRQ_XENPMU);
> 
>          return 1;
>      }
> 
>      if ( vpmu->arch_vpmu_ops )
>      {
> -        struct vlapic *vlapic = vcpu_vlapic(v);
> -        u32 vlapic_lvtpc;
> -        unsigned char int_vec;
> -
>          if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) )
>              return 0;
> 
> -        if ( !is_vlapic_lvtpc_enabled(vlapic) )
> -            return 1;
> -
> -        vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
> -        int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
> -
> -        if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
> -            vlapic_set_irq(vcpu_vlapic(v), int_vec, 0);
> +        if ( vpmu_interrupt_type & APIC_DM_NMI )
> +        {
> +            per_cpu(sampled_vcpu, smp_processor_id()) = current;
> +            raise_softirq(PMU_SOFTIRQ);
> +        }
>          else
> -            v->nmi_pending = 1;
> +            vpmu_send_nmi(v);
> +
>          return 1;
>      }
> 
> @@ -276,6 +329,8 @@ static void vpmu_save_force(void *arg)
>      vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
> 
>      per_cpu(last_vcpu, smp_processor_id()) = NULL;
> +
> +    pmu_softnmi();
>  }
> 
>  void vpmu_save(struct vcpu *v)
> @@ -293,7 +348,10 @@ void vpmu_save(struct vcpu *v)
>          if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
>              vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
> 
> -    apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
> +    apic_write(APIC_LVTPC, vpmu_interrupt_type | APIC_LVT_MASKED);
> +
> +    /* Make sure there are no outstanding PMU NMIs */
> +    pmu_softnmi();
>  }
> 
>  void vpmu_load(struct vcpu *v)
> @@ -338,6 +396,8 @@ void vpmu_load(struct vcpu *v)
>          vpmu_save_force(prev);
>          vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
> 
> +        pmu_softnmi();
> +
>          vpmu = vcpu_vpmu(v);
>      }
> 
> @@ -442,11 +502,53 @@ static void vpmu_unload_all(void)
>      }
>  }
> 
> +/* Process the softirq set by PMU NMI handler */
> +static void pmu_softnmi(void)
> +{
> +    struct cpu_user_regs *regs;
> +    struct vcpu *v, *sampled = per_cpu(sampled_vcpu, smp_processor_id());
> +
> +    if ( sampled == NULL )
> +        return;
> +    per_cpu(sampled_vcpu, smp_processor_id()) = NULL;
> +
> +    if ( (vpmu_mode & XENPMU_MODE_PRIV) ||
> +         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
> +        v = hardware_domain->vcpu[smp_processor_id() %
> +                                  hardware_domain->max_vcpus];
> +    else
> +    {
> +        if ( is_hvm_domain(sampled->domain) )
> +        {
> +            vpmu_send_nmi(sampled);
> +            return;
> +        }
> +        v = sampled;
> +    }
> +
> +    regs = &v->arch.vpmu.xenpmu_data->pmu.r.regs;
> +    if ( !is_pv_domain(sampled->domain) )
> +    {
> +        struct segment_register cs;
> +
> +        hvm_get_segment_register(sampled, x86_seg_cs, &cs);
> +        regs->cs = cs.attr.fields.dpl;
> +    }
> +
> +    send_guest_vcpu_virq(v, VIRQ_XENPMU);
> +}
> +
> +int pmu_nmi_interrupt(struct cpu_user_regs *regs, int cpu)
> +{
> +    return vpmu_do_interrupt(regs);
> +}
> +
>  static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>  {
>      struct vcpu *v;
>      struct page_info *page;
>      uint64_t gfn = params->d.val;
> +    static bool_t __read_mostly pvpmu_initted = 0;
> 
>      if ( !is_pv_domain(d) )
>          return -EINVAL;
> @@ -472,6 +574,21 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>          return -EINVAL;
>      }
> 
> +    if ( !pvpmu_initted )
> +    {
> +        if ( reserve_lapic_nmi() == 0 )
> +            set_nmi_callback(pmu_nmi_interrupt);
> +        else
> +        {
> +            printk("Failed to reserve PMU NMI\n");
> +            put_page(page);
> +            return -EBUSY;
> +        }
> +        open_softirq(PMU_SOFTIRQ, pmu_softnmi);
> +
> +        pvpmu_initted = 1;
> +    }
> +
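
One nit here: if two guests can reach pvpmu_init() concurrently, the plain
bool_t check can race and register the callback twice (or leak the NMI
reservation). A possible race-free variant, sketched on the assumption that
test_and_set_bit()/clear_bit() are usable in this context:

    static unsigned long pvpmu_init_done;    /* replaces pvpmu_initted */

    if ( !test_and_set_bit(0, &pvpmu_init_done) )
    {
        if ( reserve_lapic_nmi() == 0 )
            set_nmi_callback(pmu_nmi_interrupt);
        else
        {
            printk("Failed to reserve PMU NMI\n");
            put_page(page);
            clear_bit(0, &pvpmu_init_done);  /* allow a later retry */
            return -EBUSY;
        }
        open_softirq(PMU_SOFTIRQ, pmu_softnmi);
    }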
>      vpmu_initialise(v);
> 
>      return 0;
> --
> 1.8.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

