
Re: [Xen-devel] [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests



> From: Boris Ostrovsky [mailto:boris.ostrovsky@xxxxxxxxxx]
> Sent: Wednesday, June 10, 2015 11:04 PM
> 
> Add support for handling PMU interrupts for PV(H) guests.
> 
> VPMU for the interrupted VCPU is unloaded until the guest issues the XENPMU_flush
> hypercall. This allows the guest to access PMU MSR values that are stored in the
> VPMU context, which is shared between the hypervisor and the domain, thus avoiding
> traps to the hypervisor.
> 
> Since the interrupt handler may now force a VPMU context save (i.e. set the
> VPMU_CONTEXT_SAVE flag), we need to change amd_vpmu_save(), which until now
> expected this flag to be set only when the counters were stopped.
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> Acked-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>

I may need more time to understand the whole interrupt handling for PV(H)
guests. But regarding the VMX-specific changes, I think they are clear:

Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
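
For anyone else following the PV(H) flow described above, here is a rough
sketch of how I read the intended guest-side handling. This is illustrative
only and not code from this series; the shared-page pointer, the
HYPERVISOR_xenpmu_op wrapper and consume_sample() are assumed placeholder
names:

    /* Assumed guest handler for VIRQ_XENPMU (illustrative sketch only). */
    static void pmu_virq_handler(void)
    {
        /* Shared page previously registered with Xen via XENPMU_init. */
        struct xen_pmu_data *pd = xenpmu_shared_page;

        if ( pd->pmu.pmu_flags & PMU_CACHED )
        {
            /*
             * While PMU_CACHED is set, the MSR state lives only in the
             * shared page, so the sample is read without trapping to Xen.
             */
            consume_sample(&pd->pmu.r.regs, pd->domain_id, pd->vcpu_id);

            /* Unmask the LVTPC value Xen masked in vpmu_do_interrupt(). */
            pd->pmu.l.lapic_lvtpc &= ~APIC_LVT_MASKED;

            /*
             * XENPMU_flush clears PMU_CACHED, applies lapic_lvtpc and
             * validates/reloads the cached MSRs into hardware
             * (XENPMU_lvtpc_set exists for LVTPC-only updates).
             */
            HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
        }
    }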

> ---
> Changes in v24:
> * For both AMD and Intel, copy the guest's MSRs into the context first and then
>   verify them (to keep things read-once by the hypervisor)
> * To make sure that the guest has not altered the offsets to the registers, don't
>   copy these values. Store them into the shared area during VPMU initialization.
>   Clarify in the public header file that they are RO for the guest
> * Make vpmu_load return arch_vpmu_load()'s error code, not 1.
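
(Just to restate the first item for readers of the archive: the point of
copying before verifying is that Xen only ever validates, and later loads, its
own private snapshot, so the guest cannot change the values between the check
and the use. Condensed from the AMD-side hunk below, not new code:)

    /* Copy-then-verify ("read once") pattern from amd_vpmu_load(). */
    memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz); /* single read of shared page */

    for ( i = 0; i < num_counters; i++ )
        if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] ) /* checks Xen's copy only */
            return -EINVAL;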
> 
>  xen/arch/x86/hvm/svm/vpmu.c       |  90 ++++++++++---
>  xen/arch/x86/hvm/vmx/vpmu_core2.c | 108 ++++++++++++++-
>  xen/arch/x86/hvm/vpmu.c           | 268 +++++++++++++++++++++++++++++++++-----
>  xen/include/asm-x86/hvm/vpmu.h    |  10 +-
>  xen/include/public/arch-x86/pmu.h |  41 +++++-
>  xen/include/public/pmu.h          |   2 +
>  xen/include/xsm/dummy.h           |   4 +-
>  xen/xsm/flask/hooks.c             |   2 +
>  8 files changed, 464 insertions(+), 61 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 934f1b7..b93d31d 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -46,6 +46,9 @@ static const u32 __read_mostly *counters;
>  static const u32 __read_mostly *ctrls;
>  static bool_t __read_mostly k7_counters_mirrored;
> 
> +/* Total size of PMU registers block (copied to/from PV(H) guest) */
> +static unsigned int __read_mostly regs_sz;
> +
>  #define F10H_NUM_COUNTERS   4
>  #define F15H_NUM_COUNTERS   6
>  #define MAX_NUM_COUNTERS    F15H_NUM_COUNTERS
> @@ -158,7 +161,7 @@ static void amd_vpmu_init_regs(struct xen_pmu_amd_ctxt *ctxt)
>      unsigned i;
>      uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
> 
> -    memset(&ctxt->regs[0], 0, 2 * sizeof(uint64_t) * num_counters);
> +    memset(&ctxt->regs[0], 0, regs_sz);
>      for ( i = 0; i < num_counters; i++ )
>          ctrl_regs[i] = ctrl_rsvd[i];
>  }
> @@ -211,27 +214,65 @@ static inline void context_load(struct vcpu *v)
>      }
>  }
> 
> -static void amd_vpmu_load(struct vcpu *v)
> +static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> -    struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
> -    uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
> +    struct xen_pmu_amd_ctxt *ctxt;
> +    uint64_t *ctrl_regs;
> +    unsigned int i;
> 
>      vpmu_reset(vpmu, VPMU_FROZEN);
> 
> -    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> +    if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
>      {
> -        unsigned int i;
> +        ctxt = vpmu->context;
> +        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
> 
>          for ( i = 0; i < num_counters; i++ )
>              wrmsrl(ctrls[i], ctrl_regs[i]);
> 
> -        return;
> +        return 0;
> +    }
> +
> +    if ( from_guest )
> +    {
> +        unsigned int num_enabled = 0;
> +        struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
> +
> +        ASSERT(!is_hvm_vcpu(v));
> +
> +        ctxt = vpmu->context;
> +        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
> +
> +        memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
> +
> +        for ( i = 0; i < num_counters; i++ )
> +        {
> +            if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
> +            {
> +                /*
> +                 * Not necessary to re-init context since we should never load
> +                 * it until guest provides valid values. But just to be safe.
> +                 */
> +                amd_vpmu_init_regs(ctxt);
> +                return -EINVAL;
> +            }
> +
> +            if ( is_pmu_enabled(ctrl_regs[i]) )
> +                num_enabled++;
> +        }
> +
> +        if ( num_enabled )
> +            vpmu_set(vpmu, VPMU_RUNNING);
> +        else
> +            vpmu_reset(vpmu, VPMU_RUNNING);
>      }
> 
>      vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
> 
>      context_load(v);
> +
> +    return 0;
>  }
> 
>  static inline void context_save(struct vcpu *v)
> @@ -246,22 +287,17 @@ static inline void context_save(struct vcpu *v)
>          rdmsrl(counters[i], counter_regs[i]);
>  }
> 
> -static int amd_vpmu_save(struct vcpu *v)
> +static int amd_vpmu_save(struct vcpu *v,  bool_t to_guest)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>      unsigned int i;
> 
> -    /*
> -     * Stop the counters. If we came here via vpmu_save_force (i.e.
> -     * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
> -     */
> +    for ( i = 0; i < num_counters; i++ )
> +        wrmsrl(ctrls[i], 0);
> +
>      if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
>      {
>          vpmu_set(vpmu, VPMU_FROZEN);
> -
> -        for ( i = 0; i < num_counters; i++ )
> -            wrmsrl(ctrls[i], 0);
> -
>          return 0;
>      }
> 
> @@ -274,6 +310,16 @@ static int amd_vpmu_save(struct vcpu *v)
>           has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
>          amd_vpmu_unset_msr_bitmap(v);
> 
> +    if ( to_guest )
> +    {
> +        struct xen_pmu_amd_ctxt *guest_ctxt, *ctxt;
> +
> +        ASSERT(!is_hvm_vcpu(v));
> +        ctxt = vpmu->context;
> +        guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
> +        memcpy(&guest_ctxt->regs[0], &ctxt->regs[0], regs_sz);
> +    }
> +
>      return 1;
>  }
> 
> @@ -461,8 +507,7 @@ int svm_vpmu_initialise(struct vcpu *v)
>      if ( !counters )
>          return -EINVAL;
> 
> -    ctxt = xmalloc_bytes(sizeof(*ctxt) +
> -                         2 * sizeof(uint64_t) * num_counters);
> +    ctxt = xmalloc_bytes(sizeof(*ctxt) + regs_sz);
>      if ( !ctxt )
>      {
>          printk(XENLOG_G_WARNING "Insufficient memory for PMU, "
> @@ -478,6 +523,13 @@ int svm_vpmu_initialise(struct vcpu *v)
>      vpmu->context = ctxt;
>      vpmu->priv_context = NULL;
> 
> +    if ( !is_hvm_vcpu(v) )
> +    {
> +        /* Copy register offsets to shared area */
> +        ASSERT(vpmu->xenpmu_data);
> +        memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt, sizeof(*ctxt));
> +    }
> +
>      vpmu->arch_vpmu_ops = &amd_vpmu_ops;
> 
>      vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
> @@ -527,6 +579,8 @@ int __init amd_vpmu_init(void)
>          ctrl_rsvd[i] &= CTRL_RSVD_MASK;
>      }
> 
> +    regs_sz = 2 * sizeof(uint64_t) * num_counters;
> +
>      return 0;
>  }
> 
> diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> index 166277a..1206e90 100644
> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -90,6 +90,13 @@ static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
>  static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
>  static uint64_t __read_mostly global_ovf_ctrl_mask;
> 
> +/* Total size of PMU registers block (copied to/from PV(H) guest) */
> +static unsigned int __read_mostly regs_sz;
> +/* Offset into context of the beginning of PMU register block */
> +static const unsigned int regs_off =
> +        sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
> +        sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
> +
>  /*
>   * QUIRK to workaround an issue on various family 6 cpus.
>   * The issue leads to endless PMC interrupt loops on the processor.
> @@ -312,7 +319,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
>          rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
>  }
> 
> -static int core2_vpmu_save(struct vcpu *v)
> +static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> 
> @@ -329,6 +336,13 @@ static int core2_vpmu_save(struct vcpu *v)
>           has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
>          core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
> 
> +    if ( to_guest )
> +    {
> +        ASSERT(!is_hvm_vcpu(v));
> +        memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
> +               vpmu->context + regs_off, regs_sz);
> +    }
> +
>      return 1;
>  }
> 
> @@ -365,16 +379,93 @@ static inline void __core2_vpmu_load(struct vcpu *v)
>      }
>  }
> 
> -static void core2_vpmu_load(struct vcpu *v)
> +static int core2_vpmu_verify(struct vcpu *v)
> +{
> +    unsigned int i;
> +    struct vpmu_struct *vpmu = vcpu_vpmu(v);
> +    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
> +    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
> +    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
> +        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
> +    uint64_t fixed_ctrl;
> +    uint64_t *priv_context = vpmu->priv_context;
> +    uint64_t enabled_cntrs = 0;
> +
> +    if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
> +        return -EINVAL;
> +
> +    fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
> +    if ( fixed_ctrl & fixed_ctrl_mask )
> +        return -EINVAL;
> +
> +    for ( i = 0; i < fixed_pmc_cnt; i++ )
> +    {
> +        if ( fixed_counters[i] & fixed_counters_mask )
> +            return -EINVAL;
> +        if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
> +            enabled_cntrs |= (1ULL << i);
> +    }
> +    enabled_cntrs <<= 32;
> +
> +    for ( i = 0; i < arch_pmc_cnt; i++ )
> +    {
> +        uint64_t control = xen_pmu_cntr_pair[i].control;
> +
> +        if ( control & ARCH_CTRL_MASK )
> +            return -EINVAL;
> +        if ( control & ARCH_CNTR_ENABLED )
> +            enabled_cntrs |= (1ULL << i);
> +    }
> +
> +    if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) &&
> +         !is_canonical_address(core2_vpmu_cxt->ds_area) )
> +        return -EINVAL;
> +
> +    if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
> +         (core2_vpmu_cxt->ds_area != 0) )
> +        vpmu_set(vpmu, VPMU_RUNNING);
> +    else
> +        vpmu_reset(vpmu, VPMU_RUNNING);
> +
> +    *priv_context = enabled_cntrs;
> +
> +    return 0;
> +}
> +
> +static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> 
>      if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> -        return;
> +        return 0;
> +
> +    if ( from_guest )
> +    {
> +        int ret;
> +
> +        ASSERT(!is_hvm_vcpu(v));
> +
> +        memcpy(vpmu->context + regs_off,
> +               (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
> +               regs_sz);
> +
> +        ret = core2_vpmu_verify(v);
> +        if ( ret )
> +        {
> +            /*
> +             * Not necessary since we should never load the context until
> +             * guest provides valid values. But just to be safe.
> +             */
> +            memset(vpmu->context + regs_off, 0, regs_sz);
> +            return ret;
> +        }
> +    }
> 
>      vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
> 
>      __core2_vpmu_load(v);
> +
> +    return 0;
>  }
> 
>  static int core2_vpmu_alloc_resource(struct vcpu *v)
> @@ -412,6 +503,13 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
>      vpmu->context = core2_vpmu_cxt;
>      vpmu->priv_context = p;
> 
> +    if ( !is_hvm_vcpu(v) )
> +    {
> +        /* Copy fixed/arch register offsets to shared area */
> +        ASSERT(vpmu->xenpmu_data);
> +        memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
> +    }
> +
>      vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
> 
>      return 1;
> @@ -923,6 +1021,10 @@ int __init core2_vpmu_init(void)
>                               (((1ULL << fixed_pmc_cnt) - 1) << 32) |
>                               ((1ULL << arch_pmc_cnt) - 1));
> 
> +    regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
> +              sizeof(uint64_t) * fixed_pmc_cnt +
> +              sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
> +
>      check_pmc_quirk();
> 
>      if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index 07fa368..37e541b 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -85,31 +85,56 @@ static void __init parse_vpmu_param(char *s)
>  void vpmu_lvtpc_update(uint32_t val)
>  {
>      struct vpmu_struct *vpmu;
> +    struct vcpu *curr = current;
> 
> -    if ( vpmu_mode == XENPMU_MODE_OFF )
> +    if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
>          return;
> 
> -    vpmu = vcpu_vpmu(current);
> +    vpmu = vcpu_vpmu(curr);
> 
>      vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
> -    apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +
> +    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
> +    if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
> +         !vpmu_is_set(vpmu, VPMU_CACHED) )
> +        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>  }
> 
>  int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
>  {
> -    struct vpmu_struct *vpmu = vcpu_vpmu(current);
> +    struct vcpu *curr = current;
> +    struct vpmu_struct *vpmu;
> 
>      if ( vpmu_mode == XENPMU_MODE_OFF )
>          return 0;
> 
> +    vpmu = vcpu_vpmu(curr);
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
> -        return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> +    {
> +        int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> +
> +        /*
> +         * We may have received a PMU interrupt during WRMSR handling
> +         * and since do_wrmsr may load VPMU context we should save
> +         * (and unload) it again.
> +         */
> +        if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
> +             vpmu_is_set(vpmu, VPMU_CACHED) )
> +        {
> +            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> +            vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
> +            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        }
> +        return ret;
> +    }
> +
>      return 0;
>  }
> 
>  int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
>  {
> -    struct vpmu_struct *vpmu = vcpu_vpmu(current);
> +    struct vcpu *curr = current;
> +    struct vpmu_struct *vpmu;
> 
>      if ( vpmu_mode == XENPMU_MODE_OFF )
>      {
> @@ -117,39 +142,184 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
>          return 0;
>      }
> 
> +    vpmu = vcpu_vpmu(curr);
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
> -        return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +    {
> +        int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +
> +        if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
> +             vpmu_is_set(vpmu, VPMU_CACHED) )
> +        {
> +            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> +            vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
> +            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        }
> +        return ret;
> +    }
>      else
>          *msr_content = 0;
> 
>      return 0;
>  }
> 
> +static inline struct vcpu *choose_hwdom_vcpu(void)
> +{
> +    unsigned idx;
> +
> +    if ( hardware_domain->max_vcpus == 0 )
> +        return NULL;
> +
> +    idx = smp_processor_id() % hardware_domain->max_vcpus;
> +
> +    return hardware_domain->vcpu[idx];
> +}
> +
>  void vpmu_do_interrupt(struct cpu_user_regs *regs)
>  {
> -    struct vcpu *v = current;
> -    struct vpmu_struct *vpmu = vcpu_vpmu(v);
> +    struct vcpu *sampled = current, *sampling;
> +    struct vpmu_struct *vpmu;
> +    struct vlapic *vlapic;
> +    u32 vlapic_lvtpc;
> 
> -    if ( vpmu->arch_vpmu_ops )
> +    /* dom0 will handle interrupt for special domains (e.g. idle domain) */
> +    if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED )
> +    {
> +        sampling = choose_hwdom_vcpu();
> +        if ( !sampling )
> +            return;
> +    }
> +    else
> +        sampling = sampled;
> +
> +    vpmu = vcpu_vpmu(sampling);
> +    if ( !vpmu->arch_vpmu_ops )
> +        return;
> +
> +    /* PV(H) guest */
> +    if ( !is_hvm_vcpu(sampling) )
>      {
> -        struct vlapic *vlapic = vcpu_vlapic(v);
> -        u32 vlapic_lvtpc;
> +        const struct cpu_user_regs *cur_regs;
> +        uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
> +        domid_t domid = DOMID_SELF;
> +
> +        if ( !vpmu->xenpmu_data )
> +            return;
> +
> +        if ( is_pvh_vcpu(sampling) &&
> +             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
> +            return;
> 
> -        if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
> -             !is_vlapic_lvtpc_enabled(vlapic) )
> +        if ( vpmu_is_set(vpmu, VPMU_CACHED) )
>              return;
> 
> -        vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
> +        /* PV guest will be reading PMU MSRs from xenpmu_data */
> +        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
> +        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> 
> -        switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
> +        if ( has_hvm_container_vcpu(sampled) )
> +            *flags = 0;
> +        else
> +            *flags = PMU_SAMPLE_PV;
> +
> +        /* Store appropriate registers in xenpmu_data */
> +        /* FIXME: 32-bit PVH should go here as well */
> +        if ( is_pv_32bit_vcpu(sampling) )
>          {
> -        case APIC_MODE_FIXED:
> -            vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
> -            break;
> -        case APIC_MODE_NMI:
> -            v->nmi_pending = 1;
> -            break;
> +            /*
> +             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
> +             * and therefore we treat it the same way as a non-privileged
> +             * PV 32-bit domain.
> +             */
> +            struct compat_pmu_regs *cmp;
> +
> +            cur_regs = guest_cpu_user_regs();
> +
> +            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
> +            cmp->ip = cur_regs->rip;
> +            cmp->sp = cur_regs->rsp;
> +            cmp->flags = cur_regs->eflags;
> +            cmp->ss = cur_regs->ss;
> +            cmp->cs = cur_regs->cs;
> +            if ( (cmp->cs & 3) > 1 )
> +                *flags |= PMU_SAMPLE_USER;
> +        }
> +        else
> +        {
> +            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
> +
> +            if ( (vpmu_mode & XENPMU_MODE_SELF) )
> +                cur_regs = guest_cpu_user_regs();
> +            else if ( !guest_mode(regs) && is_hardware_domain(sampling->domain) )
> +            {
> +                cur_regs = regs;
> +                domid = DOMID_XEN;
> +            }
> +            else
> +                cur_regs = guest_cpu_user_regs();
> +
> +            r->ip = cur_regs->rip;
> +            r->sp = cur_regs->rsp;
> +            r->flags = cur_regs->eflags;
> +
> +            if ( !has_hvm_container_vcpu(sampled) )
> +            {
> +                r->ss = cur_regs->ss;
> +                r->cs = cur_regs->cs;
> +                if ( !(sampled->arch.flags & TF_kernel_mode) )
> +                    *flags |= PMU_SAMPLE_USER;
> +            }
> +            else
> +            {
> +                struct segment_register seg;
> +
> +                hvm_get_segment_register(sampled, x86_seg_cs, &seg);
> +                r->cs = seg.sel;
> +                hvm_get_segment_register(sampled, x86_seg_ss, &seg);
> +                r->ss = seg.sel;
> +                r->cpl = seg.attr.fields.dpl;
> +                if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
> +                    *flags |= PMU_SAMPLE_REAL;
> +            }
>          }
> +
> +        vpmu->xenpmu_data->domain_id = domid;
> +        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
> +        if ( is_hardware_domain(sampling->domain) )
> +            vpmu->xenpmu_data->pcpu_id = smp_processor_id();
> +        else
> +            vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;
> +
> +        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
> +        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +        *flags |= PMU_CACHED;
> +        vpmu_set(vpmu, VPMU_CACHED);
> +
> +        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
> +
> +        return;
> +    }
> +
> +    /* HVM guests */
> +    vlapic = vcpu_vlapic(sampling);
> +
> +    /* We don't support (yet) HVM dom0 */
> +    ASSERT(sampling == sampled);
> +
> +    if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
> +         !is_vlapic_lvtpc_enabled(vlapic) )
> +        return;
> +
> +    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
> +
> +    switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
> +    {
> +    case APIC_MODE_FIXED:
> +        vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
> +        break;
> +    case APIC_MODE_NMI:
> +        sampling->nmi_pending = 1;
> +        break;
>      }
>  }
> 
> @@ -174,7 +344,7 @@ static void vpmu_save_force(void *arg)
>      vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> 
>      if ( vpmu->arch_vpmu_ops )
> -        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v);
> +        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);
> 
>      vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
> 
> @@ -193,20 +363,20 @@ void vpmu_save(struct vcpu *v)
>      per_cpu(last_vcpu, pcpu) = v;
> 
>      if ( vpmu->arch_vpmu_ops )
> -        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
> +        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
>              vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
> 
>      apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
>  }
> 
> -void vpmu_load(struct vcpu *v)
> +int vpmu_load(struct vcpu *v, bool_t from_guest)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>      int pcpu = smp_processor_id();
>      struct vcpu *prev = NULL;
> 
>      if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
> -        return;
> +        return 0;
> 
>      /* First time this VCPU is running here */
>      if ( vpmu->last_pcpu != pcpu )
> @@ -245,15 +415,26 @@ void vpmu_load(struct vcpu *v)
>      local_irq_enable();
> 
>      /* Only when PMU is counting, we load PMU context immediately. */
> -    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
> -        return;
> +    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
> +         (!is_hvm_vcpu(vpmu_vcpu(vpmu)) && vpmu_is_set(vpmu, VPMU_CACHED)) )
> +        return 0;
> 
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
>      {
> +        int ret;
> +
>          apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>          /* Arch code needs to set VPMU_CONTEXT_LOADED */
> -        vpmu->arch_vpmu_ops->arch_vpmu_load(v);
> +        ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
> +        if ( ret )
> +        {
> +            apic_write_around(APIC_LVTPC,
> +                              vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
> +            return ret;
> +        }
>      }
> +
> +    return 0;
>  }
> 
>  void vpmu_initialise(struct vcpu *v)
> @@ -265,6 +446,8 @@ void vpmu_initialise(struct vcpu *v)
> 
>      BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
>      BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
> +    BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
> +    BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
> 
>      ASSERT(!vpmu->flags && !vpmu->context);
> 
> @@ -449,7 +632,10 @@ void vpmu_dump(struct vcpu *v)
>  long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>  {
>      int ret;
> +    struct vcpu *curr;
>      struct xen_pmu_params pmu_params = {.val = 0};
> +    struct xen_pmu_data *xenpmu_data;
> +    struct vpmu_struct *vpmu;
> 
>      if ( !opt_vpmu_enabled )
>          return -EOPNOTSUPP;
> @@ -552,6 +738,30 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>          pvpmu_finish(current->domain, &pmu_params);
>          break;
> 
> +    case XENPMU_lvtpc_set:
> +        xenpmu_data = current->arch.vpmu.xenpmu_data;
> +        if ( xenpmu_data == NULL )
> +            return -EINVAL;
> +        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
> +        break;
> +
> +    case XENPMU_flush:
> +        curr = current;
> +        vpmu = vcpu_vpmu(curr);
> +        xenpmu_data = curr->arch.vpmu.xenpmu_data;
> +        if ( xenpmu_data == NULL )
> +            return -EINVAL;
> +        xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
> +        vpmu_reset(vpmu, VPMU_CACHED);
> +        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
> +        if ( vpmu_load(curr, 1) )
> +        {
> +            xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
> +            vpmu_set(vpmu, VPMU_CACHED);
> +            return -EIO;
> +        }
> +        break ;
> +
>      default:
>          ret = -EINVAL;
>      }
> diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
> index 642a4b7..f486d2f 100644
> --- a/xen/include/asm-x86/hvm/vpmu.h
> +++ b/xen/include/asm-x86/hvm/vpmu.h
> @@ -47,8 +47,8 @@ struct arch_vpmu_ops {
>                       unsigned int *eax, unsigned int *ebx,
>                       unsigned int *ecx, unsigned int *edx);
>      void (*arch_vpmu_destroy)(struct vcpu *v);
> -    int (*arch_vpmu_save)(struct vcpu *v);
> -    void (*arch_vpmu_load)(struct vcpu *v);
> +    int (*arch_vpmu_save)(struct vcpu *v, bool_t to_guest);
> +    int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest);
>      void (*arch_vpmu_dump)(const struct vcpu *);
>  };
> 
> @@ -75,6 +75,8 @@ struct vpmu_struct {
>  #define VPMU_CONTEXT_SAVE                   0x8   /* Force context save */
>  #define VPMU_FROZEN                         0x10  /* Stop counters while VCPU is not running */
>  #define VPMU_PASSIVE_DOMAIN_ALLOCATED       0x20
> +/* PV(H) guests: VPMU registers are accessed by guest from shared page */
> +#define VPMU_CACHED                         0x40
> 
>  static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
>  {
> @@ -107,7 +109,7 @@ void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
>  void vpmu_initialise(struct vcpu *v);
>  void vpmu_destroy(struct vcpu *v);
>  void vpmu_save(struct vcpu *v);
> -void vpmu_load(struct vcpu *v);
> +int vpmu_load(struct vcpu *v, bool_t from_guest);
>  void vpmu_dump(struct vcpu *v);
> 
>  extern int acquire_pmu_ownership(int pmu_ownership);
> @@ -126,7 +128,7 @@ static inline void vpmu_switch_from(struct vcpu *prev)
>  static inline void vpmu_switch_to(struct vcpu *next)
>  {
>      if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
> -        vpmu_load(next);
> +        vpmu_load(next, 0);
>  }
> 
>  #endif /* __ASM_X86_HVM_VPMU_H_*/
> diff --git a/xen/include/public/arch-x86/pmu.h b/xen/include/public/arch-x86/pmu.h
> index 4351115..1a53888 100644
> --- a/xen/include/public/arch-x86/pmu.h
> +++ b/xen/include/public/arch-x86/pmu.h
> @@ -5,7 +5,10 @@
> 
>  /* AMD PMU registers and structures */
>  struct xen_pmu_amd_ctxt {
> -    /* Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd) */
> +    /*
> +     * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
> +     * For PV(H) guests these fields are RO.
> +     */
>      uint32_t counters;
>      uint32_t ctrls;
> 
> @@ -30,7 +33,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_cntr_pair_t);
>  struct xen_pmu_intel_ctxt {
>     /*
>      * Offsets to fixed and architectural counter MSRs (relative to
> -    * xen_pmu_arch.c.intel)
> +    * xen_pmu_arch.c.intel).
> +    * For PV(H) guests these fields are RO.
>      */
>      uint32_t fixed_counters;
>      uint32_t arch_counters;
> @@ -69,6 +73,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_regs_t);
> 
>  /* PMU flags */
>  #define PMU_CACHED         (1<<0) /* PMU MSRs are cached in the context */
> +#define PMU_SAMPLE_USER    (1<<1) /* Sample is from user or kernel mode */
> +#define PMU_SAMPLE_REAL    (1<<2) /* Sample is from realmode */
> +#define PMU_SAMPLE_PV      (1<<3) /* Sample from a PV guest */
> 
>  /*
>   * Architecture-specific information describing state of the processor at
> @@ -93,12 +100,34 @@ struct xen_pmu_arch {
>      /* WO for hypervisor, RO for guest */
>      uint64_t pmu_flags;
> 
> -    /* Placeholder for APIC LVTPC register */
> -    uint64_t lvtpc_pad;
> +    /*
> +     * APIC LVTPC register.
> +     * RW for both hypervisor and guest.
> +     * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
> +     * during XENPMU_flush or XENPMU_lvtpc_set.
> +     */
> +    union {
> +        uint32_t lapic_lvtpc;
> +        uint64_t pad;
> +    } l;
> +
> +    /*
> +     * Vendor-specific PMU registers.
> +     * RW for both hypervisor and guest (see exceptions above).
> +     * Guest's updates to this field are verified and then loaded by the
> +     * hypervisor into hardware during XENPMU_flush
> +     */
> +    union {
> +        struct xen_pmu_amd_ctxt amd;
> +        struct xen_pmu_intel_ctxt intel;
> 
> -    /* Placeholder for vendor-specific PMU registers */
> +        /*
> +         * Padding for contexts (fixed parts only, does not include MSR banks
> +         * that are specified by offsets)
> +         */
>  #define XENPMU_CTXT_PAD_SZ  128
> -    uint64_t pmu_regs_pad[XENPMU_CTXT_PAD_SZ / 8];
> +        uint8_t pad[XENPMU_CTXT_PAD_SZ];
> +    } c;
>  };
>  typedef struct xen_pmu_arch xen_pmu_arch_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_pmu_arch_t);
> diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
> index e6307b5..7a45783 100644
> --- a/xen/include/public/pmu.h
> +++ b/xen/include/public/pmu.h
> @@ -27,6 +27,8 @@
>  #define XENPMU_feature_set     3
>  #define XENPMU_init            4
>  #define XENPMU_finish          5
> +#define XENPMU_lvtpc_set       6
> +#define XENPMU_flush           7 /* Write cached MSR values to HW     */
>  /* ` } */
> 
>  /* Parameters structure for HYPERVISOR_xenpmu_op call */
> diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
> index 6456f72..37e6aa3 100644
> --- a/xen/include/xsm/dummy.h
> +++ b/xen/include/xsm/dummy.h
> @@ -705,7 +705,9 @@ static XSM_INLINE int xsm_pmu_op (XSM_DEFAULT_ARG struct domain *d, int op)
>      case XENPMU_feature_get:
>          return xsm_default_action(XSM_PRIV, d, current->domain);
>      case XENPMU_init:
> -    case XENPMU_finish:
> +    case XENPMU_finish:
> +    case XENPMU_lvtpc_set:
> +    case XENPMU_flush:
>          return xsm_default_action(XSM_HOOK, d, current->domain);
>      default:
>          return -EPERM;
> diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
> index aefcbda..4aa3e79 100644
> --- a/xen/xsm/flask/hooks.c
> +++ b/xen/xsm/flask/hooks.c
> @@ -1594,6 +1594,8 @@ static int flask_pmu_op (struct domain *d, unsigned int op)
>                              XEN2__PMU_CTRL, NULL);
>      case XENPMU_init:
>      case XENPMU_finish:
> +    case XENPMU_lvtpc_set:
> +    case XENPMU_flush:
>          return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2,
>                              XEN2__PMU_USE, NULL);
>      default:
> --
> 1.8.1.4

