[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v7 14/19] x86/VPMU: Handle PMU interrupts for PV guests



On 06/06/14 18:40, Boris Ostrovsky wrote:
> Add support for handling PMU interrupts for PV guests.
>
> VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
> hypercall. This allows the guest to access PMU MSR values that are stored in
> VPMU context which is shared between hypervisor and domain, thus avoiding
> traps to hypervisor.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> Acked-by: Kevin Tian <kevin.tian@xxxxxxxxx>
> Reviewed-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>
> Tested-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>
> ---
>  xen/arch/x86/hvm/vpmu.c  | 134 
> +++++++++++++++++++++++++++++++++++++++++++++--
>  xen/include/public/pmu.h |   7 +++
>  2 files changed, 136 insertions(+), 5 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index ca0534b..ce842e4 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -76,7 +76,12 @@ void vpmu_lvtpc_update(uint32_t val)
>      struct vpmu_struct *vpmu = vcpu_vpmu(current);
>  
>      vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
> -    apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +
> +    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
> +    if ( is_hvm_domain(current->domain) ||
> +         !(current->arch.vpmu.xenpmu_data &&
> +           (current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED)) )
> +        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);

Repeatedly referencing current is expensive (as it is an assembly block
which frobs the stack pointer).  Please use "struct vcpu *curr = current;".

Although these accesses look as if they should be through the vpmu
pointer anyway.

>  }
>  
>  int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
> @@ -87,7 +92,23 @@ int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
>          return 0;
>  
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
> -        return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content);
> +    {
> +        int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content);
> +
> +        /*
> +         * We may have received a PMU interrupt during WRMSR handling
> +         * and since do_wrmsr may load VPMU context we should save
> +         * (and unload) it again.
> +         */
> +        if ( !is_hvm_domain(current->domain) &&
> +             (current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED) )
> +        {
> +            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> +            vpmu->arch_vpmu_ops->arch_vpmu_save(current);
> +            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        }
> +        return ret;
> +    }
>      return 0;
>  }
>  
> @@ -99,14 +120,109 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t 
> *msr_content)
>          return 0;
>  
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
> -        return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +    {
> +        int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +
> +        if ( !is_hvm_domain(current->domain) &&
> +            (current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED) )
> +        {
> +            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> +            vpmu->arch_vpmu_ops->arch_vpmu_save(current);
> +            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        }
> +        return ret;
> +    }
>      return 0;
>  }
>  
> +static struct vcpu *choose_dom0_vcpu(void)

Probably want to rename this function in line with the
s/dom0/hardware_domain/ which has happened in the tree since you first
posted a series.

> +{
> +    struct vcpu *v;
> +    unsigned idx = smp_processor_id() % hardware_domain->max_vcpus;
> +
> +    v = hardware_domain->vcpu[idx];
> +
> +    /*
> +     * If index is not populated search downwards the vcpu array until
> +     * a valid vcpu can be found
> +     */
> +    while ( !v && idx-- )
> +        v = hardware_domain->vcpu[idx];
> +
> +    return v;
> +}
> +
>  int vpmu_do_interrupt(struct cpu_user_regs *regs)
>  {
>      struct vcpu *v = current;
> -    struct vpmu_struct *vpmu = vcpu_vpmu(v);
> +    struct vpmu_struct *vpmu;

Perhaps an ASSERT(system_state == SYS_STATE_active) to sanity check the
environment before we start assuming that dom0s vcpus are constructed?

~Andrew

> +
> +    /* dom0 will handle interrupt for special domains (e.g. idle domain) */
> +    if ( v->domain->domain_id >= DOMID_FIRST_RESERVED )
> +    {
> +        v = choose_dom0_vcpu();
> +        if ( !v )
> +            return 0;
> +    }
> +
> +    vpmu = vcpu_vpmu(v);
> +    if ( !is_hvm_domain(v->domain) )
> +    {
> +        /* PV(H) guest or dom0 is doing system profiling */
> +        const struct cpu_user_regs *gregs;
> +        int err;
> +
> +        if ( v->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED )
> +            return 1;
> +
> +        if ( is_pvh_domain(current->domain) &&
> +             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
> +            return 0;
> +
> +        /* PV guest will be reading PMU MSRs from xenpmu_data */
> +        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        err = vpmu->arch_vpmu_ops->arch_vpmu_save(v);
> +        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +
> +        /* Store appropriate registers in xenpmu_data */
> +        if ( is_pv_32bit_domain(current->domain) )
> +        {
> +            /*
> +             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
> +             * and therefore we treat it the same way as a non-priviledged
> +             * PV 32-bit domain.
> +             */
> +            struct compat_cpu_user_regs *cmp;
> +
> +            gregs = guest_cpu_user_regs();
> +
> +            cmp = (void *)&v->arch.vpmu.xenpmu_data->pmu.r.regs;
> +            XLAT_cpu_user_regs(cmp, gregs);
> +        }
> +        else if ( !is_hardware_domain(current->domain) &&
> +                 !is_idle_vcpu(current) )
> +        {
> +            /* PV(H) guest */
> +            gregs = guest_cpu_user_regs();
> +            memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs,
> +                   gregs, sizeof(struct cpu_user_regs));
> +        }
> +        else
> +            memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs,
> +                   regs, sizeof(struct cpu_user_regs));
> +
> +        v->arch.vpmu.xenpmu_data->domain_id = current->domain->domain_id;
> +        v->arch.vpmu.xenpmu_data->vcpu_id = current->vcpu_id;
> +        v->arch.vpmu.xenpmu_data->pcpu_id = smp_processor_id();
> +
> +        v->arch.vpmu.xenpmu_data->pmu_flags |= PMU_CACHED;
> +        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
> +        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
> +
> +        send_guest_vcpu_virq(v, VIRQ_XENPMU);
> +
> +        return 1;
> +    }
>  
>      if ( vpmu->arch_vpmu_ops )
>      {
> @@ -225,7 +341,9 @@ void vpmu_load(struct vcpu *v)
>      local_irq_enable();
>  
>      /* Only when PMU is counting, we load PMU context immediately. */
> -    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
> +    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
> +         (!is_hvm_domain(v->domain) &&
> +          vpmu->xenpmu_data->pmu_flags & PMU_CACHED) )
>          return;
>  
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
> @@ -463,6 +581,12 @@ long do_xenpmu_op(int op, 
> XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>          vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
>          ret = 0;
>          break;
> +    case XENPMU_flush:
> +        current->arch.vpmu.xenpmu_data->pmu_flags &= ~PMU_CACHED;
> +        vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> +        vpmu_load(current);
> +        ret = 0;
> +        break;
>      }
>  
>      return ret;
> diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
> index 7341de9..67ce5de 100644
> --- a/xen/include/public/pmu.h
> +++ b/xen/include/public/pmu.h
> @@ -28,6 +28,7 @@
>  #define XENPMU_init            4
>  #define XENPMU_finish          5
>  #define XENPMU_lvtpc_set       6
> +#define XENPMU_flush           7 /* Write cached MSR values to HW     */
>  /* ` } */
>  
>  /* Parameters structure for HYPERVISOR_xenpmu_op call */
> @@ -65,6 +66,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t);
>   */
>  #define XENPMU_FEATURE_INTEL_BTS  1
>  
> +/*
> + * PMU MSRs are cached in the context so the PV guest doesn't need to trap to
> + * the hypervisor
> + */
> +#define PMU_CACHED 1
> +
>  /* Shared between hypervisor and PV domain */
>  struct xen_pmu_data {
>      uint32_t domain_id;


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.