[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v18 05/16] x86/VPMU: Interface for setting PMU mode and flags



Am Montag 16 Februar 2015, 17:26:48 schrieb Boris Ostrovsky:
> Add runtime interface for setting PMU mode and flags. Three main modes are
> provided:
> * XENPMU_MODE_OFF:  PMU is not virtualized
> * XENPMU_MODE_SELF: Guests can access PMU MSRs and receive PMU interrupts.
> * XENPMU_MODE_HV: Same as XENPMU_MODE_SELF for non-privileged guests, dom0
>   can profile itself and the hypervisor.
> 
> Note that PMU modes are different from what can be provided at Xen's boot line
> with 'vpmu' argument. An 'off' (or '0') value is equivalent to 
> XENPMU_MODE_OFF.
> Any other value, on the other hand, will cause VPMU mode to be set to
> XENPMU_MODE_SELF during boot.
> 
> For feature flags only Intel's BTS is currently supported.
> 
> Mode and flags are set via HYPERVISOR_xenpmu_op hypercall.
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> Acked-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
> ---
>  tools/flask/policy/policy/modules/xen/xen.te |   3 +
>  xen/arch/x86/domain.c                        |   6 +-
>  xen/arch/x86/hvm/svm/vpmu.c                  |  25 ++-
>  xen/arch/x86/hvm/vmx/vmcs.c                  |   7 +-
>  xen/arch/x86/hvm/vmx/vpmu_core2.c            |  27 ++-
>  xen/arch/x86/hvm/vpmu.c                      | 240 
> +++++++++++++++++++++++++--
>  xen/arch/x86/oprofile/nmi_int.c              |   3 +-
>  xen/arch/x86/x86_64/compat/entry.S           |   4 +
>  xen/arch/x86/x86_64/entry.S                  |   4 +
>  xen/include/asm-x86/hvm/vmx/vmcs.h           |   7 +-
>  xen/include/asm-x86/hvm/vpmu.h               |  33 +++-
>  xen/include/public/pmu.h                     |  45 +++++
>  xen/include/public/xen.h                     |   1 +
>  xen/include/xen/hypercall.h                  |   4 +
>  xen/include/xlat.lst                         |   1 +
>  xen/include/xsm/dummy.h                      |  15 ++
>  xen/include/xsm/xsm.h                        |   6 +
>  xen/xsm/dummy.c                              |   1 +
>  xen/xsm/flask/hooks.c                        |  18 ++
>  xen/xsm/flask/policy/access_vectors          |   2 +
>  20 files changed, 417 insertions(+), 35 deletions(-)
> 
> diff --git a/tools/flask/policy/policy/modules/xen/xen.te 
> b/tools/flask/policy/policy/modules/xen/xen.te
> index c0128aa..870ff81 100644
> --- a/tools/flask/policy/policy/modules/xen/xen.te
> +++ b/tools/flask/policy/policy/modules/xen/xen.te
> @@ -68,6 +68,9 @@ allow dom0_t xen_t:xen2 {
>      resource_op
>      psr_cmt_op
>  };
> +allow dom0_t xen_t:xen2 {
> +    pmu_ctrl
> +};
>  allow dom0_t xen_t:mmu memorymap;
>  
>  # Allow dom0 to use these domctls on itself. For domctls acting on other
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index eb8ac3a..b0e3c3d 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1536,7 +1536,7 @@ void context_switch(struct vcpu *prev, struct vcpu 
> *next)
>      if ( is_hvm_vcpu(prev) )
>      {
>          if (prev != next)
> -            vpmu_save(vcpu_vpmu(prev));
> +            vpmu_switch_from(vcpu_vpmu(prev), vcpu_vpmu(next));
>  
>          if ( !list_empty(&prev->arch.hvm_vcpu.tm_list) )
>              pt_save_timer(prev);
> @@ -1579,9 +1579,9 @@ void context_switch(struct vcpu *prev, struct vcpu 
> *next)
>                             !is_hardware_domain(next->domain));
>      }
>  
> -    if (is_hvm_vcpu(next) && (prev != next) )
> +    if ( is_hvm_vcpu(next) && (prev != next) )
>          /* Must be done with interrupts enabled */
> -        vpmu_load(vcpu_vpmu(next));
> +        vpmu_switch_to(vcpu_vpmu(prev), vcpu_vpmu(next));
>  
>      context_saved(prev);
>  
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 72e2561..2cfdf08 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -253,6 +253,26 @@ static int amd_vpmu_save(struct vpmu_struct *vpmu)
>      return 1;
>  }
>  
> +static void amd_vpmu_unload(struct vpmu_struct *vpmu)
> +{
> +    struct vcpu *v;
> +
> +    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED | VPMU_FROZEN) )
> +    {
> +        unsigned int i;
> +
> +        for ( i = 0; i < num_counters; i++ )
> +            wrmsrl(ctrls[i], 0);
> +        context_save(vpmu);
> +    }
> +
> +    v = vpmu_vcpu(vpmu);
> +    if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
> +        amd_vpmu_unset_msr_bitmap(v);
> +
> +    vpmu_reset(vpmu, VPMU_FROZEN);
> +}
> +
>  static void context_update(unsigned int msr, u64 msr_content)
>  {
>      unsigned int i;
> @@ -471,17 +491,18 @@ struct arch_vpmu_ops amd_vpmu_ops = {
>      .arch_vpmu_destroy = amd_vpmu_destroy,
>      .arch_vpmu_save = amd_vpmu_save,
>      .arch_vpmu_load = amd_vpmu_load,
> +    .arch_vpmu_unload = amd_vpmu_unload,
>      .arch_vpmu_dump = amd_vpmu_dump
>  };
>  
> -int svm_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
> +int svm_vpmu_initialise(struct vcpu *v)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>      uint8_t family = current_cpu_data.x86;
>      int ret = 0;
>  
>      /* vpmu enabled? */
> -    if ( !vpmu_flags )
> +    if ( vpmu_mode == XENPMU_MODE_OFF )
>          return 0;
>  
>      switch ( family )
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index d614638..0183222 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -1183,11 +1183,10 @@ int vmx_read_guest_msr(u32 msr, u64 *val)
>      return -ESRCH;
>  }
>  
> -int vmx_write_guest_msr(u32 msr, u64 val)
> +int vmx_write_guest_msr_vcpu(struct vcpu *v, u32 msr, u64 val)
>  {
> -    struct vcpu *curr = current;
> -    unsigned int i, msr_count = curr->arch.hvm_vmx.msr_count;
> -    struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
> +    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
> +    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
>  
>      for ( i = 0; i < msr_count; i++ )
>      {
> diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c 
> b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> index ad7c058..4d08d1b 100644
> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -320,6 +320,22 @@ static int core2_vpmu_save(struct vpmu_struct *vpmu)
>      return 1;
>  }
>  
> +static void core2_vpmu_unload(struct vpmu_struct *vpmu)
> +{
> +    struct vcpu *v = vpmu_vcpu(vpmu);
> +
> +    if ( !has_hvm_container_vcpu(v) )
> +        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> +    else
> +        vmx_write_guest_msr_vcpu(v, MSR_CORE_PERF_GLOBAL_CTRL, 0);

For better readability, maybe this if clause should be switched so that the positive condition is tested first:

    if ( has_hvm_container_vcpu(v) )
        vmx_write_guest_msr_vcpu(v, MSR_CORE_PERF_GLOBAL_CTRL, 0);
    else
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

> +
> +    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> +        __core2_vpmu_save(vpmu);
> +
> +    if ( has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
> +        core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
> +}
> +
>  static inline void __core2_vpmu_load(struct vpmu_struct *vpmu)
>  {
>      unsigned int i, pmc_start;
> @@ -708,13 +724,13 @@ static int core2_vpmu_do_interrupt(struct cpu_user_regs 
> *regs)
>      return 1;
>  }
>  
> -static int core2_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
> +static int core2_vpmu_initialise(struct vcpu *v)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>      u64 msr_content;
>      static bool_t ds_warned;
>  
> -    if ( !(vpmu_flags & VPMU_BOOT_BTS) )
> +    if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
>          goto func_out;
>      /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */
>      while ( boot_cpu_has(X86_FEATURE_DS) )
> @@ -784,6 +800,7 @@ struct arch_vpmu_ops core2_vpmu_ops = {
>      .arch_vpmu_destroy = core2_vpmu_destroy,
>      .arch_vpmu_save = core2_vpmu_save,
>      .arch_vpmu_load = core2_vpmu_load,
> +    .arch_vpmu_unload = core2_vpmu_unload,
>      .arch_vpmu_dump = core2_vpmu_dump
>  };
>  
> @@ -826,7 +843,7 @@ struct arch_vpmu_ops core2_no_vpmu_ops = {
>      .do_cpuid = core2_no_vpmu_do_cpuid,
>  };
>  
> -int vmx_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
> +int vmx_vpmu_initialise(struct vcpu *v)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>      uint8_t family = current_cpu_data.x86;
> @@ -834,7 +851,7 @@ int vmx_vpmu_initialise(struct vcpu *v, unsigned int 
> vpmu_flags)
>      int ret = 0;
>  
>      vpmu->arch_vpmu_ops = &core2_no_vpmu_ops;
> -    if ( !vpmu_flags )
> +    if ( vpmu_mode == XENPMU_MODE_OFF )
>          return 0;
>  
>      if ( family == 6 )
> @@ -877,7 +894,7 @@ int vmx_vpmu_initialise(struct vcpu *v, unsigned int 
> vpmu_flags)
>          /* future: */
>          case 0x3d:
>          case 0x4e:
> -            ret = core2_vpmu_initialise(v, vpmu_flags);
> +            ret = core2_vpmu_initialise(v);
>              if ( !ret )
>                  vpmu->arch_vpmu_ops = &core2_vpmu_ops;
>              return ret;
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index 3045e91..40a89d9 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -21,6 +21,8 @@
>  #include <xen/config.h>
>  #include <xen/sched.h>
>  #include <xen/xenoprof.h>
> +#include <xen/event.h>
> +#include <xen/guest_access.h>
>  #include <asm/regs.h>
>  #include <asm/types.h>
>  #include <asm/msr.h>
> @@ -33,8 +35,10 @@
>  #include <asm/hvm/svm/vmcb.h>
>  #include <asm/apic.h>
>  #include <public/pmu.h>
> +#include <xsm/xsm.h>
>  
>  #include <compat/pmu.h>
> +CHECK_pmu_params;
>  CHECK_pmu_intel_ctxt;
>  CHECK_pmu_amd_ctxt;
>  CHECK_pmu_cntr_pair;
> @@ -45,7 +49,9 @@ CHECK_pmu_regs;
>   * "vpmu=off" : vpmu generally disabled
>   * "vpmu=bts" : vpmu enabled and Intel BTS feature switched on.
>   */
> -static unsigned int __read_mostly opt_vpmu_enabled;
> +unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
> +unsigned int __read_mostly vpmu_features = 0;
> +static bool_t __read_mostly vpmu_disabled = 1;

The same readability concern applies here to vpmu_disabled.
Why not use vpmu_enabled = 0 as the default instead of vpmu_disabled = 1?

>  static void parse_vpmu_param(char *s);
>  custom_param("vpmu", parse_vpmu_param);
>  
> @@ -59,7 +65,7 @@ static void __init parse_vpmu_param(char *s)
>          break;
>      default:
>          if ( !strcmp(s, "bts") )
> -            opt_vpmu_enabled |= VPMU_BOOT_BTS;
> +            vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
>          else if ( *s )
>          {
>              printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
> @@ -67,7 +73,9 @@ static void __init parse_vpmu_param(char *s)
>          }
>          /* fall through */
>      case 1:
> -        opt_vpmu_enabled |= VPMU_BOOT_ENABLED;
> +        /* Default VPMU mode */
> +        vpmu_mode = XENPMU_MODE_SELF;
> +        vpmu_disabled = 0;
>          break;
>      }
>  }
> @@ -76,7 +84,7 @@ void vpmu_lvtpc_update(uint32_t val)
>  {
>      struct vpmu_struct *vpmu;
>  
> -    if ( !opt_vpmu_enabled )
> +    if ( vpmu_mode == XENPMU_MODE_OFF )
>          return;
>  
>      vpmu = vcpu_vpmu(current);
> @@ -89,6 +97,9 @@ int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, 
> uint64_t supported)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(current);
>  
> +    if ( !(vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> +        return 0;
> +
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
>          return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
>      return 0;
> @@ -98,6 +109,12 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(current);
>  
> +    if ( !(vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> +    {
> +        *msr_content = 0;
> +        return 0;
> +    }
> +
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
>          return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
>      return 0;
> @@ -249,19 +266,20 @@ void vpmu_initialise(struct vcpu *v)
>      switch ( vendor )
>      {
>      case X86_VENDOR_AMD:
> -        ret = svm_vpmu_initialise(v, opt_vpmu_enabled);
> +        ret = svm_vpmu_initialise(v);
>          break;
>  
>      case X86_VENDOR_INTEL:
> -        ret = vmx_vpmu_initialise(v, opt_vpmu_enabled);
> +        ret = vmx_vpmu_initialise(v);
>          break;
>  
>      default:
> -        if ( opt_vpmu_enabled )
> +        if ( vpmu_mode != XENPMU_MODE_OFF )
>          {
>              printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
>                     "Disabling VPMU\n", vendor);
> -            opt_vpmu_enabled = 0;
> +            vpmu_mode = XENPMU_MODE_OFF;
> +            vpmu_disabled = 1;
>          }
>          return;
>      }
> @@ -307,13 +325,215 @@ void vpmu_dump(struct vcpu *v)
>          vpmu->arch_vpmu_ops->arch_vpmu_dump(v);
>  }
>  
> +void vpmu_unload(struct vpmu_struct *vpmu)
> +{
> +    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED | VPMU_RUNNING) )
> +        return;
> +
> +    if (vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_unload)

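Coding style? The if condition above is missing the blanks inside the parentheses, i.e. it should read "if ( ... )".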

Dietmar.

> +        vpmu->arch_vpmu_ops->arch_vpmu_unload(vpmu);
> +
> +    vpmu_reset(vpmu, VPMU_CONTEXT_LOADED | VPMU_RUNNING);
> +}
> +
> +#define VPMU_INVALID_CPU (~0U)
> +static unsigned int vpmu_first_unload_cpu = VPMU_INVALID_CPU;
> +static unsigned int vpmu_next_unload_cpu;
> +
> +static long vpmu_unload_next(void *arg)
> +{
> +    struct vcpu *last;
> +    int ret;
> +    unsigned int thiscpu = smp_processor_id();
> +
> +    if ( thiscpu != vpmu_next_unload_cpu )
> +    {
> +        /* Continuation thread may have been moved due to CPU hot-unplug */
> +        vpmu_mode = (unsigned long)arg;
> +        vpmu_first_unload_cpu = VPMU_INVALID_CPU;
> +        return -EAGAIN;
> +    }
> +
> +    local_irq_disable(); /* so that last_vcpu doesn't change under us. */
> +
> +    last = this_cpu(last_vcpu);
> +    if ( last )
> +    {
> +        vpmu_unload(vcpu_vpmu(last));
> +        this_cpu(last_vcpu) = NULL;
> +    }
> +
> +    local_irq_enable();
> +
> +    vpmu_next_unload_cpu = cpumask_cycle(thiscpu, &cpu_online_map);
> +    if ( vpmu_next_unload_cpu == vpmu_first_unload_cpu )
> +    {
> +        /* We have visited everyone. */
> +        vpmu_first_unload_cpu = VPMU_INVALID_CPU;
> +        return 0;
> +    }
> +
> +    while ( !cpumask_test_cpu(vpmu_first_unload_cpu, &cpu_online_map) )
> +    {
> +        /* First cpu was hot-unplugged */
> +        vpmu_first_unload_cpu = cpumask_cycle(vpmu_first_unload_cpu,
> +                                              &cpu_online_map);
> +        if ( thiscpu == vpmu_first_unload_cpu )
> +        {
> +            vpmu_first_unload_cpu = VPMU_INVALID_CPU;
> +            return 0;
> +        }
> +    }
> +
> +    ret = continue_hypercall_on_cpu(vpmu_next_unload_cpu,
> +                                    vpmu_unload_next, arg);
> +    if ( ret )
> +    {
> +        vpmu_mode = (unsigned long)arg;
> +        vpmu_first_unload_cpu = VPMU_INVALID_CPU;
> +    }
> +
> +    return ret;
> +}
> +
> +static int vpmu_unload_all(unsigned long old_mode)
> +{
> +    int ret = 0;
> +    struct vcpu *last;
> +
> +    vpmu_unload(vcpu_vpmu(current));
> +
> +    local_irq_disable();
> +    last = this_cpu(last_vcpu);
> +    if ( last && (last != current) )
> +    {
> +        vpmu_unload(vcpu_vpmu(last));
> +        this_cpu(last_vcpu) = NULL;
> +    }
> +    local_irq_enable();
> +
> +    if ( cpumask_weight(&cpu_online_map) > 1 )
> +    {
> +        unsigned int thiscpu = smp_processor_id();
> +
> +        vpmu_first_unload_cpu = thiscpu;
> +        vpmu_next_unload_cpu = cpumask_cycle(thiscpu, &cpu_online_map);
> +
> +        ret = continue_hypercall_on_cpu(vpmu_next_unload_cpu, 
> vpmu_unload_next,
> +                                        (void *)old_mode);
> +        if ( ret )
> +            vpmu_first_unload_cpu = VPMU_INVALID_CPU;
> +    }
> +
> +    return ret;
> +}
> +
> +long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) 
> arg)
> +{
> +    int ret;
> +    struct xen_pmu_params pmu_params;
> +
> +    if ( vpmu_disabled )
> +        return -EINVAL;
> +
> +    ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
> +    if ( ret )
> +        return ret;
> +
> +    /* Check major version when parameters are specified */
> +    switch ( op )
> +    {
> +    case XENPMU_mode_set:
> +    case XENPMU_feature_set:
> +        if ( copy_from_guest(&pmu_params, arg, 1) )
> +            return -EFAULT;
> +
> +        if ( pmu_params.version.maj != XENPMU_VER_MAJ )
> +            return -EINVAL;
> +    }
> +
> +    switch ( op )
> +    {
> +    case XENPMU_mode_set:
> +    {
> +        unsigned int old_mode;
> +        static DEFINE_SPINLOCK(xenpmu_mode_lock);
> +
> +        if ( pmu_params.val & ~(XENPMU_MODE_SELF | XENPMU_MODE_HV) )
> +            return -EINVAL;
> +
> +        /* 32-bit dom0 can only sample itself. */
> +        if ( is_pv_32bit_vcpu(current) && (pmu_params.val & XENPMU_MODE_HV) )
> +            return -EINVAL;
> +
> +        /*
> +         * Return error if someone else is in the middle of changing mode ---
> +         * this is most likely indication of two system administrators
> +         * working against each other.
> +         */
> +        if ( !spin_trylock(&xenpmu_mode_lock) )
> +            return -EAGAIN;
> +        if ( vpmu_first_unload_cpu != VPMU_INVALID_CPU )
> +        {
> +            spin_unlock(&xenpmu_mode_lock);
> +            return -EAGAIN;
> +        }
> +
> +        old_mode = vpmu_mode;
> +        vpmu_mode = pmu_params.val;
> +
> +        if ( vpmu_mode == XENPMU_MODE_OFF )
> +        {
> +            /* Make sure all (non-dom0) VCPUs have unloaded their VPMUs. */
> +            ret = vpmu_unload_all(old_mode);
> +            if ( ret )
> +                vpmu_mode = old_mode;
> +        }
> +
> +        spin_unlock(&xenpmu_mode_lock);
> +
> +        break;
> +    }
> +
> +    case XENPMU_mode_get:
> +        memset(&pmu_params, 0, sizeof(pmu_params));
> +        pmu_params.val = vpmu_mode;
> +
> +        pmu_params.version.maj = XENPMU_VER_MAJ;
> +        pmu_params.version.min = XENPMU_VER_MIN;
> +
> +        if ( copy_to_guest(arg, &pmu_params, 1) )
> +            return -EFAULT;
> +        break;
> +
> +    case XENPMU_feature_set:
> +        if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS )
> +            return -EINVAL;
> +
> +        vpmu_features = pmu_params.val;
> +        break;
> +
> +    case XENPMU_feature_get:
> +        pmu_params.val = vpmu_features;
> +        if ( copy_field_to_guest(arg, &pmu_params, val) )
> +            return -EFAULT;
> +        break;
> +
> +    default:
> +        ret = -EINVAL;
> +    }
> +
> +    return ret;
> +}
> +
>  static int __init vpmu_init(void)
>  {
>      /* NMI watchdog uses LVTPC and HW counter */
> -    if ( opt_watchdog && opt_vpmu_enabled )
> +    if ( opt_watchdog && !vpmu_disabled )
>      {
>          printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU 
> off.\n");
> -        opt_vpmu_enabled = 0;
> +        vpmu_mode = XENPMU_MODE_OFF;
> +        vpmu_disabled = 1;
>      }
>  
>      return 0;
> diff --git a/xen/arch/x86/oprofile/nmi_int.c b/xen/arch/x86/oprofile/nmi_int.c
> index 13534d4..3c3a37c 100644
> --- a/xen/arch/x86/oprofile/nmi_int.c
> +++ b/xen/arch/x86/oprofile/nmi_int.c
> @@ -47,7 +47,8 @@ static int passive_domain_msr_op_checks(unsigned int msr, 
> int *typep, int *index
>       if ( !model->is_arch_pmu_msr(msr, typep, indexp) )
>               return 0;
>  
> -     if ( !vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED) )
> +     if ( !vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED |
> +                               VPMU_CONTEXT_ALLOCATED) )
>               if ( ! model->allocated_msr(current) )
>                       return 0;
>       return 1;
> diff --git a/xen/arch/x86/x86_64/compat/entry.S 
> b/xen/arch/x86/x86_64/compat/entry.S
> index 5b0af61..7691a79 100644
> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -417,6 +417,8 @@ ENTRY(compat_hypercall_table)
>          .quad do_domctl
>          .quad compat_kexec_op
>          .quad do_tmem_op
> +        .quad do_ni_hypercall           /* reserved for XenClient */
> +        .quad do_xenpmu_op              /* 40 */
>          .rept __HYPERVISOR_arch_0-((.-compat_hypercall_table)/8)
>          .quad compat_ni_hypercall
>          .endr
> @@ -466,6 +468,8 @@ ENTRY(compat_hypercall_args_table)
>          .byte 1 /* do_domctl                */
>          .byte 2 /* compat_kexec_op          */
>          .byte 1 /* do_tmem_op               */
> +        .byte 0 /* reserved for XenClient   */
> +        .byte 2 /* do_xenpmu_op             */  /* 40 */
>          .rept __HYPERVISOR_arch_0-(.-compat_hypercall_args_table)
>          .byte 0 /* compat_ni_hypercall      */
>          .endr
> diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
> index b3d6e32..aa842ac 100644
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -772,6 +772,8 @@ ENTRY(hypercall_table)
>          .quad do_domctl
>          .quad do_kexec_op
>          .quad do_tmem_op
> +        .quad do_ni_hypercall       /* reserved for XenClient */
> +        .quad do_xenpmu_op          /* 40 */
>          .rept __HYPERVISOR_arch_0-((.-hypercall_table)/8)
>          .quad do_ni_hypercall
>          .endr
> @@ -821,6 +823,8 @@ ENTRY(hypercall_args_table)
>          .byte 1 /* do_domctl            */
>          .byte 2 /* do_kexec             */
>          .byte 1 /* do_tmem_op           */
> +        .byte 0 /* reserved for XenClient */
> +        .byte 2 /* do_xenpmu_op         */  /* 40 */
>          .rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
>          .byte 0 /* do_ni_hypercall      */
>          .endr
> diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
> b/xen/include/asm-x86/hvm/vmx/vmcs.h
> index 6fce6aa..f05e2b5 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -463,7 +463,7 @@ extern const unsigned int 
> vmx_introspection_force_enabled_msrs_size;
>  void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type);
>  void vmx_enable_intercept_for_msr(struct vcpu *v, u32 msr, int type);
>  int vmx_read_guest_msr(u32 msr, u64 *val);
> -int vmx_write_guest_msr(u32 msr, u64 val);
> +int vmx_write_guest_msr_vcpu(struct vcpu *v, u32 msr, u64 val);
>  int vmx_add_msr(u32 msr, int type);
>  void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to);
>  void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector);
> @@ -483,6 +483,11 @@ static inline int vmx_add_host_load_msr(u32 msr)
>      return vmx_add_msr(msr, VMX_HOST_MSR);
>  }
>  
> +static inline int vmx_write_guest_msr(u32 msr, u64 val)
> +{
> +    return vmx_write_guest_msr_vcpu(current, msr, val);
> +}
> +
>  DECLARE_PER_CPU(bool_t, vmxon);
>  
>  #endif /* ASM_X86_HVM_VMX_VMCS_H__ */
> diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
> index 897d5de..1171b2a 100644
> --- a/xen/include/asm-x86/hvm/vpmu.h
> +++ b/xen/include/asm-x86/hvm/vpmu.h
> @@ -24,13 +24,6 @@
>  
>  #include <public/pmu.h>
>  
> -/*
> - * Flag bits given as a string on the hypervisor boot parameter 'vpmu'.
> - * See arch/x86/hvm/vpmu.c.
> - */
> -#define VPMU_BOOT_ENABLED 0x1    /* vpmu generally enabled. */
> -#define VPMU_BOOT_BTS     0x2    /* Intel BTS feature wanted. */
> -
>  #define vcpu_vpmu(vcpu)   (&(vcpu)->arch.vpmu)
>  #define vpmu_vcpu(vpmu)   container_of((vpmu), struct vcpu, arch.vpmu)
>  
> @@ -65,11 +58,12 @@ struct arch_vpmu_ops {
>      void (*arch_vpmu_destroy)(struct vcpu *v);
>      int (*arch_vpmu_save)(struct vpmu_struct *vpmu);
>      void (*arch_vpmu_load)(struct vpmu_struct *vpmu);
> +    void (*arch_vpmu_unload)(struct vpmu_struct *vpmu);
>      void (*arch_vpmu_dump)(const struct vcpu *);
>  };
>  
> -int vmx_vpmu_initialise(struct vcpu *, unsigned int flags);
> -int svm_vpmu_initialise(struct vcpu *, unsigned int flags);
> +int vmx_vpmu_initialise(struct vcpu *);
> +int svm_vpmu_initialise(struct vcpu *);
>  
>  /* VPMU states */
>  #define VPMU_CONTEXT_ALLOCATED              0x1
> @@ -111,10 +105,31 @@ void vpmu_initialise(struct vcpu *v);
>  void vpmu_destroy(struct vcpu *v);
>  void vpmu_save(struct vpmu_struct *vpmu);
>  void vpmu_load(struct vpmu_struct *vpmu);
> +void vpmu_unload(struct vpmu_struct *vpmu);
>  void vpmu_dump(struct vcpu *v);
>  
>  extern int acquire_pmu_ownership(int pmu_ownership);
>  extern void release_pmu_ownership(int pmu_ownership);
>  
> +extern unsigned int vpmu_mode;
> +extern unsigned int vpmu_features;
> +
> +/* Context switch */
> +static inline void vpmu_switch_from(struct vpmu_struct *prev,
> +                                    struct vpmu_struct *next)
> +{
> +    if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
> +        vpmu_save(prev);
> +}
> +
> +static inline void vpmu_switch_to(struct vpmu_struct *prev,
> +                                  struct vpmu_struct *next)
> +{
> +    if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
> +        vpmu_load(next);
> +    else if ( vpmu_is_set(next, VPMU_CONTEXT_LOADED | VPMU_RUNNING) )
> +        vpmu_unload(next);
> +}
> +
>  #endif /* __ASM_X86_HVM_VPMU_H_*/
>  
> diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
> index f97106d..66cc494 100644
> --- a/xen/include/public/pmu.h
> +++ b/xen/include/public/pmu.h
> @@ -13,6 +13,51 @@
>  #define XENPMU_VER_MAJ    0
>  #define XENPMU_VER_MIN    1
>  
> +/*
> + * ` enum neg_errnoval
> + * ` HYPERVISOR_xenpmu_op(enum xenpmu_op cmd, struct xenpmu_params *args);
> + *
> + * @cmd  == XENPMU_* (PMU operation)
> + * @args == struct xenpmu_params
> + */
> +/* ` enum xenpmu_op { */
> +#define XENPMU_mode_get        0 /* Also used for getting PMU version */
> +#define XENPMU_mode_set        1
> +#define XENPMU_feature_get     2
> +#define XENPMU_feature_set     3
> +/* ` } */
> +
> +/* Parameters structure for HYPERVISOR_xenpmu_op call */
> +struct xen_pmu_params {
> +    /* IN/OUT parameters */
> +    struct {
> +        uint32_t maj;
> +        uint32_t min;
> +    } version;
> +    uint64_t val;
> +
> +    /* IN parameters */
> +    uint32_t vcpu;
> +    uint32_t pad;
> +};
> +typedef struct xen_pmu_params xen_pmu_params_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t);
> +
> +/* PMU modes:
> + * - XENPMU_MODE_OFF:   No PMU virtualization
> + * - XENPMU_MODE_SELF:  Guests can profile themselves
> + * - XENPMU_MODE_HV:    Guests can profile themselves, dom0 profiles
> + *                      itself and Xen
> + */
> +#define XENPMU_MODE_OFF           0
> +#define XENPMU_MODE_SELF          (1<<0)
> +#define XENPMU_MODE_HV            (1<<1)
> +
> +/*
> + * PMU features:
> + * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD)
> + */
> +#define XENPMU_FEATURE_INTEL_BTS  1
>  
>  /* Shared between hypervisor and PV domain */
>  struct xen_pmu_data {
> diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
> index 3703c39..0dd3c97 100644
> --- a/xen/include/public/xen.h
> +++ b/xen/include/public/xen.h
> @@ -101,6 +101,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
>  #define __HYPERVISOR_kexec_op             37
>  #define __HYPERVISOR_tmem_op              38
>  #define __HYPERVISOR_xc_reserved_op       39 /* reserved for XenClient */
> +#define __HYPERVISOR_xenpmu_op            40
>  
>  /* Architecture-specific hypercall definitions. */
>  #define __HYPERVISOR_arch_0               48
> diff --git a/xen/include/xen/hypercall.h b/xen/include/xen/hypercall.h
> index eda8a36..ef665db 100644
> --- a/xen/include/xen/hypercall.h
> +++ b/xen/include/xen/hypercall.h
> @@ -14,6 +14,7 @@
>  #include <public/event_channel.h>
>  #include <public/tmem.h>
>  #include <public/version.h>
> +#include <public/pmu.h>
>  #include <asm/hypercall.h>
>  #include <xsm/xsm.h>
>  
> @@ -144,6 +145,9 @@ do_tmem_op(
>  extern long
>  do_xenoprof_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg);
>  
> +extern long
> +do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg);
> +
>  #ifdef CONFIG_COMPAT
>  
>  extern int
> diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
> index 4c9ef97..0f972eb 100644
> --- a/xen/include/xlat.lst
> +++ b/xen/include/xlat.lst
> @@ -109,6 +109,7 @@
>  ?    pmu_cntr_pair                   arch-x86/pmu.h
>  ?    pmu_intel_ctxt                  arch-x86/pmu.h
>  ?    pmu_regs                        arch-x86/pmu.h
> +?    pmu_params                      pmu.h
>  ?    flask_access                    xsm/flask_op.h
>  !    flask_boolean                   xsm/flask_op.h
>  ?    flask_cache_stats               xsm/flask_op.h
> diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
> index f20e89c..c637454 100644
> --- a/xen/include/xsm/dummy.h
> +++ b/xen/include/xsm/dummy.h
> @@ -655,4 +655,19 @@ static XSM_INLINE int xsm_ioport_mapping(XSM_DEFAULT_ARG 
> struct domain *d, uint3
>      return xsm_default_action(action, current->domain, d);
>  }
>  
> +static XSM_INLINE int xsm_pmu_op (XSM_DEFAULT_ARG struct domain *d, int op)
> +{
> +    XSM_ASSERT_ACTION(XSM_OTHER);
> +    switch ( op )
> +    {
> +    case XENPMU_mode_set:
> +    case XENPMU_mode_get:
> +    case XENPMU_feature_set:
> +    case XENPMU_feature_get:
> +        return xsm_default_action(XSM_PRIV, d, current->domain);
> +    default:
> +        return -EPERM;
> +    }
> +}
> +
>  #endif /* CONFIG_X86 */
> diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
> index 4ce089f..90edbb1 100644
> --- a/xen/include/xsm/xsm.h
> +++ b/xen/include/xsm/xsm.h
> @@ -173,6 +173,7 @@ struct xsm_operations {
>      int (*unbind_pt_irq) (struct domain *d, struct xen_domctl_bind_pt_irq 
> *bind);
>      int (*ioport_permission) (struct domain *d, uint32_t s, uint32_t e, 
> uint8_t allow);
>      int (*ioport_mapping) (struct domain *d, uint32_t s, uint32_t e, uint8_t 
> allow);
> +    int (*pmu_op) (struct domain *d, unsigned int op);
>  #endif
>  };
>  
> @@ -665,6 +666,11 @@ static inline int xsm_ioport_mapping (xsm_default_t def, 
> struct domain *d, uint3
>      return xsm_ops->ioport_mapping(d, s, e, allow);
>  }
>  
> +static inline int xsm_pmu_op (xsm_default_t def, struct domain *d, int op)
> +{
> +    return xsm_ops->pmu_op(d, op);
> +}
> +
>  #endif /* CONFIG_X86 */
>  
>  #endif /* XSM_NO_WRAPPERS */
> diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
> index 8eb3050..94f1cf0 100644
> --- a/xen/xsm/dummy.c
> +++ b/xen/xsm/dummy.c
> @@ -144,5 +144,6 @@ void xsm_fixup_ops (struct xsm_operations *ops)
>      set_to_dummy_if_null(ops, unbind_pt_irq);
>      set_to_dummy_if_null(ops, ioport_permission);
>      set_to_dummy_if_null(ops, ioport_mapping);
> +    set_to_dummy_if_null(ops, pmu_op);
>  #endif
>  }
> diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
> index c6431b5..982e879 100644
> --- a/xen/xsm/flask/hooks.c
> +++ b/xen/xsm/flask/hooks.c
> @@ -1505,6 +1505,23 @@ static int flask_unbind_pt_irq (struct domain *d, 
> struct xen_domctl_bind_pt_irq
>  {
>      return current_has_perm(d, SECCLASS_RESOURCE, RESOURCE__REMOVE);
>  }
> +
> +static int flask_pmu_op (struct domain *d, unsigned int op)
> +{
> +    u32 dsid = domain_sid(d);
> +
> +    switch ( op )
> +    {
> +    case XENPMU_mode_set:
> +    case XENPMU_mode_get:
> +    case XENPMU_feature_set:
> +    case XENPMU_feature_get:
> +        return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2,
> +                            XEN2__PMU_CTRL, NULL);
> +    default:
> +        return -EPERM;
> +    }
> +}
>  #endif /* CONFIG_X86 */
>  
>  long do_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
> @@ -1627,6 +1644,7 @@ static struct xsm_operations flask_ops = {
>      .unbind_pt_irq = flask_unbind_pt_irq,
>      .ioport_permission = flask_ioport_permission,
>      .ioport_mapping = flask_ioport_mapping,
> +    .pmu_op = flask_pmu_op,
>  #endif
>  };
>  
> diff --git a/xen/xsm/flask/policy/access_vectors 
> b/xen/xsm/flask/policy/access_vectors
> index 3a97577..626850d 100644
> --- a/xen/xsm/flask/policy/access_vectors
> +++ b/xen/xsm/flask/policy/access_vectors
> @@ -86,6 +86,8 @@ class xen2
>      psr_cmt_op
>  # XENPF_get_symbol
>      get_symbol
> +# PMU control
> +    pmu_ctrl
>  }
>  
>  # Classes domain and domain2 consist of operations that a domain performs on
> 

-- 
Company details: http://ts.fujitsu.com/imprint.html

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.