Xen project Mailing List

Re: [Xen-devel] [PATCH v9 11/20] x86/VPMU: Interface for setting PMU mode and flags

>>> On 08.08.14 at 18:55, <boris.ostrovsky@xxxxxxxxxx> wrote: > --- a/xen/arch/x86/domain.c > +++ b/xen/arch/x86/domain.c > @@ -1482,7 +1482,7 @@ void context_switch(struct vcpu *prev, struct vcpu > *next) > > if ( is_hvm_vcpu(prev) ) > { > - if (prev != next) > + if ( (prev != next) && (vpmu_mode & XENPMU_MODE_SELF) ) > vpmu_save(prev); > > if ( !list_empty(&prev->arch.hvm_vcpu.tm_list) ) > @@ -1526,7 +1526,7 @@ void context_switch(struct vcpu *prev, struct vcpu > *next) > !is_hardware_domain(next->domain)); > } > > - if (is_hvm_vcpu(next) && (prev != next) ) > + if ( is_hvm_vcpu(next) && (prev != next) && (vpmu_mode & > XENPMU_MODE_SELF) ) > /* Must be done with interrupts enabled */ > vpmu_load(next); Wouldn't such vPMU internals be better hidden in the functions themselves? I realize you can save the calls this way, but if the condition changes again later, we'll again have to adjust this core function rather than just the vPMU code. It's bad enough that the vpmu_mode variable is visible to non-vPMU code. > --- a/xen/arch/x86/hvm/vpmu.c > +++ b/xen/arch/x86/hvm/vpmu.c > @@ -21,6 +21,8 @@ > #include <xen/config.h> > #include <xen/sched.h> > #include <xen/xenoprof.h> > +#include <xen/event.h> > +#include <xen/guest_access.h> > #include <asm/regs.h> > #include <asm/types.h> > #include <asm/msr.h> > @@ -32,13 +34,21 @@ > #include <asm/hvm/svm/vmcb.h> > #include <asm/apic.h> > #include <public/pmu.h> > +#include <xen/tasklet.h> > + > +#include <compat/pmu.h> > +CHECK_pmu_params; > +CHECK_pmu_intel_ctxt; > +CHECK_pmu_amd_ctxt; > +CHECK_pmu_cntr_pair; Such being placed in a HVM-specific file suggests that the series is badly ordered: Anything relevant to PV should normally only be added here _after_ the file got moved out of the hvm/ subtree. 
Yet since I realize this would be a major re-work, I think you can leave this as is unless you expect potentially just part of the series to go in, with the splitting point being (immediately or later) after this patch (from my looking at it I would suppose that patches 3 and 4 could go in right away - they don't appear to depend on patches 1 and 2 - and patch 1 probably could go in too, but it doesn't make much sense to have it in without the rest of the series). > @@ -274,3 +290,159 @@ void vpmu_dump(struct vcpu *v) > vpmu->arch_vpmu_ops->arch_vpmu_dump(v); > } > > +static atomic_t vpmu_sched_counter; > + > +static void vpmu_sched_checkin(unsigned long unused) > +{ > + atomic_inc(&vpmu_sched_counter); > +} > + > +static int > +vpmu_force_context_switch(XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) > +{ > + unsigned i, j, allbutself_num; > + cpumask_t allbutself; > + static s_time_t start; > + static struct tasklet *sync_task; > + int ret = 0; > + > + allbutself_num = num_online_cpus() - 1; > + > + if ( sync_task ) /* if true, we are in hypercall continuation */ "true"? This is not a boolean, so perhaps "set" would be the better term? > + goto cont_wait; > + > + cpumask_andnot(&allbutself, &cpu_online_map, > + cpumask_of(smp_processor_id())); > + > + sync_task = xmalloc_array(struct tasklet, allbutself_num); > + if ( !sync_task ) > + { > + printk("vpmu_force_context_switch: out of memory\n"); > + return -ENOMEM; > + } > + > + for ( i = 0; i < allbutself_num; i++ ) > + tasklet_init(&sync_task[i], vpmu_sched_checkin, 0); > + > + atomic_set(&vpmu_sched_counter, 0); > + > + j = 0; > + for_each_cpu ( i, &allbutself ) This looks to be the only use for the (on stack) allbutself variable, but you could easily avoid this by using for_each_online_cpu() and skipping the local one. I'd also recommend that you count allbutself_num here rather than up front, since that will much more obviously prove that you wait for exactly as many CPUs as you scheduled. 
The array allocation above is bogus anyway, as on a huge system this can easily be more than a page in size. > + tasklet_schedule_on_cpu(&sync_task[j++], i); > + > + vpmu_save(current); > + > + start = NOW(); > + > + cont_wait: > + /* > + * Note that we may fail here if a CPU is hot-unplugged while we are > + * waiting. We will then time out. > + */ > + while ( atomic_read(&vpmu_sched_counter) != allbutself_num ) > + { > + /* Give up after 5 seconds */ > + if ( NOW() > start + SECONDS(5) ) > + { > + printk("vpmu_force_context_switch: failed to sync\n"); > + ret = -EBUSY; > + break; > + } > + cpu_relax(); > + if ( hypercall_preempt_check() ) > + return hypercall_create_continuation( > + __HYPERVISOR_xenpmu_op, "ih", XENPMU_mode_set, arg); > + } > + > + for ( i = 0; i < allbutself_num; i++ ) > + tasklet_kill(&sync_task[i]); > + xfree(sync_task); > + sync_task = NULL; > + > + return ret; > +} > + > +long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) > +{ > + int ret = -EINVAL; > + xen_pmu_params_t pmu_params; > + > + switch ( op ) > + { > + case XENPMU_mode_set: > + { > + static DEFINE_SPINLOCK(xenpmu_mode_lock); > + uint32_t current_mode; > + > + if ( !is_control_domain(current->domain) ) > + return -EPERM; > + > + if ( copy_from_guest(&pmu_params, arg, 1) ) > + return -EFAULT; > + > + if ( pmu_params.val & ~XENPMU_MODE_SELF ) > + return -EINVAL; > + > + /* > + * Return error is someone else is in the middle of changing mode --- > + * this is most likely indication of two system administrators > + * working against each other > + */ > + if ( !spin_trylock(&xenpmu_mode_lock) ) > + return -EAGAIN; > + > + current_mode = vpmu_mode; > + vpmu_mode = pmu_params.val; > + > + if ( vpmu_mode == XENPMU_MODE_OFF ) > + { > + /* > + * Make sure all (non-dom0) VCPUs have unloaded their VPMUs. This > + * can be achieved by having all physical processors go through > + * context_switch(). 
> + */ > + ret = vpmu_force_context_switch(arg); > + if ( ret ) > + vpmu_mode = current_mode; > + } > + else > + ret = 0; > + > + spin_unlock(&xenpmu_mode_lock); > + break; This still isn't safe: There's nothing preventing another vCPU from issuing another XENPMU_mode_set operation while the one turning the vPMU off is still in the process of waiting, but has exited the lock-protected region in order to allow other processing to occur. I think you simply need another mode "being-turned-off" during which only mode changes to XENPMU_MODE_OFF, and only by the originally requesting vCPU, are permitted (or else your "if true, we are in hypercall continuation" comment above wouldn't always be true either, as that second vCPU might also issue a second turn-off request). > + case XENPMU_mode_get: > + pmu_params.val = vpmu_mode; > + pmu_params.version.maj = XENPMU_VER_MAJ; > + pmu_params.version.min = XENPMU_VER_MIN; > + if ( copy_to_guest(arg, &pmu_params, 1) ) You're leaking hypervisor stack contents here ... > + case XENPMU_feature_get: > + pmu_params.val = vpmu_mode; > + if ( copy_to_guest(arg, &pmu_params, 1) ) ... and here. > --- a/xen/include/Makefile > +++ b/xen/include/Makefile > @@ -26,6 +26,7 @@ headers-y := \ > headers-$(CONFIG_X86) += compat/arch-x86/xen-mca.h > headers-$(CONFIG_X86) += compat/arch-x86/xen.h > headers-$(CONFIG_X86) += compat/arch-x86/xen-$(compat-arch-y).h > +headers-$(CONFIG_X86) += compat/pmu.h compat/arch-x86/pmu.h The first one isn't x86-specific, so doesn't belong here. 
> --- a/xen/include/public/arch-x86/pmu.h > +++ b/xen/include/public/arch-x86/pmu.h > @@ -9,12 +9,16 @@ struct xen_pmu_amd_ctxt { > uint32_t counters; > uint32_t ctrls; > }; > +typedef struct xen_pmu_amd_ctxt xen_pmu_amd_ctxt_t; > +DEFINE_XEN_GUEST_HANDLE(xen_pmu_amd_ctxt_t); > > /* Intel PMU registers and structures */ > struct xen_pmu_cntr_pair { > uint64_t counter; > uint64_t control; > }; > +typedef struct xen_pmu_cntr_pair xen_pmu_cntr_pair_t; > +DEFINE_XEN_GUEST_HANDLE(xen_pmu_cntr_pair_t); > > struct xen_pmu_intel_ctxt { > uint64_t global_ctrl; > @@ -31,8 +35,10 @@ struct xen_pmu_intel_ctxt { > uint32_t fixed_counters; > uint32_t arch_counters; > }; > +typedef struct xen_pmu_intel_ctxt xen_pmu_intel_ctxt_t; > +DEFINE_XEN_GUEST_HANDLE(xen_pmu_intel_ctxt_t); If you really need these typedefs and handles, why don't you add them right away when introducing this header? > -struct xen_arch_pmu { > +struct xen_pmu_arch { And why can't this be named the final way from the beginning? Jan _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.