[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v10 15/20] x86/VPMU: Handle PMU interrupts for PV guests



Add support for handling PMU interrupts for PV guests.

VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
hypercall. This allows the guest to access PMU MSR values that are stored in
VPMU context which is shared between hypervisor and domain, thus avoiding
traps to hypervisor.

Since the the interrupt handler may now force VPMU context save (i.e. set
VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
until now expected this flag to be set only when the counters are stopped.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Acked-by: Kevin Tian <kevin.tian@xxxxxxxxx>
Reviewed-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>
Tested-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>
---
 xen/arch/x86/hvm/svm/vpmu.c |  11 +--
 xen/arch/x86/hvm/vpmu.c     | 163 ++++++++++++++++++++++++++++++++++++++++----
 xen/include/public/pmu.h    |   7 ++
 3 files changed, 159 insertions(+), 22 deletions(-)

diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 63c099c..055b21c 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -229,17 +229,12 @@ static int amd_vpmu_save(struct vcpu *v)
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
     unsigned int i;
 
-    /*
-     * Stop the counters. If we came here via vpmu_save_force (i.e.
-     * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
-     */
+    for ( i = 0; i < num_counters; i++ )
+        wrmsrl(ctrls[i], 0);
+
     if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
     {
         vpmu_set(vpmu, VPMU_FROZEN);
-
-        for ( i = 0; i < num_counters; i++ )
-            wrmsrl(ctrls[i], 0);
-
         return 0;
     }
 
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index 8316f14..edc81be 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -78,44 +78,167 @@ static void __init parse_vpmu_param(char *s)
 
 void vpmu_lvtpc_update(uint32_t val)
 {
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+    struct vcpu *curr = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(curr);
 
     vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
-    apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+
+    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
+    if ( is_hvm_domain(curr->domain) ||
+         !(vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) )
+        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
 }
 
 int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
 {
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+    struct vcpu *curr = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(curr);
 
     if ( !(vpmu_mode & XENPMU_MODE_SELF) )
         return 0;
 
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
-        return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
+    {
+        int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
+
+        /*
+         * We may have received a PMU interrupt during WRMSR handling
+         * and since do_wrmsr may load VPMU context we should save
+         * (and unload) it again.
+         */
+        if ( !is_hvm_domain(curr->domain) &&
+             vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED) )
+        {
+            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+            vpmu->arch_vpmu_ops->arch_vpmu_save(curr);
+            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+        }
+        return ret;
+    }
     return 0;
 }
 
 int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
 {
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+    struct vcpu *curr = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(curr);
 
     if ( !(vpmu_mode & XENPMU_MODE_SELF) )
         return 0;
 
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
-        return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
+    {
+        int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
+
+        if ( !is_hvm_domain(curr->domain) &&
+             vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED) )
+        {
+            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+            vpmu->arch_vpmu_ops->arch_vpmu_save(curr);
+            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+        }
+        return ret;
+    }
     return 0;
 }
 
+static struct vcpu *choose_hwdom_vcpu(void)
+{
+    struct vcpu *v;
+    unsigned idx = smp_processor_id() % hardware_domain->max_vcpus;
+
+    if ( hardware_domain->vcpu == NULL )
+        return NULL;
+
+    v = hardware_domain->vcpu[idx];
+
+    /*
+     * If index is not populated search downwards the vcpu array until
+     * a valid vcpu can be found
+     */
+    while ( !v && idx-- )
+        v = hardware_domain->vcpu[idx];
+
+    return v;
+}
+
 int vpmu_do_interrupt(struct cpu_user_regs *regs)
 {
-    struct vcpu *v = current;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct vcpu *sampled = current, *sampling;
+    struct vpmu_struct *vpmu;
+
+    /* dom0 will handle interrupt for special domains (e.g. idle domain) */
+    if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED )
+    {
+        sampling = choose_hwdom_vcpu();
+        if ( !sampling )
+            return 0;
+    }
+    else
+        sampling = sampled;
+
+    vpmu = vcpu_vpmu(sampling);
+    if ( !is_hvm_domain(sampling->domain) )
+    {
+        /* PV(H) guest or dom0 is doing system profiling */
+        const struct cpu_user_regs *gregs;
+
+        if ( !vpmu->xenpmu_data )
+            return 0;
+
+        if ( vpmu->xenpmu_data->pmu_flags & PMU_CACHED )
+            return 1;
+
+        if ( is_pvh_domain(sampled->domain) &&
+             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
+            return 0;
+
+        /* PV guest will be reading PMU MSRs from xenpmu_data */
+        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling);
+        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+
+        /* Store appropriate registers in xenpmu_data */
+        if ( is_pv_32bit_domain(sampled->domain) )
+        {
+            /*
+             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
+             * and therefore we treat it the same way as a non-priviledged
+             * PV 32-bit domain.
+             */
+            struct compat_cpu_user_regs *cmp;
+
+            gregs = guest_cpu_user_regs();
+
+            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
+            XLAT_cpu_user_regs(cmp, gregs);
+        }
+        else if ( !is_hardware_domain(sampled->domain) &&
+                  !is_idle_vcpu(sampled) )
+        {
+            /* PV(H) guest */
+            gregs = guest_cpu_user_regs();
+            vpmu->xenpmu_data->pmu.r.regs = *gregs;
+        }
+        else
+            vpmu->xenpmu_data->pmu.r.regs = *regs;
+
+        vpmu->xenpmu_data->domain_id = sampled->domain->domain_id;
+        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
+        vpmu->xenpmu_data->pcpu_id = smp_processor_id();
+
+        vpmu->xenpmu_data->pmu_flags |= PMU_CACHED;
+        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
+        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
+
+        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
+
+        return 1;
+    }
 
     if ( vpmu->arch_vpmu_ops )
     {
-        struct vlapic *vlapic = vcpu_vlapic(v);
+        struct vlapic *vlapic = vcpu_vlapic(sampling);
         u32 vlapic_lvtpc;
         unsigned char int_vec;
 
@@ -129,9 +252,9 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
         int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
 
         if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
-            vlapic_set_irq(vcpu_vlapic(v), int_vec, 0);
+            vlapic_set_irq(vcpu_vlapic(sampling), int_vec, 0);
         else
-            v->nmi_pending = 1;
+            sampling->nmi_pending = 1;
         return 1;
     }
 
@@ -230,7 +353,9 @@ void vpmu_load(struct vcpu *v)
     local_irq_enable();
 
     /* Only when PMU is counting, we load PMU context immediately. */
-    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
+    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
+         (!is_hvm_domain(v->domain) &&
+          vpmu->xenpmu_data->pmu_flags & PMU_CACHED) )
         return;
 
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
@@ -458,6 +583,7 @@ 
vpmu_force_context_switch(XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
 long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
 {
     int ret = -EINVAL;
+    struct vcpu *curr;
     xen_pmu_params_t pmu_params;
 
     switch ( op )
@@ -551,9 +677,18 @@ long do_xenpmu_op(int op, 
XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
         break;
 
     case XENPMU_lvtpc_set:
-        if ( current->arch.vpmu.xenpmu_data == NULL )
+        curr = current;
+        if ( curr->arch.vpmu.xenpmu_data == NULL )
             return -EINVAL;
-        vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
+        vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
+        ret = 0;
+        break;
+
+    case XENPMU_flush:
+        curr = current;
+        curr->arch.vpmu.xenpmu_data->pmu_flags &= ~PMU_CACHED;
+        vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
+        vpmu_load(curr);
         ret = 0;
         break;
     }
diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
index 92f3683..44bf43f 100644
--- a/xen/include/public/pmu.h
+++ b/xen/include/public/pmu.h
@@ -28,6 +28,7 @@
 #define XENPMU_init            4
 #define XENPMU_finish          5
 #define XENPMU_lvtpc_set       6
+#define XENPMU_flush           7 /* Write cached MSR values to HW     */
 /* ` } */
 
 /* Parameters structure for HYPERVISOR_xenpmu_op call */
@@ -59,6 +60,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t);
  */
 #define XENPMU_FEATURE_INTEL_BTS  1
 
+/*
+ * PMU MSRs are cached in the context so the PV guest doesn't need to trap to
+ * the hypervisor
+ */
+#define PMU_CACHED 1
+
 /* Shared between hypervisor and PV domain */
 struct xen_pmu_data {
     uint32_t domain_id;
-- 
1.8.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.