[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2] x86/msr: fix handling of MSR_IA32_PERF_{STATUS/CTL}


  • To: Xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
  • Date: Mon, 9 Nov 2020 17:38:19 +0000
  • Authentication-results: esa2.hc3370-68.iphmx.com; dkim=none (message not signed) header.i=none
  • Cc: Roger Pau Monné <roger.pau@xxxxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Jan Beulich <JBeulich@xxxxxxxx>, Wei Liu <wl@xxxxxxx>
  • Delivery-date: Mon, 09 Nov 2020 17:39:03 +0000
  • Ironport-sdr: 4Oz/y88grkHPEUvsMUtbkrMf/0c9qbhY5dNSvopmyQh50I3iPcfbl/lwoLSHd/vXTOvnFBWHpN IBBBbxFLyIDVGTuVVhKSHckX4KKaiOI+Qlrw8v8IVS7cfbXWT/apuIaImIlbTzUh2/AZjDv9i/ mLZdoxqUuke8IQmweMxYYuuVFzl+gVdbSaVjKUX8Oh9HQpnAxwCZsRYp+vQ/DL7h+bplwCfiAy KxkJIxD2bMvfdqwokkFgO1AhofyvelwJM6CL9xi8fqGea4DoV4PZvU5DxvCZ0dTJcR5r/Ak3M3 1+U=
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

From: Roger Pau Monné <roger.pau@xxxxxxxxxx>

Currently a PV hardware domain can also be given control over the CPU
frequency, and such guest is allowed to write to MSR_IA32_PERF_CTL.
However since commit 322ec7c89f6 the default behavior has been changed
to reject accesses to not explicitly handled MSRs, preventing PV
guests that manage CPU frequency from reading
MSR_IA32_PERF_{STATUS/CTL}.

Additionally some HVM guests (Windows at least) will attempt to read
MSR_IA32_PERF_CTL and will panic if given back a #GP fault:

  vmx.c:3035:d8v0 RDMSR 0x00000199 unimplemented
  d8v0 VIRIDIAN CRASH: 3b c0000096 fffff806871c1651 ffffda0253683720 0

Move the handling of MSR_IA32_PERF_{STATUS/CTL} to the common MSR
handling shared between HVM and PV guests, and add an explicit case
for reads to MSR_IA32_PERF_{STATUS/CTL}.

Restore previous behavior and allow PV guests with the required
permissions to read the contents of the mentioned MSRs. Non privileged
guests will get 0 when trying to read those registers, as writes to
MSR_IA32_PERF_CTL by such guest will already be silently dropped.

Fixes: 322ec7c89f6 ('x86/pv: disallow access to unknown MSRs')
Fixes: 84e848fd7a1 ('x86/hvm: disallow access to unknown MSRs')
Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
CC: Wei Liu <wl@xxxxxxx>

v2:
 * fix is_cpufreq_controller() to exclude PVH dom0, and collapse to nothing in
   !CONFIG_PV builds
 * Drop the cross-vendor checks.  It isn't possible to configure dom0 as
   cross-vendor, and anyone using is_cpufreq_controller() without an exact
   model match has far bigger problems.
 * At least Centaur implements these MSRs.  Add access.
---
 xen/arch/x86/msr.c             | 34 ++++++++++++++++++++++++++++++++++
 xen/arch/x86/pv/emul-priv-op.c | 14 --------------
 xen/include/xen/sched.h        | 17 +++++++++++++++++
 3 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
index 9c69ef8792..0a8ae4d22c 100644
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -242,6 +242,25 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t 
*val)
             goto gp_fault;
         break;
 
+        /*
+         * These MSRs are not enumerated in CPUID.  They have been around
+         * since the Pentium 4, and implemented by other vendors.
+         *
+         * Some versions of Windows try reading these before setting up a #GP
+         * handler, and Linux has several unguarded reads as well.  Provide
+         * RAZ semantics, in general, but permit a cpufreq controller dom0 to
+         * have full access.
+         */
+    case MSR_IA32_PERF_STATUS:
+    case MSR_IA32_PERF_CTL:
+        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
+            goto gp_fault;
+
+        *val = 0;
+        if ( likely(!is_cpufreq_controller(d)) || rdmsr_safe(msr, *val) == 0 )
+            break;
+        goto gp_fault;
+
     case MSR_IA32_THERM_STATUS:
         if ( cp->x86_vendor != X86_VENDOR_INTEL )
             goto gp_fault;
@@ -448,6 +467,21 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
             goto gp_fault;
         break;
 
+        /*
+         * This MSR are not enumerated in CPUID.  It has been around since the
+         * Pentium 4, and implemented by other vendors.
+         *
+         * To match the RAZ semantics, implement as write-discard, except for
+         * a cpufreq controller dom0 which has full access.
+         */
+    case MSR_IA32_PERF_CTL:
+        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
+            goto gp_fault;
+
+        if ( likely(!is_cpufreq_controller(d)) || wrmsr_safe(msr, val) == 0 )
+            break;
+        goto gp_fault;
+
     case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
         if ( !is_hvm_domain(d) || v != curr )
             goto gp_fault;
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 7cc16d6eda..dbceed8a05 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -849,12 +849,6 @@ static inline uint64_t guest_misc_enable(uint64_t val)
     return val;
 }
 
-static inline bool is_cpufreq_controller(const struct domain *d)
-{
-    return ((cpufreq_controller == FREQCTL_dom0_kernel) &&
-            is_hardware_domain(d));
-}
-
 static uint64_t guest_efer(const struct domain *d)
 {
     uint64_t val;
@@ -1121,14 +1115,6 @@ static int write_msr(unsigned int reg, uint64_t val,
             return X86EMUL_OKAY;
         break;
 
-    case MSR_IA32_PERF_CTL:
-        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
-            break;
-        if ( likely(!is_cpufreq_controller(currd)) ||
-             wrmsr_safe(reg, val) == 0 )
-            return X86EMUL_OKAY;
-        break;
-
     case MSR_IA32_THERM_CONTROL:
     case MSR_IA32_ENERGY_PERF_BIAS:
         if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index d8ed83f869..b4d3e53310 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -1069,6 +1069,23 @@ extern enum cpufreq_controller {
     FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
 } cpufreq_controller;
 
+static always_inline bool is_cpufreq_controller(const struct domain *d)
+{
+    /*
+     * A PV dom0 can be nominated as the cpufreq controller, instead of using
+     * Xen's cpufreq driver, at which point dom0 gets direct access to certain
+     * MSRs.
+     *
+     * This interface only works when dom0 is identity pinned and has the same
+     * number of vCPUs as pCPUs on the system.
+     *
+     * It would be far better to paravirtualise the interface.
+     */
+    return (IS_ENABLED(CONFIG_PV) &&
+            (cpufreq_controller == FREQCTL_dom0_kernel) &&
+            is_pv_domain(d) && is_hardware_domain(d));
+}
+
 int cpupool_move_domain(struct domain *d, struct cpupool *c);
 int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op);
 int cpupool_get_id(const struct domain *d);
-- 
2.11.0




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.