[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 4/9] x86/vmx: Support remote access to the MSR lists



At the moment, all modifications of the MSR lists are in current context.
However, future changes may need to put MSR_EFER into the lists from domctl
hypercall context.

Plumb a struct vcpu parameter down through the infrastructure, and use
vmx_vmcs_{enter,exit}() for safe access to the VMCS in vmx_add_msr().  Use
assertions to ensure that access is either in current context, or while the
vcpu is paused.

Note these expectations beside the fields in arch_vmx_struct, and reorder the
fields to avoid unnecessary padding.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Jun Nakajima <jun.nakajima@xxxxxxxxx>
CC: Kevin Tian <kevin.tian@xxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>

To preempt any questions about spinlocks, the use of the MSR lists in the
return-to-guest path causes checklock failures for plain spinlocks (despite it
technically being safe to live here), and the call to alloc_xenheap_page()
makes it impossible to use irqsave/restore variants, due to the nested
acquisition of the heap lock.

v2:
 * Remove paragraph from the commit message.
 * Constify vmx_find_msr().  This in turn requires correcting the prototype
   for vcpu_runnable()
---
 xen/arch/x86/cpu/vpmu_intel.c      | 14 ++++++-------
 xen/arch/x86/hvm/vmx/vmcs.c        | 40 ++++++++++++++++++++++++++++----------
 xen/arch/x86/hvm/vmx/vmx.c         | 22 +++++++++++----------
 xen/include/asm-x86/hvm/vmx/vmcs.h | 34 ++++++++++++++++++++------------
 xen/include/xen/sched.h            |  2 +-
 5 files changed, 72 insertions(+), 40 deletions(-)

diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index 207e2e7..c499e69 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -455,12 +455,12 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
     if ( is_hvm_vcpu(v) )
     {
         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-        if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
+        if ( vmx_add_host_load_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
             goto out_err;
 
-        if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
+        if ( vmx_add_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
             goto out_err;
-        vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+        vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, 0);
     }
 
     core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
@@ -613,7 +613,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t 
msr_content,
             return -EINVAL;
 
         if ( is_hvm_vcpu(v) )
-            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+            vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
                                &core2_vpmu_cxt->global_ctrl);
         else
             rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
@@ -682,7 +682,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t 
msr_content,
                 return -EINVAL;
 
             if ( is_hvm_vcpu(v) )
-                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
                                    &core2_vpmu_cxt->global_ctrl);
             else
                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
@@ -701,7 +701,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t 
msr_content,
     else
     {
         if ( is_hvm_vcpu(v) )
-            vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+            vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
         else
             wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
     }
@@ -735,7 +735,7 @@ static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t 
*msr_content)
             break;
         case MSR_CORE_PERF_GLOBAL_CTRL:
             if ( is_hvm_vcpu(v) )
-                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
             else
                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
             break;
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index d6ebcd6..2541ee0 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1295,13 +1295,15 @@ static struct vmx_msr_entry *locate_msr_entry(
     return start;
 }
 
-struct vmx_msr_entry *vmx_find_msr(uint32_t msr, enum vmx_msr_list_type type)
+struct vmx_msr_entry *vmx_find_msr(const struct vcpu *v, uint32_t msr,
+                                   enum vmx_msr_list_type type)
 {
-    struct vcpu *curr = current;
-    struct arch_vmx_struct *vmx = &curr->arch.hvm_vmx;
+    const struct arch_vmx_struct *vmx = &v->arch.hvm_vmx;
     struct vmx_msr_entry *start = NULL, *ent, *end;
     unsigned int total;
 
+    ASSERT(v == current || !vcpu_runnable(v));
+
     switch ( type )
     {
     case VMX_MSR_HOST:
@@ -1327,12 +1329,14 @@ struct vmx_msr_entry *vmx_find_msr(uint32_t msr, enum 
vmx_msr_list_type type)
     return ((ent < end) && (ent->index == msr)) ? ent : NULL;
 }
 
-int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type)
+int vmx_add_msr(struct vcpu *v, uint32_t msr, enum vmx_msr_list_type type)
 {
-    struct vcpu *curr = current;
-    struct arch_vmx_struct *vmx = &curr->arch.hvm_vmx;
+    struct arch_vmx_struct *vmx = &v->arch.hvm_vmx;
     struct vmx_msr_entry **ptr, *start = NULL, *ent, *end;
     unsigned int total;
+    int rc;
+
+    ASSERT(v == current || !vcpu_runnable(v));
 
     switch ( type )
     {
@@ -1351,13 +1355,18 @@ int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type 
type)
         return -EINVAL;
     }
 
+    vmx_vmcs_enter(v);
+
     /* Allocate memory on first use. */
     if ( unlikely(!*ptr) )
     {
         paddr_t addr;
 
         if ( (*ptr = alloc_xenheap_page()) == NULL )
-            return -ENOMEM;
+        {
+            rc = -ENOMEM;
+            goto out;
+        }
 
         addr = virt_to_maddr(*ptr);
 
@@ -1379,10 +1388,16 @@ int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type 
type)
     ent   = locate_msr_entry(start, end, msr);
 
     if ( (ent < end) && (ent->index == msr) )
-        return 0;
+    {
+        rc = 0;
+        goto out;
+    }
 
     if ( total == (PAGE_SIZE / sizeof(*ent)) )
-        return -ENOSPC;
+    {
+        rc = -ENOSPC;
+        goto out;
+    }
 
     memmove(ent + 1, ent, sizeof(*ent) * (end - ent));
 
@@ -1403,7 +1418,12 @@ int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type 
type)
         break;
     }
 
-    return 0;
+    rc = 0;
+
+ out:
+    vmx_vmcs_exit(v);
+
+    return rc;
 }
 
 void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index bfabcab..25dd204 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2814,7 +2814,7 @@ static int is_last_branch_msr(u32 ecx)
 
 static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
 {
-    const struct vcpu *curr = current;
+    struct vcpu *curr = current;
 
     HVM_DBG_LOG(DBG_LEVEL_MSR, "ecx=%#x", msr);
 
@@ -2893,7 +2893,7 @@ static int vmx_msr_read_intercept(unsigned int msr, 
uint64_t *msr_content)
         if ( passive_domain_do_rdmsr(msr, msr_content) )
             goto done;
 
-        if ( vmx_read_guest_msr(msr, msr_content) == 0 )
+        if ( vmx_read_guest_msr(curr, msr, msr_content) == 0 )
             break;
 
         if ( is_last_branch_msr(msr) )
@@ -3105,7 +3105,7 @@ static int vmx_msr_write_intercept(unsigned int msr, 
uint64_t msr_content)
 
             for ( ; (rc == 0) && lbr->count; lbr++ )
                 for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
-                    if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
+                    if ( (rc = vmx_add_guest_msr(v, lbr->base + i)) == 0 )
                     {
                         vmx_clear_msr_intercept(v, lbr->base + i, VMX_MSR_RW);
                         if ( lbr_tsx_fixup_needed )
@@ -3145,7 +3145,7 @@ static int vmx_msr_write_intercept(unsigned int msr, 
uint64_t msr_content)
         if ( wrmsr_viridian_regs(msr, msr_content) ) 
             break;
 
-        if ( vmx_write_guest_msr(msr, msr_content) == 0 ||
+        if ( vmx_write_guest_msr(v, msr, msr_content) == 0 ||
              is_last_branch_msr(msr) )
             break;
 
@@ -4160,7 +4160,7 @@ static void lbr_tsx_fixup(void)
     struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
     struct vmx_msr_entry *msr;
 
-    if ( (msr = vmx_find_msr(lbr_from_start, VMX_MSR_GUEST)) != NULL )
+    if ( (msr = vmx_find_msr(curr, lbr_from_start, VMX_MSR_GUEST)) != NULL )
     {
         /*
          * Sign extend into bits 61:62 while preserving bit 63
@@ -4170,20 +4170,22 @@ static void lbr_tsx_fixup(void)
             msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
     }
 
-    if ( (msr = vmx_find_msr(lbr_lastint_from, VMX_MSR_GUEST)) != NULL )
+    if ( (msr = vmx_find_msr(curr, lbr_lastint_from, VMX_MSR_GUEST)) != NULL )
         msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
 }
 
-static void sign_extend_msr(u32 msr, int type)
+static void sign_extend_msr(struct vcpu *v, u32 msr, int type)
 {
     struct vmx_msr_entry *entry;
 
-    if ( (entry = vmx_find_msr(msr, type)) != NULL )
+    if ( (entry = vmx_find_msr(v, msr, type)) != NULL )
         entry->data = canonicalise_addr(entry->data);
 }
 
 static void bdw_erratum_bdf14_fixup(void)
 {
+    struct vcpu *curr = current;
+
     /*
      * Occasionally, on certain Broadwell CPUs MSR_IA32_LASTINTTOIP has
      * been observed to have the top three bits corrupted as though the
@@ -4193,8 +4195,8 @@ static void bdw_erratum_bdf14_fixup(void)
      * erratum BDF14. Fix up MSR_IA32_LASTINT{FROM,TO}IP by
      * sign-extending into bits 48:63.
      */
-    sign_extend_msr(MSR_IA32_LASTINTFROMIP, VMX_MSR_GUEST);
-    sign_extend_msr(MSR_IA32_LASTINTTOIP, VMX_MSR_GUEST);
+    sign_extend_msr(curr, MSR_IA32_LASTINTFROMIP, VMX_MSR_GUEST);
+    sign_extend_msr(curr, MSR_IA32_LASTINTTOIP, VMX_MSR_GUEST);
 }
 
 static void lbr_fixup(void)
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 20882d1..62afebe 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -130,10 +130,17 @@ struct arch_vmx_struct {
     uint64_t             sfmask;
 
     struct vmx_msr_bitmap *msr_bitmap;
-    unsigned int         msr_count;
+
+    /*
+     * Most accesses to the MSR host/guest load/save lists are in current
+     * context.  However, the data can be modified by toolstack/migration
+     * actions.  Remote access is only permitted for paused vcpus, and is
+     * protected under the domctl lock.
+     */
     struct vmx_msr_entry *msr_area;
-    unsigned int         host_msr_count;
     struct vmx_msr_entry *host_msr_area;
+    unsigned int         msr_count;
+    unsigned int         host_msr_count;
 
     unsigned long        eoi_exitmap_changed;
     DECLARE_BITMAP(eoi_exit_bitmap, NR_VECTORS);
@@ -537,23 +544,25 @@ enum vmx_msr_list_type {
     VMX_MSR_GUEST,          /* MSRs saved on VMExit, loaded on VMEntry. */
 };
 
-int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type);
+int vmx_add_msr(struct vcpu *v, uint32_t msr, enum vmx_msr_list_type type);
 
-static inline int vmx_add_host_load_msr(uint32_t msr)
+static inline int vmx_add_guest_msr(struct vcpu *v, uint32_t msr)
 {
-    return vmx_add_msr(msr, VMX_MSR_HOST);
+    return vmx_add_msr(v, msr, VMX_MSR_GUEST);
 }
 
-static inline int vmx_add_guest_msr(uint32_t msr)
+static inline int vmx_add_host_load_msr(struct vcpu *v, uint32_t msr)
 {
-    return vmx_add_msr(msr, VMX_MSR_GUEST);
+    return vmx_add_msr(v, msr, VMX_MSR_HOST);
 }
 
-struct vmx_msr_entry *vmx_find_msr(uint32_t msr, enum vmx_msr_list_type type);
+struct vmx_msr_entry *vmx_find_msr(const struct vcpu *v, uint32_t msr,
+                                   enum vmx_msr_list_type type);
 
-static inline int vmx_read_guest_msr(uint32_t msr, uint64_t *val)
+static inline int vmx_read_guest_msr(const struct vcpu *v, uint32_t msr,
+                                     uint64_t *val)
 {
-    const struct vmx_msr_entry *ent = vmx_find_msr(msr, VMX_MSR_GUEST);
+    const struct vmx_msr_entry *ent = vmx_find_msr(v, msr, VMX_MSR_GUEST);
 
     if ( !ent )
         return -ESRCH;
@@ -563,9 +572,10 @@ static inline int vmx_read_guest_msr(uint32_t msr, 
uint64_t *val)
     return 0;
 }
 
-static inline int vmx_write_guest_msr(uint32_t msr, uint64_t val)
+static inline int vmx_write_guest_msr(struct vcpu *v, uint32_t msr,
+                                      uint64_t val)
 {
-    struct vmx_msr_entry *ent = vmx_find_msr(msr, VMX_MSR_GUEST);
+    struct vmx_msr_entry *ent = vmx_find_msr(v, msr, VMX_MSR_GUEST);
 
     if ( !ent )
         return -ESRCH;
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 99d2af2..e79d5a3 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -788,7 +788,7 @@ static inline struct domain *next_domain_in_cpupool(
 #define _VPF_parked          8
 #define VPF_parked           (1UL<<_VPF_parked)
 
-static inline int vcpu_runnable(struct vcpu *v)
+static inline bool vcpu_runnable(const struct vcpu *v)
 {
     return !(v->pause_flags |
              atomic_read(&v->pause_count) |
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.