[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] x86/vmce: fill MSR_IA32_MCG_STATUS on all vcpus in broadcast case



commit 795b988e7c3d271c9472d4b9150b57a3e5b5a50b
Author:     Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
AuthorDate: Wed Mar 8 15:10:29 2017 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Wed Mar 8 15:10:29 2017 +0100

    x86/vmce: fill MSR_IA32_MCG_STATUS on all vcpus in broadcast case
    
    The current implementation only fills MC MSRs on vcpu0 and leaves MC
    MSRs on other vcpus empty in the broadcast case. When the guest reads 0
    from MSR_IA32_MCG_STATUS on vcpuN (N > 0), it may conclude that it is
    not possible to recover execution on that vcpu and then panic, even
    though the MSR_IA32_MCG_STATUS value filled on vcpu0 may imply that the
    injected vMCE is actually recoverable. To avoid such unnecessary guest
    panics, set MSR_IA32_MCG_STATUS on vcpuN (N > 0) to
    MCG_STATUS_MCIP|MCG_STATUS_RIPV.
    
    In addition, fill_vmsr_data(mc_bank, ...) is changed to return -EINVAL
    rather than 0, if an invalid domain ID is contained in mc_bank.
    
    Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
---
 xen/arch/x86/cpu/mcheck/mcaction.c | 16 ++++-----
 xen/arch/x86/cpu/mcheck/vmce.c     | 74 ++++++++++++++++++++++++++------------
 xen/arch/x86/cpu/mcheck/vmce.h     |  2 +-
 3 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/xen/arch/x86/cpu/mcheck/mcaction.c b/xen/arch/x86/cpu/mcheck/mcaction.c
index 32056f2..dab9eac 100644
--- a/xen/arch/x86/cpu/mcheck/mcaction.c
+++ b/xen/arch/x86/cpu/mcheck/mcaction.c
@@ -88,22 +88,22 @@ mc_memerr_dhandler(struct mca_binfo *binfo,
                     goto vmce_failed;
                 }
 
+                if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
+                    global->mc_vcpuid == XEN_MC_VCPUID_INVALID)
+                    vmce_vcpuid = VMCE_INJECT_BROADCAST;
+                else
+                    vmce_vcpuid = global->mc_vcpuid;
+
                 bank->mc_addr = gfn << PAGE_SHIFT |
                   (bank->mc_addr & (PAGE_SIZE -1 ));
-                if ( fill_vmsr_data(bank, d,
-                      global->mc_gstatus) == -1 )
+                if (fill_vmsr_data(bank, d, global->mc_gstatus,
+                                   vmce_vcpuid == VMCE_INJECT_BROADCAST))
                 {
                     mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d "
                       "failed\n", bank->mc_domid);
                     goto vmce_failed;
                 }
 
-                if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
-                    global->mc_vcpuid == XEN_MC_VCPUID_INVALID)
-                    vmce_vcpuid = VMCE_INJECT_BROADCAST;
-                else
-                    vmce_vcpuid = global->mc_vcpuid;
-
                 /* We will inject vMCE to DOMU*/
                 if ( inject_vmce(d, vmce_vcpuid) < 0 )
                 {
diff --git a/xen/arch/x86/cpu/mcheck/vmce.c b/xen/arch/x86/cpu/mcheck/vmce.c
index 7dde457..01a5826 100644
--- a/xen/arch/x86/cpu/mcheck/vmce.c
+++ b/xen/arch/x86/cpu/mcheck/vmce.c
@@ -386,38 +386,66 @@ int inject_vmce(struct domain *d, int vcpu)
     return ret;
 }
 
-int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
-                   uint64_t gstatus)
+static int vcpu_fill_mc_msrs(struct vcpu *v, uint64_t mcg_status,
+                             uint64_t mci_status, uint64_t mci_addr,
+                             uint64_t mci_misc)
 {
-    struct vcpu *v = d->vcpu[0];
-
-    if ( mc_bank->mc_domid != DOMID_INVALID )
+    if ( v->arch.vmce.mcg_status & MCG_STATUS_MCIP )
     {
-        if ( v->arch.vmce.mcg_status & MCG_STATUS_MCIP )
-        {
-            mce_printk(MCE_QUIET, "MCE: guest has not handled previous"
-                       " vMCE yet!\n");
-            return -1;
-        }
+        mce_printk(MCE_QUIET, "MCE: %pv: guest has not handled previous"
+                   " vMCE yet!\n", v);
+        return -EBUSY;
+    }
 
-        spin_lock(&v->arch.vmce.lock);
+    spin_lock(&v->arch.vmce.lock);
 
-        v->arch.vmce.mcg_status = gstatus;
-        /*
-         * 1. Skip bank 0 to avoid 'bank 0 quirk' of old processors
-         * 2. Filter MCi_STATUS MSCOD model specific error code to guest
-         */
-        v->arch.vmce.bank[1].mci_status = mc_bank->mc_status &
-                                              MCi_STATUS_MSCOD_MASK;
-        v->arch.vmce.bank[1].mci_addr = mc_bank->mc_addr;
-        v->arch.vmce.bank[1].mci_misc = mc_bank->mc_misc;
+    v->arch.vmce.mcg_status = mcg_status;
+    /*
+     * 1. Skip bank 0 to avoid 'bank 0 quirk' of old processors
+     * 2. Filter MCi_STATUS MSCOD model specific error code to guest
+     */
+    v->arch.vmce.bank[1].mci_status = mci_status & MCi_STATUS_MSCOD_MASK;
+    v->arch.vmce.bank[1].mci_addr = mci_addr;
+    v->arch.vmce.bank[1].mci_misc = mci_misc;
 
-        spin_unlock(&v->arch.vmce.lock);
-    }
+    spin_unlock(&v->arch.vmce.lock);
 
     return 0;
 }
 
+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
+                   uint64_t gstatus, bool broadcast)
+{
+    struct vcpu *v = d->vcpu[0];
+    int ret, err;
+
+    if ( mc_bank->mc_domid == DOMID_INVALID )
+        return -EINVAL;
+
+    /*
+     * vMCE with the actual error information is injected to vCPU0,
+     * and, if broadcast is required, we choose to inject less severe
+     * vMCEs to other vCPUs. Thus guest can always get the severest
+     * error (i.e. the actual one) on vCPU0. If guest can recover from
+     * the severest error on vCPU0, the less severe errors on other
+     * vCPUs will not prevent guest from recovering on those vCPUs.
+     */
+    ret = vcpu_fill_mc_msrs(v, gstatus, mc_bank->mc_status,
+                            mc_bank->mc_addr, mc_bank->mc_misc);
+    if ( broadcast )
+        for_each_vcpu ( d, v )
+        {
+            if ( !v->vcpu_id )
+                continue;
+            err = vcpu_fill_mc_msrs(v, MCG_STATUS_MCIP | MCG_STATUS_RIPV,
+                                    0, 0, 0);
+            if ( err )
+                ret = err;
+        }
+
+    return ret;
+}
+
 /* It's said some ram is setup as mmio_direct for UC cache attribute */
 #define P2M_UNMAP_TYPES (p2m_to_mask(p2m_ram_rw) \
                                 | p2m_to_mask(p2m_ram_logdirty) \
diff --git a/xen/arch/x86/cpu/mcheck/vmce.h b/xen/arch/x86/cpu/mcheck/vmce.h
index 163ce3c..74f6381 100644
--- a/xen/arch/x86/cpu/mcheck/vmce.h
+++ b/xen/arch/x86/cpu/mcheck/vmce.h
@@ -17,7 +17,7 @@ int vmce_amd_rdmsr(const struct vcpu *, uint32_t msr, uint64_t *val);
 int vmce_amd_wrmsr(struct vcpu *, uint32_t msr, uint64_t val);
 
 int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
-    uint64_t gstatus);
+                   uint64_t gstatus, bool broadcast);
 
 #define VMCE_INJECT_BROADCAST (-1)
 int inject_vmce(struct domain *d, int vcpu);
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.