[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 4/7] x86/vmce: fill MSR_IA32_MCG_STATUS on all vcpus in broadcast case



The current implementation only fills MC MSRs on vcpu0 and leaves MC
MSRs on other vcpus empty in the broadcast case. When guest reads 0
from MSR_IA32_MCG_STATUS on vcpuN (N > 0), it may think it's not
possible to recover the execution on that vcpu and then get panic,
although MSR_IA32_MCG_STATUS filled on vcpu0 may imply the injected
vMCE is actually recoverable. To avoid such unnecessary guest panic,
set MSR_IA32_MCG_STATUS on vcpuN (N > 0) to MCG_STATUS_MCIP|MCG_STATUS_RIPV.

In addition, fill_vmsr_data(mc_bank, ...) is changed to return -EINVAL
rather than 0, if an invalid domain ID is contained in mc_bank.

Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
---
Cc: Christoph Egger <chegger@xxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

Changes:
 * Fix the return value check of fill_vmsr_data() in mc_memerr_dhandler().
 * Include the behavior change in the commit message.
 * Compare by vcpu id rather than vcpu structure in fill_vmsr_data()
 * Exlpain vMCEs injected to each vCPU in the code comment.
 * Update MC MSRs for *all* vCPUs and return the last error (if any).
 * Remove "goto" in fill_vmsr_data().
---
 xen/arch/x86/cpu/mcheck/mcaction.c | 16 ++++-----
 xen/arch/x86/cpu/mcheck/vmce.c     | 74 ++++++++++++++++++++++++++------------
 xen/arch/x86/cpu/mcheck/vmce.h     |  2 +-
 3 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/xen/arch/x86/cpu/mcheck/mcaction.c 
b/xen/arch/x86/cpu/mcheck/mcaction.c
index 32056f2..dab9eac 100644
--- a/xen/arch/x86/cpu/mcheck/mcaction.c
+++ b/xen/arch/x86/cpu/mcheck/mcaction.c
@@ -88,22 +88,22 @@ mc_memerr_dhandler(struct mca_binfo *binfo,
                     goto vmce_failed;
                 }
 
+                if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
+                    global->mc_vcpuid == XEN_MC_VCPUID_INVALID)
+                    vmce_vcpuid = VMCE_INJECT_BROADCAST;
+                else
+                    vmce_vcpuid = global->mc_vcpuid;
+
                 bank->mc_addr = gfn << PAGE_SHIFT |
                   (bank->mc_addr & (PAGE_SIZE -1 ));
-                if ( fill_vmsr_data(bank, d,
-                      global->mc_gstatus) == -1 )
+                if (fill_vmsr_data(bank, d, global->mc_gstatus,
+                                   vmce_vcpuid == VMCE_INJECT_BROADCAST))
                 {
                     mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d "
                       "failed\n", bank->mc_domid);
                     goto vmce_failed;
                 }
 
-                if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
-                    global->mc_vcpuid == XEN_MC_VCPUID_INVALID)
-                    vmce_vcpuid = VMCE_INJECT_BROADCAST;
-                else
-                    vmce_vcpuid = global->mc_vcpuid;
-
                 /* We will inject vMCE to DOMU*/
                 if ( inject_vmce(d, vmce_vcpuid) < 0 )
                 {
diff --git a/xen/arch/x86/cpu/mcheck/vmce.c b/xen/arch/x86/cpu/mcheck/vmce.c
index 13b692c..2caccae 100644
--- a/xen/arch/x86/cpu/mcheck/vmce.c
+++ b/xen/arch/x86/cpu/mcheck/vmce.c
@@ -381,38 +381,66 @@ int inject_vmce(struct domain *d, int vcpu)
     return ret;
 }
 
-int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
-                   uint64_t gstatus)
+static int vcpu_fill_mc_msrs(struct vcpu *v, uint64_t mcg_status,
+                             uint64_t mci_status, uint64_t mci_addr,
+                             uint64_t mci_misc)
 {
-    struct vcpu *v = d->vcpu[0];
-
-    if ( mc_bank->mc_domid != DOMID_INVALID )
+    if ( v->arch.vmce.mcg_status & MCG_STATUS_MCIP )
     {
-        if ( v->arch.vmce.mcg_status & MCG_STATUS_MCIP )
-        {
-            mce_printk(MCE_QUIET, "MCE: guest has not handled previous"
-                       " vMCE yet!\n");
-            return -1;
-        }
+        mce_printk(MCE_QUIET, "MCE: %pv: guest has not handled previous"
+                   " vMCE yet!\n", v);
+        return -EBUSY;
+    }
 
-        spin_lock(&v->arch.vmce.lock);
+    spin_lock(&v->arch.vmce.lock);
 
-        v->arch.vmce.mcg_status = gstatus;
-        /*
-         * 1. Skip bank 0 to avoid 'bank 0 quirk' of old processors
-         * 2. Filter MCi_STATUS MSCOD model specific error code to guest
-         */
-        v->arch.vmce.bank[1].mci_status = mc_bank->mc_status &
-                                              MCi_STATUS_MSCOD_MASK;
-        v->arch.vmce.bank[1].mci_addr = mc_bank->mc_addr;
-        v->arch.vmce.bank[1].mci_misc = mc_bank->mc_misc;
+    v->arch.vmce.mcg_status = mcg_status;
+    /*
+     * 1. Skip bank 0 to avoid 'bank 0 quirk' of old processors
+     * 2. Filter MCi_STATUS MSCOD model specific error code to guest
+     */
+    v->arch.vmce.bank[1].mci_status = mci_status & MCi_STATUS_MSCOD_MASK;
+    v->arch.vmce.bank[1].mci_addr = mci_addr;
+    v->arch.vmce.bank[1].mci_misc = mci_misc;
 
-        spin_unlock(&v->arch.vmce.lock);
-    }
+    spin_unlock(&v->arch.vmce.lock);
 
     return 0;
 }
 
+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
+                   uint64_t gstatus, bool broadcast)
+{
+    struct vcpu *v = d->vcpu[0];
+    int ret, err;
+
+    if ( mc_bank->mc_domid == DOMID_INVALID )
+        return -EINVAL;
+
+    /*
+     * vMCE with the actual error information is injected to vCPU0,
+     * and, if broadcast is required, we choose to inject less severe
+     * vMCEs to other vCPUs. Thus guest can always get the severest
+     * error (i.e. the actual one) on vCPU0. If guest can recover from
+     * the severest error on vCPU0, the less severe errors on other
+     * vCPUs will not prevent guest from recovering on those vCPUs.
+     */
+    ret = vcpu_fill_mc_msrs(v, gstatus, mc_bank->mc_status,
+                            mc_bank->mc_addr, mc_bank->mc_misc);
+    if ( broadcast )
+        for_each_vcpu ( d, v )
+        {
+            if ( !v->vcpu_id )
+                continue;
+            err = vcpu_fill_mc_msrs(v, MCG_STATUS_MCIP | MCG_STATUS_RIPV,
+                                    0, 0, 0);
+            if ( err )
+                ret = err;
+        }
+
+    return ret;
+}
+
 /* It's said some ram is setup as mmio_direct for UC cache attribute */
 #define P2M_UNMAP_TYPES (p2m_to_mask(p2m_ram_rw) \
                                 | p2m_to_mask(p2m_ram_logdirty) \
diff --git a/xen/arch/x86/cpu/mcheck/vmce.h b/xen/arch/x86/cpu/mcheck/vmce.h
index 163ce3c..74f6381 100644
--- a/xen/arch/x86/cpu/mcheck/vmce.h
+++ b/xen/arch/x86/cpu/mcheck/vmce.h
@@ -17,7 +17,7 @@ int vmce_amd_rdmsr(const struct vcpu *, uint32_t msr, 
uint64_t *val);
 int vmce_amd_wrmsr(struct vcpu *, uint32_t msr, uint64_t val);
 
 int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
-    uint64_t gstatus);
+                   uint64_t gstatus, bool broadcast);
 
 #define VMCE_INJECT_BROADCAST (-1)
 int inject_vmce(struct domain *d, int vcpu);
-- 
2.10.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.