[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 5/5] Xen/MCE: handle broken page occurs during migration



Xen/MCE: handle broken page occurs during migration

This patch handles a broken page that occurs during migration.

It monitors the critical area of live migration (from the vMCE point of view,
the copypages stage of migration is the critical area, while the other stages
are not).

If a vMCE occurs in the critical area of live migration, the broken page is
marked in the dirty bitmap, so that at the copypages stage of migration its
pfn_type and pfn number are transferred to the target, which then takes
appropriate action.

At the target, the p2m type of the broken page is set to p2m_ram_broken, so
that if the guest accesses the broken page again, it kills itself as expected.

Suggested-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>

diff -r 3313ee9f6142 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Thu Oct 25 05:49:11 2012 +0800
+++ b/tools/libxc/xc_domain.c   Tue Oct 30 06:07:05 2012 +0800
@@ -299,6 +299,24 @@
     return ret ? -1 : 0;
 }
 
+/* Start (start != 0) or end (start == 0) vMCE monitoring; returns 0 or -1. */
+int xc_domain_vmce_monitor(xc_interface *xch,
+                           uint32_t domid,
+                           uint32_t start)
+{
+    int ret;
+    DECLARE_DOMCTL;
+
+    if ( start )
+        domctl.cmd = XEN_DOMCTL_vmce_monitor_start;
+    else
+        domctl.cmd = XEN_DOMCTL_vmce_monitor_end;
+    domctl.domain = (domid_t)domid;
+    ret = do_domctl(xch, &domctl);
+
+    return ret ? -1 : 0;
+}
+
 /* get info from hvm guest for save */
 int xc_domain_hvm_getcontext(xc_interface *xch,
                              uint32_t domid,
diff -r 3313ee9f6142 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c      Thu Oct 25 05:49:11 2012 +0800
+++ b/tools/libxc/xc_domain_save.c      Tue Oct 30 06:07:05 2012 +0800
@@ -1109,6 +1109,13 @@
         goto out;
     }
 
+    /* Start vmce monitor */
+    if ( xc_domain_vmce_monitor(xch, dom, 1) )
+    {
+        PERROR("Error starting vmce monitor");
+        goto out;
+    }
+
   copypages:
 #define wrexact(fd, buf, len) write_buffer(xch, last_iter, ob, (fd), (buf), (len))
 #define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, ob, (fd), (buf), (len))
@@ -1582,6 +1589,13 @@
 
     DPRINTF("All memory is saved\n");
 
+    /* End vmce monitor */
+    if ( xc_domain_vmce_monitor(xch, dom, 0) )
+    {
+        PERROR("Error ending vmce monitor");
+        goto out;
+    }
+
     /* After last_iter, buffer the rest of pagebuf & tailbuf data into a
      * separate output buffer and flush it after the compressed page chunks.
      */
diff -r 3313ee9f6142 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Oct 25 05:49:11 2012 +0800
+++ b/tools/libxc/xenctrl.h     Tue Oct 30 06:07:05 2012 +0800
@@ -586,6 +586,17 @@
                            unsigned long pfn);
 
 /**
+ * This function starts or ends monitoring of vMCE events for a domain.
+ * @parm xch a handle to an open hypervisor interface
+ * @parm domid the domain id monitored
+ * @parm start non-zero to start monitoring, zero to end monitoring
+ * @return <0 on failure, 0 on success
+ */
+int xc_domain_vmce_monitor(xc_interface *xch,
+                           uint32_t domid,
+                           uint32_t start);
+
+/**
  * This function returns information about the context of a hvm domain
  * @parm xch a handle to an open hypervisor interface
  * @parm domid the domain to get information from
diff -r 3313ee9f6142 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Thu Oct 25 05:49:11 2012 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Tue Oct 30 06:07:05 2012 +0800
@@ -342,6 +342,22 @@
                     goto vmce_failed;
                 }
 
+                if ( unlikely(d->arch.vmce_monitor) )
+                {
+                    /*
+                     * A vMCE occurred during migration.
+                     *
+                     * Mark the broken page in the dirty bitmap so that the
+                     * copypages stage transfers its pfn_type and pfn number to
+                     * the target, which then takes appropriate action.
+                     *
+                     * At the target, the p2m type of the broken page is set to
+                     * p2m_ram_broken, so that if the guest accesses the page
+                     * again, it kills itself as expected.
+                     */
+                    paging_mark_dirty(d, mfn);
+                }
+
                 if ( unmmap_broken_page(d, _mfn(mfn), gfn) )
                 {
                     printk("Unmap broken memory %lx for DOM%d failed\n",
diff -r 3313ee9f6142 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Thu Oct 25 05:49:11 2012 +0800
+++ b/xen/arch/x86/domctl.c     Tue Oct 30 06:07:05 2012 +0800
@@ -1599,6 +1599,44 @@
     }
     break;
 
+    case XEN_DOMCTL_vmce_monitor_start:
+    {
+        struct domain *d;
+
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d != NULL )
+        {
+            if ( d->arch.vmce_monitor )
+                ret = -EBUSY;
+            else
+                d->arch.vmce_monitor = 1;
+
+            rcu_unlock_domain(d);
+        }
+        else
+            ret = -ESRCH;
+    }
+    break;
+
+    case XEN_DOMCTL_vmce_monitor_end:
+    {
+        struct domain *d;
+
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d != NULL)
+        {
+            if ( !d->arch.vmce_monitor )
+                ret = -EINVAL;
+            else
+                d->arch.vmce_monitor = 0;
+
+            rcu_unlock_domain(d);
+        }
+        else
+            ret = -ESRCH;
+    }
+    break;
+
     default:
         ret = iommu_do_domctl(domctl, u_domctl);
         break;
diff -r 3313ee9f6142 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Thu Oct 25 05:49:11 2012 +0800
+++ b/xen/include/asm-x86/domain.h      Tue Oct 30 06:07:05 2012 +0800
@@ -279,6 +279,10 @@
     bool_t has_32bit_shinfo;
     /* Domain cannot handle spurious page faults? */
     bool_t suppress_spurious_page_faults;
+    /* Monitoring guest memory copy of migration
+     * = 0 - not monitoring
+     * = 1 - monitoring */
+    bool_t vmce_monitor;
 
     /* Continuable domain_relinquish_resources(). */
     enum {
diff -r 3313ee9f6142 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Thu Oct 25 05:49:11 2012 +0800
+++ b/xen/include/public/domctl.h       Tue Oct 30 06:07:05 2012 +0800
@@ -908,6 +908,8 @@
 #define XEN_DOMCTL_audit_p2m                     65
 #define XEN_DOMCTL_set_virq_handler              66
 #define XEN_DOMCTL_set_broken_page_p2m           67
+#define XEN_DOMCTL_vmce_monitor_start            68
+#define XEN_DOMCTL_vmce_monitor_end              69
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002

Attachment: 5_vmce_during_migration.patch
Description: 5_vmce_during_migration.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.