[Xen-devel] [PATCH 3 of 9] Initiate failover if a packet is not received every 500ms



# HG changeset patch
# User Brendan Cully <brendan@xxxxxxxxx>
# Date 1240355510 25200
# Node ID b51238ea926948383500b94cd227321eb40a82dd
# Parent  f5c0d3208d8ae9183391398d52c9be5969da24ec
Initiate failover if a packet is not received every 500ms.
This breaks checkpointing at intervals longer than 500ms; the timeout
should be made configurable.
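
A minimal sketch of one way to make the interval configurable (not part of
this patch; XC_HEARTBEAT_MS is a hypothetical knob, not an existing one):
read the heartbeat interval from the environment and fall back to the
current 500ms default.

  #include <stdlib.h>

  /* Sketch only: XC_HEARTBEAT_MS is hypothetical, not an existing variable. */
  static long heartbeat_ms(void)
  {
      const char *s = getenv("XC_HEARTBEAT_MS");  /* hypothetical override */
      long ms = s ? strtol(s, NULL, 10) : 0;
      return (ms > 0) ? ms : 500;                 /* default matches HEARTBEAT_MS */
  }

read_exact_timed() below and the resume() helper in xc_restore.c could then
derive their select() timeouts from heartbeat_ms() instead of the hard-coded
500ms, splitting values of one second or more across tv_sec and tv_usec.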

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -59,6 +59,51 @@
 /* Address size of the guest, in bytes */
 unsigned int guest_width;
 
+/* set when a consistent image is available */
+static int completed = 0;
+
+#define HEARTBEAT_MS 500
+
+# ifndef __MINIOS__
+static ssize_t read_exact_timed(int fd, void* buf, size_t size)
+{
+  size_t offset = 0;
+  ssize_t len;
+  struct timeval tv;
+  fd_set rfds;
+
+  while ( offset < size )
+  {
+    if (completed) {
+      /* expect a heartbeat every HEARTBEAT_MS ms maximum */
+      tv.tv_sec = 0;
+      tv.tv_usec = HEARTBEAT_MS * 1000;
+
+      FD_ZERO(&rfds);
+      FD_SET(fd, &rfds);
+      len = select(fd + 1, &rfds, NULL, NULL, &tv);
+      if ( !FD_ISSET(fd, &rfds) ) {
+       fprintf(stderr, "read_exact_timed failed (select returned %zd)\n", len);
+       return -1;
+      }
+    }
+
+    len = read(fd, buf + offset, size - offset);
+    if ( (len == -1) && ((errno == EINTR) || (errno == EAGAIN)) )
+      continue;
+    if ( len <= 0 )
+      return -1;
+    offset += len;
+  }
+
+  return 0;
+}
+
+#define read_exact read_exact_timed
+
+#else
+#define read_exact_timed read_exact
+#endif
 /*
 ** In the state file (or during transfer), all page-table pages are
 ** converted into a 'canonical' form where references to actual mfns
@@ -413,7 +458,9 @@
   // DPRINTF("reading batch of %d pages\n", count);
 
   if (!count) {
+    /*
     DPRINTF("Last batch read\n");
+    */
     return 0;
   } else if (count == -1) {
     DPRINTF("Entering page verify mode\n");
@@ -704,7 +751,8 @@
 int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn,
-                      unsigned int hvm, unsigned int pae)
+                      unsigned int hvm, unsigned int pae,
+                     int (*resume)(void*), void* resumedata)
 {
     DECLARE_DOMCTL;
     int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0;
@@ -752,7 +800,6 @@
     /* Buffer for holding HVM context */
     uint8_t *hvm_buf = NULL;
 
-    int completed = 0;
     pagebuf_t pagebuf;
     tailbuf_t tailbuf, tmptail;
     void* vcpup;
@@ -946,7 +993,9 @@
         goto out;
     }
 
+    /*
     DPRINTF("Received all pages (%d races)\n", nraces);
+    */
 
     if ( hvm ) 
     {
@@ -1021,28 +1070,40 @@
     /* Non-HVM guests only from here on */
 
     if (!completed) {
+      int flags = 0;
+
       if ( buffer_tail(&tailbuf, io_fd, max_vcpu_id, vcpumap,
                       ext_vcpucontext) < 0 ) {
        ERROR ("error buffering image tail");
        goto out;
       }
+
       completed = 1;
+      /* shift into nonblocking mode for the remainder */
+      if ((flags = fcntl(io_fd, F_GETFL, 0)) < 0)
+       flags = 0;
+      fcntl(io_fd, F_SETFL, flags | O_NONBLOCK);
     }
-    
+
+    /*
     DPRINTF("Buffered checkpoint\n");
-    if (pagebuf_get(&pagebuf, io_fd)) {
-         ERROR("error when buffering batch, finishing\n");
-         goto finish;
+    */
+    if (!resume(resumedata)) {
+      if (pagebuf_get(&pagebuf, io_fd)) {
+       ERROR("error when buffering batch, finishing\n");
+       goto finish;
+      }
+      memset(&tmptail, 0, sizeof(tmptail));
+      if ( buffer_tail(&tmptail, io_fd, max_vcpu_id, vcpumap,
+                      ext_vcpucontext) < 0 ) {
+       ERROR ("error buffering image tail, finishing");
+       goto finish;
+      }
+      tailbuf_free(&tailbuf);
+      memcpy(&tailbuf, &tmptail, sizeof(tailbuf));
+
+      goto loadpages;
     }
-    if ( buffer_tail(&tmptail, io_fd, max_vcpu_id, vcpumap,
-                    ext_vcpucontext) < 0 ) {
-      ERROR ("error buffering image tail, finishing");
-         goto finish;
-    }
-    tailbuf_free(&tailbuf);
-    memcpy(&tailbuf, &tmptail, sizeof(tailbuf));
-
-    goto loadpages;
 
   finish:
 
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -54,12 +54,16 @@
  * @parm store_mfn returned with the mfn of the store page
  * @parm hvm non-zero if this is a HVM restore
  * @parm pae non-zero if this HVM domain has PAE support enabled
+ * @parm resume a function returning 1 to resume or 0 to expect
+ *       another checkpoint
+ * @parm resumedata a void pointer to pass back to the resume function
  * @return 0 on success, -1 on failure
  */
 int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn,
-                      unsigned int hvm, unsigned int pae);
+                      unsigned int hvm, unsigned int pae,
+                     int (*resume)(void*), void* resumedata);
 
 /**
  * This function will create a domain for a paravirtualized Linux
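
A usage note on the resume callback documented above (a sketch, not part of
this patch): the callback returns 1 to resume the domain immediately and 0
to wait for another checkpoint, so a caller that does not stream checkpoints
could simply pass:

  /* Sketch: plain restore, resume as soon as the first complete image arrives. */
  static int resume_now(void *data)
  {
      (void)data;   /* no per-caller state needed */
      return 1;     /* 1 = resume now, 0 = expect another checkpoint */
  }

  /* ... handed through the extended prototype:
   * xc_domain_restore(xc_handle, io_fd, dom, store_evtchn, &store_mfn,
   *                   console_evtchn, &console_mfn, hvm, pae,
   *                   resume_now, NULL);
   */

The heartbeat-driven callback added to tools/xcutils/xc_restore.c below is
the checkpointing counterpart of this.
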
diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py
+++ b/tools/python/xen/xend/XendCheckpoint.py
@@ -318,7 +318,8 @@
             restore_image.setCpuid()
 
 
-        os.read(fd, 1)           # Wait for source to close connection
+        #os.read(fd, 1)           # Wait for source to close connection
+        # ^^ breaks failover, and I don't know why it's needed.
         
         dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
 
diff --git a/tools/xcutils/xc_restore.c b/tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c
+++ b/tools/xcutils/xc_restore.c
@@ -11,10 +11,59 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdio.h>
+#include <string.h>
+#include <sys/select.h>
 
 #include <xenctrl.h>
 #include <xenguest.h>
 
+typedef struct {
+  int fd;
+} resume_t;
+
+static int resume(void* resumedata)
+{
+  fd_set rfds;
+  struct timeval tv;
+//  char buf[64];
+  int rc;
+  resume_t* rd = resumedata;
+
+  FD_ZERO(&rfds);
+
+  do {
+    /* expect a heartbeat every 500ms maximum */
+    tv.tv_sec = 0;
+    tv.tv_usec = 500000;
+
+    FD_SET(rd->fd, &rfds);
+    rc = select(rd->fd + 1, &rfds, NULL, NULL, &tv);
+    if (!FD_ISSET(rd->fd, &rfds)) {
+      fprintf(stderr, "resume: heartbeat failed (select returned %d)\n", rc);
+      return -1;
+    }
+#if 0
+    rc = read(rd->fd, buf, 4);
+    if (rc == 4 && !strncmp(buf, "done", 4)) {
+      /*
+      fprintf(stderr, "resume: received 'done'\n");
+      */
+      return 0;
+    }
+    if (rc < 4 || strncmp(buf, "wait", 4)) {
+      if (rc >= 0)
+        buf[rc] = '\0';
+      else
+        buf[0] = '\0';
+      fprintf(stderr, "bad heartbeat response: %d, %s\n", rc, buf);
+      return -1;
+    }
+#endif
+  } while(0);
+
+  return 0;
+}
+
 int
 main(int argc, char **argv)
 {
@@ -22,6 +71,7 @@
     unsigned int hvm, pae, apic;
     int xc_fd, io_fd, ret;
     unsigned long store_mfn, console_mfn;
+    resume_t rdata;
 
     if ( argc != 8 )
         errx(1, "usage: %s iofd domid store_evtchn "
@@ -39,8 +89,11 @@
     pae  = atoi(argv[6]);
     apic = atoi(argv[7]);
 
+    rdata.fd = io_fd;
+
     ret = xc_domain_restore(xc_fd, io_fd, domid, store_evtchn, &store_mfn,
-                            console_evtchn, &console_mfn, hvm, pae);
+                            console_evtchn, &console_mfn, hvm, pae,
+                           resume, &rdata);
 
     if ( ret == 0 )
     {
