[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 08/19] libxc: suspend: Fix suspend event channel locking



On Tue, Mar 4, 2014 at 6:56 AM, Ian Jackson <ian.jackson@xxxxxxxxxxxxx> wrote:
Use fcntl F_SETLK, rather than writing our pid into a "lock" file.
That way if we crash we don't leave the lockfile lying about.  Callers
now need to keep the fd for our lockfile.  (We don't use flock because
we don't want anyone who inherits this fd across fork to end up with a
handle onto the lock.)

While we are here:
 * Move the lockfile to /var/run/xen
 * De-duplicate the calculation of the pathname
 * Compute the buffer size for the pathname so that it will definitely
   not overrun (and use the computed value everywhere)
 * Fix various error handling bugs

Signed-off-by: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
CC: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
CC: Ian Campbell <ian.campbell@xxxxxxxxxx>
CC: Shriram Rajagopalan <rshriram@xxxxxxxxx>
---
 tools/libxc/xc_suspend.c                           |  159 +++++++++++++-------
 tools/libxc/xenguest.h                             |   16 +-
 tools/libxl/libxl_dom.c                            |    6 +-
 tools/libxl/libxl_internal.h                       |    1 +
 tools/misc/xen-hptool.c                            |   19 ++-
 tools/python/xen/lowlevel/checkpoint/checkpoint.h  |    2 +-
 .../python/xen/lowlevel/checkpoint/libcheckpoint.c |    7 +-
 tools/xcutils/xc_save.c                            |    8 +-
 8 files changed, 149 insertions(+), 69 deletions(-)

diff --git a/tools/libxc/xc_suspend.c b/tools/libxc/xc_suspend.c
index 7968a44..f0f70c1 100644
--- a/tools/libxc/xc_suspend.c
+++ b/tools/libxc/xc_suspend.c
@@ -15,66 +15,115 @@
  */

 #include <assert.h>
+#include <unistd.h>
+#include <fcntl.h>

 #include "xc_private.h"
 #include "xenguest.h"

-#define SUSPEND_LOCK_FILE "/var/lib/xen/suspend_evtchn"
-static int lock_suspend_event(xc_interface *xch, int domid)
+#define SUSPEND_LOCK_FILE "/var/run/xen/suspend-evtchn-%d.lock"
+
+/*
+ * locking
+ */
+
+#define ERR(x) do{                                                      \
+    ERROR("Can't " #x " lock file for suspend event channel %s: %s\n",  \
+          suspend_file, strerror(errno));                               \
+    goto err;                                                           \
+}while(0)
+
+#define SUSPEND_FILE_BUFLEN (sizeof(SUSPEND_LOCK_FILE) + 10)
+
+static void get_suspend_file(char buf[SUSPEND_FILE_BUFLEN], int domid)
 {
-    int fd, rc;
-    mode_t mask;
-    char buf[128];
-    char suspend_file[256];
-
-    snprintf(suspend_file, sizeof(suspend_file), "%s_%d_lock.d",
-           SUSPEND_LOCK_FILE, domid);
-    mask = umask(022);
-    fd = open(suspend_file, O_CREAT | O_EXCL | O_RDWR, 0666);
-    if (fd < 0)
-    {
-        ERROR("Can't create lock file for suspend event channel %s\n",
-               suspend_file);
-        return -EINVAL;
+    snprintf(buf, sizeof(buf), SUSPEND_LOCK_FILE, domid);
+}
+
+static int lock_suspend_event(xc_interface *xch, int domid, int *lockfd)
+{
+    int fd = -1, r;
+    char suspend_file[SUSPEND_FILE_BUFLEN];
+    struct stat ours, theirs;
+    struct flock fl;
+
+    get_suspend_file(suspend_file, domid);
+
+    *lockfd = -1;
+
+    for (;;) {
+        if (fd >= 0)
+            close (fd);
+
+        fd = open(suspend_file, O_CREAT | O_RDWR, 0600);
+        if (fd < 0)
+            ERR("create");
+
+        r = fcntl(fd, F_SETFD, FD_CLOEXEC);
+        if (r)
+            ERR("fcntl F_SETFD FD_CLOEXEC");
+
+        memset(&fl, 0, sizeof(fl));
+        fl.l_type = F_WRLCK;
+        fl.l_whence = SEEK_SET;
+        fl.l_len = 1;
+        r = fcntl(fd, F_SETLK, &fl);
+        if (r)
+            ERR("fcntl F_SETLK");
+
+        r = fstat(fd, &ours);
+        if (r)
+            ERR("fstat");
+
+        r = stat(suspend_file, &theirs);
+        if (r) {
+            if (errno == ENOENT)
+                /* try again */
+                continue;
+            ERR("stat");
+        }
+
+        if (ours.st_ino != theirs.st_ino)
+            /* someone else must have removed it while we were locking it */
+            continue;
+
+        break;
     }
-    umask(mask);
-    snprintf(buf, sizeof(buf), "%10ld", (long)getpid());

-    rc = write_exact(fd, buf, strlen(buf));
-    close(fd);
+    *lockfd = fd;
+    return 0;

-    return rc;
+ err:
+    if (fd >= 0)
+        close(fd);
+
+    return -1;
 }

-static int unlock_suspend_event(xc_interface *xch, int domid)
+static int unlock_suspend_event(xc_interface *xch, int domid, int *lockfd)
 {
-    int fd, pid, n;
-    char buf[128];
-    char suspend_file[256];
+    int r;
+    char suspend_file[SUSPEND_FILE_BUFLEN];

-    snprintf(suspend_file, sizeof(suspend_file), "%s_%d_lock.d",
-           SUSPEND_LOCK_FILE, domid);
-    fd = open(suspend_file, O_RDWR);
+    if (*lockfd < 0)
+        return 0;

-    if (fd < 0)
-        return -EINVAL;
+    get_suspend_file(suspend_file, domid);

-    n = read(fd, buf, 127);
+    r = unlink(suspend_file);
+    if (r)
+        ERR("unlink");

-    close(fd);
+    r = close(*lockfd);
+    *lockfd = -1;
+    if (r)
+        ERR("close");

-    if (n > 0)
-    {
-        sscanf(buf, "%d", &pid);
-        /* We are the owner, so we can simply delete the file */
-        if (pid == getpid())
-        {
-            unlink(suspend_file);
-            return 0;
-        }
-    }
+ err:
+    if (*lockfd >= 0)
+        close(*lockfd);

-    return -EPERM;
+    return -1;
 }

 int xc_await_suspend(xc_interface *xch, xc_evtchn *xce, int suspend_evtchn)
@@ -96,20 +145,26 @@ int xc_await_suspend(xc_interface *xch, xc_evtchn *xce, int suspend_evtchn)
     return 0;
 }

-int xc_suspend_evtchn_release(xc_interface *xch, xc_evtchn *xce, int domid, int suspend_evtchn)
+/* Internal callers are allowed to call this with suspend_evtchn<0
+ * but *lockfd>0. */
+int xc_suspend_evtchn_release(xc_interface *xch, xc_evtchn *xce,
+                              int domid, int suspend_evtchn, int *lockfd)
 {
     if (suspend_evtchn >= 0)
         xc_evtchn_unbind(xce, suspend_evtchn);

-    return unlock_suspend_event(xch, domid);
+    return unlock_suspend_event(xch, domid, lockfd);
 }

-int xc_suspend_evtchn_init_sane(xc_interface *xch, xc_evtchn *xce, int domid, int port)
+int xc_suspend_evtchn_init_sane(xc_interface *xch, xc_evtchn *xce,
+                                int domid, int port, int *lockfd)
 {
     int rc, suspend_evtchn = -1;

-    if (lock_suspend_event(xch, domid))
-        return -EINVAL;
+    if (lock_suspend_event(xch, domid, lockfd)) {
+        errno = EINVAL;
+        goto cleanup;
+    }

     suspend_evtchn = xc_evtchn_bind_interdomain(xce, domid, port);
     if (suspend_evtchn < 0) {
@@ -126,17 +181,17 @@ int xc_suspend_evtchn_init_sane(xc_interface *xch, xc_evtchn *xce, int domid, in
     return suspend_evtchn;

 cleanup:
-    if (suspend_evtchn != -1)
-        xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn);
+    xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn, lockfd);

     return -1;
 }

-int xc_suspend_evtchn_init_exclusive(xc_interface *xch, xc_evtchn *xce, int domid, int port)
+int xc_suspend_evtchn_init_exclusive(xc_interface *xch, xc_evtchn *xce,
+                                     int domid, int port, int *lockfd)
 {
     int suspend_evtchn;

-    suspend_evtchn = xc_suspend_evtchn_init_sane(xch, xce, domid, port);
+    suspend_evtchn = xc_suspend_evtchn_init_sane(xch, xce, domid, port, lockfd);
     if (suspend_evtchn < 0)
         return suspend_evtchn;

diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index ce5456c..1f216cd 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -254,13 +254,19 @@ int xc_hvm_build_target_mem(xc_interface *xch,
                             int target,
                             const char *image_name);

-int xc_suspend_evtchn_release(xc_interface *xch, xc_evtchn *xce, int domid, int suspend_evtchn);
+/*
+ * Sets *lockfd to -1.
+ * Has deallocated everything even on error.
+ */
+int xc_suspend_evtchn_release(xc_interface *xch, xc_evtchn *xce, int domid, int suspend_evtchn, int *lockfd);

 /**
  * This function eats the initial notification.
  * xce must not be used for anything else
+ * See xc_suspend_evtchn_init_sane re lockfd.
  */
-int xc_suspend_evtchn_init_exclusive(xc_interface *xch, xc_evtchn *xce, int domid, int port);
+int xc_suspend_evtchn_init_exclusive(xc_interface *xch, xc_evtchn *xce,
+                                     int domid, int port, int *lockfd);

 /* xce must not be used for anything else */
 int xc_await_suspend(xc_interface *xch, xc_evtchn *xce, int suspend_evtchn);
@@ -268,8 +274,12 @@ int xc_await_suspend(xc_interface *xch, xc_evtchn *xce, int suspend_evtchn);
 /**
  * The port will be signaled immediately after this call
  * The caller should check the domain status and look for the next event
+ * On success, *lockfd will be set to >=0 and *lockfd must be preserved
+ * and fed to xc_suspend_evtchn_release.  (On error *lockfd is
+ * undefined and xc_suspend_evtchn_release is not allowed.)
  */
-int xc_suspend_evtchn_init_sane(xc_interface *xch, xc_evtchn *xce, int domid, int port);
+int xc_suspend_evtchn_init_sane(xc_interface *xch, xc_evtchn *xce,
+                                int domid, int port, int *lockfd);

 int xc_get_bit_size(xc_interface *xch,
                     const char *image_name, const char *cmdline,
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 4b42856..48a4b8e 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1340,6 +1340,7 @@ void libxl__domain_suspend(libxl__egc *egc, libxl__domain_suspend_state *dss)
           | (dss->hvm ? XCFLAGS_HVM : 0);

     dss->suspend_eventchn = -1;
+    dss->guest_evtchn_lockfd = -1;
     dss->guest_responded = 0;
     dss->dm_savefile = libxl__device_model_savefile(gc, domid);

@@ -1358,7 +1359,8 @@ void libxl__domain_suspend(libxl__egc *egc, libxl__domain_suspend_state *dss)

         if (port >= 0) {
             dss->suspend_eventchn =
-                xc_suspend_evtchn_init_exclusive(CTX->xch, dss->xce, dss->domid, port);
+                xc_suspend_evtchn_init_exclusive(CTX->xch, dss->xce,
+                                  dss->domid, port, &dss->guest_evtchn_lockfd);

             if (dss->suspend_eventchn < 0)
                 LOG(WARN, "Suspend event channel initialization failed");
@@ -1522,7 +1524,7 @@ static void domain_suspend_done(libxl__egc *egc,

     if (dss->suspend_eventchn > 0)
         xc_suspend_evtchn_release(CTX->xch, dss->xce, domid,
-                                  dss->suspend_eventchn);
+                           dss->suspend_eventchn, &dss->guest_evtchn_lockfd);
     if (dss->xce != NULL)
         xc_evtchn_close(dss->xce);

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 979b266..ebbfa09 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2417,6 +2417,7 @@ struct libxl__domain_suspend_state {
     /* private */
     xc_evtchn *xce; /* event channel handle */
     int suspend_eventchn;
+    int guest_evtchn_lockfd;
     int hvm;
     int xcflags;
     int guest_responded;
diff --git a/tools/misc/xen-hptool.c b/tools/misc/xen-hptool.c
index db76f79..bd9d936 100644
--- a/tools/misc/xen-hptool.c
+++ b/tools/misc/xen-hptool.c
@@ -99,10 +99,13 @@ static int hp_mem_query_func(int argc, char *argv[])

 extern int xs_suspend_evtchn_port(int domid);

-static int suspend_guest(xc_interface *xch, xc_evtchn *xce, int domid, int *evtchn)
+static int suspend_guest(xc_interface *xch, xc_evtchn *xce, int domid,
+                         int *evtchn, int *lockfd)
 {
     int port, rc, suspend_evtchn = -1;

+    *lockfd = -1;
+
     if (!evtchn)
         return -1;

@@ -112,7 +115,8 @@ static int suspend_guest(xc_interface *xch, xc_evtchn *xce, int domid, int *evtc
         fprintf(stderr, "DOM%d: No suspend port, try live migration\n", domid);
         goto failed;
     }
-    suspend_evtchn = xc_suspend_evtchn_init_exclusive(xch, xce, domid, port);
+    suspend_evtchn = xc_suspend_evtchn_init_exclusive(xch, xce, domid,
+                                                      port, lockfd);
     if (suspend_evtchn < 0)
     {
         fprintf(stderr, "Suspend evtchn initialization failed\n");
@@ -135,7 +139,8 @@ static int suspend_guest(xc_interface *xch, xc_evtchn *xce, int domid, int *evtc

 failed:
     if (suspend_evtchn != -1)
-        xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn);
+        xc_suspend_evtchn_release(xch, xce, domid,
+                                  suspend_evtchn, lockfd);

     return -1;
 }
@@ -193,7 +198,7 @@ static int hp_mem_offline_func(int argc, char *argv[])
                 }
                 else if (status & PG_OFFLINE_OWNED)
                 {
-                    int result, suspend_evtchn = -1;
+                    int result, suspend_evtchn = -1, suspend_lockfd = -1;
                     xc_evtchn *xce;
                     xce = xc_evtchn_open(NULL, 0);

@@ -205,7 +210,8 @@ static int hp_mem_offline_func(int argc, char *argv[])
                     }

                     domid = status >> PG_OFFLINE_OWNER_SHIFT;
-                    if (suspend_guest(xch, xce, domid, &suspend_evtchn))
+                    if (suspend_guest(xch, xce, domid,
+                                      &suspend_evtchn, &suspend_lockfd))
                     {
                         fprintf(stderr, "Failed to suspend guest %d for"
                                 " mfn %lx\n", domid, mfn);
@@ -231,7 +237,8 @@ static int hp_mem_offline_func(int argc, char *argv[])
                                 mfn, domid);
                     }
                     xc_domain_resume(xch, domid, 1);
-                    xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn);
+                    xc_suspend_evtchn_release(xch, xce, domid,
+                                              suspend_evtchn, &suspend_lockfd);
                     xc_evtchn_close(xce);
                 }
                 break;
diff --git a/tools/python/xen/lowlevel/checkpoint/checkpoint.h b/tools/python/xen/lowlevel/checkpoint/checkpoint.h
index 187d9d7..2414956 100644
--- a/tools/python/xen/lowlevel/checkpoint/checkpoint.h
+++ b/tools/python/xen/lowlevel/checkpoint/checkpoint.h
@@ -27,7 +27,7 @@ typedef struct {
     checkpoint_domtype domtype;
     int fd;

-    int suspend_evtchn;
+    int suspend_evtchn, suspend_lockfd;

     char* errstr;

diff --git a/tools/python/xen/lowlevel/checkpoint/libcheckpoint.c b/tools/python/xen/lowlevel/checkpoint/libcheckpoint.c
index 817d272..74ca062 100644
--- a/tools/python/xen/lowlevel/checkpoint/libcheckpoint.c
+++ b/tools/python/xen/lowlevel/checkpoint/libcheckpoint.c
@@ -57,6 +57,7 @@ void checkpoint_init(checkpoint_state* s)
     s->fd = -1;

     s->suspend_evtchn = -1;
+    s->suspend_lockfd = -1;

     s->errstr = NULL;

@@ -360,7 +361,8 @@ static int setup_suspend_evtchn(checkpoint_state* s)
     return -1;
   }

-  s->suspend_evtchn = xc_suspend_evtchn_init_exclusive(s->xch, s->xce, s->domid, port);
+  s->suspend_evtchn = xc_suspend_evtchn_init_exclusive(s->xch, s->xce,
+                                    s->domid, port, &s->suspend_lockfd);
   if (s->suspend_evtchn < 0) {
       s->errstr = "failed to bind suspend event channel";
       return -1;
@@ -377,7 +379,8 @@ static void release_suspend_evtchn(checkpoint_state *s)
 {
   /* TODO: teach xen to clean up if port is unbound */
   if (s->xce != NULL && s->suspend_evtchn >= 0) {
-    xc_suspend_evtchn_release(s->xch, s->xce, s->domid, s->suspend_evtchn);
+    xc_suspend_evtchn_release(s->xch, s->xce, s->domid,
+                              s->suspend_evtchn, &s->suspend_lockfd);
     s->suspend_evtchn = -1;
   }
 }
diff --git a/tools/xcutils/xc_save.c b/tools/xcutils/xc_save.c
index 974f706..bf74e46 100644
--- a/tools/xcutils/xc_save.c
+++ b/tools/xcutils/xc_save.c
@@ -167,7 +167,7 @@ int
 main(int argc, char **argv)
 {
     unsigned int maxit, max_f, lflags;
-    int io_fd, ret, port;
+    int io_fd, ret, port, suspend_lockfd = -1;
     struct save_callbacks callbacks;
     xentoollog_level lvl;
     xentoollog_logger *l;
@@ -202,7 +202,8 @@ main(int argc, char **argv)
         else
         {
             si.suspend_evtchn =
-              xc_suspend_evtchn_init_exclusive(si.xch, si.xce, si.domid, port);
+              xc_suspend_evtchn_init_exclusive(si.xch, si.xce, si.domid,
+                                               port, &suspend_lockfd);

             if (si.suspend_evtchn < 0)
                 warnx("suspend event channel initialization failed, "
@@ -216,7 +217,8 @@ main(int argc, char **argv)
                          &callbacks, !!(si.flags & XCFLAGS_HVM), 0);

     if (si.suspend_evtchn > 0)
-        xc_suspend_evtchn_release(si.xch, si.xce, si.domid, si.suspend_evtchn);
+        xc_suspend_evtchn_release(si.xch, si.xce, si.domid,
+                                   si.suspend_evtchn, &suspend_lockfd);

     if (si.xce > 0)
         xc_evtchn_close(si.xce);
--
1.7.10.4


Thanks for this.

Acked-by: Shriram Rajagopalan <rshriram@xxxxxxxxx

Slightly off-track:
  If you got a chance to test this patch, were you able to live migrate more than one 
domain simultaneously? I don't see any reason why it shouldn't be possible
but I sort of vaguely remember some bug fix in the past, where we couldn't 
start more than one live migration at the same time

Shriram
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.