[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 for-4.9 1/3] libxl/devd: fix a race with concurrent device addition/removal



Current code can free the libxl__device inside of the libxl__ddomain_device
before the addition has finished if a removal happens while an addition is
still in process:

  backend_watch_callback
            |
            v
       add_device
            |                 backend_watch_callback
    (async operation)                   |
            |                           v
            |                     remove_device
            |                           |
            |                           V
            |                    device_complete
            |                 (free libxl__device)
            v
     device_complete
  (deref libxl__device)

Fix this by creating a temporary copy of the libxl__device, that's tracked by
the GC of the nested async operation. This ensures that the libxl__device used
by the async operations cannot be freed while being used.

Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
Reported-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Reviewed-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Julien Grall <julien.grall@xxxxxxx>

Changes since v2:
 - Add a comment at the top of libxl__device explaining why it can be copied by
   assignment.
---
 tools/libxl/libxl_device.c   | 32 +++++++++++++++++++-------------
 tools/libxl/libxl_internal.h |  4 ++++
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index 5e966762c6..cd4ad05a6f 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -1415,9 +1415,6 @@ static void device_complete(libxl__egc *egc, 
libxl__ao_device *aodev)
                libxl__device_action_to_string(aodev->action),
                aodev->rc ? "failed" : "succeed");
 
-    if (aodev->action == LIBXL__DEVICE_ACTION_REMOVE)
-        free(aodev->dev);
-
     libxl__nested_ao_free(aodev->ao);
 }
 
@@ -1521,7 +1518,12 @@ static int add_device(libxl__egc *egc, libxl__ao *ao,
 
         GCNEW(aodev);
         libxl__prepare_ao_device(ao, aodev);
-        aodev->dev = dev;
+        /*
+         * Clone the libxl__device to avoid races if remove_device is called
+         * before the device addition has finished.
+         */
+        GCNEW(aodev->dev);
+        *aodev->dev = *dev;
         aodev->action = LIBXL__DEVICE_ACTION_ADD;
         aodev->callback = device_complete;
         libxl__wait_device_connection(egc, aodev);
@@ -1564,7 +1566,12 @@ static int remove_device(libxl__egc *egc, libxl__ao *ao,
 
         GCNEW(aodev);
         libxl__prepare_ao_device(ao, aodev);
-        aodev->dev = dev;
+        /*
+         * Clone the libxl__device to avoid races if there's a add_device
+         * running in parallel.
+         */
+        GCNEW(aodev->dev);
+        *aodev->dev = *dev;
         aodev->action = LIBXL__DEVICE_ACTION_REMOVE;
         aodev->callback = device_complete;
         libxl__initiate_device_generic_remove(egc, aodev);
@@ -1576,7 +1583,6 @@ static int remove_device(libxl__egc *egc, libxl__ao *ao,
                 goto out;
         }
         libxl__device_destroy(gc, dev);
-        free(dev);
         /* Fall through to return > 0, no ao has been dispatched */
     default:
         rc = 1;
@@ -1597,7 +1603,7 @@ static void backend_watch_callback(libxl__egc *egc, 
libxl__ev_xswatch *watch,
     char *p, *path;
     const char *sstate, *sonline;
     int state, online, rc, num_devs;
-    libxl__device *dev = NULL;
+    libxl__device *dev;
     libxl__ddomain_device *ddev = NULL;
     libxl__ddomain_guest *dguest = NULL;
     bool free_ao = false;
@@ -1625,7 +1631,7 @@ static void backend_watch_callback(libxl__egc *egc, 
libxl__ev_xswatch *watch,
         goto skip;
     online = atoi(sonline);
 
-    dev = libxl__zalloc(NOGC, sizeof(*dev));
+    GCNEW(dev);
     rc = libxl__parse_backend_path(gc, path, dev);
     if (rc)
         goto skip;
@@ -1659,7 +1665,8 @@ static void backend_watch_callback(libxl__egc *egc, 
libxl__ev_xswatch *watch,
          * to the list of active devices for a given guest.
          */
         ddev = libxl__zalloc(NOGC, sizeof(*ddev));
-        ddev->dev = dev;
+        ddev->dev = libxl__zalloc(NOGC, sizeof(*ddev->dev));
+        *ddev->dev = *dev;
         LIBXL_SLIST_INSERT_HEAD(&dguest->devices, ddev, next);
         LOGD(DEBUG, dev->domid, "Added device %s to the list of active 
devices",
              path);
@@ -1670,9 +1677,6 @@ static void backend_watch_callback(libxl__egc *egc, 
libxl__ev_xswatch *watch,
         /*
          * Removal of an active device, remove it from the list and
          * free it's data structures if they are no longer needed.
-         *
-         * The free of the associated libxl__device is left to the
-         * helper remove_device function.
          */
         LIBXL_SLIST_REMOVE(&dguest->devices, ddev, libxl__ddomain_device,
                            next);
@@ -1682,6 +1686,7 @@ static void backend_watch_callback(libxl__egc *egc, 
libxl__ev_xswatch *watch,
         if (rc > 0)
             free_ao = true;
 
+        free(ddev->dev);
         free(ddev);
         /* If this was the last device in the domain, remove it from the list 
*/
         num_devs = dguest->num_vifs + dguest->num_vbds + dguest->num_qdisks;
@@ -1703,7 +1708,8 @@ static void backend_watch_callback(libxl__egc *egc, 
libxl__ev_xswatch *watch,
 
 skip:
     libxl__nested_ao_free(nested_ao);
-    free(dev);
+    if (ddev)
+        free(ddev->dev);
     free(ddev);
     free(dguest);
     return;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 5d082c5704..afe6652847 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -501,6 +501,10 @@ struct libxl__ctx {
     libxl_version_info version_info;
 };
 
+/*
+ * libxl__device is a transparent structure that doesn't contain private fields
+ * or external memory references, and as such can be copied by assignment.
+ */
 typedef struct {
     uint32_t backend_devid;
     uint32_t backend_domid;
-- 
2.11.0 (Apple Git-81)


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.