[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v4 5/7] libxl: support unmapping static shared memory areas during domain destruction



Hi,

On 01/30/2018 05:50 PM, Zhongze Liu wrote:
Add libxl__sshm_del to unmap static shared memory areas mapped by
libxl__sshm_add during domain creation. The unmapping process is:

* For a master: decrease the refcount of the sshm region, if the refcount
   reaches 0, cleanup the whole sshm path.
* For a slave: 1) unmap the shared pages, and cleanup related xs entries. If
   the system works normally, all the shared pages will be unmapped, so there
   won't be page leaks. In case of errors, the unmapping process will go on and
   unmap all the other pages that can be unmapped, so the other pages won't
   be leaked, either. 2) Decrease the refcount of the sshm region, if the
   refcount reaches 0, cleanup the whole sshm path.

May I ask to add a newline line 1) and 2). This would make clearer the 2 steps.


This is for the proposal "Allow setting up shared memory areas between VMs
from xl config file" (see [1]).

[1] https://lists.xen.org/archives/html/xen-devel/2017-08/msg03242.html

Signed-off-by: Zhongze Liu <blackskygg@xxxxxxxxx>

Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx>
Cc: Julien Grall <julien.grall@xxxxxxx>
Cc: xen-devel@xxxxxxxxxxxxx
---
  tools/libxl/libxl_domain.c   |   5 ++
  tools/libxl/libxl_internal.h |   2 +
  tools/libxl/libxl_sshm.c     | 106 +++++++++++++++++++++++++++++++++++++++++++
  3 files changed, 113 insertions(+)

diff --git a/tools/libxl/libxl_domain.c b/tools/libxl/libxl_domain.c
index 13b1c73d40..37f001554b 100644
--- a/tools/libxl/libxl_domain.c
+++ b/tools/libxl/libxl_domain.c
@@ -1026,6 +1026,11 @@ void libxl__destroy_domid(libxl__egc *egc, 
libxl__destroy_domid_state *dis)
          goto out;
      }
+ rc = libxl__sshm_del(gc, domid);
+    if (rc) {
+        LOGD(ERROR, domid, "Deleting static shm failed.");
+    }
+
      if (libxl__device_pci_destroy_all(gc, domid) < 0)
          LOGD(ERROR, domid, "Pci shutdown failed");
      rc = xc_domain_pause(ctx->xch, domid);
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 2cfe4c08a7..c398b6a6b8 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -4424,6 +4424,8 @@ static inline bool libxl__string_is_default(char **s)
  _hidden int libxl__sshm_add(libxl__gc *gc, uint32_t domid,
                              libxl_static_shm *sshm, int len);
+_hidden int libxl__sshm_del(libxl__gc *gc, uint32_t domid);
+
  _hidden int libxl__sshm_check_overlap(libxl__gc *gc, uint32_t domid,
                                        libxl_static_shm *sshms, int len);
  _hidden int libxl__sshm_setdefault(libxl__gc *gc, uint32_t domid,
diff --git a/tools/libxl/libxl_sshm.c b/tools/libxl/libxl_sshm.c
index 562f46f299..1bf4d4c2dc 100644
--- a/tools/libxl/libxl_sshm.c
+++ b/tools/libxl/libxl_sshm.c
@@ -86,6 +86,112 @@ int libxl__sshm_check_overlap(libxl__gc *gc, uint32_t domid,
      return 0;
  }
+/* Decrease the refcount of an sshm. When refcount reaches 0,

NIT: Libxl coding style regarding the comment seems to be uncleared (Ian, Wei?). But I feel keep /* and */ in separate line is nicer.

+ * clean up the whole sshm path.
+ */
+static void libxl__sshm_decref(libxl__gc *gc, xs_transaction_t xt,
+                               const char *sshm_path)
+{
+    int count;
+    const char *count_path, *count_string;
+
+    count_path = GCSPRINTF("%s/usercnt", sshm_path);
+    if (libxl__xs_read_checked(gc, xt, count_path, &count_string))
+        return;
+    count = atoi(count_string);
+
+    if (--count == 0) {
+        libxl__xs_path_cleanup(gc, xt, sshm_path);
+        return;
+    }
+
+    count_string = GCSPRINTF("%d", count);
+    libxl__xs_write_checked(gc, xt, count_path, count_string);

See my comment about incref in a patch #4.

+
+    return;
+}
+
+static void libxl__sshm_do_unmap(libxl__gc *gc, uint32_t domid, const char *id,
+                                 uint64_t begin, uint64_t end)
+{
+    begin >>= XC_PAGE_SHIFT;
+    end >>= XC_PAGE_SHIFT;
+    for (; begin < end; ++begin) {
+        if (xc_domain_remove_from_physmap(CTX->xch, domid, begin)) {
+            SSHM_ERROR(domid, id,
+                       "unable to unmap shared page at 0x%"PRIx64".",
+                       begin);
+        }
+    }
+}
+
+static void libxl__sshm_del_slave(libxl__gc *gc, xs_transaction_t xt,
+                                  uint32_t domid, const char *id, bool isretry)
+{
+    const char *slave_path, *begin_str, *end_str;
+    uint64_t begin, end;
+
+    slave_path = GCSPRINTF("%s/slaves/%"PRIu32, SSHM_PATH(id), domid);
+
+    begin_str = libxl__xs_read(gc, xt, GCSPRINTF("%s/begin", slave_path));
+    end_str = libxl__xs_read(gc, xt, GCSPRINTF("%s/end", slave_path));
+    begin = strtoull(begin_str, NULL, 16);
+    end = strtoull(end_str, NULL, 16);
+
+    /* Avoid calling do_unmap many times in case of xs transaction retry */
+    if (!isretry)
+        libxl__sshm_do_unmap(gc, domid, id, begin, end);

IHMO, by unmapping the regions in middle of the transaction, you increase the potential failure of it. I would move that out of the transaction path.

I would be interested to hear the opinion of the tools maintainers here.

+
+    libxl__xs_path_cleanup(gc, xt, slave_path);
+}
+
+/* Delete static_shm entries in the xensotre. */
+int libxl__sshm_del(libxl__gc *gc,  uint32_t domid)
+{
+    int rc, i;
+    bool isretry;
+    xs_transaction_t xt = XBT_NULL;
+    const char *dom_path, *dom_sshm_path, *role;
+    char **sshm_ents;
+    unsigned int sshm_num;
+
+    dom_path = libxl__xs_get_dompath(gc, domid);
+    dom_sshm_path = GCSPRINTF("%s/static_shm", dom_path);
+
+    isretry = false;
+    for (;;) {
+        rc = libxl__xs_transaction_start(gc, &xt);
+        if (rc) goto out;
+
+        if (libxl__xs_read(gc, xt, dom_sshm_path)) {
+            sshm_ents = libxl__xs_directory(gc, xt, dom_sshm_path, &sshm_num);
+            if (!sshm_ents) continue;
+
+            for (i = 0; i < sshm_num; ++i) {

Similar comment here. You increase the chance of the transaction failing by trying to del multiple shared memory regions at the same time. I am mostly thinking on the libxl__sshm_decref bit.

+                role = libxl__xs_read(gc, xt,
+                                      GCSPRINTF("%s/%s/role",
+                                                dom_sshm_path,
+                                                sshm_ents[i]));
+                assert(role);
+                if (!strncmp(role, "slave", 5))
+                    libxl__sshm_del_slave(gc, xt, domid, sshm_ents[i], 
isretry);
+
+                libxl__sshm_decref(gc, xt, SSHM_PATH(sshm_ents[i]));
+            }
+        }
+
+        rc = libxl__xs_transaction_commit(gc, &xt);
+        if (!rc) break;
+        if (rc < 0) goto out;
+        isretry = true;
+    }
+
+    rc = 0;
+out:
+    libxl__xs_transaction_abort(gc, &xt);
+    return rc;
+}
+
  /*   libxl__sshm_do_map -- map pages into slave's physmap
   *
   *   This functions maps


Cheers,

--
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.