[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-4.0-testing] tmem (tools): move to new ABI version to handle long object-ids



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1284394866 -3600
# Node ID 99b5871889e38c1b6a252e76dddd999b99ae5d7f
# Parent  f195cf61ff94f2515faab816e9939cba8f693e8b
tmem (tools): move to new ABI version to handle long object-ids

After a great deal of discussion and review with Linux
kernel developers, it appears there are "next-generation"
filesystems (such as btrfs, xfs, Lustre) that will not
be able to use tmem due to an ABI limitation: the field
that represents a unique file identifier is 64 bits wide in
the tmem ABI, but may need to be as large as 192 bits.
So, to support these guest filesystems, the tmem ABI must be
revised from "v0" to "v1".

I *think* it is still the case that tmem is experimental
and is not used anywhere yet in production.

The tmem ABI is designed to support multiple revisions,
so the Xen tmem implementation could be updated to
handle both v0 and v1.  However this is a bit
messy and would require data structures for both v0
and v1 to appear in public Xen header files.

I am inclined to update the Xen tmem implementation
to support only v1 and to fail v0 requests gracefully.  This would
result in only a performance loss (as if tmem were
disabled) for newly launched tmem-v0-enabled guests,
but live migration between old tmem-v0 Xen and new
tmem-v1 Xen machines would fail, and saved tmem-v0
guests could not be restored on a tmem-v1
Xen machine.  I plan to update both pre-4.0.2
and unstable (future 4.1) to support only v1.

I believe these restrictions are reasonable at this
point in the tmem lifecycle, though they may not
be reasonable in the near future; should the tmem
ABI need to be revised from v1 to v2, I understand
backwards compatibility will be required.

Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>
xen-unstable changeset:   22137:fd2e5008c2e0
xen-unstable date:        Mon Sep 13 17:11:04 2010 +0100
---
 tools/libxc/xc_tmem.c |   73 ++++++++++++++++++++++++++++++++++++++++++--------
 tools/libxc/xenctrl.h |    8 +++++
 2 files changed, 70 insertions(+), 11 deletions(-)

diff -r f195cf61ff94 -r 99b5871889e3 tools/libxc/xc_tmem.c
--- a/tools/libxc/xc_tmem.c     Mon Sep 13 17:19:34 2010 +0100
+++ b/tools/libxc/xc_tmem.c     Mon Sep 13 17:21:06 2010 +0100
@@ -49,7 +49,56 @@ int xc_tmem_control(int xc,
     set_xen_guest_handle(op.u.ctrl.buf,buf);
     op.u.ctrl.arg1 = arg1;
     op.u.ctrl.arg2 = arg2;
-    op.u.ctrl.arg3 = arg3;
+    /* use xc_tmem_control_oid if arg3 is required */
+    op.u.ctrl.oid[0] = 0;
+    op.u.ctrl.oid[1] = 0;
+    op.u.ctrl.oid[2] = 0;
+
+    if (subop == TMEMC_LIST) {
+        if ((arg1 != 0) && (lock_pages(buf, arg1) != 0))
+        {
+            PERROR("Could not lock memory for Xen hypercall");
+            return -ENOMEM;
+        }
+    }
+
+#ifdef VALGRIND
+    if (arg1 != 0)
+        memset(buf, 0, arg1);
+#endif
+
+    rc = do_tmem_op(xc, &op);
+
+    if (subop == TMEMC_LIST) {
+        if (arg1 != 0)
+            unlock_pages(buf, arg1);
+    }
+
+    return rc;
+}
+
+int xc_tmem_control_oid(int xc,
+                        int32_t pool_id,
+                        uint32_t subop,
+                        uint32_t cli_id,
+                        uint32_t arg1,
+                        uint32_t arg2,
+                        struct tmem_oid oid,
+                        void *buf)
+{
+    tmem_op_t op;
+    int rc;
+
+    op.cmd = TMEM_CONTROL;
+    op.pool_id = pool_id;
+    op.u.ctrl.subop = subop;
+    op.u.ctrl.cli_id = cli_id;
+    set_xen_guest_handle(op.u.ctrl.buf,buf);
+    op.u.ctrl.arg1 = arg1;
+    op.u.ctrl.arg2 = arg2;
+    op.u.ctrl.oid[0] = oid.oid[0];
+    op.u.ctrl.oid[1] = oid.oid[1];
+    op.u.ctrl.oid[2] = oid.oid[2];
 
     if (subop == TMEMC_LIST) {
         if ((arg1 != 0) && (lock_pages(buf, arg1) != 0))
@@ -239,7 +288,7 @@ int xc_tmem_save(int xc, int dom, int io
                 } else {
                     /* page list terminator */
                     h = (struct tmem_handle *)buf;
-                    h->oid = -1;
+                    h->oid[0] = h->oid[1] = h->oid[2] = -1L;
                     if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) )
                         return -1;
                     break;
@@ -276,7 +325,8 @@ int xc_tmem_save_extra(int xc, int dom, 
         if ( write_exact(io_fd, &handle.index, sizeof(handle.index)) )
             return -1;
         count++;
-        checksum += handle.pool_id + handle.oid + handle.index;
+        checksum += handle.pool_id + handle.oid[0] + handle.oid[1] +
+                    handle.oid[2] + handle.index;
     }
     if ( count )
             DPRINTF("needed %d tmem invalidates, check=%d\n",count,checksum);
@@ -386,20 +436,21 @@ int xc_tmem_restore(int xc, int dom, int
         }
         for ( j = n_pages; j > 0; j-- )
         {
-            uint64_t oid;
+            struct tmem_oid oid;
             uint32_t index;
             int rc;
             if ( read_exact(io_fd, &oid, sizeof(oid)) )
                 return -1;
-            if ( oid == -1 )
+            if ( oid.oid[0] == -1L && oid.oid[1] == -1L && oid.oid[2] == -1L )
                 break;
             if ( read_exact(io_fd, &index, sizeof(index)) )
                 return -1;
             if ( read_exact(io_fd, buf, pagesize) )
                 return -1;
             checksum += *buf;
-            if ( (rc = xc_tmem_control(xc, pool_id, TMEMC_RESTORE_PUT_PAGE,
-                                 dom, bufsize, index, oid, buf)) <= 0 )
+            if ( (rc = xc_tmem_control_oid(xc, pool_id,
+                                           TMEMC_RESTORE_PUT_PAGE, dom,
+                                           bufsize, index, oid, buf)) <= 0 )
             {
                 DPRINTF("xc_tmem_restore: putting page failed, rc=%d\n",rc);
                 return -1;
@@ -419,7 +470,7 @@ int xc_tmem_restore_extra(int xc, int do
 int xc_tmem_restore_extra(int xc, int dom, int io_fd)
 {
     uint32_t pool_id;
-    uint64_t oid;
+    struct tmem_oid oid;
     uint32_t index;
     int count = 0;
     int checksum = 0;
@@ -430,11 +481,11 @@ int xc_tmem_restore_extra(int xc, int do
             return -1;
         if ( read_exact(io_fd, &index, sizeof(index)) )
             return -1;
-        if ( xc_tmem_control(xc, pool_id, TMEMC_RESTORE_FLUSH_PAGE, dom,
-                             0,index,oid,NULL) <= 0 )
+        if ( xc_tmem_control_oid(xc, pool_id, TMEMC_RESTORE_FLUSH_PAGE, dom,
+                                 0,index,oid,NULL) <= 0 )
             return -1;
         count++;
-        checksum += pool_id + oid + index;
+        checksum += pool_id + oid.oid[0] + oid.oid[1] + oid.oid[2] + index;
     }
     if ( pool_id != -1 )
         return -1;
diff -r f195cf61ff94 -r 99b5871889e3 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Sep 13 17:19:34 2010 +0100
+++ b/tools/libxc/xenctrl.h     Mon Sep 13 17:21:06 2010 +0100
@@ -1309,6 +1309,14 @@ int xc_disable_turbo(int xc_handle, int 
 /**
  * tmem operations
  */
+
+struct tmem_oid {
+    uint64_t oid[3];
+};
+
+int xc_tmem_control_oid(int xc, int32_t pool_id, uint32_t subop,
+                        uint32_t cli_id, uint32_t arg1, uint32_t arg2,
+                        struct tmem_oid oid, void *buf);
 int xc_tmem_control(int xc, int32_t pool_id, uint32_t subop, uint32_t cli_id,
                     uint32_t arg1, uint32_t arg2, uint64_t arg3, void *buf);
 int xc_tmem_auth(int xc_handle, int cli_id, char *uuid_str, int arg1);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.