[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCHv1 1/2] libxencall/linux: use LOCK/UNLOCK ioctls for hypercall buffers



Using just mlock'd buffers for hypercalls is not sufficient as these
are still subject to compaction and page migration.  Use the new
IOCTL_PRIVCMD_HCALL_BUF_LOCK and IOCTL_PRIVCMD_HCALL_BUF_UNLOCK ioctls
provided by the privcmd driver to prevent this.

Since not all kernels support these ioctls, don't repeatedly try these
ioctls if they are unsupported.

MAP_LOCKED is still used as this places the pages on the unevictable
list avoiding the need for the VM subsystem to scan them.

madvise(.., MADV_DONTFORK) is still required since we still need to
prevent children getting CoW mappings of the hypercall buffers.

Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
 tools/include/xen-sys/Linux/privcmd.h | 37 +++++++++++++++++++++++++++
 tools/libs/call/linux.c               | 47 ++++++++++++++++++++++++++++++++---
 2 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/tools/include/xen-sys/Linux/privcmd.h b/tools/include/xen-sys/Linux/privcmd.h
index e4e666a..4afb399 100644
--- a/tools/include/xen-sys/Linux/privcmd.h
+++ b/tools/include/xen-sys/Linux/privcmd.h
@@ -75,6 +75,11 @@ typedef struct privcmd_mmapbatch_v2 {
        int __user *err;  /* array of error codes */
 } privcmd_mmapbatch_v2_t;
 
+struct privcmd_hcall_buf {
+       void *start;
+       size_t len;
+};
+
 /*
  * @cmd: IOCTL_PRIVCMD_HYPERCALL
  * @arg: &privcmd_hypercall_t
@@ -89,4 +94,36 @@ typedef struct privcmd_mmapbatch_v2 {
 #define IOCTL_PRIVCMD_MMAPBATCH_V2                             \
        _IOC(_IOC_NONE, 'P', 4, sizeof(privcmd_mmapbatch_v2_t))
 
+/*
+ * @cmd: IOCTL_PRIVCMD_HCALL_BUF_LOCK
+ * @arg: struct privcmd_hcall_buf *
+ * Return: 0 on success. On an error, -1 is returned and errno is set
+ * to EINVAL, ENOMEM, or EFAULT.
+ *
+ * Locks a memory buffer so it may be used in a hypercall.  This is
+ * similar to mlock(2) but also prevents compaction/page migration.
+ *
+ * The buffers may have any alignment and size and may overlap other
+ * buffers.
+ *
+ * Locked buffers are unlocked with IOCTL_PRIVCMD_HCALL_BUF_UNLOCK or
+ * by closing the file handle.
+ */
+#define IOCTL_PRIVCMD_HCALL_BUF_LOCK                           \
+       _IOC(_IOC_NONE, 'P', 5, sizeof(struct privcmd_hcall_buf))
+
+/*
+ * @cmd: IOCTL_PRIVCMD_HCALL_BUF_UNLOCK
+ * @arg: struct privcmd_hcall_buf *
+ * Return: Always 0.
+ *
+ * Unlocks a memory buffer previously locked with
+ * IOCTL_PRIVCMD_HCALL_BUF_LOCK.
+ *
+ * It is not possible to partially unlock a buffer.  i.e., the
+ * LOCK/UNLOCK must be exactly paired.
+ */
+#define IOCTL_PRIVCMD_HCALL_BUF_UNLOCK                         \
+       _IOC(_IOC_NONE, 'P', 6, sizeof(struct privcmd_hcall_buf))
+
 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
diff --git a/tools/libs/call/linux.c b/tools/libs/call/linux.c
index e8e0311..54ddd23 100644
--- a/tools/libs/call/linux.c
+++ b/tools/libs/call/linux.c
@@ -68,6 +68,8 @@ int osdep_hypercall(xencall_handle *xcall, privcmd_hypercall_t *hypercall)
     return ioctl(xcall->fd, IOCTL_PRIVCMD_HYPERCALL, hypercall);
 }
 
+static int have_hbuf_lock = 1;
+
 void *osdep_alloc_pages(xencall_handle *xcall, size_t npages)
 {
     size_t size = npages * PAGE_SIZE;
@@ -84,7 +86,7 @@ void *osdep_alloc_pages(xencall_handle *xcall, size_t npages)
 
     /* Do not copy the VMA to child process on fork. Avoid the page being COW
         on hypercall. */
-    rc = madvise(p, npages * PAGE_SIZE, MADV_DONTFORK);
+    rc = madvise(p, size, MADV_DONTFORK);
     if ( rc < 0 )
     {
         PERROR("alloc_pages: madvise failed");
@@ -103,6 +105,33 @@ void *osdep_alloc_pages(xencall_handle *xcall, size_t npages)
         *c = 0;
     }
 
+    if ( have_hbuf_lock )
+    {
+        struct privcmd_hcall_buf hbuf;
+
+        hbuf.start = p;
+        hbuf.len = size;
+
+        rc = ioctl(xcall->fd, IOCTL_PRIVCMD_HCALL_BUF_LOCK, &hbuf);
+        if ( rc < 0 )
+        {
+            /*
+             * ENOTTY means the ioctl is not supported; older drivers
+             * return EINVAL if the ioctl was not supported.
+             */
+            if ( errno == ENOTTY || errno == EINVAL )
+            {
+                have_hbuf_lock = 0;
+                errno = 0;
+            }
+            else
+            {
+                PERROR("alloc_pages: lock failed");
+                goto out;
+            }
+        }
+    }
+
     return p;
 
 out:
@@ -114,11 +143,23 @@ out:
 
 void osdep_free_pages(xencall_handle *xcall, void *ptr, size_t npages)
 {
+    size_t size = npages * PAGE_SIZE;
     int saved_errno = errno;
+
+    if ( have_hbuf_lock )
+    {
+        struct privcmd_hcall_buf hbuf;
+
+        hbuf.start = ptr;
+        hbuf.len = size;
+
+        ioctl(xcall->fd, IOCTL_PRIVCMD_HCALL_BUF_UNLOCK, &hbuf);
+    }
+
     /* Recover the VMA flags. Maybe it's not necessary */
-    madvise(ptr, npages * PAGE_SIZE, MADV_DOFORK);
+    madvise(ptr, size, MADV_DOFORK);
 
-    munmap(ptr, npages * PAGE_SIZE);
+    munmap(ptr, size);
     /* We MUST propagate the hypercall errno, not unmap call's. */
     errno = saved_errno;
 }
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.