[Xen-devel] [PATCH 13/25] argo: implement the register op



Used by a domain to register a region of memory for receiving messages, either
from a specified partner domain or, if a wildcard is specified, from any domain.

This operation creates a mapping within Xen's private address space that
will remain resident for the lifetime of the ring. In subsequent commits, the
hypervisor will use this mapping to copy data from a sending domain into this
registered ring, making it accessible to the domain that registered the ring so
that it can receive the data.

In this code, the p2m type of the memory supplied by the guest for the ring
must be p2m_ram_rw: a conservative choice made to defer reasoning about the
other p2m types to later commits.

The argo_pfn_t type is introduced here as a pfn type that is 64-bit on all
architectures, to help avoid the need for a compat ABI.

Signed-off-by: Christopher Clark <christopher.clark6@xxxxxxxxxxxxxx>
---
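For reviewers: a minimal sketch of how a guest could invoke this op, assuming
the definitions added to xen/include/public/argo.h by this patch. The
argo_call() hypercall wrapper and the alloc_ring_pages()/gfn_of() helpers are
hypothetical stand-ins for whatever plumbing a guest OS provides; the ring
field names match those checked in argo_register_ring() below.

/* Illustrative sketch only; not part of this patch. */
#define EXAMPLE_NPAGES    4u
#define EXAMPLE_RING_LEN  (EXAMPLE_NPAGES * 4096u) /* multiple of 16, <= ARGO_MAX_RING_SIZE */

static int example_register_ring(uint32_t port, domid_t partner)
{
    /*
     * The ring handle passed to Xen must be page-aligned, so place the
     * argo_ring_t at the start of the first registered page.
     */
    argo_ring_t *ring = alloc_ring_pages(EXAMPLE_NPAGES);   /* hypothetical allocator */
    argo_pfn_t pfns[EXAMPLE_NPAGES];
    unsigned int i;

    memset(ring, 0, EXAMPLE_RING_LEN);

    for ( i = 0; i < EXAMPLE_NPAGES; i++ )
        pfns[i] = gfn_of(ring, i);                          /* hypothetical gpfn lookup */

    ring->magic = ARGO_RING_MAGIC;
    ring->len = EXAMPLE_RING_LEN;   /* must satisfy ARGO_ROUNDUP(len) == len and fit the pages */
    ring->id.addr.port = port;
    ring->id.partner = partner;     /* or ARGO_DOMID_ANY for a wildcard ring */
    /* id.addr.domain_id is overwritten by Xen with the caller's domid. */

    /* arg3 is the gpfn count; arg4 carries the register flags. */
    return argo_call(ARGO_MESSAGE_OP_register_ring,         /* hypothetical wrapper */
                     ring, pfns, EXAMPLE_NPAGES,
                     ARGO_REGISTER_FLAG_FAIL_EXIST);
}

On success the guest can go on to receive messages on the ring; re-registering
with the FAIL_EXIST flag clear preserves a still-valid tx_ptr, as handled in
argo_register_ring() below.
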
 xen/common/argo.c                  | 498 +++++++++++++++++++++++++++++++++++++
 xen/include/asm-arm/guest_access.h |   2 +
 xen/include/asm-x86/guest_access.h |   2 +
 xen/include/public/argo.h          |  64 +++++
 4 files changed, 566 insertions(+)

diff --git a/xen/common/argo.c b/xen/common/argo.c
index 2a95e09..f4e82cf 100644
--- a/xen/common/argo.c
+++ b/xen/common/argo.c
@@ -25,6 +25,7 @@
 #include <xen/guest_access.h>
 #include <xen/time.h>
 
+DEFINE_XEN_GUEST_HANDLE(argo_pfn_t);
 DEFINE_XEN_GUEST_HANDLE(argo_addr_t);
 DEFINE_XEN_GUEST_HANDLE(argo_ring_t);
 
@@ -98,6 +99,25 @@ struct argo_domain
 };
 
 /*
+ * Helper functions
+ */
+
+static inline uint16_t
+argo_hash_fn(const struct argo_ring_id *id)
+{
+    uint16_t ret;
+
+    ret = (uint16_t)(id->addr.port >> 16);
+    ret ^= (uint16_t)id->addr.port;
+    ret ^= id->addr.domain_id;
+    ret ^= id->partner;
+
+    ret &= (ARGO_HTABLE_SIZE - 1);
+
+    return ret;
+}
+
+/*
  * locks
  */
 
@@ -171,6 +191,74 @@ argo_ring_unmap(struct argo_ring_info *ring_info)
     }
 }
 
+/* caller must have L3 or W(L2) */
+static int
+argo_ring_map_page(struct argo_ring_info *ring_info, uint32_t i,
+                   uint8_t **page)
+{
+    if ( i >= ring_info->nmfns )
+    {
+        printk(XENLOG_ERR "argo: ring (vm%u:%x vm%d) %p attempted to map page"
+               " %u of %u\n", ring_info->id.addr.domain_id,
+               ring_info->id.addr.port, ring_info->id.partner, ring_info,
+               i, ring_info->nmfns);
+        return -EFAULT;
+    }
+    ASSERT(ring_info->mfns);
+    ASSERT(ring_info->mfn_mapping);
+
+    if ( !ring_info->mfn_mapping[i] )
+    {
+        /*
+         * TODO:
+         * The first page of the ring contains the ring indices, so both read and
+         * write access to the page is required by the hypervisor, but read-access
+         * is not needed for this mapping for the remainder of the ring.
+         * Since this mapping will remain resident in Xen's address space for
+         * the lifetime of the ring, and following the principle of least privilege,
+         * it could be preferable to:
+         *  # add a XSM check to determine what policy is wanted here
+         *  # depending on the XSM query, optionally create this mapping as
+         *    _write-only_ on platforms that can support it.
+         *    (eg. Intel EPT/AMD NPT).
+         */
+        ring_info->mfn_mapping[i] = map_domain_page_global(ring_info->mfns[i]);
+
+        if ( !ring_info->mfn_mapping[i] )
+        {
+            printk(XENLOG_ERR "argo: ring (vm%u:%x vm%d) %p attempted to map 
page"
+                   " %u of %u\n", ring_info->id.addr.domain_id,
+                   ring_info->id.addr.port, ring_info->id.partner, ring_info,
+                   i, ring_info->nmfns);
+            return -EFAULT;
+        }
+        argo_dprintk("mapping page %"PRI_mfn" to %p\n",
+               mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
+    }
+
+    if ( page )
+        *page = ring_info->mfn_mapping[i];
+    return 0;
+}
+
+/* caller must have L3 or W(L2) */
+static int
+argo_update_tx_ptr(struct argo_ring_info *ring_info, uint32_t tx_ptr)
+{
+    uint8_t *dst;
+    uint32_t *p;
+    int ret;
+
+    ret = argo_ring_map_page(ring_info, 0, &dst);
+    if ( ret )
+        return ret;
+
+    p = (uint32_t *)(dst + offsetof(argo_ring_t, tx_ptr));
+    write_atomic(p, tx_ptr);
+    mb();
+    return 0;
+}
+
 /*
  * pending
  */
@@ -231,6 +319,388 @@ argo_ring_remove_info(struct domain *d, struct argo_ring_info *ring_info)
     xfree(ring_info);
 }
 
+/*
+ * ring
+ */
+
+static int
+argo_find_ring_mfn(struct domain *d, argo_pfn_t pfn, mfn_t *mfn)
+{
+    p2m_type_t p2mt;
+    int ret = 0;
+
+#ifdef CONFIG_X86
+    *mfn = get_gfn_unshare(d, pfn, &p2mt);
+#else
+    *mfn = p2m_lookup(d, _gfn(pfn), &p2mt);
+#endif
+
+    if ( !mfn_valid(*mfn) )
+        ret = -EINVAL;
+#ifdef CONFIG_X86
+    else if ( p2m_is_paging(p2mt) || (p2mt == p2m_ram_logdirty) )
+        ret = -EAGAIN;
+#endif
+    else if ( (p2mt != p2m_ram_rw) ||
+              !get_page_and_type(mfn_to_page(*mfn), d, PGT_writable_page) )
+        ret = -EINVAL;
+
+#ifdef CONFIG_X86
+    put_gfn(d, pfn);
+#endif
+
+    return ret;
+}
+
+static int
+argo_find_ring_mfns(struct domain *d, struct argo_ring_info *ring_info,
+                    uint32_t npage, XEN_GUEST_HANDLE_PARAM(argo_pfn_t) pfn_hnd,
+                    uint32_t len)
+{
+    int i;
+    int ret = 0;
+
+    if ( (npage << PAGE_SHIFT) < len )
+        return -EINVAL;
+
+    if ( ring_info->mfns )
+    {
+        /*
+         * Ring already existed. Check if it's the same ring,
+         * i.e. same number of pages and all translated gpfns still
+         * translating to the same mfns
+         */
+        if ( ring_info->npage != npage )
+            i = ring_info->nmfns + 1; /* forces re-register below */
+        else
+        {
+            for ( i = 0; i < ring_info->nmfns; i++ )
+            {
+                argo_pfn_t pfn;
+                mfn_t mfn;
+
+                ret = copy_from_guest_offset_errno(&pfn, pfn_hnd, i, 1);
+                if ( ret )
+                    break;
+
+                ret = argo_find_ring_mfn(d, pfn, &mfn);
+                if ( ret )
+                    break;
+
+                if ( mfn_x(mfn) != mfn_x(ring_info->mfns[i]) )
+                    break;
+            }
+        }
+        if ( i != ring_info->nmfns )
+        {
+            printk(XENLOG_INFO "argo: vm%u re-registering existing argo ring"
+                   " (vm%u:%x vm%d), clearing MFN list\n",
+                   current->domain->domain_id, ring_info->id.addr.domain_id,
+                   ring_info->id.addr.port, ring_info->id.partner);
+
+            argo_ring_remove_mfns(d, ring_info);
+            ASSERT(!ring_info->mfns);
+        }
+    }
+
+    if ( !ring_info->mfns )
+    {
+        mfn_t *mfns;
+        uint8_t **mfn_mapping;
+
+        mfns = xmalloc_array(mfn_t, npage);
+        if ( !mfns )
+            return -ENOMEM;
+
+        for ( i = 0; i < npage; i++ )
+            mfns[i] = INVALID_MFN;
+
+        mfn_mapping = xmalloc_array(uint8_t *, npage);
+        if ( !mfn_mapping )
+        {
+            xfree(mfns);
+            return -ENOMEM;
+        }
+
+        ring_info->npage = npage;
+        ring_info->mfns = mfns;
+        ring_info->mfn_mapping = mfn_mapping;
+    }
+    ASSERT(ring_info->npage == npage);
+
+    if ( ring_info->nmfns == ring_info->npage )
+        return 0;
+
+    for ( i = ring_info->nmfns; i < ring_info->npage; i++ )
+    {
+        argo_pfn_t pfn;
+        mfn_t mfn;
+
+        ret = copy_from_guest_offset_errno(&pfn, pfn_hnd, i, 1);
+        if ( ret )
+            break;
+
+        ret = argo_find_ring_mfn(d, pfn, &mfn);
+        if ( ret )
+        {
+            printk(XENLOG_ERR "argo: vm%u passed invalid gpfn %"PRI_xen_pfn
+                   " ring (vm%u:%x vm%d) %p seq %d of %d\n",
+                   d->domain_id, pfn, ring_info->id.addr.domain_id,
+                   ring_info->id.addr.port, ring_info->id.partner,
+                   ring_info, i, ring_info->npage);
+            break;
+        }
+
+        ring_info->mfns[i] = mfn;
+        ring_info->nmfns = i + 1;
+
+        argo_dprintk("%d: %"PRI_xen_pfn" -> %"PRI_mfn"\n",
+               i, pfn, mfn_x(ring_info->mfns[i]));
+
+        ring_info->mfn_mapping[i] = NULL;
+    }
+
+    if ( ret )
+        argo_ring_remove_mfns(d, ring_info);
+    else
+    {
+        ASSERT(ring_info->nmfns == ring_info->npage);
+
+        printk(XENLOG_ERR "argo: vm%u ring (vm%u:%x vm%d) %p mfn_mapping %p"
+               " npage %d nmfns %d\n", current->domain->domain_id,
+               ring_info->id.addr.domain_id, ring_info->id.addr.port,
+               ring_info->id.partner, ring_info, ring_info->mfn_mapping,
+               ring_info->npage, ring_info->nmfns);
+    }
+    return ret;
+}
+
+static struct argo_ring_info *
+argo_ring_find_info(const struct domain *d, const struct argo_ring_id *id)
+{
+    uint16_t hash;
+    struct hlist_node *node;
+    struct argo_ring_info *ring_info;
+
+    ASSERT(rw_is_locked(&d->argo->lock));
+
+    hash = argo_hash_fn(id);
+
+    argo_dprintk("d->argo=%p, d->argo->ring_hash[%d]=%p id=%p\n",
+                 d->argo, hash, d->argo->ring_hash[hash].first, id);
+    argo_dprintk("id.addr.port=%d id.addr.domain=vm%u"
+                 " id.addr.partner=vm%d\n",
+                 id->addr.port, id->addr.domain_id, id->partner);
+
+    hlist_for_each_entry(ring_info, node, &d->argo->ring_hash[hash], node)
+    {
+        argo_ring_id_t *cmpid = &ring_info->id;
+
+        if ( cmpid->addr.port == id->addr.port &&
+             cmpid->addr.domain_id == id->addr.domain_id &&
+             cmpid->partner == id->partner )
+        {
+            argo_dprintk("ring_info=%p\n", ring_info);
+            return ring_info;
+        }
+    }
+    argo_dprintk("no ring_info found\n");
+
+    return NULL;
+}
+
+static long
+argo_register_ring(struct domain *d,
+                   XEN_GUEST_HANDLE_PARAM(argo_ring_t) ring_hnd,
+                   XEN_GUEST_HANDLE_PARAM(argo_pfn_t) pfn_hnd, uint32_t npage,
+                   bool fail_exist)
+{
+    struct argo_ring ring;
+    struct argo_ring_info *ring_info;
+    int ret = 0;
+    bool update_tx_ptr = false;
+    uint64_t dst_domain_cookie = 0;
+
+    if ( !(guest_handle_is_aligned(ring_hnd, ~PAGE_MASK)) )
+        return -EINVAL;
+
+    read_lock(&argo_lock);
+
+    do {
+        if ( !d->argo )
+        {
+            ret = -ENODEV;
+            break;
+        }
+
+        if ( copy_from_guest(&ring, ring_hnd, 1) )
+        {
+            ret = -EFAULT;
+            break;
+        }
+
+        if ( ring.magic != ARGO_RING_MAGIC )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        if ( (ring.len < (sizeof(struct argo_ring_message_header)
+                          + ARGO_ROUNDUP(1) + ARGO_ROUNDUP(1)))   ||
+             (ARGO_ROUNDUP(ring.len) != ring.len) )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        if ( ring.len > ARGO_MAX_RING_SIZE )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        if ( ring.id.partner == ARGO_DOMID_ANY )
+        {
+            ret = xsm_argo_register_any_source(d, argo_mac_bootparam_enforcing);
+            if ( ret )
+                break;
+        }
+        else
+        {
+            struct domain *dst_d = get_domain_by_id(ring.id.partner);
+            if ( !dst_d )
+            {
+                argo_dprintk("!dst_d, ECONNREFUSED\n");
+                ret = -ECONNREFUSED;
+                break;
+            }
+
+            ret = xsm_argo_register_single_source(d, dst_d);
+            if ( ret )
+            {
+                put_domain(dst_d);
+                break;
+            }
+
+            if ( !dst_d->argo )
+            {
+                argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
+                ret = -ECONNREFUSED;
+                put_domain(dst_d);
+                break;
+            }
+
+            dst_domain_cookie = dst_d->argo->domain_cookie;
+
+            put_domain(dst_d);
+        }
+
+        ring.id.addr.domain_id = d->domain_id;
+        if ( copy_field_to_guest(ring_hnd, &ring, id) )
+        {
+            ret = -EFAULT;
+            break;
+        }
+
+        /*
+         * No need for a lock yet, because only we know about this ring.
+         * Set the tx pointer if it looks bogus (we don't simply reset it
+         * because this might be a re-register after S4).
+         */
+
+        if ( ring.tx_ptr >= ring.len ||
+             ARGO_ROUNDUP(ring.tx_ptr) != ring.tx_ptr )
+        {
+            /*
+             * Since the ring is a mess, attempt to flush the contents of it
+             * here by setting the tx_ptr to the next aligned message slot past
+             * the latest rx_ptr we have observed. Handle ring wrap correctly.
+             */
+            ring.tx_ptr = ARGO_ROUNDUP(ring.rx_ptr);
+
+            if ( ring.tx_ptr >= ring.len )
+                ring.tx_ptr = 0;
+
+            /* ring.tx_ptr will be written back to the guest ring below. */
+            update_tx_ptr = true;
+        }
+
+        /* W(L2) protects all the elements of the domain's ring_info */
+        write_lock(&d->argo->lock);
+
+        do {
+            ring_info = argo_ring_find_info(d, &ring.id);
+
+            if ( !ring_info )
+            {
+                uint16_t hash;
+
+                ring_info = xmalloc(struct argo_ring_info);
+                if ( !ring_info )
+                {
+                    ret = -ENOMEM;
+                    break;
+                }
+
+                spin_lock_init(&ring_info->lock);
+
+                ring_info->mfns = NULL;
+                ring_info->npage = 0;
+                ring_info->mfn_mapping = NULL;
+                ring_info->len = 0;
+                ring_info->nmfns = 0;
+                ring_info->tx_ptr = 0;
+                ring_info->partner_cookie = dst_domain_cookie;
+
+                ring_info->id = ring.id;
+                INIT_HLIST_HEAD(&ring_info->pending);
+
+                hash = argo_hash_fn(&ring_info->id);
+                hlist_add_head(&ring_info->node, &d->argo->ring_hash[hash]);
+
+                printk(XENLOG_INFO "argo: vm%u registering ring (vm%u:%x 
vm%d)\n",
+                       current->domain->domain_id, ring.id.addr.domain_id,
+                       ring.id.addr.port, ring.id.partner);
+            }
+            else
+            {
+                /*
+                 * If the caller specified that the ring must not already exist,
+                 * fail the attempt to add a completed ring which already exists.
+                 */
+                if ( fail_exist && ring_info->len )
+                {
+                    ret = -EEXIST;
+                    break;
+                }
+
+                printk(XENLOG_INFO
+                    "argo: vm%u re-registering existing ring (vm%u:%x vm%d)\n",
+                     current->domain->domain_id, ring.id.addr.domain_id,
+                     ring.id.addr.port, ring.id.partner);
+            }
+
+            /* Since we hold W(L2), there is no need to take L3 here */
+            ring_info->tx_ptr = ring.tx_ptr;
+
+            ret = argo_find_ring_mfns(d, ring_info, npage, pfn_hnd, ring.len);
+            if ( !ret )
+                ret = update_tx_ptr ? argo_update_tx_ptr(ring_info, ring.tx_ptr)
+                                    : argo_ring_map_page(ring_info, 0, NULL);
+            if ( !ret )
+                ring_info->len = ring.len;
+
+        } while ( 0 );
+
+        write_unlock(&d->argo->lock);
+
+    } while ( 0 );
+
+    read_unlock(&argo_lock);
+
+    return ret;
+}
+
 long
 do_argo_message_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
                    XEN_GUEST_HANDLE_PARAM(void) arg2,
@@ -253,6 +723,34 @@ do_argo_message_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
 
     switch (cmd)
     {
+    case ARGO_MESSAGE_OP_register_ring:
+    {
+        XEN_GUEST_HANDLE_PARAM(argo_ring_t) ring_hnd =
+            guest_handle_cast(arg1, argo_ring_t);
+        XEN_GUEST_HANDLE_PARAM(argo_pfn_t) pfn_hnd =
+            guest_handle_cast(arg2, argo_pfn_t);
+        uint32_t npage = arg3;
+        bool fail_exist = arg4 & ARGO_REGISTER_FLAG_FAIL_EXIST;
+
+        if ( unlikely(!guest_handle_okay(ring_hnd, 1)) )
+            break;
+        if ( unlikely(npage > (ARGO_MAX_RING_SIZE >> PAGE_SHIFT)) )
+        {
+            rc = -EINVAL;
+            break;
+        }
+        if ( unlikely(!guest_handle_okay(pfn_hnd, npage)) )
+            break;
+        /* arg4: reserve currently-undefined bits, require zero.  */
+        if ( unlikely(arg4 & ~ARGO_REGISTER_FLAG_MASK) )
+        {
+            rc = -EINVAL;
+            break;
+        }
+
+        rc = argo_register_ring(d, ring_hnd, pfn_hnd, npage, fail_exist);
+        break;
+    }
     default:
         rc = -ENOSYS;
         break;
diff --git a/xen/include/asm-arm/guest_access.h b/xen/include/asm-arm/guest_access.h
index 1137c54..98006f8 100644
--- a/xen/include/asm-arm/guest_access.h
+++ b/xen/include/asm-arm/guest_access.h
@@ -34,6 +34,8 @@ int access_guest_memory_by_ipa(struct domain *d, paddr_t ipa, void *buf,
 /* Is the guest handle a NULL reference? */
 #define guest_handle_is_null(hnd)        ((hnd).p == NULL)
 
+#define guest_handle_is_aligned(hnd, mask) (!((uintptr_t)(hnd).p & (mask)))
+
 /* Offset the given guest handle into the array it refers to. */
 #define guest_handle_add_offset(hnd, nr) ((hnd).p += (nr))
 #define guest_handle_subtract_offset(hnd, nr) ((hnd).p -= (nr))
diff --git a/xen/include/asm-x86/guest_access.h b/xen/include/asm-x86/guest_access.h
index 9391cd3..e9d25d6 100644
--- a/xen/include/asm-x86/guest_access.h
+++ b/xen/include/asm-x86/guest_access.h
@@ -50,6 +50,8 @@
 /* Is the guest handle a NULL reference? */
 #define guest_handle_is_null(hnd)        ((hnd).p == NULL)
 
+#define guest_handle_is_aligned(hnd, mask) (!((uintptr_t)(hnd).p & (mask)))
+
 /* Offset the given guest handle into the array it refers to. */
 #define guest_handle_add_offset(hnd, nr) ((hnd).p += (nr))
 #define guest_handle_subtract_offset(hnd, nr) ((hnd).p -= (nr))
diff --git a/xen/include/public/argo.h b/xen/include/public/argo.h
index 20dabc0..5ad8e2b 100644
--- a/xen/include/public/argo.h
+++ b/xen/include/public/argo.h
@@ -21,6 +21,20 @@
 
 #include "xen.h"
 
+#define ARGO_RING_MAGIC      0xbd67e163e7777f2fULL
+
+#define ARGO_DOMID_ANY           DOMID_INVALID
+
+/*
+ * The maximum size of an Argo ring is defined to be: 16MB
+ *  -- which is 0x1000000 or 16777216 bytes.
+ * A byte index into the ring is at most 24 bits.
+ */
+#define ARGO_MAX_RING_SIZE  (16777216ULL)
+
+/* pfn type: 64-bit on all architectures to aid avoiding a compat ABI */
+typedef uint64_t argo_pfn_t;
+
 typedef struct argo_addr
 {
     uint32_t port;
@@ -52,4 +66,54 @@ typedef struct argo_ring
 #endif
 } argo_ring_t;
 
+/*
+ * Messages on the ring are padded to a 128-bit boundary.
+ * Len here refers to the exact length of the data, not including the
+ * 128-bit message header. The message uses
+ * ((len + 0xf) & ~0xf) + sizeof(struct argo_ring_message_header) bytes.
+ * Using typeof(a) makes it clear that this does not truncate any high-order bits.
+ */
+#define ARGO_ROUNDUP(a) (((a) + 0xf) & ~(typeof(a))0xf)
+
+struct argo_ring_message_header
+{
+    uint32_t len;
+    argo_addr_t source;
+    uint32_t message_type;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    uint8_t data[];
+#elif defined(__GNUC__)
+    uint8_t data[0];
+#endif
+};
+
+/*
+ * Hypercall operations
+ */
+
+/*
+ * ARGO_MESSAGE_OP_register_ring
+ *
+ * Register a ring using the indicated memory.
+ * Also used to reregister an existing ring (e.g. after resume from sleep).
+ *
+ * arg1: XEN_GUEST_HANDLE(argo_ring_t)
+ * arg2: XEN_GUEST_HANDLE(argo_pfn_t)
+ * arg3: uint32_t npages
+ * arg4: uint32_t flags
+ */
+#define ARGO_MESSAGE_OP_register_ring     1
+
+/* Register op flags */
+/*
+ * Fail exist:
+ * If set, reject attempts to (re)register an existing established ring.
+ * If clear, reregistration occurs if the ring exists, with the new ring
+ * taking the place of the old, preserving tx_ptr if it remains valid.
+ */
+#define ARGO_REGISTER_FLAG_FAIL_EXIST  0x1
+
+/* Mask for all defined flags */
+#define ARGO_REGISTER_FLAG_MASK ARGO_REGISTER_FLAG_FAIL_EXIST
+
 #endif
-- 
2.1.4

