
[Xen-devel] [PATCH v7 6/7] x86: add xen_iommu_ops to modify IOMMU mappings



This patch adds xen_iommu_ops to add (map) and remove (unmap) frames in the
domain's IOMMU mappings. Modification of the mappings must first be enabled
with the new XEN_IOMMUOP_enable_modification operation, which is only
permitted if the domain has its own IOMMU page tables (i.e. they are not
shared with HAP).

Currently the flags value for each op must include the
XEN_IOMMUOP_map/unmap_all flag, as the implementation does not yet support
per-device mappings. The sbdf field of each op is accordingly ignored.

Mappings added by the map operation are tracked and only those mappings
may be removed by a subsequent unmap operation. Frames are specified by the
owning domain and GFN. It is, of course, permissible for a domain to map
and unmap its own frames using DOMID_SELF.

NOTE: The owning domain and GFN must also be specified in the unmap
      operation, as well as the DFN, so that they can be cross-checked
      against the existing mapping.
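
For illustration only (not part of this patch), a guest might drive the new
interface roughly as follows. This is a sketch: it assumes that
XEN_IOMMUOP_enable_modification has already been issued successfully, that
xen_iommu_op_buf_t carries the handle/size pair introduced earlier in this
series, and that issue_iommu_op_bufs() stands in for whatever hypercall stub
or privcmd plumbing is actually used to invoke __HYPERVISOR_iommu_op.

/*
 * Sketch only: map one of the calling domain's own frames at a chosen
 * DFN, then remove the mapping again. Assumes the public xen.h and
 * iommu_op.h headers as modified by this series; guest-handle bounce
 * buffering is elided and issue_iommu_op_bufs() is hypothetical.
 */
static int example_map_unmap(xen_dfn_t dfn, xen_pfn_t gfn)
{
    xen_iommu_op_t op = {
        .op = XEN_IOMMUOP_map,
        .u.map = {
            .domid = DOMID_SELF,          /* frame owned by the caller */
            .flags = XEN_IOMMUOP_map_all, /* required until per-device
                                             mappings are supported */
            .dfn = dfn,
            .gfn = gfn,
        },
    };
    xen_iommu_op_buf_t buf = { .size = sizeof(op) };
    int rc;

    set_xen_guest_handle_raw(buf.h, &op);

    rc = issue_iommu_op_bufs(&buf, 1); /* hypothetical wrapper */
    if ( rc || op.status )
        return rc ? rc : op.status;

    /* The unmap repeats domid and gfn so they can be cross-checked. */
    op.op = XEN_IOMMUOP_unmap;
    op.status = 0;
    op.u.unmap = (struct xen_iommu_op_unmap) {
        .domid = DOMID_SELF,
        .flags = XEN_IOMMUOP_unmap_all,
        .dfn = dfn,
        .gfn = gfn,
    };

    rc = issue_iommu_op_bufs(&buf, 1);
    return rc ? rc : op.status;
}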

Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: George Dunlap <George.Dunlap@xxxxxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Julien Grall <julien.grall@xxxxxxx>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx>
Cc: Tim Deegan <tim@xxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>

v7:
 - Get rid of explicit flush xen_iommu_op. Flush at the end of a batch
   instead.

v6:
 - Add placeholder sbdf field and flag to control scope of map, unmap and
   flush.

v4:
 - Fixed logic inversion when checking return of iommu_unmap_page().

v3:
 - Add type pinning.

v2:
 - Heavily re-worked in v2, including explicit tracking of mappings.
   This avoids the need to clear non-reserved mappings from the IOMMU
   at start of day, which would be prohibitively slow on a large host.
---
 xen/common/iommu_op.c           | 280 ++++++++++++++++++++++++++++++++++++++--
 xen/drivers/passthrough/iommu.c |   2 +
 xen/include/public/iommu_op.h   |  98 ++++++++++++++
 xen/include/xen/iommu.h         |   6 +
 xen/include/xlat.lst            |   3 +
 5 files changed, 376 insertions(+), 13 deletions(-)

diff --git a/xen/common/iommu_op.c b/xen/common/iommu_op.c
index 9d914a67db..0876414df5 100644
--- a/xen/common/iommu_op.c
+++ b/xen/common/iommu_op.c
@@ -78,7 +78,205 @@ static int iommu_op_query_reserved(struct xen_iommu_op_query_reserved *op)
     return 0;
 }
 
-static void iommu_op(xen_iommu_op_t *op)
+static int iommu_op_enable_modification(
+    struct xen_iommu_op_enable_modification *op)
+{
+    struct domain *currd = current->domain;
+    struct domain_iommu *iommu = dom_iommu(currd);
+    const struct iommu_ops *ops = iommu->platform_ops;
+    int rc;
+
+    if ( op->cap || op->pad )
+        return -EINVAL;
+
+    spin_lock(&iommu->lock);
+
+    /* Has modification already been enabled? */
+    rc = 0;
+    if ( iommu->domain_control )
+        goto unlock;
+
+    /*
+     * Modification of IOMMU mappings cannot be put under domain control if:
+     * - this domain does not have IOMMU page tables, or
+     * - HAP is enabled for this domain and the IOMMU shares the tables.
+     */
+    rc = -EACCES;
+    if ( !has_iommu_pt(currd) || iommu_use_hap_pt(currd) )
+        goto unlock;
+
+    /*
+     * The IOMMU implementation must provide the lookup method if
+     * modification of the mappings is to be supported.
+     */
+    rc = -EOPNOTSUPP;
+    if ( !ops->lookup_page )
+        goto unlock;
+
+    rc = 0;
+    iommu->need_sync = false; /* Disable synchronization, if enabled */
+    iommu->domain_control = true; /* Enable control */
+
+ unlock:
+    /*
+     * XEN_IOMMU_CAP_per_device_mappings is not supported yet so we can
+     * leave op->cap alone.
+     */
+
+    spin_unlock(&iommu->lock);
+
+    return rc;
+}
+
+static int iommuop_map(struct xen_iommu_op_map *op)
+{
+    struct domain *d, *currd = current->domain;
+    struct domain_iommu *iommu = dom_iommu(currd);
+    bool readonly = op->flags & XEN_IOMMUOP_map_readonly;
+    dfn_t dfn = _dfn(op->dfn);
+    p2m_type_t p2mt;
+    struct page_info *page;
+    mfn_t ignore;
+    unsigned int flags;
+    int rc;
+
+    if ( op->pad || (op->flags & ~(XEN_IOMMUOP_map_all |
+                                   XEN_IOMMUOP_map_readonly)) )
+        return -EINVAL;
+
+    if ( !iommu->domain_control )
+        return -EOPNOTSUPP;
+
+    /* Per-device mapping not yet supported */
+    if ( !(op->flags & XEN_IOMMUOP_map_all) )
+        return -EINVAL;
+
+    /* Check whether the specified DFN falls in a reserved region */
+    if ( rangeset_contains_singleton(iommu->reserved_ranges, dfn_x(dfn)) )
+        return -EINVAL;
+
+    d = rcu_lock_domain_by_any_id(op->domid);
+    if ( !d )
+        return -ESRCH;
+
+    rc = check_get_page_from_gfn(d, _gfn(op->gfn), readonly, &p2mt, &page);
+    if ( rc )
+        goto unlock_domain;
+
+    rc = -EINVAL;
+    if ( p2mt != p2m_ram_rw ||
+         (!readonly && !get_page_type(page, PGT_writable_page)) )
+    {
+        put_page(page);
+        goto unlock_domain;
+    }
+
+    spin_lock(&iommu->lock);
+
+    rc = iommu_lookup_page(currd, dfn, &ignore, &flags);
+
+    /* Treat a non-reference-counted entry as non-existent */
+    if ( !rc )
+        rc = !(flags & IOMMUF_refcount) ? -ENOENT : -EEXIST;
+
+    if ( rc != -ENOENT )
+        goto unlock_iommu;
+
+    flags = IOMMUF_readable | IOMMUF_refcount;
+    if ( !readonly )
+        flags |= IOMMUF_writable;
+
+    rc = iommu_map_page_nocrash(currd, dfn, page_to_mfn(page), flags);
+
+ unlock_iommu:
+    spin_unlock(&iommu->lock);
+
+    if ( rc ) /* retain references if mapping is successful */
+    {
+        if ( !readonly )
+            put_page_type(page);
+        put_page(page);
+    }
+
+ unlock_domain:
+    rcu_unlock_domain(d);
+    return rc;
+}
+
+static int iommuop_unmap(struct xen_iommu_op_unmap *op)
+{
+    struct domain *d, *currd = current->domain;
+    struct domain_iommu *iommu = dom_iommu(currd);
+    dfn_t dfn = _dfn(op->dfn);
+    mfn_t mfn;
+    unsigned int flags;
+    bool readonly;
+    p2m_type_t p2mt;
+    struct page_info *page;
+    int rc;
+
+    if ( op->pad ||
+         (op->flags & ~XEN_IOMMUOP_unmap_all) )
+        return -EINVAL;
+
+    if ( !iommu->domain_control )
+        return -EOPNOTSUPP;
+
+    /* Per-device unmapping not yet supported */
+    if ( !(op->flags & XEN_IOMMUOP_unmap_all) )
+        return -EINVAL;
+
+    d = rcu_lock_domain_by_any_id(op->domid);
+    if ( !d )
+        return -ESRCH;
+
+    spin_lock(&iommu->lock);
+
+    rc = iommu_lookup_page(currd, dfn, &mfn, &flags);
+
+    /* Treat a non-reference-counted entry as non-existent */
+    if ( !rc )
+        rc = !(flags & IOMMUF_refcount) ? -ENOENT : 0;
+
+    if ( rc )
+        goto unlock;
+
+    readonly = !(flags & IOMMUF_writable);
+
+    /* Make sure the mapped frame matches */
+    rc = check_get_page_from_gfn(d, _gfn(op->gfn), readonly, &p2mt, &page);
+    if ( rc )
+        goto unlock;
+
+    rc = !mfn_eq(mfn, page_to_mfn(page)) ? -EINVAL : 0;
+
+    /* Release reference taken above */
+    put_page(page);
+
+    if ( rc )
+        goto unlock;
+
+    /* Release references taken in map */
+    if ( !readonly )
+        put_page_type(page);
+    put_page(page);
+
+    /*
+     * This really should not fail. If it does, there is an implicit
+     * domain_crash() (except in the case of the hardware domain) since
+     * there is not a lot else that can be done to ensure the released
+     * page can be safely re-used.
+     */
+    rc = iommu_unmap_page(currd, dfn);
+
+ unlock:
+    spin_unlock(&iommu->lock);
+    rcu_unlock_domain(d);
+
+    return rc;
+}
+
+static void iommu_op(xen_iommu_op_t *op, bool *need_flush)
 {
     switch ( op->op )
     {
@@ -86,13 +284,30 @@ static void iommu_op(xen_iommu_op_t *op)
         op->status = iommu_op_query_reserved(&op->u.query_reserved);
         break;
 
+    case XEN_IOMMUOP_enable_modification:
+        op->status =
+            iommu_op_enable_modification(&op->u.enable_modification);
+        break;
+
+    case XEN_IOMMUOP_map:
+        op->status = iommuop_map(&op->u.map);
+        if ( !op->status )
+            *need_flush = true;
+        break;
+
+    case XEN_IOMMUOP_unmap:
+        op->status = iommuop_unmap(&op->u.unmap);
+        if ( !op->status )
+            *need_flush = true;
+        break;
+
     default:
         op->status = -EOPNOTSUPP;
         break;
     }
 }
 
-int do_one_iommu_op(xen_iommu_op_buf_t *buf)
+int do_one_iommu_op(xen_iommu_op_buf_t *buf, bool *need_flush)
 {
     const XEN_GUEST_HANDLE(xen_iommu_op_t) h =
         guest_handle_cast(buf->h, xen_iommu_op_t);
@@ -101,6 +316,10 @@ int do_one_iommu_op(xen_iommu_op_buf_t *buf)
     static const size_t op_size[] = {
         [XEN_IOMMUOP_query_reserved] =
             sizeof(struct xen_iommu_op_query_reserved),
+        [XEN_IOMMUOP_enable_modification] =
+            sizeof(struct xen_iommu_op_enable_modification),
+        [XEN_IOMMUOP_map] = sizeof(struct xen_iommu_op_map),
+        [XEN_IOMMUOP_unmap] = sizeof(struct xen_iommu_op_unmap),
     };
     size_t size;
     int rc;
@@ -130,10 +349,12 @@ int do_one_iommu_op(xen_iommu_op_buf_t *buf)
     if ( copy_from_guest_offset((void *)&op.u, buf->h, offset, size) )
         return -EFAULT;
 
-    iommu_op(&op);
+    iommu_op(&op, need_flush);
 
-    if ( op.op == XEN_IOMMUOP_query_reserved &&
-         __copy_field_to_guest(h, &op, u.query_reserved.nr_entries) )
+    if ( (op.op == XEN_IOMMUOP_query_reserved &&
+          __copy_field_to_guest(h, &op, u.query_reserved.nr_entries)) ||
+         (op.op == XEN_IOMMUOP_enable_modification &&
+          __copy_field_to_guest(h, &op, u.enable_modification.cap)) )
         return -EFAULT;
 
     if ( __copy_field_to_guest(h, &op, status) )
@@ -146,8 +367,11 @@ long do_iommu_op(unsigned int nr_bufs,
                  XEN_GUEST_HANDLE_PARAM(xen_iommu_op_buf_t) bufs)
 {
     unsigned int i;
+    bool need_flush = false;
     long rc = 0;
 
+    this_cpu(iommu_dont_flush_iotlb) = 1;
+
     for ( i = 0; i < nr_bufs; i++ )
     {
         xen_iommu_op_buf_t buf;
@@ -164,11 +388,13 @@ long do_iommu_op(unsigned int nr_bufs,
             break;
         }
 
-        rc = do_one_iommu_op(&buf);
+        rc = do_one_iommu_op(&buf, &need_flush);
         if ( rc )
             break;
     }
 
+    this_cpu(iommu_dont_flush_iotlb) = 0;
+
     if ( rc > 0 )
     {
         ASSERT(rc < nr_bufs);
@@ -177,7 +403,8 @@ long do_iommu_op(unsigned int nr_bufs,
 
         rc = hypercall_create_continuation(__HYPERVISOR_iommu_op,
                                            "ih", nr_bufs, bufs);
-    }
+    } else if ( !rc && need_flush )
+        rc = iommu_iotlb_flush_all(current->domain);
 
     return rc;
 }
@@ -186,7 +413,7 @@ long do_iommu_op(unsigned int nr_bufs,
 
 CHECK_iommu_reserved_range;
 
-int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
+int compat_one_iommu_op(compat_iommu_op_buf_t *buf, bool *need_flush)
 {
     const COMPAT_HANDLE(compat_iommu_op_t) h =
         compat_handle_cast(buf->h, compat_iommu_op_t);
@@ -195,6 +422,10 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
     static const size_t op_size[] = {
         [XEN_IOMMUOP_query_reserved] =
             sizeof(struct compat_iommu_op_query_reserved),
+        [XEN_IOMMUOP_enable_modification] =
+            sizeof(struct compat_iommu_op_enable_modification),
+        [XEN_IOMMUOP_map] = sizeof(struct compat_iommu_op_map),
+        [XEN_IOMMUOP_unmap] = sizeof(struct compat_iommu_op_unmap),
     };
     size_t size;
     xen_iommu_op_t nat;
@@ -228,9 +459,15 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
 
     /*
      * The xlat magic doesn't quite know how to handle the union so
-     * we need to fix things up here.
+     * we need to fix things up here. Also, none of the sub-ops, apart from
+     * query_reserved, actually need any translation but the xlat magic
+     * can't deal with that either so all sub-ops must be marked for
+     * translation in xlat.lst.
      */
 #define XLAT_iommu_op_u_query_reserved XEN_IOMMUOP_query_reserved
+#define XLAT_iommu_op_u_enable_modification XEN_IOMMUOP_enable_modification
+#define XLAT_iommu_op_u_map XEN_IOMMUOP_map
+#define XLAT_iommu_op_u_unmap XEN_IOMMUOP_unmap
     u = cmp.op;
 
 #define XLAT_iommu_op_query_reserved_HNDL_ranges(_d_, _s_)            \
@@ -258,9 +495,12 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
     XLAT_iommu_op(&nat, &cmp);
 
 #undef XLAT_iommu_op_query_reserved_HNDL_ranges
+#undef XLAT_iommu_op_u_unmap
+#undef XLAT_iommu_op_u_map
+#undef XLAT_iommu_op_u_enable_modification
 #undef XLAT_iommu_op_u_query_reserved
 
-    iommu_op(&nat);
+    iommu_op(&nat, need_flush);
 
 #define XLAT_iommu_op_query_reserved_HNDL_ranges(_d_, _s_)               \
     do                                                                   \
@@ -282,7 +522,8 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
     /*
      * Avoid the full (and lengthy) XLAT code as the only things that
      * need copying back are the reserved ranges (in the case of the
-     * query op) and the status field (for all ops).
+     * query op), capabilities (in the case of the enable op) and the
+     * status field (for all ops).
      */
     cmp.status = nat.status;
 
@@ -296,6 +537,13 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
         if ( __copy_field_to_compat(h, &cmp, u.query_reserved.nr_entries) )
             return -EFAULT;
     }
+    else if ( cmp.op == XEN_IOMMUOP_enable_modification )
+    {
+        cmp.u.enable_modification.cap = nat.u.enable_modification.cap;
+
+        if ( __copy_field_to_compat(h, &cmp, u.enable_modification.cap) )
+            return -EFAULT;
+    }
 
 #undef XLAT_iommu_op_query_reserved_HNDL_ranges
 
@@ -309,8 +557,11 @@ int compat_iommu_op(unsigned int nr_bufs,
                     XEN_GUEST_HANDLE_PARAM(compat_iommu_op_buf_t) bufs)
 {
     unsigned int i;
+    bool need_flush = false;
     long rc = 0;
 
+    this_cpu(iommu_dont_flush_iotlb) = 1;
+
     for ( i = 0; i < nr_bufs; i++ )
     {
         compat_iommu_op_buf_t buf;
@@ -327,11 +578,13 @@ int compat_iommu_op(unsigned int nr_bufs,
             break;
         }
 
-        rc = compat_one_iommu_op(&buf);
+        rc = compat_one_iommu_op(&buf, &need_flush);
         if ( rc )
             break;
     }
 
+    this_cpu(iommu_dont_flush_iotlb) = 0;
+
     if ( rc > 0 )
     {
         ASSERT(rc < nr_bufs);
@@ -340,7 +593,8 @@ int compat_iommu_op(unsigned int nr_bufs,
 
         rc = hypercall_create_continuation(__HYPERVISOR_iommu_op,
                                            "ih", nr_bufs, bufs);
-    }
+    } else if ( !rc && need_flush )
+        rc = iommu_iotlb_flush_all(current->domain);
 
     return rc;
 }
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index bc67cfe843..47c608cc89 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -184,6 +184,8 @@ int iommu_domain_init(struct domain *d)
     if ( !hd->reserved_ranges )
         return -ENOMEM;
 
+    spin_lock_init(&hd->lock);
+
     hd->platform_ops = iommu_get_ops();
     return hd->platform_ops->init(d);
 }
diff --git a/xen/include/public/iommu_op.h b/xen/include/public/iommu_op.h
index 001f515bb3..fcca47e8d2 100644
--- a/xen/include/public/iommu_op.h
+++ b/xen/include/public/iommu_op.h
@@ -61,6 +61,101 @@ struct xen_iommu_op_query_reserved {
     XEN_GUEST_HANDLE(xen_iommu_reserved_range_t) ranges;
 };
 
+/*
+ * XEN_IOMMUOP_enable_modification: Enable operations that modify IOMMU
+ *                                  mappings.
+ */
+#define XEN_IOMMUOP_enable_modification 2
+
+struct xen_iommu_op_enable_modification {
+    /*
+     * OUT - On successful return this is set to the bitwise OR of capabilities
+     *       defined below. On entry this must be set to zero.
+     */
+    uint32_t cap;
+    uint32_t pad;
+
+    /* Does the implementation support per-device mappings? */
+#define _XEN_IOMMU_CAP_per_device_mappings 0
+#define XEN_IOMMU_CAP_per_device_mappings (1u << _XEN_IOMMU_CAP_per_device_mappings)
+};
+
+/*
+ * XEN_IOMMUOP_map: Map a guest page in the IOMMU.
+ */
+#define XEN_IOMMUOP_map 3
+
+struct xen_iommu_op_map {
+    /* IN - The domid of the guest */
+    domid_t domid;
+    /*
+     * IN - flags controlling the mapping. This should be a bitwise OR of the
+     *      flags defined below.
+     */
+    uint16_t flags;
+
+    /*
+     * Should the mapping be created for all initiators?
+     *
+     * NOTE: This flag is currently required as the implementation does not yet
+     *       support per-device mappings.
+     */
+#define _XEN_IOMMUOP_map_all 0
+#define XEN_IOMMUOP_map_all (1 << (_XEN_IOMMUOP_map_all))
+
+    /* Should the mapping be read-only to the initiator(s)? */
+#define _XEN_IOMMUOP_map_readonly 1
+#define XEN_IOMMUOP_map_readonly (1 << (_XEN_IOMMUOP_map_readonly))
+
+    uint32_t pad;
+    /*
+     * IN - Segment/Bus/Device/Function of the initiator.
+     *
+     * NOTE: This is ignored if XEN_IOMMUOP_map_all is set.
+     */
+    uint64_t sbdf;
+    /* IN - The IOMMU frame number which will hold the new mapping */
+    xen_dfn_t dfn;
+    /* IN - The guest frame number of the page to be mapped */
+    xen_pfn_t gfn;
+};
+
+/*
+ * XEN_IOMMUOP_unmap: Remove a mapping from the IOMMU.
+ */
+#define XEN_IOMMUOP_unmap 4
+
+struct xen_iommu_op_unmap {
+    /* IN - The domid of the guest */
+    domid_t domid;
+    /*
+     * IN - flags controlling the unmapping. This should be a bitwise OR of the
+     *      flags defined below.
+     */
+    uint16_t flags;
+
+    /*
+     * Should the mapping be destroyed for all initiators?
+     *
+     * NOTE: This flag is currently required as the implementation does not yet
+     *       support per-device mappings.
+     */
+#define _XEN_IOMMUOP_unmap_all 0
+#define XEN_IOMMUOP_unmap_all (1 << (_XEN_IOMMUOP_unmap_all))
+
+    uint32_t pad;
+    /*
+     * IN - Segment/Bus/Device/Function of the initiator.
+     *
+     * NOTE: This is ignored if XEN_IOMMUOP_unmap_all is set.
+     */
+    uint64_t sbdf;
+    /* IN - The IOMMU frame number which holds the mapping to be removed */
+    xen_dfn_t dfn;
+    /* IN - The guest frame number of the page that is mapped */
+    xen_pfn_t gfn;
+};
+
 struct xen_iommu_op {
     uint16_t op;    /* op type */
     uint16_t pad;
@@ -68,6 +163,9 @@ struct xen_iommu_op {
                     /* 0 for success otherwise, negative errno */
     union {
         struct xen_iommu_op_query_reserved query_reserved;
+        struct xen_iommu_op_enable_modification enable_modification;
+        struct xen_iommu_op_map map;
+        struct xen_iommu_op_unmap unmap;
     } u;
 };
 typedef struct xen_iommu_op xen_iommu_op_t;
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index a56d03b719..a04c312aeb 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -143,6 +143,12 @@ struct domain_iommu {
      * must not be modified after initialization.
      */
     struct rangeset *reserved_ranges;
+
+    /*
+     * PV-IOMMU fields
+     */
+    bool domain_control;
+    spinlock_t lock;
 };
 
 #define dom_iommu(d)              (&(d)->iommu)
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index d2f9b1034b..3f5b0ac004 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -79,7 +79,10 @@
 ?      vcpu_hvm_x86_64                 hvm/hvm_vcpu.h
 !      iommu_op                        iommu_op.h
 !      iommu_op_buf                    iommu_op.h
+!      iommu_op_enable_modification    iommu_op.h
+!      iommu_op_map                    iommu_op.h
 !      iommu_op_query_reserved         iommu_op.h
+!      iommu_op_unmap                  iommu_op.h
 ?      iommu_reserved_range            iommu_op.h
 ?      kexec_exec                      kexec.h
 !      kexec_image                     kexec.h
-- 
2.11.0

