[Xen-devel] [PATCH v7 6/7] x86: add xen_iommu_ops to modify IOMMU mappings
This patch adds iommu_ops to add (map) or remove (unmap) frames in the
domain's IOMMU mappings.

Currently the flags value for each op must include the
XEN_IOMMUOP_map/unmap_all flag, as the implementation does not yet support
per-device mappings. The sbdf field of each hypercall is accordingly
ignored.

Mappings added by the map operation are tracked, and only those mappings
may be removed by a subsequent unmap operation. Frames are specified by
the owning domain and GFN. It is, of course, permissible for a domain to
map and unmap its own frames using DOMID_SELF.

NOTE: The owning domain and GFN must also be specified in the unmap
      operation, as well as the DFN, so that they can be cross-checked
      with the existing mapping.

Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: George Dunlap <George.Dunlap@xxxxxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Julien Grall <julien.grall@xxxxxxx>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx>
Cc: Tim Deegan <tim@xxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>

v7:
 - Get rid of explicit flush xen_iommu_op. Flush at the end of a batch
   instead.

v6:
 - Add placeholder sbdf field and flag to control scope of map, unmap and
   flush.

v4:
 - Fixed logic inversion when checking return of iommu_unmap_page().

v3:
 - Add type pinning.

v2:
 - Heavily re-worked in v2, including explicit tracking of mappings. This
   avoids the need to clear non-reserved mappings from IOMMU at start of
   day, which would be prohibitively slow on a large host.
---
 xen/common/iommu_op.c           | 280 ++++++++++++++++++++++++++++++++++++++--
 xen/drivers/passthrough/iommu.c |   2 +
 xen/include/public/iommu_op.h   |  98 ++++++++++++++
 xen/include/xen/iommu.h         |   6 +
 xen/include/xlat.lst            |   3 +
 5 files changed, 376 insertions(+), 13 deletions(-)

diff --git a/xen/common/iommu_op.c b/xen/common/iommu_op.c
index 9d914a67db..0876414df5 100644
--- a/xen/common/iommu_op.c
+++ b/xen/common/iommu_op.c
@@ -78,7 +78,205 @@ static int iommu_op_query_reserved(struct xen_iommu_op_query_reserved *op)
     return 0;
 }
 
-static void iommu_op(xen_iommu_op_t *op)
+static int iommu_op_enable_modification(
+    struct xen_iommu_op_enable_modification *op)
+{
+    struct domain *currd = current->domain;
+    struct domain_iommu *iommu = dom_iommu(currd);
+    const struct iommu_ops *ops = iommu->platform_ops;
+    int rc;
+
+    if ( op->cap || op->pad )
+        return -EINVAL;
+
+    spin_lock(&iommu->lock);
+
+    /* Has modification already been enabled? */
+    rc = 0;
+    if ( iommu->domain_control )
+        goto unlock;
+
+    /*
+     * Modification of IOMMU mappings cannot be put under domain control if:
+     * - this domain does not have IOMMU page tables, or
+     * - HAP is enabled for this domain and the IOMMU shares the tables.
+     */
+    rc = -EACCES;
+    if ( !has_iommu_pt(currd) || iommu_use_hap_pt(currd) )
+        goto unlock;
+
+    /*
+     * The IOMMU implementation must provide the lookup method if
+     * modification of the mappings is to be supported.
+     */
+    rc = -EOPNOTSUPP;
+    if ( !ops->lookup_page )
+        goto unlock;
+
+    rc = 0;
+    iommu->need_sync = false; /* Disable synchronization, if enabled */
+    iommu->domain_control = true; /* Enable control */
+
+ unlock:
+    /*
+     * XEN_IOMMU_CAP_per_device_mappings is not supported yet so we can
+     * leave op->cap alone.
+     */
+
+    spin_unlock(&iommu->lock);
+
+    return rc;
+}
+
+static int iommuop_map(struct xen_iommu_op_map *op)
+{
+    struct domain *d, *currd = current->domain;
+    struct domain_iommu *iommu = dom_iommu(currd);
+    bool readonly = op->flags & XEN_IOMMUOP_map_readonly;
+    dfn_t dfn = _dfn(op->dfn);
+    p2m_type_t p2mt;
+    struct page_info *page;
+    mfn_t ignore;
+    unsigned int flags;
+    int rc;
+
+    if ( op->pad || (op->flags & ~(XEN_IOMMUOP_map_all |
+                                   XEN_IOMMUOP_map_readonly)) )
+        return -EINVAL;
+
+    if ( !iommu->domain_control )
+        return -EOPNOTSUPP;
+
+    /* Per-device mapping not yet supported */
+    if ( !(op->flags & XEN_IOMMUOP_map_all) )
+        return -EINVAL;
+
+    /* Check whether the specified DFN falls in a reserved region */
+    if ( rangeset_contains_singleton(iommu->reserved_ranges, dfn_x(dfn)) )
+        return -EINVAL;
+
+    d = rcu_lock_domain_by_any_id(op->domid);
+    if ( !d )
+        return -ESRCH;
+
+    rc = check_get_page_from_gfn(d, _gfn(op->gfn), readonly, &p2mt, &page);
+    if ( rc )
+        goto unlock_domain;
+
+    rc = -EINVAL;
+    if ( p2mt != p2m_ram_rw ||
+         (!readonly && !get_page_type(page, PGT_writable_page)) )
+    {
+        put_page(page);
+        goto unlock_domain;
+    }
+
+    spin_lock(&iommu->lock);
+
+    rc = iommu_lookup_page(currd, dfn, &ignore, &flags);
+
+    /* Treat a non-reference-counted entry as non-existent */
+    if ( !rc )
+        rc = !(flags & IOMMUF_refcount) ? -ENOENT : -EEXIST;
+
+    if ( rc != -ENOENT )
+        goto unlock_iommu;
+
+    flags = IOMMUF_readable | IOMMUF_refcount;
+    if ( !readonly )
+        flags |= IOMMUF_writable;
+
+    rc = iommu_map_page_nocrash(currd, dfn, page_to_mfn(page), flags);
+
+ unlock_iommu:
+    spin_unlock(&iommu->lock);
+
+    if ( rc ) /* retain references if mapping is successful */
+    {
+        if ( !readonly )
+            put_page_type(page);
+        put_page(page);
+    }
+
+ unlock_domain:
+    rcu_unlock_domain(d);
+    return rc;
+}
+
+static int iommuop_unmap(struct xen_iommu_op_unmap *op)
+{
+    struct domain *d, *currd = current->domain;
+    struct domain_iommu *iommu = dom_iommu(currd);
+    dfn_t dfn = _dfn(op->dfn);
+    mfn_t mfn;
+    unsigned int flags;
+    bool readonly;
+    p2m_type_t p2mt;
+    struct page_info *page;
+    int rc;
+
+    if ( op->pad ||
+         (op->flags & ~XEN_IOMMUOP_unmap_all) )
+        return -EINVAL;
+
+    if ( !iommu->domain_control )
+        return -EOPNOTSUPP;
+
+    /* Per-device unmapping not yet supported */
+    if ( !(op->flags & XEN_IOMMUOP_unmap_all) )
+        return -EINVAL;
+
+    d = rcu_lock_domain_by_any_id(op->domid);
+    if ( !d )
+        return -ESRCH;
+
+    spin_lock(&iommu->lock);
+
+    rc = iommu_lookup_page(currd, dfn, &mfn, &flags);
+
+    /* Treat a non-reference-counted entry as non-existent */
+    if ( !rc )
+        rc = !(flags & IOMMUF_refcount) ? -ENOENT : 0;
+
+    if ( rc )
+        goto unlock;
+
+    readonly = !(flags & IOMMUF_writable);
+
+    /* Make sure the mapped frame matches */
+    rc = check_get_page_from_gfn(d, _gfn(op->gfn), readonly, &p2mt, &page);
+    if ( rc )
+        goto unlock;
+
+    rc = !mfn_eq(mfn, page_to_mfn(page)) ? -EINVAL : 0;
+
+    /* Release reference taken above */
+    put_page(page);
+
+    if ( rc )
+        goto unlock;
+
+    /* Release references taken in map */
+    if ( !readonly )
+        put_page_type(page);
+    put_page(page);
+
+    /*
+     * This really should not fail. If it does, there is an implicit
+     * domain_crash() (except in the case of the hardware domain) since
+     * there is not a lot else that can be done to ensure the released
+     * page can be safely re-used.
+     */
+    rc = iommu_unmap_page(currd, dfn);
+
+ unlock:
+    spin_unlock(&iommu->lock);
+    rcu_unlock_domain(d);
+
+    return rc;
+}
+
+static void iommu_op(xen_iommu_op_t *op, bool *need_flush)
 {
     switch ( op->op )
     {
@@ -86,13 +284,30 @@ static void iommu_op(xen_iommu_op_t *op)
         op->status = iommu_op_query_reserved(&op->u.query_reserved);
         break;
 
+    case XEN_IOMMUOP_enable_modification:
+        op->status =
+            iommu_op_enable_modification(&op->u.enable_modification);
+        break;
+
+    case XEN_IOMMUOP_map:
+        op->status = iommuop_map(&op->u.map);
+        if ( !op->status )
+            *need_flush = true;
+        break;
+
+    case XEN_IOMMUOP_unmap:
+        op->status = iommuop_unmap(&op->u.unmap);
+        if ( !op->status )
+            *need_flush = true;
+        break;
+
     default:
         op->status = -EOPNOTSUPP;
         break;
     }
 }
 
-int do_one_iommu_op(xen_iommu_op_buf_t *buf)
+int do_one_iommu_op(xen_iommu_op_buf_t *buf, bool *need_flush)
 {
     const XEN_GUEST_HANDLE(xen_iommu_op_t) h =
         guest_handle_cast(buf->h, xen_iommu_op_t);
@@ -101,6 +316,10 @@ int do_one_iommu_op(xen_iommu_op_buf_t *buf)
     static const size_t op_size[] = {
         [XEN_IOMMUOP_query_reserved] =
             sizeof(struct xen_iommu_op_query_reserved),
+        [XEN_IOMMUOP_enable_modification] =
+            sizeof(struct xen_iommu_op_enable_modification),
+        [XEN_IOMMUOP_map] = sizeof(struct xen_iommu_op_map),
+        [XEN_IOMMUOP_unmap] = sizeof(struct xen_iommu_op_unmap),
     };
     size_t size;
     int rc;
@@ -130,10 +349,12 @@ int do_one_iommu_op(xen_iommu_op_buf_t *buf)
     if ( copy_from_guest_offset((void *)&op.u, buf->h, offset, size) )
         return -EFAULT;
 
-    iommu_op(&op);
+    iommu_op(&op, need_flush);
 
-    if ( op.op == XEN_IOMMUOP_query_reserved &&
-         __copy_field_to_guest(h, &op, u.query_reserved.nr_entries) )
+    if ( (op.op == XEN_IOMMUOP_query_reserved &&
+          __copy_field_to_guest(h, &op, u.query_reserved.nr_entries)) ||
+         (op.op == XEN_IOMMUOP_enable_modification &&
+          __copy_field_to_guest(h, &op, u.enable_modification.cap)) )
         return -EFAULT;
 
     if ( __copy_field_to_guest(h, &op, status) )
@@ -146,8 +367,11 @@ long do_iommu_op(unsigned int nr_bufs,
                  XEN_GUEST_HANDLE_PARAM(xen_iommu_op_buf_t) bufs)
 {
     unsigned int i;
+    bool need_flush = false;
     long rc = 0;
 
+    this_cpu(iommu_dont_flush_iotlb) = 1;
+
     for ( i = 0; i < nr_bufs; i++ )
     {
         xen_iommu_op_buf_t buf;
@@ -164,11 +388,13 @@ long do_iommu_op(unsigned int nr_bufs,
             break;
         }
 
-        rc = do_one_iommu_op(&buf);
+        rc = do_one_iommu_op(&buf, &need_flush);
         if ( rc )
             break;
     }
 
+    this_cpu(iommu_dont_flush_iotlb) = 0;
+
     if ( rc > 0 )
     {
         ASSERT(rc < nr_bufs);
@@ -177,7 +403,8 @@ long do_iommu_op(unsigned int nr_bufs,
 
         rc = hypercall_create_continuation(__HYPERVISOR_iommu_op,
                                            "ih", nr_bufs, bufs);
-    }
+    } else if ( !rc && need_flush )
+        rc = iommu_iotlb_flush_all(current->domain);
 
     return rc;
 }
@@ -186,7 +413,7 @@ long do_iommu_op(unsigned int nr_bufs,
 
 CHECK_iommu_reserved_range;
 
-int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
+int compat_one_iommu_op(compat_iommu_op_buf_t *buf, bool *need_flush)
 {
     const COMPAT_HANDLE(compat_iommu_op_t) h =
         compat_handle_cast(buf->h, compat_iommu_op_t);
@@ -195,6 +422,10 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
     static const size_t op_size[] = {
         [XEN_IOMMUOP_query_reserved] =
            sizeof(struct compat_iommu_op_query_reserved),
+        [XEN_IOMMUOP_enable_modification] =
+            sizeof(struct compat_iommu_op_enable_modification),
+        [XEN_IOMMUOP_map] = sizeof(struct compat_iommu_op_map),
+        [XEN_IOMMUOP_unmap] = sizeof(struct compat_iommu_op_unmap),
     };
     size_t size;
     xen_iommu_op_t nat;
@@ -228,9 +459,15 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
 
     /*
      * The xlat magic doesn't quite know how to handle the union so
-     * we need to fix things up here.
+     * we need to fix things up here. Also, none of the sub-ops, apart from
+     * query_reserved, actually need any translation but the xlat magic
+     * can't deal with that either so all sub-ops must be marked for
+     * translation in xlat.lst.
      */
 #define XLAT_iommu_op_u_query_reserved XEN_IOMMUOP_query_reserved
+#define XLAT_iommu_op_u_enable_modification XEN_IOMMUOP_enable_modification
+#define XLAT_iommu_op_u_map XEN_IOMMUOP_map
+#define XLAT_iommu_op_u_unmap XEN_IOMMUOP_unmap
     u = cmp.op;
 
 #define XLAT_iommu_op_query_reserved_HNDL_ranges(_d_, _s_) \
@@ -258,9 +495,12 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
     XLAT_iommu_op(&nat, &cmp);
 
 #undef XLAT_iommu_op_query_reserved_HNDL_ranges
+#undef XLAT_iommu_op_u_unmap
+#undef XLAT_iommu_op_u_map
+#undef XLAT_iommu_op_u_enable_modification
 #undef XLAT_iommu_op_u_query_reserved
 
-    iommu_op(&nat);
+    iommu_op(&nat, need_flush);
 
 #define XLAT_iommu_op_query_reserved_HNDL_ranges(_d_, _s_) \
     do \
@@ -282,7 +522,8 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
     /*
      * Avoid the full (and lengthy) XLAT code as the only things that
      * need copying back are the reserved ranges (in the case of the
-     * query op) and the status field (for all ops).
+     * query op), capabilities (in the case of the enable op) and the
+     * status field (for all ops).
      */
     cmp.status = nat.status;
 
@@ -296,6 +537,13 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
         if ( __copy_field_to_compat(h, &cmp, u.query_reserved.nr_entries) )
             return -EFAULT;
     }
+    else if ( cmp.op == XEN_IOMMUOP_enable_modification )
+    {
+        cmp.u.enable_modification.cap = nat.u.enable_modification.cap;
+
+        if ( __copy_field_to_compat(h, &cmp, u.enable_modification.cap) )
+            return -EFAULT;
+    }
 
 #undef XLAT_iommu_op_query_reserved_HNDL_ranges
 
@@ -309,8 +557,11 @@ int compat_iommu_op(unsigned int nr_bufs,
                     XEN_GUEST_HANDLE_PARAM(compat_iommu_op_buf_t) bufs)
 {
     unsigned int i;
+    bool need_flush = false;
     long rc = 0;
 
+    this_cpu(iommu_dont_flush_iotlb) = 1;
+
     for ( i = 0; i < nr_bufs; i++ )
     {
         compat_iommu_op_buf_t buf;
@@ -327,11 +578,13 @@ int compat_iommu_op(unsigned int nr_bufs,
             break;
         }
 
-        rc = compat_one_iommu_op(&buf);
+        rc = compat_one_iommu_op(&buf, &need_flush);
         if ( rc )
             break;
     }
 
+    this_cpu(iommu_dont_flush_iotlb) = 0;
+
     if ( rc > 0 )
     {
         ASSERT(rc < nr_bufs);
@@ -340,7 +593,8 @@ int compat_iommu_op(unsigned int nr_bufs,
 
         rc = hypercall_create_continuation(__HYPERVISOR_iommu_op,
                                            "ih", nr_bufs, bufs);
-    }
+    } else if ( !rc && need_flush )
+        rc = iommu_iotlb_flush_all(current->domain);
 
     return rc;
 }
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index bc67cfe843..47c608cc89 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -184,6 +184,8 @@ int iommu_domain_init(struct domain *d)
     if ( !hd->reserved_ranges )
         return -ENOMEM;
 
+    spin_lock_init(&hd->lock);
+
     hd->platform_ops = iommu_get_ops();
     return hd->platform_ops->init(d);
 }
diff --git a/xen/include/public/iommu_op.h b/xen/include/public/iommu_op.h
index 001f515bb3..fcca47e8d2 100644
--- a/xen/include/public/iommu_op.h
+++ b/xen/include/public/iommu_op.h
@@ -61,6 +61,101 @@ struct xen_iommu_op_query_reserved {
     XEN_GUEST_HANDLE(xen_iommu_reserved_range_t) ranges;
 };
 
+/*
+ * XEN_IOMMUOP_enable_modification: Enable operations that modify IOMMU
+ *                                  mappings.
+ */
+#define XEN_IOMMUOP_enable_modification 2
+
+struct xen_iommu_op_enable_modification {
+    /*
+     * OUT - On successful return this is set to the bitwise OR of capabilities
+     *       defined below. On entry this must be set to zero.
+     */
+    uint32_t cap;
+    uint32_t pad;
+
+    /* Does the implementation support per-device mappings? */
+#define _XEN_IOMMU_CAP_per_device_mappings 0
+#define XEN_IOMMU_CAP_per_device_mappings (1u << _XEN_IOMMU_CAP_per_device_mappings)
+};
+
+/*
+ * XEN_IOMMUOP_map: Map a guest page in the IOMMU.
+ */
+#define XEN_IOMMUOP_map 3
+
+struct xen_iommu_op_map {
+    /* IN - The domid of the guest */
+    domid_t domid;
+    /*
+     * IN - flags controlling the mapping. This should be a bitwise OR of the
+     *      flags defined below.
+     */
+    uint16_t flags;
+
+    /*
+     * Should the mapping be created for all initiators?
+     *
+     * NOTE: This flag is currently required as the implementation does not yet
+     *       support per-device mappings.
+     */
+#define _XEN_IOMMUOP_map_all 0
+#define XEN_IOMMUOP_map_all (1 << (_XEN_IOMMUOP_map_all))
+
+    /* Should the mapping be read-only to the initiator(s)? */
+#define _XEN_IOMMUOP_map_readonly 1
+#define XEN_IOMMUOP_map_readonly (1 << (_XEN_IOMMUOP_map_readonly))
+
+    uint32_t pad;
+    /*
+     * IN - Segment/Bus/Device/Function of the initiator.
+     *
+     * NOTE: This is ignored if XEN_IOMMUOP_map_all is set.
+     */
+    uint64_t sbdf;
+    /* IN - The IOMMU frame number which will hold the new mapping */
+    xen_dfn_t dfn;
+    /* IN - The guest frame number of the page to be mapped */
+    xen_pfn_t gfn;
+};
+
+/*
+ * XEN_IOMMUOP_unmap: Remove a mapping in the IOMMU.
+ */
+#define XEN_IOMMUOP_unmap 4
+
+struct xen_iommu_op_unmap {
+    /* IN - The domid of the guest */
+    domid_t domid;
+    /*
+     * IN - flags controlling the unmapping. This should be a bitwise OR of the
+     *      flags defined below.
+     */
+    uint16_t flags;
+
+    /*
+     * Should the mapping be destroyed for all initiators?
+     *
+     * NOTE: This flag is currently required as the implementation does not yet
+     *       support per-device mappings.
+     */
+#define _XEN_IOMMUOP_unmap_all 0
+#define XEN_IOMMUOP_unmap_all (1 << (_XEN_IOMMUOP_unmap_all))
+
+    uint32_t pad;
+    /*
+     * IN - Segment/Bus/Device/Function of the initiator.
+     *
+     * NOTE: This is ignored if XEN_IOMMUOP_unmap_all is set.
+     */
+    uint64_t sbdf;
+    /* IN - The IOMMU frame number which holds the mapping to be removed */
+    xen_dfn_t dfn;
+    /* IN - The guest frame number of the page that is mapped */
+    xen_pfn_t gfn;
+};
+
 struct xen_iommu_op {
     uint16_t op;    /* op type */
     uint16_t pad;
@@ -68,6 +163,9 @@ struct xen_iommu_op {
     /* 0 for success otherwise, negative errno */
     union {
         struct xen_iommu_op_query_reserved query_reserved;
+        struct xen_iommu_op_enable_modification enable_modification;
+        struct xen_iommu_op_map map;
+        struct xen_iommu_op_unmap unmap;
     } u;
 };
 typedef struct xen_iommu_op xen_iommu_op_t;
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index a56d03b719..a04c312aeb 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -143,6 +143,12 @@ struct domain_iommu {
      * must not be modified after initialization.
      */
     struct rangeset *reserved_ranges;
+
+    /*
+     * PV-IOMMU fields
+     */
+    bool domain_control;
+    spinlock_t lock;
 };
 
 #define dom_iommu(d)        (&(d)->iommu)
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index d2f9b1034b..3f5b0ac004 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -79,7 +79,10 @@
 ?	vcpu_hvm_x86_64			hvm/hvm_vcpu.h
 !	iommu_op			iommu_op.h
 !	iommu_op_buf			iommu_op.h
+!	iommu_op_enable_modification	iommu_op.h
+!	iommu_op_map			iommu_op.h
 !	iommu_op_query_reserved		iommu_op.h
+!	iommu_op_unmap			iommu_op.h
 ?	iommu_reserved_range		iommu_op.h
 ?	kexec_exec			kexec.h
 !	kexec_image			kexec.h
-- 
2.11.0

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
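For illustration, here is a minimal caller-side sketch of how a domain might
use the new sub-ops to map and later unmap one of its own frames via
__HYPERVISOR_iommu_op. This is not part of the patch: the hypercall wrapper
HYPERVISOR_iommu_op() is assumed to exist in the caller's environment, and the
'h'/'size' layout of xen_iommu_op_buf_t plus the handle-setting macro are
assumed from the earlier patches in this series. It also assumes
XEN_IOMMUOP_enable_modification has already been issued successfully,
otherwise the map/unmap ops return -EOPNOTSUPP in their status field.

/*
 * Illustrative sketch only (not part of this patch). Per-op errors are
 * reported in op.status; hypercall-level errors in the return value.
 */
#include "iommu_op.h"   /* the public header extended by this series */

/* Assumed wrapper around __HYPERVISOR_iommu_op(nr_bufs, bufs). */
extern long HYPERVISOR_iommu_op(unsigned int nr_bufs, xen_iommu_op_buf_t *bufs);

static int iommu_map_own_frame(xen_pfn_t gfn, xen_dfn_t dfn)
{
    xen_iommu_op_t op = {
        .op = XEN_IOMMUOP_map,
        .u.map = {
            .domid = DOMID_SELF,         /* map one of our own frames */
            .flags = XEN_IOMMUOP_map_all, /* per-device mappings not yet supported */
            .dfn = dfn,                  /* where it appears in IOMMU space */
            .gfn = gfn,                  /* which of our frames to map */
        },
    };
    xen_iommu_op_buf_t buf = { .size = sizeof(op) }; /* 'size' assumed field name */
    long rc;

    set_xen_guest_handle(buf.h, &op); /* handle macro depends on environment */

    rc = HYPERVISOR_iommu_op(1, &buf);

    return rc ? rc : op.status;
}

static int iommu_unmap_own_frame(xen_pfn_t gfn, xen_dfn_t dfn)
{
    xen_iommu_op_t op = {
        .op = XEN_IOMMUOP_unmap,
        .u.unmap = {
            /* Owning domain, GFN and DFN are all cross-checked by Xen. */
            .domid = DOMID_SELF,
            .flags = XEN_IOMMUOP_unmap_all,
            .dfn = dfn,
            .gfn = gfn,
        },
    };
    xen_iommu_op_buf_t buf = { .size = sizeof(op) };
    long rc;

    set_xen_guest_handle(buf.h, &op);

    rc = HYPERVISOR_iommu_op(1, &buf);

    return rc ? rc : op.status;
}

Since the implementation defers the IOTLB flush to the end of a batch, a
caller with many frames to (un)map would normally pass an array of buffers in
a single hypercall rather than issuing one hypercall per frame, as sketched
above for simplicity.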