[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen master] x86/p2m: use large pages for MMIO mappings
commit c3c756bde3feab4ab1640f2f3aed73b719118349 Author: Jan Beulich <jbeulich@xxxxxxxx> AuthorDate: Thu Feb 11 16:45:08 2016 +0100 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Thu Feb 11 16:45:08 2016 +0100 x86/p2m: use large pages for MMIO mappings When mapping large BARs (e.g. the frame buffer of a graphics card) the overhead of establishing such mappings using only 4k pages has, particularly after the XSA-125 fix, become unacceptable. Alter the XEN_DOMCTL_memory_mapping semantics once again, so that there's no longer a fixed amount of guest frames that represents the upper limit of what a single invocation can map. Instead bound execution time by limiting the number of iterations (regardless of page size). Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx> Acked-by: Kevin Tian <kevin.tian@xxxxxxxxx> Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> Reviewed-by: George Dunlap <george.dunlap@xxxxxxxxxx> --- tools/libxc/xc_domain.c | 15 +++-- xen/arch/x86/domain_build.c | 3 +- xen/arch/x86/hvm/vmx/vmx.c | 2 +- xen/arch/x86/mm/p2m-ept.c | 1 + xen/arch/x86/mm/p2m-pt.c | 13 ++-- xen/arch/x86/mm/p2m.c | 154 +++++++++++++++++++++++++++++++++----------- xen/common/domctl.c | 6 +- xen/common/memory.c | 2 +- xen/include/asm-x86/p2m.h | 5 +- xen/include/public/domctl.h | 9 ++- 10 files changed, 156 insertions(+), 54 deletions(-) diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c index e21b602..4fa993a 100644 --- a/tools/libxc/xc_domain.c +++ b/tools/libxc/xc_domain.c @@ -2229,7 +2229,7 @@ int xc_domain_memory_mapping( { DECLARE_DOMCTL; xc_dominfo_t info; - int ret = 0, err; + int ret = 0, rc; unsigned long done = 0, nr, max_batch_sz; if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 || @@ -2254,19 +2254,24 @@ int xc_domain_memory_mapping( domctl.u.memory_mapping.nr_mfns = nr; domctl.u.memory_mapping.first_gfn = first_gfn + done; domctl.u.memory_mapping.first_mfn = first_mfn + done; - err = do_domctl(xch, &domctl); - if ( err && errno == E2BIG ) + rc = do_domctl(xch, &domctl); + if ( rc < 0 && errno == E2BIG ) { if ( max_batch_sz <= 1 ) break; max_batch_sz >>= 1; continue; } + if ( rc > 0 ) + { + done += rc; + continue; + } /* Save the first error... */ if ( !ret ) - ret = err; + ret = rc; /* .. and ignore the rest of them when removing. */ - if ( err && add_mapping != DPCI_REMOVE_MAPPING ) + if ( rc && add_mapping != DPCI_REMOVE_MAPPING ) break; done += nr; diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c index cb287f0..f9a3eca 100644 --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -436,7 +436,8 @@ static __init void pvh_add_mem_mapping(struct domain *d, unsigned long gfn, else a = p2m_access_rw; - if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), a)) ) + if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), + PAGE_ORDER_4K, a)) ) panic("pvh_add_mem_mapping: gfn:%lx mfn:%lx i:%ld rc:%d\n", gfn, mfn, i, rc); if ( !(i & 0xfffff) ) diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index b9f340c..5e99226 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -2504,7 +2504,7 @@ static int vmx_alloc_vlapic_mapping(struct domain *d) share_xen_page_with_guest(pg, d, XENSHARE_writable); d->arch.hvm_domain.vmx.apic_access_mfn = mfn; set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), _mfn(mfn), - p2m_get_hostp2m(d)->default_access); + PAGE_ORDER_4K, p2m_get_hostp2m(d)->default_access); return 0; } diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c index c5fe906..316e3f3 100644 --- a/xen/arch/x86/mm/p2m-ept.c +++ b/xen/arch/x86/mm/p2m-ept.c @@ -136,6 +136,7 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry, entry->r = entry->x = 1; entry->w = !rangeset_contains_singleton(mmio_ro_ranges, entry->mfn); + ASSERT(entry->w || !is_epte_superpage(entry)); entry->a = !!cpu_has_vmx_ept_ad; entry->d = entry->w && cpu_has_vmx_ept_ad; break; diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c index 0a7bee4..da546e8 100644 --- a/xen/arch/x86/mm/p2m-pt.c +++ b/xen/arch/x86/mm/p2m-pt.c @@ -72,7 +72,8 @@ static const unsigned long pgt[] = { PGT_l3_page_table }; -static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn) +static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn, + unsigned int level) { unsigned long flags; /* @@ -107,6 +108,8 @@ static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn) case p2m_mmio_direct: if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) ) flags |= _PAGE_RW; + else + ASSERT(!level); return flags | P2M_BASE_FLAGS | _PAGE_PCD; } } @@ -436,7 +439,7 @@ static int do_recalc(struct p2m_domain *p2m, unsigned long gfn) p2m_type_t p2mt = p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask) ? p2m_ram_logdirty : p2m_ram_rw; unsigned long mfn = l1e_get_pfn(e); - unsigned long flags = p2m_type_to_flags(p2mt, _mfn(mfn)); + unsigned long flags = p2m_type_to_flags(p2mt, _mfn(mfn), level); if ( level ) { @@ -573,7 +576,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ? l3e_from_pfn(mfn_x(mfn), - p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE) + p2m_type_to_flags(p2mt, mfn, 2) | _PAGE_PSE) : l3e_empty(); entry_content.l1 = l3e_content.l3; @@ -609,7 +612,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ) entry_content = p2m_l1e_from_pfn(mfn_x(mfn), - p2m_type_to_flags(p2mt, mfn)); + p2m_type_to_flags(p2mt, mfn, 0)); else entry_content = l1e_empty(); @@ -645,7 +648,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ) l2e_content = l2e_from_pfn(mfn_x(mfn), - p2m_type_to_flags(p2mt, mfn) | + p2m_type_to_flags(p2mt, mfn, 1) | _PAGE_PSE); else l2e_content = l2e_empty(); diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index 1b43853..a25b18b 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -900,48 +900,64 @@ void p2m_change_type_range(struct domain *d, p2m_unlock(p2m); } -/* Returns: 0 for success, -errno for failure */ +/* + * Returns: + * 0 for success + * -errno for failure + * 1 + new order for caller to retry with smaller order (guaranteed + * to be smaller than order passed in) + */ static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, - p2m_type_t gfn_p2mt, p2m_access_t access) + unsigned int order, p2m_type_t gfn_p2mt, + p2m_access_t access) { int rc = 0; p2m_access_t a; p2m_type_t ot; mfn_t omfn; + unsigned int cur_order = 0; struct p2m_domain *p2m = p2m_get_hostp2m(d); if ( !paging_mode_translate(d) ) return -EIO; - gfn_lock(p2m, gfn, 0); - omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL); + gfn_lock(p2m, gfn, order); + omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL); + if ( cur_order < order ) + { + gfn_unlock(p2m, gfn, order); + return cur_order + 1; + } if ( p2m_is_grant(ot) || p2m_is_foreign(ot) ) { - gfn_unlock(p2m, gfn, 0); + gfn_unlock(p2m, gfn, order); domain_crash(d); return -ENOENT; } else if ( p2m_is_ram(ot) ) { - ASSERT(mfn_valid(omfn)); - set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); + unsigned long i; + + for ( i = 0; i < (1UL << order); ++i ) + { + ASSERT(mfn_valid(_mfn(mfn_x(omfn) + i))); + set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY); + } } P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn, mfn_x(mfn)); - rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, gfn_p2mt, - access); + rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access); if ( rc ) - gdprintk(XENLOG_ERR, - "p2m_set_entry failed! mfn=%08lx rc:%d\n", - mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot)), rc); + gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n", + gfn, order, rc, mfn_x(mfn)); else if ( p2m_is_pod(ot) ) { pod_lock(p2m); - p2m->pod.entry_count--; + p2m->pod.entry_count -= 1UL << order; BUG_ON(p2m->pod.entry_count < 0); pod_unlock(p2m); } - gfn_unlock(p2m, gfn, 0); + gfn_unlock(p2m, gfn, order); return rc; } @@ -950,14 +966,19 @@ static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) { - return set_typed_p2m_entry(d, gfn, mfn, p2m_map_foreign, + return set_typed_p2m_entry(d, gfn, mfn, PAGE_ORDER_4K, p2m_map_foreign, p2m_get_hostp2m(d)->default_access); } int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, - p2m_access_t access) + unsigned int order, p2m_access_t access) { - return set_typed_p2m_entry(d, gfn, mfn, p2m_mmio_direct, access); + if ( order > PAGE_ORDER_4K && + rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn), + mfn_x(mfn) + (1UL << order) - 1) ) + return PAGE_ORDER_4K + 1; + + return set_typed_p2m_entry(d, gfn, mfn, order, p2m_mmio_direct, access); } int set_identity_p2m_entry(struct domain *d, unsigned long gfn, @@ -1010,20 +1031,33 @@ int set_identity_p2m_entry(struct domain *d, unsigned long gfn, return ret; } -/* Returns: 0 for success, -errno for failure */ -int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) +/* + * Returns: + * 0 for success + * -errno for failure + * order+1 for caller to retry with order (guaranteed smaller than + * the order value passed in) + */ +int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, + unsigned int order) { int rc = -EINVAL; mfn_t actual_mfn; p2m_access_t a; p2m_type_t t; + unsigned int cur_order = 0; struct p2m_domain *p2m = p2m_get_hostp2m(d); if ( !paging_mode_translate(d) ) return -EIO; - gfn_lock(p2m, gfn, 0); - actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, NULL, NULL); + gfn_lock(p2m, gfn, order); + actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, &cur_order, NULL); + if ( cur_order < order ) + { + rc = cur_order + 1; + goto out; + } /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */ if ( (INVALID_MFN == mfn_x(actual_mfn)) || (t != p2m_mmio_direct) ) @@ -1036,11 +1070,11 @@ int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) gdprintk(XENLOG_WARNING, "no mapping between mfn %08lx and gfn %08lx\n", mfn_x(mfn), gfn); - rc = p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_4K, p2m_invalid, + rc = p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), order, p2m_invalid, p2m->default_access); out: - gfn_unlock(p2m, gfn, 0); + gfn_unlock(p2m, gfn, order); return rc; } @@ -2131,6 +2165,37 @@ void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn, return map_domain_page(*mfn); } +static unsigned int mmio_order(const struct domain *d, + unsigned long start_fn, unsigned long nr) +{ + /* + * Note that the !iommu_use_hap_pt() here has three effects: + * - cover iommu_{,un}map_page() not having an "order" input yet, + * - exclude shadow mode (which doesn't support large MMIO mappings), + * - exclude PV guests, should execution reach this code for such. + * So be careful when altering this. + */ + if ( !need_iommu(d) || !iommu_use_hap_pt(d) || + (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) ) + return PAGE_ORDER_4K; + + if ( 0 /* + * Don't use 1Gb pages, to limit the iteration count in + * set_typed_p2m_entry() when it needs to zap M2P entries + * for a RAM range. + */ && + !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) && + hap_has_1gb ) + return PAGE_ORDER_1G; + + if ( hap_has_2mb ) + return PAGE_ORDER_2M; + + return PAGE_ORDER_4K; +} + +#define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */ + int map_mmio_regions(struct domain *d, unsigned long start_gfn, unsigned long nr, @@ -2138,22 +2203,29 @@ int map_mmio_regions(struct domain *d, { int ret = 0; unsigned long i; + unsigned int iter, order; if ( !paging_mode_translate(d) ) return 0; - for ( i = 0; !ret && i < nr; i++ ) + for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER; + i += 1UL << order, ++iter ) { - ret = set_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i), - p2m_get_hostp2m(d)->default_access); - if ( ret ) + /* OR'ing gfn and mfn values will return an order suitable to both. */ + for ( order = mmio_order(d, (start_gfn + i) | (mfn + i), nr - i); ; + order = ret - 1 ) { - unmap_mmio_regions(d, start_gfn, i, mfn); - break; + ret = set_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i), order, + p2m_get_hostp2m(d)->default_access); + if ( ret <= 0 ) + break; + ASSERT(ret <= order); } + if ( ret < 0 ) + break; } - return ret; + return i == nr ? 0 : i ?: ret; } int unmap_mmio_regions(struct domain *d, @@ -2161,20 +2233,30 @@ int unmap_mmio_regions(struct domain *d, unsigned long nr, unsigned long mfn) { - int err = 0; + int ret = 0; unsigned long i; + unsigned int iter, order; if ( !paging_mode_translate(d) ) return 0; - for ( i = 0; i < nr; i++ ) + for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER; + i += 1UL << order, ++iter ) { - int ret = clear_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i)); - if ( ret ) - err = ret; + /* OR'ing gfn and mfn values will return an order suitable to both. */ + for ( order = mmio_order(d, (start_gfn + i) | (mfn + i), nr - i); ; + order = ret - 1 ) + { + ret = clear_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i), order); + if ( ret <= 0 ) + break; + ASSERT(ret <= order); + } + if ( ret < 0 ) + break; } - return err; + return i == nr ? 0 : i ?: ret; } unsigned int p2m_find_altp2m_by_eptp(struct domain *d, uint64_t eptp) diff --git a/xen/common/domctl.c b/xen/common/domctl.c index 121a34a..22fa5d5 100644 --- a/xen/common/domctl.c +++ b/xen/common/domctl.c @@ -1046,10 +1046,12 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) (gfn + nr_mfns - 1) < gfn ) /* wrap? */ break; +#ifndef CONFIG_X86 /* XXX ARM!? */ ret = -E2BIG; /* Must break hypercall up as this could take a while. */ if ( nr_mfns > 64 ) break; +#endif ret = -EPERM; if ( !iomem_access_permitted(current->domain, mfn, mfn_end) || @@ -1067,7 +1069,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) d->domain_id, gfn, mfn, nr_mfns); ret = map_mmio_regions(d, gfn, nr_mfns, mfn); - if ( ret ) + if ( ret < 0 ) printk(XENLOG_G_WARNING "memory_map:fail: dom%d gfn=%lx mfn=%lx nr=%lx ret:%ld\n", d->domain_id, gfn, mfn, nr_mfns, ret); @@ -1079,7 +1081,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) d->domain_id, gfn, mfn, nr_mfns); ret = unmap_mmio_regions(d, gfn, nr_mfns, mfn); - if ( ret && is_hardware_domain(current->domain) ) + if ( ret < 0 && is_hardware_domain(current->domain) ) printk(XENLOG_ERR "memory_map: error %ld removing dom%d access to [%lx,%lx]\n", ret, d->domain_id, mfn, mfn_end); diff --git a/xen/common/memory.c b/xen/common/memory.c index 9ff1145..ef57219 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -259,7 +259,7 @@ int guest_remove_page(struct domain *d, unsigned long gmfn) } if ( p2mt == p2m_mmio_direct ) { - clear_mmio_p2m_entry(d, gmfn, _mfn(mfn)); + clear_mmio_p2m_entry(d, gmfn, _mfn(mfn), 0); put_gfn(d, gmfn); return 1; } diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h index c0df1ea..ca0480e 100644 --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -574,8 +574,9 @@ int p2m_is_logdirty_range(struct p2m_domain *, unsigned long start, /* Set mmio addresses in the p2m table (for pass-through) */ int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, - p2m_access_t access); -int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn); + unsigned int order, p2m_access_t access); +int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, + unsigned int order); /* Set identity addresses in the p2m table (for pass-through) */ int set_identity_p2m_entry(struct domain *d, unsigned long gfn, diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h index 370bc0f..a934318 100644 --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -545,8 +545,15 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t); /* Bind machine I/O address range -> HVM address range. */ -/* If this returns -E2BIG lower nr_mfns value. */ /* XEN_DOMCTL_memory_mapping */ +/* Returns + - zero success, everything done + - -E2BIG passed in nr_mfns value too large for the implementation + - positive partial success for the first <result> page frames (with + <result> less than nr_mfns), requiring re-invocation by the + caller after updating inputs + - negative error; other than -E2BIG +*/ #define DPCI_ADD_MAPPING 1 #define DPCI_REMOVE_MAPPING 0 struct xen_domctl_memory_mapping { -- generated by git-patchbot for /home/xen/git/xen.git#master _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |