Re: [Xen-devel] [PATCH 3/4 RFC] x86/p2m: use large pages for MMIO mappings
On 15/09/15 08:34, Jan Beulich wrote:
> When mapping large BARs (e.g. the frame buffer of a graphics card) the
> overhead of establishing such mappings using only 4k pages has,
> particularly after the XSA-125 fix, become unacceptable. Alter the
> XEN_DOMCTL_memory_mapping semantics once again, so that there's no
> longer a fixed amount of guest frames that represents the upper limit
> of what a single invocation can map. Instead bound execution time by
> limiting the number of iterations (regardless of page size).
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> RFC reasons:
> - ARM side unimplemented (and hence libxc for now made cope with both
>   models), the main issue (besides my inability to test any change
>   there) being the many internal uses of map_mmio_regions()
> - error unmapping in map_mmio_regions() and error propagation to caller
>   from unmap_mmio_regions() are not satisfactory (for the latter a
>   possible model might be to have the function - and hence the domctl -
>   return the [non-zero] number of completed entries upon error,
>   requiring the caller to re-invoke the hypercall to then obtain the
>   actual error for the failed slot)

Doesn't this mean the caller must always make two hypercalls to confirm
success?
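For illustration, here is a minimal sketch of what a caller loop would look
like under that "return the number of completed entries" model. The helper
name xc_domctl_map() and its exact return convention are assumptions made up
for this example, not part of the patch:

#include <errno.h>

/* Hypothetical wrapper around the memory_mapping domctl: assumed to
 * return 0 when everything was mapped, a positive count of completed
 * frames when it bailed early, or -1 with errno set on failure. */
int xc_domctl_map(unsigned long gfn, unsigned long mfn, unsigned long nr);

static int map_all(unsigned long gfn, unsigned long mfn, unsigned long nr)
{
    while ( nr )
    {
        int rc = xc_domctl_map(gfn, mfn, nr);

        if ( rc == 0 )
            return 0;          /* fully mapped */
        if ( rc < 0 )
            return -errno;     /* error for the slot that failed */

        /*
         * Partial completion: re-invoke on the remainder, which either
         * finishes the job or reports the actual error for the slot
         * that stopped the previous invocation.
         */
        gfn += rc;
        mfn += rc;
        nr  -= rc;
    }

    return 0;
}

Whenever the hypercall bails early, the caller has to re-invoke it just to
distinguish "more work to do" from "the next slot will fail", which is the
concern raised above.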
> - iommu_{,un}map_page() interfaces don't support "order" (hence
>   mmio_order() for now returns zero when !iommu_hap_pt_share, which in
>   particular means the AMD side isn't being taken care of just yet)
>
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -2215,7 +2215,7 @@ int xc_domain_memory_mapping(
>  {
>      DECLARE_DOMCTL;
>      xc_dominfo_t info;
> -    int ret = 0, err;
> +    int ret = 0, rc;
>      unsigned long done = 0, nr, max_batch_sz;
>
>      if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 ||
> @@ -2240,19 +2240,24 @@ int xc_domain_memory_mapping(
>          domctl.u.memory_mapping.nr_mfns = nr;
>          domctl.u.memory_mapping.first_gfn = first_gfn + done;
>          domctl.u.memory_mapping.first_mfn = first_mfn + done;
> -        err = do_domctl(xch, &domctl);
> -        if ( err && errno == E2BIG )
> +        rc = do_domctl(xch, &domctl);
> +        if ( rc < 0 && errno == E2BIG )
>          {
>              if ( max_batch_sz <= 1 )
>                  break;
>              max_batch_sz >>= 1;
>              continue;
>          }
> +        if ( rc > 0 )
> +        {
> +            done += rc;
> +            continue;
> +        }
>          /* Save the first error... */
>          if ( !ret )
> -            ret = err;
> +            ret = rc;
>          /* .. and ignore the rest of them when removing. */
> -        if ( err && add_mapping != DPCI_REMOVE_MAPPING )
> +        if ( rc && add_mapping != DPCI_REMOVE_MAPPING )
>              break;
>
>          done += nr;
> --- a/xen/arch/x86/domain_build.c
> +++ b/xen/arch/x86/domain_build.c
> @@ -436,7 +436,7 @@ static __init void pvh_add_mem_mapping(s
>          else
>              a = p2m_access_rwx;
>
> -        if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), a)) )
> +        if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), 0, a)) )
>              panic("pvh_add_mem_mapping: gfn:%lx mfn:%lx i:%ld rc:%d\n",
>                    gfn, mfn, i, rc);
>          if ( !(i & 0xfffff) )
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2396,7 +2396,8 @@ static int vmx_alloc_vlapic_mapping(stru
>      share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
>      d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
>      set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
> -                       _mfn(virt_to_mfn(apic_va)), p2m_get_hostp2m(d)->default_access);
> +                       _mfn(virt_to_mfn(apic_va)), 0,
> +                       p2m_get_hostp2m(d)->default_access);
>
>      return 0;
>  }
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -897,39 +897,47 @@ void p2m_change_type_range(struct domain
>
>  /* Returns: 0 for success, -errno for failure */
>  static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
> -                               p2m_type_t gfn_p2mt, p2m_access_t access)
> +                               unsigned int order, p2m_type_t gfn_p2mt,
> +                               p2m_access_t access)
>  {
>      int rc = 0;
>      p2m_access_t a;
>      p2m_type_t ot;
>      mfn_t omfn;
> +    unsigned int cur_order = 0;
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
>      if ( !paging_mode_translate(d) )
>          return -EIO;
>
> -    gfn_lock(p2m, gfn, 0);
> -    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
> +    gfn_lock(p2m, gfn, order);
> +    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
> +    if ( cur_order < order )
> +    {
> +        gfn_unlock(p2m, gfn, order);
> +        return cur_order + 1;

This appears to change the error semantics, and therefore warrants an
update to the function comment.
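One possible wording for such an updated comment, offered purely as a
suggestion (the patch itself does not contain this text), making the new
positive return value explicit:

/*
 * Returns: 0 for success, -errno for failure.  If the existing entry's
 * order is smaller than the requested one, returns that smaller order + 1,
 * i.e. a positive value telling the caller to retry with order "rc - 1".
 */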
> +    }
>      if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
>      {
> -        gfn_unlock(p2m, gfn, 0);
> +        gfn_unlock(p2m, gfn, order);
>          domain_crash(d);
>          return -ENOENT;
>      }
>      else if ( p2m_is_ram(ot) )
>      {
> +        unsigned long i;
> +
>          ASSERT(mfn_valid(omfn));
> -        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
> +        for ( i = 0; i < (1UL << order); ++i )
> +            set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
>      }
>
>      P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn, mfn_x(mfn));
> -    rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, gfn_p2mt,
> -                       access);
> -    gfn_unlock(p2m, gfn, 0);
> +    rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
> +    gfn_unlock(p2m, gfn, order);
>      if ( rc )
> -        gdprintk(XENLOG_ERR,
> -                 "p2m_set_entry failed! mfn=%08lx rc:%d\n",
> -                 mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot)), rc);
> +        gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (%#"PRI_mfn")\n",
> @@ -2037,6 +2052,25 @@ unsigned long paging_gva_to_gfn(struct v
>          return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
>  }
>
> +static unsigned int mmio_order(const struct domain *d,
> +                               unsigned long start_fn, unsigned long nr)

Do you mean "start_gfn"?

> +{
> +    if ( !hap_enabled(d) || !need_iommu(d) || !iommu_hap_pt_share ||
> +         (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
> +        return 0;
> +
> +    if ( !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) &&
> +         opt_hap_1gb && hvm_hap_has_1gb(d) )

opt_hap_1gb should be made redundant with hvm_hap_has_1gb(), to avoid
all the double checks.  The only place where the distinction between the
two is interesting is hvm_enable().  I will throw together a patch.
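For what it's worth, a sketch of the shape such a patch might take, assuming
the existing hap_capabilities bits in hvm_funcs are used (this is a guess at
the approach, not the actual follow-up patch): fold the command line
overrides into the capability flags once, in hvm_enable(), so later callers
only ever need hvm_hap_has_1gb()/hvm_hap_has_2mb().

/* Sketch only: in hvm_enable(), after the VMX/SVM start function has
 * filled in hvm_funcs, mask off superpage capabilities that were
 * disabled on the command line. */
if ( !opt_hap_1gb )
    hvm_funcs.hap_capabilities &= ~HVM_HAP_SUPERPAGE_1GB;
if ( !opt_hap_2mb )
    hvm_funcs.hap_capabilities &= ~HVM_HAP_SUPERPAGE_2MB;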
> +        return PAGE_ORDER_1G;
> +
> +    if ( opt_hap_2mb && hvm_hap_has_2mb(d) )
> +        return PAGE_ORDER_2M;
> +
> +    return 0;
> +}
> +
> +#define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */
> +
>  int map_mmio_regions(struct domain *d,
>                       unsigned long start_gfn,
>                       unsigned long nr,
> @@ -2044,22 +2078,45 @@ int map_mmio_regions(struct domain *d,
>  {
>      int ret = 0;
>      unsigned long i;
> +    unsigned int iter, order;
>
>      if ( !paging_mode_translate(d) )
>          return 0;
>
> -    for ( i = 0; !ret && i < nr; i++ )
> +    for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
> +          i += 1UL << order, ++iter )
>      {
> -        ret = set_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i),
> -                                 p2m_get_hostp2m(d)->default_access);
> -        if ( ret )
> +        for ( order = mmio_order(d, (start_gfn + i) | (mfn + i), nr - i); ;
> +              order = ret - 1 )

It is hard to reason about whether this inner loop will terminate.  All it
would take is a bug in set_mmio_p2m_entry() which causes it to unilaterally
return 1, and this loop would never terminate.  Is there any other condition
which can be used as a safety check?
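One candidate safety check, offered only as an illustration: require that a
positive return value from set_mmio_p2m_entry() always asks for a strictly
smaller order, and fail loudly otherwise instead of looping.  Placed right
after the set_mmio_p2m_entry() call inside the inner loop, something like:

/* Illustrative only: a positive ret means "retry with order ret - 1".
 * If that retry order is not strictly smaller than the current one,
 * no progress can be made - treat it as a bug rather than spin. */
if ( ret > 0 && (unsigned int)(ret - 1) >= order )
{
    ASSERT_UNREACHABLE();
    ret = -EILSEQ;    /* arbitrary error choice for this sketch */
    break;
}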
~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel