[Xen-devel] [V14 PATCH 1/2] pvh dom0: Add and remove foreign pages
This patch adds a new function, p2m_add_foreign(), to map pages from a
foreign guest into dom0 for purposes such as domU creation and running
xentrace. Such pages are typed p2m_map_foreign. Note that it is the nature
of such pages that a refcnt is held on them during their stay in the p2m.
The refcnt is taken and released in the low-level EPT function
atomic_write_ept_entry; that macro is converted to a function to allow for
such refcounting, which applies only to leaf entries in the EPT.

Note also that paging/sharing is disabled if the controlling or hardware
domain is PVH. Enabling either feature would require that refcnts be
properly maintained for foreign types, or that paging/sharing skip foreign
types.

Finally, get_pg_owner() is changed to allow foreign mappings for PVH.

Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx>
---
 xen/arch/x86/mm.c           |   4 +-
 xen/arch/x86/mm/mem_event.c |  11 ++++
 xen/arch/x86/mm/p2m-ept.c   | 104 +++++++++++++++++++++++++++++-------
 xen/arch/x86/mm/p2m-pt.c    |   7 +++
 xen/arch/x86/mm/p2m.c       | 126 +++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-x86/p2m.h   |   7 +++
 6 files changed, 232 insertions(+), 27 deletions(-)
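[Editor's note] For readers following the refcounting argument above, here
is a minimal sketch of the rule the new atomic_write_ept_entry() enforces
on leaf entries. It is an illustration only, not part of the patch: it
reuses Xen-internal helpers (get_page(), put_page(), mfn_to_page()) and
omits the patch's check_foreign test (which avoids re-taking a reference
when neither mfn nor type changes) as well as its error logging:

    /* Sketch: leaf writes pair a get_page() with a later put_page(). */
    static int write_leaf_epte_sketch(ept_entry_t *entryptr, ept_entry_t new)
    {
        unsigned long oldmfn = INVALID_MFN;

        /*
         * Installing a foreign entry: take a ref against the page's
         * owner so the frame cannot be freed while it sits in our p2m.
         */
        if ( p2m_is_foreign(new.sa_p2mt) &&
             !get_page(mfn_to_page(new.mfn),
                       page_get_owner(mfn_to_page(new.mfn))) )
            return -EBUSY;

        /* Overwriting a foreign entry: remember the old frame... */
        if ( p2m_is_foreign(entryptr->sa_p2mt) )
            oldmfn = entryptr->mfn;

        write_atomic(&entryptr->epte, new.epte);

        /* ...and drop the ref taken when it was installed. */
        if ( oldmfn != INVALID_MFN )
            put_page(mfn_to_page(oldmfn));

        return 0;
    }

This pairing is also why the patch keeps paging/sharing disabled for PVH:
either feature could replace or drop a foreign leaf entry without the
reference being released through this path.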
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index d3459f4..2543916 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2811,7 +2811,7 @@ static struct domain *get_pg_owner(domid_t domid)
         goto out;
     }
 
-    if ( unlikely(paging_mode_translate(curr)) )
+    if ( !is_pvh_domain(curr) && unlikely(paging_mode_translate(curr)) )
     {
         MEM_LOG("Cannot mix foreign mappings with translated domains");
         goto out;
@@ -4584,6 +4584,8 @@ int xenmem_add_to_physmap_one(
         page = mfn_to_page(mfn);
         break;
     }
+    case XENMAPSPACE_gmfn_foreign:
+        return p2m_add_foreign(d, idx, gpfn, foreign_domid);
     default:
         break;
     }
diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
index f84c383..40ae841 100644
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -538,6 +538,12 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
         case XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE:
         {
             struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+            rc = -EOPNOTSUPP;
+            /* pvh fixme: p2m_is_foreign types need addressing */
+            if ( is_pvh_vcpu(current) || is_pvh_domain(hardware_domain) )
+                break;
+
             rc = -ENODEV;
             /* Only HAP is supported */
             if ( !hap_enabled(d) )
@@ -620,6 +626,11 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
         {
         case XEN_DOMCTL_MEM_EVENT_OP_SHARING_ENABLE:
         {
+            rc = -EOPNOTSUPP;
+            /* pvh fixme: p2m_is_foreign types need addressing */
+            if ( is_pvh_vcpu(current) || is_pvh_domain(hardware_domain) )
+                break;
+
             rc = -ENODEV;
             /* Only HAP is supported */
             if ( !hap_enabled(d) )
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index bb98945..5fe063d 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -36,8 +36,6 @@
 
 #define atomic_read_ept_entry(__pepte)              \
     ( (ept_entry_t) { .epte = read_atomic(&(__pepte)->epte) } )
-#define atomic_write_ept_entry(__pepte, __epte)     \
-    write_atomic(&(__pepte)->epte, (__epte).epte)
 
 #define is_epte_present(ept_entry)      ((ept_entry)->epte & 0x7)
 #define is_epte_superpage(ept_entry)    ((ept_entry)->sp)
@@ -46,6 +44,64 @@ static inline bool_t is_epte_valid(ept_entry_t *e)
     return (e->epte != 0 && e->sa_p2mt != p2m_invalid);
 }
 
+/* returns : 0 for success, -errno otherwise */
+static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new,
+                                  int level)
+{
+    int rc;
+    unsigned long oldmfn = INVALID_MFN;
+    bool_t check_foreign = (new.mfn != entryptr->mfn ||
+                            new.sa_p2mt != entryptr->sa_p2mt);
+
+    if ( level )
+    {
+        ASSERT(!is_epte_superpage(&new) || !p2m_is_foreign(new.sa_p2mt));
+        write_atomic(&entryptr->epte, new.epte);
+        return 0;
+    }
+
+    if ( unlikely(p2m_is_foreign(new.sa_p2mt)) )
+    {
+        rc = -EINVAL;
+        if ( !is_epte_present(&new) )
+            goto out;
+
+        if ( check_foreign )
+        {
+            struct domain *fdom;
+
+            if ( !mfn_valid(new.mfn) )
+                goto out;
+
+            rc = -ESRCH;
+            fdom = page_get_owner(mfn_to_page(new.mfn));
+            if ( fdom == NULL )
+                goto out;
+
+            /* get refcount on the page */
+            rc = -EBUSY;
+            if ( !get_page(mfn_to_page(new.mfn), fdom) )
+                goto out;
+        }
+    }
+
+    if ( unlikely(p2m_is_foreign(entryptr->sa_p2mt)) && check_foreign )
+        oldmfn = entryptr->mfn;
+
+    write_atomic(&entryptr->epte, new.epte);
+
+    if ( unlikely(oldmfn != INVALID_MFN) )
+        put_page(mfn_to_page(oldmfn));
+
+    rc = 0;
+
+ out:
+    if ( rc )
+        gdprintk(XENLOG_ERR, "epte o:%"PRIx64" n:%"PRIx64" rc:%d\n",
+                 entryptr->epte, new.epte, rc);
+    return rc;
+}
+
 static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type, p2m_access_t access)
 {
     /* First apply type permissions */
@@ -275,8 +331,9 @@ static int ept_next_level(struct p2m_domain *p2m, bool_t read_only,
  * present entries in the given page table, optionally marking the entries
  * also for their subtrees needing P2M type re-calculation.
  */
-static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc)
+static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc, int level)
 {
+    int rc;
     ept_entry_t *epte = map_domain_page(mfn_x(mfn));
     unsigned int i;
     bool_t changed = 0;
@@ -292,7 +349,8 @@ static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc)
         e.emt = MTRR_NUM_TYPES;
         if ( recalc )
             e.recalc = 1;
-        atomic_write_ept_entry(&epte[i], e);
+        rc = atomic_write_ept_entry(&epte[i], e, level);
+        ASSERT(rc == 0);
         changed = 1;
     }
 
@@ -316,7 +374,7 @@ static int ept_invalidate_emt_range(struct p2m_domain *p2m,
     ept_entry_t *table;
     unsigned long gfn_remainder = first_gfn;
     unsigned int i, index;
-    int rc = 0, ret = GUEST_TABLE_MAP_FAILED;
+    int wrc, rc = 0, ret = GUEST_TABLE_MAP_FAILED;
 
     table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
     for ( i = ept_get_wl(&p2m->ept); i > target; --i )
@@ -342,7 +400,8 @@ static int ept_invalidate_emt_range(struct p2m_domain *p2m,
             rc = -ENOMEM;
             goto out;
         }
-        atomic_write_ept_entry(&table[index], split_ept_entry);
+        wrc = atomic_write_ept_entry(&table[index], split_ept_entry, i);
+        ASSERT(wrc == 0);
 
         for ( ; i > target; --i )
             if ( !ept_next_level(p2m, 1, &table, &gfn_remainder, i) )
@@ -361,7 +420,8 @@
         {
             e.emt = MTRR_NUM_TYPES;
             e.recalc = 1;
-            atomic_write_ept_entry(&table[index], e);
+            wrc = atomic_write_ept_entry(&table[index], e, target);
+            ASSERT(wrc == 0);
             rc = 1;
         }
     }
@@ -390,7 +450,7 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
     unsigned int level = ept_get_wl(ept);
     unsigned long mfn = ept_get_asr(ept);
     ept_entry_t *epte;
-    int rc = 0;
+    int wrc, rc = 0;
 
     if ( !mfn )
         return 0;
@@ -431,7 +491,8 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                         ept_p2m_type_to_flags(&e, e.sa_p2mt, e.access);
                 }
                 e.recalc = 0;
-                atomic_write_ept_entry(&epte[i], e);
+                wrc = atomic_write_ept_entry(&epte[i], e, level);
+                ASSERT(wrc == 0);
             }
         }
         else
@@ -465,7 +526,8 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
             {
                 if ( ept_split_super_page(p2m, &e, level, level - 1) )
                 {
-                    atomic_write_ept_entry(&epte[i], e);
+                    wrc = atomic_write_ept_entry(&epte[i], e, level);
+                    ASSERT(wrc == 0);
                     unmap_domain_page(epte);
                     mfn = e.mfn;
                     continue;
@@ -479,7 +541,8 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                 e.recalc = 0;
                 if ( recalc && p2m_is_changeable(e.sa_p2mt) )
                     ept_p2m_type_to_flags(&e, e.sa_p2mt, e.access);
-                atomic_write_ept_entry(&epte[i], e);
+                wrc = atomic_write_ept_entry(&epte[i], e, level);
+                ASSERT(wrc == 0);
             }
 
             rc = 1;
@@ -489,11 +552,12 @@
         if ( e.emt == MTRR_NUM_TYPES )
         {
             ASSERT(is_epte_present(&e));
-            ept_invalidate_emt(_mfn(e.mfn), e.recalc);
+            ept_invalidate_emt(_mfn(e.mfn), e.recalc, level);
             smp_wmb();
             e.emt = 0;
             e.recalc = 0;
-            atomic_write_ept_entry(&epte[i], e);
+            wrc = atomic_write_ept_entry(&epte[i], e, level);
+            ASSERT(wrc == 0);
             unmap_domain_page(epte);
             rc = 1;
         }
@@ -585,6 +649,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
     ASSERT((target == 2 && hvm_hap_has_1gb()) ||
            (target == 1 && hvm_hap_has_2mb()) ||
            (target == 0));
+    ASSERT(!p2m_is_foreign(p2mt) || target == 0);
 
     table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
 
@@ -649,7 +714,8 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         /* now install the newly split ept sub-tree */
         /* NB: please make sure domian is paused and no in-fly VT-d DMA. */
-        atomic_write_ept_entry(ept_entry, split_ept_entry);
+        rc = atomic_write_ept_entry(ept_entry, split_ept_entry, i);
+        ASSERT(rc == 0);
 
         /* then move to the level we want to make real changes */
         for ( ; i > target; i-- )
@@ -688,10 +754,10 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
     }
 
-    atomic_write_ept_entry(ept_entry, new_entry);
+    rc = atomic_write_ept_entry(ept_entry, new_entry, target);
 
     /* Track the highest gfn for which we have ever had a valid mapping */
-    if ( p2mt != p2m_invalid &&
+    if ( rc == 0 && p2mt != p2m_invalid &&
         (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) )
         p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
 
@@ -723,7 +789,7 @@ out:
        last thing we do, after the ept_sync_domain() and removal
       from the iommu tables, so as to avoid a potential
       use-after-free. */
-    if ( is_epte_present(&old_entry) )
+    if ( rc == 0 && is_epte_present(&old_entry) )
         ept_free_entry(p2m, &old_entry, target);
 
     return rc;
@@ -893,7 +959,7 @@ static void ept_change_entry_type_global(struct p2m_domain *p2m,
     if ( !mfn )
         return;
 
-    if ( ept_invalidate_emt(_mfn(mfn), 1) )
+    if ( ept_invalidate_emt(_mfn(mfn), 1, ept_get_wl(&p2m->ept)) )
         ept_sync_domain(p2m);
 }
 
@@ -951,7 +1017,7 @@ static void ept_memory_type_changed(struct p2m_domain *p2m)
     if ( !mfn )
         return;
 
-    if ( ept_invalidate_emt(_mfn(mfn), 0) )
+    if ( ept_invalidate_emt(_mfn(mfn), 0, ept_get_wl(&p2m->ept)) )
         ept_sync_domain(p2m);
 }
 
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index cd9867a..a1794d0 100644
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -513,6 +513,13 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
     }
 
+    if ( unlikely(p2m_is_foreign(p2mt)) )
+    {
+        /* pvh fixme: foreign types are only supported on ept at present */
+        gdprintk(XENLOG_WARNING, "Unimplemented foreign p2m type.\n");
+        return -EINVAL;
+    }
+
     /* Carry out any eventually pending earlier changes first. */
     rc = do_recalc(p2m, gfn);
     if ( rc < 0 )
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index b50747a..642ec28 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -36,6 +36,7 @@
 #include <xen/event.h>
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
+#include <xsm/xsm.h>
 
 #include "mm-locks.h"
 
@@ -311,14 +312,20 @@ struct page_info *get_page_from_gfn_p2m(
     /* Fast path: look up and get out */
     p2m_read_lock(p2m);
     mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
-    if ( (p2m_is_ram(*t) || p2m_is_grant(*t))
-         && mfn_valid(mfn)
+    if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
          && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
     {
         page = mfn_to_page(mfn);
-        if ( !get_page(page, d)
-             /* Page could be shared */
-             && !get_page(page, dom_cow) )
+        if ( unlikely(p2m_is_foreign(*t)) )
+        {
+            struct domain *fdom = page_get_owner_and_reference(page);
+            ASSERT(fdom != d);
+            if ( fdom == NULL )
+                page = NULL;
+        }
+        else if ( !get_page(page, d)
+                  /* Page could be shared */
+                  && !get_page(page, dom_cow) )
             page = NULL;
     }
     p2m_read_unlock(p2m);
@@ -468,6 +475,10 @@ int p2m_alloc_table(struct p2m_domain *p2m)
     return rc;
 }
 
+/*
+ * pvh fixme: when adding support for pvh non-hardware domains, this path must
+ * cleanup any foreign p2m types (release refcnts on them).
+ */
 void p2m_teardown(struct p2m_domain *p2m)
 /* Return all the p2m pages to Xen.
  * We know we don't have any extra mappings to these pages */
@@ -836,8 +847,8 @@ static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
 }
 
 /* Set foreign mfn in the given guest's p2m table. */
-static int __attribute__((unused))
-set_foreign_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
+                                 mfn_t mfn)
 {
     return set_typed_p2m_entry(d, gfn, mfn, p2m_map_foreign);
 }
@@ -1794,6 +1805,107 @@ out_p2m_audit:
 #endif /* P2M_AUDIT */
 
 /*
+ * Add frame from foreign domain to target domain's physmap. Similar to
+ * XENMAPSPACE_gmfn but the frame is foreign being mapped into current,
+ * and is not removed from foreign domain.
+ *
+ * Usage: - libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap.
+ *        - xentrace running on dom0 mapping xenheap pages. foreigndom would
+ *          be DOMID_XEN in such a case.
+ *        etc..
+ *
+ * Side Effect: the mfn for fgfn will be refcounted in lower level routines
+ *              so it is not lost while mapped here. The refcnt is released
+ *              via the XENMEM_remove_from_physmap path.
+ *
+ * Returns: 0 ==> success
+ */
+int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
+                    unsigned long gpfn, domid_t foreigndom)
+{
+    p2m_type_t p2mt, p2mt_prev;
+    unsigned long prev_mfn, mfn;
+    struct page_info *page;
+    int rc;
+    struct domain *fdom;
+
+    ASSERT(tdom);
+    if ( foreigndom == DOMID_SELF || !is_pvh_domain(tdom) )
+        return -EINVAL;
+    /*
+     * pvh fixme: until support is added to p2m teardown code to cleanup any
+     * foreign entries, limit this to hardware domain only.
+     */
+    if ( !is_hardware_domain(tdom) )
+        return -EPERM;
+
+    if ( foreigndom == DOMID_XEN )
+        fdom = rcu_lock_domain(dom_xen);
+    else
+        fdom = rcu_lock_domain_by_id(foreigndom);
+    if ( fdom == NULL )
+        return -ESRCH;
+
+    rc = -EINVAL;
+    if ( tdom == fdom )
+        goto out;
+
+    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
+    if ( rc )
+        goto out;
+
+    /*
+     * Take a refcnt on the mfn. NB: following supported for foreign mapping:
+     *     ram_rw | ram_logdirty | ram_ro | paging_out.
+     */
+    page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
+    if ( !page ||
+         !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) || p2m_is_hole(p2mt) )
+    {
+        if ( page )
+            put_page(page);
+        rc = -EINVAL;
+        goto out;
+    }
+    mfn = mfn_x(page_to_mfn(page));
+
+    /* Remove previously mapped page if it is present. */
+    prev_mfn = mfn_x(get_gfn(tdom, gpfn, &p2mt_prev));
+    if ( mfn_valid(_mfn(prev_mfn)) )
+    {
+        if ( is_xen_heap_mfn(prev_mfn) )
+            /* Xen heap frames are simply unhooked from this phys slot */
+            guest_physmap_remove_page(tdom, gpfn, prev_mfn, 0);
+        else
+            /* Normal domain memory is freed, to avoid leaking memory. */
+            guest_remove_page(tdom, gpfn);
+    }
+    /*
+     * Create the new mapping. Can't use guest_physmap_add_page() because it
+     * will update the m2p table which will result in  mfn -> gpfn of dom0
+     * and not fgfn of domU.
+     */
+    rc = set_foreign_p2m_entry(tdom, gpfn, _mfn(mfn));
+    if ( rc )
+        gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. "
+                 "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n",
+                 gpfn, mfn, fgfn, tdom->domain_id, fdom->domain_id);
+
+    put_page(page);
+
+    /*
+     * This put_gfn for the above get_gfn for prev_mfn. We must do this
+     * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn
+     * before us.
+     */
+    put_gfn(tdom, gpfn);
+
+out:
+    if ( fdom )
+        rcu_unlock_domain(fdom);
+    return rc;
+}
+
 /*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 027f011..d0cfdac 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -188,6 +188,10 @@ typedef unsigned int p2m_query_t;
 #define p2m_is_broken(_t)   (p2m_to_mask(_t) & P2M_BROKEN_TYPES)
 #define p2m_is_foreign(_t)  (p2m_to_mask(_t) & p2m_to_mask(p2m_map_foreign))
 
+#define p2m_is_any_ram(_t)  (p2m_to_mask(_t) &                   \
+                             (P2M_RAM_TYPES | P2M_GRANT_TYPES |  \
+                              p2m_to_mask(p2m_map_foreign)))
+
 /* Per-p2m-table state */
 struct p2m_domain {
     /* Lock that protects updates to the p2m */
@@ -532,6 +536,9 @@ int p2m_is_logdirty_range(struct p2m_domain *, unsigned long start,
 int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
 int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);
 
+/* Add foreign mapping to the guest's p2m table. */
+int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
+                    unsigned long gpfn, domid_t foreign_domid);
+
 /*
  * Populate-on-demand
-- 
1.8.3.1
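[Editor's note] For context, and not part of the patch: on a PVH dom0 the
new XENMAPSPACE_gmfn_foreign case is what a privcmd-style foreign mapping
ultimately reaches. Below is a hedged sketch of a kernel-side caller. It
assumes Linux-style hypercall wrappers (HYPERVISOR_memory_op(),
set_xen_guest_handle()) and the xen_add_to_physmap_range layout from the
public headers of this period; treat both as assumptions, not interfaces
this patch adds:

    #include <xen/interface/memory.h>   /* hypothetical kernel-side include */

    /*
     * Map one frame of domain 'fdom' (its gfn 'fgfn') into our own physmap
     * at 'gpfn'.  Per the comment on p2m_add_foreign(), 'fdom' would be
     * DOMID_XEN when mapping xenheap pages for e.g. xentrace.
     */
    static int map_one_foreign_gfn(domid_t fdom, xen_ulong_t fgfn,
                                   xen_pfn_t gpfn)
    {
        int rc, err = 0;
        struct xen_add_to_physmap_range xatpr = {
            .domid         = DOMID_SELF,               /* target physmap   */
            .space         = XENMAPSPACE_gmfn_foreign, /* new case above   */
            .size          = 1,
            .foreign_domid = fdom,                     /* page owner       */
        };

        set_xen_guest_handle(xatpr.idxs, &fgfn);
        set_xen_guest_handle(xatpr.gpfns, &gpfn);
        set_xen_guest_handle(xatpr.errs, &err);

        rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatpr);
        return rc ?: err;
    }

The reference taken on the foreign frame when the entry is installed is
then dropped through the XENMEM_remove_from_physmap path, matching the
"Side Effect" note in the p2m_add_foreign() comment.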