[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC XEN PATCH v2 05/15] xen/x86: add XENMEM_populate_pmem_map to map host pmem pages to HVM domain
XENMEM_populate_pmemmap is used by toolstack to map the specified host pmem pages to the specified guest physical address. Only pmem pages that have been setup via XEN_SYSCTL_nvdimm_pmem_setup can be mapped via XENMEM_populate_pmem_map. Because XEN_SYSCTL_nvdimm_pmem_setup only works on x86, XENMEM_populate_pmem_map is made to work only on x86 as well and return -ENOSYS on other architectures. Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx> --- Cc: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx> Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx> Cc: Wei Liu <wei.liu2@xxxxxxxxxx> Cc: Jan Beulich <jbeulich@xxxxxxxx> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> Changes in v2: * Rename *_pmemmap to *_pmem_map. * Add XSM check fo XENMEM_populate_pmem_map. * Add compat code for XENMEM_populate_pmem_map. * Add stub for pmem_populate() on non-x86 architectures. * Add check to avoid populate pmem pages to dom0. * Merge v1 patch 5 "xen/x86: release pmem pages at domain destroy". --- tools/flask/policy/modules/xen.if | 2 +- tools/libxc/include/xenctrl.h | 17 ++++ tools/libxc/xc_domain.c | 15 ++++ xen/arch/x86/domain.c | 7 ++ xen/common/compat/memory.c | 1 + xen/common/domain.c | 3 + xen/common/memory.c | 43 ++++++++++ xen/common/pmem.c | 151 +++++++++++++++++++++++++++++++++++- xen/include/public/memory.h | 14 +++- xen/include/xen/pmem.h | 24 ++++++ xen/include/xen/sched.h | 3 + xen/include/xsm/dummy.h | 11 +++ xen/include/xsm/xsm.h | 12 +++ xen/xsm/dummy.c | 4 + xen/xsm/flask/hooks.c | 13 ++++ xen/xsm/flask/policy/access_vectors | 2 + 16 files changed, 317 insertions(+), 5 deletions(-) diff --git a/tools/flask/policy/modules/xen.if b/tools/flask/policy/modules/xen.if index ed0df4f010..bc4176c089 100644 --- a/tools/flask/policy/modules/xen.if +++ b/tools/flask/policy/modules/xen.if @@ -55,7 +55,7 @@ define(`create_domain_common', ` psr_cmt_op psr_cat_op soft_reset }; allow $1 $2:security check_context; allow $1 $2:shadow enable; - allow $1 $2:mmu { map_read map_write adjust 
memorymap physmap pinpage mmuext_op updatemp }; + allow $1 $2:mmu { map_read map_write adjust memorymap physmap pinpage mmuext_op updatemp populate_pmem_map }; allow $1 $2:grant setup; allow $1 $2:hvm { cacheattr getparam hvmctl sethvmc setparam nested altp2mhvm altp2mhvm_op dm }; diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h index d4e3002c9e..f8a9581506 100644 --- a/tools/libxc/include/xenctrl.h +++ b/tools/libxc/include/xenctrl.h @@ -2553,6 +2553,23 @@ int xc_nvdimm_pmem_setup(xc_interface *xch, unsigned long smfn, unsigned long emfn, unsigned long mgmt_smfn, unsigned long mgmt_emfn); +/* + * Map host pmem pages to a domain. + * + * Parameters: + * xch: xc interface handler + * domid: the target domain id + * mfn: start MFN of the host pmem pages to be mapped + * nr_mfns: the number of host pmem pages to be mapped + * gfn: start GFN of the target guest physical pages + * + * Return: + * 0 on success; non-zero error code for failures. + */ +int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid, + unsigned long mfn, unsigned long gfn, + unsigned long nr_mfns); + /* Compat shims */ #include "xenctrl_compat.h" diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c index d862e537d9..9ccdda086d 100644 --- a/tools/libxc/xc_domain.c +++ b/tools/libxc/xc_domain.c @@ -2291,6 +2291,21 @@ int xc_domain_soft_reset(xc_interface *xch, domctl.domain = (domid_t)domid; return do_domctl(xch, &domctl); } + +int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid, + unsigned long mfn, unsigned long gfn, + unsigned long nr_mfns) +{ + struct xen_pmem_map args = { + .domid = domid, + .mfn = mfn, + .gfn = gfn, + .nr_mfns = nr_mfns, + }; + + return do_memory_op(xch, XENMEM_populate_pmem_map, &args, sizeof(args)); +} + /* * Local variables: * mode: C diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 479aee641f..2333603f3e 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -36,6 +36,7 @@ #include 
<xen/wait.h> #include <xen/guest_access.h> #include <xen/livepatch.h> +#include <xen/pmem.h> #include <public/sysctl.h> #include <public/hvm/hvm_vcpu.h> #include <asm/regs.h> @@ -2352,6 +2353,12 @@ int domain_relinquish_resources(struct domain *d) if ( ret ) return ret; +#ifdef CONFIG_PMEM + ret = pmem_teardown(d); + if ( ret ) + return ret; +#endif /* CONFIG_PMEM */ + /* Tear down paging-assistance stuff. */ ret = paging_teardown(d); if ( ret ) diff --git a/xen/common/compat/memory.c b/xen/common/compat/memory.c index a37a948331..19382f6dfc 100644 --- a/xen/common/compat/memory.c +++ b/xen/common/compat/memory.c @@ -523,6 +523,7 @@ int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat) case XENMEM_add_to_physmap: case XENMEM_remove_from_physmap: case XENMEM_access_op: + case XENMEM_populate_pmem_map: break; case XENMEM_get_vnumainfo: diff --git a/xen/common/domain.c b/xen/common/domain.c index 4492c9c3d5..f8b4bd9c29 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -287,6 +287,9 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags, INIT_PAGE_LIST_HEAD(&d->page_list); INIT_PAGE_LIST_HEAD(&d->xenpage_list); + spin_lock_init_prof(d, pmem_lock); + INIT_PAGE_LIST_HEAD(&d->pmem_page_list); + spin_lock_init(&d->node_affinity_lock); d->node_affinity = NODE_MASK_ALL; d->auto_node_affinity = 1; diff --git a/xen/common/memory.c b/xen/common/memory.c index ad0b33ceb6..0883d2d9b8 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -23,6 +23,7 @@ #include <xen/numa.h> #include <xen/mem_access.h> #include <xen/trace.h> +#include <xen/pmem.h> #include <asm/current.h> #include <asm/hardirq.h> #include <asm/p2m.h> @@ -1328,6 +1329,48 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) } #endif +#ifdef CONFIG_PMEM + case XENMEM_populate_pmem_map: + { + struct xen_pmem_map map; + struct xen_pmem_map_args args; + + if ( copy_from_guest(&map, arg, 1) ) + return -EFAULT; + + if ( map.domid == DOMID_SELF 
) + return -EINVAL; + + d = rcu_lock_domain_by_any_id(map.domid); + if ( !d ) + return -EINVAL; + + rc = xsm_populate_pmem_map(XSM_TARGET, curr_d, d); + if ( rc ) + { + rcu_unlock_domain(d); + return rc; + } + + args.domain = d; + args.mfn = map.mfn; + args.gfn = map.gfn; + args.nr_mfns = map.nr_mfns; + args.nr_done = start_extent; + args.preempted = 0; + + rc = pmem_populate(&args); + rcu_unlock_domain(d); + + if ( rc == -ERESTART && args.preempted ) + return hypercall_create_continuation( + __HYPERVISOR_memory_op, "lh", + op | (args.nr_done << MEMOP_EXTENT_SHIFT), arg); + + break; + } +#endif /* CONFIG_PMEM */ + default: rc = arch_memory_op(cmd, arg); break; diff --git a/xen/common/pmem.c b/xen/common/pmem.c index 0e2d66f94c..03f1c1b374 100644 --- a/xen/common/pmem.c +++ b/xen/common/pmem.c @@ -17,9 +17,12 @@ */ #include <xen/errno.h> +#include <xen/event.h> #include <xen/list.h> #include <xen/mm.h> +#include <xen/paging.h> #include <xen/pmem.h> +#include <xen/sched.h> #include <xen/spinlock.h> /* @@ -130,8 +133,9 @@ static struct pmem *get_first_overlap(const struct list_head *list, return overlap; } -static bool pmem_list_covered(const struct list_head *list, - unsigned long smfn, unsigned emfn) +static bool pmem_list_covered_ready(const struct list_head *list, + unsigned long smfn, unsigned emfn, + bool check_ready) { struct pmem *overlap; bool covered = false; @@ -139,7 +143,8 @@ static bool pmem_list_covered(const struct list_head *list, do { overlap = get_first_overlap(list, smfn, emfn); - if ( !overlap || smfn < overlap->smfn ) + if ( !overlap || smfn < overlap->smfn || + (check_ready && !overlap->ready) ) break; if ( emfn <= overlap->emfn ) @@ -155,6 +160,12 @@ static bool pmem_list_covered(const struct list_head *list, return covered; } +static bool pmem_list_covered(const struct list_head *list, + unsigned long smfn, unsigned emfn) +{ + return pmem_list_covered_ready(list, smfn, emfn, false); +} + static bool check_mgmt_size(unsigned long mgmt_mfns, 
unsigned long total_mfns) { return mgmt_mfns >= @@ -301,3 +312,137 @@ int pmem_setup(unsigned long data_smfn, unsigned long data_emfn, out: return rc; } + +#ifdef CONFIG_X86 + +static void pmem_assign_page(struct domain *d, struct page_info *pg, + unsigned long gfn) +{ + pg->u.inuse.type_info = 0; + page_set_owner(pg, d); + guest_physmap_add_page(d, _gfn(gfn), _mfn(page_to_mfn(pg)), 0); + + spin_lock(&d->pmem_lock); + page_list_add_tail(pg, &d->pmem_page_list); + spin_unlock(&d->pmem_lock); +} + +static void pmem_unassign_page(struct domain *d, struct page_info *pg, + unsigned long gfn) +{ + spin_lock(&d->pmem_lock); + page_list_del(pg, &d->pmem_page_list); + spin_unlock(&d->pmem_lock); + + guest_physmap_remove_page(d, _gfn(gfn), _mfn(page_to_mfn(pg)), 0); + page_set_owner(pg, NULL); + pg->count_info = (pg->count_info & ~PGC_count_mask) | PGC_state_free; +} + +static void pmem_unassign_pages(struct domain *d, unsigned long mfn, + unsigned long gfn, unsigned long nr_mfns) +{ + unsigned long emfn = mfn + nr_mfns; + + for ( ; mfn < emfn; mfn++, gfn++ ) + pmem_unassign_page(d, mfn_to_page(mfn), gfn); +} + +/** + * Map host pmem pages to a domain. Currently only HVM domain is + * supported. + * + * Parameters: + * args: please refer to comments of struct xen_pmemmap_args in xen/pmem.h + * + * Return: + * 0 on success; non-zero error code on failures. 
+ */ +int pmem_populate(struct xen_pmem_map_args *args) +{ +    struct domain *d = args->domain; +    unsigned long i = args->nr_done; +    unsigned long mfn = args->mfn + i; +    unsigned long emfn = args->mfn + args->nr_mfns; +    unsigned long gfn; +    struct page_info *page; +    int rc = 0; + +    if ( unlikely(d->is_dying) ) +        return -EINVAL; + +    if ( !has_hvm_container_domain(d) || !paging_mode_translate(d) ) +        return -EINVAL; + +    spin_lock(&pmem_gregions_lock); +    if ( !pmem_list_covered_ready(&pmem_gregions, mfn, emfn, true) ) +    { +        spin_unlock(&pmem_gregions_lock); +        return -EINVAL; +    } +    spin_unlock(&pmem_gregions_lock); + +    for ( gfn = args->gfn + i; mfn < emfn; i++, mfn++, gfn++ ) +    { +        if ( i != args->nr_done && hypercall_preempt_check() ) +        { +            args->preempted = 1; +            rc = -ERESTART; +            break; +        } + +        page = mfn_to_page(mfn); + +        spin_lock(&pmem_gregions_lock); +        if ( !page_state_is(page, free) ) +        { +            dprintk(XENLOG_DEBUG, "pmem: mfn 0x%lx not in free state\n", mfn); +            spin_unlock(&pmem_gregions_lock); +            rc = -EINVAL; +            break; +        } +        page->count_info = PGC_state_inuse | 1; +        spin_unlock(&pmem_gregions_lock); + +        pmem_assign_page(d, page, gfn); +    } + +    if ( rc && rc != -ERESTART ) +        pmem_unassign_pages(d, args->mfn, args->gfn, i); + +    args->nr_done = i; +    return rc; +} + +int pmem_teardown(struct domain *d) +{ +    struct page_info *pg, *next; +    int rc = 0; + +    ASSERT(d->is_dying); +    ASSERT(d != current->domain); + +    spin_lock(&d->pmem_lock); + +    page_list_for_each_safe (pg, next, &d->pmem_page_list ) +    { +        BUG_ON(page_get_owner(pg) != d); +        BUG_ON(page_state_is(pg, free)); + +        page_list_del(pg, &d->pmem_page_list); +        page_set_owner(pg, NULL); +        pg->count_info = (pg->count_info & ~PGC_count_mask) | PGC_state_free; + +        if ( hypercall_preempt_check() ) +        { +            rc = -ERESTART; +            break; +        } +    } + +    spin_unlock(&d->pmem_lock); + +    return rc; +} + +#endif /* CONFIG_X86 */ diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h index 6eee0c8a16..fa636b313a 100644 ---
a/xen/include/public/memory.h +++ b/xen/include/public/memory.h @@ -648,7 +648,19 @@ struct xen_vnuma_topology_info { typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t; DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t); -/* Next available subop number is 28 */ +#define XENMEM_populate_pmem_map 28 + +struct xen_pmem_map { + /* IN */ + domid_t domid; + unsigned long mfn; + unsigned long gfn; + unsigned int nr_mfns; +}; +typedef struct xen_pmem_map xen_pmem_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_pmem_map_t); + +/* Next available subop number is 29 */ #endif /* __XEN_PUBLIC_MEMORY_H__ */ diff --git a/xen/include/xen/pmem.h b/xen/include/xen/pmem.h index 95c8207ff6..cbc621048b 100644 --- a/xen/include/xen/pmem.h +++ b/xen/include/xen/pmem.h @@ -26,9 +26,23 @@ int pmem_register(unsigned long smfn, unsigned long emfn); int pmem_setup(unsigned long data_spfn, unsigned long data_emfn, unsigned long mgmt_smfn, unsigned long mgmt_emfn); +struct xen_pmem_map_args { + struct domain *domain; + + unsigned long mfn; /* start MFN of pmems page to be mapped */ + unsigned long gfn; /* start GFN of target domain */ + unsigned long nr_mfns; /* number of pmem pages to be mapped */ + + /* For preemption ... */ + unsigned long nr_done; /* number of pmem pages processed so far */ + int preempted; /* Is the operation preempted? 
*/ +}; + #ifdef CONFIG_X86 int pmem_arch_setup(unsigned long data_smfn, unsigned long data_emfn, unsigned long mgmt_smfn, unsigned long mgmt_emfn); +int pmem_populate(struct xen_pmem_map_args *args); +int pmem_teardown(struct domain *d); #else /* !CONFIG_X86 */ static inline int pmem_arch_setup(unsigned long data_smfn, unsigned long data_emfn, @@ -36,6 +50,16 @@ pmem_arch_setup(unsigned long data_smfn, unsigned long data_emfn, { return -ENOSYS; } + +static inline int pmem_populate(struct xen_pmem_map_args *args) +{ + return -ENOSYS; +} + +static inline int pmem_teardown(struct domain *d) +{ + return -ENOSYS; +} #endif /* CONFIG_X86 */ #endif /* CONFIG_PMEM */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 0929c0b910..39057243d6 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -336,6 +336,9 @@ struct domain atomic_t shr_pages; /* number of shared pages */ atomic_t paged_pages; /* number of paged-out pages */ + spinlock_t pmem_lock; /* protect all following pmem_ fields */ + struct page_list_head pmem_page_list; /* linked list of pmem pages */ + /* Scheduling. 
*/ void *sched_priv; /* scheduler-specific data */ struct cpupool *cpupool; diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h index 4b27ae72de..aea0b9376f 100644 --- a/xen/include/xsm/dummy.h +++ b/xen/include/xsm/dummy.h @@ -728,3 +728,14 @@ static XSM_INLINE int xsm_xen_version (XSM_DEFAULT_ARG uint32_t op) return xsm_default_action(XSM_PRIV, current->domain, NULL); } } + +#ifdef CONFIG_PMEM + +static XSM_INLINE int xsm_populate_pmem_map(XSM_DEFAULT_ARG + struct domain *d1, struct domain *d2) +{ + XSM_ASSERT_ACTION(XSM_TARGET); + return xsm_default_action(action, d1, d2); +} + +#endif /* CONFIG_PMEM */ diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h index 2cf7ac10db..8f62b21739 100644 --- a/xen/include/xsm/xsm.h +++ b/xen/include/xsm/xsm.h @@ -182,6 +182,10 @@ struct xsm_operations { int (*dm_op) (struct domain *d); #endif int (*xen_version) (uint32_t cmd); + +#ifdef CONFIG_PMEM + int (*populate_pmem_map) (struct domain *d1, struct domain *d2); +#endif /* CONFIG_PMEM */ }; #ifdef CONFIG_XSM @@ -705,6 +709,14 @@ static inline int xsm_xen_version (xsm_default_t def, uint32_t op) return xsm_ops->xen_version(op); } +#ifdef CONFIG_PMEM +static inline int xsm_populate_pmem_map(xsm_default_t def, + struct domain *d1, struct domain *d2) +{ + return xsm_ops->populate_pmem_map(d1, d2); +} +#endif /* CONFIG_PMEM */ + #endif /* XSM_NO_WRAPPERS */ #ifdef CONFIG_MULTIBOOT diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c index 3cb5492dd3..dde68ecf59 100644 --- a/xen/xsm/dummy.c +++ b/xen/xsm/dummy.c @@ -159,4 +159,8 @@ void __init xsm_fixup_ops (struct xsm_operations *ops) set_to_dummy_if_null(ops, dm_op); #endif set_to_dummy_if_null(ops, xen_version); + +#ifdef CONFIG_PMEM + set_to_dummy_if_null(ops, populate_pmem_map); +#endif } diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c index e3c77bbe3f..582ddf81d3 100644 --- a/xen/xsm/flask/hooks.c +++ b/xen/xsm/flask/hooks.c @@ -1659,6 +1659,15 @@ static int flask_xen_version (uint32_t op) } } +#ifdef 
CONFIG_PMEM + +static int flask_populate_pmem_map(struct domain *d1, struct domain *d2) +{ + return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__POPULATE_PMEM_MAP); +} + +#endif /* CONFIG_PMEM */ + long do_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op); int compat_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op); @@ -1794,6 +1803,10 @@ static struct xsm_operations flask_ops = { .dm_op = flask_dm_op, #endif .xen_version = flask_xen_version, + +#ifdef CONFIG_PMEM + .populate_pmem_map = flask_populate_pmem_map, +#endif /* CONFIG_PMEM */ }; void __init flask_init(const void *policy_buffer, size_t policy_size) diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors index a8ddd7ca84..44cbd66f4d 100644 --- a/xen/xsm/flask/policy/access_vectors +++ b/xen/xsm/flask/policy/access_vectors @@ -385,6 +385,8 @@ class mmu # Allow a privileged domain to install a map of a page it does not own. Used # for stub domain device models with the PV framebuffer. target_hack +# XENMEM_populate_pmem_map + populate_pmem_map } # control of the paging_domctl split by subop -- 2.12.0 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |