[Xen-devel] [PATCH v7 1/9] xen: vnuma topology and subop hypercalls
Define interface, structures and hypercalls for the toolstack to
build a vNUMA topology and for guests that wish to retrieve it.
Two subop hypercalls are introduced by this patch:
XEN_DOMCTL_setvnumainfo to define the vNUMA topology per domain
and XENMEM_get_vnumainfo for a guest to retrieve that topology.

Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
---
 xen/common/domain.c               |   15 +++++
 xen/common/domctl.c               |  122 +++++++++++++++++++++++++++++++++++++
 xen/common/memory.c               |   75 +++++++++++++++++++++++
 xen/include/public/arch-x86/xen.h |    8 +++
 xen/include/public/domctl.h       |   29 +++++++++
 xen/include/public/memory.h       |   47 +++++++++++++-
 xen/include/xen/domain.h          |   11 ++++
 xen/include/xen/sched.h           |    4 ++
 8 files changed, 310 insertions(+), 1 deletion(-)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index d7a84cf..fe96ba0 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -280,6 +280,8 @@ struct domain *domain_create(
 
     spin_lock_init(&d->pbuf_lock);
 
+    rwlock_init(&d->vnuma_rwlock);
+
     err = -ENOMEM;
     if ( !zalloc_cpumask_var(&d->domain_dirty_cpumask) )
         goto fail;
@@ -584,6 +586,18 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+void vnuma_destroy(struct vnuma_info *vnuma)
+{
+    if ( vnuma )
+    {
+        xfree(vnuma->vmemrange);
+        xfree(vnuma->vcpu_to_vnode);
+        xfree(vnuma->vdistance);
+        xfree(vnuma->vnode_to_pnode);
+        xfree(vnuma);
+    }
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -602,6 +616,7 @@ int domain_kill(struct domain *d)
         evtchn_destroy(d);
         gnttab_release_mappings(d);
         tmem_destroy(d->tmem_client);
+        vnuma_destroy(d->vnuma);
        domain_set_outstanding_pages(d, 0);
        d->tmem_client = NULL;
        /* fallthrough */
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index c326aba..356a3cf 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -297,6 +297,99 @@ int vcpuaffinity_params_invalid(const xen_domctl_vcpuaffinity_t *vcpuaff)
             guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
 }
 
+/*
+ * Allocates memory for vNUMA; **vnuma should be NULL.
+ * The caller has to make sure that the domain has max_pages
+ * and the number of vcpus set.
+ * Verifies that a single allocation does not exceed
+ * PAGE_SIZE.
+ */
+static int vnuma_alloc(struct vnuma_info **vnuma,
+                       unsigned int nr_vnodes,
+                       unsigned int nr_vcpus)
+{
+    if ( vnuma && *vnuma )
+        return -EINVAL;
+
+    if ( nr_vnodes > XEN_MAX_VNODES )
+        return -EINVAL;
+
+    /*
+     * If XEN_MAX_VNODES increases, these allocations
+     * should be split into PAGE_SIZE allocations
+     * due to XSA-77.
+     */
+    *vnuma = xzalloc(struct vnuma_info);
+    if ( !*vnuma )
+        return -ENOMEM;
+
+    (*vnuma)->vdistance = xmalloc_array(unsigned int, nr_vnodes * nr_vnodes);
+    (*vnuma)->vmemrange = xmalloc_array(vmemrange_t, nr_vnodes);
+    (*vnuma)->vcpu_to_vnode = xmalloc_array(unsigned int, nr_vcpus);
+    (*vnuma)->vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
+
+    if ( (*vnuma)->vdistance == NULL || (*vnuma)->vmemrange == NULL ||
+         (*vnuma)->vcpu_to_vnode == NULL || (*vnuma)->vnode_to_pnode == NULL )
+    {
+        vnuma_destroy(*vnuma);
+        return -ENOMEM;
+    }
+
+    return 0;
+}
+
+/*
+ * Construct vNUMA topology from the u_vnuma struct and return
+ * it in dst.
+ */
+long vnuma_init(const struct xen_domctl_vnuma *u_vnuma,
+                const struct domain *d,
+                struct vnuma_info **dst)
+{
+    unsigned int nr_vnodes;
+    long ret = -EINVAL;
+    struct vnuma_info *v = NULL;
+
+    /* If vNUMA topology already set, just exit. */
+    if ( *dst )
+        return ret;
+
+    nr_vnodes = u_vnuma->nr_vnodes;
+
+    if ( nr_vnodes == 0 )
+        return ret;
+
+    ret = vnuma_alloc(&v, nr_vnodes, d->max_vcpus);
+    if ( ret )
+        return ret;
+
+    ret = -EFAULT;
+
+    if ( copy_from_guest(v->vdistance, u_vnuma->vdistance,
+                         nr_vnodes * nr_vnodes) )
+        goto vnuma_fail;
+
+    if ( copy_from_guest(v->vmemrange, u_vnuma->vmemrange, nr_vnodes) )
+        goto vnuma_fail;
+
+    if ( copy_from_guest(v->vcpu_to_vnode, u_vnuma->vcpu_to_vnode,
+                         d->max_vcpus) )
+        goto vnuma_fail;
+
+    if ( copy_from_guest(v->vnode_to_pnode, u_vnuma->vnode_to_pnode,
+                         nr_vnodes) )
+        goto vnuma_fail;
+
+    v->nr_vnodes = nr_vnodes;
+    *dst = v;
+
+    return 0;
+
+ vnuma_fail:
+    vnuma_destroy(v);
+    return ret;
+}
+
 long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 {
     long ret = 0;
@@ -967,6 +1060,35 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     }
     break;
 
+    case XEN_DOMCTL_setvnumainfo:
+    {
+        struct vnuma_info *v = NULL;
+
+        ret = -EINVAL;
+
+        if ( guest_handle_is_null(op->u.vnuma.vdistance) ||
+             guest_handle_is_null(op->u.vnuma.vmemrange) ||
+             guest_handle_is_null(op->u.vnuma.vcpu_to_vnode) ||
+             guest_handle_is_null(op->u.vnuma.vnode_to_pnode) ) {
+            break;
+        }
+
+        ret = vnuma_init(&op->u.vnuma, d, &v);
+        if ( ret < 0 )
+            break;
+
+        ASSERT(v != NULL);
+
+        /* Overwrite the domain's vNUMA topology. */
+        write_lock(&d->vnuma_rwlock);
+        vnuma_destroy(d->vnuma);
+        d->vnuma = v;
+        write_unlock(&d->vnuma_rwlock);
+
+        ret = 0;
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, d, u_domctl);
         break;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index c2dd31b..ad61ec0 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -969,6 +969,81 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
         break;
 
+    case XENMEM_get_vnumainfo:
+    {
+        struct vnuma_topology_info topology;
+        struct domain *d;
+        unsigned int dom_vnodes, dom_vcpus;
+
+        /*
+         * The guest passes nr_vnodes and nr_vcpus, so
+         * we know how much memory the guest has allocated.
+         */
+        if ( copy_from_guest(&topology, arg, 1) ||
+             guest_handle_is_null(topology.vmemrange.h) ||
+             guest_handle_is_null(topology.vdistance.h) ||
+             guest_handle_is_null(topology.vcpu_to_vnode.h) ) {
+            return -EFAULT;
+        }
+
+        if ( (d = rcu_lock_domain_by_any_id(topology.domid)) == NULL )
+            return -ESRCH;
+
+        rc = -EOPNOTSUPP;
+
+        read_lock(&d->vnuma_rwlock);
+
+        if ( d->vnuma == NULL )
+            goto vnumainfo_out;
+
+        dom_vnodes = d->vnuma->nr_vnodes;
+        dom_vcpus = d->max_vcpus;
+
+        if ( d->vnuma->vdistance == NULL || d->vnuma->vmemrange == NULL ||
+             d->vnuma->vcpu_to_vnode == NULL )
+        {
+            rc = -ENOMEM;
+            goto vnumainfo_out;
+        }
+
+        /*
+         * Guest nr_vcpus and nr_vnodes may differ from the domain's vNUMA
+         * config, so check them here to make sure we don't overflow.
+         */
+        rc = -ENOBUFS;
+        if ( topology.nr_vnodes < dom_vnodes ||
+             topology.nr_vcpus < dom_vcpus )
+            goto vnumainfo_out;
+
+        rc = -EFAULT;
+
+        if ( copy_to_guest(topology.vmemrange.h, d->vnuma->vmemrange,
+                           dom_vnodes) != 0 )
+            goto vnumainfo_out;
+
+        if ( copy_to_guest(topology.vdistance.h, d->vnuma->vdistance,
+                           dom_vnodes * dom_vnodes) != 0 )
+            goto vnumainfo_out;
+
+        if ( copy_to_guest(topology.vcpu_to_vnode.h, d->vnuma->vcpu_to_vnode,
+                           dom_vcpus) != 0 )
+            goto vnumainfo_out;
+
+        topology.nr_vnodes = dom_vnodes;
+        topology.nr_vcpus = dom_vcpus;
+
+        if ( __copy_to_guest(arg, &topology, 1) != 0 )
+            goto vnumainfo_out;
+
+        rc = 0;
+
+ vnumainfo_out:
+        read_unlock(&d->vnuma_rwlock);
+        rcu_unlock_domain(d);
+
+        break;
+    }
+
     default:
         rc = arch_memory_op(cmd, arg);
         break;
diff --git a/xen/include/public/arch-x86/xen.h b/xen/include/public/arch-x86/xen.h
index f35804b..6358cbb 100644
--- a/xen/include/public/arch-x86/xen.h
+++ b/xen/include/public/arch-x86/xen.h
@@ -108,6 +108,14 @@ typedef unsigned long xen_pfn_t;
 /* Maximum number of virtual CPUs in legacy multi-processor guests. */
 #define XEN_LEGACY_MAX_VCPUS 32
 
+/*
+ * Maximum number of virtual NUMA nodes per domain.
+ * This restriction is related to security advisory XSA-77
+ * and the maximum xmalloc size of PAGE_SIZE.  The limit
+ * avoids multi-page allocations for vNUMA.
+ */
+#define XEN_MAX_VNODES 32
+
 #ifndef __ASSEMBLY__
 
 typedef unsigned long xen_ulong_t;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 5b11bbf..5ee74f4 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -35,6 +35,7 @@
 #include "xen.h"
 #include "grant_table.h"
 #include "hvm/save.h"
+#include "memory.h"
 
 #define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
 
@@ -934,6 +935,32 @@ struct xen_domctl_vcpu_msrs {
 };
 typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
+
+/*
+ * Used in XEN_DOMCTL_setvnumainfo to set
+ * the vNUMA topology of a domain.
+ */
+struct xen_domctl_vnuma {
+    uint32_t nr_vnodes;
+    uint32_t _pad;
+    XEN_GUEST_HANDLE_64(uint) vdistance;
+    XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+
+    /*
+     * vnode to physical NUMA node mapping.
+     * This is kept on a per-domain basis for
+     * interested consumers, such as NUMA-aware ballooning.
+     */
+    XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
+
+    /*
+     * Memory ranges for each vNUMA node.
+     */
+    XEN_GUEST_HANDLE_64(vmemrange_t) vmemrange;
+};
+typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
+
 #endif
 
 struct xen_domctl {
@@ -1008,6 +1035,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_cacheflush                    71
 #define XEN_DOMCTL_get_vcpu_msrs                 72
 #define XEN_DOMCTL_set_vcpu_msrs                 73
+#define XEN_DOMCTL_setvnumainfo                  74
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1068,6 +1096,7 @@ struct xen_domctl {
         struct xen_domctl_cacheflush        cacheflush;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
+        struct xen_domctl_vnuma             vnuma;
         uint8_t                             pad[128];
     } u;
 };
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 2c57aa0..2c212e1 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -521,9 +521,54 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
  * The zero value is appropiate.
  */
 
+/* vNUMA node memory range */
+struct vmemrange {
+    uint64_t start, end;
+};
+
+typedef struct vmemrange vmemrange_t;
+DEFINE_XEN_GUEST_HANDLE(vmemrange_t);
+
+/*
+ * vNUMA topology specifies the number of vNUMA nodes, the distance table,
+ * the memory ranges and the vcpu-to-vnode mapping provided for guests.
+ * The XENMEM_get_vnumainfo hypercall expects the guest to pass
+ * nr_vnodes and nr_vcpus to indicate how much memory it has allocated.
+ * After filling the guest's structures, nr_vnodes and nr_vcpus are
+ * copied back to the guest.
+ */
+struct vnuma_topology_info {
+    /* IN */
+    domid_t domid;
+    /* IN/OUT */
+    unsigned int nr_vnodes;
+    unsigned int nr_vcpus;
+    /* OUT */
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t pad;
+    } vdistance;
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t pad;
+    } vcpu_to_vnode;
+    union {
+        XEN_GUEST_HANDLE(vmemrange_t) h;
+        uint64_t pad;
+    } vmemrange;
+};
+typedef struct vnuma_topology_info vnuma_topology_info_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
+
+/*
+ * XENMEM_get_vnumainfo is used by a guest to retrieve
+ * its vNUMA topology from the hypervisor.
+ */
+#define XENMEM_get_vnumainfo 26
+
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
-/* Next available subop number is 26 */
+/* Next available subop number is 27 */
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index bb1c398..d29a84d 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -89,4 +89,15 @@ extern unsigned int xen_processor_pmbits;
 
 extern bool_t opt_dom0_vcpus_pin;
 
+/* Per-domain vNUMA topology. */
+struct vnuma_info {
+    unsigned int nr_vnodes;
+    unsigned int *vdistance;
+    unsigned int *vcpu_to_vnode;
+    unsigned int *vnode_to_pnode;
+    struct vmemrange *vmemrange;
+};
+
+void vnuma_destroy(struct vnuma_info *vnuma);
+
 #endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 4575dda..5bb7153 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -452,6 +452,10 @@ struct domain
     nodemask_t node_affinity;
     unsigned int last_alloc_node;
     spinlock_t node_affinity_lock;
+
+    /* vNUMA topology, protected by vnuma_rwlock. */
+    rwlock_t vnuma_rwlock;
+    struct vnuma_info *vnuma;
 };
 
 struct domain_setup_info
-- 
1.7.10.4
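For illustration only, and not part of this patch: a toolstack could drive
XEN_DOMCTL_setvnumainfo through a libxc-style wrapper roughly as sketched
below. The sketch assumes libxc's usual hypercall-bounce helpers
(DECLARE_DOMCTL, DECLARE_HYPERCALL_BOUNCE, xc_hypercall_bounce_pre/post and
do_domctl from xc_private.h); the wrapper name xc_domain_setvnuma() and its
argument list are hypothetical here. Note that the hypervisor sizes
vcpu_to_vnode by the domain's max_vcpus, so nr_vcpus must match that value.

/* Hypothetical libxc-style wrapper for XEN_DOMCTL_setvnumainfo (sketch). */
int xc_domain_setvnuma(xc_interface *xch, uint32_t domid,
                       uint32_t nr_vnodes, uint32_t nr_vcpus,
                       vmemrange_t *vmemrange, unsigned int *vdistance,
                       unsigned int *vcpu_to_vnode,
                       unsigned int *vnode_to_pnode)
{
    int rc;
    DECLARE_DOMCTL;
    DECLARE_HYPERCALL_BOUNCE(vmemrange, sizeof(*vmemrange) * nr_vnodes,
                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
    DECLARE_HYPERCALL_BOUNCE(vdistance,
                             sizeof(*vdistance) * nr_vnodes * nr_vnodes,
                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
    DECLARE_HYPERCALL_BOUNCE(vcpu_to_vnode,
                             sizeof(*vcpu_to_vnode) * nr_vcpus,
                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
    DECLARE_HYPERCALL_BOUNCE(vnode_to_pnode,
                             sizeof(*vnode_to_pnode) * nr_vnodes,
                             XC_HYPERCALL_BUFFER_BOUNCE_IN);

    /* Bounce the caller's arrays into hypercall-safe memory. */
    if ( xc_hypercall_bounce_pre(xch, vmemrange) ||
         xc_hypercall_bounce_pre(xch, vdistance) ||
         xc_hypercall_bounce_pre(xch, vcpu_to_vnode) ||
         xc_hypercall_bounce_pre(xch, vnode_to_pnode) )
    {
        rc = -1;
        goto out;
    }

    /* Point the domctl guest handles at the bounced buffers. */
    set_xen_guest_handle(domctl.u.vnuma.vmemrange, vmemrange);
    set_xen_guest_handle(domctl.u.vnuma.vdistance, vdistance);
    set_xen_guest_handle(domctl.u.vnuma.vcpu_to_vnode, vcpu_to_vnode);
    set_xen_guest_handle(domctl.u.vnuma.vnode_to_pnode, vnode_to_pnode);

    domctl.cmd = XEN_DOMCTL_setvnumainfo;
    domctl.domain = (domid_t)domid;
    domctl.u.vnuma.nr_vnodes = nr_vnodes;

    rc = do_domctl(xch, &domctl);

 out:
    xc_hypercall_bounce_post(xch, vmemrange);
    xc_hypercall_bounce_post(xch, vdistance);
    xc_hypercall_bounce_post(xch, vcpu_to_vnode);
    xc_hypercall_bounce_post(xch, vnode_to_pnode);

    return rc;
}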
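Similarly for illustration only: a guest kernel could retrieve its topology
with XENMEM_get_vnumainfo along the lines of the Linux-flavoured sketch
below. It assumes the vmemrange/vnuma_topology_info definitions from the
public header above are visible to the guest and uses the standard
HYPERVISOR_memory_op() and set_xen_guest_handle() helpers; the function name
get_vnuma_topology() is made up for the example. An -ENOBUFS return means the
buffers were sized for fewer vnodes/vcpus than the domain actually has, so
the caller should retry with larger arrays.

/* Hypothetical guest-side sketch of XENMEM_get_vnumainfo (Linux flavour). */
static int get_vnuma_topology(unsigned int nr_vnodes, unsigned int nr_vcpus)
{
    struct vnuma_topology_info topo = { .domid = DOMID_SELF };
    struct vmemrange *vmemrange;
    unsigned int *vdistance, *vcpu_to_vnode;
    int rc = -ENOMEM;

    /* Tell the hypervisor how many entries the buffers below can hold. */
    topo.nr_vnodes = nr_vnodes;
    topo.nr_vcpus = nr_vcpus;

    vmemrange = kcalloc(nr_vnodes, sizeof(*vmemrange), GFP_KERNEL);
    vdistance = kcalloc(nr_vnodes * nr_vnodes, sizeof(*vdistance), GFP_KERNEL);
    vcpu_to_vnode = kcalloc(nr_vcpus, sizeof(*vcpu_to_vnode), GFP_KERNEL);
    if ( !vmemrange || !vdistance || !vcpu_to_vnode )
        goto out;

    set_xen_guest_handle(topo.vmemrange.h, vmemrange);
    set_xen_guest_handle(topo.vdistance.h, vdistance);
    set_xen_guest_handle(topo.vcpu_to_vnode.h, vcpu_to_vnode);

    /* Returns 0 on success, -ENOBUFS if the buffers are too small. */
    rc = HYPERVISOR_memory_op(XENMEM_get_vnumainfo, &topo);
    if ( rc == 0 )
        pr_info("vNUMA: %u vnodes, %u vcpus\n", topo.nr_vnodes, topo.nr_vcpus);

 out:
    kfree(vmemrange);
    kfree(vdistance);
    kfree(vcpu_to_vnode);
    return rc;
}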