[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] 5/7 xen: Add basic NUMA support - Physinfo Stats
This patch exports NUMA-specific information collected by the hypervisor in the physinfo hypercall. This additional information is also integrated into the xm info command, which displays the NUMA information. Here is a sample output of xm info on a Dual Opteron (2 Node, 2 CPU): root@bebop:~ # xm info system : Linux host : bebop release : 2.6.12.6-xen0-smp version : #1 SMP Fri Dec 16 10:44:58 CST 2005 machine : i686 nr_cpus : 2 nr_nodes : 2 sockets_per_node : 2 cores_per_socket : 1 threads_per_core : 1 cpu_mhz : 2193 hw_caps : 078bfbff:e1d3fbff:00000000:00000010 total_memory : 3583 free_memory : 2907 mem_chunks : node0:0x0000000000000000-0x000000000009ffff node0:0x0000000000100000-0x000000007fffffff node1:0x0000000080000000-0x00000000dfffffff node_to_cpu : node0:0 node1:1 xen_major : 3 xen_minor : 0 xen_extra : .0 xen_caps : xen-3.0-x86_32 platform_params : virt_start=0xfc000000 xen_changeset : Fri Dec 16 10:34:22 2005 -0500 8396:652d00e358e4 cc_compiler : gcc version 3.3.5 (Debian 1:3.3.5-8ubuntu2) cc_compile_by : rharper cc_compile_domain : localdomain cc_compile_date : Fri Dec 16 13:51:48 CST 2005 nr_nodes : 2 Note that this is now calculated from num_online_nodes, rather than a hard-coded value of 1. mem_chunks : node0:0x0000000000000000-0x000000000009ffff node0:0x0000000000100000-0x000000007fffffff node1:0x0000000080000000-0x00000000dfffffff We display the 64-bit address of each memory chunk and the node to which it belongs. node_to_cpu : node0:0 node1:1 This provides the node-to-cpu mapping. 
The cpu value is a collapsed range, so for example, on a two node 32-way, the node_to_cpu value might look like: node_to_cpu : node0:0-15 node1:16-31 -- Ryan Harper Software Engineer; Linux Technology Center IBM Corp., Austin, Tx (512) 838-9253 T/L: 678-9253 ryanh@xxxxxxxxxx diffstat output: b/xen/include/public/numa_structs.h | 19 ++++++++ tools/libxc/xc_misc.c | 3 + tools/libxc/xenctrl.h | 3 + tools/python/xen/lowlevel/xc/xc.c | 64 +++++++++++++++++++++++++++-- tools/python/xen/xend/XendNode.py | 67 ++++++++++++++++++++++++++++++ xen/arch/x86/dom0_ops.c | 78 +++++++++++++++++++++++++++++++++++- xen/include/public/dom0_ops.h | 4 + xen/include/xen/numa.h | 7 --- 8 files changed, 232 insertions(+), 13 deletions(-) Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx> Signed-off-by: Ryan Grimm <grimm@xxxxxxxxxx> --- diff -r ce4e724a0cdd -r a9dc1db4006c tools/libxc/xc_misc.c --- a/tools/libxc/xc_misc.c Wed Dec 14 22:49:59 2005 +++ b/tools/libxc/xc_misc.c Wed Dec 14 23:03:37 2005 @@ -56,6 +56,9 @@ op.cmd = DOM0_PHYSINFO; op.interface_version = DOM0_INTERFACE_VERSION; + /* set pointers to caller's so memcpy doesn't clobber them */ + op.u.physinfo.memory_chunks = put_info->memory_chunks; + op.u.physinfo.node_to_cpu = put_info->node_to_cpu; if ( (ret = do_dom0_op(xc_handle, &op)) != 0 ) return ret; diff -r ce4e724a0cdd -r a9dc1db4006c tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Wed Dec 14 22:49:59 2005 +++ b/tools/libxc/xenctrl.h Wed Dec 14 23:03:37 2005 @@ -20,6 +20,7 @@ #include <xen/sched_ctl.h> #include <xen/memory.h> #include <xen/acm.h> +#include <xen/numa_structs.h> #ifdef __ia64__ #define XC_PAGE_SHIFT 14 @@ -350,6 +351,8 @@ int clear); typedef dom0_physinfo_t xc_physinfo_t; +typedef struct node_memory_chunk_s xc_memory_chunk_t; +typedef uint64_t xc_node_to_cpu_t; int xc_physinfo(int xc_handle, xc_physinfo_t *info); diff -r ce4e724a0cdd -r a9dc1db4006c tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Wed Dec 14 22:49:59 2005 +++ 
b/tools/python/xen/lowlevel/xc/xc.c Wed Dec 14 23:03:37 2005 @@ -597,8 +597,19 @@ { xc_physinfo_t info; char cpu_cap[128], *p=cpu_cap, *q=cpu_cap; - int i; - + int i,j; + PyObject *ret_obj, *memchunk_obj, *node_to_cpu_obj; + + /* make space for mem chunks */ + info.memory_chunks = + (xc_memory_chunk_t *)malloc( sizeof(xc_memory_chunk_t) * + PUBLIC_MAXCHUNKS ); + + /* make space for node_to_cpu mapping */ + info.node_to_cpu = + (xc_node_to_cpu_t *)malloc( sizeof(xc_node_to_cpu_t) * + PUBLIC_MAX_NUMNODES ); + if ( xc_physinfo(self->xc_handle, &info) != 0 ) return PyErr_SetFromErrno(xc_error); @@ -611,16 +622,59 @@ } if(q>cpu_cap) *(q-1)=0; - - return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:i,s:s}", + + ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:l,s:l,s:i,s:s}", "threads_per_core", info.threads_per_core, "cores_per_socket", info.cores_per_socket, "sockets_per_node", info.sockets_per_node, - "nr_nodes", info.nr_nodes, "total_memory", pages_to_mb(info.total_pages), "free_memory", pages_to_mb(info.free_pages), "cpu_khz", info.cpu_khz, "hw_caps", cpu_cap); + + /* memchunks */ + memchunk_obj = PyList_New(0); + + /* build list of each memchunk's attributes */ + for ( i=0; i<info.nr_chunks; i++ ) + { + PyList_Append(memchunk_obj, + Py_BuildValue("{s:i,s:K,s:K}", + "node" , info.memory_chunks[i].nid, + "start_paddr", info.memory_chunks[i].start_paddr, + "end_paddr" , info.memory_chunks[i].end_paddr)); + } + /* add list of attributes and nr_chunks to physinfo dictionary */ + PyDict_SetItemString(ret_obj, "mem_chunks", memchunk_obj); + PyDict_SetItemString(ret_obj, "nr_chunks", + Py_BuildValue("i", info.nr_chunks)); + + /* node to cpu mappings */ + node_to_cpu_obj = PyList_New(0); + /* build list of node to cpu mappings */ + for ( i=0; i<info.nr_nodes; i++ ) + { + cpumap_t cpumap = (cpumap_t)info.node_to_cpu[i]; + PyObject *cpus = PyList_New(0); + + for ( j=0; cpumap != 0; j++ ) + { + if ( cpumap & 1 ) + PyList_Append(cpus, PyInt_FromLong(j)); + cpumap >>=1; + } + 
PyList_Append(node_to_cpu_obj, cpus); + } + /* add list of node to cpu mappings and nr_nodes to physinfo dictionary */ + PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj); + PyDict_SetItemString(ret_obj, "nr_nodes", + Py_BuildValue("i", info.nr_nodes)); + + /* free malloc'd memory */ + free(info.memory_chunks); + free(info.node_to_cpu); + + return ret_obj; } static PyObject *pyxc_xeninfo(XcObject *self) diff -r ce4e724a0cdd -r a9dc1db4006c tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Wed Dec 14 22:49:59 2005 +++ b/tools/python/xen/xend/XendNode.py Wed Dec 14 23:03:37 2005 @@ -56,6 +56,69 @@ ['version', ver], ['machine', mch]] + def list_to_rangepairs(self,cmap): + cmap.sort() + pairs = [] + x = y = 0 + for i in range(0,len(cmap)): + try: + if ((cmap[y+1] - cmap[i]) > 1): + pairs.append((cmap[x],cmap[y])) + x = y = i+1 + else: + y = y + 1 + # if we go off the end, then just add x to y + except IndexError: + pairs.append((cmap[x],cmap[y])) + + return pairs + + def format_pairs(self,pairs): + if not pairs: + return "no cpus" + out = "" + for f,s in pairs: + if (f==s): + out += '%d'%f + else: + out += '%d-%d'%(f,s) + out += ',' + # trim trailing ',' + return out[:-1] + + def list_to_strrange(self,list): + return self.format_pairs(self.list_to_rangepairs(list)) + + def format_memchunks(self, pinfo): + str='' + whitespace='' + try: + chunk=pinfo['mem_chunks'] + for i in range(0, pinfo['nr_chunks']): + str+='%snode%d:0x%016x-0x%016x\n' % (whitespace, + chunk[i]['node'], + chunk[i]['start_paddr'], + chunk[i]['end_paddr']) + whitespace='%25s' % '' + except: + str='none\n' + return str[:-1] + + def format_node_to_cpu(self, pinfo): + str='' + whitespace='' + try: + node_to_cpu=pinfo['node_to_cpu'] + for i in range(0, pinfo['nr_nodes']): + str+='%snode%d:%s\n' % (whitespace, + i, + self.list_to_strrange(node_to_cpu[i])) + whitespace='%25s' % '' + except: + str='none\n' + return str[:-1]; + + def physinfo(self): info = self.xc.physinfo() 
@@ -64,6 +127,8 @@ info['cores_per_socket'] * info['threads_per_core']) info['cpu_mhz'] = info['cpu_khz'] / 1000 + info['mem_chunks'] = self.format_memchunks(info) + info['node_to_cpu'] = self.format_node_to_cpu(info) ITEM_ORDER = ['nr_cpus', 'nr_nodes', @@ -74,6 +139,8 @@ 'hw_caps', 'total_memory', 'free_memory', + 'mem_chunks', + 'node_to_cpu' ] return [[k, info[k]] for k in ITEM_ORDER] diff -r ce4e724a0cdd -r a9dc1db4006c xen/arch/x86/dom0_ops.c --- a/xen/arch/x86/dom0_ops.c Wed Dec 14 22:49:59 2005 +++ b/xen/arch/x86/dom0_ops.c Wed Dec 14 23:03:37 2005 @@ -21,6 +21,7 @@ #include <asm/irq.h> #include <asm/processor.h> #include <public/sched_ctl.h> +#include <xen/numa.h> #include <asm/mtrr.h> #include "mtrr/mtrr.h" @@ -180,20 +181,93 @@ case DOM0_PHYSINFO: { dom0_physinfo_t *pi = &op->u.physinfo; + int i; + u64 node_to_cpu_64[MAX_NUMNODES]; pi->threads_per_core = smp_num_siblings; pi->cores_per_socket = boot_cpu_data.x86_num_cores; pi->sockets_per_node = num_online_cpus() / (pi->threads_per_core * pi->cores_per_socket); - pi->nr_nodes = 1; pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->cpu_khz = cpu_khz; memset(pi->hw_cap, 0, sizeof(pi->hw_cap)); memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4); + +#ifdef CONFIG_NUMA + /* memory chunks */ + pi->nr_chunks = num_memory_chunks; + DPRINTK("num_memory_chunks:%d\n", num_memory_chunks); + for ( i=0; i<num_memory_chunks; i++ ) { + DPRINTK("node%d:%"PRIx64"\n", node_memory_chunk[i].nid, + node_memory_chunk[i].start_paddr); + DPRINTK("node%d:%"PRIx64"\n", node_memory_chunk[i].nid, + node_memory_chunk[i].end_paddr); + } + + /* node to cpu mask */ + pi->nr_nodes = nodes_detected; + for ( i=0; i<nodes_detected; i++ ) + DPRINTK("node_to_cpu:%lx\n", node_to_cpumask[i].bits[0]); + + /* copy memory chunk structs to userspace */ + ret = 0; + if ( copy_to_user(u_dom0_op->u.physinfo.memory_chunks, + node_memory_chunk, + sizeof(struct node_memory_chunk_s) * + num_memory_chunks) ) { + ret = 
-EFAULT; + break; + } + + /* copy cpu to node mapping to domU */ + /* converting cpumask to u64 b/c userspace doesn't know about cpumask_t + and is accepting a u64 */ + memset(node_to_cpu_64, 0, sizeof(node_to_cpu_64)); + for ( i=0; i<nodes_detected; i++) { + int j = 0; + for ( j=0; j<num_online_cpus(); j++) + if ( cpu_isset(j, node_to_cpumask[i]) ) + node_to_cpu_64[i] |= (u64)1 << j; + } + if ( copy_to_user(u_dom0_op->u.physinfo.node_to_cpu, + node_to_cpu_64, + sizeof(node_to_cpu_64[0]) * nodes_detected ) ) { + ret = -EFAULT; + break; + } +#else + /* if no CONFIG_NUMA, construct a memory chunk of all memory + * in system and node to all online cpus map */ + pi->nr_chunks = 1; + /* send over node_memory_chunk */ + struct node_memory_chunk_s chunk; + chunk.start_paddr = 0; + chunk.end_paddr = total_pages * PAGE_SIZE; + chunk.nid = 1; + chunk.pxm = 1; + ret = 0; + if ( copy_to_user(u_dom0_op->u.physinfo.memory_chunks, + &chunk, + sizeof(struct node_memory_chunk_s)) ) { + ret = -EFAULT; + break; + } + + /* create node to cpu mapping of one node to all online cpus */ + pi->nr_nodes = 1; + node_to_cpu_64[0] = 0; + for ( i=0; i<num_online_cpus(); i++) + node_to_cpu_64[0] |= (u64)1 << i; + if ( copy_to_user(u_dom0_op->u.physinfo.node_to_cpu, + node_to_cpu_64, sizeof(node_to_cpu_64[0])) ) { + ret = -EFAULT; + break; + } +#endif ret = 0; if ( copy_to_user(u_dom0_op, op, sizeof(*op)) ) - ret = -EFAULT; + ret = -EFAULT; } break; diff -r ce4e724a0cdd -r a9dc1db4006c xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Wed Dec 14 22:49:59 2005 +++ b/xen/include/public/dom0_ops.h Wed Dec 14 23:03:37 2005 @@ -13,6 +13,7 @@ #include "xen.h" #include "sched_ctl.h" +#include "numa_structs.h" /* * Make sure you increment the interface version whenever you modify this file! 
@@ -203,6 +204,9 @@ unsigned long total_pages; unsigned long free_pages; uint32_t hw_cap[8]; + uint32_t nr_chunks; + struct node_memory_chunk_s *memory_chunks; + cpumap_t *node_to_cpu; } dom0_physinfo_t; /* diff -r ce4e724a0cdd -r a9dc1db4006c xen/include/xen/numa.h --- a/xen/include/xen/numa.h Wed Dec 14 22:49:59 2005 +++ b/xen/include/xen/numa.h Wed Dec 14 23:03:37 2005 @@ -2,6 +2,7 @@ #define _LINUX_NUMA_H #include <xen/config.h> +#include <public/numa_structs.h> #ifdef CONFIG_DISCONTIGMEM #include <asm/numnodes.h> @@ -20,12 +21,6 @@ #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) -struct node_memory_chunk_s { - u64 start_paddr; - u64 end_paddr; - u8 pxm; // proximity domain of node - u8 nid; // which cnode contains this chunk? -}; extern struct node_memory_chunk_s node_memory_chunk[]; extern int num_memory_chunks; diff -r ce4e724a0cdd -r a9dc1db4006c xen/include/public/numa_structs.h --- /dev/null Wed Dec 14 22:49:59 2005 +++ b/xen/include/public/numa_structs.h Wed Dec 14 23:03:37 2005 @@ -0,0 +1,19 @@ +#ifndef __XEN_PUBLIC_NUMA_STRUCTS_H__ + +#define __XEN_PUBLIC_NUMA_STRUCTS_H__ + +#include "xen.h" + +/* define these for xc to use b/c MAX_NUMNODES and MAX_CHUNKS + * are not exposed in /public */ +#define PUBLIC_MAX_NUMNODES 16 +#define PUBLIC_MAXCHUNKS 32 + +struct node_memory_chunk_s { + uint64_t start_paddr; /* physical address of chunk start */ + uint64_t end_paddr; /* physical address of chunk end */ + uint8_t pxm; /* proximity domain of node */ + uint8_t nid; /* which cnode contains this chunk? */ +}; + +#endif _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |