[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] Extended the physinfo sysctl to export NUMA cpu_to_node topology info.



# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1183734727 -3600
# Node ID 89d2192942befb0daf56e730be61d3a3c06926dd
# Parent  538c3d8aa4b14833174423ec506284279d5a33ab
Extended the physinfo sysctl to export NUMA cpu_to_node topology info.
Print this in 'xm info'.
Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 tools/libxc/xc_misc.c                               |    2 
 tools/libxc/xenctrl.h                               |    1 
 tools/python/xen/lowlevel/xc/xc.c                   |   69 ++++++++++++++------
 tools/python/xen/xend/XendNode.py                   |   50 ++++++++++++++
 tools/xenmon/xenbaked.c                             |    2 
 tools/xenstat/libxenstat/src/xenstat.c              |    2 
 tools/xentrace/xentrace.c                           |    2 
 tools/xm-test/tests/info/02_info_compiledata_pos.py |    4 -
 xen/arch/ia64/xen/dom0_ops.c                        |   46 +------------
 xen/arch/powerpc/sysctl.c                           |    6 -
 xen/arch/x86/sysctl.c                               |   32 +++++++--
 xen/include/public/sysctl.h                         |   20 +++++
 xen/include/xen/cpumask.h                           |    9 ++
 13 files changed, 167 insertions(+), 78 deletions(-)

diff -r 538c3d8aa4b1 -r 89d2192942be tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Fri Jul 06 15:01:20 2007 +0100
+++ b/tools/libxc/xc_misc.c     Fri Jul 06 16:12:07 2007 +0100
@@ -59,6 +59,8 @@ int xc_physinfo(int xc_handle,
     DECLARE_SYSCTL;
 
     sysctl.cmd = XEN_SYSCTL_physinfo;
+
+    memcpy(&sysctl.u.physinfo, put_info, sizeof(*put_info));
 
     if ( (ret = do_sysctl(xc_handle, &sysctl)) != 0 )
         return ret;
diff -r 538c3d8aa4b1 -r 89d2192942be tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Jul 06 15:01:20 2007 +0100
+++ b/tools/libxc/xenctrl.h     Fri Jul 06 16:12:07 2007 +0100
@@ -473,6 +473,7 @@ int xc_send_debug_keys(int xc_handle, ch
 int xc_send_debug_keys(int xc_handle, char *keys);
 
 typedef xen_sysctl_physinfo_t xc_physinfo_t;
+typedef uint32_t xc_cpu_to_node_t;
 int xc_physinfo(int xc_handle,
                 xc_physinfo_t *info);
 
diff -r 538c3d8aa4b1 -r 89d2192942be tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Fri Jul 06 15:01:20 2007 +0100
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri Jul 06 16:12:07 2007 +0100
@@ -680,33 +680,62 @@ static PyObject *pyxc_pages_to_kib(XcObj
 
 static PyObject *pyxc_physinfo(XcObject *self)
 {
+#define MAX_CPU_ID 255
     xc_physinfo_t info;
     char cpu_cap[128], *p=cpu_cap, *q=cpu_cap;
-    int i;
-    
+    int i, j, max_cpu_id;
+    PyObject *ret_obj, *node_to_cpu_obj;
+    xc_cpu_to_node_t map[MAX_CPU_ID];
+
+    set_xen_guest_handle(info.cpu_to_node, map);
+    info.max_cpu_id = MAX_CPU_ID;
+
     if ( xc_physinfo(self->xc_handle, &info) != 0 )
         return pyxc_error_to_exception();
 
-    *q=0;
-    for(i=0;i<sizeof(info.hw_cap)/4;i++)
+    *q = 0;
+    for ( i = 0; i < sizeof(info.hw_cap)/4; i++ )
     {
-        p+=sprintf(p,"%08x:",info.hw_cap[i]);
-        if(info.hw_cap[i])
-            q=p;
+        p += sprintf(p, "%08x:", info.hw_cap[i]);
+        if ( info.hw_cap[i] )
+            q = p;
     }
-    if(q>cpu_cap)
-        *(q-1)=0;
-
-    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
-                         "threads_per_core", info.threads_per_core,
-                         "cores_per_socket", info.cores_per_socket,
-                         "sockets_per_node", info.sockets_per_node,
-                         "nr_nodes",         info.nr_nodes,
-                         "total_memory",     pages_to_kib(info.total_pages),
-                         "free_memory",      pages_to_kib(info.free_pages),
-                         "scrub_memory",     pages_to_kib(info.scrub_pages),
-                         "cpu_khz",          info.cpu_khz,
-                         "hw_caps",          cpu_cap);
+    if ( q > cpu_cap )
+        *(q-1) = 0;
+
+    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
+                            "nr_nodes",         info.nr_nodes,
+                            "max_cpu_id",       info.max_cpu_id,
+                            "threads_per_core", info.threads_per_core,
+                            "cores_per_socket", info.cores_per_socket,
+                            "sockets_per_node", info.sockets_per_node,
+                            "total_memory",     pages_to_kib(info.total_pages),
+                            "free_memory",      pages_to_kib(info.free_pages),
+                            "scrub_memory",     pages_to_kib(info.scrub_pages),
+                            "cpu_khz",          info.cpu_khz,
+                            "hw_caps",          cpu_cap);
+
+    max_cpu_id = info.max_cpu_id;
+    if ( max_cpu_id > MAX_CPU_ID )
+        max_cpu_id = MAX_CPU_ID;
+
+    /* Construct node-to-cpu lists. */
+    node_to_cpu_obj = PyList_New(0);
+
+    /* Make a list for each node. */
+    for ( i = 0; i < info.nr_nodes; i++ )
+    {
+        PyObject *cpus = PyList_New(0);
+        for ( j = 0; j <= max_cpu_id; j++ )
+            if ( i == map[j])
+                PyList_Append(cpus, PyInt_FromLong(j));
+        PyList_Append(node_to_cpu_obj, cpus); 
+    }
+
+    PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
+ 
+    return ret_obj;
+#undef MAX_CPU_ID
 }
 
 static PyObject *pyxc_xeninfo(XcObject *self)
diff -r 538c3d8aa4b1 -r 89d2192942be tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Fri Jul 06 15:01:20 2007 +0100
+++ b/tools/python/xen/xend/XendNode.py Fri Jul 06 16:12:07 2007 +0100
@@ -533,6 +533,54 @@ class XendNode:
                 ['version', ver],
                 ['machine', mch]]
 
+    def list_to_rangepairs(self,cmap):
+            cmap.sort()
+            pairs = []
+            x = y = 0
+            for i in range(0,len(cmap)):
+                try:
+                    if ((cmap[y+1] - cmap[i]) > 1):
+                        pairs.append((cmap[x],cmap[y]))
+                        x = y = i+1
+                    else:
+                        y = y + 1
+                # if we go off the end, then just add x to y
+                except IndexError:
+                    pairs.append((cmap[x],cmap[y]))
+
+            return pairs
+
+    def format_pairs(self,pairs):
+            if not pairs:
+                return "no cpus"
+            out = ""
+            for f,s in pairs:
+                if (f==s):
+                    out += '%d'%f
+                else:
+                    out += '%d-%d'%(f,s)
+                out += ','
+            # trim trailing ','
+            return out[:-1]
+
+    def list_to_strrange(self,list):
+        return self.format_pairs(self.list_to_rangepairs(list))
+
+    def format_node_to_cpu(self, pinfo):
+        str=''
+        whitespace=''
+        try:
+            node_to_cpu=pinfo['node_to_cpu']
+            for i in range(0, pinfo['nr_nodes']):
+                str+='%snode%d:%s\n' % (whitespace,
+                                        i, 
+                                      self.list_to_strrange(node_to_cpu[i]))
+                whitespace='%25s' % ''        
+        except:
+            str='none\n'
+        return str[:-1];
+
+
     def physinfo(self):
         info = self.xc.physinfo()
 
@@ -545,6 +593,7 @@ class XendNode:
         # physinfo is in KiB, need it in MiB
         info['total_memory'] = info['total_memory'] / 1024
         info['free_memory']  = info['free_memory'] / 1024
+        info['node_to_cpu']  = self.format_node_to_cpu(info)
 
         ITEM_ORDER = ['nr_cpus',
                       'nr_nodes',
@@ -555,6 +604,7 @@ class XendNode:
                       'hw_caps',
                       'total_memory',
                       'free_memory',
+                      'node_to_cpu'
                       ]
 
         return [[k, info[k]] for k in ITEM_ORDER]
diff -r 538c3d8aa4b1 -r 89d2192942be tools/xenmon/xenbaked.c
--- a/tools/xenmon/xenbaked.c   Fri Jul 06 15:01:20 2007 +0100
+++ b/tools/xenmon/xenbaked.c   Fri Jul 06 16:12:07 2007 +0100
@@ -444,7 +444,7 @@ struct t_rec **init_rec_ptrs(struct t_bu
  */
 unsigned int get_num_cpus(void)
 {
-    xc_physinfo_t physinfo;
+    xc_physinfo_t physinfo = { 0 };
     int xc_handle = xc_interface_open();
     int ret;
 
diff -r 538c3d8aa4b1 -r 89d2192942be tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c    Fri Jul 06 15:01:20 2007 +0100
+++ b/tools/xenstat/libxenstat/src/xenstat.c    Fri Jul 06 16:12:07 2007 +0100
@@ -135,7 +135,7 @@ xenstat_node *xenstat_get_node(xenstat_h
 {
 #define DOMAIN_CHUNK_SIZE 256
        xenstat_node *node;
-       xc_physinfo_t physinfo;
+       xc_physinfo_t physinfo = { 0 };
        xc_domaininfo_t domaininfo[DOMAIN_CHUNK_SIZE];
        unsigned int new_domains;
        unsigned int i;
diff -r 538c3d8aa4b1 -r 89d2192942be tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Fri Jul 06 15:01:20 2007 +0100
+++ b/tools/xentrace/xentrace.c Fri Jul 06 16:12:07 2007 +0100
@@ -256,7 +256,7 @@ struct t_rec **init_rec_ptrs(struct t_bu
  */
 unsigned int get_num_cpus(void)
 {
-    xc_physinfo_t physinfo;
+    xc_physinfo_t physinfo = { 0 };
     int xc_handle = xc_interface_open();
     int ret;
     
diff -r 538c3d8aa4b1 -r 89d2192942be tools/xm-test/tests/info/02_info_compiledata_pos.py
--- a/tools/xm-test/tests/info/02_info_compiledata_pos.py       Fri Jul 06 15:01:20 2007 +0100
+++ b/tools/xm-test/tests/info/02_info_compiledata_pos.py       Fri Jul 06 16:12:07 2007 +0100
@@ -18,9 +18,7 @@ for line in lines:
 for line in lines:
     pieces = line.split(" : ", 1)
 
-    if len(pieces) < 2:
-        FAIL("Found invalid line: [%s]" % line)
-    else:
+    if len(pieces) > 1:
         map[pieces[0]] = pieces[1]
 
 for field in ["cores_per_socket", "threads_per_core", "cpu_mhz",
diff -r 538c3d8aa4b1 -r 89d2192942be xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c      Fri Jul 06 15:01:20 2007 +0100
+++ b/xen/arch/ia64/xen/dom0_ops.c      Fri Jul 06 16:12:07 2007 +0100
@@ -240,8 +240,7 @@ long arch_do_sysctl(xen_sysctl_t *op, XE
     {
 #ifdef IA64_NUMA_PHYSINFO
         int i;
-        node_data_t *chunks;
-        u64 *map, cpu_to_node_map[MAX_NUMNODES];
+        uint32_t *map, cpu_to_node_map[NR_CPUS];
 #endif
 
         xen_sysctl_physinfo_t *pi = &op->u.physinfo;
@@ -250,11 +249,9 @@ long arch_do_sysctl(xen_sysctl_t *op, XE
             cpus_weight(cpu_sibling_map[0]);
         pi->cores_per_socket =
             cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
-        pi->sockets_per_node = 
-            num_online_cpus() / cpus_weight(cpu_core_map[0]);
-#ifndef IA64_NUMA_PHYSINFO
-        pi->nr_nodes         = 1; 
-#endif
+        pi->nr_nodes         = num_online_nodes();
+        pi->sockets_per_node = num_online_cpus() / 
+            (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core);
         pi->total_pages      = total_pages; 
         pi->free_pages       = avail_domheap_pages();
         pi->scrub_pages      = avail_scrub_pages();
@@ -264,41 +261,6 @@ long arch_do_sysctl(xen_sysctl_t *op, XE
         ret = 0;
 
 #ifdef IA64_NUMA_PHYSINFO
-        /* fetch memory_chunk pointer from guest */
-        get_xen_guest_handle(chunks, pi->memory_chunks);
-
-        printk("chunks=%p, num_node_memblks=%u\n", chunks, num_node_memblks);
-        /* if it is set, fill out memory chunk array */
-        if (chunks != NULL) {
-            if (num_node_memblks == 0) {
-                /* Non-NUMA machine.  Put pseudo-values.  */
-                node_data_t data;
-                data.node_start_pfn = 0;
-                data.node_spanned_pages = total_pages;
-                data.node_id = 0;
-                /* copy memory chunk structs to guest */
-                if (copy_to_guest_offset(pi->memory_chunks, 0, &data, 1)) {
-                    ret = -EFAULT;
-                    break;
-                }
-            } else {
-                for (i = 0; i < num_node_memblks && i < PUBLIC_MAXCHUNKS; i++) {
-                    node_data_t data;
-                    data.node_start_pfn = node_memblk[i].start_paddr >>
-                                          PAGE_SHIFT;
-                    data.node_spanned_pages = node_memblk[i].size >> PAGE_SHIFT;
-                    data.node_id = node_memblk[i].nid;
-                    /* copy memory chunk structs to guest */
-                    if (copy_to_guest_offset(pi->memory_chunks, i, &data, 1)) {
-                        ret = -EFAULT;
-                        break;
-                    }
-                }
-            }
-        }
-        /* set number of notes */
-        pi->nr_nodes = num_online_nodes();
-
         /* fetch cpu_to_node pointer from guest */
         get_xen_guest_handle(map, pi->cpu_to_node);
 
diff -r 538c3d8aa4b1 -r 89d2192942be xen/arch/powerpc/sysctl.c
--- a/xen/arch/powerpc/sysctl.c Fri Jul 06 15:01:20 2007 +0100
+++ b/xen/arch/powerpc/sysctl.c Fri Jul 06 16:12:07 2007 +0100
@@ -45,10 +45,10 @@ long arch_do_sysctl(struct xen_sysctl *s
             cpus_weight(cpu_sibling_map[0]);
         pi->cores_per_socket =
             cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
-        pi->sockets_per_node = 
-            num_online_cpus() / cpus_weight(cpu_core_map[0]);
+        pi->sockets_per_node = num_online_cpus() / 
+            (num_online_nodes() * pi->cores_per_socket * pi->threads_per_core);
 
-        pi->nr_nodes         = 1;
+        pi->nr_nodes         = num_online_nodes();
         pi->total_pages      = total_pages;
         pi->free_pages       = avail_domheap_pages();
         pi->cpu_khz          = cpu_khz;
diff -r 538c3d8aa4b1 -r 89d2192942be xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c     Fri Jul 06 15:01:20 2007 +0100
+++ b/xen/arch/x86/sysctl.c     Fri Jul 06 16:12:07 2007 +0100
@@ -23,6 +23,10 @@
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
 #include <asm/processor.h>
+#include <asm/numa.h>
+#include <xen/nodemask.h>
+
+#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
 
 long arch_do_sysctl(
     struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
@@ -34,25 +38,41 @@ long arch_do_sysctl(
 
     case XEN_SYSCTL_physinfo:
     {
+        uint32_t i, max_array_ent;
+
         xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo;
 
         pi->threads_per_core =
             cpus_weight(cpu_sibling_map[0]);
         pi->cores_per_socket =
             cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
-        pi->sockets_per_node = 
-            num_online_cpus() / cpus_weight(cpu_core_map[0]);
+        pi->nr_nodes = num_online_nodes();
+        pi->sockets_per_node = num_online_cpus() / 
+            (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core);
 
-        pi->nr_nodes         = 1;
         pi->total_pages      = total_pages;
         pi->free_pages       = avail_domheap_pages();
         pi->scrub_pages      = avail_scrub_pages();
         pi->cpu_khz          = cpu_khz;
         memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
         memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
-        ret = 0;
-        if ( copy_to_guest(u_sysctl, sysctl, 1) )
-            ret = -EFAULT;
+
+        max_array_ent = pi->max_cpu_id;
+        pi->max_cpu_id = last_cpu(cpu_online_map);
+        max_array_ent = min_t(uint32_t, max_array_ent, pi->max_cpu_id);
+
+        ret = -EFAULT;
+        if ( !guest_handle_is_null(pi->cpu_to_node) )
+        {
+            for ( i = 0; i <= max_array_ent; i++ )
+            {
+                uint32_t node = cpu_online(i) ? cpu_to_node(i) : ~0u;
+                if ( copy_to_guest_offset(pi->cpu_to_node, i, &node, 1) )
+                    break;
+            }
+        }
+
+        ret = copy_to_guest(u_sysctl, sysctl, 1) ? -EFAULT : 0;
     }
     break;
     
diff -r 538c3d8aa4b1 -r 89d2192942be xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Fri Jul 06 15:01:20 2007 +0100
+++ b/xen/include/public/sysctl.h       Fri Jul 06 16:12:07 2007 +0100
@@ -34,7 +34,7 @@
 #include "xen.h"
 #include "domctl.h"
 
-#define XEN_SYSCTL_INTERFACE_VERSION 0x00000003
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000004
 
 /*
  * Read console content from Xen buffer ring.
@@ -76,6 +76,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_
  */
 #define XEN_SYSCTL_physinfo          3
 struct xen_sysctl_physinfo {
+    /* OUT variables. */
     uint32_t threads_per_core;
     uint32_t cores_per_socket;
     uint32_t sockets_per_node;
@@ -85,6 +86,23 @@ struct xen_sysctl_physinfo {
     uint64_aligned_t free_pages;
     uint64_aligned_t scrub_pages;
     uint32_t hw_cap[8];
+
+    /* IN/OUT variables. */
+    /*
+     * IN: maximum addressable entry in the caller-provided cpu_to_node array.
+     * OUT: largest cpu identifier in the system.
+     * If OUT is greater than IN then the cpu_to_node array is truncated!
+     */
+    uint32_t max_cpu_id;
+    /*
+     * If not NULL, this array is filled with node identifier for each cpu.
+     * If a cpu has no node information (e.g., cpu not present) then the
+     * sentinel value ~0u is written.
+     * The size of this array is specified by the caller in @max_cpu_id.
+     * If the actual @max_cpu_id is smaller than the array then the trailing
+     * elements of the array will not be written by the sysctl.
+     */
+    XEN_GUEST_HANDLE_64(uint32_t) cpu_to_node;
 };
 typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t);
diff -r 538c3d8aa4b1 -r 89d2192942be xen/include/xen/cpumask.h
--- a/xen/include/xen/cpumask.h Fri Jul 06 15:01:20 2007 +0100
+++ b/xen/include/xen/cpumask.h Fri Jul 06 16:12:07 2007 +0100
@@ -220,6 +220,15 @@ static inline int __next_cpu(int n, cons
 static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits)
 {
        return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1));
+}
+
+#define last_cpu(src) __last_cpu(&(src), NR_CPUS)
+static inline int __last_cpu(const cpumask_t *srcp, int nbits)
+{
+       int cpu, pcpu = NR_CPUS;
+       for (cpu = first_cpu(*srcp); cpu < NR_CPUS; cpu = next_cpu(cpu, *srcp))
+               pcpu = cpu;
+       return pcpu;
 }
 
 #define cpumask_of_cpu(cpu)                                            \

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.