
[Xen-changelog] [xen-unstable] Host Numa information in dom0



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1270653725 -3600
# Node ID 28e5409e3fb377830a5f4346fd414d3d158f3483
# Parent  f0ef396d8c334100293fcba75ee89f311811b9f2
Host Numa information in dom0

The 'xm info' command now also reports the CPU topology and host NUMA
information; this will later be used to build guest NUMA support. The
patch reworks the physinfo sysctl, adds the new topologyinfo and
numainfo sysctls, and updates the Python and libxc code accordingly.

Signed-off-by: Nitin A Kamble <nitin.a.kamble@xxxxxxxxx>
---
 tools/libxc/xc_misc.c             |   37 ++++++
 tools/libxc/xenctrl.h             |   14 ++
 tools/python/xen/lowlevel/xc/xc.c |  215 ++++++++++++++++++++++++++------------
 tools/python/xen/xend/XendNode.py |   63 ++++++-----
 tools/python/xen/xend/balloon.py  |   14 --
 xen/arch/x86/sysctl.c             |  140 ++++++++++++++++++++++--
 xen/common/page_alloc.c           |    6 +
 xen/include/asm-x86/numa.h        |    1 
 xen/include/public/sysctl.h       |   90 ++++++++++++---
 xen/include/xen/mm.h              |    1 
 10 files changed, 447 insertions(+), 134 deletions(-)
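
As a rough illustration (not part of the changeset), a dom0 C tool could drive
the two new libxc calls along the following lines, mirroring the
caller-allocated-array convention used by the Python bindings in this patch;
the array bounds, includes and error handling are illustrative only:

    #include <inttypes.h>
    #include <stdio.h>
    #include <xenctrl.h>

    #define MAX_CPU_INDEX  255
    #define MAX_NODE_INDEX 31

    int main(void)
    {
        int xc_handle = xc_interface_open();
        xc_topologyinfo_t tinfo;
        xc_numainfo_t ninfo;
        xc_cpu_to_core_t coremap[MAX_CPU_INDEX + 1];
        xc_cpu_to_socket_t socketmap[MAX_CPU_INDEX + 1];
        xc_cpu_to_node_t nodemap[MAX_CPU_INDEX + 1];
        xc_node_to_memsize_t memsize[MAX_NODE_INDEX + 1];
        xc_node_to_memfree_t memfree[MAX_NODE_INDEX + 1];
        xc_node_to_node_dist_t dist[(MAX_NODE_INDEX + 1) * (MAX_NODE_INDEX + 1)];
        int i, j;

        if ( xc_handle < 0 )
            return 1;

        /* CPU topology: the caller provides the arrays, Xen fills them in. */
        set_xen_guest_handle(tinfo.cpu_to_core, coremap);
        set_xen_guest_handle(tinfo.cpu_to_socket, socketmap);
        set_xen_guest_handle(tinfo.cpu_to_node, nodemap);
        tinfo.max_cpu_index = MAX_CPU_INDEX;
        if ( xc_topologyinfo(xc_handle, &tinfo) != 0 )
            return 1;
        for ( i = 0; i < tinfo.max_cpu_index; i++ )
            printf("cpu%d: core %u socket %u node %u\n",
                   i, coremap[i], socketmap[i], nodemap[i]);

        /* NUMA info: per-node memory plus the flat node-to-node distance map. */
        set_xen_guest_handle(ninfo.node_to_memsize, memsize);
        set_xen_guest_handle(ninfo.node_to_memfree, memfree);
        set_xen_guest_handle(ninfo.node_to_node_distance, dist);
        ninfo.max_node_index = MAX_NODE_INDEX;
        if ( xc_numainfo(xc_handle, &ninfo) != 0 )
            return 1;
        for ( i = 0; i < ninfo.max_node_index; i++ )
        {
            printf("node%d: %"PRIu64"MB total, %"PRIu64"MB free\n",
                   i, memsize[i] >> 20, memfree[i] >> 20);
            for ( j = 0; j < ninfo.max_node_index; j++ )
                printf("  distance to node%d: %u\n",
                       j, dist[i * ninfo.max_node_index + j]);
        }

        xc_interface_close(xc_handle);
        return 0;
    }

The (i * max_node_index + j) indexing of the distance array matches the way
the hypervisor fills it in arch_do_sysctl() below.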

diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Wed Apr 07 15:44:29 2010 +0100
+++ b/tools/libxc/xc_misc.c     Wed Apr 07 16:22:05 2010 +0100
@@ -79,6 +79,43 @@ int xc_physinfo(int xc_handle,
 
     return 0;
 }
+
+int xc_topologyinfo(int xc_handle,
+                xc_topologyinfo_t *put_info)
+{
+    int ret;
+    DECLARE_SYSCTL;
+
+    sysctl.cmd = XEN_SYSCTL_topologyinfo;
+
+    memcpy(&sysctl.u.topologyinfo, put_info, sizeof(*put_info));
+
+    if ( (ret = do_sysctl(xc_handle, &sysctl)) != 0 )
+        return ret;
+
+    memcpy(put_info, &sysctl.u.topologyinfo, sizeof(*put_info));
+
+    return 0;
+}
+
+int xc_numainfo(int xc_handle,
+                xc_numainfo_t *put_info)
+{
+    int ret;
+    DECLARE_SYSCTL;
+
+    sysctl.cmd = XEN_SYSCTL_numainfo;
+
+    memcpy(&sysctl.u.numainfo, put_info, sizeof(*put_info));
+
+    if ((ret = do_sysctl(xc_handle, &sysctl)) != 0)
+        return ret;
+
+    memcpy(put_info, &sysctl.u.numainfo, sizeof(*put_info));
+
+    return 0;
+}
+
 
 int xc_sched_id(int xc_handle,
                 int *sched_id)
diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Apr 07 15:44:29 2010 +0100
+++ b/tools/libxc/xenctrl.h     Wed Apr 07 16:22:05 2010 +0100
@@ -612,9 +612,19 @@ int xc_send_debug_keys(int xc_handle, ch
 int xc_send_debug_keys(int xc_handle, char *keys);
 
 typedef xen_sysctl_physinfo_t xc_physinfo_t;
+typedef xen_sysctl_topologyinfo_t xc_topologyinfo_t;
+typedef xen_sysctl_numainfo_t xc_numainfo_t;
+
 typedef uint32_t xc_cpu_to_node_t;
-int xc_physinfo(int xc_handle,
-                xc_physinfo_t *info);
+typedef uint32_t xc_cpu_to_socket_t;
+typedef uint32_t xc_cpu_to_core_t;
+typedef uint64_t xc_node_to_memsize_t;
+typedef uint64_t xc_node_to_memfree_t;
+typedef uint32_t xc_node_to_node_dist_t;
+
+int xc_physinfo(int xc_handle, xc_physinfo_t *info);
+int xc_topologyinfo(int xc_handle, xc_topologyinfo_t *info);
+int xc_numainfo(int xc_handle, xc_numainfo_t *info);
 
 int xc_sched_id(int xc_handle,
                 int *sched_id);
diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Apr 07 15:44:29 2010 +0100
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Apr 07 16:22:05 2010 +0100
@@ -1151,105 +1151,178 @@ static PyObject *pyxc_pages_to_kib(XcObj
     return PyLong_FromUnsignedLong(pages_to_kib(pages));
 }
 
-
 static PyObject *pyxc_physinfo(XcObject *self)
 {
-#define MAX_CPU_ID 255
-    xc_physinfo_t info;
+    xc_physinfo_t pinfo;
     char cpu_cap[128], virt_caps[128], *p;
-    int i, j, max_cpu_id, nr_nodes = 0;
-    uint64_t free_heap;
-    PyObject *ret_obj, *node_to_cpu_obj, *node_to_memory_obj;
-    PyObject *node_to_dma32_mem_obj;
-    xc_cpu_to_node_t map[MAX_CPU_ID + 1];
+    int i;
     const char *virtcap_names[] = { "hvm", "hvm_directio" };
 
-    set_xen_guest_handle(info.cpu_to_node, map);
-    info.max_cpu_id = MAX_CPU_ID;
-
-    if ( xc_physinfo(self->xc_handle, &info) != 0 )
+    if ( xc_physinfo(self->xc_handle, &pinfo) != 0 )
         return pyxc_error_to_exception();
 
     p = cpu_cap;
     *p = '\0';
-    for ( i = 0; i < sizeof(info.hw_cap)/4; i++ )
-        p += sprintf(p, "%08x:", info.hw_cap[i]);
+    for ( i = 0; i < sizeof(pinfo.hw_cap)/4; i++ )
+        p += sprintf(p, "%08x:", pinfo.hw_cap[i]);
     *(p-1) = 0;
 
     p = virt_caps;
     *p = '\0';
     for ( i = 0; i < 2; i++ )
-        if ( (info.capabilities >> i) & 1 )
+        if ( (pinfo.capabilities >> i) & 1 )
           p += sprintf(p, "%s ", virtcap_names[i]);
     if ( p != virt_caps )
       *(p-1) = '\0';
 
-    max_cpu_id = info.max_cpu_id;
-    if ( max_cpu_id > MAX_CPU_ID )
-        max_cpu_id = MAX_CPU_ID;
+    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s,s:s}",
+                            "nr_nodes",         pinfo.nr_nodes,
+                            "threads_per_core", pinfo.threads_per_core,
+                            "cores_per_socket", pinfo.cores_per_socket,
+                            "sockets_per_node", pinfo.sockets_per_node,
+                            "nr_cpus",          pinfo.nr_cpus, 
+                            "total_memory",     pages_to_kib(pinfo.total_pages),
+                            "free_memory",      pages_to_kib(pinfo.free_pages),
+                            "scrub_memory",     pages_to_kib(pinfo.scrub_pages),
+                            "cpu_khz",          pinfo.cpu_khz,
+                            "hw_caps",          cpu_cap,
+                            "virt_caps",        virt_caps);
+}
+
+static PyObject *pyxc_topologyinfo(XcObject *self)
+{
+#define MAX_CPU_INDEX 255
+    xc_topologyinfo_t tinfo;
+    int i, max_cpu_index;
+    PyObject *ret_obj;
+    PyObject *cpu_to_core_obj, *cpu_to_socket_obj, *cpu_to_node_obj;
+    xc_cpu_to_core_t coremap[MAX_CPU_INDEX + 1];
+    xc_cpu_to_socket_t socketmap[MAX_CPU_INDEX + 1];
+    xc_cpu_to_node_t nodemap[MAX_CPU_INDEX + 1];
+
+
+    set_xen_guest_handle(tinfo.cpu_to_core, coremap);
+    set_xen_guest_handle(tinfo.cpu_to_socket, socketmap);
+    set_xen_guest_handle(tinfo.cpu_to_node, nodemap);
+    tinfo.max_cpu_index = MAX_CPU_INDEX;
+
+    if ( xc_topologyinfo(self->xc_handle, &tinfo) != 0 )
+        return pyxc_error_to_exception();
+
+    max_cpu_index = tinfo.max_cpu_index;
+    if ( max_cpu_index > MAX_CPU_INDEX )
+        max_cpu_index = MAX_CPU_INDEX;
+
+    /* Construct cpu-to-* lists. */
+    cpu_to_core_obj = PyList_New(0);
+    cpu_to_socket_obj = PyList_New(0);
+    cpu_to_node_obj = PyList_New(0);
+    for ( i = 0; i < max_cpu_index; i++ )
+    {
+        PyObject *pyint;
+
+        pyint = PyInt_FromLong(coremap[i]);
+        PyList_Append(cpu_to_core_obj, pyint);
+        Py_DECREF(pyint);
+
+        pyint = PyInt_FromLong(socketmap[i]);
+        PyList_Append(cpu_to_socket_obj, pyint);
+        Py_DECREF(pyint);
+
+        pyint = PyInt_FromLong(nodemap[i]);
+        PyList_Append(cpu_to_node_obj, pyint);
+        Py_DECREF(pyint);
+    }
+
+    ret_obj = Py_BuildValue("{s:i}", "max_cpu_index", max_cpu_index);
+
+    PyDict_SetItemString(ret_obj, "cpu_to_core", cpu_to_core_obj);
+    Py_DECREF(cpu_to_core_obj);
+
+    PyDict_SetItemString(ret_obj, "cpu_to_socket", cpu_to_socket_obj);
+    Py_DECREF(cpu_to_socket_obj);
+ 
+    PyDict_SetItemString(ret_obj, "cpu_to_node", cpu_to_node_obj);
+    Py_DECREF(cpu_to_node_obj);
+ 
+    return ret_obj;
+#undef MAX_CPU_INDEX
+}
+
+static PyObject *pyxc_numainfo(XcObject *self)
+{
+#define MAX_NODE_INDEX 31
+    xc_numainfo_t ninfo;
+    int i, j, max_node_index;
+    uint64_t free_heap;
+    PyObject *ret_obj;
+    PyObject *node_to_memsize_obj, *node_to_memfree_obj;
+    PyObject *node_to_dma32_mem_obj, *node_to_node_dist_obj;
+    xc_node_to_memsize_t node_memsize[MAX_NODE_INDEX + 1];
+    xc_node_to_memfree_t node_memfree[MAX_NODE_INDEX + 1];
+    xc_node_to_node_dist_t nodes_dist[(MAX_NODE_INDEX * MAX_NODE_INDEX) + 1];
+
+    set_xen_guest_handle(ninfo.node_to_memsize, node_memsize);
+    set_xen_guest_handle(ninfo.node_to_memfree, node_memfree);
+    set_xen_guest_handle(ninfo.node_to_node_distance, nodes_dist);
+    ninfo.max_node_index = MAX_NODE_INDEX;
+    if( xc_numainfo(self->xc_handle, &ninfo) != 0 )
+        return pyxc_error_to_exception();
+
+    max_node_index = ninfo.max_node_index;
+    if ( max_node_index > MAX_NODE_INDEX )
+        max_node_index = MAX_NODE_INDEX;
 
     /* Construct node-to-* lists. */
-    node_to_cpu_obj = PyList_New(0);
-    node_to_memory_obj = PyList_New(0);
+    node_to_memsize_obj = PyList_New(0);
+    node_to_memfree_obj = PyList_New(0);
     node_to_dma32_mem_obj = PyList_New(0);
-    for ( i = 0; i <= info.max_node_id; i++ )
+    node_to_node_dist_obj = PyList_New(0);
+    for ( i = 0; i < max_node_index; i++ )
     {
-        int node_exists = 0;
         PyObject *pyint;
 
-        /* CPUs. */
-        PyObject *cpus = PyList_New(0);
-        for ( j = 0; j <= max_cpu_id; j++ )
-        {
-            if ( i != map[j] )
-                continue;
-            pyint = PyInt_FromLong(j);
-            PyList_Append(cpus, pyint);
-            Py_DECREF(pyint);
-            node_exists = 1;
-        }
-        PyList_Append(node_to_cpu_obj, cpus); 
-        Py_DECREF(cpus);
-
-        /* Memory. */
-        xc_availheap(self->xc_handle, 0, 0, i, &free_heap);
-        node_exists = node_exists || (free_heap != 0);
-        pyint = PyInt_FromLong(free_heap / 1024);
-        PyList_Append(node_to_memory_obj, pyint);
+        /* Total Memory */
+        pyint = PyInt_FromLong(node_memsize[i] >> 20); /* MB */
+        PyList_Append(node_to_memsize_obj, pyint);
+        Py_DECREF(pyint);
+
+        /* Free Memory */
+        pyint = PyInt_FromLong(node_memfree[i] >> 20); /* MB */
+        PyList_Append(node_to_memfree_obj, pyint);
         Py_DECREF(pyint);
 
         /* DMA memory. */
         xc_availheap(self->xc_handle, 0, 32, i, &free_heap);
-        pyint = PyInt_FromLong(free_heap / 1024);
+        pyint = PyInt_FromLong(free_heap >> 20); /* MB */
         PyList_Append(node_to_dma32_mem_obj, pyint);
         Py_DECREF(pyint);
 
-        if ( node_exists )
-            nr_nodes++;
-    }
-
-    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s:s:s}",
-                            "nr_nodes",         nr_nodes,
-                            "max_node_id",      info.max_node_id,
-                            "max_cpu_id",       info.max_cpu_id,
-                            "threads_per_core", info.threads_per_core,
-                            "cores_per_socket", info.cores_per_socket,
-                            "nr_cpus",          info.nr_cpus, 
-                            "total_memory",     pages_to_kib(info.total_pages),
-                            "free_memory",      pages_to_kib(info.free_pages),
-                            "scrub_memory",     pages_to_kib(info.scrub_pages),
-                            "cpu_khz",          info.cpu_khz,
-                            "hw_caps",          cpu_cap,
-                            "virt_caps",        virt_caps);
-    PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
-    Py_DECREF(node_to_cpu_obj);
-    PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj);
-    Py_DECREF(node_to_memory_obj);
+        /* Node to Node Distance */
+        for ( j = 0; j < ninfo.max_node_index; j++ )
+        {
+            pyint = PyInt_FromLong(nodes_dist[(i * ninfo.max_node_index) + j]);
+            PyList_Append(node_to_node_dist_obj, pyint);
+            Py_DECREF(pyint);
+        }
+    }
+
+    ret_obj = Py_BuildValue("{s:i}", "max_node_index", max_node_index);
+
+    PyDict_SetItemString(ret_obj, "node_memsize", node_to_memsize_obj);
+    Py_DECREF(node_to_memsize_obj);
+
+    PyDict_SetItemString(ret_obj, "node_memfree", node_to_memfree_obj);
+    Py_DECREF(node_to_memfree_obj);
+
     PyDict_SetItemString(ret_obj, "node_to_dma32_mem", node_to_dma32_mem_obj);
     Py_DECREF(node_to_dma32_mem_obj);
+
+    PyDict_SetItemString(ret_obj, "node_to_node_dist", node_to_node_dist_obj);
+    Py_DECREF(node_to_node_dist_obj);
  
     return ret_obj;
-#undef MAX_CPU_ID
+#undef MAX_NODE_INDEX
 }
 
 static PyObject *pyxc_xeninfo(XcObject *self)
@@ -2056,6 +2129,20 @@ static PyMethodDef pyxc_methods[] = {
       METH_NOARGS, "\n"
       "Get information about the physical host machine\n"
       "Returns [dict]: information about the hardware"
+      "        [None]: on failure.\n" },
+
+    { "topologyinfo",
+      (PyCFunction)pyxc_topologyinfo,
+      METH_NOARGS, "\n"
+      "Get information about the cpu topology on the host machine\n"
+      "Returns [dict]: information about the cpu topology on host"
+      "        [None]: on failure.\n" },
+
+    { "numainfo",
+      (PyCFunction)pyxc_numainfo,
+      METH_NOARGS, "\n"
+      "Get NUMA information on the host machine\n"
+      "Returns [dict]: NUMA information on host"
       "        [None]: on failure.\n" },
 
     { "xeninfo",
diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Wed Apr 07 15:44:29 2010 +0100
+++ b/tools/python/xen/xend/XendNode.py Wed Apr 07 16:22:05 2010 +0100
@@ -878,65 +878,70 @@ class XendNode:
     def list_to_strrange(self,list):
         return self.format_pairs(self.list_to_rangepairs(list))
 
-    def format_node_to_cpu(self, pinfo):
-        str=''
-        whitespace=''
+    def format_cpu_to_core_socket_node(self, tinfo):
         try:
-            node_to_cpu=pinfo['node_to_cpu']
-            for i in range(0, pinfo['max_node_id']+1):
-                str+='%snode%d:%s\n' % (whitespace,
-                                        i, 
-                                      self.list_to_strrange(node_to_cpu[i]))
-                whitespace='%25s' % ''        
+            nr_cpus=tinfo['max_cpu_index']
+            str='\ncpu:    core    socket     node\n'
+            for i in range(0, nr_cpus):
+                str+='%3d:%8d %8d %8d\n' % (i, 
+                                          tinfo['cpu_to_core'][i],
+                                          tinfo['cpu_to_socket'][i],
+                                          tinfo['cpu_to_node'][i])
         except:
             str='none\n'
         return str[:-1];
-    def format_node_to_memory(self, pinfo, key):
-        str=''
-        whitespace=''
+
+    def format_numa_info(self, ninfo):
         try:
-            node_to_memory=pinfo[key]
-            for i in range(0, pinfo['max_node_id']+1):
-                str+='%snode%d:%d\n' % (whitespace,
-                                        i,
-                                        node_to_memory[i] / 1024)
-                whitespace='%25s' % ''
+            nr_nodes=ninfo['max_node_index']
+            str='\nnode: TotalMemory FreeMemory dma32Memory NodeDist:'
+            for i in range(0, nr_nodes):
+                str+='%4d ' % i
+            str+='\n'
+            for i in range(0, nr_nodes):
+                str+='%4d:  %8dMB %8dMB  %8dMB         :' % (i, 
+                                      ninfo['node_memsize'][i],
+                                      ninfo['node_memfree'][i],
+                                      ninfo['node_to_dma32_mem'][i])
+                for j in range(0, nr_nodes):
+                    str+='%4d ' % ninfo['node_to_node_dist'][(i*nr_nodes)+j]
+                str+='\n'
         except:
             str='none\n'
         return str[:-1];
 
-
     def physinfo(self):
         info = self.xc.physinfo()
+        tinfo = self.xc.topologyinfo()
+        ninfo = self.xc.numainfo()
 
         info['cpu_mhz'] = info['cpu_khz'] / 1000
         
         # physinfo is in KiB, need it in MiB
         info['total_memory'] = info['total_memory'] / 1024
         info['free_memory']  = info['free_memory'] / 1024
-        info['node_to_cpu']  = self.format_node_to_cpu(info)
-        info['node_to_memory'] = \
-            self.format_node_to_memory(info, 'node_to_memory')
-        info['node_to_dma32_mem'] = \
-            self.format_node_to_memory(info, 'node_to_dma32_mem')
+
+        info['cpu_topology']  = \
+             self.format_cpu_to_core_socket_node(tinfo)
+
+        info['numa_info']  = \
+             self.format_numa_info(ninfo)
 
         ITEM_ORDER = ['nr_cpus',
                       'nr_nodes',
                       'cores_per_socket',
                       'threads_per_core',
+                      'sockets_per_node',
                       'cpu_mhz',
                       'hw_caps',
                       'virt_caps',
                       'total_memory',
                       'free_memory',
-                      'node_to_cpu',
-                      'node_to_memory',
-                      'node_to_dma32_mem',
-                      'max_node_id'
+                      'cpu_topology',
+                      'numa_info',
                       ]
 
         return [[k, info[k]] for k in ITEM_ORDER]
-
 
     def pciinfo(self):
         from xen.xend.server.pciif import get_all_assigned_pci_devices
diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Wed Apr 07 15:44:29 2010 +0100
+++ b/tools/python/xen/xend/balloon.py  Wed Apr 07 16:22:05 2010 +0100
@@ -184,15 +184,11 @@ def free(need_mem, dominfo):
             waitscrub = 1
             vcpus = dominfo.info['cpus'][0]
             for vcpu in vcpus:
-                nodenum = 0
-                for node in physinfo['node_to_cpu']:
-                    for cpu in node:
-                        if vcpu == cpu:
-                            if oldnode == -1:
-                                oldnode = nodenum
-                            elif oldnode != nodenum:
-                                waitscrub = 0
-                    nodenum = nodenum + 1
+                nodenum = xc.topologyinfo()['cpu_to_node'][vcpu]
+                if oldnode == -1:
+                    oldnode = nodenum
+                elif oldnode != nodenum:
+                    waitscrub = 0
 
             if waitscrub == 1 and scrub_mem > 0:
                 log.debug("wait for scrub %s", scrub_mem)
diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c     Wed Apr 07 15:44:29 2010 +0100
+++ b/xen/arch/x86/sysctl.c     Wed Apr 07 16:22:05 2010 +0100
@@ -35,6 +35,8 @@ static long cpu_down_helper(void *data)
     return cpu_down(cpu);
 }
 
+extern int __node_distance(int a, int b);
+
 long arch_do_sysctl(
     struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
 {
@@ -45,25 +47,22 @@ long arch_do_sysctl(
 
     case XEN_SYSCTL_physinfo:
     {
-        uint32_t i, max_array_ent;
-        XEN_GUEST_HANDLE_64(uint32) cpu_to_node_arr;
-
         xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo;
 
         ret = xsm_physinfo();
         if ( ret )
             break;
 
-        max_array_ent = pi->max_cpu_id;
-        cpu_to_node_arr = pi->cpu_to_node;
 
         memset(pi, 0, sizeof(*pi));
-        pi->cpu_to_node = cpu_to_node_arr;
         pi->threads_per_core =
             cpus_weight(per_cpu(cpu_sibling_map, 0));
         pi->cores_per_socket =
             cpus_weight(per_cpu(cpu_core_map, 0)) / pi->threads_per_core;
         pi->nr_cpus = (u32)num_online_cpus();
+        pi->nr_nodes = (u32)num_online_nodes();
+        pi->sockets_per_node =  pi->nr_cpus / 
+                     (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core);
         pi->total_pages = total_pages;
         pi->free_pages = avail_domheap_pages();
         pi->scrub_pages = 0;
@@ -74,15 +73,56 @@ long arch_do_sysctl(
         if ( iommu_enabled )
             pi->capabilities |= XEN_SYSCTL_PHYSCAP_hvm_directio;
 
-        pi->max_node_id = last_node(node_online_map);
-        pi->max_cpu_id = last_cpu(cpu_online_map);
-        max_array_ent = min_t(uint32_t, max_array_ent, pi->max_cpu_id);
+        if ( copy_to_guest(u_sysctl, sysctl, 1) )
+            ret = -EFAULT;
+    }
+    break;
+        
+    case XEN_SYSCTL_topologyinfo:
+    {
+        uint32_t i, max_cpu_index;
+        XEN_GUEST_HANDLE_64(uint32) cpu_to_core_arr;
+        XEN_GUEST_HANDLE_64(uint32) cpu_to_socket_arr;
+        XEN_GUEST_HANDLE_64(uint32) cpu_to_node_arr;
+
+        xen_sysctl_topologyinfo_t *ti = &sysctl->u.topologyinfo;
+
+        max_cpu_index = ti->max_cpu_index;
+        cpu_to_core_arr = ti->cpu_to_core;
+        cpu_to_socket_arr = ti->cpu_to_socket;
+        cpu_to_node_arr = ti->cpu_to_node;
+
+        memset(ti, 0, sizeof(*ti));
+        ti->cpu_to_core = cpu_to_core_arr;
+        ti->cpu_to_socket = cpu_to_socket_arr;
+        ti->cpu_to_node = cpu_to_node_arr;
+
+        max_cpu_index = min_t(uint32_t, max_cpu_index, num_online_cpus());
+        ti->max_cpu_index = max_cpu_index;
 
         ret = 0;
 
-        if ( !guest_handle_is_null(cpu_to_node_arr) )
-        {
-            for ( i = 0; i <= max_array_ent; i++ )
+        for ( i = 0; i < max_cpu_index; i++ )
+        {
+            if ( !guest_handle_is_null(cpu_to_core_arr) )
+            {
+                uint32_t core = cpu_online(i) ? cpu_to_core(i) : ~0u;
+                if ( copy_to_guest_offset(cpu_to_core_arr, i, &core, 1) )
+                {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+            if ( !guest_handle_is_null(cpu_to_socket_arr) )
+            {
+                uint32_t socket = cpu_online(i) ? cpu_to_socket(i) : ~0u;
+                if ( copy_to_guest_offset(cpu_to_socket_arr, i, &socket, 1) )
+                {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+            if ( !guest_handle_is_null(cpu_to_node_arr) )
             {
                 uint32_t node = cpu_online(i) ? cpu_to_node(i) : ~0u;
                 if ( copy_to_guest_offset(cpu_to_node_arr, i, &node, 1) )
@@ -92,6 +132,82 @@ long arch_do_sysctl(
                 }
             }
         }
+
+        if (ret)
+            break;
+ 
+        if ( copy_to_guest(u_sysctl, sysctl, 1) )
+            ret = -EFAULT;
+    }
+    break;
+
+    case XEN_SYSCTL_numainfo:
+    {
+        uint32_t i, max_node_index;
+        XEN_GUEST_HANDLE_64(uint64) node_to_memsize_arr;
+        XEN_GUEST_HANDLE_64(uint64) node_to_memfree_arr;
+        XEN_GUEST_HANDLE_64(uint32) node_to_node_distance_arr;
+
+        xen_sysctl_numainfo_t *ni = &sysctl->u.numainfo;
+
+        max_node_index = ni->max_node_index;
+        node_to_memsize_arr = ni->node_to_memsize;
+        node_to_memfree_arr = ni->node_to_memfree;
+        node_to_node_distance_arr = ni->node_to_node_distance;
+
+        memset(ni, 0, sizeof(*ni));
+        ni->node_to_memsize = node_to_memsize_arr;
+        ni->node_to_memfree = node_to_memfree_arr;
+        ni->node_to_node_distance = node_to_node_distance_arr;
+
+        max_node_index = min_t(uint32_t, max_node_index, num_online_nodes());
+        ni->max_node_index = max_node_index;
+
+        ret = 0;
+
+        for ( i = 0; i < max_node_index; i++ )
+        {
+            if ( !guest_handle_is_null(node_to_memsize_arr) )
+            {
+                uint64_t memsize = node_online(i) ? 
+                                   node_spanned_pages(i) << PAGE_SHIFT : 0ul;
+                if ( copy_to_guest_offset(node_to_memsize_arr, i, &memsize, 1) )
+                {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+            if ( !guest_handle_is_null(node_to_memfree_arr) )
+            {
+                uint64_t memfree = node_online(i) ? 
+                                   avail_node_heap_pages(i) << PAGE_SHIFT : 0ul;
+                if ( copy_to_guest_offset(node_to_memfree_arr, i, &memfree, 1) )
+                {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+
+            if ( !guest_handle_is_null(node_to_node_distance_arr) )
+           {
+                int j;
+                for ( j = 0; j < max_node_index; j++)
+                {
+                    uint32_t distance = ~0u;
+                    if (node_online(i) && node_online (j)) 
+                        distance = __node_distance(i, j);
+                    
+                    if ( copy_to_guest_offset(node_to_node_distance_arr, 
+                         (i * max_node_index + j), &distance, 1) )
+                    {
+                        ret = -EFAULT;
+                        break;
+                    }
+                }
+            }
+        }
+        if (ret)
+            break;
 
         if ( copy_to_guest(u_sysctl, sysctl, 1) )
             ret = -EFAULT;
diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Wed Apr 07 15:44:29 2010 +0100
+++ b/xen/common/page_alloc.c   Wed Apr 07 16:22:05 2010 +0100
@@ -1256,6 +1256,12 @@ unsigned long avail_domheap_pages(void)
                             -1);
 }
 
+unsigned long avail_node_heap_pages(unsigned int nodeid)
+{
+    return avail_heap_pages(MEMZONE_XEN, NR_ZONES -1, nodeid);
+}
+
+
 static void pagealloc_info(unsigned char key)
 {
     unsigned int zone = MEMZONE_XEN;
diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/include/asm-x86/numa.h
--- a/xen/include/asm-x86/numa.h        Wed Apr 07 15:44:29 2010 +0100
+++ b/xen/include/asm-x86/numa.h        Wed Apr 07 16:22:05 2010 +0100
@@ -73,6 +73,7 @@ static inline __attribute__((pure)) int 
 #define NODE_DATA(nid)         (&(node_data[nid]))
 
 #define node_start_pfn(nid)    (NODE_DATA(nid)->node_start_pfn)
+#define node_spanned_pages(nid)        (NODE_DATA(nid)->node_spanned_pages)
 #define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
                                 NODE_DATA(nid)->node_spanned_pages)
 
diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Wed Apr 07 15:44:29 2010 +0100
+++ b/xen/include/public/sysctl.h       Wed Apr 07 16:22:05 2010 +0100
@@ -34,7 +34,7 @@
 #include "xen.h"
 #include "domctl.h"
 
-#define XEN_SYSCTL_INTERFACE_VERSION 0x00000007
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000008
 
 /*
  * Read console content from Xen buffer ring.
@@ -93,29 +93,14 @@ struct xen_sysctl_physinfo {
 struct xen_sysctl_physinfo {
     uint32_t threads_per_core;
     uint32_t cores_per_socket;
+    uint32_t sockets_per_node;
     uint32_t nr_cpus;
-    uint32_t max_node_id;
+    uint32_t nr_nodes;
     uint32_t cpu_khz;
     uint64_aligned_t total_pages;
     uint64_aligned_t free_pages;
     uint64_aligned_t scrub_pages;
     uint32_t hw_cap[8];
-
-    /*
-     * IN: maximum addressable entry in the caller-provided cpu_to_node array.
-     * OUT: largest cpu identifier in the system.
-     * If OUT is greater than IN then the cpu_to_node array is truncated!
-     */
-    uint32_t max_cpu_id;
-    /*
-     * If not NULL, this array is filled with node identifier for each cpu.
-     * If a cpu has no node information (e.g., cpu not present) then the
-     * sentinel value ~0u is written.
-     * The size of this array is specified by the caller in @max_cpu_id.
-     * If the actual @max_cpu_id is smaller than the array then the trailing
-     * elements of the array will not be written by the sysctl.
-     */
-    XEN_GUEST_HANDLE_64(uint32) cpu_to_node;
 
     /* XEN_SYSCTL_PHYSCAP_??? */
     uint32_t capabilities;
@@ -491,6 +476,73 @@ typedef struct xen_sysctl_lockprof_op xe
 typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t);
 
+#define XEN_SYSCTL_topologyinfo         16 
+struct xen_sysctl_topologyinfo {
+
+    /*
+     * IN: maximum addressable entry in the caller-provided cpu_to_core, 
+     * cpu_to_socket & cpu_to_node arrays.
+     * OUT: largest cpu identifier in the system.
+     * If OUT is greater than IN then the arrays are truncated!
+     */
+    uint32_t max_cpu_index;
+
+    /*
+     * If not NULL, these arrays are filled with the core/socket/node
+     * identifier for each cpu.
+     * If a cpu has no core/socket/node information (e.g., cpu not present) 
+     * then the sentinel value ~0u is written.
+     * The size of this array is specified by the caller in @max_cpu_index.
+     * If the actual @max_cpu_index is smaller than the array then the trailing
+     * elements of the array will not be written by the sysctl.
+     */
+    XEN_GUEST_HANDLE_64(uint32) cpu_to_core;
+    XEN_GUEST_HANDLE_64(uint32) cpu_to_socket;
+    XEN_GUEST_HANDLE_64(uint32) cpu_to_node;  /* node_number */
+
+};
+typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t);
+
+#define XEN_SYSCTL_numainfo          17        
+struct xen_sysctl_numainfo {
+    /*
+     * IN: maximum addressable entry in the caller-provided node_to_memsize
+     * & node_to_memfree arrays.
+     * OUT: largest possible node index for the system.
+     * If OUT is greater than IN then these arrays are truncated!
+     */
+    uint32_t max_node_index;
+
+    /* In the node_to_memsize & node_to_memfree arrays, the entry with the
+     * same index corresponds to the same node.
+     * If an entry has no node information (e.g., node not present) then the
+     * value 0u is written for node_to_memsize & node_to_memfree.
+     * The size of these arrays is specified by the caller in @max_node_index.
+     * If the actual @max_node_index is smaller than the arrays then the
+     * trailing elements of the arrays will not be written by the sysctl.
+     */
+    XEN_GUEST_HANDLE_64(uint64) node_to_memsize;
+    XEN_GUEST_HANDLE_64(uint64) node_to_memfree;
+
+
+    /* node_to_node_distance is an array of size (nr_nodes * nr_nodes) listing
+     * memory access distances between nodes. The i'th entry in the array
+     * specifies the distance between node (i / nr_nodes) & node (i % nr_nodes).
+     * If an entry has no node distance information (e.g., node not present)
+     * then the sentinel value ~0u is written.
+     * The size of this array is specified by the caller in @max_node_index.
+     * If the actual max_node_index * max_node_index is smaller than the array
+     * then the trailing elements of the array will not be written by the
+     * sysctl.
+     */
+    XEN_GUEST_HANDLE_64(uint32) node_to_node_distance;
+};
+typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t);
+
+
 struct xen_sysctl {
     uint32_t cmd;
     uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
@@ -498,6 +550,8 @@ struct xen_sysctl {
         struct xen_sysctl_readconsole       readconsole;
         struct xen_sysctl_tbuf_op           tbuf_op;
         struct xen_sysctl_physinfo          physinfo;
+        struct xen_sysctl_topologyinfo      topologyinfo;
+        struct xen_sysctl_numainfo          numainfo;
         struct xen_sysctl_sched_id          sched_id;
         struct xen_sysctl_perfc_op          perfc_op;
         struct xen_sysctl_getdomaininfolist getdomaininfolist;
diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Wed Apr 07 15:44:29 2010 +0100
+++ b/xen/include/xen/mm.h      Wed Apr 07 16:22:05 2010 +0100
@@ -57,6 +57,7 @@ unsigned long avail_domheap_pages_region
 unsigned long avail_domheap_pages_region(
     unsigned int node, unsigned int min_width, unsigned int max_width);
 unsigned long avail_domheap_pages(void);
+unsigned long avail_node_heap_pages(unsigned int);
 #define alloc_domheap_page(d,f) (alloc_domheap_pages(d,0,f))
 #define free_domheap_page(p)  (free_domheap_pages(p,0))
 unsigned int online_page(unsigned long mfn, uint32_t *status);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

