[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] Change the notion of "online" node



Keir, this patch is not targeted at Xen 4.0, since it changes some common code
for the non-hotplug case and Xen is now in the RC3-pre stage. We are sending it
out to get some feedback, and we can fix this issue in the 4.1 cycle.

Thanks
Yunhong Jiang

Currently, if a CPU has no memory populated, it will get nodeid 0. After the
corresponding memory slot is populated, the CPU's nodeid will still not be
updated. This is the behaviour introduced by changeset 20726:ddb8c5e798f9.

There are two methods to resolve the issue:

The first method is to change the CPU's node id after memory is populated. This
is stated as "left to do" in changeset 20726. However, it means the cpu<->node
relationship will change while the system is running, which is not a perfect solution.

The second method is the one taken here. We change the notion of an online node.
Previously, only a node with memory was considered online. This makes sense
considering that nodes exist mainly for memory allocation. However, it can't resolve
the above issue. Now, every node with either CPUs or memory populated will be marked
as online. A NULL avail[node] will identify a node that has no memory.

We also switch the sequence of parsing memory affinity and CPU affinity, to make
sure all nodes that have memory populated are allocated first.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>

diff -r c30899cdbab5 xen/arch/x86/numa.c
--- a/xen/arch/x86/numa.c       Thu Feb 04 14:56:45 2010 +0800
+++ b/xen/arch/x86/numa.c       Thu Feb 04 14:58:50 2010 +0800
@@ -26,7 +26,9 @@ custom_param("numa", numa_setup);
 /* from proto.h */
 #define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
 
-struct node_data node_data[MAX_NUMNODES];
+struct node_data node_data[MAX_NUMNODES] = {
+    [0 ... MAX_NUMNODES -1] = {0, 0, -1}
+};
 
 /* Mapping from pdx to node id */
 int memnode_shift;
@@ -350,7 +352,10 @@ static void dump_numa(unsigned char key)
                  (u32)(now>>32), (u32)now);
 
        for_each_online_node(i) {
-               paddr_t pa = (paddr_t)(NODE_DATA(i)->node_start_pfn + 1)<< 
PAGE_SHIFT;
+               paddr_t pa;
+        if ( !(NODE_DATA(i)->node_spanned_pages) )
+            continue;
+               pa = (paddr_t)(NODE_DATA(i)->node_start_pfn + 1)<< PAGE_SHIFT;
                printk("idx%d -> NODE%d start->%lu size->%lu\n",
                          i, NODE_DATA(i)->node_id,
                          NODE_DATA(i)->node_start_pfn,
diff -r c30899cdbab5 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Thu Feb 04 14:56:45 2010 +0800
+++ b/xen/arch/x86/setup.c      Thu Feb 04 14:58:50 2010 +0800
@@ -265,9 +265,8 @@ void __devinit srat_detect_node(int cpu)
     u32 apicid = x86_cpu_to_apicid[cpu];
 
     node = apicid_to_node[apicid];
-    if ( node == NUMA_NO_NODE || !node_online(node) )
-        node = 0;
-    numa_set_node(cpu, node);
+    if ( node != NUMA_NO_NODE )
+        numa_set_node(cpu, node);
 
     if ( acpi_numa > 0 )
         printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
@@ -1074,6 +1073,7 @@ void __init __start_xen(unsigned long mb
         srat_detect_node(i);
         /* Set up node_to_cpumask based on cpu_to_node[]. */
         numa_add_cpu(i);        
+        node_set_online(cpu_to_node(i));
     }
 
     printk("Brought up %ld CPUs\n", (long)num_online_cpus());
diff -r c30899cdbab5 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Thu Feb 04 14:56:45 2010 +0800
+++ b/xen/arch/x86/smpboot.c    Thu Feb 04 14:58:50 2010 +0800
@@ -44,6 +44,7 @@
 #include <xen/softirq.h>
 #include <xen/serial.h>
 #include <xen/numa.h>
+#include <xen/nodemask.h>
 #include <xen/event.h>
 #include <asm/current.h>
 #include <asm/mc146818rtc.h>
@@ -1505,6 +1506,7 @@ int cpu_add(uint32_t apic_id, uint32_t a
                        spin_unlock(&cpu_add_remove_lock);
                        return node;
                }
+        node_set_online(node);
                apicid_to_node[apic_id] = node;
        }
 
diff -r c30899cdbab5 xen/arch/x86/srat.c
--- a/xen/arch/x86/srat.c       Thu Feb 04 14:56:45 2010 +0800
+++ b/xen/arch/x86/srat.c       Thu Feb 04 14:58:50 2010 +0800
@@ -399,12 +399,7 @@ int __init acpi_scan_nodes(u64 start, u6
                        continue;
                setup_node_bootmem(i, nodes[i].start, nodes[i].end);
        }
-       for (i = 0; i < NR_CPUS; i++) { 
-               if (cpu_to_node[i] == NUMA_NO_NODE)
-                       continue;
-               if (!node_isset(cpu_to_node[i], nodes_parsed))
-                       numa_set_node(i, NUMA_NO_NODE);
-       }
+    /* We setup random value for cpu_to_node */
        numa_init_array();
        return 0;
 }
diff -r c30899cdbab5 xen/drivers/acpi/numa.c
--- a/xen/drivers/acpi/numa.c   Thu Feb 04 14:56:45 2010 +0800
+++ b/xen/drivers/acpi/numa.c   Thu Feb 04 14:58:50 2010 +0800
@@ -164,10 +164,10 @@ int __init acpi_numa_init(void)
 {
        /* SRAT: Static Resource Affinity Table */
        if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
+               acpi_table_parse_srat(ACPI_SRAT_MEMORY_AFFINITY, 
acpi_parse_memory_affinity, NR_NODE_MEMBLKS);  // IA64 specific
                acpi_table_parse_srat(ACPI_SRAT_PROCESSOR_AFFINITY,
                                               acpi_parse_processor_affinity,
                                               NR_CPUS);
-               acpi_table_parse_srat(ACPI_SRAT_MEMORY_AFFINITY, 
acpi_parse_memory_affinity, NR_NODE_MEMBLKS);  // IA64 specific
        }
 
        /* SLIT: System Locality Information Table */


Attachment: numa_node.patch
Description: numa_node.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.