[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] x86: Reorder CPUs at boot time to reflect system topology.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1281374884 -3600
# Node ID 2f4a89ad2528ce16711b195dc998def48c0c98c2
# Parent  e7afe98afd4372010ac4fdb7d0d7c948f821feee
x86: Reorder CPUs at boot time to reflect system topology.

This is an attempt to impose some sensible coherent ordering on the
cpu namespace, where previously there was none (we were at the mercy
of BIOS ordering, which varies wildly across systems).

Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
 xen/arch/x86/setup.c |   58 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 56 insertions(+), 2 deletions(-)

diff -r e7afe98afd43 -r 2f4a89ad2528 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Mon Aug 09 16:46:42 2010 +0100
+++ b/xen/arch/x86/setup.c      Mon Aug 09 18:28:04 2010 +0100
@@ -203,6 +203,58 @@ void __devinit srat_detect_node(int cpu)
 
     if ( opt_cpu_info && acpi_numa > 0 )
         printk("CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
+}
+
+/*
+ * Sort CPUs by <node,package,core,thread> tuple. Fortunately this hierarchy is
+ * reflected in the structure of modern APIC identifiers, so we sort based on
+ * those. This is slightly complicated by the fact that the BSP must remain
+ * CPU 0. Hence we do a variation on longest-prefix matching to do the best we
+ * can while keeping CPU 0 static.
+ */
+static void __init normalise_cpu_order(void)
+{
+    unsigned int i, j, min_cpu;
+    uint32_t apicid, diff, min_diff;
+
+    for_each_present_cpu ( i )
+    {
+        apicid = x86_cpu_to_apicid[i];
+        min_diff = min_cpu = ~0u;
+
+        /*
+         * Find remaining CPU with longest-prefix match on APIC ID.
+         * Among identical longest-prefix matches, pick the smallest APIC ID.
+         */
+        for ( j = next_cpu(i, cpu_present_map);
+              j < NR_CPUS;
+              j = next_cpu(j, cpu_present_map) )
+        {
+            diff = x86_cpu_to_apicid[j] ^ apicid;
+            while ( diff & (diff-1) )
+                diff &= diff-1;
+            if ( (diff < min_diff) ||
+                 ((diff == min_diff) &&
+                  (x86_cpu_to_apicid[j] < x86_cpu_to_apicid[min_cpu])) )
+            {
+                min_diff = diff;
+                min_cpu = j;
+            }
+        }
+
+        /* If no match then there must be no CPUs remaining to consider. */
+        if ( min_cpu >= NR_CPUS )
+        {
+            BUG_ON(next_cpu(i, cpu_present_map) < NR_CPUS);
+            break;
+        }
+
+        /* Switch the best-matching CPU with the next CPU in logical order. */
+        j = next_cpu(i, cpu_present_map);
+        apicid = x86_cpu_to_apicid[min_cpu];
+        x86_cpu_to_apicid[min_cpu] = x86_cpu_to_apicid[j];
+        x86_cpu_to_apicid[j] = apicid;
+    }
 }
 
 /*
@@ -952,8 +1004,6 @@ void __init __start_xen(unsigned long mb
 
     acpi_boot_init();
 
-    init_cpu_to_node();
-
     if ( smp_found_config )
         get_smp_config();
 
@@ -963,6 +1013,10 @@ void __init __start_xen(unsigned long mb
 #endif
 
     init_apic_mappings();
+
+    normalise_cpu_order();
+
+    init_cpu_to_node();
 
     if ( x2apic_is_available() )
         enable_x2apic();

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with Rackspace, which monitors our
servers 24x7x365 and backs them with Rackspace's Fanatical Support®.