[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v10 01/13] x86: add socket_cpumask



Maintain socket_cpumask, an array of per-socket CPU masks, each of which
contains all the HT and core siblings in the same socket.

Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Changes in v9:
* Add comments for set_nr_sockets.
* Move set_nr_sockets() invocation from __start_xen() to smp_prepare_cpus().
Changes in v8:
* Remove total_cpus and retrofit the algorithm for calculating nr_sockets.
* Change per-socket cpumask allocation to be on demand.
* socket_to_cpumask => socket_cpumask.
Changes in v7:
* Introduce total_cpus to calculate nr_sockets.
* Minor code sequence improvement in set_cpu_sibling_map.
* Improve comments for nr_sockets.
---
 xen/arch/x86/mpparse.c    | 17 +++++++++++++++++
 xen/arch/x86/smpboot.c    | 26 +++++++++++++++++++++++++-
 xen/include/asm-x86/smp.h | 11 +++++++++++
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c
index 003c56e..8609f4a 100644
--- a/xen/arch/x86/mpparse.c
+++ b/xen/arch/x86/mpparse.c
@@ -87,6 +87,23 @@ void __init set_nr_cpu_ids(unsigned int max_cpus)
 #endif
 }
 
+void __init set_nr_sockets(void)
+{
+    /*
+     * Count the CPUs actually present in socket 0 and use that count to derive
+     * nr_sockets, so that nr_sockets is always >= the real number of sockets
+     * in the system, even when the APIC IDs from the MP table are sparse.
+     */
+    unsigned int cpus = bitmap_weight(phys_cpu_present_map.mask,
+                                      boot_cpu_data.x86_max_cores *
+                                      boot_cpu_data.x86_num_siblings);
+
+    if ( cpus == 0 )
+        cpus = 1; /* Keep the DIV_ROUND_UP() divisor below non-zero. */
+
+    nr_sockets = DIV_ROUND_UP(num_processors + disabled_cpus, cpus);
+}
+
 /*
  * Intel MP BIOS table parsing routines:
  */
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 2289284..e75bbd3 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -60,6 +60,9 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_mask);
 cpumask_t cpu_online_map __read_mostly;
 EXPORT_SYMBOL(cpu_online_map);
 
+unsigned int __read_mostly nr_sockets;       /* Set by set_nr_sockets(). */
+cpumask_var_t *__read_mostly socket_cpumask; /* Indexed by socket number. */
+
 struct cpuinfo_x86 cpu_data[NR_CPUS];
 
 u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
@@ -245,6 +248,8 @@ static void set_cpu_sibling_map(int cpu)
 
     cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
 
+    cpumask_set_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);
+
     if ( c[cpu].x86_num_siblings > 1 )
     {
         for_each_cpu ( i, &cpu_sibling_setup_map )
@@ -649,7 +654,13 @@ void cpu_exit_clear(unsigned int cpu)
 
 static void cpu_smpboot_free(unsigned int cpu)
 {
-    unsigned int order;
+    unsigned int order, socket = cpu_to_socket(cpu);
+
+    if ( cpumask_empty(socket_cpumask[socket]) )
+    {
+        free_cpumask_var(socket_cpumask[socket]);
+        socket_cpumask[socket] = NULL;
+    }
 
     free_cpumask_var(per_cpu(cpu_sibling_mask, cpu));
     free_cpumask_var(per_cpu(cpu_core_mask, cpu));
@@ -694,6 +705,7 @@ static int cpu_smpboot_alloc(unsigned int cpu)
     nodeid_t node = cpu_to_node(cpu);
     struct desc_struct *gdt;
     unsigned long stub_page;
+    unsigned int socket = cpu_to_socket(cpu);
 
     if ( node != NUMA_NO_NODE )
         memflags = MEMF_node(node);
@@ -736,6 +748,10 @@ static int cpu_smpboot_alloc(unsigned int cpu)
         goto oom;
     per_cpu(stubs.addr, cpu) = stub_page + STUB_BUF_CPU_OFFS(cpu);
 
+    if ( !socket_cpumask[socket] &&
+         !zalloc_cpumask_var(socket_cpumask + socket) )
+        goto oom;
+
     if ( zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, cpu)) &&
          zalloc_cpumask_var(&per_cpu(cpu_core_mask, cpu)) )
         return 0;
@@ -786,6 +802,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 
     stack_base[0] = stack_start;
 
+    set_nr_sockets();
+
+    socket_cpumask = xzalloc_array(cpumask_var_t, nr_sockets);
+    if ( !socket_cpumask || !zalloc_cpumask_var(socket_cpumask) )
+        panic("No memory for socket CPU siblings map");
+
     if ( !zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, 0)) ||
          !zalloc_cpumask_var(&per_cpu(cpu_core_mask, 0)) )
         panic("No memory for boot CPU sibling/core maps");
@@ -851,6 +873,8 @@ remove_siblinginfo(int cpu)
     int sibling;
     struct cpuinfo_x86 *c = cpu_data;
 
+    cpumask_clear_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);
+
     for_each_cpu ( sibling, per_cpu(cpu_core_mask, cpu) )
     {
         cpumask_clear_cpu(cpu, per_cpu(cpu_core_mask, sibling));
diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index 67518cf..e594062 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -58,6 +58,17 @@ int hard_smp_processor_id(void);
 
 void __stop_this_cpu(void);
 
+/*
+ * The value may be greater than the actual number of sockets in the system
+ * and must not change after initial startup.
+ */
+extern unsigned int nr_sockets;
+
+void set_nr_sockets(void);
+
+/* Per-socket CPU masks: entry N holds the HT and core siblings of socket N. */
+extern cpumask_var_t *socket_cpumask;
+
 #endif /* !__ASSEMBLY__ */
 
 #endif
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.