[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [XEN RFC PATCH 12/40] xen/x86: Move numa_initmem_init to common
Hi Wei, On 11/08/2021 11:23, Wei Chen wrote: This function can be reused by Arm device tree based NUMA support. So we move it from x86 to common, as well as its related variables and functions: setup_node_bootmem, numa_init_array and numa_emulation. As numa_initmem_init has been moved to common, _memnodemap is not used cross files. We can restore _memnodemap to static. As we discussed on a previous patch, we should try to avoid this kind of dance. I can help to find a split that would achieve that. Signed-off-by: Wei Chen <wei.chen@xxxxxxx> --- xen/arch/x86/numa.c | 118 ---------------------------------- xen/common/numa.c | 122 +++++++++++++++++++++++++++++++++++- xen/include/asm-x86/numa.h | 5 -- xen/include/asm-x86/setup.h | 1 - xen/include/xen/numa.h | 8 ++- 5 files changed, 128 insertions(+), 126 deletions(-) diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c index f2626b3968..6908738305 100644 --- a/xen/arch/x86/numa.c +++ b/xen/arch/x86/numa.c @@ -38,7 +38,6 @@ nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {nodemask_t __read_mostly node_online_map = { { [0] = 1UL } }; -bool numa_off;s8 acpi_numa = 0;int srat_disabled(void)@@ -46,123 +45,6 @@ int srat_disabled(void) return numa_off || acpi_numa < 0; }-/* initialize NODE_DATA given nodeid and start/end */-void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end) -{ - unsigned long start_pfn, end_pfn; - - start_pfn = start >> PAGE_SHIFT; - end_pfn = end >> PAGE_SHIFT; - - NODE_DATA(nodeid)->node_start_pfn = start_pfn; - NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; - - node_set_online(nodeid); -} - -void __init numa_init_array(void) -{ - int rr, i; - - /* There are unfortunately some poorly designed mainboards around - that only connect memory to a single CPU. This breaks the 1:1 cpu->node - mapping. To avoid this fill in the mapping for all possible - CPUs, as the number of CPUs is not known yet. - We round robin the existing nodes. */ - rr = first_node(node_online_map); - for ( i = 0; i < nr_cpu_ids; i++ ) - { - if ( cpu_to_node[i] != NUMA_NO_NODE ) - continue; - numa_set_node(i, rr); - rr = cycle_node(rr, node_online_map); - } -} - -#ifdef CONFIG_NUMA_EMU -static int numa_fake __initdata = 0; - -/* Numa emulation */ -static int __init numa_emulation(u64 start_pfn, u64 end_pfn) -{ - int i; - struct node nodes[MAX_NUMNODES]; - u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake; - - /* Kludge needed for the hash function */ - if ( hweight64(sz) > 1 ) - { - u64 x = 1; - while ( (x << 1) < sz ) - x <<= 1; - if ( x < sz/2 ) - printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n"); - sz = x; - } - - memset(&nodes,0,sizeof(nodes)); - for ( i = 0; i < numa_fake; i++ ) - { - nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz; - if ( i == numa_fake - 1 ) - sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; - nodes[i].end = nodes[i].start + sz; - printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n", - i, - nodes[i].start, nodes[i].end, - (nodes[i].end - nodes[i].start) >> 20); - node_set_online(i); - } - memnode_shift = compute_hash_shift(nodes, numa_fake, NULL); - if ( memnode_shift < 0 ) - { - memnode_shift = 0; - printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n"); - return -1; - } - for_each_online_node ( i ) - setup_node_bootmem(i, nodes[i].start, nodes[i].end); - numa_init_array(); - - return 0; -} -#endif - -void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) -{ - int i; - -#ifdef CONFIG_NUMA_EMU - if ( numa_fake && !numa_emulation(start_pfn, end_pfn) ) - return; -#endif - -#ifdef CONFIG_ACPI_NUMA - if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT, - (u64)end_pfn << PAGE_SHIFT) ) - return; -#endif - - printk(KERN_INFO "%s\n", - numa_off ? "NUMA turned off" : "No NUMA configuration found"); - - printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n", - (u64)start_pfn << PAGE_SHIFT, - (u64)end_pfn << PAGE_SHIFT); - /* setup dummy node covering all memory */ - memnode_shift = BITS_PER_LONG - 1; - memnodemap = _memnodemap; - memnodemapsize = ARRAY_SIZE(_memnodemap); - - nodes_clear(node_online_map); - node_set_online(0); - for ( i = 0; i < nr_cpu_ids; i++ ) - numa_set_node(i, 0); - cpumask_copy(&node_to_cpumask[0], cpumask_of(0)); - setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT, - (u64)end_pfn << PAGE_SHIFT); -} - void numa_set_node(int cpu, nodeid_t node) { cpu_to_node[cpu] = node; diff --git a/xen/common/numa.c b/xen/common/numa.c index 1facc8fe2b..26c0006d04 100644 --- a/xen/common/numa.c +++ b/xen/common/numa.c @@ -14,12 +14,13 @@ #include <xen/smp.h> #include <xen/pfn.h> #include <xen/sched.h> NIT: We tend to add a newline betwen <xen/...> headers and <asm/...> headers. +#include <asm/acpi.h>struct node_data node_data[MAX_NUMNODES]; /* Mapping from pdx to node id */int memnode_shift; -typeof(*memnodemap) _memnodemap[64]; +static typeof(*memnodemap) _memnodemap[64]; unsigned long memnodemapsize; u8 *memnodemap;@@ -34,6 +35,8 @@ int num_node_memblks;struct node node_memblk_range[NR_NODE_MEMBLKS]; nodeid_t memblk_nodeid[NR_NODE_MEMBLKS];+bool numa_off;+ /* * Given a shift value, try to populate memnodemap[] * Returns : @@ -191,3 +194,120 @@ void numa_add_cpu(int cpu) { cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]); } + +/* initialize NODE_DATA given nodeid and start/end */ +void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end) From an abstract PoV, start and end should be paddr_t. This should be done on a separate patch though. +{ + unsigned long start_pfn, end_pfn; + + start_pfn = start >> PAGE_SHIFT; + end_pfn = end >> PAGE_SHIFT; + + NODE_DATA(nodeid)->node_start_pfn = start_pfn; + NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; + + node_set_online(nodeid); +} + +void __init numa_init_array(void) +{ + int rr, i; + + /* There are unfortunately some poorly designed mainboards around + that only connect memory to a single CPU. This breaks the 1:1 cpu->node + mapping. To avoid this fill in the mapping for all possible + CPUs, as the number of CPUs is not known yet. + We round robin the existing nodes. */ + rr = first_node(node_online_map); + for ( i = 0; i < nr_cpu_ids; i++ ) + { + if ( cpu_to_node[i] != NUMA_NO_NODE ) + continue; + numa_set_node(i, rr); + rr = cycle_node(rr, node_online_map); + } +} + +#ifdef CONFIG_NUMA_EMU +int numa_fake __initdata = 0; + +/* Numa emulation */ +static int __init numa_emulation(u64 start_pfn, u64 end_pfn) Here, this should be either "unsigned long" or ideally "mfn_t". Although, if you use "unsigned long", you will need to... +{ + int i; + struct node nodes[MAX_NUMNODES]; + u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake; ... cast "(end_pfn - start_pfn)" to uin64_t or use pfn_to_paddr(). + + /* Kludge needed for the hash function */ + if ( hweight64(sz) > 1 ) + { + u64 x = 1; + while ( (x << 1) < sz ) + x <<= 1; + if ( x < sz/2 ) + printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n"); + sz = x; + } + + memset(&nodes,0,sizeof(nodes)); + for ( i = 0; i < numa_fake; i++ ) + { + nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz; + if ( i == numa_fake - 1 ) + sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; + nodes[i].end = nodes[i].start + sz; + printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n", + i, + nodes[i].start, nodes[i].end, + (nodes[i].end - nodes[i].start) >> 20); + node_set_online(i); + } + memnode_shift = compute_hash_shift(nodes, numa_fake, NULL); + if ( memnode_shift < 0 ) + { + memnode_shift = 0; + printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n"); + return -1; + } + for_each_online_node ( i ) + setup_node_bootmem(i, nodes[i].start, nodes[i].end); + numa_init_array(); + + return 0; +} +#endif + +void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) +{ + int i; + +#ifdef CONFIG_NUMA_EMU + if ( numa_fake && !numa_emulation(start_pfn, end_pfn) ) + return; +#endif + +#ifdef CONFIG_ACPI_NUMA + if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT, + (u64)end_pfn << PAGE_SHIFT) ) (u64)v << PAGE_SHIFT should be switched to use pfn_to_paddr() or mfn_to_paddr() if you decide to make start_pfn and end_pfn typesafe. + return; +#endif + + printk(KERN_INFO "%s\n", + numa_off ? "NUMA turned off" : "No NUMA configuration found"); + + printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n", + (u64)start_pfn << PAGE_SHIFT, + (u64)end_pfn << PAGE_SHIFT); Same remark here. PRIx64 would also have to be switched to PRIpaddr. + /* setup dummy node covering all memory */ + memnode_shift = BITS_PER_LONG - 1; + memnodemap = _memnodemap; + memnodemapsize = ARRAY_SIZE(_memnodemap); + + nodes_clear(node_online_map); + node_set_online(0); + for ( i = 0; i < nr_cpu_ids; i++ ) + numa_set_node(i, 0); + cpumask_copy(&node_to_cpumask[0], cpumask_of(0)); + setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT, + (u64)end_pfn << PAGE_SHIFT); +} diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h index e8a92ad9df..f8e4e15586 100644 --- a/xen/include/asm-x86/numa.h +++ b/xen/include/asm-x86/numa.h @@ -21,16 +21,11 @@ extern nodeid_t pxm_to_node(unsigned int pxm);#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) -extern void numa_init_array(void);-extern bool numa_off; - - extern int srat_disabled(void); extern void numa_set_node(int cpu, nodeid_t node); extern nodeid_t setup_node(unsigned int pxm); extern void srat_detect_node(int cpu);-extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);extern nodeid_t apicid_to_node[]; extern void init_cpu_to_node(void);diff --git a/xen/include/asm-x86/setup.h b/xen/include/asm-x86/setup.hindex 24be46115d..63838ba2d1 100644 --- a/xen/include/asm-x86/setup.h +++ b/xen/include/asm-x86/setup.h @@ -17,7 +17,6 @@ void early_time_init(void);void set_nr_cpu_ids(unsigned int max_cpus); -void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);void arch_init_memory(void); void subarch_init_memory(void);diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.hindex 67b79a73a3..258a5cb3db 100644 --- a/xen/include/xen/numa.h +++ b/xen/include/xen/numa.h @@ -26,7 +26,6 @@ extern int memnode_shift; extern unsigned long memnodemapsize; extern u8 *memnodemap; -extern typeof(*memnodemap) _memnodemap[64];struct node_data {unsigned long node_start_pfn; @@ -69,6 +68,13 @@ extern int conflicting_memblks(u64 start, u64 end); extern void cutoff_node(int i, u64 start, u64 end); extern int valid_numa_range(u64 start, u64 end, nodeid_t node);+extern void numa_init_array(void);+extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn); +extern bool numa_off; +extern int numa_fake; + +extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end); + #endif /* CONFIG_NUMA */#endif /* _XEN_NUMA_H */ Cheers, -- Julien Grall
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |