[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RE: [XEN RFC PATCH 12/40] xen/x86: Move numa_initmem_init to common
Hi Julien, > -----Original Message----- > From: Julien Grall <julien@xxxxxxx> > Sent: 2021年8月25日 18:22 > To: Wei Chen <Wei.Chen@xxxxxxx>; xen-devel@xxxxxxxxxxxxxxxxxxxx; > sstabellini@xxxxxxxxxx; jbeulich@xxxxxxxx > Cc: Bertrand Marquis <Bertrand.Marquis@xxxxxxx> > Subject: Re: [XEN RFC PATCH 12/40] xen/x86: Move numa_initmem_init to > common > > Hi Wei, > > On 11/08/2021 11:23, Wei Chen wrote: > > This function can be reused by Arm device tree based > > NUMA support. So we move it from x86 to common, as well > > as its related variables and functions: > > setup_node_bootmem, numa_init_array and numa_emulation. > > > > As numa_initmem_init has been moved to common, _memnodemap > > is not used cross files. We can restore _memnodemap to > > static. > > As we discussed on a previous patch, we should try to avoid this kind of > dance. I can help to find a split that would achieve that. > Yes, thanks! > > > > Signed-off-by: Wei Chen <wei.chen@xxxxxxx> > > --- > > xen/arch/x86/numa.c | 118 ---------------------------------- > > xen/common/numa.c | 122 +++++++++++++++++++++++++++++++++++- > > xen/include/asm-x86/numa.h | 5 -- > > xen/include/asm-x86/setup.h | 1 - > > xen/include/xen/numa.h | 8 ++- > > 5 files changed, 128 insertions(+), 126 deletions(-) > > > > diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c > > index f2626b3968..6908738305 100644 > > --- a/xen/arch/x86/numa.c > > +++ b/xen/arch/x86/numa.c > > @@ -38,7 +38,6 @@ nodeid_t apicid_to_node[MAX_LOCAL_APIC] = { > > > > nodemask_t __read_mostly node_online_map = { { [0] = 1UL } }; > > > > -bool numa_off; > > s8 acpi_numa = 0; > > > > int srat_disabled(void) > > @@ -46,123 +45,6 @@ int srat_disabled(void) > > return numa_off || acpi_numa < 0; > > } > > > > -/* initialize NODE_DATA given nodeid and start/end */ > > -void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end) > > -{ > > - unsigned long start_pfn, end_pfn; > > - > > - start_pfn = start >> PAGE_SHIFT; > > - end_pfn = end >> PAGE_SHIFT; > > - > > - NODE_DATA(nodeid)->node_start_pfn = start_pfn; > > - NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; > > - > > - node_set_online(nodeid); > > -} > > - > > -void __init numa_init_array(void) > > -{ > > - int rr, i; > > - > > - /* There are unfortunately some poorly designed mainboards around > > - that only connect memory to a single CPU. This breaks the 1:1 > cpu->node > > - mapping. To avoid this fill in the mapping for all possible > > - CPUs, as the number of CPUs is not known yet. > > - We round robin the existing nodes. */ > > - rr = first_node(node_online_map); > > - for ( i = 0; i < nr_cpu_ids; i++ ) > > - { > > - if ( cpu_to_node[i] != NUMA_NO_NODE ) > > - continue; > > - numa_set_node(i, rr); > > - rr = cycle_node(rr, node_online_map); > > - } > > -} > > - > > -#ifdef CONFIG_NUMA_EMU > > -static int numa_fake __initdata = 0; > > - > > -/* Numa emulation */ > > -static int __init numa_emulation(u64 start_pfn, u64 end_pfn) > > -{ > > - int i; > > - struct node nodes[MAX_NUMNODES]; > > - u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake; > > - > > - /* Kludge needed for the hash function */ > > - if ( hweight64(sz) > 1 ) > > - { > > - u64 x = 1; > > - while ( (x << 1) < sz ) > > - x <<= 1; > > - if ( x < sz/2 ) > > - printk(KERN_ERR "Numa emulation unbalanced. Complain to > maintainer\n"); > > - sz = x; > > - } > > - > > - memset(&nodes,0,sizeof(nodes)); > > - for ( i = 0; i < numa_fake; i++ ) > > - { > > - nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz; > > - if ( i == numa_fake - 1 ) > > - sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; > > - nodes[i].end = nodes[i].start + sz; > > - printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" > (%"PRIu64"MB)\n", > > - i, > > - nodes[i].start, nodes[i].end, > > - (nodes[i].end - nodes[i].start) >> 20); > > - node_set_online(i); > > - } > > - memnode_shift = compute_hash_shift(nodes, numa_fake, NULL); > > - if ( memnode_shift < 0 ) > > - { > > - memnode_shift = 0; > > - printk(KERN_ERR "No NUMA hash function found. Emulation > disabled.\n"); > > - return -1; > > - } > > - for_each_online_node ( i ) > > - setup_node_bootmem(i, nodes[i].start, nodes[i].end); > > - numa_init_array(); > > - > > - return 0; > > -} > > -#endif > > - > > -void __init numa_initmem_init(unsigned long start_pfn, unsigned long > end_pfn) > > -{ > > - int i; > > - > > -#ifdef CONFIG_NUMA_EMU > > - if ( numa_fake && !numa_emulation(start_pfn, end_pfn) ) > > - return; > > -#endif > > - > > -#ifdef CONFIG_ACPI_NUMA > > - if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT, > > - (u64)end_pfn << PAGE_SHIFT) ) > > - return; > > -#endif > > - > > - printk(KERN_INFO "%s\n", > > - numa_off ? "NUMA turned off" : "No NUMA configuration > found"); > > - > > - printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n", > > - (u64)start_pfn << PAGE_SHIFT, > > - (u64)end_pfn << PAGE_SHIFT); > > - /* setup dummy node covering all memory */ > > - memnode_shift = BITS_PER_LONG - 1; > > - memnodemap = _memnodemap; > > - memnodemapsize = ARRAY_SIZE(_memnodemap); > > - > > - nodes_clear(node_online_map); > > - node_set_online(0); > > - for ( i = 0; i < nr_cpu_ids; i++ ) > > - numa_set_node(i, 0); > > - cpumask_copy(&node_to_cpumask[0], cpumask_of(0)); > > - setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT, > > - (u64)end_pfn << PAGE_SHIFT); > > -} > > - > > void numa_set_node(int cpu, nodeid_t node) > > { > > cpu_to_node[cpu] = node; > > diff --git a/xen/common/numa.c b/xen/common/numa.c > > index 1facc8fe2b..26c0006d04 100644 > > --- a/xen/common/numa.c > > +++ b/xen/common/numa.c > > @@ -14,12 +14,13 @@ > > #include <xen/smp.h> > > #include <xen/pfn.h> > > #include <xen/sched.h> > > NIT: We tend to add a newline betwen <xen/...> headers and <asm/...> > headers. > got it > > +#include <asm/acpi.h> > > > > struct node_data node_data[MAX_NUMNODES]; > > > > /* Mapping from pdx to node id */ > > int memnode_shift; > > -typeof(*memnodemap) _memnodemap[64]; > > +static typeof(*memnodemap) _memnodemap[64]; > > unsigned long memnodemapsize; > > u8 *memnodemap; > > > > @@ -34,6 +35,8 @@ int num_node_memblks; > > struct node node_memblk_range[NR_NODE_MEMBLKS]; > > nodeid_t memblk_nodeid[NR_NODE_MEMBLKS]; > > > > +bool numa_off; > > + > > /* > > * Given a shift value, try to populate memnodemap[] > > * Returns : > > @@ -191,3 +194,120 @@ void numa_add_cpu(int cpu) > > { > > cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]); > > } > > + > > +/* initialize NODE_DATA given nodeid and start/end */ > > +void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end) > > From an abstract PoV, start and end should be paddr_t. This should be > done on a separate patch though. > Ok. > > +{ > > + unsigned long start_pfn, end_pfn; > > + > > + start_pfn = start >> PAGE_SHIFT; > > + end_pfn = end >> PAGE_SHIFT; > > + > > + NODE_DATA(nodeid)->node_start_pfn = start_pfn; > > + NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; > > + > > + node_set_online(nodeid); > > +} > > + > > +void __init numa_init_array(void) > > +{ > > + int rr, i; > > + > > + /* There are unfortunately some poorly designed mainboards around > > + that only connect memory to a single CPU. This breaks the 1:1 > cpu->node > > + mapping. To avoid this fill in the mapping for all possible > > + CPUs, as the number of CPUs is not known yet. > > + We round robin the existing nodes. */ > > + rr = first_node(node_online_map); > > + for ( i = 0; i < nr_cpu_ids; i++ ) > > + { > > + if ( cpu_to_node[i] != NUMA_NO_NODE ) > > + continue; > > + numa_set_node(i, rr); > > + rr = cycle_node(rr, node_online_map); > > + } > > +} > > + > > +#ifdef CONFIG_NUMA_EMU > > +int numa_fake __initdata = 0; > > + > > +/* Numa emulation */ > > +static int __init numa_emulation(u64 start_pfn, u64 end_pfn) > > Here, this should be either "unsigned long" or ideally "mfn_t". > Although, if you use "unsigned long", you will need to... > Do we need a separate patch to do it? > > +{ > > + int i; > > + struct node nodes[MAX_NUMNODES]; > > + u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake; > > ... cast "(end_pfn - start_pfn)" to uin64_t or use pfn_to_paddr(). > Ok > > + > > + /* Kludge needed for the hash function */ > > + if ( hweight64(sz) > 1 ) > > + { > > + u64 x = 1; > > + while ( (x << 1) < sz ) > > + x <<= 1; > > + if ( x < sz/2 ) > > + printk(KERN_ERR "Numa emulation unbalanced. Complain to > maintainer\n"); > > + sz = x; > > + } > > + > > + memset(&nodes,0,sizeof(nodes)); > > + for ( i = 0; i < numa_fake; i++ ) > > + { > > + nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz; > > + if ( i == numa_fake - 1 ) > > + sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; > > + nodes[i].end = nodes[i].start + sz; > > + printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" > (%"PRIu64"MB)\n", > > + i, > > + nodes[i].start, nodes[i].end, > > + (nodes[i].end - nodes[i].start) >> 20); > > + node_set_online(i); > > + } > > + memnode_shift = compute_hash_shift(nodes, numa_fake, NULL); > > + if ( memnode_shift < 0 ) > > + { > > + memnode_shift = 0; > > + printk(KERN_ERR "No NUMA hash function found. Emulation > disabled.\n"); > > + return -1; > > + } > > + for_each_online_node ( i ) > > + setup_node_bootmem(i, nodes[i].start, nodes[i].end); > > + numa_init_array(); > > + > > + return 0; > > +} > > +#endif > > + > > +void __init numa_initmem_init(unsigned long start_pfn, unsigned long > end_pfn) > > +{ > > + int i; > > + > > +#ifdef CONFIG_NUMA_EMU > > + if ( numa_fake && !numa_emulation(start_pfn, end_pfn) ) > > + return; > > +#endif > > + > > +#ifdef CONFIG_ACPI_NUMA > > + if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT, > > + (u64)end_pfn << PAGE_SHIFT) ) > > (u64)v << PAGE_SHIFT should be switched to use pfn_to_paddr() or > mfn_to_paddr() if you decide to make start_pfn and end_pfn typesafe. > Still need a separate patch to change it before move? > > + return; > > +#endif > > + > > + printk(KERN_INFO "%s\n", > > + numa_off ? "NUMA turned off" : "No NUMA configuration > found"); > > + > > + printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n", > > + (u64)start_pfn << PAGE_SHIFT, > > + (u64)end_pfn << PAGE_SHIFT); > > Same remark here. PRIx64 would also have to be switched to PRIpaddr. > Hmm, It seems I'd better to use a separate patch to do PRIpaddr clean up before move code. > > + /* setup dummy node covering all memory */ > > + memnode_shift = BITS_PER_LONG - 1; > > + memnodemap = _memnodemap; > > + memnodemapsize = ARRAY_SIZE(_memnodemap); > > + > > + nodes_clear(node_online_map); > > + node_set_online(0); > > + for ( i = 0; i < nr_cpu_ids; i++ ) > > + numa_set_node(i, 0); > > + cpumask_copy(&node_to_cpumask[0], cpumask_of(0)); > > + setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT, > > + (u64)end_pfn << PAGE_SHIFT); > > +} > > diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h > > index e8a92ad9df..f8e4e15586 100644 > > --- a/xen/include/asm-x86/numa.h > > +++ b/xen/include/asm-x86/numa.h > > @@ -21,16 +21,11 @@ extern nodeid_t pxm_to_node(unsigned int pxm); > > > > #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) > > > > -extern void numa_init_array(void); > > -extern bool numa_off; > > - > > - > > extern int srat_disabled(void); > > extern void numa_set_node(int cpu, nodeid_t node); > > extern nodeid_t setup_node(unsigned int pxm); > > extern void srat_detect_node(int cpu); > > > > -extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end); > > extern nodeid_t apicid_to_node[]; > > extern void init_cpu_to_node(void); > > > > diff --git a/xen/include/asm-x86/setup.h b/xen/include/asm-x86/setup.h > > index 24be46115d..63838ba2d1 100644 > > --- a/xen/include/asm-x86/setup.h > > +++ b/xen/include/asm-x86/setup.h > > @@ -17,7 +17,6 @@ void early_time_init(void); > > > > void set_nr_cpu_ids(unsigned int max_cpus); > > > > -void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn); > > void arch_init_memory(void); > > void subarch_init_memory(void); > > > > diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h > > index 67b79a73a3..258a5cb3db 100644 > > --- a/xen/include/xen/numa.h > > +++ b/xen/include/xen/numa.h > > @@ -26,7 +26,6 @@ > > extern int memnode_shift; > > extern unsigned long memnodemapsize; > > extern u8 *memnodemap; > > -extern typeof(*memnodemap) _memnodemap[64]; > > > > struct node_data { > > unsigned long node_start_pfn; > > @@ -69,6 +68,13 @@ extern int conflicting_memblks(u64 start, u64 end); > > extern void cutoff_node(int i, u64 start, u64 end); > > extern int valid_numa_range(u64 start, u64 end, nodeid_t node); > > > > +extern void numa_init_array(void); > > +extern void numa_initmem_init(unsigned long start_pfn, unsigned long > end_pfn); > > +extern bool numa_off; > > +extern int numa_fake; > > + > > +extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end); > > + > > #endif /* CONFIG_NUMA */ > > > > #endif /* _XEN_NUMA_H */ > > > > Cheers, > > -- > Julien Grall
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |