[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RE: [XEN RFC PATCH 08/40] xen/x86: Move NUMA memory node map functions to common
Hi Julien, > -----Original Message----- > From: Julien Grall <julien@xxxxxxx> > Sent: 2021年8月24日 1:47 > To: Wei Chen <Wei.Chen@xxxxxxx>; xen-devel@xxxxxxxxxxxxxxxxxxxx; > sstabellini@xxxxxxxxxx; jbeulich@xxxxxxxx > Cc: Bertrand Marquis <Bertrand.Marquis@xxxxxxx> > Subject: Re: [XEN RFC PATCH 08/40] xen/x86: Move NUMA memory node map > functions to common > > Hi Wei, > > On 11/08/2021 11:23, Wei Chen wrote: > > In the later patches we will add NUMA support to Arm. Arm > > NUMA support will follow current memory node map management > > as x86. So this part of code can be common, in this case, > > we move this part of code from arch/x86 to common. > > I would add "No functional changes intended" to make clear this patch is > only moving code. Ok, I will do it. > > > > > Signed-off-by: Wei Chen <wei.chen@xxxxxxx> > > --- > > xen/arch/x86/numa.c | 114 -------------------------------- > > xen/common/Makefile | 1 + > > xen/common/numa.c | 131 +++++++++++++++++++++++++++++++++++++ > > xen/include/asm-x86/numa.h | 29 -------- > > xen/include/xen/numa.h | 35 ++++++++++ > > 5 files changed, 167 insertions(+), 143 deletions(-) > > create mode 100644 xen/common/numa.c > > > > diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c > > index d23f4f7919..a6211be121 100644 > > --- a/xen/arch/x86/numa.c > > +++ b/xen/arch/x86/numa.c > > @@ -29,14 +29,6 @@ custom_param("numa", numa_setup); > > /* from proto.h */ > > #define round_up(x,y) ((((x)+(y))-1) & (~((y)-1))) > > > > -struct node_data node_data[MAX_NUMNODES]; > > - > > -/* Mapping from pdx to node id */ > > -int memnode_shift; > > -static typeof(*memnodemap) _memnodemap[64]; > > -unsigned long memnodemapsize; > > -u8 *memnodemap; > > - > > nodeid_t cpu_to_node[NR_CPUS] __read_mostly = { > > [0 ... NR_CPUS-1] = NUMA_NO_NODE > > }; > > @@ -58,112 +50,6 @@ int srat_disabled(void) > > return numa_off || acpi_numa < 0; > > } > > > > -/* > > - * Given a shift value, try to populate memnodemap[] > > - * Returns : > > - * 1 if OK > > - * 0 if memnodmap[] too small (of shift too small) > > - * -1 if node overlap or lost ram (shift too big) > > - */ > > -static int __init populate_memnodemap(const struct node *nodes, > > - int numnodes, int shift, nodeid_t > *nodeids) > > -{ > > - unsigned long spdx, epdx; > > - int i, res = -1; > > - > > - memset(memnodemap, NUMA_NO_NODE, memnodemapsize * > sizeof(*memnodemap)); > > - for ( i = 0; i < numnodes; i++ ) > > - { > > - spdx = paddr_to_pdx(nodes[i].start); > > - epdx = paddr_to_pdx(nodes[i].end - 1) + 1; > > - if ( spdx >= epdx ) > > - continue; > > - if ( (epdx >> shift) >= memnodemapsize ) > > - return 0; > > - do { > > - if ( memnodemap[spdx >> shift] != NUMA_NO_NODE ) > > - return -1; > > - > > - if ( !nodeids ) > > - memnodemap[spdx >> shift] = i; > > - else > > - memnodemap[spdx >> shift] = nodeids[i]; > > - > > - spdx += (1UL << shift); > > - } while ( spdx < epdx ); > > - res = 1; > > - } > > - > > - return res; > > -} > > - > > -static int __init allocate_cachealigned_memnodemap(void) > > -{ > > - unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap)); > > - unsigned long mfn = mfn_x(alloc_boot_pages(size, 1)); > > - > > - memnodemap = mfn_to_virt(mfn); > > - mfn <<= PAGE_SHIFT; > > - size <<= PAGE_SHIFT; > > - printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", > > - mfn, mfn + size); > > - memnodemapsize = size / sizeof(*memnodemap); > > - > > - return 0; > > -} > > - > > -/* > > - * The LSB of all start and end addresses in the node map is the value > of the > > - * maximum possible shift. > > - */ > > -static int __init extract_lsb_from_nodes(const struct node *nodes, > > - int numnodes) > > -{ > > - int i, nodes_used = 0; > > - unsigned long spdx, epdx; > > - unsigned long bitfield = 0, memtop = 0; > > - > > - for ( i = 0; i < numnodes; i++ ) > > - { > > - spdx = paddr_to_pdx(nodes[i].start); > > - epdx = paddr_to_pdx(nodes[i].end - 1) + 1; > > - if ( spdx >= epdx ) > > - continue; > > - bitfield |= spdx; > > - nodes_used++; > > - if ( epdx > memtop ) > > - memtop = epdx; > > - } > > - if ( nodes_used <= 1 ) > > - i = BITS_PER_LONG - 1; > > - else > > - i = find_first_bit(&bitfield, sizeof(unsigned long)*8); > > - memnodemapsize = (memtop >> i) + 1; > > - return i; > > -} > > - > > -int __init compute_hash_shift(struct node *nodes, int numnodes, > > - nodeid_t *nodeids) > > -{ > > - int shift; > > - > > - shift = extract_lsb_from_nodes(nodes, numnodes); > > - if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) ) > > - memnodemap = _memnodemap; > > - else if ( allocate_cachealigned_memnodemap() ) > > - return -1; > > - printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift); > > - > > - if ( populate_memnodemap(nodes, numnodes, shift, nodeids) != 1 ) > > - { > > - printk(KERN_INFO "Your memory is not aligned you need to " > > - "rebuild your hypervisor with a bigger NODEMAPSIZE " > > - "shift=%d\n", shift); > > - return -1; > > - } > > - > > - return shift; > > -} > > /* initialize NODE_DATA given nodeid and start/end */ > > void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end) > > { > > diff --git a/xen/common/Makefile b/xen/common/Makefile > > index 54de70d422..f8f667e90a 100644 > > --- a/xen/common/Makefile > > +++ b/xen/common/Makefile > > @@ -54,6 +54,7 @@ obj-y += wait.o > > obj-bin-y += warning.init.o > > obj-$(CONFIG_XENOPROF) += xenoprof.o > > obj-y += xmalloc_tlsf.o > > +obj-$(CONFIG_NUMA) += numa.o > > AFAICT, the Makefile is listing the file in alphabetical order. So > please add numa.o in the correct position. > Thanks for the reminder, I will fix it. > > > > obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma > lzo unlzo unlz4 unzstd earlycpio,$(n).init.o) > > > > diff --git a/xen/common/numa.c b/xen/common/numa.c > > new file mode 100644 > > index 0000000000..e65b6a6676 > > --- /dev/null > > +++ b/xen/common/numa.c > > @@ -0,0 +1,131 @@ > > +/* > > + * Generic VM initialization for x86-64 NUMA setups. > > + * Copyright 2002,2003 Andi Kleen, SuSE Labs. > > + * Adapted for Xen: Ryan Harper <ryanh@xxxxxxxxxx> > > + */ > > + > > +#include <xen/mm.h> > > +#include <xen/string.h> > > +#include <xen/init.h> > > +#include <xen/ctype.h> > > You don't seem to use any helpers./types directly defined by at least > this header... > > > +#include <xen/nodemask.h> > > +#include <xen/numa.h> > > +#include <xen/time.h> > > ... this one and ... > > > +#include <xen/smp.h> > > ... this one. Can you check the list of headers and introduce the > minimum? If the dependency is required by another headers, then I think > that dependency should be moved in the header requiring it. > I will check it in next version. If it isn't needed, I will remove it. > > +#include <xen/pfn.h> > > +#include <xen/sched.h> > > Please sort the includes in alphabetical order. > OK > > + > > +struct node_data node_data[MAX_NUMNODES]; > > + > > +/* Mapping from pdx to node id */ > > +int memnode_shift; > > +typeof(*memnodemap) _memnodemap[64]; > > +unsigned long memnodemapsize; > > +u8 *memnodemap; > > + > > +/* > > + * Given a shift value, try to populate memnodemap[] > > + * Returns : > > + * 1 if OK > > + * 0 if memnodmap[] too small (of shift too small) > > + * -1 if node overlap or lost ram (shift too big) > > + */ > > +static int __init populate_memnodemap(const struct node *nodes, > > + int numnodes, int shift, nodeid_t > *nodeids) > > +{ > > + unsigned long spdx, epdx; > > + int i, res = -1; > > + > > + memset(memnodemap, NUMA_NO_NODE, memnodemapsize * > sizeof(*memnodemap)); > > + for ( i = 0; i < numnodes; i++ ) > > + { > > + spdx = paddr_to_pdx(nodes[i].start); > > + epdx = paddr_to_pdx(nodes[i].end - 1) + 1; > > + if ( spdx >= epdx ) > > + continue; > > + if ( (epdx >> shift) >= memnodemapsize ) > > + return 0; > > + do { > > + if ( memnodemap[spdx >> shift] != NUMA_NO_NODE ) > > + return -1; > > + > > + if ( !nodeids ) > > + memnodemap[spdx >> shift] = i; > > + else > > + memnodemap[spdx >> shift] = nodeids[i]; > > + > > + spdx += (1UL << shift); > > + } while ( spdx < epdx ); > > + res = 1; > > + } > > + > > + return res; > > +} > > + > > +static int __init allocate_cachealigned_memnodemap(void) > > +{ > > + unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap)); > > + unsigned long mfn = mfn_x(alloc_boot_pages(size, 1)); > > + > > + memnodemap = mfn_to_virt(mfn); > > + mfn <<= PAGE_SHIFT; > > + size <<= PAGE_SHIFT; > > + printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", > > + mfn, mfn + size); > > + memnodemapsize = size / sizeof(*memnodemap); > > + > > + return 0; > > +} > > + > > +/* > > + * The LSB of all start and end addresses in the node map is the value > of the > > + * maximum possible shift. > > + */ > > +static int __init extract_lsb_from_nodes(const struct node *nodes, > > + int numnodes) > > +{ > > + int i, nodes_used = 0; > > + unsigned long spdx, epdx; > > + unsigned long bitfield = 0, memtop = 0; > > + > > + for ( i = 0; i < numnodes; i++ ) > > + { > > + spdx = paddr_to_pdx(nodes[i].start); > > + epdx = paddr_to_pdx(nodes[i].end - 1) + 1; > > + if ( spdx >= epdx ) > > + continue; > > + bitfield |= spdx; > > + nodes_used++; > > + if ( epdx > memtop ) > > + memtop = epdx; > > + } > > + if ( nodes_used <= 1 ) > > + i = BITS_PER_LONG - 1; > > + else > > + i = find_first_bit(&bitfield, sizeof(unsigned long)*8); > > + memnodemapsize = (memtop >> i) + 1; > > + return i; > > +} > > + > > +int __init compute_hash_shift(struct node *nodes, int numnodes, > > + nodeid_t *nodeids) > > +{ > > + int shift; > > + > > + shift = extract_lsb_from_nodes(nodes, numnodes); > > + if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) ) > > + memnodemap = _memnodemap; > > + else if ( allocate_cachealigned_memnodemap() ) > > + return -1; > > + printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift); > > + > > + if ( populate_memnodemap(nodes, numnodes, shift, nodeids) != 1 ) > > + { > > + printk(KERN_INFO "Your memory is not aligned you need to " > > + "rebuild your hypervisor with a bigger NODEMAPSIZE " > > + "shift=%d\n", shift); > > + return -1; > > + } > > + > > + return shift; > > +} > > diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h > > index bada2c0bb9..abe5617d01 100644 > > --- a/xen/include/asm-x86/numa.h > > +++ b/xen/include/asm-x86/numa.h > > @@ -26,7 +26,6 @@ extern int compute_hash_shift(struct node *nodes, int > numnodes, > > extern nodeid_t pxm_to_node(unsigned int pxm); > > > > #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) > > -#define VIRTUAL_BUG_ON(x) > > > > extern void numa_add_cpu(int cpu); > > extern void numa_init_array(void); > > @@ -47,34 +46,6 @@ static inline void clear_node_cpumask(int cpu) > > cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]); > > } > > > > -/* Simple perfect hash to map pdx to node numbers */ > > -extern int memnode_shift; > > -extern unsigned long memnodemapsize; > > -extern u8 *memnodemap; > > - > > -struct node_data { > > - unsigned long node_start_pfn; > > - unsigned long node_spanned_pages; > > -}; > > - > > -extern struct node_data node_data[]; > > - > > -static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr) > > -{ > > - nodeid_t nid; > > - VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >= > memnodemapsize); > > - nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift]; > > - VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); > > - return nid; > > -} > > - > > -#define NODE_DATA(nid) (&(node_data[nid])) > > - > > -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) > > -#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) > > -#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ > > - NODE_DATA(nid)->node_spanned_pages) > > - > > extern int valid_numa_range(u64 start, u64 end, nodeid_t node); > > > > void srat_parse_regions(u64 addr); > > diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h > > index 7aef1a88dc..39e8a4e00a 100644 > > --- a/xen/include/xen/numa.h > > +++ b/xen/include/xen/numa.h > > @@ -18,4 +18,39 @@ > > (((d)->vcpu != NULL && (d)->vcpu[0] != NULL) \ > > ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE) > > > > +/* The following content can be used when NUMA feature is enabled */ > > +#if defined(CONFIG_NUMA) > > Please use #ifdef CONFIG_NUMA > > > + > > +/* Simple perfect hash to map pdx to node numbers */ > > +extern int memnode_shift; > > +extern unsigned long memnodemapsize; > > +extern u8 *memnodemap; > > +extern typeof(*memnodemap) _memnodemap[64]; > > AFAICT, this will be turned static against in a follow-up patch. Can > this be avoided? > I will try it in next version. > > + > > +struct node_data { > > + unsigned long node_start_pfn; > > + unsigned long node_spanned_pages; > > +}; > > + > > +extern struct node_data node_data[]; > > +#define VIRTUAL_BUG_ON(x) > > + > > +static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr) > > +{ > > + nodeid_t nid; > > + VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >= > memnodemapsize); > > + nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift]; > > + VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); > > + return nid; > > +} > > + > > +#define NODE_DATA(nid) (&(node_data[nid])) > > + > > +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) > > +#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) > > +#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ > > + NODE_DATA(nid)->node_spanned_pages) > > + > > +#endif /* CONFIG_NUMA */ > > + > > #endif /* _XEN_NUMA_H */ > > > > Cheers, > > -- > Julien Grall
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |