[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [XEN RFC PATCH 08/40] xen/x86: Move NUMA memory node map functions to common



Hi Wei,

On 11/08/2021 11:23, Wei Chen wrote:
In the later patches we will add NUMA support to Arm. Arm
NUMA support will follow current memory node map management
as x86. So this part of code can be common, in this case,
we move this part of code from arch/x86 to common.

I would add "No functional changes intended" to make clear this patch is only moving code.


Signed-off-by: Wei Chen <wei.chen@xxxxxxx>
---
  xen/arch/x86/numa.c        | 114 --------------------------------
  xen/common/Makefile        |   1 +
  xen/common/numa.c          | 131 +++++++++++++++++++++++++++++++++++++
  xen/include/asm-x86/numa.h |  29 --------
  xen/include/xen/numa.h     |  35 ++++++++++
  5 files changed, 167 insertions(+), 143 deletions(-)
  create mode 100644 xen/common/numa.c

diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
index d23f4f7919..a6211be121 100644
--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -29,14 +29,6 @@ custom_param("numa", numa_setup);
  /* from proto.h */
  #define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
-struct node_data node_data[MAX_NUMNODES];
-
-/* Mapping from pdx to node id */
-int memnode_shift;
-static typeof(*memnodemap) _memnodemap[64];
-unsigned long memnodemapsize;
-u8 *memnodemap;
-
  nodeid_t cpu_to_node[NR_CPUS] __read_mostly = {
      [0 ... NR_CPUS-1] = NUMA_NO_NODE
  };
@@ -58,112 +50,6 @@ int srat_disabled(void)
      return numa_off || acpi_numa < 0;
  }
-/*
- * Given a shift value, try to populate memnodemap[]
- * Returns :
- * 1 if OK
- * 0 if memnodmap[] too small (of shift too small)
- * -1 if node overlap or lost ram (shift too big)
- */
-static int __init populate_memnodemap(const struct node *nodes,
-                                      int numnodes, int shift, nodeid_t 
*nodeids)
-{
-    unsigned long spdx, epdx;
-    int i, res = -1;
-
-    memset(memnodemap, NUMA_NO_NODE, memnodemapsize * sizeof(*memnodemap));
-    for ( i = 0; i < numnodes; i++ )
-    {
-        spdx = paddr_to_pdx(nodes[i].start);
-        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
-        if ( spdx >= epdx )
-            continue;
-        if ( (epdx >> shift) >= memnodemapsize )
-            return 0;
-        do {
-            if ( memnodemap[spdx >> shift] != NUMA_NO_NODE )
-                return -1;
-
-            if ( !nodeids )
-                memnodemap[spdx >> shift] = i;
-            else
-                memnodemap[spdx >> shift] = nodeids[i];
-
-            spdx += (1UL << shift);
-        } while ( spdx < epdx );
-        res = 1;
-    }
-
-    return res;
-}
-
-static int __init allocate_cachealigned_memnodemap(void)
-{
-    unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap));
-    unsigned long mfn = mfn_x(alloc_boot_pages(size, 1));
-
-    memnodemap = mfn_to_virt(mfn);
-    mfn <<= PAGE_SHIFT;
-    size <<= PAGE_SHIFT;
-    printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
-           mfn, mfn + size);
-    memnodemapsize = size / sizeof(*memnodemap);
-
-    return 0;
-}
-
-/*
- * The LSB of all start and end addresses in the node map is the value of the
- * maximum possible shift.
- */
-static int __init extract_lsb_from_nodes(const struct node *nodes,
-                                         int numnodes)
-{
-    int i, nodes_used = 0;
-    unsigned long spdx, epdx;
-    unsigned long bitfield = 0, memtop = 0;
-
-    for ( i = 0; i < numnodes; i++ )
-    {
-        spdx = paddr_to_pdx(nodes[i].start);
-        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
-        if ( spdx >= epdx )
-            continue;
-        bitfield |= spdx;
-        nodes_used++;
-        if ( epdx > memtop )
-            memtop = epdx;
-    }
-    if ( nodes_used <= 1 )
-        i = BITS_PER_LONG - 1;
-    else
-        i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
-    memnodemapsize = (memtop >> i) + 1;
-    return i;
-}
-
-int __init compute_hash_shift(struct node *nodes, int numnodes,
-                              nodeid_t *nodeids)
-{
-    int shift;
-
-    shift = extract_lsb_from_nodes(nodes, numnodes);
-    if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) )
-        memnodemap = _memnodemap;
-    else if ( allocate_cachealigned_memnodemap() )
-        return -1;
-    printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift);
-
-    if ( populate_memnodemap(nodes, numnodes, shift, nodeids) != 1 )
-    {
-        printk(KERN_INFO "Your memory is not aligned you need to "
-               "rebuild your hypervisor with a bigger NODEMAPSIZE "
-               "shift=%d\n", shift);
-        return -1;
-    }
-
-    return shift;
-}
  /* initialize NODE_DATA given nodeid and start/end */
  void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
  {
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 54de70d422..f8f667e90a 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -54,6 +54,7 @@ obj-y += wait.o
  obj-bin-y += warning.init.o
  obj-$(CONFIG_XENOPROF) += xenoprof.o
  obj-y += xmalloc_tlsf.o
+obj-$(CONFIG_NUMA) += numa.o

AFAICT, the Makefile is listing the file in alphabetical order. So please add numa.o in the correct position.

obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma lzo unlzo unlz4 unzstd earlycpio,$(n).init.o) diff --git a/xen/common/numa.c b/xen/common/numa.c
new file mode 100644
index 0000000000..e65b6a6676
--- /dev/null
+++ b/xen/common/numa.c
@@ -0,0 +1,131 @@
+/*
+ * Generic VM initialization for x86-64 NUMA setups.
+ * Copyright 2002,2003 Andi Kleen, SuSE Labs.
+ * Adapted for Xen: Ryan Harper <ryanh@xxxxxxxxxx>
+ */
+
+#include <xen/mm.h>
+#include <xen/string.h>
+#include <xen/init.h>
+#include <xen/ctype.h>

You don't seem to use any helpers./types directly defined by at least this header...

+#include <xen/nodemask.h>
+#include <xen/numa.h>
+#include <xen/time.h>

... this one and ...

+#include <xen/smp.h>

... this one. Can you check the list of headers and introduce the minimum? If the dependency is required by another headers, then I think that dependency should be moved in the header requiring it.

+#include <xen/pfn.h>
+#include <xen/sched.h>

Please sort the includes in alphabetical order.

+
+struct node_data node_data[MAX_NUMNODES];
+
+/* Mapping from pdx to node id */
+int memnode_shift;
+typeof(*memnodemap) _memnodemap[64];
+unsigned long memnodemapsize;
+u8 *memnodemap;
+
+/*
+ * Given a shift value, try to populate memnodemap[]
+ * Returns :
+ * 1 if OK
+ * 0 if memnodmap[] too small (of shift too small)
+ * -1 if node overlap or lost ram (shift too big)
+ */
+static int __init populate_memnodemap(const struct node *nodes,
+                                      int numnodes, int shift, nodeid_t 
*nodeids)
+{
+    unsigned long spdx, epdx;
+    int i, res = -1;
+
+    memset(memnodemap, NUMA_NO_NODE, memnodemapsize * sizeof(*memnodemap));
+    for ( i = 0; i < numnodes; i++ )
+    {
+        spdx = paddr_to_pdx(nodes[i].start);
+        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
+        if ( spdx >= epdx )
+            continue;
+        if ( (epdx >> shift) >= memnodemapsize )
+            return 0;
+        do {
+            if ( memnodemap[spdx >> shift] != NUMA_NO_NODE )
+                return -1;
+
+            if ( !nodeids )
+                memnodemap[spdx >> shift] = i;
+            else
+                memnodemap[spdx >> shift] = nodeids[i];
+
+            spdx += (1UL << shift);
+        } while ( spdx < epdx );
+        res = 1;
+    }
+
+    return res;
+}
+
+static int __init allocate_cachealigned_memnodemap(void)
+{
+    unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap));
+    unsigned long mfn = mfn_x(alloc_boot_pages(size, 1));
+
+    memnodemap = mfn_to_virt(mfn);
+    mfn <<= PAGE_SHIFT;
+    size <<= PAGE_SHIFT;
+    printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
+           mfn, mfn + size);
+    memnodemapsize = size / sizeof(*memnodemap);
+
+    return 0;
+}
+
+/*
+ * The LSB of all start and end addresses in the node map is the value of the
+ * maximum possible shift.
+ */
+static int __init extract_lsb_from_nodes(const struct node *nodes,
+                                         int numnodes)
+{
+    int i, nodes_used = 0;
+    unsigned long spdx, epdx;
+    unsigned long bitfield = 0, memtop = 0;
+
+    for ( i = 0; i < numnodes; i++ )
+    {
+        spdx = paddr_to_pdx(nodes[i].start);
+        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
+        if ( spdx >= epdx )
+            continue;
+        bitfield |= spdx;
+        nodes_used++;
+        if ( epdx > memtop )
+            memtop = epdx;
+    }
+    if ( nodes_used <= 1 )
+        i = BITS_PER_LONG - 1;
+    else
+        i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
+    memnodemapsize = (memtop >> i) + 1;
+    return i;
+}
+
+int __init compute_hash_shift(struct node *nodes, int numnodes,
+                              nodeid_t *nodeids)
+{
+    int shift;
+
+    shift = extract_lsb_from_nodes(nodes, numnodes);
+    if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) )
+        memnodemap = _memnodemap;
+    else if ( allocate_cachealigned_memnodemap() )
+        return -1;
+    printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift);
+
+    if ( populate_memnodemap(nodes, numnodes, shift, nodeids) != 1 )
+    {
+        printk(KERN_INFO "Your memory is not aligned you need to "
+               "rebuild your hypervisor with a bigger NODEMAPSIZE "
+               "shift=%d\n", shift);
+        return -1;
+    }
+
+    return shift;
+}
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
index bada2c0bb9..abe5617d01 100644
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -26,7 +26,6 @@ extern int compute_hash_shift(struct node *nodes, int 
numnodes,
  extern nodeid_t pxm_to_node(unsigned int pxm);
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
-#define VIRTUAL_BUG_ON(x)
extern void numa_add_cpu(int cpu);
  extern void numa_init_array(void);
@@ -47,34 +46,6 @@ static inline void clear_node_cpumask(int cpu)
        cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
  }
-/* Simple perfect hash to map pdx to node numbers */
-extern int memnode_shift;
-extern unsigned long memnodemapsize;
-extern u8 *memnodemap;
-
-struct node_data {
-    unsigned long node_start_pfn;
-    unsigned long node_spanned_pages;
-};
-
-extern struct node_data node_data[];
-
-static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
-{
-       nodeid_t nid;
-       VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >= memnodemapsize);
-       nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
-       VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
-       return nid;
-}
-
-#define NODE_DATA(nid)         (&(node_data[nid]))
-
-#define node_start_pfn(nid)    (NODE_DATA(nid)->node_start_pfn)
-#define node_spanned_pages(nid)        (NODE_DATA(nid)->node_spanned_pages)
-#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
-                                NODE_DATA(nid)->node_spanned_pages)
-
  extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
void srat_parse_regions(u64 addr);
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
index 7aef1a88dc..39e8a4e00a 100644
--- a/xen/include/xen/numa.h
+++ b/xen/include/xen/numa.h
@@ -18,4 +18,39 @@
    (((d)->vcpu != NULL && (d)->vcpu[0] != NULL) \
     ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE)
+/* The following content can be used when NUMA feature is enabled */
+#if defined(CONFIG_NUMA)

Please use #ifdef CONFIG_NUMA

+
+/* Simple perfect hash to map pdx to node numbers */
+extern int memnode_shift;
+extern unsigned long memnodemapsize;
+extern u8 *memnodemap;
+extern typeof(*memnodemap) _memnodemap[64];

AFAICT, this will be turned static against in a follow-up patch. Can this be avoided?

+
+struct node_data {
+    unsigned long node_start_pfn;
+    unsigned long node_spanned_pages;
+};
+
+extern struct node_data node_data[];
+#define VIRTUAL_BUG_ON(x)
+
+static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
+{
+       nodeid_t nid;
+       VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >= memnodemapsize);
+       nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
+       VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
+       return nid;
+}
+
+#define NODE_DATA(nid)         (&(node_data[nid]))
+
+#define node_start_pfn(nid)    (NODE_DATA(nid)->node_start_pfn)
+#define node_spanned_pages(nid)        (NODE_DATA(nid)->node_spanned_pages)
+#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
+                                NODE_DATA(nid)->node_spanned_pages)
+
+#endif /* CONFIG_NUMA */
+
  #endif /* _XEN_NUMA_H */


Cheers,

--
Julien Grall



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.