
[xen staging] xen/x86: add detection of memory interleaves for different nodes



commit b660e19d0f7f3324d61462fc6d9ef6ee98914f2d
Author:     Wei Chen <wei.chen@xxxxxxx>
AuthorDate: Fri Jun 10 13:53:15 2022 +0800
Commit:     Julien Grall <jgrall@xxxxxxxxxx>
CommitDate: Fri Jun 17 09:36:12 2022 +0100

    xen/x86: add detection of memory interleaves for different nodes
    
    One NUMA node may contain several memory blocks. In the current Xen
    code, Xen maintains a single memory range per node to cover all of
    that node's memory blocks. The problem is that, if the gap between
    two of a node's memory blocks contains blocks that belong to other
    nodes (remote memory blocks), the node's memory range gets expanded
    to cover those remote memory blocks as well.
    
    A node's memory range containing other nodes' memory is clearly
    wrong: it means the current NUMA code can only support nodes whose
    memory blocks are not interleaved. However, on a physical machine,
    the address ranges of multiple nodes can be interleaved.
    
    So in this patch, we add code to detect memory interleaving between
    different nodes. When Xen detects such a hardware configuration,
    NUMA initialization fails and error messages are printed.
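    
    As an illustration (the addresses below are made up, not taken from
    real hardware), a layout like the following is now rejected, since
    covering node 0 with a single range [0x00000000, 0xbfffffff] would
    pull node 1's block into node 0:
    
        node 0 memblk: [0x00000000, 0x3fffffff]
        node 1 memblk: [0x40000000, 0x7fffffff]
        node 0 memblk: [0x80000000, 0xbfffffff]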
    
    As we have already checked the node's range, the "nd->end == end &&
    nd->start == start" check in conflicting_memblks() is unnecessary
    for a non-empty block (equal, non-empty ranges already satisfy
    "nd->end > start && nd->start < end"), so we remove it.
    
    Signed-off-by: Wei Chen <wei.chen@xxxxxxx>
    Tested-by: Jiamei Xie <jiamei.xie@xxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
---
 xen/arch/x86/srat.c | 139 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 101 insertions(+), 38 deletions(-)

diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c
index 8ffe43bdfe..3d02520a5a 100644
--- a/xen/arch/x86/srat.c
+++ b/xen/arch/x86/srat.c
@@ -42,6 +42,12 @@ static struct node node_memblk_range[NR_NODE_MEMBLKS];
 static nodeid_t memblk_nodeid[NR_NODE_MEMBLKS];
 static __initdata DECLARE_BITMAP(memblk_hotplug, NR_NODE_MEMBLKS);
 
+enum conflicts {
+       NO_CONFLICT,
+       OVERLAP,
+       INTERLEAVE,
+};
+
 static inline bool node_found(unsigned idx, unsigned pxm)
 {
        return ((pxm2node[idx].pxm == pxm) &&
@@ -119,20 +125,45 @@ int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node)
        return 0;
 }
 
-static __init int conflicting_memblks(paddr_t start, paddr_t end)
+static
+enum conflicts __init conflicting_memblks(nodeid_t nid, paddr_t start,
+                                         paddr_t end, paddr_t nd_start,
+                                         paddr_t nd_end, unsigned int *mblkid)
 {
-       int i;
+       unsigned int i;
 
+       /*
+        * Scan all recorded nodes' memory blocks for conflicts:
+        * overlap or interleave.
+        */
        for (i = 0; i < num_node_memblks; i++) {
                struct node *nd = &node_memblk_range[i];
+
+               *mblkid = i;
+
+       /* Skip zero-length node memory blocks. */
                if (nd->start == nd->end)
                        continue;
+               /*
+                * Use the memblk range to check for memblk overlaps, including
+                * the self-overlap case. As nd's range is non-empty, this also
+                * covers the special case "nd->end == end && nd->start == start".
+                */
                if (nd->end > start && nd->start < end)
-                       return i;
-               if (nd->end == end && nd->start == start)
-                       return i;
+                       return OVERLAP;
+
+               /*
+                * Use the node memory range to check whether the new range
+                * contains memory from other nodes (interleave check). Only
+                * full containment needs to be checked here, because overlaps
+                * have already been caught above.
+                */
+               if (nid != memblk_nodeid[i] &&
+                   nd->start >= nd_start && nd->end <= nd_end)
+                       return INTERLEAVE;
        }
-       return -1;
+
+       return NO_CONFLICT;
 }
 
 static __init void cutoff_node(int i, paddr_t start, paddr_t end)
@@ -275,10 +306,12 @@ acpi_numa_processor_affinity_init(const struct acpi_srat_cpu_affinity *pa)
 void __init
 acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
 {
+       struct node *nd;
+       paddr_t nd_start, nd_end;
        paddr_t start, end;
        unsigned pxm;
        nodeid_t node;
-       int i;
+       unsigned int i;
 
        if (srat_disabled())
                return;
@@ -310,44 +343,74 @@ acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
                bad_srat();
                return;
        }
+
+       /*
+        * For a node that already has some memory blocks, expand the
+        * node memory range temporarily to check for memory
+        * interleaving with other nodes. Do not use this temporary
+        * range to check overlaps, because that would mask overlaps
+        * within the same node.
+        *
+        * A node with 0 bytes of memory doesn't need this expansion.
+        */
+       nd_start = start;
+       nd_end = end;
+       nd = &nodes[node];
+       if (nd->start != nd->end) {
+               if (nd_start > nd->start)
+                       nd_start = nd->start;
+
+               if (nd_end < nd->end)
+                       nd_end = nd->end;
+       }
+
        /* It is fine to add this area to the nodes data it will be used later*/
-       i = conflicting_memblks(start, end);
-       if (i < 0)
-               /* everything fine */;
-       else if (memblk_nodeid[i] == node) {
-               bool mismatch = !(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) !=
-                               !test_bit(i, memblk_hotplug);
-
-       printk("%sSRAT: PXM %u (%"PRIpaddr"-%"PRIpaddr") overlaps with itself (%"PRIpaddr"-%"PRIpaddr")\n",
-                      mismatch ? KERN_ERR : KERN_WARNING, pxm, start, end,
-                      node_memblk_range[i].start, node_memblk_range[i].end);
-               if (mismatch) {
-                       bad_srat();
-                       return;
+       switch (conflicting_memblks(node, start, end, nd_start, nd_end, &i)) {
+       case OVERLAP:
+               if (memblk_nodeid[i] == node) {
+                       bool mismatch = !(ma->flags &
+                                         ACPI_SRAT_MEM_HOT_PLUGGABLE) !=
+                                       !test_bit(i, memblk_hotplug);
+
+                       printk("%sSRAT: PXM %u [%"PRIpaddr", %"PRIpaddr"] overlaps with itself [%"PRIpaddr", %"PRIpaddr"]\n",
+                              mismatch ? KERN_ERR : KERN_WARNING, pxm, start,
+                              end - 1, node_memblk_range[i].start,
+                              node_memblk_range[i].end - 1);
+                       if (mismatch) {
+                               bad_srat();
+                               return;
+                       }
+                       break;
                }
-       } else {
+
+               printk(KERN_ERR
+                      "SRAT: PXM %u [%"PRIpaddr", %"PRIpaddr"] overlaps with 
PXM %u [%"PRIpaddr", %"PRIpaddr"]\n",
+                      pxm, start, end - 1, node_to_pxm(memblk_nodeid[i]),
+                      node_memblk_range[i].start,
+                      node_memblk_range[i].end - 1);
+               bad_srat();
+               return;
+
+       case INTERLEAVE:
                printk(KERN_ERR
-                      "SRAT: PXM %u (%"PRIpaddr"-%"PRIpaddr") overlaps with 
PXM %u (%"PRIpaddr"-%"PRIpaddr")\n",
-                      pxm, start, end, node_to_pxm(memblk_nodeid[i]),
-                      node_memblk_range[i].start, node_memblk_range[i].end);
+                      "SRATï¼? PXM %u: [%"PRIpaddr", %"PRIpaddr"] interleaves 
with PXM %u memblk [%"PRIpaddr", %"PRIpaddr"]\n",
+                      pxm, nd_start, nd_end - 1, node_to_pxm(memblk_nodeid[i]),
+                      node_memblk_range[i].start, node_memblk_range[i].end - 
1);
                bad_srat();
                return;
+
+       case NO_CONFLICT:
+               break;
        }
+
        if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
-               struct node *nd = &nodes[node];
-
-               if (!node_test_and_set(node, memory_nodes_parsed)) {
-                       nd->start = start;
-                       nd->end = end;
-               } else {
-                       if (start < nd->start)
-                               nd->start = start;
-                       if (nd->end < end)
-                               nd->end = end;
-               }
+               node_set(node, memory_nodes_parsed);
+               nd->start = nd_start;
+               nd->end = nd_end;
        }
-       printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIpaddr"-%"PRIpaddr"%s\n",
-              node, pxm, start, end,
+
+       printk(KERN_INFO "SRAT: Node %u PXM %u [%"PRIpaddr", %"PRIpaddr"]%s\n",
+              node, pxm, start, end - 1,
               ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? " (hotplug)" : "");
 
        node_memblk_range[num_node_memblks].start = start;
@@ -396,7 +459,7 @@ static int __init nodes_cover_memory(void)
 
                if (start < end) {
                        printk(KERN_ERR "SRAT: No PXM for e820 range: "
-                               "%"PRIpaddr" - %"PRIpaddr"\n", start, end);
+                               "[%"PRIpaddr", %"PRIpaddr"]\n", start, end - 1);
                        return 0;
                }
        }
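
For anyone who wants to experiment with the classification logic outside
the hypervisor, here is a stand-alone sketch that mirrors the checks in
conflicting_memblks(). The typedefs, the classify() wrapper and the
sample layout are illustrative stand-ins, not Xen's actual definitions:

    /*
     * Stand-alone sketch of the conflict classification above. All
     * types, names and the sample layout are illustrative stand-ins,
     * not Xen's actual definitions.
     */
    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t paddr_t;
    typedef unsigned int nodeid_t;

    enum conflicts { NO_CONFLICT, OVERLAP, INTERLEAVE };

    struct memblk {
        paddr_t start, end;     /* [start, end), i.e. end is exclusive */
        nodeid_t nid;
    };

    /*
     * Classify a new block [start, end) claimed by node "nid" against
     * the blocks recorded so far. nd_start/nd_end is node "nid"'s range
     * after temporarily expanding it to include the new block.
     */
    static enum conflicts classify(const struct memblk *blk, unsigned int nr,
                                   nodeid_t nid, paddr_t start, paddr_t end,
                                   paddr_t nd_start, paddr_t nd_end,
                                   unsigned int *mblkid)
    {
        unsigned int i;

        for (i = 0; i < nr; i++) {
            *mblkid = i;

            /* Skip zero-length blocks. */
            if (blk[i].start == blk[i].end)
                continue;

            /* Any intersection of the raw ranges is an overlap. */
            if (blk[i].end > start && blk[i].start < end)
                return OVERLAP;

            /*
             * A foreign block fully contained in the expanded node
             * range means the two nodes' memory is interleaved.
             */
            if (nid != blk[i].nid &&
                blk[i].start >= nd_start && blk[i].end <= nd_end)
                return INTERLEAVE;
        }

        return NO_CONFLICT;
    }

    int main(void)
    {
        /* Node 1's block sits in the gap between node 0's two blocks. */
        struct memblk blk[] = {
            { 0x00000000, 0x40000000, 0 },
            { 0x40000000, 0x80000000, 1 },
        };
        unsigned int id;

        /*
         * Adding [0x80000000, 0xc0000000) to node 0 expands node 0's
         * range to [0, 0xc0000000), which now contains node 1's block.
         */
        enum conflicts c = classify(blk, 2, 0, 0x80000000, 0xc0000000,
                                    0x00000000, 0xc0000000, &id);

        printf("%s (memblk %u)\n",
               c == INTERLEAVE ? "INTERLEAVE" :
               c == OVERLAP ? "OVERLAP" : "NO_CONFLICT", id);
        return 0;
    }

Compiled as C99 this prints "INTERLEAVE (memblk 1)": the expanded node 0
range fully contains node 1's block, which is exactly the situation the
patch now rejects via bad_srat().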
--
generated by git-patchbot for /home/xen/git/xen.git#staging



 

