[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v4 02/10] xen/page_alloc: Optimize getting per-NUMA-node free page counts



From: Alejandro Vallejo <alejandro.vallejo@xxxxxxxxx>

Add per-node free page counters (node_avail_pages[]), protected by
heap_lock, updated in real-time in lockstep with total_avail_pages
as pages are allocated and freed.

This replaces the avail_heap_pages() loop over all online nodes and
zones in avail_node_heap_pages() with a direct O(1) array lookup,
making it efficient to get the total free pages for a given NUMA node.

The per-node counts are currently exposed via sysctl, for use in NUMA
placement decisions by domain builders and for monitoring, and via the
debug-key 'u', which prints NUMA info to the printk buffer for debugging.

They will also be used for checking if a NUMA node may be able to
satisfy a NUMA-node-specific allocation by comparing node availability
against node-specific claims before looking for pages in the zones
of the node.

Also change total_avail_pages and outstanding_claims to unsigned long:

These counters never become negative (that is already enforced with
ASSERT/BUG_ON), so converting them to unsigned long makes the invariant
explicit and also fixes signed/unsigned comparison warnings.

This only needs moving the ASSERT to before the subtraction.
See the previous commit moving the BUG_ON for outstanding_claims.

This lays the groundwork for implementing per-node claims.

Signed-off-by: Alejandro Vallejo <alejandro.vallejo@xxxxxxxxx>
Signed-off-by: Bernhard Kaindl <bernhard.kaindl@xxxxxxxxxx>
---
 xen/common/page_alloc.c | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 6f7f30c64605..2176cb113fe2 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -483,11 +483,32 @@ static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
 
 static unsigned long node_need_scrub[MAX_NUMNODES];
 
+/* avail[node][zone] is the number of free pages on that node and zone. */
 static unsigned long *avail[MAX_NUMNODES];
-static long total_avail_pages;
+/* Global available pages, updated in real-time, protected by heap_lock */
+static unsigned long total_avail_pages;
 
+/* The global heap lock, protecting access to the heap and related structures */
 static DEFINE_SPINLOCK(heap_lock);
-static long outstanding_claims; /* total outstanding claims by all domains */
+
+/*
+ * Per-node count of available pages, protected by heap_lock, updated in
+ * lockstep with total_avail_pages as pages are allocated and freed.
+ *
+ * Each entry holds the sum of avail[node][zone] across all zones, used for
+ * efficiently checking node-local availability for allocation requests.
+ * Also provided via sysctl for NUMA placement decisions of domain builders
+ * and monitoring, and logged with debug-key 'u' for NUMA debugging.
+ *
+ * Maintaining this under heap_lock does not reduce scalability, as the
+ * allocator is already serialized on it. The accessor macro abstracts the
+ * storage to ease future changes (e.g. moving to per-node lock granularity).
+ */
+#define node_avail_pages(node) (node_avail_pages[node])
+static unsigned long node_avail_pages[MAX_NUMNODES];
+
+/* total outstanding claims by all domains */
+static unsigned long outstanding_claims;
 
 static unsigned long avail_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
@@ -1072,8 +1093,10 @@ static struct page_info *alloc_heap_pages(
 
     ASSERT(avail[node][zone] >= request);
     avail[node][zone] -= request;
+    ASSERT(node_avail_pages(node) >= request);
+    node_avail_pages(node) -= request;
+    ASSERT(total_avail_pages >= request);
     total_avail_pages -= request;
-    ASSERT(total_avail_pages >= 0);
 
     if ( !(memflags & MEMF_no_refcount) )
         consume_outstanding_claims(d, request);
@@ -1235,8 +1258,10 @@ static int reserve_offlined_page(struct page_info *head)
             continue;
 
         avail[node][zone]--;
+        ASSERT(node_avail_pages(node) > 0);
+        node_avail_pages(node)--;
+        ASSERT(total_avail_pages > 0);
         total_avail_pages--;
-        ASSERT(total_avail_pages >= 0);
 
         page_list_add_tail(cur_head,
                            test_bit(_PGC_broken, &cur_head->count_info) ?
@@ -1559,6 +1584,7 @@ static void free_heap_pages(
     }
 
     avail[node][zone] += 1 << order;
+    node_avail_pages(node) += 1 << order;
     total_avail_pages += 1 << order;
     if ( need_scrub )
     {
@@ -2816,7 +2842,7 @@ unsigned long avail_domheap_pages_region(
 
 unsigned long avail_node_heap_pages(unsigned int nodeid)
 {
-    return avail_heap_pages(MEMZONE_XEN, NR_ZONES -1, nodeid);
+    return node_avail_pages(nodeid);
 }
 
 
-- 
2.39.5




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.