|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v4 02/10] xen/page_alloc: Optimize getting per-NUMA-node free page counts
From: Alejandro Vallejo <alejandro.vallejo@xxxxxxxxx>
Add per-node free page counters (node_avail_pages[]), protected by
heap_lock, updated in real-time in lockstep with total_avail_pages
as pages are allocated and freed.
This replaces the avail_heap_pages() loop over all online nodes and
zones in avail_node_heap_pages() with a direct O(1) array lookup,
making it efficient to get the total free pages for a given NUMA node.
The per-node counts are currently exposed via sysctl for the NUMA
placement decisions of domain builders and for monitoring, and via the
debug-key 'u', which prints NUMA info to the printk buffer for debugging.
They will also be used for checking if a NUMA node may be able to
satisfy a NUMA-node-specific allocation by comparing node availability
against node-specific claims before looking for pages in the zones
of the node.
Also change total_avail_pages and outstanding_claims to unsigned long:
they never become negative (already enforced by ASSERT/BUG_ON), so
converting them to unsigned long makes this explicit and additionally
fixes signed/unsigned comparison warnings.
This only requires moving the ASSERT to before the subtraction.
See the previous commit moving the BUG_ON for outstanding_claims.
This lays the groundwork for implementing per-node claims.
Signed-off-by: Alejandro Vallejo <alejandro.vallejo@xxxxxxxxx>
Signed-off-by: Bernhard Kaindl <bernhard.kaindl@xxxxxxxxxx>
---
xen/common/page_alloc.c | 36 +++++++++++++++++++++++++++++++-----
1 file changed, 31 insertions(+), 5 deletions(-)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 6f7f30c64605..2176cb113fe2 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -483,11 +483,32 @@ static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
static unsigned long node_need_scrub[MAX_NUMNODES];
+/* avail[node][zone] is the number of free pages on that node and zone. */
static unsigned long *avail[MAX_NUMNODES];
-static long total_avail_pages;
+/* Global available pages, updated in real-time, protected by heap_lock */
+static unsigned long total_avail_pages;
+/* The global heap lock, protecting access to the heap and related structures
*/
static DEFINE_SPINLOCK(heap_lock);
-static long outstanding_claims; /* total outstanding claims by all domains */
+
+/*
+ * Per-node count of available pages, protected by heap_lock, updated in
+ * lockstep with total_avail_pages as pages are allocated and freed.
+ *
+ * Each entry holds the sum of avail[node][zone] across all zones, used for
+ * efficiently checking node-local availability for allocation requests.
+ * Also provided via sysctl for NUMA placement decisions of domain builders
+ * and monitoring, and logged with debug-key 'u' for NUMA debugging.
+ *
+ * Maintaining this under heap_lock does not reduce scalability, as the
+ * allocator is already serialized on it. The accessor macro abstracts the
+ * storage to ease future changes (e.g. moving to per-node lock granularity).
+ */
+#define node_avail_pages(node) (node_avail_pages[node])
+static unsigned long node_avail_pages[MAX_NUMNODES];
+
+/* total outstanding claims by all domains */
+static unsigned long outstanding_claims;
static unsigned long avail_heap_pages(
unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
@@ -1072,8 +1093,10 @@ static struct page_info *alloc_heap_pages(
ASSERT(avail[node][zone] >= request);
avail[node][zone] -= request;
+ ASSERT(node_avail_pages(node) >= request);
+ node_avail_pages(node) -= request;
+ ASSERT(total_avail_pages >= request);
total_avail_pages -= request;
- ASSERT(total_avail_pages >= 0);
if ( !(memflags & MEMF_no_refcount) )
consume_outstanding_claims(d, request);
@@ -1235,8 +1258,10 @@ static int reserve_offlined_page(struct page_info *head)
continue;
avail[node][zone]--;
+ ASSERT(node_avail_pages(node) > 0);
+ node_avail_pages(node)--;
+ ASSERT(total_avail_pages > 0);
total_avail_pages--;
- ASSERT(total_avail_pages >= 0);
page_list_add_tail(cur_head,
test_bit(_PGC_broken, &cur_head->count_info) ?
@@ -1559,6 +1584,7 @@ static void free_heap_pages(
}
avail[node][zone] += 1 << order;
+ node_avail_pages(node) += 1 << order;
total_avail_pages += 1 << order;
if ( need_scrub )
{
@@ -2816,7 +2842,7 @@ unsigned long avail_domheap_pages_region(
unsigned long avail_node_heap_pages(unsigned int nodeid)
{
- return avail_heap_pages(MEMZONE_XEN, NR_ZONES -1, nodeid);
+ return node_avail_pages(nodeid);
}
--
2.39.5
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |