
[Xen-devel] [PATCH v8 1/8] mm: Place unscrubbed pages at the end of pagelist



... so that it's easy to find pages that need to be scrubbed (those pages are
now marked with the _PGC_need_scrub bit).
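
For illustration only (not part of the patch), a minimal sketch of tagging
and testing a page with this bit, using hypothetical helper names. It relies
on PGC_need_scrub aliasing PGC_allocated (see the mm.h hunks below), which is
safe because the bit is only ever set while the page is free:

    /* Illustrative sketch; helper names are hypothetical, not in the patch. */
    static inline void mark_page_need_scrub(struct page_info *pg)
    {
        /* The page is free, so reusing the PGC_allocated bit is safe. */
        pg->count_info |= PGC_need_scrub;
    }

    static inline bool page_needs_scrub(struct page_info *pg)
    {
        return test_bit(_PGC_need_scrub, &pg->count_info);
    }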

We keep track of the first unscrubbed page in a page buddy using the
first_dirty field. For now it can take only two values: 0 (the whole buddy
needs scrubbing) or INVALID_DIRTY_IDX (the buddy does not need to be
scrubbed). Subsequent patches will allow scrubbing to be interrupted,
resulting in first_dirty taking any value.
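
To make the first_dirty convention concrete, a hypothetical helper (again not
part of the patch) showing how a caller could test whether a free buddy may
still contain dirty pages:

    /* Illustrative sketch only; not part of the patch. */
    static inline bool buddy_needs_scrub(const struct page_info *head)
    {
        /*
         * In this patch first_dirty is either 0 (the whole 2^order buddy
         * may be dirty) or INVALID_DIRTY_IDX (nothing to scrub); later
         * patches let it point anywhere inside the buddy.
         */
        return head->u.free.first_dirty != INVALID_DIRTY_IDX;
    }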

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
Changes in v8:
* Changed x86's definition of page_info.u.free from bitfields to natural
  data types
* Swapped the order of the bitfields in page_info.u.free for ARM
* Added a BUILD_BUG_ON to check the size of page_info.u.free.first_dirty on
  x86, and moved the previously defined BUILD_BUG_ON from init_heap_pages()
  to init_boot_pages() (to avoid introducing an extra '#ifdef x86' and to
  keep both checks together)

 xen/common/page_alloc.c  | 159 ++++++++++++++++++++++++++++++++++++++++-------
 xen/include/asm-arm/mm.h |  17 ++++-
 xen/include/asm-x86/mm.h |  15 +++++
 3 files changed, 167 insertions(+), 24 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 444ecf3..a39fd81 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -261,7 +261,11 @@ void __init init_boot_pages(paddr_t ps, paddr_t pe)
 #ifdef CONFIG_X86
     const unsigned long *badpage = NULL;
     unsigned int i, array_size;
+
+    BUILD_BUG_ON(8 * sizeof(((struct page_info *)0)->u.free.first_dirty) <
+                 MAX_ORDER + 1);
 #endif
+    BUILD_BUG_ON(sizeof(((struct page_info *)0)->u) != sizeof(unsigned long));
 
     ps = round_pgup(ps);
     pe = round_pgdown(pe);
@@ -375,6 +379,8 @@ typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
 #define heap(node, zone, order) ((*_heap[node])[zone][order])
 
+static unsigned long node_need_scrub[MAX_NUMNODES];
+
 static unsigned long *avail[MAX_NUMNODES];
 static long total_avail_pages;
 
@@ -670,13 +676,30 @@ static void check_low_mem_virq(void)
     }
 }
 
+/* Pages that need a scrub are added to tail, otherwise to head. */
+static void page_list_add_scrub(struct page_info *pg, unsigned int node,
+                                unsigned int zone, unsigned int order,
+                                unsigned int first_dirty)
+{
+    PFN_ORDER(pg) = order;
+    pg->u.free.first_dirty = first_dirty;
+
+    if ( first_dirty != INVALID_DIRTY_IDX )
+    {
+        ASSERT(first_dirty < (1U << order));
+        page_list_add_tail(pg, &heap(node, zone, order));
+    }
+    else
+        page_list_add(pg, &heap(node, zone, order));
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi,
     unsigned int order, unsigned int memflags,
     struct domain *d)
 {
-    unsigned int i, j, zone = 0, nodemask_retry = 0;
+    unsigned int i, j, zone = 0, nodemask_retry = 0, first_dirty;
     nodeid_t first_node, node = MEMF_get_node(memflags), req_node = node;
     unsigned long request = 1UL << order;
     struct page_info *pg;
@@ -790,12 +813,26 @@ static struct page_info *alloc_heap_pages(
     return NULL;
 
  found: 
+
+    first_dirty = pg->u.free.first_dirty;
+
     /* We may have to halve the chunk a number of times. */
     while ( j != order )
     {
-        PFN_ORDER(pg) = --j;
-        page_list_add_tail(pg, &heap(node, zone, j));
-        pg += 1 << j;
+        j--;
+        page_list_add_scrub(pg, node, zone, j,
+                            (1U << j) > first_dirty ?
+                            first_dirty : INVALID_DIRTY_IDX);
+        pg += 1U << j;
+
+        if ( first_dirty != INVALID_DIRTY_IDX )
+        {
+            /* Adjust first_dirty */
+            if ( first_dirty >= 1U << j )
+                first_dirty -= 1U << j;
+            else
+                first_dirty = 0; /* We've moved past original first_dirty */
+        }
     }
 
     ASSERT(avail[node][zone] >= request);
@@ -842,12 +879,20 @@ static int reserve_offlined_page(struct page_info *head)
     unsigned int node = phys_to_nid(page_to_maddr(head));
     int zone = page_to_zone(head), i, head_order = PFN_ORDER(head), count = 0;
     struct page_info *cur_head;
-    int cur_order;
+    unsigned int cur_order, first_dirty;
 
     ASSERT(spin_is_locked(&heap_lock));
 
     cur_head = head;
 
+    /*
+     * We may break the buddy so let's mark the head as clean. Then, when
+     * merging chunks back into the heap, we will see whether the chunk has
+     * unscrubbed pages and set its first_dirty properly.
+     */
+    first_dirty = head->u.free.first_dirty;
+    head->u.free.first_dirty = INVALID_DIRTY_IDX;
+
     page_list_del(head, &heap(node, zone, head_order));
 
     while ( cur_head < (head + (1 << head_order)) )
@@ -858,6 +903,8 @@ static int reserve_offlined_page(struct page_info *head)
         if ( page_state_is(cur_head, offlined) )
         {
             cur_head++;
+            if ( first_dirty != INVALID_DIRTY_IDX && first_dirty )
+                first_dirty--;
             continue;
         }
 
@@ -884,9 +931,20 @@ static int reserve_offlined_page(struct page_info *head)
             {
             merge:
                 /* We don't consider merging outside the head_order. */
-                page_list_add_tail(cur_head, &heap(node, zone, cur_order));
-                PFN_ORDER(cur_head) = cur_order;
+                page_list_add_scrub(cur_head, node, zone, cur_order,
+                                    (1U << cur_order) > first_dirty ?
+                                    first_dirty : INVALID_DIRTY_IDX);
                 cur_head += (1 << cur_order);
+
+                /* Adjust first_dirty if needed. */
+                if ( first_dirty != INVALID_DIRTY_IDX )
+                {
+                    if ( first_dirty >=  1U << cur_order )
+                        first_dirty -= 1U << cur_order;
+                    else
+                        first_dirty = 0;
+                }
+
                 break;
             }
         }
@@ -911,9 +969,53 @@ static int reserve_offlined_page(struct page_info *head)
     return count;
 }
 
+static void scrub_free_pages(unsigned int node)
+{
+    struct page_info *pg;
+    unsigned int zone;
+
+    ASSERT(spin_is_locked(&heap_lock));
+
+    if ( !node_need_scrub[node] )
+        return;
+
+    for ( zone = 0; zone < NR_ZONES; zone++ )
+    {
+        unsigned int order = MAX_ORDER;
+
+        do {
+            while ( !page_list_empty(&heap(node, zone, order)) )
+            {
+                unsigned int i;
+
+                /* Unscrubbed pages are always at the end of the list. */
+                pg = page_list_last(&heap(node, zone, order));
+                if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
+                    break;
+
+                for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
+                {
+                    if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
+                    {
+                        scrub_one_page(&pg[i]);
+                        pg[i].count_info &= ~PGC_need_scrub;
+                        node_need_scrub[node]--;
+                    }
+                }
+
+                page_list_del(pg, &heap(node, zone, order));
+                page_list_add_scrub(pg, node, zone, order, INVALID_DIRTY_IDX);
+
+                if ( node_need_scrub[node] == 0 )
+                    return;
+            }
+        } while ( order-- != 0 );
+    }
+}
+
 /* Free 2^@order set of pages. */
 static void free_heap_pages(
-    struct page_info *pg, unsigned int order)
+    struct page_info *pg, unsigned int order, bool need_scrub)
 {
     unsigned long mask, mfn = page_to_mfn(pg);
     unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
@@ -953,10 +1055,20 @@ static void free_heap_pages(
         /* This page is not a guest frame any more. */
         page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
         set_gpfn_from_mfn(mfn + i, INVALID_M2P_ENTRY);
+
+        if ( need_scrub )
+            pg[i].count_info |= PGC_need_scrub;
     }
 
     avail[node][zone] += 1 << order;
     total_avail_pages += 1 << order;
+    if ( need_scrub )
+    {
+        node_need_scrub[node] += 1 << order;
+        pg->u.free.first_dirty = 0;
+    }
+    else
+        pg->u.free.first_dirty = INVALID_DIRTY_IDX;
 
     if ( tmem_enabled() )
         midsize_alloc_zone_pages = max(
@@ -980,6 +1092,12 @@ static void free_heap_pages(
 
             page_list_del(predecessor, &heap(node, zone, order));
 
+            /* Keep predecessor's first_dirty if it is already set. */
+            if ( predecessor->u.free.first_dirty == INVALID_DIRTY_IDX &&
+                 pg->u.free.first_dirty != INVALID_DIRTY_IDX )
+                predecessor->u.free.first_dirty = (1U << order) +
+                                                  pg->u.free.first_dirty;
+
             pg = predecessor;
         }
         else
@@ -999,12 +1117,14 @@ static void free_heap_pages(
         order++;
     }
 
-    PFN_ORDER(pg) = order;
-    page_list_add_tail(pg, &heap(node, zone, order));
+    page_list_add_scrub(pg, node, zone, order, pg->u.free.first_dirty);
 
     if ( tainted )
         reserve_offlined_page(pg);
 
+    if ( need_scrub )
+        scrub_free_pages(node);
+
     spin_unlock(&heap_lock);
 }
 
@@ -1225,7 +1345,7 @@ unsigned int online_page(unsigned long mfn, uint32_t *status)
     spin_unlock(&heap_lock);
 
     if ( (y & PGC_state) == PGC_state_offlined )
-        free_heap_pages(pg, 0);
+        free_heap_pages(pg, 0, false);
 
     return ret;
 }
@@ -1294,7 +1414,7 @@ static void init_heap_pages(
             nr_pages -= n;
         }
 
-        free_heap_pages(pg+i, 0);
+        free_heap_pages(pg + i, 0, false);
     }
 }
 
@@ -1621,7 +1741,7 @@ void free_xenheap_pages(void *v, unsigned int order)
 
     memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
 
-    free_heap_pages(virt_to_page(v), order);
+    free_heap_pages(virt_to_page(v), order, false);
 }
 
 #else
@@ -1675,12 +1795,9 @@ void free_xenheap_pages(void *v, unsigned int order)
     pg = virt_to_page(v);
 
     for ( i = 0; i < (1u << order); i++ )
-    {
-        scrub_one_page(&pg[i]);
         pg[i].count_info &= ~PGC_xen_heap;
-    }
 
-    free_heap_pages(pg, order);
+    free_heap_pages(pg, order, true);
 }
 
 #endif
@@ -1789,7 +1906,7 @@ struct page_info *alloc_domheap_pages(
     if ( d && !(memflags & MEMF_no_owner) &&
          assign_pages(d, pg, order, memflags) )
     {
-        free_heap_pages(pg, order);
+        free_heap_pages(pg, order, false);
         return NULL;
     }
     
@@ -1857,11 +1974,7 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
             scrub = 1;
         }
 
-        if ( unlikely(scrub) )
-            for ( i = 0; i < (1 << order); i++ )
-                scrub_one_page(&pg[i]);
-
-        free_heap_pages(pg, order);
+        free_heap_pages(pg, order, scrub);
     }
 
     if ( drop_dom_ref )
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index ef84b72..3b3d38f 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -43,8 +43,16 @@ struct page_info
         } inuse;
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+            /*
+             * Index of the first *possibly* unscrubbed page in the buddy.
+             * One more bit than maximum possible order to accommodate
+             * INVALID_DIRTY_IDX.
+             */
+#define INVALID_DIRTY_IDX ((1UL << (MAX_ORDER + 1)) - 1)
+            unsigned long first_dirty:MAX_ORDER + 1;
+
             /* Do TLBs need flushing for safety before next page use? */
-            bool_t need_tlbflush;
+            bool need_tlbflush:1;
         } free;
 
     } u;
@@ -107,6 +115,13 @@ struct page_info
 #define PGC_count_width   PG_shift(9)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
+/*
+ * Page needs to be scrubbed. Since this bit can only be set on a page that is
+ * free (i.e. in PGC_state_free) we can reuse PGC_allocated bit.
+ */
+#define _PGC_need_scrub   _PGC_allocated
+#define PGC_need_scrub    PGC_allocated
+
 extern mfn_t xenheap_mfn_start, xenheap_mfn_end;
 extern vaddr_t xenheap_virt_end;
 #ifdef CONFIG_ARM_64
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 2bf3f33..86b1723 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -87,6 +87,14 @@ struct page_info
 
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+            /*
+             * Index of the first *possibly* unscrubbed page in the buddy.
+             * One more bit than maximum possible order to accommodate
+             * INVALID_DIRTY_IDX.
+             */
+#define INVALID_DIRTY_IDX ((1UL << (MAX_ORDER + 1)) - 1)
+            unsigned int first_dirty;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;
@@ -233,6 +241,13 @@ struct page_info
 #define PGC_count_width   PG_shift(9)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
+/*
+ * Page needs to be scrubbed. Since this bit can only be set on a page that is
+ * free (i.e. in PGC_state_free) we can reuse PGC_allocated bit.
+ */
+#define _PGC_need_scrub   _PGC_allocated
+#define PGC_need_scrub    PGC_allocated
+
 #define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
 #define is_xen_heap_mfn(mfn) \
     (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn)))
-- 
1.8.3.1

