[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 4/9] mm: Scrub memory from idle loop



Instead of scrubbing pages during guest destruction (from
free_heap_pages()), do this opportunistically from the idle loop.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
Changes in v3:
* If memory-only nodes exist, select the closest one for scrubbing.
* Don't scrub from the idle loop until we reach SYS_STATE_active.

 xen/arch/arm/domain.c   |   13 ++++--
 xen/arch/x86/domain.c   |    3 +-
 xen/common/page_alloc.c |   98 +++++++++++++++++++++++++++++++++++++++++-----
 xen/include/xen/mm.h    |    1 +
 4 files changed, 98 insertions(+), 17 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 76310ed..38d6331 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -46,13 +46,16 @@ void idle_loop(void)
         if ( cpu_is_offline(smp_processor_id()) )
             stop_cpu();
 
-        local_irq_disable();
-        if ( cpu_is_haltable(smp_processor_id()) )
+        if ( !scrub_free_pages() )
         {
-            dsb(sy);
-            wfi();
+            local_irq_disable();
+            if ( cpu_is_haltable(smp_processor_id()) )
+            {
+                dsb(sy);
+                wfi();
+            }
+            local_irq_enable();
         }
-        local_irq_enable();
 
         do_tasklet();
         do_softirq();
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 90e2b1f..a5f62b5 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -118,7 +118,8 @@ static void idle_loop(void)
     {
         if ( cpu_is_offline(smp_processor_id()) )
             play_dead();
-        (*pm_idle)();
+        if ( !scrub_free_pages() )
+            (*pm_idle)();
         do_tasklet();
         do_softirq();
         /*
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 055654d..fcd7308 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1035,16 +1035,82 @@ merge_and_free_buddy(struct page_info *pg, unsigned int node,
     return pg;
 }
 
-static void scrub_free_pages(unsigned int node)
+static nodemask_t node_scrubbing;
+
+static unsigned int node_to_scrub(bool get_node)
+{
+    nodeid_t node = cpu_to_node(smp_processor_id()), local_node;
+    nodeid_t closest = NUMA_NO_NODE;
+    u8 dist, shortest = 0xff;
+
+    if ( node == NUMA_NO_NODE )
+        node = 0;
+
+    if ( node_need_scrub[node] &&
+         (!get_node || !node_test_and_set(node, node_scrubbing)) )
+        return node;
+
+    /*
+     * See if there are memory-only nodes that need scrubbing and choose
+     * the closest one.
+     */
+    local_node = node;
+    while ( 1 )
+    {
+        do {
+            node = cycle_node(node, node_online_map);
+        } while ( !cpumask_empty(&node_to_cpumask(node)) &&
+                  (node != local_node) );
+
+        if ( node == local_node )
+            break;
+
+        if ( node_need_scrub[node] )
+        {
+            if ( !get_node )
+                return node;
+
+            if ( !node_test_and_set(node, node_scrubbing) )
+            {
+                dist = __node_distance(local_node, node);
+                if ( (dist < shortest) || (dist == NUMA_NO_DISTANCE) )
+                {
+                    /* Release previous node. */
+                    if ( closest != NUMA_NO_NODE )
+                        node_clear(closest, node_scrubbing);
+                    shortest = dist;
+                    closest = node;
+                }
+                else
+                    node_clear(node, node_scrubbing);
+            }
+        }
+    }
+
+    return closest;
+}
+
+bool scrub_free_pages(void)
 {
     struct page_info *pg;
     unsigned int zone, order;
     unsigned long i;
+    unsigned int cpu = smp_processor_id();
+    bool preempt = false;
+    nodeid_t node;
 
-    ASSERT(spin_is_locked(&heap_lock));
+    /*
+     * Don't scrub while dom0 is being constructed since we may
+     * fail trying to call map_domain_page() from scrub_one_page().
+     */
+    if ( system_state < SYS_STATE_active )
+        return false;
+ 
+    node = node_to_scrub(true);
+    if ( node == NUMA_NO_NODE )
+        return false;
 
-    if ( !node_need_scrub[node] )
-        return;
+    spin_lock(&heap_lock);
 
     for ( zone = 0; zone < NR_ZONES; zone++ )
     {
@@ -1065,16 +1131,29 @@ static void scrub_free_pages(unsigned int node)
                         pg[i].count_info &= ~PGC_need_scrub;
                         node_need_scrub[node]--;
                     }
+                    if ( softirq_pending(cpu) )
+                    {
+                        preempt = true;
+                        break;
+                    }
                 }
 
-                page_list_del(pg, &heap(node, zone, order));
-                merge_and_free_buddy(pg, node, zone, order, false);
+                if ( i == (1UL << order) )
+                {
+                    page_list_del(pg, &heap(node, zone, order));
+                    merge_and_free_buddy(pg, node, zone, order, false);
+                }
 
-                if ( node_need_scrub[node] == 0 )
-                    return;
+                if ( preempt || (node_need_scrub[node] == 0) )
+                    goto out;
             }
         } while ( order-- != 0 );
     }
+
+ out:
+    spin_unlock(&heap_lock);
+    node_clear(node, node_scrubbing);
+    return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
 
 /* Free 2^@order set of pages. */
@@ -1141,9 +1220,6 @@ static void free_heap_pages(
     if ( tainted )
         reserve_offlined_page(pg);
 
-    if ( need_scrub )
-        scrub_free_pages(node);
-
     spin_unlock(&heap_lock);
 }
 
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 88de3c1..b66dbbe 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -138,6 +138,7 @@ void init_xenheap_pages(paddr_t ps, paddr_t pe);
 void xenheap_max_mfn(unsigned long mfn);
 void *alloc_xenheap_pages(unsigned int order, unsigned int memflags);
 void free_xenheap_pages(void *v, unsigned int order);
+bool scrub_free_pages(void);
 #define alloc_xenheap_page() (alloc_xenheap_pages(0,0))
 #define free_xenheap_page(v) (free_xenheap_pages(v,0))
 /* Map machine page range in Xen virtual address space. */
-- 
1.7.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.