
[Xen-devel] [PATCH 4/4] xen: use idle vcpus to scrub pages



To avoid heavy contention on heap_lock, scrubbing from the idle loop works in batches on a per-CPU list (a simplified sketch follows):
 - Delist a batch of pages from the per-node "scrub" free page list onto a percpu list.
 - Scrub the pages on this percpu list.
 - Add the now-clean pages back to the normal "heap" free page list, merging with
   adjacent chunks where possible.
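
For readers skimming the diff, this is an outline of the flow that scrub_free_pages() below implements; it drops the zone/order walk, batch accounting and page-state bit handling, and only shows the three steps above. All identifiers are the ones used in the real patch; this is a sketch, not the exact code:

    void scrub_free_pages(void)                    /* called from idle_loop() */
    {
        struct page_list_head *temp_list = &this_cpu(scrub_list_cpu);
        unsigned int cpu = smp_processor_id();
        struct page_info *pg;
        unsigned int i;

        if ( !enable_idle_scrub )
            return;

        do {
            if ( page_list_empty(temp_list) )
            {
                /* Step 1: under heap_lock, delist up to SCRUB_BATCH pages
                 * from the per-node "scrub" free lists onto this CPU's
                 * private list (the zone/order walk is elided here). */
                spin_lock(&heap_lock);
                /* ... page_list_remove_head(&scrub(node, zone, order)) and
                 *     page_list_add_tail(pg, temp_list) in a loop ... */
                spin_unlock(&heap_lock);

                if ( page_list_empty(temp_list) )  /* nothing left to scrub */
                    return;
            }

            /* Step 2: scrub the private list without holding heap_lock. */
            while ( (pg = page_list_remove_head(temp_list)) != NULL )
            {
                for ( i = 0; i < (1u << PFN_ORDER(pg)); i++ )
                    scrub_one_page(&pg[i]);

                /* Step 3: under heap_lock, return the clean chunk to the
                 * normal free lists, merging with buddies where possible. */
                spin_lock(&heap_lock);
                merge_free_trunks(pg, PFN_ORDER(pg), 0);
                spin_unlock(&heap_lock);

                if ( softirq_pending(cpu) )        /* yield to pending work */
                    return;
            }
        } while ( !softirq_pending(cpu) );
    }

The point of the structure is that heap_lock is only held for the short delist and re-add phases, while the actual scrubbing (the expensive part) runs lock-free on the per-CPU list.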

Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
---
 xen/arch/x86/domain.c   |    1 +
 xen/common/domain.c     |    2 ++
 xen/common/page_alloc.c |   82 +++++++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/mm.h    |    2 ++
 4 files changed, 87 insertions(+)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 6fddd4c..a46a2ba 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -116,6 +116,7 @@ static void idle_loop(void)
     {
         if ( cpu_is_offline(smp_processor_id()) )
             play_dead();
+        scrub_free_pages();
         (*pm_idle)();
         do_tasklet();
         do_softirq();
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 4291e29..bd386e6 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -587,12 +587,14 @@ int domain_kill(struct domain *d)
         d->tmem_client = NULL;
         /* fallthrough */
     case DOMDYING_dying:
+        enable_idle_scrub = 0;
         rc = domain_relinquish_resources(d);
         if ( rc != 0 )
         {
             BUG_ON(rc != -EAGAIN);
             break;
         }
+        enable_idle_scrub = 1;
         for_each_vcpu ( d, v )
             unmap_vcpu_info(v);
         d->is_dying = DOMDYING_dead;
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 5698596..2b2fd04 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -79,6 +79,9 @@ PAGE_LIST_HEAD(page_offlined_list);
 /* Broken page list, protected by heap_lock. */
 PAGE_LIST_HEAD(page_broken_list);
 
+volatile bool_t enable_idle_scrub;
+DEFINE_PER_CPU(struct page_list_head, scrub_list_cpu);
+
 /*************************
  * BOOT-TIME ALLOCATOR
  */
@@ -1387,7 +1390,86 @@ void __init scrub_heap_pages(void)
     setup_low_mem_virq();
 }
 
+#define SCRUB_BATCH 1024
+void scrub_free_pages(void)
+{
+    struct page_info *pg;
+    unsigned int i, j, node_empty = 0, nr_delisted = 0;
+    int order;
+    unsigned int cpu = smp_processor_id();
+    unsigned int node = cpu_to_node(cpu);
+    struct page_list_head *temp_list = &this_cpu(scrub_list_cpu);
+
+    if ( !enable_idle_scrub )
+        return;
+
+    do
+    {
+        if ( page_list_empty(temp_list) )
+        {
+            /* Delist a batch of pages from global scrub list */
+            spin_lock(&heap_lock);
+            for ( j = 0; j < NR_ZONES; j++ )
+            {
+                for ( order = MAX_ORDER; order >= 0; order-- )
+                {
+                    if ( (pg = page_list_remove_head(&scrub(node, j, order))) )
+                    {
+                        for ( i = 0; i < (1 << order); i++)
+                            mark_page_offline(&pg[i], 0);
+
+                        page_list_add_tail(pg, temp_list);
+                        nr_delisted += (1 << order);
+                        if ( nr_delisted > SCRUB_BATCH )
+                        {
+                            nr_delisted = 0;
+                            spin_unlock(&heap_lock);
+                            goto start_scrub;
+                        }
+                    }
+                }
+            }
+
+            node_empty = 1;
+            spin_unlock(&heap_lock);
+        }
+        else
+        {
+start_scrub:
+            /* Scrub percpu list */
+            while ( !page_list_empty(temp_list) )
+            {
+                pg = page_list_remove_head(temp_list);
+                ASSERT(pg);
+                order = PFN_ORDER(pg);
+                for ( i = 0; i < (1 << order); i++ )
+                {
+                    ASSERT( test_bit(_PGC_need_scrub, &(pg[i].count_info)) );
+                    scrub_one_page(&pg[i]);
+                    pg[i].count_info &= ~(PGC_need_scrub);
+                }
 
+                /* Add pages to free heap list */
+                spin_lock(&heap_lock);
+                for ( i = 0; i < (1 << order); i++ )
+                {
+                    ASSERT ( !test_bit(_PGC_need_scrub, &(pg[i].count_info)) );
+                    pg[i].count_info |= PGC_state_free;
+                }
+                ASSERT (node == phys_to_nid(page_to_maddr(pg)));
+                merge_free_trunks(pg, order, 0);
+                spin_unlock(&heap_lock);
+
+                if ( softirq_pending(cpu) )
+                    return;
+            }
+        }
+
+        /* Scrub list of this node is empty */
+        if ( node_empty )
+            return;
+    } while ( !softirq_pending(cpu) );
+}
 
 /*************************
  * XEN-HEAP SUB-ALLOCATOR
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index b183189..c3f481d 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -34,6 +34,7 @@
 
 struct domain;
 struct page_info;
+extern volatile bool_t enable_idle_scrub;
 
 /* Boot-time allocator. Turns into generic allocator after bootstrap. */
 void init_boot_pages(paddr_t ps, paddr_t pe);
@@ -78,6 +79,7 @@ int query_page_offline(unsigned long mfn, uint32_t *status);
 unsigned long total_free_pages(void);
 
 void scrub_heap_pages(void);
+void scrub_free_pages(void);
 
 int assign_pages(
     struct domain *d,
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
