
[Xen-changelog] [xen-unstable] [BALLOON] Make the domain0 ballooning logic in xend aware of pages that are on the page scrub list.



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 234939c0ec3a14cac448ec65e4dbaa173ccae16b
# Parent  7154e0416313e5dcecd7c2e78ca18e51569202eb
[BALLOON] Make the domain0 ballooning logic in xend aware of pages that
are on the page scrub list.  This fixes the case where crashing/restarting
a domain can cause dom0 to balloon more than necessary.
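
For illustration, a minimal sketch of the new accounting (simplified
from the balloon.py hunk below; all values in KiB, names as in the
patch):

    # Sketch of the scrub-aware balloon calculation (simplified).
    physinfo  = xc.physinfo()
    free_mem  = physinfo['free_memory']
    scrub_mem = physinfo['scrub_memory']   # freed pages awaiting scrub

    if free_mem + scrub_mem >= need_mem:
        # Enough memory is free, or will be once scrubbing completes,
        # so wait for the scrubber instead of shrinking dom0 further.
        pass
    else:
        # Balloon out only the true shortfall: pages on the scrub list
        # will become free without dom0 giving anything up.
        new_alloc = dom0_alloc - (need_mem - free_mem - scrub_mem)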

I changed the physinfo dictionary in Python to be in KiB, rather than
MiB, to avoid accumulating ugly rounding errors.  I tried to avoid
changing units anywhere else.
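
As a worked example of the rounding this avoids (assuming the usual
4 KiB page size; the arithmetic mirrors the removed pages_to_mb()):

    # With 4 KiB pages, XC_PAGE_SIZE / 1024 == 4, so the old MiB
    # conversion rounded every value up to the next whole MiB:
    #   pages_to_mb(1)   == (4 + 1023) / 1024    == 1  (really 4 KiB)
    #   pages_to_mb(257) == (1028 + 1023) / 1024 == 2  (really ~1 MiB)
    # Reporting KiB instead is exact for any page count:
    def pages_to_kib(pages, page_size=4096):   # page size is an assumption
        return pages * (page_size / 1024)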

Signed-off-by: Charles Coffing <ccoffing@xxxxxxxxxx>
---
 tools/python/xen/lowlevel/xc/xc.c |   21 ++++++-----
 tools/python/xen/xend/XendNode.py |    3 +
 tools/python/xen/xend/balloon.py  |   69 ++++++++++++++++++++------------------
 xen/arch/x86/dom0_ops.c           |    1 
 xen/common/page_alloc.c           |   12 +++++-
 xen/include/public/dom0_ops.h     |    1 
 xen/include/xen/mm.h              |    1 
 7 files changed, 64 insertions(+), 44 deletions(-)

diff -r 7154e0416313 -r 234939c0ec3a tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Tue Jun 27 11:50:57 2006 +0100
+++ b/tools/python/xen/lowlevel/xc/xc.c Tue Jun 27 12:03:46 2006 +0100
@@ -582,6 +582,12 @@ static PyObject *pyxc_readconsolering(Xc
 }
 
 
+static unsigned long pages_to_kib(unsigned long pages)
+{
+    return pages * (XC_PAGE_SIZE / 1024);
+}
+
+
 static PyObject *pyxc_pages_to_kib(XcObject *self, PyObject *args)
 {
     unsigned long pages;
@@ -589,13 +595,7 @@ static PyObject *pyxc_pages_to_kib(XcObj
     if (!PyArg_ParseTuple(args, "l", &pages))
         return NULL;
 
-    return PyLong_FromUnsignedLong(pages * (XC_PAGE_SIZE / 1024));
-}
-
-
-static unsigned long pages_to_mb(unsigned long pages)
-{
-    return (pages * (XC_PAGE_SIZE / 1024) + 1023) / 1024;
+    return PyLong_FromUnsignedLong(pages_to_kib(pages));
 }
 
 
@@ -618,13 +618,14 @@ static PyObject *pyxc_physinfo(XcObject 
     if(q>cpu_cap)
         *(q-1)=0;
 
-    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:i,s:s}",
+    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
                          "threads_per_core", info.threads_per_core,
                          "cores_per_socket", info.cores_per_socket,
                          "sockets_per_node", info.sockets_per_node,
                          "nr_nodes",         info.nr_nodes,
-                         "total_memory",     pages_to_mb(info.total_pages),
-                         "free_memory",      pages_to_mb(info.free_pages),
+                         "total_memory",     pages_to_kib(info.total_pages),
+                         "free_memory",      pages_to_kib(info.free_pages),
+                         "scrub_memory",     pages_to_kib(info.scrub_pages),
                          "cpu_khz",          info.cpu_khz,
                          "hw_caps",          cpu_cap);
 }
diff -r 7154e0416313 -r 234939c0ec3a tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Tue Jun 27 11:50:57 2006 +0100
+++ b/tools/python/xen/xend/XendNode.py Tue Jun 27 12:03:46 2006 +0100
@@ -64,6 +64,9 @@ class XendNode:
                            info['cores_per_socket'] *
                            info['threads_per_core'])
         info['cpu_mhz'] = info['cpu_khz'] / 1000
+        # physinfo is in KiB
+        info['total_memory'] = info['total_memory'] / 1024
+        info['free_memory']  = info['free_memory'] / 1024
 
         ITEM_ORDER = ['nr_cpus',
                       'nr_nodes',
diff -r 7154e0416313 -r 234939c0ec3a tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Tue Jun 27 11:50:57 2006 +0100
+++ b/tools/python/xen/xend/balloon.py  Tue Jun 27 12:03:46 2006 +0100
@@ -29,8 +29,6 @@ from XendError import VmError
 
 PROC_XEN_BALLOON = '/proc/xen/balloon'
 
-BALLOON_OUT_SLACK = 1 # MiB.  We need this because the physinfo details are
-                      # rounded.
 RETRY_LIMIT = 20
 RETRY_LIMIT_INCR = 5
 ##
@@ -68,22 +66,22 @@ def _get_proc_balloon(label):
         f.close()
 
 def get_dom0_current_alloc():
-    """Returns the current memory allocation (in MiB) of dom0."""
+    """Returns the current memory allocation (in KiB) of dom0."""
 
     kb = _get_proc_balloon(labels['current'])
     if kb == None:
         raise VmError('Failed to query current memory allocation of dom0.')
-    return kb / 1024
+    return kb
 
 def get_dom0_target_alloc():
-    """Returns the target memory allocation (in MiB) of dom0."""
+    """Returns the target memory allocation (in KiB) of dom0."""
 
     kb = _get_proc_balloon(labels['target'])
     if kb == None:
         raise VmError('Failed to query target memory allocation of dom0.')
-    return kb / 1024
+    return kb
 
-def free(required):
+def free(need_mem):
     """Balloon out memory from the privileged domain so that there is the
     specified required amount (in KiB) free.
     """
@@ -92,9 +90,10 @@ def free(required):
     # to balloon out to free some up.  Memory freed by a destroyed domain may
     # not appear in the free_memory field immediately, because it needs to be
     # scrubbed before it can be released to the free list, which is done
-    # asynchronously by Xen; ballooning is asynchronous also.  No matter where
-    # we expect the free memory to come from, therefore, we need to wait for
-    # it to become available.
+    # asynchronously by Xen; ballooning is asynchronous also.  Such memory
+    # does, however, need to be accounted for when calculating how much dom0
+    # needs to balloon.  No matter where we expect the free memory to come
+    # from, we need to wait for it to become available.
     #
     # We are not allowed to balloon below dom0_min_mem, or if dom0_min_mem
     # is 0, we cannot balloon at all.  Memory can still become available
@@ -108,43 +107,49 @@ def free(required):
     # usage, so we recheck the required alloc each time around the loop, but
     # track the last used value so that we don't trigger too many watches.
 
-    need_mem = (required + 1023) / 1024 + BALLOON_OUT_SLACK
-
     xroot = XendRoot.instance()
     xc = xen.lowlevel.xc.xc()
 
     try:
-        dom0_min_mem = xroot.get_dom0_min_mem()
+        dom0_min_mem = xroot.get_dom0_min_mem() * 1024
 
         retries = 0
         sleep_time = SLEEP_TIME_GROWTH
         last_new_alloc = None
         rlimit = RETRY_LIMIT
         while retries < rlimit:
-            free_mem = xc.physinfo()['free_memory']
+            physinfo = xc.physinfo()
+            free_mem = physinfo['free_memory']
+            scrub_mem = physinfo['scrub_memory']
 
             if free_mem >= need_mem:
-                log.debug("Balloon: free %d; need %d; done.", free_mem,
-                          need_mem)
+                log.debug("Balloon: %d KiB free; need %d; done.",
+                          free_mem, need_mem)
                 return
 
             if retries == 0:
-                rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR
-                log.debug("Balloon: free %d; need %d; retries: %d.", 
-                          free_mem, need_mem, rlimit)
+                rlimit += ((need_mem - free_mem)/1024/1024) * RETRY_LIMIT_INCR
+                log.debug("Balloon: %d KiB free; %d to scrub; need %d; 
retries: %d.",
+                          free_mem, scrub_mem, need_mem, rlimit)
 
             if dom0_min_mem > 0:
                 dom0_alloc = get_dom0_current_alloc()
-                new_alloc = dom0_alloc - (need_mem - free_mem)
+                new_alloc = dom0_alloc - (need_mem - free_mem - scrub_mem)
 
-                if (new_alloc >= dom0_min_mem and
-                    new_alloc != last_new_alloc):
-                    log.debug("Balloon: setting dom0 target to %d.",
-                              new_alloc)
-                    dom0 = XendDomain.instance().privilegedDomain()
-                    dom0.setMemoryTarget(new_alloc)
-                    last_new_alloc = new_alloc
-                    # Continue to retry, waiting for ballooning.
+                if free_mem + scrub_mem >= need_mem:
+                    if last_new_alloc == None:
+                        log.debug("Balloon: waiting on scrubbing")
+                        last_new_alloc = dom0_alloc
+                else:
+                    if (new_alloc >= dom0_min_mem and
+                        new_alloc != last_new_alloc):
+                        new_alloc_mb = new_alloc / 1024  # Round down
+                        log.debug("Balloon: setting dom0 target to %d MiB.",
+                                  new_alloc_mb)
+                        dom0 = XendDomain.instance().privilegedDomain()
+                        dom0.setMemoryTarget(new_alloc_mb)
+                        last_new_alloc = new_alloc
+                # Continue to retry, waiting for ballooning or scrubbing.
 
             time.sleep(sleep_time)
             if retries < 2 * RETRY_LIMIT:
@@ -154,15 +159,15 @@ def free(required):
         # Not enough memory; diagnose the problem.
         if dom0_min_mem == 0:
             raise VmError(('Not enough free memory and dom0_min_mem is 0, so '
-                           'I cannot release any more.  I need %d MiB but '
+                           'I cannot release any more.  I need %d KiB but '
                            'only have %d.') %
                           (need_mem, free_mem))
         elif new_alloc < dom0_min_mem:
             raise VmError(
-                ('I need %d MiB, but dom0_min_mem is %d and shrinking to '
-                 '%d MiB would leave only %d MiB free.') %
+                ('I need %d KiB, but dom0_min_mem is %d and shrinking to '
+                 '%d KiB would leave only %d KiB free.') %
                 (need_mem, dom0_min_mem, dom0_min_mem,
-                 free_mem + dom0_alloc - dom0_min_mem))
+                 free_mem + scrub_mem + dom0_alloc - dom0_min_mem))
         else:
             raise VmError('The privileged domain did not balloon!')
 
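A quick check of the retry arithmetic above: need_mem and free_mem are
now in KiB, so the shortfall is divided by 1024 twice to count whole
GiB, and each GiB of shortfall buys RETRY_LIMIT_INCR (5) extra retries:

    # Hypothetical numbers: 2 GiB more is needed than is currently free.
    # shortfall = need_mem - free_mem = 2097152 KiB
    # rlimit   += (2097152 / 1024 / 1024) * RETRY_LIMIT_INCR  ->  2 * 5 == 10
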
diff -r 7154e0416313 -r 234939c0ec3a xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Tue Jun 27 11:50:57 2006 +0100
+++ b/xen/arch/x86/dom0_ops.c   Tue Jun 27 12:03:46 2006 +0100
@@ -194,6 +194,7 @@ long arch_do_dom0_op(struct dom0_op *op,
         pi->nr_nodes         = 1;
         pi->total_pages      = total_pages;
         pi->free_pages       = avail_domheap_pages();
+        pi->scrub_pages      = avail_scrub_pages();
         pi->cpu_khz          = cpu_khz;
         memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
         memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
diff -r 7154e0416313 -r 234939c0ec3a xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Tue Jun 27 11:50:57 2006 +0100
+++ b/xen/common/page_alloc.c   Tue Jun 27 12:03:46 2006 +0100
@@ -61,6 +61,7 @@ custom_param("lowmem_emergency_pool", pa
 
 static DEFINE_SPINLOCK(page_scrub_lock);
 LIST_HEAD(page_scrub_list);
+static unsigned long scrub_pages;
 
 /*********************
  * ALLOCATION BITMAP
@@ -696,6 +697,7 @@ void free_domheap_pages(struct page_info
             {
                 spin_lock(&page_scrub_lock);
                 list_add(&pg[i].list, &page_scrub_list);
+                scrub_pages++;
                 spin_unlock(&page_scrub_lock);
             }
         }
@@ -784,9 +786,10 @@ static void page_scrub_softirq(void)
         /* Remove peeled pages from the list. */
         ent->next->prev = &page_scrub_list;
         page_scrub_list.next = ent->next;
-        
+        scrub_pages -= (i+1);
+
         spin_unlock(&page_scrub_lock);
-        
+
         /* Working backwards, scrub each page in turn. */
         while ( ent != &page_scrub_list )
         {
@@ -798,6 +801,11 @@ static void page_scrub_softirq(void)
             free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
         }
     } while ( (NOW() - start) < MILLISECS(1) );
+}
+
+unsigned long avail_scrub_pages(void)
+{
+    return scrub_pages;
 }
 
 static __init int page_scrub_init(void)
diff -r 7154e0416313 -r 234939c0ec3a xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Tue Jun 27 11:50:57 2006 +0100
+++ b/xen/include/public/dom0_ops.h     Tue Jun 27 12:03:46 2006 +0100
@@ -231,6 +231,7 @@ struct dom0_physinfo {
     uint32_t cpu_khz;
     uint64_t total_pages;
     uint64_t free_pages;
+    uint64_t scrub_pages;
     uint32_t hw_cap[8];
 };
 typedef struct dom0_physinfo dom0_physinfo_t;
diff -r 7154e0416313 -r 234939c0ec3a xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Tue Jun 27 11:50:57 2006 +0100
+++ b/xen/include/xen/mm.h      Tue Jun 27 12:03:46 2006 +0100
@@ -91,6 +91,7 @@ extern struct list_head page_scrub_list;
         if ( !list_empty(&page_scrub_list) )    \
             raise_softirq(PAGE_SCRUB_SOFTIRQ);  \
     } while ( 0 )
+unsigned long avail_scrub_pages(void);
 
 #include <asm/mm.h>
 

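With the patch applied, the new field can be read alongside the
existing ones (a hypothetical interactive check; field names as added
above, all values in KiB):

    import xen.lowlevel.xc
    xc = xen.lowlevel.xc.xc()
    info = xc.physinfo()
    print "free: %d KiB, awaiting scrub: %d KiB" % (
        info['free_memory'], info['scrub_memory'])
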
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog