[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] [linux-2.6.39.x for xen] tmem: self-ballooning and frontswap-selfshrinking



Hi Konrad (and anyone else interested in reviewing) --

Here is the patch for self-ballooning and frontswap-selfshrinking on
top of 2.6.39, and on top of the frontswap commit-set you recently
merged into your tree.

Since this is the first time I've ported this code to a kernel
later than 2.6.34(?), and since this code hasn't gotten formal
review even though it's been floating about in various forms
for 18 months, I thought I would post it publicly for review,
rather than just ask for a pull.  (I'll put it in a git tree after
a round or two of feedback.)

This code complements the cleancache and frontswap patchsets to
optimize support for Xen Transcendent Memory.  The policy it
implements is rudimentary, so will almost certainly need to
improve over time.

(Sorry if this line-wraps... attached also...)

Thanks,
Dan

P.S. Anybody who actually uses this patch is encouraged to read
http://oss.oracle.com/projects/tmem/dist/documentation/internals/linuxpatch 
from which the following brief documentation was extracted,
which will probably be the git commit comment.

=

[PATCH] [linux-2.6.39.x for xen] tmem: self-ballooning and 
frontswap-selfshrinking

Selfballooning creates memory pressure by using the Xen balloon driver
to decrease and increase available kernel memory, tracking a target value
of "Committed_AS" (see /proc/meminfo).  Sysfs tunables are provided to
adjust the frequency at which memory size is adjusted, the rate at
which ballooning is used to approach the target, and to disable
selfballooning completely.  As of 100827, selfballooning runs in a
separate kernel thread, "selfballooning".

Frontswap shrinking accounts for the fact that pages swapped to disk
may sit on disk for a very long time, even if very rarely used or if
the kernel knows they will never be used again.  This is because
the kernel need not reclaim disk space because the disk space costs
very little and can be overwritten when necessary.  When the same
pages are in frontswap, however, they are taking up valuable RAM
real estate.  So when frontswap activity is otherwise stable and the
guest kernel is not under memory pressure, the frontswap shrinker
removes some pages from frontswap and returns them to kernel memory.
Sysfs tunables are provided to adjust the frequency of shrinking
opportunities and the shrinking rate, and to create more "inertia".

Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>

 drivers/xen/Kconfig       |   10 +++
 drivers/xen/balloon.c     |  109 ++++++++++++++++++++++++++
 drivers/xen/tmem.c        |    1 +
 drivers/xen/xen-balloon.c |  191 ++++++++++++++++++++++++++++++++++++++++++++-
 include/xen/balloon.h     |   11 +++
 mm/mmap.c                 |    7 ++
 6 files changed, 328 insertions(+), 1 deletions(-)

===

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index d589fe7..8f21fad 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -9,6 +9,16 @@ config XEN_BALLOON
          the system to expand the domain's memory allocation, or alternatively
          return unneeded memory to the system.
 
+config XEN_SELFBALLOONING
+       bool "dynamically self-balloon kernel memory to target"
+       depends on XEN && XEN_BALLOON && CLEANCACHE
+       default y
+       help
+         Self-ballooning dynamically balloons available kernel memory driven
+         by the current usage of anonymous memory ("committed AS") and
+         controlled by various sysfs-settable parameters.  May be overridden
+         by the noselfballooning kernel boot parameter
+
 config XEN_SCRUB_PAGES
        bool "Scrub pages before returning them to system"
        depends on XEN_BALLOON
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index f54290b..a35b056 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -56,6 +56,7 @@
 #include <xen/balloon.h>
 #include <xen/features.h>
 #include <xen/page.h>
+#include <linux/frontswap.h>
 
 /*
  * balloon_process() state:
@@ -74,6 +75,14 @@ enum bp_state {
 
 static DEFINE_MUTEX(balloon_mutex);
 
+#ifdef CONFIG_XEN_SELFBALLOONING
+extern int tmem_enabled;
+static int use_selfballooning __read_mostly = 1;
+#ifdef CONFIG_FRONTSWAP
+static int use_frontswap_selfshrink __read_mostly = 1;
+#endif
+#endif
+
 struct balloon_stats balloon_stats;
 EXPORT_SYMBOL_GPL(balloon_stats);
 
@@ -94,6 +103,10 @@ static LIST_HEAD(ballooned_pages);
 /* Main work function, always executed in process context. */
 static void balloon_process(struct work_struct *work);
 static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
+#ifdef CONFIG_XEN_SELFBALLOONING
+static void selfballoon_process(struct work_struct *work);
+DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
+#endif
 
 /* When ballooning out (allocating memory to return to Xen) we don't really
    want the kernel to try too hard since that can trigger the oom killer. */
@@ -428,6 +441,86 @@ void free_xenballooned_pages(int nr_pages, struct page** 
pages)
 }
 EXPORT_SYMBOL(free_xenballooned_pages);
 
+#ifdef CONFIG_XEN_SELFBALLOONING
+#ifdef CONFIG_FRONTSWAP
+#define frontswap_selfshrinking_enabled balloon_stats.frontswap_selfshrinking
+
+unsigned long frontswap_inertia_counter = 0;
+
+static void frontswap_selfshrink(void)
+{
+       static unsigned long cur_frontswap_pages = 0;
+       static unsigned long last_frontswap_pages = 0;
+       static unsigned long tgt_frontswap_pages = 0;
+
+       if (!balloon_stats.frontswap_selfshrinking)
+               return;
+       if (!balloon_stats.frontswap_hysteresis)
+               return;
+       last_frontswap_pages = cur_frontswap_pages;
+       cur_frontswap_pages = frontswap_curr_pages();
+       if (!cur_frontswap_pages || (cur_frontswap_pages > 
last_frontswap_pages)) {
+               frontswap_inertia_counter = balloon_stats.frontswap_inertia;
+               return;
+       }
+       if (frontswap_inertia_counter && --frontswap_inertia_counter)
+               return;
+       //frontswap_inertia_counter = balloon_stats.frontswap_inertia;
+       if ( cur_frontswap_pages <= balloon_stats.frontswap_hysteresis)
+               tgt_frontswap_pages = 0;
+       else tgt_frontswap_pages = cur_frontswap_pages -
+               (cur_frontswap_pages / balloon_stats.frontswap_hysteresis);
+       frontswap_shrink(tgt_frontswap_pages);
+}
+
+static int __init no_frontswap_selfshrink_setup(char *s)
+{
+       use_frontswap_selfshrink = 0;
+       return 1;
+}
+
+__setup("noselfshrink", no_frontswap_selfshrink_setup);
+#else
+#define frontswap_selfshrink() do {} while (0)
+#define frontswap_selfshrinking_enabled 0
+#endif
+
+static void selfballoon_process(struct work_struct *work)
+{
+       extern unsigned long vm_get_committed_as(void);
+       unsigned long cur_pages, goal_pages, tgt_pages;
+       int reset_timer = 0;
+
+       if (balloon_stats.selfballooning_enabled) {
+               tgt_pages = cur_pages = totalram_pages;
+               goal_pages = vm_get_committed_as();
+               if (cur_pages > goal_pages)
+                       tgt_pages = cur_pages -
+                               (cur_pages - goal_pages) / 
balloon_stats.selfballoon_downhysteresis;
+               else if (cur_pages < goal_pages)
+                       tgt_pages = cur_pages +
+                               (goal_pages - cur_pages) / 
balloon_stats.selfballoon_uphysteresis;
+               balloon_set_new_target(tgt_pages);
+               reset_timer = 1;
+       }
+       if (frontswap_selfshrinking_enabled) {
+               frontswap_selfshrink();
+               reset_timer = 1;
+       }
+       if (reset_timer)
+               schedule_delayed_work(&selfballoon_worker,
+                       balloon_stats.selfballoon_interval * HZ);
+}
+
+static int __init noselfballooning_setup(char *s)
+{
+       use_selfballooning = 0;
+       return 1;
+}
+
+__setup("noselfballooning", noselfballooning_setup);
+#endif
+
 static int __init balloon_init(void)
 {
        unsigned long pfn, extra_pfn_end;
@@ -468,6 +561,22 @@ static int __init balloon_init(void)
                __balloon_append(page);
        }
 
+#ifdef CONFIG_XEN_SELFBALLOONING
+       balloon_stats.selfballooning_enabled = tmem_enabled &&
+                                               use_selfballooning;
+       balloon_stats.selfballoon_interval = 5;
+       balloon_stats.selfballoon_downhysteresis = 8;
+       balloon_stats.selfballoon_uphysteresis = 1;
+#ifdef CONFIG_FRONTSWAP
+       balloon_stats.frontswap_selfshrinking =
+               use_frontswap_selfshrink && frontswap_enabled;
+       balloon_stats.frontswap_hysteresis = 20;
+       balloon_stats.frontswap_inertia = 3;
+#endif
+       schedule_delayed_work(&selfballoon_worker,
+               balloon_stats.selfballoon_interval * HZ);
+#endif
+
        return 0;
 }
 
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index ad6a8f4..de740d2 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -124,6 +124,7 @@ static int xen_tmem_flush_object(u32 pool_id, struct 
tmem_oid oid)
 }
 
 int tmem_enabled;
+EXPORT_SYMBOL(tmem_enabled);
 
 static int __init enable_tmem(char *s)
 {
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index a4ff225..5e30fc9 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -190,6 +190,184 @@ static ssize_t store_target(struct sys_device *dev,
 static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
                   show_target, store_target);
 
+#ifdef CONFIG_XEN_SELFBALLOONING
+extern struct delayed_work selfballoon_worker;
+
+static ssize_t show_selfballooning(struct sys_device *dev, struct 
sysdev_attribute *attr,
+                             char *buf)
+{
+       return sprintf(buf, "%d\n", balloon_stats.selfballooning_enabled);
+}
+
+static ssize_t store_selfballooning(struct sys_device *dev,
+                           struct sysdev_attribute *attr,
+                           const char *buf,
+                           size_t count)
+{
+       char *endchar;
+       int was_enabled = balloon_stats.selfballooning_enabled;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       balloon_stats.selfballooning_enabled = !!memparse(buf, &endchar);
+
+       if (!was_enabled && balloon_stats.selfballooning_enabled)
+               schedule_delayed_work(&selfballoon_worker,
+                       balloon_stats.selfballoon_interval * HZ);
+
+       return count;
+}
+
+static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR,
+                  show_selfballooning, store_selfballooning);
+
+static ssize_t show_selfballoon_interval(struct sys_device *dev, struct 
sysdev_attribute *attr,
+                             char *buf)
+{
+       return sprintf(buf, "%d\n", balloon_stats.selfballoon_interval);
+}
+
+static ssize_t store_selfballoon_interval(struct sys_device *dev,
+                           struct sysdev_attribute *attr,
+                           const char *buf,
+                           size_t count)
+{
+       char *endchar;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       balloon_stats.selfballoon_interval = memparse(buf, &endchar);
+       return count;
+}
+
+static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR,
+                  show_selfballoon_interval, store_selfballoon_interval);
+
+static ssize_t show_selfballoon_downhys(struct sys_device *dev, struct 
sysdev_attribute *attr,
+                             char *buf)
+{
+       return sprintf(buf, "%d\n", balloon_stats.selfballoon_downhysteresis);
+}
+
+static ssize_t store_selfballoon_downhys(struct sys_device *dev,
+                           struct sysdev_attribute *attr,
+                           const char *buf,
+                           size_t count)
+{
+       char *endchar;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       balloon_stats.selfballoon_downhysteresis = memparse(buf, &endchar);
+       return count;
+}
+
+static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR,
+                  show_selfballoon_downhys, store_selfballoon_downhys);
+
+
+static ssize_t show_selfballoon_uphys(struct sys_device *dev, struct 
sysdev_attribute *attr,
+                             char *buf)
+{
+       return sprintf(buf, "%d\n", balloon_stats.selfballoon_uphysteresis);
+}
+
+static ssize_t store_selfballoon_uphys(struct sys_device *dev,
+                           struct sysdev_attribute *attr,
+                           const char *buf,
+                           size_t count)
+{
+       char *endchar;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       balloon_stats.selfballoon_uphysteresis = memparse(buf, &endchar);
+       return count;
+}
+
+static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
+                  show_selfballoon_uphys, store_selfballoon_uphys);
+
+#ifdef CONFIG_FRONTSWAP
+static ssize_t show_frontswap_selfshrinking(struct sys_device *dev, struct 
sysdev_attribute *attr,
+                             char *buf)
+{
+       return sprintf(buf, "%d\n", balloon_stats.frontswap_selfshrinking);
+}
+
+static ssize_t store_frontswap_selfshrinking(struct sys_device *dev,
+                           struct sysdev_attribute *attr,
+                           const char *buf,
+                           size_t count)
+{
+       char *endchar;
+       int was_enabled = balloon_stats.frontswap_selfshrinking;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       balloon_stats.frontswap_selfshrinking = !!memparse(buf, &endchar);
+
+       if (!was_enabled && !balloon_stats.selfballooning_enabled &&
+                                       balloon_stats.frontswap_selfshrinking)
+               schedule_delayed_work(&selfballoon_worker,
+                       balloon_stats.selfballoon_interval * HZ);
+
+       return count;
+}
+
+static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR,
+                  show_frontswap_selfshrinking, store_frontswap_selfshrinking);
+
+static ssize_t show_frontswap_inertia(struct sys_device *dev, struct 
sysdev_attribute *attr,
+                             char *buf)
+{
+       return sprintf(buf, "%d\n", balloon_stats.frontswap_inertia);
+}
+
+static ssize_t store_frontswap_inertia(struct sys_device *dev,
+                           struct sysdev_attribute *attr,
+                           const char *buf,
+                           size_t count)
+{
+       char *endchar;
+       extern unsigned long frontswap_inertia_counter;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       balloon_stats.frontswap_inertia = memparse(buf, &endchar);
+       frontswap_inertia_counter = balloon_stats.frontswap_inertia;
+       return count;
+}
+
+static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR,
+                  show_frontswap_inertia, store_frontswap_inertia);
+
+static ssize_t show_frontswap_hysteresis(struct sys_device *dev, struct 
sysdev_attribute *attr,
+                             char *buf)
+{
+       return sprintf(buf, "%d\n", balloon_stats.frontswap_hysteresis);
+}
+
+static ssize_t store_frontswap_hysteresis(struct sys_device *dev,
+                           struct sysdev_attribute *attr,
+                           const char *buf,
+                           size_t count)
+{
+       char *endchar;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       balloon_stats.frontswap_hysteresis = memparse(buf, &endchar);
+       return count;
+}
+
+static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR,
+                  show_frontswap_hysteresis, store_frontswap_hysteresis);
+
+#endif /* CONFIG_FRONTSWAP */
+#endif /* CONFIG_XEN_SELFBALLOONING */
 
 static struct sysdev_attribute *balloon_attrs[] = {
        &attr_target_kb,
@@ -197,7 +375,18 @@ static struct sysdev_attribute *balloon_attrs[] = {
        &attr_schedule_delay.attr,
        &attr_max_schedule_delay.attr,
        &attr_retry_count.attr,
-       &attr_max_retry_count.attr
+       &attr_max_retry_count.attr,
+#ifdef CONFIG_XEN_SELFBALLOONING
+       &attr_selfballooning,
+       &attr_selfballoon_interval,
+       &attr_selfballoon_downhysteresis,
+       &attr_selfballoon_uphysteresis,
+#ifdef CONFIG_FRONTSWAP
+       &attr_frontswap_selfshrinking,
+       &attr_frontswap_hysteresis,
+       &attr_frontswap_inertia,
+#endif
+#endif
 };
 
 static struct attribute *balloon_info_attrs[] = {
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index a2b22f0..aa36b82 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -15,6 +15,17 @@ struct balloon_stats {
        unsigned long max_schedule_delay;
        unsigned long retry_count;
        unsigned long max_retry_count;
+#ifdef CONFIG_XEN_SELFBALLOONING
+       int selfballooning_enabled;
+       unsigned int selfballoon_interval;
+       unsigned int selfballoon_downhysteresis;
+       unsigned int selfballoon_uphysteresis;
+#ifdef CONFIG_FRONTSWAP
+       unsigned int frontswap_selfshrinking;
+       unsigned int frontswap_hysteresis;
+       unsigned int frontswap_inertia;
+#endif
+#endif
 };
 
 extern struct balloon_stats balloon_stats;
diff --git a/mm/mmap.c b/mm/mmap.c
index 772140c..030a47b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -89,6 +89,13 @@ int sysctl_overcommit_ratio = 50;    /* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 struct percpu_counter vm_committed_as;
 
+unsigned long vm_get_committed_as(void)
+{
+       return percpu_counter_read_positive(&vm_committed_as);
+
+}
+EXPORT_SYMBOL(vm_get_committed_as);
+
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to

Attachment: frontswap-balloon-linux-2.6.39.x-110606.patch
Description: Binary data

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.