[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] [RFC v2] drivers: xen NUMA-aware balloon driver



        Hi all,
        This small patch introduces NUMA awareness to the xen balloon driver.
        It can be applied to
        git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
        as of the time I send this email.

        This is version 2, since the first version was too ugly.

        Full docs can be found under
        xensource/docs/misc/numa-aware-ballooning.markdown
        which belongs to another patch, which contains the patches to
        libxl, and which is sent together with this one
        in xen-devel. This patch is only for Linux.

        Please forgive me for the stupid version 1. I have tried to make
        this one a readable patch, so that it could be possible for you
        to review my code, and give me more suggestions :-)

        Also, guest virtual NUMA topology is required for this work.
        It's not something that we have now, but I know that it's being
        worked on. I declare some interfaces in this code (on which we
        have some kind of agreement). Anyway, this code is almost
        working, so I publish it here as an RFC to get some early
        feedback.

        About the code architecture:
        Modification is mainly on linux-kernel/drivers/xen/balloon.c .
        There are several interface functions:
                unsigned long long xen_pnid_to_vnidmask(int pnid);
                int xen_vnid_to_pnid(int vnid);
                int balloon_page_to_vnid(struct page *page);
                struct page *xen_alloc_pages_node(int vnid);
        Now they are marked "todo" for debugging while waiting for the
        interface.

        The original increase/decrease reservation function:
                increase_reservation(unsigned long nr_pages),
                decrease_reservation(unsigned long nr_pages, gfp_t gfp)
        now come to :
                __increase_reservation(int vnid, unsigned long nr_pages),
                __decrease_reservation(int vnid, unsigned long nr_pages, gfp_t 
gfp),
        These two functions used to be designed as a batcher. Since we have
        a best-effort request, add another layer on top of them:
                static struct bp_rt
                        increase_reservation_nodeonly(...)
                        decrease_reservation_nodeonly(...)
        They will use a while loop to call __increase_reservation_node(..)/
                                __decrease_reservation_node(..) until they
        couldn't get more pages from this v-node.

        Also, we have to know how many pages are settled in
        __increase_reservation_node() and __decrease_reservation_node(),
        a new return struct type is required.

        The struct bp_rt includes the new return message of the balloon, so
        that when it comes to the topmost level:
                increase_reservation_numa(vnidmask, nodeexact, nr_pages)
                decrease_reservation_numa(vnidmask, nodeexact, nr_pages, gfp)
        balloon can decide whether it should go on to the next node or not.
        These two functions loop over the nodes according to vnidmask. If the
        pages on the first v-node do not meet the requirement, they go on to
        the second, etc.
        /* XXX: there is still some duplicated code here. It could be
           optimized in a later version */

        In the old balloon, when current does not meet target, the balloon
        process runs an infinite loop, rescheduling the task until the
        requirement is met. But now there is a danger that we might never
        get enough pages if a node is specified and nodeexact=true. In this
        case,
                Define NUMA_BALLOON_RETRY_MAX: the maximum number of
                                   balloon_process() reschedules when
                                   nodeexact=true.
        Balloon will exit if nodeexact=true and the retry counter exceeds
        this NUMA_BALLOON_RETRY_MAX limitation.

Signed-off-by: Yechen Li <lccycc123@xxxxxxxxx>
---
 drivers/xen/balloon.c     | 355 ++++++++++++++++++++++++++++++++++++++++------
 drivers/xen/xen-balloon.c |  20 ++-
 include/xen/balloon.h     |  19 +++
 3 files changed, 351 insertions(+), 43 deletions(-)

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 2a2ef97..92f5cd9 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -53,6 +53,8 @@
 #include <linux/memory.h>
 #include <linux/memory_hotplug.h>
 
+#include <linux/numa.h>
+
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
@@ -81,10 +83,26 @@ enum bp_state {
        BP_EAGAIN,
        BP_ECANCELED
 };
+/*
+ * balloon increase/decerase return message
+ * donepages: increase/decrease page number the function does
+ *                     always initial to 0
+ * state:  bp_state that return to balloon_process()
+ */
+struct bp_rt {
+       unsigned long donepages;
+       enum bp_state state;
+};
+#define DECLARE_BP_RT(bp_rt)                   \
+               struct bp_rt bp_rt = {          \
+               .donepages = 0,                 \
+               .state = BP_DONE                \
+       }
 
 
 static DEFINE_MUTEX(balloon_mutex);
 
+/*todo: should this balloon_stats change to balloon_stats[MAX_BALLOONNODES]?*/
 struct balloon_stats balloon_stats;
 EXPORT_SYMBOL_GPL(balloon_stats);
 
@@ -92,7 +110,13 @@ EXPORT_SYMBOL_GPL(balloon_stats);
 static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
 /* List of ballooned pages, threaded through the mem_map array. */
-static LIST_HEAD(ballooned_pages);
+/*
+ * this array is index by vnid,
+ * because we need to use alloc_pages_node(xxx)
+ */
+static struct list_head ballooned_pages[MAX_BALLOONNODES];
+/*ballooned_pages_cnt is for debug only*/
+long long ballooned_pages_cnt[MAX_BALLOONNODES];
 
 /* Main work function, always executed in process context. */
 static void balloon_process(struct work_struct *work);
@@ -110,17 +134,87 @@ static void scrub_page(struct page *page)
 #endif
 }
 
+void ballooned_pages_init(void)
+{
+       int i;
+       for (i = 0; i < MAX_BALLOONNODES; i++) {
+               INIT_LIST_HEAD(&ballooned_pages[i]);
+               ballooned_pages_cnt[i] = 0;
+       }
+}
+EXPORT_SYMBOL_GPL(ballooned_pages_init);
+
+/*
+ * XXX:
+ * The four function:
+ *   unsigned long long xen_pnid_to_vnidmask(int pnid)
+ *   int xen_vnid_to_pnid(int vnid)
+ *   int balloon_page_to_vnid(struct page *page)
+ *   struct page *xen_alloc_pages_node(int vnid)
+ * looks strange here, because they are waiting for guest numa topology's
+ * interface and for debuging.
+ */
+/*
+ * XXX: this function returns the vnid mask of pnid
+ * for example: if pnid -> vnid[1], vnid[2]
+ * it should return 2|4 = 6
+ * now it looks like this because of interface waiting
+ */
+unsigned long long xen_pnid_to_vnidmask(int pnid)
+{
+       /*todo:*/
+       unsigned long long rc = 1;
+       return rc<<pnid;
+}
+
+/*
+ * XXX: this function should actually be
+ *   xen_vnid_to_pnidmask(int vnid)
+ *  return the mask of pnid
+ *  nit it's here because of interface waiting and for debug convinent
+ */
+int xen_vnid_to_pnid(int vnid)
+{
+       /*todo:*/
+       return vnid % MAX_BALLOONNODES;
+}
+
+/*
+ * XXX: this function convert page to virtual nodeid
+ * should return page_to_nid(page);
+ * it return the strange value below now for debug before it get the interface.
+ */
+int balloon_page_to_vnid(struct page *page)
+{
+       /*todo:for debug here. should be
+       return page_to_nid(page);*/
+       return ((unsigned long long)page & (1<<13)) ? 0 : 1;
+}
+
+/*
+ * XXX: this function allocate a free page from guest OS's v-node[vnid]
+ * now return some weird value because of interface waiting and for debug
+ */
+struct page *xen_alloc_pages_node(int vnid)
+{
+       /*todo: vnid = 0 for debug:*/
+       vnid = 0;
+       return alloc_pages_node(vnid, GFP_BALLOON, balloon_order);
+}
+
 /* balloon_append: add the given page to the balloon. */
 static void __balloon_append(struct page *page)
 {
+       int vnid = balloon_page_to_vnid(page);
        /* Lowmem is re-populated first, so highmem pages go at list tail. */
        if (PageHighMem(page)) {
-               list_add_tail(&page->lru, &ballooned_pages);
+               list_add_tail(&page->lru, &ballooned_pages[vnid]);
                balloon_stats.balloon_high++;
        } else {
-               list_add(&page->lru, &ballooned_pages);
+               list_add(&page->lru, &ballooned_pages[vnid]);
                balloon_stats.balloon_low++;
        }
+       ballooned_pages_cnt[vnid]++;
 }
 
 static void balloon_append(struct page *page)
@@ -129,19 +223,20 @@ static void balloon_append(struct page *page)
        adjust_managed_page_count(page, -1);
 }
 
-/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
-static struct page *balloon_retrieve(bool prefer_highmem)
+/* balloon_retrieve_node: rescue a page from virtual node vnid */
+static struct page *balloon_retrieve_node(int vnid, bool prefer_highmem)
 {
        struct page *page;
 
-       if (list_empty(&ballooned_pages))
+       if (list_empty(&(ballooned_pages[vnid])))
                return NULL;
 
        if (prefer_highmem)
-               page = list_entry(ballooned_pages.prev, struct page, lru);
+               page = list_entry(ballooned_pages[vnid].prev, struct page, lru);
        else
-               page = list_entry(ballooned_pages.next, struct page, lru);
+               page = list_entry(ballooned_pages[vnid].next, struct page, lru);
        list_del(&page->lru);
+       ballooned_pages_cnt[vnid]--;
 
        if (PageHighMem(page))
                balloon_stats.balloon_high--;
@@ -153,17 +248,27 @@ static struct page *balloon_retrieve(bool prefer_highmem)
        return page;
 }
 
-static struct page *balloon_first_page(void)
+/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
+static struct page *balloon_retrieve(bool prefer_highmem)
+{
+       int i;
+       struct page *page = NULL;
+       for (i = 0; i < MAX_BALLOONNODES && !page; i++)
+               page = balloon_retrieve_node(i, prefer_highmem);
+       return page;
+}
+
+static struct page *balloon_first_page(int vnid)
 {
-       if (list_empty(&ballooned_pages))
+       if (list_empty(&ballooned_pages[vnid]))
                return NULL;
-       return list_entry(ballooned_pages.next, struct page, lru);
+       return list_entry(ballooned_pages[vnid].next, struct page, lru);
 }
 
-static struct page *balloon_next_page(struct page *page)
+static struct page *balloon_next_page(int vnid, struct page *page)
 {
        struct list_head *next = page->lru.next;
-       if (next == &ballooned_pages)
+       if (next == &ballooned_pages[vnid])
                return NULL;
        return list_entry(next, struct page, lru);
 }
@@ -230,7 +335,8 @@ static enum bp_state reserve_additional_memory(long credit)
        balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION);
        nid = memory_add_physaddr_to_nid(hotplug_start_paddr);
 
-       rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << 
PAGE_SHIFT);
+       rc = add_memory(nid, hotplug_start_paddr,
+                       balloon_hotplug << PAGE_SHIFT);
 
        if (rc) {
                pr_info("%s: add_memory() failed: %i\n", __func__, rc);
@@ -261,7 +367,8 @@ static void xen_online_page(struct page *page)
        mutex_unlock(&balloon_mutex);
 }
 
-static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, 
void *v)
+static int xen_memory_notifier(struct notifier_block *nb,
+                               unsigned long val, void *v)
 {
        if (val == MEM_ONLINE)
                schedule_delayed_work(&balloon_worker, 0);
@@ -301,52 +408,61 @@ static enum bp_state reserve_additional_memory(long 
credit)
 }
 #endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
 
-static enum bp_state increase_reservation(unsigned long nr_pages)
+static struct bp_rt __increase_reservation_nodeonly(int vnid,
+                                                    unsigned long nr_pages)
 {
-       int rc;
+       long rc;
        unsigned long  pfn, i;
        struct page   *page;
+       int pnid = xen_vnid_to_pnid(vnid);
        struct xen_memory_reservation reservation = {
-               .address_bits = 0,
+               .address_bits = MEMF_node(pnid) | MEMF_exact_node,
                .extent_order = 0,
-               .domid        = DOMID_SELF
+               .domid        = DOMID_SELF
        };
+       DECLARE_BP_RT(bp_rt);
 
 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
        if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) {
                nr_pages = min(nr_pages, balloon_stats.balloon_hotplug);
                balloon_stats.hotplug_pages += nr_pages;
                balloon_stats.balloon_hotplug -= nr_pages;
-               return BP_DONE;
+               bp_rt.donepages = nr_pages;
+               return bp_rt;
        }
 #endif
 
        if (nr_pages > ARRAY_SIZE(frame_list))
                nr_pages = ARRAY_SIZE(frame_list);
 
-       page = balloon_first_page();
+       page = balloon_first_page(vnid);
        for (i = 0; i < nr_pages; i++) {
                if (!page) {
                        nr_pages = i;
                        break;
                }
                frame_list[i] = page_to_pfn(page);
-               page = balloon_next_page(page);
+               page = balloon_next_page(vnid, page);
        }
 
+       if (nr_pages == 0)
+               return bp_rt;
+
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents = nr_pages;
        rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
-       if (rc <= 0)
-               return BP_EAGAIN;
+       if (rc <= 0) {
+               bp_rt.state = BP_EAGAIN;
+               return bp_rt;
+       }
 
        for (i = 0; i < rc; i++) {
-               page = balloon_retrieve(false);
+               page = balloon_retrieve_node(vnid, false);
                BUG_ON(page == NULL);
 
                pfn = page_to_pfn(page);
                BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
-                      phys_to_machine_mapping_valid(pfn));
+                          phys_to_machine_mapping_valid(pfn));
 
                set_phys_to_machine(pfn, frame_list[i]);
 
@@ -368,19 +484,89 @@ static enum bp_state increase_reservation(unsigned long 
nr_pages)
 
        balloon_stats.current_pages += rc;
 
-       return BP_DONE;
+       bp_rt.donepages = rc;
+
+       return bp_rt;
 }
 
-static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
+/*
+ * notice that __increase_reservation_nodeonly is a batcher.
+ * it can only do with length(frame_list[]) pages at a time
+ * so run an loop, while still positive pages return (rc>0)
+ *  go on with another batcher
+ */
+static struct bp_rt increase_reservation_nodeonly(int vnid,
+                                                 unsigned long nr_pages)
 {
-       enum bp_state state = BP_DONE;
+       unsigned long ori_nr_pages = nr_pages;
+       DECLARE_BP_RT(bp_rt);
+       while (nr_pages > 0) {
+               bp_rt = __increase_reservation_nodeonly(vnid, nr_pages);
+               nr_pages -= bp_rt.donepages;
+               if (bp_rt.donepages == 0 || bp_rt.state != BP_DONE)
+                       break;
+       }
+       bp_rt.donepages = ori_nr_pages - nr_pages;
+       return bp_rt;
+}
+
+static struct bp_rt increase_reservation_nodemask(unsigned long long vnidmask,
+                                                 unsigned long nr_pages)
+{
+       int i;
+       int ori_nr_pages = nr_pages;
+       DECLARE_BP_RT(bp_rt);
+
+       if (vnidmask == 0)
+               return bp_rt;
+
+       for (i = 0; i < MAX_BALLOONNODES; i++) {
+               if (vnidmask & (1<<i)) {
+                       bp_rt = increase_reservation_nodeonly(i, nr_pages);
+                       nr_pages -= bp_rt.donepages;
+                       if (bp_rt.state != BP_DONE)
+                               break;
+               }
+       }
+       bp_rt.donepages = ori_nr_pages - nr_pages;
+       return bp_rt;
+}
+
+static struct bp_rt increase_reservation_numa(unsigned long long vnidmask,
+                                             bool nodeexact,
+                                             unsigned long nr_pages)
+{
+       int ori_nr_pages = nr_pages;
+       DECLARE_BP_RT(bp_rt);
+       bp_rt = increase_reservation_nodemask(vnidmask, nr_pages);
+       nr_pages -= bp_rt.donepages;
+       if (nodeexact == false) {
+               vnidmask = ((unsigned long long)1<<MAX_BALLOONNODES)-1;
+               bp_rt = increase_reservation_nodemask(vnidmask, nr_pages);
+               nr_pages -= bp_rt.donepages;
+       }
+       bp_rt.donepages  = ori_nr_pages - nr_pages;
+       return bp_rt;
+}
+/*
+static enum bp_state increase_reservation(unsigned long nr_pages) {
+       struct bp_rt bp_rt = increase_reservation_numa(0,false,nr_pages);
+       return bp_rt.state;
+}
+*/
+
+static struct bp_rt __decrease_reservation_nodeonly(int vnid,
+                                                   unsigned long nr_pages,
+                                                   gfp_t gfp)
+{
+       DECLARE_BP_RT(bp_rt);
        unsigned long  pfn, i;
        struct page   *page;
        int ret;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
-               .domid        = DOMID_SELF
+               .domid        = DOMID_SELF
        };
 
 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
@@ -388,7 +574,8 @@ static enum bp_state decrease_reservation(unsigned long 
nr_pages, gfp_t gfp)
                nr_pages = min(nr_pages, balloon_stats.hotplug_pages);
                balloon_stats.hotplug_pages -= nr_pages;
                balloon_stats.balloon_hotplug += nr_pages;
-               return BP_DONE;
+               bp_rt.donepages = nr_pages;
+               return bp_rt;
        }
 #endif
 
@@ -396,10 +583,10 @@ static enum bp_state decrease_reservation(unsigned long 
nr_pages, gfp_t gfp)
                nr_pages = ARRAY_SIZE(frame_list);
 
        for (i = 0; i < nr_pages; i++) {
-               page = alloc_page(gfp);
+               page = xen_alloc_pages_node(vnid);
                if (page == NULL) {
                        nr_pages = i;
-                       state = BP_EAGAIN;
+                       bp_rt.state = BP_EAGAIN;
                        break;
                }
 
@@ -436,7 +623,73 @@ static enum bp_state decrease_reservation(unsigned long 
nr_pages, gfp_t gfp)
 
        balloon_stats.current_pages -= nr_pages;
 
-       return state;
+       bp_rt.donepages = nr_pages;
+       return bp_rt;
+}
+
+/*
+ * the same reason to increase_reservaton_readonly
+ * run a loop for another batcher if rc > 0
+ */
+static struct bp_rt decrease_reservation_nodeonly(int vnid,
+                                                 unsigned long nr_pages,
+                                                 gfp_t gfp)
+{
+       int ori_nr_pages = nr_pages;
+       DECLARE_BP_RT(bp_rt);
+       while (nr_pages > 0) {
+               bp_rt = __decrease_reservation_nodeonly(vnid, nr_pages, gfp);
+               nr_pages -= bp_rt.donepages;
+               if (bp_rt.donepages == 0 || bp_rt.state != BP_DONE)
+                       break;
+       }
+       bp_rt.donepages = ori_nr_pages - nr_pages;
+       return bp_rt;
+}
+static struct bp_rt decrease_reservation_nodemask(unsigned long long vnidmask,
+                                                 unsigned long nr_pages,
+                                                 gfp_t gfp)
+{
+       int i;
+       int ori_nr_pages = nr_pages;
+       DECLARE_BP_RT(bp_rt);
+
+       if (vnidmask == 0)
+               return bp_rt;
+
+       for (i = 0; i < MAX_BALLOONNODES; i++) {
+               if (vnidmask & (1<<i)) {
+                       bp_rt = decrease_reservation_nodeonly(i, nr_pages, gfp);
+                       nr_pages -= bp_rt.donepages;
+                       if (bp_rt.state != BP_DONE)
+                               break;
+               }
+       }
+       bp_rt.donepages = ori_nr_pages - nr_pages;
+       return bp_rt;
+}
+
+static struct bp_rt decrease_reservation_numa(unsigned long long vnidmask,
+                                             bool nodeexact,
+                                             unsigned long nr_pages, gfp_t gfp)
+{
+       unsigned long ori_nr_pages = nr_pages;
+       DECLARE_BP_RT(bp_rt);
+       bp_rt = decrease_reservation_nodemask(vnidmask, nr_pages, gfp);
+       nr_pages -= bp_rt.donepages;
+       if (nodeexact == false) {
+               vnidmask = ((unsigned long long)1<<MAX_BALLOONNODES)-1;
+               bp_rt = decrease_reservation_nodemask(vnidmask, nr_pages, gfp);
+               nr_pages -= bp_rt.donepages;
+       }
+       bp_rt.donepages = ori_nr_pages - nr_pages;
+       return bp_rt;
+}
+
+static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
+{
+       struct bp_rt bp_rt = decrease_reservation_numa(0, false, nr_pages, gfp);
+       return bp_rt.state;
 }
 
 /*
@@ -449,6 +702,11 @@ static void balloon_process(struct work_struct *work)
 {
        enum bp_state state = BP_DONE;
        long credit;
+       int pnid = balloon_stats.numa_pnid;
+       bool nodeexact = balloon_stats.numa_nodeexact;
+       int counter = 0;
+       int i;
+       unsigned long long vnidmask = xen_pnid_to_vnidmask(pnid);
 
        mutex_lock(&balloon_mutex);
 
@@ -457,13 +715,16 @@ static void balloon_process(struct work_struct *work)
 
                if (credit > 0) {
                        if (balloon_is_inflated())
-                               state = increase_reservation(credit);
+                               state = increase_reservation_numa(vnidmask,
+                                               nodeexact, credit).state;
                        else
                                state = reserve_additional_memory(credit);
                }
 
-               if (credit < 0)
-                       state = decrease_reservation(-credit, GFP_BALLOON);
+               if (credit < 0) {
+                       state = decrease_reservation_numa(vnidmask, nodeexact,
+                                               -credit, GFP_BALLOON).state;
+               }
 
                state = update_schedule(state);
 
@@ -471,22 +732,36 @@ static void balloon_process(struct work_struct *work)
                if (need_resched())
                        schedule();
 #endif
+               counter++;
+               if (nodeexact && counter >= NUMA_BALLOON_RETRY_MAX)
+                       break;
+
        } while (credit && state == BP_DONE);
 
        /* Schedule more work if there is some still to be done. */
        if (state == BP_EAGAIN)
-               schedule_delayed_work(&balloon_worker, 
balloon_stats.schedule_delay * HZ);
+               schedule_delayed_work(&balloon_worker,
+                                      balloon_stats.schedule_delay * HZ);
 
        mutex_unlock(&balloon_mutex);
 }
 
-/* Resets the Xen limit, sets new target, and kicks off processing. */
-void balloon_set_new_target(unsigned long target)
+void balloon_set_new_target_numa(unsigned long target, int pnid, bool 
nodeexact)
 {
        /* No need for lock. Not read-modify-write updates. */
        balloon_stats.target_pages = target;
+       balloon_stats.numa_pnid = pnid;
+       balloon_stats.numa_nodeexact = nodeexact;
+
        schedule_delayed_work(&balloon_worker, 0);
 }
+EXPORT_SYMBOL_GPL(balloon_set_new_target_numa);
+
+/* Resets the Xen limit, sets new target, and kicks off processing. */
+void balloon_set_new_target(unsigned long target)
+{
+       balloon_set_new_target_numa(target, -1, false);
+}
 EXPORT_SYMBOL_GPL(balloon_set_new_target);
 
 /**
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index e555845..831cc0f 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -56,6 +56,8 @@ static void watch_target(struct xenbus_watch *watch,
                         const char **vec, unsigned int len)
 {
        unsigned long long new_target;
+       int mnid;
+       int focus;
        int err;
 
        err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
@@ -63,11 +65,21 @@ static void watch_target(struct xenbus_watch *watch,
                /* This is ok (for domain0 at least) - so just return */
                return;
        }
+       err = xenbus_scanf(XBT_NIL, "memory", "target_nid", "%d %d",
+                               &mnid, &focus);
+       if (err != 2)
+               mnid = -1;
+       /* no numa node specify, set focus = false*/
+       if (mnid == -1) {
+               mnid = 0;
+               focus = false;
+       }
 
        /* The given memory/target value is in KiB, so it needs converting to
         * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
         */
-       balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+       balloon_set_new_target_numa(new_target >> (PAGE_SHIFT - 10),
+                                       mnid, focus);
 }
 static struct xenbus_watch target_watch = {
        .node = "memory/target",
@@ -99,6 +111,8 @@ static int __init balloon_init(void)
 
        pr_info("Initialising balloon driver\n");
 
+       ballooned_pages_init();
+
        register_balloon(&balloon_dev);
 
        register_xen_selfballooning(&balloon_dev);
@@ -111,8 +125,8 @@ subsys_initcall(balloon_init);
 
 static void balloon_exit(void)
 {
-    /* XXX - release balloon here */
-    return;
+       /* XXX - release balloon here */
+       return;
 }
 
 module_exit(balloon_exit);
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index cc2e1a7..06feb5f 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -3,11 +3,25 @@
  */
 
 #define RETRY_UNLIMITED        0
+#define NUMA_BALLOON_RETRY_MAX 20
+
+#define balloon_order 0
+/*todo: numa support
+xensource/xen/include/xen/mm.h
+#define MEMF_exact_node (1U<<4)
+*/
+/* below is for debug. (0U<<4) should be (1U<<4)*/
+#define MEMF_exact_node (0U<<4)
+#define MEMF_node(n)     ((((n)+1)&0xff)<<8)
+#define MAX_BALLOONNODES 2
 
 struct balloon_stats {
        /* We aim for 'current allocation' == 'target allocation'. */
        unsigned long current_pages;
        unsigned long target_pages;
+       /* numa support */
+       int numa_pnid;
+       bool numa_nodeexact;
        /* Number of pages in high- and low-memory balloons. */
        unsigned long balloon_low;
        unsigned long balloon_high;
@@ -23,6 +37,11 @@ struct balloon_stats {
 
 extern struct balloon_stats balloon_stats;
 
+void ballooned_pages_init(void);
+
+void balloon_set_new_target_numa(unsigned long target, int mnid,
+                                       bool nodeexact);
+
 void balloon_set_new_target(unsigned long target);
 
 int alloc_xenballooned_pages(int nr_pages, struct page **pages,
-- 
1.8.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.