[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH xen.git] Add hugepage support to balloon driver



This patch adds hugepage support to the balloon driver.  It is activated
by specifying "balloon_hugepages" on the kernel command line.  Once activated,
the balloon driver will work entirely in hugepage-sized chunks.

If, when returning pages, it finds a hugepage that is not contiguous
at the machine level, it will return each underlying page separately.
When that hugepage is later repopulated, it will be contiguous.

Signed-off-by: Dave McCracken <dave.mccracken@xxxxxxxxxx>

--------

 balloon.c |  171 +++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 125 insertions(+), 46 deletions(-)

--- 2.6-xen/drivers/xen/balloon.c       2009-10-29 17:48:30.000000000 -0500
+++ 2.6-xen-balloon/drivers/xen/balloon.c       2009-10-29 19:14:33.000000000 -0500
@@ -59,7 +59,7 @@
 #include <xen/features.h>
 #include <xen/page.h>
 
-#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT+balloon_order-10))
 
 #define BALLOON_CLASS_NAME "xen_memory"
 
@@ -85,6 +85,14 @@ static int register_balloon(struct sys_d
 
 static struct balloon_stats balloon_stats;
 
+/*
+ * Work in pages of this order.  Can be either 0 for normal pages
+ * or 9 for hugepages.
+ */
+static int balloon_order;
+static unsigned long balloon_npages;
+static unsigned long discontig_frame_list[PAGE_SIZE / sizeof(unsigned long)];
+
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
@@ -113,10 +121,41 @@ static struct timer_list balloon_timer;
 static void scrub_page(struct page *page)
 {
 #ifdef CONFIG_XEN_SCRUB_PAGES
-       clear_highpage(page);
+       int i;
+
+       for (i = 0; i < balloon_npages; i++)
+               clear_highpage(page++);
 #endif
 }
 
+static void free_discontig_frame(void)
+{
+       int rc;
+       struct xen_memory_reservation reservation = {
+               .address_bits = 0,
+               .domid        = DOMID_SELF,
+               .nr_extents   = balloon_npages,
+               .extent_order = 0
+       };
+
+       set_xen_guest_handle(reservation.extent_start, discontig_frame_list);
+       rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+       BUG_ON(rc != balloon_npages);
+}
+
+static unsigned long shrink_frame(unsigned long nr_pages)
+{
+       unsigned long i, j;
+
+       for (i = 0, j = 0; i < nr_pages; i++, j++) {
+               if (frame_list[i] == 0)
+                       j++;
+               if (i != j)
+                       frame_list[i] = frame_list[j];
+       }
+       return i;
+}
+
 /* balloon_append: add the given page to the balloon. */
 static void balloon_append(struct page *page)
 {
@@ -190,12 +229,11 @@ static unsigned long current_target(void
 
 static int increase_reservation(unsigned long nr_pages)
 {
-       unsigned long  pfn, i, flags;
+       unsigned long  pfn, mfn, i, j, flags;
        struct page   *page;
        long           rc;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
-               .extent_order = 0,
                .domid        = DOMID_SELF
        };
 
@@ -207,12 +245,14 @@ static int increase_reservation(unsigned
        page = balloon_first_page();
        for (i = 0; i < nr_pages; i++) {
                BUG_ON(page == NULL);
-               frame_list[i] = page_to_pfn(page);;
+               frame_list[i] = page_to_pfn(page);
                page = balloon_next_page(page);
        }
 
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents = nr_pages;
+       reservation.extent_order = balloon_order;
+
        rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
        if (rc < 0)
                goto out;
@@ -222,19 +262,22 @@ static int increase_reservation(unsigned
                BUG_ON(page == NULL);
 
                pfn = page_to_pfn(page);
+               mfn = frame_list[i];
                BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
                       phys_to_machine_mapping_valid(pfn));
 
-               set_phys_to_machine(pfn, frame_list[i]);
+               for (j = 0; j < balloon_npages; j++, pfn++, mfn++) {
+                       set_phys_to_machine(pfn, mfn);
 
-               /* Link back into the page tables if not highmem. */
-               if (pfn < max_low_pfn) {
-                       int ret;
-                       ret = HYPERVISOR_update_va_mapping(
-                               (unsigned long)__va(pfn << PAGE_SHIFT),
-                               mfn_pte(frame_list[i], PAGE_KERNEL),
-                               0);
-                       BUG_ON(ret);
+                       /* Link back into the page tables if not highmem. */
+                       if (pfn < max_low_pfn) {
+                               int ret;
+                               ret = HYPERVISOR_update_va_mapping(
+                                       (unsigned long)__va(pfn << PAGE_SHIFT),
+                                       mfn_pte(mfn, PAGE_KERNEL),
+                                       0);
+                               BUG_ON(ret);
+                       }
                }
 
                /* Relinquish the page back to the allocator. */
@@ -253,13 +296,13 @@ static int increase_reservation(unsigned
 
 static int decrease_reservation(unsigned long nr_pages)
 {
-       unsigned long  pfn, i, flags;
+       unsigned long  pfn, lpfn, mfn, i, j, flags;
        struct page   *page;
        int            need_sleep = 0;
-       int ret;
+       int             discontig, discontig_free;
+       int             ret;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
-               .extent_order = 0,
                .domid        = DOMID_SELF
        };
 
@@ -267,7 +310,7 @@ static int decrease_reservation(unsigned
                nr_pages = ARRAY_SIZE(frame_list);
 
        for (i = 0; i < nr_pages; i++) {
-               if ((page = alloc_page(GFP_BALLOON)) == NULL) {
+               if ((page = alloc_pages(GFP_BALLOON, balloon_order)) == NULL) {
                        nr_pages = i;
                        need_sleep = 1;
                        break;
@@ -277,14 +320,6 @@ static int decrease_reservation(unsigned
                frame_list[i] = pfn_to_mfn(pfn);
 
                scrub_page(page);
-
-               if (!PageHighMem(page)) {
-                       ret = HYPERVISOR_update_va_mapping(
-                               (unsigned long)__va(pfn << PAGE_SHIFT),
-                               __pte_ma(0), 0);
-                       BUG_ON(ret);
-                }
-
        }
 
        /* Ensure that ballooned highmem pages don't have kmaps. */
@@ -295,18 +330,39 @@ static int decrease_reservation(unsigned
 
        /* No more mappings: invalidate P2M and add to balloon. */
        for (i = 0; i < nr_pages; i++) {
-               pfn = mfn_to_pfn(frame_list[i]);
-               set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+               mfn = frame_list[i];
+               lpfn = pfn = mfn_to_pfn(mfn);
                balloon_append(pfn_to_page(pfn));
+               discontig_free = 0;
+               for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) {
+                       if ((discontig_frame_list[j] = pfn_to_mfn(lpfn)) != mfn)
+                               discontig_free = 1;
+
+                       set_phys_to_machine(lpfn, INVALID_P2M_ENTRY);
+                       if (!PageHighMem(page)) {
+                               ret = HYPERVISOR_update_va_mapping(
+                                       (unsigned long)__va(lpfn << PAGE_SHIFT),
+                                       __pte_ma(0), 0);
+                               BUG_ON(ret);
+                       }
+               }
+               if (discontig_free) {
+                       free_discontig_frame();
+                       frame_list[i] = 0;
+                       discontig = 1;
+               }
        }
+       balloon_stats.current_pages -= nr_pages;
+
+       if (discontig)
+               nr_pages = shrink_frame(nr_pages);
 
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
+       reservation.extent_order = balloon_order;
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
        BUG_ON(ret != nr_pages);
 
-       balloon_stats.current_pages -= nr_pages;
-
        spin_unlock_irqrestore(&xen_reservation_lock, flags);
 
        return need_sleep;
@@ -374,7 +430,7 @@ static void watch_target(struct xenbus_w
        /* The given memory/target value is in KiB, so it needs converting to
         * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
         */
-       balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+       balloon_set_new_target(new_target >> ((PAGE_SHIFT - 10) + balloon_order));
 }
 
 static int balloon_init_watcher(struct notifier_block *notifier,
@@ -399,9 +455,12 @@ static int __init balloon_init(void)
        if (!xen_pv_domain())
                return -ENODEV;
 
-       pr_info("xen_balloon: Initialising balloon driver.\n");
+       pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
+               balloon_order);
+
+       balloon_npages = 1 << balloon_order;
 
-       balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
+       balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order;
        balloon_stats.target_pages  = balloon_stats.current_pages;
        balloon_stats.balloon_low   = 0;
        balloon_stats.balloon_high  = 0;
@@ -414,7 +473,7 @@ static int __init balloon_init(void)
        register_balloon(&balloon_sysdev);
 
        /* Initialise the balloon with excess memory space. */
-       for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+       for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn += balloon_npages) {
                if (page_is_ram(pfn)) {
                        struct page *page = pfn_to_page(pfn);
                        if (!PageReserved(page))
@@ -464,16 +523,20 @@ static int dealloc_pte_fn(pte_t *pte, st
 struct page **alloc_empty_pages_and_pagevec(int nr_pages)
 {
        struct page *page, **pagevec;
-       int i, ret;
+       int npages;
+       int i, j, ret;
+
+       /* Round up to next number of balloon_order pages */
+       npages = (nr_pages + (balloon_npages-1)) >> balloon_order;
 
-       pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
+       pagevec = kmalloc(sizeof(page) * nr_pages << balloon_order, GFP_KERNEL);
        if (pagevec == NULL)
                return NULL;
 
        for (i = 0; i < nr_pages; i++) {
                void *v;
 
-               page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD);
+               page = alloc_pages(GFP_KERNEL|__GFP_COLD, balloon_order);
                if (page == NULL)
                        goto err;
 
@@ -484,8 +547,8 @@ struct page **alloc_empty_pages_and_page
                v = page_address(page);
 
                ret = apply_to_page_range(&init_mm, (unsigned long)v,
-                                         PAGE_SIZE, dealloc_pte_fn,
-                                         NULL);
+                                         PAGE_SIZE << balloon_order,
+                                         dealloc_pte_fn, NULL);
 
                if (ret != 0) {
                        mutex_unlock(&balloon_mutex);
@@ -493,8 +556,10 @@ struct page **alloc_empty_pages_and_page
                        __free_page(page);
                        goto err;
                }
+               for (j = 0; j < balloon_npages; j++)
+                       pagevec[(i<<balloon_order)+j] = page++;
 
-               totalram_pages = --balloon_stats.current_pages;
+               totalram_pages = balloon_stats.current_pages -= balloon_npages;
 
                mutex_unlock(&balloon_mutex);
        }
@@ -507,7 +572,7 @@ struct page **alloc_empty_pages_and_page
  err:
        mutex_lock(&balloon_mutex);
        while (--i >= 0)
-               balloon_append(pagevec[i]);
+               balloon_append(pagevec[i << balloon_order]);
        mutex_unlock(&balloon_mutex);
        kfree(pagevec);
        pagevec = NULL;
@@ -517,15 +582,21 @@ EXPORT_SYMBOL_GPL(alloc_empty_pages_and_
 
 void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
 {
+       struct page *page;
        int i;
+       int npages;
 
        if (pagevec == NULL)
                return;
 
+       /* Round up to next number of balloon_order pages */
+       npages = (nr_pages + (balloon_npages-1)) >> balloon_order;
+
        mutex_lock(&balloon_mutex);
        for (i = 0; i < nr_pages; i++) {
-               BUG_ON(page_count(pagevec[i]) != 1);
-               balloon_append(pagevec[i]);
+               page = pagevec[i << balloon_order];
+               BUG_ON(page_count(page) != 1);
+               balloon_append(page);
        }
        mutex_unlock(&balloon_mutex);
 
@@ -535,6 +606,14 @@ void free_empty_pages_and_pagevec(struct
 }
 EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
 
+static int __init balloon_parse_huge(char *s)
+{
+       balloon_order = 9;
+       return 1;
+}
+
+__setup("balloon_hugepages", balloon_parse_huge);
+
 #define BALLOON_SHOW(name, format, args...)                            \
        static ssize_t show_##name(struct sys_device *dev,              \
                                   struct sysdev_attribute *attr,       \
@@ -568,7 +647,7 @@ static ssize_t store_target_kb(struct sy
 
        target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
 
-       balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+       balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
 
        return count;
 }
@@ -582,7 +661,7 @@ static ssize_t show_target(struct sys_de
 {
        return sprintf(buf, "%llu\n",
                       (unsigned long long)balloon_stats.target_pages
-                      << PAGE_SHIFT);
+                      << (PAGE_SHIFT + balloon_order));
 }
 
 static ssize_t store_target(struct sys_device *dev,
@@ -598,7 +677,7 @@ static ssize_t store_target(struct sys_d
 
        target_bytes = memparse(buf, &endchar);
 
-       balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+       balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
 
        return count;
 }

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.