
[Xen-changelog] [IA64] make xenLinux/ia64 privcmd mmap not to use dom0 memory



# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID 953753661a3bf13d079823fc422b4c93c5a1c240
# Parent  f0f88d9c4c9ede9c37b15a46f790d81f7648518a
[IA64] make xenLinux/ia64 privcmd mmap not to use dom0 memory

xenLinux/ia64 privcmd mmap uses pseudo physical address space.
It used alloc_pages() to allocate that space, which wastes dom0
memory: depending on the domU memory size, several hundred megabytes
are sometimes allocated.
With this patch, xenLinux/ia64 tries to find a region which can be
used safely and uses that region instead.

Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
---
 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c |  265 +++++++++++++++++-------
 1 files changed, 197 insertions(+), 68 deletions(-)

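The heart of the new code is the gap search in xen_ia64_privcmd_init():
if the space above the highest address in the EFI memory map is too
small, it falls back to the largest hole between consecutive
descriptors. The stand-alone sketch below mirrors that search under
simplifying assumptions: a plain {start, end} array replaces the
efi_memory_desc_t walk, and the map contents and limit are invented
for illustration.

/* Simplified model of the largest-gap scan in xen_ia64_privcmd_init().
 * Descriptors are assumed sorted by start address; out-of-order
 * entries are skipped, as the patch does to tolerate Xen's
 * incompletely sorted memory maps. */
#include <stdio.h>

struct mem_range {              /* stand-in for efi_memory_desc_t */
	unsigned long start;
	unsigned long end;      /* exclusive */
};

/* Find the largest hole between consecutive ranges below `limit`;
 * store its bounds in *lo and *hi and return its size. */
static unsigned long
largest_gap(const struct mem_range *md, int n, unsigned long limit,
	    unsigned long *lo, unsigned long *hi)
{
	unsigned long gap = 0, prev_end = 0;
	int i;

	for (i = 0; i < n - 1; i++) {
		unsigned long end = md[i].end;
		unsigned long next_start = md[i + 1].start;

		if (end > limit)
			break;
		if (end < prev_end)     /* out-of-order descriptor */
			continue;
		if (next_start > limit)
			next_start = limit;
		if (end < next_start && gap < next_start - end) {
			gap = next_start - end;
			*lo = end;
			*hi = next_start;
		}
		prev_end = end;
	}
	return gap;
}

int main(void)
{
	/* invented map: two used regions with a 64MB hole between them */
	struct mem_range map[] = {
		{ 0x0000000UL, 0x4000000UL },
		{ 0x8000000UL, 0x9000000UL },
		{ 0x9000000UL, 0xa000000UL },
	};
	unsigned long lo = 0, hi = 0;
	unsigned long gap = largest_gap(map, 3, 0x10000000UL, &lo, &hi);

	printf("largest gap: 0x%lx bytes at [0x%lx, 0x%lx)\n", gap, lo, hi);
	return 0;
}

The real routine then rounds the chosen bounds with GRANULEROUNDUP()
and refuses ranges smaller than XEN_IA64_PRIVCMD_LEAST_GAP_SIZE (1GB),
warning but continuing if nothing large enough exists.
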
diff -r f0f88d9c4c9e -r 953753661a3b linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Tue May 23 09:17:57 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Tue May 23 15:09:21 2006 -0600
@@ -360,49 +360,172 @@ struct address_space xen_ia64_foreign_du
 
 ///////////////////////////////////////////////////////////////////////////
 // foreign mapping
+#include <linux/efi.h>
+#include <asm/meminit.h> // for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}()
+
+static unsigned long privcmd_resource_min = 0;
+// Xen/ia64 currently can handle pseudo physical addresses of up to
+// (PAGE_SHIFT * 3) bits.
+static unsigned long privcmd_resource_max = GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1);
+static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE;
+
+static unsigned long
+md_end_addr(const efi_memory_desc_t *md)
+{
+       return md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+}
+
+#define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE        (1024 * 1024 * 1024UL)
+static int
+xen_ia64_privcmd_check_size(unsigned long start, unsigned long end)
+{
+       return (start < end &&
+               (end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE);
+}
+
+static int __init
+xen_ia64_privcmd_init(void)
+{
+       void *efi_map_start, *efi_map_end, *p;
+       u64 efi_desc_size;
+       efi_memory_desc_t *md;
+       unsigned long tmp_min;
+       unsigned long tmp_max;
+       unsigned long gap_size;
+       unsigned long prev_end;
+
+       if (!is_running_on_xen())
+               return -1;
+
+       efi_map_start = __va(ia64_boot_param->efi_memmap);
+       efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+       efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+       // First, check the highest address in use.
+       for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+               // nothing
+       }
+       md = p - efi_desc_size;
+       privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md));
+       if (xen_ia64_privcmd_check_size(privcmd_resource_min,
+                                       privcmd_resource_max)) {
+               goto out;
+       }
+
+       // The highest address in use is too large; try to find the largest gap.
+       tmp_min = privcmd_resource_max;
+       tmp_max = 0;
+       gap_size = 0;
+       prev_end = 0;
+       for (p = efi_map_start;
+            p < efi_map_end - efi_desc_size;
+            p += efi_desc_size) {
+               unsigned long end;
+               efi_memory_desc_t* next;
+               unsigned long next_start;
+
+               md = p;
+               end = md_end_addr(md);
+               if (end > privcmd_resource_max) {
+                       break;
+               }
+               if (end < prev_end) {
+                       // Work around: Xen may pass incompletely sorted
+                       // memory descriptors, e.g.
+                       // [x, x + length]
+                       // [x, x]
+                       // where this order should be reversed.
+                       continue;
+               }
+               next = p + efi_desc_size;
+               next_start = next->phys_addr;
+               if (next_start > privcmd_resource_max) {
+                       next_start = privcmd_resource_max;
+               }
+               if (end < next_start && gap_size < (next_start - end)) {
+                       tmp_min = end;
+                       tmp_max = next_start;
+                       gap_size = tmp_max - tmp_min;
+               }
+               prev_end = end;
+       }
+
+       privcmd_resource_min = GRANULEROUNDUP(tmp_min);
+       if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) {
+               privcmd_resource_max = tmp_max;
+               goto out;
+       }
+
+       privcmd_resource_min = tmp_min;
+       privcmd_resource_max = tmp_max;
+       if (!xen_ia64_privcmd_check_size(privcmd_resource_min,
+                                        privcmd_resource_max)) {
+               // No large enough gap was found.
+               // Go ahead anyway, with a warning, hoping that no large
+               // region will be requested.
+               printk(KERN_WARNING "xen privcmd: no large enough region for privcmd mmap was found.\n");
+       }
+
+out:
+       printk(KERN_INFO "xen privcmd uses pseudo physical addr range [0x%lx, 0x%lx] (%ldMB)\n",
+              privcmd_resource_min, privcmd_resource_max, 
+              (privcmd_resource_max - privcmd_resource_min) >> 20);
+       BUG_ON(privcmd_resource_min >= privcmd_resource_max);
+       return 0;
+}
+late_initcall(xen_ia64_privcmd_init);
 
 struct xen_ia64_privcmd_entry {
        atomic_t        map_count;
-       struct page*    page;
+#define INVALID_GPFN   (~0UL)
+       unsigned long   gpfn;
+};
+
+struct xen_ia64_privcmd_range {
+       atomic_t                        ref_count;
+       unsigned long                   pgoff; // in PAGE_SIZE
+       struct resource*                res;
+
+       unsigned long                   num_entries;
+       struct xen_ia64_privcmd_entry   entries[0];
+};
+
+struct xen_ia64_privcmd_vma {
+       struct xen_ia64_privcmd_range*  range;
+
+       unsigned long                   num_entries;
+       struct xen_ia64_privcmd_entry*  entries;
 };
 
 static void
 xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
 {
        atomic_set(&entry->map_count, 0);
-       entry->page = NULL;
-}
-
-//TODO alloc_page() to allocate pseudo physical address space is 
-//     waste of memory.
-//     When vti domain is created, qemu maps all of vti domain pages which 
-//     reaches to several hundred megabytes at least.
-//     remove alloc_page().
+       entry->gpfn = INVALID_GPFN;
+}
+
 static int
 xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
                            unsigned long addr,
-                           struct xen_ia64_privcmd_entry* entry,
+                           struct xen_ia64_privcmd_range* privcmd_range,
+                           int i,
                            unsigned long mfn,
                            pgprot_t prot,
                            domid_t domid)
 {
        int error = 0;
-       struct page* page;
+       struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
        unsigned long gpfn;
 
        BUG_ON((addr & ~PAGE_MASK) != 0);
        BUG_ON(mfn == INVALID_MFN);
 
-       if (entry->page != NULL) {
+       if (entry->gpfn != INVALID_GPFN) {
                error = -EBUSY;
                goto out;
        }
-       page = alloc_page(GFP_KERNEL);
-       if (page == NULL) {
-               error = -ENOMEM;
-               goto out;
-       }
-       gpfn = page_to_pfn(page);
+       gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i;
 
        error = HYPERVISOR_add_physmap(gpfn, mfn, 0/* prot:XXX */,
                                       domid);
@@ -413,15 +536,13 @@ xen_ia64_privcmd_entry_mmap(struct vm_ar
        prot = vma->vm_page_prot;
        error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
        if (error != 0) {
-               (void)HYPERVISOR_zap_physmap(gpfn, 0);
-               error = HYPERVISOR_populate_physmap(gpfn, 0, 0);
+               error = HYPERVISOR_zap_physmap(gpfn, 0);
                if (error) {
                        BUG();//XXX
                }
-               __free_page(page);
        } else {
                atomic_inc(&entry->map_count);
-               entry->page = page;
+               entry->gpfn = gpfn;
        }
 
 out:
@@ -429,30 +550,28 @@ out:
 }
 
 static void
-xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_entry* entry)
-{
-       struct page* page = entry->page;
-       unsigned long gpfn = page_to_pfn(page);
+xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range* privcmd_range,
+                             int i)
+{
+       struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
+       unsigned long gpfn = entry->gpfn;
+       //gpfn = (privcmd_range->res->start >> PAGE_SHIFT) +
+       //      (vma->vm_pgoff - privcmd_range->pgoff);
        int error;
 
        error = HYPERVISOR_zap_physmap(gpfn, 0);
        if (error) {
                BUG();//XXX
        }
-
-       error = HYPERVISOR_populate_physmap(gpfn, 0, 0);
-       if (error) {
-               BUG();//XXX
-       }
-
-       entry->page = NULL;
-       __free_page(page);
+       entry->gpfn = INVALID_GPFN;
 }
 
 static int
-xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_entry* entry)
-{
-       if (entry->page != NULL) {
+xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range* privcmd_range,
+                           int i)
+{
+       struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
+       if (entry->gpfn != INVALID_GPFN) {
                atomic_inc(&entry->map_count);
        } else {
                BUG_ON(atomic_read(&entry->map_count) != 0);
@@ -460,27 +579,15 @@ xen_ia64_privcmd_entry_open(struct xen_i
 }
 
 static int
-xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_entry* entry)
-{
-       if (entry->page != NULL && atomic_dec_and_test(&entry->map_count)) {
-               xen_ia64_privcmd_entry_munmap(entry);
-       }
-}
-
-struct xen_ia64_privcmd_range {
-       atomic_t                        ref_count;
-       unsigned long                   pgoff; // in PAGE_SIZE
-
-       unsigned long                   num_entries;
-       struct xen_ia64_privcmd_entry   entries[0];
-};
-
-struct xen_ia64_privcmd_vma {
-       struct xen_ia64_privcmd_range*  range;
-
-       unsigned long                   num_entries;
-       struct xen_ia64_privcmd_entry*  entries;
-};
+xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range* privcmd_range,
+                            int i)
+{
+       struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
+       if (entry->gpfn != INVALID_GPFN &&
+           atomic_dec_and_test(&entry->map_count)) {
+               xen_ia64_privcmd_entry_munmap(privcmd_range, i);
+       }
+}
 
 static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
 static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);
@@ -507,7 +614,7 @@ __xen_ia64_privcmd_vma_open(struct vm_ar
        privcmd_vma->entries = &privcmd_range->entries[entry_offset];
        vma->vm_private_data = privcmd_vma;
        for (i = 0; i < privcmd_vma->num_entries; i++) {
-               xen_ia64_privcmd_entry_open(&privcmd_vma->entries[i]);
+               xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i);
        }
 
        vma->vm_private_data = privcmd_vma;
@@ -533,10 +640,11 @@ xen_ia64_privcmd_vma_close(struct vm_are
        struct xen_ia64_privcmd_vma* privcmd_vma =
                (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
        struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
+       unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
        unsigned long i;
 
        for (i = 0; i < privcmd_vma->num_entries; i++) {
-               xen_ia64_privcmd_entry_close(&privcmd_vma->entries[i]);
+               xen_ia64_privcmd_entry_close(privcmd_range, entry_offset + i);
        }
        vma->vm_private_data = NULL;
        kfree(privcmd_vma);
@@ -547,9 +655,11 @@ xen_ia64_privcmd_vma_close(struct vm_are
                        struct xen_ia64_privcmd_entry* entry =
                                &privcmd_range->entries[i];
                        BUG_ON(atomic_read(&entry->map_count) != 0);
-                       BUG_ON(entry->page != NULL);
+                       BUG_ON(entry->gpfn != INVALID_GPFN);
                }
 #endif
+               release_resource(privcmd_range->res);
+               kfree(privcmd_range->res);
                vfree(privcmd_range);
        }
 }
@@ -557,13 +667,18 @@ int
 int
 privcmd_mmap(struct file * file, struct vm_area_struct * vma)
 {
-       unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-       struct xen_ia64_privcmd_range* privcmd_range;
-       struct xen_ia64_privcmd_vma* privcmd_vma;
+       int error;
+       unsigned long size = vma->vm_end - vma->vm_start;
+       unsigned long num_entries = size >> PAGE_SHIFT;
+       struct xen_ia64_privcmd_range* privcmd_range = NULL;
+       struct xen_ia64_privcmd_vma* privcmd_vma = NULL;
+       struct resource* res = NULL;
        unsigned long i;
        BUG_ON(!running_on_xen);
 
        BUG_ON(file->private_data != NULL);
+
+       error = -ENOMEM;
        privcmd_range =
                vmalloc(sizeof(*privcmd_range) +
                        sizeof(privcmd_range->entries[0]) * num_entries);
@@ -574,6 +689,18 @@ privcmd_mmap(struct file * file, struct 
        if (privcmd_vma == NULL) {
                goto out_enomem1;
        }
+       res = kzalloc(sizeof(*res), GFP_KERNEL);
+       if (res == NULL) {
+               goto out_enomem1;
+       }
+       res->name = "Xen privcmd mmap";
+       error = allocate_resource(&iomem_resource, res, size,
+                                 privcmd_resource_min, privcmd_resource_max,
+                                 privcmd_resource_align, NULL, NULL);
+       if (error) {
+               goto out_enomem1;
+       }
+       privcmd_range->res = res;
 
        /* DONTCOPY is essential for Xen as copy_page_range is broken. */
        vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
@@ -589,10 +716,11 @@ privcmd_mmap(struct file * file, struct 
        return 0;
 
 out_enomem1:
+       kfree(res);
        kfree(privcmd_vma);
 out_enomem0:
        vfree(privcmd_range);
-       return -ENOMEM;
+       return error;
 }
 
 int
@@ -605,6 +733,9 @@ direct_remap_pfn_range(struct vm_area_st
 {
        struct xen_ia64_privcmd_vma* privcmd_vma =
                (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
+       struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
+       unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
+
        unsigned long i;
        unsigned long offset;
        int error = 0;
@@ -618,9 +749,7 @@ direct_remap_pfn_range(struct vm_area_st
 
        i = (address - vma->vm_start) >> PAGE_SHIFT;
        for (offset = 0; offset < size; offset += PAGE_SIZE) {
-               struct xen_ia64_privcmd_entry* entry =
-                       &privcmd_vma->entries[i];
-               error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, entry, mfn, prot, domid);
+               error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, privcmd_range, entry_offset + i, mfn, prot, domid);
                if (error != 0) {
                        break;
                }

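For the mapping path itself, the key change is how a guest pseudo
physical frame number (gpfn) is obtained: the old code burned a real
dom0 page per mapped frame via alloc_page()/page_to_pfn(), while the
new code just indexes into the range reserved with allocate_resource().
A tiny user-space illustration of that arithmetic follows; PAGE_SHIFT
is fixed at 14 (16KB pages, a common ia64 configuration) and res_start
stands in for privcmd_range->res->start, both assumptions made only
for the example.

#include <stdio.h>

#define PAGE_SHIFT 14   /* example value; ia64 supports several page sizes */

/* Mirror of the patch's computation:
 * gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i */
static unsigned long entry_gpfn(unsigned long res_start, int i)
{
	return (res_start >> PAGE_SHIFT) + i;
}

int main(void)
{
	unsigned long res_start = 0x10000000UL; /* hypothetical reserved base */
	int i;

	for (i = 0; i < 4; i++)
		printf("entry %d -> gpfn 0x%lx\n", i, entry_gpfn(res_start, i));
	return 0;
}

Because no struct page backs these frames, teardown also simplifies:
xen_ia64_privcmd_entry_munmap() only has to zap the physmap entry and
reset gpfn to INVALID_GPFN, and the whole reserved range is released
once the last VMA referencing it closes.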