[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Improve multi-processor XenLinux fork/exec/destroy times. We do this



ChangeSet 1.1391, 2005/04/27 15:42:32+01:00, kaf24@xxxxxxxxxxxxxxxxxxxx

        Improve multi-processor XenLinux fork/exec/destroy times. We do this
        by lazily pinning page-tables for p.t. use, and aggressively unpinning
        them on last use, to put as little pressure on the batched wrpt
        interface as possible. Basically this means that the copy loop and
        destroy loop will usually be able to directly write pagetables with no
        Xen intervention at all (implicit or explicit).
        Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>



 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c             |    2 
 linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c                |    9 
 linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c             |  185 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h         |   22 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h |   26 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h     |   21 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h     |   12 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h    |    9 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h  |    9 
 linux-2.6.11-xen-sparse/mm/mmap.c                              | 2108 ++++++++++
 xen/arch/x86/mm.c                                              |   21 
 11 files changed, 2295 insertions(+), 129 deletions(-)


diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c 
b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c        2005-04-27 
11:02:21 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c        2005-04-27 
11:02:21 -04:00
@@ -100,8 +100,8 @@
        struct mm_struct * old_mm;
        int retval = 0;
 
+       memset(&mm->context, 0, sizeof(mm->context));
        init_MUTEX(&mm->context.sem);
-       mm->context.size = 0;
        old_mm = current->mm;
        if (old_mm && old_mm->context.size > 0) {
                down(&old_mm->context.sem);
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c 
b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c   2005-04-27 11:02:21 
-04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c   2005-04-27 11:02:21 
-04:00
@@ -710,18 +710,9 @@
 
 kmem_cache_t *pgd_cache;
 kmem_cache_t *pmd_cache;
-kmem_cache_t *pte_cache;
 
 void __init pgtable_cache_init(void)
 {
-       pte_cache = kmem_cache_create("pte",
-                               PTRS_PER_PTE*sizeof(pte_t),
-                               PTRS_PER_PTE*sizeof(pte_t),
-                               0,
-                               pte_ctor,
-                               pte_dtor);
-       if (!pte_cache)
-               panic("pgtable_cache_init(): Cannot create pte cache");
        if (PTRS_PER_PMD > 1) {
                pmd_cache = kmem_cache_create("pmd",
                                        PTRS_PER_PMD*sizeof(pmd_t),
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c 
b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c        2005-04-27 
11:02:21 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c        2005-04-27 
11:02:21 -04:00
@@ -198,59 +198,35 @@
        return pte;
 }
 
-void pte_ctor(void *pte, kmem_cache_t *cache, unsigned long unused)
-{
-       struct page *page = virt_to_page(pte);
-       SetPageForeign(page, pte_free);
-       set_page_count(page, 1);
-
-       clear_page(pte);
-       make_page_readonly(pte);
-       xen_pte_pin(__pa(pte));
-}
-
-void pte_dtor(void *pte, kmem_cache_t *cache, unsigned long unused)
-{
-       struct page *page = virt_to_page(pte);
-       ClearPageForeign(page);
-
-       xen_pte_unpin(__pa(pte));
-       make_page_writable(pte);
-}
-
 struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-       pte_t *ptep;
-
-#ifdef CONFIG_HIGHPTE
        struct page *pte;
 
+#ifdef CONFIG_HIGHPTE
        pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
-       if (pte == NULL)
-               return pte;
-       if (PageHighMem(pte))
-               return pte;
-       /* not a highmem page -- free page and grab one from the cache */
-       __free_page(pte);
+#else
+       pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+       if (pte) {
+               SetPageForeign(pte, pte_free);
+               set_page_count(pte, 1);
+       }
 #endif
-       ptep = kmem_cache_alloc(pte_cache, GFP_KERNEL);
-       if (ptep)
-               return virt_to_page(ptep);
-       return NULL;
+
+       return pte;
 }
 
 void pte_free(struct page *pte)
 {
+       unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
+
+       if (!pte_write(*virt_to_ptep(va)))
+               HYPERVISOR_update_va_mapping(
+                       va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0);
+
+       ClearPageForeign(pte);
        set_page_count(pte, 1);
-#ifdef CONFIG_HIGHPTE
-       if (!PageHighMem(pte))
-#endif
-               kmem_cache_free(pte_cache,
-                               phys_to_virt(page_to_pseudophys(pte)));
-#ifdef CONFIG_HIGHPTE
-       else
-               __free_page(pte);
-#endif
+
+       __free_page(pte);
 }
 
 void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
@@ -305,14 +281,11 @@
                        (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 
        if (PTRS_PER_PMD > 1)
-               goto out;
+               return;
 
        pgd_list_add(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
        memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
- out:
-       make_page_readonly(pgd);
-       xen_pgd_pin(__pa(pgd));
 }
 
 /* never called when PTRS_PER_PMD > 1 */
@@ -320,9 +293,6 @@
 {
        unsigned long flags; /* can be called from interrupt context */
 
-       xen_pgd_unpin(__pa(pgd));
-       make_page_writable(pgd);
-
        if (PTRS_PER_PMD > 1)
                return;
 
@@ -357,6 +327,15 @@
 void pgd_free(pgd_t *pgd)
 {
        int i;
+       pte_t *ptep = virt_to_ptep(pgd);
+
+       if (!pte_write(*ptep)) {
+               xen_pgd_unpin(__pa(pgd));
+               HYPERVISOR_update_va_mapping(
+                       (unsigned long)pgd,
+                       pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL),
+                       0);
+       }
 
        /* in the PAE case user pgd entries are overwritten before usage */
        if (PTRS_PER_PMD > 1)
@@ -369,28 +348,19 @@
 #ifndef CONFIG_XEN_SHADOW_MODE
 void make_lowmem_page_readonly(void *va)
 {
-       pgd_t *pgd = pgd_offset_k((unsigned long)va);
-       pud_t *pud = pud_offset(pgd, (unsigned long)va);
-       pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
-       pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+       pte_t *pte = virt_to_ptep(va);
        set_pte(pte, pte_wrprotect(*pte));
 }
 
 void make_lowmem_page_writable(void *va)
 {
-       pgd_t *pgd = pgd_offset_k((unsigned long)va);
-       pud_t *pud = pud_offset(pgd, (unsigned long)va);
-       pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
-       pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+       pte_t *pte = virt_to_ptep(va);
        set_pte(pte, pte_mkwrite(*pte));
 }
 
 void make_page_readonly(void *va)
 {
-       pgd_t *pgd = pgd_offset_k((unsigned long)va);
-       pud_t *pud = pud_offset(pgd, (unsigned long)va);
-       pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
-       pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+       pte_t *pte = virt_to_ptep(va);
        set_pte(pte, pte_wrprotect(*pte));
        if ( (unsigned long)va >= (unsigned long)high_memory )
        {
@@ -405,10 +375,7 @@
 
 void make_page_writable(void *va)
 {
-       pgd_t *pgd = pgd_offset_k((unsigned long)va);
-       pud_t *pud = pud_offset(pgd, (unsigned long)va);
-       pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
-       pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+       pte_t *pte = virt_to_ptep(va);
        set_pte(pte, pte_mkwrite(*pte));
        if ( (unsigned long)va >= (unsigned long)high_memory )
        {
@@ -439,3 +406,91 @@
        }
 }
 #endif /* CONFIG_XEN_SHADOW_MODE */
+
+void mm_pin(struct mm_struct *mm)
+{
+    pgd_t       *pgd;
+    struct page *page;
+    int          i;
+
+    spin_lock(&mm->page_table_lock);
+
+    for ( i = 0, pgd = mm->pgd; i < USER_PTRS_PER_PGD; i++, pgd++ )
+    {
+        if ( *(unsigned long *)pgd == 0 )
+            continue;
+        page = pmd_page(*(pmd_t *)pgd);
+        if ( !PageHighMem(page) )
+            HYPERVISOR_update_va_mapping(
+                (unsigned long)__va(page_to_pfn(page)<<PAGE_SHIFT),
+                pfn_pte(page_to_pfn(page), PAGE_KERNEL_RO), 0);
+    }
+
+    HYPERVISOR_update_va_mapping(
+        (unsigned long)mm->pgd,
+        pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO), 0);
+    xen_pgd_pin(__pa(mm->pgd));
+
+    mm->context.pinned = 1;
+
+    spin_unlock(&mm->page_table_lock);
+}
+
+void mm_unpin(struct mm_struct *mm)
+{
+    pgd_t       *pgd;
+    struct page *page;
+    int          i;
+
+    spin_lock(&mm->page_table_lock);
+
+    xen_pgd_unpin(__pa(mm->pgd));
+    HYPERVISOR_update_va_mapping(
+        (unsigned long)mm->pgd,
+        pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0);
+
+    for ( i = 0, pgd = mm->pgd; i < USER_PTRS_PER_PGD; i++, pgd++ )

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.