[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] linux/i386: relax highpte pinning



Just like making lowmem page tables read-only as late as possible, defer the
pinning of highmem page tables.
The use of PG_arch_1 for both PG_foreign and PG_pinned is no longer possible
now, therefore the patch also splits off PG_foreign to use a separate bit.
At once, the patch fixes a bug uncovered by the original highpte patch, in that
using virt_to_page() or pte_offset_kernel() in the context of pte handling is
inappropriate.
The modifications to pmd_populate() include quite a bit of cleanup, resulting in
the new macro being only slightly larger than the old one (otherwise it would
have about doubled in size).
Finally (and I'm not insisting on this part, but I think it is appropriate), it 
adds
PG_foreign and PG_pinned to the set of flags checked for during page
allocation and freeing.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: sle10-sp1-2007-01-12/arch/i386/mm/highmem-xen.c
===================================================================
--- sle10-sp1-2007-01-12.orig/arch/i386/mm/highmem-xen.c        2007-01-15 
17:59:57.000000000 +0100
+++ sle10-sp1-2007-01-12/arch/i386/mm/highmem-xen.c     2007-01-16 
12:40:53.000000000 +0100
@@ -55,7 +55,9 @@ void *kmap_atomic(struct page *page, enu
 /* Same as kmap_atomic but with PAGE_KERNEL_RO page protection. */
 void *kmap_atomic_pte(struct page *page, enum km_type type)
 {
-       return __kmap_atomic(page, type, PAGE_KERNEL_RO);
+       return __kmap_atomic(page, type,
+                            test_bit(PG_pinned, &page->flags)
+                            ? PAGE_KERNEL_RO : kmap_prot);
 }
 
 void kunmap_atomic(void *kvaddr, enum km_type type)
Index: sle10-sp1-2007-01-12/arch/i386/mm/pgtable-xen.c
===================================================================
--- sle10-sp1-2007-01-12.orig/arch/i386/mm/pgtable-xen.c        2007-01-15 
17:59:57.000000000 +0100
+++ sle10-sp1-2007-01-12/arch/i386/mm/pgtable-xen.c     2007-01-16 
12:59:12.000000000 +0100
@@ -26,7 +26,6 @@
 #include <asm/mmu_context.h>
 
 #include <xen/features.h>
-#include <xen/foreign_page.h>
 #include <asm/hypervisor.h>
 
 static void pgd_test_and_unpin(pgd_t *pgd);
@@ -238,14 +237,6 @@ struct page *pte_alloc_one(struct mm_str
 
 #ifdef CONFIG_HIGHPTE
        pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
-       if (pte && PageHighMem(pte)) {
-               struct mmuext_op op;
-
-               kmap_flush_unused();
-               op.cmd = MMUEXT_PIN_L1_TABLE;
-               op.arg1.mfn = pfn_to_mfn(page_to_pfn(pte));
-               BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-       }
 #else
        pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
 #endif
@@ -266,7 +257,7 @@ void pte_free(struct page *pte)
                if (!pte_write(*virt_to_ptep(va)))
                        BUG_ON(HYPERVISOR_update_va_mapping(
                               va, pfn_pte(pfn, PAGE_KERNEL), 0));
-       } else {
+       } else if (test_and_clear_bit(PG_pinned, &pte->flags)) {
                struct mmuext_op op;
 
                op.cmd = MMUEXT_UNPIN_TABLE;
@@ -586,13 +577,30 @@ void make_pages_writable(void *va, unsig
        }
 }
 
-static inline void pgd_walk_set_prot(void *pt, pgprot_t flags)
+static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags)
 {
-       struct page *page = virt_to_page(pt);
        unsigned long pfn = page_to_pfn(page);
 
-       if (PageHighMem(page))
+       if (PageHighMem(page)) {
+               struct mmuext_op op;
+
+               if (pgprot_val(flags) & _PAGE_RW) {
+                       if (!test_and_clear_bit(PG_pinned, &page->flags))
+                               return;
+                       op.cmd = MMUEXT_UNPIN_TABLE;
+               }
+               else {
+                       if (test_and_set_bit(PG_pinned, &page->flags))
+                               return;
+                       /* Only L1 tables can be allocated from highmem. */
+                       op.cmd = MMUEXT_PIN_L1_TABLE;
+               }
+               op.arg1.mfn = pfn_to_mfn(pfn);
+               kmap_flush_unused();
+               BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
                return;
+       }
+
        BUG_ON(HYPERVISOR_update_va_mapping(
                (unsigned long)__va(pfn << PAGE_SHIFT),
                pfn_pte(pfn, flags), 0));
@@ -603,7 +611,6 @@ static void pgd_walk(pgd_t *pgd_base, pg
        pgd_t *pgd = pgd_base;
        pud_t *pud;
        pmd_t *pmd;
-       pte_t *pte;
        int    g, u, m;
 
        if (xen_feature(XENFEAT_auto_translated_physmap))
@@ -614,18 +621,17 @@ static void pgd_walk(pgd_t *pgd_base, pg
                        continue;
                pud = pud_offset(pgd, 0);
                if (PTRS_PER_PUD > 1) /* not folded */
-                       pgd_walk_set_prot(pud,flags);
+                       pgd_walk_set_prot(virt_to_page(pud),flags);
                for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
                        if (pud_none(*pud))
                                continue;
                        pmd = pmd_offset(pud, 0);
                        if (PTRS_PER_PMD > 1) /* not folded */
-                               pgd_walk_set_prot(pmd,flags);
+                               pgd_walk_set_prot(virt_to_page(pmd),flags);
                        for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
                                if (pmd_none(*pmd))
                                        continue;
-                               pte = pte_offset_kernel(pmd,0);
-                               pgd_walk_set_prot(pte,flags);
+                               pgd_walk_set_prot(pmd_page(*pmd),flags);
                        }
                }
        }
Index: sle10-sp1-2007-01-12/include/asm-i386/mach-xen/asm/page.h
===================================================================
--- sle10-sp1-2007-01-12.orig/include/asm-i386/mach-xen/asm/page.h      
2007-01-15 17:59:57.000000000 +0100
+++ sle10-sp1-2007-01-12/include/asm-i386/mach-xen/asm/page.h   2007-01-15 
12:58:57.000000000 +0100
@@ -29,13 +29,12 @@
 #include <asm/bug.h>
 #include <xen/interface/xen.h>
 #include <xen/features.h>
-#include <xen/foreign_page.h>
 
-#define arch_free_page(_page,_order)                   \
-({     int foreign = PageForeign(_page);               \
-       if (foreign)                                    \
-               (PageForeignDestructor(_page))(_page);  \
-       foreign;                                        \
+#define arch_free_page(_page,_order)           \
+({     int foreign = PageForeign(_page);       \
+       if (foreign)                            \
+               PageForeignDestructor(_page);   \
+       foreign;                                \
 })
 #define HAVE_ARCH_FREE_PAGE
 
Index: sle10-sp1-2007-01-12/include/asm-i386/mach-xen/asm/pgalloc.h
===================================================================
--- sle10-sp1-2007-01-12.orig/include/asm-i386/mach-xen/asm/pgalloc.h   
2007-01-12 09:35:08.000000000 +0100
+++ sle10-sp1-2007-01-12/include/asm-i386/mach-xen/asm/pgalloc.h        
2007-01-16 12:49:22.000000000 +0100
@@ -6,28 +6,31 @@
 #include <linux/threads.h>
 #include <linux/mm.h>          /* for struct page */
 #include <asm/io.h>            /* for phys_to_virt and page_to_pseudophys */
-
-/* Is this pagetable pinned? */
-#define PG_pinned      PG_arch_1
+#include <asm/hypervisor.h>
 
 #define pmd_populate_kernel(mm, pmd, pte) \
                set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
 
 #define pmd_populate(mm, pmd, pte)                                     \
 do {                                                                   \
+       unsigned long pfn = page_to_pfn(pte);                           \
        if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) {     \
                if (!PageHighMem(pte))                                  \
                        BUG_ON(HYPERVISOR_update_va_mapping(            \
-                         (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT),\
-                         pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0));\
-               set_pmd(pmd, __pmd(_PAGE_TABLE +                        \
-                       ((unsigned long long)page_to_pfn(pte) <<        \
-                               (unsigned long long) PAGE_SHIFT)));     \
-       } else {                                                        \
-               *(pmd) = __pmd(_PAGE_TABLE +                            \
-                       ((unsigned long long)page_to_pfn(pte) <<        \
-                               (unsigned long long) PAGE_SHIFT));      \
-       }                                                               \
+                         (unsigned long)__va(pfn << PAGE_SHIFT),       \
+                         pfn_pte(pfn, PAGE_KERNEL_RO), 0));            \
+               else if (!test_and_set_bit(PG_pinned, &pte->flags)) {   \
+                       struct mmuext_op op;                            \
+                       kmap_flush_unused();                            \
+                       op.cmd = MMUEXT_PIN_L1_TABLE;                   \
+                       op.arg1.mfn = pfn_to_mfn(pfn);                  \
+                       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL,       \
+                                                   DOMID_SELF) < 0);   \
+               }                                                       \
+               set_pmd(pmd,                                            \
+                       __pmd(_PAGE_TABLE + ((paddr_t)pfn << PAGE_SHIFT))); \
+       } else                                                  \
+               *(pmd) = __pmd(_PAGE_TABLE + ((paddr_t)pfn << PAGE_SHIFT)); \
 } while (0)
 
 /*
Index: sle10-sp1-2007-01-12/include/asm-i386/mach-xen/asm/pgtable.h
===================================================================
--- sle10-sp1-2007-01-12.orig/include/asm-i386/mach-xen/asm/pgtable.h   
2007-01-12 09:32:30.000000000 +0100
+++ sle10-sp1-2007-01-12/include/asm-i386/mach-xen/asm/pgtable.h        
2007-01-16 12:40:17.000000000 +0100
@@ -26,6 +26,9 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 
+/* Is this pagetable pinned? */
+#define PG_pinned      PG_arch_1
+
 struct mm_struct;
 struct vm_area_struct;
 
Index: sle10-sp1-2007-01-12/include/asm-x86_64/mach-xen/asm/page.h
===================================================================
--- sle10-sp1-2007-01-12.orig/include/asm-x86_64/mach-xen/asm/page.h    
2007-01-15 17:59:57.000000000 +0100
+++ sle10-sp1-2007-01-12/include/asm-x86_64/mach-xen/asm/page.h 2007-01-15 
12:58:02.000000000 +0100
@@ -9,13 +9,12 @@
 #include <asm/bug.h>
 #endif
 #include <xen/interface/xen.h> 
-#include <xen/foreign_page.h>
 
-#define arch_free_page(_page,_order)                   \
-({     int foreign = PageForeign(_page);               \
-       if (foreign)                                    \
-               (PageForeignDestructor(_page))(_page);  \
-       foreign;                                        \
+#define arch_free_page(_page,_order)           \
+({     int foreign = PageForeign(_page);       \
+       if (foreign)                            \
+               PageForeignDestructor(_page);   \
+       foreign;                                \
 })
 #define HAVE_ARCH_FREE_PAGE
 
Index: sle10-sp1-2007-01-12/include/linux/page-flags.h
===================================================================
--- sle10-sp1-2007-01-12.orig/include/linux/page-flags.h        2007-01-15 
17:59:57.000000000 +0100
+++ sle10-sp1-2007-01-12/include/linux/page-flags.h     2007-01-15 
14:17:18.000000000 +0100
@@ -76,6 +76,8 @@
 #define PG_nosave_free         18      /* Free, should not be written */
 #define PG_uncached            19      /* Page has been mapped as uncached */
 
+#define PG_foreign             20      /* Page is owned by foreign allocator. 
*/
+
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
  * allowed.
@@ -344,6 +346,18 @@ extern void __mod_page_state_offset(unsi
 #define SetPageUncached(page)  set_bit(PG_uncached, &(page)->flags)
 #define ClearPageUncached(page)        clear_bit(PG_uncached, &(page)->flags)
 
+#define PageForeign(page)      test_bit(PG_foreign, &(page)->flags)
+#define SetPageForeign(page, dtor) do {                \
+       set_bit(PG_foreign, &(page)->flags);    \
+       (page)->mapping = (void *)dtor;         \
+} while (0)
+#define ClearPageForeign(page) do {            \
+       clear_bit(PG_foreign, &(page)->flags);  \
+       (page)->mapping = NULL;                 \
+} while (0)
+#define PageForeignDestructor(page)            \
+       ( (void (*) (struct page *)) (page)->mapping )(page)
+
 struct page;   /* forward declaration */
 
 int test_clear_page_dirty(struct page *page);
Index: sle10-sp1-2007-01-12/include/xen/foreign_page.h
===================================================================
--- sle10-sp1-2007-01-12.orig/include/xen/foreign_page.h        2007-01-15 
17:59:57.000000000 +0100
+++ /dev/null   1970-01-01 00:00:00.000000000 +0000
@@ -1,30 +0,0 @@
-/******************************************************************************
- * foreign_page.h
- * 
- * Provide a "foreign" page type, that is owned by a foreign allocator and 
- * not the normal buddy allocator in page_alloc.c
- * 
- * Copyright (c) 2004, K A Fraser
- */
-
-#ifndef __ASM_XEN_FOREIGN_PAGE_H__
-#define __ASM_XEN_FOREIGN_PAGE_H__
-
-#define PG_foreign             PG_arch_1
-
-#define PageForeign(page)      test_bit(PG_foreign, &(page)->flags)
-
-#define SetPageForeign(page, dtor) do {                \
-       set_bit(PG_foreign, &(page)->flags);    \
-       (page)->mapping = (void *)dtor;         \
-} while (0)
-
-#define ClearPageForeign(page) do {            \
-       clear_bit(PG_foreign, &(page)->flags);  \
-       (page)->mapping = NULL;                 \
-} while (0)
-
-#define PageForeignDestructor(page)    \
-       ( (void (*) (struct page *)) (page)->mapping )
-
-#endif /* __ASM_XEN_FOREIGN_PAGE_H__ */
Index: sle10-sp1-2007-01-12/mm/page_alloc.c
===================================================================
--- sle10-sp1-2007-01-12.orig/mm/page_alloc.c   2007-01-15 17:59:57.000000000 
+0100
+++ sle10-sp1-2007-01-12/mm/page_alloc.c        2007-01-16 12:41:24.000000000 
+0100
@@ -154,7 +154,11 @@ static void bad_page(struct page *page)
                        1 << PG_slab    |
                        1 << PG_swapcache |
                        1 << PG_writeback |
-                       1 << PG_buddy );
+                       1 << PG_buddy   |
+#ifdef CONFIG_X86_XEN
+                       1 << PG_pinned  |
+#endif
+                       1 << PG_foreign );
        set_page_count(page, 0);
        reset_page_mapcount(page);
        page->mapping = NULL;
@@ -368,7 +372,11 @@ static inline int free_pages_check(struc
                        1 << PG_swapcache |
                        1 << PG_writeback |
                        1 << PG_reserved |
-                       1 << PG_buddy ))))
+                       1 << PG_buddy   |
+#ifdef CONFIG_X86_XEN
+                       1 << PG_pinned  |
+#endif
+                       1 << PG_foreign ))))
                bad_page(page);
        if (PageDirty(page))
                __ClearPageDirty(page);
@@ -528,7 +536,11 @@ static int prep_new_page(struct page *pa
                        1 << PG_swapcache |
                        1 << PG_writeback |
                        1 << PG_reserved |
-                       1 << PG_buddy ))))
+                       1 << PG_buddy   |
+#ifdef CONFIG_X86_XEN
+                       1 << PG_pinned  |
+#endif
+                       1 << PG_foreign ))))
                bad_page(page);
 
        /*


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.