
[Xen-devel] [PATCH] linux/x86-64: sync page table allocation with i386 (was: Re: [PATCH] linux/i386: allow CONFIG_HIGHPTE on i386)



>>> Keir Fraser <keir@xxxxxxxxxxxxx> 10.01.07 12:28 >>>
>On 10/1/07 11:14, "Jan Beulich" <jbeulich@xxxxxxxxxx> wrote:
>
>>> Is it actually safe to circumvent tlb_remove_page()? It does rather more
>>> than just free_page()! That's why I used a ForeignPage hook when writing the
>>> i386 code. It's certainly *not* safe to assume that whoever wrote that part
>>> of the x86/64 port understood all the ramifications of what they were doing.
>> 
>> Not really, as I understand it - it either frees the page (when the TLB
>> is in fast mode) or inserts the page into the gather list, bumping the
>> contained pages count and freeing all of them and flushing the hardware
>> TLBs if exceeding the threshold - the latter step is otherwise done from
>> tlb_finish_mmu().
>
>Presumably this gathering happens because it may not be safe to free the
>pages without having first flushed the TLBs? Freeing the page with no TLB
>flush management at all would be a bad idea in that case.
>
>And I said that sync'ing with i386 is a good general rule. I trust the code
>a lot more. :-)
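
OK - the patch below therefore goes back to the tlb_remove_page()-based
__{pte,pmd,pud}_free_tlb definitions and instead marks the page table pages
PageForeign at allocation time, the way the i386 code does, so the normal
freeing path diverts them into pte_free().

For reference, the batching behaviour discussed above boils down to the
control flow sketched below (a stand-alone user space model, not the kernel
code - FREE_PTE_NR, the structure layouts and the *_model names are purely
illustrative):

#include <stdio.h>
#include <stdlib.h>

/*
 * Toy model of the mmu_gather behaviour: in "fast mode" a page is freed
 * immediately; otherwise it is queued, and the whole batch is freed (after
 * a TLB flush) once the batch fills up or when tlb_finish_mmu() runs.
 */
#define FREE_PTE_NR 8

struct page { int id; };

struct mmu_gather {
        int fast_mode;
        int nr;
        struct page *pages[FREE_PTE_NR];
};

static void free_page_model(struct page *pg)
{
        printf("free page %d\n", pg->id);
        free(pg);
}

/* Model of tlb_flush_mmu(): flush the TLBs, then free everything gathered. */
static void tlb_flush_mmu_model(struct mmu_gather *tlb)
{
        int i;

        if (!tlb->nr)
                return;
        printf("flush hardware TLBs\n");
        for (i = 0; i < tlb->nr; i++)
                free_page_model(tlb->pages[i]);
        tlb->nr = 0;
}

/* Model of tlb_remove_page(): either free right away, or gather for later. */
static void tlb_remove_page_model(struct mmu_gather *tlb, struct page *pg)
{
        if (tlb->fast_mode) {
                free_page_model(pg);
                return;
        }
        tlb->pages[tlb->nr++] = pg;
        if (tlb->nr >= FREE_PTE_NR)
                tlb_flush_mmu_model(tlb);
}

/* Model of tlb_finish_mmu(): drain whatever is still gathered. */
static void tlb_finish_mmu_model(struct mmu_gather *tlb)
{
        tlb_flush_mmu_model(tlb);
}

int main(void)
{
        struct mmu_gather tlb = { .fast_mode = 0, .nr = 0 };
        int i;

        for (i = 0; i < 10; i++) {
                struct page *pg = malloc(sizeof(*pg));

                pg->id = i;
                tlb_remove_page_model(&tlb, pg);
        }
        tlb_finish_mmu_model(&tlb);
        return 0;
}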

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: sle10-sp1-2007-01-10/arch/x86_64/mm/pageattr-xen.c
===================================================================
--- sle10-sp1-2007-01-10.orig/arch/x86_64/mm/pageattr-xen.c     2007-01-10 13:44:37.000000000 +0100
+++ sle10-sp1-2007-01-10/arch/x86_64/mm/pageattr-xen.c  2007-01-10 14:07:10.000000000 +0100
@@ -164,6 +164,18 @@ void _arch_exit_mmap(struct mm_struct *m
         mm_unpin(mm);
 }
 
+struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+       struct page *pte;
+
+       pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+       if (pte) {
+               SetPageForeign(pte, pte_free);
+               set_page_count(pte, 1);
+       }
+       return pte;
+}
+
 void pte_free(struct page *pte)
 {
        unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
@@ -171,6 +183,10 @@ void pte_free(struct page *pte)
        if (!pte_write(*virt_to_ptep(va)))
                BUG_ON(HYPERVISOR_update_va_mapping(
                        va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
+
+       ClearPageForeign(pte);
+       set_page_count(pte, 1);
+
        __free_page(pte);
 }
 #endif /* CONFIG_XEN */
Index: sle10-sp1-2007-01-10/include/asm-x86_64/mach-xen/asm/pgalloc.h
===================================================================
--- sle10-sp1-2007-01-10.orig/include/asm-x86_64/mach-xen/asm/pgalloc.h 2007-01-10 10:09:53.000000000 +0100
+++ sle10-sp1-2007-01-10/include/asm-x86_64/mach-xen/asm/pgalloc.h      2007-01-10 14:33:04.000000000 +0100
@@ -64,42 +64,35 @@ static inline void pgd_populate(struct m
        }
 }
 
-static inline void pmd_free(pmd_t *pmd)
+extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern void pte_free(struct page *pte);
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-       pte_t *ptep = virt_to_ptep(pmd);
+       struct page *pg;
 
-       if (!pte_write(*ptep)) {
-               BUG_ON(HYPERVISOR_update_va_mapping(
-                       (unsigned long)pmd,
-                       pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, PAGE_KERNEL),
-                       0));
-       }
-       free_page((unsigned long)pmd);
+       pg = pte_alloc_one(mm, addr);
+       return pg ? page_address(pg) : NULL;
 }
 
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline void pmd_free(pmd_t *pmd)
 {
-        pmd_t *pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-        return pmd;
+       BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+       pte_free(virt_to_page(pmd));
 }
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-        pud_t *pud = (pud_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-        return pud;
+       struct page *pg;
+
+       pg = pte_alloc_one(mm, addr);
+       return pg ? page_address(pg) : NULL;
 }
 
 static inline void pud_free(pud_t *pud)
 {
-       pte_t *ptep = virt_to_ptep(pud);
-
-       if (!pte_write(*ptep)) {
-               BUG_ON(HYPERVISOR_update_va_mapping(
-                       (unsigned long)pud,
-                       pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, PAGE_KERNEL),
-                       0));
-       }
-       free_page((unsigned long)pud);
+       BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
+       pte_free(virt_to_page(pud));
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
@@ -167,14 +160,6 @@ static inline pte_t *pte_alloc_one_kerne
        return pte;
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-       struct page *pte;
-
-       pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
-       return pte;
-}
-
 /* Should really implement gc for free page table pages. This could be
    done with a reference count in struct page. */
 
@@ -185,14 +170,8 @@ static inline void pte_free_kernel(pte_t
        free_page((unsigned long)pte); 
 }
 
-extern void pte_free(struct page *pte);
-
-//#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) 
-//#define __pmd_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
-//#define __pud_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
-
-#define __pte_free_tlb(tlb,x)   pte_free((x))
-#define __pmd_free_tlb(tlb,x)   pmd_free((x))
-#define __pud_free_tlb(tlb,x)   pud_free((x))
+#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pmd_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
+#define __pud_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
 
 #endif /* _X86_64_PGALLOC_H */
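
For completeness: the SetPageForeign() in the first hunk is, roughly, what
makes the tlb_remove_page() path safe again - a foreign page carries a
destructor that the freeing path invokes instead of releasing the page
itself, and pte_free() then re-establishes a writable mapping, clears the
flag and only then really frees the page. A toy model of that control flow
(structure layout and *_model names are illustrative only, not the kernel's
definitions):

#include <stdio.h>
#include <stdlib.h>

struct page {
        unsigned long flags;
        void (*foreign_dtor)(struct page *); /* stands in for the ForeignPage hook */
};

#define PG_foreign (1UL << 0)

static void set_page_foreign(struct page *pg, void (*dtor)(struct page *))
{
        pg->flags |= PG_foreign;
        pg->foreign_dtor = dtor;
}

static void free_page_raw(struct page *pg)
{
        printf("page %p handed back to the allocator\n", (void *)pg);
        free(pg);
}

/* Model of the freeing path: divert foreign pages to their destructor. */
static void free_page_model(struct page *pg)
{
        if (pg->flags & PG_foreign) {
                pg->foreign_dtor(pg);           /* e.g. pte_free() */
                return;
        }
        free_page_raw(pg);
}

/* Model of pte_free(): undo the read-only pinning, then really free. */
static void pte_free_model(struct page *pg)
{
        printf("pte_free: make page %p writable again\n", (void *)pg);
        pg->flags &= ~PG_foreign;               /* ClearPageForeign() */
        free_page_raw(pg);
}

int main(void)
{
        struct page *pte = calloc(1, sizeof(*pte));

        set_page_foreign(pte, pte_free_model);  /* as pte_alloc_one() now does */
        free_page_model(pte);                   /* as the gather path eventually does */
        return 0;
}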


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel