[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [linux-2.6.18-xen] xen/x86: synchronize vmalloc_sync_all() with mm_{, un}pin()



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1285142241 -3600
# Node ID 2146cf82c90c48ddc191728f3136a1db098b6c71
# Parent  f5635e334aa5e778a65f01339a63849eb17f6dc3
xen/x86: synchronize vmalloc_sync_all() with mm_{,un}pin()

As recently diagnosed by Citrix engineers, mm_{,un}pin() and
vmalloc_sync_all() aren't properly synchronized. So we add a backlink
to the referencing struct mm_struct to the pgd's struct page, and use
this to lock the page table updates in vmalloc_sync_all().

Due to the way pgd-s get allocated and put on the global list on i386,
we have to account for the backlink not to be set yet (in which case
they cannot be subject to (un)pinning.

Along the way, I found it necessary/desirable to also fix
- a potential NULL dereference in i386's pgd_alloc(),
- x86-64 adding not yet cleaned out pgd-s to the global list, and
- x86-64 removing pgd-s from the global list rather late.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 arch/i386/mm/fault-xen.c                  |   19 ++++++++++++++++---
 arch/i386/mm/pgtable-xen.c                |    9 ++++++++-
 arch/x86_64/mm/fault-xen.c                |    6 ++++++
 include/asm-x86_64/mach-xen/asm/pgalloc.h |   18 ++++++++++++------
 4 files changed, 42 insertions(+), 10 deletions(-)

diff -r f5635e334aa5 -r 2146cf82c90c arch/i386/mm/fault-xen.c
--- a/arch/i386/mm/fault-xen.c  Wed Sep 22 08:56:26 2010 +0100
+++ b/arch/i386/mm/fault-xen.c  Wed Sep 22 08:57:21 2010 +0100
@@ -762,12 +762,25 @@ void vmalloc_sync_all(void)
                                return;
                        }
                        for (page = pgd_list; page; page =
-                                       (struct page *)page->index)
-                               if (!vmalloc_sync_one(page_address(page),
-                                                               address)) {
+                                       (struct page *)page->index) {
+                               spinlock_t *lock = page->mapping
+                                       ? &((struct mm_struct *)page->mapping)
+                                               ->page_table_lock
+                                       : NULL;
+                               pmd_t *pmd;
+
+                               if (lock)
+                                       spin_lock(lock);
+                               pmd = vmalloc_sync_one(page_address(page),
+                                                      address);
+                               if (lock)
+                                       spin_unlock(lock);
+
+                               if (!pmd) {
                                        BUG_ON(page != pgd_list);
                                        break;
                                }
+                       }
                        spin_unlock_irqrestore(&pgd_lock, flags);
                        if (!page)
                                set_bit(sync_index(address), insync);
diff -r f5635e334aa5 -r 2146cf82c90c arch/i386/mm/pgtable-xen.c
--- a/arch/i386/mm/pgtable-xen.c        Wed Sep 22 08:56:26 2010 +0100
+++ b/arch/i386/mm/pgtable-xen.c        Wed Sep 22 08:57:21 2010 +0100
@@ -232,6 +232,7 @@ static inline void pgd_list_del(pgd_t *p
        *pprev = next;
        if (next)
                set_page_private(next, (unsigned long)pprev);
+       page->mapping = NULL;
 }
 
 void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
@@ -273,9 +274,15 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
        pmd_t **pmd;
        unsigned long flags;
 
+       if (!pgd)
+               return NULL;
+
        pgd_test_and_unpin(pgd);
 
-       if (PTRS_PER_PMD == 1 || !pgd)
+       /* Store a back link for vmalloc_sync_all(). */
+       virt_to_page(pgd)->mapping = (void *)mm;
+
+       if (PTRS_PER_PMD == 1)
                return pgd;
 
        if (HAVE_SHARED_KERNEL_PMD) {
diff -r f5635e334aa5 -r 2146cf82c90c arch/x86_64/mm/fault-xen.c
--- a/arch/x86_64/mm/fault-xen.c        Wed Sep 22 08:56:26 2010 +0100
+++ b/arch/x86_64/mm/fault-xen.c        Wed Sep 22 08:57:21 2010 +0100
@@ -677,6 +677,9 @@ DEFINE_SPINLOCK(pgd_lock);
 DEFINE_SPINLOCK(pgd_lock);
 struct page *pgd_list;
 
+#define pgd_page_table(what, pg) \
+       spin_##what(&((struct mm_struct *)(pg)->mapping)->page_table_lock)
+
 void vmalloc_sync_all(void)
 {
        /* Note that races in the updates of insync and start aren't 
@@ -699,10 +702,13 @@ void vmalloc_sync_all(void)
                             page = (struct page *)page->index) {
                                pgd_t *pgd;
                                pgd = (pgd_t *)page_address(page) + 
pgd_index(address);
+
+                               pgd_page_table(lock, page);
                                if (pgd_none(*pgd))
                                        set_pgd(pgd, *pgd_ref);
                                else
                                        BUG_ON(pgd_page(*pgd) != 
pgd_page(*pgd_ref));
+                               pgd_page_table(unlock, page);
                        }
                        spin_unlock(&pgd_lock);
                        set_bit(pgd_index(address), insync);
diff -r f5635e334aa5 -r 2146cf82c90c include/asm-x86_64/mach-xen/asm/pgalloc.h
--- a/include/asm-x86_64/mach-xen/asm/pgalloc.h Wed Sep 22 08:56:26 2010 +0100
+++ b/include/asm-x86_64/mach-xen/asm/pgalloc.h Wed Sep 22 08:57:21 2010 +0100
@@ -95,9 +95,12 @@ static inline void pud_free(pud_t *pud)
        pte_free(virt_to_page(pud));
 }
 
-static inline void pgd_list_add(pgd_t *pgd)
+static inline void pgd_list_add(pgd_t *pgd, void *mm)
 {
        struct page *page = virt_to_page(pgd);
+
+       /* Store a back link for vmalloc_sync_all(). */
+       page->mapping = mm;
 
        spin_lock(&pgd_lock);
        page->index = (pgoff_t)pgd_list;
@@ -119,6 +122,8 @@ static inline void pgd_list_del(pgd_t *p
        if (next)
                next->private = (unsigned long)pprev;
        spin_unlock(&pgd_lock);
+
+       page->mapping = NULL;
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
@@ -127,22 +132,22 @@ static inline pgd_t *pgd_alloc(struct mm
         * We allocate two contiguous pages for kernel and user.
         */
        unsigned boundary;
-       pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
+       pgd_t *pgd;
+
+       pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 1);
        if (!pgd)
                return NULL;
-       pgd_list_add(pgd);
+       pgd_list_add(pgd, mm);
        /*
         * Copy kernel pointers in from init.
         * Could keep a freelist or slab cache of those because the kernel
         * part never changes.
         */
        boundary = pgd_index(__PAGE_OFFSET);
-       memset(pgd, 0, boundary * sizeof(pgd_t));
        memcpy(pgd + boundary,
               init_level4_pgt + boundary,
               (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
 
-       memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
        /*
         * Set level3_user_pgt for vsyscall area
         */
@@ -154,6 +159,8 @@ static inline void pgd_free(pgd_t *pgd)
 static inline void pgd_free(pgd_t *pgd)
 {
        pte_t *ptep = virt_to_ptep(pgd);
+
+       pgd_list_del(pgd);
 
        if (!pte_write(*ptep)) {
                xen_pgd_unpin(__pa(pgd));
@@ -174,7 +181,6 @@ static inline void pgd_free(pgd_t *pgd)
                               0));
        }
 
-       pgd_list_del(pgd);
        free_pages((unsigned long)pgd, 1);
 }
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.