
[Xen-changelog] [xen-unstable] linux: Various cleanup and locking clarification (and fixing!)



# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1174058348 0
# Node ID 3a186e94f613371b4dfa98b8e0a8d5aaf0f0d681
# Parent  ba83d33c961b3e99a2d309b520162f71af56c972
linux: Various cleanup and locking clarification (and fixing!)
of mm pinning/unpinning logic.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c    |   14 ++-
 linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c |   98 ++++++++++++---------
 2 files changed, 68 insertions(+), 44 deletions(-)
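
The mechanical change repeated throughout the patch is that calls to
HYPERVISOR_update_va_mapping() are no longer wrapped in BUG_ON(); the
return value is tested explicitly and BUG() is called on failure. As a
rough before/after illustration only (a userspace mock, not the Xen
code: fake_update_va_mapping() and the stubbed BUG()/BUG_ON() below
stand in for the real hypercall and kernel macros), the pattern looks
like this:

    #include <stdio.h>
    #include <stdlib.h>

    #define BUG()      abort()
    #define BUG_ON(x)  do { if (x) BUG(); } while (0)

    /* Pretend hypercall: returns 0 on success, nonzero on failure. */
    static int fake_update_va_mapping(unsigned long va, int make_writable)
    {
            (void)va;
            (void)make_writable;
            return 0;
    }

    int main(void)
    {
            unsigned long va = 0x1000;

            /* Old style: the hypercall sits inside the assertion macro. */
            BUG_ON(fake_update_va_mapping(va, 0));

            /* Style used after this patch: ordinary call, explicit check. */
            if (fake_update_va_mapping(va, 1))
                    BUG();

            printf("both mapping updates succeeded\n");
            return 0;
    }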

diff -r ba83d33c961b -r 3a186e94f613 linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c   Fri Mar 16 15:18:33 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c   Fri Mar 16 15:19:08 2007 +0000
@@ -256,8 +256,9 @@ void pte_free(struct page *pte)
                unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT);
 
                if (!pte_write(*virt_to_ptep(va)))
-                       BUG_ON(HYPERVISOR_update_va_mapping(
-                              va, pfn_pte(pfn, PAGE_KERNEL), 0));
+                       if (HYPERVISOR_update_va_mapping(
+                               va, pfn_pte(pfn, PAGE_KERNEL), 0))
+                               BUG();
        } else
                clear_bit(PG_pinned, &pte->flags);
 
@@ -672,14 +673,23 @@ void mm_pin_all(void)
 void mm_pin_all(void)
 {
        struct page *page;
+       unsigned long flags;
 
        if (xen_feature(XENFEAT_writable_page_tables))
                return;
 
+       /*
+        * Allow uninterrupted access to the pgd_list. Also protects
+        * __pgd_pin() by disabling preemption.
+        * All other CPUs must be at a safe point (e.g., in stop_machine
+        * or offlined entirely).
+        */
+       spin_lock_irqsave(&pgd_lock, flags);
        for (page = pgd_list; page; page = (struct page *)page->index) {
                if (!test_bit(PG_pinned, &page->flags))
                        __pgd_pin((pgd_t *)page_address(page));
        }
+       spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 void _arch_dup_mmap(struct mm_struct *mm)
diff -r ba83d33c961b -r 3a186e94f613 linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c        Fri Mar 16 15:18:33 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c        Fri Mar 16 15:19:08 2007 +0000
@@ -79,14 +79,17 @@ void mm_pin(struct mm_struct *mm)
        spin_lock(&mm->page_table_lock);
 
        mm_walk(mm, PAGE_KERNEL_RO);
-       BUG_ON(HYPERVISOR_update_va_mapping(
-                      (unsigned long)mm->pgd,
-                      pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
-                      UVMF_TLB_FLUSH));
-       BUG_ON(HYPERVISOR_update_va_mapping(
-                      (unsigned long)__user_pgd(mm->pgd),
-                      pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL_RO),
-                      UVMF_TLB_FLUSH));
+       if (HYPERVISOR_update_va_mapping(
+               (unsigned long)mm->pgd,
+               pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
+               UVMF_TLB_FLUSH))
+               BUG();
+       if (HYPERVISOR_update_va_mapping(
+               (unsigned long)__user_pgd(mm->pgd),
+               pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT,
+                       PAGE_KERNEL_RO),
+               UVMF_TLB_FLUSH))
+               BUG();
        xen_pgd_pin(__pa(mm->pgd)); /* kernel */
        xen_pgd_pin(__pa(__user_pgd(mm->pgd))); /* user */
        mm->context.pinned = 1;
@@ -106,12 +109,15 @@ void mm_unpin(struct mm_struct *mm)
 
        xen_pgd_unpin(__pa(mm->pgd));
        xen_pgd_unpin(__pa(__user_pgd(mm->pgd)));
-       BUG_ON(HYPERVISOR_update_va_mapping(
-                      (unsigned long)mm->pgd,
-                      pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0));
-       BUG_ON(HYPERVISOR_update_va_mapping(
-                      (unsigned long)__user_pgd(mm->pgd),
-                      pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL), 0));
+       if (HYPERVISOR_update_va_mapping(
+               (unsigned long)mm->pgd,
+               pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0))
+               BUG();
+       if (HYPERVISOR_update_va_mapping(
+               (unsigned long)__user_pgd(mm->pgd),
+               pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT,
+                       PAGE_KERNEL), 0))
+               BUG();
        mm_walk(mm, PAGE_KERNEL);
        xen_tlb_flush();
        mm->context.pinned = 0;
@@ -127,43 +133,50 @@ void mm_pin_all(void)
        if (xen_feature(XENFEAT_writable_page_tables))
                return;
 
+       /*
+        * Allow uninterrupted access to the mm_unpinned list. We don't
+        * actually take the mm_unpinned_lock as it is taken inside mm_pin().
+        * All other CPUs must be at a safe point (e.g., in stop_machine
+        * or offlined entirely).
+        */
+       preempt_disable();
        while (!list_empty(&mm_unpinned))       
                mm_pin(list_entry(mm_unpinned.next, struct mm_struct,
                                  context.unpinned));
+       preempt_enable();
 }
 
 void _arch_dup_mmap(struct mm_struct *mm)
 {
-    if (!mm->context.pinned)
-        mm_pin(mm);
+       if (!mm->context.pinned)
+               mm_pin(mm);
 }
 
 void _arch_exit_mmap(struct mm_struct *mm)
 {
-    struct task_struct *tsk = current;
-
-    task_lock(tsk);
-
-    /*
-     * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
-     * *much* faster this way, as no tlb flushes means bigger wrpt batches.
-     */
-    if ( tsk->active_mm == mm )
-    {
-        tsk->active_mm = &init_mm;
-        atomic_inc(&init_mm.mm_count);
-
-        switch_mm(mm, &init_mm, tsk);
-
-        atomic_dec(&mm->mm_count);
-        BUG_ON(atomic_read(&mm->mm_count) == 0);
-    }
-
-    task_unlock(tsk);
-
-    if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) &&
-         !mm->context.has_foreign_mappings )
-        mm_unpin(mm);
+       struct task_struct *tsk = current;
+
+       task_lock(tsk);
+
+       /*
+        * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
+        * *much* faster this way, as no tlb flushes means bigger wrpt batches.
+        */
+       if (tsk->active_mm == mm) {
+               tsk->active_mm = &init_mm;
+               atomic_inc(&init_mm.mm_count);
+
+               switch_mm(mm, &init_mm, tsk);
+
+               atomic_dec(&mm->mm_count);
+               BUG_ON(atomic_read(&mm->mm_count) == 0);
+       }
+
+       task_unlock(tsk);
+
+       if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) &&
+            !mm->context.has_foreign_mappings )
+               mm_unpin(mm);
 }
 
 struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -183,8 +196,9 @@ void pte_free(struct page *pte)
        unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
 
        if (!pte_write(*virt_to_ptep(va)))
-               BUG_ON(HYPERVISOR_update_va_mapping(
-                       va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
+               if (HYPERVISOR_update_va_mapping(
+                       va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0))
+                       BUG();
 
        ClearPageForeign(pte);
        init_page_count(pte);
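
A note on the locking comments introduced above: the x86_64 mm_pin_all()
simply keeps pinning the head of the mm_unpinned list until the list is
empty, with preemption disabled and on the stated assumption that every
other CPU is at a safe point, while the i386 variant walks pgd_list under
pgd_lock with interrupts disabled. A minimal userspace sketch of the
x86_64 drain-the-list shape follows; mock_mm, unpinned_list and
mock_pin() are invented names standing in for mm_struct, mm_unpinned and
mm_pin():

    #include <stdio.h>

    struct mock_mm {
            int id;
            int pinned;
            struct mock_mm *next;   /* link on the unpinned list */
    };

    static struct mock_mm *unpinned_list;   /* all not-yet-pinned mms */

    /* Pin one mm and unlink it from the unpinned list. This sketch only
     * handles the list head; the real mm_pin() does a proper list_del()
     * under mm_unpinned_lock and issues the pinning hypercalls. */
    static void mock_pin(struct mock_mm *mm)
    {
            unpinned_list = mm->next;
            mm->next = NULL;
            mm->pinned = 1;
            printf("pinned mm %d\n", mm->id);
    }

    /* Shape of the x86_64 mm_pin_all(): pin the head until nothing is
     * left. In the kernel this loop runs with preemption disabled and
     * all other CPUs parked at a safe point. */
    static void mock_pin_all(void)
    {
            while (unpinned_list)
                    mock_pin(unpinned_list);
    }

    int main(void)
    {
            struct mock_mm a = { .id = 1, .next = NULL };
            struct mock_mm b = { .id = 2, .next = &a };

            unpinned_list = &b;
            mock_pin_all();
            return 0;
    }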
