
[Xen-devel] Re: [PATCH] fix pgd_lock deadlock



On Tue, Feb 22, 2011 at 02:22:53PM +0000, Jan Beulich wrote:
> >>> On 22.02.11 at 14:49, Andrea Arcangeli <aarcange@xxxxxxxxxx> wrote:
> > On Tue, Feb 22, 2011 at 07:48:54AM +0000, Jan Beulich wrote:
> >> A possible alternative would be to acquire the page table lock
> >> in vmalloc_sync_all() only in the Xen case (perhaps by storing
> >> NULL into page->index in pgd_set_mm() when not running on
> >> Xen). This is utilizing the fact that there aren't (supposed to
> >> be - for non-pvops this is definitely the case) any TLB flush IPIs
> >> under Xen, and hence the race you're trying to fix doesn't
> >> exist there (while non-Xen doesn't need the extra locking).
> > 
> > That's fine with me. Can we use a global runtime check to see if the
> > guest is running under Xen paravirt, instead of passing that info
> > through page->something?
> 
> If everyone's okay with putting a couple of "if (xen_pv_domain())"
> checks into mm/fault.c - sure. I would have thought that this wouldn't
> be liked, hence the suggestion to make this depend on the backlink
> being non-NULL.

What about this instead? The page->index logic gets optimized away at
compile time with CONFIG_XEN=n.
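
For reference, xen_pv_domain() collapses to a constant 0 with
CONFIG_XEN=n; include/xen/xen.h has roughly:

	#ifdef CONFIG_XEN
	extern enum xen_domain_type xen_domain_type;
	#else
	#define xen_domain_type		XEN_NATIVE
	#endif

	#define xen_domain()		(xen_domain_type != XEN_NATIVE)
	#define xen_pv_domain()		(xen_domain() &&		\
					 xen_domain_type == XEN_PV_DOMAIN)

so gcc can discard both the dead branch and the ->index access.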

I'll delay the removal of _irqsave from pgd_lock, as it's no longer a
bugfix.

===
Subject: xen: stop taking the page_table_lock with irqs disabled

From: Andrea Arcangeli <aarcange@xxxxxxxxxx>

It's forbidden to take the page_table_lock with irqs disabled: if the
lock is contended, the TLB flush IPIs sent by the CPU holding the
page_table_lock will never be serviced by a CPU spinning on the lock
with irqs disabled, and the result is a deadlock.

Only Xen needs to take the page_table_lock here, and Xen doesn't use
IPIs for its TLB flushes, so the deadlock doesn't exist in the Xen
case.
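
The deadlock (on native, without this fix) looks like:

	CPU 0 (vmalloc_sync_all)	CPU 1
	------------------------	-----
	spin_lock_irqsave(&pgd_lock)
					spin_lock(&mm->page_table_lock)
	spin_lock(&mm->page_table_lock)
	  -> spins, irqs disabled	sends TLB flush IPIs, waits for
					all CPUs to acknowledge

CPU 0 can't service the IPI with irqs disabled, so CPU 1 never finishes
the flush and never releases the page_table_lock.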

Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
---
 arch/x86/include/asm/pgtable.h |    5 +++--
 arch/x86/mm/fault.c            |   10 ++++++----
 arch/x86/mm/init_64.c          |   10 ++++++----
 arch/x86/mm/pgtable.c          |    9 +++------
 4 files changed, 18 insertions(+), 16 deletions(-)

--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -235,14 +235,16 @@ void vmalloc_sync_all(void)
 
                spin_lock_irqsave(&pgd_lock, flags);
                list_for_each_entry(page, &pgd_list, lru) {
-                       spinlock_t *pgt_lock;
+                       struct mm_struct *mm;
                        pmd_t *ret;
 
-                       pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+                       mm = pgd_page_get_mm(page);
 
-                       spin_lock(pgt_lock);
+                       if (mm)
+                               spin_lock(&mm->page_table_lock);
                        ret = vmalloc_sync_one(page_address(page), address);
-                       spin_unlock(pgt_lock);
+                       if (mm)
+                               spin_unlock(&mm->page_table_lock);
 
                        if (!ret)
                                break;
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -114,11 +114,12 @@ void sync_global_pgds(unsigned long star
                spin_lock_irqsave(&pgd_lock, flags);
                list_for_each_entry(page, &pgd_list, lru) {
                        pgd_t *pgd;
-                       spinlock_t *pgt_lock;
+                       struct mm_struct *mm;
 
                        pgd = (pgd_t *)page_address(page) + pgd_index(address);
-                       pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
-                       spin_lock(pgt_lock);
+                       mm = pgd_page_get_mm(page);
+                       if (mm)
+                               spin_lock(&mm->page_table_lock);
 
                        if (pgd_none(*pgd))
                                set_pgd(pgd, *pgd_ref);
@@ -126,7 +127,8 @@ void sync_global_pgds(unsigned long star
                                BUG_ON(pgd_page_vaddr(*pgd)
                                       != pgd_page_vaddr(*pgd_ref));
 
-                       spin_unlock(pgt_lock);
+                       if (mm)
+                               spin_unlock(&mm->page_table_lock);
                }
                spin_unlock_irqrestore(&pgd_lock, flags);
        }
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -4,6 +4,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
+#include <xen/xen.h>
 
 #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
 
@@ -91,12 +92,8 @@ static inline void pgd_list_del(pgd_t *p
 static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
 {
        BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
-       virt_to_page(pgd)->index = (pgoff_t)mm;
-}
-
-struct mm_struct *pgd_page_get_mm(struct page *page)
-{
-       return (struct mm_struct *)page->index;
+       if (xen_pv_domain())
+               virt_to_page(pgd)->index = (pgoff_t)mm;
 }
 
 static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,8 +28,6 @@ extern unsigned long empty_zero_page[PAG
 extern spinlock_t pgd_lock;
 extern struct list_head pgd_list;
 
-extern struct mm_struct *pgd_page_get_mm(struct page *page);
-
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
@@ -83,6 +81,9 @@ extern struct mm_struct *pgd_page_get_mm
 
 #endif /* CONFIG_PARAVIRT */
 
+#define pgd_page_get_mm(__page) \
+       ((struct mm_struct *)(xen_pv_domain() ? (__page)->index : 0))
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
