
[Xen-changelog] [xen-unstable] [IA64] fix p2m traversing race



# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID 439051df45f3d43793e4b0370da0b55ef0b3dc75
# Parent  4fae646d60daedc9ced751064509884043cb15fd
[IA64] fix p2m traversing race

Fixed by adding volatile to pgd_t, pud_t and pmd_t pointers where
necessary.  The Xen/IA64 p2m table is lockless, so acquire semantics
are necessary when traversing the p2m in order to prevent the memory
loads that follow an entry load from being reordered before it.
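
To illustrate the reader side of the pairing, here is a minimal
sketch in C11 atomics with made-up names; it is not the Xen code
itself.  Each directory-entry load must be an acquire so that loads
through the entry cannot be observed before the entry itself.  On
IA64, gcc compiles a volatile load to ld.acq, which is what the added
volatile qualifiers buy.

    /* Hypothetical sketch of the acquire-side traversal; not Xen code. */
    #include <stdatomic.h>
    #include <stddef.h>

    struct pte { unsigned long val; };
    struct pmd { _Atomic(struct pte *) page; };  /* one level of the tree */

    static struct pte *pte_lookup(struct pmd *pmd, unsigned long idx)
    {
        /* Acquire load: the loads through 'page' below cannot be
         * reordered before this load of the entry itself. */
        struct pte *page =
            atomic_load_explicit(&pmd->page, memory_order_acquire);
        if (page == NULL)
            return NULL;       /* entry not populated yet */
        return &page[idx];     /* page contents are guaranteed visible */
    }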

Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
---
 xen/arch/ia64/xen/mm.c                                     |  138 +++++--------
 xen/include/asm-ia64/domain.h                              |    2 
 xen/include/asm-ia64/linux-xen/asm-generic/pgtable-nopud.h |    8 
 xen/include/asm-ia64/linux-xen/asm/pgalloc.h               |   27 ++
 xen/include/asm-ia64/linux-xen/asm/pgtable.h               |   13 +
 5 files changed, 100 insertions(+), 88 deletions(-)

diff -r 4fae646d60da -r 439051df45f3 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Thu Nov 30 15:57:37 2006 -0700
+++ b/xen/arch/ia64/xen/mm.c    Thu Nov 30 15:57:42 2006 -0700
@@ -249,7 +249,7 @@ try_to_clear_PGC_allocate(struct domain*
 }
 
 static void
-mm_teardown_pte(struct domain* d, pte_t* pte, unsigned long offset)
+mm_teardown_pte(struct domain* d, volatile pte_t* pte, unsigned long offset)
 {
     pte_t old_pte;
     unsigned long mfn;
@@ -286,39 +286,39 @@ mm_teardown_pte(struct domain* d, pte_t*
 }
 
 static void
-mm_teardown_pmd(struct domain* d, pmd_t* pmd, unsigned long offset)
+mm_teardown_pmd(struct domain* d, volatile pmd_t* pmd, unsigned long offset)
 {
     unsigned long i;
-    pte_t* pte = pte_offset_map(pmd, offset);
+    volatile pte_t* pte = pte_offset_map(pmd, offset);
 
     for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
-        if (!pte_present(*pte))
+        if (!pte_present(*pte)) // acquire semantics
             continue;
         mm_teardown_pte(d, pte, offset + (i << PAGE_SHIFT));
     }
 }
 
 static void
-mm_teardown_pud(struct domain* d, pud_t *pud, unsigned long offset)
+mm_teardown_pud(struct domain* d, volatile pud_t *pud, unsigned long offset)
 {
     unsigned long i;
-    pmd_t *pmd = pmd_offset(pud, offset);
+    volatile pmd_t *pmd = pmd_offset(pud, offset);
 
     for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
-        if (!pmd_present(*pmd))
+        if (!pmd_present(*pmd)) // acquire semantics
             continue;
         mm_teardown_pmd(d, pmd, offset + (i << PMD_SHIFT));
     }
 }
 
 static void
-mm_teardown_pgd(struct domain* d, pgd_t *pgd, unsigned long offset)
+mm_teardown_pgd(struct domain* d, volatile pgd_t *pgd, unsigned long offset)
 {
     unsigned long i;
-    pud_t *pud = pud_offset(pgd, offset);
+    volatile pud_t *pud = pud_offset(pgd, offset);
 
     for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
-        if (!pud_present(*pud))
+        if (!pud_present(*pud)) // acquire semantics
             continue;
         mm_teardown_pud(d, pud, offset + (i << PUD_SHIFT));
     }
@@ -329,30 +329,32 @@ mm_teardown(struct domain* d)
 {
     struct mm_struct* mm = &d->arch.mm;
     unsigned long i;
-    pgd_t* pgd;
+    volatile pgd_t* pgd;
 
     if (mm->pgd == NULL)
         return;
 
     pgd = pgd_offset(mm, 0);
     for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
-        if (!pgd_present(*pgd))
+        if (!pgd_present(*pgd)) // acquire semantics
             continue;
         mm_teardown_pgd(d, pgd, i << PGDIR_SHIFT);
     }
 }
 
 static void
-mm_p2m_teardown_pmd(struct domain* d, pmd_t* pmd, unsigned long offset)
+mm_p2m_teardown_pmd(struct domain* d, volatile pmd_t* pmd,
+                    unsigned long offset)
 {
     pte_free_kernel(pte_offset_map(pmd, offset));
 }
 
 static void
-mm_p2m_teardown_pud(struct domain* d, pud_t *pud, unsigned long offset)
+mm_p2m_teardown_pud(struct domain* d, volatile pud_t *pud,
+                    unsigned long offset)
 {
     unsigned long i;
-    pmd_t *pmd = pmd_offset(pud, offset);
+    volatile pmd_t *pmd = pmd_offset(pud, offset);
 
     for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
         if (!pmd_present(*pmd))
@@ -363,10 +365,11 @@ mm_p2m_teardown_pud(struct domain* d, pu
 }
 
 static void
-mm_p2m_teardown_pgd(struct domain* d, pgd_t *pgd, unsigned long offset)
+mm_p2m_teardown_pgd(struct domain* d, volatile pgd_t *pgd,
+                    unsigned long offset)
 {
     unsigned long i;
-    pud_t *pud = pud_offset(pgd, offset);
+    volatile pud_t *pud = pud_offset(pgd, offset);
 
     for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
         if (!pud_present(*pud))
@@ -381,7 +384,7 @@ mm_p2m_teardown(struct domain* d)
 {
     struct mm_struct* mm = &d->arch.mm;
     unsigned long i;
-    pgd_t* pgd;
+    volatile pgd_t* pgd;
 
     BUG_ON(mm->pgd == NULL);
     pgd = pgd_offset(mm, 0);
@@ -565,38 +568,6 @@ unsigned long translate_domain_mpaddr(un
 }
 
 //XXX !xxx_present() should be used instead of !xxx_none()?
-// __assign_new_domain_page(), assign_new_domain_page() and
-// assign_new_domain0_page() are used only when domain creation.
-// their accesses aren't racy so that returned pte_t doesn't need
-// volatile qualifier
-static pte_t*
-__lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
-{
-    struct mm_struct *mm = &d->arch.mm;
-    pgd_t *pgd;
-    pud_t *pud;
-    pmd_t *pmd;
-
-    BUG_ON(mm->pgd == NULL);
-    pgd = pgd_offset(mm, mpaddr);
-    if (pgd_none(*pgd)) {
-        pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
-    }
-
-    pud = pud_offset(pgd, mpaddr);
-    if (pud_none(*pud)) {
-        pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
-    }
-
-    pmd = pmd_offset(pud, mpaddr);
-    if (pmd_none(*pmd)) {
-        pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
-    }
-
-    return pte_offset_map(pmd, mpaddr);
-}
-
-//XXX !xxx_present() should be used instead of !xxx_none()?
 // pud, pmd and pte pages are zero-cleared when they are allocated.
 // Their contents must be visible before population, so the
 // cmpxchg must have release semantics.
@@ -604,15 +575,15 @@ lookup_alloc_domain_pte(struct domain* d
 lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
 {
     struct mm_struct *mm = &d->arch.mm;
-    pgd_t *pgd;
-    pud_t *pud;
-    pmd_t *pmd;
+    volatile pgd_t *pgd;
+    volatile pud_t *pud;
+    volatile pmd_t *pmd;
 
     BUG_ON(mm->pgd == NULL);
 
     pgd = pgd_offset(mm, mpaddr);
  again_pgd:
-    if (unlikely(pgd_none(*pgd))) {
+    if (unlikely(pgd_none(*pgd))) { // acquire semantics
         pud_t *old_pud = NULL;
         pud = pud_alloc_one(mm, mpaddr);
         if (unlikely(!pgd_cmpxchg_rel(mm, pgd, old_pud, pud))) {
@@ -623,7 +594,7 @@ lookup_alloc_domain_pte(struct domain* d
 
     pud = pud_offset(pgd, mpaddr);
  again_pud:
-    if (unlikely(pud_none(*pud))) {
+    if (unlikely(pud_none(*pud))) { // acquire semantics
         pmd_t* old_pmd = NULL;
         pmd = pmd_alloc_one(mm, mpaddr);
         if (unlikely(!pud_cmpxchg_rel(mm, pud, old_pmd, pmd))) {
@@ -634,7 +605,7 @@ lookup_alloc_domain_pte(struct domain* d
 
     pmd = pmd_offset(pud, mpaddr);
  again_pmd:
-    if (unlikely(pmd_none(*pmd))) {
+    if (unlikely(pmd_none(*pmd))) { // acquire semantics
         pte_t* old_pte = NULL;
         pte_t* pte = pte_alloc_one_kernel(mm, mpaddr);
         if (unlikely(!pmd_cmpxchg_kernel_rel(mm, pmd, old_pte, pte))) {
@@ -643,7 +614,7 @@ lookup_alloc_domain_pte(struct domain* d
         }
     }
 
-    return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
+    return pte_offset_map(pmd, mpaddr);
 }
 
 //XXX xxx_none() should be used instead of !xxx_present()?
@@ -651,48 +622,48 @@ lookup_noalloc_domain_pte(struct domain*
 lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr)
 {
     struct mm_struct *mm = &d->arch.mm;
-    pgd_t *pgd;
-    pud_t *pud;
-    pmd_t *pmd;
+    volatile pgd_t *pgd;
+    volatile pud_t *pud;
+    volatile pmd_t *pmd;
 
     BUG_ON(mm->pgd == NULL);
     pgd = pgd_offset(mm, mpaddr);
-    if (unlikely(!pgd_present(*pgd)))
+    if (unlikely(!pgd_present(*pgd))) // acquire semantics
         return NULL;
 
     pud = pud_offset(pgd, mpaddr);
-    if (unlikely(!pud_present(*pud)))
+    if (unlikely(!pud_present(*pud))) // acquire semantics
         return NULL;
 
     pmd = pmd_offset(pud, mpaddr);
-    if (unlikely(!pmd_present(*pmd)))
+    if (unlikely(!pmd_present(*pmd))) // acquire semantics
         return NULL;
 
-    return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
+    return pte_offset_map(pmd, mpaddr);
 }
 
 static volatile pte_t*
 lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr)
 {
     struct mm_struct *mm = &d->arch.mm;
-    pgd_t *pgd;
-    pud_t *pud;
-    pmd_t *pmd;
+    volatile pgd_t *pgd;
+    volatile pud_t *pud;
+    volatile pmd_t *pmd;
 
     BUG_ON(mm->pgd == NULL);
     pgd = pgd_offset(mm, mpaddr);
-    if (unlikely(pgd_none(*pgd)))
+    if (unlikely(pgd_none(*pgd))) // acquire semantics
         return NULL;
 
     pud = pud_offset(pgd, mpaddr);
-    if (unlikely(pud_none(*pud)))
+    if (unlikely(pud_none(*pud))) // acquire semantics
         return NULL;
 
     pmd = pmd_offset(pud, mpaddr);
-    if (unlikely(pmd_none(*pmd)))
+    if (unlikely(pmd_none(*pmd))) // acquire semantics
         return NULL;
 
-    return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
+    return pte_offset_map(pmd, mpaddr);
 }
 
 unsigned long
@@ -783,7 +754,8 @@ xencomm_paddr_to_maddr(unsigned long pad
 /* Allocate a new page for domain and map it to the specified metaphysical
    address.  */
 static struct page_info *
-__assign_new_domain_page(struct domain *d, unsigned long mpaddr, pte_t* pte)
+__assign_new_domain_page(struct domain *d, unsigned long mpaddr,
+                         volatile pte_t* pte)
 {
     struct page_info *p;
     unsigned long maddr;
@@ -825,7 +797,7 @@ struct page_info *
 struct page_info *
 assign_new_domain_page(struct domain *d, unsigned long mpaddr)
 {
-    pte_t *pte = __lookup_alloc_domain_pte(d, mpaddr);
+    volatile pte_t *pte = lookup_alloc_domain_pte(d, mpaddr);
 
     if (!pte_none(*pte))
         return NULL;
@@ -836,10 +808,10 @@ void
 void
 assign_new_domain0_page(struct domain *d, unsigned long mpaddr)
 {
-    pte_t *pte;
+    volatile pte_t *pte;
 
     BUG_ON(d != dom0);
-    pte = __lookup_alloc_domain_pte(d, mpaddr);
+    pte = lookup_alloc_domain_pte(d, mpaddr);
     if (pte_none(*pte)) {
         struct page_info *p = __assign_new_domain_page(d, mpaddr, pte);
         if (p == NULL) {
@@ -1816,7 +1788,7 @@ void domain_cache_flush (struct domain *
 void domain_cache_flush (struct domain *d, int sync_only)
 {
     struct mm_struct *mm = &d->arch.mm;
-    pgd_t *pgd = mm->pgd;
+    volatile pgd_t *pgd = mm->pgd;
     unsigned long maddr;
     int i,j,k, l;
     int nbr_page = 0;
@@ -1829,22 +1801,22 @@ void domain_cache_flush (struct domain *
         flush_func = &flush_dcache_range;
 
     for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
-        pud_t *pud;
-        if (!pgd_present(*pgd))
+        volatile pud_t *pud;
+        if (!pgd_present(*pgd)) // acquire semantics
             continue;
         pud = pud_offset(pgd, 0);
         for (j = 0; j < PTRS_PER_PUD; pud++, j++) {
-            pmd_t *pmd;
-            if (!pud_present(*pud))
+            volatile pmd_t *pmd;
+            if (!pud_present(*pud)) // acquire semantics
                 continue;
             pmd = pmd_offset(pud, 0);
             for (k = 0; k < PTRS_PER_PMD; pmd++, k++) {
-                pte_t *pte;
-                if (!pmd_present(*pmd))
+                volatile pte_t *pte;
+                if (!pmd_present(*pmd)) // acquire semantics
                     continue;
                 pte = pte_offset_map(pmd, 0);
                 for (l = 0; l < PTRS_PER_PTE; pte++, l++) {
-                    if (!pte_present(*pte))
+                    if (!pte_present(*pte)) // acquire semantics
                         continue;
                     /* Convert PTE to maddr.  */
                     maddr = __va_ul (pte_val(*pte)
diff -r 4fae646d60da -r 439051df45f3 xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Thu Nov 30 15:57:37 2006 -0700
+++ b/xen/include/asm-ia64/domain.h     Thu Nov 30 15:57:42 2006 -0700
@@ -42,7 +42,7 @@ extern void panic_domain(struct pt_regs 
      __attribute__ ((noreturn, format (printf, 2, 3)));
 
 struct mm_struct {
-       pgd_t * pgd;
+       volatile pgd_t * pgd;
     // atomic_t mm_users;                      /* How many users with user space? */
 };
 
diff -r 4fae646d60da -r 439051df45f3 xen/include/asm-ia64/linux-xen/asm-generic/pgtable-nopud.h
--- a/xen/include/asm-ia64/linux-xen/asm-generic/pgtable-nopud.h        Thu Nov 30 15:57:37 2006 -0700
+++ b/xen/include/asm-ia64/linux-xen/asm-generic/pgtable-nopud.h        Thu Nov 30 15:57:42 2006 -0700
@@ -35,10 +35,18 @@ static inline void pgd_clear(pgd_t *pgd)
  */
 #define set_pgd(pgdptr, pgdval)                        set_pud((pud_t *)(pgdptr), (pud_t) { pgdval })
 
+#ifndef XEN
 static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address)
 {
        return (pud_t *)pgd;
 }
+#else
+static inline volatile pud_t *
+pud_offset(volatile pgd_t * pgd, unsigned long address)
+{
+       return (volatile pud_t *)pgd;
+}
+#endif
 
 #define pud_val(x)                             (pgd_val((x).pgd))
 #define __pud(x)                               ((pud_t) { __pgd(x) } )
diff -r 4fae646d60da -r 439051df45f3 xen/include/asm-ia64/linux-xen/asm/pgalloc.h
--- a/xen/include/asm-ia64/linux-xen/asm/pgalloc.h      Thu Nov 30 15:57:37 2006 -0700
+++ b/xen/include/asm-ia64/linux-xen/asm/pgalloc.h      Thu Nov 30 15:57:42 2006 -0700
@@ -92,10 +92,17 @@ static inline pgd_t *pgd_alloc(struct mm
        return pgtable_quicklist_alloc();
 }
 
+#ifndef XEN
 static inline void pgd_free(pgd_t * pgd)
 {
        pgtable_quicklist_free(pgd);
 }
+#else
+static inline void pgd_free(volatile pgd_t * pgd)
+{
+       pgtable_quicklist_free((void*)pgd);
+}
+#endif
 
 static inline void
 pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
@@ -105,8 +112,8 @@ pud_populate(struct mm_struct *mm, pud_t
 
 #ifdef XEN
 static inline int
-pud_cmpxchg_rel(struct mm_struct *mm, pud_t * pud_entry,
-               pmd_t * old_pmd, pmd_t * new_pmd)
+pud_cmpxchg_rel(struct mm_struct *mm, volatile pud_t * pud_entry,
+                pmd_t * old_pmd, volatile pmd_t * new_pmd)
 {
 #ifdef CONFIG_SMP
        unsigned long r;
@@ -127,10 +134,17 @@ static inline pmd_t *pmd_alloc_one(struc
        return pgtable_quicklist_alloc();
 }
 
+#ifndef XEN
 static inline void pmd_free(pmd_t * pmd)
 {
        pgtable_quicklist_free(pmd);
 }
+#else
+static inline void pmd_free(volatile pmd_t * pmd)
+{
+       pgtable_quicklist_free((void*)pmd);
+}
+#endif
 
 #define __pmd_free_tlb(tlb, pmd)       pmd_free(pmd)
 
@@ -150,7 +164,7 @@ pmd_populate_kernel(struct mm_struct *mm
 
 #ifdef XEN
 static inline int
-pmd_cmpxchg_kernel_rel(struct mm_struct *mm, pmd_t * pmd_entry,
+pmd_cmpxchg_kernel_rel(struct mm_struct *mm, volatile pmd_t * pmd_entry,
                       pte_t * old_pte, pte_t * new_pte)
 {
 #ifdef CONFIG_SMP
@@ -186,12 +200,17 @@ static inline void pte_free(struct page 
 {
        pgtable_quicklist_free(page_address(pte));
 }
-#endif
 
 static inline void pte_free_kernel(pte_t * pte)
 {
        pgtable_quicklist_free(pte);
 }
+#else
+static inline void pte_free_kernel(volatile pte_t * pte)
+{
+       pgtable_quicklist_free((void*)pte);
+}
+#endif
 
 #ifndef XEN
 #define __pte_free_tlb(tlb, pte)       pte_free(pte)
diff -r 4fae646d60da -r 439051df45f3 xen/include/asm-ia64/linux-xen/asm/pgtable.h
--- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h      Thu Nov 30 15:57:37 2006 -0700
+++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h      Thu Nov 30 15:57:42 2006 -0700
@@ -380,7 +380,11 @@ pgd_index (unsigned long address)
 
 /* The offset in the 1-level directory is given by the 3 region bits
    (61..63) and the level-1 bits.  */
+#ifndef XEN
 static inline pgd_t*
+#else
+static inline volatile pgd_t*
+#endif
 pgd_offset (struct mm_struct *mm, unsigned long address)
 {
        return mm->pgd + pgd_index(address);
@@ -397,15 +401,24 @@ pgd_offset (struct mm_struct *mm, unsign
 #define pgd_offset_gate(mm, addr)      pgd_offset_k(addr)
 
 /* Find an entry in the second-level page table.. */
+#ifndef XEN
 #define pmd_offset(dir,addr) \
 	((pmd_t *) pud_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+#else
+#define pmd_offset(dir,addr) \
+	((volatile pmd_t *) pud_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+#endif
 
 /*
 * Find an entry in the third-level page table.  This looks more complicated than it
  * should be because some platforms place page tables in high memory.
  */
 #define pte_index(addr)                (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#ifndef XEN
 #define pte_offset_kernel(dir,addr)    ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr))
+#else
+#define pte_offset_kernel(dir,addr)    ((volatile pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr))
+#endif
 #define pte_offset_map(dir,addr)       pte_offset_kernel(dir, addr)
 #define pte_offset_map_nested(dir,addr)        pte_offset_map(dir, addr)
 #define pte_unmap(pte)                 do { } while (0)
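
The "cmpxchg must have release semantics" comment in mm.c describes
the writer side of the same pairing.  A hypothetical C11 sketch of
the idea (again with illustrative names, not the Xen code):

    /* Hypothetical sketch of the release-side population; not Xen code. */
    #include <stdatomic.h>
    #include <stdlib.h>

    struct pte { unsigned long val; };
    struct pmd { _Atomic(struct pte *) page; };

    static void pmd_populate_lockless(struct pmd *pmd, size_t nr)
    {
        struct pte *page = calloc(nr, sizeof(*page)); /* zero-cleared first */
        struct pte *expected = NULL;

        if (page == NULL)
            return;
        /* Release cmpxchg: the zeroed page becomes globally visible
         * before the pointer to it does.  A CPU that loses the race
         * frees its page, as pgd_cmpxchg_rel()/pud_cmpxchg_rel() do. */
        if (!atomic_compare_exchange_strong_explicit(
                &pmd->page, &expected, page,
                memory_order_release, memory_order_relaxed))
            free(page);
    }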

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog