[Xen-changelog] [xen-unstable] [XEN] Revert changeset 11438. Needs fixing for PAE.
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID bfd00b317815f2d1c8989b55a4cfd174da043e43
# Parent  6f36370e373a4a75d0c3a6695a3cef7f1adb8ce6
[XEN] Revert changeset 11438. Needs fixing for PAE.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/arch/ia64/xen/mm.c                   |   95 ++++++----
 xen/arch/powerpc/mm.c                    |   82 +++++---
 xen/arch/x86/domain_build.c              |    8 
 xen/arch/x86/mm.c                        |  285 ++++++++++++++++++++-----------
 xen/arch/x86/mm/shadow/common.c          |   10 -
 xen/arch/x86/mm/shadow/multi.c           |    2 
 xen/include/asm-ia64/mm.h                |    8 
 xen/include/asm-powerpc/mm.h             |    8 
 xen/include/asm-x86/mm.h                 |   50 +++++
 xen/include/asm-x86/x86_32/page-3level.h |    2 
 10 files changed, 392 insertions(+), 158 deletions(-)

diff -r 6f36370e373a -r bfd00b317815 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/ia64/xen/mm.c    Mon Sep 11 01:55:03 2006 +0100
@@ -1624,6 +1624,13 @@ void put_page_type(struct page_info *pag
                 nx &= ~PGT_validated;
             }
         }
+        else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) ==
+                            (PGT_pinned | 1)) &&
+                           ((nx & PGT_type_mask) != PGT_writable_page)) )
+        {
+            /* Page is now only pinned. Make the back pointer mutable again. */
+            nx |= PGT_va_mutable;
+        }
     }
     while ( unlikely((y = cmpxchg_rel(&page->u.inuse.type_info, x, nx)) != x) );
 }
@@ -1632,8 +1639,6 @@ int get_page_type(struct page_info *page
 int get_page_type(struct page_info *page, u32 type)
 {
     u32 nx, x, y = page->u.inuse.type_info;
-
-    ASSERT(!(type & ~PGT_type_mask));
 
  again:
     do {
@@ -1646,25 +1651,29 @@ int get_page_type(struct page_info *page
         }
         else if ( unlikely((x & PGT_count_mask) == 0) )
         {
-            if ( (x & PGT_type_mask) != type )
+            if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
             {
-                /*
-                 * On type change we check to flush stale TLB entries. This 
-                 * may be unnecessary (e.g., page was GDT/LDT) but those 
-                 * circumstances should be very rare.
-                 */
-                cpumask_t mask =
-                    page_get_owner(page)->domain_dirty_cpumask;
-                tlbflush_filter(mask, page->tlbflush_timestamp);
-
-                if ( unlikely(!cpus_empty(mask)) )
+                if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
                 {
-                    perfc_incrc(need_flush_tlb_flush);
-                    flush_tlb_mask(mask);
+                    /*
+                     * On type change we check to flush stale TLB
+                     * entries. This may be unnecessary (e.g., page
+                     * was GDT/LDT) but those circumstances should be
+                     * very rare.
+                     */
+                    cpumask_t mask =
+                        page_get_owner(page)->domain_dirty_cpumask;
+                    tlbflush_filter(mask, page->tlbflush_timestamp);
+
+                    if ( unlikely(!cpus_empty(mask)) )
+                    {
+                        perfc_incrc(need_flush_tlb_flush);
+                        flush_tlb_mask(mask);
+                    }
                 }
 
                 /* We lose existing type, back pointer, and validity. */
-                nx &= ~(PGT_type_mask | PGT_validated);
+                nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
                 nx |= type;
 
                 /* No special validation needed for writable pages. */
@@ -1673,22 +1682,46 @@ int get_page_type(struct page_info *page
                     nx |= PGT_validated;
             }
         }
-        else if ( unlikely((x & PGT_type_mask) != type) )
+        else
         {
-            if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
-                 (type != PGT_l1_page_table) )
-                MEM_LOG("Bad type (saw %08x != exp %08x) "
-                        "for mfn %016lx (pfn %016lx)",
-                        x, type, page_to_mfn(page),
-                        get_gpfn_from_mfn(page_to_mfn(page)));
-            return 0;
-        }
-        else if ( unlikely(!(x & PGT_validated)) )
-        {
-            /* Someone else is updating validation of this page. Wait... */
-            while ( (y = page->u.inuse.type_info) == x )
-                cpu_relax();
-            goto again;
+            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+            {
+                if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+                {
+                    if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
+                         ((type & PGT_type_mask) != PGT_l1_page_table) )
+                        MEM_LOG("Bad type (saw %08x != exp %08x) "
+                                "for mfn %016lx (pfn %016lx)",
+                                x, type, page_to_mfn(page),
+                                get_gpfn_from_mfn(page_to_mfn(page)));
+                    return 0;
+                }
+                else if ( (x & PGT_va_mask) == PGT_va_mutable )
+                {
+                    /* The va backpointer is mutable, hence we update it. */
+                    nx &= ~PGT_va_mask;
+                    nx |= type; /* we know the actual type is correct */
+                }
+                else if ( ((type & PGT_va_mask) != PGT_va_mutable) &&
+                          ((type & PGT_va_mask) != (x & PGT_va_mask)) )
+                {
+#ifdef CONFIG_X86_PAE
+                    /* We use backptr as extra typing. Cannot be unknown. */
+                    if ( (type & PGT_type_mask) == PGT_l2_page_table )
+                        return 0;
+#endif
+                    /* This table is possibly mapped at multiple locations. */
+                    nx &= ~PGT_va_mask;
+                    nx |= PGT_va_unknown;
+                }
+            }
+            if ( unlikely(!(x & PGT_validated)) )
+            {
+                /* Someone else is updating validation of this page. Wait... */
+                while ( (y = page->u.inuse.type_info) == x )
+                    cpu_relax();
+                goto again;
+            }
         }
     }
     while ( unlikely((y = cmpxchg_acq(&page->u.inuse.type_info, x, nx)) != x) );
diff -r 6f36370e373a -r bfd00b317815 xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c     Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/powerpc/mm.c     Mon Sep 11 01:55:03 2006 +0100
@@ -87,6 +87,12 @@ void put_page_type(struct page_info *pag
             /* Record TLB information for flush later. */
             page->tlbflush_timestamp = tlbflush_current_time();
         }
+        else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == 
+                           (PGT_pinned | 1)) )
+        {
+            /* Page is now only pinned. Make the back pointer mutable again. */
+            nx |= PGT_va_mutable;
+        }
     }
     while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
 }
@@ -95,8 +101,6 @@ int get_page_type(struct page_info *page
 int get_page_type(struct page_info *page, unsigned long type)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
-
-    ASSERT(!(type & ~PGT_type_mask));
 
  again:
     do {
@@ -109,25 +113,29 @@ int get_page_type(struct page_info *page
         }
         else if ( unlikely((x & PGT_count_mask) == 0) )
         {
-            if ( (x & PGT_type_mask) != type )
+            if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
             {
-                /*
-                 * On type change we check to flush stale TLB entries. This 
-                 * may be unnecessary (e.g., page was GDT/LDT) but those 
-                 * circumstances should be very rare.
-                 */
-                cpumask_t mask =
-                    page_get_owner(page)->domain_dirty_cpumask;
-                tlbflush_filter(mask, page->tlbflush_timestamp);
-
-                if ( unlikely(!cpus_empty(mask)) )
-                {
-                    perfc_incrc(need_flush_tlb_flush);
-                    flush_tlb_mask(mask);
+                if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
+                {
+                    /*
+                     * On type change we check to flush stale TLB
+                     * entries. This may be unnecessary (e.g., page
+                     * was GDT/LDT) but those circumstances should be
+                     * very rare.
+                     */
+                    cpumask_t mask =
+                        page_get_owner(page)->domain_dirty_cpumask;
+                    tlbflush_filter(mask, page->tlbflush_timestamp);
+
+                    if ( unlikely(!cpus_empty(mask)) )
+                    {
+                        perfc_incrc(need_flush_tlb_flush);
+                        flush_tlb_mask(mask);
+                    }
                 }
 
                 /* We lose existing type, back pointer, and validity. */
-                nx &= ~(PGT_type_mask | PGT_validated);
+                nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
                 nx |= type;
 
                 /* No special validation needed for writable pages. */
@@ -136,16 +144,36 @@ int get_page_type(struct page_info *page
                     nx |= PGT_validated;
             }
         }
-        else if ( unlikely((x & PGT_type_mask) != type) )
-        {
-            return 0;
-        }
-        if ( unlikely(!(x & PGT_validated)) )
-        {
-            /* Someone else is updating validation of this page. Wait... */
-            while ( (y = page->u.inuse.type_info) == x )
-                cpu_relax();
-            goto again;
+        else
+        {
+            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+            {
+                if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+                {
+                    return 0;
+                }
+                else if ( (x & PGT_va_mask) == PGT_va_mutable )
+                {
+                    /* The va backpointer is mutable, hence we update it. */
+                    nx &= ~PGT_va_mask;
+                    nx |= type; /* we know the actual type is correct */
+                }
+                else if ( (type & PGT_va_mask) != PGT_va_mutable )
+                {
+                    ASSERT((type & PGT_va_mask) != (x & PGT_va_mask));
+
+                    /* This table is possibly mapped at multiple locations. */
+                    nx &= ~PGT_va_mask;
+                    nx |= PGT_va_unknown;
+                }
+            }
+            if ( unlikely(!(x & PGT_validated)) )
+            {
+                /* Someone else is updating validation of this page. Wait... */
+                while ( (y = page->u.inuse.type_info) == x )
+                    cpu_relax();
+                goto again;
+            }
         }
     }
     while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/x86/domain_build.c       Mon Sep 11 01:55:03 2006 +0100
@@ -510,13 +510,15 @@ int construct_dom0(struct domain *d,
         case 1 ... 4:
             page->u.inuse.type_info &= ~PGT_type_mask;
             page->u.inuse.type_info |= PGT_l2_page_table;
-            if ( count == 4 )
-                page->u.inuse.type_info |= PGT_pae_xen_l2;
+            page->u.inuse.type_info |=
+                (count-1) << PGT_va_shift;
             get_page(page, d); /* an extra ref because of readable mapping */
             break;
         default:
             page->u.inuse.type_info &= ~PGT_type_mask;
             page->u.inuse.type_info |= PGT_l1_page_table;
+            page->u.inuse.type_info |= 
+                ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-5))<<PGT_va_shift;
             get_page(page, d); /* an extra ref because of readable mapping */
             break;
         }
@@ -542,6 +544,8 @@ int construct_dom0(struct domain *d,
         {
             page->u.inuse.type_info &= ~PGT_type_mask;
             page->u.inuse.type_info |= PGT_l1_page_table;
+            page->u.inuse.type_info |= 
+                ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
 
             /*
              * No longer writable: decrement the type_count.
diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/x86/mm.c Mon Sep 11 01:55:03 2006 +0100
@@ -625,7 +625,8 @@ get_page_from_l1e(
 /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
 static int 
 get_page_from_l2e(
-    l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
+    l2_pgentry_t l2e, unsigned long pfn,
+    struct domain *d, unsigned long vaddr)
 {
     int rc;
 
@@ -638,7 +639,10 @@ get_page_from_l2e(
         return 0;
     }
 
-    rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
+    vaddr >>= L2_PAGETABLE_SHIFT;
+    vaddr <<= PGT_va_shift;
+    rc = get_page_and_type_from_pagenr(
+        l2e_get_pfn(l2e), PGT_l1_page_table | vaddr, d);
 #if CONFIG_PAGING_LEVELS == 2
     if ( unlikely(!rc) )
         rc = get_linear_pagetable(l2e, pfn, d);
@@ -650,7 +654,8 @@ get_page_from_l2e(
 #if CONFIG_PAGING_LEVELS >= 3
 static int 
 get_page_from_l3e(
-    l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
+    l3_pgentry_t l3e, unsigned long pfn,
+    struct domain *d, unsigned long vaddr)
 {
     int rc;
 
@@ -663,7 +668,11 @@ get_page_from_l3e(
         return 0;
     }
 
-    rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
+    vaddr >>= L3_PAGETABLE_SHIFT;
+    vaddr <<= PGT_va_shift;
+    rc = get_page_and_type_from_pagenr(
+        l3e_get_pfn(l3e),
+        PGT_l2_page_table | vaddr, d);
     return rc;
 }
 #endif /* 3 level */
@@ -671,7 +680,8 @@ get_page_from_l3e(
 #if CONFIG_PAGING_LEVELS >= 4
 static int 
 get_page_from_l4e(
-    l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
+    l4_pgentry_t l4e, unsigned long pfn, 
+    struct domain *d, unsigned long vaddr)
 {
     int rc;
 
@@ -684,7 +694,11 @@ get_page_from_l4e(
         return 0;
     }
 
-    rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
+    vaddr >>= L4_PAGETABLE_SHIFT;
+    vaddr <<= PGT_va_shift;
+    rc = get_page_and_type_from_pagenr(
+        l4e_get_pfn(l4e), 
+        PGT_l3_page_table | vaddr, d);
 
     if ( unlikely(!rc) )
         rc = get_linear_pagetable(l4e, pfn, d);
@@ -863,8 +877,8 @@ static int create_pae_xen_mappings(l3_pg
     /*
      * The Xen-private mappings include linear mappings. The L2 thus cannot
      * be shared by multiple L3 tables. The test here is adequate because:
-     *  1. Cannot appear in slots != 3 because get_page_type() checks the
-     *     PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3
+     *  1. Cannot appear in slots != 3 because the page would then have an
+     *     unknown va backpointer, which get_page_type() explicitly disallows.
      *  2. Cannot appear in another page table's L3:
      *     a. alloc_l3_table() calls this function and this check will fail
      *     b. mod_l3_entry() disallows updates to slot 3 in an existing table
@@ -874,7 +888,6 @@ static int create_pae_xen_mappings(l3_pg
     page = l3e_get_page(l3e3);
     BUG_ON(page->u.inuse.type_info & PGT_pinned);
     BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
-    BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2));
     if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
     {
         MEM_LOG("PAE L3 3rd slot is shared");
@@ -936,17 +949,61 @@ static void pae_flush_pgd(
     flush_tlb_mask(d->domain_dirty_cpumask);
 }
 
+static inline int l1_backptr(
+    unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
+{
+    unsigned long l2_backptr = l2_type & PGT_va_mask;
+    ASSERT(l2_backptr != PGT_va_unknown);
+    ASSERT(l2_backptr != PGT_va_mutable);
+    *backptr = 
+        ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) | 
+        (offset_in_l2 << L2_PAGETABLE_SHIFT);
+    return 1;
+}
+
 #elif CONFIG_X86_64
 # define create_pae_xen_mappings(pl3e) (1)
 # define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
+
+static inline int l1_backptr(
+    unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
+{
+    unsigned long l2_backptr = l2_type & PGT_va_mask;
+    ASSERT(l2_backptr != PGT_va_unknown);
+    ASSERT(l2_backptr != PGT_va_mutable);
+    *backptr = ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) | 
+        (offset_in_l2 << L2_PAGETABLE_SHIFT);
+    return 1;
+}
+
+static inline int l2_backptr(
+    unsigned long *backptr, unsigned long offset_in_l3, unsigned long l3_type)
+{
+    unsigned long l3_backptr = l3_type & PGT_va_mask;
+    ASSERT(l3_backptr != PGT_va_unknown);
+    ASSERT(l3_backptr != PGT_va_mutable);
+    *backptr = ((l3_backptr >> PGT_va_shift) << L4_PAGETABLE_SHIFT) | 
+        (offset_in_l3 << L3_PAGETABLE_SHIFT);
+    return 1;
+}
+
+static inline int l3_backptr(
+    unsigned long *backptr, unsigned long offset_in_l4, unsigned long l4_type)
+{
+    *backptr = (offset_in_l4 << L4_PAGETABLE_SHIFT);
+    return 1;
+}
 #else
 # define create_pae_xen_mappings(pl3e) (1)
+# define l1_backptr(bp,l2o,l2t) \
+    ({ *(bp) = (unsigned long)(l2o) << L2_PAGETABLE_SHIFT; 1; })
 #endif
 
 static int alloc_l2_table(struct page_info *page, unsigned long type)
 {
     struct domain *d = page_get_owner(page);
     unsigned long  pfn = page_to_mfn(page);
+    unsigned long  vaddr;
     l2_pgentry_t  *pl2e;
     int            i;
 
@@ -956,8 +1013,10 @@ static int alloc_l2_table(struct page_in
 
     for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
     {
+        if ( !l1_backptr(&vaddr, i, type) )
+            goto fail;
         if ( is_guest_l2_slot(type, i) &&
-             unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
+             unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) )
             goto fail;
         
         adjust_guest_l2e(pl2e[i]);
@@ -992,10 +1051,11 @@ static int alloc_l2_table(struct page_in
 
 
 #if CONFIG_PAGING_LEVELS >= 3
-static int alloc_l3_table(struct page_info *page)
+static int alloc_l3_table(struct page_info *page, unsigned long type)
 {
     struct domain *d = page_get_owner(page);
     unsigned long  pfn = page_to_mfn(page);
+    unsigned long  vaddr;
     l3_pgentry_t  *pl3e;
     int            i;
 
@@ -1019,21 +1079,14 @@ static int alloc_l3_table(struct page_in
     pl3e = map_domain_page(pfn);
     for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
     {
-#ifdef CONFIG_X86_PAE
-        if ( i == 3 )
-        {
-            if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
-                 (l3e_get_flags(pl3e[i]) & L3_DISALLOW_MASK) ||
-                 !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
-                                                PGT_l2_page_table |
-                                                PGT_pae_xen_l2,
-                                                d) )
-                goto fail;
-        }
-        else
+#if CONFIG_PAGING_LEVELS >= 4
+        if ( !l2_backptr(&vaddr, i, type) )
+            goto fail;
+#else
+        vaddr = (unsigned long)i << L3_PAGETABLE_SHIFT;
 #endif
         if ( is_guest_l3_slot(i) &&
-             unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
+             unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
             goto fail;
         
         adjust_guest_l3e(pl3e[i]);
@@ -1055,23 +1108,27 @@ static int alloc_l3_table(struct page_in
     return 0;
 }
 #else
-#define alloc_l3_table(page) (0)
+#define alloc_l3_table(page, type) (0)
 #endif
 
 #if CONFIG_PAGING_LEVELS >= 4
-static int alloc_l4_table(struct page_info *page)
+static int alloc_l4_table(struct page_info *page, unsigned long type)
 {
     struct domain *d = page_get_owner(page);
     unsigned long  pfn = page_to_mfn(page);
     l4_pgentry_t  *pl4e = page_to_virt(page);
+    unsigned long vaddr;
     int            i;
 
     ASSERT(!shadow_mode_refcounts(d));
 
     for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
     {
+        if ( !l3_backptr(&vaddr, i, type) )
+            goto fail;
+
         if ( is_guest_l4_slot(i) &&
-             unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
+             unlikely(!get_page_from_l4e(pl4e[i], pfn, d, vaddr)) )
             goto fail;
 
         adjust_guest_l4e(pl4e[i]);
@@ -1099,7 +1156,7 @@ static int alloc_l4_table(struct page_in
     return 0;
 }
 #else
-#define alloc_l4_table(page) (0)
+#define alloc_l4_table(page, type) (0)
 #endif
 
 
@@ -1133,8 +1190,6 @@ static void free_l2_table(struct page_in
             put_page_from_l2e(pl2e[i], pfn);
 
     unmap_domain_page(pl2e);
-
-    page->u.inuse.type_info &= ~PGT_pae_xen_l2;
 }
 
 
@@ -1302,6 +1357,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
                         unsigned long type)
 {
     l2_pgentry_t ol2e;
+    unsigned long vaddr = 0;
 
     if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) )
     {
@@ -1327,7 +1383,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
         if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
             return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn);
 
-        if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) )
+        if ( unlikely(!l1_backptr(&vaddr, pgentry_ptr_to_slot(pl2e), type)) ||
+             unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) )
             return 0;
 
         if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) )
@@ -1350,9 +1407,11 @@ static int mod_l2_entry(l2_pgentry_t *pl
 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
 static int mod_l3_entry(l3_pgentry_t *pl3e, 
                         l3_pgentry_t nl3e, 
-                        unsigned long pfn)
+                        unsigned long pfn,
+                        unsigned long type)
 {
     l3_pgentry_t ol3e;
+    unsigned long vaddr;
     int okay;
 
     if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
@@ -1388,8 +1447,16 @@ static int mod_l3_entry(l3_pgentry_t *pl
         if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
             return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn);
 
-        if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) )
+#if CONFIG_PAGING_LEVELS >= 4
+        if ( unlikely(!l2_backptr(&vaddr, pgentry_ptr_to_slot(pl3e), type)) ||
+             unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
             return 0;
+#else
+        vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t))
+            << L3_PAGETABLE_SHIFT;
+        if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
+            return 0;
+#endif 
 
         if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) )
         {
@@ -1418,9 +1485,11 @@ static int mod_l3_entry(l3_pgentry_t *pl
 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
 static int mod_l4_entry(l4_pgentry_t *pl4e, 
                         l4_pgentry_t nl4e, 
-                        unsigned long pfn)
+                        unsigned long pfn,
+                        unsigned long type)
 {
     l4_pgentry_t ol4e;
+    unsigned long vaddr;
 
     if ( unlikely(!is_guest_l4_slot(pgentry_ptr_to_slot(pl4e))) )
     {
@@ -1446,7 +1515,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
         if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
             return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn);
 
-        if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
+        if ( unlikely(!l3_backptr(&vaddr, pgentry_ptr_to_slot(pl4e), type)) ||
+             unlikely(!get_page_from_l4e(nl4e, pfn, current->domain, vaddr)) )
             return 0;
 
         if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) )
@@ -1480,9 +1550,9 @@ int alloc_page_type(struct page_info *pa
     case PGT_l2_page_table:
         return alloc_l2_table(page, type);
     case PGT_l3_page_table:
-        return alloc_l3_table(page);
+        return alloc_l3_table(page, type);
     case PGT_l4_page_table:
-        return alloc_l4_table(page);
+        return alloc_l4_table(page, type);
     case PGT_gdt_page:
     case PGT_ldt_page:
         return alloc_segdesc_page(page);
@@ -1602,6 +1672,12 @@ void put_page_type(struct page_info *pag
             /* Record TLB information for flush later. */
             page->tlbflush_timestamp = tlbflush_current_time();
         }
+        else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == 
+                           (PGT_pinned|PGT_l1_page_table|1)) )
+        {
+            /* Page is now only pinned. Make the back pointer mutable again. */
+            nx |= PGT_va_mutable;
+        }
     }
     while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
 }
@@ -1610,8 +1686,6 @@ int get_page_type(struct page_info *page
 int get_page_type(struct page_info *page, unsigned long type)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
-
-    ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
 
  again:
     do {
@@ -1624,26 +1698,29 @@ int get_page_type(struct page_info *page
         }
         else if ( unlikely((x & PGT_count_mask) == 0) )
         {
-            ASSERT(!(x & PGT_pae_xen_l2));
-            if ( (x & PGT_type_mask) != type )
+            if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
             {
-                /*
-                 * On type change we check to flush stale TLB entries. This 
-                 * may be unnecessary (e.g., page was GDT/LDT) but those 
-                 * circumstances should be very rare.
-                 */
-                cpumask_t mask =
-                    page_get_owner(page)->domain_dirty_cpumask;
-                tlbflush_filter(mask, page->tlbflush_timestamp);
-
-                if ( unlikely(!cpus_empty(mask)) )
+                if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
                 {
-                    perfc_incrc(need_flush_tlb_flush);
-                    flush_tlb_mask(mask);
+                    /*
+                     * On type change we check to flush stale TLB
+                     * entries. This may be unnecessary (e.g., page
+                     * was GDT/LDT) but those circumstances should be
+                     * very rare.
+                     */
+                    cpumask_t mask =
+                        page_get_owner(page)->domain_dirty_cpumask;
+                    tlbflush_filter(mask, page->tlbflush_timestamp);
+
+                    if ( unlikely(!cpus_empty(mask)) )
+                    {
+                        perfc_incrc(need_flush_tlb_flush);
+                        flush_tlb_mask(mask);
+                    }
                 }
 
                 /* We lose existing type, back pointer, and validity. */
-                nx &= ~(PGT_type_mask | PGT_validated);
+                nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
                 nx |= type;
 
                 /* No special validation needed for writable pages. */
@@ -1652,23 +1729,51 @@ int get_page_type(struct page_info *page
                     nx |= PGT_validated;
             }
         }
-        else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
-        {
-            if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
-                 (type != PGT_l1_page_table) )
-                MEM_LOG("Bad type (saw %" PRtype_info
-                        " != exp %" PRtype_info ") "
-                        "for mfn %lx (pfn %lx)",
-                        x, type, page_to_mfn(page),
-                        get_gpfn_from_mfn(page_to_mfn(page)));
-            return 0;
-        }
-        else if ( unlikely(!(x & PGT_validated)) )
-        {
-            /* Someone else is updating validation of this page. Wait... */
-            while ( (y = page->u.inuse.type_info) == x )
-                cpu_relax();
-            goto again;
+        else
+        {
+            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+            {
+                if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+                {
+                    if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
+                         ((type & PGT_type_mask) != PGT_l1_page_table) )
+                        MEM_LOG("Bad type (saw %" PRtype_info
+                                " != exp %" PRtype_info ") "
+                                "for mfn %lx (pfn %lx)",
+                                x, type, page_to_mfn(page),
+                                get_gpfn_from_mfn(page_to_mfn(page)));
+                    return 0;
+                }
+                else if ( (x & PGT_va_mask) == PGT_va_mutable )
+                {
+                    /* The va backpointer is mutable, hence we update it. */
+                    nx &= ~PGT_va_mask;
+                    nx |= type; /* we know the actual type is correct */
+                }
+                else if ( (type & PGT_va_mask) != PGT_va_mutable )
+                {
+                    ASSERT((type & PGT_va_mask) != (x & PGT_va_mask));
+#ifdef CONFIG_X86_PAE
+                    /* We use backptr as extra typing. Cannot be unknown. */
+                    if ( (type & PGT_type_mask) == PGT_l2_page_table )
+                        return 0;
+#endif
+                    /* Fixme: add code to propagate va_unknown to subtables. */
+                    if ( ((type & PGT_type_mask) >= PGT_l2_page_table) &&
+                         !shadow_mode_refcounts(page_get_owner(page)) )
+                        return 0;
+                    /* This table is possibly mapped at multiple locations. */
+                    nx &= ~PGT_va_mask;
+                    nx |= PGT_va_unknown;
+                }
+            }
+            if ( unlikely(!(x & PGT_validated)) )
+            {
+                /* Someone else is updating validation of this page. Wait... */
+                while ( (y = page->u.inuse.type_info) == x )
+                    cpu_relax();
+                goto again;
+            }
         }
     }
     while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
@@ -1925,25 +2030,19 @@ int do_mmuext_op(
         switch ( op.cmd )
         {
         case MMUEXT_PIN_L1_TABLE:
-            type = PGT_l1_page_table;
+            type = PGT_l1_page_table | PGT_va_mutable;
             goto pin_page;
 
         case MMUEXT_PIN_L2_TABLE:
-            type = PGT_l2_page_table;
-            goto pin_page;
-
         case MMUEXT_PIN_L3_TABLE:
-            type = PGT_l3_page_table;
-            goto pin_page;
-
         case MMUEXT_PIN_L4_TABLE:
-            type = PGT_l4_page_table;
+            /* Ignore pinning of subdirectories. */
+            if ( (op.cmd - MMUEXT_PIN_L1_TABLE) != (CONFIG_PAGING_LEVELS - 1) )
+                break;
+
+            type = PGT_root_page_table;
 
         pin_page:
-            /* Ignore pinning of invalid paging levels. */
-            if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) )
-                break;
-
             if ( shadow_mode_refcounts(FOREIGNDOM) )
                 break;
 
@@ -2227,7 +2326,7 @@ int do_mmu_update(
                 }
 
                 if ( unlikely(!get_page_type(
-                    page, type_info & PGT_type_mask)) )
+                    page, type_info & (PGT_type_mask|PGT_va_mask))) )
                     goto not_a_pt;
 
                 switch ( type_info & PGT_type_mask )
@@ -2249,7 +2348,7 @@ int do_mmu_update(
                 case PGT_l3_page_table:
                 {
                     l3_pgentry_t l3e = l3e_from_intpte(req.val);
-                    okay = mod_l3_entry(va, l3e, mfn);
+                    okay = mod_l3_entry(va, l3e, mfn, type_info);
                 }
                 break;
 #endif
@@ -2257,7 +2356,7 @@ int do_mmu_update(
                 case PGT_l4_page_table:
                 {
                     l4_pgentry_t l4e = l4e_from_intpte(req.val);
-                    okay = mod_l4_entry(va, l4e, mfn);
+                    okay = mod_l4_entry(va, l4e, mfn, type_info);
                 }
                 break;
 #endif
@@ -2355,7 +2454,7 @@ static int create_grant_pte_mapping(
     void *va;
     unsigned long gmfn, mfn;
     struct page_info *page;
-    u32 type;
+    u32 type_info;
     l1_pgentry_t ol1e;
     struct domain *d = v->domain;
 
@@ -2376,8 +2475,9 @@ static int create_grant_pte_mapping(
     va = (void *)((unsigned long)va + (pte_addr & ~PAGE_MASK));
     page = mfn_to_page(mfn);
 
-    type = page->u.inuse.type_info & PGT_type_mask;
-    if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
+    type_info = page->u.inuse.type_info;
+    if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||         
+         !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
     {
         MEM_LOG("Grant map attempted to update a non-L1 page");
         rc = GNTST_general_error;
@@ -2411,7 +2511,7 @@ static int destroy_grant_pte_mapping(
     void *va;
     unsigned long gmfn, mfn;
     struct page_info *page;
-    u32 type;
+    u32 type_info;
     l1_pgentry_t ol1e;
 
     gmfn = addr >> PAGE_SHIFT;
@@ -2427,8 +2527,9 @@ static int destroy_grant_pte_mapping(
     va = (void *)((unsigned long)va + (addr & ~PAGE_MASK));
     page = mfn_to_page(mfn);
 
-    type = page->u.inuse.type_info & PGT_type_mask;
-    if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
+    type_info = page->u.inuse.type_info;
+    if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
+         !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
     {
         MEM_LOG("Grant map attempted to update a non-L1 page");
         rc = GNTST_general_error;
diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Mon Sep 11 01:55:03 2006 +0100
@@ -21,6 +21,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#define SHADOW 1
+
 #include <xen/config.h>
 #include <xen/types.h>
 #include <xen/mm.h>
@@ -223,6 +225,7 @@ struct x86_emulate_ops shadow_emulator_o
     .cmpxchg8b_emulated = sh_x86_emulate_cmpxchg8b_emulated,
 };
 
+
 /**************************************************************************/
 /* Code for "promoting" a guest page to the point where the shadow code is
  * willing to let it be treated as a guest page table.  This generally
@@ -232,6 +235,7 @@ void shadow_promote(struct vcpu *v, mfn_
 void shadow_promote(struct vcpu *v, mfn_t gmfn, u32 type)
 {
     struct page_info *page = mfn_to_page(gmfn);
+    unsigned long type_info;
 
     ASSERT(valid_mfn(gmfn));
 
@@ -247,8 +251,10 @@ void shadow_promote(struct vcpu *v, mfn_
         // vcpu or not, or even what kind of type we get; we just want the type
         // count to be > 0.
         //
-        while ( !get_page_type(page, page->u.inuse.type_info & PGT_type_mask) )
-            continue;
+        do {
+            type_info =
+                page->u.inuse.type_info & (PGT_type_mask | PGT_va_mask);
+        } while ( !get_page_type(page, type_info) );
 
         // Now that the type ref is non-zero, we can safely use the
         // shadow_flags.
diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Mon Sep 11 01:55:03 2006 +0100
@@ -34,6 +34,8 @@
 // - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
 //   space for both PV and HVM guests.
 //
+
+#define SHADOW 1
 
 #include <xen/config.h>
 #include <xen/types.h>
diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/include/asm-ia64/mm.h Mon Sep 11 01:55:03 2006 +0100
@@ -102,6 +102,14 @@ struct page_info
  /* Owning guest has pinned this page to its current type? */
 #define _PGT_pinned         27
 #define PGT_pinned          (1U<<_PGT_pinned)
+
+ /* The 27 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift        32
+#define PGT_va_mask         ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable      ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown      ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
 
  /* 16-bit count of uses of this frame as its current type. */
 #define PGT_count_mask      ((1U<<16)-1)
diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-powerpc/mm.h
--- a/xen/include/asm-powerpc/mm.h      Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/include/asm-powerpc/mm.h      Mon Sep 11 01:55:03 2006 +0100
@@ -101,6 +101,14 @@ struct page_extents {
  /* Has this page been validated for use as its current type? */
 #define _PGT_validated      27
 #define PGT_validated       (1U<<_PGT_validated)
+
+ /* The 27 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift        32
+#define PGT_va_mask         ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable      ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown      ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
 
  /* 16-bit count of uses of this frame as its current type. */
 #define PGT_count_mask      ((1U<<16)-1)
diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/include/asm-x86/mm.h  Mon Sep 11 01:55:03 2006 +0100
@@ -75,6 +75,19 @@ struct page_info
 #define PGT_gdt_page        (5U<<29) /* using this page in a GDT? */
 #define PGT_ldt_page        (6U<<29) /* using this page in an LDT? */
 #define PGT_writable_page   (7U<<29) /* has writable mappings of this page? */
+
+#ifndef SHADOW
+#define PGT_l1_shadow       PGT_l1_page_table
+#define PGT_l2_shadow       PGT_l2_page_table
+#define PGT_l3_shadow       PGT_l3_page_table
+#define PGT_l4_shadow       PGT_l4_page_table
+#define PGT_hl2_shadow      (5U<<29)
+#define PGT_snapshot        (6U<<29)
+#define PGT_writable_pred   (7U<<29) /* predicted gpfn with writable ref */
+
+#define PGT_fl1_shadow      (5U<<29)
+#endif
+
 #define PGT_type_mask       (7U<<29) /* Bits 29-31. */
 
  /* Owning guest has pinned this page to its current type? */
@@ -83,12 +96,43 @@ struct page_info
  /* Has this page been validated for use as its current type? */
 #define _PGT_validated      27
 #define PGT_validated       (1U<<_PGT_validated)
- /* PAE only: is this an L2 page directory containing Xen-private mappings? */
-#define _PGT_pae_xen_l2     26
-#define PGT_pae_xen_l2      (1U<<_PGT_pae_xen_l2)
+#if defined(__i386__)
+ /* The 11 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift        16
+#define PGT_va_mask         (((1U<<11)-1)<<PGT_va_shift)
+ /* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable      (((1U<<11)-1)<<PGT_va_shift)
+ /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown      (((1U<<11)-2)<<PGT_va_shift)
+#elif defined(__x86_64__)
+ /* The 27 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift        32
+#define PGT_va_mask         ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable      ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown      ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
+#endif
 
  /* 16-bit count of uses of this frame as its current type. */
 #define PGT_count_mask      ((1U<<16)-1)
+
+#ifndef SHADOW
+#ifdef __x86_64__
+#define PGT_high_mfn_shift  52
+#define PGT_high_mfn_mask   (0xfffUL << PGT_high_mfn_shift)
+#define PGT_mfn_mask        (((1U<<27)-1) | PGT_high_mfn_mask)
+#define PGT_high_mfn_nx     (0x800UL << PGT_high_mfn_shift)
+#else
+ /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */
+#define PGT_mfn_mask        ((1U<<23)-1)
+ /* NX for PAE xen is not supported yet */
+#define PGT_high_mfn_nx     (1ULL << 63)
+
+#define PGT_score_shift     23
+#define PGT_score_mask      (((1U<<4)-1)<<PGT_score_shift)
+#endif
+#endif /* SHADOW */
 
  /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h  Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/include/asm-x86/x86_32/page-3level.h  Mon Sep 11 01:55:03 2006 +0100
@@ -49,7 +49,7 @@ typedef l3_pgentry_t root_pgentry_t;
 /* misc */
 #define is_guest_l1_slot(s)    (1)
 #define is_guest_l2_slot(t,s)                                              \
-    ( !((t) & PGT_pae_xen_l2) ||                                           \
+    ( ((((t) & PGT_va_mask) >> PGT_va_shift) != 3) ||                      \
       ((s) < (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) )
 #define is_guest_l3_slot(s)    (1)
 

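For readers coming to this revert cold: what it restores is the older "va backpointer" scheme, in which the upper bits of a page's type_info record the virtual address at which a page table is mapped by its parent. Two sentinel values share the field: PGT_va_mutable (the backpointer is not fixed yet) and PGT_va_unknown (the table is, or may be, mapped at multiple locations). The standalone C sketch below is illustrative only, not Xen code: it borrows the x86_32 (non-PAE) constants from the asm-x86/mm.h hunk above, and the helpers l1_type_with_backptr() and reconcile_va() are hypothetical names written to mirror the logic of get_page_from_l2e() and get_page_type() in the xen/arch/x86/mm.c hunks.

    #include <assert.h>
    #include <stdio.h>

    /* Constants as restored for x86_32 (non-PAE) in the asm-x86/mm.h hunk. */
    #define PGT_type_mask       (7U<<29)
    #define PGT_l1_page_table   (1U<<29)  /* per the PGT_* sequence in asm-x86/mm.h */
    #define PGT_va_shift        16
    #define PGT_va_mask         (((1U<<11)-1)<<PGT_va_shift)
    #define PGT_va_mutable      (((1U<<11)-1)<<PGT_va_shift) /* all-ones: not fixed */
    #define PGT_va_unknown      (((1U<<11)-2)<<PGT_va_shift) /* multiple mappings  */
    #define L2_PAGETABLE_SHIFT  22        /* non-PAE x86_32 */

    /* Build the type word a parent passes to get_page_type() for an L1 page:
     * the L2 slot's virtual address becomes the va backpointer, as in
     * get_page_from_l2e() above. (Hypothetical helper name.) */
    static unsigned int l1_type_with_backptr(unsigned long vaddr)
    {
        return PGT_l1_page_table |
               (unsigned int)((vaddr >> L2_PAGETABLE_SHIFT) << PGT_va_shift);
    }

    /* Sketch of the backpointer reconciliation inside get_page_type():
     * given the current type word x and the requested type, return the new
     * va field. Mirrors the mutable/unknown branches in the x86 mm.c hunk. */
    static unsigned int reconcile_va(unsigned int x, unsigned int type)
    {
        if ( (x & PGT_va_mask) == (type & PGT_va_mask) )
            return x & PGT_va_mask;        /* backpointer already matches   */
        if ( (x & PGT_va_mask) == PGT_va_mutable )
            return type & PGT_va_mask;     /* mutable: fix it to the new VA */
        if ( (type & PGT_va_mask) != PGT_va_mutable )
            return PGT_va_unknown;         /* two fixed VAs disagree        */
        return x & PGT_va_mask;            /* caller didn't care; keep it   */
    }

    int main(void)
    {
        /* An L1 table first referenced from L2 slot 5 (VA 5 << 22). */
        unsigned int t1 = l1_type_with_backptr(5UL << L2_PAGETABLE_SHIFT);

        /* A freshly pinned page starts with a mutable backpointer... */
        unsigned int x = PGT_l1_page_table | PGT_va_mutable;
        x = (x & ~PGT_va_mask) | reconcile_va(x, t1);
        assert((x & PGT_va_mask) == (5U << PGT_va_shift));

        /* ...and a second reference at a different VA degrades it to
         * "unknown" rather than failing outright. */
        unsigned int t2 = l1_type_with_backptr(9UL << L2_PAGETABLE_SHIFT);
        x = (x & ~PGT_va_mask) | reconcile_va(x, t2);
        assert((x & PGT_va_mask) == PGT_va_unknown);

        printf("final type_info: %#x\n", x);
        return 0;
    }

The put_page_type() hunks restore the complementary rule: a page left with only its pin reference regains PGT_va_mutable, so the backpointer can be fixed afresh on the next use. Under CONFIG_X86_PAE the L2 backpointer doubles as extra type information and is not allowed to become unknown (the early "return 0" in get_page_type()), which appears to be the PAE interaction that the changeset title says still needs fixing.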