
[Xen-changelog] Fixes for x86/64 writable pagetables, including SMP



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID a1f7e01b0990a378584e718e6d48eac38824fdb9
# Parent  663f0fb1e444239bae280d2867b80cea3f4bf7c1
Fixes for x86/64 writable pagetables, including SMP
guest support.

NOTE: I removed some x86/64-specific tests from get_page_type().
I can't see what good could come of them -- if they caused
things to work then I'm pretty sure there is some underlying
bug that ought to be fixed instead.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
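
For context, the removed tests only differed on x86/64, where they masked
PGT_va_mask out of the requested type before comparing. A minimal sketch of
the check as it reads after this patch (the retry loop and mismatch handling
are omitted; see the get_page_type() hunks below):

    /* Cached type and va bits must match the requested type exactly,
     * on every architecture -- no x86/64-specific masking. */
    if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
    {
        /* type mismatch handling, as in the hunks below */
    }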

diff -r 663f0fb1e444 -r a1f7e01b0990 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Aug  9 09:34:06 2005
+++ b/xen/arch/x86/mm.c Tue Aug  9 10:42:51 2005
@@ -138,7 +138,7 @@
  * Returns the current foreign domain; defaults to the currently-executing
  * domain if a foreign override hasn't been specified.
  */
-#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ? : current->domain)
+#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ?: current->domain)
 
 /* Private domain structs for DOMID_XEN and DOMID_IO. */
 static struct domain *dom_xen, *dom_io;
@@ -903,7 +903,8 @@
         return 1;
     ASSERT(!shadow_mode_refcounts(d));
 
-    for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ ) {
+    for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
+    {
         if ( !l3_backptr(&vaddr, i, type) )
             goto fail;
 
@@ -1122,10 +1123,9 @@
             return 0;
         }
     }
-    else
-    {
-        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
-            return 0;
+    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
+    {
+        return 0;
     }
 
     put_page_from_l2e(ol2e, pfn);
@@ -1188,23 +1188,16 @@
 
         if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
         {
-            BUG_ON(!create_pae_xen_mappings(pl3e));
             put_page_from_l3e(nl3e, pfn);
             return 0;
         }
-
-        put_page_from_l3e(ol3e, pfn);
-        return 1;
-    }
-    else
-   {
-       if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
-           {
-               BUG_ON(!create_pae_xen_mappings(pl3e));
-               return 0;
-           }
-   }
-
+    }
+    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
+    {
+        return 0;
+    }
+
+    BUG_ON(!create_pae_xen_mappings(pl3e));
     put_page_from_l3e(ol3e, pfn);
     return 1;
 }
@@ -1254,11 +1247,10 @@
             return 0;
         }
     }
-    else 
-    {
-        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
-            return 0;
-     }
+    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
+    {
+        return 0;
+    }
 
     put_page_from_l4e(ol4e, pfn);
     return 1;
@@ -1409,11 +1401,7 @@
         }
         else if ( unlikely((x & PGT_count_mask) == 0) )
         {
-#ifdef CONFIG_X86_64
-            if ( (x & (PGT_type_mask|PGT_va_mask)) != (type & ~PGT_va_mask))
-#else
             if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
-#endif
             {
                 if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
                 {
@@ -1445,17 +1433,14 @@
         }
         else
         {
-#ifdef CONFIG_X86_64
-            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != (type & ~PGT_va_mask)) )
-#else
             if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
-#endif
             {
                 if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
                 {
                     if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
                          ((type & PGT_type_mask) != PGT_l1_page_table) )
-                        MEM_LOG("Bad type (saw %" PRtype_info "!= exp %" 
PRtype_info ") for pfn %lx",
+                        MEM_LOG("Bad type (saw %" PRtype_info
+                                "!= exp %" PRtype_info ") for pfn %lx",
                                 x, type, page_to_pfn(page));
                     return 0;
                 }
@@ -1718,9 +1703,6 @@
             type = PGT_l1_page_table | PGT_va_mutable;
 
         pin_page:
-#if CONFIG_PAGING_LEVELS >= 4
-            type |= PGT_va_mutable;
-#endif
             if ( shadow_mode_refcounts(FOREIGNDOM) )
                 type = PGT_writable_page;
 
@@ -1744,16 +1726,16 @@
 
 #ifndef CONFIG_X86_PAE /* Unsafe on PAE because of Xen-private mappings. */
         case MMUEXT_PIN_L2_TABLE:
-            type = PGT_l2_page_table;
+            type = PGT_l2_page_table | PGT_va_mutable;
             goto pin_page;
 #endif
 
         case MMUEXT_PIN_L3_TABLE:
-            type = PGT_l3_page_table;
+            type = PGT_l3_page_table | PGT_va_mutable;
             goto pin_page;
 
         case MMUEXT_PIN_L4_TABLE:
-            type = PGT_l4_page_table;
+            type = PGT_l4_page_table | PGT_va_mutable;
             goto pin_page;
 
         case MMUEXT_UNPIN_TABLE:
@@ -1946,9 +1928,9 @@
                      unlikely(_nd != _d) )
                 {
                     MEM_LOG("Bad page values %lx: ed=%p(%u), sd=%p,"
-                            " caf=%08x, taf=%" PRtype_info "\n", 
page_to_pfn(page),
-                            d, d->domain_id, unpickle_domptr(_nd), x,
-                            page->u.inuse.type_info);
+                            " caf=%08x, taf=%" PRtype_info "\n",
+                            page_to_pfn(page), d, d->domain_id,
+                            unpickle_domptr(_nd), x, page->u.inuse.type_info);
                     okay = 0;
                     goto reassign_fail;
                 }
@@ -2111,7 +2093,8 @@
                     l1e = l1e_from_intpte(req.val);
                     okay = mod_l1_entry(va, l1e);
                     if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l1_normal_pt_update(d, req.ptr, l1e, &sh_mapcache);
+                        shadow_l1_normal_pt_update(
+                            d, req.ptr, l1e, &sh_mapcache);
                     put_page_type(page);
                 }
                 break;
@@ -2124,9 +2107,11 @@
 
                     /* FIXME: doesn't work with PAE */
                     l2e = l2e_from_intpte(req.val);
-                    okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn, type_info);
+                    okay = mod_l2_entry(
+                        (l2_pgentry_t *)va, l2e, mfn, type_info);
                     if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l2_normal_pt_update(d, req.ptr, l2e, &sh_mapcache);
+                        shadow_l2_normal_pt_update(
+                            d, req.ptr, l2e, &sh_mapcache);
                     put_page_type(page);
                 }
                 break;
@@ -2142,7 +2127,8 @@
                     l3e = l3e_from_intpte(req.val);
                     okay = mod_l3_entry(va, l3e, mfn, type_info);
                     if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l3_normal_pt_update(d, req.ptr, l3e, &sh_mapcache);
+                        shadow_l3_normal_pt_update(
+                            d, req.ptr, l3e, &sh_mapcache);
                     put_page_type(page);
                 }
                 break;
@@ -2158,7 +2144,8 @@
                     l4e = l4e_from_intpte(req.val);
                     okay = mod_l4_entry(va, l4e, mfn, type_info);
                     if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l4_normal_pt_update(d, req.ptr, l4e, &sh_mapcache);
+                        shadow_l4_normal_pt_update(
+                            d, req.ptr, l4e, &sh_mapcache);
                     put_page_type(page);
                 }
                 break;
@@ -2205,7 +2192,8 @@
             if ( unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d)) )
             {
                 shadow_lock(FOREIGNDOM);
-                printk("privileged guest dom%d requests pfn=%lx to map mfn=%lx 
for dom%d\n",
+                printk("privileged guest dom%d requests pfn=%lx to "
+                       "map mfn=%lx for dom%d\n",
                        d->domain_id, gpfn, mfn, FOREIGNDOM->domain_id);
                 set_machinetophys(mfn, gpfn);
                 set_p2m_entry(FOREIGNDOM, gpfn, mfn, &sh_mapcache, &mapcache);
@@ -2629,17 +2617,11 @@
 #endif
 
 /* Re-validate a given p.t. page, given its prior snapshot */
-int revalidate_l1(struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
+int revalidate_l1(
+    struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
 {
     l1_pgentry_t ol1e, nl1e;
     int modified = 0, i;
-
-#if 0
-    if ( d->domain_id )
-        printk("%s: l1page mfn=%lx snapshot mfn=%lx\n", __func__,
-               l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)l1page)]),
-               l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)snapshot)]));
-#endif
 
     for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
     {
@@ -2690,9 +2672,9 @@
     l1_pgentry_t  *pl1e;
     l2_pgentry_t  *pl2e;
     unsigned int   modified;
-#if defined(__x86_64__)
+
+#ifdef CONFIG_X86_64
     struct vcpu *v = current;
-    /* If in user mode, switch to kernel mode just to read LDT mapping. */
     extern void toggle_guest_mode(struct vcpu *);
     int user_mode = !(v->arch.flags & TF_kernel_mode);
 #endif
@@ -2700,8 +2682,10 @@
     ASSERT(!shadow_mode_enabled(d));
 
     if ( unlikely(d->arch.ptwr[which].vcpu != current) )
-        write_ptbase(d->arch.ptwr[which].vcpu);
-    else 
+        /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
+        write_cr3(pagetable_get_paddr(
+            d->arch.ptwr[which].vcpu->arch.guest_table));
+    else
         TOGGLE_MODE();
 
     l1va = d->arch.ptwr[which].l1va;
@@ -2803,7 +2787,7 @@
         /* Align address; read full word. */
         addr &= ~(sizeof(physaddr_t)-1);
         if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
-                                       sizeof(physaddr_t))) )
+                                        sizeof(physaddr_t))) )
             return rc; 
         /* Mask out bits provided by caller. */
         full &= ~((((physaddr_t)1 << (bytes*8)) - 1) << (offset*8));
@@ -2829,7 +2813,8 @@
          ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
          (page_get_owner(page) != d) )
     {
-        MEM_LOG("ptwr_emulate: Page is mistyped or bad pte (%lx, %" 
PRtype_info ")\n",
+        MEM_LOG("ptwr_emulate: Page is mistyped or bad pte "
+                "(%lx, %" PRtype_info ")\n",
                 l1e_get_pfn(pte), page->u.inuse.type_info);
         return X86EMUL_UNHANDLEABLE;
     }
@@ -2902,42 +2887,13 @@
     .cmpxchg8b_emulated = ptwr_emulated_cmpxchg8b
 };
 
-#if defined(__x86_64__)
-/*
- * Returns zero on if mapped, or -1 otherwise
- */
-static int __not_mapped(l2_pgentry_t *pl2e)
-{
-    unsigned long page = read_cr3();
-
-    page &= PAGE_MASK;
-    page = ((unsigned long *) __va(page))[l4_table_offset((unsigned long)pl2e)];
-    if ( !(page & _PAGE_PRESENT) ) 
-        return -1;        
-        
-    page &= PAGE_MASK;
-    page = ((unsigned long *) __va(page))[l3_table_offset((unsigned long)pl2e)];
-    if ( !(page & _PAGE_PRESENT) ) 
-        return -1;
-
-    page &= PAGE_MASK;
-    page = ((unsigned long *) __va(page))[l2_table_offset((unsigned long)pl2e)];
-    if ( !(page & _PAGE_PRESENT) )
-        return -1;
-
-    return 0;
-}
-#else
-#define __not_mapped(p) (0)
-#endif
-
 /* Write page fault handler: check if guest is trying to modify a PTE. */
 int ptwr_do_page_fault(struct domain *d, unsigned long addr)
 {
     unsigned long    pfn;
     struct pfn_info *page;
     l1_pgentry_t     pte;
-    l2_pgentry_t    *pl2e;
+    l2_pgentry_t    *pl2e, l2e;
     int              which;
     unsigned long    l2_idx;
 
@@ -2984,10 +2940,7 @@
     pl2e = &__linear_l2_table[l2_idx];
     which = PTWR_PT_INACTIVE;
 
-    if ( unlikely(__not_mapped(pl2e)) )
-        goto inactive;
-
-    if ( (l2e_get_pfn(*pl2e)) == pfn )
+    if ( (__get_user(l2e.l2, &pl2e->l2) == 0) && (l2e_get_pfn(l2e) == pfn) )
     {
         /*
          * Check the PRESENT bit to set ACTIVE mode.
@@ -2995,13 +2948,11 @@
          * ACTIVE p.t. (it may be the same p.t. mapped at another virt addr).
          * The ptwr_flush call below will restore the PRESENT bit.
          */
-        if ( likely(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
+        if ( likely(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
              (d->arch.ptwr[PTWR_PT_ACTIVE].l1va &&
               (l2_idx == d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx)) )
             which = PTWR_PT_ACTIVE;
     }
-
-  inactive:
 
     /*
      * If this is a multi-processor guest then ensure that the page is hooked
diff -r 663f0fb1e444 -r a1f7e01b0990 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Tue Aug  9 09:34:06 2005
+++ b/xen/include/asm-x86/page.h        Tue Aug  9 10:42:51 2005
@@ -208,21 +208,21 @@
      + DOMAIN_ENTRIES_PER_L4_PAGETABLE)
 #endif
 
-#define VA_LINEAR_PT_VIRT_START (LINEAR_PT_VIRT_START & VADDR_MASK)
-#define linear_l1_table                                                  \
+#define LINEAR_PT_OFFSET (LINEAR_PT_VIRT_START & VADDR_MASK)
+#define linear_l1_table                                             \
     ((l1_pgentry_t *)(LINEAR_PT_VIRT_START))
-#define __linear_l2_table                                                \
-    ((l2_pgentry_t *)(LINEAR_PT_VIRT_START +                             \
-                     (VA_LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0))))
-#define __linear_l3_table                                                \
-    ((l3_pgentry_t *)(LINEAR_PT_VIRT_START +                             \
-                     (VA_LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0)) + \
-                     (VA_LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<1))))
-#define __linear_l4_table                                                \
-    ((l4_pgentry_t *)(LINEAR_PT_VIRT_START +                             \
-                     (VA_LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0)) + \
-                     (VA_LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<1)) + \
-                     (VA_LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<2))))
+#define __linear_l2_table                                           \
+    ((l2_pgentry_t *)(LINEAR_PT_VIRT_START +                        \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0))))
+#define __linear_l3_table                                           \
+    ((l3_pgentry_t *)(LINEAR_PT_VIRT_START +                        \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0)) +   \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<1))))
+#define __linear_l4_table                                           \
+    ((l4_pgentry_t *)(LINEAR_PT_VIRT_START +                        \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0)) +   \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<1)) +   \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<2))))
 
 #define linear_pg_table linear_l1_table
 #define linear_l2_table(_ed) ((_ed)->arch.guest_vtable)

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog