
[Xen-changelog] Cleanup various shadow mode asserts.



# HG changeset patch
# User Michael.Fetterman@xxxxxxxxxxxx
# Node ID c665ab5a6b442710d9543e002d430d1319eabb15
# Parent  6d298cac0e8d851331096f16a52cdd7208cbc95c
Cleanup various shadow mode asserts.

Separate out the ability of domains to write to their own
page tables (a la "writable page tables", which uses write-protected PTEs
to map the page tables: this is shadow_mode_write_all()) from the
right of a domain to create a PTE with write permissions that points
at a page table (this is shadow_mode_wr_pt_pte())...

Minor cleanup of SHADOW_DEBUG (at least make it compilable) in shadow.c.
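
To make the distinction concrete before diving into the diff, here is a
minimal, self-contained sketch.  Only the SHM_* bit values, the
shadow_mode_*() macro shapes and the crash-vs-propagate decision are taken
from the patch below; struct domain, target_is_page_table and
handle_write_fault() are simplified stand-ins for illustration, not Xen code.

/* Illustrative sketch only -- NOT part of the patch. */
#include <stdio.h>
#include <stdbool.h>

#define SHM_write_all (1 << 2)  /* guest may write to all of its pt pages */
#define SHM_wr_pt_pte (1 << 6)  /* guest may set _PAGE_RW in a PTE that
                                   points at a page table page */

struct domain { unsigned int shadow_mode; };

#define shadow_mode_write_all(d) ((d)->shadow_mode & SHM_write_all)
#define shadow_mode_wr_pt_pte(d) ((d)->shadow_mode & SHM_wr_pt_pte)

/* Roughly the decision the patched shadow_fault() makes on a write fault
 * whose guest PTE already grants write access. */
static const char *handle_write_fault(const struct domain *d,
                                      bool target_is_page_table)
{
    if ( !shadow_mode_wr_pt_pte(d) && target_is_page_table )
        return "domain_crash_synchronous(): writable PTE to a pt page";
    return "l1pte_write_fault(): propagate the write into the shadow";
}

int main(void)
{
    struct domain without = { .shadow_mode = SHM_write_all };
    struct domain with    = { .shadow_mode = SHM_write_all | SHM_wr_pt_pte };

    printf("without SHM_wr_pt_pte: %s\n", handle_write_fault(&without, true));
    printf("with    SHM_wr_pt_pte: %s\n", handle_write_fault(&with, true));
    return 0;
}

The vmx.c hunk below enables SHM_wr_pt_pte for VMX domains, and the final
shadow.h hunk folds the old VM_ASSIST(d, VMASST_TYPE_writable_pagetables)
test into the new shadow_mode_write_l1() predicate.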

diff -r 6d298cac0e8d -r c665ab5a6b44 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Tue Nov  8 11:26:48 2005
+++ b/xen/arch/x86/shadow.c     Tue Nov  8 12:26:50 2005
@@ -37,8 +37,10 @@
 
 extern void free_shadow_pages(struct domain *d);
 
+#if 0 // this code has not been updated for 32pae & 64 bit modes
 #if SHADOW_DEBUG
 static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
+#endif
 #endif
 
 #if CONFIG_PAGING_LEVELS == 3
@@ -898,8 +900,10 @@
     entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
     entry->writable_pl1e = -1;
 
+#if 0 // this code has not been updated for 32pae & 64 bit modes
 #if SHADOW_DEBUG
     mark_shadows_as_reflecting_snapshot(d, gpfn);
+#endif
 #endif
 
     // increment guest's ref count to represent the entry in the
@@ -1317,18 +1321,17 @@
 
         if ( !smfn )
         {
+            // For heavy weight shadows: no need to update refcounts if
+            // there's no shadow page.
+            //
             if ( shadow_mode_refcounts(d) )
                 continue;
 
-            // For light weight shadows, even when no shadow page exists,
-            // we need to resync the refcounts to the new contents of the
-            // guest page.
-            // This only applies when we have writable page tables.
+            // For light weight shadows: we only need to resync the refcounts
+            // to the new contents of the guest page iff it has the right
+            // page type.
             //
-            if ( !shadow_mode_write_all(d) &&
-                 !((stype == PGT_l1_shadow) &&
-                   VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
-                // Page is not writable -- no resync necessary
+            if ( stype != ( pfn_to_page(entry->gmfn)->u.inuse.type_info & PGT_type_mask) )
                 continue;
         }
 
@@ -1365,8 +1368,8 @@
             guest_l1_pgentry_t *snapshot1 = snapshot;
             int unshadow_l1 = 0;
 
-            ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ||
-                   shadow_mode_write_all(d));
+            ASSERT(shadow_mode_write_l1(d) ||
+                   shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
 
             if ( !shadow_mode_refcounts(d) )
                 revalidate_l1(d, (l1_pgentry_t *)guest1, (l1_pgentry_t *)snapshot1);
@@ -1427,7 +1430,7 @@
             l2_pgentry_t *shadow2 = shadow;
             l2_pgentry_t *snapshot2 = snapshot;
 
-            ASSERT(shadow_mode_write_all(d));
+            ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
             BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
 
             changed = 0;
@@ -1473,7 +1476,7 @@
             l2_pgentry_t *snapshot2 = snapshot;
             l1_pgentry_t *shadow2 = shadow;
 
-            ASSERT(shadow_mode_write_all(d));
+            ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
             BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
 
             changed = 0;
@@ -1822,8 +1825,13 @@
                 goto fail;
             }
         }
-
-        if ( !l1pte_write_fault(v, &gpte, &spte, va) )
+        else if ( unlikely(!shadow_mode_wr_pt_pte(d) && mfn_is_page_table(l1e_get_pfn(gpte))) )
+        {
+            SH_LOG("l1pte_write_fault: no write access to page table page");
+            domain_crash_synchronous();
+        }
+
+        if ( unlikely(!l1pte_write_fault(v, &gpte, &spte, va)) )
         {
             SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
             perfc_incrc(write_fault_bail);
@@ -2072,6 +2080,7 @@
 /************************************************************************/
 /************************************************************************/
 
+#if 0 // this code has not been updated for 32pae & 64 bit modes
 #if SHADOW_DEBUG
 
 // The following is entirely for _check_pagetable()'s benefit.
@@ -2118,8 +2127,8 @@
 // BUG: these are not SMP safe...
 static int sh_l2_present;
 static int sh_l1_present;
-char * sh_check_name;
-int shadow_status_noswap;
+static char *sh_check_name;
+// int shadow_status_noswap; // declared in shadow32.c
 
 #define v2m(_v, _adr) ({                                                     \
     unsigned long _a  = (unsigned long)(_adr);                               \
@@ -2218,11 +2227,11 @@
 
     guest_writable =
         (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
-        (VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
+        (shadow_mode_write_l1(d) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
 
     if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
     {
-        printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x 
page_table_page=%d\n",
+        printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08lx 
page_table_page=%d\n",
                eff_guest_pfn, eff_guest_mfn, shadow_mfn,
                frame_table[eff_guest_mfn].u.inuse.type_info,
                page_table_page);
@@ -2233,7 +2242,7 @@
          (l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
          !(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
     {
-        printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x 
page_table_page=%d\n",
+        printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08lx 
page_table_page=%d\n",
                eff_guest_pfn, eff_guest_mfn, shadow_mfn,
                frame_table[eff_guest_mfn].u.inuse.type_info,
                page_table_page);
@@ -2393,13 +2402,12 @@
 }
 #undef FAILPT
 
-static int _check_pagetable(struct vcpu *v, char *s)
+int _check_pagetable(struct vcpu *v, char *s)
 {
     struct domain *d = v->domain;
 #if defined (__x86_64__)
     pagetable_t pt = ((v->arch.flags & TF_kernel_mode)?
-                      pagetable_get_pfn(v->arch.guest_table) :
-                      pagetable_get_pfn(v->arch.guest_table_user));
+                      v->arch.guest_table : v->arch.guest_table_user);
 #else
     pagetable_t pt = v->arch.guest_table;
 #endif
@@ -2539,6 +2547,7 @@
 }
 
 #endif // SHADOW_DEBUG
+#endif // this code has not been updated for 32pae & 64 bit modes
 
 #if CONFIG_PAGING_LEVELS == 3
 static unsigned long shadow_l3_table(
diff -r 6d298cac0e8d -r c665ab5a6b44 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Tue Nov  8 11:26:48 2005
+++ b/xen/arch/x86/shadow32.c   Tue Nov  8 12:26:50 2005
@@ -624,6 +624,14 @@
     // under us...  First, collect the list of pinned pages, then
     // free them.
     //
+    // FIXME: it would be good to just free all the pages referred to in
+    // the hash table without going through each of them to decrement their
+    // reference counts.  In shadow_mode_refcount(), we've gotta do the hard
+    // work, but only for L1 shadows.  If we're not in refcount mode, then
+    // there's no real hard work to do at all.  Need to be careful with the
+    // writable_pte_predictions and snapshot entries in the hash table, but
+    // that's about it.
+    //
     for ( i = 0; i < shadow_ht_buckets; i++ )
     {
         u32 count;
@@ -634,17 +642,51 @@
             continue;
 
         count = 0;
-        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
-            if ( MFN_PINNED(x->smfn) )
-                count++;
+
+        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) {
+            /* Skip snapshot and writable_pred entries */
+            switch ( x->gpfn_and_flags & PGT_type_mask ) {
+            case PGT_l1_shadow:
+            case PGT_l2_shadow:
+            case PGT_l3_shadow:
+            case PGT_l4_shadow:
+            case PGT_hl2_shadow:
+                if ( MFN_PINNED(x->smfn) )
+                    count++;
+                break;
+            case PGT_snapshot:
+            case PGT_writable_pred:
+                break;
+            default:
+                BUG();
+
+            }
+        }
+
         if ( !count )
             continue;
 
         mfn_list = xmalloc_array(unsigned long, count);
         count = 0;
-        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
-            if ( MFN_PINNED(x->smfn) )
-                mfn_list[count++] = x->smfn;
+        for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) {
+            /* Skip snapshot and writable_pred entries */
+            switch ( x->gpfn_and_flags & PGT_type_mask ) {
+            case PGT_l1_shadow:
+            case PGT_l2_shadow:
+            case PGT_l3_shadow:
+            case PGT_l4_shadow:
+            case PGT_hl2_shadow:
+                if ( MFN_PINNED(x->smfn) )
+                    mfn_list[count++] = x->smfn;
+                break;
+            case PGT_snapshot:
+            case PGT_writable_pred:
+                break;
+            default:
+                BUG();
+
+            }
+        }
 
         while ( count )
         {
@@ -779,6 +821,7 @@
     unsigned long va = pfn << PAGE_SHIFT;
 
     ASSERT(tabpfn != 0);
+    ASSERT(shadow_lock_is_acquired(d));
 
     l2 = map_domain_page_with_cache(tabpfn, l2cache);
     l2e = l2[l2_table_offset(va)];
@@ -2037,7 +2080,12 @@
         while ( count )
         {
             count--;
+            /* delete_shadow_status() may do a shadow_audit(), so we need to
+             * keep an accurate count of writable_pte_predictions to keep it
+             * happy.
+             */
             delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+            perfc_decr(writable_pte_predictions);
         }
 
         xfree(gpfn_list);
@@ -2273,18 +2321,17 @@
 
         if ( !smfn )
         {
+            // For heavy weight shadows: no need to update refcounts if
+            // there's no shadow page.
+            //
             if ( shadow_mode_refcounts(d) )
                 continue;
 
-            // For light weight shadows, even when no shadow page exists,
-            // we need to resync the refcounts to the new contents of the
-            // guest page.
-            // This only applies when we have writable page tables.
+            // For light weight shadows: we only need to resync the refcounts
+            // to the new contents of the guest page iff it has the right
+            // page type.
             //
-            if ( !shadow_mode_write_all(d) &&
-                 !((stype == PGT_l1_shadow) &&
-                   VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
-                // Page is not writable -- no resync necessary
+            if ( stype != ( pfn_to_page(entry->gmfn)->u.inuse.type_info & PGT_type_mask) )
                 continue;
         }
 
@@ -2312,8 +2359,8 @@
             l1_pgentry_t *snapshot1 = snapshot;
             int unshadow_l1 = 0;
 
-            ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ||
-                   shadow_mode_write_all(d));
+            ASSERT(shadow_mode_write_l1(d) ||
+                   shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
 
             if ( !shadow_mode_refcounts(d) )
                 revalidate_l1(d, guest1, snapshot1);
@@ -2380,7 +2427,7 @@
             l2_pgentry_t *shadow2 = shadow;
             l2_pgentry_t *snapshot2 = snapshot;
 
-            ASSERT(shadow_mode_write_all(d));
+            ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
             BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
 
             changed = 0;
@@ -2426,7 +2473,7 @@
             l2_pgentry_t *snapshot2 = snapshot;
             l1_pgentry_t *shadow2 = shadow;
             
-            ASSERT(shadow_mode_write_all(d));
+            ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
             BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
 
             changed = 0;
@@ -2619,8 +2666,13 @@
                 goto fail;
             }
         }
-
-        if ( !l1pte_write_fault(v, &gpte, &spte, va) )
+        else if ( unlikely(!shadow_mode_wr_pt_pte(d) && mfn_is_page_table(l1e_get_pfn(gpte))) )
+        {
+            SH_LOG("l1pte_write_fault: no write access to page table page");
+            domain_crash_synchronous();
+        }
+
+        if ( unlikely(!l1pte_write_fault(v, &gpte, &spte, va)) )
         {
             SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
             perfc_incrc(write_fault_bail);
@@ -2954,7 +3006,7 @@
 // BUG: these are not SMP safe...
 static int sh_l2_present;
 static int sh_l1_present;
-char * sh_check_name;
+static char *sh_check_name;
 int shadow_status_noswap;
 
 #define v2m(_v, _adr) ({                                                     \
@@ -3054,7 +3106,7 @@
 
     guest_writable =
         (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
-        (VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
+        (shadow_mode_write_l1(d) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
 
     if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
     {
diff -r 6d298cac0e8d -r c665ab5a6b44 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Tue Nov  8 11:26:48 2005
+++ b/xen/arch/x86/vmx.c        Tue Nov  8 12:26:50 2005
@@ -79,7 +79,7 @@
          * the shared 1:1 page table initially. It shouldn't hurt */
         shadow_mode_enable(v->domain,
                            SHM_enable|SHM_refcounts|
-                           SHM_translate|SHM_external);
+                           SHM_translate|SHM_external|SHM_wr_pt_pte);
     }
 
     vmx_switch_on = 1;
diff -r 6d298cac0e8d -r c665ab5a6b44 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Tue Nov  8 11:26:48 2005
+++ b/xen/include/asm-x86/page.h        Tue Nov  8 12:26:50 2005
@@ -271,6 +271,9 @@
 #define _PAGE_PAT      0x080U
 #define _PAGE_PSE      0x080U
 #define _PAGE_GLOBAL   0x100U
+#define _PAGE_AVAIL0   0x200U
+#define _PAGE_AVAIL1   0x400U
+#define _PAGE_AVAIL2   0x800U
 #define _PAGE_AVAIL    0xE00U
 
 #define __PAGE_HYPERVISOR \
diff -r 6d298cac0e8d -r c665ab5a6b44 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Tue Nov  8 11:26:48 2005
+++ b/xen/include/asm-x86/shadow.h      Tue Nov  8 12:26:50 2005
@@ -45,15 +45,21 @@
 #define SHM_write_all (1<<2) /* allow write access to all guest pt pages,
                                 regardless of pte write permissions */
 #define SHM_log_dirty (1<<3) /* enable log dirty mode */
-#define SHM_translate (1<<4) /* do p2m tranaltion on guest tables */
-#define SHM_external  (1<<5) /* external page table, not used by Xen */
+#define SHM_translate (1<<4) /* Xen does p2m translation, not guest */
+#define SHM_external  (1<<5) /* Xen does not steal address space from the
+                                domain for its own booking; requires VT or
+                                similar mechanisms */
+#define SHM_wr_pt_pte (1<<6) /* guest allowed to set PAGE_RW bit in PTEs which
+                                point to page table pages. */
 
 #define shadow_mode_enabled(_d)   ((_d)->arch.shadow_mode)
 #define shadow_mode_refcounts(_d) ((_d)->arch.shadow_mode & SHM_refcounts)
+#define shadow_mode_write_l1(_d)  (VM_ASSIST(_d, VMASST_TYPE_writable_pagetables))
 #define shadow_mode_write_all(_d) ((_d)->arch.shadow_mode & SHM_write_all)
 #define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
 #define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
 #define shadow_mode_external(_d)  ((_d)->arch.shadow_mode & SHM_external)
+#define shadow_mode_wr_pt_pte(_d) ((_d)->arch.shadow_mode & SHM_wr_pt_pte)
 
 #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
 #define __shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
@@ -324,8 +330,7 @@
 
 #if SHADOW_DEBUG
 extern int shadow_status_noswap;
-#define _SHADOW_REFLECTS_SNAPSHOT ( 9)
-#define SHADOW_REFLECTS_SNAPSHOT  (1u << _SHADOW_REFLECTS_SNAPSHOT)
+#define SHADOW_REFLECTS_SNAPSHOT _PAGE_AVAIL0
 #endif
 
 #ifdef VERBOSE
@@ -1474,7 +1479,8 @@
             if ( stype != PGT_writable_pred )
                 BUG(); // we should never replace entries into the hash table
             x->smfn = smfn;
-            put_page(pfn_to_page(gmfn)); // already had a ref...
+            if ( stype != PGT_writable_pred )
+                put_page(pfn_to_page(gmfn)); // already had a ref...
             goto done;
         }
 
@@ -1656,14 +1662,18 @@
          (type == PGT_writable_page) )
         type = shadow_max_pgtable_type(d, gpfn, NULL);
 
-    if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
-         (type == PGT_l1_page_table) &&
-         (va < HYPERVISOR_VIRT_START) &&
-         KERNEL_MODE(v, regs) )
-        return 1;
-
-    if ( shadow_mode_write_all(d) &&
-         type && (type <= PGT_l4_page_table) &&
+    // Strange but true: writable page tables allow kernel-mode access
+    // to L1 page table pages via write-protected PTEs...  Similarly, write 
+    // access to all page table pages is granted for shadow_mode_write_all
+    // clients.
+    //
+    if ( ((shadow_mode_write_l1(d) && (type == PGT_l1_page_table)) ||
+          (shadow_mode_write_all(d) && type && (type <= PGT_l4_page_table))) &&
+         ((va < HYPERVISOR_VIRT_START)
+#if defined(__x86_64__)
+          || (va >= HYPERVISOR_VIRT_END)
+#endif
+             ) &&
          KERNEL_MODE(v, regs) )
         return 1;
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

