[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] Bug on shadow page mode



Hi,

At 12:50 +0100 on 02 Apr (1364907054), Jan Beulich wrote:
> > (XEN) Xen call trace:
> > (XEN)    [<ffff82c4c01e637f>] guest_walk_tables_4_levels+0x135/0x6a6
> > (XEN)    [<ffff82c4c020d8cc>] sh_page_fault__guest_4+0x505/0x2015
> > (XEN)    [<ffff82c4c01d2135>] vmx_vmexit_handler+0x86c/0x1748
> > (XEN)    
> > (XEN) Pagetable walk from ffff82c406a00000:
> > (XEN)  L4[0x105] = 000000007f26e063 ffffffffffffffff
> > (XEN)  L3[0x110] = 000000005ce30063 ffffffffffffffff
> > (XEN)  L2[0x035] = 0000000014aab063 ffffffffffffffff 
> > (XEN)  L1[0x000] = 0000000000000000 ffffffffffffffff
> 
> Tim,
> 
> I'm afraid this is something for you. From what I can tell, despite
> sh_walk_guest_tables() being called from sh_page_fault() without
> the paging lock held, there doesn't appear to be a way for this to
> race sh_update_cr3(). And with the way the latter updates
> guest_vtable, the only way for a page fault to happen upon use
> of that cached mapping would be between the call to
> sh_unmap_domain_page_global() and the immediately following
> one to sh_map_domain_page_global() (i.e. while the pointer is
> stale).

Hmmm.  So the only way I can see that happening is if some foreign agent
resets the vcpu's state while it's actually running, which AFAICT
shouldn't happen.  I looked at reversing the unmap and map, but realised
that won't solve the problem -- in such a race sh_walk_guest_tables()
could cache the old value.

For testing, here's a patch that gets rid of guest_vtable altogether.
I'm not entirely happy with it, partly because it will have a perf hit
on large-RAM machines, and partly because I'm worried that whatever path
caused this crash might cause other problems.  But it would be good to
confirm that it stops the crashes.

Cheers,

Tim.

commit 5469971c30eca85f270cbd9bc46f63ec0fd543c0
Author: Tim Deegan <tim@xxxxxxx>
Date:   Thu Apr 4 11:16:28 2013 +0100

    x86/mm/shadow: Don't keep a permanent mapping of the top-level guest table.
    
    This addresses two issues.  First, a crash seen where
    sh_walk_guest_tables() found that guest_vtable was not a valid
    mapping.  Second, the lack of any error handling if
    map_domain_page_global() were to fail.
    
    Signed-off-by: Tim Deegan <tim@xxxxxxx>

diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index a593f76..8ca4b9b 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -174,15 +174,26 @@ static inline uint32_t
 sh_walk_guest_tables(struct vcpu *v, unsigned long va, walk_t *gw, 
                      uint32_t pfec)
 {
-    return guest_walk_tables(v, p2m_get_hostp2m(v->domain), va, gw, pfec, 
+    uint32_t rc;
+    mfn_t top_mfn;
+    void *top_map;
+
 #if GUEST_PAGING_LEVELS == 3 /* PAE */
-                             _mfn(INVALID_MFN),
-                             v->arch.paging.shadow.gl3e
+    top_mfn = _mfn(INVALID_MFN);
+    top_map = v->arch.paging.shadow.gl3e;
 #else /* 32 or 64 */
-                             pagetable_get_mfn(v->arch.guest_table),
-                             v->arch.paging.shadow.guest_vtable
+    top_mfn = pagetable_get_mfn(v->arch.guest_table);
+    top_map = sh_map_domain_page(top_mfn);
+#endif
+
+    rc = guest_walk_tables(v, p2m_get_hostp2m(v->domain), va, gw, pfec,
+                           top_mfn, top_map);
+
+#if GUEST_PAGING_LEVELS != 3
+    sh_unmap_domain_page(top_map);
 #endif
-                             );
+
+    return rc;
 }
 
 /* This validation is called with lock held, and after write permission
@@ -219,24 +230,20 @@ shadow_check_gwalk(struct vcpu *v, unsigned long va, 
walk_t *gw, int version)
      * logic simple.
      */
     perfc_incr(shadow_check_gwalk);
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
-#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
-    l4p = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable;
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit... */
+    l4p = sh_map_domain_page(gw->l4mfn);
     mismatch |= (gw->l4e.l4 != l4p[guest_l4_table_offset(va)].l4);
+    sh_unmap_domain_page(l4p);
     l3p = sh_map_domain_page(gw->l3mfn);
     mismatch |= (gw->l3e.l3 != l3p[guest_l3_table_offset(va)].l3);
     sh_unmap_domain_page(l3p);
-#else
+#elif GUEST_PAGING_LEVELS >= 3 /* PAE... */
     mismatch |= (gw->l3e.l3 !=
                  v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)].l3);
 #endif
     l2p = sh_map_domain_page(gw->l2mfn);
     mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
     sh_unmap_domain_page(l2p);
-#else
-    l2p = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable;
-    mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
-#endif
     if ( !(guest_supports_superpages(v) &&
            (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
     {
@@ -3784,7 +3791,7 @@ sh_update_linear_entries(struct vcpu *v)
 }
 
 
-/* Removes vcpu->arch.paging.shadow.guest_vtable and vcpu->arch.shadow_table[].
+/* Removes vcpu->arch.shadow_table[].
  * Does all appropriate management/bookkeeping/refcounting/etc...
  */
 static void
@@ -3794,24 +3801,6 @@ sh_detach_old_tables(struct vcpu *v)
     int i = 0;
 
     ////
-    //// vcpu->arch.paging.shadow.guest_vtable
-    ////
-
-#if GUEST_PAGING_LEVELS == 3
-    /* PAE guests don't have a mapping of the guest top-level table */
-    ASSERT(v->arch.paging.shadow.guest_vtable == NULL);
-#else
-    if ( v->arch.paging.shadow.guest_vtable )
-    {
-        struct domain *d = v->domain;
-        if ( shadow_mode_external(d) || shadow_mode_translate(d) )
-            sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
-        v->arch.paging.shadow.guest_vtable = NULL;
-    }
-#endif // !NDEBUG
-
-
-    ////
     //// vcpu->arch.shadow_table[]
     ////
 
@@ -3970,26 +3959,12 @@ sh_update_cr3(struct vcpu *v, int do_locking)
 
 
     ////
-    //// vcpu->arch.paging.shadow.guest_vtable
+    //// vcpu->arch.paging.shadow.gl3e[]
     ////
-#if GUEST_PAGING_LEVELS == 4
-    if ( shadow_mode_external(d) || shadow_mode_translate(d) )
-    {
-        if ( v->arch.paging.shadow.guest_vtable )
-            sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
-        v->arch.paging.shadow.guest_vtable = sh_map_domain_page_global(gmfn);
-        /* PAGING_LEVELS==4 implies 64-bit, which means that
-         * map_domain_page_global can't fail */
-        BUG_ON(v->arch.paging.shadow.guest_vtable == NULL);
-    }
-    else
-        v->arch.paging.shadow.guest_vtable = __linear_l4_table;
-#elif GUEST_PAGING_LEVELS == 3
+#if GUEST_PAGING_LEVELS == 3
      /* On PAE guests we don't use a mapping of the guest's own top-level
       * table.  We cache the current state of that table and shadow that,
       * until the next CR3 write makes us refresh our cache. */
-     ASSERT(v->arch.paging.shadow.guest_vtable == NULL);
- 
      if ( shadow_mode_external(d) ) 
          /* Find where in the page the l3 table is */
          guest_idx = guest_index((void *)v->arch.hvm_vcpu.guest_cr[3]);
@@ -4005,20 +3980,6 @@ sh_update_cr3(struct vcpu *v, int do_locking)
      for ( i = 0; i < 4 ; i++ )
          v->arch.paging.shadow.gl3e[i] = gl3e[i];
      sh_unmap_domain_page(gl3e);
-#elif GUEST_PAGING_LEVELS == 2
-    if ( shadow_mode_external(d) || shadow_mode_translate(d) )
-    {
-        if ( v->arch.paging.shadow.guest_vtable )
-            sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
-        v->arch.paging.shadow.guest_vtable = sh_map_domain_page_global(gmfn);
-        /* Does this really need map_domain_page_global?  Handle the
-         * error properly if so. */
-        BUG_ON(v->arch.paging.shadow.guest_vtable == NULL); /* XXX */
-    }
-    else
-        v->arch.paging.shadow.guest_vtable = __linear_l2_table;
-#else
-#error this should never happen
 #endif
 
 
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 6f9744a..4921c99 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -122,8 +122,6 @@ struct shadow_vcpu {
     l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
     /* PAE guests: per-vcpu cache of the top-level *guest* entries */
     l3_pgentry_t gl3e[4] __attribute__((__aligned__(32)));
-    /* Non-PAE guests: pointer to guest top-level pagetable */
-    void *guest_vtable;
     /* Last MFN that we emulated a write to as unshadow heuristics. */
     unsigned long last_emulated_mfn_for_unshadow;
     /* MFN of the last shadow that we shot a writeable mapping in */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.