
[Xen-changelog] [xen-3.0.3-testing] [XEN] Fix race in shadow invlpg



# HG changeset patch
# User Tim Deegan <tim.deegan@xxxxxxxxxxxxx>
# Date 1160647728 -3600
# Node ID 4fdcccd2235213f32e7b2a37cd09206d0c710701
# Parent  bd2be8a8fc7286f44d9fbde172e4a1bacf5cc739
[XEN] Fix race in shadow invlpg
This fixes a crash under mmstress in SMP Linux guests, where one vcpu
could remove shadow entries while another was reading them for invlpg.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
---
 xen/arch/x86/mm/shadow/multi.c   |  134 ++++++++++++++-------------------------
 xen/include/asm-x86/perfc_defn.h |    4 -
 2 files changed, 53 insertions(+), 85 deletions(-)

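The heart of the fix is visible in the sh_invlpg() hunk below: the old code
obtained a pointer into the linear shadow tables via shadow_get_l2e() and
dereferenced it later, without holding the shadow lock, so another vcpu could
tear down the intermediate shadows in between.  The new code instead snapshots
each entry into a local variable with __copy_from_user() and treats a fault as
"nothing to flush".  A minimal, self-contained sketch of that copy-then-check
pattern follows (plain C; the entry type, flag value and memcpy stand-in are
illustrative, not Xen's real definitions):

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    #define PAGE_PRESENT 0x1u

    typedef struct { uint64_t bits; } l2e_t;   /* stand-in for shadow_l2e_t */

    /* Racy pattern (what the removed shadow_get_l2e() allowed): hand back a
     * pointer into a shared table; the caller dereferences it later, after
     * another vcpu may already have freed the table. */
    static l2e_t *get_l2e_ptr(l2e_t *table, unsigned int idx)
    {
        return &table[idx];
    }

    /* Safe pattern (what the new sh_invlpg() does): snapshot the entry into
     * a local before inspecting its flags.  Xen does the copy with
     * __copy_from_user() so that a mapping that vanishes mid-read faults
     * cleanly instead of crashing; memcpy is only a stand-in here. */
    static bool read_l2e(const l2e_t *table, unsigned int idx, l2e_t *out)
    {
        memcpy(out, &table[idx], sizeof(*out));
        return (out->bits & PAGE_PRESENT) != 0;
    }

    int main(void)
    {
        l2e_t table[4] = { { PAGE_PRESENT }, { 0 }, { 0 }, { 0 } };
        l2e_t snap;
        l2e_t *p = get_l2e_ptr(table, 0);            /* may go stale */
        bool present = read_l2e(table, 0, &snap);    /* snap stays valid */
        return (p != NULL && present) ? 0 : 1;
    }
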
diff -r bd2be8a8fc72 -r 4fdcccd22352 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Thu Oct 12 10:56:41 2006 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Thu Oct 12 11:08:48 2006 +0100
@@ -1375,80 +1375,6 @@ static int shadow_set_l1e(struct vcpu *v
 
 
 /**************************************************************************/
-/* These functions take a vcpu and a virtual address, and return a pointer
- * to the appropriate level N entry from the shadow tables.  
- * If the necessary tables are not present in the shadow, they return NULL. */
-
-/* N.B. The use of GUEST_PAGING_LEVELS here is correct.  If the shadow has
- * more levels than the guest, the upper levels are always fixed and do not 
- * reflect any information from the guest, so we do not use these functions 
- * to access them. */
-
-#if GUEST_PAGING_LEVELS >= 4
-static shadow_l4e_t *
-shadow_get_l4e(struct vcpu *v, unsigned long va)
-{
-    /* Reading the top level table is always valid. */
-    return sh_linear_l4_table(v) + shadow_l4_linear_offset(va);
-}
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-
-
-#if GUEST_PAGING_LEVELS >= 3
-static shadow_l3e_t *
-shadow_get_l3e(struct vcpu *v, unsigned long va)
-{
-#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
-    /* Get the l4 */
-    shadow_l4e_t *sl4e = shadow_get_l4e(v, va);
-    ASSERT(sl4e != NULL);
-    if ( !(shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT) )
-        return NULL;
-    ASSERT(valid_mfn(shadow_l4e_get_mfn(*sl4e)));
-    /* l4 was present; OK to get the l3 */
-    return sh_linear_l3_table(v) + shadow_l3_linear_offset(va);
-#else /* PAE... */
-    /* Top level is always mapped */
-    ASSERT(v->arch.shadow_vtable);
-    return ((shadow_l3e_t *)v->arch.shadow_vtable) + shadow_l3_linear_offset(va);
-#endif 
-}
-#endif /* GUEST_PAGING_LEVELS >= 3 */
-
-
-static shadow_l2e_t *
-shadow_get_l2e(struct vcpu *v, unsigned long va)
-{
-#if GUEST_PAGING_LEVELS >= 3  /* 64bit/PAE... */
-    /* Get the l3 */
-    shadow_l3e_t *sl3e = shadow_get_l3e(v, va);
-    if ( sl3e == NULL || !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) )
-        return NULL;
-    ASSERT(valid_mfn(shadow_l3e_get_mfn(*sl3e)));
-    /* l3 was present; OK to get the l2 */
-#endif
-    return sh_linear_l2_table(v) + shadow_l2_linear_offset(va);
-}
-
-
-#if 0 // avoid the compiler warning for now...
-
-static shadow_l1e_t *
-shadow_get_l1e(struct vcpu *v, unsigned long va)
-{
-    /* Get the l2 */
-    shadow_l2e_t *sl2e = shadow_get_l2e(v, va);
-    if ( sl2e == NULL || !(shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT) )
-        return NULL;
-    ASSERT(valid_mfn(shadow_l2e_get_mfn(*sl2e)));
-    /* l2 was present; OK to get the l1 */
-    return sh_linear_l1_table(v) + shadow_l1_linear_offset(va);
-}
-
-#endif
-
-
-/**************************************************************************/
 /* Macros to walk pagetables.  These take the shadow of a pagetable and 
  * walk every "interesting" entry.  That is, they don't touch Xen mappings, 
  * and for 32-bit l2s shadowed onto PAE or 64-bit, they only touch every 
@@ -2050,6 +1976,12 @@ sh_make_monitor_table(struct vcpu *v)
  * they are needed.  The "demand" argument is non-zero when handling
  * a demand fault (so we know what to do about accessed bits &c).
  * If the necessary tables are not present in the guest, they return NULL. */
+
+/* N.B. The use of GUEST_PAGING_LEVELS here is correct.  If the shadow has
+ * more levels than the guest, the upper levels are always fixed and do not 
+ * reflect any information from the guest, so we do not use these functions 
+ * to access them. */
+
 #if GUEST_PAGING_LEVELS >= 4
 static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v, 
                                                 walk_t *gw, 
@@ -3223,26 +3155,62 @@ sh_invlpg(struct vcpu *v, unsigned long 
  * instruction should be issued on the hardware, or 0 if it's safe not
  * to do so. */
 {
-    shadow_l2e_t *ptr_sl2e = shadow_get_l2e(v, va);
-
-    // XXX -- might be a good thing to prefetch the va into the shadow
-
-    // no need to flush anything if there's no SL2...
-    //
-    if ( !ptr_sl2e )
+    shadow_l2e_t sl2e;
+    
+    perfc_incrc(shadow_invlpg);
+
+    /* First check that we can safely read the shadow l2e.  On SMP/PAE Linux,
+     * as many as 6% of invlpg calls arrive before we have shadowed the l2
+     * at all. */
+#if SHADOW_PAGING_LEVELS == 4
+    {
+        shadow_l3e_t sl3e;
+        if ( !(shadow_l4e_get_flags(
+                   sh_linear_l4_table(v)[shadow_l4_linear_offset(va)])
+               & _PAGE_PRESENT) )
+            return 0;
+        /* This must still be a copy-from-user because we don't have the
+         * shadow lock, and the higher-level shadows might disappear
+         * under our feet. */
+        if ( __copy_from_user(&sl3e, (sh_linear_l3_table(v) 
+                                      + shadow_l3_linear_offset(va)),
+                              sizeof (sl3e)) != 0 )
+        {
+            perfc_incrc(shadow_invlpg_fault);
+            return 0;
+        }
+        if ( !(shadow_l3e_get_flags(sl3e) & _PAGE_PRESENT) )
+            return 0;
+    }
+#elif SHADOW_PAGING_LEVELS == 3
+    if ( !(shadow_l3e_get_flags(
+          ((shadow_l3e_t *)v->arch.shadow_vtable)[shadow_l3_linear_offset(va)])
+           & _PAGE_PRESENT) )
+        // no need to flush anything if there's no SL2...
         return 0;
+#endif
+
+    /* This must still be a copy-from-user because we don't have the shadow
+     * lock, and the higher-level shadows might disappear under our feet. */
+    if ( __copy_from_user(&sl2e, 
+                          sh_linear_l2_table(v) + shadow_l2_linear_offset(va),
+                          sizeof (sl2e)) != 0 )
+    {
+        perfc_incrc(shadow_invlpg_fault);
+        return 0;
+    }
 
     // If there's nothing shadowed for this particular sl2e, then
     // there is no need to do an invlpg, either...
     //
-    if ( !(shadow_l2e_get_flags(*ptr_sl2e) & _PAGE_PRESENT) )
+    if ( !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) )
         return 0;
 
     // Check to see if the SL2 is a splintered superpage...
     // If so, then we'll need to flush the entire TLB (because that's
     // easier than invalidating all of the individual 4K pages).
     //
-    if ( (mfn_to_page(shadow_l2e_get_mfn(*ptr_sl2e))->count_info &
+    if ( (mfn_to_page(shadow_l2e_get_mfn(sl2e))->count_info &
           PGC_SH_type_mask) == PGC_SH_fl1_shadow )
     {
         local_flush_tlb();
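
A note on the sh_linear_*_table()/shadow_*_linear_offset() accesses used in the
hunks above: they rely on the linear ("recursive") mapping of the shadow
pagetables, where one top-level slot points back at the hierarchy itself, so
the level-N entry covering a virtual address sits at a fixed, computable
offset.  Below is a rough sketch of the index arithmetic for 4-level x86-64
paging; the shifts are the architectural ones, while the function names are
invented and Xen's real macros may differ in detail.

    #include <stdint.h>
    #include <stdio.h>

    /* x86-64 4-level paging: 4K pages, 9 index bits per level.
     * Level shifts: l1 = 12, l2 = 21, l3 = 30, l4 = 39. */
    #define PT_SHIFT(level)   (12 + 9 * ((level) - 1))
    #define PT_ENTRIES        512u
    #define VADDR_BITS        48

    /* Index of the level-N entry that maps 'va' within its own table. */
    static unsigned int pt_index(uint64_t va, int level)
    {
        return (unsigned int)((va >> PT_SHIFT(level)) & (PT_ENTRIES - 1));
    }

    /* Offset (in entries) of the level-N entry for 'va' within the linear
     * window exposed by the recursive slot: the level-N tables appear back
     * to back, so the offset is just the address shifted by the level-N
     * shift.  This mirrors what shadow_l2_linear_offset() and friends
     * compute. */
    static uint64_t linear_offset(uint64_t va, int level)
    {
        return (va & ((1ULL << VADDR_BITS) - 1)) >> PT_SHIFT(level);
    }

    int main(void)
    {
        uint64_t va = 0x00007f1234567000ULL;
        for (int level = 1; level <= 4; level++)
            printf("l%d: index %3u, linear offset %#llx\n", level,
                   pt_index(va, level),
                   (unsigned long long)linear_offset(va, level));
        return 0;
    }
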
diff -r bd2be8a8fc72 -r 4fdcccd22352 xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h  Thu Oct 12 10:56:41 2006 +0100
+++ b/xen/include/asm-x86/perfc_defn.h  Thu Oct 12 11:08:48 2006 +0100
@@ -81,8 +81,8 @@ PERFCOUNTER_CPU(shadow_unshadow_bf,    "
 PERFCOUNTER_CPU(shadow_unshadow_bf,    "shadow unshadow brute-force")
 PERFCOUNTER_CPU(shadow_get_page_fail,  "shadow_get_page_from_l1e failed")
 PERFCOUNTER_CPU(shadow_guest_walk,     "shadow walks guest tables")
-PERFCOUNTER_CPU(shadow_walk_cache_hit, "shadow walk-cache hits")
-PERFCOUNTER_CPU(shadow_walk_cache_miss, "shadow walk-cache misses")
+PERFCOUNTER_CPU(shadow_invlpg,         "shadow emulates invlpg")
+PERFCOUNTER_CPU(shadow_invlpg_fault,   "shadow invlpg faults")
 
 
 /*#endif*/ /* __XEN_PERFC_DEFN_H__ */
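
For context on the counter file touched above: perfc_defn.h is an "X-macro"
style list.  It is included more than once with PERFCOUNTER_CPU() redefined
each time, once to build the enum of counter ids and once to build the
description strings printed by the perf-counter dump, and perfc_incrc(), as
called from sh_invlpg(), bumps the slot for a counter.  Here is a stand-alone
sketch of that pattern; the names and the single flat array are invented for
illustration, and Xen's real plumbing is per-CPU and lives elsewhere.

    #include <stdio.h>

    /* In Xen this list lives in its own header; inlined here as a macro. */
    #define COUNTER_LIST(X)                                    \
        X(demo_invlpg,       "demo emulates invlpg")           \
        X(demo_invlpg_fault, "demo invlpg faults")

    /* First expansion: an enum of counter ids. */
    #define AS_ENUM(name, desc) PERFC_##name,
    enum perfcounter { COUNTER_LIST(AS_ENUM) PERFC_NR };

    /* Second expansion: the human-readable descriptions. */
    #define AS_DESC(name, desc) desc,
    static const char *perfc_desc[PERFC_NR] = { COUNTER_LIST(AS_DESC) };

    static unsigned long perfc_count[PERFC_NR];

    /* Rough analogue of perfc_incrc(name). */
    #define perfc_incrc(name) (perfc_count[PERFC_##name]++)

    int main(void)
    {
        perfc_incrc(demo_invlpg);
        perfc_incrc(demo_invlpg);
        perfc_incrc(demo_invlpg_fault);
        for (int i = 0; i < PERFC_NR; i++)
            printf("%-28s %lu\n", perfc_desc[i], perfc_count[i]);
        return 0;
    }
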
