[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Detect spurious faults taken in the hypervisor that are due to writable pagetable logic



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID d78dedc4831f0378335f4e478af61994018e292e
# Parent  dfbf0939350cf8823891c26785c2af15b54e9bcd
Detect spurious faults taken in the hypervisor that are
due to writable pagetable logic.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>

diff -r dfbf0939350c -r d78dedc4831f xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Mar 24 09:59:31 2006
+++ b/xen/arch/x86/mm.c Fri Mar 24 11:14:58 2006
@@ -3351,8 +3351,9 @@
      * permissions in page directories by writing back to the linear mapping.
      */
     if ( (flags = l1e_get_flags(pte) & WRPT_PTE_FLAGS) == WRPT_PTE_FLAGS )
-        return !__put_user(
-            pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1);
+        return __put_user(
+            pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1) ?
+            0 : EXCRET_not_a_fault;
 
     /* We are looking only for read-only mappings of p.t. pages. */
     if ( ((flags | _PAGE_RW) != WRPT_PTE_FLAGS) ||
diff -r dfbf0939350c -r d78dedc4831f xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Mar 24 09:59:31 2006
+++ b/xen/arch/x86/traps.c      Fri Mar 24 11:14:58 2006
@@ -620,6 +620,46 @@
     return 0;
 }
 
+static int spurious_page_fault(unsigned long addr, struct cpu_user_regs *regs)
+{
+    struct vcpu   *v = current;
+    struct domain *d = v->domain;
+    int            rc; /* non-zero if the fault was spurious */
+
+    /*
+     * The only possible reason for a spurious page fault not to be picked
+     * up already is that a page directory was unhooked by writable page table
+     * logic and then reattached before the faulting VCPU could detect it.
+     */
+    if ( is_idle_domain(d) ||               /* no ptwr in idle domain       */
+         IN_HYPERVISOR_RANGE(addr) ||       /* no ptwr on hypervisor addrs  */
+         shadow_mode_enabled(d) ||          /* no ptwr logic in shadow mode */
+         ((regs->error_code & 0x1d) != 0) ) /* simple not-present fault?    */
+        return 0; /* cannot be a ptwr-induced fault */
+
+    LOCK_BIGLOCK(d); /* released by UNLOCK_BIGLOCK(d) before returning */
+
+    /*
+     * The page directory could have been detached again while we weren't
+     * holding the per-domain lock. Detect that and fix up if it's the case.
+     */
+    if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
+         unlikely(l2_linear_offset(addr) ==
+                  d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
+    {
+        ptwr_flush(d, PTWR_PT_ACTIVE);
+        rc = 1; /* fixed up by the flush above: report as spurious */
+    }
+    else
+    {
+        /* Okay, walk the page tables. Only check for not-present faults. */
+        rc = __spurious_page_fault(addr);
+    }
+
+    UNLOCK_BIGLOCK(d);
+    return rc;
+}
+
 /*
  * #PF error code:
  *  Bit 0: Protection violation (=1) ; Page not present (=0)
@@ -644,6 +684,13 @@
 
     if ( unlikely(!guest_mode(regs)) )
     {
+        if ( spurious_page_fault(addr, regs) )
+        {
+            DPRINTK("Spurious fault in domain %u:%u at addr %lx\n",
+                    current->domain->domain_id, current->vcpu_id, addr);
+            return EXCRET_not_a_fault;
+        }
+
         if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
         {
             perfc_incrc(copy_user_faults);
diff -r dfbf0939350c -r d78dedc4831f xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Fri Mar 24 09:59:31 2006
+++ b/xen/arch/x86/x86_32/traps.c       Fri Mar 24 11:14:58 2006
@@ -70,38 +70,77 @@
 
 void show_page_walk(unsigned long addr)
 {
+    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT; /* frame taken from CR3 */
+#ifdef CONFIG_X86_PAE
+    l3_pgentry_t l3e, *l3t;
+#endif
+    l2_pgentry_t l2e, *l2t;
+    l1_pgentry_t l1e, *l1t;
+    /* Print the entry read at each level of the page table walk for addr. */
+    printk("Pagetable walk from %08lx:\n", addr);
+
+#ifdef CONFIG_X86_PAE
+    l3t = map_domain_page(mfn);
+    l3e = l3t[l3_table_offset(addr)];
+    mfn = l3e_get_pfn(l3e); /* frame named by this entry; mapped next */
+    pfn = get_gpfn_from_mfn(mfn); /* printed next to the raw entry */
+    printk(" L3 = %"PRIpte" %08lx\n", l3e_get_intpte(l3e), pfn);
+    unmap_domain_page(l3t); /* l3e holds a copy of the entry */
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+        return; /* L3 entry not present: stop the walk */
+#endif
+
+    l2t = map_domain_page(mfn);
+    l2e = l2t[l2_table_offset(addr)];
+    mfn = l2e_get_pfn(l2e);
+    pfn = get_gpfn_from_mfn(mfn);
+    printk("  L2 = %"PRIpte" %08lx %s\n", l2e_get_intpte(l2e), pfn, 
+           (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
+    unmap_domain_page(l2t); /* l2e holds a copy of the entry */
+    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
+         (l2e_get_flags(l2e) & _PAGE_PSE) )
+        return; /* stop: L2 entry not present, or PSE set */
+
+    l1t = map_domain_page(mfn);
+    l1e = l1t[l1_table_offset(addr)];
+    mfn = l1e_get_pfn(l1e);
+    pfn = get_gpfn_from_mfn(mfn);
+    printk("   L1 = %"PRIpte" %08lx\n", l1e_get_intpte(l1e), pfn);
+    unmap_domain_page(l1t);
+}
+
+int __spurious_page_fault(unsigned long addr)
+{
     unsigned long mfn = read_cr3() >> PAGE_SHIFT;
-    intpte_t *ptab, ent;
-    unsigned long pfn; 
-
-    printk("Pagetable walk from %08lx:\n", addr);
-
 #ifdef CONFIG_X86_PAE
-    ptab = map_domain_page(mfn);
-    ent  = ptab[l3_table_offset(addr)];
-    pfn  = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT)); 
-    printk(" L3 = %"PRIpte" %08lx\n", ent, pfn);
-    unmap_domain_page(ptab);
-    if ( !(ent & _PAGE_PRESENT) )
-        return;
-    mfn = ent >> PAGE_SHIFT;
+    l3_pgentry_t l3e, *l3t;
 #endif
-
-    ptab = map_domain_page(mfn);
-    ent  = ptab[l2_table_offset(addr)];
-    pfn  = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT));
-    printk("  L2 = %"PRIpte" %08lx %s\n", ent, pfn, 
-           (ent & _PAGE_PSE) ? "(PSE)" : "");
-    unmap_domain_page(ptab);
-    if ( !(ent & _PAGE_PRESENT) || (ent & _PAGE_PSE) )
-        return;
-    mfn = ent >> PAGE_SHIFT;
-
-    ptab = map_domain_page(ent >> PAGE_SHIFT);
-    ent  = ptab[l1_table_offset(addr)];
-    pfn  = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT));
-    printk("   L1 = %"PRIpte" %08lx\n", ent, pfn);
-    unmap_domain_page(ptab);
+    l2_pgentry_t l2e, *l2t;
+    l1_pgentry_t l1e, *l1t;
+    /* Returns 1 if the walk for addr ends at a present entry, else 0. */
+#ifdef CONFIG_X86_PAE
+    l3t = map_domain_page(mfn);
+    l3e = l3t[l3_table_offset(addr)];
+    mfn = l3e_get_pfn(l3e);
+    unmap_domain_page(l3t); /* l3e holds a copy of the entry */
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+        return 0; /* L3 entry not present */
+#endif
+
+    l2t = map_domain_page(mfn);
+    l2e = l2t[l2_table_offset(addr)];
+    mfn = l2e_get_pfn(l2e);
+    unmap_domain_page(l2t); /* l2e holds a copy of the entry */
+    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+        return 0; /* L2 entry not present */
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+        return 1; /* present L2 entry with PSE set: no L1 lookup */
+
+    l1t = map_domain_page(mfn);
+    l1e = l1t[l1_table_offset(addr)];
+    mfn = l1e_get_pfn(l1e); /* NOTE(review): mfn is not read after this store */
+    unmap_domain_page(l1t);
+    return !!(l1e_get_flags(l1e) & _PAGE_PRESENT); /* normalised to 0 or 1 */
 }
 
 #define DOUBLEFAULT_STACK_SIZE 1024
diff -r dfbf0939350c -r d78dedc4831f xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Fri Mar 24 09:59:31 2006
+++ b/xen/arch/x86/x86_64/traps.c       Fri Mar 24 11:14:58 2006
@@ -70,31 +70,79 @@
 
 void show_page_walk(unsigned long addr)
 {
-    unsigned long page = read_cr3();
-    
+    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT; /* frame taken from CR3 */
+    l4_pgentry_t l4e, *l4t;
+    l3_pgentry_t l3e, *l3t;
+    l2_pgentry_t l2e, *l2t;
+    l1_pgentry_t l1e, *l1t;
+    /* Print the entry read at each level of the page table walk for addr. */
     printk("Pagetable walk from %016lx:\n", addr);
 
-    page &= PAGE_MASK;
-    page = ((unsigned long *) __va(page))[l4_table_offset(addr)];
-    printk(" L4 = %016lx\n", page);
-    if ( !(page & _PAGE_PRESENT) )
+    l4t = mfn_to_virt(mfn); /* tables are reached via mfn_to_virt() here */
+    l4e = l4t[l4_table_offset(addr)];
+    mfn = l4e_get_pfn(l4e); /* frame named by this entry; read next */
+    pfn = get_gpfn_from_mfn(mfn); /* printed next to the raw entry */
+    printk(" L4 = %"PRIpte" %016lx\n", l4e_get_intpte(l4e), pfn);
+    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
         return;
 
-    page &= PAGE_MASK;
-    page = ((unsigned long *) __va(page))[l3_table_offset(addr)];
-    printk("  L3 = %016lx\n", page);
-    if ( !(page & _PAGE_PRESENT) )
+    l3t = mfn_to_virt(mfn);
+    l3e = l3t[l3_table_offset(addr)];
+    mfn = l3e_get_pfn(l3e);
+    pfn = get_gpfn_from_mfn(mfn);
+    printk("  L3 = %"PRIpte" %016lx\n", l3e_get_intpte(l3e), pfn);
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
         return;
 
-    page &= PAGE_MASK;
-    page = ((unsigned long *) __va(page))[l2_table_offset(addr)];
-    printk("   L2 = %016lx %s\n", page, (page & _PAGE_PSE) ? "(2MB)" : "");
-    if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
+    l2t = mfn_to_virt(mfn);
+    l2e = l2t[l2_table_offset(addr)];
+    mfn = l2e_get_pfn(l2e);
+    pfn = get_gpfn_from_mfn(mfn);
+    printk("   L2 = %"PRIpte" %016lx %s\n", l2e_get_intpte(l2e), pfn,
+           (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
+    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || /* stop if not present... */
+         (l2e_get_flags(l2e) & _PAGE_PSE) )       /* ...or if PSE is set   */
         return;
 
-    page &= PAGE_MASK;
-    page = ((unsigned long *) __va(page))[l1_table_offset(addr)];
-    printk("    L1 = %016lx\n", page);
+    l1t = mfn_to_virt(mfn);
+    l1e = l1t[l1_table_offset(addr)];
+    mfn = l1e_get_pfn(l1e);
+    pfn = get_gpfn_from_mfn(mfn);
+    printk("    L1 = %"PRIpte" %016lx\n", l1e_get_intpte(l1e), pfn);
+}
+
+int __spurious_page_fault(unsigned long addr)
+{
+    unsigned long mfn = read_cr3() >> PAGE_SHIFT; /* frame taken from CR3 */
+    l4_pgentry_t l4e, *l4t;
+    l3_pgentry_t l3e, *l3t;
+    l2_pgentry_t l2e, *l2t;
+    l1_pgentry_t l1e, *l1t;
+    /* Returns 1 if the walk for addr ends at a present entry, else 0. */
+    l4t = mfn_to_virt(mfn);
+    l4e = l4t[l4_table_offset(addr)];
+    mfn = l4e_get_pfn(l4e); /* frame named by this entry; read next */
+    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
+        return 0; /* L4 entry not present */
+
+    l3t = mfn_to_virt(mfn);
+    l3e = l3t[l3_table_offset(addr)];
+    mfn = l3e_get_pfn(l3e);
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+        return 0; /* L3 entry not present */
+
+    l2t = mfn_to_virt(mfn);
+    l2e = l2t[l2_table_offset(addr)];
+    mfn = l2e_get_pfn(l2e);
+    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+        return 0; /* L2 entry not present */
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+        return 1; /* present L2 entry with PSE set: no L1 lookup */
+
+    l1t = mfn_to_virt(mfn);
+    l1e = l1t[l1_table_offset(addr)];
+    mfn = l1e_get_pfn(l1e); /* NOTE(review): mfn is not read after this store */
+    return !!(l1e_get_flags(l1e) & _PAGE_PRESENT); /* normalised to 0 or 1 */
 }
 
 asmlinkage void double_fault(void);
diff -r dfbf0939350c -r d78dedc4831f xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Fri Mar 24 09:59:31 2006
+++ b/xen/include/asm-x86/processor.h   Fri Mar 24 11:14:58 2006
@@ -524,6 +524,7 @@
 void show_stack(struct cpu_user_regs *regs);
 void show_registers(struct cpu_user_regs *regs);
 void show_page_walk(unsigned long addr);
+int __spurious_page_fault(unsigned long addr);
 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs);
 
 extern void mtrr_ap_init(void);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.