
[Xen-changelog] Build the phys_to_machine_mapping array in Xen rather than reusing the 1:1 page table built by the domain builder



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 0e7bdd973e17fc36bc3accfb48048665fdab4434
# Parent  0bd023cf351eae5d41b86889d3febb4d41b6df69
Build the phys_to_machine_mapping array in Xen rather than reusing the
1:1 page table built by the domain builder, and establish the 1:1 direct
mapping at runtime a la shadow page tables. Since the builder constructs
the full 1:1 direct mapping (but only below 4GB), the current
implementation wastes memory and still cannot support guests with >=4GB
of memory (even on x86-64).

This is also required for HVM support on PAE hosts; that patch will be
sent soon.

For SVM, I think the SVM code needs equivalent changes. Please look at
the changes to vmx.c and vmcs.c; they should be straightforward to mirror.

Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Xiaohui Xin <xiaohui.xin@xxxxxxxxx>
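
(Illustration, not part of the patch: the runtime piece of this change is
the lazy construction of the 1:1 "direct map" in shadow_direct_map_fault()
-- on the first guest access to a frame, any missing intermediate shadow
tables are allocated and an identity entry for that frame is installed.
Below is only a toy, self-contained C model of that idea; it is not Xen
code, and every name in it (toy_l2_t, direct_map_fault, toy_gpfn_to_mfn,
TOY_*) is made up for illustration.)

    /* toy_direct_map.c -- simplified model of lazy 1:1 map construction.
     * Not Xen code: a two-level "page table" over a toy address space. */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define TOY_L1_ENTRIES 512
    #define TOY_L2_ENTRIES 512
    #define TOY_PAGE_SHIFT 12
    #define TOY_PRESENT    0x1ULL

    typedef struct {
        uint64_t *l1[TOY_L2_ENTRIES];   /* L2 "table": pointers to L1 tables */
    } toy_l2_t;

    /* Stand-in for get_mfn_from_gpfn(): identity p2m for this toy. */
    static uint64_t toy_gpfn_to_mfn(uint64_t gpfn)
    {
        return gpfn;
    }

    /* Lazily build the identity mapping for one faulting address:
     * allocate the missing L1 table, then install an identity PTE. */
    static int direct_map_fault(toy_l2_t *l2, uint64_t vaddr)
    {
        uint64_t pfn = vaddr >> TOY_PAGE_SHIFT;
        unsigned int l2_idx = (pfn / TOY_L1_ENTRIES) % TOY_L2_ENTRIES;
        unsigned int l1_idx = pfn % TOY_L1_ENTRIES;

        if (l2->l1[l2_idx] == NULL) {
            l2->l1[l2_idx] = calloc(TOY_L1_ENTRIES, sizeof(uint64_t));
            if (l2->l1[l2_idx] == NULL)
                return 0;                        /* fault not fixed */
        }

        if (!(l2->l1[l2_idx][l1_idx] & TOY_PRESENT))
            l2->l1[l2_idx][l1_idx] =
                (toy_gpfn_to_mfn(pfn) << TOY_PAGE_SHIFT) | TOY_PRESENT;

        return 1;                                /* fault fixed */
    }

    int main(void)
    {
        toy_l2_t l2 = { { NULL } };
        uint64_t va = 0x12345000ULL;
        uint64_t pfn = va >> TOY_PAGE_SHIFT;

        if (direct_map_fault(&l2, va)) {
            uint64_t pte = l2.l1[(pfn / TOY_L1_ENTRIES) % TOY_L2_ENTRIES]
                                [pfn % TOY_L1_ENTRIES];
            printf("va %#llx -> pte %#llx\n",
                   (unsigned long long)va, (unsigned long long)pte);
        }
        return 0;
    }

In the real patch the same walk runs over the shadow L2/L1 tables (plus an
L3 level for PAE), and the machine frame comes from the new flat
phys_to_machine_mapping array -- see get_mfn_from_gpfn() in
xen/include/asm-x86/mm.h at the end of the diff.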

diff -r 0bd023cf351e -r 0e7bdd973e17 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Fri Feb  3 10:54:05 2006
+++ b/tools/libxc/xc_hvm_build.c        Fri Feb  3 11:02:30 2006
@@ -167,133 +167,6 @@
 
     return 0;
 }
-
-#ifdef __i386__
-static int zap_mmio_range(int xc_handle, uint32_t dom,
-                          l2_pgentry_32_t *vl2tab,
-                          unsigned long mmio_range_start,
-                          unsigned long mmio_range_size)
-{
-    unsigned long mmio_addr;
-    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
-    unsigned long vl2e;
-    l1_pgentry_32_t *vl1tab;
-
-    mmio_addr = mmio_range_start & PAGE_MASK;
-    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
-        vl2e = vl2tab[l2_table_offset(mmio_addr)];
-        if (vl2e == 0)
-            continue;
-        vl1tab = xc_map_foreign_range(
-            xc_handle, dom, PAGE_SIZE,
-            PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
-        if ( vl1tab == 0 )
-        {
-            PERROR("Failed zap MMIO range");
-            return -1;
-        }
-        vl1tab[l1_table_offset(mmio_addr)] = 0;
-        munmap(vl1tab, PAGE_SIZE);
-    }
-    return 0;
-}
-
-static int zap_mmio_ranges(int xc_handle, uint32_t dom, unsigned long l2tab,
-                           unsigned char e820_map_nr, unsigned char *e820map)
-{
-    unsigned int i;
-    struct e820entry *e820entry = (struct e820entry *)e820map;
-
-    l2_pgentry_32_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                                   PROT_READ|PROT_WRITE,
-                                                   l2tab >> PAGE_SHIFT);
-    if ( vl2tab == 0 )
-        return -1;
-
-    for ( i = 0; i < e820_map_nr; i++ )
-    {
-        if ( (e820entry[i].type == E820_IO) &&
-             (zap_mmio_range(xc_handle, dom, vl2tab,
-                             e820entry[i].addr, e820entry[i].size) == -1))
-            return -1;
-    }
-
-    munmap(vl2tab, PAGE_SIZE);
-    return 0;
-}
-#else
-static int zap_mmio_range(int xc_handle, uint32_t dom,
-                          l3_pgentry_t *vl3tab,
-                          unsigned long mmio_range_start,
-                          unsigned long mmio_range_size)
-{
-    unsigned long mmio_addr;
-    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
-    unsigned long vl2e = 0;
-    unsigned long vl3e;
-    l1_pgentry_t *vl1tab;
-    l2_pgentry_t *vl2tab;
-
-    mmio_addr = mmio_range_start & PAGE_MASK;
-    for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
-    {
-        vl3e = vl3tab[l3_table_offset(mmio_addr)];
-        if ( vl3e == 0 )
-            continue;
-
-        vl2tab = xc_map_foreign_range(
-            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl3e>>PAGE_SHIFT);
-        if ( vl2tab == NULL )
-        {
-            PERROR("Failed zap MMIO range");
-            return -1;
-        }
-
-        vl2e = vl2tab[l2_table_offset(mmio_addr)];
-        if ( vl2e == 0 )
-        {
-            munmap(vl2tab, PAGE_SIZE);
-            continue;
-        }
-
-        vl1tab = xc_map_foreign_range(
-            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl2e>>PAGE_SHIFT);
-        if ( vl1tab == NULL )
-        {
-            PERROR("Failed zap MMIO range");
-            munmap(vl2tab, PAGE_SIZE);
-            return -1;
-        }
-
-        vl1tab[l1_table_offset(mmio_addr)] = 0;
-        munmap(vl2tab, PAGE_SIZE);
-        munmap(vl1tab, PAGE_SIZE);
-    }
-    return 0;
-}
-
-static int zap_mmio_ranges(int xc_handle, uint32_t dom, unsigned long l3tab,
-                           unsigned char e820_map_nr, unsigned char *e820map)
-{
-    unsigned int i;
-    struct e820entry *e820entry = (struct e820entry *)e820map;
-
-    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                                PROT_READ|PROT_WRITE,
-                                                l3tab >> PAGE_SHIFT);
-    if (vl3tab == 0)
-        return -1;
-    for ( i = 0; i < e820_map_nr; i++ ) {
-        if ( (e820entry[i].type == E820_IO) &&
-             (zap_mmio_range(xc_handle, dom, vl3tab,
-                             e820entry[i].addr, e820entry[i].size) == -1) )
-            return -1;
-    }
-    munmap(vl3tab, PAGE_SIZE);
-    return 0;
-}
-
-#endif
 
 static int setup_guest(int xc_handle,
                        uint32_t dom, int memsize,
@@ -308,15 +181,8 @@
                        unsigned int store_evtchn,
                        unsigned long *store_mfn)
 {
-    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
-    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
     unsigned long *page_array = NULL;
-#ifdef __x86_64__
-    l3_pgentry_t *vl3tab=NULL;
-    unsigned long l3tab;
-#endif
-    unsigned long l2tab = 0;
-    unsigned long l1tab = 0;
+
     unsigned long count, i;
     shared_info_t *shared_info;
     void *e820_page;
@@ -325,7 +191,6 @@
     int rc;
 
     unsigned long nr_pt_pages;
-    unsigned long ppt_alloc;
 
     struct domain_setup_info dsi;
     unsigned long vpt_start;
@@ -391,120 +256,6 @@
     if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
         goto error_out;
 
-    /* First allocate page for page dir or pdpt */
-    ppt_alloc = vpt_start >> PAGE_SHIFT;
-    if ( page_array[ppt_alloc] > 0xfffff )
-    {
-        unsigned long nmfn;
-        nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
-        if ( nmfn == 0 )
-        {
-            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
-            goto error_out;
-        }
-        page_array[ppt_alloc] = nmfn;
-    }
-
-#ifdef __i386__
-    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
-    ctxt->ctrlreg[3] = l2tab;
-
-    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                        PROT_READ|PROT_WRITE,
-                                        l2tab >> PAGE_SHIFT)) == NULL )
-        goto error_out;
-    memset(vl2tab, 0, PAGE_SIZE);
-    vl2e = &vl2tab[l2_table_offset(0)];
-    for ( count = 0; count < (v_end >> PAGE_SHIFT); count++ )
-    {
-        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
-        {
-            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
-            if ( vl1tab != NULL )
-                munmap(vl1tab, PAGE_SIZE);
-            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                                PROT_READ|PROT_WRITE,
-                                                l1tab >> PAGE_SHIFT)) == NULL )
-            {
-                munmap(vl2tab, PAGE_SIZE);
-                goto error_out;
-            }
-            memset(vl1tab, 0, PAGE_SIZE);
-            vl1e = &vl1tab[l1_table_offset(count << PAGE_SHIFT)];
-            *vl2e++ = l1tab | L2_PROT;
-        }
-
-        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
-        vl1e++;
-    }
-    munmap(vl1tab, PAGE_SIZE);
-    munmap(vl2tab, PAGE_SIZE);
-#else
-    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
-    ctxt->ctrlreg[3] = l3tab;
-
-    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                        PROT_READ|PROT_WRITE,
-                                        l3tab >> PAGE_SHIFT)) == NULL )
-        goto error_out;
-    memset(vl3tab, 0, PAGE_SIZE);
-
-    /* Fill in every PDPT entry. */
-    for ( i = 0; i < L3_PAGETABLE_ENTRIES_PAE; i++ )
-    {
-        l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
-        if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                            PROT_READ|PROT_WRITE,
-                                            l2tab >> PAGE_SHIFT)) == NULL )
-            goto error_out;
-        memset(vl2tab, 0, PAGE_SIZE);
-        munmap(vl2tab, PAGE_SIZE);
-        vl2tab = NULL;
-        vl3tab[i] = l2tab | L3_PROT;
-    }
-
-    for ( count = 0; count < (v_end >> PAGE_SHIFT); count++ )
-    {
-        if ( !(count & ((1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)) - 1)) )
-        {
-            l2tab = vl3tab[count >> (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)]
-                    & PAGE_MASK;
-
-            if (vl2tab != NULL)
-                munmap(vl2tab, PAGE_SIZE);
-
-            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                                PROT_READ|PROT_WRITE,
-                                                l2tab >> PAGE_SHIFT)) == NULL )
-                goto error_out;
-
-            vl2e = &vl2tab[l2_table_offset(count << PAGE_SHIFT)];
-        }
-        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
-        {
-            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
-            if ( vl1tab != NULL )
-                munmap(vl1tab, PAGE_SIZE);
-            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                                PROT_READ|PROT_WRITE,
-                                                l1tab >> PAGE_SHIFT)) == NULL )
-            {
-                munmap(vl2tab, PAGE_SIZE);
-                goto error_out;
-            }
-            memset(vl1tab, 0, PAGE_SIZE);
-            vl1e = &vl1tab[l1_table_offset(count << PAGE_SHIFT)];
-            *vl2e++ = l1tab | L2_PROT;
-        }
-
-        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
-        vl1e++;
-    }
-
-    munmap(vl1tab, PAGE_SIZE);
-    munmap(vl2tab, PAGE_SIZE);
-    munmap(vl3tab, PAGE_SIZE);
-#endif
     /* Write the machine->phys table entries. */
     for ( count = 0; count < nr_pages; count++ )
     {
@@ -525,14 +276,6 @@
         goto error_out;
     memset(e820_page, 0, PAGE_SIZE);
     e820_map_nr = build_e820map(e820_page, v_end);
-#if defined (__i386__)
-    if (zap_mmio_ranges(xc_handle, dom, l2tab, e820_map_nr,
-                        ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1)
-#else
-    if (zap_mmio_ranges(xc_handle, dom, l3tab, e820_map_nr,
-                        ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1)
-#endif
-        goto error_out;
     munmap(e820_page, PAGE_SIZE);
 
     /* shared_info page starts its life empty. */
diff -r 0bd023cf351e -r 0e7bdd973e17 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Fri Feb  3 10:54:05 2006
+++ b/xen/arch/x86/hvm/hvm.c    Fri Feb  3 11:02:30 2006
@@ -53,6 +53,20 @@
 
 struct hvm_function_table hvm_funcs;
 
+static void vmx_zap_mmio_range(
+    struct domain *d, unsigned long pfn, unsigned long nr_pfn)
+{
+    unsigned long i, val = INVALID_MFN;
+
+    for ( i = 0; i < nr_pfn; i++ )
+    {
+        if ( pfn + i >= 0xfffff ) 
+            break;
+        
+        __copy_to_user(&phys_to_machine_mapping[pfn + i], &val, sizeof (val));
+    }
+}
+
 static void hvm_map_io_shared_page(struct domain *d)
 {
     int i;
@@ -84,8 +98,12 @@
         if (e820entry[i].type == E820_SHARED_PAGE)
         {
             gpfn = (e820entry[i].addr >> PAGE_SHIFT);
-            break;
         }
+        if ( e820entry[i].type == E820_IO )
+            vmx_zap_mmio_range(
+                d, 
+                e820entry[i].addr >> PAGE_SHIFT,
+                e820entry[i].size >> PAGE_SHIFT);
     }
 
     if ( gpfn == 0 ) {
diff -r 0bd023cf351e -r 0e7bdd973e17 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Fri Feb  3 10:54:05 2006
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Fri Feb  3 11:02:30 2006
@@ -34,7 +34,8 @@
 #include <asm/flushtlb.h>
 #include <xen/event.h>
 #include <xen/kernel.h>
-#if CONFIG_PAGING_LEVELS >= 4
+#include <asm/shadow.h>
+#if CONFIG_PAGING_LEVELS >= 3
 #include <asm/shadow_64.h>
 #endif
 
@@ -218,6 +219,7 @@
     error |= __vmwrite(GUEST_TR_BASE, 0);
     error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
 
+    shadow_direct_map_init(v);
     __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
     __vmwrite(HOST_CR3, pagetable_get_paddr(v->arch.monitor_table));
     __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
diff -r 0bd023cf351e -r 0e7bdd973e17 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Feb  3 10:54:05 2006
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Fri Feb  3 11:02:30 2006
@@ -563,7 +563,12 @@
     }
 #endif
 
-    if (!vmx_paging_enabled(current)){
+    if ( !vmx_paging_enabled(current) )
+    {
+        /* construct 1-to-1 direct mapping */
+        if ( shadow_direct_map_fault(va, regs) ) 
+            return 1;
+
         handle_mmio(va, va);
         TRACE_VMEXIT (2,2);
         return 1;
@@ -1212,6 +1217,9 @@
                 __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
             }
         }
+#endif
+#if CONFIG_PAGING_LEVELS == 2
+        shadow_direct_map_clean(v);
 #endif
         /*
          * Now arch.guest_table points to machine physical.
diff -r 0bd023cf351e -r 0e7bdd973e17 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Fri Feb  3 10:54:05 2006
+++ b/xen/arch/x86/shadow.c     Fri Feb  3 11:02:30 2006
@@ -2858,7 +2858,7 @@
     if (!page)
         domain_crash_synchronous();
 
-    for (count = 0; count < PDP_ENTRIES; count++)
+    for ( count = 0; count < PAE_L3_PAGETABLE_ENTRIES; count++ )
     {
         sl2mfn = page_to_mfn(page+count);
         l2 = map_domain_page(sl2mfn);
@@ -3568,6 +3568,7 @@
     shadow_unlock(d);
 }
 
+
 #if CONFIG_PAGING_LEVELS == 4
 static unsigned long gva_to_gpa_64(unsigned long gva)
 {
@@ -3637,6 +3638,79 @@
 
 #endif
 
+#if CONFIG_PAGING_LEVELS == 3 ||                                \
+    ( CONFIG_PAGING_LEVELS == 4 && defined (GUEST_PGENTRY_32) )
+
+/* 
+ * Use GUEST_PGENTRY_32 to force PAE_SHADOW_SELF_ENTRY for L4.
+ *
+ * Very simple shadow code to handle 1:1 direct mapping for guest 
+ * non-paging code, which actually is running in PAE/vm86 mode with 
+ * paging-enabled.
+ *
+ * We expect that the top level (L3) page has been allocated and initialized.
+ */
+int shadow_direct_map_fault(unsigned long vpa, struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct domain *d = v->domain;
+    l3_pgentry_t sl3e;
+    l2_pgentry_t sl2e;
+    l1_pgentry_t sl1e;
+    unsigned long mfn, smfn;
+    struct page_info *page;
+
+    /*
+     * If the faulting address is within the MMIO range, we continue
+     * on handling the #PF as such.
+     */
+    if ( (mfn = get_mfn_from_gpfn(vpa >> PAGE_SHIFT)) == INVALID_MFN )
+    {
+         goto fail;
+    }
+
+    shadow_lock(d);
+
+    __shadow_get_l3e(v, vpa, &sl3e);
+
+    if ( !(l3e_get_flags(sl3e) & _PAGE_PRESENT) ) 
+    {
+        page = alloc_domheap_page(NULL);
+        if ( !page )
+            goto fail; 
+        smfn = page_to_mfn(page);
+        sl3e = l3e_from_pfn(smfn, _PAGE_PRESENT);
+        __shadow_set_l3e(v, vpa, &sl3e);
+    }
+
+    __shadow_get_l2e(v, vpa, &sl2e);
+
+    if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) ) 
+    {
+        page = alloc_domheap_page(NULL);
+        if ( !page )
+            goto fail; 
+        smfn = page_to_mfn(page);
+
+        sl2e = l2e_from_pfn(smfn, __PAGE_HYPERVISOR | _PAGE_USER);
+        __shadow_set_l2e(v, vpa, &sl2e);
+    }
+
+    __shadow_get_l1e(v, vpa, &sl1e);
+        
+    if ( !(l1e_get_flags(sl1e) & _PAGE_PRESENT) ) 
+    {
+        sl1e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR | _PAGE_USER);
+        __shadow_set_l1e(v, vpa, &sl1e);
+    } 
+
+    shadow_unlock(d);
+    return EXCRET_fault_fixed;
+
+fail:
+    return 0;
+}
+#endif
 
 /*
  * Local variables:
diff -r 0bd023cf351e -r 0e7bdd973e17 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Fri Feb  3 10:54:05 2006
+++ b/xen/arch/x86/shadow32.c   Fri Feb  3 11:02:30 2006
@@ -42,6 +42,8 @@
 #if SHADOW_DEBUG
 static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
 #endif
+
+static void free_p2m_table(struct vcpu *v);
 
 /********
 
@@ -746,19 +748,18 @@
             l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
                           __PAGE_HYPERVISOR);
 
-    // map the phys_to_machine map into the Read-Only MPT space for this domain
-    mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
-        l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
-                        __PAGE_HYPERVISOR);
-
     // Don't (yet) have mappings for these...
     // Don't want to accidentally see the idle_pg_table's linear mapping.
     //
     mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
     mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
+    mpl2e[l2_table_offset(RO_MPT_VIRT_START)] = l2e_empty();
 
     v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
     v->arch.monitor_vtable = mpl2e;
+
+    if ( v->vcpu_id == 0 )
+        alloc_p2m_table(d);
 }
 
 /*
@@ -791,6 +792,9 @@
         ASSERT(mfn);
         put_shadow_ref(mfn);
     }
+
+    if ( v->vcpu_id == 0 )
+        free_p2m_table(v);
 
     /*
      * Then free monitor_table.
@@ -844,67 +848,209 @@
     return 1;
 }
 
-static int
+int
 alloc_p2m_table(struct domain *d)
 {
     struct list_head *list_ent;
-    struct page_info *page, *l2page;
-    l2_pgentry_t *l2;
-    unsigned long mfn, pfn;
-    struct domain_mmap_cache l1cache, l2cache;
-
-    l2page = alloc_domheap_page(NULL);
-    if ( l2page == NULL )
-        return 0;
-
-    domain_mmap_cache_init(&l1cache);
-    domain_mmap_cache_init(&l2cache);
-
-    d->arch.phys_table = mk_pagetable(page_to_maddr(l2page));
-    l2 = map_domain_page_with_cache(page_to_mfn(l2page), &l2cache);
-    memset(l2, 0, PAGE_SIZE);
-    unmap_domain_page_with_cache(l2, &l2cache);
+    unsigned long va = RO_MPT_VIRT_START;   /* phys_to_machine_mapping */
+
+    l2_pgentry_t *l2tab = NULL;
+    l1_pgentry_t *l1tab = NULL;
+    unsigned long *l0tab = NULL;
+    l2_pgentry_t l2e = { 0 };
+    l1_pgentry_t l1e = { 0 };
+
+    unsigned long pfn;
+    int i;
+
+    ASSERT ( pagetable_get_pfn(d->vcpu[0]->arch.monitor_table));
+
+    l2tab = map_domain_page(
+        pagetable_get_pfn(d->vcpu[0]->arch.monitor_table));
 
     list_ent = d->page_list.next;
-    while ( list_ent != &d->page_list )
-    {
+
+    for ( i = 0; list_ent != &d->page_list; i++ )
+    {
+        struct page_info *page;
         page = list_entry(list_ent, struct page_info, list);
-        mfn = page_to_mfn(page);
-        pfn = get_gpfn_from_mfn(mfn);
-        ASSERT(pfn != INVALID_M2P_ENTRY);
-        ASSERT(pfn < (1u<<20));
-
-        set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
-
-        list_ent = page->list.next;
-    }
-
-    list_ent = d->xenpage_list.next;
-    while ( list_ent != &d->xenpage_list )
-    {
-        page = list_entry(list_ent, struct page_info, list);
-        mfn = page_to_mfn(page);
-        pfn = get_gpfn_from_mfn(mfn);
-        if ( (pfn != INVALID_M2P_ENTRY) &&
-             (pfn < (1u<<20)) )
-        {
-            set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
-        }
-
-        list_ent = page->list.next;
-    }
-
-    domain_mmap_cache_destroy(&l2cache);
-    domain_mmap_cache_destroy(&l1cache);
+        pfn = page_to_mfn(page);
+
+        l2e = l2tab[l2_table_offset(va)];
+        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+        {
+            page = alloc_domheap_page(NULL);
+
+            if ( !l1tab )
+                unmap_domain_page(l1tab);
+            l1tab = map_domain_page(page_to_mfn(page));
+            memset(l1tab, 0, PAGE_SIZE);
+            l2e = l2tab[l2_table_offset(va)] =
+                l2e_from_page(page, __PAGE_HYPERVISOR);
+        }
+        else if ( l1tab == NULL)
+            l1tab = map_domain_page(l2e_get_pfn(l2e));
+
+        l1e = l1tab[l1_table_offset(va)];
+        if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
+        {
+            page = alloc_domheap_page(NULL);
+            if ( !l0tab  )
+                unmap_domain_page(l0tab);
+            l0tab = map_domain_page(page_to_mfn(page));
+            memset(l0tab, 0, PAGE_SIZE );
+            l1e = l1tab[l1_table_offset(va)] =
+                l1e_from_page(page, __PAGE_HYPERVISOR);
+        }
+        else if ( l0tab == NULL)
+            l0tab = map_domain_page(l1e_get_pfn(l1e));
+
+        l0tab[i & ((1 << PAGETABLE_ORDER) - 1) ] = pfn;
+        list_ent = frame_table[pfn].list.next;
+        va += sizeof(pfn);
+    }
+
+    unmap_domain_page(l2tab);
+    unmap_domain_page(l1tab);
+    unmap_domain_page(l0tab);
 
     return 1;
 }
 
-static void
-free_p2m_table(struct domain *d)
-{
-    // uh, this needs some work...  :)
-    BUG();
+static void 
+free_p2m_table(struct vcpu *v)
+{
+    unsigned long va;
+    l2_pgentry_t *l2tab;
+    l1_pgentry_t *l1tab;
+    l2_pgentry_t l2e;
+    l1_pgentry_t l1e;
+
+    ASSERT ( pagetable_get_pfn(v->arch.monitor_table) );
+
+    l2tab = map_domain_page(
+        pagetable_get_pfn(v->arch.monitor_table));
+
+    for ( va = RO_MPT_VIRT_START; va < RO_MPT_VIRT_END; )
+    {
+        int i;
+
+        l2e = l2tab[l2_table_offset(va)];
+        if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
+        {
+            l1tab = map_domain_page(l2e_get_pfn(l2e));
+            for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++)
+            {
+                l1e = l1tab[l1_table_offset(va)];
+
+                if ( l1e_get_flags(l1e) & _PAGE_PRESENT )
+                    free_domheap_page(mfn_to_page(l1e_get_pfn(l1e)));
+                va += PAGE_SIZE; 
+            }
+            unmap_domain_page(l1tab);
+            free_domheap_page(mfn_to_page(l2e_get_pfn(l2e)));
+        }
+    }
+    unmap_domain_page(l2tab);
+}
+
+int shadow_direct_map_fault(unsigned long vpa, struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct domain *d = v->domain;
+    l2_pgentry_t sl2e;
+    l1_pgentry_t sl1e;
+    l1_pgentry_t *sple = NULL;
+    unsigned long mfn, smfn;
+    struct page_info *page;
+
+    /*
+     * If the faulting address is within the MMIO range, we continue
+     * on handling the #PF as such.
+     */
+    if ( (mfn = get_mfn_from_gpfn(vpa >> PAGE_SHIFT)) == INVALID_MFN )
+    {
+         goto fail;
+    }
+
+    shadow_lock(d);
+
+    __shadow_get_l2e(v, vpa, &sl2e);
+
+   if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
+    {
+        page = alloc_domheap_page(NULL);
+        if ( !page )
+            goto fail;
+
+        smfn = page_to_mfn(page);
+        sl2e = l2e_from_pfn(smfn, __PAGE_HYPERVISOR | _PAGE_USER);
+        __shadow_set_l2e(v, vpa, sl2e);
+    }
+
+    sple = (l1_pgentry_t *)map_domain_page(l2e_get_pfn(sl2e));
+    sl1e = sple[l1_table_offset(vpa)];
+
+    if ( !(l1e_get_flags(sl1e) & _PAGE_PRESENT) )
+    {
+        sl1e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR | _PAGE_USER);
+        sple[l1_table_offset(vpa)] = sl1e;
+    }
+    unmap_domain_page(sple);
+    shadow_unlock(d);
+
+    return EXCRET_fault_fixed;
+
+fail:
+    return 0;
+}
+
+
+int shadow_direct_map_init(struct vcpu *v)
+{
+    struct page_info *page;
+    l2_pgentry_t *root;
+
+    if ( !(page = alloc_domheap_page(NULL)) )
+        goto fail;
+
+    root = map_domain_page_global(page_to_mfn(page));
+    memset(root, 0, PAGE_SIZE);
+
+    v->domain->arch.phys_table = mk_pagetable(page_to_maddr(page));
+    /* 
+     * We need to set shadow_vtable to get __shadow_set/get_xxx
+     * working
+     */
+    v->arch.shadow_vtable = (l2_pgentry_t *) root;
+    v->arch.shadow_table = mk_pagetable(0);
+    return 1;
+
+fail:
+    return 0;
+}
+
+void shadow_direct_map_clean(struct vcpu *v)
+{
+    int i;
+    l2_pgentry_t *l2e;
+
+    ASSERT ( v->arch.shadow_vtable );
+
+    l2e = v->arch.shadow_vtable;
+
+    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+    {
+        if ( l2e_get_flags(l2e[i]) & _PAGE_PRESENT )
+            free_domheap_page(mfn_to_page(l2e_get_pfn(l2e[i])));
+    }
+
+    free_domheap_page(
+            mfn_to_page(pagetable_get_pfn(v->domain->arch.phys_table)));
+
+    unmap_domain_page_global(v->arch.shadow_vtable);
+    v->arch.shadow_vtable = 0;
+    v->domain->arch.phys_table = mk_pagetable(0);
 }
 
 int __shadow_mode_enable(struct domain *d, unsigned int mode)
@@ -1092,11 +1238,7 @@
         xfree(d->arch.shadow_dirty_bitmap);
         d->arch.shadow_dirty_bitmap = NULL;
     }
-    if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
-         pagetable_get_paddr(d->arch.phys_table) )
-    {
-        free_p2m_table(d);
-    }
+
     return -ENOMEM;
 }
 
diff -r 0bd023cf351e -r 0e7bdd973e17 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Fri Feb  3 10:54:05 2006
+++ b/xen/arch/x86/shadow_public.c      Fri Feb  3 11:02:30 2006
@@ -37,11 +37,74 @@
 #if CONFIG_PAGING_LEVELS == 4
 extern struct shadow_ops MODE_F_HANDLER;
 extern struct shadow_ops MODE_D_HANDLER;
+
+static void free_p2m_table(struct vcpu *v);
 #endif
 
 extern struct shadow_ops MODE_A_HANDLER;
 
 #define SHADOW_MAX_GUEST32(_encoded) ((L1_PAGETABLE_ENTRIES_32 - 1) - ((_encoded) >> 16))
+
+
+int shadow_direct_map_init(struct vcpu *v)
+{
+    struct page_info *page;
+    l3_pgentry_t *root;
+
+    if ( !(page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA)) )
+        goto fail;
+
+    root = map_domain_page_global(page_to_mfn(page));
+    memset(root, 0, PAGE_SIZE);
+    root[PAE_SHADOW_SELF_ENTRY] = l3e_from_page(page, __PAGE_HYPERVISOR);
+
+    v->domain->arch.phys_table = mk_pagetable(page_to_maddr(page));
+    /* 
+     * We need to set shadow_vtable to get __shadow_set/get_xxx
+     * working
+     */
+    v->arch.shadow_vtable = (l2_pgentry_t *) root;
+
+    return 1;
+    
+fail:
+    return 0;
+}
+
+static void shadow_direct_map_clean(struct vcpu *v)
+{
+    l2_pgentry_t *l2e;
+    l3_pgentry_t *l3e;
+    int i, j;
+
+    ASSERT ( v->arch.shadow_vtable );
+
+    l3e = (l3_pgentry_t *) v->arch.shadow_vtable;
+    
+    for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
+    {
+        if ( l3e_get_flags(l3e[i]) & _PAGE_PRESENT )
+        {
+            l2e = map_domain_page(l3e_get_pfn(l3e[i]));
+
+            for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
+            {
+                if ( l2e_get_flags(l2e[j]) & _PAGE_PRESENT )
+                    free_domheap_page(mfn_to_page(l2e_get_pfn(l2e[j])));
+            }
+            unmap_domain_page(l2e);
+            free_domheap_page(mfn_to_page(l3e_get_pfn(l3e[i])));
+        }
+    }
+
+    free_domheap_page(
+        mfn_to_page(pagetable_get_pfn(v->domain->arch.phys_table)));
+
+    unmap_domain_page_global(v->arch.shadow_vtable);
+    v->arch.shadow_vtable = 0;
+    v->domain->arch.phys_table = mk_pagetable(0);
+}
+
 /****************************************************************************/
 /************* export interface functions ***********************************/
 /****************************************************************************/
@@ -49,7 +112,12 @@
 
 int shadow_set_guest_paging_levels(struct domain *d, int levels)
 {
+    struct vcpu *v = current;
     shadow_lock(d);
+
+    if ( shadow_mode_translate(d) && 
+         !(pagetable_get_paddr(v->domain->arch.phys_table)) )
+         shadow_direct_map_clean(v);
 
     switch(levels) {
 #if CONFIG_PAGING_LEVELS >= 4
@@ -171,7 +239,7 @@
     if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
     {
         struct page_info *page = mfn_to_page(smfn);
-        for ( i = 0; i < PDP_ENTRIES; i++ )
+        for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
         {
             if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
                 free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
@@ -229,48 +297,12 @@
 #endif
 
 #if CONFIG_PAGING_LEVELS == 4
-/*
- * Convert PAE 3-level page-table to 4-level page-table
- */
-static pagetable_t page_table_convert(struct domain *d)
-{
-    struct page_info *l4page, *l3page;
-    l4_pgentry_t *l4;
-    l3_pgentry_t *l3, *pae_l3;
-    int i;
-
-    l4page = alloc_domheap_page(NULL);
-    if (l4page == NULL)
-        domain_crash_synchronous();
-    l4 = map_domain_page(page_to_mfn(l4page));
-    memset(l4, 0, PAGE_SIZE);
-
-    l3page = alloc_domheap_page(NULL);
-    if (l3page == NULL)
-        domain_crash_synchronous();
-    l3 = map_domain_page(page_to_mfn(l3page));
-    memset(l3, 0, PAGE_SIZE);
-
-    l4[0] = l4e_from_page(l3page, __PAGE_HYPERVISOR);
-
-    pae_l3 = map_domain_page(pagetable_get_pfn(d->arch.phys_table));
-    for (i = 0; i < PDP_ENTRIES; i++)
-        l3[i] = l3e_from_pfn(l3e_get_pfn(pae_l3[i]), __PAGE_HYPERVISOR);
-    unmap_domain_page(pae_l3);
-
-    unmap_domain_page(l4);
-    unmap_domain_page(l3);
-
-    return mk_pagetable(page_to_maddr(l4page));
-}
-
 static void alloc_monitor_pagetable(struct vcpu *v)
 {
     unsigned long mmfn;
     l4_pgentry_t *mpl4e;
     struct page_info *mmfn_info;
     struct domain *d = v->domain;
-    pagetable_t phys_table;
 
     ASSERT(!pagetable_get_paddr(v->arch.monitor_table)); /* we should only get called once */
 
@@ -284,13 +316,13 @@
         l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
 
     /* map the phys_to_machine map into the per domain Read-Only MPT space */
-    phys_table = page_table_convert(d);
-    mpl4e[l4_table_offset(RO_MPT_VIRT_START)] =
-        l4e_from_paddr(pagetable_get_paddr(phys_table),
-                       __PAGE_HYPERVISOR);
 
     v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
     v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e;
+    mpl4e[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
+
+    if ( v->vcpu_id == 0 )
+        alloc_p2m_table(d);
 }
 
 void free_monitor_pagetable(struct vcpu *v)
@@ -300,99 +332,8 @@
     /*
      * free monitor_table.
      */
-    mfn = pagetable_get_pfn(v->arch.monitor_table);
-    unmap_domain_page_global(v->arch.monitor_vtable);
-    free_domheap_page(mfn_to_page(mfn));
-
-    v->arch.monitor_table = mk_pagetable(0);
-    v->arch.monitor_vtable = 0;
-}
-
-#elif CONFIG_PAGING_LEVELS == 3
-
-static void alloc_monitor_pagetable(struct vcpu *v)
-{
-    BUG(); /* PAE not implemented yet */
-}
-
-void free_monitor_pagetable(struct vcpu *v)
-{
-    BUG(); /* PAE not implemented yet */
-}
-
-#elif CONFIG_PAGING_LEVELS == 2
-
-static void alloc_monitor_pagetable(struct vcpu *v)
-{
-    unsigned long mmfn;
-    l2_pgentry_t *mpl2e;
-    struct page_info *mmfn_info;
-    struct domain *d = v->domain;
-    int i;
-
-    ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
-
-    mmfn_info = alloc_domheap_page(NULL);
-    ASSERT(mmfn_info != NULL);
-
-    mmfn = page_to_mfn(mmfn_info);
-    mpl2e = (l2_pgentry_t *)map_domain_page_global(mmfn);
-    memset(mpl2e, 0, PAGE_SIZE);
-
-    memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
-           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
-           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
-
-    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
-        mpl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
-            l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
-                          __PAGE_HYPERVISOR);
-
-    // map the phys_to_machine map into the Read-Only MPT space for this domain
-    mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
-        l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
-                       __PAGE_HYPERVISOR);
-
-    // Don't (yet) have mappings for these...
-    // Don't want to accidentally see the idle_pg_table's linear mapping.
-    //
-    mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
-    mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
-
-    v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
-    v->arch.monitor_vtable = mpl2e;
-}
-
-/*
- * Free the pages for monitor_table and hl2_table
- */
-void free_monitor_pagetable(struct vcpu *v)
-{
-    l2_pgentry_t *mpl2e, hl2e, sl2e;
-    unsigned long mfn;
-
-    ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
-
-    mpl2e = v->arch.monitor_vtable;
-
-    /*
-     * First get the mfn for hl2_table by looking at monitor_table
-     */
-    hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
-    if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
-    {
-        mfn = l2e_get_pfn(hl2e);
-        ASSERT(mfn);
-        put_shadow_ref(mfn);
-    }
-
-    sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
-    if ( l2e_get_flags(sl2e) & _PAGE_PRESENT )
-    {
-        mfn = l2e_get_pfn(sl2e);
-        ASSERT(mfn);
-        put_shadow_ref(mfn);
-    }
+    if ( v->vcpu_id == 0 )
+        free_p2m_table(v);
 
     /*
      * Then free monitor_table.
@@ -403,6 +344,17 @@
 
     v->arch.monitor_table = mk_pagetable(0);
     v->arch.monitor_vtable = 0;
+}
+#elif CONFIG_PAGING_LEVELS == 3
+
+static void alloc_monitor_pagetable(struct vcpu *v)
+{
+    BUG(); /* PAE not implemented yet */
+}
+
+void free_monitor_pagetable(struct vcpu *v)
+{
+    BUG(); /* PAE not implemented yet */
 }
 #endif
 
@@ -939,14 +891,6 @@
 
     for_each_vcpu(d, v)
         update_pagetables(v);
-}
-
-
-static void
-free_p2m_table(struct domain *d)
-{
-    // uh, this needs some work...  :)
-    BUG();
 }
 
 
@@ -1143,11 +1087,7 @@
         xfree(d->arch.shadow_dirty_bitmap);
         d->arch.shadow_dirty_bitmap = NULL;
     }
-    if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
-         pagetable_get_paddr(d->arch.phys_table) )
-    {
-        free_p2m_table(d);
-    }
+
     return -ENOMEM;
 }
 
@@ -1375,57 +1315,221 @@
 alloc_p2m_table(struct domain *d)
 {
     struct list_head *list_ent;
-    struct page_info *page, *l2page;
-    l2_pgentry_t *l2;
-    unsigned long mfn, pfn;
-    struct domain_mmap_cache l1cache, l2cache;
-
-    l2page = alloc_domheap_page(NULL);
-    if ( l2page == NULL )
-        return 0;
-
-    domain_mmap_cache_init(&l1cache);
-    domain_mmap_cache_init(&l2cache);
-
-    d->arch.phys_table = mk_pagetable(page_to_maddr(l2page));
-    l2 = map_domain_page_with_cache(page_to_mfn(l2page), &l2cache);
-    memset(l2, 0, PAGE_SIZE);
-    unmap_domain_page_with_cache(l2, &l2cache);
+    unsigned long va = RO_MPT_VIRT_START; /*  phys_to_machine_mapping */
+//    unsigned long va = PML4_ADDR(264);
+
+#if CONFIG_PAGING_LEVELS >= 4
+    l4_pgentry_t *l4tab = NULL;
+    l4_pgentry_t l4e = { 0 };
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+    l3_pgentry_t *l3tab = NULL;
+    l3_pgentry_t l3e = { 0 };
+#endif
+    l2_pgentry_t *l2tab = NULL;
+    l1_pgentry_t *l1tab = NULL;
+    unsigned long *l0tab = NULL;
+    l2_pgentry_t l2e = { 0 };
+    l1_pgentry_t l1e = { 0 };
+
+    unsigned long pfn;
+    int i;
+
+    ASSERT ( pagetable_get_pfn(d->vcpu[0]->arch.monitor_table) );
+
+#if CONFIG_PAGING_LEVELS >= 4
+    l4tab = map_domain_page(
+        pagetable_get_pfn(d->vcpu[0]->arch.monitor_table));
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+    l3tab = map_domain_page(
+        pagetable_get_pfn(d->vcpu[0]->arch.monitor_table));
+#endif
 
     list_ent = d->page_list.next;
-    while ( list_ent != &d->page_list )
-    {
+
+    for ( i = 0; list_ent != &d->page_list; i++ ) 
+    {
+        struct page_info *page;
+
         page = list_entry(list_ent, struct page_info, list);
-        mfn = page_to_mfn(page);
-        pfn = get_gpfn_from_mfn(mfn);
-        ASSERT(pfn != INVALID_M2P_ENTRY);
-        ASSERT(pfn < (1u<<20));
-
-        set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
-
-        list_ent = page->list.next;
-    }
-
-    list_ent = d->xenpage_list.next;
-    while ( list_ent != &d->xenpage_list )
-    {
-        page = list_entry(list_ent, struct page_info, list);
-        mfn = page_to_mfn(page);
-        pfn = get_gpfn_from_mfn(mfn);
-        if ( (pfn != INVALID_M2P_ENTRY) &&
-             (pfn < (1u<<20)) )
-        {
-            set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
-        }
-
-        list_ent = page->list.next;
-    }
-
-    domain_mmap_cache_destroy(&l2cache);
-    domain_mmap_cache_destroy(&l1cache);
+        pfn = page_to_mfn(page);
+
+#if CONFIG_PAGING_LEVELS >= 4
+        l4e = l4tab[l4_table_offset(va)];
+        if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ) 
+        {
+            page = alloc_domheap_page(NULL);
+
+            if ( !l3tab )
+                unmap_domain_page(l3tab);
+
+            l3tab = map_domain_page(page_to_mfn(page));
+            memset(l3tab, 0, PAGE_SIZE);
+            l4e = l4tab[l4_table_offset(va)] = 
+                l4e_from_page(page, __PAGE_HYPERVISOR);
+        } 
+        else if ( l3tab == NULL)
+            l3tab = map_domain_page(l4e_get_pfn(l4e));
+#endif
+        l3e = l3tab[l3_table_offset(va)];
+        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) 
+        {
+            page = alloc_domheap_page(NULL);
+            if ( !l2tab )
+                unmap_domain_page(l2tab);
+
+            l2tab = map_domain_page(page_to_mfn(page));
+            memset(l2tab, 0, PAGE_SIZE);
+            l3e = l3tab[l3_table_offset(va)] = 
+                l3e_from_page(page, __PAGE_HYPERVISOR);
+        } 
+        else if ( l2tab == NULL) 
+            l2tab = map_domain_page(l3e_get_pfn(l3e));
+
+        l2e = l2tab[l2_table_offset(va)];
+        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) 
+        {
+            page = alloc_domheap_page(NULL);
+
+            if ( !l1tab )
+                unmap_domain_page(l1tab);
+            
+            l1tab = map_domain_page(page_to_mfn(page));
+            memset(l1tab, 0, PAGE_SIZE);
+            l2e = l2tab[l2_table_offset(va)] = 
+                l2e_from_page(page, __PAGE_HYPERVISOR);
+        } 
+        else if ( l1tab == NULL) 
+            l1tab = map_domain_page(l2e_get_pfn(l2e));
+
+        l1e = l1tab[l1_table_offset(va)];
+        if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) ) 
+        {
+            page = alloc_domheap_page(NULL);
+            if ( !l0tab )
+                unmap_domain_page(l0tab);
+
+            l0tab = map_domain_page(page_to_mfn(page));
+            memset(l0tab, 0, PAGE_SIZE);
+            l1e = l1tab[l1_table_offset(va)] = 
+                l1e_from_page(page, __PAGE_HYPERVISOR);
+        }
+        else if ( l0tab == NULL) 
+            l0tab = map_domain_page(l1e_get_pfn(l1e));
+
+        l0tab[i & ((1 << PAGETABLE_ORDER) - 1) ] = pfn;
+        list_ent = frame_table[pfn].list.next;
+        va += sizeof (pfn);
+    }
+#if CONFIG_PAGING_LEVELS >= 4
+    unmap_domain_page(l4tab);
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+    unmap_domain_page(l3tab);
+#endif
+    unmap_domain_page(l2tab);
+    unmap_domain_page(l1tab);
+    unmap_domain_page(l0tab);
 
     return 1;
 }
+
+#if CONFIG_PAGING_LEVELS == 4
+static void
+free_p2m_table(struct vcpu *v)
+{
+    unsigned long va;
+    l1_pgentry_t *l1tab;
+    l1_pgentry_t l1e;
+    l2_pgentry_t *l2tab;
+    l2_pgentry_t l2e;
+#if CONFIG_PAGING_LEVELS >= 3
+    l3_pgentry_t *l3tab; 
+    l3_pgentry_t l3e;
+    int i3;
+#endif
+#if CONFIG_PAGING_LEVELS == 4
+    l4_pgentry_t *l4tab; 
+    l4_pgentry_t l4e;
+#endif
+
+    ASSERT ( pagetable_get_pfn(v->arch.monitor_table) );
+
+#if CONFIG_PAGING_LEVELS == 4
+    l4tab = map_domain_page(
+        pagetable_get_pfn(v->arch.monitor_table));
+#endif
+#if CONFIG_PAGING_LEVELS == 3
+    l3tab = map_domain_page(
+        pagetable_get_pfn(v->arch.monitor_table));
+#endif
+
+    for ( va = RO_MPT_VIRT_START; va < RO_MPT_VIRT_END; )
+    {
+#if CONFIG_PAGING_LEVELS == 4
+        l4e = l4tab[l4_table_offset(va)];
+
+        if ( l4e_get_flags(l4e) & _PAGE_PRESENT )
+        {
+            l3tab = map_domain_page(l4e_get_pfn(l4e));
+#endif
+            for ( i3 = 0; i3 < L1_PAGETABLE_ENTRIES; i3++ )
+            {
+                l3e = l3tab[l3_table_offset(va)];
+                if ( l3e_get_flags(l3e) & _PAGE_PRESENT )
+                {
+                    int i2;
+
+                    l2tab = map_domain_page(l3e_get_pfn(l3e));
+
+                    for ( i2 = 0; i2 < L1_PAGETABLE_ENTRIES; i2++ )
+                    {
+                        l2e = l2tab[l2_table_offset(va)];
+                        if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
+                        {
+                            int i1;
+
+                            l1tab = map_domain_page(l2e_get_pfn(l2e));
+
+                            for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++ )
+                            {
+                                l1e = l1tab[l1_table_offset(va)];
+
+                                if ( l1e_get_flags(l1e) & _PAGE_PRESENT )
+                                    free_domheap_page(mfn_to_page(l1e_get_pfn(l1e)));
+
+                                va += 1UL << L1_PAGETABLE_SHIFT;
+                            }
+                            unmap_domain_page(l1tab);
+                            free_domheap_page(mfn_to_page(l2e_get_pfn(l2e)));
+                        }
+                        else
+                            va += 1UL << L2_PAGETABLE_SHIFT;
+                    }
+                    unmap_domain_page(l2tab);
+                    free_domheap_page(mfn_to_page(l3e_get_pfn(l3e)));
+                }
+                else
+                    va += 1UL << L3_PAGETABLE_SHIFT;
+            }
+#if CONFIG_PAGING_LEVELS == 4
+            unmap_domain_page(l3tab);
+            free_domheap_page(mfn_to_page(l4e_get_pfn(l4e)));
+        }
+        else
+            va += 1UL << L4_PAGETABLE_SHIFT;
+#endif
+    }
+
+#if CONFIG_PAGING_LEVELS == 4
+    unmap_domain_page(l4tab);
+#endif
+#if CONFIG_PAGING_LEVELS == 3
+    unmap_domain_page(l3tab);
+#endif
+}
+#endif
 
 void shadow_l1_normal_pt_update(
     struct domain *d,
@@ -1770,6 +1874,7 @@
     shadow_unlock(d);
 }
 
+
 /*
  * Local variables:
  * mode: C
diff -r 0bd023cf351e -r 0e7bdd973e17 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Fri Feb  3 10:54:05 2006
+++ b/xen/include/asm-x86/mm.h  Fri Feb  3 11:02:30 2006
@@ -279,14 +279,9 @@
 static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
 {
     unsigned long mfn;
-    l1_pgentry_t pte;
-
-    if ( (__copy_from_user(&pte, &phys_to_machine_mapping[pfn],
-                           sizeof(pte)) == 0) &&
-         (l1e_get_flags(pte) & _PAGE_PRESENT) )
-        mfn = l1e_get_pfn(pte);
-    else
-        mfn = INVALID_MFN;
+
+    if ( __copy_from_user(&mfn, &phys_to_machine_mapping[pfn], sizeof(mfn)) )
+       mfn = INVALID_MFN;
 
     return mfn;
 }
diff -r 0bd023cf351e -r 0e7bdd973e17 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Fri Feb  3 10:54:05 2006
+++ b/xen/include/asm-x86/shadow.h      Fri Feb  3 11:02:30 2006
@@ -115,7 +115,12 @@
 #define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min))
 #define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
 #define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16))
-
+#if CONFIG_PAGING_LEVELS == 2
+extern void shadow_direct_map_clean(struct vcpu *v);
+#endif
+extern int shadow_direct_map_init(struct vcpu *v);
+extern int shadow_direct_map_fault(
+    unsigned long vpa, struct cpu_user_regs *regs);
 extern void shadow_mode_init(void);
 extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc);
 extern int shadow_fault(unsigned long va, struct cpu_user_regs *regs);
diff -r 0bd023cf351e -r 0e7bdd973e17 xen/include/asm-x86/shadow_64.h
--- a/xen/include/asm-x86/shadow_64.h   Fri Feb  3 10:54:05 2006
+++ b/xen/include/asm-x86/shadow_64.h   Fri Feb  3 11:02:30 2006
@@ -92,7 +92,7 @@
         ( !!(((x).lo ^ (y).lo) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
 
 #define PAE_SHADOW_SELF_ENTRY   259
-#define PDP_ENTRIES   4
+#define PAE_L3_PAGETABLE_ENTRIES   4
 
 static inline int  table_offset_64(unsigned long va, int level)
 {
diff -r 0bd023cf351e -r 0e7bdd973e17 xen/include/asm-x86/shadow_public.h
--- a/xen/include/asm-x86/shadow_public.h       Fri Feb  3 10:54:05 2006
+++ b/xen/include/asm-x86/shadow_public.h       Fri Feb  3 11:02:30 2006
@@ -21,10 +21,11 @@
 
 #ifndef _XEN_SHADOW_PUBLIC_H
 #define _XEN_SHADOW_PUBLIC_H
+
+extern int alloc_p2m_table(struct domain *d);
+
 #if CONFIG_PAGING_LEVELS >= 3
 #define MFN_PINNED(_x) (mfn_to_page(_x)->u.inuse.type_info & PGT_pinned)
-
-extern int alloc_p2m_table(struct domain *d);
 
 extern void shadow_sync_and_drop_references(
       struct domain *d, struct page_info *page);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

