
[Xen-changelog] Many fixes for save/restore and related areas for PAE in particular. Now



# HG changeset patch
# User smh22@xxxxxxxxxxxxxxxxxxxx
# Node ID fe3a892b33b4ccd3593bde788ceafa0668227450
# Parent  9b345321fd0676436d399c6eca0afd625b886ca4
Many fixes for save/restore and related areas, for PAE in particular. Save/restore
should now succeed on machines with up to 16GB of memory and with guests of
any size.

Signed-off-by: Steven Hand <steven@xxxxxxxxxxxxx>

diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Wed Nov 16 16:45:03 2005
@@ -136,21 +136,19 @@
 }
 EXPORT_SYMBOL(direct_kernel_remap_pfn_range);
 
-/* FIXME: This is horribly broken on PAE */ 
 static int lookup_pte_fn(
        pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
 {
-       unsigned long *ptep = (unsigned long *)data;
+       uint64_t *ptep = (uint64_t *)data;
        if (ptep)
-               *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) <<
-                        PAGE_SHIFT) |
-                       ((unsigned long)pte & ~PAGE_MASK);
+               *ptep = ((uint64_t)pfn_to_mfn(page_to_pfn(pte_page)) <<
+                        PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
        return 0;
 }
 
 int create_lookup_pte_addr(struct mm_struct *mm, 
                           unsigned long address,
-                          unsigned long *ptep)
+                          uint64_t *ptep)
 {
        return generic_page_range(mm, address, PAGE_SIZE, lookup_pte_fn, ptep);
 }
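
For context (not part of the changeset): a minimal standalone sketch of the
truncation the hunk above avoids. On a 32-bit dom0 an MFN above the 4GB
boundary, shifted left by PAGE_SHIFT, no longer fits in an unsigned long,
which is why lookup_pte_fn() and create_lookup_pte_addr() now hand back the
machine address as a uint64_t.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
    /* A frame number above the 4GB boundary (i.e. >= 1 << 20). */
    unsigned long mfn = 0x123456UL;

    /* On a 32-bit build, where unsigned long is 32 bits, the top bits
     * of the machine address are silently lost. */
    unsigned long truncated = mfn << PAGE_SHIFT;

    /* Widening before the shift, as the patched code does, keeps them. */
    uint64_t widened = (uint64_t)mfn << PAGE_SHIFT;

    printf("truncated: 0x%lx\n", truncated);
    printf("widened:   0x%llx\n", (unsigned long long)widened);
    return 0;
}
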
diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Wed Nov 16 16:45:03 2005
@@ -412,7 +412,7 @@
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
        unsigned int i, op = 0;
        struct grant_handle_pair *handle;
-       unsigned long ptep;
+       uint64_t ptep;
        int ret;
 
        for ( i = 0; i < nr_pages; i++)
@@ -427,9 +427,9 @@
                op++;
 
                if (create_lookup_pte_addr(
-                       blktap_vma->vm_mm,
-                       MMAP_VADDR(user_vstart, idx, i), 
-                       &ptep) !=0) {
+                           blktap_vma->vm_mm,
+                           MMAP_VADDR(user_vstart, idx, i), 
+                           &ptep) !=0) {
                        DPRINTK("Couldn't get a pte addr!\n");
                        return;
                }
@@ -705,7 +705,7 @@
 
                unsigned long uvaddr;
                unsigned long kvaddr;
-               unsigned long ptep;
+               uint64_t ptep;
 
                uvaddr = MMAP_VADDR(user_vstart, pending_idx, i);
                kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Wed Nov 16 16:45:03 2005
@@ -152,7 +152,8 @@
                privcmd_mmapbatch_t m;
                struct vm_area_struct *vma = NULL;
                unsigned long *p, addr;
-               unsigned long mfn, ptep;
+               unsigned long mfn; 
+               uint64_t ptep;
                int i;
 
                if (copy_from_user(&m, (void *)data, sizeof(m))) {
@@ -217,15 +218,39 @@
 #endif
 
 #ifndef __ia64__
-       case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN: {
-               unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
-               pgd_t *pgd = pgd_offset_k(m2pv);
-               pud_t *pud = pud_offset(pgd, m2pv);
-               pmd_t *pmd = pmd_offset(pud, m2pv);
-               unsigned long m2p_start_mfn =
-                       (*(unsigned long *)pmd) >> PAGE_SHIFT; 
-               ret = put_user(m2p_start_mfn, (unsigned long *)data) ?
-                       -EFAULT: 0;
+       case IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS: {
+
+               pgd_t *pgd; 
+               pud_t *pud; 
+               pmd_t *pmd; 
+               unsigned long m2pv, m2p_mfn;    
+               privcmd_m2pmfns_t m; 
+               unsigned long *p; 
+               int i; 
+
+               if (copy_from_user(&m, (void *)data, sizeof(m)))
+                       return -EFAULT;
+
+               m2pv = (unsigned long)machine_to_phys_mapping;
+
+               p = m.arr; 
+
+               for(i=0; i < m.num; i++) { 
+
+                       pgd = pgd_offset_k(m2pv);
+                       pud = pud_offset(pgd, m2pv);
+                       pmd = pmd_offset(pud, m2pv);
+                       m2p_mfn = (*(uint64_t *)pmd >> PAGE_SHIFT)&0xFFFFFFFF;
+                       
+                       if (put_user(m2p_mfn, p + i))
+                               return -EFAULT;
+
+                       m2pv += (1 << 21); 
+               }
+
+               ret = 0; 
+               break; 
+
        }
        break;
 #endif
diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Wed Nov 16 16:45:03 2005
@@ -450,11 +450,11 @@
 #endif /* !CONFIG_DISCONTIGMEM */
 
 int direct_remap_pfn_range(struct vm_area_struct *vma,
-                            unsigned long address, 
-                            unsigned long mfn,
-                            unsigned long size, 
-                            pgprot_t prot,
-                            domid_t  domid);
+                           unsigned long address, 
+                           unsigned long mfn,
+                           unsigned long size, 
+                           pgprot_t prot,
+                           domid_t  domid);
 int direct_kernel_remap_pfn_range(unsigned long address, 
                                  unsigned long mfn,
                                  unsigned long size, 
@@ -462,7 +462,7 @@
                                  domid_t  domid);
 int create_lookup_pte_addr(struct mm_struct *mm,
                            unsigned long address,
-                           unsigned long *ptep);
+                           uint64_t *ptep);
 int touch_pte_range(struct mm_struct *mm,
                     unsigned long address,
                     unsigned long size);
diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Wed Nov 16 16:45:03 2005
@@ -541,7 +541,7 @@
 
 int create_lookup_pte_addr(struct mm_struct *mm,
                            unsigned long address,
-                           unsigned long *ptep);
+                           uint64_t *ptep);
 
 int touch_pte_range(struct mm_struct *mm,
                     unsigned long address,
diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h
--- a/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h       Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h       Wed Nov 16 16:45:03 2005
@@ -55,6 +55,11 @@
        unsigned long *arr; /* array of mfns - top nibble set on err */
 } privcmd_mmapbatch_t; 
 
+typedef struct privcmd_m2pmfns { 
+       int num;    /* max number of mfns to return */
+       unsigned long *arr; /* array of mfns */
+} privcmd_m2pmfns_t; 
+
 typedef struct privcmd_blkmsg
 {
        unsigned long op;
@@ -69,12 +74,11 @@
  */
 #define IOCTL_PRIVCMD_HYPERCALL                                        \
        _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t))
-
 #define IOCTL_PRIVCMD_MMAP                                     \
        _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t))
 #define IOCTL_PRIVCMD_MMAPBATCH                                        \
        _IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t))
-#define IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN                  \
+#define IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS                       \
        _IOC(_IOC_READ, 'P', 4, sizeof(unsigned long))
 
 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
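
A rough userspace sketch (not part of the changeset; the function name and
the assumption that fd is an open privcmd file descriptor are illustrative)
of how the new struct and ioctl above are meant to be driven. The real
consumer is xc_map_m2p() in tools/libxc/xc_linux_save.c further down.

#include <stdlib.h>
#include <sys/ioctl.h>
/* privcmd_m2pmfns_t and IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS come from the
 * linux-public/privcmd.h header patched above. */

/* Ask the privcmd driver for the MFN of each 2MB chunk backing the M2P.
 * 'chunks' is the number of chunks the caller expects (M2P_CHUNKS(max_mfn)).
 * Returns a malloc'd array of 'chunks' MFNs, or NULL on failure. */
static unsigned long *get_m2p_chunk_mfns(int fd, int chunks)
{
    privcmd_m2pmfns_t m2p;

    m2p.num = chunks;
    m2p.arr = malloc(chunks * sizeof(unsigned long));
    if (m2p.arr == NULL)
        return NULL;

    if (ioctl(fd, IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS, &m2p) < 0) {
        free(m2p.arr);
        return NULL;
    }

    return m2p.arr;   /* one superpage MFN per 2MB chunk of the M2P */
}
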
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xc_linux_restore.c    Wed Nov 16 16:45:03 2005
@@ -13,13 +13,13 @@
 #include "xg_save_restore.h"
 
 /* max mfn of the whole machine */
-static uint32_t max_mfn; 
+static unsigned long max_mfn; 
 
 /* virtual starting address of the hypervisor */
-static uint32_t hvirt_start; 
+static unsigned long hvirt_start; 
 
 /* #levels of page tables used by the currrent guest */
-static uint32_t pt_levels; 
+static unsigned int pt_levels; 
 
 /* total number of pages used by the current guest */
 static unsigned long max_pfn;
@@ -49,7 +49,6 @@
 
     return (r == count) ? 1 : 0; 
 }
-
 
 /*
 ** In the state file (or during transfer), all page-table pages are 
@@ -60,23 +59,11 @@
 */
 int uncanonicalize_pagetable(unsigned long type, void *page) 
 { 
-    int i, pte_last, xen_start, xen_end; 
+    int i, pte_last; 
     unsigned long pfn; 
     uint64_t pte; 
 
-    /* 
-    ** We need to determine which entries in this page table hold
-    ** reserved hypervisor mappings. This depends on the current
-    ** page table type as well as the number of paging levels. 
-    */
-    xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); 
-    
-    if (pt_levels == 2 && type == L2TAB)
-        xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); 
-
-    if (pt_levels == 3 && type == L3TAB) 
-        xen_start = L3_PAGETABLE_ENTRIES_PAE; 
-
+    pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); 
 
     /* Now iterate through the page table, uncanonicalizing each PTE */
     for(i = 0; i < pte_last; i++) { 
@@ -85,13 +72,10 @@
             pte = ((uint32_t *)page)[i]; 
         else 
             pte = ((uint64_t *)page)[i]; 
-        
-        if(i >= xen_start && i < xen_end) 
-            pte = 0; 
-        
+
         if(pte & _PAGE_PRESENT) { 
-            
-            pfn = pte >> PAGE_SHIFT; 
+
+            pfn = (pte >> PAGE_SHIFT) & 0xffffffff;
             
             if(pfn >= max_pfn) { 
                 ERR("Frame number in type %lu page table is out of range: "
@@ -101,17 +85,16 @@
             } 
             
             
-            if(type == L1TAB) 
-                pte &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT);
-            else 
-                pte &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PSE);
-            
-            pte |= p2m[pfn] << PAGE_SHIFT;
-            
+            pte &= 0xffffff0000000fffULL;
+            pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;
+
             if(pt_levels == 2) 
                 ((uint32_t *)page)[i] = (uint32_t)pte; 
             else 
                 ((uint64_t *)page)[i] = (uint64_t)pte; 
+
+        
+
         }
     }
     
@@ -143,6 +126,9 @@
     /* A table of MFNs to map in the current region */
     unsigned long *region_mfn = NULL;
 
+    /* Types of the pfns in the current region */
+    unsigned long region_pfn_type[MAX_BATCH_SIZE];
+
     /* A temporary mapping, and a copy, of one frame of guest memory. */
     unsigned long *page = NULL;
 
@@ -233,10 +219,12 @@
     
     if(xc_domain_memory_increase_reservation(
            xc_handle, dom, max_pfn, 0, 0, NULL) != 0) { 
-        ERR("Failed to increase reservation by %lx KB\n", max_pfn); 
+        ERR("Failed to increase reservation by %lx KB\n", PFN_TO_KB(max_pfn));
         errno = ENOMEM;
         goto out;
     }
+
+    DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn)); 
 
     /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
     if (xc_get_pfn_list(xc_handle, dom, p2m, max_pfn) != max_pfn) {
@@ -248,6 +236,7 @@
         ERR("Could not initialise for MMU updates");
         goto out;
     }
+
 
     DPRINTF("Reloading memory pages:   0%%\n");
 
@@ -261,7 +250,6 @@
     while (1) { 
 
         int j;
-        unsigned long region_pfn_type[MAX_BATCH_SIZE];
 
         this_pc = (n * 100) / max_pfn;
         if ( (this_pc - prev_pc) >= 5 )
@@ -322,7 +310,7 @@
             if (pagetype == XTAB) 
                 /* a bogus/unmapped page: skip it */
                 continue;
-            
+
             if (pfn > max_pfn) {
                 ERR("pfn out of range");
                 goto out;
@@ -348,10 +336,20 @@
                 ** A page table page - need to 'uncanonicalize' it, i.e. 
                 ** replace all the references to pfns with the corresponding 
                 ** mfns for the new domain. 
-                */ 
-                if(!uncanonicalize_pagetable(pagetype, page))
-                    goto out; 
-
+                ** 
+                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and 
+                ** so we may need to update the p2m after the main loop. 
+                ** Hence we defer uncanonicalization of L1s until then. 
+                */
+                if(pt_levels != 3 || pagetype != L1TAB) { 
+
+                    if(!uncanonicalize_pagetable(pagetype, page)) {
+                        ERR("failed uncanonicalize pt!\n"); 
+                        goto out; 
+                    }
+
+                } 
+                    
             } else if(pagetype != NOTAB) { 
 
                 ERR("Bogus page type %lx page table is out of range: "
@@ -359,7 +357,6 @@
                 goto out;
 
             } 
-
 
 
             if (verify) {
@@ -386,9 +383,9 @@
             }
 
             if (xc_add_mmu_update(xc_handle, mmu, 
-                                  (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
-                                  pfn)) {
-                ERR("machpys mfn=%ld pfn=%ld", mfn, pfn);
+                                  (((unsigned long long)mfn) << PAGE_SHIFT) 
+                                  | MMU_MACHPHYS_UPDATE, pfn)) {
+                ERR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn);
                 goto out;
             }
         } /* end of 'batch' for loop */
@@ -399,14 +396,39 @@
 
     DPRINTF("Received all pages\n");
 
-    if (pt_levels == 3) {
-
-        /* Get all PGDs below 4GB. */
+    if(pt_levels == 3) { 
+
+        /* 
+        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This 
+        ** is a little awkward and involves (a) finding all such PGDs and
+        ** replacing them with 'lowmem' versions; (b) updating the p2m[] 
+        ** with the new info; and (c) canonicalizing all the L1s using the
+        ** (potentially updated) p2m[]. 
+        ** 
+        ** This is relatively slow (and currently involves two passes through
+        ** the pfn_type[] array), but at least seems to be correct. May wish
+        ** to consider more complex approaches to optimize this later. 
+        */
+
+        int j, k; 
+
+        /* First pass: find all L3TABs currently in > 4G mfns and get new mfns */
         for (i = 0; i < max_pfn; i++) {
             
             if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
 
                 unsigned long new_mfn; 
+                uint64_t l3ptes[4]; 
+                uint64_t *l3tab; 
+
+                l3tab = (uint64_t *)
+                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, 
+                                         PROT_READ, p2m[i]); 
+
+                for(j = 0; j < 4; j++) 
+                    l3ptes[j] = l3tab[j]; 
+                
+                munmap(l3tab, PAGE_SIZE); 
 
                 if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
                     ERR("Couldn't get a page below 4GB :-(");
@@ -414,15 +436,58 @@
                 }
                 
                 p2m[i] = new_mfn;
-                if (xc_add_mmu_update(
-                        xc_handle, mmu, 
-                        (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i)) {
+                if (xc_add_mmu_update(xc_handle, mmu, 
+                                      (((unsigned long long)new_mfn) 
+                                       << PAGE_SHIFT) | 
+                                      MMU_MACHPHYS_UPDATE, i)) {
                     ERR("Couldn't m2p on PAE root pgdir");
                     goto out;
                 }
+                
+                l3tab = (uint64_t *)
+                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, 
+                                         PROT_READ | PROT_WRITE, p2m[i]); 
+                
+                for(j = 0; j < 4; j++) 
+                    l3tab[j] = l3ptes[j]; 
+                
+                munmap(l3tab, PAGE_SIZE); 
+                
             }
         }
-        
+
+        /* Second pass: find all L1TABs and uncanonicalize them */
+        j = 0; 
+
+        for(i = 0; i < max_pfn; i++) { 
+            
+            if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) { 
+                region_mfn[j] = p2m[i]; 
+                j++; 
+            }
+
+            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) { 
+
+                if (!(region_base = xc_map_foreign_batch(
+                          xc_handle, dom, PROT_READ | PROT_WRITE, 
+                          region_mfn, j))) {  
+                    ERR("map batch failed");
+                    goto out;
+                }
+
+                for(k = 0; k < j; k++) {
+                    if(!uncanonicalize_pagetable(L1TAB, 
+                                                 region_base + k*PAGE_SIZE)) {
+                        ERR("failed uncanonicalize pt!\n"); 
+                        goto out; 
+                    } 
+                }
+                
+                munmap(region_base, j*PAGE_SIZE); 
+                j = 0; 
+            }
+        }
+
     }
 
 
@@ -430,6 +495,7 @@
         ERR("Error doing finish_mmu_updates()"); 
         goto out;
     } 
+
 
     /*
      * Pin page tables. Do this after writing to them as otherwise Xen
@@ -439,7 +505,7 @@
 
         if ( (pfn_type[i] & LPINTAB) == 0 )
             continue;
-        
+
         switch(pfn_type[i]) { 
 
         case (L1TAB|LPINTAB): 
@@ -463,22 +529,15 @@
         }
 
         pin[nr_pins].arg1.mfn = p2m[i];
+
+        nr_pins ++; 
         
-        if (++nr_pins == MAX_PIN_BATCH) {
+        if (i == (max_pfn-1) || nr_pins == MAX_PIN_BATCH) {
             if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { 
                 ERR("Failed to pin batch of %d page tables", nr_pins); 
                 goto out;
             } 
-            DPRINTF("successfully pinned batch of %d page tables", nr_pins); 
             nr_pins = 0;
-        }
-    }
-    
-    if (nr_pins != 0) { 
-        if((rc = xc_mmuext_op(xc_handle, pin, nr_pins, dom)) < 0) { 
-            ERR("Failed (2) to pin batch of %d page tables", nr_pins); 
-            DPRINTF("rc is %d\n", rc); 
-            goto out;
         }
     }
 
@@ -579,23 +638,20 @@
     pfn = ctxt.ctrlreg[3] >> PAGE_SHIFT;
 
     if (pfn >= max_pfn) {
-        DPRINTF("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx\n",
-                pfn, max_pfn, pfn_type[pfn]); 
-        ERR("PT base is bad.");
+        ERR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
+            pfn, max_pfn, pfn_type[pfn]); 
         goto out;
     }
 
     if ((pt_levels == 2) && ((pfn_type[pfn]&LTABTYPE_MASK) != L2TAB)) { 
-        DPRINTF("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n",
-                pfn, max_pfn, pfn_type[pfn], (unsigned long)L2TAB);
-        ERR("PT base is bad.");
+        ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
+            pfn, max_pfn, pfn_type[pfn], (unsigned long)L2TAB);
         goto out;
     }
 
     if ((pt_levels == 3) && ((pfn_type[pfn]&LTABTYPE_MASK) != L3TAB)) { 
-        DPRINTF("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n",
-                pfn, max_pfn, pfn_type[pfn], (unsigned long)L3TAB);
-        ERR("PT base is bad.");
+        ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
+            pfn, max_pfn, pfn_type[pfn], (unsigned long)L3TAB);
         goto out;
     }
     
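
Condensing the PTE rewriting that the restore path above applies to each
present entry into one helper (an illustrative sketch, not part of the
changeset; uncanonicalize_pte is a hypothetical name and p2m stands for the
pfn-to-mfn table built during restore):

#include <stdint.h>

#define PAGE_SHIFT 12

/* Rewrite one canonical (PFN-based) PAE PTE into a machine (MFN-based)
 * one, mirroring the logic of uncanonicalize_pagetable() above. */
static uint64_t uncanonicalize_pte(uint64_t pte, const unsigned long *p2m)
{
    unsigned long pfn;

    if (!(pte & 1))                            /* _PAGE_PRESENT clear     */
        return pte;                            /* leave the entry alone   */

    pfn  = (pte >> PAGE_SHIFT) & 0xffffffff;   /* saved frame is a PFN    */
    pte &= 0xffffff0000000fffULL;              /* keep flags + upper bits */
    pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;   /* splice in the real MFN  */

    return pte;
}
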
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xc_linux_save.c       Wed Nov 16 16:45:03 2005
@@ -27,13 +27,13 @@
 
 
 /* max mfn of the whole machine */
-static uint32_t max_mfn; 
+static unsigned long max_mfn; 
 
 /* virtual starting address of the hypervisor */
-static uint32_t hvirt_start; 
+static unsigned long hvirt_start; 
 
 /* #levels of page tables used by the currrent guest */
-static uint32_t pt_levels; 
+static unsigned int pt_levels; 
 
 /* total number of pages used by the current guest */
 static unsigned long max_pfn;
@@ -500,6 +500,70 @@
 
 
 
+static unsigned long *xc_map_m2p(int xc_handle, 
+                                 unsigned long max_mfn, 
+                                 int prot) 
+{ 
+    privcmd_m2pmfns_t m2p_mfns; 
+    privcmd_mmap_t ioctlx; 
+    privcmd_mmap_entry_t *entries; 
+    unsigned long m2p_chunks, m2p_size; 
+    unsigned long *m2p; 
+    int i, rc; 
+
+    m2p_size   = M2P_SIZE(max_mfn); 
+    m2p_chunks = M2P_CHUNKS(max_mfn); 
+
+
+    m2p_mfns.num = m2p_chunks; 
+
+    if(!(m2p_mfns.arr = malloc(m2p_chunks * sizeof(unsigned long)))) { 
+        ERR("failed to allocate space for m2p mfns!\n"); 
+        return NULL; 
+    } 
+
+    if (ioctl(xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS, &m2p_mfns) < 0) {
+        ERR("xc_get_m2p_mfns:"); 
+        return NULL;
+    }
+
+    if((m2p = mmap(NULL, m2p_size, prot, 
+                   MAP_SHARED, xc_handle, 0)) == MAP_FAILED) {
+        ERR("failed to mmap m2p"); 
+        return NULL; 
+    } 
+    
+
+    if(!(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t)))) { 
+        ERR("failed to allocate space for mmap entries!\n"); 
+        return NULL; 
+    } 
+
+
+    ioctlx.num   = m2p_chunks;
+    ioctlx.dom   = DOMID_XEN; 
+    ioctlx.entry = entries; 
+    
+    for(i=0; i < m2p_chunks; i++) { 
+        
+        entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); 
+        entries[i].mfn = m2p_mfns.arr[i]; 
+        entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
+
+    }
+
+    if((rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx)) < 0) {
+        ERR("ioctl_mmap failed (rc = %d)", rc); 
+        return NULL; 
+    }
+        
+    free(m2p_mfns.arr); 
+    free(entries); 
+
+    return m2p; 
+}
+
+
 
 int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, 
                   uint32_t max_factor, uint32_t flags)
@@ -531,16 +595,12 @@
     /* A copy of the pfn-to-mfn table frame list. */
     unsigned long *p2m_frame_list = NULL;
 
-    unsigned long m2p_start_mfn;
-    
     /* Live mapping of shared info structure */
     shared_info_t *live_shinfo = NULL;
 
     /* base of the region in which domain memory is mapped */
     unsigned char *region_base = NULL;
 
-
-    
     /* power of 2 order of max_pfn */
     int order_nr; 
 
@@ -563,9 +623,6 @@
         max_factor = DEF_MAX_FACTOR; 
     
     initialize_mbit_rate(); 
-
-    DPRINTF("xc_linux_save start DOM%u live=%s\n", dom, live ? 
-            "true" : "false"); 
 
     if(!get_platform_info(xc_handle, dom, 
                           &max_mfn, &hvirt_start, &pt_levels)) {
@@ -647,11 +704,13 @@
     }
 
     /* Setup the mfn_to_pfn table mapping */
-    m2p_start_mfn = xc_get_m2p_start_mfn(xc_handle);
-    live_m2p      = xc_map_foreign_range(xc_handle, DOMID_XEN, M2P_SIZE, 
-                                         PROT_READ, m2p_start_mfn);
-    
-    /* Get a local copy fo the live_P2M_frame_list */
+    if(!(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ))) { 
+        ERR("Failed to map live M2P table"); 
+        goto out; 
+    } 
+
+    
+    /* Get a local copy of the live_P2M_frame_list */
     if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { 
         ERR("Couldn't allocate p2m_frame_list array");
         goto out;
@@ -662,6 +721,8 @@
     for (i = 0; i < max_pfn; i += ulpp) {
         if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) { 
             ERR("Frame# in pfn-to-mfn frame list is not in pseudophys");
+            ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp, 
+                p2m_frame_list[i/ulpp]); 
             goto out;
         }
     }
@@ -693,20 +754,14 @@
         
     }
 
-#if 0
-    sent_last_iter = 0xFFFFFFFF; /* Pretend we sent a /lot/ last time */
-#else
-    sent_last_iter = 1 << 20; 
-#endif
+    /* pretend we sent all the pages last iteration */
+    sent_last_iter = max_pfn; 
 
 
     /* calculate the power of 2 order of max_pfn, e.g.
        15->4 16->4 17->5 */
     for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++)
         continue;
-
-#undef BITMAP_SIZE
-#define BITMAP_SIZE ((1<<20)/8) 
 
     /* Setup to_send / to_fix and to_skip bitmaps */
     to_send = malloc(BITMAP_SIZE); 
@@ -922,10 +977,8 @@
 
 
                 /* write out pages in batch */
-                if (pagetype == XTAB) {
-                    DPRINTF("SKIP BOGUS page %i mfn %08lx\n", j, pfn_type[j]);
+                if (pagetype == XTAB)
                     continue;
-                }
 
                 pagetype &= LTABTYPE_MASK; 
                 
@@ -950,10 +1003,10 @@
             } /* end of the write out for this batch */
             
             sent_this_iter += batch;
-            
+
+            munmap(region_base, batch*PAGE_SIZE);
+        
         } /* end of this while loop for this iteration */
-        
-        munmap(region_base, batch*PAGE_SIZE);
         
       skip: 
         
@@ -1027,13 +1080,9 @@
 
     DPRINTF("All memory is saved\n");
 
-    /* Success! */
-    rc = 0;
-    
-    /* ^^^^^^ XXX SMH: hmm.. not sure that's really success! */
-    
     /* Zero terminate */
-    if (!write_exact(io_fd, &rc, sizeof(int))) { 
+    i = 0; 
+    if (!write_exact(io_fd, &i, sizeof(int))) { 
         ERR("Error when writing to state file (6)");
         goto out;
     }
@@ -1043,17 +1092,17 @@
         unsigned int i,j;
         unsigned long pfntab[1024]; 
 
-        for ( i = 0, j = 0; i < max_pfn; i++ ) {
-            if ( ! is_mapped(live_p2m[i]) )
+        for (i = 0, j = 0; i < max_pfn; i++) {
+            if (!is_mapped(live_p2m[i]))
                 j++;
         }
-
+        
         if(!write_exact(io_fd, &j, sizeof(unsigned int))) { 
             ERR("Error when writing to state file (6a)");
             goto out;
         }      
         
-        for ( i = 0, j = 0; i < max_pfn; ) {
+        for (i = 0, j = 0; i < max_pfn; ) {
 
             if (!is_mapped(live_p2m[i]))
                 pfntab[j++] = i;
@@ -1097,7 +1146,10 @@
         ERR("Error when writing to state file (1)");
         goto out;
     }
-    
+
+    /* Success! */
+    rc = 0;
+
  out:
 
     if (live_shinfo)
@@ -1110,7 +1162,7 @@
         munmap(live_p2m, P2M_SIZE); 
 
     if(live_m2p) 
-        munmap(live_m2p, M2P_SIZE); 
+        munmap(live_m2p, M2P_SIZE(max_mfn)); 
 
     free(pfn_type);
     free(pfn_batch);
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xc_private.c  Wed Nov 16 16:45:03 2005
@@ -260,18 +260,6 @@
 }
 
 
-unsigned long xc_get_m2p_start_mfn ( int xc_handle )
-{
-    unsigned long mfn;
-
-    if ( ioctl( xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN, &mfn ) < 0 )
-    {
-        perror("xc_get_m2p_start_mfn:");
-        return 0;
-    }
-    return mfn;
-}
-
 int xc_get_pfn_list(int xc_handle,
                     uint32_t domid, 
                     unsigned long *pfn_buf, 
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xg_private.h  Wed Nov 16 16:45:03 2005
@@ -153,8 +153,6 @@
     
 } mfn_mapper_t;
 
-unsigned long xc_get_m2p_start_mfn (int xc_handle);
-
 int xc_copy_to_domain_page(int xc_handle, uint32_t domid,
                             unsigned long dst_pfn, void *src_page);
 
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h     Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xg_save_restore.h     Wed Nov 16 16:45:03 2005
@@ -3,6 +3,8 @@
 ** 
 ** Defintions and utilities for save / restore. 
 */
+
+#include "xc_private.h"
 
 #define DEBUG    1
 #define PROGRESS 0
@@ -55,25 +57,24 @@
 ** Returns 1 on success, 0 on failure. 
 */
 static int get_platform_info(int xc_handle, uint32_t dom, 
-                             /* OUT */ uint32_t *max_mfn,  
-                             /* OUT */ uint32_t *hvirt_start, 
-                             /* OUT */ uint32_t *pt_levels)
+                             /* OUT */ unsigned long *max_mfn,  
+                             /* OUT */ unsigned long *hvirt_start, 
+                             /* OUT */ unsigned int *pt_levels)
     
 { 
     xen_capabilities_info_t xen_caps = "";
     xen_platform_parameters_t xen_params;
-    xc_physinfo_t physinfo;
     
-    if (xc_physinfo(xc_handle, &physinfo) != 0) 
-        return 0;
-    
+
     if (xc_version(xc_handle, XENVER_platform_parameters, &xen_params) != 0)
         return 0;
     
     if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0)
         return 0;
 
-    *max_mfn =     physinfo.total_pages;
+    if (xc_memory_op(xc_handle, XENMEM_maximum_ram_page, max_mfn) != 0)
+        return 0; 
+    
     *hvirt_start = xen_params.virt_start;
 
     if (strstr(xen_caps, "xen-3.0-x86_64"))
@@ -95,13 +96,22 @@
 ** entry tell us whether or not the the PFN is currently mapped.
 */
 
-#define PFN_TO_KB(_pfn) ((_pfn) * PAGE_SIZE / 1024)
+#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
 #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
 
-/* Size in bytes of the M2P and P2M (both rounded up to nearest PAGE_SIZE) */
-#define M2P_SIZE ROUNDUP((max_mfn * sizeof(unsigned long)), PAGE_SHIFT) 
-#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) 
 
+/* 
+** The M2P is made up of some number of 'chunks' of at least 2MB in size. 
+** The below definitions and utility function(s) deal with mapping the M2P 
+** regardless of the underlying machine memory size or architecture. 
+*/
+#define M2P_SHIFT       L2_PAGETABLE_SHIFT_PAE 
+#define M2P_CHUNK_SIZE  (1 << M2P_SHIFT) 
+#define M2P_SIZE(_m)    ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT) 
+#define M2P_CHUNKS(_m)  (M2P_SIZE((_m)) >> M2P_SHIFT)
+
+/* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */
+#define P2M_SIZE        ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) 
 
 /* Number of unsigned longs in a page */
 #define ulpp            (PAGE_SIZE/sizeof(unsigned long))
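
A worked example of the M2P macros above (a standalone sketch, not part of
the changeset, assuming a 32-bit dom0 where sizeof(unsigned long) == 4 and
L2_PAGETABLE_SHIFT_PAE == 21): a 16GB machine has roughly 4M frames, so the
M2P occupies 16MB and is covered by eight 2MB chunks, which is how many MFNs
IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS hands back.

#include <stdio.h>

/* Local mirrors of the macros above, with the 32-bit assumptions baked in. */
#define M2P_SHIFT       21
#define M2P_CHUNK_SIZE  (1UL << M2P_SHIFT)
#define ROUNDUP(_x,_w)  (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
#define M2P_SIZE(_m)    ROUNDUP(((_m) * 4UL), M2P_SHIFT)
#define M2P_CHUNKS(_m)  (M2P_SIZE((_m)) >> M2P_SHIFT)

int main(void)
{
    unsigned long max_mfn = 4UL * 1024 * 1024;  /* ~16GB of RAM in 4KB frames */

    printf("M2P size:   %lu MB\n", M2P_SIZE(max_mfn) >> 20);  /* 16 MB    */
    printf("M2P chunks: %lu\n",    M2P_CHUNKS(max_mfn));      /* 8 x 2MB  */
    return 0;
}
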
diff -r 9b345321fd06 -r fe3a892b33b4 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Wed Nov 16 14:50:36 2005
+++ b/tools/python/xen/xend/XendCheckpoint.py   Wed Nov 16 16:45:03 2005
@@ -129,7 +129,7 @@
         l = read_exact(fd, sizeof_unsigned_long,
                        "not a valid guest state file: pfn count read")
         nr_pfns = unpack("=L", l)[0]   # XXX endianess
-        if nr_pfns > 1024*1024:     # XXX
+        if nr_pfns > 16*1024*1024:     # XXX 
             raise XendError(
                 "not a valid guest state file: pfn count out of range")
 
diff -r 9b345321fd06 -r fe3a892b33b4 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Nov 16 14:50:36 2005
+++ b/xen/arch/x86/mm.c Wed Nov 16 16:45:03 2005
@@ -898,6 +898,7 @@
     return 1;
 
  fail:
+    MEM_LOG("Failure in alloc_l3_table: entry %d", i);
     while ( i-- > 0 )
         if ( is_guest_l3_slot(i) )
             put_page_from_l3e(pl3e[i], pfn);
@@ -948,6 +949,7 @@
     return 1;
 
  fail:
+    MEM_LOG("Failure in alloc_l4_table: entry %d", i);
     while ( i-- > 0 )
         if ( is_guest_l4_slot(i) )
             put_page_from_l4e(pl4e[i], pfn);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog