[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merged.
# HG changeset patch # User emellor@xxxxxxxxxxxxxxxxxxxxxx # Node ID 3f39f030fa894d29d04b748513bf48000d6a17f5 # Parent cbf6f95e9c62ab2fcb7c430a51b5444f5139945e # Parent e4e1674a747d4b69f194e8ccbc4dd72c481da5f0 Merged. diff -r cbf6f95e9c62 -r 3f39f030fa89 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Wed Nov 16 19:33:12 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Wed Nov 16 19:33:23 2005 @@ -136,21 +136,19 @@ } EXPORT_SYMBOL(direct_kernel_remap_pfn_range); -/* FIXME: This is horribly broken on PAE */ static int lookup_pte_fn( pte_t *pte, struct page *pte_page, unsigned long addr, void *data) { - unsigned long *ptep = (unsigned long *)data; + uint64_t *ptep = (uint64_t *)data; if (ptep) - *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << - PAGE_SHIFT) | - ((unsigned long)pte & ~PAGE_MASK); + *ptep = ((uint64_t)pfn_to_mfn(page_to_pfn(pte_page)) << + PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK); return 0; } int create_lookup_pte_addr(struct mm_struct *mm, unsigned long address, - unsigned long *ptep) + uint64_t *ptep) { return generic_page_range(mm, address, PAGE_SIZE, lookup_pte_fn, ptep); } diff -r cbf6f95e9c62 -r 3f39f030fa89 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Wed Nov 16 19:33:12 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Wed Nov 16 19:33:23 2005 @@ -770,9 +770,9 @@ pfn_to_mfn_frame_list_list = alloc_bootmem(PAGE_SIZE); HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = virt_to_mfn(pfn_to_mfn_frame_list_list); - + fpp = PAGE_SIZE/sizeof(unsigned long); - for ( i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++ ) + for ( i=0, j=0, k=-1; i< end_pfn; i+=fpp, j++ ) { if ( (j % fpp) == 0 ) { @@ -786,8 +786,11 @@ pfn_to_mfn_frame_list[k][j] = virt_to_mfn(&phys_to_machine_mapping[i]); } - HYPERVISOR_shared_info->arch.max_pfn = max_pfn; - } + HYPERVISOR_shared_info->arch.max_pfn = end_pfn; + + } + + if ( ! (xen_start_info->flags & SIF_INITDOMAIN)) { diff -r cbf6f95e9c62 -r 3f39f030fa89 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Wed Nov 16 19:33:12 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Wed Nov 16 19:33:23 2005 @@ -412,7 +412,7 @@ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; unsigned int i, op = 0; struct grant_handle_pair *handle; - unsigned long ptep; + uint64_t ptep; int ret; for ( i = 0; i < nr_pages; i++) @@ -427,9 +427,9 @@ op++; if (create_lookup_pte_addr( - blktap_vma->vm_mm, - MMAP_VADDR(user_vstart, idx, i), - &ptep) !=0) { + blktap_vma->vm_mm, + MMAP_VADDR(user_vstart, idx, i), + &ptep) !=0) { DPRINTK("Couldn't get a pte addr!\n"); return; } @@ -705,7 +705,7 @@ unsigned long uvaddr; unsigned long kvaddr; - unsigned long ptep; + uint64_t ptep; uvaddr = MMAP_VADDR(user_vstart, pending_idx, i); kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i); diff -r cbf6f95e9c62 -r 3f39f030fa89 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Wed Nov 16 19:33:12 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Wed Nov 16 19:33:23 2005 @@ -152,7 +152,8 @@ privcmd_mmapbatch_t m; struct vm_area_struct *vma = NULL; unsigned long *p, addr; - unsigned long mfn, ptep; + unsigned long mfn; + uint64_t ptep; int i; if (copy_from_user(&m, (void *)data, sizeof(m))) { @@ -217,15 +218,39 @@ #endif #ifndef __ia64__ - case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN: { - unsigned long m2pv = (unsigned long)machine_to_phys_mapping; - pgd_t *pgd = pgd_offset_k(m2pv); - pud_t *pud = pud_offset(pgd, m2pv); - pmd_t *pmd = pmd_offset(pud, m2pv); - unsigned long m2p_start_mfn = - (*(unsigned long *)pmd) >> PAGE_SHIFT; - ret = put_user(m2p_start_mfn, (unsigned long *)data) ? - -EFAULT: 0; + case IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS: { + + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long m2pv, m2p_mfn; + privcmd_m2pmfns_t m; + unsigned long *p; + int i; + + if (copy_from_user(&m, (void *)data, sizeof(m))) + return -EFAULT; + + m2pv = (unsigned long)machine_to_phys_mapping; + + p = m.arr; + + for(i=0; i < m.num; i++) { + + pgd = pgd_offset_k(m2pv); + pud = pud_offset(pgd, m2pv); + pmd = pmd_offset(pud, m2pv); + m2p_mfn = (*(uint64_t *)pmd >> PAGE_SHIFT)&0xFFFFFFFF; + + if (put_user(m2p_mfn, p + i)) + return -EFAULT; + + m2pv += (1 << 21); + } + + ret = 0; + break; + } break; #endif diff -r cbf6f95e9c62 -r 3f39f030fa89 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Wed Nov 16 19:33:12 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Wed Nov 16 19:33:23 2005 @@ -450,11 +450,11 @@ #endif /* !CONFIG_DISCONTIGMEM */ int direct_remap_pfn_range(struct vm_area_struct *vma, - unsigned long address, - unsigned long mfn, - unsigned long size, - pgprot_t prot, - domid_t domid); + unsigned long address, + unsigned long mfn, + unsigned long size, + pgprot_t prot, + domid_t domid); int direct_kernel_remap_pfn_range(unsigned long address, unsigned long mfn, unsigned long size, @@ -462,7 +462,7 @@ domid_t domid); int create_lookup_pte_addr(struct mm_struct *mm, unsigned long address, - unsigned long *ptep); + uint64_t *ptep); int touch_pte_range(struct mm_struct *mm, unsigned long address, unsigned long size); diff -r cbf6f95e9c62 -r 3f39f030fa89 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Wed Nov 16 19:33:12 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Wed Nov 16 19:33:23 2005 @@ -541,7 +541,7 @@ int create_lookup_pte_addr(struct mm_struct *mm, unsigned long address, - unsigned long *ptep); + uint64_t *ptep); int touch_pte_range(struct mm_struct *mm, unsigned long address, diff -r cbf6f95e9c62 -r 3f39f030fa89 linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h --- a/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h Wed Nov 16 19:33:12 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h Wed Nov 16 19:33:23 2005 @@ -55,6 +55,11 @@ unsigned long *arr; /* array of mfns - top nibble set on err */ } privcmd_mmapbatch_t; +typedef struct privcmd_m2pmfns { + int num; /* max number of mfns to return */ + unsigned long *arr; /* array of mfns */ +} privcmd_m2pmfns_t; + typedef struct privcmd_blkmsg { unsigned long op; @@ -69,12 +74,11 @@ */ #define IOCTL_PRIVCMD_HYPERCALL \ _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t)) - #define IOCTL_PRIVCMD_MMAP \ _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t)) #define IOCTL_PRIVCMD_MMAPBATCH \ _IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t)) -#define IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN \ +#define IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS \ _IOC(_IOC_READ, 'P', 4, sizeof(unsigned long)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ diff -r cbf6f95e9c62 -r 3f39f030fa89 tools/examples/vif-common.sh --- a/tools/examples/vif-common.sh Wed Nov 16 19:33:12 2005 +++ b/tools/examples/vif-common.sh Wed Nov 16 19:33:23 2005 @@ -63,7 +63,9 @@ fi iptables "$c" FORWARD -m physdev --physdev-in "$vif" "$@" -j ACCEPT || - fatal "iptables $c FORWARD -m physdev --physdev-in $vif $@ -j ACCEPT failed" + log err \ + "iptables $c FORWARD -m physdev --physdev-in $vif $@ -j ACCEPT failed. +If you are using iptables, this may affect networking for guest domains." } diff -r cbf6f95e9c62 -r 3f39f030fa89 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Wed Nov 16 19:33:12 2005 +++ b/tools/libxc/xc_linux_build.c Wed Nov 16 19:33:23 2005 @@ -629,7 +629,7 @@ memset(start_info, 0, sizeof(*start_info)); rc = xc_version(xc_handle, XENVER_version, NULL); sprintf(start_info->magic, "xen-%i.%i-x86_%d%s", - rc >> 16, rc & (0xFFFF), sizeof(long)*8, + rc >> 16, rc & (0xFFFF), (unsigned int)sizeof(long)*8, dsi.pae_kernel ? "p" : ""); start_info->nr_pages = nr_pages; start_info->shared_info = shared_info_frame << PAGE_SHIFT; diff -r cbf6f95e9c62 -r 3f39f030fa89 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Wed Nov 16 19:33:12 2005 +++ b/tools/libxc/xc_linux_restore.c Wed Nov 16 19:33:23 2005 @@ -13,13 +13,13 @@ #include "xg_save_restore.h" /* max mfn of the whole machine */ -static uint32_t max_mfn; +static unsigned long max_mfn; /* virtual starting address of the hypervisor */ -static uint32_t hvirt_start; +static unsigned long hvirt_start; /* #levels of page tables used by the currrent guest */ -static uint32_t pt_levels; +static unsigned int pt_levels; /* total number of pages used by the current guest */ static unsigned long max_pfn; @@ -49,7 +49,6 @@ return (r == count) ? 1 : 0; } - /* ** In the state file (or during transfer), all page-table pages are @@ -60,23 +59,11 @@ */ int uncanonicalize_pagetable(unsigned long type, void *page) { - int i, pte_last, xen_start, xen_end; + int i, pte_last; unsigned long pfn; uint64_t pte; - /* - ** We need to determine which entries in this page table hold - ** reserved hypervisor mappings. This depends on the current - ** page table type as well as the number of paging levels. - */ - xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); - - if (pt_levels == 2 && type == L2TAB) - xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); - - if (pt_levels == 3 && type == L3TAB) - xen_start = L3_PAGETABLE_ENTRIES_PAE; - + pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); /* Now iterate through the page table, uncanonicalizing each PTE */ for(i = 0; i < pte_last; i++) { @@ -85,13 +72,10 @@ pte = ((uint32_t *)page)[i]; else pte = ((uint64_t *)page)[i]; - - if(i >= xen_start && i < xen_end) - pte = 0; - + if(pte & _PAGE_PRESENT) { - - pfn = pte >> PAGE_SHIFT; + + pfn = (pte >> PAGE_SHIFT) & 0xffffffff; if(pfn >= max_pfn) { ERR("Frame number in type %lu page table is out of range: " @@ -101,17 +85,16 @@ } - if(type == L1TAB) - pte &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT); - else - pte &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PSE); - - pte |= p2m[pfn] << PAGE_SHIFT; - + pte &= 0xffffff0000000fffULL; + pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT; + if(pt_levels == 2) ((uint32_t *)page)[i] = (uint32_t)pte; else ((uint64_t *)page)[i] = (uint64_t)pte; + + + } } @@ -143,6 +126,9 @@ /* A table of MFNs to map in the current region */ unsigned long *region_mfn = NULL; + /* Types of the pfns in the current region */ + unsigned long region_pfn_type[MAX_BATCH_SIZE]; + /* A temporary mapping, and a copy, of one frame of guest memory. */ unsigned long *page = NULL; @@ -233,10 +219,12 @@ if(xc_domain_memory_increase_reservation( xc_handle, dom, max_pfn, 0, 0, NULL) != 0) { - ERR("Failed to increase reservation by %lx KB\n", max_pfn); + ERR("Failed to increase reservation by %lx KB\n", PFN_TO_KB(max_pfn)); errno = ENOMEM; goto out; } + + DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn)); /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */ if (xc_get_pfn_list(xc_handle, dom, p2m, max_pfn) != max_pfn) { @@ -248,6 +236,7 @@ ERR("Could not initialise for MMU updates"); goto out; } + DPRINTF("Reloading memory pages: 0%%\n"); @@ -261,7 +250,6 @@ while (1) { int j; - unsigned long region_pfn_type[MAX_BATCH_SIZE]; this_pc = (n * 100) / max_pfn; if ( (this_pc - prev_pc) >= 5 ) @@ -322,7 +310,7 @@ if (pagetype == XTAB) /* a bogus/unmapped page: skip it */ continue; - + if (pfn > max_pfn) { ERR("pfn out of range"); goto out; @@ -348,10 +336,20 @@ ** A page table page - need to 'uncanonicalize' it, i.e. ** replace all the references to pfns with the corresponding ** mfns for the new domain. - */ - if(!uncanonicalize_pagetable(pagetype, page)) - goto out; - + ** + ** On PAE we need to ensure that PGDs are in MFNs < 4G, and + ** so we may need to update the p2m after the main loop. + ** Hence we defer canonicalization of L1s until then. + */ + if(pt_levels != 3 || pagetype != L1TAB) { + + if(!uncanonicalize_pagetable(pagetype, page)) { + ERR("failed uncanonicalize pt!\n"); + goto out; + } + + } + } else if(pagetype != NOTAB) { ERR("Bogus page type %lx page table is out of range: " @@ -359,7 +357,6 @@ goto out; } - if (verify) { @@ -386,9 +383,9 @@ } if (xc_add_mmu_update(xc_handle, mmu, - (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, - pfn)) { - ERR("machpys mfn=%ld pfn=%ld", mfn, pfn); + (((unsigned long long)mfn) << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE, pfn)) { + ERR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn); goto out; } } /* end of 'batch' for loop */ @@ -399,14 +396,39 @@ DPRINTF("Received all pages\n"); - if (pt_levels == 3) { - - /* Get all PGDs below 4GB. */ + if(pt_levels == 3) { + + /* + ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This + ** is a little awkward and involves (a) finding all such PGDs and + ** replacing them with 'lowmem' versions; (b) upating the p2m[] + ** with the new info; and (c) canonicalizing all the L1s using the + ** (potentially updated) p2m[]. + ** + ** This is relatively slow (and currently involves two passes through + ** the pfn_type[] array), but at least seems to be correct. May wish + ** to consider more complex approaches to optimize this later. + */ + + int j, k; + + /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ for (i = 0; i < max_pfn; i++) { if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) { unsigned long new_mfn; + uint64_t l3ptes[4]; + uint64_t *l3tab; + + l3tab = (uint64_t *) + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, p2m[i]); + + for(j = 0; j < 4; j++) + l3ptes[j] = l3tab[j]; + + munmap(l3tab, PAGE_SIZE); if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) { ERR("Couldn't get a page below 4GB :-("); @@ -414,15 +436,58 @@ } p2m[i] = new_mfn; - if (xc_add_mmu_update( - xc_handle, mmu, - (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i)) { + if (xc_add_mmu_update(xc_handle, mmu, + (((unsigned long long)new_mfn) + << PAGE_SHIFT) | + MMU_MACHPHYS_UPDATE, i)) { ERR("Couldn't m2p on PAE root pgdir"); goto out; } + + l3tab = (uint64_t *) + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, p2m[i]); + + for(j = 0; j < 4; j++) + l3tab[j] = l3ptes[j]; + + munmap(l3tab, PAGE_SIZE); + } } - + + /* Second pass: find all L1TABs and uncanonicalize them */ + j = 0; + + for(i = 0; i < max_pfn; i++) { + + if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) { + region_mfn[j] = p2m[i]; + j++; + } + + if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) { + + if (!(region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ | PROT_WRITE, + region_mfn, j))) { + ERR("map batch failed"); + goto out; + } + + for(k = 0; k < j; k++) { + if(!uncanonicalize_pagetable(L1TAB, + region_base + k*PAGE_SIZE)) { + ERR("failed uncanonicalize pt!\n"); + goto out; + } + } + + munmap(region_base, j*PAGE_SIZE); + j = 0; + } + } + } @@ -430,6 +495,7 @@ ERR("Error doing finish_mmu_updates()"); goto out; } + /* * Pin page tables. Do this after writing to them as otherwise Xen @@ -439,7 +505,7 @@ if ( (pfn_type[i] & LPINTAB) == 0 ) continue; - + switch(pfn_type[i]) { case (L1TAB|LPINTAB): @@ -463,22 +529,15 @@ } pin[nr_pins].arg1.mfn = p2m[i]; + + nr_pins ++; - if (++nr_pins == MAX_PIN_BATCH) { + if (i == (max_pfn-1) || nr_pins == MAX_PIN_BATCH) { if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { ERR("Failed to pin batch of %d page tables", nr_pins); goto out; } - DPRINTF("successfully pinned batch of %d page tables", nr_pins); nr_pins = 0; - } - } - - if (nr_pins != 0) { - if((rc = xc_mmuext_op(xc_handle, pin, nr_pins, dom)) < 0) { - ERR("Failed (2) to pin batch of %d page tables", nr_pins); - DPRINTF("rc is %d\n", rc); - goto out; } } @@ -579,23 +638,20 @@ pfn = ctxt.ctrlreg[3] >> PAGE_SHIFT; if (pfn >= max_pfn) { - DPRINTF("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx\n", - pfn, max_pfn, pfn_type[pfn]); - ERR("PT base is bad."); + ERR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx", + pfn, max_pfn, pfn_type[pfn]); goto out; } if ((pt_levels == 2) && ((pfn_type[pfn]<ABTYPE_MASK) != L2TAB)) { - DPRINTF("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n", - pfn, max_pfn, pfn_type[pfn], (unsigned long)L2TAB); - ERR("PT base is bad."); + ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", + pfn, max_pfn, pfn_type[pfn], (unsigned long)L2TAB); goto out; } if ((pt_levels == 3) && ((pfn_type[pfn]<ABTYPE_MASK) != L3TAB)) { - DPRINTF("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n", - pfn, max_pfn, pfn_type[pfn], (unsigned long)L3TAB); - ERR("PT base is bad."); + ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", + pfn, max_pfn, pfn_type[pfn], (unsigned long)L3TAB); goto out; } diff -r cbf6f95e9c62 -r 3f39f030fa89 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Wed Nov 16 19:33:12 2005 +++ b/tools/libxc/xc_linux_save.c Wed Nov 16 19:33:23 2005 @@ -27,13 +27,13 @@ /* max mfn of the whole machine */ -static uint32_t max_mfn; +static unsigned long max_mfn; /* virtual starting address of the hypervisor */ -static uint32_t hvirt_start; +static unsigned long hvirt_start; /* #levels of page tables used by the currrent guest */ -static uint32_t pt_levels; +static unsigned int pt_levels; /* total number of pages used by the current guest */ static unsigned long max_pfn; @@ -73,7 +73,7 @@ */ #define BITS_PER_LONG (sizeof(unsigned long) * 8) -#define BITMAP_SIZE ((max_pfn + BITS_PER_LONG - 1) / BITS_PER_LONG) +#define BITMAP_SIZE ((max_pfn + BITS_PER_LONG - 1) / 8) #define BITMAP_ENTRY(_nr,_bmap) \ ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] @@ -500,6 +500,70 @@ +static unsigned long *xc_map_m2p(int xc_handle, + unsigned long max_mfn, + int prot) +{ + privcmd_m2pmfns_t m2p_mfns; + privcmd_mmap_t ioctlx; + privcmd_mmap_entry_t *entries; + unsigned long m2p_chunks, m2p_size; + unsigned long *m2p; + int i, rc; + + m2p_size = M2P_SIZE(max_mfn); + m2p_chunks = M2P_CHUNKS(max_mfn); + + + m2p_mfns.num = m2p_chunks; + + if(!(m2p_mfns.arr = malloc(m2p_chunks * sizeof(unsigned long)))) { + ERR("failed to allocate space for m2p mfns!\n"); + return NULL; + } + + if (ioctl(xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS, &m2p_mfns) < 0) { + ERR("xc_get_m2p_mfns:"); + return NULL; + } + + if((m2p = mmap(NULL, m2p_size, prot, + MAP_SHARED, xc_handle, 0)) == MAP_FAILED) { + ERR("failed to mmap m2p"); + return NULL; + } + + + if(!(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t)))) { + ERR("failed to allocate space for mmap entries!\n"); + return NULL; + } + + + ioctlx.num = m2p_chunks; + ioctlx.dom = DOMID_XEN; + ioctlx.entry = entries; + + for(i=0; i < m2p_chunks; i++) { + + entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); + entries[i].mfn = m2p_mfns.arr[i]; + entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT; + + } + + if((rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx)) < 0) { + ERR("ioctl_mmap failed (rc = %d)", rc); + return NULL; + } + + free(m2p_mfns.arr); + free(entries); + + return m2p; +} + + int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags) @@ -531,16 +595,12 @@ /* A copy of the pfn-to-mfn table frame list. */ unsigned long *p2m_frame_list = NULL; - unsigned long m2p_start_mfn; - /* Live mapping of shared info structure */ shared_info_t *live_shinfo = NULL; /* base of the region in which domain memory is mapped */ unsigned char *region_base = NULL; - - /* power of 2 order of max_pfn */ int order_nr; @@ -563,9 +623,6 @@ max_factor = DEF_MAX_FACTOR; initialize_mbit_rate(); - - DPRINTF("xc_linux_save start DOM%u live=%s\n", dom, live ? - "true" : "false"); if(!get_platform_info(xc_handle, dom, &max_mfn, &hvirt_start, &pt_levels)) { @@ -647,11 +704,13 @@ } /* Setup the mfn_to_pfn table mapping */ - m2p_start_mfn = xc_get_m2p_start_mfn(xc_handle); - live_m2p = xc_map_foreign_range(xc_handle, DOMID_XEN, M2P_SIZE, - PROT_READ, m2p_start_mfn); - - /* Get a local copy fo the live_P2M_frame_list */ + if(!(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ))) { + ERR("Failed to map live M2P table"); + goto out; + } + + + /* Get a local copy of the live_P2M_frame_list */ if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { ERR("Couldn't allocate p2m_frame_list array"); goto out; @@ -662,6 +721,8 @@ for (i = 0; i < max_pfn; i += ulpp) { if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) { ERR("Frame# in pfn-to-mfn frame list is not in pseudophys"); + ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp, + p2m_frame_list[i/ulpp]); goto out; } } @@ -693,20 +754,14 @@ } -#if 0 - sent_last_iter = 0xFFFFFFFF; /* Pretend we sent a /lot/ last time */ -#else - sent_last_iter = 1 << 20; -#endif + /* pretend we sent all the pages last iteration */ + sent_last_iter = max_pfn; /* calculate the power of 2 order of max_pfn, e.g. 15->4 16->4 17->5 */ for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++) continue; - -#undef BITMAP_SIZE -#define BITMAP_SIZE ((1<<20)/8) /* Setup to_send / to_fix and to_skip bitmaps */ to_send = malloc(BITMAP_SIZE); @@ -922,10 +977,8 @@ /* write out pages in batch */ - if (pagetype == XTAB) { - DPRINTF("SKIP BOGUS page %i mfn %08lx\n", j, pfn_type[j]); + if (pagetype == XTAB) continue; - } pagetype &= LTABTYPE_MASK; @@ -950,10 +1003,10 @@ } /* end of the write out for this batch */ sent_this_iter += batch; - + + munmap(region_base, batch*PAGE_SIZE); + } /* end of this while loop for this iteration */ - - munmap(region_base, batch*PAGE_SIZE); skip: @@ -1027,13 +1080,9 @@ DPRINTF("All memory is saved\n"); - /* Success! */ - rc = 0; - - /* ^^^^^^ XXX SMH: hmm.. not sure that's really success! */ - /* Zero terminate */ - if (!write_exact(io_fd, &rc, sizeof(int))) { + i = 0; + if (!write_exact(io_fd, &i, sizeof(int))) { ERR("Error when writing to state file (6)"); goto out; } @@ -1043,17 +1092,17 @@ unsigned int i,j; unsigned long pfntab[1024]; - for ( i = 0, j = 0; i < max_pfn; i++ ) { - if ( ! is_mapped(live_p2m[i]) ) + for (i = 0, j = 0; i < max_pfn; i++) { + if (!is_mapped(live_p2m[i])) j++; } - + if(!write_exact(io_fd, &j, sizeof(unsigned int))) { ERR("Error when writing to state file (6a)"); goto out; } - for ( i = 0, j = 0; i < max_pfn; ) { + for (i = 0, j = 0; i < max_pfn; ) { if (!is_mapped(live_p2m[i])) pfntab[j++] = i; @@ -1097,7 +1146,10 @@ ERR("Error when writing to state file (1)"); goto out; } - + + /* Success! */ + rc = 0; + out: if (live_shinfo) @@ -1110,7 +1162,7 @@ munmap(live_p2m, P2M_SIZE); if(live_m2p) - munmap(live_m2p, M2P_SIZE); + munmap(live_m2p, M2P_SIZE(max_mfn)); free(pfn_type); free(pfn_batch); diff -r cbf6f95e9c62 -r 3f39f030fa89 tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Wed Nov 16 19:33:12 2005 +++ b/tools/libxc/xc_private.c Wed Nov 16 19:33:23 2005 @@ -260,18 +260,6 @@ } -unsigned long xc_get_m2p_start_mfn ( int xc_handle ) -{ - unsigned long mfn; - - if ( ioctl( xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN, &mfn ) < 0 ) - { - perror("xc_get_m2p_start_mfn:"); - return 0; - } - return mfn; -} - int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf, diff -r cbf6f95e9c62 -r 3f39f030fa89 tools/libxc/xg_private.h --- a/tools/libxc/xg_private.h Wed Nov 16 19:33:12 2005 +++ b/tools/libxc/xg_private.h Wed Nov 16 19:33:23 2005 @@ -153,8 +153,6 @@ } mfn_mapper_t; -unsigned long xc_get_m2p_start_mfn (int xc_handle); - int xc_copy_to_domain_page(int xc_handle, uint32_t domid, unsigned long dst_pfn, void *src_page); diff -r cbf6f95e9c62 -r 3f39f030fa89 tools/libxc/xg_save_restore.h --- a/tools/libxc/xg_save_restore.h Wed Nov 16 19:33:12 2005 +++ b/tools/libxc/xg_save_restore.h Wed Nov 16 19:33:23 2005 @@ -3,6 +3,8 @@ ** ** Defintions and utilities for save / restore. */ + +#include "xc_private.h" #define DEBUG 1 #define PROGRESS 0 @@ -55,25 +57,24 @@ ** Returns 1 on success, 0 on failure. */ static int get_platform_info(int xc_handle, uint32_t dom, - /* OUT */ uint32_t *max_mfn, - /* OUT */ uint32_t *hvirt_start, - /* OUT */ uint32_t *pt_levels) + /* OUT */ unsigned long *max_mfn, + /* OUT */ unsigned long *hvirt_start, + /* OUT */ unsigned int *pt_levels) { xen_capabilities_info_t xen_caps = ""; xen_platform_parameters_t xen_params; - xc_physinfo_t physinfo; - if (xc_physinfo(xc_handle, &physinfo) != 0) - return 0; - + if (xc_version(xc_handle, XENVER_platform_parameters, &xen_params) != 0) return 0; if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) return 0; - *max_mfn = physinfo.total_pages; + if (xc_memory_op(xc_handle, XENMEM_maximum_ram_page, max_mfn) != 0) + return 0; + *hvirt_start = xen_params.virt_start; if (strstr(xen_caps, "xen-3.0-x86_64")) @@ -95,13 +96,22 @@ ** entry tell us whether or not the the PFN is currently mapped. */ -#define PFN_TO_KB(_pfn) ((_pfn) * PAGE_SIZE / 1024) +#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10)) #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1)) -/* Size in bytes of the M2P and P2M (both rounded up to nearest PAGE_SIZE) */ -#define M2P_SIZE ROUNDUP((max_mfn * sizeof(unsigned long)), PAGE_SHIFT) -#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) +/* +** The M2P is made up of some number of 'chunks' of at least 2MB in size. +** The below definitions and utility function(s) deal with mapping the M2P +** regarldess of the underlying machine memory size or architecture. +*/ +#define M2P_SHIFT L2_PAGETABLE_SHIFT_PAE +#define M2P_CHUNK_SIZE (1 << M2P_SHIFT) +#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT) +#define M2P_CHUNKS(_m) (M2P_SIZE((_m)) >> M2P_SHIFT) + +/* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */ +#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) /* Number of unsigned longs in a page */ #define ulpp (PAGE_SIZE/sizeof(unsigned long)) diff -r cbf6f95e9c62 -r 3f39f030fa89 tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Wed Nov 16 19:33:12 2005 +++ b/tools/python/xen/xend/XendCheckpoint.py Wed Nov 16 19:33:23 2005 @@ -129,7 +129,7 @@ l = read_exact(fd, sizeof_unsigned_long, "not a valid guest state file: pfn count read") nr_pfns = unpack("=L", l)[0] # XXX endianess - if nr_pfns > 1024*1024: # XXX + if nr_pfns > 16*1024*1024: # XXX raise XendError( "not a valid guest state file: pfn count out of range") diff -r cbf6f95e9c62 -r 3f39f030fa89 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Wed Nov 16 19:33:12 2005 +++ b/tools/python/xen/xend/XendDomain.py Wed Nov 16 19:33:23 2005 @@ -63,14 +63,19 @@ self.domains = {} self.domains_lock = threading.RLock() - xswatch("@releaseDomain", self.onReleaseDomain) - self.domains_lock.acquire() try: self._add_domain( XendDomainInfo.recreate(self.xen_domains()[PRIV_DOMAIN], True)) self.dom0_setup() + + # This watch registration needs to be before the refresh call, so + # that we're sure that we haven't missed any releases, but inside + # the domains_lock, as we don't want the watch to fire until after + # the refresh call has completed. + xswatch("@releaseDomain", self.onReleaseDomain) + self.refresh(True) finally: self.domains_lock.release() diff -r cbf6f95e9c62 -r 3f39f030fa89 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Wed Nov 16 19:33:12 2005 +++ b/tools/python/xen/xend/XendDomainInfo.py Wed Nov 16 19:33:23 2005 @@ -45,6 +45,8 @@ from xen.xend.xenstore.xstransact import xstransact from xen.xend.xenstore.xsutil import GetDomainPath, IntroduceDomain +from xen.xend.xenstore.xswatch import xswatch + """Shutdown code for poweroff.""" DOMAIN_POWEROFF = 0 @@ -82,7 +84,6 @@ SHUTDOWN_TIMEOUT = 30 -DOMROOT = '/local/domain/' VMROOT = '/vm/' ZOMBIE_PREFIX = 'Zombie-' @@ -100,26 +101,52 @@ #log.setLevel(logging.TRACE) -## Configuration entries that we expect to round-trip -- be read from the +## +# All parameters of VMs that may be configured on-the-fly, or at start-up. +# +VM_CONFIG_PARAMS = [ + ('name', str), + ('on_poweroff', str), + ('on_reboot', str), + ('on_crash', str), + ] + + +## +# Configuration entries that we expect to round-trip -- be read from the # config file or xc, written to save-files (i.e. through sxpr), and reused as # config on restart or restore, all without munging. Some configuration # entries are munged for backwards compatibility reasons, or because they # don't come out of xc in the same form as they are specified in the config # file, so those are handled separately. ROUNDTRIPPING_CONFIG_ENTRIES = [ - ('name', str), - ('uuid', str), - ('ssidref', int), - ('vcpus', int), - ('vcpu_avail', int), - ('cpu_weight', float), - ('memory', int), - ('maxmem', int), - ('bootloader', str), - ('on_poweroff', str), - ('on_reboot', str), - ('on_crash', str) + ('uuid', str), + ('ssidref', int), + ('vcpus', int), + ('vcpu_avail', int), + ('cpu_weight', float), + ('memory', int), + ('maxmem', int), + ('bootloader', str), ] + +ROUNDTRIPPING_CONFIG_ENTRIES += VM_CONFIG_PARAMS + + +## +# All entries written to the store. This is VM_CONFIGURATION_PARAMS, plus +# those entries written to the store that cannot be reconfigured on-the-fly. +# +VM_STORE_ENTRIES = [ + ('uuid', str), + ('ssidref', int), + ('vcpus', int), + ('vcpu_avail', int), + ('memory', int), + ('maxmem', int), + ] + +VM_STORE_ENTRIES += VM_CONFIG_PARAMS # @@ -156,6 +183,7 @@ vm.initDomain() vm.storeVmDetails() vm.storeDomDetails() + vm.registerWatch() vm.refreshShutdown() return vm except: @@ -211,6 +239,7 @@ vm.storeVmDetails() vm.storeDomDetails() + vm.registerWatch() vm.refreshShutdown(xeninfo) return vm @@ -371,12 +400,50 @@ self.console_port = None self.console_mfn = None + self.vmWatch = None + self.state = STATE_DOM_OK self.state_updated = threading.Condition() self.refresh_shutdown_lock = threading.Condition() ## private: + + def readVMDetails(self, params): + """Read from the store all of those entries that we consider + """ + try: + return self.gatherVm(*params) + except ValueError: + # One of the int/float entries in params has a corresponding store + # entry that is invalid. We recover, because older versions of + # Xend may have put the entry there (memory/target, for example), + # but this is in general a bad situation to have reached. + log.exception( + "Store corrupted at %s! Domain %d's configuration may be " + "affected.", self.vmpath, self.domid) + return [] + + + def storeChanged(self): + log.debug("XendDomainInfo.storeChanged"); + + changed = False + + def f(x, y): + if y is not None and self.info[x[0]] != y: + self.info[x[0]] = y + changed = True + + map(f, VM_CONFIG_PARAMS, self.readVMDetails(VM_CONFIG_PARAMS)) + + if changed: + # Update the domain section of the store, as this contains some + # parameters derived from the VM configuration. + self.storeDomDetails() + + return 1 + def augmentInfo(self): """Augment self.info, as given to us through {@link #recreate}, with @@ -387,30 +454,8 @@ if not self.infoIsSet(name) and val is not None: self.info[name] = val - params = (("name", str), - ("on_poweroff", str), - ("on_reboot", str), - ("on_crash", str), - ("image", str), - ("memory", int), - ("maxmem", int), - ("vcpus", int), - ("vcpu_avail", int), - ("start_time", float)) - - try: - from_store = self.gatherVm(*params) - except ValueError, exn: - # One of the int/float entries in params has a corresponding store - # entry that is invalid. We recover, because older versions of - # Xend may have put the entry there (memory/target, for example), - # but this is in general a bad situation to have reached. - log.exception( - "Store corrupted at %s! Domain %d's configuration may be " - "affected.", self.vmpath, self.domid) - return - - map(lambda x, y: useIfNeeded(x[0], y), params, from_store) + map(lambda x, y: useIfNeeded(x[0], y), VM_STORE_ENTRIES, + self.readVMDetails(VM_STORE_ENTRIES)) device = [] for c in controllerClasses: @@ -536,23 +581,24 @@ self.introduceDomain() self.storeDomDetails() + self.registerWatch() self.refreshShutdown() log.debug("XendDomainInfo.completeRestore done") def storeVmDetails(self): - to_store = { - 'uuid': self.info['uuid'] - } + to_store = {} + + for k in VM_STORE_ENTRIES: + if self.infoIsSet(k[0]): + to_store[k[0]] = str(self.info[k[0]]) if self.infoIsSet('image'): to_store['image'] = sxp.to_string(self.info['image']) - for k in ['name', 'ssidref', 'memory', 'maxmem', 'on_poweroff', - 'on_reboot', 'on_crash', 'vcpus', 'vcpu_avail']: - if self.infoIsSet(k): - to_store[k] = str(self.info[k]) + if self.infoIsSet('start_time'): + to_store['start_time'] = str(self.info['start_time']) log.debug("Storing VM details: %s", to_store) @@ -599,13 +645,16 @@ return result - def setDomid(self, domid): - """Set the domain id. - - @param dom: domain id - """ - self.domid = domid - self.storeDom("domid", self.domid) + ## public: + + def registerWatch(self): + """Register a watch on this VM's entries in the store, so that + when they are changed externally, we keep up to date. This should + only be called by {@link #create}, {@link #recreate}, or {@link + #restore}, once the domain's details have been written, but before the + new instance is returned.""" + self.vmWatch = xswatch(self.vmpath, self.storeChanged) + def getDomid(self): return self.domid @@ -1116,6 +1165,13 @@ """Cleanup VM resources. Idempotent. Nothrow guarantee.""" try: + try: + if self.vmWatch: + self.vmWatch.unwatch() + self.vmWatch = None + except: + log.exception("Unwatching VM path failed.") + self.removeVm() except: log.exception("Removing VM path failed.") diff -r cbf6f95e9c62 -r 3f39f030fa89 tools/python/xen/xend/xenstore/xswatch.py --- a/tools/python/xen/xend/xenstore/xswatch.py Wed Nov 16 19:33:12 2005 +++ b/tools/python/xen/xend/xenstore/xswatch.py Wed Nov 16 19:33:23 2005 @@ -20,6 +20,10 @@ self.kwargs = kwargs watchStart() xs.watch(path, self) + + + def unwatch(self): + xs.unwatch(self.path, self) watchThread = None @@ -49,7 +53,7 @@ watch = we[1] res = watch.fn(*watch.args, **watch.kwargs) if not res: - xs.unwatch(watch.path, watch) + watch.unwatch() except: log.exception("read_watch failed") # Ignore this exception -- there's no point throwing it diff -r cbf6f95e9c62 -r 3f39f030fa89 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Wed Nov 16 19:33:12 2005 +++ b/xen/arch/x86/mm.c Wed Nov 16 19:33:23 2005 @@ -898,6 +898,7 @@ return 1; fail: + MEM_LOG("Failure in alloc_l3_table: entry %d", i); while ( i-- > 0 ) if ( is_guest_l3_slot(i) ) put_page_from_l3e(pl3e[i], pfn); @@ -948,6 +949,7 @@ return 1; fail: + MEM_LOG("Failure in alloc_l4_table: entry %d", i); while ( i-- > 0 ) if ( is_guest_l4_slot(i) ) put_page_from_l4e(pl4e[i], pfn); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |