[Xen-changelog] [xen-4.0-testing] xc_domain_restore: Revert 20126:442bc6f82cf3, 19639:205b1badbcfd
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1276160396 -3600
# Node ID a684fbef232d306b8d13ed54be891935d8cb1045
# Parent d6466f49c0a3f04478a0294e52b1941547f5d0ce
xc_domain_restore: Revert 20126:442bc6f82cf3, 19639:205b1badbcfd

This disables superpage restore support, but should gain us acceptable
performance when restoring a domain using a pv_ops dom0 kernel. This is
because single-page allocations will be batched rather than issued singly.

Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
xen-unstable changeset: 21588:6c3d8aec202d
xen-unstable date: Thu Jun 10 09:35:16 2010 +0100
---
 tools/libxc/xc_domain_restore.c | 548 ++++++++--------------------------------
 1 files changed, 120 insertions(+), 428 deletions(-)

diff -r d6466f49c0a3 -r a684fbef232d tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c	Thu Jun 10 09:51:16 2010 +0100
+++ b/tools/libxc/xc_domain_restore.c	Thu Jun 10 09:59:56 2010 +0100
@@ -39,407 +39,9 @@ struct restore_ctx {
     unsigned long nr_pfns; /* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
     xen_pfn_t *live_p2m; /* Live mapping of the table mapping each PFN to its current MFN. */
     xen_pfn_t *p2m; /* A table mapping each PFN to its new MFN. */
-    unsigned no_superpage_mem; /* If have enough continuous memory for super page allocation */
+    xen_pfn_t *p2m_batch; /* A table of P2M mappings in the current region. */
     struct domain_info_context dinfo;
 };
-
-/*
-**
-**
-*/
-#define SUPERPAGE_PFN_SHIFT  9
-#define SUPERPAGE_NR_PFNS    (1UL << SUPERPAGE_PFN_SHIFT)
-
-/*
- * Setting bit 31 force to allocate super page even not all pfns come out,
- * bit 30 indicate that not is in a super page tracking.
- */
-#define FORCE_SP_SHIFT           31
-#define FORCE_SP_MASK            (1UL << FORCE_SP_SHIFT)
-
-#define INVALID_SUPER_PAGE       ((1UL << 30) + 1)
-#define SUPER_PAGE_START(pfn)    (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 )
-#define SUPER_PAGE_TRACKING(pfn) ( (pfn) != INVALID_SUPER_PAGE )
-#define SUPER_PAGE_DONE(pfn)     ( SUPER_PAGE_START(pfn) )
-
-static int super_page_populated(struct restore_ctx *ctx, unsigned long pfn)
-{
-    int i;
-    pfn &= ~(SUPERPAGE_NR_PFNS - 1);
-    for ( i = pfn; i < pfn + SUPERPAGE_NR_PFNS; i++ )
-    {
-        if ( ctx->p2m[i] != INVALID_P2M_ENTRY )
-            return 1;
-    }
-    return 0;
-}
-
-/*
- * Break a 2M page and move contents of [extent start, next_pfn-1] to
- * some new allocated 4K pages
- */
-static int break_super_page(int xc_handle,
-                            uint32_t dom,
-                            struct restore_ctx *ctx,
-                            xen_pfn_t next_pfn)
-{
-    xen_pfn_t *page_array, start_pfn, mfn;
-    uint8_t *ram_base, *save_buf;
-    unsigned long i;
-    int tot_pfns, rc = 0;
-
-    tot_pfns = (next_pfn & (SUPERPAGE_NR_PFNS - 1));
-
-    start_pfn = next_pfn & ~(SUPERPAGE_NR_PFNS - 1);
-    for ( i = start_pfn; i < start_pfn + SUPERPAGE_NR_PFNS; i++ )
-    {
-        /* check the 2M page are populated */
-        if ( ctx->p2m[i] == INVALID_P2M_ENTRY ) {
-            DPRINTF("Previous super page was populated wrongly!\n");
-            return 1;
-        }
-    }
-
-    page_array = (xen_pfn_t*)malloc(tot_pfns * sizeof(xen_pfn_t));
-    save_buf = (uint8_t*)malloc(tot_pfns * PAGE_SIZE);
-
-    if ( !page_array || !save_buf )
-    {
-        ERROR("alloc page_array failed\n");
-        errno = ENOMEM;
-        rc = 1;
-        goto out;
-    }
-
-    /* save previous super page contents */
-    for ( i = 0; i < tot_pfns; i++ )
-    {
-        /* only support HVM, as the mfn of the 2M page is missing */
-        page_array[i] = start_pfn + i;
-    }
-
-    ram_base = xc_map_foreign_pages(xc_handle, dom, PROT_READ,
-                                    page_array, tot_pfns);
-
-    if ( ram_base == NULL )
-    {
-        ERROR("map batch failed\n");
-        rc = 1;
-        goto out;
-    }
-
-    memcpy(save_buf, ram_base, tot_pfns * PAGE_SIZE);
-    munmap(ram_base, tot_pfns * PAGE_SIZE);
-
-    /* free the super page */
-    if ( xc_domain_memory_decrease_reservation(xc_handle, dom, 1,
-                                   SUPERPAGE_PFN_SHIFT, &start_pfn) != 0 )
-    {
-        ERROR("free 2M page failure @ 0x%ld.\n", next_pfn);
-        rc = 1;
-        goto out;
-    }
-
-    start_pfn = next_pfn & ~(SUPERPAGE_NR_PFNS - 1);
-    for ( i = start_pfn; i < start_pfn + SUPERPAGE_NR_PFNS; i++ )
-    {
-        ctx->p2m[i] = INVALID_P2M_ENTRY;
-    }
-
-    for ( i = start_pfn; i < start_pfn + tot_pfns; i++ )
-    {
-        mfn = i;
-        if (xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0,
-                                              0, &mfn) != 0)
-        {
-            ERROR("Failed to allocate physical memory.!\n");
-            errno = ENOMEM;
-            rc = 1;
-            goto out;
-        }
-        ctx->p2m[i] = mfn;
-    }
-
-    /* restore contents */
-    for ( i = 0; i < tot_pfns; i++ )
-    {
-        page_array[i] = start_pfn + i;
-    }
-
-    ram_base = xc_map_foreign_pages(xc_handle, dom, PROT_WRITE,
-                                    page_array, tot_pfns);
-    if ( ram_base == NULL )
-    {
-        ERROR("map batch failed\n");
-        rc = 1;
-        goto out;
-    }
-
-    memcpy(ram_base, save_buf, tot_pfns * PAGE_SIZE);
-    munmap(ram_base, tot_pfns * PAGE_SIZE);
-
-out:
-    free(page_array);
-    free(save_buf);
-    return rc;
-}
-
-
-/*
- * According to pfn list allocate pages: one 2M page or series of 4K pages.
- * Also optimistically allocate a 2M page even when not all pages in the 2M
- * extent come out, and fix it up in next batch:
- * If new pages fit the missing one in the 2M extent, do nothing; Else take
- * place of the original 2M page by some 4K pages.
- */
-static int allocate_mfn_list(int xc_handle,
-                              uint32_t dom,
-                              struct restore_ctx *ctx,
-                              unsigned long nr_extents,
-                              xen_pfn_t *batch_buf,
-                              xen_pfn_t *next_pfn,
-                              int superpages)
-{
-    unsigned int i;
-    unsigned long mfn, pfn, sp_pfn;
-
-    /*Check if force super page, then clear it */
-    unsigned force_super_page = !!(*next_pfn & FORCE_SP_MASK);
-    *next_pfn &= ~FORCE_SP_MASK;
-
-    sp_pfn = *next_pfn;
-
-    if ( !superpages ||
-         ctx->no_superpage_mem ||
-         !SUPER_PAGE_TRACKING(sp_pfn) )
-        goto normal_page;
-
-    if ( !batch_buf )
-    {
-        /* Break previous 2M page, if 512 pages split across a batch boundary */
-        if ( SUPER_PAGE_TRACKING(sp_pfn) &&
-             !SUPER_PAGE_DONE(sp_pfn))
-        {
-            /* break previously allocated super page*/
-            if ( break_super_page(xc_handle, dom, ctx, sp_pfn) != 0 )
-            {
-                ERROR("Break previous super page fail!\n");
-                return 1;
-            }
-        }
-
-        /* follwing pages fit the order in 2M extent */
-        return 0;
-    }
-
-    /*
-     * We try to allocate a 2M page only when:
-     * user require this(superpages),
-     * AND have enough memory,
-     * AND is in the tracking,
-     * AND tracked all pages in 2M extent, OR partial 2M extent for speculation
-     * AND any page in 2M extent are not populated
-     */
-    if ( !SUPER_PAGE_DONE(sp_pfn) && !force_super_page )
-        goto normal_page;
-
-    pfn = batch_buf[0] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
-    if ( super_page_populated(ctx, pfn) )
-        goto normal_page;
-
-    pfn &= ~(SUPERPAGE_NR_PFNS - 1);
-    mfn = pfn;
-
-    if ( xc_domain_memory_populate_physmap(xc_handle, dom, 1,
-                                           SUPERPAGE_PFN_SHIFT, 0, &mfn) == 0)
-    {
-        for ( i = pfn; i < pfn + SUPERPAGE_NR_PFNS; i++, mfn++ )
-        {
-            ctx->p2m[i] = mfn;
-        }
-        return 0;
-    }
-    DPRINTF("No 2M page available for pfn 0x%lx, fall back to 4K page.\n",
-            pfn);
-    ctx->no_superpage_mem = 1;
-
-normal_page:
-    if ( !batch_buf )
-        return 0;
-
-    /* End the tracking, if want a 2M page but end by 4K pages, */
-    *next_pfn = INVALID_SUPER_PAGE;
-
-    for ( i = 0; i < nr_extents; i++ )
-    {
-        unsigned long pagetype = batch_buf[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
-        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
-            continue;
-
-        pfn = mfn = batch_buf[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
-        if ( ctx->p2m[pfn] == INVALID_P2M_ENTRY )
-        {
-            if (xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0,
-                                                  0, &mfn) != 0)
-            {
-                ERROR("Failed to allocate physical memory.! pfn=0x%lx, mfn=0x%lx.\n",
-                      pfn, mfn);
-                errno = ENOMEM;
-                return 1;
-            }
-            ctx->p2m[pfn] = mfn;
-        }
-    }
-
-    return 0;
-}
-
-static int allocate_physmem(int xc_handle, uint32_t dom,
-                            struct restore_ctx *ctx,
-                            unsigned long *region_pfn_type, int region_size,
-                            unsigned int hvm, xen_pfn_t *region_mfn, int superpages)
-{
-    int i;
-    unsigned long pfn;
-    unsigned long pagetype;
-
-    /* Next expected pfn in order to track a possible 2M page */
-    static unsigned long required_pfn = INVALID_SUPER_PAGE;
-
-    /* Buffer of pfn list for 2M page, or series of 4K pages */
-    xen_pfn_t *batch_buf;
-    unsigned int batch_buf_len;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    if ( !superpages )
-    {
-        batch_buf = &region_pfn_type[0];
-        batch_buf_len = region_size;
-        goto alloc_page;
-    }
-
-    batch_buf = NULL;
-    batch_buf_len = 0;
-    /* This loop tracks the possible 2M page */
-    for (i = 0; i < region_size; i++)
-    {
-        pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
-        pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
-
-        if (pagetype == XEN_DOMCTL_PFINFO_XTAB)
-        {
-            /* Do not start collecting pfns until get a valid pfn */
-            if ( batch_buf_len != 0 )
-                batch_buf_len++;
-            continue;
-        }
-
-        if ( SUPER_PAGE_START(pfn) )
-        {
-            /* Start of a 2M extent, populate previsous buf */
-            if ( allocate_mfn_list(xc_handle, dom, ctx,
-                                   batch_buf_len, batch_buf,
-                                   &required_pfn, superpages) != 0 )
-            {
-                errno = ENOMEM;
-                return 1;
-            }
-
-            /* start new tracking for 2M page */
-            batch_buf = &region_pfn_type[i];
-            batch_buf_len = 1;
-            required_pfn = pfn + 1;
-        }
-        else if ( pfn == required_pfn )
-        {
-            /* this page fit the 2M extent in order */
-            batch_buf_len++;
-            required_pfn++;
-        }
-        else if ( SUPER_PAGE_TRACKING(required_pfn) )
-        {
-            /* break of a 2M extent, populate previous buf */
-            if ( allocate_mfn_list(xc_handle, dom, ctx,
-                                   batch_buf_len, batch_buf,
-                                   &required_pfn, superpages) != 0 )
-            {
-                errno = ENOMEM;
-                return 1;
-            }
-            /* start new tracking for a series of 4K pages */
-            batch_buf = &region_pfn_type[i];
-            batch_buf_len = 1;
-            required_pfn = INVALID_SUPER_PAGE;
-        }
-        else
-        {
-            /* this page is 4K */
-            if ( !batch_buf )
-                batch_buf = &region_pfn_type[i];
-            batch_buf_len++;
-        }
-    }
-
-    /*
-     * populate rest batch_buf in the end.
-     * In a speculative way, we allocate a 2M page even when not see all the
-     * pages in order(set bit 31). If not require super page support,
-     * we can skip the tracking loop and come here directly.
-     * Speculative allocation can't be used for PV guest, as we have no mfn to
-     * map previous 2M mem range if need break it.
-     */
-    if ( SUPER_PAGE_TRACKING(required_pfn) &&
-         !SUPER_PAGE_DONE(required_pfn) )
-    {
-        if (hvm)
-            required_pfn |= FORCE_SP_MASK;
-        else
-            required_pfn = INVALID_SUPER_PAGE;
-    }
-
-alloc_page:
-    if ( batch_buf )
-    {
-        if ( allocate_mfn_list(xc_handle, dom, ctx,
-                               batch_buf_len, batch_buf,
-                               &required_pfn,
-                               superpages) != 0 )
-        {
-            errno = ENOMEM;
-            return 1;
-        }
-    }
-
-    for (i = 0; i < region_size; i++)
-    {
-        pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
-        pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
-
-        if ( pfn > dinfo->p2m_size )
-        {
-            ERROR("pfn out of range");
-            return 1;
-        }
-        if (pagetype == XEN_DOMCTL_PFINFO_XTAB)
-        {
-            region_mfn[i] = ~0UL;
-        }
-        else
-        {
-            if (ctx->p2m[pfn] == INVALID_P2M_ENTRY)
-            {
-                DPRINTF("Warning: pfn 0x%lx are not allocated!\n", pfn);
-                /*XXX:allocate this page?*/
-            }
-
-            /* setup region_mfn[] for batch map.
-             * For HVM guests, this interface takes PFNs, not MFNs */
-            region_mfn[i] = hvm ? pfn : ctx->p2m[pfn];
-        }
-    }
-    return 0;
-}
-
 /* set when a consistent image is available */
 static int completed = 0;
 
@@ -493,16 +95,59 @@ static ssize_t read_exact_timed(int fd,
 ** This function inverts that operation, replacing the pfn values with
 ** the (now known) appropriate mfn values.
 */
-static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, struct restore_ctx *ctx,
-                                    void *page, int superpages)
-{
-    int i, pte_last;
+static int uncanonicalize_pagetable(
+    int xc_handle, uint32_t dom, struct restore_ctx *ctx, void *page)
+{
+    int i, pte_last, nr_mfns = 0;
     unsigned long pfn;
    uint64_t pte;
     struct domain_info_context *dinfo = &ctx->dinfo;
 
     pte_last = PAGE_SIZE / ((ctx->pt_levels == 2)? 4 : 8);
 
+    /* First pass: work out how many (if any) MFNs we need to alloc */
+    for ( i = 0; i < pte_last; i++ )
+    {
+        if ( ctx->pt_levels == 2 )
+            pte = ((uint32_t *)page)[i];
+        else
+            pte = ((uint64_t *)page)[i];
+
+        /* XXX SMH: below needs fixing for PROT_NONE etc */
+        if ( !(pte & _PAGE_PRESENT) )
+            continue;
+
+        pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
+
+        if ( pfn >= dinfo->p2m_size )
+        {
+            /* This "page table page" is probably not one; bail. */
+            ERROR("Frame number in page table is out of range: "
+                  "i=%d pfn=0x%lx p2m_size=%lu",
+                  i, pfn, dinfo->p2m_size);
+            return 0;
+        }
+
+        if ( ctx->p2m[pfn] == INVALID_P2M_ENTRY )
+        {
+            /* Have a 'valid' PFN without a matching MFN - need to alloc */
+            ctx->p2m_batch[nr_mfns++] = pfn;
+            ctx->p2m[pfn]--;
+        }
+    }
+
+    /* Allocate the requisite number of mfns. */
+    if ( nr_mfns &&
+         (xc_domain_memory_populate_physmap(xc_handle, dom, nr_mfns, 0, 0,
+                                            ctx->p2m_batch) != 0) )
+    {
+        ERROR("Failed to allocate memory for batch.!\n");
+        errno = ENOMEM;
+        return 0;
+    }
+
+    /* Second pass: uncanonicalize each present PTE */
+    nr_mfns = 0;
     for ( i = 0; i < pte_last; i++ )
     {
         if ( ctx->pt_levels == 2 )
@@ -516,14 +161,9 @@ static int uncanonicalize_pagetable(int
 
         pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
 
-        /* Allocate mfn if necessary */
-        if ( ctx->p2m[pfn] == INVALID_P2M_ENTRY )
-        {
-            unsigned long force_pfn = superpages ? FORCE_SP_MASK : pfn;
-            if (allocate_mfn_list(xc_handle, dom, ctx,
-                          1, &pfn, &force_pfn, superpages) != 0)
-                return 0;
-        }
+        if ( ctx->p2m[pfn] == (INVALID_P2M_ENTRY-1) )
+            ctx->p2m[pfn] = ctx->p2m_batch[nr_mfns++];
+
         pte &= ~MADDR_MASK_X86;
         pte |= (uint64_t)ctx->p2m[pfn] << PAGE_SHIFT;
 
@@ -1160,9 +800,9 @@ static int apply_batch(int xc_handle, ui
 static int apply_batch(int xc_handle, uint32_t dom, struct restore_ctx *ctx,
                        xen_pfn_t* region_mfn, unsigned long* pfn_type, int pae_extended_cr3,
                        unsigned int hvm, struct xc_mmu* mmu,
-                       pagebuf_t* pagebuf, int curbatch, int superpages)
-{
-    int i, j, curpage;
+                       pagebuf_t* pagebuf, int curbatch)
+{
+    int i, j, curpage, nr_mfns;
     /* used by debug verify code */
     unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
     /* Our mapping of the current region (batch) */
@@ -1180,11 +820,56 @@ static int apply_batch(int xc_handle, ui
     if (j > MAX_BATCH_SIZE)
         j = MAX_BATCH_SIZE;
 
-    if (allocate_physmem(xc_handle, dom, ctx, &pagebuf->pfn_types[curbatch],
-                         j, hvm, region_mfn, superpages) != 0)
-    {
-        ERROR("allocate_physmem() failed\n");
-        return -1;
+    /* First pass for this batch: work out how much memory to alloc */
+    nr_mfns = 0;
+    for ( i = 0; i < j; i++ )
+    {
+        unsigned long pfn, pagetype;
+        pfn      = pagebuf->pfn_types[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
+        pagetype = pagebuf->pfn_types[i] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
+
+        if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) &&
+             (ctx->p2m[pfn] == INVALID_P2M_ENTRY) )
+        {
+            /* Have a live PFN which hasn't had an MFN allocated */
+            ctx->p2m_batch[nr_mfns++] = pfn;
+            ctx->p2m[pfn]--;
+        }
+    }
+
+    /* Now allocate a bunch of mfns for this batch */
+    if ( nr_mfns &&
+         (xc_domain_memory_populate_physmap(xc_handle, dom, nr_mfns, 0,
+                                            0, ctx->p2m_batch) != 0) )
+    {
+        ERROR("Failed to allocate memory for batch.!\n");
+        errno = ENOMEM;
+        return -1;
+    }
+
+    /* Second pass for this batch: update p2m[] and region_mfn[] */
+    nr_mfns = 0;
+    for ( i = 0; i < j; i++ )
+    {
+        unsigned long pfn, pagetype;
+        pfn      = pagebuf->pfn_types[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
+        pagetype = pagebuf->pfn_types[i] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
+
+        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
+            region_mfn[i] = ~0UL; /* map will fail but we don't care */
+        else
+        {
+            if ( ctx->p2m[pfn] == (INVALID_P2M_ENTRY-1) )
+            {
+                /* We just allocated a new mfn above; update p2m */
+                ctx->p2m[pfn] = ctx->p2m_batch[nr_mfns++];
+                ctx->nr_pfns++;
+            }
+
+            /* setup region_mfn[] for batch map.
+             * For HVM guests, this interface takes PFNs, not MFNs */
+            region_mfn[i] = hvm ? pfn : ctx->p2m[pfn];
+        }
     }
 
     /* Map relevant mfns */
@@ -1249,8 +934,7 @@ static int apply_batch(int xc_handle, ui
                 pae_extended_cr3 ||
                 (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) {
 
-                if (!uncanonicalize_pagetable(xc_handle, dom, ctx,
-                                              page, superpages)) {
+                if (!uncanonicalize_pagetable(xc_handle, dom, ctx, page)) {
                     /*
                     ** Failing to uncanonicalize a page table can be ok
                     ** under live migration since the pages type may have
@@ -1363,7 +1047,6 @@ int xc_domain_restore(int xc_handle, int
     static struct restore_ctx _ctx = {
         .live_p2m = NULL,
         .p2m = NULL,
-        .no_superpage_mem = 0,
     };
     static struct restore_ctx *ctx = &_ctx;
     struct domain_info_context *dinfo = &ctx->dinfo;
@@ -1375,9 +1058,8 @@ int xc_domain_restore(int xc_handle, int
     /* For info only */
     ctx->nr_pfns = 0;
 
-    /* Always try to allocate 2M pages for HVM */
-    if ( hvm )
-        superpages = 1;
+    if ( superpages )
+        return 1;
 
     if ( read_exact(io_fd, &dinfo->p2m_size, sizeof(unsigned long)) )
    {
@@ -1426,9 +1108,11 @@ int xc_domain_restore(int xc_handle, int
     region_mfn = xc_memalign(PAGE_SIZE, ROUNDUP(
                               MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
+    ctx->p2m_batch = xc_memalign(
+        PAGE_SIZE, ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
 
     if ( (ctx->p2m == NULL) || (pfn_type == NULL) ||
-         (region_mfn == NULL) )
+         (region_mfn == NULL) || (ctx->p2m_batch == NULL) )
     {
         ERROR("memory alloc failed");
         errno = ENOMEM;
@@ -1437,10 +1121,18 @@ int xc_domain_restore(int xc_handle, int
 
     memset(region_mfn, 0,
            ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
+    memset(ctx->p2m_batch, 0,
+           ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
 
     if ( lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE) )
     {
         ERROR("Could not lock region_mfn");
+        goto out;
+    }
+
+    if ( lock_pages(ctx->p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE) )
+    {
+        ERROR("Could not lock p2m_batch");
         goto out;
     }
 
@@ -1517,7 +1209,7 @@ int xc_domain_restore(int xc_handle, int
             int brc;
 
             brc = apply_batch(xc_handle, dom, ctx, region_mfn, pfn_type,
-                              pae_extended_cr3, hvm, mmu, &pagebuf, curbatch, superpages);
+                              pae_extended_cr3, hvm, mmu, &pagebuf, curbatch);
             if ( brc < 0 )
                 goto out;
 
@@ -1681,7 +1373,7 @@ int xc_domain_restore(int xc_handle, int
                 {
                     if ( !uncanonicalize_pagetable(
                         xc_handle, dom, ctx,
-                        region_base + k*PAGE_SIZE, superpages) )
+                        region_base + k*PAGE_SIZE) )
                     {
                         ERROR("failed uncanonicalize pt!");
                         goto out;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
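The scheme this revert restores is the two-pass "scan, then batch-populate" pattern visible in apply_batch() and uncanonicalize_pagetable() above: walk a batch once to collect the PFNs that still lack an MFN, populate all of them with a single xc_domain_memory_populate_physmap() call, then walk the batch again to install the new MFNs in the p2m. The stand-alone C sketch below illustrates only that pattern; the context struct, the populate() stub and the fake MFN values are simplified stand-ins for the real libxc interfaces, not the actual code from the patch.

#include <stdio.h>

#define INVALID_P2M_ENTRY (~0UL)
#define MAX_BATCH_SIZE    1024

struct ctx {
    unsigned long *p2m;       /* pfn -> mfn; INVALID_P2M_ENTRY if not yet populated */
    unsigned long *p2m_batch; /* pfns needing an mfn, handed to one batched call */
};

/* Stand-in for xc_domain_memory_populate_physmap(): overwrites each pfn
 * in pfns[] with a (fake) newly allocated mfn and returns 0 on success. */
static int populate(unsigned long *pfns, int nr)
{
    for ( int i = 0; i < nr; i++ )
        pfns[i] = 0x100000UL + pfns[i];
    return 0;
}

static int apply_batch_sketch(struct ctx *ctx, const unsigned long *pfns, int j)
{
    int i, nr_mfns = 0;

    /* First pass: collect every pfn in the batch that has no mfn yet. */
    for ( i = 0; i < j; i++ )
        if ( ctx->p2m[pfns[i]] == INVALID_P2M_ENTRY )
        {
            ctx->p2m_batch[nr_mfns++] = pfns[i];
            ctx->p2m[pfns[i]]--;              /* mark "allocation pending" */
        }

    /* One batched allocation instead of j single-page hypercalls. */
    if ( nr_mfns && populate(ctx->p2m_batch, nr_mfns) != 0 )
        return -1;

    /* Second pass: install the freshly allocated mfns into the p2m. */
    nr_mfns = 0;
    for ( i = 0; i < j; i++ )
        if ( ctx->p2m[pfns[i]] == INVALID_P2M_ENTRY - 1 )
            ctx->p2m[pfns[i]] = ctx->p2m_batch[nr_mfns++];

    return 0;
}

int main(void)
{
    unsigned long p2m[16], batch[MAX_BATCH_SIZE];
    unsigned long pfns[4] = { 2, 3, 4, 5 };
    struct ctx ctx = { p2m, batch };

    for ( int i = 0; i < 16; i++ )
        p2m[i] = INVALID_P2M_ENTRY;

    if ( apply_batch_sketch(&ctx, pfns, 4) == 0 )
        printf("pfn 3 -> mfn 0x%lx\n", ctx.p2m[3]);
    return 0;
}

With a pv_ops dom0, each populate-physmap hypercall is relatively expensive, so collapsing the per-page calls of the superpage path into one call per batch is what recovers the restore performance mentioned in the commit message.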