[Xen-changelog] [xen-unstable] x86: map M2P table sparsely
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1253603956 -3600
# Node ID 51152e4f995f383eccc7c686afc3ab67d626327d
# Parent  d9b50ae2bf18fdeae57159bc56d414057ade502a
x86: map M2P table sparsely

Avoid backing M2P table holes with memory, when those holes are large
enough to cover an exact multiple of large pages. For the sake of
saving and migrating guests, XENMEM_machphys_mfn_list fills the holes
in the array it returns with the MFN for the previous range returned
(thanks to Keir pointing out that it really doesn't matter *what* MFN
gets returned for invalid ranges). Using the most recently encountered
MFN (rather than e.g. always the first one) represents an attempt to
cut down on the number of references these pages will get when they
get mapped into a privileged domain's address space.

This also allows for saving a couple of 2M pages even on certain
"normal" systems.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/x86/x86_32/mm.c        |   47 ++++++++++----
 xen/arch/x86/x86_64/compat/mm.c |   23 +++++--
 xen/arch/x86/x86_64/mm.c        |  130 +++++++++++++++++++++++++++-------
 3 files changed, 141 insertions(+), 59 deletions(-)

diff -r d9b50ae2bf18 -r 51152e4f995f xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Tue Sep 22 08:18:19 2009 +0100
+++ b/xen/arch/x86/x86_32/mm.c  Tue Sep 22 08:19:16 2009 +0100
@@ -72,7 +72,7 @@ void __init paging_init(void)
 {
     unsigned long v;
     struct page_info *pg;
-    int i;
+    unsigned int i, n;
 
     if ( cpu_has_pge )
     {
@@ -96,8 +96,18 @@ void __init paging_init(void)
      */
     mpt_size  = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
     mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
+#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
+#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
+             sizeof(*machine_to_phys_mapping))
+    BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
+                 sizeof(*machine_to_phys_mapping));
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
+        for ( n = 0; n < CNT; ++n)
+            if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
+                break;
+        if ( n == CNT )
+            continue;
         if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
             panic("Not enough memory to bootstrap Xen.\n");
         l2e_write(&idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i],
@@ -106,11 +116,12 @@ void __init paging_init(void)
         l2e_write(&idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i],
                   l2e_from_page(
                       pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW));
-    }
-
-    /* Fill with an obvious debug pattern. */
-    for ( i = 0; i < (mpt_size / BYTES_PER_LONG); i++)
-        set_gpfn_from_mfn(i, 0x55555555);
+        /* Fill with an obvious debug pattern. */
+        memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
+               1UL << L2_PAGETABLE_SHIFT);
+    }
+#undef CNT
+#undef MFN
 
     /* Create page tables for ioremap()/map_domain_page_global(). */
     for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
@@ -163,14 +174,17 @@ void __init subarch_init_memory(void)
 {
     unsigned long m2p_start_mfn;
     unsigned int i, j;
+    l2_pgentry_t l2e;
 
     BUILD_BUG_ON(sizeof(struct page_info) != 24);
 
     /* M2P table is mappable read-only by privileged domains. */
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
-        m2p_start_mfn = l2e_get_pfn(
-            idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]);
+        l2e = idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i];
+        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+            continue;
+        m2p_start_mfn = l2e_get_pfn(l2e);
         for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
         {
             struct page_info *page = mfn_to_page(m2p_start_mfn + j);
@@ -191,8 +205,9 @@ long subarch_memory_op(int op, XEN_GUEST
 long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
 {
     struct xen_machphys_mfn_list xmml;
-    unsigned long mfn;
+    unsigned long mfn, last_mfn;
     unsigned int i, max;
+    l2_pgentry_t l2e;
     long rc = 0;
 
     switch ( op )
@@ -203,12 +218,18 @@ long subarch_memory_op(int op, XEN_GUEST
 
         max = min_t(unsigned int, xmml.max_extents, mpt_size >> 21);
 
-        for ( i = 0; i < max; i++ )
-        {
-            mfn = l2e_get_pfn(idle_pg_table_l2[l2_linear_offset(
-                RDWR_MPT_VIRT_START + (i << 21))]) + l1_table_offset(i << 21);
+        for ( i = 0, last_mfn = 0; i < max; i++ )
+        {
+            l2e = idle_pg_table_l2[l2_linear_offset(
+                RDWR_MPT_VIRT_START + (i << 21))];
+            if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
+                mfn = l2e_get_pfn(l2e);
+            else
+                mfn = last_mfn;
+            ASSERT(mfn);
             if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
                 return -EFAULT;
+            last_mfn = mfn;
         }
 
         xmml.nr_extents = i;
diff -r d9b50ae2bf18 -r 51152e4f995f xen/arch/x86/x86_64/compat/mm.c
--- a/xen/arch/x86/x86_64/compat/mm.c   Tue Sep 22 08:18:19 2009 +0100
+++ b/xen/arch/x86/x86_64/compat/mm.c   Tue Sep 22 08:19:16 2009 +0100
@@ -153,19 +153,31 @@ int compat_arch_memory_op(int op, XEN_GU
     }
 
     case XENMEM_machphys_mfn_list:
+    {
+        unsigned long limit;
+        compat_pfn_t last_mfn;
+
         if ( copy_from_guest(&xmml, arg, 1) )
             return -EFAULT;
 
-        for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START;
-              (i != xmml.max_extents) && (v != RDWR_COMPAT_MPT_VIRT_END);
+        limit = (unsigned long)(compat_machine_to_phys_mapping +
+                                min_t(unsigned long, max_page,
+                                      MACH2PHYS_COMPAT_NR_ENTRIES(current->domain)));
+        if ( limit > RDWR_COMPAT_MPT_VIRT_END )
+            limit = RDWR_COMPAT_MPT_VIRT_END;
+        for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START, last_mfn = 0;
+              (i != xmml.max_extents) && (v < limit);
               i++, v += 1 << L2_PAGETABLE_SHIFT )
         {
             l2e = compat_idle_pg_table_l2[l2_table_offset(v)];
-            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-                break;
-            mfn = l2e_get_pfn(l2e) + l1_table_offset(v);
+            if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
+                mfn = l2e_get_pfn(l2e);
+            else
+                mfn = last_mfn;
+            ASSERT(mfn);
             if ( copy_to_compat_offset(xmml.extent_start, i, &mfn, 1) )
                 return -EFAULT;
+            last_mfn = mfn;
         }
 
         xmml.nr_extents = i;
@@ -173,6 +185,7 @@ int compat_arch_memory_op(int op, XEN_GU
             rc = -EFAULT;
 
         break;
+    }
 
     default:
         rc = -ENOSYS;
diff -r d9b50ae2bf18 -r 51152e4f995f xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Tue Sep 22 08:18:19 2009 +0100
+++ b/xen/arch/x86/x86_64/mm.c  Tue Sep 22 08:19:16 2009 +0100
@@ -194,7 +194,7 @@ void __init paging_init(void)
 void __init paging_init(void)
 {
     unsigned long i, mpt_size, va;
-    unsigned int memflags;
+    unsigned int n, memflags;
     l3_pgentry_t *l3_ro_mpt;
     l2_pgentry_t *l2_ro_mpt = NULL;
     struct page_info *l1_pg, *l2_pg, *l3_pg;
@@ -213,6 +213,11 @@ void __init paging_init(void)
      */
     mpt_size  = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
     mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
+#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
+#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
+             sizeof(*machine_to_phys_mapping))
+    BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
+                 sizeof(*machine_to_phys_mapping));
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
         BUILD_BUG_ON(RO_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
@@ -222,37 +227,63 @@ void __init paging_init(void)
 
         if ( cpu_has_page1gb &&
              !((unsigned long)l2_ro_mpt & ~PAGE_MASK) &&
-             (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) &&
-             (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER,
-                                          memflags)) != NULL )
+             (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) )
+        {
+            unsigned int k, holes;
+
+            for ( holes = k = 0; k < 1 << PAGETABLE_ORDER; ++k)
+            {
+                for ( n = 0; n < CNT; ++n)
+                    if ( mfn_valid(MFN(i + k) + n * PDX_GROUP_COUNT) )
+                        break;
+                if ( n == CNT )
+                    ++holes;
+            }
+            if ( k == holes )
+            {
+                i += (1UL << PAGETABLE_ORDER) - 1;
+                continue;
+            }
+            if ( holes == 0 &&
+                 (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER,
+                                              memflags)) != NULL )
+            {
+                map_pages_to_xen(
+                    RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
+                    page_to_mfn(l1_pg),
+                    1UL << (2 * PAGETABLE_ORDER),
+                    PAGE_HYPERVISOR);
+                memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
+                       0x77, 1UL << L3_PAGETABLE_SHIFT);
+
+                ASSERT(!l2_table_offset(va));
+                /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+                l3e_write(&l3_ro_mpt[l3_table_offset(va)],
+                    l3e_from_page(l1_pg,
+                        /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
+                i += (1UL << PAGETABLE_ORDER) - 1;
+                continue;
+            }
+        }
+
+        for ( n = 0; n < CNT; ++n)
+            if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
+                break;
+        if ( n == CNT )
+            l1_pg = NULL;
+        else if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
+                                               memflags)) == NULL )
+            goto nomem;
+        else
         {
             map_pages_to_xen(
                 RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
                 page_to_mfn(l1_pg),
-                1UL << (2 * PAGETABLE_ORDER),
+                1UL << PAGETABLE_ORDER,
                 PAGE_HYPERVISOR);
             memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
-                   0x77, 1UL << L3_PAGETABLE_SHIFT);
-
-            ASSERT(!l2_table_offset(va));
-            /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
-            l3e_write(&l3_ro_mpt[l3_table_offset(va)],
-                      l3e_from_page(l1_pg,
-                          /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
-            i += (1UL << PAGETABLE_ORDER) - 1;
-            continue;
-        }
-
-        if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
-                                          memflags)) == NULL )
-            goto nomem;
-        map_pages_to_xen(
-            RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
-            page_to_mfn(l1_pg),
-            1UL << PAGETABLE_ORDER,
-            PAGE_HYPERVISOR);
-        memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
-               1UL << L2_PAGETABLE_SHIFT);
+                   0x55, 1UL << L2_PAGETABLE_SHIFT);
+        }
         if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
         {
             if ( (l2_pg = alloc_domheap_page(NULL, memflags)) == NULL )
@@ -264,10 +295,13 @@ void __init paging_init(void)
             ASSERT(!l2_table_offset(va));
         }
         /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
-        l2e_write(l2_ro_mpt, l2e_from_page(
-            l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
+        if ( l1_pg )
+            l2e_write(l2_ro_mpt, l2e_from_page(
+                l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
         l2_ro_mpt++;
     }
+#undef CNT
+#undef MFN
 
     /* Create user-accessible L2 directory to map the MPT for compat guests. */
     BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) !=
@@ -288,12 +322,22 @@ void __init paging_init(void)
     mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
     if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END )
         m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size;
-    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
+#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int))
+#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
+             sizeof(*compat_machine_to_phys_mapping))
+    BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
+                 sizeof(*compat_machine_to_phys_mapping));
+    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++, l2_ro_mpt++ )
     {
         memflags = MEMF_node(phys_to_nid(i <<
             (L2_PAGETABLE_SHIFT - 2 + PAGE_SHIFT)));
+        for ( n = 0; n < CNT; ++n)
+            if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
+                break;
+        if ( n == CNT )
+            continue;
         if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
-                                          memflags)) == NULL )
+                                          memflags)) == NULL )
             goto nomem;
         map_pages_to_xen(
             RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
@@ -306,8 +350,9 @@ void __init paging_init(void)
                1UL << L2_PAGETABLE_SHIFT);
         /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */
         l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT));
-        l2_ro_mpt++;
-    }
+    }
+#undef CNT
+#undef MFN
 
     /* Set up linear page table mapping. */
     l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)],
@@ -428,7 +473,7 @@ long subarch_memory_op(int op, XEN_GUEST
     l3_pgentry_t l3e;
     l2_pgentry_t l2e;
     unsigned long v;
-    xen_pfn_t mfn;
+    xen_pfn_t mfn, last_mfn;
     unsigned int i;
    long rc = 0;
 
@@ -440,29 +485,32 @@ long subarch_memory_op(int op, XEN_GUEST
         BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
         BUILD_BUG_ON(RDWR_MPT_VIRT_END & ((1UL << L3_PAGETABLE_SHIFT) - 1));
 
-        for ( i = 0, v = RDWR_MPT_VIRT_START;
-              (i != xmml.max_extents) && (v != RDWR_MPT_VIRT_END);
+        for ( i = 0, v = RDWR_MPT_VIRT_START, last_mfn = 0;
+              (i != xmml.max_extents) &&
+              (v < (unsigned long)(machine_to_phys_mapping + max_page));
               i++, v += 1UL << L2_PAGETABLE_SHIFT )
         {
             l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
                 l3_table_offset(v)];
             if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-                break;
-            if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
+                mfn = last_mfn;
+            else if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
             {
                 l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
-                if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-                    break;
-                mfn = l2e_get_pfn(l2e);
+                if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
+                    mfn = l2e_get_pfn(l2e);
+                else
+                    mfn = last_mfn;
             }
             else
            {
                 mfn = l3e_get_pfn(l3e) +
                     (l2_table_offset(v) << PAGETABLE_ORDER);
            }
-            ASSERT(!l1_table_offset(v));
+            ASSERT(mfn);
             if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
                 return -EFAULT;
+            last_mfn = mfn;
         }
 
         xmml.nr_extents = i;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
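[Editor's note] The hole-filling behaviour described in the commit message is easiest to
see in isolation. Below is a minimal, self-contained C sketch of the idea, not Xen code:
chunk_mfn, NR_CHUNKS and MFN_INVALID are invented stand-ins for the per-2MB-chunk M2P
mappings that subarch_memory_op() walks when it builds the XENMEM_machphys_mfn_list reply.

/*
 * Standalone sketch (assumed example, not Xen code) of the hole-filling
 * used by XENMEM_machphys_mfn_list in this patch: for every 2MB-aligned
 * chunk of the M2P table, report the backing MFN if the chunk is mapped,
 * otherwise repeat the most recently seen valid MFN.
 */
#include <assert.h>
#include <stdio.h>

#define NR_CHUNKS   8
#define MFN_INVALID 0   /* marks an unmapped (hole) chunk in this sketch */

int main(void)
{
    /* Hypothetical per-chunk backing MFNs; 0 means "hole, no memory behind it". */
    unsigned long chunk_mfn[NR_CHUNKS] = { 0x1000, 0x1200, 0, 0, 0x2000, 0, 0x3000, 0 };
    unsigned long extents[NR_CHUNKS];
    unsigned long last_mfn = 0;
    unsigned int i;

    for ( i = 0; i < NR_CHUNKS; i++ )
    {
        unsigned long mfn = chunk_mfn[i];

        if ( mfn == MFN_INVALID )
            mfn = last_mfn;     /* fill the hole with the previous range's MFN */
        assert(mfn);            /* mirrors the patch's ASSERT(mfn) */
        extents[i] = mfn;
        last_mfn = mfn;
        printf("extent %u -> mfn %#lx%s\n", i, extents[i],
               chunk_mfn[i] ? "" : " (hole, repeated)");
    }
    return 0;
}

Repeating the most recently seen MFN for a hole, rather than always the same one, spreads
out the extra references those pages pick up once a privileged domain maps the returned
extents.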