[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86: Speed up PV-guest superpage mapping
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1274947486 -3600 # Node ID 26a0942a9c180c38a91aa223cfd3e6766a93c1fa # Parent 7fb801ac0a5951115d0da82e73f8681b1ae71b3a x86: Speed up PV-guest superpage mapping The current version of superpage mapping takes a PGT_writable reference to every page in a superpage each time it is mapped. This is extremely slow, so slow that applications become unusable. My solution for this is to introduce a superpage table in the hypervisor, similar to the frametable structure for pages. Currently this table only has a type_info element. There are three types a superpage can have, SGT_mark, SGT_dynamic, or SGT_none. In normal operation, the first time a superpage is mapped, a PGT_writable reference is taken to each page in the superpage, and the superpage is set to type SGT_dynamic and the superpage typecount is incremented. On subsequent mappings and unmappings, only the superpage typecount changes. On the last unmap, the PGT_writable reference on each page is removed. The SGT_mark type is set and cleared through two new MMUEXT hypercalls, mark_super and unmark_super. When the hypercall is made, the superpage's type is set to SGT_mark and a PGT_writable reference is taken to its pages. On unmark, the type is cleared and the reference removed. If a superpage is already set to SGT_dynamic when mark_super is called, the type is changed to SGT_mark and no additional PGT_writable reference is taken. If there are still outstanding mappings of this superpage when unmark_super is called, the type is set to SGT_dynamic and the PGT_writable reference is not removed. Fast superpage mapping is only supported on 64-bit hypervisors. For 32-bit hypervisors, superpage mapping is supported but will be extremely slow. 
Signed-off-by: Dave McCracken <dave.mccracken@xxxxxxxxxx> --- xen/arch/x86/domain.c | 2 xen/arch/x86/mm.c | 360 ++++++++++++++++++++++++++++++++------ xen/include/asm-x86/config.h | 5 xen/include/asm-x86/guest_pt.h | 2 xen/include/asm-x86/mm.h | 24 ++ xen/include/asm-x86/page.h | 10 + xen/include/asm-x86/x86_32/page.h | 5 xen/include/asm-x86/x86_64/page.h | 14 + 8 files changed, 370 insertions(+), 52 deletions(-) diff -r 7fb801ac0a59 -r 26a0942a9c18 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Thu May 27 08:34:44 2010 +0100 +++ b/xen/arch/x86/domain.c Thu May 27 09:04:46 2010 +0100 @@ -1739,6 +1739,8 @@ static int relinquish_memory( BUG(); } + clear_superpage_mark(page); + if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); diff -r 7fb801ac0a59 -r 26a0942a9c18 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu May 27 08:34:44 2010 +0100 +++ b/xen/arch/x86/mm.c Thu May 27 09:04:46 2010 +0100 @@ -151,8 +151,11 @@ unsigned long __read_mostly pdx_group_va #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT) -int opt_allow_hugepage; -boolean_param("allowhugepage", opt_allow_hugepage); +int opt_allow_superpage; +boolean_param("allowsuperpage", opt_allow_superpage); + +static int get_superpage(unsigned long mfn, struct domain *d); +static void put_superpage(unsigned long mfn); #define l1_disallow_mask(d) \ ((d != dom_io) && \ @@ -169,6 +172,30 @@ l2_pgentry_t *compat_idle_pg_table_l2 = COMPAT_L3_DISALLOW_MASK) #else #define l3_disallow_mask(d) L3_DISALLOW_MASK +#endif + +#ifdef __x86_64__ +static void __init init_spagetable(void) +{ + unsigned long s, start = SPAGETABLE_VIRT_START; + unsigned long end = SPAGETABLE_VIRT_END; + unsigned long step, mfn; + unsigned int max_entries; + + step = 1UL << PAGETABLE_ORDER; + max_entries = (max_pdx + ((1UL<<SUPERPAGE_ORDER)-1)) >> SUPERPAGE_ORDER; + end = start + (((max_entries * sizeof(*spage_table)) + + ((1UL<<SUPERPAGE_SHIFT)-1)) & (~((1UL<<SUPERPAGE_SHIFT)-1))); + + for (s = start; s < end; s += step 
<< PAGE_SHIFT) + { + mfn = alloc_boot_pages(step, step); + if ( !mfn ) + panic("Not enough memory for spage table"); + map_pages_to_xen(s, mfn, step, PAGE_HYPERVISOR); + } + memset((void *)start, 0, end - start); +} #endif static void __init init_frametable_chunk(void *start, void *end) @@ -232,6 +259,10 @@ void __init init_frametable(void) (unsigned long)pdx_to_page(max_idx * PDX_GROUP_COUNT) - (unsigned long)pdx_to_page(max_pdx)); } +#ifdef __x86_64__ + if (opt_allow_superpage) + init_spagetable(); +#endif } void __init arch_init_memory(void) @@ -652,19 +683,7 @@ static int get_page_and_type_from_pagenr return rc; } -static int get_data_page( - struct page_info *page, struct domain *d, int writeable) -{ - int rc; - - if ( writeable ) - rc = get_page_and_type(page, d, PGT_writable_page); - else - rc = get_page(page, d); - - return rc; -} - +#ifdef __x86_64__ static void put_data_page( struct page_info *page, int writeable) { @@ -673,6 +692,7 @@ static void put_data_page( else put_page(page); } +#endif /* * We allow root tables to map each other (a.k.a. linear page tables). 
It @@ -887,30 +907,23 @@ get_page_from_l2e( rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, 0, 0); if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) ) rc = 0; - } - else if ( !opt_allow_hugepage || (mfn & (L1_PAGETABLE_ENTRIES-1)) ) - { - rc = -EINVAL; - } - else - { - unsigned long m = mfn; - int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW); - - do { - if ( !mfn_valid(m) || - !get_data_page(mfn_to_page(m), d, writeable) ) - { - while ( m-- > mfn ) - put_data_page(mfn_to_page(m), writeable); - return -EINVAL; - } - } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); - - rc = 1; - } - - return rc; + return rc; + } + + if ( !opt_allow_superpage ) + { + MEM_LOG("Attempt to map superpage without allowsuperpage " + "flag in hypervisor"); + return -EINVAL; + } + + if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) + { + MEM_LOG("Unaligned superpage map attempt mfn %lx", mfn); + return -EINVAL; + } + + return get_superpage(mfn, d); } @@ -1100,19 +1113,9 @@ static int put_page_from_l2e(l2_pgentry_ return 1; if ( l2e_get_flags(l2e) & _PAGE_PSE ) - { - unsigned long mfn = l2e_get_pfn(l2e), m = mfn; - int writeable = l2e_get_flags(l2e) & _PAGE_RW; - - ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1))); - do { - put_data_page(mfn_to_page(m), writeable); - } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); - } + put_superpage(l2e_get_pfn(l2e)); else - { put_page_and_type(l2e_get_page(l2e)); - } return 0; } @@ -2445,6 +2448,209 @@ int get_page_type_preemptible(struct pag return __get_page_type(page, type, 1); } +static int get_spage_pages(struct page_info *page, struct domain *d) +{ + int i; + + for (i = 0; i < (1<<PAGETABLE_ORDER); i++, page++) + { + if (!get_page_and_type(page, d, PGT_writable_page)) + { + while (--i >= 0) + put_page_and_type(--page); + return 0; + } + } + return 1; +} + +static void put_spage_pages(struct page_info *page) +{ + int i; + + for (i = 0; i < (1<<PAGETABLE_ORDER); i++, page++) + { + put_page_and_type(page); + } + return; +} + +#ifdef 
__x86_64__ + +static int mark_superpage(struct spage_info *spage, struct domain *d) +{ + unsigned long x, nx, y = spage->type_info; + int pages_done = 0; + + ASSERT(opt_allow_superpage); + + do { + x = y; + nx = x + 1; + if ( (x & SGT_type_mask) == SGT_mark ) + { + MEM_LOG("Duplicate superpage mark attempt mfn %lx", + spage_to_mfn(spage)); + if ( pages_done ) + put_spage_pages(spage_to_page(spage)); + return -EINVAL; + } + if ( (x & SGT_type_mask) == SGT_dynamic ) + { + if ( pages_done ) + { + put_spage_pages(spage_to_page(spage)); + pages_done = 0; + } + } + else if ( !pages_done ) + { + if ( !get_spage_pages(spage_to_page(spage), d) ) + { + MEM_LOG("Superpage type conflict in mark attempt mfn %lx", + spage_to_mfn(spage)); + return -EINVAL; + } + pages_done = 1; + } + nx = (nx & ~SGT_type_mask) | SGT_mark; + + } while ( (y = cmpxchg(&spage->type_info, x, nx)) != x ); + + return 0; +} + +static int unmark_superpage(struct spage_info *spage) +{ + unsigned long x, nx, y = spage->type_info; + unsigned long do_pages = 0; + + ASSERT(opt_allow_superpage); + + do { + x = y; + nx = x - 1; + if ( (x & SGT_type_mask) != SGT_mark ) + { + MEM_LOG("Attempt to unmark unmarked superpage mfn %lx", + spage_to_mfn(spage)); + return -EINVAL; + } + if ( (nx & SGT_count_mask) == 0 ) + { + nx = (nx & ~SGT_type_mask) | SGT_none; + do_pages = 1; + } + else + { + nx = (nx & ~SGT_type_mask) | SGT_dynamic; + } + } while ( (y = cmpxchg(&spage->type_info, x, nx)) != x ); + + if ( do_pages ) + put_spage_pages(spage_to_page(spage)); + + return 0; +} + +void clear_superpage_mark(struct page_info *page) +{ + struct spage_info *spage; + + if ( !opt_allow_superpage ) + return; + + spage = page_to_spage(page); + if ((spage->type_info & SGT_type_mask) == SGT_mark) + unmark_superpage(spage); + +} + +static int get_superpage(unsigned long mfn, struct domain *d) +{ + struct spage_info *spage; + unsigned long x, nx, y; + int pages_done = 0; + + ASSERT(opt_allow_superpage); + + spage = mfn_to_spage(mfn); + 
y = spage->type_info; + do { + x = y; + nx = x + 1; + if ( (x & SGT_type_mask) != SGT_none ) + { + if ( pages_done ) + { + put_spage_pages(spage_to_page(spage)); + pages_done = 0; + } + } + else + { + if ( !get_spage_pages(spage_to_page(spage), d) ) + { + MEM_LOG("Type conflict on superpage mapping mfn %lx", + spage_to_mfn(spage)); + return -EINVAL; + } + pages_done = 1; + nx = (nx & ~SGT_type_mask) | SGT_dynamic; + } + } while ( (y = cmpxchg(&spage->type_info, x, nx)) != x ); + + return 0; +} + +static void put_superpage(unsigned long mfn) +{ + struct spage_info *spage; + unsigned long x, nx, y; + unsigned long do_pages = 0; + + ASSERT(opt_allow_superpage); + + spage = mfn_to_spage(mfn); + y = spage->type_info; + do { + x = y; + nx = x - 1; + if ((x & SGT_type_mask) == SGT_dynamic) + { + if ((nx & SGT_count_mask) == 0) + { + nx = (nx & ~SGT_type_mask) | SGT_none; + do_pages = 1; + } + } + + } while ((y = cmpxchg(&spage->type_info, x, nx)) != x); + + if (do_pages) + put_spage_pages(spage_to_page(spage)); + + return; +} + +#else /* __i386__ */ + +void clear_superpage_mark(struct page_info *page) +{ +} + +static int get_superpage(unsigned long mfn, struct domain *d) +{ + return get_spage_pages(mfn_to_page(mfn), d); +} + +static void put_superpage(unsigned long mfn) +{ + put_spage_pages(mfn_to_page(mfn)); +} + +#endif + void cleanup_page_cacheattr(struct page_info *page) { uint32_t cacheattr = @@ -3001,6 +3207,60 @@ int do_mmuext_op( put_page(mfn_to_page(src_mfn)); break; } + +#ifdef __x86_64__ + case MMUEXT_MARK_SUPER: + { + unsigned long mfn; + struct spage_info *spage; + + mfn = op.arg1.mfn; + if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) + { + MEM_LOG("Unaligned superpage reference mfn %lx", mfn); + okay = 0; + break; + } + + if ( !opt_allow_superpage ) + { + MEM_LOG("Superpages disallowed"); + okay = 0; + rc = -ENOSYS; + break; + } + + spage = mfn_to_spage(mfn); + okay = (mark_superpage(spage, d) >= 0); + break; + } + + case MMUEXT_UNMARK_SUPER: + { + unsigned long mfn; 
+ struct spage_info *spage; + + mfn = op.arg1.mfn; + if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) + { + MEM_LOG("Unaligned superpage reference mfn %lx", mfn); + okay = 0; + break; + } + + if ( !opt_allow_superpage ) + { + MEM_LOG("Superpages disallowed"); + okay = 0; + rc = -ENOSYS; + break; + } + + spage = mfn_to_spage(mfn); + okay = (unmark_superpage(spage) >= 0); + break; + } +#endif default: MEM_LOG("Invalid extended pt command 0x%x", op.cmd); diff -r 7fb801ac0a59 -r 26a0942a9c18 xen/include/asm-x86/config.h --- a/xen/include/asm-x86/config.h Thu May 27 08:34:44 2010 +0100 +++ b/xen/include/asm-x86/config.h Thu May 27 09:04:46 2010 +0100 @@ -225,6 +225,11 @@ extern unsigned int video_mode, video_fl /* Slot 261: xen text, static data and bss (1GB). */ #define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END) #define XEN_VIRT_END (XEN_VIRT_START + GB(1)) +/* Slot 261: superpage information array (20MB). */ +#define SPAGETABLE_VIRT_END FRAMETABLE_VIRT_START +#define SPAGETABLE_SIZE ((DIRECTMAP_SIZE >> SUPERPAGE_SHIFT) * \ + sizeof(struct spage_info)) +#define SPAGETABLE_VIRT_START (SPAGETABLE_VIRT_END - SPAGETABLE_SIZE) /* Slot 261: page-frame information array (40GB). */ #define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START #define FRAMETABLE_SIZE ((DIRECTMAP_SIZE >> PAGE_SHIFT) * \ diff -r 7fb801ac0a59 -r 26a0942a9c18 xen/include/asm-x86/guest_pt.h --- a/xen/include/asm-x86/guest_pt.h Thu May 27 08:34:44 2010 +0100 +++ b/xen/include/asm-x86/guest_pt.h Thu May 27 09:04:46 2010 +0100 @@ -187,7 +187,7 @@ guest_supports_superpages(struct vcpu *v * CR4.PSE is set or the guest is in PAE or long mode. * It's also used in the dummy PT for vcpus with CR4.PG cleared. */ return (!is_hvm_vcpu(v) - ? opt_allow_hugepage + ? 
opt_allow_superpage : (GUEST_PAGING_LEVELS != 2 || !hvm_paging_enabled(v) || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE))); diff -r 7fb801ac0a59 -r 26a0942a9c18 xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Thu May 27 08:34:44 2010 +0100 +++ b/xen/include/asm-x86/mm.h Thu May 27 09:04:46 2010 +0100 @@ -214,6 +214,23 @@ struct page_info #define PGC_count_width PG_shift(9) #define PGC_count_mask ((1UL<<PGC_count_width)-1) +#ifdef __x86_64__ +struct spage_info +{ + unsigned long type_info; +}; + + /* The following page types are MUTUALLY EXCLUSIVE. */ +#define SGT_none PG_mask(0, 2) /* superpage not in use */ +#define SGT_mark PG_mask(1, 2) /* Marked as a superpage */ +#define SGT_dynamic PG_mask(2, 2) /* has been dynamically mapped as a superpage */ +#define SGT_type_mask PG_mask(3, 2) /* Bits 30-31 or 62-63. */ + + /* Count of uses of this superpage as its current type. */ +#define SGT_count_width PG_shift(3) +#define SGT_count_mask ((1UL<<SGT_count_width)-1) +#endif + #if defined(__i386__) #define is_xen_heap_page(page) is_xen_heap_mfn(page_to_mfn(page)) #define is_xen_heap_mfn(mfn) ({ \ @@ -262,6 +279,9 @@ extern void share_xen_page_with_privileg struct page_info *page, int readonly); #define frame_table ((struct page_info *)FRAMETABLE_VIRT_START) +#ifdef __x86_64__ +#define spage_table ((struct spage_info *)SPAGETABLE_VIRT_START) +#endif extern unsigned long max_page; extern unsigned long total_pages; void init_frametable(void); @@ -304,6 +324,8 @@ void cleanup_page_cacheattr(struct page_ void cleanup_page_cacheattr(struct page_info *page); int is_iomem_page(unsigned long mfn); + +void clear_superpage_mark(struct page_info *page); struct domain *page_get_owner_and_reference(struct page_info *page); void put_page(struct page_info *page); @@ -370,7 +392,7 @@ pae_copy_root(struct vcpu *v, l3_pgentry int check_descriptor(const struct domain *, struct desc_struct *d); -extern int opt_allow_hugepage; +extern int opt_allow_superpage; extern int mem_hotplug; 
/****************************************************************************** diff -r 7fb801ac0a59 -r 26a0942a9c18 xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Thu May 27 08:34:44 2010 +0100 +++ b/xen/include/asm-x86/page.h Thu May 27 09:04:46 2010 +0100 @@ -240,6 +240,14 @@ void copy_page_sse2(void *, const void * #define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT) #define __paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT)) +/* Convert between machine frame numbers and spage-info structures. */ +#define __mfn_to_spage(mfn) (spage_table + pfn_to_sdx(mfn)) +#define __spage_to_mfn(pg) sdx_to_pfn((unsigned long)((pg) - spage_table)) + +/* Convert between page-info structures and spage-info structures. */ +#define page_to_spage(page) (spage_table+(((page)-frame_table)>>(SUPERPAGE_SHIFT-PAGE_SHIFT))) +#define spage_to_page(spage) (frame_table+(((spage)-spage_table)<<(SUPERPAGE_SHIFT-PAGE_SHIFT))) + /* * We define non-underscored wrappers for above conversion functions. These are * overridden in various source files while underscored versions remain intact. 
@@ -251,6 +259,8 @@ void copy_page_sse2(void *, const void * #define maddr_to_virt(ma) __maddr_to_virt((unsigned long)(ma)) #define mfn_to_page(mfn) __mfn_to_page(mfn) #define page_to_mfn(pg) __page_to_mfn(pg) +#define mfn_to_spage(mfn) __mfn_to_spage(mfn) +#define spage_to_mfn(pg) __spage_to_mfn(pg) #define maddr_to_page(ma) __maddr_to_page(ma) #define page_to_maddr(pg) __page_to_maddr(pg) #define virt_to_page(va) __virt_to_page(va) diff -r 7fb801ac0a59 -r 26a0942a9c18 xen/include/asm-x86/x86_32/page.h --- a/xen/include/asm-x86/x86_32/page.h Thu May 27 08:34:44 2010 +0100 +++ b/xen/include/asm-x86/x86_32/page.h Thu May 27 09:04:46 2010 +0100 @@ -6,6 +6,7 @@ #define L2_PAGETABLE_SHIFT 21 #define L3_PAGETABLE_SHIFT 30 #define PAGE_SHIFT L1_PAGETABLE_SHIFT +#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT #define ROOT_PAGETABLE_SHIFT L3_PAGETABLE_SHIFT #define PAGETABLE_ORDER 9 @@ -13,6 +14,7 @@ #define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) #define L3_PAGETABLE_ENTRIES 4 #define ROOT_PAGETABLE_ENTRIES L3_PAGETABLE_ENTRIES +#define SUPERPAGE_ORDER PAGETABLE_ORDER /* * Architecturally, physical addresses may be up to 52 bits. 
However, the @@ -52,6 +54,9 @@ #define pdx_to_pfn(pdx) (pdx) #define virt_to_pdx(va) virt_to_mfn(va) #define pdx_to_virt(pdx) mfn_to_virt(pdx) + +#define pfn_to_sdx(pfn) ((pfn)>>(SUPERPAGE_SHIFT-PAGE_SHIFT)) +#define sdx_to_pfn(sdx) ((sdx)<<(SUPERPAGE_SHIFT-PAGE_SHIFT)) static inline unsigned long __virt_to_maddr(unsigned long va) { diff -r 7fb801ac0a59 -r 26a0942a9c18 xen/include/asm-x86/x86_64/page.h --- a/xen/include/asm-x86/x86_64/page.h Thu May 27 08:34:44 2010 +0100 +++ b/xen/include/asm-x86/x86_64/page.h Thu May 27 09:04:46 2010 +0100 @@ -7,6 +7,7 @@ #define L3_PAGETABLE_SHIFT 30 #define L4_PAGETABLE_SHIFT 39 #define PAGE_SHIFT L1_PAGETABLE_SHIFT +#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT #define ROOT_PAGETABLE_SHIFT L4_PAGETABLE_SHIFT #define PAGETABLE_ORDER 9 @@ -15,6 +16,7 @@ #define L3_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) #define L4_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) #define ROOT_PAGETABLE_ENTRIES L4_PAGETABLE_ENTRIES +#define SUPERPAGE_ORDER PAGETABLE_ORDER #define __PAGE_OFFSET DIRECTMAP_VIRT_START #define __XEN_VIRT_START XEN_VIRT_START @@ -41,6 +43,8 @@ extern void pfn_pdx_hole_setup(unsigned #define page_to_pdx(pg) ((pg) - frame_table) #define pdx_to_page(pdx) (frame_table + (pdx)) +#define spage_to_pdx(spg) ((spg>>(SUPERPAGE_SHIFT-PAGE_SHIFT)) - spage_table) +#define pdx_to_spage(pdx) (spage_table + ((pdx)<<(SUPERPAGE_SHIFT-PAGE_SHIFT))) /* * Note: These are solely for the use by page_{get,set}_owner(), and * therefore don't need to handle the XEN_VIRT_{START,END} range. 
@@ -62,6 +66,16 @@ static inline unsigned long pdx_to_pfn(u { return (pdx & pfn_pdx_bottom_mask) | ((pdx << pfn_pdx_hole_shift) & pfn_top_mask); +} + +static inline unsigned long pfn_to_sdx(unsigned long pfn) +{ + return pfn_to_pdx(pfn) >> (SUPERPAGE_SHIFT-PAGE_SHIFT); +} + +static inline unsigned long sdx_to_pfn(unsigned long sdx) +{ + return pdx_to_pfn(sdx << (SUPERPAGE_SHIFT-PAGE_SHIFT)); } static inline unsigned long __virt_to_maddr(unsigned long va) _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |