[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] x86-64: use 1Gb pages in 1:1 mapping if available
At once adjust the 2/4Mb page handling slightly in a few places (to match the newly added code): - when re-creating a large page mapping after finding that all small page mappings in the respective area are using identical flags and suitable MFNs, the virtual address was already incremented past the area to be dealt with, which needs to be accounted for in the invocation of flush_area() in that path - don't or-in/and-out _PAGE_PSE on non-present pages - when comparing flags, try to minimise the number of l1f_to_lNf()/ lNf_to_l1f() instances used - instead of skipping a single page when encountering a big page mapping equal to what a small page mapping would establish, skip to the next larger page boundary This patch won't apply cleanly without the previously sent patch adjusting show_page_walk(). Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx> Index: 2008-01-18/xen/arch/x86/mm.c =================================================================== --- 2008-01-18.orig/xen/arch/x86/mm.c 2008-01-23 15:39:18.000000000 +0100 +++ 2008-01-18/xen/arch/x86/mm.c 2008-01-23 16:22:01.000000000 +0100 @@ -113,6 +113,8 @@ #include <xsm/xsm.h> #include <xen/trace.h> +extern int early_boot; + #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a) /* @@ -3659,7 +3661,13 @@ int ptwr_do_page_fault(struct vcpu *v, u void free_xen_pagetable(void *v) { - extern int early_boot; +#ifdef __x86_64__ + unsigned long ma = virt_to_maddr(v); + unsigned long l2_ident_ma = virt_to_maddr(l2_identmap); + + if ( ma >= l2_ident_ma && ma < l2_ident_ma + sizeof(l2_identmap) ) + return; +#endif BUG_ON(early_boot); @@ -3670,8 +3678,8 @@ void free_xen_pagetable(void *v) } /* Convert to from superpage-mapping flags for map_pages_to_xen(). */ -#define l1f_to_l2f(f) ((f) | _PAGE_PSE) -#define l2f_to_l1f(f) ((f) & ~_PAGE_PSE) +#define l1f_to_lNf(f) ((f) & _PAGE_PRESENT ? (f) | _PAGE_PSE : (f)) +#define lNf_to_l1f(f) ((f) & _PAGE_PRESENT ? 
(f) & ~_PAGE_PSE : (f)) /* * map_pages_to_xen() can be called with interrupts disabled: @@ -3697,6 +3705,126 @@ int map_pages_to_xen( while ( nr_mfns != 0 ) { +#ifdef __x86_64__ + l3_pgentry_t *pl3e = virt_to_xen_l3e(virt); + l3_pgentry_t ol3e = *pl3e; + + if ( cpu_has_page1gb && + !(((virt >> PAGE_SHIFT) | mfn) & + ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)) && + nr_mfns >= (1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) && + !(flags & (_PAGE_PAT | MAP_SMALL_PAGES)) ) + { + /* 1Gb-page mapping. */ + l3e_write_atomic(pl3e, l3e_from_pfn(mfn, l1f_to_lNf(flags))); + + if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) ) + { + unsigned int flush_flags = + FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER); + + if ( l3e_get_flags(ol3e) & _PAGE_PSE ) + { + if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL ) + flush_flags |= FLUSH_TLB_GLOBAL; + if ( (l1f_to_lNf(l3e_get_flags(ol3e)) ^ flags) & + PAGE_CACHE_ATTRS ) + flush_flags |= FLUSH_CACHE; + flush_area(virt, flush_flags); + } + else + { + pl2e = l3e_to_l2e(ol3e); + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + { + ol2e = pl2e[i]; + if ( !(l2e_get_flags(ol2e) & _PAGE_PRESENT) ) + continue; + if ( l2e_get_flags(ol2e) & _PAGE_PSE ) + { + if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL ) + flush_flags |= FLUSH_TLB_GLOBAL; + if ( (lNf_to_l1f(l2e_get_flags(ol2e)) ^ flags) & + PAGE_CACHE_ATTRS ) + flush_flags |= FLUSH_CACHE; + } + else + { + unsigned int j; + + pl1e = l2e_to_l1e(ol2e); + for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ ) + { + ol1e = pl1e[j]; + if ( l1e_get_flags(ol1e) & _PAGE_GLOBAL ) + flush_flags |= FLUSH_TLB_GLOBAL; + if ( (l1e_get_flags(ol1e) ^ flags) & + PAGE_CACHE_ATTRS ) + flush_flags |= FLUSH_CACHE; + } + } + } + flush_area(virt, flush_flags); + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + { + ol2e = pl2e[i]; + if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) && + !(l2e_get_flags(ol2e) & _PAGE_PSE) ) + free_xen_pagetable(l2e_to_l1e(ol2e)); + } + free_xen_pagetable(pl2e); + } + } + + virt += 1UL << L3_PAGETABLE_SHIFT; + mfn += 1UL << 
(L3_PAGETABLE_SHIFT - PAGE_SHIFT); + nr_mfns -= 1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + + if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) && + (l3e_get_flags(ol3e) & _PAGE_PSE) ) + { + unsigned int flush_flags = + FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER); + + /* Skip this PTE if there is no change. */ + if ( ((l3e_get_pfn(ol3e) & ~(L2_PAGETABLE_ENTRIES * + L1_PAGETABLE_ENTRIES - 1)) + + (l2_table_offset(virt) << PAGETABLE_ORDER) + + l1_table_offset(virt) == mfn) && + ((lNf_to_l1f(l3e_get_flags(ol3e)) ^ flags) & + ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0 ) + { + i = (1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - + (mfn & ((1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)); + if ( i > nr_mfns ) + i = nr_mfns; + virt += i << PAGE_SHIFT; + mfn += i; + nr_mfns -= i; + continue; + } + + pl2e = alloc_xen_pagetable(); + if ( pl2e == NULL ) + return -ENOMEM; + + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + l2e_write(pl2e + i, + l2e_from_pfn(l3e_get_pfn(ol3e) + + (i << PAGETABLE_ORDER), + l3e_get_flags(ol3e))); + + if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL ) + flush_flags |= FLUSH_TLB_GLOBAL; + + l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e), + __PAGE_HYPERVISOR)); + flush_area(virt, flush_flags); + } +#endif + pl2e = virt_to_xen_l2e(virt); if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) && @@ -3705,7 +3833,7 @@ int map_pages_to_xen( { /* Super-page mapping. 
*/ ol2e = *pl2e; - l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_l2f(flags))); + l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_lNf(flags))); if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) ) { @@ -3716,8 +3844,8 @@ int map_pages_to_xen( { if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL ) flush_flags |= FLUSH_TLB_GLOBAL; - if ( (l2e_get_flags(ol2e) ^ l1f_to_l2f(flags)) & - l1f_to_l2f(PAGE_CACHE_ATTRS) ) + if ( (lNf_to_l1f(l2e_get_flags(ol2e)) ^ flags) & + PAGE_CACHE_ATTRS ) flush_flags |= FLUSH_CACHE; flush_area(virt, flush_flags); } @@ -3761,13 +3889,17 @@ int map_pages_to_xen( /* Skip this PTE if there is no change. */ if ( (((l2e_get_pfn(*pl2e) & ~(L1_PAGETABLE_ENTRIES - 1)) + l1_table_offset(virt)) == mfn) && - (((l2f_to_l1f(l2e_get_flags(*pl2e)) ^ flags) & + (((lNf_to_l1f(l2e_get_flags(*pl2e)) ^ flags) & ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0) ) { - virt += 1UL << L1_PAGETABLE_SHIFT; - mfn += 1UL; - nr_mfns -= 1UL; - continue; + i = (1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) - + (mfn & ((1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)); + if ( i > nr_mfns ) + i = nr_mfns; + virt += i << L1_PAGETABLE_SHIFT; + mfn += i; + nr_mfns -= i; + goto check_l3; } pl1e = alloc_xen_pagetable(); @@ -3777,7 +3909,7 @@ int map_pages_to_xen( for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) l1e_write(&pl1e[i], l1e_from_pfn(l2e_get_pfn(*pl2e) + i, - l2f_to_l1f(l2e_get_flags(*pl2e)))); + lNf_to_l1f(l2e_get_flags(*pl2e)))); if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL ) flush_flags |= FLUSH_TLB_GLOBAL; @@ -3820,13 +3952,43 @@ int map_pages_to_xen( { ol2e = *pl2e; l2e_write_atomic(pl2e, l2e_from_pfn(base_mfn, - l1f_to_l2f(flags))); - flush_area(virt, (FLUSH_TLB_GLOBAL | - FLUSH_ORDER(PAGETABLE_ORDER))); + l1f_to_lNf(flags))); + flush_area(virt - PAGE_SIZE, + FLUSH_TLB_GLOBAL | FLUSH_ORDER(PAGETABLE_ORDER)); free_xen_pagetable(l2e_to_l1e(ol2e)); } } } + + check_l3: ; +#ifdef __x86_64__ + if ( cpu_has_page1gb && + !early_boot && + flags == PAGE_HYPERVISOR && + (nr_mfns == 0 || + !(((virt >> PAGE_SHIFT) | mfn) 
& + ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))) ) + { + unsigned long base_mfn; + + ol3e = *pl3e; + pl2e = l3e_to_l2e(ol3e); + base_mfn = l2e_get_pfn(*pl2e) & ~(L2_PAGETABLE_ENTRIES * + L1_PAGETABLE_ENTRIES - 1); + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ ) + if ( l2e_get_pfn(*pl2e) != base_mfn + (i << PAGETABLE_ORDER) || + l2e_get_flags(*pl2e) != l1f_to_lNf(flags) ) + break; + if ( i == L2_PAGETABLE_ENTRIES ) + { + l3e_write_atomic(pl3e, l3e_from_pfn(base_mfn, + l1f_to_lNf(flags))); + flush_area(virt - PAGE_SIZE, + FLUSH_TLB_GLOBAL | FLUSH_ORDER(2*PAGETABLE_ORDER)); + free_xen_pagetable(l3e_to_l2e(ol3e)); + } + } +#endif } return 0; @@ -3844,6 +4006,40 @@ void destroy_xen_mappings(unsigned long while ( v < e ) { +#ifdef __x86_64__ + l3_pgentry_t *pl3e = virt_to_xen_l3e(v); + + if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) + { + v += 1UL << L3_PAGETABLE_SHIFT; + v &= ~((1UL << L3_PAGETABLE_SHIFT) - 1); + continue; + } + + if ( l3e_get_flags(*pl3e) & _PAGE_PSE ) + { + if ( l2_table_offset(v) == 0 && + l1_table_offset(v) == 0 && + ((e - v) >= (1UL << L3_PAGETABLE_SHIFT)) ) + { + /* PAGE1GB: whole superpage is destroyed. */ + l3e_write_atomic(pl3e, l3e_empty()); + v += 1UL << L3_PAGETABLE_SHIFT; + continue; + } + + /* PAGE1GB: shatter the superpage and fall through. */ + pl2e = alloc_xen_pagetable(); + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + l2e_write(pl2e + i, + l2e_from_pfn(l3e_get_pfn(*pl3e) + + (i << PAGETABLE_ORDER), + l3e_get_flags(*pl3e))); + l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e), + __PAGE_HYPERVISOR)); + } +#endif + pl2e = virt_to_xen_l2e(v); if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) @@ -3896,6 +4092,23 @@ void destroy_xen_mappings(unsigned long free_xen_pagetable(pl1e); } } + +#ifdef __x86_64__ + /* If we are done with the L3E, check if it is now empty. 
*/ + if ( (v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0) ) + continue; + pl2e = l3e_to_l2e(*pl3e); + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + if ( l2e_get_intpte(pl2e[i]) != 0 ) + break; + if ( i == L2_PAGETABLE_ENTRIES ) + { + /* Empty: zap the L3E and free the L2 page. */ + l3e_write_atomic(pl3e, l3e_empty()); + flush_all(FLUSH_TLB_GLOBAL); /* flush before free */ + free_xen_pagetable(pl2e); + } +#endif } flush_all(FLUSH_TLB_GLOBAL); Index: 2008-01-18/xen/arch/x86/setup.c =================================================================== --- 2008-01-18.orig/xen/arch/x86/setup.c 2008-01-23 15:39:18.000000000 +0100 +++ 2008-01-18/xen/arch/x86/setup.c 2008-01-23 16:51:48.000000000 +0100 @@ -672,8 +672,9 @@ void __init __start_xen(unsigned long mb pl3e = l4e_to_l3e(*pl4e); for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ ) { - /* Not present or already relocated? */ + /* Not present, 1Gb mapping, or already relocated? */ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) || + (l3e_get_flags(*pl3e) & _PAGE_PSE) || (l3e_get_pfn(*pl3e) > 0x1000) ) continue; *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) + Index: 2008-01-18/xen/arch/x86/x86_64/mm.c =================================================================== --- 2008-01-18.orig/xen/arch/x86/x86_64/mm.c 2008-01-23 15:39:18.000000000 +0100 +++ 2008-01-18/xen/arch/x86/x86_64/mm.c 2008-01-23 11:56:42.000000000 +0100 @@ -70,30 +70,36 @@ void *alloc_xen_pagetable(void) return mfn_to_virt(mfn); } -l2_pgentry_t *virt_to_xen_l2e(unsigned long v) +l3_pgentry_t *virt_to_xen_l3e(unsigned long v) { l4_pgentry_t *pl4e; - l3_pgentry_t *pl3e; - l2_pgentry_t *pl2e; pl4e = &idle_pg_table[l4_table_offset(v)]; if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) ) { - pl3e = alloc_xen_pagetable(); + l3_pgentry_t *pl3e = alloc_xen_pagetable(); + clear_page(pl3e); l4e_write(pl4e, l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR)); } - pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v); + return l4e_to_l3e(*pl4e) + l3_table_offset(v); +} + 
+l2_pgentry_t *virt_to_xen_l2e(unsigned long v) +{ + l3_pgentry_t *pl3e; + + pl3e = virt_to_xen_l3e(v); if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) { - pl2e = alloc_xen_pagetable(); + l2_pgentry_t *pl2e = alloc_xen_pagetable(); + clear_page(pl2e); l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR)); } - pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v); - return pl2e; + return l3e_to_l2e(*pl3e) + l2_table_offset(v); } void __init paging_init(void) Index: 2008-01-18/xen/arch/x86/x86_64/traps.c =================================================================== --- 2008-01-18.orig/xen/arch/x86/x86_64/traps.c 2008-01-23 15:39:18.000000000 +0100 +++ 2008-01-18/xen/arch/x86/x86_64/traps.c 2008-01-23 11:58:58.000000000 +0100 @@ -148,9 +148,11 @@ void show_page_walk(unsigned long addr) mfn = l3e_get_pfn(l3e); pfn = mfn_valid(mfn) && mpt_valid ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY; - printk(" L3[0x%03lx] = %"PRIpte" %016lx\n", - l3_table_offset(addr), l3e_get_intpte(l3e), pfn); - if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) + printk(" L3[0x%03lx] = %"PRIpte" %016lx%s\n", + l3_table_offset(addr), l3e_get_intpte(l3e), pfn, + (l3e_get_flags(l3e) & _PAGE_PSE) ? " (PSE)" : ""); + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || + (l3e_get_flags(l3e) & _PAGE_PSE) ) return; l2t = mfn_to_virt(mfn); Index: 2008-01-18/xen/include/asm-x86/page.h =================================================================== --- 2008-01-18.orig/xen/include/asm-x86/page.h 2008-01-23 15:39:18.000000000 +0100 +++ 2008-01-18/xen/include/asm-x86/page.h 2008-01-22 15:35:32.000000000 +0100 @@ -350,6 +350,9 @@ static inline int get_order_from_pages(u void *alloc_xen_pagetable(void); void free_xen_pagetable(void *v); l2_pgentry_t *virt_to_xen_l2e(unsigned long v); +#ifdef __x86_64__ +l3_pgentry_t *virt_to_xen_l3e(unsigned long v); +#endif /* Map machine page range in Xen virtual address space. 
*/ #define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages for the mapping */ Index: 2008-01-18/xen/include/asm-x86/x86_64/page.h =================================================================== --- 2008-01-18.orig/xen/include/asm-x86/x86_64/page.h 2008-01-23 15:39:18.000000000 +0100 +++ 2008-01-18/xen/include/asm-x86/x86_64/page.h 2008-01-23 11:14:54.000000000 +0100 @@ -59,6 +59,8 @@ typedef struct { intpte_t l3; } l3_pgent typedef struct { intpte_t l4; } l4_pgentry_t; typedef l4_pgentry_t root_pgentry_t; +extern l2_pgentry_t l2_identmap[4*L2_PAGETABLE_ENTRIES]; + #endif /* !__ASSEMBLY__ */ #define pte_read_atomic(ptep) (*(ptep)) _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |