Re: [Xen-devel] [PATCH] x86-64: use 1Gb pages in 1:1 mapping if available

Is there any measurable performance benefit to doing this? -- Keir

On 24/1/08 15:04, "Jan Beulich" <jbeulich@xxxxxxxxxx> wrote:
> At once adjust the 2/4Mb page handling slightly in a few places (to
> match the newly added code):
> - when re-creating a large page mapping after finding that all small
>   page mappings in the respective area are using identical flags and
>   suitable MFNs, the virtual address was already incremented past the
>   area to be dealt with, which needs to be accounted for in the
>   invocation of flush_area() in that path
> - don't or-in/and-out _PAGE_PSE on non-present pages
> - when comparing flags, try to minimise the number of l1f_to_lNf()/
>   lNf_to_l1f() instances used
> - instead of skipping a single page when encountering a big page
>   mapping equal to what a small page mapping would establish, skip
>   to the next larger page boundary
>
> This patch won't apply cleanly without the previously sent patch
> adjusting show_page_walk().
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
>
> Index: 2008-01-18/xen/arch/x86/mm.c > =================================================================== > --- 2008-01-18.orig/xen/arch/x86/mm.c 2008-01-23 15:39:18.000000000 +0100 > +++ 2008-01-18/xen/arch/x86/mm.c 2008-01-23 16:22:01.000000000 +0100 > @@ -113,6 +113,8 @@ > #include <xsm/xsm.h> > #include <xen/trace.h> > > +extern int early_boot; > + > #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a) > > /* > @@ -3659,7 +3661,13 @@ int ptwr_do_page_fault(struct vcpu *v, u > > void free_xen_pagetable(void *v) > { > - extern int early_boot; > +#ifdef __x86_64__ > + unsigned long ma = virt_to_maddr(v); > + unsigned long l2_ident_ma = virt_to_maddr(l2_identmap); > + > + if ( ma >= l2_ident_ma && ma < l2_ident_ma + sizeof(l2_identmap) ) > + return; > +#endif > > BUG_ON(early_boot); > > @@ -3670,8 +3678,8 @@ void free_xen_pagetable(void *v) > } > > /* Convert to from superpage-mapping flags for map_pages_to_xen(). */ > -#define l1f_to_l2f(f) ((f) | _PAGE_PSE) > -#define l2f_to_l1f(f) ((f) & ~_PAGE_PSE) > +#define l1f_to_lNf(f) ((f) & _PAGE_PRESENT ? (f) | _PAGE_PSE : (f)) > +#define lNf_to_l1f(f) ((f) & _PAGE_PRESENT ? (f) & ~_PAGE_PSE : (f)) > > /* > * map_pages_to_xen() can be called with interrupts disabled: > @@ -3697,6 +3705,126 @@ int map_pages_to_xen( > > while ( nr_mfns != 0 ) > { > +#ifdef __x86_64__ > + l3_pgentry_t *pl3e = virt_to_xen_l3e(virt); > + l3_pgentry_t ol3e = *pl3e; > + > + if ( cpu_has_page1gb && > + !(((virt >> PAGE_SHIFT) | mfn) & > + ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)) && > + nr_mfns >= (1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) && > + !(flags & (_PAGE_PAT | MAP_SMALL_PAGES)) ) > + { > + /* 1Gb-page mapping. 
Is there any measurable performance benefit to doing this? -- Keir On 24/1/08 15:04, "Jan Beulich" <jbeulich@xxxxxxxxxx> wrote: > At once adjust the 2/4Mb page handling slightly in a few places (to > match the newly added code): > - when re-creating a large page mapping after finding that all small > page mappings in the respective area are using identical flags and > suitable MFNs, the virtual address was already incremented pas the > area to be dealt with, which needs to be accounted for in the > invocation of flush_area() in that path > - don't or-in/and-out _PAGE_PSE on non-present pages > - when comparing flags, try minimse the number of l1f_to_lNf()/ > lNf_to_l1f() instances used > - instead of skipping a single page when encountering a big page > mapping equalling to what a small page mapping would establish, skip > to the next larger page boundary > > This patch won't apply cleanly without the previously sent patch > adjusting show_page_walk(). > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx> > > Index: 2008-01-18/xen/arch/x86/mm.c > =================================================================== > --- 2008-01-18.orig/xen/arch/x86/mm.c 2008-01-23 15:39:18.000000000 +0100 > +++ 2008-01-18/xen/arch/x86/mm.c 2008-01-23 16:22:01.000000000 +0100 > @@ -113,6 +113,8 @@ > #include <xsm/xsm.h> > #include <xen/trace.h> > > +extern int early_boot; > + > #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a) > > /* > @@ -3659,7 +3661,13 @@ int ptwr_do_page_fault(struct vcpu *v, u > > void free_xen_pagetable(void *v) > { > - extern int early_boot; > +#ifdef __x86_64__ > + unsigned long ma = virt_to_maddr(v); > + unsigned long l2_ident_ma = virt_to_maddr(l2_identmap); > + > + if ( ma >= l2_ident_ma && ma < l2_ident_ma + sizeof(l2_identmap) ) > + return; > +#endif > > BUG_ON(early_boot); > > @@ -3670,8 +3678,8 @@ void free_xen_pagetable(void *v) > } > > /* Convert to from superpage-mapping flags for map_pages_to_xen(). */ > -#define l1f_to_l2f(f) ((f) | _PAGE_PSE) > -#define l2f_to_l1f(f) ((f) & ~_PAGE_PSE) > +#define l1f_to_lNf(f) ((f) & _PAGE_PRESENT ? (f) | _PAGE_PSE : (f)) > +#define lNf_to_l1f(f) ((f) & _PAGE_PRESENT ? (f) & ~_PAGE_PSE : (f)) > > /* > * map_pages_to_xen() can be called with interrupts disabled: > @@ -3697,6 +3705,126 @@ int map_pages_to_xen( > > while ( nr_mfns != 0 ) > { > +#ifdef __x86_64__ > + l3_pgentry_t *pl3e = virt_to_xen_l3e(virt); > + l3_pgentry_t ol3e = *pl3e; > + > + if ( cpu_has_page1gb && > + !(((virt >> PAGE_SHIFT) | mfn) & > + ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)) && > + nr_mfns >= (1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) && > + !(flags & (_PAGE_PAT | MAP_SMALL_PAGES)) ) > + { > + /* 1Gb-page mapping. 
*/ > + l3e_write_atomic(pl3e, l3e_from_pfn(mfn, l1f_to_lNf(flags))); > + > + if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) ) > + { > + unsigned int flush_flags = > + FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER); > + > + if ( l3e_get_flags(ol3e) & _PAGE_PSE ) > + { > + if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL ) > + flush_flags |= FLUSH_TLB_GLOBAL; > + if ( (l1f_to_lNf(l3e_get_flags(ol3e)) ^ flags) & > + PAGE_CACHE_ATTRS ) > + flush_flags |= FLUSH_CACHE; > + flush_area(virt, flush_flags); > + } > + else > + { > + pl2e = l3e_to_l2e(ol3e); > + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) > + { > + ol2e = pl2e[i]; > + if ( !(l2e_get_flags(ol2e) & _PAGE_PRESENT) ) > + continue; > + if ( l2e_get_flags(ol2e) & _PAGE_PSE ) > + { > + if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL ) > + flush_flags |= FLUSH_TLB_GLOBAL; > + if ( (lNf_to_l1f(l2e_get_flags(ol2e)) ^ flags) & > + PAGE_CACHE_ATTRS ) > + flush_flags |= FLUSH_CACHE; > + } > + else > + { > + unsigned int j; > + > + pl1e = l2e_to_l1e(ol2e); > + for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ ) > + { > + ol1e = pl1e[j]; > + if ( l1e_get_flags(ol1e) & _PAGE_GLOBAL ) > + flush_flags |= FLUSH_TLB_GLOBAL; > + if ( (l1e_get_flags(ol1e) ^ flags) & > + PAGE_CACHE_ATTRS ) > + flush_flags |= FLUSH_CACHE; > + } > + } > + } > + flush_area(virt, flush_flags); > + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) > + { > + ol2e = pl2e[i]; > + if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) && > + !(l2e_get_flags(ol2e) & _PAGE_PSE) ) > + free_xen_pagetable(l2e_to_l1e(ol2e)); > + } > + free_xen_pagetable(pl2e); > + } > + } > + > + virt += 1UL << L3_PAGETABLE_SHIFT; > + mfn += 1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); > + nr_mfns -= 1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); > + continue; > + } > + > + if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) && > + (l3e_get_flags(ol3e) & _PAGE_PSE) ) > + { > + unsigned int flush_flags = > + FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER); > + > + /* Skip this PTE if there is no change. */ > + if ( ((l3e_get_pfn(ol3e) & ~(L2_PAGETABLE_ENTRIES * > + L1_PAGETABLE_ENTRIES - 1)) + > + (l2_table_offset(virt) << PAGETABLE_ORDER) + > + l1_table_offset(virt) == mfn) && > + ((lNf_to_l1f(l3e_get_flags(ol3e)) ^ flags) & > + ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0 ) > + { > + i = (1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - > + (mfn & ((1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)); > + if ( i > nr_mfns ) > + i = nr_mfns; > + virt += i << PAGE_SHIFT; > + mfn += i; > + nr_mfns -= i; > + continue; > + } > + > + pl2e = alloc_xen_pagetable(); > + if ( pl2e == NULL ) > + return -ENOMEM; > + > + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) > + l2e_write(pl2e + i, > + l2e_from_pfn(l3e_get_pfn(ol3e) + > + (i << PAGETABLE_ORDER), > + l3e_get_flags(ol3e))); > + > + if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL ) > + flush_flags |= FLUSH_TLB_GLOBAL; > + > + l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e), > + __PAGE_HYPERVISOR)); > + flush_area(virt, flush_flags); > + } > +#endif > + > pl2e = virt_to_xen_l2e(virt); > > if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) > && > @@ -3705,7 +3833,7 @@ int map_pages_to_xen( > { > /* Super-page mapping. 
*/ > ol2e = *pl2e; > - l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_l2f(flags))); > + l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_lNf(flags))); > > if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) ) > { > @@ -3716,8 +3844,8 @@ int map_pages_to_xen( > { > if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL ) > flush_flags |= FLUSH_TLB_GLOBAL; > - if ( (l2e_get_flags(ol2e) ^ l1f_to_l2f(flags)) & > - l1f_to_l2f(PAGE_CACHE_ATTRS) ) > + if ( (lNf_to_l1f(l2e_get_flags(ol2e)) ^ flags) & > + PAGE_CACHE_ATTRS ) > flush_flags |= FLUSH_CACHE; > flush_area(virt, flush_flags); > } > @@ -3761,13 +3889,17 @@ int map_pages_to_xen( > /* Skip this PTE if there is no change. */ > if ( (((l2e_get_pfn(*pl2e) & ~(L1_PAGETABLE_ENTRIES - 1)) + > l1_table_offset(virt)) == mfn) && > - (((l2f_to_l1f(l2e_get_flags(*pl2e)) ^ flags) & > + (((lNf_to_l1f(l2e_get_flags(*pl2e)) ^ flags) & > ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0) ) > { > - virt += 1UL << L1_PAGETABLE_SHIFT; > - mfn += 1UL; > - nr_mfns -= 1UL; > - continue; > + i = (1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) - > + (mfn & ((1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) - > 1)); > + if ( i > nr_mfns ) > + i = nr_mfns; > + virt += i << L1_PAGETABLE_SHIFT; > + mfn += i; > + nr_mfns -= i; > + goto check_l3; > } > > pl1e = alloc_xen_pagetable(); > @@ -3777,7 +3909,7 @@ int map_pages_to_xen( > for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) > l1e_write(&pl1e[i], > l1e_from_pfn(l2e_get_pfn(*pl2e) + i, > - > l2f_to_l1f(l2e_get_flags(*pl2e)))); > + > lNf_to_l1f(l2e_get_flags(*pl2e)))); > > if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL ) > flush_flags |= FLUSH_TLB_GLOBAL; > @@ -3820,13 +3952,43 @@ int map_pages_to_xen( > { > ol2e = *pl2e; > l2e_write_atomic(pl2e, l2e_from_pfn(base_mfn, > - l1f_to_l2f(flags))); > - flush_area(virt, (FLUSH_TLB_GLOBAL | > - FLUSH_ORDER(PAGETABLE_ORDER))); > + l1f_to_lNf(flags))); > + flush_area(virt - PAGE_SIZE, > + FLUSH_TLB_GLOBAL | > FLUSH_ORDER(PAGETABLE_ORDER)); > free_xen_pagetable(l2e_to_l1e(ol2e)); > } > } > } > + > + check_l3: ; > +#ifdef __x86_64__ > + if ( cpu_has_page1gb && > + !early_boot && > + flags == PAGE_HYPERVISOR && > + (nr_mfns == 0 || > + !(((virt >> PAGE_SHIFT) | mfn) & > + ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))) ) > + { > + unsigned long base_mfn; > + > + ol3e = *pl3e; > + pl2e = l3e_to_l2e(ol3e); > + base_mfn = l2e_get_pfn(*pl2e) & ~(L2_PAGETABLE_ENTRIES * > + L1_PAGETABLE_ENTRIES - 1); > + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ ) > + if ( l2e_get_pfn(*pl2e) != base_mfn + (i << PAGETABLE_ORDER) > || > + l2e_get_flags(*pl2e) != l1f_to_lNf(flags) ) > + break; > + if ( i == L2_PAGETABLE_ENTRIES ) > + { > + l3e_write_atomic(pl3e, l3e_from_pfn(base_mfn, > + l1f_to_lNf(flags))); > + flush_area(virt - PAGE_SIZE, > + FLUSH_TLB_GLOBAL | > FLUSH_ORDER(2*PAGETABLE_ORDER)); > + free_xen_pagetable(l3e_to_l2e(ol3e)); > + } > + } > +#endif > } > > return 0; > @@ -3844,6 +4006,40 @@ void destroy_xen_mappings(unsigned long > > while ( v < e ) > { > +#ifdef __x86_64__ > + l3_pgentry_t *pl3e = virt_to_xen_l3e(v); > + > + if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) > + { > + v += 1UL << L3_PAGETABLE_SHIFT; > + v &= ~((1UL << L3_PAGETABLE_SHIFT) - 1); > + continue; > + } > + > + if ( l3e_get_flags(*pl3e) & _PAGE_PSE ) > + { > + if ( l2_table_offset(v) == 0 && > + l1_table_offset(v) == 0 && > + ((e - v) >= (1UL << L3_PAGETABLE_SHIFT)) ) > + { > + /* PAGE1GB: whole superpage is destroyed. 
*/ > + l3e_write_atomic(pl3e, l3e_empty()); > + v += 1UL << L3_PAGETABLE_SHIFT; > + continue; > + } > + > + /* PAGE1GB: shatter the superpage and fall through. */ > + pl2e = alloc_xen_pagetable(); > + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) > + l2e_write(pl2e + i, > + l2e_from_pfn(l3e_get_pfn(*pl3e) + > + (i << PAGETABLE_ORDER), > + l3e_get_flags(*pl3e))); > + l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e), > + __PAGE_HYPERVISOR)); > + } > +#endif > + > pl2e = virt_to_xen_l2e(v); > > if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) > @@ -3896,6 +4092,23 @@ void destroy_xen_mappings(unsigned long > free_xen_pagetable(pl1e); > } > } > + > +#ifdef __x86_64__ > + /* If we are done with the L3E, check if it is now empty. */ > + if ( (v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0) ) > + continue; > + pl2e = l3e_to_l2e(*pl3e); > + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) > + if ( l2e_get_intpte(pl2e[i]) != 0 ) > + break; > + if ( i == L2_PAGETABLE_ENTRIES ) > + { > + /* Empty: zap the L3E and free the L2 page. */ > + l3e_write_atomic(pl3e, l3e_empty()); > + flush_all(FLUSH_TLB_GLOBAL); /* flush before free */ > + free_xen_pagetable(pl2e); > + } > +#endif > } > > flush_all(FLUSH_TLB_GLOBAL); > Index: 2008-01-18/xen/arch/x86/setup.c > =================================================================== > --- 2008-01-18.orig/xen/arch/x86/setup.c 2008-01-23 15:39:18.000000000 +0100 > +++ 2008-01-18/xen/arch/x86/setup.c 2008-01-23 16:51:48.000000000 +0100 > @@ -672,8 +672,9 @@ void __init __start_xen(unsigned long mb > pl3e = l4e_to_l3e(*pl4e); > for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ ) > { > - /* Not present or already relocated? */ > + /* Not present, 1Gb mapping, or already relocated? */ > if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) || > + (l3e_get_flags(*pl3e) & _PAGE_PSE) || > (l3e_get_pfn(*pl3e) > 0x1000) ) > continue; > *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) + > Index: 2008-01-18/xen/arch/x86/x86_64/mm.c > =================================================================== > --- 2008-01-18.orig/xen/arch/x86/x86_64/mm.c 2008-01-23 15:39:18.000000000 > +0100 > +++ 2008-01-18/xen/arch/x86/x86_64/mm.c 2008-01-23 11:56:42.000000000 +0100 > @@ -70,30 +70,36 @@ void *alloc_xen_pagetable(void) > return mfn_to_virt(mfn); > } > > -l2_pgentry_t *virt_to_xen_l2e(unsigned long v) > +l3_pgentry_t *virt_to_xen_l3e(unsigned long v) > { > l4_pgentry_t *pl4e; > - l3_pgentry_t *pl3e; > - l2_pgentry_t *pl2e; > > pl4e = &idle_pg_table[l4_table_offset(v)]; > if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) ) > { > - pl3e = alloc_xen_pagetable(); > + l3_pgentry_t *pl3e = alloc_xen_pagetable(); > + > clear_page(pl3e); > l4e_write(pl4e, l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR)); > } > > - pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v); > + return l4e_to_l3e(*pl4e) + l3_table_offset(v); > +} > + > +l2_pgentry_t *virt_to_xen_l2e(unsigned long v) > +{ > + l3_pgentry_t *pl3e; > + > + pl3e = virt_to_xen_l3e(v); > if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) > { > - pl2e = alloc_xen_pagetable(); > + l2_pgentry_t *pl2e = alloc_xen_pagetable(); > + > clear_page(pl2e); > l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR)); > } > > - pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v); > - return pl2e; > + return l3e_to_l2e(*pl3e) + l2_table_offset(v); > } > > void __init paging_init(void) > Index: 2008-01-18/xen/arch/x86/x86_64/traps.c > =================================================================== > --- 2008-01-18.orig/xen/arch/x86/x86_64/traps.c 2008-01-23 
15:39:18.000000000 > +0100 > +++ 2008-01-18/xen/arch/x86/x86_64/traps.c 2008-01-23 11:58:58.000000000 +0100 > @@ -148,9 +148,11 @@ void show_page_walk(unsigned long addr) > mfn = l3e_get_pfn(l3e); > pfn = mfn_valid(mfn) && mpt_valid ? > get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY; > - printk(" L3[0x%03lx] = %"PRIpte" %016lx\n", > - l3_table_offset(addr), l3e_get_intpte(l3e), pfn); > - if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) > + printk(" L3[0x%03lx] = %"PRIpte" %016lx%s\n", > + l3_table_offset(addr), l3e_get_intpte(l3e), pfn, > + (l3e_get_flags(l3e) & _PAGE_PSE) ? " (PSE)" : ""); > + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || > + (l3e_get_flags(l3e) & _PAGE_PSE) ) > return; > > l2t = mfn_to_virt(mfn); > Index: 2008-01-18/xen/include/asm-x86/page.h > =================================================================== > --- 2008-01-18.orig/xen/include/asm-x86/page.h 2008-01-23 15:39:18.000000000 > +0100 > +++ 2008-01-18/xen/include/asm-x86/page.h 2008-01-22 15:35:32.000000000 +0100 > @@ -350,6 +350,9 @@ static inline int get_order_from_pages(u > void *alloc_xen_pagetable(void); > void free_xen_pagetable(void *v); > l2_pgentry_t *virt_to_xen_l2e(unsigned long v); > +#ifdef __x86_64__ > +l3_pgentry_t *virt_to_xen_l3e(unsigned long v); > +#endif > > /* Map machine page range in Xen virtual address space. */ > #define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages for the mapping > */ > Index: 2008-01-18/xen/include/asm-x86/x86_64/page.h > =================================================================== > --- 2008-01-18.orig/xen/include/asm-x86/x86_64/page.h 2008-01-23 > 15:39:18.000000000 +0100 > +++ 2008-01-18/xen/include/asm-x86/x86_64/page.h 2008-01-23 11:14:54.000000000 > +0100 > @@ -59,6 +59,8 @@ typedef struct { intpte_t l3; } l3_pgent > typedef struct { intpte_t l4; } l4_pgentry_t; > typedef l4_pgentry_t root_pgentry_t; > > +extern l2_pgentry_t l2_identmap[4*L2_PAGETABLE_ENTRIES]; > + > #endif /* !__ASSEMBLY__ */ > > #define pte_read_atomic(ptep) (*(ptep)) > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxxxxxxxx > http://lists.xensource.com/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
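For readers who want to see the shape of the checks discussed in the changelog in isolation, below is a minimal, self-contained C sketch. It is not Xen code: all constants, macros and helper names are redefined locally, and the _PAGE_PAT/MAP_SMALL_PAGES flag test from the real eligibility check is omitted. It mirrors the 1Gb-mapping eligibility test, the "skip to the next superpage boundary" step, and the l1f_to_lNf()/lNf_to_l1f() behaviour on non-present entries.

```c
/*
 * Standalone illustration (not Xen code; constants and names redefined
 * locally) of three pieces of logic from the patch above:
 *  - the eligibility test for using a 1Gb mapping,
 *  - the "skip to the next superpage boundary" step, and
 *  - the l1f_to_lNf()/lNf_to_l1f() behaviour on non-present entries.
 */
#include <stdio.h>

#define PAGE_SHIFT          12
#define PAGETABLE_ORDER     9
#define L2_PAGETABLE_SHIFT  (PAGE_SHIFT + PAGETABLE_ORDER)      /* 2Mb */
#define L3_PAGETABLE_SHIFT  (PAGE_SHIFT + 2 * PAGETABLE_ORDER)  /* 1Gb */

#define _PAGE_PRESENT       0x001UL
#define _PAGE_PSE           0x080UL

/* Only or-in/and-out _PAGE_PSE when the entry is actually present. */
#define l1f_to_lNf(f) ((f) & _PAGE_PRESENT ? (f) | _PAGE_PSE : (f))
#define lNf_to_l1f(f) ((f) & _PAGE_PRESENT ? (f) & ~_PAGE_PSE : (f))

/* A 1Gb mapping is usable when the virtual address and the MFN are both
 * 1Gb-aligned and at least 1Gb worth of pages remains to be mapped
 * (flag checks omitted in this sketch). */
static int can_use_1gb(unsigned long virt, unsigned long mfn,
                       unsigned long nr_mfns)
{
    unsigned long mask = (1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1;

    return !(((virt >> PAGE_SHIFT) | mfn) & mask) && nr_mfns >= mask + 1;
}

/* Instead of advancing a single 4k page when an existing superpage already
 * provides the wanted mapping, advance to the end of that superpage (or to
 * the end of the request, whichever comes first). */
static unsigned long pages_to_skip(unsigned long mfn, unsigned long nr_mfns,
                                   unsigned int shift)
{
    unsigned long span = 1UL << (shift - PAGE_SHIFT);
    unsigned long i = span - (mfn & (span - 1));

    return i > nr_mfns ? nr_mfns : i;
}

int main(void)
{
    /* 1Gb-aligned address and MFN, 1Gb worth of pages: eligible. */
    printf("%d\n", can_use_1gb(1UL << 30, 1UL << 18, 1UL << 18));       /* 1 */
    /* Misaligned MFN: fall back to smaller mappings. */
    printf("%d\n", can_use_1gb(1UL << 30, (1UL << 18) + 1, 1UL << 18)); /* 0 */
    /* Inside a 2Mb superpage at MFN 0x12310: skip the remaining 0xf0 pages. */
    printf("%#lx\n", pages_to_skip(0x12310UL, 0x10000UL, L2_PAGETABLE_SHIFT));
    /* Non-present flags are left untouched; present flags gain _PAGE_PSE. */
    printf("%#lx\n", l1f_to_lNf(0x0UL));          /* 0 */
    printf("%#lx\n", l1f_to_lNf(_PAGE_PRESENT));  /* 0x81 */
    return 0;
}
```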