[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86: add SSE-based copy_page()
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1226491455 0 # Node ID 8de4b4e9a435cea9b8e85863fcb832c213281076 # Parent 8e18dd41c6c7bb0980b29393b275c564cfb96437 x86: add SSE-based copy_page() On top of the highmem assistance hypercalls added earlier, this provides a performance improvement of another 12% (measured on Xeon E5345) for the page copying case. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx> --- xen/arch/x86/Makefile | 1 xen/arch/x86/copy_page.S | 66 ++++++++++++++++++++++++++++++++++++++++++++ xen/arch/x86/domain.c | 3 +- xen/arch/x86/domain_build.c | 5 ++- xen/include/asm-x86/page.h | 5 ++- 5 files changed, 76 insertions(+), 4 deletions(-) diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Wed Nov 12 12:01:35 2008 +0000 +++ b/xen/arch/x86/Makefile Wed Nov 12 12:04:15 2008 +0000 @@ -11,6 +11,7 @@ obj-y += apic.o obj-y += apic.o obj-y += bitops.o obj-y += clear_page.o +obj-y += copy_page.o obj-y += compat.o obj-y += delay.o obj-y += dmi_scan.o diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/copy_page.S --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/copy_page.S Wed Nov 12 12:04:15 2008 +0000 @@ -0,0 +1,66 @@ +#include <xen/config.h> +#include <asm/page.h> + +#ifdef __i386__ +#define src_reg %esi +#define dst_reg %edi +#define WORD_SIZE 4 +#define tmp1_reg %eax +#define tmp2_reg %edx +#define tmp3_reg %ebx +#define tmp4_reg %ebp +#else +#define src_reg %rsi +#define dst_reg %rdi +#define WORD_SIZE 8 +#define tmp1_reg %r8 +#define tmp2_reg %r9 +#define tmp3_reg %r10 +#define tmp4_reg %r11 +#endif + +ENTRY(copy_page_sse2) +#ifdef __i386__ + push %ebx + push %ebp + push %esi + push %edi + mov 6*4(%esp), src_reg + mov 5*4(%esp), dst_reg +#endif + mov $PAGE_SIZE/(4*WORD_SIZE)-3, %ecx + + prefetchnta 2*4*WORD_SIZE(src_reg) + mov (src_reg), tmp1_reg + mov WORD_SIZE(src_reg), tmp2_reg + mov 2*WORD_SIZE(src_reg), tmp3_reg + mov 3*WORD_SIZE(src_reg), tmp4_reg + +0: prefetchnta 
3*4*WORD_SIZE(src_reg) +1: add $4*WORD_SIZE, src_reg + movnti tmp1_reg, (dst_reg) + mov (src_reg), tmp1_reg + dec %ecx + movnti tmp2_reg, WORD_SIZE(dst_reg) + mov WORD_SIZE(src_reg), tmp2_reg + movnti tmp3_reg, 2*WORD_SIZE(dst_reg) + mov 2*WORD_SIZE(src_reg), tmp3_reg + movnti tmp4_reg, 3*WORD_SIZE(dst_reg) + lea 4*WORD_SIZE(dst_reg), dst_reg + mov 3*WORD_SIZE(src_reg), tmp4_reg + jg 0b + jpe 1b + + movnti tmp1_reg, (dst_reg) + movnti tmp2_reg, WORD_SIZE(dst_reg) + movnti tmp3_reg, 2*WORD_SIZE(dst_reg) + movnti tmp4_reg, 3*WORD_SIZE(dst_reg) + +#ifdef __i386__ + pop %edi + pop %esi + pop %ebp + pop %ebx +#endif + sfence + ret diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Wed Nov 12 12:01:35 2008 +0000 +++ b/xen/arch/x86/domain.c Wed Nov 12 12:04:15 2008 +0000 @@ -184,7 +184,8 @@ static int setup_compat_l4(struct vcpu * /* This page needs to look like a pagetable so that it can be shadowed */ pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1; - l4tab = copy_page(page_to_virt(pg), idle_pg_table); + l4tab = page_to_virt(pg); + copy_page(l4tab, idle_pg_table); l4tab[0] = l4e_empty(); l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] = l4e_from_page(pg, __PAGE_HYPERVISOR); diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Wed Nov 12 12:01:35 2008 +0000 +++ b/xen/arch/x86/domain_build.c Wed Nov 12 12:04:15 2008 +0000 @@ -455,8 +455,9 @@ int __init construct_dom0( /* WARNING: The new domain must have its 'processor' field filled in! 
*/ l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE; - memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE); - for (i = 0; i < 4; i++) { + for (i = 0; i < L3_PAGETABLE_ENTRIES; i++) { + copy_page(l2tab + i * L2_PAGETABLE_ENTRIES, + idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES); l3tab[i] = l3e_from_paddr((u32)l2tab + i*PAGE_SIZE, L3_PROT); l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] = l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR); diff -r 8e18dd41c6c7 -r 8de4b4e9a435 xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Wed Nov 12 12:01:35 2008 +0000 +++ b/xen/include/asm-x86/page.h Wed Nov 12 12:04:15 2008 +0000 @@ -215,7 +215,10 @@ void clear_page_sse2(void *); #define clear_page(_p) (cpu_has_xmm2 ? \ clear_page_sse2((void *)(_p)) : \ (void)memset((void *)(_p), 0, PAGE_SIZE)) -#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE) +void copy_page_sse2(void *, const void *); +#define copy_page(_t,_f) (cpu_has_xmm2 ? \ + copy_page_sse2(_t, _f) : \ + (void)memcpy(_t, _f, PAGE_SIZE)) #define mfn_valid(mfn) ((mfn) < max_page) _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |