[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86/asm: allow some unlikely taken branches to be statically predicted this way
# HG changeset patch # User Keir Fraser <keir@xxxxxxx> # Date 1292524640 0 # Node ID 8dc27840025c966cb57aaa1d56e61c69d5178189 # Parent 774908fc38221ccc106254c9a046ef3131482c4a x86/asm: allow some unlikely taken branches to be statically predicted this way ... by moving the respective code out of line (into sub-section 1 of the particular section). A few other branches could be eliminated altogether. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx> --- xen/arch/x86/x86_32/entry.S | 41 ++++++++++++++++----------------- xen/arch/x86/x86_64/compat/entry.S | 9 ++++--- xen/arch/x86/x86_64/entry.S | 17 ++++++++----- xen/include/asm-x86/asm_defns.h | 14 +++++++++++ xen/include/asm-x86/x86_32/asm_defns.h | 5 +++- xen/include/xen/stringify.h | 12 +++++++++ 6 files changed, 66 insertions(+), 32 deletions(-) diff -r 774908fc3822 -r 8dc27840025c xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Thu Dec 16 18:25:33 2010 +0000 +++ b/xen/arch/x86/x86_32/entry.S Thu Dec 16 18:37:20 2010 +0000 @@ -146,7 +146,7 @@ ENTRY(hypercall) ENTRY(hypercall) subl $4,%esp FIXUP_RING0_GUEST_STACK - SAVE_ALL(1f,1f) + SAVE_ALL(,1f) 1: sti GET_CURRENT(%ebx) cmpl $NR_hypercalls,%eax @@ -186,12 +186,14 @@ 1: sti #define SHADOW_BYTES 24 /* 6 shadow parameters */ #endif cmpb $0,tb_init_done - je 1f +UNLIKELY_START(ne, trace) call trace_hypercall /* Now restore all the registers that trace_hypercall clobbered */ movl UREGS_eax+SHADOW_BYTES(%esp),%eax /* Hypercall # */ +UNLIKELY_END(trace) + call *hypercall_table(,%eax,4) + movl %eax,UREGS_eax+SHADOW_BYTES(%esp) # save the return value #undef SHADOW_BYTES -1: call *hypercall_table(,%eax,4) addl $24,%esp # Discard the shadow parameters #ifndef NDEBUG /* Deliberately corrupt real parameter regs used by this hypercall. 
*/ @@ -201,13 +203,10 @@ 1: call *hypercall_table(,%eax,4) jne skip_clobber # If EIP has changed then don't clobber movzb hypercall_args_table(,%ecx,1),%ecx movl %esp,%edi - movl %eax,%esi movl $0xDEADBEEF,%eax rep stosl - movl %esi,%eax skip_clobber: #endif - movl %eax,UREGS_eax(%esp) # save the return value test_all_events: xorl %ecx,%ecx @@ -297,8 +296,8 @@ create_bounce_frame: jz ring1 /* jump if returning to an existing ring-1 activation */ movl VCPU_kernel_sp(%ebx),%esi .Lft6: mov VCPU_kernel_ss(%ebx),%gs - testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp) - jz .Lnvm86_1 + testl $X86_EFLAGS_VM,%ecx +UNLIKELY_START(nz, bounce_vm86_1) subl $16,%esi /* push ES/DS/FS/GS (VM86 stack frame) */ movl UREGS_es+4(%esp),%eax .Lft7: movl %eax,%gs:(%esi) @@ -308,7 +307,7 @@ create_bounce_frame: .Lft9: movl %eax,%gs:8(%esi) movl UREGS_gs+4(%esp),%eax .Lft10: movl %eax,%gs:12(%esi) -.Lnvm86_1: +UNLIKELY_END(bounce_vm86_1) subl $8,%esi /* push SS/ESP (inter-priv iret) */ movl UREGS_esp+4(%esp),%eax .Lft11: movl %eax,%gs:(%esi) @@ -350,17 +349,10 @@ 1: /* Construct a stack frame: EFLA movl TRAPBOUNCE_error_code(%edx),%eax .Lft17: movl %eax,%gs:(%esi) 1: testb $TBF_FAILSAFE,%cl - jz 2f +UNLIKELY_START(nz, bounce_failsafe) subl $16,%esi # add DS/ES/FS/GS to failsafe stack frame testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp) - jz .Lnvm86_2 - xorl %eax,%eax # VM86: we write zero selector values -.Lft18: movl %eax,%gs:(%esi) -.Lft19: movl %eax,%gs:4(%esi) -.Lft20: movl %eax,%gs:8(%esi) -.Lft21: movl %eax,%gs:12(%esi) - jmp 2f -.Lnvm86_2: + jnz .Lvm86_2 movl UREGS_ds+4(%esp),%eax # non-VM86: write real selector values .Lft22: movl %eax,%gs:(%esi) movl UREGS_es+4(%esp),%eax @@ -369,13 +361,22 @@ 1: testb $TBF_FAILSAFE,%cl .Lft24: movl %eax,%gs:8(%esi) movl UREGS_gs+4(%esp),%eax .Lft25: movl %eax,%gs:12(%esi) -2: testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp) - jz .Lnvm86_3 + jmp .Lnvm86_3 +.Lvm86_2: + xorl %eax,%eax # VM86: we write zero selector values +.Lft18: movl %eax,%gs:(%esi) +.Lft19: movl 
%eax,%gs:4(%esi) +.Lft20: movl %eax,%gs:8(%esi) +.Lft21: movl %eax,%gs:12(%esi) +UNLIKELY_END(bounce_failsafe) + testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp) +UNLIKELY_START(nz, bounce_vm86_3) xorl %eax,%eax /* zero DS-GS, just as a real CPU would */ movl %eax,UREGS_ds+4(%esp) movl %eax,UREGS_es+4(%esp) movl %eax,UREGS_fs+4(%esp) movl %eax,UREGS_gs+4(%esp) +UNLIKELY_END(bounce_vm86_3) .Lnvm86_3: /* Rewrite our stack frame and return to ring 1. */ /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */ diff -r 774908fc3822 -r 8dc27840025c xen/arch/x86/x86_64/compat/entry.S --- a/xen/arch/x86/x86_64/compat/entry.S Thu Dec 16 18:25:33 2010 +0000 +++ b/xen/arch/x86/x86_64/compat/entry.S Thu Dec 16 18:37:20 2010 +0000 @@ -49,7 +49,7 @@ ENTRY(compat_hypercall) #define SHADOW_BYTES 0 /* No on-stack shadow state */ #endif cmpb $0,tb_init_done(%rip) - je 1f +UNLIKELY_START(ne, compat_trace) call trace_hypercall /* Now restore all the registers that trace_hypercall clobbered */ movl UREGS_rax+SHADOW_BYTES(%rsp),%eax /* Hypercall # */ @@ -60,7 +60,8 @@ ENTRY(compat_hypercall) movl UREGS_rdi+SHADOW_BYTES(%rsp),%r8d /* Arg 5 */ movl UREGS_rbp+SHADOW_BYTES(%rsp),%r9d /* Arg 6 */ #undef SHADOW_BYTES -1: leaq compat_hypercall_table(%rip),%r10 +UNLIKELY_END(compat_trace) + leaq compat_hypercall_table(%rip),%r10 PERFC_INCR(PERFC_hypercalls, %rax, %rbx) callq *(%r10,%rax,8) #ifndef NDEBUG @@ -299,7 +300,7 @@ 2: .Lft8: movl %eax,%fs:(%rsi) # ERROR CODE 1: testb $TBF_FAILSAFE,%cl - jz 2f +UNLIKELY_START(nz, compat_bounce_failsafe) subl $4*4,%esi movl %gs,%eax .Lft9: movl %eax,%fs:3*4(%rsi) # GS @@ -308,7 +309,7 @@ 1: .Lft11: movl %eax,%fs:1*4(%rsi) # ES movl %ds,%eax .Lft12: movl %eax,%fs:0*4(%rsi) # DS -2: +UNLIKELY_END(compat_bounce_failsafe) /* Rewrite our stack frame and return to guest-OS mode. */ /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. 
*/ andl $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\ diff -r 774908fc3822 -r 8dc27840025c xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Thu Dec 16 18:25:33 2010 +0000 +++ b/xen/arch/x86/x86_64/entry.S Thu Dec 16 18:37:20 2010 +0000 @@ -152,7 +152,7 @@ ENTRY(syscall_enter) #define SHADOW_BYTES 0 /* No on-stack shadow state */ #endif cmpb $0,tb_init_done(%rip) - je 1f +UNLIKELY_START(ne, trace) call trace_hypercall /* Now restore all the registers that trace_hypercall clobbered */ movq UREGS_rax+SHADOW_BYTES(%rsp),%rax /* Hypercall # */ @@ -163,7 +163,8 @@ ENTRY(syscall_enter) movq UREGS_r8 +SHADOW_BYTES(%rsp),%r8 /* Arg 5 */ movq UREGS_r9 +SHADOW_BYTES(%rsp),%r9 /* Arg 6 */ #undef SHADOW_BYTES -1: leaq hypercall_table(%rip),%r10 +UNLIKELY_END(trace) + leaq hypercall_table(%rip),%r10 PERFC_INCR(PERFC_hypercalls, %rax, %rbx) callq *(%r10,%rax,8) #ifndef NDEBUG @@ -345,11 +346,12 @@ 2: andq $~0xf,%rsi 2: andq $~0xf,%rsi # Stack frames are 16-byte aligned. movq $HYPERVISOR_VIRT_START,%rax cmpq %rax,%rsi - jb 1f # In +ve address space? Then okay. movq $HYPERVISOR_VIRT_END+60,%rax + sbb %ecx,%ecx # In +ve address space? Then okay. cmpq %rax,%rsi - jb domain_crash_synchronous # Above Xen private area? Then okay. -1: movb TRAPBOUNCE_flags(%rdx),%cl + adc %ecx,%ecx # Above Xen private area? Then okay. 
+ jg domain_crash_synchronous + movb TRAPBOUNCE_flags(%rdx),%cl subq $40,%rsi movq UREGS_ss+8(%rsp),%rax .Lft2: movq %rax,32(%rsi) # SS @@ -380,7 +382,7 @@ 1: movb TRAPBOUNCE_flags(%rdx),%cl movl TRAPBOUNCE_error_code(%rdx),%eax .Lft7: movq %rax,(%rsi) # ERROR CODE 1: testb $TBF_FAILSAFE,%cl - jz 2f +UNLIKELY_START(nz, bounce_failsafe) subq $32,%rsi movl %gs,%eax .Lft8: movq %rax,24(%rsi) # GS @@ -390,7 +392,8 @@ 1: testb $TBF_FAILSAFE,%cl .Lft10: movq %rax,8(%rsi) # ES movl %ds,%eax .Lft11: movq %rax,(%rsi) # DS -2: subq $16,%rsi +UNLIKELY_END(bounce_failsafe) + subq $16,%rsi movq UREGS_r11+8(%rsp),%rax .Lft12: movq %rax,8(%rsi) # R11 movq UREGS_rcx+8(%rsp),%rax diff -r 774908fc3822 -r 8dc27840025c xen/include/asm-x86/asm_defns.h --- a/xen/include/asm-x86/asm_defns.h Thu Dec 16 18:25:33 2010 +0000 +++ b/xen/include/asm-x86/asm_defns.h Thu Dec 16 18:37:20 2010 +0000 @@ -12,4 +12,18 @@ #include <asm/x86_32/asm_defns.h> #endif +#ifdef __ASSEMBLY__ + +#define UNLIKELY_START(cond, tag) \ + j##cond .Lunlikely.tag; \ + .subsection 1; \ + .Lunlikely.tag: + +#define UNLIKELY_END(tag) \ + jmp .Llikely.tag; \ + .subsection 0; \ + .Llikely.tag: + +#endif + #endif /* __X86_ASM_DEFNS_H__ */ diff -r 774908fc3822 -r 8dc27840025c xen/include/asm-x86/x86_32/asm_defns.h --- a/xen/include/asm-x86/x86_32/asm_defns.h Thu Dec 16 18:25:33 2010 +0000 +++ b/xen/include/asm-x86/x86_32/asm_defns.h Thu Dec 16 18:37:20 2010 +0000 @@ -1,6 +1,7 @@ #ifndef __X86_32_ASM_DEFNS_H__ #define __X86_32_ASM_DEFNS_H__ +#include <xen/stringify.h> #include <asm/percpu.h> #ifdef CONFIG_FRAME_POINTER @@ -53,12 +54,14 @@ 1: addl $4,%esp; mov %es,%esi; \ mov $(__HYPERVISOR_DS),%ecx; \ jnz 86f; \ - .text 1; \ + .subsection 1; \ 86: call setup_vm86_frame; \ jmp vm86_lbl; \ .previous; \ + .ifnes __stringify(xen_lbl), ""; \ testb $3,UREGS_cs(%esp); \ jz xen_lbl; \ + .endif; \ /* \ * We are the outermost Xen context, but our \ * life is complicated by NMIs and MCEs. 
These \ diff -r 774908fc3822 -r 8dc27840025c xen/include/xen/stringify.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/xen/stringify.h Thu Dec 16 18:37:20 2010 +0000 @@ -0,0 +1,12 @@ +#ifndef __XEN_STRINGIFY_H +#define __XEN_STRINGIFY_H + +/* Indirect stringification. Doing two levels allows the parameter to be a + * macro itself. For example, compile with -DFOO=bar, __stringify(FOO) + * converts to "bar". + */ + +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#endif /* !__XEN_STRINGIFY_H */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
Lists.xenproject.org is hosted with RackSpace, monitoring our [...]