[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen master] x86/traps: Drop exception_table[] and use if/else dispatching
commit 327db3837a223dd0772348192126302852de5762 Author: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> AuthorDate: Thu Oct 7 14:04:03 2021 +0100 Commit: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> CommitDate: Fri Dec 17 17:03:54 2021 +0000 x86/traps: Drop exception_table[] and use if/else dispatching There is also a lot of redundancy in the table. 8 vectors head to do_trap(), 3 are handled in the IST logic, and that only leaves 7 others not heading to the do_reserved_trap() catch-all. This also removes the fragility that any accidental NULL entry in the table becomes a ticking timebomb. Function pointers are expensive under retpoline, and different vectors have wildly different frequencies. Drop the indirect call, and use an if/else chain instead, which is a code layout technique used by profile-guided optimisation. Using Xen's own perfcounter infrastructure, we see the following frequencies of vectors measured from boot until I can SSH into dom0 and collect the stats: vec | CFL-R | Milan | Notes ----+---------+---------+ NMI | 345 | 3768 | Watchdog. Milan has many more CPUs. ----+---------+---------+ #PF | 1233234 | 2006441 | #GP | 90054 | 96193 | #UD | 848 | 851 | #NM | 0 | 132 | Per-vendor lazy vs eager FPU policy. #DB | 67 | 67 | No clue, but it's something in userspace. Bloat-o-meter (after some manual insertion of ELF metadata) reports: add/remove: 0/1 grow/shrink: 2/0 up/down: 102/-256 (-154) Function old new delta handle_exception_saved 148 226 +78 handle_ist_exception 453 477 +24 exception_table 256 - -256 showing that the if/else chains are less than half the size that exception_table[] was in the first place. As part of this change, make two other minor changes. do_reserved_trap() is renamed to do_unhandled_trap() because it is the catchall, and already covers things that aren't reserved any more (#VE/#VC/#HV/#SX). Furthermore, don't forward #TS to guests. 
#TS is specifically for errors relating to the Task State Segment, which is a Xen-owned structure, not a guest-owned structure. Even in the 32bit days, we never let guests register their own Task State Segments. Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> Acked-by: Jan Beulich <jbeulich@xxxxxxxx> --- xen/arch/x86/include/asm/processor.h | 3 -- xen/arch/x86/traps.c | 34 ++---------------- xen/arch/x86/x86_64/entry.S | 67 ++++++++++++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 42 deletions(-) diff --git a/xen/arch/x86/include/asm/processor.h b/xen/arch/x86/include/asm/processor.h index 400b4fac5e..e2e1eaf5bd 100644 --- a/xen/arch/x86/include/asm/processor.h +++ b/xen/arch/x86/include/asm/processor.h @@ -505,9 +505,6 @@ extern void mtrr_bp_init(void); void mcheck_init(struct cpuinfo_x86 *c, bool_t bsp); -/* Dispatch table for exceptions */ -extern void (* const exception_table[TRAP_nr])(struct cpu_user_regs *regs); - #define DECLARE_TRAP_HANDLER(_name) \ void _name(void); \ void do_ ## _name(struct cpu_user_regs *regs) diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index df7ffc448b..581d8be2aa 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -135,36 +135,6 @@ const unsigned int nmi_cpu; #define stack_words_per_line 4 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp) -static void do_trap(struct cpu_user_regs *regs); -static void do_reserved_trap(struct cpu_user_regs *regs); - -void (* const exception_table[TRAP_nr])(struct cpu_user_regs *regs) = { - [TRAP_divide_error] = do_trap, - [TRAP_debug] = do_debug, - [TRAP_nmi] = (void *)do_nmi, - [TRAP_int3] = do_int3, - [TRAP_overflow] = do_trap, - [TRAP_bounds] = do_trap, - [TRAP_invalid_op] = do_invalid_op, - [TRAP_no_device] = do_device_not_available, - [TRAP_double_fault] = do_reserved_trap, - [TRAP_copro_seg] = do_reserved_trap, - [TRAP_invalid_tss] = do_trap, - [TRAP_no_segment] = do_trap, - [TRAP_stack_error] = do_trap, - [TRAP_gp_fault] = 
do_general_protection, - [TRAP_page_fault] = do_page_fault, - [TRAP_spurious_int] = do_reserved_trap, - [TRAP_copro_error] = do_trap, - [TRAP_alignment_check] = do_trap, - [TRAP_machine_check] = (void *)do_machine_check, - [TRAP_simd_error] = do_trap, - [TRAP_virtualisation] = do_reserved_trap, - [X86_EXC_CP] = do_entry_CP, - [X86_EXC_CP + 1 ... - (ARRAY_SIZE(exception_table) - 1)] = do_reserved_trap, -}; - void show_code(const struct cpu_user_regs *regs) { unsigned char insns_before[8] = {}, insns_after[16] = {}; @@ -889,7 +859,7 @@ void fatal_trap(const struct cpu_user_regs *regs, bool show_remote) (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT CONTEXT"); } -static void do_reserved_trap(struct cpu_user_regs *regs) +void do_unhandled_trap(struct cpu_user_regs *regs) { unsigned int trapnr = regs->entry_vector; @@ -981,7 +951,7 @@ static bool extable_fixup(struct cpu_user_regs *regs, bool print) return true; } -static void do_trap(struct cpu_user_regs *regs) +void do_trap(struct cpu_user_regs *regs) { unsigned int trapnr = regs->entry_vector; diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index 3caa565476..3eaf0e67b2 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -773,14 +773,48 @@ handle_exception_saved: sti 1: movq %rsp,%rdi movzbl UREGS_entry_vector(%rsp),%eax - leaq exception_table(%rip),%rdx #ifdef CONFIG_PERF_COUNTERS lea per_cpu__perfcounters(%rip), %rcx add STACK_CPUINFO_FIELD(per_cpu_offset)(%r14), %rcx incl ASM_PERFC_exceptions * 4(%rcx, %rax, 4) #endif - mov (%rdx, %rax, 8), %rdx - INDIRECT_CALL %rdx + + /* + * Dispatch to appropriate C handlers. + * + * The logic is implemented as an if/else chain. DISPATCH() calls + * need be in frequency order for best performance. 
+ */ +#define DISPATCH(vec, handler) \ + cmp $vec, %al; \ + jne .L_ ## vec ## _done; \ + call handler; \ + jmp .L_exn_dispatch_done; \ +.L_ ## vec ## _done: + + DISPATCH(X86_EXC_PF, do_page_fault) + DISPATCH(X86_EXC_GP, do_general_protection) + DISPATCH(X86_EXC_UD, do_invalid_op) + DISPATCH(X86_EXC_NM, do_device_not_available) + DISPATCH(X86_EXC_BP, do_int3) + + /* Logically "if ( (1 << vec) & MASK ) { do_trap(); }" */ + mov $(1 << X86_EXC_DE) | (1 << X86_EXC_OF) | (1 << X86_EXC_BR) |\ + (1 << X86_EXC_NP) | (1 << X86_EXC_SS) | (1 << X86_EXC_MF) |\ + (1 << X86_EXC_AC) | (1 << X86_EXC_XM), %edx + bt %eax, %edx + jnc .L_do_trap_done + call do_trap + jmp .L_exn_dispatch_done +.L_do_trap_done: + + DISPATCH(X86_EXC_CP, do_entry_CP) +#undef DISPATCH + + call do_unhandled_trap + BUG /* do_unhandled_trap() shouldn't return. */ + +.L_exn_dispatch_done: mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) mov %r13b, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) #ifdef CONFIG_PV @@ -1012,9 +1046,28 @@ handle_ist_exception: incl ASM_PERFC_exceptions * 4(%rcx, %rax, 4) #endif - leaq exception_table(%rip),%rdx - mov (%rdx, %rax, 8), %rdx - INDIRECT_CALL %rdx + /* + * Dispatch to appropriate C handlers. + * + * The logic is implemented as an if/else chain. DISPATCH() calls + * need be in frequency order for best performance. + */ +#define DISPATCH(vec, handler) \ + cmp $vec, %al; \ + jne .L_ ## vec ## _done; \ + call handler; \ + jmp .L_ist_dispatch_done; \ +.L_ ## vec ## _done: + + DISPATCH(X86_EXC_NMI, do_nmi) + DISPATCH(X86_EXC_DB, do_debug) + DISPATCH(X86_EXC_MC, do_machine_check) +#undef DISPATCH + + call do_unhandled_trap + BUG /* do_unhandled_trap() shouldn't return. */ + +.L_ist_dispatch_done: mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) mov %bl, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) cmpb $TRAP_nmi,UREGS_entry_vector(%rsp) @@ -1088,7 +1141,7 @@ autogen_stubs: /* Automatically generated stubs. */ entrypoint 1b - /* Reserved exceptions, heading towards do_reserved_trap(). 
*/ + /* Reserved exceptions, heading towards do_unhandled_trap(). */ .elseif vec == X86_EXC_CSO || vec == X86_EXC_SPV || \ vec == X86_EXC_VE || (vec > X86_EXC_CP && vec < TRAP_nr) -- generated by git-patchbot for /home/xen/git/xen.git#master
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |