x86emul: catch exceptions occurring in stubs

Before adding more use of stubs cloned from decoded guest insns, guard
ourselves against mistakes there: Should an exception (with the
noteworthy exception of #PF) occur inside the stub, forward it to the
guest.

Since the exception fixup table entry can't encode the address of the
faulting insn itself, attach it to the return address instead. This at
once provides a convenient place to hand the exception information
back: The return address is being overwritten by it before branching to
the recovery code.

Take the opportunity and (finally!) add symbol resolution to the
respective log messages (the new one is intentionally not being coded
that way, as it covers stub addresses only, which don't have symbols
associated). Also take the opportunity and make search_one_extable()
static again.

Suggested-by: Andrew Cooper
Signed-off-by: Jan Beulich
---
v3: Also recover from #PF and #DB, eliminating the need for the 2nd
    parameter of search_exception_table(). Move its invocation in
    do_trap(), as already suggested as option in v1. Use union
    stub_exception_token also on the producing side. Shrink the union's
    ec field to 16 bits. Only propagate #UD to guest, crash it for all
    other unexpected exceptions. Log a message in both cases.
---
There's one possible caveat here: A stub invocation immediately
followed by another instruction having fault recovery attached to it
would not work properly, as the table lookup can only ever find one of
the two entries. Such CALL instructions would therefore need to be
followed by a NOP for disambiguation (even if only a slim chance exists
for the compiler to emit things that way); a small sketch of the
ambiguity follows the diff below.

Note that the two SIMD related stub invocations in the insn emulator
intentionally don't get adjusted here, as subsequent patches will
replace them anyway.
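[Illustration, not part of the patch: a minimal self-contained sketch
of the token handshake described above. The union mirrors the one this
patch adds to uaccess.h; the vector/error-code values (#GP, i.e. 13,
with a zero error code) are invented example values. The caller
pre-loads an all-ones sentinel meaning "no exception";
search_exception_table() overwrites the stub's on-stack return address
with the token, which the fixup code then pops.]

    #include <stdint.h>
    #include <stdio.h>

    /* Same layout as the union this patch adds to uaccess.h. */
    union stub_exception_token {
        struct {
            uint16_t ec;      /* error code */
            uint8_t  trapnr;  /* exception vector */
        } fields;
        unsigned long raw;
    };

    int main(void)
    {
        /* Sentinel set up before calling the stub: ~0 == "no exception". */
        union stub_exception_token res = { .raw = ~0UL };

        /*
         * The fault handler would build a token and write it over the
         * saved return address; model that with a plain assignment.
         * Example values: #GP (vector 13), error code 0.
         */
        const union stub_exception_token token = {
            .fields.ec = 0,
            .fields.trapnr = 13,
        };
        res = token;

        /* The consumer-side test, as in invoke_stub(). */
        if ( ~res.raw )
            printf("exception %u (ec=%04x) in stub\n",
                   res.fields.trapnr, res.fields.ec);

        return 0;
    }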
--- a/xen/arch/x86/extable.c
+++ b/xen/arch/x86/extable.c
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -62,7 +63,7 @@ void __init sort_exception_tables(void)
     sort_exception_table(__start___pre_ex_table, __stop___pre_ex_table);
 }
 
-unsigned long
+static unsigned long
 search_one_extable(const struct exception_table_entry *first,
                    const struct exception_table_entry *last,
                    unsigned long value)
@@ -85,15 +86,91 @@ search_one_extable(const struct exceptio
 }
 
 unsigned long
-search_exception_table(unsigned long addr)
+search_exception_table(const struct cpu_user_regs *regs)
 {
-    const struct virtual_region *region = find_text_region(addr);
+    const struct virtual_region *region = find_text_region(regs->rip);
+    unsigned long stub = this_cpu(stubs.addr);
 
     if ( region && region->ex )
-        return search_one_extable(region->ex, region->ex_end - 1, addr);
+        return search_one_extable(region->ex, region->ex_end - 1, regs->rip);
+
+    if ( regs->rip >= stub + STUB_BUF_SIZE / 2 &&
+         regs->rip < stub + STUB_BUF_SIZE &&
+         regs->rsp > (unsigned long)regs &&
+         regs->rsp < (unsigned long)get_cpu_info() )
+    {
+        unsigned long retptr = *(unsigned long *)regs->rsp;
+
+        region = find_text_region(retptr);
+        retptr = region && region->ex
+                 ? search_one_extable(region->ex, region->ex_end - 1, retptr)
+                 : 0;
+        if ( retptr )
+        {
+            /*
+             * Put trap number and error code on the stack (in place of the
+             * original return address) for recovery code to pick up.
+             */
+            union stub_exception_token token = {
+                .fields.ec = regs->error_code,
+                .fields.trapnr = regs->entry_vector,
+            };
+
+            *(unsigned long *)regs->rsp = token.raw;
+            return retptr;
+        }
+    }
+
+    return 0;
+}
+
+#ifndef NDEBUG
+static int __init stub_selftest(void)
+{
+    static const struct {
+        uint8_t opc[4];
+        uint64_t rax;
+        union stub_exception_token res;
+    } tests[] __initconst = {
+        { .opc = { 0x0f, 0xb9, 0xc3, 0xc3 }, /* ud1 */
+          .res.fields.trapnr = TRAP_invalid_op },
+        { .opc = { 0x90, 0x02, 0x00, 0xc3 }, /* nop; add (%rax),%al */
+          .rax = 0x0123456789abcdef,
+          .res.fields.trapnr = TRAP_gp_fault },
+        { .opc = { 0x02, 0x04, 0x04, 0xc3 }, /* add (%rsp,%rax),%al */
+          .rax = 0xfedcba9876543210,
+          .res.fields.trapnr = TRAP_stack_error },
+    };
+    unsigned long addr = this_cpu(stubs.addr) + STUB_BUF_SIZE / 2;
+    unsigned int i;
+
+    for ( i = 0; i < ARRAY_SIZE(tests); ++i )
+    {
+        uint8_t *ptr = map_domain_page(_mfn(this_cpu(stubs.mfn))) +
+                       (addr & ~PAGE_MASK);
+        unsigned long res = ~0;
+
+        memset(ptr, 0xcc, STUB_BUF_SIZE / 2);
+        memcpy(ptr, tests[i].opc, ARRAY_SIZE(tests[i].opc));
+        unmap_domain_page(ptr);
+
+        asm volatile ( "call *%[stb]\n"
+                       ".Lret%=:\n\t"
+                       ".pushsection .fixup,\"ax\"\n"
+                       ".Lfix%=:\n\t"
+                       "pop %[exn]\n\t"
+                       "jmp .Lret%=\n\t"
+                       ".popsection\n\t"
+                       _ASM_EXTABLE(.Lret%=, .Lfix%=)
+                       : [exn] "+m" (res)
+                       : [stb] "rm" (addr), "a" (tests[i].rax));
+        ASSERT(res == tests[i].res.raw);
+    }
 
     return 0;
 }
+__initcall(stub_selftest);
+#endif
 
 unsigned long
 search_pre_exception_table(struct cpu_user_regs *regs)
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -799,15 +799,6 @@ void do_trap(struct cpu_user_regs *regs)
         return;
     }
 
-    if ( likely((fixup = search_exception_table(regs->rip)) != 0) )
-    {
-        dprintk(XENLOG_ERR, "Trap %d: %p -> %p\n",
-                trapnr, _p(regs->rip), _p(fixup));
-        this_cpu(last_extable_addr) = regs->rip;
-        regs->rip = fixup;
-        return;
-    }
-
     if ( ((trapnr == TRAP_copro_error) || (trapnr == TRAP_simd_error)) &&
          system_state >= SYS_STATE_active && has_hvm_container_vcpu(curr) &&
          curr->arch.hvm_vcpu.fpu_exception_callback )
@@ -817,6 +808,15 @@ void do_trap(struct cpu_user_regs *regs)
         return;
     }
 
+    if ( likely((fixup = search_exception_table(regs)) != 0) )
+    {
+        dprintk(XENLOG_ERR, "Trap %u: %p [%ps] -> %p\n",
+                trapnr, _p(regs->rip), _p(regs->rip), _p(fixup));
+        this_cpu(last_extable_addr) = regs->rip;
+        regs->rip = fixup;
+        return;
+    }
+
  hardware_trap:
     if ( debugger_trap_fatal(trapnr, regs) )
         return;
@@ -1186,7 +1186,7 @@ void do_invalid_op(struct cpu_user_regs
     }
 
  die:
-    if ( (fixup = search_exception_table(regs->rip)) != 0 )
+    if ( (fixup = search_exception_table(regs)) != 0 )
     {
         this_cpu(last_extable_addr) = regs->rip;
         regs->rip = fixup;
@@ -1516,7 +1516,7 @@ void do_page_fault(struct cpu_user_regs
     if ( pf_type != real_fault )
         return;
 
-    if ( likely((fixup = search_exception_table(regs->rip)) != 0) )
+    if ( likely((fixup = search_exception_table(regs)) != 0) )
     {
         perfc_incr(copy_user_faults);
         if ( unlikely(regs->error_code & PFEC_reserved_bit) )
@@ -3463,10 +3463,10 @@ void do_general_protection(struct cpu_us
 
  gp_in_kernel:
 
-    if ( likely((fixup = search_exception_table(regs->rip)) != 0) )
+    if ( likely((fixup = search_exception_table(regs)) != 0) )
    {
-        dprintk(XENLOG_INFO, "GPF (%04x): %p -> %p\n",
-                regs->error_code, _p(regs->rip), _p(fixup));
+        dprintk(XENLOG_INFO, "GPF (%04x): %p [%ps] -> %p\n",
+                regs->error_code, _p(regs->rip), _p(regs->rip), _p(fixup));
         this_cpu(last_extable_addr) = regs->rip;
         regs->rip = fixup;
         return;
@@ -3742,7 +3742,7 @@ void do_debug(struct cpu_user_regs *regs
              * watchpoint set on it. No need to bump EIP; the only faulting
              * trap is an instruction breakpoint, which can't happen to us.
              */
-            WARN_ON(!search_exception_table(regs->rip));
+            WARN_ON(!search_exception_table(regs));
         }
         goto out;
     }
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -604,14 +604,42 @@ do{ asm volatile (
 #define __emulate_1op_8byte(_op, _dst, _eflags)
 #endif /* __i386__ */
 
+#ifdef __XEN__
+# define invoke_stub(pre, post, constraints...) do {                   \
+    union stub_exception_token res_ = { .raw = ~0 };                   \
+    asm volatile ( pre "\n\tcall *%[stub]\n\t" post "\n"               \
+                   ".Lret%=:\n\t"                                      \
+                   ".pushsection .fixup,\"ax\"\n"                      \
+                   ".Lfix%=:\n\t"                                      \
+                   "pop %[exn]\n\t"                                    \
+                   "jmp .Lret%=\n\t"                                   \
+                   ".popsection\n\t"                                   \
+                   _ASM_EXTABLE(.Lret%=, .Lfix%=)                      \
+                   : [exn] "+g" (res_), constraints,                   \
+                     [stub] "rm" (stub.func) );                        \
+    if ( unlikely(~res_.raw) )                                         \
+    {                                                                  \
+        gprintk(XENLOG_WARNING,                                        \
+                "exception %u (ec=%04x) in emulation stub (line %u)\n",\
+                res_.fields.trapnr, res_.fields.ec, __LINE__);         \
+        if ( res_.fields.trapnr != EXC_UD )                            \
+            domain_crash(current->domain);                             \
+        else                                                           \
+            generate_exception(EXC_UD);                                \
+    }                                                                  \
+} while (0)
+#else
+# define invoke_stub(pre, post, constraints...)                        \
+    asm volatile ( pre "\n\tcall *%[stub]\n\t" post                    \
+                   : constraints, [stub] "rm" (stub.func) )
+#endif
+
 #define emulate_stub(dst, src...) do {                                 \
     unsigned long tmp;                                                 \
-    asm volatile ( _PRE_EFLAGS("[efl]", "[msk]", "[tmp]")              \
-                   "call *%[stub];"                                    \
-                   _POST_EFLAGS("[efl]", "[msk]", "[tmp]")             \
-                   : dst, [tmp] "=&r" (tmp), [efl] "+g" (_regs._eflags) \
-                   : [stub] "r" (stub.func),                           \
-                     [msk] "i" (EFLAGS_MASK), ## src );                \
+    invoke_stub(_PRE_EFLAGS("[efl]", "[msk]", "[tmp]"),                \
+                _POST_EFLAGS("[efl]", "[msk]", "[tmp]"),               \
+                dst, [tmp] "=&r" (tmp), [efl] "+g" (_regs._eflags)     \
+                : [msk] "i" (EFLAGS_MASK), ## src);                    \
 } while (0)
 
 /* Fetch next part of the instruction being emulated. */
@@ -858,8 +886,7 @@ do {
     unsigned int nr_ = sizeof((uint8_t[]){ bytes });                   \
     fic.insn_bytes = nr_;                                              \
     memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);     \
-    asm volatile ( "call *%[stub]" : "+m" (fic) :                      \
-                   [stub] "rm" (stub.func) );                          \
+    invoke_stub("", "", "=m" (fic) : "m" (fic));                       \
     put_stub(stub);                                                    \
 } while (0)
 
@@ -869,14 +896,11 @@ do {
     unsigned long tmp_;                                                \
     fic.insn_bytes = nr_;                                              \
     memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);     \
-    asm volatile ( _PRE_EFLAGS("[eflags]", "[mask]", "[tmp]")          \
-                   "call *%[func];"                                    \
-                   _POST_EFLAGS("[eflags]", "[mask]", "[tmp]")         \
-                   : [eflags] "+g" (_regs._eflags),                    \
-                     [tmp] "=&r" (tmp_), "+m" (fic)                    \
-                   : [func] "rm" (stub.func),                          \
-                     [mask] "i" (X86_EFLAGS_ZF|X86_EFLAGS_PF|          \
-                                 X86_EFLAGS_CF) );                     \
+    invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),            \
+                _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),           \
+                [eflags] "+g" (_regs._eflags), [tmp] "=&r" (tmp_),     \
+                "+m" (fic)                                             \
+                : [mask] "i" (X86_EFLAGS_ZF|X86_EFLAGS_PF|X86_EFLAGS_CF)); \
     put_stub(stub);                                                    \
 } while (0)
--- a/xen/include/asm-x86/uaccess.h
+++ b/xen/include/asm-x86/uaccess.h
@@ -275,7 +275,15 @@ extern struct exception_table_entry __st
 extern struct exception_table_entry __start___pre_ex_table[];
 extern struct exception_table_entry __stop___pre_ex_table[];
 
-extern unsigned long search_exception_table(unsigned long);
+union stub_exception_token {
+    struct {
+        uint16_t ec;
+        uint8_t trapnr;
+    } fields;
+    unsigned long raw;
+};
+
+extern unsigned long search_exception_table(const struct cpu_user_regs *regs);
 extern void sort_exception_tables(void);
 extern void sort_exception_table(struct exception_table_entry *start,
                                  const struct exception_table_entry *stop);
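[Aside, illustrating the NOP-disambiguation caveat noted ahead of the
diff; the mnemonics and the "ret" label are invented. Both fixup table
entries would carry the same lookup key, namely the address right after
the CALL: the stub fault is attributed to the return address, which is
also where the next insn lives:

    call *%[stub]        # pushes return address "ret"; a fault inside
                         # the stub is looked up under key "ret"
ret: mov (%rbx),%eax     # a fault here, with its own _ASM_EXTABLE
                         # entry, is looked up under key "ret" as well

With a NOP inserted after the CALL, the stub's lookup key becomes the
NOP's address, while the following insn's own fixup entry keeps a
distinct key, so the sorted-table search can tell the two apart.]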