Re: [Xen-devel] [PATCH 1/4] x86: move syscall trampolines off the stack
On 18/05/15 13:46, Jan Beulich wrote:
> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -25,6 +25,7 @@
>  #include <xen/kernel.h>
>  #include <xen/mm.h>
>  #include <xen/domain.h>
> +#include <xen/domain_page.h>
>  #include <xen/sched.h>
>  #include <xen/sched-if.h>
>  #include <xen/irq.h>
> @@ -603,6 +604,42 @@ static int do_boot_cpu(int apicid, int c
>      return rc;
>  }
>
> +unsigned long alloc_stub_page(unsigned int cpu, unsigned long *mfn)
> +{
> +    unsigned long stub_va;
> +    struct page_info *pg;
> +
> +    if ( *mfn )
> +        pg = mfn_to_page(*mfn);
> +    else
> +    {
> +        nodeid_t node = cpu_to_node(cpu);
> +        unsigned int memflags = node != NUMA_NO_NODE ? MEMF_node(node) : 0;
> +
> +        pg = alloc_domheap_page(NULL, memflags);
> +        if ( !pg )
> +            return 0;
> +
> +        unmap_domain_page(memset(__map_domain_page(pg), 0xcc, PAGE_SIZE));

Seems like memset_page(pg, int val) might be a nice piece of cleanup.
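Something along these lines is what I had in mind (an untested sketch only;
the name and location are up for debate), which would let the call above read
simply memset_page(pg, 0xcc):

    /* Hypothetical helper: map a domheap page, fill it, unmap it again. */
    static inline void memset_page(struct page_info *pg, int val)
    {
        void *ptr = __map_domain_page(pg);

        memset(ptr, val, PAGE_SIZE);
        unmap_domain_page(ptr);
    }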
> +    }
> +
> +    stub_va = XEN_VIRT_END - (cpu + 1) * PAGE_SIZE;
> +    if ( map_pages_to_xen(stub_va, page_to_mfn(pg), 1,
> +                          PAGE_HYPERVISOR_RX | MAP_SMALL_PAGES) )
> +    {
> +        if ( !*mfn )
> +            free_domheap_page(pg);
> +        stub_va = 0;
> +    }
> +    else
> +    {
> +        stub_va += (cpu & ~(STUBS_PER_PAGE - 1)) * STUB_BUF_SIZE;

"(cpu & ~(STUBS_PER_PAGE - 1)) * STUB_BUF_SIZE" comes up very frequently
throughout this patch.  I think the logic would be easier to read given:

    #define STUB_OFFSET(cpu) (((cpu) & ~(STUBS_PER_PAGE - 1)) * STUB_BUF_SIZE)

> +        *mfn = page_to_mfn(pg);
> +    }
> +
> +    return stub_va;
> +}
> +
>  void cpu_exit_clear(unsigned int cpu)
>  {
>      cpu_uninit(cpu);
> @@ -616,6 +653,24 @@ static void cpu_smpboot_free(unsigned in
>      free_cpumask_var(per_cpu(cpu_sibling_mask, cpu));
>      free_cpumask_var(per_cpu(cpu_core_mask, cpu));
>
> +    if ( per_cpu(stubs.addr, cpu) )
> +    {
> +        unsigned long mfn = per_cpu(stubs.mfn, cpu);
> +        unsigned char *stub_page = map_domain_page(mfn);
> +        unsigned int i;
> +
> +        memset(stub_page + (cpu & (STUBS_PER_PAGE - 1)) * STUB_BUF_SIZE,
> +               0xcc, STUB_BUF_SIZE);
> +        for ( i = 0; i < STUBS_PER_PAGE; ++i )
> +            if ( stub_page[i * STUB_BUF_SIZE] != 0xcc)
> +                break;

There is a race condition between allocate and free, as the trampoline is
written by the cpu itself via write_stub_trampoline().  Just finding 0xcc here
doesn't mean there isn't a different cpu in the process of coming up and
intending to use the stub page.

I suspect the race can be fixed by having the alloc side clobber the first
instruction, perhaps to ud2?

(Also, a style nit at the end of the if: missing space before the closing
parenthesis.)
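To illustrate what I mean (completely untested, and writing at the same
in-page offset that the free side checks), something like this on the
allocation path, before the new cpu gets a chance to write its real
trampoline:

    /* Illustrative only: mark this cpu's stub slot as in use, so that a
     * concurrent cpu_smpboot_free() for another cpu sharing the page can't
     * mistake the slot for unused (0xcc) and free the page underneath us. */
    unsigned char *stub = map_domain_page(page_to_mfn(pg));
    unsigned int off = (cpu & (STUBS_PER_PAGE - 1)) * STUB_BUF_SIZE;

    stub[off]     = 0x0f;  /* ud2 is 0f 0b; anything other than 0xcc would do. */
    stub[off + 1] = 0x0b;
    unmap_domain_page(stub);

That alone might not be a complete fix, but it would at least close the window
where the slot still looks unused between being allocated and the trampoline
being written.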
> +        unmap_domain_page(stub_page);
> +        destroy_xen_mappings(per_cpu(stubs.addr, cpu) & PAGE_MASK,
> +                             (per_cpu(stubs.addr, cpu) | ~PAGE_MASK) + 1);
> +        if ( i == STUBS_PER_PAGE )
> +            free_domheap_page(mfn_to_page(mfn));
> +    }
> +
>      order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
>      free_xenheap_pages(per_cpu(gdt_table, cpu), order);
>
> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -219,7 +219,19 @@ ENTRY(compat_post_handle_exception)
>          movb  $0,TRAPBOUNCE_flags(%rdx)
>          jmp   compat_test_all_events
>

I would be tempted to leave a comment here referring to 'lstar_enter' for
stack/register details.

> -ENTRY(compat_syscall)
> +ENTRY(cstar_enter)
> +        sti
> +        movq  8(%rsp),%rax            /* Restore %rax. */
> +        movq  $FLAT_KERNEL_SS,8(%rsp)
> +        pushq %r11
> +        pushq $FLAT_USER_CS32
> +        pushq %rcx
> +        pushq $0
> +        SAVE_VOLATILE TRAP_syscall
> +        GET_CURRENT(%rbx)
> +        movq  VCPU_domain(%rbx),%rcx
> +        cmpb  $0,DOMAIN_is_32bit_pv(%rcx)
> +        je    switch_to_kernel
>          cmpb  $0,VCPU_syscall32_disables_events(%rbx)
>          movzwl VCPU_syscall32_sel(%rbx),%esi
>          movq  VCPU_syscall32_addr(%rbx),%rax
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -13,9 +13,8 @@
>  #include <public/xen.h>
>  #include <irq_vectors.h>
>
> -        ALIGN
>  /* %rbx: struct vcpu */
> -switch_to_kernel:
> +ENTRY(switch_to_kernel)
>          leaq  VCPU_trap_bounce(%rbx),%rdx
>          /* TB_eip = (32-bit syscall && syscall32_addr) ?
>           *          syscall32_addr : syscall_addr */
> @@ -114,22 +113,21 @@ restore_all_xen:
>   * Vector directly to the registered arch.syscall_addr.
>   *
>   * Initial work is done by per-CPU stack trampolines. At this point %rsp

The trampolines are no longer on the stack, so I think this line needs editing
as well.

> - * has been initialised to point at the correct Xen stack, and %rsp, %rflags
> - * and %cs have been saved. All other registers are still to be saved onto
> - * the stack, starting with %rip, and an appropriate %ss must be saved into
> - * the space left by the trampoline.
> + * has been initialised to point at the correct Xen stack, %rsp has been
> + * saved, and %rax needs to be restored from the %ss save slot. All other
> + * registers are still to be saved onto the stack, starting with RFLAGS, and
> + * an appropriate %ss must be saved into the space left by the trampoline.
>   */
> -ENTRY(syscall_enter)
> +ENTRY(lstar_enter)
>          sti
> -        movl  $FLAT_KERNEL_SS,24(%rsp)
> +        movq  8(%rsp),%rax            /* Restore %rax. */
> +        movq  $FLAT_KERNEL_SS,8(%rsp)
> +        pushq %r11
> +        pushq $FLAT_KERNEL_CS64
>          pushq %rcx
>          pushq $0
> -        movq  24(%rsp),%r11           /* Re-load user RFLAGS into %r11 before saving */
>          SAVE_VOLATILE TRAP_syscall
>          GET_CURRENT(%rbx)
> -        movq  VCPU_domain(%rbx),%rcx
> -        testb $1,DOMAIN_is_32bit_pv(%rcx)
> -        jnz   compat_syscall
>          testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
>          jz    switch_to_kernel
>
> --- a/xen/arch/x86/x86_64/traps.c
> +++ b/xen/arch/x86/x86_64/traps.c
>
> +DEFINE_PER_CPU(struct stubs, stubs);
> +void lstar_enter(void);
> +void cstar_enter(void);
> +
>  void __devinit subarch_percpu_traps_init(void)
>  {
> -    char *stack_bottom, *stack;
> -
> -    stack_bottom = (char *)get_stack_bottom();
> -    stack = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
> +    unsigned long stack_bottom = get_stack_bottom();
> +    unsigned long stub_va = this_cpu(stubs.addr);
> +    unsigned char *stub_page;
> +    unsigned int offset;
>
>      /* IST_MAX IST pages + 1 syscall page + 1 guard page + primary stack. */
>      BUILD_BUG_ON((IST_MAX + 2) * PAGE_SIZE + PRIMARY_STACK_SIZE > STACK_SIZE);
>
> -    /* Trampoline for SYSCALL entry from long mode. */
> -    stack = &stack[IST_MAX * PAGE_SIZE]; /* Skip the IST stacks. */
> -    wrmsrl(MSR_LSTAR, (unsigned long)stack);
> -    stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS64);
> +    stub_page = map_domain_page(this_cpu(stubs.mfn));
> +
> +    /* Trampoline for SYSCALL entry from 64-bit mode. */
> +    wrmsrl(MSR_LSTAR, stub_va);
> +    offset = write_stub_trampoline(stub_page + (stub_va & (PAGE_SIZE - 1)),

I would personally opt for "stub_va & ~PAGE_MASK".

> +                                   stub_va, stack_bottom,
> +                                   (unsigned long)lstar_enter);
> +    stub_va += offset;
>
>      if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
>           boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR )
>      {
>          /* SYSENTER entry. */
> -        wrmsrl(MSR_IA32_SYSENTER_ESP, (unsigned long)stack_bottom);
> +        wrmsrl(MSR_IA32_SYSENTER_ESP, stack_bottom);
>          wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)sysenter_entry);
>          wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
>      }
>
>      /* Trampoline for SYSCALL entry from compatibility mode. */
> -    stack = (char *)L1_CACHE_ALIGN((unsigned long)stack);
> -    wrmsrl(MSR_CSTAR, (unsigned long)stack);
> -    stack += write_stack_trampoline(stack, stack_bottom, FLAT_USER_CS32);
> +    wrmsrl(MSR_CSTAR, stub_va);
> +    offset += write_stub_trampoline(stub_page + (stub_va & (PAGE_SIZE - 1)),
> +                                    stub_va, stack_bottom,
> +                                    (unsigned long)cstar_enter);
> +
> +    /* Don't consume more than half of the stub space here. */
> +    ASSERT(offset <= STUB_BUF_SIZE / 2);
> +
> +    unmap_domain_page(stub_page);
>
>      /* Common SYSCALL parameters. */
>      wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);

As for the rest of the patch, I have checked the asm changes carefully and
can't see any issues.

~Andrew