[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] stack overflow during pv-guest restore
When secondary cpus are initialized during an i386 pv-guest restore (due to save/restore or live migration), and the guest has a load that generates a fair number of interrupts (e.g., parallel kernel make), a stack overflow can occur because cpu_initialize_context() has a 2800 byte structure it declares on its stack. linux-i386 has 4K stacks, by default. Using 2800 bytes out of 4K by a single function in a call list isn't nice; add the beginning of interrupt handling at just the right point, before the switch to the interrupt stack is made... and... the scale tips... Simple fix: malloc & free the structure as needed. Would fail save/restore testing of an i386 guest running a parallel kernel make after 50->100 save/restores; with the fix, haven't seen it fail after 650 save/restores. Note: this is a basic port of this function in Jeremy Fitzhardinge's Xen implementation of pv-ops in upstream Linux, part of the 15/32 patch, Xen SMP guest support. Original patch done on rhel5; did a simple diff & merge to xen-unstable's version of smpboot.c to generate the attached patch, so it cleanly applies; but haven't built/run/tested the xen-unstable version. 
Signed-off-by: Donald Dutile <ddutile@xxxxxxxxxx> --- linux-2.6.18-xen.hg/drivers/xen/core/smpboot.c.orig 2008-01-31 13:45:10.000000000 -0500 +++ linux-2.6.18-xen.hg/drivers/xen/core/smpboot.c 2008-01-31 13:56:07.000000000 -0500 @@ -180,9 +180,9 @@ cpu_idle(); } -static void __cpuinit cpu_initialize_context(unsigned int cpu) +static int cpu_initialize_context(unsigned int cpu) { - vcpu_guest_context_t ctxt; + vcpu_guest_context_t *ctxt; struct task_struct *idle = idle_task(cpu); #ifdef __x86_64__ struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu]; @@ -191,58 +191,65 @@ #endif if (cpu_test_and_set(cpu, cpu_initialized_map)) - return; + return 0; - memset(&ctxt, 0, sizeof(ctxt)); + ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); + if (ctxt == NULL) + return -ENOMEM; - ctxt.flags = VGCF_IN_KERNEL; - ctxt.user_regs.ds = __USER_DS; - ctxt.user_regs.es = __USER_DS; - ctxt.user_regs.fs = 0; - ctxt.user_regs.gs = 0; - ctxt.user_regs.ss = __KERNEL_DS; - ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle; - ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */ + memset(ctxt, 0, sizeof(*ctxt)); - memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); + ctxt->flags = VGCF_IN_KERNEL; + ctxt->user_regs.ds = __USER_DS; + ctxt->user_regs.es = __USER_DS; + ctxt->user_regs.fs = 0; + ctxt->user_regs.gs = 0; + ctxt->user_regs.ss = __KERNEL_DS; + ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; + ctxt->user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */ - smp_trap_init(ctxt.trap_ctxt); + memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); - ctxt.ldt_ents = 0; + smp_trap_init(ctxt->trap_ctxt); - ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address); - ctxt.gdt_ents = gdt_descr->size / 8; + ctxt->ldt_ents = 0; + + ctxt->gdt_frames[0] = virt_to_mfn(gdt_descr->address); + ctxt->gdt_ents = gdt_descr->size / 8; #ifdef __i386__ - ctxt.user_regs.cs = __KERNEL_CS; - ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs); + ctxt->user_regs.cs = __KERNEL_CS; + 
ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs); - ctxt.kernel_ss = __KERNEL_DS; - ctxt.kernel_sp = idle->thread.esp0; + ctxt->kernel_ss = __KERNEL_DS; + ctxt->kernel_sp = idle->thread.esp0; - ctxt.event_callback_cs = __KERNEL_CS; - ctxt.event_callback_eip = (unsigned long)hypervisor_callback; - ctxt.failsafe_callback_cs = __KERNEL_CS; - ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; + ctxt->event_callback_cs = __KERNEL_CS; + ctxt->event_callback_eip = (unsigned long)hypervisor_callback; + ctxt->failsafe_callback_cs = __KERNEL_CS; + ctxt->failsafe_callback_eip = (unsigned long)failsafe_callback; - ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); + ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); #else /* __x86_64__ */ - ctxt.user_regs.cs = __KERNEL_CS; - ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); + ctxt->user_regs.cs = __KERNEL_CS; + ctxt->user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); - ctxt.kernel_ss = __KERNEL_DS; - ctxt.kernel_sp = idle->thread.rsp0; + ctxt->kernel_ss = __KERNEL_DS; + ctxt->kernel_sp = idle->thread.rsp0; - ctxt.event_callback_eip = (unsigned long)hypervisor_callback; - ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; - ctxt.syscall_callback_eip = (unsigned long)system_call; + ctxt->event_callback_eip = (unsigned long)hypervisor_callback; + ctxt->failsafe_callback_eip = (unsigned long)failsafe_callback; + ctxt->syscall_callback_eip = (unsigned long)system_call; - ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); + ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); - ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); + ctxt->gs_base_kernel = (unsigned long)(cpu_pda(cpu)); #endif - BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); + BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)); + + kfree(ctxt); + return 0; } void __init smp_prepare_cpus(unsigned int max_cpus) @@ -400,7 +407,9 @@ if (rc) 
return rc; - cpu_initialize_context(cpu); + rc = cpu_initialize_context(cpu); + if (rc) + return rc; if (num_online_cpus() == 1) alternatives_smp_switch(1); _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |