[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Clean up and fix VCPU hotplug and SMP save/restore.
# HG changeset patch # User kaf24@xxxxxxxxxxxxxxxxxxxx # Node ID 5b1a10f9da4c3663d09f9d000a612b85f1bb24c8 # Parent 4109c4e7804abeabe3b222673f2ba4dd1375be53 Clean up and fix VCPU hotplug and SMP save/restore. 1. No longer hold xenbus_lock while taking down VCPUs in SMP suspend path. This allows block device hotplug to continue working and so we will not deadlock on paging in userspace hotplug code. 2. Track xenbus and local-admin permitted cpumasks for VCPUs to bring online. So, if a local admin takes a CPU down, that won't surprisingly get overridden next time the kernel interrogates xenstore. Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx> diff -r 4109c4e7804a -r 5b1a10f9da4c linux-2.6-xen-sparse/drivers/xen/core/reboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Tue Mar 28 13:19:22 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Tue Mar 28 17:43:30 2006 @@ -15,6 +15,7 @@ #include <xen/xenbus.h> #include <linux/cpu.h> #include <linux/kthread.h> +#include <xen/gnttab.h> #include <xen/xencons.h> #if defined(__i386__) || defined(__x86_64__) @@ -76,30 +77,23 @@ static void __shutdown_handler(void *unused); static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); -#ifndef CONFIG_HOTPLUG_CPU -#define cpu_down(x) (-EOPNOTSUPP) -#define cpu_up(x) (-EOPNOTSUPP) +#ifdef CONFIG_SMP +int smp_suspend(void); +void smp_resume(void); +#else +#define smp_suspend() (0) +#define smp_resume() ((void)0) #endif - static int __do_suspend(void *ignore) { - int i, j, k, fpp; + int i, j, k, fpp, err; extern unsigned long max_pfn; extern unsigned long *pfn_to_mfn_frame_list_list; extern unsigned long *pfn_to_mfn_frame_list[]; - extern int gnttab_suspend(void); - extern int gnttab_resume(void); extern void time_resume(void); - -#ifdef CONFIG_SMP - cpumask_t prev_online_cpus; - int vcpu_prepare(int vcpu); -#endif - - int err = 0; BUG_ON(smp_processor_id() != 0); BUG_ON(in_interrupt()); @@ -110,39 +104,11 @@ return -EOPNOTSUPP; } -#if defined(CONFIG_SMP) && 
!defined(CONFIG_HOTPLUG_CPU) - if (num_online_cpus() > 1) { - printk(KERN_WARNING "Can't suspend SMP guests " - "without CONFIG_HOTPLUG_CPU\n"); - return -EOPNOTSUPP; - } -#endif + err = smp_suspend(); + if (err) + return err; xenbus_suspend(); - - lock_cpu_hotplug(); -#ifdef CONFIG_SMP - /* - * Take all other CPUs offline. We hold the hotplug mutex to - * avoid other processes bringing up CPUs under our feet. - */ - cpus_clear(prev_online_cpus); - while (num_online_cpus() > 1) { - for_each_online_cpu(i) { - if (i == 0) - continue; - unlock_cpu_hotplug(); - err = cpu_down(i); - lock_cpu_hotplug(); - if (err != 0) { - printk(KERN_CRIT "Failed to take all CPUs " - "down: %d.\n", err); - goto out_reenable_cpus; - } - cpu_set(i, prev_online_cpus); - } - } -#endif preempt_disable(); @@ -153,7 +119,6 @@ __cli(); preempt_enable(); - unlock_cpu_hotplug(); gnttab_suspend(); @@ -203,30 +168,9 @@ xencons_resume(); -#ifdef CONFIG_SMP - for_each_cpu(i) - vcpu_prepare(i); - -#endif - - /* - * Only resume xenbus /after/ we've prepared our VCPUs; otherwise - * the VCPU hotplug callback can race with our vcpu_prepare - */ xenbus_resume(); -#ifdef CONFIG_SMP - out_reenable_cpus: - for_each_cpu_mask(i, prev_online_cpus) { - j = cpu_up(i); - if ((j != 0) && !cpu_online(i)) { - printk(KERN_CRIT "Failed to bring cpu " - "%d back up (%d).\n", - i, j); - err = j; - } - } -#endif + smp_resume(); return err; } @@ -334,7 +278,6 @@ kfree(str); } -#ifdef CONFIG_MAGIC_SYSRQ static void sysrq_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) { @@ -360,45 +303,35 @@ if (err == -EAGAIN) goto again; - if (sysrq_key != '\0') { +#ifdef CONFIG_MAGIC_SYSRQ + if (sysrq_key != '\0') handle_sysrq(sysrq_key, NULL, NULL); - } -} #endif +} static struct xenbus_watch shutdown_watch = { .node = "control/shutdown", .callback = shutdown_handler }; -#ifdef CONFIG_MAGIC_SYSRQ static struct xenbus_watch sysrq_watch = { .node ="control/sysrq", .callback = sysrq_handler }; -#endif static int 
setup_shutdown_watcher(struct notifier_block *notifier, unsigned long event, void *data) { - int err1 = 0; -#ifdef CONFIG_MAGIC_SYSRQ - int err2 = 0; -#endif - - err1 = register_xenbus_watch(&shutdown_watch); -#ifdef CONFIG_MAGIC_SYSRQ - err2 = register_xenbus_watch(&sysrq_watch); -#endif - - if (err1) + int err; + + err = register_xenbus_watch(&shutdown_watch); + if (err) printk(KERN_ERR "Failed to set shutdown watcher\n"); -#ifdef CONFIG_MAGIC_SYSRQ - if (err2) + err = register_xenbus_watch(&sysrq_watch); + if (err) printk(KERN_ERR "Failed to set sysrq watcher\n"); -#endif return NOTIFY_DONE; } diff -r 4109c4e7804a -r 5b1a10f9da4c linux-2.6-xen-sparse/drivers/xen/core/smpboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Tue Mar 28 13:19:22 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Tue Mar 28 17:43:30 2006 @@ -79,6 +79,15 @@ unsigned int maxcpus = NR_CPUS; #endif +/* + * Set of CPUs that remote admin software will allow us to bring online. + * Notified to us via xenbus. + */ +static cpumask_t xenbus_allowed_cpumask; + +/* Set of CPUs that local admin will allow us to bring online. */ +static cpumask_t local_allowed_cpumask = CPU_MASK_ALL; + void __init prefill_possible_map(void) { int i, rc; @@ -146,7 +155,7 @@ cpu_idle(); } -void vcpu_prepare(int vcpu) +static void vcpu_prepare(int vcpu) { vcpu_guest_context_t ctxt; struct task_struct *idle = idle_task(vcpu); @@ -278,6 +287,8 @@ vcpu_prepare(cpu); } + xenbus_allowed_cpumask = cpu_present_map; + /* Currently, Xen gives no dynamic NUMA/HT info. */ for (cpu = 1; cpu < NR_CPUS; cpu++) { cpu_sibling_map[cpu] = cpumask_of_cpu(cpu); @@ -301,6 +312,15 @@ cpu_online_map = cpumask_of_cpu(0); } +static int local_cpu_hotplug_request(void) +{ + /* + * We assume a CPU hotplug request comes from local admin if it is made + * via a userspace process (i.e., one with a real mm_struct). 
+ */ + return (current->mm != NULL); +} + #ifdef CONFIG_HOTPLUG_CPU /* @@ -331,8 +351,10 @@ } if (strcmp(state, "online") == 0) { + cpu_set(cpu, xenbus_allowed_cpumask); (void)cpu_up(cpu); } else if (strcmp(state, "offline") == 0) { + cpu_clear(cpu, xenbus_allowed_cpumask); (void)cpu_down(cpu); } else { printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", @@ -353,6 +375,22 @@ } } +static int smpboot_cpu_notify(struct notifier_block *notifier, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + + /* + * We do this in a callback notifier rather than __cpu_disable() + * because local_cpu_hotplug_request() does not work in the latter + * as it's always executed from within a stopmachine kthread. + */ + if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request()) + cpu_clear(cpu, local_allowed_cpumask); + + return NOTIFY_OK; +} + static int setup_cpu_watcher(struct notifier_block *notifier, unsigned long event, void *data) { @@ -360,7 +398,8 @@ static struct xenbus_watch cpu_watch = { .node = "cpu", - .callback = handle_vcpu_hotplug_event }; + .callback = handle_vcpu_hotplug_event, + .flags = XBWF_new_thread }; (void)register_xenbus_watch(&cpu_watch); if (!(xen_start_info->flags & SIF_INITDOMAIN)) { @@ -375,13 +414,61 @@ static int __init setup_vcpu_hotplug_event(void) { + static struct notifier_block hotplug_cpu = { + .notifier_call = smpboot_cpu_notify }; static struct notifier_block xsn_cpu = { .notifier_call = setup_cpu_watcher }; + + register_cpu_notifier(&hotplug_cpu); register_xenstore_notifier(&xsn_cpu); + return 0; } arch_initcall(setup_vcpu_hotplug_event); + +int smp_suspend(void) +{ + int i, err; + + lock_cpu_hotplug(); + + /* + * Take all other CPUs offline. We hold the hotplug mutex to + * avoid other processes bringing up CPUs under our feet. 
+ */ + while (num_online_cpus() > 1) { + unlock_cpu_hotplug(); + for_each_online_cpu(i) { + if (i == 0) + continue; + err = cpu_down(i); + if (err) { + printk(KERN_CRIT "Failed to take all CPUs " + "down: %d.\n", err); + for_each_cpu(i) + vcpu_hotplug(i); + return err; + } + } + lock_cpu_hotplug(); + } + + return 0; +} + +void smp_resume(void) +{ + int i; + + for_each_cpu(i) + vcpu_prepare(i); + + unlock_cpu_hotplug(); + + for_each_cpu(i) + vcpu_hotplug(i); +} int __cpu_disable(void) { @@ -415,6 +502,20 @@ #else /* !CONFIG_HOTPLUG_CPU */ +int smp_suspend(void) +{ + if (num_online_cpus() > 1) { + printk(KERN_WARNING "Can't suspend SMP guests " + "without CONFIG_HOTPLUG_CPU\n"); + return -EOPNOTSUPP; + } + return 0; +} + +void smp_resume(void) +{ +} + int __cpu_disable(void) { return -ENOSYS; @@ -429,6 +530,20 @@ int __devinit __cpu_up(unsigned int cpu) { + int rc; + + if (local_cpu_hotplug_request()) { + cpu_set(cpu, local_allowed_cpumask); + if (!cpu_isset(cpu, xenbus_allowed_cpumask)) { + printk("%s: attempt to bring up CPU %u disallowed by " + "remote admin.\n", __FUNCTION__, cpu); + return -EBUSY; + } + } else if (!cpu_isset(cpu, local_allowed_cpumask) || + !cpu_isset(cpu, xenbus_allowed_cpumask)) { + return -EBUSY; + } + #ifdef CONFIG_SMP_ALTERNATIVES if (num_online_cpus() == 1) prepare_for_smp(); @@ -436,7 +551,9 @@ xen_smp_intr_init(cpu); cpu_set(cpu, cpu_online_map); - if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL) != 0) + + rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + if (rc != 0) BUG(); return 0; diff -r 4109c4e7804a -r 5b1a10f9da4c linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue Mar 28 13:19:22 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue Mar 28 17:43:30 2006 @@ -16,7 +16,7 @@ { struct pciback_device *pdev; - pdev = kmalloc(sizeof(struct pciback_device), GFP_KERNEL); + pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL); if (pdev == NULL) goto out; 
dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); diff -r 4109c4e7804a -r 5b1a10f9da4c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Tue Mar 28 13:19:22 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Tue Mar 28 17:43:30 2006 @@ -685,6 +685,24 @@ up_write(&xs_state.suspend_mutex); } +static int xenwatch_handle_callback(void *data) +{ + struct xs_stored_msg *msg = data; + + msg->u.watch.handle->callback(msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + + kfree(msg->u.watch.vec); + kfree(msg); + + /* Kill this kthread if we were spawned just for this callback. */ + if (current->pid != xenwatch_pid) + do_exit(0); + + return 0; +} + static int xenwatch_thread(void *unused) { struct list_head *ent; @@ -707,12 +725,11 @@ if (ent != &watch_events) { msg = list_entry(ent, struct xs_stored_msg, list); - msg->u.watch.handle->callback( - msg->u.watch.handle, - (const char **)msg->u.watch.vec, - msg->u.watch.vec_size); - kfree(msg->u.watch.vec); - kfree(msg); + if (msg->u.watch.handle->flags & XBWF_new_thread) + kthread_run(xenwatch_handle_callback, + msg, "xenwatch_cb"); + else + xenwatch_handle_callback(msg); } mutex_unlock(&xenwatch_mutex); diff -r 4109c4e7804a -r 5b1a10f9da4c linux-2.6-xen-sparse/include/xen/gnttab.h --- a/linux-2.6-xen-sparse/include/xen/gnttab.h Tue Mar 28 13:19:22 2006 +++ b/linux-2.6-xen-sparse/include/xen/gnttab.h Tue Mar 28 17:43:30 2006 @@ -110,6 +110,9 @@ #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) #endif +int gnttab_suspend(void); +int gnttab_resume(void); + #endif /* __ASM_GNTTAB_H__ */ /* diff -r 4109c4e7804a -r 5b1a10f9da4c linux-2.6-xen-sparse/include/xen/xenbus.h --- a/linux-2.6-xen-sparse/include/xen/xenbus.h Tue Mar 28 13:19:22 2006 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Tue Mar 28 17:43:30 2006 @@ -55,8 +55,17 @@ /* Callback (executed in a process context with no locks held). 
*/ void (*callback)(struct xenbus_watch *, const char **vec, unsigned int len); + + /* See XBWF_ definitions below. */ + unsigned long flags; }; +/* + * Execute callback in its own kthread. Useful if the callback is long + * running or heavily serialised, to avoid taking out the main xenwatch thread + * for a long period of time (or even unwittingly causing a deadlock). + */ +#define XBWF_new_thread 1 /* A xenbus device. */ struct xenbus_device { _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
Lists.xenproject.org is hosted with RackSpace, monitoring our [...]