[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [XenPPC] [linux-ppc-2.6] [XEN][POWERPC] Turn on SMP.. Finally.
# HG changeset patch # User Jimi Xenidis <jimix@xxxxxxxxxxxxxx> # Node ID a384dbf50d5934ba93eea17eccb7e43cf408dd87 # Parent bbf2db4ddf5400e908ee6bf92ac798e5cfed82a0 [XEN][POWERPC] Turn on SMP.. Finally. The following patch uses Xen specific methods to spin up secondary processors and add them to the Linux devtree (not the flat-devtree). Specifically: - Adds HYPERVISOR_vcpu_op() for probing and spinning. - "Hot-Plug" new CPU entries into the devtree - Start CPUs in the same place that OF/prom_init.c would have - Wire up SMP IPI to Xen event channels - 6 line common code change in LinuxPPC to set the # possible CPUs correctly Tested on JS21 (4-way) and Maple(2-way) creating 1-1 Dom0 and several VIO/DomUs up to 32-way. NOTE: we cannot yet: - _add_ a CPU after the normal boot spinup process - remove a CPU Signed-off-by: Jimi Xenidis <jimix@xxxxxxxxxxxxxx> --- arch/powerpc/kernel/setup-common.c | 6 arch/powerpc/platforms/xen/Makefile | 1 arch/powerpc/platforms/xen/hcall.c | 30 ++ arch/powerpc/platforms/xen/setup.c | 36 -- arch/powerpc/platforms/xen/setup.h | 1 arch/powerpc/platforms/xen/smp.c | 424 +++++++++++++++++++++++++++++++ include/asm-powerpc/xen/asm/hypercall.h | 1 include/asm-powerpc/xen/asm/hypervisor.h | 2 8 files changed, 468 insertions(+), 33 deletions(-) diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/kernel/setup-common.c --- a/arch/powerpc/kernel/setup-common.c Tue Dec 19 09:22:37 2006 -0500 +++ b/arch/powerpc/kernel/setup-common.c Sun Jan 21 08:34:45 2007 -0500 @@ -388,6 +388,12 @@ void __init smp_setup_cpu_maps(void) } } + if (machine_is(xen)) { + /* something more inteligent perhaps? 
*/ + for (cpu = 0; cpu < NR_CPUS; cpu++) + cpu_set(cpu, cpu_possible_map); + } + #ifdef CONFIG_PPC64 /* * On pSeries LPAR, we need to know how many cpus diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/Makefile --- a/arch/powerpc/platforms/xen/Makefile Tue Dec 19 09:22:37 2006 -0500 +++ b/arch/powerpc/platforms/xen/Makefile Sun Jan 21 08:34:45 2007 -0500 @@ -3,6 +3,7 @@ obj-y += hcall.o obj-y += hcall.o obj-y += reboot.o obj-y += setup.o +obj-y += smp.o obj-y += time.o obj-y += udbg_xen.o obj-y += xen_guest.o diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/hcall.c --- a/arch/powerpc/platforms/xen/hcall.c Tue Dec 19 09:22:37 2006 -0500 +++ b/arch/powerpc/platforms/xen/hcall.c Sun Jan 21 08:34:45 2007 -0500 @@ -33,7 +33,7 @@ #include <xen/interface/sched.h> #include <xen/interface/event_channel.h> #include <xen/interface/physdev.h> -#include <xen/interface/grant_table.h> +#include <xen/interface/vcpu.h> #include <xen/public/privcmd.h> #include <asm/hypercall.h> #include <asm/page.h> @@ -599,3 +599,31 @@ int arch_privcmd_hypercall(privcmd_hyper } } +int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args) +{ + int argsize; + const unsigned long hcall = __HYPERVISOR_vcpu_op; + void *desc; + + switch (cmd) { + case VCPUOP_initialise: + argsize = sizeof(vcpu_guest_context_t); + break; + case VCPUOP_up: + case VCPUOP_down: + case VCPUOP_is_up: + return plpar_hcall_norets(XEN_MARK(hcall), cmd, vcpuid, 0); + + case VCPUOP_get_runstate_info: + argsize = sizeof (vcpu_runstate_info_t); + break; + default: + printk(KERN_ERR "%s: unknown version cmd %d\n", __func__, cmd); + return -ENOSYS; + } + + desc = xencomm_create_inline(extra_args); + (void)argsize; + return plpar_hcall_norets(XEN_MARK(hcall), cmd, vcpuid, desc); +} + diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/setup.c --- a/arch/powerpc/platforms/xen/setup.c Tue Dec 19 09:22:37 2006 -0500 +++ b/arch/powerpc/platforms/xen/setup.c Sun Jan 21 08:34:45 2007 -0500 @@ 
-168,42 +168,10 @@ static void xen_power_save(void) HYPERVISOR_sched_op(SCHEDOP_block, NULL); } -#ifdef CONFIG_SMP - -int __init smp_xen_probe(void) -{ - return 1; -} - -void smp_xen_message_pass(int target, int msg) -{ - printk("%s(%d, %d)\n", __func__, target, msg); -} - -void __devinit smp_xen_setup_cpu(int cpu) -{ - printk("%s(%d)\n", __func__, cpu); -} - -struct smp_ops_t xen_smp_ops = { - .probe = smp_xen_probe, - .message_pass = smp_xen_message_pass, - .kick_cpu = smp_generic_kick_cpu, - .setup_cpu = smp_xen_setup_cpu, - .give_timebase = smp_generic_give_timebase, - .take_timebase = smp_generic_take_timebase, -}; -#endif /* CONFIG_SMP */ - void __init xen_setup_arch(void) { /* init to some ~sane value until calibrate_delay() runs */ loops_per_jiffy = 50000000; - - /* Setup SMP callback */ -#ifdef CONFIG_SMP - smp_ops = &xen_smp_ops; -#endif /* Lookup PCI hosts */ if (is_initial_xendomain()) @@ -211,6 +179,10 @@ void __init xen_setup_arch(void) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; +#endif +#ifdef CONFIG_SMP + /* let them fly */ + xen_setup_smp(); #endif printk(KERN_INFO "Using Xen idle loop\n"); diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/setup.h --- a/arch/powerpc/platforms/xen/setup.h Tue Dec 19 09:22:37 2006 -0500 +++ b/arch/powerpc/platforms/xen/setup.h Sun Jan 21 08:34:45 2007 -0500 @@ -27,3 +27,4 @@ extern void free_foreign_page(struct pag extern void free_foreign_page(struct page *page); extern void __init xen_setup_time(struct machdep_calls *host_md); +extern void xen_setup_smp(void); diff -r bbf2db4ddf54 -r a384dbf50d59 include/asm-powerpc/xen/asm/hypercall.h --- a/include/asm-powerpc/xen/asm/hypercall.h Tue Dec 19 09:22:37 2006 -0500 +++ b/include/asm-powerpc/xen/asm/hypercall.h Sun Jan 21 08:34:45 2007 -0500 @@ -44,6 +44,7 @@ extern int HYPERVISOR_physdev_op(int cmd extern int HYPERVISOR_physdev_op(int cmd, void *op); extern int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count); +extern 
int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); extern int HYPERVISOR_memory_op(unsigned int cmd, void *arg); extern int HYPERVISOR_multicall(void *call_list, int nr_calls); diff -r bbf2db4ddf54 -r a384dbf50d59 include/asm-powerpc/xen/asm/hypervisor.h --- a/include/asm-powerpc/xen/asm/hypervisor.h Tue Dec 19 09:22:37 2006 -0500 +++ b/include/asm-powerpc/xen/asm/hypervisor.h Sun Jan 21 08:34:45 2007 -0500 @@ -146,6 +146,8 @@ int direct_remap_pfn_range(struct vm_are #define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) #define NR_DYNIRQS 256 +#define NR_IPIS 4 /* PPC_MSG_DEBUGGER_BREAK + 1 */ + #if NR_IRQS < (NR_PIRQS + NR_DYNIRQS) #error to many Xen IRQs #endif diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/smp.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/powerpc/platforms/xen/smp.c Sun Jan 21 08:34:45 2007 -0500 @@ -0,0 +1,424 @@ +#include <linux/kernel.h> +#include <linux/config.h> +#include <linux/bootmem.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <xen/interface/xen.h> +#include <xen/interface/vcpu.h> +#include <xen/evtchn.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/hypervisor.h> +#include "setup.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) printk(KERN_EMERG fmt) +#else +#define DBG(fmt...) 
+#endif + +static inline void *xen_of_alloc(ulong size) +{ + if (mem_init_done) + return kmalloc(size, GFP_KERNEL); + return alloc_bootmem(size); +} +static inline void xen_of_free(void *ptr) +{ + /* if this happens with the boot allocator then we are screwed */ + BUG_ON(!mem_init_done); + kfree(ptr); +} + +static struct property *dup_prop(struct property *op) +{ + struct property *np; + void *p; + ulong sz; + + + /* allocate everything in one go in case it fails */ + sz = sizeof (*np); /* prop node */ + sz += strlen(op->name) + 1; /* prop name */ + sz += op->length; /* prop value */ + + p = xen_of_alloc(sz); + if (!p) + return NULL; + memset(p, 0, sz); + + /* prop node first */ + np = p; + p += sizeof (*np); + + /* value next becuase we want it aligned */ + np->value = p; + p += op->length; + + /* name */ + np->name = p; + + /* copy it all */ + strcpy(np->name, op->name); + np->length = op->length; + memcpy(np->value, op->value, np->length); + + return np; +} + +static int dup_properties(struct device_node *dst, struct device_node *src) +{ + struct property *op; + struct property *np; + struct property *lp; + int rc = 0; + + DBG("%s: duping to new cpu node: %s\n", __func__, dst->full_name); + + np = lp = NULL; + for (op = src->properties; op != 0; op = op->next) { + lp = np; + np = dup_prop(op); + if (!np) + break; + + prom_add_property(dst, np); + } + + if (!np) { + DBG("%s: FAILED duping: %s\n", __func__, dst->full_name); + /* we could not allocate enuff so free what we have + * allocated */ + rc = -ENOMEM; + for (op = dst->properties; lp && op != lp; op = op->next) + xen_of_free(op); + } + + return rc; +} + +/* returns added device node so it can be added to procfs in the case + * of hotpluging */ +static struct device_node *xen_add_vcpu_node(struct device_node *boot_cpu, + uint cpu) +{ + struct device_node *new_cpu; + struct property *pp; + void *p; + int sz; + int type_sz; + int name_sz; + + DBG("%s: boot cpu: %s\n", __func__, boot_cpu->full_name); + + /* 
allocate in one shot in case we fail */ + name_sz = strlen(boot_cpu->name) + 1; + type_sz = strlen(boot_cpu->type) + 1; + + sz = sizeof (*new_cpu); /* the node */ + sz += strlen(boot_cpu->full_name) + 3; /* full_name */ + sz += name_sz; /* name */ + sz += type_sz; /* type */ + + p = xen_of_alloc(sz); + if (!p) + return NULL; + memset(p, 0, sz); + + /* the node */ + new_cpu = p; + p += sizeof (*new_cpu); + + /* name */ + new_cpu->name = p; + strcpy(new_cpu->name, boot_cpu->name); + p += name_sz; + + /* type */ + new_cpu->type = p; + strcpy(new_cpu->type, boot_cpu->type); + p += type_sz; + + /* full_name */ + new_cpu->full_name = p; + + /* assemble new full_name */ + pp = of_find_property(boot_cpu, "name", NULL); + if (!pp) + panic("%s: no name prop\n", __func__); + + DBG("%s: name is: %s = %s\n", __func__, pp->name, pp->value); + sprintf(new_cpu->full_name, "/cpus/%s@%u", pp->value, cpu); + + if (dup_properties(new_cpu, boot_cpu)) { + xen_of_free(new_cpu); + return NULL; + } + + /* fixup reg property */ + DBG("%s: updating reg: %d\n", __func__, cpu); + pp = of_find_property(new_cpu, "reg", NULL); + if (!pp) + panic("%s: no reg prop\n", __func__); + *(int *)pp->value = cpu; + + if (mem_init_done) + OF_MARK_DYNAMIC(new_cpu); + + kref_init(&new_cpu->kref); + + /* insert the node */ + new_cpu->parent = of_get_parent(boot_cpu); + of_attach_node(new_cpu); + of_node_put(new_cpu->parent); + + return new_cpu; +} + +static void cpu_initialize_context(unsigned int vcpu, ulong entry) +{ + vcpu_guest_context_t ctxt; + + memset(&ctxt.user_regs, 0x55, sizeof(ctxt.user_regs)); + + ctxt.user_regs.pc = entry; + ctxt.user_regs.msr = 0; + ctxt.user_regs.gprs[1] = 0; /* Linux uses its own stack */ + ctxt.user_regs.gprs[3] = vcpu; + + /* XXX verify this *** */ + /* There is a buggy kernel that does not zero the "local_paca", so + * we must make sure this register is 0 */ + ctxt.user_regs.gprs[13] = 0; + + DBG("%s: initializing vcpu: %d\n", __func__, vcpu); + + if 
(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt)) + panic("%s: VCPUOP_initialise failed, vcpu: %d\n", + __func__, vcpu); + +} + +static int xen_start_vcpu(uint vcpu, ulong entry) +{ + DBG("%s: starting vcpu: %d\n", __func__, vcpu); + + cpu_initialize_context(vcpu, entry); + + DBG("%s: Spinning up vcpu: %d\n", __func__, vcpu); + return HYPERVISOR_vcpu_op(VCPUOP_up, vcpu, NULL); +} + +extern void __secondary_hold(void); +extern unsigned long __secondary_hold_spinloop; +extern unsigned long __secondary_hold_acknowledge; + +static void xen_boot_secondary_vcpus(void) +{ + int vcpu; + int rc; + const unsigned long mark = (unsigned long)-1; + unsigned long *spinloop = &__secondary_hold_spinloop; + unsigned long *acknowledge = &__secondary_hold_acknowledge; +#ifdef CONFIG_PPC64 + /* __secondary_hold is actually a descriptor, not the text address */ + unsigned long secondary_hold = __pa(*(unsigned long *)__secondary_hold); +#else + unsigned long secondary_hold = __pa(__secondary_hold); +#endif + struct device_node *boot_cpu; + + DBG("%s: finding CPU node\n", __func__); + boot_cpu = of_find_node_by_type(NULL, "cpu"); + if (!boot_cpu) + panic("%s: Cannot find Booting CPU node\n", __func__); + + /* Set the common spinloop variable, so all of the secondary cpus + * will block when they are awakened from their OF spinloop. + * This must occur for both SMP and non SMP kernels, since OF will + * be trashed when we move the kernel. + */ + *spinloop = 0; + + DBG("%s: Searching for all vcpu numbers > 0\n", __func__); + /* try and start as many as we can */ + for (vcpu = 1; vcpu < NR_CPUS; vcpu++) { + int i; + + rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, vcpu, NULL); + if (rc < 0) + continue; + + DBG("%s: Found vcpu: %d\n", __func__, vcpu); + /* Init the acknowledge var which will be reset by + * the secondary cpu when it awakens from its OF + * spinloop. 
+ */ + *acknowledge = mark; + + DBG("%s: Starting vcpu: %d at pc: 0x%lx\n", __func__, + vcpu, secondary_hold); + rc = xen_start_vcpu(vcpu, secondary_hold); + if (rc) + panic("%s: xen_start_vpcu() failed\n", __func__); + + + DBG("%s: Waiting for ACK on vcpu: %d\n", __func__, vcpu); + for (i = 0; (i < 100000000) && (*acknowledge == mark); i++) + mb(); + + if (*acknowledge == vcpu) + DBG("%s: Recieved for ACK on vcpu: %d\n", + __func__, vcpu); + + xen_add_vcpu_node(boot_cpu, vcpu); + + cpu_set(vcpu, cpu_present_map); + set_hard_smp_processor_id(vcpu, vcpu); + } + of_node_put(boot_cpu); + DBG("%s: end...\n", __func__); +} + +static int __init smp_xen_probe(void) +{ + return cpus_weight(cpu_present_map); +} + +static irqreturn_t xen_ppc_msg_reschedule(int irq, void *dev_id, + struct pt_regs *regs) +{ + smp_message_recv(PPC_MSG_RESCHEDULE, regs); + return IRQ_HANDLED; +} + +static irqreturn_t xen_ppc_msg_call_function(int irq, void *dev_id, + struct pt_regs *regs) +{ + smp_message_recv(PPC_MSG_CALL_FUNCTION, regs); + return IRQ_HANDLED; +} + +static irqreturn_t xen_ppc_msg_debugger_break(int irq, void *dev_id, + struct pt_regs *regs) +{ + smp_message_recv(PPC_MSG_DEBUGGER_BREAK, regs); + return IRQ_HANDLED; +} + +struct message { + irqreturn_t (*f)(int, void *, struct pt_regs *); + int num; + char *name; +}; +static struct message ipi_msgs[] = { + { + .num = PPC_MSG_RESCHEDULE, + .f = xen_ppc_msg_reschedule, + .name = "IPI-resched" + }, + { + .num = PPC_MSG_CALL_FUNCTION, + .f = xen_ppc_msg_call_function, + .name = "IPI-function" + }, + { + .num = PPC_MSG_DEBUGGER_BREAK, + .f = xen_ppc_msg_debugger_break, + .name = "IPI-debug" + } +}; + +DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); + +static void __devinit smp_xen_setup_cpu(int cpu) +{ + int irq; + int i; + const int nr_ipis = ARRAY_SIZE(__get_cpu_var(ipi_to_irq)); + + /* big scary include web could mess with our values, so we + * make sure they are sane */ + BUG_ON(ARRAY_SIZE(ipi_msgs) > nr_ipis); + + for (i = 0; i < 
ARRAY_SIZE(ipi_msgs); i++) { + BUG_ON(ipi_msgs[i].num >= nr_ipis); + + irq = bind_ipi_to_irqhandler(ipi_msgs[i].num, + cpu, + ipi_msgs[i].f, + SA_INTERRUPT, + ipi_msgs[i].name, + NULL); + BUG_ON(irq < 0); + per_cpu(ipi_to_irq, cpu)[ipi_msgs[i].num] = irq; + DBG("%s: cpu: %d vector :%d irq: %d\n", + __func__, cpu, ipi_msgs[i].num, irq); + } +} + +static inline void send_IPI_one(unsigned int cpu, int vector) +{ + int irq; + + irq = per_cpu(ipi_to_irq, cpu)[vector]; + BUG_ON(irq < 0); + + DBG("%s: cpu: %d vector :%d irq: %d!\n", + __func__, cpu, vector, irq); + DBG("%s: per_cpu[%p]: %d %d %d %d\n", + __func__, per_cpu(ipi_to_irq, cpu), + per_cpu(ipi_to_irq, cpu)[0], + per_cpu(ipi_to_irq, cpu)[1], + per_cpu(ipi_to_irq, cpu)[2], + per_cpu(ipi_to_irq, cpu)[3]); + + notify_remote_via_irq(irq); +} + +static void smp_xen_message_pass(int target, int msg) +{ + int cpu; + + switch (msg) { + case PPC_MSG_RESCHEDULE: + case PPC_MSG_CALL_FUNCTION: + case PPC_MSG_DEBUGGER_BREAK: + break; + default: + panic("SMP %d: smp_message_pass: unknown msg %d\n", + smp_processor_id(), msg); + return; + } + switch (target) { + case MSG_ALL: + case MSG_ALL_BUT_SELF: + for_each_online_cpu(cpu) { + if (target == MSG_ALL_BUT_SELF && + cpu == smp_processor_id()) + continue; + send_IPI_one(cpu, msg); + } + break; + default: + send_IPI_one(target, msg); + break; + } +} + +static struct smp_ops_t xen_smp_ops = { + .probe = smp_xen_probe, + .message_pass = smp_xen_message_pass, + .kick_cpu = smp_generic_kick_cpu, + .setup_cpu = smp_xen_setup_cpu, +}; + +void xen_setup_smp(void) +{ + smp_ops = &xen_smp_ops; + + xen_boot_secondary_vcpus(); + smp_release_cpus(); +} _______________________________________________ Xen-ppc-devel mailing list Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ppc-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |