[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] The following patch provides x86_64 SMP support for xen linux. Many,



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID d95ea17567c695cb7883d5a5842a164951fccd10
# Parent  c66cac10524ab281222f4ebbc4edf436767bd235
The following patch provides x86_64 SMP support for xen linux.  Many,
many thanks for the help with bug fixes, cleanups, good domU support,
and more from Jun and Xin.  I've tested this on a couple different
machines, and made sure the UP build still boots and works.  dom0 and
domU smp both work.  There's still some cleanups to do, but we'd prefer
to do the last bit in tree.  This doesn't update defconfigs just yet.
That'll be a follow-on patch shortly, so for now, you'll have to manually
enable CONFIG_SMP if you'd like to test.

Signed-off-by: Chris Wright <chrisw@xxxxxxxx>
Signed-off-by: Xin Li <xin.b.li@xxxxxxxxx>
Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>

diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig      Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig      Thu Aug 11 08:59:47 2005
@@ -125,6 +125,10 @@
 config X86_IO_APIC
        bool
        default XEN_PRIVILEGED_GUEST
+
+config X86_XEN_GENAPIC
+       bool
+       default XEN_PRIVILEGED_GUEST || SMP
 
 config X86_LOCAL_APIC
        bool
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile      Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile      Thu Aug 11 08:59:47 2005
@@ -25,11 +25,12 @@
 c-obj-$(CONFIG_X86_MSR)                += msr.o
 obj-$(CONFIG_MICROCODE)                += microcode.o
 obj-$(CONFIG_X86_CPUID)                += cpuid.o
-#obj-$(CONFIG_SMP)             += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_SMP)              += smp.o smpboot.o
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o
 c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o mpparse.o
-c-obj-$(CONFIG_X86_IO_APIC)    += genapic.o genapic_cluster.o genapic_flat.o
+obj-$(CONFIG_X86_XEN_GENAPIC)  += genapic.o genapic_xen.o
+c-obj-$(CONFIG_X86_IO_APIC)    += genapic_cluster.o genapic_flat.o
 #obj-$(CONFIG_PM)              += suspend.o
 #obj-$(CONFIG_SOFTWARE_SUSPEND)        += suspend_asm.o
 #obj-$(CONFIG_CPU_FREQ)                += cpufreq/
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c        Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c        Thu Aug 11 08:59:47 2005
@@ -48,7 +48,7 @@
        int cpu = smp_processor_id();
 
        profile_tick(CPU_PROFILING, regs);
-#if 0
+#ifndef CONFIG_XEN
        if (--per_cpu(prof_counter, cpu) <= 0) {
                /*
                 * The multiplier may have changed since the last time we got
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S        Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S        Thu Aug 11 08:59:47 2005
@@ -41,7 +41,6 @@
 startup_64:
 ENTRY(_start)
         cld                
-       movq init_rsp(%rip),%rsp
        /* Copy the necessary stuff from xen_start_info structure. */
        movq  $xen_start_info_union,%rdi
        movq  $64,%rcx          /* sizeof (union xen_start_info_union) / sizeof (long) */
@@ -52,6 +51,7 @@
        cld
 #endif /* CONFIG_SMP */
 
+       movq init_rsp(%rip),%rsp
        /* zero EFLAGS after setting rsp */
        pushq $0
        popfq
@@ -204,6 +204,7 @@
        .quad   0,0                     /* TSS */
        .quad   0,0                     /* LDT */
        .quad   0,0,0                   /* three TLS descriptors */ 
+       .quad   0                       /* unused now?   __KERNEL16_CS - 16bit PM for S3 wakeup. */
 
 gdt_end:       
        /* asm/segment.h:GDT_ENTRIES must match this */ 
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c Thu Aug 11 08:59:47 2005
@@ -21,6 +21,11 @@
 
 atomic_t irq_err_count;
 
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
 
 /*
  * Generic, controller-independent functions:
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Thu Aug 11 08:59:47 2005
@@ -200,12 +200,14 @@
 #define ADAPTER_ROM_RESOURCES \
        (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
 
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 static struct resource video_rom_resource = {
        .name = "Video ROM",
        .start = 0xc0000,
        .end = 0xc7fff,
        .flags = IORESOURCE_ROM,
 };
+#endif
 
 static struct resource video_ram_resource = {
        .name = "Video RAM area",
@@ -599,6 +601,19 @@
         }
 }
 
+#ifdef CONFIG_XEN
+void __init smp_alloc_memory(void)
+{
+       int cpu;
+
+       for (cpu = 1; cpu < NR_CPUS; cpu++) {
+               cpu_gdt_descr[cpu].address = (unsigned long)
+                       alloc_bootmem_low_pages(PAGE_SIZE);
+               /* XXX free unused pages later */
+       }
+}
+#endif
+
 void __init setup_arch(char **cmdline_p)
 {
        int i, j;
@@ -739,6 +754,11 @@
                        initrd_start = 0;
                }
        }
+#endif
+#ifdef CONFIG_SMP
+#ifdef CONFIG_XEN
+       smp_alloc_memory();
+#endif
 #endif
        paging_init();
 #ifdef CONFIG_X86_LOCAL_APIC
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c     Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c     Thu Aug 11 08:59:47 2005
@@ -276,9 +276,11 @@
         * Initialize the per-CPU GDT with the boot GDT,
         * and set up the GDT descriptor:
         */
+#ifndef CONFIG_XEN 
        if (cpu) {
                memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
        }       
+#endif
 
        cpu_gdt_descr[cpu].size = GDT_SIZE;
        cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c Thu Aug 11 08:59:47 2005
@@ -28,7 +28,12 @@
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/apicdef.h>
-
+#ifdef CONFIG_XEN
+#include <asm-xen/evtchn.h>
+
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
+
+#else
 /*
  *     Smarter SMP flushing macros. 
  *             c/o Linus Torvalds.
@@ -44,6 +49,7 @@
 static unsigned long flush_va;
 static DEFINE_SPINLOCK(tlbstate_lock);
 #define FLUSH_ALL      -1ULL
+#endif
 
 /*
  * We cannot call mmdrop() because we are in interrupt context, 
@@ -57,6 +63,7 @@
        load_cr3(swapper_pg_dir);
 }
 
+#ifndef CONFIG_XEN
 /*
  *
  * The flush IPI assumes that a thread switch happens in this order:
@@ -250,6 +257,18 @@
 {
        on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
 }
+#else
+asmlinkage void smp_invalidate_interrupt (void)
+{ return; }
+void flush_tlb_current_task(void)
+{ xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
+void flush_tlb_mm (struct mm_struct * mm)
+{ xen_tlb_flush_mask(&mm->cpu_vm_mask); }
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
+void flush_tlb_all(void)
+{ xen_tlb_flush_all(); }
+#endif /* Xen */
 
 void smp_kdb_stop(void)
 {
@@ -310,13 +329,21 @@
 
        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
+#ifndef CONFIG_XEN
                cpu_relax();
+#else
+               barrier();
+#endif
 
        if (!wait)
                return;
 
        while (atomic_read(&data.finished) != cpus)
+#ifndef CONFIG_XEN
                cpu_relax();
+#else
+               barrier();
+#endif
 }
 
 /*
@@ -350,7 +377,11 @@
         */
        cpu_clear(smp_processor_id(), cpu_online_map);
        local_irq_disable();
+#ifndef CONFIG_XEN
        disable_local_APIC();
+#else
+       xxprint("stop_this_cpu disable_local_APIC\n");
+#endif
        local_irq_enable(); 
 }
 
@@ -364,8 +395,10 @@
 void smp_send_stop(void)
 {
        int nolock = 0;
+#ifndef CONFIG_XEN
        if (reboot_force)
                return;
+#endif
        /* Don't deadlock on the call lock in panic */
        if (!spin_trylock(&call_lock)) {
                /* ignore locking because we have paniced anyways */
@@ -376,7 +409,11 @@
                spin_unlock(&call_lock);
 
        local_irq_disable();
+#ifdef CONFIG_XEN
+       xxprint("stop_this_cpu disable_local_APIC\n");
+#else
        disable_local_APIC();
+#endif
        local_irq_enable();
 }
 
@@ -385,18 +422,32 @@
  * all the work is done automatically when
  * we return from the interrupt.
  */
+#ifndef CONFIG_XEN
 asmlinkage void smp_reschedule_interrupt(void)
-{
+#else
+asmlinkage irqreturn_t smp_reschedule_interrupt(void)
+#endif
+{
+#ifndef CONFIG_XEN
        ack_APIC_irq();
-}
-
+#else
+       return IRQ_HANDLED;
+#endif
+}
+
+#ifndef CONFIG_XEN
 asmlinkage void smp_call_function_interrupt(void)
+#else
+asmlinkage irqreturn_t smp_call_function_interrupt(void)
+#endif
 {
        void (*func) (void *info) = call_data->func;
        void *info = call_data->info;
        int wait = call_data->wait;
 
+#ifndef CONFIG_XEN
        ack_APIC_irq();
+#endif
        /*
         * Notify initiating CPU that I've grabbed the data and am
         * about to execute the function
@@ -413,10 +464,16 @@
                mb();
                atomic_inc(&call_data->finished);
        }
+#ifdef CONFIG_XEN
+       return IRQ_HANDLED;
+#endif
 }
 
 int safe_smp_processor_id(void)
 {
+#ifdef CONFIG_XEN
+       return smp_processor_id();
+#else
        int apicid, i;
 
        if (disable_apic)
@@ -437,4 +494,5 @@
                return 0;
 
        return 0; /* Should not happen */
-}
+#endif
+}
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c     Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c     Thu Aug 11 08:59:47 2005
@@ -47,6 +47,9 @@
 #include <linux/bootmem.h>
 #include <linux/thread_info.h>
 #include <linux/module.h>
+#ifdef CONFIG_XEN
+#include <linux/interrupt.h>
+#endif
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
@@ -57,11 +60,20 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 #include <asm/nmi.h>
+#ifdef CONFIG_XEN
+#include <asm/arch_hooks.h>
+
+#include <asm-xen/evtchn.h>
+#endif
 
 /* Change for real CPU hotplug. Note other files need to be fixed
    first too. */
 #define __cpuinit __init
 #define __cpuinitdata __initdata
+
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+       unsigned int maxcpus = NR_CPUS;
+#endif
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
@@ -96,6 +108,7 @@
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_core_map);
 
+#ifndef CONFIG_XEN
 /*
  * Trampoline 80x86 program as an array.
  */
@@ -115,6 +128,7 @@
        memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
        return virt_to_phys(tramp);
 }
+#endif
 
 /*
  * The bootstrap kernel entry code has set these up. Save them for
@@ -130,6 +144,7 @@
        print_cpu_info(c);
 }
 
+#ifndef CONFIG_XEN
 /*
  * New Funky TSC sync algorithm borrowed from IA64.
  * Main advantage is that it doesn't reset the TSCs fully and
@@ -331,6 +346,7 @@
        return 0;
 }
 __setup("notscsync", notscsync_setup);
+#endif
 
 static atomic_t init_deasserted __cpuinitdata;
 
@@ -343,6 +359,7 @@
        int cpuid, phys_id;
        unsigned long timeout;
 
+#ifndef CONFIG_XEN
        /*
         * If waken up by an INIT in an 82489DX configuration
         * we may get here before an INIT-deassert IPI reaches
@@ -352,10 +369,15 @@
        while (!atomic_read(&init_deasserted))
                cpu_relax();
 
+#endif
        /*
         * (This works even if the APIC is not enabled.)
         */
+#ifndef CONFIG_XEN
        phys_id = GET_APIC_ID(apic_read(APIC_ID));
+#else
+       phys_id = smp_processor_id();
+#endif
        cpuid = smp_processor_id();
        if (cpu_isset(cpuid, cpu_callin_map)) {
                panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
@@ -389,6 +411,7 @@
                        cpuid);
        }
 
+#ifndef CONFIG_XEN
        /*
         * the boot CPU has finished the init stage and is spinning
         * on callin_map until we finish. We are free to set up this
@@ -398,6 +421,7 @@
 
        Dprintk("CALLIN, before setup_local_APIC().\n");
        setup_local_APIC();
+#endif
 
        /*
         * Get our bogomips.
@@ -405,7 +429,9 @@
        calibrate_delay();
        Dprintk("Stack at about %p\n",&cpuid);
 
+#ifndef CONFIG_XEN
        disable_APIC_timer();
+#endif
 
        /*
         * Save our processor parameters
@@ -417,6 +443,29 @@
         */
        cpu_set(cpuid, cpu_callin_map);
 }
+
+#ifdef CONFIG_XEN
+static irqreturn_t ldebug_interrupt(
+       int irq, void *dev_id, struct pt_regs *regs)
+{
+       return IRQ_HANDLED;
+}
+
+static DEFINE_PER_CPU(int, ldebug_irq);
+static char ldebug_name[NR_CPUS][15];
+
+void ldebug_setup(void)
+{
+       int cpu = smp_processor_id();
+
+       per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
+       sprintf(ldebug_name[cpu], "ldebug%d", cpu);
+       BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
+                          SA_INTERRUPT, ldebug_name[cpu], NULL));
+}
+
+extern void local_setup_timer(void);
+#endif
 
 /*
  * Setup code on secondary processor (after comming out of the trampoline)
@@ -434,6 +483,7 @@
        /* otherwise gcc will move up the smp_processor_id before the cpu_init */
        barrier();
 
+#ifndef CONFIG_XEN
        Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());         
        setup_secondary_APIC_clock();
 
@@ -446,6 +496,12 @@
        }
 
        enable_APIC_timer();
+#else
+       local_setup_timer();
+       ldebug_setup();
+       smp_intr_init();
+       local_irq_enable();
+#endif
 
        /*
         * Allow the master to continue.
@@ -453,10 +509,12 @@
        cpu_set(smp_processor_id(), cpu_online_map);
        mb();
 
+#ifndef CONFIG_XEN
        /* Wait for TSC sync to not schedule things before.
           We still process interrupts, which could see an inconsistent
           time in that window unfortunately. */
        tsc_sync_wait();
+#endif
 
        cpu_idle();
 }
@@ -464,6 +522,7 @@
 extern volatile unsigned long init_rsp;
 extern void (*initial_code)(void);
 
+#ifndef CONFIG_XEN
 #if APIC_DEBUG
 static void inquire_remote_apic(int apicid)
 {
@@ -627,6 +686,7 @@
 
        return (send_status | accept_status);
 }
+#endif
 
 /*
  * Boot one CPU.
@@ -637,6 +697,14 @@
        unsigned long boot_error;
        int timeout;
        unsigned long start_rip;
+#ifdef CONFIG_XEN
+       vcpu_guest_context_t ctxt;
+       extern void startup_64_smp(void);
+       extern void hypervisor_callback(void);
+       extern void failsafe_callback(void);
+       extern void smp_trap_init(trap_info_t *);
+       int i;
+#endif
        /*
         * We can't use kernel_thread since we must avoid to
         * reschedule the child.
@@ -649,7 +717,11 @@
 
        cpu_pda[cpu].pcurrent = idle;
 
+#ifndef CONFIG_XEN
        start_rip = setup_trampoline();
+#else
+       start_rip = (unsigned long)startup_64_smp;
+#endif
 
        init_rsp = idle->thread.rsp;
        per_cpu(init_tss,cpu).rsp0 = init_rsp;
@@ -666,6 +738,93 @@
 
        atomic_set(&init_deasserted, 0);
 
+#ifdef CONFIG_XEN
+       if (cpu_gdt_descr[0].size > PAGE_SIZE)
+               BUG();
+       cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+       memcpy((void *)cpu_gdt_descr[cpu].address,
+               (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+
+       memset(&ctxt, 0, sizeof(ctxt));
+
+       ctxt.flags = VGCF_IN_KERNEL;
+       ctxt.user_regs.ds = __USER_DS;
+       ctxt.user_regs.es = __USER_DS;
+       ctxt.user_regs.fs = 0;
+       ctxt.user_regs.gs = 0;
+       ctxt.user_regs.ss = __KERNEL_DS|0x3;
+       ctxt.user_regs.cs = __KERNEL_CS|0x3;
+       ctxt.user_regs.rip = start_rip;
+       ctxt.user_regs.rsp = idle->thread.rsp;
+#define X86_EFLAGS_IOPL_RING3 0x3000
+       ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING3;
+
+       /* FPU is set up to default initial state. */
+       memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+       /* Virtual IDT is empty at start-of-day. */
+       for ( i = 0; i < 256; i++ )
+       {
+               ctxt.trap_ctxt[i].vector = i;
+               ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
+       }
+       smp_trap_init(ctxt.trap_ctxt);
+
+       /* No LDT. */
+       ctxt.ldt_ents = 0;
+
+       {
+               unsigned long va;
+               int f;
+
+               for (va = cpu_gdt_descr[cpu].address, f = 0;
+                    va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+                    va += PAGE_SIZE, f++) {
+                       ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+                       make_page_readonly((void *)va);
+               }
+               ctxt.gdt_ents = GDT_ENTRIES;
+       }
+
+       /* Ring 1 stack is the initial stack. */
+       ctxt.kernel_ss = __KERNEL_DS;
+       ctxt.kernel_sp = idle->thread.rsp;
+
+       /* Callback handlers. */
+       ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
+       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+       ctxt.syscall_callback_eip  = (unsigned long)system_call;
+
+       ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(init_level4_pgt);
+
+       boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+
+       if (!boot_error) {
+               /*
+                * allow APs to start initializing.
+                */
+               Dprintk("Before Callout %d.\n", cpu);
+               cpu_set(cpu, cpu_callout_map);
+               Dprintk("After Callout %d.\n", cpu);
+
+               /*
+                * Wait 5s total for a response
+                */
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       if (cpu_isset(cpu, cpu_callin_map))
+                               break;  /* It has booted */
+                       udelay(100);
+               }
+
+               if (cpu_isset(cpu, cpu_callin_map)) {
+                       /* number CPUs logically, starting from 1 (BSP is 0) */
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+               }
+       }
+       x86_cpu_to_apicid[cpu] = apicid;
+#else
        Dprintk("Setting warm reset code and vector.\n");
 
        CMOS_WRITE(0xa, 0xf);
@@ -729,6 +888,7 @@
 #endif
                }
        }
+#endif
        if (boot_error) {
                cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
                clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
@@ -790,6 +950,7 @@
        }
 }
 
+#ifndef CONFIG_XEN
 /*
  * Cleanup possible dangling ends...
  */
@@ -817,6 +978,7 @@
        free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
 #endif
 }
+#endif
 
 /*
  * Fall back to non SMP mode after errors.
@@ -827,10 +989,12 @@
 {
        cpu_present_map = cpumask_of_cpu(0);
        cpu_possible_map = cpumask_of_cpu(0);
+#ifndef CONFIG_XEN
        if (smp_found_config)
                phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
        else
                phys_cpu_present_map = physid_mask_of_physid(0);
+#endif
        cpu_set(0, cpu_sibling_map[0]);
        cpu_set(0, cpu_core_map[0]);
 }
@@ -857,6 +1021,7 @@
  */
 static int __cpuinit smp_sanity_check(unsigned max_cpus)
 {
+#ifndef CONFIG_XEN
        if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
                printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
                       hard_smp_processor_id());
@@ -896,13 +1061,19 @@
                nr_ioapics = 0;
                return -1;
        }
+#endif
 
        /*
         * If SMP should be disabled, then really disable it!
         */
        if (!max_cpus) {
+#ifdef CONFIG_XEN
+               HYPERVISOR_shared_info->n_vcpu = 1;
+#endif
                printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+#ifndef CONFIG_XEN
                nr_ioapics = 0;
+#endif
                return -1;
        }
 
@@ -917,7 +1088,10 @@
 {
        int i;
 
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+#else
        nmi_watchdog_default();
+#endif
        current_cpu_data = boot_cpu_data;
        current_thread_info()->cpu = 0;  /* needed? */
 
@@ -927,8 +1101,12 @@
         * Fill in cpu_present_mask
         */
        for (i = 0; i < NR_CPUS; i++) {
+#ifndef CONFIG_XEN
                int apicid = cpu_present_to_apicid(i);
                if (physid_isset(apicid, phys_cpu_present_map)) {
+#else
+               if (i < HYPERVISOR_shared_info->n_vcpu) {
+#endif
                        cpu_set(i, cpu_present_map);
                        /* possible map would be different if we supported real
                           CPU hotplug. */
@@ -942,6 +1120,9 @@
                return;
        }
 
+#ifdef CONFIG_XEN
+       smp_intr_init();
+#else
 
        /*
         * Switch from PIC to APIC mode.
@@ -954,20 +1135,26 @@
                      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
                /* Or can we switch back to PIC here? */
        }
+#endif
 
        /*
         * Now start the IO-APICs
         */
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+#else
        if (!skip_ioapic_setup && nr_ioapics)
                setup_IO_APIC();
        else
                nr_ioapics = 0;
+#endif
 
        /*
         * Set up local APIC timer on boot CPU.
         */
 
+#ifndef CONFIG_XEN
        setup_boot_APIC_clock();
+#endif
 }
 
 /*
@@ -989,17 +1176,23 @@
 int __cpuinit __cpu_up(unsigned int cpu)
 {
        int err;
+#ifndef CONFIG_XEN
        int apicid = cpu_present_to_apicid(cpu);
+#else
+       int apicid = cpu;
+#endif
 
        WARN_ON(irqs_disabled());
 
        Dprintk("++++++++++++++++++++=_---CPU UP  %u\n", cpu);
 
+#ifndef CONFIG_XEN
        if (apicid == BAD_APICID || apicid == boot_cpu_id ||
            !physid_isset(apicid, phys_cpu_present_map)) {
                printk("__cpu_up: bad cpu %d\n", cpu);
                return -EINVAL;
        }
+#endif
 
        /* Boot it! */
        err = do_boot_cpu(cpu, apicid);
@@ -1021,15 +1214,76 @@
  */
 void __cpuinit smp_cpus_done(unsigned int max_cpus)
 {
+#ifndef CONFIG_XEN
        zap_low_mappings();
        smp_cleanup_boot();
 
 #ifdef CONFIG_X86_IO_APIC
        setup_ioapic_dest();
 #endif
+#endif
 
        detect_siblings();
+#ifndef CONFIG_XEN
        time_init_gtod();
 
        check_nmi_watchdog();
-}
+#endif
+}
+
+#ifdef CONFIG_XEN
+extern int bind_ipi_to_irq(int ipi);
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static char resched_name[NR_CPUS][15];
+static char callfunc_name[NR_CPUS][15];
+
+void smp_intr_init(void)
+{
+       int cpu = smp_processor_id();
+
+       per_cpu(resched_irq, cpu) =
+               bind_ipi_to_irq(RESCHEDULE_VECTOR);
+       sprintf(resched_name[cpu], "resched%d", cpu);
+       BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
+                          SA_INTERRUPT, resched_name[cpu], NULL));
+
+       per_cpu(callfunc_irq, cpu) =
+               bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
+       sprintf(callfunc_name[cpu], "callfunc%d", cpu);
+       BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
+                          smp_call_function_interrupt,
+                          SA_INTERRUPT, callfunc_name[cpu], NULL));
+}
+
+static void smp_intr_exit(void)
+{
+       int cpu = smp_processor_id();
+
+       free_irq(per_cpu(resched_irq, cpu), NULL);
+       unbind_ipi_from_irq(RESCHEDULE_VECTOR);
+
+       free_irq(per_cpu(callfunc_irq, cpu), NULL);
+       unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
+}
+
+extern void local_setup_timer_irq(void);
+extern void local_teardown_timer_irq(void);
+
+void smp_suspend(void)
+{
+       /* XXX todo: take down time and ipi's on all cpus */
+       local_teardown_timer_irq();
+       smp_intr_exit();
+}
+
+void smp_resume(void)
+{
+       /* XXX todo: restore time and ipi's on all cpus */
+       smp_intr_init();
+       local_setup_timer_irq();
+}
+#endif
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c       Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c       Thu Aug 11 08:59:47 2005
@@ -953,6 +953,17 @@
        cpu_init();
 }
 
+void smp_trap_init(trap_info_t *trap_ctxt)
+{
+       trap_info_t *t = trap_table;
+
+       for (t = trap_table; t->address; t++) {
+               trap_ctxt[t->vector].flags = t->flags;
+               trap_ctxt[t->vector].cs = t->cs;
+               trap_ctxt[t->vector].address = t->address;
+       }
+}
+
 
 /* Actual parsing is done early in setup.c. */
 static int __init oops_dummy(char *s)
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S   Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S   Thu Aug 11 08:59:47 2005
@@ -8,11 +8,14 @@
 #define sizeof_vcpu_shift              3
 
 #ifdef CONFIG_SMP
-#define preempt_disable(reg)   incl threadinfo_preempt_count(reg)
-#define preempt_enable(reg)    decl threadinfo_preempt_count(reg)
+//#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
+//#define preempt_enable(reg)  decl threadinfo_preempt_count(reg)
+#define preempt_disable(reg)
+#define preempt_enable(reg)
 #define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp)                   ; \
                                movq %gs:pda_cpunumber,reg              ; \
-                               shl  $sizeof_vcpu_shift,reg             ; \
+                               shl  $32, reg                           ; \
+                               shr  $32-sizeof_vcpu_shift,reg          ; \
                                addq HYPERVISOR_shared_info,reg
 #define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp)                    ; \
 #define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c
--- /dev/null   Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c     Thu Aug 11 08:59:47 2005
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch probe layer.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+
+#if defined(CONFIG_ACPI_BUS)
+#include <acpi/acpi_bus.h>
+#endif
+
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+extern struct genapic apic_cluster;
+extern struct genapic apic_flat;
+
+#ifndef CONFIG_XEN
+struct genapic *genapic = &apic_flat;
+#else
+extern struct genapic apic_xen;
+struct genapic *genapic = &apic_xen;
+#endif
+
+
+/*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ */
+void __init clustered_apic_check(void)
+{
+#ifndef CONFIG_XEN
+       long i;
+       u8 clusters, max_cluster;
+       u8 id;
+       u8 cluster_cnt[NUM_APIC_CLUSTERS];
+
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+               /* AMD always uses flat mode right now */
+               genapic = &apic_flat;
+               goto print;
+       }
+
+#if defined(CONFIG_ACPI_BUS)
+       /*
+        * Some x86_64 machines use physical APIC mode regardless of how many
+        * procs/clusters are present (x86_64 ES7000 is an example).
+        */
+       if (acpi_fadt.revision > FADT2_REVISION_ID)
+               if (acpi_fadt.force_apic_physical_destination_mode) {
+                       genapic = &apic_cluster;
+                       goto print;
+               }
+#endif
+
+       memset(cluster_cnt, 0, sizeof(cluster_cnt));
+
+       for (i = 0; i < NR_CPUS; i++) {
+               id = bios_cpu_apicid[i];
+               if (id != BAD_APICID)
+                       cluster_cnt[APIC_CLUSTERID(id)]++;
+       }
+
+       clusters = 0;
+       max_cluster = 0;
+       for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+               if (cluster_cnt[i] > 0) {
+                       ++clusters;
+                       if (cluster_cnt[i] > max_cluster)
+                               max_cluster = cluster_cnt[i];
+               }
+       }
+
+       /*
+        * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
+        * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
+        * else physical mode.
+        * (We don't use lowest priority delivery + HW APIC IRQ steering, so
+        * can ignore the clustered logical case and go straight to physical.)
+        */
+       if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster)
+               genapic = &apic_flat;
+       else
+               genapic = &apic_cluster;
+
+print:
+#else
+       /* hardcode to xen apic functions */
+       genapic = &apic_xen;
+#endif
+       printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+}
+
+/* Same for both flat and clustered. */
+
+#ifdef CONFIG_XEN
+extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
+#endif
+
+void send_IPI_self(int vector)
+{
+#ifndef CONFIG_XEN
+       __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+#else
+       xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+#endif
+}
diff -r c66cac10524a -r d95ea17567c6 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c
--- /dev/null   Thu Aug 11 08:58:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c Thu Aug 11 08:59:47 2005
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Xen APIC subarch code.  Maximum 8 CPUs, logical delivery.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ *
+ * Hacked to pieces for Xen by Chris Wright.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#else
+#include <asm/apic.h>
+#include <asm/apicdef.h>
+#include <asm/genapic.h>
+#endif
+#include <asm-xen/evtchn.h>
+
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+static inline void __send_IPI_one(unsigned int cpu, int vector)
+{
+       unsigned int evtchn;
+       Dprintk("%s\n", __FUNCTION__);
+
+       evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+       if (evtchn)
+               notify_via_evtchn(evtchn);
+       else
+               printk("send_IPI to unbound port %d/%d", cpu, vector);
+}
+
+void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
+{
+       int cpu;
+
+       switch (shortcut) {
+       case APIC_DEST_SELF:
+               __send_IPI_one(smp_processor_id(), vector);
+               break;
+       case APIC_DEST_ALLBUT:
+               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+                       if (cpu == smp_processor_id())
+                               continue;
+                       if (cpu_isset(cpu, cpu_online_map)) {
+                               __send_IPI_one(cpu, vector);
+                       }
+               }
+               break;
+       case APIC_DEST_ALLINC:
+               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+                       if (cpu_isset(cpu, cpu_online_map)) {
+                               __send_IPI_one(cpu, vector);
+                       }
+               }
+               break;
+       default:
+               printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+                      vector);
+               break;
+       }
+}
+
+static cpumask_t xen_target_cpus(void)
+{
+       return cpu_online_map;
+}
+
+/*
+ * Set up the logical destination ID.
+ * Do nothing, not called now.
+ */
+static void xen_init_apic_ldr(void)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       return;
+}
+
+static void xen_send_IPI_allbutself(int vector)
+{
+       /*
+        * if there are no other CPUs in the system then
+        * we get an APIC send error if we try to broadcast.
+        * thus we have to avoid sending IPIs in this case.
+        */
+       Dprintk("%s\n", __FUNCTION__);
+       if (num_online_cpus() > 1)
+               xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL);
+}
+
+static void xen_send_IPI_all(int vector)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+}
+
+static void xen_send_IPI_mask(cpumask_t cpumask, int vector)
+{
+       unsigned long mask = cpus_addr(cpumask)[0];
+       unsigned int cpu;
+       unsigned long flags;
+
+       Dprintk("%s\n", __FUNCTION__);
+       local_irq_save(flags);
+       WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+
+       for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+               if (cpu_isset(cpu, cpumask)) {
+                       __send_IPI_one(cpu, vector);
+               }
+       }
+       local_irq_restore(flags);
+}
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+static int xen_apic_id_registered(void)
+{
+       /* better be set */
+       Dprintk("%s\n", __FUNCTION__);
+       return physid_isset(smp_processor_id(), phys_cpu_present_map);
+}
+#endif
+
+static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+       u32 ebx;
+
+       Dprintk("%s\n", __FUNCTION__);
+       ebx = cpuid_ebx(1);
+       return ((ebx >> 24) & 0xFF) >> index_msb;
+}
+
+struct genapic apic_xen =  {
+       .name = "xen",
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+       .int_delivery_mode = dest_LowestPrio,
+#endif
+       .int_dest_mode = (APIC_DEST_LOGICAL != 0),
+       .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
+       .target_cpus = xen_target_cpus,
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+       .apic_id_registered = xen_apic_id_registered,
+#endif
+       .init_apic_ldr = xen_init_apic_ldr,
+       .send_IPI_all = xen_send_IPI_all,
+       .send_IPI_allbutself = xen_send_IPI_allbutself,
+       .send_IPI_mask = xen_send_IPI_mask,
+       .cpu_mask_to_apicid = xen_cpu_mask_to_apicid,
+       .phys_pkg_id = phys_pkg_id,
+};

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.