[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] AP boot support


  • To: xen-devel@xxxxxxxxxxxxxxxxxxx
  • From: Kip Macy <kmacy@xxxxxxxxxx>
  • Date: Sat, 7 May 2005 21:36:21 -0700 (PDT)
  • Delivery-date: Sun, 08 May 2005 04:36:06 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/05/07 21:32:49-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx 
#   get AP booting working 
#   currently crashing in init_secondary - will fix after adding SMP debug support
#   Signed-off-by: Kip Macy <kmacy@xxxxxxxxxxx>
# 
# freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +4 -0
#   add declaration for per-cpu clock init
# 
# freebsd-5.3-xen-sparse/i386-xen/include/pmap.h
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +1 -0
#   make pmap_lazyfix_action global
# 
# freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +6 -1
#   add IPI fields
# 
# freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +16 -0
#   add boot_vcpu call
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +0 -2
#   make PANIC_IF declaration global
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +0 -1
#   make pmap_lazyfix_action global
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +229 -55
#   add support for booting APs
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +78 -46
#   do per-cpu GDT initialization up-front
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c
#   2005/05/07 21:32:47-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +15 -8
#   special case AST IPI
# 
# freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c
#   2005/05/07 21:32:46-07:00 kmacy@xxxxxxxxxxxxxxxxxxxx +60 -14
#   add per-cpu clock support
# 
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c  2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c  2005-05-06 21:37:31 -07:00
@@ -87,6 +87,12 @@
 
 /* XEN specific defines */
 #include <machine/xen_intr.h>
+#include <vm/vm.h>   /* needed by machine/pmap.h */
+#include <vm/pmap.h> /* needed by machine/pmap.h */
+#include <machine/pmap.h> /* needed by xen-os.h */
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen-os.h> /* needed by xenfunc.h */
+#include <machine/xenfunc.h>
 
 /*
  * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -129,7 +135,15 @@
 static uint32_t shadow_time_version;
 static struct timeval shadow_tv;
 
+#define DEFINE_PER_CPU(type, name) \
+    __typeof__(type) per_cpu__##name
+/* Stub per-CPU storage: DEFINE_PER_CPU defines ONE object per_cpu__<name>, shared by all CPUs. */
+#define per_cpu(var, cpu)           (*((void)cpu, &per_cpu__##var))
+/* NOTE(review): per_cpu() evaluates and discards `cpu` unparenthesized; pass simple expressions only. */
+
 static uint64_t processed_system_time;/* System time (ns) at last processing. */
+static DEFINE_PER_CPU(uint64_t, processed_system_time);
+
 
 #define NS_PER_TICK (1000000000ULL/hz)
 
@@ -202,18 +216,19 @@
 static void 
 clkintr(struct clockframe *frame)
 {
-    int64_t delta;
+    int64_t cpu_delta, delta;
+    int cpu = smp_processor_id();
     long ticks = 0;
 
-
     do {
        __get_time_values_from_xen();
-       delta = (int64_t)(shadow_system_time + 
-                         xen_get_offset() * 1000 - 
-                         processed_system_time);
+       delta = cpu_delta = (int64_t)shadow_system_time + 
+               (int64_t)xen_get_offset() * 1000;
+       delta -= processed_system_time;
+       cpu_delta -= per_cpu(processed_system_time, cpu);
     } while (!TIME_VALUES_UP_TO_DATE);
 
-    if (unlikely(delta < 0)) {
+    if (unlikely(delta < 0) || unlikely(cpu_delta < 0)) {
         printk("Timer ISR: Time went backwards: %lld\n", delta);
         return;
     }
@@ -225,15 +240,28 @@
         delta -= NS_PER_TICK;
         processed_system_time += NS_PER_TICK;
     }
-
-    if (ticks > 0) {
-       if (frame)
-               timer_func(frame);
-#ifdef SMP
-       if (timer_func == hardclock && frame)
-               forward_hardclock();
+    /* Local CPU jiffy work. */
+    while (cpu_delta >= NS_PER_TICK) {
+           cpu_delta -= NS_PER_TICK;
+           per_cpu(processed_system_time, cpu) += NS_PER_TICK;
+#if 0
+           update_process_times(user_mode(regs));
+           profile_tick(CPU_PROFILING, regs);
 #endif
     }
+    if (ticks > 0) {
+       if (frame) timer_func(frame);
+    }
+    
+    if (cpu != 0)
+           return;
+    /*
+     * Take synchronised time from Xen once a minute if we're not
+     * synchronised ourselves, and we haven't chosen to keep an independent
+     * time base.
+     */
+    
+    /* XXX TODO */
 }
 
 #include "opt_ddb.h"
@@ -429,7 +457,7 @@
  * Start clocks running.
  */
 void
-cpu_initclocks()
+cpu_initclocks(void)
 {
        int diag;
        int time_irq = bind_virq_to_irq(VIRQ_TIMER);
@@ -445,7 +473,25 @@
        /* initialize xen values */
        __get_time_values_from_xen();
        processed_system_time = shadow_system_time;
+       per_cpu(processed_system_time, 0) = processed_system_time;
+
+}
+/* Clock setup for a secondary CPU; called from the AP startup path after the smp_started wait. */
+#ifdef SMP 
+void
+ap_cpu_initclocks(void)
+{
+       int irq;
+       int cpu = smp_processor_id();
+       /* Seed from the cached shadow_system_time; it is not re-read from Xen in this function. */
+       per_cpu(processed_system_time, cpu) = shadow_system_time;
+       /* Bind VIRQ_TIMER to an irq, record it via PCPU_SET(time_irq), attach clkintr or panic. */
+       irq = bind_virq_to_irq(VIRQ_TIMER);
+       PCPU_SET(time_irq, irq);
+       PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr, 
+                                 NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
 }
+#endif
 
 void
 cpu_startprofclock(void)
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c 2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c 2005-05-06 21:37:31 -07:00
@@ -79,9 +79,14 @@
                 l2 &= ~(1 << l2i);
             
                 port = (l1i << 5) + l2i;
+               irq = evtchn_to_irq[port];
+#ifdef SMP             
+               if (irq == PCPU_GET(cpuast)) 
+                       continue;
+#endif
                 if ( (owned = mtx_owned(&sched_lock)) != 0 )
                     mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
-                if ( (irq = evtchn_to_irq[port]) != -1 ) {
+                if ( irq != -1 ) {
                    struct intsrc *isrc = intr_lookup_source(irq);
                    intr_execute_handlers(isrc, frame);
                } else {
@@ -584,6 +589,7 @@
         PCPU_GET(virq_to_irq)[i] = -1;
 }
 
+
 static void 
 evtchn_init(void *dummy __unused)
 {
@@ -591,13 +597,6 @@
     struct xenpic *xp;
     struct xenpic_intsrc *pin;
 
-    /*
-     * xenpic_lock: in order to allow an interrupt to occur in a critical
-     *                 section, to set pcpu->ipending (etc...) properly, we
-     *         must be able to get the icu lock, so it can't be
-     *         under witness.
-     */
-    mtx_init(&irq_mapping_update_lock, "xp", NULL, MTX_DEF);
 
     /* XXX -- expedience hack */
     PCPU_SET(virq_to_irq, (int *)&virq_to_irq[0]);
@@ -657,3 +656,11 @@
 }
 
 SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL);
+    /*
+     * xenpic_lock: in order to allow an interrupt to occur in a critical
+     *                 section, to set pcpu->ipending (etc...) properly, we
+     *         must be able to get the icu lock, so it can't be
+     *         under witness.
+     */
+
+MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", 
MTX_DEF|MTX_NOWITNESS);
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c        2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c        2005-05-06 21:37:31 -07:00
@@ -78,6 +78,7 @@
 #include <sys/sched.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
+#include <sys/smp.h>
 #include <sys/ucontext.h>
 #include <sys/vmmeter.h>
 #include <sys/bus.h>
@@ -883,14 +884,6 @@
 static void
 cpu_idle_default(void)
 {
-#if 0
-       /*
-        * we must absolutely guarentee that hlt is the
-        * absolute next instruction after sti or we
-        * introduce a timing window.
-        */
-       __asm __volatile("sti; hlt");
-#endif
        idle_block();
        enable_intr();
 }
@@ -1376,6 +1369,7 @@
 unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0));
 int preemptable;
 int gdt_set;
+static int ncpus;
 
 /* Linux infection */
 #define PAGE_OFFSET  KERNBASE
@@ -1387,6 +1381,10 @@
     int i;
     vm_paddr_t pdir_shadow_ma, KPTphys;
     vm_offset_t *pdir_shadow;
+#ifdef SMP
+    int j;
+#endif
+
 #ifdef WRITABLE_PAGETABLES
     printk("using writable pagetables\n");
     HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
@@ -1447,18 +1445,19 @@
 
 
 #ifdef SMP
+#if 0
     /* allocate cpu0 private page */
     cpu0prvpage = (KERNBASE + (tmpindex << PAGE_SHIFT));
     tmpindex++; 
-
+#endif
     /* allocate SMP page table */
     SMPpt = (unsigned long *)(KERNBASE + (tmpindex << PAGE_SHIFT));
-
+#if 0
     /* Map the private page into the SMP page table */
     SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A;
-
+#endif
     /* map SMP page table RO */
-    PT_SET_MA(SMPpt, vtomach(SMPpt) & ~PG_RW);
+    PT_SET_MA(SMPpt, *vtopte((vm_offset_t)SMPpt) & ~PG_RW);
 
     /* put the page table into the page directory */
     xen_queue_pt_update((vm_paddr_t)(IdlePTD + MPPTDI), 
@@ -1496,44 +1495,61 @@
     tmpindex++;
 
     HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned 
long)xen_phys_machine;
+    ncpus = HYPERVISOR_shared_info->n_vcpu; 
+#ifdef SMP
+    for (i = 0; i < ncpus; i++) {
+           int npages = (sizeof(struct privatespace) + 1)/PAGE_SIZE;
+           for (j = 0; j < npages; j++) {
+                   vm_paddr_t ma = xpmap_ptom(tmpindex << PAGE_SHIFT);
+                   tmpindex++;
+                   PT_SET_VA_MA(SMPpt + i*npages + j, ma | PG_A | PG_V | PG_RW 
| PG_M, FALSE);
+           }
+    }
+    xen_flush_queue();
+#endif
     
     init_first = tmpindex;
     
 }
 
+/* Formerly local to init386(); now file scope so mp_machdep.c (extern) can walk it for AP contexts. */
+trap_info_t trap_table[] = {
+       { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
+       { 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
+       { 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
+       { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
+       /* This is UPL on Linux and KPL on BSD */
+       { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
+       { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
+       { 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
+       /*
+        * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
+        *   no handler for double fault
+        */
+       { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
+       {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
+       {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
+       {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
+       {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
+       {14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
+       {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
+       {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
+       {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
+       {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
+       {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
+       {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
+       {  0, 0,           0, 0 } /* terminator: cpu_mp_trap_init() stops at address == 0 */
+};
+
 void
 init386(void)
 {
        int gsel_tss, metadata_missing, off, x, error;
        struct pcpu *pc;
        unsigned long gdtmachpfn;
-       trap_info_t trap_table[] = {
-           { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
-           { 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
-           { 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
-           { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
-           /* This is UPL on Linux and KPL on BSD */
-           { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
-           { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
-           { 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
-           /*
-            * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) 
&IDTVEC(XXX)},
-            *   no handler for double fault
-            */
-           { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) 
&IDTVEC(fpusegm)},
-           {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
-           {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) 
&IDTVEC(missing)},
-           {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
-           {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
-           {14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
-           {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
-           {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
-           {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
-           {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
-           {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
-           {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) 
&IDTVEC(int0x80_syscall)},
-           {  0, 0,           0, 0 }
-        };
+#ifdef SMP
+       int i;
+#endif
        proc0.p_uarea = proc0uarea;
        thread0.td_kstack = proc0kstack;
        thread0.td_pcb = (struct pcb *)
@@ -1583,26 +1599,42 @@
        gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 
16))); 
 #endif
 #ifdef SMP
-       /* this correspond to the cpu private page as mapped into the SMP page 
-        * table in initvalues
+       /* XXX this will blow up if there are more than 512/NGDT vcpus - will never
+        * be an issue in the real world, but we should add an assert on general
+        * principles. We'll likely blow up when we hit LAST_RESERVED_GDT_ENTRY, at
+        * which point we would need to start allocating more pages for the GDT.
         */
        pc = &SMP_prvspace[0].pcpu;
-       gdt_segs[GPRIV_SEL].ssd_limit =
-               atop(sizeof(struct privatespace) - 1);
+       for (i = 0; i < ncpus; i++) {
+               cpu_add(i, (i == 0));
+
+               gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[i];
+               gdt_segs[GPRIV_SEL].ssd_limit =
+                       atop(sizeof(struct privatespace) - 1);
+               gdt_segs[GPROC0_SEL].ssd_base =
+                       (int) &SMP_prvspace[i].pcpu.pc_common_tss;
+               SMP_prvspace[i].pcpu.pc_prvspace =
+                       &SMP_prvspace[i].pcpu;
+               
+               for (x = 0; x < NGDT; x++) {
+                       ssdtosd(&gdt_segs[x], &gdt[i * NGDT + x].sd);
+               }
+       }
 #else
        pc = &__pcpu;
        gdt_segs[GPRIV_SEL].ssd_limit =
                atop(sizeof(struct pcpu) - 1);
-#endif
        gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
        gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
        for (x = 0; x < NGDT; x++)
            ssdtosd(&gdt_segs[x], &gdt[x].sd);
+#endif
+
 
        PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~PG_RW);
        gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
-       if ((error = HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 
1))) 
-           panic("set_gdt failed");
+       PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) 
!= 0);
+
        
        lgdt_finish();
        gdt_set = 1;
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c     2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c     2005-05-06 21:37:31 -07:00
@@ -83,7 +83,16 @@
 #include <machine/specialreg.h>
 #include <machine/privatespace.h>
 
+
+/* XEN includes */
 #include <machine/xenfunc.h>
+#include <machine/xen_intr.h>
+
+void Xhypervisor_callback(void);
+void failsafe_callback(void);
+
+/***************/
+
 
 #define WARMBOOT_TARGET                0
 #define WARMBOOT_OFF           (KERNBASE + 0x0467)
@@ -94,6 +103,10 @@
 #define BIOS_RESET             (0x0f)
 #define BIOS_WARM              (0x0a)
 
+
+#undef POSTCODE
+#define POSTCODE(x)
+
 /*
  * this code MUST be enabled here and in mpboot.s.
  * it follows the very early stages of AP boot by placing values in CMOS ram.
@@ -175,6 +188,8 @@
 /* SMP page table page */
 extern pt_entry_t *SMPpt;
 
+extern trap_info_t trap_table[];
+
 struct pcb stoppcbs[MAXCPU];
 
 /* Variables needed for SMP tlb shootdown. */
@@ -208,7 +223,9 @@
 
 static void    set_logical_apic_ids(void);
 static int     start_all_aps(void);
+#if 0
 static void    install_ap_tramp(void);
+#endif
 static int     start_ap(int apic_id);
 static void    release_aps(void *dummy);
 
@@ -314,6 +331,7 @@
 cpu_mp_probe(void)
 {
 
+       mp_ncpus = HYPERVISOR_shared_info->n_vcpu;
        /*
         * Always record BSP in CPU map so that the mbuf init code works
         * correctly.
@@ -342,20 +360,24 @@
        return (1);
 }
 
-/*
- * Initialize the IPI handlers and start up the AP's.
- */
-void
-cpu_mp_start(void)
+static void
+cpu_mp_ipi_init(void)
 {
-       int i;
-
-       POSTCODE(MP_START_POST);
-
-       /* Initialize the logical ID to APIC ID table. */
-       for (i = 0; i < MAXCPU; i++)
-               cpu_apic_ids[i] = -1;
-
+       int irq;
+       int cpu = smp_processor_id();
+       /* 
+        * these are not needed by XenFreeBSD - from Keir:
+        * For TLB-flush related IPIs, Xen has hypercalls 
+        * you should use instead. You can pass a pointer 
+        * to a vcpu bitmap to update_va_mapping(), and to
+        * MMUEXT_flush_tlb_multi and MMUEXT_invlpg_multi. 
+        * Xen will then make sure that those vcpus get 
+        * flushed appropriately before returning to the
+        * caller.
+        * There is also no indication that we need to forward
+        * clock interrupts.
+        */
+#if 0 
        /* Install an inter-CPU IPI for TLB invalidation */
        setidt(IPI_INVLTLB, IDTVEC(invltlb),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
@@ -371,22 +393,69 @@
        /* Install an inter-CPU IPI for forwarding statclock() */
        setidt(IPI_STATCLOCK, IDTVEC(statclock),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-       
+#endif
+
+       /* 
+        * These can all be consolidated. For now leaving 
+        * as individual IPIs.
+        *
+        */
+#if 0
        /* Install an inter-CPU IPI for lazy pmap release */
        setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#else
+       irq = bind_ipi_on_cpu_to_irq(cpu, IPI_LAZYPMAP);
+       PCPU_SET(lazypmap, irq);
+       PANIC_IF(intr_add_handler("pmap_lazyfix", irq, 
+                                 (driver_intr_t *)pmap_lazyfix_action, 
+                                 NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
+#endif
 
+#if 0
        /* Install an inter-CPU IPI for all-CPU rendezvous */
        setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#else 
+       irq = bind_ipi_on_cpu_to_irq(cpu, IPI_RENDEZVOUS);
+       PCPU_SET(rendezvous, irq);
+       PANIC_IF(intr_add_handler("smp_rendezvous", irq, 
+                                 (driver_intr_t *)smp_rendezvous_action, 
+                                 NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
+#endif
 
+#if 0
        /* Install an inter-CPU IPI for forcing an additional software trap */
        setidt(IPI_AST, IDTVEC(cpuast),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-
+#else
+       irq = bind_ipi_on_cpu_to_irq(cpu, IPI_AST);
+       PCPU_SET(cpuast, irq);
+#endif
+       /* XXX ignore for now */
+#if 0 
        /* Install an inter-CPU IPI for CPU stop/restart */
        setidt(IPI_STOP, IDTVEC(cpustop),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#endif
+
+}
+
+SYSINIT(ipi_setup, SI_SUB_INTR, SI_ORDER_ANY, cpu_mp_ipi_init, NULL);
+
+/*
+ * Initialize the IPI handlers and start up the AP's.
+ */
+void
+cpu_mp_start(void) /* --- Start here --- */
+{
+       int i;
+
+       POSTCODE(MP_START_POST);
+
+       /* Initialize the logical ID to APIC ID table. */
+       for (i = 0; i < MAXCPU; i++)
+               cpu_apic_ids[i] = -1;
 
 
        /* Set boot_cpu_id if needed. */
@@ -437,35 +506,44 @@
 void
 init_secondary(void)
 {
-       int     gsel_tss;
-       int     x, myid;
+       int     myid;
+       unsigned long gdtmachpfn;
+       printk("MADE IT!!");
+
 #if 0
        u_int   cr0;
 #endif
+       /* Steps to booting SMP on xen as gleaned from XenLinux:
+        * - cpu_init() - processor specific initialization
+        * - smp_callin() 
+        *    - wait 2s for BP to finish its startup sequence
+        *    - map_cpu_to_logical_apicid()
+        *    - save cpuid info
+        *    - set bit in callin map to let master (BP?) continue
+        * - local setup timer() - per cpu timer initialization
+        * - ldebug_setup() - bind debug IRQ to local CPU.
+        * - smp_intr_init() - IPI setup that we do in cpu_mp_start
+        * - local_irq_enable() - enable interrupts locally
+        * - cpu_set(id, map) - announce that we're up
+        * - cpu_idle() - make us schedulable
+        */
+
+
        /* bootAP is set in start_ap() to our ID. */
        myid = bootAP;
-       gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
-       gdt_segs[GPROC0_SEL].ssd_base =
-               (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
-       SMP_prvspace[myid].pcpu.pc_prvspace =
-               &SMP_prvspace[myid].pcpu;
 
-       for (x = 0; x < NGDT; x++) {
-               ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
-       }
+       gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
+       PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) 
!= 0); 
 
-#if 0
-       r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
-       r_gdt.rd_base = (int) &gdt[myid * NGDT];
-       lgdt(&r_gdt);                   /* does magic intra-segment return */
+       
+       lgdt_finish();
 
-       lidt(&r_idt);
-       lldt(_default_ldt);
-#endif
+       PCPU_SET(cpuid, myid);
+
+
+       set_user_ldt((struct mdproc *)_default_ldt);
        PCPU_SET(currentldt, _default_ldt);
 
-       gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
-       gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
        PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
        PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
        PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
@@ -557,6 +635,13 @@
        while (smp_started == 0)
                ia32_pause();
 
+       /* need to wait until now to setup the IPIs as SI_SUB_CPU is
+        * much earlier than SI_SUB_INTR
+        */  
+       ap_evtchn_init(myid);
+       ap_cpu_initclocks();
+       cpu_mp_ipi_init();
+
        /* ok, now grab sched_lock and enter the scheduler */
        mtx_lock_spin(&sched_lock);
 
@@ -610,28 +695,35 @@
 static int
 start_all_aps(void)
 {
-#ifndef PC98
-       u_char mpbiosreason;
-#endif
-       u_long mpbioswarmvec;
        struct pcpu *pc;
        char *stack;
-       uintptr_t kptbase;
-       int i, pg, apic_id, cpu;
+       int i, apic_id, cpu;
+
+       /* 
+        * This function corresponds most closely to 
+        * smp_boot_cpus in XenLinux - the sequence there 
+        * is:
+        * - check if SMP config is found - if not:
+        *     - clear the I/O APIC IRQs
+        *     - map cpu to logical apicid
+        *     - exit
+        * - smp_intr_init - IPI initialization
+        * - map cpu to logical apicid
+        * - boot each of the vcpus
+        * - clear and then construct the cpu sibling [logical CPUs] map.
+        *
+        */
 
        POSTCODE(START_ALL_APS_POST);
 
        mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
-
+#if 0
        /* install the AP 1st level boot code */
        install_ap_tramp();
 
        /* save the current value of the warm-start vector */
        mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
-#ifndef PC98
-       outb(CMOS_REG, BIOS_RESET);
-       mpbiosreason = inb(CMOS_DATA);
-#endif
+
 
        /* set up temporary P==V mapping for AP boot */
        /* XXX this is a hack, we should boot the AP on its own stack/PTD */
@@ -640,7 +732,7 @@
                PTD[i] = (pd_entry_t)(PG_V | PG_RW |
                    ((kptbase + i * PAGE_SIZE) & PG_FRAME));
        invltlb();
-
+#endif
        /* start each AP */
        for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
                if (!cpu_info[apic_id].cpu_present ||
@@ -650,7 +742,7 @@
 
                /* save APIC ID for this logical ID */
                cpu_apic_ids[cpu] = apic_id;
-
+#if 0
                /* first page of AP's private space */
                pg = cpu * i386_btop(sizeof(struct privatespace));
 
@@ -665,11 +757,14 @@
                for (i = 0; i < KSTACK_PAGES; i++)
                        SMPpt[pg + 1 + i] = (pt_entry_t)
                            (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
+#endif
+               pc = &SMP_prvspace[cpu].pcpu;
 
                /* prime data page for it to use */
                pcpu_init(pc, cpu, sizeof(struct pcpu));
                pc->pc_apic_id = apic_id;
 
+#if 0
                /* setup a vector to our boot code */
                *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
                *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
@@ -677,7 +772,7 @@
                outb(CMOS_REG, BIOS_RESET);
                outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
 #endif
-
+#endif
                bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
                    PAGE_SIZE];
                bootAP = cpu;
@@ -700,13 +795,10 @@
        /* build our map of 'other' CPUs */
        PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
 
+#if 0
        /* restore the warmstart vector */
        *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
-#ifndef PC98
-       outb(CMOS_REG, BIOS_RESET);
-       outb(CMOS_DATA, mpbiosreason);
 #endif
-
        /*
         * Set up the idle context for the BSP.  Similar to above except
         * that some was done by locore, some by pmap.c and some is implicit
@@ -739,7 +831,7 @@
 extern void MPentry(void);
 extern u_int MP_GDT;
 extern u_int mp_gdtbase;
-
+#if 0
 static void
 install_ap_tramp(void)
 {
@@ -791,6 +883,21 @@
        *dst16 = (u_int) boot_address & 0xffff;
        *dst8 = ((u_int) boot_address >> 16) & 0xff;
 }
+#endif
+/* Fill trap_ctxt[] (indexed by vector) from trap_table[]; returns 0x80, used as ctxt.fast_trap_idx. */
+static int 
+cpu_mp_trap_init(trap_info_t *trap_ctxt)
+{
+        /* The walk ends at the first trap_table[] entry whose address is 0. */
+        trap_info_t *t = trap_table;
+        /* .vector is not written here; start_ap() pre-sets it for all 256 slots. */
+        for (t = trap_table; t->address; t++) {
+                trap_ctxt[t->vector].flags = t->flags;
+                trap_ctxt[t->vector].cs = t->cs;
+                trap_ctxt[t->vector].address = t->address;
+        }
+        return 0x80 /*SYSCALL_VECTOR*/;
+}
 
 /*
  * This function starts the AP (application processor) identified
@@ -802,8 +909,25 @@
 static int
 start_ap(int apic_id)
 {
-       int vector, ms;
-       int cpus;
+       int vector, ms, i;
+       int cpus, boot_error;
+       vcpu_guest_context_t ctxt;
+
+       /* 
+        * This is the FreeBSD equivalent to do_boot_cpu(apicid) in
+        * smpboot.c. 
+        * its initialization sequence consists of:
+        * - fork_idle(cpu) to create separate idle context
+        * - initialization of idle's context to start_secondary
+        * - initialization of cpu ctxt to start in startup_32_smp
+        * - then we call HYPERVISOR_boot_vcpu with the cpu index and
+        *   a pointer to the context.
+        * - on boot success we:
+        *   - set ourselves in the callout_map
+        *   - wait up to 5 seconds for us to be set in the callin map
+        * - set x86_cpu_to_apicid[cpu] = apicid;
+        *
+        */
 
        POSTCODE(START_AP_POST);
 
@@ -813,6 +937,55 @@
        /* used as a watchpoint to signal AP startup */
        cpus = mp_naps;
 
+       memset(&ctxt, 0, sizeof(ctxt));
+
+       ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
+       ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
+       ctxt.user_regs.fs = 0;
+       ctxt.user_regs.gs = 0;
+       ctxt.user_regs.ss = __KERNEL_DS;
+       ctxt.user_regs.cs = __KERNEL_CS;
+       ctxt.user_regs.eip = (unsigned long)init_secondary;
+       ctxt.user_regs.esp = (unsigned long)bootSTK;
+#ifdef notyet
+       ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+#else
+       ctxt.user_regs.eflags = (1<<9) | (1<<2);
+#endif
+       /* FPU is set up to default initial state. */
+       memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+       /* Virtual IDT is empty at start-of-day. */
+       for ( i = 0; i < 256; i++ )
+       {
+               ctxt.trap_ctxt[i].vector = i;
+               ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
+       }
+       ctxt.fast_trap_idx = cpu_mp_trap_init(ctxt.trap_ctxt);
+
+       /* No LDT. */
+       ctxt.ldt_ents = 0;
+
+       /* Ring 1 stack is the initial stack. */
+       ctxt.kernel_ss = __KERNEL_DS;
+       ctxt.kernel_sp = (unsigned long)bootSTK;
+
+       /* Callback handlers. */
+       ctxt.event_callback_cs     = __KERNEL_CS;
+       ctxt.event_callback_eip    = (unsigned long)Xhypervisor_callback;
+       ctxt.failsafe_callback_cs  = __KERNEL_CS;
+       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+       ctxt.pt_base = (vm_paddr_t)IdlePTD;
+
+       boot_error = HYPERVISOR_boot_vcpu(bootAP, &ctxt);
+
+       
+       if (boot_error) 
+               printk("Houston we have a problem\n");
+       else
+               printk("boot_vcpu succeeded\n");
+#if 0
        /*
         * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
         * and running the target CPU. OR this INIT IPI might be latched (P5
@@ -862,6 +1035,7 @@
            APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
            vector, apic_id);
        lapic_ipi_wait(-1);
+#endif
        DELAY(200);             /* wait ~200uS */
 
        /* Wait up to 5 seconds for it to start. */
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c   2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c   2005-05-06 21:37:31 -07:00
@@ -1374,7 +1374,6 @@
 static u_int lazyptd;
 static volatile u_int lazywait;
 
-void pmap_lazyfix_action(void);
 
 void
 pmap_lazyfix_action(void)
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c    2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c    2005-05-06 21:37:31 -07:00
@@ -380,8 +380,6 @@
         (void)HYPERVISOR_console_write(buf, ret);
 }
 
-#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); 
panic("%s: %s:%d", #exp, __FILE__, __LINE__);} 
-
 
 #define XPQUEUE_SIZE 128
 #ifdef SMP
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h      2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h      2005-05-06 21:37:31 -07:00
@@ -441,4 +441,20 @@
     return ret;
 }
 
+static inline int
+HYPERVISOR_boot_vcpu(
+    unsigned long vcpu, vcpu_guest_context_t *ctxt)
+{
+    int ret;
+    unsigned long ign1, ign2; /* dummy outputs: mark ebx/ecx as overwritten by the trap */
+    /* boot_vcpu hypercall: op number in eax, vcpu in ebx, ctxt pointer in ecx; result comes back in eax. */
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+       : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
+       : "memory");
+    /* start_ap() treats a nonzero return as a boot failure. */
+    return ret;
+}
+
 #endif /* __HYPERVISOR_H__ */
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h    2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h    2005-05-06 21:37:31 -07:00
@@ -53,7 +53,12 @@
         int     *pc_ipi_to_evtchn;                                      \
         int     *pc_virq_to_irq;                                        \
         u_int   pc_cr2;                                                 \
-        u_int   pc_pdir                                        
+        u_int   pc_pdir;                                                \
+        u_int   pc_lazypmap;                                            \
+        u_int   pc_rendezvous;                                          \
+        u_int   pc_cpuast;                                              \
+        u_int   pc_time_irq;                                              \
+        uint64_t pc_processed_system_time;  
 
 #if defined(lint)
  
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h    2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h    2005-05-06 21:37:31 -07:00
@@ -343,6 +343,7 @@
 void   pmap_invalidate_page(pmap_t, vm_offset_t);
 void   pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
 void   pmap_invalidate_all(pmap_t);
+void    pmap_lazyfix_action(void);
 
 void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len);
 void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len);
diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h 2005-05-06 21:37:31 -07:00
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h 2005-05-06 21:37:31 -07:00
@@ -61,6 +61,9 @@
 void xen_machphys_update(unsigned long, unsigned long);
 void xen_update_descriptor(union descriptor *, union descriptor *);
 void lldt(u_short sel);
+void ap_cpu_initclocks(void);
+
+
 /*
  * Invalidate a patricular VA on all cpus
  *
@@ -79,5 +82,6 @@
        
 }
 
+#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} /* NOTE(review): bare if-block, not do { } while (0); do not follow a use with else */
 
 #endif /* _XEN_XENFUNC_H_ */


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.