[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [Xen-devel] x86_64 SMP status update



 
 
> This patch gets me up to init.  Using init=/bin/sh I actually 
> got the shell prompt, although it was unstable and died 
> shortly after an ls or two.  Note, linux-2.6.11-xen0 should 
> be populated before applying this patch.  Seems to be working 
> fine on a single CPU (i.e. maxcpus=1).
> This is a backport of my current patch (which is against a 
> 2.6.12 tree).

Chris -- thanks!

I think we'll try to get Vincent's 2.6.12 upgrade patch checked in within
the next day or two.

On the subject of x86_64 SMP, I personally believe the #1 priority is to
get x86_64 using writeable page tables. It'll be impossible to make SMP
work reliably until this is done.
 
The first step of this is just to stop set_pte using queue_l1_update and
have it write to the pte directly. Xen is currently hard-coded to
only ever use the emulation path for x86_64 (see ptwr_do_page_fault). I
suspect that letting it use the 'real' path will probably crash at first,
but that should be easy to fix up.

Ideally, we should also pull in the equivalent code from this changeset for
i386 so as to remove the static update queues altogether, and instead build
them on the stack as required:

 http://xen.bkbits.net:8080/xeno-unstable.bk/cset@xxxxxxxxxxxx

If we do more remerging with i386-xen we should just get this for free.

[BTW: in your SMP patch, any reason not to just merge with the i386-xen
smpboot.c?]

Please keep up the good work!

Thanks,
Ian 
 
> 
> Index: 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile
> ===================================================================
> --- 
> xen-unstable.orig/linux-2.6.11-xen-sparse/arch/xen/x86_64/kern
> el/Makefile
> +++ 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile
> @@ -25,10 +25,10 @@ obj-$(CONFIG_ACPI_BOOT)           += acpi/
>  c-obj-$(CONFIG_X86_MSR)              += msr.o
>  obj-$(CONFIG_MICROCODE)              += microcode.o
>  obj-$(CONFIG_X86_CPUID)              += cpuid.o
> -#obj-$(CONFIG_SMP)           += smp.o smpboot.o trampoline.o
> +obj-$(CONFIG_SMP)            += smp.o smpboot.o trampoline.o
>  obj-$(CONFIG_X86_LOCAL_APIC) += apic.o
>  c-obj-$(CONFIG_X86_LOCAL_APIC)       += nmi.o
> -obj-$(CONFIG_X86_IO_APIC)    += io_apic.o mpparse.o
> +obj-$(CONFIG_X86_IO_APIC)    += io_apic.o mpparse.o genapic_xen.o
>  c-obj-$(CONFIG_X86_IO_APIC)  += genapic.o genapic_cluster.o 
> genapic_flat.o
>  #obj-$(CONFIG_PM)            += suspend.o
>  #obj-$(CONFIG_SOFTWARE_SUSPEND)      += suspend_asm.o
> @@ -53,7 +53,7 @@ intel_cacheinfo-y           += ../../../i386/kern
>  quirks-y                     += ../../../i386/kernel/quirks.o
>  
>  c-link       := init_task.o
> -s-link       := vsyscall.o 
> +s-link       := vsyscall.o trampoline.o
>  
>  $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) 
> $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
>       @ln -fsn $(srctree)/arch/x86_64/kernel/$(notdir $@) $@
> Index: 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smp.c
> ===================================================================
> --- 
> xen-unstable.orig/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smp.c
> +++ xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smp.c
> @@ -24,8 +24,13 @@
>  #include <asm/mtrr.h>
>  #include <asm/pgalloc.h>
>  #include <asm/tlbflush.h>
> +#if 1 /* Xen */
>  #include <asm/mach_apic.h>
> +#endif
>  #include <asm/proto.h>
> +#include <asm-xen/evtchn.h>
> +
> +#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, 
> +strlen(msg), msg)
>  
>  /*
>   *   Smarter SMP flushing macros. 
> @@ -101,6 +106,7 @@ static inline void leave_mm (unsigned lo
>   * 2) Leave the mm if we are in the lazy tlb mode.
>   */
>  
> +#if 0 /* Xen */
>  asmlinkage void smp_invalidate_interrupt (void)  {
>       unsigned long cpu;
> @@ -249,6 +255,20 @@ void flush_tlb_all(void)
>       on_each_cpu(do_flush_tlb_all, NULL, 1, 1);  }
>  
> +#else
> +asmlinkage void smp_invalidate_interrupt (void) { return; } void 
> +flush_tlb_current_task(void) { 
> +xen_tlb_flush_mask(&current->mm->cpu_vm_mask); } void flush_tlb_mm 
> +(struct mm_struct * mm) { 
> xen_tlb_flush_mask(&mm->cpu_vm_mask); } void 
> +flush_tlb_page(struct vm_area_struct * vma, unsigned long va) { 
> +xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); } void 
> +flush_tlb_all(void) { xen_tlb_flush_all(); } #endif /* Xen */
> +
> +
>  void smp_kdb_stop(void)
>  {
>       send_IPI_allbutself(KDB_VECTOR);
> @@ -308,13 +328,13 @@ static void __smp_call_function (void (*
>  
>       /* Wait for response */
>       while (atomic_read(&data.started) != cpus)
> -             cpu_relax();
> +             barrier();
>  
>       if (!wait)
>               return;
>  
>       while (atomic_read(&data.finished) != cpus)
> -             cpu_relax();
> +             barrier();
>  }
>  
>  /*
> @@ -348,7 +368,11 @@ void smp_stop_cpu(void)
>        */
>       cpu_clear(smp_processor_id(), cpu_online_map);
>       local_irq_disable();
> +#if 1
> +     xxprint("stop_this_cpu disable_local_APIC\n"); #else
>       disable_local_APIC();
> +#endif
>       local_irq_enable();
>  }
>  
> @@ -362,8 +386,10 @@ static void smp_really_stop_cpu(void *du 
>  void smp_send_stop(void)  {
>       int nolock = 0;
> +#if 0
>       if (reboot_force)
>               return;
> +#endif
>       /* Don't deadlock on the call lock in panic */
>       if (!spin_trylock(&call_lock)) {
>               /* ignore locking because we have paniced 
> anyways */ @@ -380,18 +406,17 @@ void smp_send_stop(void)
>   * all the work is done automatically when
>   * we return from the interrupt.
>   */
> -asmlinkage void smp_reschedule_interrupt(void)
> +asmlinkage irqreturn_t smp_reschedule_interrupt(void)
>  {
> -     ack_APIC_irq();
> +     return IRQ_HANDLED;
>  }
>  
> -asmlinkage void smp_call_function_interrupt(void)
> +asmlinkage irqreturn_t smp_call_function_interrupt(void)
>  {
>       void (*func) (void *info) = call_data->func;
>       void *info = call_data->info;
>       int wait = call_data->wait;
>  
> -     ack_APIC_irq();
>       /*
>        * Notify initiating CPU that I've grabbed the data and am
>        * about to execute the function
> @@ -408,4 +433,6 @@ asmlinkage void smp_call_function_interr
>               mb();
>               atomic_inc(&call_data->finished);
>       }
> +     
> +     return IRQ_HANDLED;
>  }
> Index: 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
> ===================================================================
> --- 
> xen-unstable.orig/linux-2.6.11-xen-sparse/arch/xen/x86_64/kern
> el/smpboot.c
> +++ 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smpboot.
> +++ c
> @@ -53,6 +53,7 @@
>  #include <asm/kdebug.h>
>  #include <asm/tlbflush.h>
>  #include <asm/proto.h>
> +#include <asm/arch_hooks.h>
>  
>  /* Number of siblings per CPU package */  int 
> smp_num_siblings = 1; @@ -75,6 +76,7 @@ int smp_threads_ready;
>  
>  cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
>  
> +#if 0
>  /*
>   * Trampoline 80x86 program as an array.
>   */
> @@ -96,6 +98,7 @@ static unsigned long __init setup_trampo
>       memcpy(tramp, trampoline_data, trampoline_end - 
> trampoline_data);
>       return virt_to_phys(tramp);
>  }
> +#endif
>  
>  /*
>   * The bootstrap kernel entry code has set these up. Save 
> them for @@ -110,6 +113,7 @@ static void __init smp_store_cpu_info(in
>       identify_cpu(c);
>  }
>  
> +#if 0
>  /*
>   * TSC synchronization.
>   *
> @@ -246,6 +250,7 @@ static void __init synchronize_tsc_ap (v
>       }
>  }
>  #undef NR_LOOPS
> +#endif
>  
>  static atomic_t init_deasserted;
>  
> @@ -254,6 +259,7 @@ void __init smp_callin(void)
>       int cpuid, phys_id;
>       unsigned long timeout;
>  
> +#if 0
>       /*
>        * If waken up by an INIT in an 82489DX configuration
>        * we may get here before an INIT-deassert IPI reaches 
> @@ -261,11 +267,12 @@ void __init smp_callin(void)
>        * lock up on an APIC access.
>        */
>       while (!atomic_read(&init_deasserted));
> +#endif
>  
>       /*
>        * (This works even if the APIC is not enabled.)
>        */
> -     phys_id = GET_APIC_ID(apic_read(APIC_ID));
> +     phys_id = smp_processor_id();
>       cpuid = smp_processor_id();
>       if (cpu_isset(cpuid, cpu_callin_map)) {
>               panic("smp_callin: phys CPU#%d, CPU#%d already 
> present??\n", @@ -299,6 +306,7 @@ void __init smp_callin(void)
>                       cpuid);
>       }
>  
> +#if 0
>       /*
>        * the boot CPU has finished the init stage and is spinning
>        * on callin_map until we finish. We are free to set up 
> this @@ -308,6 +316,7 @@ void __init smp_callin(void)
>  
>       Dprintk("CALLIN, before setup_local_APIC().\n");
>       setup_local_APIC();
> +#endif
>  
>       local_irq_enable();
>  
> @@ -317,7 +326,9 @@ void __init smp_callin(void)
>       calibrate_delay();
>       Dprintk("Stack at about %p\n",&cpuid);
>  
> +#if 0
>       disable_APIC_timer();
> +#endif
>  
>       /*
>        * Save our processor parameters
> @@ -331,15 +342,39 @@ void __init smp_callin(void)
>        */
>       cpu_set(cpuid, cpu_callin_map);
>  
> +#if 0
>       /*
>        *      Synchronize the TSC with the BP
>        */
>       if (cpu_has_tsc)
>               synchronize_tsc_ap();
> +#endif
>  }
>  
>  int cpucount;
>  
> +#include <linux/interrupt.h>
> +static irqreturn_t ldebug_interrupt(
> +     int irq, void *dev_id, struct pt_regs *regs) {
> +     return IRQ_HANDLED;
> +}
> +
> +static DEFINE_PER_CPU(int, ldebug_irq); static char 
> +ldebug_name[NR_CPUS][15];
> +
> +void ldebug_setup(void)
> +{
> +     int cpu = smp_processor_id();
> +
> +     per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
> +     sprintf(ldebug_name[cpu], "ldebug%d", cpu);
> +     BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
> +                        SA_INTERRUPT, ldebug_name[cpu], NULL)); }
> +
> +extern void local_setup_timer(void);
> +
>  /*
>   * Activate a secondary processor.
>   */
> @@ -360,6 +395,7 @@ void __init start_secondary(void)
>       while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
>               rep_nop();
>  
> +#if 0
>       Dprintk("cpu %d: setting up apic clock\n", 
> smp_processor_id());  
>       setup_secondary_APIC_clock();
>  
> @@ -373,6 +409,12 @@ void __init start_secondary(void)
>  
>  
>       enable_APIC_timer(); 
> +#else
> +     local_setup_timer();
> +     ldebug_setup();
> +     smp_intr_init();
> +     local_irq_enable();
> +#endif
>  
>       /*
>        * low-memory mappings have been cleared, flush them 
> from @@ -428,6 +470,7 @@ static inline void 
> inquire_remote_apic(i  }  #endif
>  
> +#if 0
>  static int __init wakeup_secondary_via_INIT(int phys_apicid, 
> unsigned int start_rip)  {
>       unsigned long send_status = 0, accept_status = 0; @@ 
> -550,6 +593,7 @@ static int __init wakeup_secondary_via_I
>  
>       return (send_status | accept_status);
>  }
> +#endif
>  
>  static void __init do_boot_cpu (int apicid)  { @@ -557,6 
> +601,14 @@ static void __init do_boot_cpu (int apic
>       unsigned long boot_error;
>       int timeout, cpu;
>       unsigned long start_rip;
> +#if 1
> +     vcpu_guest_context_t ctxt;
> +     extern void startup_64_smp(void);
> +     extern void hypervisor_callback(void);
> +     extern void failsafe_callback(void);
> +     extern void smp_trap_init(trap_info_t *);
> +     int i;
> +#endif
>  
>       cpu = ++cpucount;
>       /*
> @@ -570,7 +622,7 @@ static void __init do_boot_cpu (int apic
>  
>       cpu_pda[cpu].pcurrent = idle;
>  
> -     start_rip = setup_trampoline();
> +     start_rip = (unsigned long)startup_64_smp;
>  
>       init_rsp = idle->thread.rsp; 
>       per_cpu(init_tss,cpu).rsp0 = init_rsp; @@ -587,6 
> +639,94 @@ static void __init do_boot_cpu (int apic
>  
>       atomic_set(&init_deasserted, 0);
>  
> +#if 1
> +     if (cpu_gdt_descr[0].size > PAGE_SIZE)
> +             BUG();
> +     cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
> +     memcpy((void *)cpu_gdt_descr[cpu].address,
> +             (void *)cpu_gdt_descr[0].address, 
> cpu_gdt_descr[0].size);
> +
> +     memset(&ctxt, 0, sizeof(ctxt));
> +
> +     ctxt.flags = VGCF_IN_KERNEL;
> +     ctxt.user_regs.ds = __USER_DS;
> +     ctxt.user_regs.es = __USER_DS;
> +     ctxt.user_regs.fs = 0;
> +     ctxt.user_regs.gs = 0;
> +     ctxt.user_regs.ss = __KERNEL_DS|0x3;
> +     ctxt.user_regs.cs = __KERNEL_CS|0x3;
> +     ctxt.user_regs.rip = start_rip;
> +     ctxt.user_regs.rsp = idle->thread.rsp;
> +     ctxt.user_regs.eflags = (1<<9) | (1<<2) | 
> (idle->thread.io_pl<<12);
> +
> +     /* FPU is set up to default initial state. */
> +     memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
> +
> +     /* Virtual IDT is empty at start-of-day. */
> +     for ( i = 0; i < 256; i++ )
> +     {
> +             ctxt.trap_ctxt[i].vector = i;
> +             ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
> +     }
> +     smp_trap_init(ctxt.trap_ctxt);
> +
> +     /* No LDT. */
> +     ctxt.ldt_ents = 0;
> +
> +     {
> +             unsigned long va;
> +             int f;
> +
> +             for (va = cpu_gdt_descr[cpu].address, f = 0;
> +                  va < cpu_gdt_descr[cpu].address + 
> cpu_gdt_descr[cpu].size;
> +                  va += PAGE_SIZE, f++) {
> +                     ctxt.gdt_frames[f] = 
> virt_to_machine(va) >> PAGE_SHIFT;
> +                     make_page_readonly((void *)va);
> +             }
> +             ctxt.gdt_ents = GDT_ENTRIES;
> +     }
> +
> +     /* Ring 1 stack is the initial stack. */
> +     ctxt.kernel_ss = __KERNEL_DS;
> +     ctxt.kernel_sp = idle->thread.rsp;
> +
> +     /* Callback handlers. */
> +     ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
> +     ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
> +
> +     ctxt.ctrlreg[3] = (unsigned 
> long)virt_to_machine(init_level4_pgt);
> +
> +     boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
> +
> +     if (!boot_error) {
> +             /*
> +              * allow APs to start initializing.
> +              */
> +             Dprintk("Before Callout %d.\n", cpu);
> +             cpu_set(cpu, cpu_callout_map);
> +             Dprintk("After Callout %d.\n", cpu);
> +
> +             /*
> +              * Wait 5s total for a response
> +              */
> +             for (timeout = 0; timeout < 50000; timeout++) {
> +                     if (cpu_isset(cpu, cpu_callin_map))
> +                             break;  /* It has booted */
> +                     udelay(100);
> +             }
> +
> +             if (cpu_isset(cpu, cpu_callin_map)) {
> +                     /* number CPUs logically, starting from 
> 1 (BSP is 0) */
> +                     Dprintk("OK.\n");
> +                     printk("CPU%d: ", cpu);
> +                     print_cpu_info(&cpu_data[cpu]);
> +                     Dprintk("CPU has booted.\n");
> +             } else {
> +                     boot_error= 1;
> +             }
> +     }
> +     x86_cpu_to_apicid[cpu] = apicid;
> +#else
>       Dprintk("Setting warm reset code and vector.\n");
>  
>       CMOS_WRITE(0xa, 0xf);
> @@ -652,6 +792,7 @@ static void __init do_boot_cpu (int apic  #endif
>               }
>       }
> +#endif
>       if (boot_error) {
>               cpu_clear(cpu, cpu_callout_map); /* was set 
> here (do_boot_cpu()) */
>               clear_bit(cpu, &cpu_initialized); /* was set by 
> cpu_init() */ @@ -709,9 +850,15 @@ static void 
> smp_tune_scheduling (void)
>   * Cycle through the processors sending APIC IPIs to boot each.
>   */
>  
> +/* XXX fix me */
> +#define time_init_smp()
> +
>  static void __init smp_boot_cpus(unsigned int max_cpus)  {
> -     unsigned apicid, cpu, bit, kicked;
> +     unsigned cpu, kicked;
> +#if 0
> +     unsigned apicid, bit;
> +#endif
>  
>       nmi_watchdog_default();
>  
> @@ -725,11 +872,13 @@ static void __init smp_boot_cpus(unsigne
>       current_thread_info()->cpu = 0;
>       smp_tune_scheduling();
>  
> +#if 0
>       if (!physid_isset(hard_smp_processor_id(), 
> phys_cpu_present_map)) {
>               printk("weird, boot CPU (#%d) not listed by the 
> BIOS.\n",
>                      hard_smp_processor_id());
>               physid_set(hard_smp_processor_id(), 
> phys_cpu_present_map);
>       }
> +#endif
>  
>       /*
>        * If we couldn't find an SMP configuration at boot 
> time, @@ -739,13 +888,16 @@ static void __init smp_boot_cpus(unsigne
>               printk(KERN_NOTICE "SMP motherboard not detected.\n");
>               io_apic_irqs = 0;
>               cpu_online_map = cpumask_of_cpu(0);
> +#if 0
>               phys_cpu_present_map = physid_mask_of_physid(0);
> +#endif
>               if (APIC_init_uniprocessor())
>                       printk(KERN_NOTICE "Local APIC not detected."
>                                          " Using dummy APIC 
> emulation.\n");
>               goto smp_done;
>       }
>  
> +#if 0
>       /*
>        * Should not be necessary because the MP table should 
> list the boot
>        * CPU too, but we do it for the sake of robustness anyway.
> @@ -771,51 +923,50 @@ static void __init smp_boot_cpus(unsigne
>       }
>  
>       verify_local_APIC();
> +#endif
>  
>       /*
>        * If SMP should be disabled, then really disable it!
>        */
>       if (!max_cpus) {
> -             smp_found_config = 0;
> +             HYPERVISOR_shared_info->n_vcpu = 1;
>               printk(KERN_INFO "SMP mode deactivated, forcing 
> use of dummy APIC emulation.\n");
>               io_apic_irqs = 0;
>               cpu_online_map = cpumask_of_cpu(0);
> +#if 0
>               phys_cpu_present_map = physid_mask_of_physid(0);
> +#endif
>               disable_apic = 1;
>               goto smp_done;
>       }
>  
> +     smp_intr_init();
> +
> +#if 0
>       connect_bsp_APIC();
>       setup_local_APIC();
>  
>       if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id)
>               BUG();
> +#endif
>  
>       x86_cpu_to_apicid[0] = boot_cpu_id;
>  
>       /*
>        * Now scan the CPU present map and fire up the other CPUs.
>        */
> -     Dprintk("CPU present map: %lx\n", 
> physids_coerce(phys_cpu_present_map));
> +     Dprintk("CPU present map: %lx\n", 
> HYPERVISOR_shared_info->n_vcpu)
>  
>       kicked = 1;
> -     for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
> -             apicid = cpu_present_to_apicid(bit);
> -             /*
> -              * Don't even attempt to start the boot CPU!
> -              */
> -             if (apicid == boot_cpu_id || (apicid == BAD_APICID))
> -                     continue;
> -
> -             if (!physid_isset(apicid, phys_cpu_present_map))
> +     for (cpu = 1; kicked < NR_CPUS &&
> +                  cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
> +             if (max_cpus <= cpucount+1)
>                       continue;
> -             if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
> -                     continue;
> -
> -             do_boot_cpu(apicid);
> +             do_boot_cpu(cpu);
>               ++kicked;
>       }
>  
> +#if 0
>       /*
>        * Cleanup possible dangling ends...
>        */
> @@ -833,6 +984,7 @@ static void __init smp_boot_cpus(unsigne
>  
>               *((volatile int *) phys_to_virt(0x467)) = 0;
>       }
> +#endif
>  
>       /*
>        * Allow the user to impress friends.
> @@ -899,6 +1051,7 @@ static void __init smp_boot_cpus(unsigne
>       else
>               nr_ioapics = 0;
>  
> +#if 0
>       setup_boot_APIC_clock();
>  
>       /*
> @@ -906,6 +1059,7 @@ static void __init smp_boot_cpus(unsigne
>        */
>       if (cpu_has_tsc && cpucount)
>               synchronize_tsc_bp();
> +#endif
>  
>   smp_done:
>       time_init_smp();
> @@ -950,9 +1104,36 @@ int __devinit __cpu_up(unsigned int cpu)
>  
>  void __init smp_cpus_done(unsigned int max_cpus)  {
> +#if 0
>  #ifdef CONFIG_X86_IO_APIC
>       setup_ioapic_dest();
>  #endif
>       zap_low_mappings();
> +#endif
>  }
>  
> +extern irqreturn_t smp_reschedule_interrupt(int, void *, 
> struct pt_regs 
> +*); extern irqreturn_t smp_call_function_interrupt(int, void 
> *, struct 
> +pt_regs *);
> +
> +static DEFINE_PER_CPU(int, resched_irq); static DEFINE_PER_CPU(int, 
> +callfunc_irq); static char resched_name[NR_CPUS][15]; static char 
> +callfunc_name[NR_CPUS][15];
> +
> +void __init smp_intr_init(void)
> +{
> +     int cpu = smp_processor_id();
> +
> +     per_cpu(resched_irq, cpu) =
> +             bind_ipi_on_cpu_to_irq(cpu, RESCHEDULE_VECTOR);
> +     sprintf(resched_name[cpu], "resched%d", cpu);
> +     BUG_ON(request_irq(per_cpu(resched_irq, cpu), 
> smp_reschedule_interrupt,
> +                        SA_INTERRUPT, resched_name[cpu], NULL));
> +
> +     per_cpu(callfunc_irq, cpu) =
> +             bind_ipi_on_cpu_to_irq(cpu, CALL_FUNCTION_VECTOR);
> +     sprintf(callfunc_name[cpu], "callfunc%d", cpu);
> +     BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
> +                        smp_call_function_interrupt,
> +                        SA_INTERRUPT, callfunc_name[cpu], NULL)); }
> Index: 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
> ===================================================================
> --- 
> xen-unstable.orig/linux-2.6.11-xen-sparse/arch/xen/x86_64/kern
> el/traps.c
> +++ 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
> @@ -957,6 +957,17 @@ void __init trap_init(void)
>       cpu_init();
>  }
>  
> +void smp_trap_init(trap_info_t *trap_ctxt) {
> +     trap_info_t *t = trap_table;
> +
> +     for (t = trap_table; t->address; t++) {
> +             trap_ctxt[t->vector].flags = t->flags;
> +             trap_ctxt[t->vector].cs = t->cs;
> +             trap_ctxt[t->vector].address = t->address;
> +     }
> +}
> +
>  
>  /* Actual parsing is done early in setup.c. */  static int 
> __init oops_dummy(char *s)
> Index: 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/xe
> n_entry.S
> ===================================================================
> --- 
> xen-unstable.orig/linux-2.6.11-xen-sparse/arch/xen/x86_64/kern
> el/xen_entry.S
> +++ 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/xen_entr
> +++ y.S
> @@ -8,11 +8,14 @@
>  #define sizeof_vcpu_shift            3
>  
>  #ifdef CONFIG_SMP
> -#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
> -#define preempt_enable(reg)  decl threadinfo_preempt_count(reg)
> +//#define preempt_disable(reg)       incl 
> threadinfo_preempt_count(reg)
> +//#define preempt_enable(reg)        decl 
> threadinfo_preempt_count(reg)
> +#define preempt_disable(reg)
> +#define preempt_enable(reg)
>  #define XEN_GET_VCPU_INFO(reg)       preempt_disable(%rbp)   
>               ; \
>                               movq %gs:pda_cpunumber,reg      
>       ; \
> -                             shl  $sizeof_vcpu_shift,reg     
>       ; \
> +                             shl  $32, reg                   
>       ; \
> +                             shr  $32-sizeof_vcpu_shift,reg  
>       ; \
>                               addq HYPERVISOR_shared_info,reg
>  #define XEN_PUT_VCPU_INFO(reg)       preempt_enable(%rbp)    
>               ; \
>  #define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
> Index: 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/irq.c
> ===================================================================
> --- 
> xen-unstable.orig/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/irq.c
> +++ xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/irq.c
> @@ -20,7 +20,11 @@
>   */
>  
>  atomic_t irq_err_count;
> -
> +#ifdef CONFIG_X86_IO_APIC
> +#ifdef APIC_MISMATCH_DEBUG
> +atomic_t irq_mis_count;
> +#endif
> +#endif
>  
>  /*
>   * Generic, controller-independent functions:
> Index: 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head.S
> ===================================================================
> --- 
> xen-unstable.orig/linux-2.6.11-xen-sparse/arch/xen/x86_64/kern
> el/head.S
> +++ xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head.S
> @@ -41,7 +41,6 @@
>       .code64
>  ENTRY(_start)
>          cld                
> -     movq init_rsp(%rip),%rsp
>       /* Copy the necessary stuff from xen_start_info structure. */
>       movq  $xen_start_info_union,%rdi
>       movq  $64,%rcx          /* sizeof (union 
> xen_start_info_union) / sizeof (long) */
> @@ -52,6 +51,7 @@ ENTRY(_start)
>       cld
>  #endif /* CONFIG_SMP */
>  
> +     movq init_rsp(%rip),%rsp
>       /* zero EFLAGS after setting rsp */
>       pushq $0
>       popfq
> Index: 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c
> ===================================================================
> --- 
> xen-unstable.orig/linux-2.6.11-xen-sparse/arch/xen/x86_64/kern
> el/setup.c
> +++ 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c
> @@ -601,6 +601,17 @@ static void __init print_memory_map(char
>          }
>  }
>  
> +void __init smp_alloc_memory(void)
> +{
> +     int cpu;
> +
> +     for (cpu = 1; cpu < NR_CPUS; cpu++) {
> +             cpu_gdt_descr[cpu].address = (unsigned long)
> +                     alloc_bootmem_low_pages(PAGE_SIZE);
> +             /* XXX free unused pages later */
> +     }
> +}
> +
>  void __init setup_arch(char **cmdline_p)  {
>       unsigned long low_mem_size;
> @@ -742,6 +753,9 @@ void __init setup_arch(char **cmdline_p)
>               }
>       }
>  #endif
> +#ifdef CONFIG_SMP
> +     smp_alloc_memory();
> +#endif
>       paging_init();
>  #ifdef CONFIG_X86_LOCAL_APIC
>       /*
> Index: 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup64.c
> ===================================================================
> --- 
> xen-unstable.orig/linux-2.6.11-xen-sparse/arch/xen/x86_64/kern
> el/setup64.c
> +++ 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup64.
> +++ c
> @@ -264,13 +264,13 @@ void __init cpu_init (void)
>        * Initialize the per-CPU GDT with the boot GDT,
>        * and set up the GDT descriptor:
>        */
> +#if 0
>       if (cpu) {
>               memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
>       }       
>  
>       cpu_gdt_descr[cpu].size = GDT_SIZE;
>       cpu_gdt_descr[cpu].address = (unsigned 
> long)cpu_gdt_table[cpu]; -#if 0
>       asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
>       asm volatile("lidt %0" :: "m" (idt_descr));  #endif
> Index: 
> xen-unstable/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/smp.h
> ===================================================================
> --- 
> xen-unstable.orig/linux-2.6.11-xen-sparse/include/asm-xen/asm-
> x86_64/smp.h
> +++ 
> xen-unstable/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/smp.
> +++ h
> @@ -77,7 +77,7 @@ extern __inline int hard_smp_processor_i  }  #endif
>  
> -#define safe_smp_processor_id() (disable_apic ? 0 : 
> x86_apicid_to_cpu(hard_smp_processor_id()))
> +#define safe_smp_processor_id() (disable_apic ? 0 : 
> smp_processor_id())
>  
>  #endif /* !ASSEMBLY */
>  
> Index: xen-unstable/linux-2.6.11-xen0/arch/x86_64/kernel/genapic.c
> ===================================================================
> --- xen-unstable.orig/linux-2.6.11-xen0/arch/x86_64/kernel/genapic.c
> +++ xen-unstable/linux-2.6.11-xen0/arch/x86_64/kernel/genapic.c
> @@ -27,9 +27,9 @@ u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0
>  
>  extern struct genapic apic_cluster;
>  extern struct genapic apic_flat;
> +extern struct genapic apic_xen;
>  
> -struct genapic *genapic = &apic_flat;
> -
> +struct genapic *genapic = &apic_xen;
>  
>  /*
>   * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
> @@ -78,12 +78,16 @@ void __init clustered_apic_check(void)
>               genapic = &apic_cluster;
>  
>  print:
> +     /* hardcode to xen apic functions */
> +     genapic = &apic_xen;
>       printk(KERN_INFO "Setting APIC routing to %s\n", 
> genapic->name);  }
>  
>  /* Same for both flat and clustered. */
>  
> +extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, 
> +unsigned int dest);
> +
>  void send_IPI_self(int vector)
>  {
> -     __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
> +     xen_send_IPI_shortcut(APIC_DEST_SELF, vector, 
> APIC_DEST_PHYSICAL);
>  }
> Index: 
> xen-unstable/linux-2.6.11-xen0/arch/xen/x86_64/kernel/genapic_xen.c
> ===================================================================
> --- /dev/null
> +++ 
> xen-unstable/linux-2.6.11-xen0/arch/xen/x86_64/kernel/genapic_xen.c
> @@ -0,0 +1,149 @@
> +/*
> + * Copyright 2004 James Cleverdon, IBM.
> + * Subject to the GNU Public License, v.2
> + *
> + * Xen APIC subarch code.  Maximum 8 CPUs, logical delivery.
> + *
> + * Hacked for x86-64 by James Cleverdon from i386 
> architecture code by
> + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
> + * James Cleverdon.
> + *
> + * Hacked to pieces for Xen by Chris Wright.
> + */
> +#include <linux/config.h>
> +#include <linux/threads.h>
> +#include <linux/cpumask.h>
> +#include <linux/string.h>
> +#include <linux/kernel.h>
> +#include <linux/ctype.h>
> +#include <linux/init.h>
> +#include <asm/smp.h>
> +#include <asm/ipi.h>
> +#include <asm-xen/evtchn.h>
> +
> +DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
> +
> +static inline void __send_IPI_one(unsigned int cpu, int vector) {
> +     unsigned int evtchn;
> +
> +     evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
> +     if (evtchn)
> +             notify_via_evtchn(evtchn);
> +     else
> +             printk("send_IPI to unbound port %d/%d", cpu, vector);
> +
> +}
> +
> +void xen_send_IPI_shortcut(unsigned int shortcut, int 
> vector, unsigned 
> +int dest) {
> +     int cpu;
> +
> +     switch (shortcut) {
> +     case APIC_DEST_SELF:
> +             __send_IPI_one(smp_processor_id(), vector);
> +             break;
> +     case APIC_DEST_ALLBUT:
> +             for (cpu = 0; cpu < NR_CPUS; ++cpu) {
> +                     if (cpu == smp_processor_id())
> +                             continue;
> +                     if (cpu_isset(cpu, cpu_online_map)) {
> +                             __send_IPI_one(cpu, vector);
> +                     }
> +             }
> +             break;
> +     case APIC_DEST_ALLINC:
> +             for (cpu = 0; cpu < NR_CPUS; ++cpu) {
> +                     if (cpu_isset(cpu, cpu_online_map)) {
> +                             __send_IPI_one(cpu, vector);
> +                     }
> +             }
> +             break;
> +     default:
> +             printk("XXXXXX __send_IPI_shortcut %08x vector 
> %d\n", shortcut,
> +                    vector);
> +             break;
> +     }
> +
> +}
> +
> +static cpumask_t xen_target_cpus(void)
> +{
> +     return cpu_online_map;
> +}
> +
> +/*
> + * Set up the logical destination ID.
> + * Do nothing, not called now.
> + */
> +static void xen_init_apic_ldr(void)
> +{
> +     return;
> +}
> +
> +static void xen_send_IPI_allbutself(int vector) {
> +     /*
> +      * if there are no other CPUs in the system then
> +      * we get an APIC send error if we try to broadcast.
> +      * thus we have to avoid sending IPIs in this case.
> +      */
> +     if (num_online_cpus() > 1)
> +             xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, 
> APIC_DEST_LOGICAL); }
> +
> +static void xen_send_IPI_all(int vector) {
> +     xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, 
> APIC_DEST_LOGICAL); }
> +
> +static void xen_send_IPI_mask(cpumask_t cpumask, int vector) {
> +     unsigned long mask = cpus_addr(cpumask)[0];
> +     unsigned int cpu;
> +     unsigned long flags;
> +
> +     local_irq_save(flags);
> +     WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
> +
> +     for (cpu = 0; cpu < NR_CPUS; ++cpu) {
> +             if (cpu_isset(cpu, cpumask)) {
> +                     __send_IPI_one(cpu, vector);
> +             }
> +     }
> +     local_irq_restore(flags);
> +}
> +
> +static int xen_apic_id_registered(void) {
> +     /* better be set */
> +     return physid_isset(smp_processor_id(), phys_cpu_present_map); }
> +
> +static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask) {
> +     return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; }
> +
> +static unsigned int phys_pkg_id(int index_msb) {
> +     u32 ebx;
> +
> +     ebx = cpuid_ebx(1);
> +     return ((ebx >> 24) & 0xFF) >> index_msb; }
> +
> +struct genapic apic_xen =  {
> +     .name = "xen",
> +     .int_delivery_mode = dest_LowestPrio,
> +     .int_dest_mode = (APIC_DEST_LOGICAL != 0),
> +     .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
> +     .target_cpus = xen_target_cpus,
> +     .apic_id_registered = xen_apic_id_registered,
> +     .init_apic_ldr = xen_init_apic_ldr,
> +     .send_IPI_all = xen_send_IPI_all,
> +     .send_IPI_allbutself = xen_send_IPI_allbutself,
> +     .send_IPI_mask = xen_send_IPI_mask,
> +     .cpu_mask_to_apicid = xen_cpu_mask_to_apicid,
> +     .phys_pkg_id = phys_pkg_id,
> +};
> Index: 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
> ===================================================================
> --- 
> xen-unstable.orig/linux-2.6.11-xen-sparse/arch/xen/x86_64/kern
> el/mpparse.c
> +++ 
> xen-unstable/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/mpparse.
> +++ c
> @@ -104,7 +104,8 @@ static int __init mpf_checksum(unsigned 
>       return sum & 0xFF;
>  }
>  
> -#ifndef CONFIG_XEN
> +//#ifndef CONFIG_XEN
> +#if 1
>  static void __init MP_processor_info (struct 
> mpc_config_processor *m)  {
>       int ver;
> @@ -693,7 +694,8 @@ void __init mp_register_lapic (
>       if (id == boot_cpu_physical_apicid)
>               boot_cpu = 1;
>  
> -#ifndef CONFIG_XEN
> +//#ifndef CONFIG_XEN
> +#if 1
>       processor.mpc_type = MP_PROCESSOR;
>       processor.mpc_apicid = id;
>       processor.mpc_apicver = 0x10; /* TBD: lapic version */
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.