[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] apci issue
> On Thu, Apr 21, 2005 at 03:26:53PM +0100, Keir Fraser wrote: > > > > On 21 Apr 2005, at 15:03, Gerd Knorr wrote: > > > > >On Thu, Apr 21, 2005 at 02:15:06PM +0100, Ian Pratt wrote: > > >> > > >>>Updated to current bk today, and my machine stopped booting ... > > >>>I've tracked it down to the apic changes from end of last week. > > >>>cset 1.1307 boots fine, 1.1308 doesn't. > > > > Try changing the following line in xen/arch/x86/apic.c (in > > setup_local_APIC): > > if (!smp_processor_id() && (pic_mode || !value)) { > > To: > > if (!smp_processor_id()) { > > Yep, that fixes it. > > > My guess is that your IRQs are routed through the legacy PIC > > It's indeed a old machine with a classic PIC, no IO-APIC yet. Excellent. I think there may be a better patch though. Please can you try replacing arch/x86/apic.c and arch/x86/setup.c with the versions attached to this email? With luck this will both fix the problem and bring us a little closer to the Linux codebase (probably not a bad thing with all this fragile bootstrap code). Cheers, Keir /* * based on linux-2.6.10/arch/i386/kernel/apic.c * * Local APIC handling, local APIC timers * * (c) 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx> * * Fixes * Maciej W. Rozycki : Bits for genuine 82489DX APICs; * thanks to Eric Gilmore * and Rolf G. Tews * for testing these extensively. * Maciej W. Rozycki : Various updates and fixes. * Mikael Pettersson : Power Management for UP-APIC. * Pavel Machek and * Mikael Pettersson : PM converted to driver model. */ #include <xen/config.h> #include <xen/perfc.h> #include <xen/errno.h> #include <xen/init.h> #include <xen/mm.h> #include <xen/sched.h> #include <xen/irq.h> #include <xen/delay.h> #include <xen/smp.h> #include <xen/softirq.h> #include <asm/mc146818rtc.h> #include <asm/msr.h> #include <asm/atomic.h> #include <asm/mpspec.h> #include <asm/flushtlb.h> #include <asm/hardirq.h> #include <asm/apic.h> #include <asm/io_apic.h> #include <asm/mach_apic.h> #include <asm/io_ports.h> /* Using APIC to generate smp_local_timer_interrupt? */ int using_apic_timer = 0; static int enabled_via_apicbase; int get_maxlvt(void) { unsigned int v, ver, maxlvt; v = apic_read(APIC_LVR); ver = GET_APIC_VERSION(v); /* 82489DXs do not report # of LVT entries. */ maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2; return maxlvt; } void clear_local_APIC(void) { int maxlvt; unsigned long v; maxlvt = get_maxlvt(); /* * Masking an LVT entry on a P6 can trigger a local APIC error * if the vector is zero. Mask LVTERR first to prevent this. */ if (maxlvt >= 3) { v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); } /* * Careful: we have to set masks only first to deassert * any level-triggered sources. */ v = apic_read(APIC_LVTT); apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); v = apic_read(APIC_LVT0); apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); v = apic_read(APIC_LVT1); apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); if (maxlvt >= 4) { v = apic_read(APIC_LVTPC); apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); } /* * Clean APIC state for other OSs: */ apic_write_around(APIC_LVTT, APIC_LVT_MASKED); apic_write_around(APIC_LVT0, APIC_LVT_MASKED); apic_write_around(APIC_LVT1, APIC_LVT_MASKED); if (maxlvt >= 3) apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); v = GET_APIC_VERSION(apic_read(APIC_LVR)); if (APIC_INTEGRATED(v)) { /* !82489DX */ if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */ apic_write(APIC_ESR, 0); apic_read(APIC_ESR); } } void __init connect_bsp_APIC(void) { if (pic_mode) { /* * Do not trust the local APIC being empty at bootup. */ clear_local_APIC(); /* * PIC mode, enable APIC mode in the IMCR, i.e. * connect BSP's local APIC to INT and NMI lines. */ printk("leaving PIC mode, enabling APIC mode.\n"); outb(0x70, 0x22); outb(0x01, 0x23); } } void disconnect_bsp_APIC(void) { if (pic_mode) { /* * Put the board back into PIC mode (has an effect * only on certain older boards). Note that APIC * interrupts, including IPIs, won't work beyond * this point! The only exception are INIT IPIs. */ printk("disabling APIC mode, entering PIC mode.\n"); outb(0x70, 0x22); outb(0x00, 0x23); } } void disable_local_APIC(void) { unsigned long value; clear_local_APIC(); /* * Disable APIC (implies clearing of registers * for 82489DX!). */ value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; apic_write_around(APIC_SPIV, value); if (enabled_via_apicbase) { unsigned int l, h; rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_ENABLE; wrmsr(MSR_IA32_APICBASE, l, h); } } /* * This is to verify that we're looking at a real local APIC. * Check these against your board if the CPUs aren't getting * started for no apparent reason. */ int __init verify_local_APIC(void) { unsigned int reg0, reg1; /* * The version register is read-only in a real APIC. */ reg0 = apic_read(APIC_LVR); Dprintk("Getting VERSION: %x\n", reg0); apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); reg1 = apic_read(APIC_LVR); Dprintk("Getting VERSION: %x\n", reg1); /* * The two version reads above should print the same * numbers. If the second one is different, then we * poke at a non-APIC. */ if (reg1 != reg0) return 0; /* * Check if the version looks reasonably. */ reg1 = GET_APIC_VERSION(reg0); if (reg1 == 0x00 || reg1 == 0xff) return 0; reg1 = get_maxlvt(); if (reg1 < 0x02 || reg1 == 0xff) return 0; /* * The ID register is read/write in a real APIC. */ reg0 = apic_read(APIC_ID); Dprintk("Getting ID: %x\n", reg0); /* * The next two are just to see if we have sane values. * They're only really relevant if we're in Virtual Wire * compatibility mode, but most boxes are anymore. */ reg0 = apic_read(APIC_LVT0); Dprintk("Getting LVT0: %x\n", reg0); reg1 = apic_read(APIC_LVT1); Dprintk("Getting LVT1: %x\n", reg1); return 1; } void __init sync_Arb_IDs(void) { /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); if (ver >= 0x14) /* P4 or higher */ return; /* * Wait for idle. */ apic_wait_icr_idle(); Dprintk("Synchronizing Arb IDs.\n"); apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } extern void __error_in_apic_c (void); void __init init_bsp_APIC(void) { unsigned long value, ver; /* * Don't do the setup now if we have a SMP BIOS as the through-I/O-APIC * virtual wire mode might be active. */ if (smp_found_config || !cpu_has_apic) return; value = apic_read(APIC_LVR); ver = GET_APIC_VERSION(value); /* * Do not trust the local APIC being empty at bootup. */ clear_local_APIC(); /* * Enable APIC. */ value = apic_read(APIC_SPIV); value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; /* This bit is reserved on P4/Xeon and should be cleared */ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15)) value &= ~APIC_SPIV_FOCUS_DISABLED; else value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write_around(APIC_SPIV, value); /* * Set up the virtual wire mode. */ apic_write_around(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; if (!APIC_INTEGRATED(ver)) /* 82489DX */ value |= APIC_LVT_LEVEL_TRIGGER; apic_write_around(APIC_LVT1, value); } void __init setup_local_APIC (void) { unsigned long oldvalue, value, ver, maxlvt; /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (esr_disable) { apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); } value = apic_read(APIC_LVR); ver = GET_APIC_VERSION(value); if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) __error_in_apic_c(); /* * Double-check whether this APIC is really registered. */ if (!apic_id_registered()) BUG(); /* * Intel recommends to set DFR, LDR and TPR before enabling * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ init_apic_ldr(); /* * Set Task Priority to 'accept all'. We never change this * later on. */ value = apic_read(APIC_TASKPRI); value &= ~APIC_TPRI_MASK; apic_write_around(APIC_TASKPRI, value); /* * Now that we are all set up, enable the APIC */ value = apic_read(APIC_SPIV); value &= ~APIC_VECTOR_MASK; /* * Enable APIC */ value |= APIC_SPIV_APIC_ENABLED; /* * Some unknown Intel IO/APIC (or APIC) errata is biting us with * certain networking cards. If high frequency interrupts are * happening on a particular IOAPIC pin, plus the IOAPIC routing * entry is masked/unmasked at a high rate as well then sooner or * later IOAPIC line gets 'stuck', no more interrupts are received * from the device. If focus CPU is disabled then the hang goes * away, oh well :-( * * [ This bug can be reproduced easily with a level-triggered * PCI Ne2000 networking cards and PII/PIII processors, dual * BX chipset. ] */ /* * Actually disabling the focus CPU check just makes the hang less * frequent as it makes the interrupt distributon model be more * like LRU than MRU (the short-term load is more even across CPUs). * See also the comment in end_level_ioapic_irq(). --macro */ #if 1 /* Enable focus processor (bit==0) */ value &= ~APIC_SPIV_FOCUS_DISABLED; #else /* Disable focus processor (bit==1) */ value |= APIC_SPIV_FOCUS_DISABLED; #endif /* * Set spurious IRQ vector */ value |= SPURIOUS_APIC_VECTOR; apic_write_around(APIC_SPIV, value); /* * Set up LVT0, LVT1: * * set up through-local-APIC on the BP's LINT0. This is not * strictly necessery in pure symmetric-IO mode, but sometimes * we delegate interrupts to the 8259A. */ /* * TODO: set up through-local-APIC from through-I/O-APIC? --macro */ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; if (!smp_processor_id() && (pic_mode || !value)) { value = APIC_DM_EXTINT; printk("enabled ExtINT on CPU#%d\n", smp_processor_id()); } else { value = APIC_DM_EXTINT | APIC_LVT_MASKED; printk("masked ExtINT on CPU#%d\n", smp_processor_id()); } apic_write_around(APIC_LVT0, value); /* * only the BP should see the LINT1 NMI signal, obviously. */ if (!smp_processor_id()) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; if (!APIC_INTEGRATED(ver)) /* 82489DX */ value |= APIC_LVT_LEVEL_TRIGGER; apic_write_around(APIC_LVT1, value); if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */ maxlvt = get_maxlvt(); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); oldvalue = apic_read(APIC_ESR); value = ERROR_APIC_VECTOR; // enables sending errors apic_write_around(APIC_LVTERR, value); /* * spec says clear errors after enabling vector. */ if (maxlvt > 3) apic_write(APIC_ESR, 0); value = apic_read(APIC_ESR); if (value != oldvalue) printk("ESR value before enabling vector: 0x%08lx " "after: 0x%08lx\n", oldvalue, value); } else { if (esr_disable) /* * Something untraceble is creating bad interrupts on * secondary quads ... for the moment, just leave the * ESR disabled - we can't do anything useful with the * errors anyway - mbligh */ printk("Leaving ESR disabled.\n"); else printk("No ESR for 82489DX.\n"); } if (nmi_watchdog == NMI_LOCAL_APIC) setup_apic_nmi_watchdog(); } /* * Detect and enable local APICs on non-SMP boards. * Original code written by Keir Fraser. */ static int __init detect_init_APIC (void) { u32 h, l, features; extern void get_cpu_vendor(struct cpuinfo_x86*); /* Workaround for us being called before identify_cpu(). */ get_cpu_vendor(&boot_cpu_data); switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || (boot_cpu_data.x86 == 15)) break; goto no_apic; case X86_VENDOR_INTEL: if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || (boot_cpu_data.x86 == 5 && cpu_has_apic)) break; goto no_apic; default: goto no_apic; } if (!cpu_has_apic) { /* * Some BIOSes disable the local APIC in the * APIC_BASE MSR. This can only be done in * software for Intel P6 or later and AMD K7 * (Model > 1) or later. */ rdmsr(MSR_IA32_APICBASE, l, h); if (!(l & MSR_IA32_APICBASE_ENABLE)) { printk("Local APIC disabled by BIOS -- reenabling.\n"); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; wrmsr(MSR_IA32_APICBASE, l, h); enabled_via_apicbase = 1; } } /* The APIC feature bit should now be enabled in `cpuid' */ features = cpuid_edx(1); if (!(features & (1 << X86_FEATURE_APIC))) { printk("Could not enable APIC!\n"); return -1; } set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; /* The BIOS may have set up the APIC at some other address */ rdmsr(MSR_IA32_APICBASE, l, h); if (l & MSR_IA32_APICBASE_ENABLE) mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; if (nmi_watchdog != NMI_NONE) nmi_watchdog = NMI_LOCAL_APIC; printk("Found and enabled local APIC!\n"); return 0; no_apic: printk("No local APIC present or hardware disabled\n"); return -1; } void __init init_apic_mappings(void) { unsigned long apic_phys; /* * If no local APIC can be found then set up a fake all * zeroes page to simulate the local APIC and another * one for the IO-APIC. */ if (!smp_found_config && detect_init_APIC()) { apic_phys = alloc_xenheap_page(); apic_phys = __pa(apic_phys); } else apic_phys = mp_lapic_addr; set_fixmap_nocache(FIX_APIC_BASE, apic_phys); Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys); /* * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ if (boot_cpu_physical_apicid == -1U) boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); #ifdef CONFIG_X86_IO_APIC { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; int i; for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mpc_apicaddr; if (!ioapic_phys) { printk(KERN_ERR "WARNING: bogus zero IO-APIC " "address found in MPTABLE, " "disabling IO/APIC support!\n"); smp_found_config = 0; skip_ioapic_setup = 1; goto fake_ioapic_page; } } else { fake_ioapic_page: ioapic_phys = alloc_xenheap_page(); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); Dprintk("mapped IOAPIC to %08lx (%08lx)\n", fix_to_virt(idx), ioapic_phys); idx++; } } #endif } /***************************************************************************** * APIC calibration * * The APIC is programmed in bus cycles. * Timeout values should specified in real time units. * The "cheapest" time source is the cyclecounter. * * Thus, we need a mappings from: bus cycles <- cycle counter <- system time * * The calibration is currently a bit shoddy since it requires the external * timer chip to generate periodic timer interupts. *****************************************************************************/ /* used for system time scaling */ static unsigned long bus_freq; /* KAF: pointer-size avoids compile warns. */ static u32 bus_cycle; /* length of one bus cycle in pico-seconds */ static u32 bus_scale; /* scaling factor convert ns to bus cycles */ /* * The timer chip is already set up at HZ interrupts per second here, * but we do not accept timer interrupts yet. We only allow the BP * to calibrate. */ static unsigned int __init get_8254_timer_count(void) { /*extern spinlock_t i8253_lock;*/ /*unsigned long flags;*/ unsigned int count; /*spin_lock_irqsave(&i8253_lock, flags);*/ outb_p(0x00, PIT_MODE); count = inb_p(PIT_CH0); count |= inb_p(PIT_CH0) << 8; /*spin_unlock_irqrestore(&i8253_lock, flags);*/ return count; } /* next tick in 8254 can be caught by catching timer wraparound */ static void __init wait_8254_wraparound(void) { unsigned int curr_count, prev_count=~0; int delta; curr_count = get_8254_timer_count(); do { prev_count = curr_count; curr_count = get_8254_timer_count(); delta = curr_count-prev_count; /* * This limit for delta seems arbitrary, but it isn't, it's slightly * above the level of error a buggy Mercury/Neptune chipset timer can * cause. */ } while (delta < 300); } /* * Default initialization for 8254 timers. If we use other timers like HPET, * we override this later */ void (*wait_timer_tick)(void) = wait_8254_wraparound; /* * This function sets up the local APIC timer, with a timeout of * 'clocks' APIC bus clock. During calibration we actually call * this function with a very large value and read the current time after * a well defined period of time as expired. * * Calibration is only performed once, for CPU0! * * We do reads before writes even if unnecessary, to get around the * P5 APIC double write bug. */ #define APIC_DIVISOR 1 static void __setup_APIC_LVTT(unsigned int clocks) { unsigned int lvtt_value, tmp_value, ver; ver = GET_APIC_VERSION(apic_read(APIC_LVR)); /* NB. Xen uses local APIC timer in one-shot mode. */ lvtt_value = /*APIC_LVT_TIMER_PERIODIC |*/ LOCAL_TIMER_VECTOR; if (!APIC_INTEGRATED(ver)) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); apic_write_around(APIC_LVTT, lvtt_value); tmp_value = apic_read(APIC_TDCR); apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1)); apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } /* * this is done for every CPU from setup_APIC_clocks() below. * We setup each local APIC with a zero timeout value for now. * Unlike Linux, we don't have to wait for slices etc. */ void setup_APIC_timer(void * data) { unsigned long flags; __save_flags(flags); __sti(); __setup_APIC_LVTT(0); __restore_flags(flags); } /* * In this function we calibrate APIC bus clocks to the external timer. * * As a result we have the Bus Speed and CPU speed in Hz. * * We want to do the calibration only once (for CPU0). CPUs connected by the * same APIC bus have the very same bus frequency. * * This bit is a bit shoddy since we use the very same periodic timer interrupt * we try to eliminate to calibrate the APIC. */ int __init calibrate_APIC_clock(void) { unsigned long long t1 = 0, t2 = 0; long tt1, tt2; long result; int i; const int LOOPS = HZ/10; printk("Calibrating APIC timer for CPU%d...\n", smp_processor_id()); /* * Put whatever arbitrary (but long enough) timeout * value into the APIC clock, we just want to get the * counter running for calibration. */ __setup_APIC_LVTT(1000000000); /* * The timer chip counts down to zero. Let's wait * for a wraparound to start exact measurement: * (the current tick might have been already half done) */ wait_timer_tick(); /* * We wrapped around just now. Let's start: */ if (cpu_has_tsc) rdtscll(t1); tt1 = apic_read(APIC_TMCCT); /* * Let's wait LOOPS wraprounds: */ for (i = 0; i < LOOPS; i++) wait_timer_tick(); tt2 = apic_read(APIC_TMCCT); if (cpu_has_tsc) rdtscll(t2); /* * The APIC bus clock counter is 32 bits only, it * might have overflown, but note that we use signed * longs, thus no extra care needed. * [underflown to be exact, as the timer counts down ;)] */ result = (tt1-tt2)*APIC_DIVISOR/LOOPS; if (cpu_has_tsc) printk("..... CPU clock speed is %ld.%04ld MHz.\n", ((long)(t2-t1)/LOOPS)/(1000000/HZ), ((long)(t2-t1)/LOOPS)%(1000000/HZ)); printk("..... host bus clock speed is %ld.%04ld MHz.\n", result/(1000000/HZ), result%(1000000/HZ)); /* set up multipliers for accurate timer code */ bus_freq = result*HZ; bus_cycle = (u32) (1000000000000LL/bus_freq); /* in pico seconds */ bus_scale = (1000*262144)/bus_cycle; printk("..... bus_scale = 0x%08X\n", bus_scale); /* reset APIC to zero timeout value */ __setup_APIC_LVTT(0); return result; } /* * initialise the APIC timers for all CPUs * we start with the first and find out processor frequency and bus speed */ void __init setup_APIC_clocks (void) { printk("Using local APIC timer interrupts.\n"); using_apic_timer = 1; __cli(); /* calibrate CPU0 for CPU speed and BUS speed */ bus_freq = calibrate_APIC_clock(); /* Now set up the timer for real. */ setup_APIC_timer((void *)bus_freq); __sti(); /* and update all other cpus */ smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1); } #undef APIC_DIVISOR /* * reprogram the APIC timer. Timeoutvalue is in ns from start of boot * returns 1 on success * returns 0 if the timeout value is too small or in the past. */ int reprogram_ac_timer(s_time_t timeout) { s_time_t now; s_time_t expire; u64 apic_tmict; /* * We use this value because we don't trust zero (we think it may just * cause an immediate interrupt). At least this is guaranteed to hold it * off for ages (esp. since the clock ticks on bus clock, not cpu clock!). */ if ( timeout == 0 ) { apic_tmict = 0xffffffff; goto reprogram; } now = NOW(); expire = timeout - now; /* value from now */ if ( expire <= 0 ) { Dprintk("APICT[%02d] Timeout in the past 0x%08X%08X > 0x%08X%08X\n", smp_processor_id(), (u32)(now>>32), (u32)now, (u32)(timeout>>32),(u32)timeout); return 0; } /* * If we don't have local APIC then we just poll the timer list off the * PIT interrupt. Cheesy but good enough to work on eg. VMware :-) */ if ( !cpu_has_apic ) return 1; /* conversion to bus units */ apic_tmict = (((u64)bus_scale) * expire)>>18; if ( apic_tmict >= 0xffffffff ) { Dprintk("APICT[%02d] Timeout value too large\n", smp_processor_id()); apic_tmict = 0xffffffff; } if ( apic_tmict == 0 ) { Dprintk("APICT[%02d] timeout value too small\n", smp_processor_id()); return 0; } reprogram: /* Program the timer. */ apic_write(APIC_TMICT, (unsigned long)apic_tmict); return 1; } void smp_apic_timer_interrupt(struct xen_regs * regs) { ack_APIC_irq(); perfc_incrc(apic_timer); raise_softirq(AC_TIMER_SOFTIRQ); } /* * This interrupt should _never_ happen with our APIC/SMP architecture */ asmlinkage void smp_spurious_interrupt(struct xen_regs *regs) { unsigned long v; /* * Check if this really is a spurious interrupt and ACK it * if it is a vectored one. Just in case... * Spurious interrupts should not be ACKed. */ v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); /* see sw-dev-man vol 3, chapter 7.4.13.5 */ printk("spurious APIC interrupt on CPU#%d, should never happen.\n", smp_processor_id()); } /* * This interrupt should never happen with our APIC/SMP architecture */ asmlinkage void smp_error_interrupt(struct xen_regs *regs) { unsigned long v, v1; /* First tickle the hardware, only then report what went on. -- REW */ v = apic_read(APIC_ESR); apic_write(APIC_ESR, 0); v1 = apic_read(APIC_ESR); ack_APIC_irq(); atomic_inc(&irq_err_count); /* Here is what the APIC error bits mean: 0: Send CS error 1: Receive CS error 2: Send accept error 3: Receive accept error 4: Reserved 5: Send illegal vector 6: Received illegal vector 7: Illegal register address */ printk("APIC error on CPU%d: %02lx(%02lx)\n", smp_processor_id(), v, v1); } /* * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ int __init APIC_init_uniprocessor (void) { if (!smp_found_config && !cpu_has_apic) return -1; /* * Complain if the BIOS pretends there is one. */ if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { printk("BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); return -1; } verify_local_APIC(); connect_bsp_APIC(); #ifdef CONFIG_SMP cpu_online_map = 1; #endif phys_cpu_present_map = 1; apic_write_around(APIC_ID, boot_cpu_physical_apicid); setup_local_APIC(); #ifdef CONFIG_X86_IO_APIC if (smp_found_config) if (!skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); #endif setup_APIC_clocks(); return 0; } #include <xen/config.h> #include <xen/init.h> #include <xen/lib.h> #include <xen/sched.h> #include <xen/pci.h> #include <xen/serial.h> #include <xen/softirq.h> #include <xen/acpi.h> #include <xen/console.h> #include <xen/serial.h> #include <xen/trace.h> #include <xen/multiboot.h> #include <asm/bitops.h> #include <asm/smp.h> #include <asm/processor.h> #include <asm/mpspec.h> #include <asm/apic.h> #include <asm/desc.h> #include <asm/domain_page.h> #include <asm/shadow.h> #include <asm/e820.h> /* * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the * pfn_info table and allocation bitmap. */ static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB; #if defined(__x86_64__) integer_param("xenheap_megabytes", opt_xenheap_megabytes); #endif /* opt_noht: If true, Hyperthreading is ignored. */ int opt_noht = 0; boolean_param("noht", opt_noht); /* opt_noacpi: If true, ACPI tables are not parsed. */ static int opt_noacpi = 0; boolean_param("noacpi", opt_noacpi); /* opt_nosmp: If true, secondary processors are ignored. */ static int opt_nosmp = 0; boolean_param("nosmp", opt_nosmp); /* opt_ignorebiostables: If true, ACPI and MP tables are ignored. */ /* NB. This flag implies 'nosmp' and 'noacpi'. */ static int opt_ignorebiostables = 0; boolean_param("ignorebiostables", opt_ignorebiostables); /* opt_watchdog: If true, run a watchdog NMI on each processor. */ static int opt_watchdog = 0; boolean_param("watchdog", opt_watchdog); int early_boot = 1; unsigned long xenheap_phys_end; extern void arch_init_memory(void); extern void init_IRQ(void); extern void trap_init(void); extern void time_init(void); extern void ac_timer_init(void); extern void initialize_keytable(); extern int do_timer_lists_from_pit; char ignore_irq13; /* set if exception 16 works */ struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1 }; #if defined(__x86_64__) unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE; #else unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE; #endif EXPORT_SYMBOL(mmu_cr4_features); unsigned long wait_init_idle; struct exec_domain *idle_task[NR_CPUS] = { &idle0_exec_domain }; #ifdef CONFIG_ACPI_INTERPRETER int acpi_disabled = 0; #else int acpi_disabled = 1; #endif EXPORT_SYMBOL(acpi_disabled); int phys_proc_id[NR_CPUS]; int logical_proc_id[NR_CPUS]; /* Standard macro to see if a specific flag is changeable. */ static inline int flag_is_changeable_p(unsigned long flag) { unsigned long f1, f2; asm("pushf\n\t" "pushf\n\t" "pop %0\n\t" "mov %0,%1\n\t" "xor %2,%0\n\t" "push %0\n\t" "popf\n\t" "pushf\n\t" "pop %0\n\t" "popf\n\t" : "=&r" (f1), "=&r" (f2) : "ir" (flag)); return ((f1^f2) & flag) != 0; } /* Probe for the CPUID instruction */ static int __init have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } void __init get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; if (!strcmp(v, "GenuineIntel")) c->x86_vendor = X86_VENDOR_INTEL; else if (!strcmp(v, "AuthenticAMD")) c->x86_vendor = X86_VENDOR_AMD; else if (!strcmp(v, "CyrixInstead")) c->x86_vendor = X86_VENDOR_CYRIX; else if (!strcmp(v, "UMC UMC UMC ")) c->x86_vendor = X86_VENDOR_UMC; else if (!strcmp(v, "CentaurHauls")) c->x86_vendor = X86_VENDOR_CENTAUR; else if (!strcmp(v, "NexGenDriven")) c->x86_vendor = X86_VENDOR_NEXGEN; else if (!strcmp(v, "RiseRiseRise")) c->x86_vendor = X86_VENDOR_RISE; else if (!strcmp(v, "GenuineTMx86") || !strcmp(v, "TransmetaCPU")) c->x86_vendor = X86_VENDOR_TRANSMETA; else c->x86_vendor = X86_VENDOR_UNKNOWN; } static void __init init_intel(struct cpuinfo_x86 *c) { /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) clear_bit(X86_FEATURE_SEP, &c->x86_capability); #ifdef CONFIG_SMP if ( test_bit(X86_FEATURE_HT, &c->x86_capability) ) { u32 eax, ebx, ecx, edx; int initial_apic_id, siblings, cpu = smp_processor_id(); cpuid(1, &eax, &ebx, &ecx, &edx); ht_per_core = siblings = (ebx & 0xff0000) >> 16; if ( opt_noht ) clear_bit(X86_FEATURE_HT, &c->x86_capability[0]); if ( siblings <= 1 ) { printk(KERN_INFO "CPU#%d: Hyper-Threading is disabled\n", cpu); } else if ( siblings > 2 ) { panic("We don't support more than two logical CPUs per package!"); } else { initial_apic_id = ebx >> 24 & 0xff; phys_proc_id[cpu] = initial_apic_id >> 1; logical_proc_id[cpu] = initial_apic_id & 1; printk(KERN_INFO "CPU#%d: Physical ID: %d, Logical ID: %d\n", cpu, phys_proc_id[cpu], logical_proc_id[cpu]); } } #endif #ifdef CONFIG_VMX start_vmx(); #endif } static void __init init_amd(struct cpuinfo_x86 *c) { /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_bit(0*32+31, &c->x86_capability); switch(c->x86) { case 5: panic("AMD K6 is not supported.\n"); case 6: /* An Athlon/Duron. We can trust the BIOS probably */ break; } } /* * This does the hard work of actually picking apart the CPU stuff... */ void __init identify_cpu(struct cpuinfo_x86 *c) { int i, cpu = smp_processor_id(); u32 xlvl, tfms, junk; phys_proc_id[cpu] = cpu; logical_proc_id[cpu] = 0; c->x86_vendor = X86_VENDOR_UNKNOWN; c->cpuid_level = -1; /* CPUID not detected */ c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ memset(&c->x86_capability, 0, sizeof c->x86_capability); if ( !have_cpuid_p() ) panic("Ancient processors not supported\n"); /* Get vendor name */ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, (unsigned int *)&c->x86_vendor_id[0], (unsigned int *)&c->x86_vendor_id[8], (unsigned int *)&c->x86_vendor_id[4]); get_cpu_vendor(c); if ( c->cpuid_level == 0 ) panic("Decrepit CPUID not supported\n"); cpuid(0x00000001, &tfms, &junk, &junk, &c->x86_capability[0]); c->x86 = (tfms >> 8) & 15; c->x86_model = (tfms >> 4) & 15; c->x86_mask = tfms & 15; /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); if ( (xlvl & 0xffff0000) == 0x80000000 ) { if ( xlvl >= 0x80000001 ) c->x86_capability[1] = cpuid_edx(0x80000001); } /* Transmeta-defined flags: level 0x80860001 */ xlvl = cpuid_eax(0x80860000); if ( (xlvl & 0xffff0000) == 0x80860000 ) { if ( xlvl >= 0x80860001 ) c->x86_capability[2] = cpuid_edx(0x80860001); } printk("CPU%d: Before vendor init, caps: %08x %08x %08x, vendor = %d\n", smp_processor_id(), c->x86_capability[0], c->x86_capability[1], c->x86_capability[2], c->x86_vendor); switch ( c->x86_vendor ) { case X86_VENDOR_INTEL: init_intel(c); break; case X86_VENDOR_AMD: init_amd(c); break; case X86_VENDOR_UNKNOWN: /* Connectix Virtual PC reports this */ break; case X86_VENDOR_CENTAUR: break; default: printk("Unknown CPU identifier (%d): continuing anyway, " "but might fail.\n", c->x86_vendor); } printk("CPU caps: %08x %08x %08x %08x\n", c->x86_capability[0], c->x86_capability[1], c->x86_capability[2], c->x86_capability[3]); /* * On SMP, boot_cpu_data holds the common feature set between * all CPUs; so make sure that we indicate which features are * common between the CPUs. The first time this routine gets * executed, c == &boot_cpu_data. */ if ( c != &boot_cpu_data ) { /* AND the already accumulated flags with these */ for ( i = 0 ; i < NCAPINTS ; i++ ) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } } unsigned long cpu_initialized; void __init cpu_init(void) { int nr = smp_processor_id(); struct tss_struct *t = &init_tss[nr]; if ( test_and_set_bit(nr, &cpu_initialized) ) panic("CPU#%d already initialized!!!\n", nr); printk("Initializing CPU#%d\n", nr); SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES); SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS); __asm__ __volatile__ ( "lgdt %0" : "=m" (*current->arch.gdt) ); /* No nested task. */ __asm__ __volatile__ ( "pushf ; andw $0xbfff,(%"__OP"sp) ; popf" ); /* Ensure FPU gets initialised for each domain. */ stts(); /* Set up and load the per-CPU TSS and LDT. */ t->bitmap = IOBMP_INVALID_OFFSET; #if defined(__i386__) t->ss0 = __HYPERVISOR_DS; t->esp0 = get_stack_bottom(); #elif defined(__x86_64__) t->rsp0 = get_stack_bottom(); #endif set_tss_desc(nr,t); load_TR(nr); __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) ); /* Clear all 6 debug registers. */ #define CD(register) __asm__ ( "mov %0,%%db" #register : : "r" (0UL) ); CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); #undef CD /* Install correct page table. */ write_ptbase(current); init_idle_task(); } static void __init do_initcalls(void) { initcall_t *call; for ( call = &__initcall_start; call < &__initcall_end; call++ ) (*call)(); } unsigned long pci_mem_start = 0x10000000; static void __init start_of_day(void) { unsigned long low_mem_size; #ifdef MEMORY_GUARD /* Unmap the first page of CPU0's stack. */ extern unsigned long cpu0_stack[]; memguard_guard_stack(cpu0_stack); #endif open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period); if ( opt_watchdog ) nmi_watchdog = NMI_LOCAL_APIC; sort_exception_tables(); arch_do_createdomain(current); /* Tell the PCI layer not to allocate too close to the RAM area.. */ low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff; if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size; identify_cpu(&boot_cpu_data); /* get CPU type info */ if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR); if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT); #ifdef CONFIG_SMP if ( opt_ignorebiostables ) { opt_nosmp = 1; /* No SMP without configuration */ opt_noacpi = 1; /* ACPI will just confuse matters also */ } else { find_smp_config(); smp_alloc_memory(); /* trampoline which other CPUs jump at */ } #endif paging_init(); /* not much here now, but sets up fixmap */ if ( !opt_noacpi ) acpi_boot_init(); #ifdef CONFIG_SMP if ( smp_found_config ) get_smp_config(); #endif init_apic_mappings(); /* make APICs addressable in our pagetables. */ scheduler_init(); init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */ trap_init(); time_init(); /* installs software handler for HZ clock. */ arch_init_memory(); #ifndef CONFIG_SMP APIC_init_uniprocessor(); #else if ( opt_nosmp ) APIC_init_uniprocessor(); else smp_boot_cpus(); /* * Does loads of stuff, including kicking the local * APIC, and the IO APIC after other CPUs are booted. * Each IRQ is preferably handled by IO-APIC, but * fall thru to 8259A if we have to (but slower). */ #endif __sti(); initialize_keytable(); /* call back handling for key codes */ serial_init_stage2(); if ( !cpu_has_apic ) { do_timer_lists_from_pit = 1; if ( smp_num_cpus != 1 ) panic("We need local APICs on SMP machines!"); } ac_timer_init(); /* init accurate timers */ init_xen_time(); /* initialise the time */ schedulers_start(); /* start scheduler for each CPU */ check_nmi_watchdog(); #ifdef CONFIG_PCI pci_init(); #endif do_initcalls(); #ifdef CONFIG_SMP wait_init_idle = cpu_online_map; clear_bit(smp_processor_id(), &wait_init_idle); smp_threads_ready = 1; smp_commence(); /* Tell other CPUs that state of the world is stable. */ while ( wait_init_idle != 0 ) cpu_relax(); #endif watchdog_on = 1; #ifdef __x86_64__ /* x86_32 uses low mappings when building DOM0. */ zap_low_mappings(); #endif } void __init __start_xen(multiboot_info_t *mbi) { char *cmdline; module_t *mod = (module_t *)__va(mbi->mods_addr); void *heap_start; unsigned long firsthole_start, nr_pages; unsigned long initial_images_start, initial_images_end; struct e820entry e820_raw[E820MAX]; int i, e820_raw_nr = 0, bytes = 0; /* Parse the command-line options. */ if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) ) cmdline_parse(__va(mbi->cmdline)); /* Must do this early -- e.g., spinlocks rely on get_current(). */ set_current(&idle0_exec_domain); /* We initialise the serial devices very early so we can get debugging. */ serial_init_stage1(); init_console(); /* Check that we have at least one Multiboot module. */ if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) ) { printk("FATAL ERROR: Require at least one Multiboot module.\n"); for ( ; ; ) ; } xenheap_phys_end = opt_xenheap_megabytes << 20; if ( mbi->flags & MBI_MEMMAP ) { while ( bytes < mbi->mmap_length ) { memory_map_t *map = __va(mbi->mmap_addr + bytes); e820_raw[e820_raw_nr].addr = ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low; e820_raw[e820_raw_nr].size = ((u64)map->length_high << 32) | (u64)map->length_low; e820_raw[e820_raw_nr].type = (map->type > E820_SHARED_PAGE) ? E820_RESERVED : map->type; e820_raw_nr++; bytes += map->size + 4; } } else if ( mbi->flags & MBI_MEMLIMITS ) { e820_raw[0].addr = 0; e820_raw[0].size = mbi->mem_lower << 10; e820_raw[0].type = E820_RAM; e820_raw[1].addr = 0x100000; e820_raw[1].size = mbi->mem_upper << 10; e820_raw[1].type = E820_RAM; e820_raw_nr = 2; } else { printk("FATAL ERROR: Bootloader provided no memory information.\n"); for ( ; ; ) ; } max_page = init_e820(e820_raw, e820_raw_nr); /* Find the first high-memory RAM hole. */ for ( i = 0; i < e820.nr_map; i++ ) if ( (e820.map[i].type == E820_RAM) && (e820.map[i].addr >= 0x100000) ) break; firsthole_start = e820.map[i].addr + e820.map[i].size; /* Relocate the Multiboot modules. */ initial_images_start = xenheap_phys_end; initial_images_end = initial_images_start + (mod[mbi->mods_count-1].mod_end - mod[0].mod_start); if ( initial_images_end > firsthole_start ) { printk("Not enough memory to stash the DOM0 kernel image.\n"); for ( ; ; ) ; } #if defined(__i386__) memmove((void *)initial_images_start, /* use low mapping */ (void *)mod[0].mod_start, /* use low mapping */ mod[mbi->mods_count-1].mod_end - mod[0].mod_start); #elif defined(__x86_64__) memmove(__va(initial_images_start), __va(mod[0].mod_start), mod[mbi->mods_count-1].mod_end - mod[0].mod_start); #endif /* Initialise boot-time allocator with all RAM situated after modules. */ heap_start = memguard_init(&_end); heap_start = __va(init_boot_allocator(__pa(heap_start))); nr_pages = 0; for ( i = 0; i < e820.nr_map; i++ ) { if ( e820.map[i].type != E820_RAM ) continue; nr_pages += e820.map[i].size >> PAGE_SHIFT; if ( (e820.map[i].addr + e820.map[i].size) >= initial_images_end ) init_boot_pages((e820.map[i].addr < initial_images_end) ? initial_images_end : e820.map[i].addr, e820.map[i].addr + e820.map[i].size); } printk("System RAM: %luMB (%lukB)\n", nr_pages >> (20 - PAGE_SHIFT), nr_pages << (PAGE_SHIFT - 10)); init_frametable(); end_boot_allocator(); init_xenheap_pages(__pa(heap_start), xenheap_phys_end); printk("Xen heap: %luMB (%lukB)\n", (xenheap_phys_end-__pa(heap_start)) >> 20, (xenheap_phys_end-__pa(heap_start)) >> 10); early_boot = 0; start_of_day(); grant_table_init(); shadow_mode_init(); /* Create initial domain 0. */ dom0 = do_createdomain(0, 0); if ( dom0 == NULL ) panic("Error creating domain 0\n"); set_bit(DF_PRIVILEGED, &dom0->d_flags); /* Grab the DOM0 command line. Skip past the image name. */ cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL); if ( cmdline != NULL ) { while ( *cmdline == ' ' ) cmdline++; if ( (cmdline = strchr(cmdline, ' ')) != NULL ) while ( *cmdline == ' ' ) cmdline++; } /* * We're going to setup domain0 using the module(s) that we stashed safely * above our heap. The second module, if present, is an initrd ramdisk. */ if ( construct_dom0(dom0, initial_images_start, mod[0].mod_end-mod[0].mod_start, (mbi->mods_count == 1) ? 0 : initial_images_start + (mod[1].mod_start-mod[0].mod_start), (mbi->mods_count == 1) ? 0 : mod[mbi->mods_count-1].mod_end - mod[1].mod_start, cmdline) != 0) panic("Could not set up DOM0 guest OS\n"); /* Scrub RAM that is still free and so may go to an unprivileged domain. */ scrub_heap_pages(); init_trace_bufs(); /* Give up the VGA console if DOM0 is configured to grab it. */ console_endboot(cmdline && strstr(cmdline, "tty0")); /* Hide UART from DOM0 if we're using it */ serial_endboot(); domain_unpause_by_systemcontroller(current->domain); domain_unpause_by_systemcontroller(dom0); startup_cpu_idle_loop(); } /* * Local variables: * mode: C * c-set-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |