[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] pci-ioapic-x86-64.patch
Enable PCI, IOAPIC and ACPI on domain 0 for x86-64. This patch is based on apic.c,io_apic.c and mpparse.c on x86-64. It applies changes analogous to the i386 files for paravirtualization. The patch was tested on UP and DP with acpi=on and off. We did this on a 5/10 bk tree + Jun's fixes. The latest bk tree doesn't boot yet and needs more work. Signed-off-by: Arun Sharma <arun.sharma@xxxxxxxxx> --- a/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 2005-05-20 10:25:14 -07:00 +++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 2005-05-20 10:25:14 -07:00 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.11-xen0 -# Thu Apr 14 19:45:44 2005 +# Linux kernel version: 2.6.11.1-xen0 +# Tue May 10 11:07:02 2005 # CONFIG_XEN=y CONFIG_ARCH_XEN=y @@ -88,8 +88,11 @@ # CONFIG_X86_CPUID is not set # CONFIG_NUMA is not set # CONFIG_MTRR is not set +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y CONFIG_PCI=y CONFIG_PCI_DIRECT=y +# CONFIG_PCI_MMCONFIG is not set CONFIG_EARLY_PRINTK=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y @@ -118,11 +121,6 @@ # CONFIG_PM is not set # -# ACPI (Advanced Configuration and Power Interface) Support -# -# CONFIG_ACPI is not set - -# # CPU Frequency scaling # # CONFIG_CPU_FREQ is not set @@ -130,7 +128,6 @@ # # Bus options (PCI etc.) # -# CONFIG_PCI_MMCONFIG is not set # CONFIG_UNORDERED_IO is not set # @@ -168,6 +165,7 @@ # # Plug and Play support # +# CONFIG_PNP is not set # # Block devices @@ -725,6 +723,7 @@ CONFIG_DRM_SIS=m # CONFIG_MWAVE is not set # CONFIG_RAW_DRIVER is not set +# CONFIG_HPET is not set # CONFIG_HANGCHECK_TIMER is not set # @@ -794,6 +793,35 @@ # InfiniBand support # # CONFIG_INFINIBAND is not set + +# +# Power management options +# + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI=y +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_AC=m +CONFIG_ACPI_BATTERY=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_VIDEO=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_ACPI_THERMAL=m +CONFIG_ACPI_ASUS=m +CONFIG_ACPI_IBM=m +CONFIG_ACPI_TOSHIBA=m +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_BUS=y +CONFIG_ACPI_EC=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_PCI=y +CONFIG_ACPI_SYSTEM=y +# CONFIG_ACPI_CONTAINER is not set # # File systems diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/x86_64/Kconfig --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/Kconfig 2005-05-20 10:25:14 -07:00 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/Kconfig 2005-05-20 10:25:14 -07:00 @@ -188,13 +188,13 @@ config EISA bool -#config X86_IO_APIC -# bool -# default n +config X86_LOCAL_APIC + bool + default y -#config X86_LOCAL_APIC -# bool -# default n +config X86_IO_APIC + bool + default y config MTRR bool "MTRR (Memory Type Range Register) support" diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile 2005-05-20 10:25:14 -07:00 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile 2005-05-20 10:25:14 -07:00 @@ -12,7 +12,7 @@ x8664_ksyms.o vsyscall.o \ setup64.o e820.o irq.o early_printk.o c-obj-y := semaphore.o i387.o sys_x86_64.o \ - ptrace.o quirks.o syscall.o + ptrace.o quirks.o syscall.o bootflag.o i386-obj-y := time.o obj-y += ../../i386/kernel/timers/ @@ -21,14 +21,15 @@ #obj-$(CONFIG_X86_MCE) += mce.o #obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ -#obj-$(CONFIG_ACPI_BOOT) += acpi/ +obj-$(CONFIG_ACPI_BOOT) += acpi/ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CPUID) += cpuid.o #obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o -#obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o -#obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \ -# genapic.o genapic_cluster.o genapic_flat.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o +c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o +obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o +c-obj-$(CONFIG_X86_IO_APIC) += genapic.o genapic_cluster.o genapic_flat.o #obj-$(CONFIG_PM) += suspend.o #obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o #obj-$(CONFIG_CPU_FREQ) += cpufreq/ diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/acpi/Makefile b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/acpi/Makefile --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/acpi/Makefile 2005-05-20 10:25:14 -07:00 @@ -0,0 +1,16 @@ +i386-obj-$(CONFIG_ACPI_BOOT) := boot.o +c-obj-$(CONFIG_X86_IO_APIC) := earlyquirk.o +c-obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o + +c-link := + +$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)): + @ln -fsn $(srctree)/arch/i386/kernel/acpi/$(notdir $@) $@ + +$(patsubst %.o,$(obj)/%.c,$(i386-obj-y)): + ln -fsn $(srctree)/arch/xen/i386/kernel/acpi/$(notdir $@) $@ + +obj-y += $(c-obj-y) $(s-obj-y) $(i386-obj-y) + +clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link)) +clean-files += $(patsubst %.o,%.S,$(s-obj-y) $(s-obj-) $(s-link)) diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/apic.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/apic.c --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/apic.c 2005-05-20 10:25:14 -07:00 @@ -0,0 +1,200 @@ +/* + * Local APIC handling, local APIC timers + * + * (c) 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx> + * + * Fixes + * Maciej W. Rozycki : Bits for genuine 82489DX APICs; + * thanks to Eric Gilmore + * and Rolf G. Tews + * for testing these extensively. + * Maciej W. Rozycki : Various updates and fixes. + * Mikael Pettersson : Power Management for UP-APIC. + * Pavel Machek and + * Mikael Pettersson : PM converted to driver model. + */ + +#include <linux/config.h> +#include <linux/init.h> + +#include <linux/mm.h> +#include <linux/irq.h> +#include <linux/delay.h> +#include <linux/bootmem.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> +#include <linux/mc146818rtc.h> +#include <linux/kernel_stat.h> +#include <linux/sysdev.h> + +#include <asm/atomic.h> +#include <asm/smp.h> +#include <asm/mtrr.h> +#include <asm/mpspec.h> +#include <asm/desc.h> +#include <asm/arch_hooks.h> +#include <asm/hpet.h> + +#include "io_ports.h" + +/* + * Debug level + */ +int apic_verbosity; +int disable_apic; + +void smp_local_timer_interrupt(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + profile_tick(CPU_PROFILING, regs); +#if 0 + if (--per_cpu(prof_counter, cpu) <= 0) { + /* + * The multiplier may have changed since the last time we got + * to this point as a result of the user writing to + * /proc/profile. In this case we need to adjust the APIC + * timer accordingly. + * + * Interrupts are already masked off at this point. + */ + per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); + if (per_cpu(prof_counter, cpu) != + per_cpu(prof_old_multiplier, cpu)) { + __setup_APIC_LVTT(calibration_result/ + per_cpu(prof_counter, cpu)); + per_cpu(prof_old_multiplier, cpu) = + per_cpu(prof_counter, cpu); + } + +#ifdef CONFIG_SMP + update_process_times(user_mode(regs)); +#endif + } +#endif + + /* + * We take the 'long' return path, and there every subsystem + * grabs the appropriate locks (kernel lock/ irq lock). + * + * we might want to decouple profiling from the 'long path', + * and do the profiling totally in assembly. + * + * Currently this isn't too much of an issue (performance wise), + * we can take more than 100K local irqs per second on a 100 MHz P5. + */ +} + +/* + * Local APIC timer interrupt. This is the most natural way for doing + * local interrupts, but local timer interrupts can be emulated by + * broadcast interrupts too. [in case the hw doesn't support APIC timers] + * + * [ if a single-CPU system runs an SMP kernel then we call the local + * interrupt as well. Thus we cannot inline the local irq ... ] + */ +void smp_apic_timer_interrupt(struct pt_regs *regs) +{ + /* + * the NMI deadlock-detector uses this. + */ + add_pda(apic_timer_irqs, 1); + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. + */ + ack_APIC_irq(); + /* + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. + */ + irq_enter(); + smp_local_timer_interrupt(regs); + irq_exit(); +} + +/* + * This interrupt should _never_ happen with our APIC/SMP architecture + */ +asmlinkage void smp_spurious_interrupt(void) +{ + unsigned int v; + irq_enter(); + /* + * Check if this really is a spurious interrupt and ACK it + * if it is a vectored one. Just in case... + * Spurious interrupts should not be ACKed. + */ + v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); + if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) + ack_APIC_irq(); + +#if 0 + static unsigned long last_warning; + static unsigned long skipped; + + /* see sw-dev-man vol 3, chapter 7.4.13.5 */ + if (time_before(last_warning+30*HZ,jiffies)) { + printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n", + smp_processor_id(), skipped); + last_warning = jiffies; + skipped = 0; + } else { + skipped++; + } +#endif + irq_exit(); +} + +/* + * This interrupt should never happen with our APIC/SMP architecture + */ + +asmlinkage void smp_error_interrupt(void) +{ + unsigned int v, v1; + + irq_enter(); + /* First tickle the hardware, only then report what went on. -- REW */ + v = apic_read(APIC_ESR); + apic_write(APIC_ESR, 0); + v1 = apic_read(APIC_ESR); + ack_APIC_irq(); + atomic_inc(&irq_err_count); + + /* Here is what the APIC error bits mean: + 0: Send CS error + 1: Receive CS error + 2: Send accept error + 3: Receive accept error + 4: Reserved + 5: Send illegal vector + 6: Received illegal vector + 7: Illegal register address + */ + printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", + smp_processor_id(), v , v1); + irq_exit(); +} + +int get_physical_broadcast(void) +{ + return 0xff; +} + +/* + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. + */ +int __init APIC_init_uniprocessor (void) +{ +#ifdef CONFIG_X86_IO_APIC + if (smp_found_config) + if (!skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +#endif + + return 0; +} diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S 2005-05-20 10:25:14 -07:00 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S 2005-05-20 10:25:14 -07:00 @@ -48,6 +48,7 @@ #include <asm/errno.h> #include <asm-xen/xen-public/arch-x86_64.h> +#include "irq_vectors.h" EVENT_MASK = (CS+4) VGCF_IN_SYSCALL = (1<<8) @@ -584,7 +585,7 @@ .macro apicinterrupt num,func pushq $\num-256 interrupt \func - jmp ret_from_intr + jmp error_entry CFI_ENDPROC .endm diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/io_apic.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/io_apic.c --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/io_apic.c 2005-05-20 10:25:14 -07:00 @@ -0,0 +1,2051 @@ +/* + * Intel IO-APIC support for multi-Pentium hosts. + * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo + * + * Many thanks to Stig Venaas for trying out countless experimental + * patches and reporting/debugging problems patiently! + * + * (c) 1999, Multiple IO-APIC support, developed by + * Ken-ichi Yaku <yaku@xxxxxxxxxxxxxxxxxxxx> and + * Hidemi Kishimoto <kisimoto@xxxxxxxxxxxxxxxxxxxx>, + * further tested and cleaned up by Zach Brown <zab@xxxxxxxxxx> + * and Ingo Molnar <mingo@xxxxxxxxxx> + * + * Fixes + * Maciej W. Rozycki : Bits for genuine 82489DX APICs; + * thanks to Eric Gilmore + * and Rolf G. Tews + * for testing these extensively + * Paul Diefenbaugh : Added full ACPI support + */ + +#include <linux/mm.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/config.h> +#include <linux/smp_lock.h> +#include <linux/mc146818rtc.h> +#include <linux/acpi.h> +#include <linux/sysdev.h> + +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/desc.h> +#include <asm/proto.h> +#include <asm/mach_apic.h> + +#define __apicdebuginit __init + +int sis_apic_bug; /* not actually supported, dummy for compile */ + +static DEFINE_SPINLOCK(ioapic_lock); + +/* + * # of IRQ routing registers + */ +int nr_ioapic_registers[MAX_IO_APICS]; + +/* + * Rough estimation of how many shared IRQs there are, can + * be changed anytime. + */ +#define MAX_PLUS_SHARED_IRQS NR_IRQS +#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) + +/* + * This is performance-critical, we want to do it O(1) + * + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. + */ + +static struct irq_pin_list { + short apic, pin, next; +} irq_2_pin[PIN_MAP_SIZE]; + +int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; +#ifdef CONFIG_PCI_MSI +#define vector_to_irq(vector) \ + (platform_legacy_irq(vector) ? vector : vector_irq[vector]) +#else +#define vector_to_irq(vector) (vector) +#endif + +#ifdef CONFIG_XEN + +#include <asm-xen/xen-public/xen.h> +#include <asm-xen/xen-public/physdev.h> + +/* Fake i8259 */ +#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) +#define disable_8259A_irq(_irq) ((void)0) +#define i8259A_irq_pending(_irq) (0) + +unsigned long io_apic_irqs; + +static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg) +{ + physdev_op_t op; + int ret; + + op.cmd = PHYSDEVOP_APIC_READ; + op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid; + op.u.apic_op.offset = reg; + ret = HYPERVISOR_physdev_op(&op); + if (ret) + return ret; + return op.u.apic_op.value; +} + +static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +{ + physdev_op_t op; + + op.cmd = PHYSDEVOP_APIC_WRITE; + op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid; + op.u.apic_op.offset = reg; + op.u.apic_op.value = value; + HYPERVISOR_physdev_op(&op); +} + +#define io_apic_read(a,r) xen_io_apic_read(a,r) +#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v) + +#define clear_IO_APIC() ((void)0) + +#endif /* !CONFIG_XEN */ + +/* + * The common case is 1:1 IRQ<->pin mappings. Sometimes there are + * shared ISA-space IRQs, so we have to support them. We are super + * fast in the common case, and fast for shared ISA-space IRQs. + */ +static void add_pin_to_irq(unsigned int irq, int apic, int pin) +{ + static int first_free_entry = NR_IRQS; + struct irq_pin_list *entry = irq_2_pin + irq; + + while (entry->next) + entry = irq_2_pin + entry->next; + + if (entry->pin != -1) { + entry->next = first_free_entry; + entry = irq_2_pin + entry->next; + if (++first_free_entry >= PIN_MAP_SIZE) + panic("io_apic.c: whoops"); + } + entry->apic = apic; + entry->pin = pin; +} + +#ifndef CONFIG_XEN +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_pin_list *entry = irq_2_pin + irq; \ + \ + for (;;) { \ + unsigned int reg; \ + pin = entry->pin; \ + if (pin == -1) \ + break; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, reg); \ + if (!entry->next) \ + break; \ + entry = irq_2_pin + entry->next; \ + } \ + FINAL; \ +} + +#define DO_ACTION(name,R,ACTION, FINAL) \ + \ + static void name##_IO_APIC_irq (unsigned int irq) \ + __DO_ACTION(R, ACTION, FINAL) + +DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) + /* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffeffff, ) + /* mask = 0 */ + +static void mask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __mask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void unmask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +{ + struct IO_APIC_route_entry entry; + unsigned long flags; + + /* Check delivery_mode to be sure we're not clearing an SMI pin */ + spin_lock_irqsave(&ioapic_lock, flags); + *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); + *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + if (entry.delivery_mode == dest_SMI) + return; + /* + * Disable it in the IO-APIC irq-routing table: + */ + memset(&entry, 0, sizeof(entry)); + entry.mask = 1; + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void clear_IO_APIC (void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + clear_IO_APIC_pin(apic, pin); +} + +#endif /* !CONFIG_XEN */ + +/* + * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to + * specific CPU-side IRQs. + */ + +#define MAX_PIRQS 8 +int pirq_entries [MAX_PIRQS]; +int pirqs_enabled; +int skip_ioapic_setup; +int ioapic_force; + +/* dummy parsing: see setup.c */ + +static int __init disable_ioapic_setup(char *str) +{ + skip_ioapic_setup = 1; + return 1; +} + +static int __init enable_ioapic_setup(char *str) +{ + ioapic_force = 1; + skip_ioapic_setup = 0; + return 1; +} + + +__setup("noapic", disable_ioapic_setup); +__setup("apic", enable_ioapic_setup); + + +#include <asm/pci-direct.h> +#include <linux/pci_ids.h> +#include <linux/pci.h> + +/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC + off. Check for an Nvidia or VIA PCI bridge and turn it off. + Use pci direct infrastructure because this runs before the PCI subsystem. + + Can be overwritten with "apic" + + And another hack to disable the IOMMU on VIA chipsets. + + Kludge-O-Rama. */ +void __init check_ioapic(void) +{ + int num,slot,func; + if (ioapic_force) + return; + + /* Poor man's PCI discovery */ + for (num = 0; num < 32; num++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { + u32 class; + u32 vendor; + u8 type; + class = read_pci_config(num,slot,func, + PCI_CLASS_REVISION); + if (class == 0xffffffff) + break; + + if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) + continue; + + vendor = read_pci_config(num, slot, func, + PCI_VENDOR_ID); + vendor &= 0xffff; + switch (vendor) { + case PCI_VENDOR_ID_VIA: +#ifdef CONFIG_GART_IOMMU + if ((end_pfn >= (0xffffffff>>PAGE_SHIFT) || + force_iommu) && + !iommu_aperture_allowed) { + printk(KERN_INFO + "Looks like a VIA chipset. Disabling IOMMU. Overwrite with \"iommu=allowed\"\n"); + iommu_aperture_disabled = 1; + } +#endif + return; + case PCI_VENDOR_ID_NVIDIA: +#ifdef CONFIG_ACPI + /* All timer overrides on Nvidia + seem to be wrong. Skip them. */ + acpi_skip_timer_override = 1; + printk(KERN_INFO + "Nvidia board detected. Ignoring ACPI timer override.\n"); +#endif + /* RED-PEN skip them on mptables too? */ + return; + } + + /* No multi-function device? */ + type = read_pci_config_byte(num,slot,func, + PCI_HEADER_TYPE); + if (!(type & 0x80)) + break; + } + } + } +} + +static int __init ioapic_pirq_setup(char *str) +{ + int i, max; + int ints[MAX_PIRQS+1]; + + get_options(str, ARRAY_SIZE(ints), ints); + + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; + + pirqs_enabled = 1; + apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n"); + max = MAX_PIRQS; + if (ints[0] < MAX_PIRQS) + max = ints[0]; + + for (i = 0; i < max; i++) { + apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); + /* + * PIRQs are mapped upside down, usually. + */ + pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; + } + return 1; +} + +__setup("pirq=", ioapic_pirq_setup); + +/* + * Find the IRQ entry number of a certain pin. + */ +static int find_irq_entry(int apic, int pin, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mpc_irqtype == type && + (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || + mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && + mp_irqs[i].mpc_dstirq == pin) + return i; + + return -1; +} + +#ifndef CONFIG_XEN +/* + * Find the pin to which IRQ[irq] (ISA) is connected + */ +static int __init find_isa_irq_pin(int irq, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mpc_srcbus; + + if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || + mp_bus_id_to_type[lbus] == MP_BUS_EISA || + mp_bus_id_to_type[lbus] == MP_BUS_MCA) && + (mp_irqs[i].mpc_irqtype == type) && + (mp_irqs[i].mpc_srcbusirq == irq)) + + return mp_irqs[i].mpc_dstirq; + } + return -1; +} +#endif + +/* + * Find a specific PCI IRQ entry. + * Not an __init, possibly needed by modules + */ +static int pin_2_irq(int idx, int apic, int pin); + +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) +{ + int apic, i, best_guess = -1; + + apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", + bus, slot, pin); + if (mp_bus_id_to_pci_bus[bus] == -1) { + apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); + return -1; + } + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mpc_srcbus; + + for (apic = 0; apic < nr_ioapics; apic++) + if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || + mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + break; + + if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && + !mp_irqs[i].mpc_irqtype && + (bus == lbus) && + (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); + + if (!(apic || IO_APIC_IRQ(irq))) + continue; + + if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) + return irq; + /* + * Use the first all-but-pin matching entry as a + * best-guess fuzzy result for broken mptables. + */ + if (best_guess < 0) + best_guess = irq; + } + } + return best_guess; +} + +/* + * EISA Edge/Level control register, ELCR + */ +static int EISA_ELCR(unsigned int irq) +{ + if (irq < 16) { + unsigned int port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; + } + apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq); + return 0; +} + +/* EISA interrupts are always polarity zero and can be edge or level + * trigger depending on the ELCR value. If an interrupt is listed as + * EISA conforming in the MP table, that means its trigger type must + * be read in from the ELCR */ + +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) +#define default_EISA_polarity(idx) (0) + +/* ISA interrupts are always polarity zero edge triggered, + * when listed as conforming in the MP table. */ + +#define default_ISA_trigger(idx) (0) +#define default_ISA_polarity(idx) (0) + +/* PCI interrupts are always polarity one level triggered, + * when listed as conforming in the MP table. */ + +#define default_PCI_trigger(idx) (1) +#define default_PCI_polarity(idx) (1) + +/* MCA interrupts are always polarity zero level triggered, + * when listed as conforming in the MP table. */ + +#define default_MCA_trigger(idx) (1) +#define default_MCA_polarity(idx) (0) + +static int __init MPBIOS_polarity(int idx) +{ + int bus = mp_irqs[idx].mpc_srcbus; + int polarity; + + /* + * Determine IRQ line polarity (high active or low active): + */ + switch (mp_irqs[idx].mpc_irqflag & 3) + { + case 0: /* conforms, ie. bus-type dependent polarity */ + { + switch (mp_bus_id_to_type[bus]) + { + case MP_BUS_ISA: /* ISA pin */ + { + polarity = default_ISA_polarity(idx); + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + polarity = default_EISA_polarity(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + polarity = default_PCI_polarity(idx); + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + polarity = default_MCA_polarity(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + } + break; + } + case 1: /* high active */ + { + polarity = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + case 3: /* low active */ + { + polarity = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + } + return polarity; +} + +static int MPBIOS_trigger(int idx) +{ + int bus = mp_irqs[idx].mpc_srcbus; + int trigger; + + /* + * Determine IRQ trigger mode (edge or level sensitive): + */ + switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + { + case 0: /* conforms, ie. bus-type dependent */ + { + switch (mp_bus_id_to_type[bus]) + { + case MP_BUS_ISA: /* ISA pin */ + { + trigger = default_ISA_trigger(idx); + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + trigger = default_EISA_trigger(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + trigger = default_PCI_trigger(idx); + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + trigger = default_MCA_trigger(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + } + break; + } + case 1: /* edge */ + { + trigger = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + case 3: /* level */ + { + trigger = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 0; + break; + } + } + return trigger; +} + +static inline int irq_polarity(int idx) +{ + return MPBIOS_polarity(idx); +} + +static inline int irq_trigger(int idx) +{ + return MPBIOS_trigger(idx); +} + +static int pin_2_irq(int idx, int apic, int pin) +{ + int irq, i; + int bus = mp_irqs[idx].mpc_srcbus; + + /* + * Debugging check, we are in big trouble if this message pops up! + */ + if (mp_irqs[idx].mpc_dstirq != pin) + printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); + + switch (mp_bus_id_to_type[bus]) + { + case MP_BUS_ISA: /* ISA pin */ + case MP_BUS_EISA: + case MP_BUS_MCA: + { + irq = mp_irqs[idx].mpc_srcbusirq; + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + /* + * PCI IRQs are mapped in order + */ + i = irq = 0; + while (i < apic) + irq += nr_ioapic_registers[i++]; + irq += pin; + break; + } + default: + { + printk(KERN_ERR "unknown bus type %d.\n",bus); + irq = 0; + break; + } + } + + /* + * PCI IRQ command line redirection. Yes, limits are hardcoded. + */ + if ((pin >= 16) && (pin <= 23)) { + if (pirq_entries[pin-16] != -1) { + if (!pirq_entries[pin-16]) { + apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16); + } else { + irq = pirq_entries[pin-16]; + apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n", + pin-16, irq); + } + } + } + return irq; +} + +static inline int IO_APIC_irq_trigger(int irq) +{ + int apic, idx, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic,pin,mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; +} + +/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ +u8 irq_vector[NR_IRQ_VECTORS]; + +int assign_irq_vector(int irq) +{ + static int current_vector = FIRST_DEVICE_VECTOR; + physdev_op_t op; + + BUG_ON(irq >= NR_IRQ_VECTORS); + if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) + return IO_APIC_VECTOR(irq); + + op.cmd = PHYSDEVOP_ASSIGN_VECTOR; + op.u.irq_op.irq = irq; + if (HYPERVISOR_physdev_op(&op)) + return -ENOSPC; + current_vector = op.u.irq_op.vector; + + vector_irq[current_vector] = irq; + if (irq != AUTO_ASSIGN) + IO_APIC_VECTOR(irq) = current_vector; + + return current_vector; +} + +extern void (*interrupt[NR_IRQS])(void); +#ifndef CONFIG_XEN +static struct hw_interrupt_type ioapic_level_type; +static struct hw_interrupt_type ioapic_edge_type; + +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + +static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) +{ + if (use_pci_vector() && !platform_legacy_irq(irq)) { + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) + irq_desc[vector].handler = &ioapic_level_type; + else + irq_desc[vector].handler = &ioapic_edge_type; + set_intr_gate(vector, interrupt[vector]); + } else { + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) + irq_desc[irq].handler = &ioapic_level_type; + else + irq_desc[irq].handler = &ioapic_edge_type; + set_intr_gate(vector, interrupt[irq]); + } +} +#else +#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) +#endif /* !CONFIG_XEN */ + +void __init setup_IO_APIC_irqs(void) +{ + struct IO_APIC_route_entry entry; + int apic, pin, idx, irq, first_notcon = 1, vector; + unsigned long flags; + + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + + /* + * add it to the IO-APIC irq-routing table: + */ + memset(&entry,0,sizeof(entry)); + + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; + entry.mask = 0; /* enable IRQ */ + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + + idx = find_irq_entry(apic,pin,mp_INT); + if (idx == -1) { + if (first_notcon) { + apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); + first_notcon = 0; + } else + apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); + continue; + } + + entry.trigger = irq_trigger(idx); + entry.polarity = irq_polarity(idx); + + if (irq_trigger(idx)) { + entry.trigger = 1; + entry.mask = 1; + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + } + + irq = pin_2_irq(idx, apic, pin); + add_pin_to_irq(irq, apic, pin); + + if (/* !apic && */ !IO_APIC_IRQ(irq)) + continue; + + if (IO_APIC_IRQ(irq)) { + vector = assign_irq_vector(irq); + entry.vector = vector; + + ioapic_register_intr(irq, vector, IOAPIC_AUTO); + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + } + + if (!first_notcon) + apic_printk(APIC_VERBOSE," not connected.\n"); +} + +#ifndef CONFIG_XEN +/* + * Set up the 8259A-master output pin as broadcast to all + * CPUs. + */ +void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) +{ + struct IO_APIC_route_entry entry; + unsigned long flags; + + memset(&entry,0,sizeof(entry)); + + disable_8259A_irq(0); + + /* mask LVT0 */ + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); + + /* + * We use logical delivery to get the timer IRQ + * to the first CPU. + */ + entry.dest_mode = INT_DEST_MODE; + entry.mask = 0; /* unmask IRQ now */ + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.delivery_mode = INT_DELIVERY_MODE; + entry.polarity = 0; + entry.trigger = 0; + entry.vector = vector; + + /* + * The timer IRQ doesn't have to know that behind the + * scene we have a 8259A-master in AEOI mode ... + */ + irq_desc[0].handler = &ioapic_edge_type; + + /* + * Add it to the IO-APIC irq-routing table: + */ + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + + enable_8259A_irq(0); +} + +void __init UNEXPECTED_IO_APIC(void) +{ +} + +void __apicdebuginit print_IO_APIC(void) +{ + int apic, i; + union IO_APIC_reg_00 reg_00; + union IO_APIC_reg_01 reg_01; + union IO_APIC_reg_02 reg_02; + unsigned long flags; + + if (apic_verbosity == APIC_QUIET) + return; + + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); + for (i = 0; i < nr_ioapics; i++) + printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", + mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + + /* + * We are a bit conservative about what we expect. We have to + * know about every hardware change ASAP. + */ + printk(KERN_INFO "testing the IO APIC.......................\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + reg_01.raw = io_apic_read(apic, 1); + if (reg_01.bits.version >= 0x10) + reg_02.raw = io_apic_read(apic, 2); + spin_unlock_irqrestore(&ioapic_lock, flags); + + printk("\n"); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); + printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); + if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) + UNEXPECTED_IO_APIC(); + + printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); + printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); + if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ + (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ + (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ + (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ + (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ + (reg_01.bits.entries != 0x2E) && + (reg_01.bits.entries != 0x3F) && + (reg_01.bits.entries != 0x03) + ) + UNEXPECTED_IO_APIC(); + + printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); + printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); + if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ + (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */ + (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ + (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ + (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ + (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ + ) + UNEXPECTED_IO_APIC(); + if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) + UNEXPECTED_IO_APIC(); + + if (reg_01.bits.version >= 0x10) { + printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); + printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); + if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) + UNEXPECTED_IO_APIC(); + } + + printk(KERN_DEBUG ".... IRQ redirection table:\n"); + + printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" + " Stat Dest Deli Vect: \n"); + + for (i = 0; i <= reg_01.bits.entries; i++) { + struct IO_APIC_route_entry entry; + + spin_lock_irqsave(&ioapic_lock, flags); + *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); + *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); + spin_unlock_irqrestore(&ioapic_lock, flags); + + printk(KERN_DEBUG " %02x %03X %02X ", + i, + entry.dest.logical.logical_dest, + entry.dest.physical.physical_dest + ); + + printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", + entry.mask, + entry.trigger, + entry.irr, + entry.polarity, + entry.delivery_status, + entry.dest_mode, + entry.delivery_mode, + entry.vector + ); + } + } + if (use_pci_vector()) + printk(KERN_INFO "Using vector-based indexing\n"); + printk(KERN_DEBUG "IRQ to pin mappings:\n"); + for (i = 0; i < NR_IRQS; i++) { + struct irq_pin_list *entry = irq_2_pin + i; + if (entry->pin < 0) + continue; + if (use_pci_vector() && !platform_legacy_irq(i)) + printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); + else + printk(KERN_DEBUG "IRQ%d ", i); + for (;;) { + printk("-> %d:%d", entry->apic, entry->pin); + if (!entry->next) + break; + entry = irq_2_pin + entry->next; + } + printk("\n"); + } + + printk(KERN_INFO ".................................... done.\n"); + + return; +} + +static __apicdebuginit void print_APIC_bitfield (int base) +{ + unsigned int v; + int i, j; + + if (apic_verbosity == APIC_QUIET) + return; + + printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); + for (i = 0; i < 8; i++) { + v = apic_read(base + i*0x10); + for (j = 0; j < 32; j++) { + if (v & (1<<j)) + printk("1"); + else + printk("0"); + } + printk("\n"); + } +} + +void __apicdebuginit print_local_APIC(void * dummy) +{ + unsigned int v, ver, maxlvt; + + if (apic_verbosity == APIC_QUIET) + return; + + printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", + smp_processor_id(), hard_smp_processor_id()); + v = apic_read(APIC_ID); + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v)); + v = apic_read(APIC_LVR); + printk(KERN_INFO "... APIC VERSION: %08x\n", v); + ver = GET_APIC_VERSION(v); + maxlvt = get_maxlvt(); + + v = apic_read(APIC_TASKPRI); + printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); + + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + v = apic_read(APIC_ARBPRI); + printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, + v & APIC_ARBPRI_MASK); + v = apic_read(APIC_PROCPRI); + printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); + } + + v = apic_read(APIC_EOI); + printk(KERN_DEBUG "... APIC EOI: %08x\n", v); + v = apic_read(APIC_RRR); + printk(KERN_DEBUG "... APIC RRR: %08x\n", v); + v = apic_read(APIC_LDR); + printk(KERN_DEBUG "... APIC LDR: %08x\n", v); + v = apic_read(APIC_DFR); + printk(KERN_DEBUG "... APIC DFR: %08x\n", v); + v = apic_read(APIC_SPIV); + printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); + + printk(KERN_DEBUG "... APIC ISR field:\n"); + print_APIC_bitfield(APIC_ISR); + printk(KERN_DEBUG "... APIC TMR field:\n"); + print_APIC_bitfield(APIC_TMR); + printk(KERN_DEBUG "... APIC IRR field:\n"); + print_APIC_bitfield(APIC_IRR); + + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + v = apic_read(APIC_ESR); + printk(KERN_DEBUG "... APIC ESR: %08x\n", v); + } + + v = apic_read(APIC_ICR); + printk(KERN_DEBUG "... APIC ICR: %08x\n", v); + v = apic_read(APIC_ICR2); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + + v = apic_read(APIC_LVTT); + printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); + + if (maxlvt > 3) { /* PC is LVT#4. */ + v = apic_read(APIC_LVTPC); + printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); + } + v = apic_read(APIC_LVT0); + printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); + v = apic_read(APIC_LVT1); + printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); + + if (maxlvt > 2) { /* ERR is LVT#3. */ + v = apic_read(APIC_LVTERR); + printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); + } + + v = apic_read(APIC_TMICT); + printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); + v = apic_read(APIC_TMCCT); + printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); + v = apic_read(APIC_TDCR); + printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); + printk("\n"); +} + +void print_all_local_APICs (void) +{ + on_each_cpu(print_local_APIC, NULL, 1, 1); +} + +void __apicdebuginit print_PIC(void) +{ + extern spinlock_t i8259A_lock; + unsigned int v; + unsigned long flags; + + if (apic_verbosity == APIC_QUIET) + return; + + printk(KERN_DEBUG "\nprinting PIC contents\n"); + + spin_lock_irqsave(&i8259A_lock, flags); + + v = inb(0xa1) << 8 | inb(0x21); + printk(KERN_DEBUG "... PIC IMR: %04x\n", v); + + v = inb(0xa0) << 8 | inb(0x20); + printk(KERN_DEBUG "... PIC IRR: %04x\n", v); + + outb(0x0b,0xa0); + outb(0x0b,0x20); + v = inb(0xa0) << 8 | inb(0x20); + outb(0x0a,0xa0); + outb(0x0a,0x20); + + spin_unlock_irqrestore(&i8259A_lock, flags); + + printk(KERN_DEBUG "... PIC ISR: %04x\n", v); + + v = inb(0x4d1) << 8 | inb(0x4d0); + printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); +} +#else +void __init print_IO_APIC(void) { } +#endif /* !CONFIG_XEN */ + +static void __init enable_IO_APIC(void) +{ + union IO_APIC_reg_01 reg_01; + int i; + unsigned long flags; + + for (i = 0; i < PIN_MAP_SIZE; i++) { + irq_2_pin[i].pin = -1; + irq_2_pin[i].next = 0; + } + if (!pirqs_enabled) + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (i = 0; i < nr_ioapics; i++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(i, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[i] = reg_01.bits.entries+1; + } + + /* + * Do not trust the IO-APIC being empty at bootup + */ + clear_IO_APIC(); +} + +/* + * Not an __init, needed by the reboot code + */ +void disable_IO_APIC(void) +{ + /* + * Clear the IO-APIC before rebooting: + */ + clear_IO_APIC(); +#ifndef CONFIG_XEN + disconnect_bsp_APIC(); +#endif +} + +/* + * function to set the IO-APIC physical IDs based on the + * values stored in the MPC table. + * + * by Matt Domsch <Matt_Domsch@xxxxxxxx> Tue Dec 21 12:25:05 CST 1999 + */ +#ifndef CONFIG_XEN +static void __init setup_ioapic_ids_from_mpc (void) +{ + union IO_APIC_reg_00 reg_00; + int apic; + int i; + unsigned char old_id; + unsigned long flags; + + /* + * Set the IOAPIC ID to the value stored in the MPC table. + */ + for (apic = 0; apic < nr_ioapics; apic++) { + + /* Read the register 0 value */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + old_id = mp_ioapics[apic].mpc_apicid; + + + printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid); + + + /* + * We need to adjust the IRQ routing table + * if the ID changed. + */ + if (old_id != mp_ioapics[apic].mpc_apicid) + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mpc_dstapic == old_id) + mp_irqs[i].mpc_dstapic + = mp_ioapics[apic].mpc_apicid; + + /* + * Read the right value from the MPC table and + * write it into the ID register. + */ + apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", + mp_ioapics[apic].mpc_apicid); + + reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0, reg_00.raw); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* + * Sanity check + */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) + printk("could not set ID!\n"); + else + apic_printk(APIC_VERBOSE," ok.\n"); + } +} +#else +static void __init setup_ioapic_ids_from_mpc(void) { } +#endif + +/* + * There is a nasty bug in some older SMP boards, their mptable lies + * about the timer IRQ. We do the following to work around the situation: + * + * - timer IRQ defaults to IO-APIC IRQ + * - if this function detects that timer IRQs are defunct, then we fall + * back to ISA timer IRQs + */ +#ifndef CONFIG_XEN +static int __init timer_irq_works(void) +{ + unsigned long t1 = jiffies; + + local_irq_enable(); + /* Let ten ticks pass... */ + mdelay((10 * 1000) / HZ); + + /* + * Expect a few ticks at least, to be sure some possible + * glue logic does not lock up after one or two first + * ticks in a non-ExtINT mode. Also the local APIC + * might have cached one ExtINT interrupt. Finally, at + * least one tick may be lost due to delays. + */ + + /* jiffies wrap? */ + if (jiffies - t1 > 4) + return 1; + return 0; +} + +/* + * In the SMP+IOAPIC case it might happen that there are an unspecified + * number of pending IRQ events unhandled. These cases are very rare, + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much + * better to do it this way as thus we do not have to be aware of + * 'pending' interrupts in the IRQ path, except at this point. + */ +/* + * Edge triggered needs to resend any interrupt + * that was delayed but this is now handled in the device + * independent code. + */ + +/* + * Starting up a edge-triggered IO-APIC interrupt is + * nasty - we need to make sure that we get the edge. + * If it is already asserted for some reason, we need + * return 1 to indicate that is was pending. + * + * This is not complete - we should be able to fake + * an edge even if it isn't on the 8259A... + */ + +static unsigned int startup_edge_ioapic_irq(unsigned int irq) +{ + int was_pending = 0; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + if (irq < 16) { + disable_8259A_irq(irq); + if (i8259A_irq_pending(irq)) + was_pending = 1; + } + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return was_pending; +} + +/* + * Once we have recorded IRQ_PENDING already, we can mask the + * interrupt for real. This prevents IRQ storms from unhandled + * devices. + */ +static void ack_edge_ioapic_irq(unsigned int irq) +{ + if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) + == (IRQ_PENDING | IRQ_DISABLED)) + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +/* + * Level triggered interrupts can just be masked, + * and shutting down and starting up the interrupt + * is the same as enabling and disabling them -- except + * with a startup need to return a "was pending" value. + * + * Level triggered interrupts are special because we + * do not touch any IO-APIC register while handling + * them. We ack the APIC in the end-IRQ handler, not + * in the start-IRQ-handler. Protection against reentrance + * from the same interrupt is still provided, both by the + * generic IRQ layer and by the fact that an unacked local + * APIC does not accept IRQs. + */ +static unsigned int startup_level_ioapic_irq (unsigned int irq) +{ + unmask_IO_APIC_irq(irq); + + return 0; /* don't check for pending */ +} + +static void end_level_ioapic_irq (unsigned int irq) +{ + ack_APIC_irq(); +} + +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + unsigned long flags; + unsigned int dest; + + dest = cpu_mask_to_apicid(mask); + + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); + + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION(1, = dest, ) + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +#ifdef CONFIG_PCI_MSI +static unsigned int startup_edge_ioapic_vector(unsigned int vector) +{ + int irq = vector_to_irq(vector); + + return startup_edge_ioapic_irq(irq); +} + +static void ack_edge_ioapic_vector(unsigned int vector) +{ + int irq = vector_to_irq(vector); + + ack_edge_ioapic_irq(irq); +} + +static unsigned int startup_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + return startup_level_ioapic_irq (irq); +} + +static void end_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + end_level_ioapic_irq(irq); +} + +static void mask_IO_APIC_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_IO_APIC_irq(irq); +} + +static void unmask_IO_APIC_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + unmask_IO_APIC_irq(irq); +} + +static void set_ioapic_affinity_vector (unsigned int vector, + cpumask_t cpu_mask) +{ + int irq = vector_to_irq(vector); + + set_ioapic_affinity_irq(irq, cpu_mask); +} +#endif + +/* + * Level and edge triggered IO-APIC interrupts need different handling, + * so we use two separate IRQ descriptors. Edge triggered IRQs can be + * handled with the level-triggered descriptor, but that one has slightly + * more overhead. Level-triggered interrupts cannot be handled with the + * edge-triggered handler, without risking IRQ storms and other ugly + * races. + */ + +static struct hw_interrupt_type ioapic_edge_type = { + .typename = "IO-APIC-edge", + .startup = startup_edge_ioapic, + .shutdown = shutdown_edge_ioapic, + .enable = enable_edge_ioapic, + .disable = disable_edge_ioapic, + .ack = ack_edge_ioapic, + .end = end_edge_ioapic, + .set_affinity = set_ioapic_affinity, +}; + +static struct hw_interrupt_type ioapic_level_type = { + .typename = "IO-APIC-level", + .startup = startup_level_ioapic, + .shutdown = shutdown_level_ioapic, + .enable = enable_level_ioapic, + .disable = disable_level_ioapic, + .ack = mask_and_ack_level_ioapic, + .end = end_level_ioapic, + .set_affinity = set_ioapic_affinity, +}; +#endif /* !CONFIG_XEN */ + +static inline void init_IO_APIC_traps(void) +{ + int irq; + + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ + for (irq = 0; irq < NR_IRQS ; irq++) { + int tmp = irq; + if (use_pci_vector()) { + if (!platform_legacy_irq(tmp)) + if ((tmp = vector_to_irq(tmp)) == -1) + continue; + } + if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { + /* + * Hmm.. We don't have an entry for this, + * so default to an old-fashioned 8259 + * interrupt if we can.. + */ + if (irq < 16) + make_8259A_irq(irq); +#ifndef CONFIG_XEN + else + /* Strange. Oh, well.. */ + irq_desc[irq].handler = &no_irq_type; +#endif + } + } +} + +#ifndef CONFIG_XEN +static void enable_lapic_irq (unsigned int irq) +{ + unsigned long v; + + v = apic_read(APIC_LVT0); + apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); +} + +static void disable_lapic_irq (unsigned int irq) +{ + unsigned long v; + + v = apic_read(APIC_LVT0); + apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); +} + +static void ack_lapic_irq (unsigned int irq) +{ + ack_APIC_irq(); +} + +static void end_lapic_irq (unsigned int i) { /* nothing */ } + +static struct hw_interrupt_type lapic_irq_type = { + .typename = "local-APIC-edge", + .startup = NULL, /* startup_irq() not used for IRQ0 */ + .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ + .enable = enable_lapic_irq, + .disable = disable_lapic_irq, + .ack = ack_lapic_irq, + .end = end_lapic_irq, +}; + +static void setup_nmi (void) +{ + /* + * Dirty trick to enable the NMI watchdog ... + * We put the 8259A master into AEOI mode and + * unmask on all local APICs LVT0 as NMI. + * + * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') + * is from Maciej W. Rozycki - so we do not have to EOI from + * the NMI handler or the timer interrupt. + */ + printk(KERN_INFO "activating NMI Watchdog ..."); + + enable_NMI_through_LVT0(NULL); + + printk(" done.\n"); +} + +/* + * This looks a bit hackish but it's about the only one way of sending + * a few INTA cycles to 8259As and any associated glue logic. ICR does + * not support the ExtINT mode, unfortunately. We need to send these + * cycles as some i82489DX-based boards have glue logic that keeps the + * 8259A interrupt line asserted until INTA. --macro + */ +static inline void unlock_ExtINT_logic(void) +{ + int pin, i; + struct IO_APIC_route_entry entry0, entry1; + unsigned char save_control, save_freq_select; + unsigned long flags; + + pin = find_isa_irq_pin(8, mp_INT); + if (pin == -1) + return; + + spin_lock_irqsave(&ioapic_lock, flags); + *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); + *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + clear_IO_APIC_pin(0, pin); + + memset(&entry1, 0, sizeof(entry1)); + + entry1.dest_mode = 0; /* physical delivery */ + entry1.mask = 0; /* unmask IRQ now */ + entry1.dest.physical.physical_dest = hard_smp_processor_id(); + entry1.delivery_mode = dest_ExtINT; + entry1.polarity = entry0.polarity; + entry1.trigger = 0; + entry1.vector = 0; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); + io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + + save_control = CMOS_READ(RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, + RTC_FREQ_SELECT); + CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); + + i = 100; + while (i-- > 0) { + mdelay(10); + if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) + i -= 10; + } + + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + clear_IO_APIC_pin(0, pin); + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); + io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +/* + * This code may look a bit paranoid, but it's supposed to cooperate with + * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ + * is so screwy. Thanks to Brian Perkins for testing/hacking this beast + * fanatically on his truly buggy board. + */ +static inline void check_timer(void) +{ + int pin1, pin2; + int vector; + + /* + * get/set the timer IRQ vector: + */ + disable_8259A_irq(0); + vector = assign_irq_vector(0); + set_intr_gate(vector, interrupt[0]); + + /* + * Subtle, code in do_timer_interrupt() expects an AEOI + * mode for the 8259A whenever interrupts are routed + * through I/O APICs. Also IRQ0 has to be enabled in + * the 8259A which implies the virtual wire has to be + * disabled in the local APIC. + */ + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); + init_8259A(1); + enable_8259A_irq(0); + + pin1 = find_isa_irq_pin(0, mp_INT); + pin2 = find_isa_irq_pin(0, mp_ExtINT); + + apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2); + + if (pin1 != -1) { + /* + * Ok, does IRQ0 through the IOAPIC work? + */ + unmask_IO_APIC_irq(0); + if (timer_irq_works()) { + nmi_watchdog_default(); + if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); + setup_nmi(); + enable_8259A_irq(0); + check_nmi_watchdog(); + } + return; + } + clear_IO_APIC_pin(0, pin1); + apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); + } + + apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); + if (pin2 != -1) { + apic_printk(APIC_VERBOSE,"\n..... (found pin %d) ...", pin2); + /* + * legacy devices should be connected to IO APIC #0 + */ + setup_ExtINT_IRQ0_pin(pin2, vector); + if (timer_irq_works()) { + printk("works.\n"); + nmi_watchdog_default(); + if (nmi_watchdog == NMI_IO_APIC) { + setup_nmi(); + check_nmi_watchdog(); + } + return; + } + /* + * Cleanup, just in case ... + */ + clear_IO_APIC_pin(0, pin2); + } + printk(" failed.\n"); + + if (nmi_watchdog) { + printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); + nmi_watchdog = 0; + } + + apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); + + disable_8259A_irq(0); + irq_desc[0].handler = &lapic_irq_type; + apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ + enable_8259A_irq(0); + + if (timer_irq_works()) { + apic_printk(APIC_QUIET, " works.\n"); + return; + } + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); + apic_printk(APIC_VERBOSE," failed.\n"); + + apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); + + init_8259A(0); + make_8259A_irq(0); + apic_write_around(APIC_LVT0, APIC_DM_EXTINT); + + unlock_ExtINT_logic(); + + if (timer_irq_works()) { + apic_printk(APIC_VERBOSE," works.\n"); + return; + } + apic_printk(APIC_VERBOSE," failed :(.\n"); + panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n"); +} +#else +#define check_timer() ((void)0) +#endif /* !CONFIG_XEN */ + +/* + * + * IRQ's that are handled by the PIC in the MPS IOAPIC case. + * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. + * Linux doesn't really care, as it's not actually used + * for any interrupt handling anyway. + */ +#define PIC_IRQS (1<<2) + +void __init setup_IO_APIC(void) +{ + enable_IO_APIC(); + + if (acpi_ioapic) + io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ + else + io_apic_irqs = ~PIC_IRQS; + + apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); + + /* + * Set up the IO-APIC IRQ routing table. + */ + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); +#ifndef CONFIG_XEN + sync_Arb_IDs(); +#endif /* !CONFIG_XEN */ + setup_IO_APIC_irqs(); + init_IO_APIC_traps(); + check_timer(); + if (!acpi_ioapic) + print_IO_APIC(); +} + +struct sysfs_ioapic_data { + struct sys_device dev; + struct IO_APIC_route_entry entry[0]; +}; +static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; + +static int ioapic_suspend(struct sys_device *dev, u32 state) +{ + struct IO_APIC_route_entry *entry; + struct sysfs_ioapic_data *data; + unsigned long flags; + int i; + + data = container_of(dev, struct sysfs_ioapic_data, dev); + entry = data->entry; + spin_lock_irqsave(&ioapic_lock, flags); + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { + *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); + *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); + } + spin_unlock_irqrestore(&ioapic_lock, flags); + + return 0; +} + +static int ioapic_resume(struct sys_device *dev) +{ + struct IO_APIC_route_entry *entry; + struct sysfs_ioapic_data *data; + unsigned long flags; + union IO_APIC_reg_00 reg_00; + int i; + + data = container_of(dev, struct sysfs_ioapic_data, dev); + entry = data->entry; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(dev->id, 0); + if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; + io_apic_write(dev->id, 0, reg_00.raw); + } + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { + io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); + io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); + } + spin_unlock_irqrestore(&ioapic_lock, flags); + + return 0; +} + +static struct sysdev_class ioapic_sysdev_class = { + set_kset_name("ioapic"), + .suspend = ioapic_suspend, + .resume = ioapic_resume, +}; + +static int __init ioapic_init_sysfs(void) +{ + struct sys_device * dev; + int i, size, error = 0; + + error = sysdev_class_register(&ioapic_sysdev_class); + if (error) + return error; + + for (i = 0; i < nr_ioapics; i++ ) { + size = sizeof(struct sys_device) + nr_ioapic_registers[i] + * sizeof(struct IO_APIC_route_entry); + mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); + if (!mp_ioapic_data[i]) { + printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); + continue; + } + memset(mp_ioapic_data[i], 0, size); + dev = &mp_ioapic_data[i]->dev; + dev->id = i; + dev->cls = &ioapic_sysdev_class; + error = sysdev_register(dev); + if (error) { + kfree(mp_ioapic_data[i]); + mp_ioapic_data[i] = NULL; + printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); + continue; + } + } + + return 0; +} + +device_initcall(ioapic_init_sysfs); + +/* -------------------------------------------------------------------------- + ACPI-based IOAPIC Configuration + -------------------------------------------------------------------------- */ + +#ifdef CONFIG_ACPI_BOOT + +#define IO_APIC_MAX_ID 0xFE + +int __init io_apic_get_unique_id (int ioapic, int apic_id) +{ +#ifndef CONFIG_XEN + union IO_APIC_reg_00 reg_00; + static physid_mask_t apic_id_map; + unsigned long flags; + int i = 0; + + /* + * The P4 platform supports up to 256 APIC IDs on two separate APIC + * buses (one for LAPICs, one for IOAPICs), where predecessors only + * supports up to 16 on one shared APIC bus. + * + * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full + * advantage of new APIC bus architecture. + */ + + if (physids_empty(apic_id_map)) + apic_id_map = phys_cpu_present_map; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + if (apic_id >= IO_APIC_MAX_ID) { + apic_printk(APIC_QUIET, KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " + "%d\n", ioapic, apic_id, reg_00.bits.ID); + apic_id = reg_00.bits.ID; + } + + /* + * Every APIC in a system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (physid_isset(apic_id, apic_id_map)) { + + for (i = 0; i < IO_APIC_MAX_ID; i++) { + if (!physid_isset(i, apic_id_map)) + break; + } + + if (i == IO_APIC_MAX_ID) + panic("Max apic_id exceeded!\n"); + + apic_printk(APIC_VERBOSE, KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " + "trying %d\n", ioapic, apic_id, i); + + apic_id = i; + } + + physid_set(apic_id, apic_id_map); + + if (reg_00.bits.ID != apic_id) { + reg_00.bits.ID = apic_id; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(ioapic, 0, reg_00.raw); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* Sanity check */ + if (reg_00.bits.ID != apic_id) + panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); + } + + apic_printk(APIC_VERBOSE,KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); +#endif /* !CONFIG_XEN */ + + return apic_id; +} + + +int __init io_apic_get_version (int ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.bits.version; +} + + +int __init io_apic_get_redir_entries (int ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.bits.entries; +} + + +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) +{ + struct IO_APIC_route_entry entry; + unsigned long flags; + + if (!IO_APIC_IRQ(irq)) { + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", + ioapic); + return -EINVAL; + } + + /* + * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. + * Note that we mask (disable) IRQs now -- these get enabled when the + * corresponding device driver registers for this IRQ. + */ + + memset(&entry,0,sizeof(entry)); + + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.trigger = edge_level; + entry.polarity = active_high_low; + entry.mask = 1; /* Disabled (masked) */ + + /* + * IRQs < 16 are already in the irq_2_pin[] map + */ + if (irq >= 16) + add_pin_to_irq(irq, ioapic, pin); + + entry.vector = assign_irq_vector(irq); + + apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " + "IRQ %d Mode:%i Active:%i)\n", ioapic, + mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, + edge_level, active_high_low); + + ioapic_register_intr(irq, entry.vector, edge_level); + + if (!ioapic && (irq < 16)) + disable_8259A_irq(irq); + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return 0; +} + +#endif /*CONFIG_ACPI_BOOT*/ + + +#ifndef CONFIG_XEN +/* + * This function currently is only a helper for the i386 smp boot process where + * we need to reprogram the ioredtbls to cater for the cpus which have come online + * so mask in all cases should simply be TARGET_CPUS + */ +void __init setup_ioapic_dest(void) +{ + int pin, ioapic, irq, irq_entry; + + if (skip_ioapic_setup == 1) + return; + + for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { + for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + irq_entry = find_irq_entry(ioapic, pin, mp_INT); + if (irq_entry == -1) + continue; + irq = pin_2_irq(irq_entry, ioapic, pin); + set_ioapic_affinity_irq(irq, TARGET_CPUS); + } + + } +} +#endif /* !CONFIG_XEN */ diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/mpparse.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/mpparse.c --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/mpparse.c 2005-05-20 10:25:14 -07:00 @@ -0,0 +1,954 @@ +/* + * Intel Multiprocessor Specification 1.1 and 1.4 + * compliant MP-table parsing routines. + * + * (c) 1995 Alan Cox, Building #3 <alan@xxxxxxxxxx> + * (c) 1998, 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx> + * + * Fixes + * Erich Boleyn : MP v1.4 and additional changes. + * Alan Cox : Added EBDA scanning + * Ingo Molnar : various cleanups and rewrites + * Maciej W. Rozycki: Bits for default MP configurations + * Paul Diefenbaugh: Added full ACPI support + */ + +#include <linux/mm.h> +#include <linux/irq.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/config.h> +#include <linux/bootmem.h> +#include <linux/smp_lock.h> +#include <linux/kernel_stat.h> +#include <linux/mc146818rtc.h> +#include <linux/acpi.h> + +#include <asm/smp.h> +#include <asm/mtrr.h> +#include <asm/mpspec.h> +#include <asm/pgalloc.h> +#include <asm/io_apic.h> +#include <asm/proto.h> + +/* Have we found an MP table */ +int smp_found_config; +unsigned int __initdata maxcpus = NR_CPUS; + +int acpi_found_madt; + +/* + * Various Linux-internal data structures created from the + * MP-table. + */ +int apic_version [MAX_APICS]; +unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; +int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; +cpumask_t pci_bus_to_cpumask [256] = { [0 ... 255] = CPU_MASK_ALL }; + +int mp_current_pci_id = 0; +/* I/O APIC entries */ +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; + +/* # of MP IRQ source entries */ +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* MP IRQ source entries */ +int mp_irq_entries; + +int nr_ioapics; +int pic_mode; +unsigned long mp_lapic_addr = 0; + + + +/* Processor that is doing the boot up */ +unsigned int boot_cpu_id = -1U; +/* Internal processor count */ +static unsigned int num_processors = 0; + +/* Bitmask of physically existing CPUs */ +physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; + +/* ACPI MADT entry parsing functions */ +#ifdef CONFIG_ACPI_BOOT +extern struct acpi_boot_flags acpi_boot; +#ifdef CONFIG_X86_LOCAL_APIC +extern int acpi_parse_lapic (acpi_table_entry_header *header); +extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header); +extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header); +#endif /*CONFIG_X86_LOCAL_APIC*/ +#ifdef CONFIG_X86_IO_APIC +extern int acpi_parse_ioapic (acpi_table_entry_header *header); +#endif /*CONFIG_X86_IO_APIC*/ +#endif /*CONFIG_ACPI_BOOT*/ + +u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; + + +/* + * Intel MP BIOS table parsing routines: + */ + +/* + * Checksum an MP configuration block. + */ + +static int __init mpf_checksum(unsigned char *mp, int len) +{ + int sum = 0; + + while (len--) + sum += *mp++; + + return sum & 0xFF; +} + +#ifndef CONFIG_XEN +static void __init MP_processor_info (struct mpc_config_processor *m) +{ + int ver; + + if (!(m->mpc_cpuflag & CPU_ENABLED)) + return; + + printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8, + (m->mpc_cpufeature & CPU_MODEL_MASK)>>4, + m->mpc_apicver); + + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + Dprintk(" Bootup CPU\n"); + boot_cpu_id = m->mpc_apicid; + } + if (num_processors >= NR_CPUS) { + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); + return; + } + if (num_processors >= maxcpus) { + printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." + " Processor ignored.\n", maxcpus); + return; + } + + num_processors++; + + if (m->mpc_apicid > MAX_APICS) { + printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", + m->mpc_apicid, MAX_APICS); + return; + } + ver = m->mpc_apicver; + + physid_set(m->mpc_apicid, phys_cpu_present_map); + /* + * Validate version + */ + if (ver == 0x0) { + printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid); + ver = 0x10; + } + apic_version[m->mpc_apicid] = ver; + bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; +} +#else +void __init MP_processor_info (struct mpc_config_processor *m) +{ + num_processors++; +} +#endif /* CONFIG_XEN */ + +static void __init MP_bus_info (struct mpc_config_bus *m) +{ + char str[7]; + + memcpy(str, m->mpc_bustype, 6); + str[6] = 0; + Dprintk("Bus #%d is %s\n", m->mpc_busid, str); + + if (strncmp(str, "ISA", 3) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; + } else if (strncmp(str, "EISA", 4) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; + } else if (strncmp(str, "PCI", 3) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; + mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; + mp_current_pci_id++; + } else if (strncmp(str, "MCA", 3) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; + } else { + printk(KERN_ERR "Unknown bustype %s\n", str); + } +} + +static void __init MP_ioapic_info (struct mpc_config_ioapic *m) +{ + if (!(m->mpc_flags & MPC_APIC_USABLE)) + return; + + printk("I/O APIC #%d Version %d at 0x%X.\n", + m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n", + MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); + } + if (!m->mpc_apicaddr) { + printk(KERN_ERR "WARNING: bogus zero I/O APIC address" + " found in MP table, skipping!\n"); + return; + } + mp_ioapics[nr_ioapics] = *m; + nr_ioapics++; +} + +static void __init MP_intsrc_info (struct mpc_config_intsrc *m) +{ + mp_irqs [mp_irq_entries] = *m; + Dprintk("Int: type %d, pol %d, trig %d, bus %d," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, + m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); +} + +static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) +{ + Dprintk("Lint: type %d, pol %d, trig %d, bus %d," + " IRQ %02x, APIC ID %x, APIC LINT %02x\n", + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); + /* + * Well it seems all SMP boards in existence + * use ExtINT/LVT1 == LINT0 and + * NMI/LVT2 == LINT1 - the following check + * will show us if this assumptions is false. + * Until then we do not have to add baggage. + */ + if ((m->mpc_irqtype == mp_ExtINT) && + (m->mpc_destapiclint != 0)) + BUG(); + if ((m->mpc_irqtype == mp_NMI) && + (m->mpc_destapiclint != 1)) + BUG(); +} + +/* + * Read/parse the MPC + */ + +static int __init smp_read_mpc(struct mp_config_table *mpc) +{ + char str[16]; + int count=sizeof(*mpc); + unsigned char *mpt=((unsigned char *)mpc)+count; + + if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { + printk("SMP mptable: bad signature [%c%c%c%c]!\n", + mpc->mpc_signature[0], + mpc->mpc_signature[1], + mpc->mpc_signature[2], + mpc->mpc_signature[3]); + return 0; + } + if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { + printk("SMP mptable: checksum error!\n"); + return 0; + } + if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { + printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", + mpc->mpc_spec); + return 0; + } + if (!mpc->mpc_lapic) { + printk(KERN_ERR "SMP mptable: null local APIC address!\n"); + return 0; + } + memcpy(str,mpc->mpc_oem,8); + str[8]=0; + printk(KERN_INFO "OEM ID: %s ",str); + + memcpy(str,mpc->mpc_productid,12); + str[12]=0; + printk(KERN_INFO "Product ID: %s ",str); + + printk(KERN_INFO "APIC at: 0x%X\n",mpc->mpc_lapic); + + /* save the local APIC address, it might be non-default */ + if (!acpi_lapic) + mp_lapic_addr = mpc->mpc_lapic; + + /* + * Now process the configuration blocks. + */ + while (count < mpc->mpc_length) { + switch(*mpt) { + case MP_PROCESSOR: + { + struct mpc_config_processor *m= + (struct mpc_config_processor *)mpt; + if (!acpi_lapic) + MP_processor_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_BUS: + { + struct mpc_config_bus *m= + (struct mpc_config_bus *)mpt; + MP_bus_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_IOAPIC: + { + struct mpc_config_ioapic *m= + (struct mpc_config_ioapic *)mpt; + MP_ioapic_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_INTSRC: + { + struct mpc_config_intsrc *m= + (struct mpc_config_intsrc *)mpt; + + MP_intsrc_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_LINTSRC: + { + struct mpc_config_lintsrc *m= + (struct mpc_config_lintsrc *)mpt; + MP_lintsrc_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + } + } + clustered_apic_check(); + if (!num_processors) + printk(KERN_ERR "SMP mptable: no processors registered!\n"); + return num_processors; +} + +static int __init ELCR_trigger(unsigned int irq) +{ + unsigned int port; + + port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; +} + +static void __init construct_default_ioirq_mptable(int mpc_default_type) +{ + struct mpc_config_intsrc intsrc; + int i; + int ELCR_fallback = 0; + + intsrc.mpc_type = MP_INTSRC; + intsrc.mpc_irqflag = 0; /* conforming */ + intsrc.mpc_srcbus = 0; + intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; + + intsrc.mpc_irqtype = mp_INT; + + /* + * If true, we have an ISA/PCI system with no IRQ entries + * in the MP table. To prevent the PCI interrupts from being set up + * incorrectly, we try to use the ELCR. The sanity check to see if + * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can + * never be level sensitive, so we simply see if the ELCR agrees. + * If it does, we assume it's valid. + */ + if (mpc_default_type == 5) { + printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); + + if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) + printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n"); + else { + printk(KERN_INFO "Using ELCR to identify PCI interrupts\n"); + ELCR_fallback = 1; + } + } + + for (i = 0; i < 16; i++) { + switch (mpc_default_type) { + case 2: + if (i == 0 || i == 13) + continue; /* IRQ0 & IRQ13 not connected */ + /* fall through */ + default: + if (i == 2) + continue; /* IRQ2 is never connected */ + } + + if (ELCR_fallback) { + /* + * If the ELCR indicates a level-sensitive interrupt, we + * copy that information over to the MP table in the + * irqflag field (level sensitive, active high polarity). + */ + if (ELCR_trigger(i)) + intsrc.mpc_irqflag = 13; + else + intsrc.mpc_irqflag = 0; + } + + intsrc.mpc_srcbusirq = i; + intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ + MP_intsrc_info(&intsrc); + } + + intsrc.mpc_irqtype = mp_ExtINT; + intsrc.mpc_srcbusirq = 0; + intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ + MP_intsrc_info(&intsrc); +} + +static inline void __init construct_default_ISA_mptable(int mpc_default_type) +{ + struct mpc_config_processor processor; + struct mpc_config_bus bus; + struct mpc_config_ioapic ioapic; + struct mpc_config_lintsrc lintsrc; + int linttypes[2] = { mp_ExtINT, mp_NMI }; + int i; + + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* + * 2 CPUs, numbered 0 & 1. + */ + processor.mpc_type = MP_PROCESSOR; + /* Either an integrated APIC or a discrete 82489DX. */ + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + processor.mpc_cpuflag = CPU_ENABLED; + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | + boot_cpu_data.x86_mask; + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; + for (i = 0; i < 2; i++) { + processor.mpc_apicid = i; + MP_processor_info(&processor); + } + + bus.mpc_type = MP_BUS; + bus.mpc_busid = 0; + switch (mpc_default_type) { + default: + printk(KERN_ERR "???\nUnknown standard configuration %d\n", + mpc_default_type); + /* fall through */ + case 1: + case 5: + memcpy(bus.mpc_bustype, "ISA ", 6); + break; + case 2: + case 6: + case 3: + memcpy(bus.mpc_bustype, "EISA ", 6); + break; + case 4: + case 7: + memcpy(bus.mpc_bustype, "MCA ", 6); + } + MP_bus_info(&bus); + if (mpc_default_type > 4) { + bus.mpc_busid = 1; + memcpy(bus.mpc_bustype, "PCI ", 6); + MP_bus_info(&bus); + } + + ioapic.mpc_type = MP_IOAPIC; + ioapic.mpc_apicid = 2; + ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + ioapic.mpc_flags = MPC_APIC_USABLE; + ioapic.mpc_apicaddr = 0xFEC00000; + MP_ioapic_info(&ioapic); + + /* + * We set up most of the low 16 IO-APIC pins according to MPS rules. + */ + construct_default_ioirq_mptable(mpc_default_type); + + lintsrc.mpc_type = MP_LINTSRC; + lintsrc.mpc_irqflag = 0; /* conforming */ + lintsrc.mpc_srcbusid = 0; + lintsrc.mpc_srcbusirq = 0; + lintsrc.mpc_destapic = MP_APIC_ALL; + for (i = 0; i < 2; i++) { + lintsrc.mpc_irqtype = linttypes[i]; + lintsrc.mpc_destapiclint = i; + MP_lintsrc_info(&lintsrc); + } +} + +static struct intel_mp_floating *mpf_found; + +/* + * Scan the memory blocks for an SMP configuration block. + */ +void __init get_smp_config (void) +{ + struct intel_mp_floating *mpf = mpf_found; + + /* + * ACPI may be used to obtain the entire SMP configuration or just to + * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that + * ACPI supports both logical (e.g. Hyper-Threading) and physical + * processors, where MPS only supports physical. + */ + if (acpi_lapic && acpi_ioapic) { + printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n"); + return; + } + else if (acpi_lapic) + printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); + + printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); + if (mpf->mpf_feature2 & (1<<7)) { + printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); + pic_mode = 1; + } else { + printk(KERN_INFO " Virtual Wire compatibility mode.\n"); + pic_mode = 0; + } + + /* + * Now see if we need to read further. + */ + if (mpf->mpf_feature1 != 0) { + + printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1); + construct_default_ISA_mptable(mpf->mpf_feature1); + + } else if (mpf->mpf_physptr) { + + /* + * Read the physical hardware table. Anything here will + * override the defaults. + */ + if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) { + smp_found_config = 0; + printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); + printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); + return; + } + /* + * If there are no explicit MP IRQ entries, then we are + * broken. We set up most of the low 16 IO-APIC pins to + * ISA defaults and hope it will work. + */ + if (!mp_irq_entries) { + struct mpc_config_bus bus; + + printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); + + bus.mpc_type = MP_BUS; + bus.mpc_busid = 0; + memcpy(bus.mpc_bustype, "ISA ", 6); + MP_bus_info(&bus); + + construct_default_ioirq_mptable(0); + } + + } else + BUG(); + + printk(KERN_INFO "Processors: %d\n", num_processors); + /* + * Only use the first configuration found. + */ +} + +static int __init smp_scan_config (unsigned long base, unsigned long length) +{ + extern void __bad_mpf_size(void); + unsigned int *bp = isa_bus_to_virt(base); + struct intel_mp_floating *mpf; + + Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); + if (sizeof(*mpf) != 16) + __bad_mpf_size(); + + while (length > 0) { + mpf = (struct intel_mp_floating *)bp; + if ((*bp == SMP_MAGIC_IDENT) && + (mpf->mpf_length == 1) && + !mpf_checksum((unsigned char *)bp, 16) && + ((mpf->mpf_specification == 1) + || (mpf->mpf_specification == 4)) ) { + + smp_found_config = 1; + mpf_found = mpf; + return 1; + } + bp += 4; + length -= 16; + } + return 0; +} + +void __init find_intel_smp (void) +{ + unsigned int address; + + /* + * FIXME: Linux assumes you have 640K of base ram.. + * this continues the error... + * + * 1) Scan the bottom 1K for a signature + * 2) Scan the top 1K of base RAM + * 3) Scan the 64K of bios + */ + if (smp_scan_config(0x0,0x400) || + smp_scan_config(639*0x400,0x400) || + smp_scan_config(0xF0000,0x10000)) + return; + /* + * If it is an SMP machine we should know now, unless the + * configuration is in an EISA/MCA bus machine with an + * extended bios data area. + * + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E, calculate and scan it here. + * + * NOTE! There are Linux loaders that will corrupt the EBDA + * area, and as such this kind of SMP config may be less + * trustworthy, simply because the SMP table may have been + * stomped on during early boot. These loaders are buggy and + * should be fixed. + */ + + address = *(unsigned short *)phys_to_virt(0x40E); + address <<= 4; + if (smp_scan_config(address, 0x1000)) + return; + + /* If we have come this far, we did not find an MP table */ + printk(KERN_INFO "No mptable found.\n"); +} + +/* + * - Intel MP Configuration Table + */ +void __init find_smp_config (void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + find_intel_smp(); +#endif +} + + +/* -------------------------------------------------------------------------- + ACPI-based MP Configuration + -------------------------------------------------------------------------- */ + +#ifdef CONFIG_ACPI_BOOT + +void __init mp_register_lapic_address ( + u64 address) +{ +#ifndef CONFIG_XEN + mp_lapic_addr = (unsigned long) address; + + if (boot_cpu_id == -1U) + boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); + + Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); +#endif +} + + +void __init mp_register_lapic ( + u8 id, + u8 enabled) +{ + struct mpc_config_processor processor; + int boot_cpu = 0; + + if (id >= MAX_APICS) { + printk(KERN_WARNING "Processor #%d invalid (max %d)\n", + id, MAX_APICS); + return; + } + + if (id == boot_cpu_physical_apicid) + boot_cpu = 1; + +#ifndef CONFIG_XEN + processor.mpc_type = MP_PROCESSOR; + processor.mpc_apicid = id; + processor.mpc_apicver = 0x10; /* TBD: lapic version */ + processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); + processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; +#endif + + MP_processor_info(&processor); +} + +#ifdef CONFIG_X86_IO_APIC + +#define MP_ISA_BUS 0 +#define MP_MAX_IOAPIC_PIN 127 + +struct mp_ioapic_routing { + int apic_id; + int gsi_start; + int gsi_end; + u32 pin_programmed[4]; +} mp_ioapic_routing[MAX_IO_APICS]; + + +static int mp_find_ioapic ( + int gsi) +{ + int i = 0; + + /* Find the IOAPIC that manages this GSI. */ + for (i = 0; i < nr_ioapics; i++) { + if ((gsi >= mp_ioapic_routing[i].gsi_start) + && (gsi <= mp_ioapic_routing[i].gsi_end)) + return i; + } + + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); + + return -1; +} + + +void __init mp_register_ioapic ( + u8 id, + u32 address, + u32 gsi_base) +{ + int idx = 0; + + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " + "(found %d)\n", MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!\n"); + } + if (!address) { + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" + " found in MADT table, skipping!\n"); + return; + } + + idx = nr_ioapics++; + + mp_ioapics[idx].mpc_type = MP_IOAPIC; + mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; + mp_ioapics[idx].mpc_apicaddr = address; + + mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); + + /* + * Build basic IRQ lookup table to facilitate gsi->io_apic lookups + * and to prevent reprogramming of IOAPIC pins (PCI IRQs). + */ + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; + mp_ioapic_routing[idx].gsi_start = gsi_base; + mp_ioapic_routing[idx].gsi_end = gsi_base + + io_apic_get_redir_entries(idx); + + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + mp_ioapic_routing[idx].gsi_start, + mp_ioapic_routing[idx].gsi_end); + + return; +} + + +void __init mp_override_legacy_irq ( + u8 bus_irq, + u8 polarity, + u8 trigger, + u32 gsi) +{ + struct mpc_config_intsrc intsrc; + int ioapic = -1; + int pin = -1; + + /* + * Convert 'gsi' to 'ioapic.pin'. + */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return; + pin = gsi - mp_ioapic_routing[ioapic].gsi_start; + + /* + * TBD: This check is for faulty timer entries, where the override + * erroneously sets the trigger to level, resulting in a HUGE + * increase of timer interrupts! + */ + if ((bus_irq == 0) && (trigger == 3)) + trigger = 1; + + intsrc.mpc_type = MP_INTSRC; + intsrc.mpc_irqtype = mp_INT; + intsrc.mpc_irqflag = (trigger << 2) | polarity; + intsrc.mpc_srcbus = MP_ISA_BUS; + intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ + intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ + intsrc.mpc_dstirq = pin; /* INTIN# */ + + Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", + intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, + (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, + intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); + + mp_irqs[mp_irq_entries] = intsrc; + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!\n"); + + return; +} + + +void __init mp_config_acpi_legacy_irqs (void) +{ + struct mpc_config_intsrc intsrc; + int i = 0; + int ioapic = -1; + + /* + * Fabricate the legacy ISA bus (bus #31). + */ + mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; + Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); + + /* + * Locate the IOAPIC that manages the ISA IRQs (0-15). + */ + ioapic = mp_find_ioapic(0); + if (ioapic < 0) + return; + + intsrc.mpc_type = MP_INTSRC; + intsrc.mpc_irqflag = 0; /* Conforming */ + intsrc.mpc_srcbus = MP_ISA_BUS; + intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; + + /* + * Use the default configuration for the IRQs 0-15. Unless + * overridden by (MADT) interrupt source override entries. + */ + for (i = 0; i < 16; i++) { + int idx; + + for (idx = 0; idx < mp_irq_entries; idx++) { + struct mpc_config_intsrc *irq = mp_irqs + idx; + + /* Do we already have a mapping for this ISA IRQ? */ + if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i) + break; + + /* Do we already have a mapping for this IOAPIC pin */ + if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && + (irq->mpc_dstirq == i)) + break; + } + + if (idx != mp_irq_entries) { + printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); + continue; /* IRQ already used */ + } + + intsrc.mpc_irqtype = mp_INT; + intsrc.mpc_srcbusirq = i; /* Identity mapped */ + intsrc.mpc_dstirq = i; + + Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " + "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, + (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, + intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, + intsrc.mpc_dstirq); + + mp_irqs[mp_irq_entries] = intsrc; + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!\n"); + } + + return; +} + +int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) +{ + int ioapic = -1; + int ioapic_pin = 0; + int idx, bit = 0; + + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) + return gsi; + +#ifdef CONFIG_ACPI_BUS + /* Don't set up the ACPI SCI because it's already set up */ + if (acpi_fadt.sci_int == gsi) + return gsi; +#endif + + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) { + printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); + return gsi; + } + + ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start; + + /* + * Avoid pin reprogramming. PRTs typically include entries + * with redundant pin->gsi mappings (but unique PCI devices); + * we only program the IOAPIC on the first. + */ + bit = ioapic_pin % 32; + idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32); + if (idx > 3) { + printk(KERN_ERR "Invalid reference to IOAPIC pin " + "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, + ioapic_pin); + return gsi; + } + if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { + Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", + mp_ioapic_routing[ioapic].apic_id, ioapic_pin); + return gsi; + } + + mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); + + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, + edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, + active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); + return gsi; +} + +#endif /*CONFIG_X86_IO_APIC*/ +#endif /*CONFIG_ACPI_BOOT*/ diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c 2005-05-20 10:25:14 -07:00 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c 2005-05-20 10:25:14 -07:00 @@ -675,14 +675,6 @@ init_memory_mapping(0, (end_pfn << PAGE_SHIFT)); -#ifdef CONFIG_ACPI_BOOT - /* - * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). - * Call this early for SRAT node setup. - */ - acpi_boot_table_init(); -#endif - #ifdef CONFIG_ACPI_NUMA /* * Parse SRAT to discover nodes. @@ -733,12 +725,6 @@ */ acpi_reserve_bootmem(); #endif -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Find and reserve possible boot-time SMP configuration: - */ - find_smp_config(); -#endif #ifdef CONFIG_BLK_DEV_INITRD if (xen_start_info.mod_start) { if (LOADER_TYPE && INITRD_START) { @@ -759,7 +745,12 @@ } #endif paging_init(); - +#ifdef CONFIG_X86_LOCAL_APIC + /* + * Find and reserve possible boot-time SMP configuration: + */ + find_smp_config(); +#endif /* Make sure we have a large enough P->M table. */ if (end_pfn > xen_start_info.nr_pages) { phys_to_machine_mapping = alloc_bootmem( @@ -789,18 +780,25 @@ #ifdef CONFIG_ACPI_BOOT /* + * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). + * Call this early for SRAT node setup. + */ + acpi_boot_table_init(); + + /* * Read APIC and some other early information from ACPI tables. */ acpi_boot_init(); #endif - #ifdef CONFIG_X86_LOCAL_APIC /* * get boot-time SMP configuration: */ if (smp_found_config) get_smp_config(); +#ifndef CONFIG_XEN init_apic_mappings(); +#endif #endif /* XXX Disable irqdebug until we have a way to avoid interrupt diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c 2005-05-20 10:25:14 -07:00 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c 2005-05-20 10:25:14 -07:00 @@ -389,7 +389,6 @@ die(str, regs, err); } -#if 0 void die_nmi(char *str, struct pt_regs *regs) { oops_begin(); @@ -405,7 +404,6 @@ oops_end(); do_exit(SIGSEGV); } -#endif static void do_trap(int trapnr, int signr, char *str, struct pt_regs * regs, long error_code, siginfo_t *info) @@ -556,7 +554,6 @@ } } -#if 0 static void mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); @@ -630,7 +627,6 @@ outb(0x0f, 0x70); inb(0x71); /* dummy */ } -#endif asmlinkage void do_int3(struct pt_regs * regs, long error_code) { diff -Nru a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h 2005-05-20 10:25:14 -07:00 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h 2005-05-20 10:25:14 -07:00 @@ -18,6 +18,7 @@ #include <asm/page.h> #include <asm/vsyscall.h> #include <asm/vsyscall32.h> +#include <asm/acpi.h> /* * Here we define all the compile-time 'special' virtual @@ -45,6 +46,11 @@ #ifdef CONFIG_X86_IO_APIC FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, +#endif +#ifdef CONFIG_ACPI_BOOT + FIX_ACPI_BEGIN, + FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, + FIX_ACPI_RSDP_PAGE, #endif FIX_SHARED_INFO, FIX_GNTTAB_BEGIN, diff -Nru a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h 2005-05-20 10:25:14 -07:00 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h 2005-05-20 10:25:14 -07:00 @@ -44,8 +44,6 @@ * * Vectors 0xf0-0xfa are free (reserved for future Linux use). */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe #define INVALIDATE_TLB_VECTOR 0xfd #define RESCHEDULE_VECTOR 0xfc #define CALL_FUNCTION_VECTOR 0xfb @@ -58,6 +56,9 @@ */ #define LOCAL_TIMER_VECTOR 0xef #endif + +#define SPURIOUS_APIC_VECTOR 0xff +#define ERROR_APIC_VECTOR 0xfe /* * First APIC vector available to drivers: (vectors 0x30-0xee) _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |