[Xen-changelog] [xen-unstable] Merge
# HG changeset patch
# User Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
# Date 1278413680 -3600
# Node ID db6234d3eafbd2d7b1469d8b98a13d6ab0b89973
# Parent  ce278fdaced3ff898651657fda848c2b4daee648
# Parent  9d965ac1b0dbcb2f1fd4845e30753251d68d064f
Merge
---
 xen/arch/x86/cpu/amd.h                 |  103 -----------
 xen/arch/x86/hvm/vmx/vpmu.c            |  119 -------------
 tools/blktap2/drivers/tapdisk-vbd.c    |    2 
 xen/Rules.mk                           |    2 
 xen/arch/x86/Makefile                  |    2 
 xen/arch/x86/apic.c                    |  198 +++++++++++++++++++---
 xen/arch/x86/cpu/amd.c                 |   50 +++++
 xen/arch/x86/domctl.c                  |    9 -
 xen/arch/x86/genapic/x2apic.c          |   19 ++
 xen/arch/x86/hvm/mtrr.c                |    2 
 xen/arch/x86/hvm/svm/asid.c            |    4 
 xen/arch/x86/hvm/svm/svm.c             |    7 
 xen/arch/x86/hvm/vmx/vmcs.c            |    4 
 xen/arch/x86/hvm/vmx/vmx.c             |    5 
 xen/arch/x86/i8259.c                   |   20 ++
 xen/arch/x86/io_apic.c                 |  120 +++++++++++++
 xen/arch/x86/mm/hap/p2m-ept.c          |  297 ++++++++++++++++++---------------
 xen/arch/x86/setup.c                   |    9 -
 xen/common/memory.c                    |    2 
 xen/common/page_alloc.c                |    8 
 xen/common/trace.c                     |  216 +++++++++++++++---------
 xen/drivers/passthrough/vtd/dmar.c     |   20 +-
 xen/drivers/passthrough/vtd/dmar.h     |    1 
 xen/drivers/passthrough/vtd/extern.h   |    3 
 xen/drivers/passthrough/vtd/intremap.c |  125 ++++++++++++-
 xen/drivers/passthrough/vtd/iommu.c    |   54 ++----
 xen/drivers/passthrough/vtd/qinval.c   |   19 +-
 xen/drivers/passthrough/vtd/vtd.h      |    3 
 xen/include/asm-x86/amd.h              |  138 +++++++++++++++
 xen/include/asm-x86/apic.h             |    2 
 xen/include/asm-x86/debugger.h         |    2 
 xen/include/asm-x86/domain.h           |    2 
 xen/include/asm-x86/genapic.h          |    1 
 xen/include/asm-x86/io_apic.h          |    6 
 xen/include/asm-x86/irq.h              |    2 
 xen/include/asm-x86/msr-index.h        |    4 
 xen/include/asm-x86/mtrr.h             |    2 
 xen/include/public/io/ring.h           |   15 +
 xen/include/public/memory.h            |    3 
 xen/include/xen/iommu.h                |    2 
 xen/include/xen/mm.h                   |    2 
 xen/include/xen/trace.h                |   14 -
 42 files changed, 1073 insertions(+), 545 deletions(-)

diff -r ce278fdaced3 -r db6234d3eafb tools/blktap2/drivers/tapdisk-vbd.c
--- a/tools/blktap2/drivers/tapdisk-vbd.c	Fri Jul 02 18:04:54 2010 +0100
+++ b/tools/blktap2/drivers/tapdisk-vbd.c	Tue Jul 06 11:54:40 2010 +0100
@@ -1684,7 +1684,7 @@ tapdisk_vbd_check_ring_message(td_vbd_t
 	if (!vbd->ring.sring)
 		return -EINVAL;
 
-	switch (vbd->ring.sring->pad[0]) {
+	switch (vbd->ring.sring->private.tapif_user.msg) {
 	case 0:
 		return 0;
 
diff -r ce278fdaced3 -r db6234d3eafb xen/Rules.mk
--- a/xen/Rules.mk	Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/Rules.mk	Tue Jul 06 11:54:40 2010 +0100
@@ -8,7 +8,6 @@ perfc_arrays ?= n
 perfc_arrays ?= n
 lock_profile ?= n
 crash_debug ?= n
-gdbsx ?= n
 frame_pointer ?= n
 
 XEN_ROOT=$(BASEDIR)/..
@@ -53,7 +52,6 @@ CFLAGS-$(perfc_arrays) += -DPERF_ARRAYS CFLAGS-$(perfc_arrays) += -DPERF_ARRAYS CFLAGS-$(lock_profile) += -DLOCK_PROFILE CFLAGS-$(frame_pointer) += -fno-omit-frame-pointer -DCONFIG_FRAME_POINTER -CFLAGS-$(gdbsx) += -DXEN_GDBSX_CONFIG ifneq ($(max_phys_cpus),) CFLAGS-y += -DMAX_PHYS_CPUS=$(max_phys_cpus) diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/Makefile Tue Jul 06 11:54:40 2010 +0100 @@ -13,6 +13,7 @@ obj-y += clear_page.o obj-y += clear_page.o obj-y += copy_page.o obj-y += compat.o +obj-y += debug.o obj-y += delay.o obj-y += dmi_scan.o obj-y += domctl.o @@ -57,7 +58,6 @@ obj-y += bzimage.o obj-y += bzimage.o obj-$(crash_debug) += gdbstub.o -obj-$(gdbsx) += debug.o x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/apic.c --- a/xen/arch/x86/apic.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/apic.c Tue Jul 06 11:54:40 2010 +0100 @@ -70,6 +70,9 @@ int x2apic_enabled __read_mostly = 0; int x2apic_enabled __read_mostly = 0; int directed_eoi_enabled __read_mostly = 0; +/* x2APIC is enabled in BIOS */ +static int x2apic_preenabled; + /* * The following vectors are part of the Linux architecture, there * is no hardware IRQ pin equivalent for them, they are triggered @@ -487,6 +490,47 @@ static void apic_pm_activate(void) static void apic_pm_activate(void) { apic_pm_state.active = 1; +} + +static void resume_x2apic(void) +{ + uint64_t msr_content; + struct IO_APIC_route_entry **ioapic_entries = NULL; + + ASSERT(x2apic_enabled); + + ioapic_entries = alloc_ioapic_entries(); + if ( !ioapic_entries ) + { + printk("Allocate ioapic_entries failed\n"); + goto out; + } + + if ( save_IO_APIC_setup(ioapic_entries) ) + { + printk("Saving IO-APIC state failed\n"); + goto out; + } + + mask_8259A(); + mask_IO_APIC_setup(ioapic_entries); + + iommu_enable_IR(); + + rdmsrl(MSR_IA32_APICBASE, msr_content); + if ( !(msr_content & MSR_IA32_APICBASE_EXTD) ) + { + msr_content |= MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD; + msr_content = (uint32_t)msr_content; + wrmsrl(MSR_IA32_APICBASE, msr_content); + } + + restore_IO_APIC_setup(ioapic_entries); + unmask_8259A(); + +out: + if ( ioapic_entries ) + free_ioapic_entries(ioapic_entries); } void __devinit setup_local_APIC(void) @@ -727,7 +771,7 @@ int lapic_resume(void) msr_content | MSR_IA32_APICBASE_ENABLE | mp_lapic_addr); } else - enable_x2apic(); + resume_x2apic(); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -894,35 +938,138 @@ no_apic: return -1; } -void enable_x2apic(void) +void check_x2apic_preenabled(void) { uint64_t msr_content; - if ( smp_processor_id() == 0 ) + if ( !x2apic_is_available() ) + return; + + rdmsrl(MSR_IA32_APICBASE, msr_content); + if ( msr_content & MSR_IA32_APICBASE_EXTD ) { - if ( !iommu_supports_eim() ) + printk("x2APIC mode is already enabled by BIOS.\n"); + x2apic_preenabled = 1; + x2apic_enabled = 1; + } +} + +static void enable_bsp_x2apic(void) +{ + struct IO_APIC_route_entry **ioapic_entries = NULL; + const struct genapic *x2apic_genapic = NULL; + + ASSERT(smp_processor_id() == 0); + + if ( x2apic_preenabled ) + { + /* + * Interrupt remapping should be also enabled by BIOS when + * x2APIC is already enabled by BIOS, otherwise it's a BIOS + * bug + */ + if ( !intremap_enabled() ) + panic("Interrupt remapping is not enabled by BIOS while " + "x2APIC is already enabled by BIOS!\n"); + } + 
+ x2apic_genapic = apic_x2apic_probe(); + if ( x2apic_genapic ) + genapic = x2apic_genapic; + else + { + if ( x2apic_cmdline_disable() ) { - printk("x2APIC would not be enabled without EIM.\n"); - return; + if ( x2apic_preenabled ) + { + /* Ignore x2apic=0, and set default x2apic mode */ + genapic = &apic_x2apic_cluster; + printk("x2APIC: already enabled by BIOS, ignore x2apic=0.\n"); + } + else + { + printk("Not enable x2APIC due to x2apic=0 is set.\n"); + return; + } } - - if ( apic_x2apic_phys.probe() ) - genapic = &apic_x2apic_phys; - else if ( apic_x2apic_cluster.probe() ) - genapic = &apic_x2apic_cluster; else { - printk("x2APIC would not be enabled due to x2apic=off.\n"); - return; + if ( !iommu_enabled || !iommu_intremap || !iommu_qinval ) + panic("Cannot enable x2APIC due to iommu or interrupt " + "remapping or queued invalidation is disabled " + "by command line!\n"); + else + { + if ( x2apic_preenabled ) + panic("x2APIC: already enabled by BIOS, but " + "iommu_supports_eim fails\n"); + else + { + printk("Not enable x2APIC due to " + "iommu_supports_eim fails!\n"); + return; + } + } } - - x2apic_enabled = 1; - printk("Switched to APIC driver %s.\n", genapic->name); - } - else + } + + ioapic_entries = alloc_ioapic_entries(); + if ( !ioapic_entries ) { - BUG_ON(!x2apic_enabled); /* APs only enable x2apic when BSP did so. */ - } + printk("Allocate ioapic_entries failed\n"); + goto out; + } + + if ( save_IO_APIC_setup(ioapic_entries) ) + { + printk("Saving IO-APIC state failed\n"); + goto out; + } + + mask_8259A(); + mask_IO_APIC_setup(ioapic_entries); + + if ( iommu_enable_IR() ) + { + printk("Would not enable x2APIC due to interrupt remapping " + "cannot be enabled.\n"); + goto restore_out; + } + + x2apic_enabled = 1; + printk("Switched to APIC driver %s.\n", genapic->name); + + if ( !x2apic_preenabled ) + { + uint64_t msr_content; + rdmsrl(MSR_IA32_APICBASE, msr_content); + if ( !(msr_content & MSR_IA32_APICBASE_EXTD) ) + { + msr_content |= MSR_IA32_APICBASE_ENABLE | + MSR_IA32_APICBASE_EXTD; + msr_content = (uint32_t)msr_content; + wrmsrl(MSR_IA32_APICBASE, msr_content); + printk("x2APIC mode enabled.\n"); + } + } + +restore_out: + restore_IO_APIC_setup(ioapic_entries); + unmask_8259A(); + +out: + if ( ioapic_entries ) + free_ioapic_entries(ioapic_entries); +} + +static void enable_ap_x2apic(void) +{ + uint64_t msr_content; + + ASSERT(smp_processor_id() != 0); + + /* APs only enable x2apic when BSP did so. 
*/ + BUG_ON(!x2apic_enabled); rdmsrl(MSR_IA32_APICBASE, msr_content); if ( !(msr_content & MSR_IA32_APICBASE_EXTD) ) @@ -930,10 +1077,15 @@ void enable_x2apic(void) msr_content |= MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD; msr_content = (uint32_t)msr_content; wrmsrl(MSR_IA32_APICBASE, msr_content); - printk("x2APIC mode enabled.\n"); - } + } +} + +void enable_x2apic(void) +{ + if ( smp_processor_id() == 0 ) + enable_bsp_x2apic(); else - printk("x2APIC mode enabled by BIOS.\n"); + enable_ap_x2apic(); } void __init init_apic_mappings(void) diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/cpu/amd.c --- a/xen/arch/x86/cpu/amd.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/cpu/amd.c Tue Jul 06 11:54:40 2010 +0100 @@ -7,12 +7,12 @@ #include <asm/io.h> #include <asm/msr.h> #include <asm/processor.h> +#include <asm/amd.h> #include <asm/hvm/support.h> #include <asm/setup.h> /* amd_init_cpu */ #include <asm/acpi.h> #include "cpu.h" -#include "amd.h" /* * Pre-canned values for overriding the CPUID features @@ -148,6 +148,54 @@ static void __devinit set_cpuidmask(cons } /* + * Check for the presence of an AMD erratum. Arguments are defined in amd.h + * for each known erratum. Return 1 if erratum is found. + */ +int cpu_has_amd_erratum(const struct cpuinfo_x86 *cpu, int osvw, ...) +{ + va_list ap; + u32 range; + u32 ms; + + if (cpu->x86_vendor != X86_VENDOR_AMD) + return 0; + + va_start(ap, osvw); + + if (osvw) { + u16 osvw_id = va_arg(ap, int); + + if (cpu_has(cpu, X86_FEATURE_OSVW)) { + u64 osvw_len; + rdmsrl(MSR_AMD_OSVW_ID_LENGTH, osvw_len); + + if (osvw_id < osvw_len) { + u64 osvw_bits; + rdmsrl(MSR_AMD_OSVW_STATUS + (osvw_id >> 6), + osvw_bits); + + va_end(ap); + return (osvw_bits >> (osvw_id & 0x3f)) & 0x01; + } + } + } + + /* OSVW unavailable or ID unknown, match family-model-stepping range */ + ms = (cpu->x86_model << 8) | cpu->x86_mask; + while ((range = va_arg(ap, int))) { + if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && + (ms >= AMD_MODEL_RANGE_START(range)) && + (ms <= AMD_MODEL_RANGE_END(range))) { + va_end(ap); + return 1; + } + } + + va_end(ap); + return 0; +} + +/* * amd_flush_filter={on,off}. Forcibly Enable or disable the TLB flush * filter on AMD 64-bit processors. 
*/ diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/cpu/amd.h --- a/xen/arch/x86/cpu/amd.h Fri Jul 02 18:04:54 2010 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -/* - * amd.h - AMD processor specific definitions - */ - -#ifndef __AMD_H__ -#define __AMD_H__ - -#include <asm/cpufeature.h> - -/* CPUID masked for use by AMD-V Extended Migration */ - -#define X86_FEATURE_BITPOS(_feature_) ((_feature_) % 32) -#define __bit(_x_) (1U << X86_FEATURE_BITPOS(_x_)) - -/* Family 0Fh, Revision C */ -#define AMD_FEATURES_K8_REV_C_ECX 0 -#define AMD_FEATURES_K8_REV_C_EDX ( \ - __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \ - __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \ - __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \ - __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \ - __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \ - __bit(X86_FEATURE_SEP) | __bit(X86_FEATURE_MTRR) | \ - __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \ - __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \ - __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_CLFLSH)| \ - __bit(X86_FEATURE_MMX) | __bit(X86_FEATURE_FXSR) | \ - __bit(X86_FEATURE_XMM) | __bit(X86_FEATURE_XMM2)) -#define AMD_EXTFEATURES_K8_REV_C_ECX 0 -#define AMD_EXTFEATURES_K8_REV_C_EDX ( \ - __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \ - __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \ - __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \ - __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \ - __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \ - __bit(X86_FEATURE_SYSCALL) | __bit(X86_FEATURE_MTRR) | \ - __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \ - __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \ - __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_NX) | \ - __bit(X86_FEATURE_MMXEXT) | __bit(X86_FEATURE_MMX) | \ - __bit(X86_FEATURE_FXSR) | __bit(X86_FEATURE_LM) | \ - __bit(X86_FEATURE_3DNOWEXT) | __bit(X86_FEATURE_3DNOW)) - -/* Family 0Fh, Revision D */ -#define AMD_FEATURES_K8_REV_D_ECX AMD_FEATURES_K8_REV_C_ECX -#define AMD_FEATURES_K8_REV_D_EDX AMD_FEATURES_K8_REV_C_EDX -#define AMD_EXTFEATURES_K8_REV_D_ECX (AMD_EXTFEATURES_K8_REV_C_ECX |\ - __bit(X86_FEATURE_LAHF_LM)) -#define AMD_EXTFEATURES_K8_REV_D_EDX (AMD_EXTFEATURES_K8_REV_C_EDX |\ - __bit(X86_FEATURE_FFXSR)) - -/* Family 0Fh, Revision E */ -#define AMD_FEATURES_K8_REV_E_ECX (AMD_FEATURES_K8_REV_D_ECX | \ - __bit(X86_FEATURE_XMM3)) -#define AMD_FEATURES_K8_REV_E_EDX (AMD_FEATURES_K8_REV_D_EDX | \ - __bit(X86_FEATURE_HT)) -#define AMD_EXTFEATURES_K8_REV_E_ECX (AMD_EXTFEATURES_K8_REV_D_ECX |\ - __bit(X86_FEATURE_CMP_LEGACY)) -#define AMD_EXTFEATURES_K8_REV_E_EDX AMD_EXTFEATURES_K8_REV_D_EDX - -/* Family 0Fh, Revision F */ -#define AMD_FEATURES_K8_REV_F_ECX (AMD_FEATURES_K8_REV_E_ECX | \ - __bit(X86_FEATURE_CX16)) -#define AMD_FEATURES_K8_REV_F_EDX AMD_FEATURES_K8_REV_E_EDX -#define AMD_EXTFEATURES_K8_REV_F_ECX (AMD_EXTFEATURES_K8_REV_E_ECX |\ - __bit(X86_FEATURE_SVME) | __bit(X86_FEATURE_EXTAPICSPACE) | \ - __bit(X86_FEATURE_ALTMOVCR)) -#define AMD_EXTFEATURES_K8_REV_F_EDX (AMD_EXTFEATURES_K8_REV_E_EDX |\ - __bit(X86_FEATURE_RDTSCP)) - -/* Family 0Fh, Revision G */ -#define AMD_FEATURES_K8_REV_G_ECX AMD_FEATURES_K8_REV_F_ECX -#define AMD_FEATURES_K8_REV_G_EDX AMD_FEATURES_K8_REV_F_EDX -#define AMD_EXTFEATURES_K8_REV_G_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\ - __bit(X86_FEATURE_3DNOWPF)) -#define AMD_EXTFEATURES_K8_REV_G_EDX AMD_EXTFEATURES_K8_REV_F_EDX - -/* Family 10h, Revision B */ -#define AMD_FEATURES_FAM10h_REV_B_ECX (AMD_FEATURES_K8_REV_F_ECX | \ - 
__bit(X86_FEATURE_POPCNT) | __bit(X86_FEATURE_MWAIT)) -#define AMD_FEATURES_FAM10h_REV_B_EDX AMD_FEATURES_K8_REV_F_EDX -#define AMD_EXTFEATURES_FAM10h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\ - __bit(X86_FEATURE_ABM) | __bit(X86_FEATURE_SSE4A) | \ - __bit(X86_FEATURE_MISALIGNSSE) | __bit(X86_FEATURE_OSVW) | \ - __bit(X86_FEATURE_IBS)) -#define AMD_EXTFEATURES_FAM10h_REV_B_EDX (AMD_EXTFEATURES_K8_REV_F_EDX |\ - __bit(X86_FEATURE_PAGE1GB)) - -/* Family 10h, Revision C */ -#define AMD_FEATURES_FAM10h_REV_C_ECX AMD_FEATURES_FAM10h_REV_B_ECX -#define AMD_FEATURES_FAM10h_REV_C_EDX AMD_FEATURES_FAM10h_REV_B_EDX -#define AMD_EXTFEATURES_FAM10h_REV_C_ECX (AMD_EXTFEATURES_FAM10h_REV_B_ECX |\ - __bit(X86_FEATURE_SKINIT) | __bit(X86_FEATURE_WDT)) -#define AMD_EXTFEATURES_FAM10h_REV_C_EDX AMD_EXTFEATURES_FAM10h_REV_B_EDX - -/* Family 11h, Revision B */ -#define AMD_FEATURES_FAM11h_REV_B_ECX AMD_FEATURES_K8_REV_G_ECX -#define AMD_FEATURES_FAM11h_REV_B_EDX AMD_FEATURES_K8_REV_G_EDX -#define AMD_EXTFEATURES_FAM11h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_G_ECX |\ - __bit(X86_FEATURE_SKINIT)) -#define AMD_EXTFEATURES_FAM11h_REV_B_EDX AMD_EXTFEATURES_K8_REV_G_EDX - -#endif /* __AMD_H__ */ diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/domctl.c Tue Jul 06 11:54:40 2010 +0100 @@ -34,7 +34,6 @@ #include <public/mem_event.h> #include <asm/mem_sharing.h> -#ifdef XEN_GDBSX_CONFIG #ifdef XEN_KDB_CONFIG #include "../kdb/include/kdbdefs.h" #include "../kdb/include/kdbproto.h" @@ -43,8 +42,9 @@ typedef unsigned char kdbbyt_t; typedef unsigned char kdbbyt_t; extern int dbg_rw_mem(kdbva_t, kdbbyt_t *, int, domid_t, int, uint64_t); #endif -static int -gdbsx_guest_mem_io(domid_t domid, struct xen_domctl_gdbsx_memio *iop) + +static int gdbsx_guest_mem_io( + domid_t domid, struct xen_domctl_gdbsx_memio *iop) { ulong l_uva = (ulong)iop->uva; iop->remain = dbg_rw_mem( @@ -52,7 +52,6 @@ gdbsx_guest_mem_io(domid_t domid, struct iop->gwr, iop->pgd3val); return (iop->remain ? -EFAULT : 0); } -#endif /* XEN_GDBSX_CONFIG */ long arch_do_domctl( struct xen_domctl *domctl, @@ -1309,7 +1308,6 @@ long arch_do_domctl( } break; -#ifdef XEN_GDBSX_CONFIG case XEN_DOMCTL_gdbsx_guestmemio: { struct domain *d; @@ -1418,7 +1416,6 @@ long arch_do_domctl( rcu_unlock_domain(d); } break; -#endif /* XEN_GDBSX_CONFIG */ #ifdef __x86_64__ case XEN_DOMCTL_mem_event_op: diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/genapic/x2apic.c --- a/xen/arch/x86/genapic/x2apic.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/genapic/x2apic.c Tue Jul 06 11:54:40 2010 +0100 @@ -33,6 +33,11 @@ static int x2apic_phys; /* By default w static int x2apic_phys; /* By default we use logical cluster mode. 
*/ boolean_param("x2apic_phys", x2apic_phys); +int x2apic_cmdline_disable(void) +{ + return (x2apic == 0); +} + static int probe_x2apic_phys(void) { return x2apic && x2apic_phys && x2apic_is_available() && @@ -54,6 +59,20 @@ const struct genapic apic_x2apic_cluster APIC_INIT("x2apic_cluster", probe_x2apic_cluster), GENAPIC_X2APIC_CLUSTER }; + +const struct genapic *apic_x2apic_probe(void) +{ + if ( !x2apic || !x2apic_is_available() ) + return NULL; + + if ( !iommu_supports_eim() ) + return NULL; + + if ( x2apic_phys ) + return &apic_x2apic_phys; + else + return &apic_x2apic_cluster; +} void init_apic_ldr_x2apic_phys(void) { diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/mtrr.c --- a/xen/arch/x86/hvm/mtrr.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/hvm/mtrr.c Tue Jul 06 11:54:40 2010 +0100 @@ -707,7 +707,7 @@ HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save 1, HVMSR_PER_VCPU); uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn, - uint8_t *ipat, int direct_mmio) + uint8_t *ipat, bool_t direct_mmio) { uint8_t gmtrr_mtype, hmtrr_mtype; uint32_t type; diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/svm/asid.c --- a/xen/arch/x86/hvm/svm/asid.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/hvm/svm/asid.c Tue Jul 06 11:54:40 2010 +0100 @@ -21,14 +21,14 @@ #include <xen/lib.h> #include <xen/perfc.h> #include <asm/hvm/svm/asid.h> +#include <asm/amd.h> void svm_asid_init(struct cpuinfo_x86 *c) { int nasids = 0; /* Check for erratum #170, and leave ASIDs disabled if it's present. */ - if ( (c->x86 == 0x10) || - ((c->x86 == 0xf) && (c->x86_model >= 0x68) && (c->x86_mask >= 1)) ) + if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_170) ) nasids = cpuid_ebx(0x8000000A); hvm_asid_init(nasids); diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/hvm/svm/svm.c Tue Jul 06 11:54:40 2010 +0100 @@ -34,6 +34,7 @@ #include <asm/regs.h> #include <asm/cpufeature.h> #include <asm/processor.h> +#include <asm/amd.h> #include <asm/types.h> #include <asm/debugreg.h> #include <asm/msr.h> @@ -846,8 +847,8 @@ static void svm_init_erratum_383(struct { uint64_t msr_content; - /* only family 10h is affected */ - if ( c->x86 != 0x10 ) + /* check whether CPU is affected */ + if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_383) ) return; /* use safe methods to be compatible with nested virtualization */ @@ -1492,9 +1493,7 @@ asmlinkage void svm_vmexit_handler(struc if ( (inst_len = __get_instruction_length(v, INSTR_INT3)) == 0 ) break; __update_guest_eip(regs, inst_len); -#ifdef XEN_GDBSX_CONFIG current->arch.gdbsx_vcpu_event = TRAP_int3; -#endif domain_pause_for_debugger(); break; diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Tue Jul 06 11:54:40 2010 +0100 @@ -1064,8 +1064,10 @@ void vmx_do_resume(struct vcpu *v) * 1: flushing cache (wbinvd) when the guest is scheduled out if * there is no wbinvd exit, or * 2: execute wbinvd on all dirty pCPUs when guest wbinvd exits. + * If VT-d engine can force snooping, we don't need to do these. 
*/ - if ( has_arch_pdevs(v->domain) && !cpu_has_wbinvd_exiting ) + if ( has_arch_pdevs(v->domain) && !iommu_snoop + && !cpu_has_wbinvd_exiting ) { int cpu = v->arch.hvm_vmx.active_cpu; if ( cpu != -1 ) diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Jul 06 11:54:40 2010 +0100 @@ -2089,6 +2089,9 @@ static void vmx_wbinvd_intercept(void) if ( !has_arch_mmios(current->domain) ) return; + if ( iommu_snoop ) + return; + if ( cpu_has_wbinvd_exiting ) on_each_cpu(wbinvd_ipi, NULL, 1); else @@ -2406,9 +2409,7 @@ asmlinkage void vmx_vmexit_handler(struc goto exit_and_crash; inst_len = __get_instruction_length(); /* Safe: INT3 */ __update_guest_eip(inst_len); -#ifdef XEN_GDBSX_CONFIG current->arch.gdbsx_vcpu_event = TRAP_int3; -#endif domain_pause_for_debugger(); break; case TRAP_no_device: diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vpmu.c --- a/xen/arch/x86/hvm/vmx/vpmu.c Fri Jul 02 18:04:54 2010 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,119 +0,0 @@ -/* - * vpmu.c: PMU virtualization for HVM domain. - * - * Copyright (c) 2007, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - * Author: Haitao Shan <haitao.shan@xxxxxxxxx> - */ - -#include <xen/config.h> -#include <xen/sched.h> -#include <asm/regs.h> -#include <asm/types.h> -#include <asm/msr.h> -#include <asm/hvm/support.h> -#include <asm/hvm/vmx/vmx.h> -#include <asm/hvm/vmx/vmcs.h> -#include <public/sched.h> -#include <public/hvm/save.h> -#include <asm/hvm/vmx/vpmu.h> - -static int __read_mostly opt_vpmu_enabled; -boolean_param("vpmu", opt_vpmu_enabled); - -int vpmu_do_wrmsr(struct cpu_user_regs *regs) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( vpmu->arch_vpmu_ops ) - return vpmu->arch_vpmu_ops->do_wrmsr(regs); - return 0; -} - -int vpmu_do_rdmsr(struct cpu_user_regs *regs) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( vpmu->arch_vpmu_ops ) - return vpmu->arch_vpmu_ops->do_rdmsr(regs); - return 0; -} - -int vpmu_do_interrupt(struct cpu_user_regs *regs) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( vpmu->arch_vpmu_ops ) - return vpmu->arch_vpmu_ops->do_interrupt(regs); - return 0; -} - -void vpmu_save(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu->arch_vpmu_ops ) - vpmu->arch_vpmu_ops->arch_vpmu_save(v); -} - -void vpmu_load(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu->arch_vpmu_ops ) - vpmu->arch_vpmu_ops->arch_vpmu_load(v); -} - -extern struct arch_vpmu_ops core2_vpmu_ops; -void vpmu_initialise(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !opt_vpmu_enabled ) - return; - - if ( vpmu->flags & VPMU_CONTEXT_ALLOCATED ) - vpmu_destroy(v); - - if ( current_cpu_data.x86 == 6 ) - { - switch ( current_cpu_data.x86_model ) - { - case 15: - case 23: - case 26: - case 29: - vpmu->arch_vpmu_ops = &core2_vpmu_ops; - break; - } - } - - if ( vpmu->arch_vpmu_ops != NULL ) - { - vpmu->flags = 0; - vpmu->context = NULL; - vpmu->arch_vpmu_ops->arch_vpmu_initialise(v); - } -} - -void vpmu_destroy(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu->arch_vpmu_ops ) - vpmu->arch_vpmu_ops->arch_vpmu_destroy(v); -} - diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/i8259.c --- a/xen/arch/x86/i8259.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/i8259.c Tue Jul 06 11:54:40 2010 +0100 @@ -173,6 +173,26 @@ int i8259A_irq_pending(unsigned int irq) spin_unlock_irqrestore(&i8259A_lock, flags); return ret; +} + +void mask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + outb(0xff, 0xA1); + outb(0xff, 0x21); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void unmask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + outb(cached_A1, 0xA1); + outb(cached_21, 0x21); + spin_unlock_irqrestore(&i8259A_lock, flags); } /* diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/io_apic.c Tue Jul 06 11:54:40 2010 +0100 @@ -134,6 +134,126 @@ static void __init replace_pin_at_irq(un break; entry = irq_2_pin + entry->next; } +} + +struct IO_APIC_route_entry **alloc_ioapic_entries(void) +{ + int apic; + struct IO_APIC_route_entry **ioapic_entries; + + ioapic_entries = xmalloc_array(struct IO_APIC_route_entry *, nr_ioapics); + if (!ioapic_entries) + return 0; + + for (apic = 0; apic < nr_ioapics; apic++) { + ioapic_entries[apic] = + xmalloc_array(struct IO_APIC_route_entry, + nr_ioapic_registers[apic]); + if (!ioapic_entries[apic]) + goto nomem; + } + + return ioapic_entries; + +nomem: + while (--apic >= 0) + 
xfree(ioapic_entries[apic]); + xfree(ioapic_entries); + + return 0; +} + +/* + * Saves all the IO-APIC RTE's + */ +int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) +{ + int apic, pin; + + if (!ioapic_entries) + return -ENOMEM; + + for (apic = 0; apic < nr_ioapics; apic++) { + if (!ioapic_entries[apic]) + return -ENOMEM; + + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + *(((int *)&ioapic_entries[apic][pin])+0) = + __io_apic_read(apic, 0x10+pin*2); + *(((int *)&ioapic_entries[apic][pin])+1) = + __io_apic_read(apic, 0x11+pin*2); + } + } + + return 0; +} + +/* + * Mask all IO APIC entries. + */ +void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) +{ + int apic, pin; + + if (!ioapic_entries) + return; + + for (apic = 0; apic < nr_ioapics; apic++) { + if (!ioapic_entries[apic]) + break; + + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + unsigned long flags; + + entry = ioapic_entries[apic][pin]; + if (!entry.mask) { + entry.mask = 1; + + spin_lock_irqsave(&ioapic_lock, flags); + __io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); + __io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + } + } +} + +/* + * Restore IO APIC entries which was saved in ioapic_entries. + */ +int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) +{ + int apic, pin; + unsigned long flags; + struct IO_APIC_route_entry entry; + + if (!ioapic_entries) + return -ENOMEM; + + for (apic = 0; apic < nr_ioapics; apic++) { + if (!ioapic_entries[apic]) + return -ENOMEM; + + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + entry = ioapic_entries[apic][pin]; + spin_lock_irqsave(&ioapic_lock, flags); + __io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); + __io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + return 0; +} + +void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) +{ + int apic; + + for (apic = 0; apic < nr_ioapics; apic++) + xfree(ioapic_entries[apic]); + + xfree(ioapic_entries); } static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/mm/hap/p2m-ept.c --- a/xen/arch/x86/mm/hap/p2m-ept.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/mm/hap/p2m-ept.c Tue Jul 06 11:54:40 2010 +0100 @@ -118,6 +118,74 @@ static int ept_set_middle_entry(struct d return 1; } +/* free ept sub tree behind an entry */ +void ept_free_entry(struct domain *d, ept_entry_t *ept_entry, int level) +{ + /* End if the entry is a leaf entry. */ + if ( level == 0 || !is_epte_present(ept_entry) || + is_epte_superpage(ept_entry) ) + return; + + if ( level > 1 ) + { + ept_entry_t *epte = map_domain_page(ept_entry->mfn); + for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ ) + ept_free_entry(d, epte + i, level - 1); + unmap_domain_page(epte); + } + + d->arch.p2m->free_page(d, mfn_to_page(ept_entry->mfn)); +} + +static int ept_split_super_page(struct domain *d, ept_entry_t *ept_entry, + int level, int target) +{ + ept_entry_t new_ept, *table; + uint64_t trunk; + int rv = 1; + + /* End if the entry is a leaf entry or reaches the target level. 
*/ + if ( level == 0 || level == target ) + return rv; + + ASSERT(is_epte_superpage(ept_entry)); + + if ( !ept_set_middle_entry(d, &new_ept) ) + return 0; + + table = map_domain_page(new_ept.mfn); + trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER); + + for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ ) + { + ept_entry_t *epte = table + i; + + epte->emt = ept_entry->emt; + epte->ipat = ept_entry->ipat; + epte->sp = (level > 1) ? 1 : 0; + epte->avail1 = ept_entry->avail1; + epte->avail2 = 0; + epte->mfn = ept_entry->mfn + i * trunk; + + ept_p2m_type_to_flags(epte, epte->avail1); + + if ( (level - 1) == target ) + continue; + + ASSERT(is_epte_superpage(epte)); + + if ( !(rv = ept_split_super_page(d, epte, level - 1, target)) ) + break; + } + + unmap_domain_page(table); + + /* Even failed we should install the newly allocated ept page. */ + *ept_entry = new_ept; + + return rv; +} + /* Take the currently mapped table, find the corresponding gfn entry, * and map the next table, if available. If the entry is empty * and read_only is set, @@ -134,13 +202,18 @@ static int ept_set_middle_entry(struct d */ static int ept_next_level(struct domain *d, bool_t read_only, ept_entry_t **table, unsigned long *gfn_remainder, - u32 shift) -{ + int next_level) +{ + unsigned long mfn; ept_entry_t *ept_entry; - ept_entry_t *next; - u32 index; + u32 shift, index; + + shift = next_level * EPT_TABLE_ORDER; index = *gfn_remainder >> shift; + + /* index must be falling into the page */ + ASSERT(index < EPT_PAGETABLE_ENTRIES); ept_entry = (*table) + index; @@ -159,69 +232,12 @@ static int ept_next_level(struct domain /* The only time sp would be set here is if we had hit a superpage */ if ( is_epte_superpage(ept_entry) ) return GUEST_TABLE_SUPER_PAGE; - else - { - *gfn_remainder &= (1UL << shift) - 1; - next = map_domain_page(ept_entry->mfn); - unmap_domain_page(*table); - *table = next; - return GUEST_TABLE_NORMAL_PAGE; - } -} - -/* It's super page before and we should break down it now. */ -static int ept_split_large_page(struct domain *d, - ept_entry_t **table, u32 *index, - unsigned long gfn, int level) -{ - ept_entry_t *prev_table = *table; - ept_entry_t *split_table = NULL; - ept_entry_t *split_entry = NULL; - ept_entry_t *ept_entry = (*table) + (*index); - ept_entry_t temp_ept_entry; - unsigned long s_gfn, s_mfn; - unsigned long offset, trunk; - int i; - - /* alloc new page for new ept middle level entry which is - * before a leaf super entry - */ - - if ( !ept_set_middle_entry(d, &temp_ept_entry) ) - return 0; - - /* split the super page to small next level pages */ - split_table = map_domain_page(temp_ept_entry.mfn); - offset = gfn & ((1UL << (level * EPT_TABLE_ORDER)) - 1); - trunk = (1UL << ((level-1) * EPT_TABLE_ORDER)); - - for ( i = 0; i < (1UL << EPT_TABLE_ORDER); i++ ) - { - s_gfn = gfn - offset + i * trunk; - s_mfn = ept_entry->mfn + i * trunk; - - split_entry = split_table + i; - split_entry->emt = ept_entry->emt; - split_entry->ipat = ept_entry->ipat; - - split_entry->sp = (level > 1) ? 
1 : 0; - - split_entry->mfn = s_mfn; - - split_entry->avail1 = ept_entry->avail1; - split_entry->avail2 = 0; - /* last step */ - split_entry->r = split_entry->w = split_entry->x = 1; - ept_p2m_type_to_flags(split_entry, ept_entry->avail1); - } - - *ept_entry = temp_ept_entry; - - *index = offset / trunk; - *table = split_table; - unmap_domain_page(prev_table); - - return 1; + + mfn = ept_entry->mfn; + unmap_domain_page(*table); + *table = map_domain_page(mfn); + *gfn_remainder &= (1UL << shift) - 1; + return GUEST_TABLE_NORMAL_PAGE; } /* @@ -229,56 +245,64 @@ static int ept_split_large_page(struct d * by observing whether any gfn->mfn translations are modified. */ static int -ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, +ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, unsigned int order, p2m_type_t p2mt) { - ept_entry_t *table = NULL; + ept_entry_t *table, *ept_entry; unsigned long gfn_remainder = gfn; unsigned long offset = 0; - ept_entry_t *ept_entry = NULL; u32 index; - int i; + int i, target = order / EPT_TABLE_ORDER; int rv = 0; int ret = 0; - int split_level = 0; - int walk_level = order / EPT_TABLE_ORDER; - int direct_mmio = (p2mt == p2m_mmio_direct); + bool_t direct_mmio = (p2mt == p2m_mmio_direct); uint8_t ipat = 0; int need_modify_vtd_table = 1; int needs_sync = 1; - if ( order != 0 ) - if ( (gfn & ((1UL << order) - 1)) ) - return 1; + /* + * the caller must make sure: + * 1. passing valid gfn and mfn at order boundary. + * 2. gfn not exceeding guest physical address width. + * 3. passing a valid order. + */ + if ( ((gfn | mfn_x(mfn)) & ((1UL << order) - 1)) || + (gfn >> ((ept_get_wl(d) + 1) * EPT_TABLE_ORDER)) || + (order % EPT_TABLE_ORDER) ) + return 0; + + ASSERT((target == 2 && hvm_hap_has_1gb(d)) || + (target == 1 && hvm_hap_has_2mb(d)) || + (target == 0)); table = map_domain_page(ept_get_asr(d)); ASSERT(table != NULL); - for ( i = ept_get_wl(d); i > walk_level; i-- ) - { - ret = ept_next_level(d, 0, &table, &gfn_remainder, i * EPT_TABLE_ORDER); + for ( i = ept_get_wl(d); i > target; i-- ) + { + ret = ept_next_level(d, 0, &table, &gfn_remainder, i); if ( !ret ) goto out; else if ( ret != GUEST_TABLE_NORMAL_PAGE ) break; } - /* If order == 0, we should only get POD if we have a POD superpage. - * If i > walk_level, we need to split the page; otherwise, - * just behave as normal. */ - ASSERT(ret != GUEST_TABLE_POD_PAGE || i != walk_level); - - index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order); - offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1))); - - split_level = i; + ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target); + + index = gfn_remainder >> (i * EPT_TABLE_ORDER); + offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1); ept_entry = table + index; - if ( i == walk_level ) - { - /* We reached the level we're looking for */ + /* + * When we are here, we must be on a leaf ept entry + * with i == target or i > target. + */ + + if ( i == target ) + { + /* We reached the target level. */ /* No need to flush if the old entry wasn't valid */ if ( !is_epte_present(ept_entry) ) @@ -291,15 +315,14 @@ ept_set_entry(struct domain *d, unsigned direct_mmio); ept_entry->ipat = ipat; ept_entry->sp = order ? 
1 : 0; + ept_entry->avail1 = p2mt; + ept_entry->avail2 = 0; if ( ept_entry->mfn == mfn_x(mfn) ) need_modify_vtd_table = 0; else ept_entry->mfn = mfn_x(mfn); - ept_entry->avail1 = p2mt; - ept_entry->avail2 = 0; - ept_p2m_type_to_flags(ept_entry, p2mt); } else @@ -307,32 +330,51 @@ ept_set_entry(struct domain *d, unsigned } else { - int level; - ept_entry_t *split_ept_entry; - - for ( level = split_level; level > walk_level ; level-- ) - { - rv = ept_split_large_page(d, &table, &index, gfn, level); - if ( !rv ) - goto out; - } - - split_ept_entry = table + index; - split_ept_entry->avail1 = p2mt; - ept_p2m_type_to_flags(split_ept_entry, p2mt); - split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, - direct_mmio); - split_ept_entry->ipat = ipat; - - if ( split_ept_entry->mfn == mfn_x(mfn) ) - need_modify_vtd_table = 0; - else - split_ept_entry->mfn = mfn_x(mfn); + /* We need to split the original page. */ + ept_entry_t split_ept_entry; + + ASSERT(is_epte_superpage(ept_entry)); + + split_ept_entry = *ept_entry; + + if ( !ept_split_super_page(d, &split_ept_entry, i, target) ) + { + ept_free_entry(d, &split_ept_entry, i); + goto out; + } + + /* now install the newly split ept sub-tree */ + /* NB: please make sure domian is paused and no in-fly VT-d DMA. */ + *ept_entry = split_ept_entry; + + /* then move to the level we want to make real changes */ + for ( ; i > target; i-- ) + ept_next_level(d, 0, &table, &gfn_remainder, i); + + ASSERT(i == target); + + index = gfn_remainder >> (i * EPT_TABLE_ORDER); + offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1); + + ept_entry = table + index; + + ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio); + ept_entry->ipat = ipat; + ept_entry->sp = i ? 1 : 0; + ept_entry->avail1 = p2mt; + ept_entry->avail2 = 0; + + if ( ept_entry->mfn == mfn_x(mfn) ) + need_modify_vtd_table = 0; + else /* the caller should take care of the previous page */ + ept_entry->mfn = mfn_x(mfn); + + ept_p2m_type_to_flags(ept_entry, p2mt); } /* Track the highest gfn for which we have ever had a valid mapping */ - if ( mfn_valid(mfn_x(mfn)) - && (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) ) + if ( mfn_valid(mfn_x(mfn)) && + (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) ) d->arch.p2m->max_mapped_pfn = gfn + (1UL << order) - 1; /* Success */ @@ -354,11 +396,11 @@ out: for ( i = 0; i < (1 << order); i++ ) iommu_map_page( d, gfn - offset + i, mfn_x(mfn) - offset + i, - IOMMUF_readable|IOMMUF_writable); + IOMMUF_readable | IOMMUF_writable); } else if ( !order ) iommu_map_page( - d, gfn, mfn_x(mfn), IOMMUF_readable|IOMMUF_writable); + d, gfn, mfn_x(mfn), IOMMUF_readable | IOMMUF_writable); } else { @@ -398,8 +440,7 @@ static mfn_t ept_get_entry(struct domain for ( i = ept_get_wl(d); i > 0; i-- ) { retry: - ret = ept_next_level(d, 1, &table, &gfn_remainder, - i * EPT_TABLE_ORDER); + ret = ept_next_level(d, 1, &table, &gfn_remainder, i); if ( !ret ) goto out; else if ( ret == GUEST_TABLE_POD_PAGE ) @@ -486,8 +527,7 @@ static ept_entry_t ept_get_entry_content for ( i = ept_get_wl(d); i > 0; i-- ) { - ret = ept_next_level(d, 1, &table, &gfn_remainder, - i * EPT_TABLE_ORDER); + ret = ept_next_level(d, 1, &table, &gfn_remainder, i); if ( !ret || ret == GUEST_TABLE_POD_PAGE ) goto out; else if ( ret == GUEST_TABLE_SUPER_PAGE ) @@ -559,7 +599,7 @@ static mfn_t ept_get_entry_current(unsig return ept_get_entry(current->domain, gfn, t, q); } -/* +/* * To test if the new emt type is the same with old, * return 1 to not to reset ept entry. 
*/ @@ -569,14 +609,14 @@ static int need_modify_ept_entry(struct { uint8_t ipat; uint8_t emt; - int direct_mmio = (p2mt == p2m_mmio_direct); + bool_t direct_mmio = (p2mt == p2m_mmio_direct); emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio); if ( (emt == o_emt) && (ipat == o_ipat) ) return 0; - return 1; + return 1; } void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn, @@ -710,8 +750,7 @@ static void ept_dump_p2m_table(unsigned for ( i = ept_get_wl(d); i > 0; i-- ) { - ret = ept_next_level(d, 1, &table, &gfn_remainder, - i * EPT_TABLE_ORDER); + ret = ept_next_level(d, 1, &table, &gfn_remainder, i); if ( ret != GUEST_TABLE_NORMAL_PAGE ) break; } diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/arch/x86/setup.c Tue Jul 06 11:54:40 2010 +0100 @@ -909,6 +909,9 @@ void __init __start_xen(unsigned long mb tboot_probe(); + /* Check if x2APIC is already enabled in BIOS */ + check_x2apic_preenabled(); + /* Unmap the first page of CPU0's stack. */ memguard_guard_stack(cpu0_stack); @@ -926,9 +929,6 @@ void __init __start_xen(unsigned long mb generic_apic_probe(); acpi_boot_init(); - - if ( x2apic_is_available() ) - enable_x2apic(); init_cpu_to_node(); @@ -941,6 +941,9 @@ void __init __start_xen(unsigned long mb #endif init_apic_mappings(); + + if ( x2apic_is_available() ) + enable_x2apic(); init_IRQ(); diff -r ce278fdaced3 -r db6234d3eafb xen/common/memory.c --- a/xen/common/memory.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/common/memory.c Tue Jul 06 11:54:40 2010 +0100 @@ -545,6 +545,8 @@ long do_memory_op(unsigned long cmd, XEN } args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags)); + if (reservation.mem_flags & XENMEMF_exact_node_request) + args.memflags |= MEMF_exact_node; if ( op == XENMEM_populate_physmap && (reservation.mem_flags & XENMEMF_populate_on_demand) ) diff -r ce278fdaced3 -r db6234d3eafb xen/common/page_alloc.c --- a/xen/common/page_alloc.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/common/page_alloc.c Tue Jul 06 11:54:40 2010 +0100 @@ -300,11 +300,15 @@ static struct page_info *alloc_heap_page unsigned int i, j, zone = 0; unsigned int num_nodes = num_online_nodes(); unsigned long request = 1UL << order; + bool_t exact_node_request = !!(memflags & MEMF_exact_node); cpumask_t extra_cpus_mask, mask; struct page_info *pg; if ( node == NUMA_NO_NODE ) + { node = cpu_to_node(smp_processor_id()); + exact_node_request = 0; + } ASSERT(node >= 0); ASSERT(zone_lo <= zone_hi); @@ -345,6 +349,9 @@ static struct page_info *alloc_heap_page goto found; } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */ + if ( exact_node_request ) + goto not_found; + /* Pick next node, wrapping around if needed. */ node = next_node(node, node_online_map); if (node == MAX_NUMNODES) @@ -360,6 +367,7 @@ static struct page_info *alloc_heap_page return pg; } + not_found: /* No suitable memory blocks. Fail the request. */ spin_unlock(&heap_lock); return NULL; diff -r ce278fdaced3 -r db6234d3eafb xen/common/trace.c --- a/xen/common/trace.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/common/trace.c Tue Jul 06 11:54:40 2010 +0100 @@ -50,16 +50,15 @@ static struct t_info *t_info; static struct t_info *t_info; #define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. 
*/ #define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE)) -/* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t */ -#define T_INFO_FIRST_OFFSET ((sizeof(int16_t) + NR_CPUS * sizeof(int16_t) + 1) / sizeof(uint32_t)) static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs); static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data); static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock); -static int data_size; +static u32 data_size; +static u32 t_info_first_offset __read_mostly; /* High water mark for trace buffers; */ /* Send virtual interrupt when buffer level reaches this point */ -static int t_buf_highwater; +static u32 t_buf_highwater; /* Number of records lost due to per-CPU trace buffer being full. */ static DEFINE_PER_CPU(unsigned long, lost_records); @@ -75,13 +74,37 @@ static cpumask_t tb_cpu_mask = CPU_MASK_ /* which tracing events are enabled */ static u32 tb_event_mask = TRC_ALL; +/* Return the number of elements _type necessary to store at least _x bytes of data + * i.e., sizeof(_type) * ans >= _x. */ +#define fit_to_type(_type, _x) (((_x)+sizeof(_type)-1) / sizeof(_type)) + +static void calc_tinfo_first_offset(void) +{ + int offset_in_bytes; + + offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]); + + t_info_first_offset = fit_to_type(uint32_t, offset_in_bytes); + + gdprintk(XENLOG_INFO, "%s: NR_CPUs %d, offset_in_bytes %d, t_info_first_offset %u\n", + __func__, NR_CPUS, offset_in_bytes, (unsigned)t_info_first_offset); +} + /** * check_tbuf_size - check to make sure that the proposed size will fit - * in the currently sized struct t_info. - */ -static inline int check_tbuf_size(int size) -{ - return (num_online_cpus() * size + T_INFO_FIRST_OFFSET) > (T_INFO_SIZE / sizeof(uint32_t)); + * in the currently sized struct t_info and allows prod and cons to + * reach double the value without overflow. + */ +static int check_tbuf_size(u32 pages) +{ + struct t_buf dummy; + typeof(dummy.prod) size; + + size = ((typeof(dummy.prod))pages) * PAGE_SIZE; + + return (size / PAGE_SIZE != pages) + || (size + size < size) + || (num_online_cpus() * pages + t_info_first_offset > T_INFO_SIZE / sizeof(uint32_t)); } /** @@ -100,7 +123,7 @@ static int alloc_trace_bufs(void) unsigned long nr_pages; /* Start after a fixed-size array of NR_CPUS */ uint32_t *t_info_mfn_list = (uint32_t *)t_info; - int offset = T_INFO_FIRST_OFFSET; + int offset = t_info_first_offset; BUG_ON(check_tbuf_size(opt_tbuf_size)); @@ -115,7 +138,7 @@ static int alloc_trace_bufs(void) } t_info->tbuf_size = opt_tbuf_size; - printk("tbuf_size %d\n", t_info->tbuf_size); + printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size); nr_pages = opt_tbuf_size; order = get_order_from_pages(nr_pages); @@ -140,7 +163,7 @@ static int alloc_trace_bufs(void) spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); - buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf; + per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf; buf->cons = buf->prod = 0; per_cpu(t_data, cpu) = (unsigned char *)(buf + 1); @@ -172,7 +195,7 @@ static int alloc_trace_bufs(void) /* Write list first, then write per-cpu offset. 
*/ wmb(); t_info->mfn_offset[cpu]=offset; - printk("p%d mfn %"PRIx32" offset %d\n", + printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n", cpu, mfn, offset); offset+=i; } @@ -191,6 +214,7 @@ out_dealloc: spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) ) { + per_cpu(t_bufs, cpu) = NULL; ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated)); free_xenheap_pages(rawbuf, order); } @@ -293,6 +317,10 @@ void __init init_trace_bufs(void) void __init init_trace_bufs(void) { int i; + + /* Calculate offset in u32 of first mfn */ + calc_tinfo_first_offset(); + /* t_info size fixed at 2 pages for now. That should be big enough / small enough * until it's worth making it dynamic. */ t_info = alloc_xenheap_pages(1, 0); @@ -405,19 +433,39 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc return rc; } -static inline int calc_rec_size(int cycles, int extra) -{ - int rec_size; - rec_size = 4; +static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra) +{ + unsigned int rec_size = 4; + if ( cycles ) rec_size += 8; rec_size += extra; return rec_size; } -static inline int calc_unconsumed_bytes(struct t_buf *buf) -{ - int x = buf->prod - buf->cons; +static inline bool_t bogus(u32 prod, u32 cons) +{ + if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) || + unlikely(cons & 3) || unlikely(cons >= 2 * data_size) ) + { + tb_init_done = 0; + printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n", + smp_processor_id(), prod, cons); + return 1; + } + return 0; +} + +static inline u32 calc_unconsumed_bytes(const struct t_buf *buf) +{ + u32 prod = buf->prod, cons = buf->cons; + s32 x; + + barrier(); /* must read buf->prod and buf->cons only once */ + if ( bogus(prod, cons) ) + return data_size; + + x = prod - cons; if ( x < 0 ) x += 2*data_size; @@ -427,9 +475,16 @@ static inline int calc_unconsumed_bytes( return x; } -static inline int calc_bytes_to_wrap(struct t_buf *buf) -{ - int x = data_size - buf->prod; +static inline u32 calc_bytes_to_wrap(const struct t_buf *buf) +{ + u32 prod = buf->prod, cons = buf->cons; + s32 x; + + barrier(); /* must read buf->prod and buf->cons only once */ + if ( bogus(prod, cons) ) + return 0; + + x = data_size - prod; if ( x <= 0 ) x += data_size; @@ -439,54 +494,60 @@ static inline int calc_bytes_to_wrap(str return x; } -static inline int calc_bytes_avail(struct t_buf *buf) +static inline u32 calc_bytes_avail(const struct t_buf *buf) { return data_size - calc_unconsumed_bytes(buf); } -static inline struct t_rec * -next_record(struct t_buf *buf) -{ - int x = buf->prod; +static inline struct t_rec *next_record(const struct t_buf *buf, + uint32_t *next) +{ + u32 x = buf->prod, cons = buf->cons; + + barrier(); /* must read buf->prod and buf->cons only once */ + *next = x; + if ( !tb_init_done || bogus(x, cons) ) + return NULL; + if ( x >= data_size ) x -= data_size; - ASSERT(x >= 0); ASSERT(x < data_size); return (struct t_rec *)&this_cpu(t_data)[x]; } -static inline int __insert_record(struct t_buf *buf, - unsigned long event, - int extra, - int cycles, - int rec_size, - unsigned char *extra_data) +static inline void __insert_record(struct t_buf *buf, + unsigned long event, + unsigned int extra, + bool_t cycles, + unsigned int rec_size, + const void *extra_data) { struct t_rec *rec; unsigned char *dst; - unsigned long extra_word = extra/sizeof(u32); - int local_rec_size = calc_rec_size(cycles, extra); + unsigned int extra_word = extra / sizeof(u32); + unsigned int local_rec_size = calc_rec_size(cycles, 
extra); uint32_t next; BUG_ON(local_rec_size != rec_size); BUG_ON(extra & 3); + rec = next_record(buf, &next); + if ( !rec ) + return; /* Double-check once more that we have enough space. * Don't bugcheck here, in case the userland tool is doing * something stupid. */ - if ( calc_bytes_avail(buf) < rec_size ) - { - printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n", - __func__, - calc_bytes_avail(buf), - data_size, buf->prod, buf->cons, data_size, rec_size); - return 0; - } - rmb(); - - rec = next_record(buf); + if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size ) + { + if ( printk_ratelimit() ) + printk(XENLOG_WARNING + "%s: size=%08x prod=%08x cons=%08x rec=%u\n", + __func__, data_size, next, buf->cons, rec_size); + return; + } + rec->event = event; rec->extra_u32 = extra_word; dst = (unsigned char *)rec->u.nocycles.extra_u32; @@ -503,21 +564,19 @@ static inline int __insert_record(struct wmb(); - next = buf->prod + rec_size; + next += rec_size; if ( next >= 2*data_size ) next -= 2*data_size; - ASSERT(next >= 0); ASSERT(next < 2*data_size); buf->prod = next; - - return rec_size; -} - -static inline int insert_wrap_record(struct t_buf *buf, int size) -{ - int space_left = calc_bytes_to_wrap(buf); - unsigned long extra_space = space_left - sizeof(u32); - int cycles = 0; +} + +static inline void insert_wrap_record(struct t_buf *buf, + unsigned int size) +{ + u32 space_left = calc_bytes_to_wrap(buf); + unsigned int extra_space = space_left - sizeof(u32); + bool_t cycles = 0; BUG_ON(space_left > size); @@ -529,17 +588,13 @@ static inline int insert_wrap_record(str ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX); } - return __insert_record(buf, - TRC_TRACE_WRAP_BUFFER, - extra_space, - cycles, - space_left, - NULL); + __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles, + space_left, NULL); } #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */ -static inline int insert_lost_records(struct t_buf *buf) +static inline void insert_lost_records(struct t_buf *buf) { struct { u32 lost_records; @@ -554,12 +609,8 @@ static inline int insert_lost_records(st this_cpu(lost_records) = 0; - return __insert_record(buf, - TRC_LOST_RECORDS, - sizeof(ed), - 1 /* cycles */, - LOST_REC_SIZE, - (unsigned char *)&ed); + __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */, + LOST_REC_SIZE, &ed); } /* @@ -581,13 +632,15 @@ static DECLARE_TASKLET(trace_notify_dom0 * failure, otherwise 0. Failure occurs only if the trace buffers are not yet * initialised. */ -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data) +void __trace_var(u32 event, bool_t cycles, unsigned int extra, + const void *extra_data) { struct t_buf *buf; - unsigned long flags, bytes_to_tail, bytes_to_wrap; - int rec_size, total_size; - int extra_word; - int started_below_highwater = 0; + unsigned long flags; + u32 bytes_to_tail, bytes_to_wrap; + unsigned int rec_size, total_size; + unsigned int extra_word; + bool_t started_below_highwater; if( !tb_init_done ) return; @@ -626,7 +679,11 @@ void __trace_var(u32 event, int cycles, buf = this_cpu(t_bufs); if ( unlikely(!buf) ) + { + /* Make gcc happy */ + started_below_highwater = 0; goto unlock; + } started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater); @@ -707,8 +764,9 @@ unlock: spin_unlock_irqrestore(&this_cpu(t_lock), flags); /* Notify trace buffer consumer that we've crossed the high water mark. 
*/ - if ( started_below_highwater && - (calc_unconsumed_bytes(buf) >= t_buf_highwater) ) + if ( likely(buf!=NULL) + && started_below_highwater + && (calc_unconsumed_bytes(buf) >= t_buf_highwater) ) tasklet_schedule(&trace_notify_dom0_tasklet); } diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/dmar.c --- a/xen/drivers/passthrough/vtd/dmar.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/drivers/passthrough/vtd/dmar.c Tue Jul 06 11:54:40 2010 +0100 @@ -32,6 +32,7 @@ #include "dmar.h" #include "iommu.h" #include "extern.h" +#include "vtd.h" #undef PREFIX #define PREFIX VTDPREFIX "ACPI DMAR:" @@ -378,7 +379,6 @@ acpi_parse_one_drhd(struct acpi_dmar_ent struct acpi_table_drhd * drhd = (struct acpi_table_drhd *)header; void *dev_scope_start, *dev_scope_end; struct acpi_drhd_unit *dmaru; - void *addr; int ret; static int include_all = 0; @@ -397,8 +397,9 @@ acpi_parse_one_drhd(struct acpi_dmar_ent dprintk(VTDPREFIX, " dmaru->address = %"PRIx64"\n", dmaru->address); - addr = map_to_nocache_virt(0, drhd->address); - dmaru->ecap = dmar_readq(addr, DMAR_ECAP_REG); + ret = iommu_alloc(dmaru); + if ( ret ) + goto out; dev_scope_start = (void *)(drhd + 1); dev_scope_end = ((void *)drhd) + header->length; @@ -420,7 +421,7 @@ acpi_parse_one_drhd(struct acpi_dmar_ent } if ( ret ) - xfree(dmaru); + goto out; else if ( force_iommu || dmaru->include_all ) acpi_register_drhd_unit(dmaru); else @@ -451,14 +452,15 @@ acpi_parse_one_drhd(struct acpi_dmar_ent if ( invalid_cnt ) { - xfree(dmaru); - if ( iommu_workaround_bios_bug && invalid_cnt == dmaru->scope.devices_cnt ) { dprintk(XENLOG_WARNING VTDPREFIX, " Workaround BIOS bug: ignore the DRHD due to all " "devices under its scope are not PCI discoverable!\n"); + + iommu_free(dmaru); + xfree(dmaru); } else { @@ -474,6 +476,12 @@ acpi_parse_one_drhd(struct acpi_dmar_ent acpi_register_drhd_unit(dmaru); } +out: + if ( ret ) + { + iommu_free(dmaru); + xfree(dmaru); + } return ret; } diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/dmar.h --- a/xen/drivers/passthrough/vtd/dmar.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/drivers/passthrough/vtd/dmar.h Tue Jul 06 11:54:40 2010 +0100 @@ -50,7 +50,6 @@ struct acpi_drhd_unit { struct dmar_scope scope; /* must be first member of struct */ struct list_head list; u64 address; /* register base address of the unit */ - u64 ecap; u8 include_all:1; struct iommu *iommu; struct list_head ioapic_list; diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/extern.h --- a/xen/drivers/passthrough/vtd/extern.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/drivers/passthrough/vtd/extern.h Tue Jul 06 11:54:40 2010 +0100 @@ -33,7 +33,7 @@ extern struct keyhandler dump_iommu_info int enable_qinval(struct iommu *iommu); void disable_qinval(struct iommu *iommu); -int enable_intremap(struct iommu *iommu); +int enable_intremap(struct iommu *iommu, int eim); void disable_intremap(struct iommu *iommu); int queue_invalidate_context(struct iommu *iommu, u16 did, u16 source_id, u8 function_mask, u8 granu); @@ -44,6 +44,7 @@ int invalidate_sync(struct iommu *iommu) int invalidate_sync(struct iommu *iommu); int iommu_flush_iec_global(struct iommu *iommu); int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx); +void clear_fault_bits(struct iommu *iommu); struct iommu * ioapic_to_iommu(unsigned int apic_id); struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id); struct acpi_drhd_unit * iommu_to_drhd(struct iommu *iommu); diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/intremap.c --- 
a/xen/drivers/passthrough/vtd/intremap.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/drivers/passthrough/vtd/intremap.c Tue Jul 06 11:54:40 2010 +0100 @@ -134,18 +134,26 @@ int iommu_supports_eim(void) if ( !iommu_enabled || !iommu_qinval || !iommu_intremap ) return 0; + if ( list_empty(&acpi_drhd_units) ) + { + dprintk(XENLOG_WARNING VTDPREFIX, "VT-d is not supported\n"); + return 0; + } + /* We MUST have a DRHD unit for each IOAPIC. */ for ( apic = 0; apic < nr_ioapics; apic++ ) if ( !ioapic_to_drhd(IO_APIC_ID(apic)) ) + { + dprintk(XENLOG_WARNING VTDPREFIX, + "There is not a DRHD for IOAPIC 0x%x (id: 0x%x)!\n", + apic, IO_APIC_ID(apic)); return 0; - - if ( list_empty(&acpi_drhd_units) ) - return 0; + } for_each_drhd_unit ( drhd ) - if ( !ecap_queued_inval(drhd->ecap) || - !ecap_intr_remap(drhd->ecap) || - !ecap_eim(drhd->ecap) ) + if ( !ecap_queued_inval(drhd->iommu->ecap) || + !ecap_intr_remap(drhd->iommu->ecap) || + !ecap_eim(drhd->iommu->ecap) ) return 0; return 1; @@ -706,7 +714,7 @@ void msi_msg_write_remap_rte( } #endif -int enable_intremap(struct iommu *iommu) +int enable_intremap(struct iommu *iommu, int eim) { struct acpi_drhd_unit *drhd; struct ir_ctrl *ir_ctrl; @@ -716,10 +724,25 @@ int enable_intremap(struct iommu *iommu) ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap); ir_ctrl = iommu_ir_ctrl(iommu); + sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); + + /* Return if already enabled by Xen */ + if ( (sts & DMA_GSTS_IRES) && ir_ctrl->iremap_maddr ) + return 0; + + sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); + if ( !(sts & DMA_GSTS_QIES) ) + { + dprintk(XENLOG_ERR VTDPREFIX, + "Queued invalidation is not enabled, should not enable " + "interrupt remapping\n"); + return -EINVAL; + } + if ( ir_ctrl->iremap_maddr == 0 ) { drhd = iommu_to_drhd(iommu); - ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR ); + ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR); if ( ir_ctrl->iremap_maddr == 0 ) { dprintk(XENLOG_WARNING VTDPREFIX, @@ -732,7 +755,7 @@ int enable_intremap(struct iommu *iommu) #ifdef CONFIG_X86 /* set extended interrupt mode bit */ ir_ctrl->iremap_maddr |= - x2apic_enabled ? (1 << IRTA_REG_EIME_SHIFT) : 0; + eim ? 
(1 << IRTA_REG_EIME_SHIFT) : 0; #endif spin_lock_irqsave(&iommu->register_lock, flags); @@ -769,13 +792,95 @@ void disable_intremap(struct iommu *iomm u32 sts; unsigned long flags; - ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap); + if ( !ecap_intr_remap(iommu->ecap) ) + return; spin_lock_irqsave(&iommu->register_lock, flags); sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); + if ( !(sts & DMA_GSTS_IRES) ) + goto out; + dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_IRE)); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl, !(sts & DMA_GSTS_IRES), sts); +out: spin_unlock_irqrestore(&iommu->register_lock, flags); } + +/* + * This function is used to enable Interrupt remapping when + * enabling x2APIC + */ +int iommu_enable_IR(void) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + + if ( !iommu_supports_eim() ) + return -1; + + for_each_drhd_unit ( drhd ) + { + struct qi_ctrl *qi_ctrl = NULL; + + iommu = drhd->iommu; + qi_ctrl = iommu_qi_ctrl(iommu); + + /* Clear previous faults */ + clear_fault_bits(iommu); + + /* + * Disable interrupt remapping and queued invalidation if + * already enabled by BIOS + */ + disable_intremap(iommu); + disable_qinval(iommu); + } + + /* Enable queued invalidation */ + for_each_drhd_unit ( drhd ) + { + iommu = drhd->iommu; + if ( enable_qinval(iommu) != 0 ) + { + dprintk(XENLOG_INFO VTDPREFIX, + "Failed to enable Queued Invalidation!\n"); + return -1; + } + } + + /* Enable interrupt remapping */ + for_each_drhd_unit ( drhd ) + { + iommu = drhd->iommu; + if ( enable_intremap(iommu, 1) ) + { + dprintk(XENLOG_INFO VTDPREFIX, + "Failed to enable Interrupt Remapping!\n"); + return -1; + } + } + + return 0; +} + +/* + * Check if interrupt remapping is enabled or not + * return 1: enabled + * return 0: not enabled + */ +int intremap_enabled(void) +{ + struct acpi_drhd_unit *drhd; + u32 sts; + + for_each_drhd_unit ( drhd ) + { + sts = dmar_readl(drhd->iommu->reg, DMAR_GSTS_REG); + if ( !(sts & DMA_GSTS_IRES) ) + return 0; + } + + return 1; +}
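The new iommu_enable_IR() above first clears faults and tears down any BIOS-enabled state, then walks the DRHD units twice: queued invalidation is enabled on every unit before interrupt remapping is touched, because enable_intremap() now returns -EINVAL when QI is off. A compilable toy model of that two-phase, status-checked bring-up; the status words and bit names below are simulated, not the real DMAR_GSTS_REG programming model:

    #include <stdio.h>

    #define N_UNITS  2
    #define STS_QIES (1u << 0)   /* "queued invalidation enabled" (simulated) */
    #define STS_IRES (1u << 1)   /* "interrupt remapping enabled" (simulated) */

    static unsigned int gsts[N_UNITS];    /* per-unit status registers */

    static int enable_qi(int u)
    {
        if ( gsts[u] & STS_QIES )         /* already on: succeed idempotently */
            return 0;
        gsts[u] |= STS_QIES;
        return 0;
    }

    static int enable_ir(int u)
    {
        if ( !(gsts[u] & STS_QIES) )      /* refuse without QI, like enable_intremap() */
            return -1;
        gsts[u] |= STS_IRES;
        return 0;
    }

    int main(void)
    {
        int u;

        for ( u = 0; u < N_UNITS; u++ )   /* phase 1: QI on every unit first */
            if ( enable_qi(u) != 0 )
                return 1;

        for ( u = 0; u < N_UNITS; u++ )   /* phase 2: only then remapping */
            if ( enable_ir(u) != 0 )
                return 1;

        printf("interrupt remapping enabled on all %d units\n", N_UNITS);
        return 0;
    }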
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Tue Jul 06 11:54:40 2010 +0100 @@ -144,14 +144,17 @@ struct iommu_flush *iommu_get_flush(stru return iommu ? &iommu->intel->flush : NULL; } -static unsigned int clflush_size; static int iommus_incoherent; static void __iommu_flush_cache(void *addr, unsigned int size) { int i; + static unsigned int clflush_size = 0; if ( !iommus_incoherent ) return; + + if ( clflush_size == 0 ) + clflush_size = get_cache_line_size(); for ( i = 0; i < size; i += clflush_size ) cacheline_flush((char *)addr + i); @@ -1037,7 +1040,7 @@ static int iommu_set_interrupt(struct io return irq; } -static int __init iommu_alloc(struct acpi_drhd_unit *drhd) +int __init iommu_alloc(struct acpi_drhd_unit *drhd) { struct iommu *iommu; unsigned long sagaw, nr_dom; @@ -1131,7 +1134,7 @@ static int __init iommu_alloc(struct acp return 0; } -static void __init iommu_free(struct acpi_drhd_unit *drhd) +void __init iommu_free(struct acpi_drhd_unit *drhd) { struct iommu *iommu = drhd->iommu; @@ -1787,7 +1790,7 @@ static void setup_dom0_devices(struct do spin_unlock(&pcidevs_lock); } -static void clear_fault_bits(struct iommu *iommu) +void clear_fault_bits(struct iommu *iommu) { u64 val; unsigned long flags; @@ -1831,24 +1834,20 @@ static int init_vtd_hw(void) spin_lock_irqsave(&iommu->register_lock, flags); dmar_writel(iommu->reg, DMAR_FECTL_REG, 0); spin_unlock_irqrestore(&iommu->register_lock, flags); - - /* initialize flush functions */ - flush = iommu_get_flush(iommu); - flush->context = flush_context_reg; - flush->iotlb = flush_iotlb_reg; - } - - if ( iommu_qinval ) - { - for_each_drhd_unit ( drhd ) - { - iommu = drhd->iommu; - if ( enable_qinval(iommu) != 0 ) - { - dprintk(XENLOG_INFO VTDPREFIX, - "Failed to enable Queued Invalidation!\n"); - break; - } + } + + for_each_drhd_unit ( drhd ) + { + iommu = drhd->iommu; + /* + * If queued invalidation is not enabled, use register-based + * invalidation + */ + if ( enable_qinval(iommu) != 0 ) + { + flush = iommu_get_flush(iommu); + flush->context = flush_context_reg; + flush->iotlb = flush_iotlb_reg; + } } @@ -1874,9 +1873,9 @@ static int init_vtd_hw(void) for_each_drhd_unit ( drhd ) { iommu = drhd->iommu; - if ( enable_intremap(iommu) != 0 ) + if ( enable_intremap(iommu, 0) != 0 ) { - dprintk(XENLOG_INFO VTDPREFIX, + dprintk(XENLOG_WARNING VTDPREFIX, "Failed to enable Interrupt Remapping!\n"); break; } @@ -1943,8 +1942,6 @@ int __init intel_vtd_setup(void) platform_quirks(); - clflush_size = get_cache_line_size(); - irq_to_iommu = xmalloc_array(struct iommu*, nr_irqs); BUG_ON(!irq_to_iommu); memset(irq_to_iommu, 0, nr_irqs * sizeof(struct iommu*)); @@ -1958,9 +1955,6 @@ */ for_each_drhd_unit ( drhd ) { - if ( iommu_alloc(drhd) != 0 ) goto error; - iommu = drhd->iommu; if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) ) @@ -2000,8 +1994,6 @@ return 0; error: - for_each_drhd_unit ( drhd ) - iommu_free(drhd); iommu_enabled = 0; iommu_snoop = 0; iommu_passthrough = 0; diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/qinval.c --- a/xen/drivers/passthrough/vtd/qinval.c Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/drivers/passthrough/vtd/qinval.c Tue Jul 06 11:54:40 2010 +0100 @@ -437,10 +437,16 @@ int enable_qinval(struct iommu *iommu) u32 sts; unsigned long flags; + if ( !ecap_queued_inval(iommu->ecap) || !iommu_qinval ) + return -ENOENT; + qi_ctrl = iommu_qi_ctrl(iommu); flush = iommu_get_flush(iommu); - ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval); + /* Return if already enabled by Xen */ + sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); + if ( (sts & DMA_GSTS_QIES) && qi_ctrl->qinval_maddr ) + return 0; if (
qi_ctrl->qinval_maddr == 0 ) { @@ -488,14 +494,19 @@ void disable_qinval(struct iommu *iommu) u32 sts; unsigned long flags; - ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval); + if ( !ecap_queued_inval(iommu->ecap) ) + return; spin_lock_irqsave(&iommu->register_lock, flags); sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); + if ( !(sts & DMA_GSTS_QIES) ) + goto out; + dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_QIE)); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl, !(sts & DMA_GSTS_QIES), sts); - spin_unlock_irqrestore(&iommu->register_lock, flags); -} +out: + spin_unlock_irqrestore(&iommu->register_lock, flags); +} diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/vtd.h --- a/xen/drivers/passthrough/vtd/vtd.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/drivers/passthrough/vtd/vtd.h Tue Jul 06 11:54:40 2010 +0100 @@ -108,4 +108,7 @@ void iommu_flush_cache_entry(void *addr, void iommu_flush_cache_entry(void *addr, unsigned int size); void iommu_flush_cache_page(void *addr, unsigned long npages); +int iommu_alloc(struct acpi_drhd_unit *drhd); +void iommu_free(struct acpi_drhd_unit *drhd); + #endif // _VTD_H_ diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/amd.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-x86/amd.h Tue Jul 06 11:54:40 2010 +0100 @@ -0,0 +1,138 @@ +/* + * amd.h - AMD processor specific definitions + */ + +#ifndef __AMD_H__ +#define __AMD_H__ + +#include <asm/cpufeature.h> + +/* CPUID masked for use by AMD-V Extended Migration */ + +#define X86_FEATURE_BITPOS(_feature_) ((_feature_) % 32) +#define __bit(_x_) (1U << X86_FEATURE_BITPOS(_x_)) + +/* Family 0Fh, Revision C */ +#define AMD_FEATURES_K8_REV_C_ECX 0 +#define AMD_FEATURES_K8_REV_C_EDX ( \ + __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \ + __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \ + __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \ + __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \ + __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \ + __bit(X86_FEATURE_SEP) | __bit(X86_FEATURE_MTRR) | \ + __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \ + __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \ + __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_CLFLSH)| \ + __bit(X86_FEATURE_MMX) | __bit(X86_FEATURE_FXSR) | \ + __bit(X86_FEATURE_XMM) | __bit(X86_FEATURE_XMM2)) +#define AMD_EXTFEATURES_K8_REV_C_ECX 0 +#define AMD_EXTFEATURES_K8_REV_C_EDX ( \ + __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \ + __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \ + __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \ + __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \ + __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \ + __bit(X86_FEATURE_SYSCALL) | __bit(X86_FEATURE_MTRR) | \ + __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \ + __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \ + __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_NX) | \ + __bit(X86_FEATURE_MMXEXT) | __bit(X86_FEATURE_MMX) | \ + __bit(X86_FEATURE_FXSR) | __bit(X86_FEATURE_LM) | \ + __bit(X86_FEATURE_3DNOWEXT) | __bit(X86_FEATURE_3DNOW)) + +/* Family 0Fh, Revision D */ +#define AMD_FEATURES_K8_REV_D_ECX AMD_FEATURES_K8_REV_C_ECX +#define AMD_FEATURES_K8_REV_D_EDX AMD_FEATURES_K8_REV_C_EDX +#define AMD_EXTFEATURES_K8_REV_D_ECX (AMD_EXTFEATURES_K8_REV_C_ECX |\ + __bit(X86_FEATURE_LAHF_LM)) +#define AMD_EXTFEATURES_K8_REV_D_EDX (AMD_EXTFEATURES_K8_REV_C_EDX |\ + __bit(X86_FEATURE_FFXSR)) + +/* Family 0Fh, Revision E */ +#define AMD_FEATURES_K8_REV_E_ECX 
(AMD_FEATURES_K8_REV_D_ECX | \ + __bit(X86_FEATURE_XMM3)) +#define AMD_FEATURES_K8_REV_E_EDX (AMD_FEATURES_K8_REV_D_EDX | \ + __bit(X86_FEATURE_HT)) +#define AMD_EXTFEATURES_K8_REV_E_ECX (AMD_EXTFEATURES_K8_REV_D_ECX |\ + __bit(X86_FEATURE_CMP_LEGACY)) +#define AMD_EXTFEATURES_K8_REV_E_EDX AMD_EXTFEATURES_K8_REV_D_EDX + +/* Family 0Fh, Revision F */ +#define AMD_FEATURES_K8_REV_F_ECX (AMD_FEATURES_K8_REV_E_ECX | \ + __bit(X86_FEATURE_CX16)) +#define AMD_FEATURES_K8_REV_F_EDX AMD_FEATURES_K8_REV_E_EDX +#define AMD_EXTFEATURES_K8_REV_F_ECX (AMD_EXTFEATURES_K8_REV_E_ECX |\ + __bit(X86_FEATURE_SVME) | __bit(X86_FEATURE_EXTAPICSPACE) | \ + __bit(X86_FEATURE_ALTMOVCR)) +#define AMD_EXTFEATURES_K8_REV_F_EDX (AMD_EXTFEATURES_K8_REV_E_EDX |\ + __bit(X86_FEATURE_RDTSCP)) + +/* Family 0Fh, Revision G */ +#define AMD_FEATURES_K8_REV_G_ECX AMD_FEATURES_K8_REV_F_ECX +#define AMD_FEATURES_K8_REV_G_EDX AMD_FEATURES_K8_REV_F_EDX +#define AMD_EXTFEATURES_K8_REV_G_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\ + __bit(X86_FEATURE_3DNOWPF)) +#define AMD_EXTFEATURES_K8_REV_G_EDX AMD_EXTFEATURES_K8_REV_F_EDX + +/* Family 10h, Revision B */ +#define AMD_FEATURES_FAM10h_REV_B_ECX (AMD_FEATURES_K8_REV_F_ECX | \ + __bit(X86_FEATURE_POPCNT) | __bit(X86_FEATURE_MWAIT)) +#define AMD_FEATURES_FAM10h_REV_B_EDX AMD_FEATURES_K8_REV_F_EDX +#define AMD_EXTFEATURES_FAM10h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\ + __bit(X86_FEATURE_ABM) | __bit(X86_FEATURE_SSE4A) | \ + __bit(X86_FEATURE_MISALIGNSSE) | __bit(X86_FEATURE_OSVW) | \ + __bit(X86_FEATURE_IBS)) +#define AMD_EXTFEATURES_FAM10h_REV_B_EDX (AMD_EXTFEATURES_K8_REV_F_EDX |\ + __bit(X86_FEATURE_PAGE1GB)) + +/* Family 10h, Revision C */ +#define AMD_FEATURES_FAM10h_REV_C_ECX AMD_FEATURES_FAM10h_REV_B_ECX +#define AMD_FEATURES_FAM10h_REV_C_EDX AMD_FEATURES_FAM10h_REV_B_EDX +#define AMD_EXTFEATURES_FAM10h_REV_C_ECX (AMD_EXTFEATURES_FAM10h_REV_B_ECX |\ + __bit(X86_FEATURE_SKINIT) | __bit(X86_FEATURE_WDT)) +#define AMD_EXTFEATURES_FAM10h_REV_C_EDX AMD_EXTFEATURES_FAM10h_REV_B_EDX + +/* Family 11h, Revision B */ +#define AMD_FEATURES_FAM11h_REV_B_ECX AMD_FEATURES_K8_REV_G_ECX +#define AMD_FEATURES_FAM11h_REV_B_EDX AMD_FEATURES_K8_REV_G_EDX +#define AMD_EXTFEATURES_FAM11h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_G_ECX |\ + __bit(X86_FEATURE_SKINIT)) +#define AMD_EXTFEATURES_FAM11h_REV_B_EDX AMD_EXTFEATURES_K8_REV_G_EDX + +/* AMD errata checking + * + * Errata are defined using the AMD_LEGACY_ERRATUM() or AMD_OSVW_ERRATUM() + * macros. The latter is intended for newer errata that have an OSVW id + * assigned, which it takes as first argument. Both take a variable number + * of family-specific model-stepping ranges created by AMD_MODEL_RANGE(). + * + * Example 1: + * #define AMD_ERRATUM_319 \ + * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), \ + * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), \ + * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)) + * Example 2: + * #define AMD_ERRATUM_400 \ + * AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), \ + * AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)) + * + */ + +#define AMD_LEGACY_ERRATUM(...) 0 /* legacy */, __VA_ARGS__, 0 +#define AMD_OSVW_ERRATUM(osvw_id, ...) 
1 /* osvw */, osvw_id, __VA_ARGS__, 0 +#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \ + (((f) << 24) | ((m_start) << 16) | ((s_start) << 12) | ((m_end) << 4) | (s_end)) +#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff) +#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff) +#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff) + +#define AMD_ERRATUM_170 \ + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0x67, 0xf)) + +#define AMD_ERRATUM_383 \ + AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf), \ + AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0x1, 0x0)) + +int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...); +#endif /* __AMD_H__ */
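The AMD_MODEL_RANGE() packing introduced above stores the family in bits 31-24 and a combined model/stepping pair for both the start and the end of the affected range. A worked example of encoding one range of AMD_ERRATUM_383 and testing a (family, model, stepping) triple against it; in_range() mirrors what a range walk inside cpu_has_amd_erratum() would plausibly do, with the OSVW path omitted:

    #include <assert.h>
    #include <stdio.h>

    #define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
        (((f) << 24) | ((m_start) << 16) | ((s_start) << 12) | \
         ((m_end) << 4) | (s_end))
    #define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
    #define AMD_MODEL_RANGE_START(range)  (((range) >> 12) & 0xfff)
    #define AMD_MODEL_RANGE_END(range)    ((range) & 0xfff)

    static int in_range(unsigned range, unsigned f, unsigned m, unsigned s)
    {
        unsigned ms = (m << 4) | s;   /* model/stepping packed like the range */

        return AMD_MODEL_RANGE_FAMILY(range) == f &&
               AMD_MODEL_RANGE_START(range) <= ms &&
               ms <= AMD_MODEL_RANGE_END(range);
    }

    int main(void)
    {
        /* one of the two ranges of AMD_ERRATUM_383 above */
        unsigned range = AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf);

        assert(in_range(range, 0x10, 0x2, 0x1));    /* first affected step */
        assert(!in_range(range, 0x10, 0x2, 0x0));   /* below the range */
        assert(!in_range(range, 0x12, 0x0, 0x0));   /* different family */
        printf("range = %#x\n", range);
        return 0;
    }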
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/apic.h --- a/xen/include/asm-x86/apic.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/asm-x86/apic.h Tue Jul 06 11:54:40 2010 +0100 @@ -25,6 +25,8 @@ extern int x2apic_enabled; extern int x2apic_enabled; extern int directed_eoi_enabled; +extern void check_x2apic_preenabled(void); +extern int x2apic_cmdline_disable(void); extern void enable_x2apic(void); static __inline int x2apic_is_available(void) diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/debugger.h --- a/xen/include/asm-x86/debugger.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/asm-x86/debugger.h Tue Jul 06 11:54:40 2010 +0100 @@ -68,10 +68,8 @@ static inline int debugger_trap_entry( if ( guest_kernel_mode(v, regs) && v->domain->debugger_attached && ((vector == TRAP_int3) || (vector == TRAP_debug)) ) { -#ifdef XEN_GDBSX_CONFIG if ( vector != TRAP_debug ) /* domain pause is good enough */ current->arch.gdbsx_vcpu_event = vector; -#endif domain_pause_for_debugger(); return 1; } diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/asm-x86/domain.h Tue Jul 06 11:54:40 2010 +0100 @@ -415,9 +415,7 @@ struct arch_vcpu struct mapcache_vcpu mapcache; #endif -#if XEN_GDBSX_CONFIG uint32_t gdbsx_vcpu_event; -#endif /* A secondary copy of the vcpu time info. */ XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest; diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/genapic.h --- a/xen/include/asm-x86/genapic.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/asm-x86/genapic.h Tue Jul 06 11:54:40 2010 +0100 @@ -70,6 +70,7 @@ cpumask_t vector_allocation_domain_flat( .send_IPI_mask = send_IPI_mask_flat, \ .send_IPI_self = send_IPI_self_flat +const struct genapic *apic_x2apic_probe(void); void init_apic_ldr_x2apic_phys(void); void init_apic_ldr_x2apic_cluster(void); void clustered_apic_check_x2apic(void); diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/io_apic.h --- a/xen/include/asm-x86/io_apic.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/asm-x86/io_apic.h Tue Jul 06 11:54:40 2010 +0100 @@ -199,6 +199,12 @@ extern void ioapic_suspend(void); extern void ioapic_suspend(void); extern void ioapic_resume(void); +extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); +extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); +extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); +extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); +extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); + #else /* !CONFIG_X86_IO_APIC */ static inline void init_ioapic_mappings(void) {} static inline void ioapic_suspend(void) {} diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/irq.h --- a/xen/include/asm-x86/irq.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/asm-x86/irq.h Tue Jul 06 11:54:40 2010 +0100 @@ -91,6 +91,8 @@ void disable_8259A_irq(unsigned int irq) void disable_8259A_irq(unsigned int irq); void enable_8259A_irq(unsigned int irq); int i8259A_irq_pending(unsigned int irq); +void mask_8259A(void); +void unmask_8259A(void); void init_8259A(int aeoi); int i8259A_suspend(void); int i8259A_resume(void); diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/msr-index.h --- a/xen/include/asm-x86/msr-index.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/asm-x86/msr-index.h Tue Jul 06 11:54:40 2010 +0100 @@ -251,6 +251,10 @@ /* AMD Microcode MSRs */ #define MSR_AMD_PATCHLEVEL 0x0000008b #define MSR_AMD_PATCHLOADER 0xc0010020 + +/* AMD OS Visible Workaround MSRs */ +#define MSR_AMD_OSVW_ID_LENGTH 0xc0010140 +#define MSR_AMD_OSVW_STATUS 0xc0010141 /* K6 MSRs */ #define MSR_K6_EFER 0xc0000080 diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/mtrr.h --- a/xen/include/asm-x86/mtrr.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/asm-x86/mtrr.h Tue Jul 06 11:54:40 2010 +0100 @@ -65,7 +65,7 @@ extern u32 get_pat_flags(struct vcpu *v, extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr, paddr_t spaddr, uint8_t gmtrr_mtype); extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, - mfn_t mfn, uint8_t *ipat, int direct_mmio); + mfn_t mfn, uint8_t *ipat, bool_t direct_mmio); extern void ept_change_entry_emt_with_range( struct domain *d, unsigned long start_gfn, unsigned long end_gfn); extern unsigned char pat_type_2_pte_flags(unsigned char pat_type); diff -r ce278fdaced3 -r db6234d3eafb xen/include/public/io/ring.h --- a/xen/include/public/io/ring.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/public/io/ring.h Tue Jul 06 11:54:40 2010 +0100 @@ -103,8 +103,16 @@ struct __name##_sring { struct __name##_sring { \ RING_IDX req_prod, req_event; \ RING_IDX rsp_prod, rsp_event; \ - uint8_t netfront_smartpoll_active; \ - uint8_t pad[47]; \ + union { \ + struct { \ + uint8_t smartpoll_active; \ + } netif; \ +
struct { \ + uint8_t msg; \ + } tapif_user; \ + uint8_t pvt_pad[4]; \ + } private; \ + uint8_t __pad[44]; \ union __name##_sring_entry ring[1]; /* variable-length */ \ }; \ \ @@ -148,7 +156,8 @@ typedef struct __name##_back_ring __name #define SHARED_RING_INIT(_s) do { \ (_s)->req_prod = (_s)->rsp_prod = 0; \ (_s)->req_event = (_s)->rsp_event = 1; \ - (void)memset((_s)->pad, 0, sizeof((_s)->pad)); \ + (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ } while(0) #define FRONT_RING_INIT(_r, _s, __size) do { \ diff -r ce278fdaced3 -r db6234d3eafb xen/include/public/memory.h --- a/xen/include/public/memory.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/public/memory.h Tue Jul 06 11:54:40 2010 +0100 @@ -52,6 +52,9 @@ #define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu) /* Flag to populate physmap with populate-on-demand entries */ #define XENMEMF_populate_on_demand (1<<16) +/* Flag to request allocation only from the node specified */ +#define XENMEMF_exact_node_request (1<<17) +#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request) #endif struct xen_memory_reservation { diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/xen/iommu.h Tue Jul 06 11:54:40 2010 +0100 @@ -58,6 +58,8 @@ struct iommu { int iommu_setup(void); int iommu_supports_eim(void); +int iommu_enable_IR(void); +int intremap_enabled(void); int iommu_add_device(struct pci_dev *pdev); int iommu_remove_device(struct pci_dev *pdev); diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/mm.h --- a/xen/include/xen/mm.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/xen/mm.h Tue Jul 06 11:54:40 2010 +0100 @@ -82,6 +82,8 @@ int assign_pages( #define MEMF_tmem (1U<<_MEMF_tmem) #define _MEMF_no_dma 3 #define MEMF_no_dma (1U<<_MEMF_no_dma) +#define _MEMF_exact_node 4 +#define MEMF_exact_node (1U<<_MEMF_exact_node) #define _MEMF_node 8 #define MEMF_node(n) ((((n)+1)&0xff)<<_MEMF_node) #define _MEMF_bits 24 diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/trace.h --- a/xen/include/xen/trace.h Fri Jul 02 18:04:54 2010 +0100 +++ b/xen/include/xen/trace.h Tue Jul 06 11:54:40 2010 +0100 @@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op int trace_will_trace_event(u32 event); -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data); +void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *); static inline void trace_var(u32 event, int cycles, int extra, unsigned char *extra_data) @@ -57,7 +57,7 @@ static inline void trace_var(u32 event, { \ u32 _d[1]; \ _d[0] = d1; \ - __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -68,7 +68,7 @@ static inline void trace_var(u32 event, u32 _d[2]; \ _d[0] = d1; \ _d[1] = d2; \ - __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -80,7 +80,7 @@ static inline void trace_var(u32 event, _d[0] = d1; \ _d[1] = d2; \ _d[2] = d3; \ - __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -93,7 +93,7 @@ static inline void trace_var(u32 event, _d[1] = d2; \ _d[2] = d3; \ _d[3] = d4; \ - __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -107,7 +107,7 @@ static inline void trace_var(u32 event, _d[2] = d3; \ _d[3] = 
d4; \ _d[4] = d5; \ - __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -122,7 +122,7 @@ static inline void trace_var(u32 event, _d[3] = d4; \ _d[4] = d5; \ _d[5] = d6; \ - __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 )
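The trace.h hunks above replace sizeof(*_d)*N with sizeof(_d). Since _d is declared as a true array inside each macro, the two expressions are guaranteed equal, and the whole-array form cannot drift out of sync when an element is added. A standalone check:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t _d[5];   /* mirrors the local array in the 5-word macro */

        /* whole-array sizeof already multiplies by the element count */
        assert(sizeof(_d) == sizeof(*_d) * 5);
        printf("payload size: %zu bytes\n", sizeof(_d));
        return 0;
    }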
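The ring.h hunk above carves a named per-user union out of the old pad bytes: a 4-byte union plus a 44-byte pad replaces the former 1+47 bytes, so the shared header keeps its size and existing rings stay layout-compatible. A simplified stand-in for the generated sring struct that makes the arithmetic checkable; the field names follow the patch, while the struct around them is illustrative:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t RING_IDX;

    struct demo_sring {
        RING_IDX req_prod, req_event;
        RING_IDX rsp_prod, rsp_event;
        union {
            struct { uint8_t smartpoll_active; } netif;
            struct { uint8_t msg; } tapif_user;
            uint8_t pvt_pad[4];
        } private;               /* a valid identifier in C, though not in C++ */
        uint8_t __pad[44];
    };

    int main(void)
    {
        /* 16 bytes of indexes + 4-byte union + 44-byte pad = 64, as before */
        assert(sizeof(struct demo_sring) == 64);
        printf("sring header: %zu bytes\n", sizeof(struct demo_sring));
        return 0;
    }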
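The memory.h hunk above adds XENMEMF_exact_node_request, which turns the existing node hint into a hard requirement, and the mm.h hunk mirrors it internally as MEMF_exact_node. A worked example of the flag arithmetic; XENMEMF_node() is reproduced here on the assumption that it inverts the XENMEMF_get_node() decoding shown in the hunk's context:

    #include <assert.h>
    #include <stdio.h>

    #define XENMEMF_node(n)            ((((n) + 1) & 0xffu) << 8)  /* assumed */
    #define XENMEMF_get_node(x)        ((((x) >> 8) - 1) & 0xffu)
    #define XENMEMF_exact_node_request (1 << 17)
    #define XENMEMF_exact_node(n)      (XENMEMF_node(n) | XENMEMF_exact_node_request)

    int main(void)
    {
        unsigned int flags = XENMEMF_exact_node(3);

        assert(XENMEMF_get_node(flags) == 3);          /* node survives */
        assert(flags & XENMEMF_exact_node_request);    /* and is binding */
        printf("flags = %#x, node = %u\n", flags, XENMEMF_get_node(flags));
        return 0;
    }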