
[Xen-changelog] [xen-unstable] Merge



# HG changeset patch
# User Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
# Date 1278413680 -3600
# Node ID db6234d3eafbd2d7b1469d8b98a13d6ab0b89973
# Parent  ce278fdaced3ff898651657fda848c2b4daee648
# Parent  9d965ac1b0dbcb2f1fd4845e30753251d68d064f
Merge
---
 xen/arch/x86/cpu/amd.h                 |  103 -----------
 xen/arch/x86/hvm/vmx/vpmu.c            |  119 -------------
 tools/blktap2/drivers/tapdisk-vbd.c    |    2 
 xen/Rules.mk                           |    2 
 xen/arch/x86/Makefile                  |    2 
 xen/arch/x86/apic.c                    |  198 +++++++++++++++++++---
 xen/arch/x86/cpu/amd.c                 |   50 +++++
 xen/arch/x86/domctl.c                  |    9 -
 xen/arch/x86/genapic/x2apic.c          |   19 ++
 xen/arch/x86/hvm/mtrr.c                |    2 
 xen/arch/x86/hvm/svm/asid.c            |    4 
 xen/arch/x86/hvm/svm/svm.c             |    7 
 xen/arch/x86/hvm/vmx/vmcs.c            |    4 
 xen/arch/x86/hvm/vmx/vmx.c             |    5 
 xen/arch/x86/i8259.c                   |   20 ++
 xen/arch/x86/io_apic.c                 |  120 +++++++++++++
 xen/arch/x86/mm/hap/p2m-ept.c          |  297 ++++++++++++++++++---------------
 xen/arch/x86/setup.c                   |    9 -
 xen/common/memory.c                    |    2 
 xen/common/page_alloc.c                |    8 
 xen/common/trace.c                     |  216 +++++++++++++++---------
 xen/drivers/passthrough/vtd/dmar.c     |   20 +-
 xen/drivers/passthrough/vtd/dmar.h     |    1 
 xen/drivers/passthrough/vtd/extern.h   |    3 
 xen/drivers/passthrough/vtd/intremap.c |  125 ++++++++++++-
 xen/drivers/passthrough/vtd/iommu.c    |   54 ++----
 xen/drivers/passthrough/vtd/qinval.c   |   19 +-
 xen/drivers/passthrough/vtd/vtd.h      |    3 
 xen/include/asm-x86/amd.h              |  138 +++++++++++++++
 xen/include/asm-x86/apic.h             |    2 
 xen/include/asm-x86/debugger.h         |    2 
 xen/include/asm-x86/domain.h           |    2 
 xen/include/asm-x86/genapic.h          |    1 
 xen/include/asm-x86/io_apic.h          |    6 
 xen/include/asm-x86/irq.h              |    2 
 xen/include/asm-x86/msr-index.h        |    4 
 xen/include/asm-x86/mtrr.h             |    2 
 xen/include/public/io/ring.h           |   15 +
 xen/include/public/memory.h            |    3 
 xen/include/xen/iommu.h                |    2 
 xen/include/xen/mm.h                   |    2 
 xen/include/xen/trace.h                |   14 -
 42 files changed, 1073 insertions(+), 545 deletions(-)

diff -r ce278fdaced3 -r db6234d3eafb tools/blktap2/drivers/tapdisk-vbd.c
--- a/tools/blktap2/drivers/tapdisk-vbd.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/tools/blktap2/drivers/tapdisk-vbd.c       Tue Jul 06 11:54:40 2010 +0100
@@ -1684,7 +1684,7 @@ tapdisk_vbd_check_ring_message(td_vbd_t 
        if (!vbd->ring.sring)
                return -EINVAL;
 
-       switch (vbd->ring.sring->pad[0]) {
+       switch (vbd->ring.sring->private.tapif_user.msg) {
        case 0:
                return 0;
 
diff -r ce278fdaced3 -r db6234d3eafb xen/Rules.mk
--- a/xen/Rules.mk      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/Rules.mk      Tue Jul 06 11:54:40 2010 +0100
@@ -8,7 +8,6 @@ perfc_arrays  ?= n
 perfc_arrays  ?= n
 lock_profile  ?= n
 crash_debug   ?= n
-gdbsx         ?= n
 frame_pointer ?= n
 
 XEN_ROOT=$(BASEDIR)/..
@@ -53,7 +52,6 @@ CFLAGS-$(perfc_arrays)  += -DPERF_ARRAYS
 CFLAGS-$(perfc_arrays)  += -DPERF_ARRAYS
 CFLAGS-$(lock_profile)  += -DLOCK_PROFILE
 CFLAGS-$(frame_pointer) += -fno-omit-frame-pointer -DCONFIG_FRAME_POINTER
-CFLAGS-$(gdbsx)         += -DXEN_GDBSX_CONFIG
 
 ifneq ($(max_phys_cpus),)
 CFLAGS-y                += -DMAX_PHYS_CPUS=$(max_phys_cpus)
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/Makefile     Tue Jul 06 11:54:40 2010 +0100
@@ -13,6 +13,7 @@ obj-y += clear_page.o
 obj-y += clear_page.o
 obj-y += copy_page.o
 obj-y += compat.o
+obj-y += debug.o
 obj-y += delay.o
 obj-y += dmi_scan.o
 obj-y += domctl.o
@@ -57,7 +58,6 @@ obj-y += bzimage.o
 obj-y += bzimage.o
 
 obj-$(crash_debug) += gdbstub.o
-obj-$(gdbsx) += debug.o
 
 x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
 
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/apic.c       Tue Jul 06 11:54:40 2010 +0100
@@ -70,6 +70,9 @@ int x2apic_enabled __read_mostly = 0;
 int x2apic_enabled __read_mostly = 0;
 int directed_eoi_enabled __read_mostly = 0;
 
+/* x2APIC is enabled in BIOS */
+static int x2apic_preenabled;
+
 /*
  * The following vectors are part of the Linux architecture, there
  * is no hardware IRQ pin equivalent for them, they are triggered
@@ -487,6 +490,47 @@ static void apic_pm_activate(void)
 static void apic_pm_activate(void)
 {
     apic_pm_state.active = 1;
+}
+
+static void resume_x2apic(void)
+{
+    uint64_t msr_content;
+    struct IO_APIC_route_entry **ioapic_entries = NULL;
+
+    ASSERT(x2apic_enabled);
+
+    ioapic_entries = alloc_ioapic_entries();
+    if ( !ioapic_entries )
+    {
+        printk("Allocate ioapic_entries failed\n");
+        goto out;
+    }
+
+    if ( save_IO_APIC_setup(ioapic_entries) )
+    {
+        printk("Saving IO-APIC state failed\n");
+        goto out;
+    }
+
+    mask_8259A();
+    mask_IO_APIC_setup(ioapic_entries);
+
+    iommu_enable_IR();
+
+    rdmsrl(MSR_IA32_APICBASE, msr_content);
+    if ( !(msr_content & MSR_IA32_APICBASE_EXTD) )
+    {
+        msr_content |= MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD;
+        msr_content = (uint32_t)msr_content;
+        wrmsrl(MSR_IA32_APICBASE, msr_content);
+    }
+
+    restore_IO_APIC_setup(ioapic_entries);
+    unmask_8259A();
+
+out:
+    if ( ioapic_entries )
+        free_ioapic_entries(ioapic_entries);
 }
 
 void __devinit setup_local_APIC(void)
@@ -727,7 +771,7 @@ int lapic_resume(void)
             msr_content | MSR_IA32_APICBASE_ENABLE | mp_lapic_addr);
     }
     else
-        enable_x2apic();
+        resume_x2apic();
 
     apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
     apic_write(APIC_ID, apic_pm_state.apic_id);
@@ -894,35 +938,138 @@ no_apic:
     return -1;
 }
 
-void enable_x2apic(void)
+void check_x2apic_preenabled(void)
 {
     uint64_t msr_content;
 
-    if ( smp_processor_id() == 0 )
+    if ( !x2apic_is_available() )
+        return;
+
+    rdmsrl(MSR_IA32_APICBASE, msr_content);
+    if ( msr_content & MSR_IA32_APICBASE_EXTD )
     {
-        if ( !iommu_supports_eim() )
+        printk("x2APIC mode is already enabled by BIOS.\n");
+        x2apic_preenabled = 1;
+        x2apic_enabled = 1;
+    }
+}
+
+static void enable_bsp_x2apic(void)
+{
+    struct IO_APIC_route_entry **ioapic_entries = NULL;
+    const struct genapic *x2apic_genapic = NULL;
+
+    ASSERT(smp_processor_id() == 0);
+
+    if ( x2apic_preenabled )
+    {
+        /*
+         * Interrupt remapping should also be enabled by the BIOS when
+         * x2APIC is already enabled by the BIOS; otherwise it is a
+         * BIOS bug.
+         */
+        if ( !intremap_enabled() )
+            panic("Interrupt remapping is not enabled by BIOS while "
+                  "x2APIC is already enabled by BIOS!\n");
+    }
+
+    x2apic_genapic = apic_x2apic_probe();
+    if ( x2apic_genapic )
+        genapic = x2apic_genapic;
+    else
+    {
+        if ( x2apic_cmdline_disable() )
         {
-            printk("x2APIC would not be enabled without EIM.\n");
-            return;
+            if ( x2apic_preenabled )
+            {
+                /* Ignore x2apic=0, and set default x2apic mode */
+                genapic = &apic_x2apic_cluster;
+                printk("x2APIC: already enabled by BIOS, ignore x2apic=0.\n");
+            }
+            else
+            {
+                printk("Not enable x2APIC due to x2apic=0 is set.\n");
+                return;
+            }
         }
-
-        if ( apic_x2apic_phys.probe() )
-            genapic = &apic_x2apic_phys;
-        else if ( apic_x2apic_cluster.probe() )
-            genapic = &apic_x2apic_cluster;
         else
         {
-            printk("x2APIC would not be enabled due to x2apic=off.\n");
-            return;
+            if ( !iommu_enabled || !iommu_intremap || !iommu_qinval )
+                panic("Cannot enable x2APIC due to iommu or interrupt "
+                      "remapping or queued invalidation is disabled "
+                      "by command line!\n");
+            else
+            {
+                if ( x2apic_preenabled )
+                    panic("x2APIC: already enabled by BIOS, but "
+                          "iommu_supports_eim fails\n");
+                else
+                {
+                    printk("Not enable x2APIC due to "
+                           "iommu_supports_eim fails!\n");
+                    return;
+                }
+            }
         }
-
-        x2apic_enabled = 1;
-        printk("Switched to APIC driver %s.\n", genapic->name);
-    }
-    else
+    }
+
+    ioapic_entries = alloc_ioapic_entries();
+    if ( !ioapic_entries )
     {
-        BUG_ON(!x2apic_enabled); /* APs only enable x2apic when BSP did so. */
-    }
+        printk("Allocate ioapic_entries failed\n");
+        goto out;
+    }
+
+    if ( save_IO_APIC_setup(ioapic_entries) )
+    {
+        printk("Saving IO-APIC state failed\n");
+        goto out;
+    }
+
+    mask_8259A();
+    mask_IO_APIC_setup(ioapic_entries);
+
+    if ( iommu_enable_IR() )
+    {
+        printk("Would not enable x2APIC due to interrupt remapping "
+               "cannot be enabled.\n");
+        goto restore_out;
+    }
+
+    x2apic_enabled = 1;
+    printk("Switched to APIC driver %s.\n", genapic->name);
+
+    if ( !x2apic_preenabled )
+    {
+        uint64_t msr_content;
+        rdmsrl(MSR_IA32_APICBASE, msr_content);
+        if ( !(msr_content & MSR_IA32_APICBASE_EXTD) )
+        {
+            msr_content |= MSR_IA32_APICBASE_ENABLE |
+                           MSR_IA32_APICBASE_EXTD;
+            msr_content = (uint32_t)msr_content;
+            wrmsrl(MSR_IA32_APICBASE, msr_content);
+            printk("x2APIC mode enabled.\n");
+        }
+    }
+
+restore_out:
+    restore_IO_APIC_setup(ioapic_entries);
+    unmask_8259A();
+
+out:
+    if ( ioapic_entries )
+        free_ioapic_entries(ioapic_entries);
+}
+
+static void enable_ap_x2apic(void)
+{
+    uint64_t msr_content;
+
+    ASSERT(smp_processor_id() != 0);
+
+    /* APs only enable x2apic when BSP did so. */
+    BUG_ON(!x2apic_enabled);
 
     rdmsrl(MSR_IA32_APICBASE, msr_content);
     if ( !(msr_content & MSR_IA32_APICBASE_EXTD) )
@@ -930,10 +1077,15 @@ void enable_x2apic(void)
         msr_content |= MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD;
         msr_content = (uint32_t)msr_content;
         wrmsrl(MSR_IA32_APICBASE, msr_content);
-        printk("x2APIC mode enabled.\n");
-    }
+    }
+}
+
+void enable_x2apic(void)
+{
+    if ( smp_processor_id() == 0 )
+        enable_bsp_x2apic();
     else
-        printk("x2APIC mode enabled by BIOS.\n");
+        enable_ap_x2apic();
 }
 
 void __init init_apic_mappings(void)
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/cpu/amd.c    Tue Jul 06 11:54:40 2010 +0100
@@ -7,12 +7,12 @@
 #include <asm/io.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
+#include <asm/amd.h>
 #include <asm/hvm/support.h>
 #include <asm/setup.h> /* amd_init_cpu */
 #include <asm/acpi.h>
 
 #include "cpu.h"
-#include "amd.h"
 
 /*
  * Pre-canned values for overriding the CPUID features 
@@ -148,6 +148,54 @@ static void __devinit set_cpuidmask(cons
 }
 
 /*
+ * Check for the presence of an AMD erratum. Arguments are defined in amd.h 
+ * for each known erratum. Return 1 if erratum is found.
+ */
+int cpu_has_amd_erratum(const struct cpuinfo_x86 *cpu, int osvw, ...) 
+{
+       va_list ap;
+       u32 range;
+       u32 ms;
+       
+       if (cpu->x86_vendor != X86_VENDOR_AMD)
+               return 0;
+
+       va_start(ap, osvw);
+
+       if (osvw) {
+               u16 osvw_id = va_arg(ap, int);
+
+               if (cpu_has(cpu, X86_FEATURE_OSVW)) {
+                       u64 osvw_len;
+                       rdmsrl(MSR_AMD_OSVW_ID_LENGTH, osvw_len);
+
+                       if (osvw_id < osvw_len) {
+                               u64 osvw_bits;
+                               rdmsrl(MSR_AMD_OSVW_STATUS + (osvw_id >> 6), 
+                                      osvw_bits);
+
+                               va_end(ap);
+                               return (osvw_bits >> (osvw_id & 0x3f)) & 0x01;
+                       }
+               }
+       }
+
+       /* OSVW unavailable or ID unknown, match family-model-stepping range */
+       ms = (cpu->x86_model << 8) | cpu->x86_mask;
+       while ((range = va_arg(ap, int))) {
+               if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
+                   (ms >= AMD_MODEL_RANGE_START(range)) &&
+                   (ms <= AMD_MODEL_RANGE_END(range))) {
+                       va_end(ap);
+                       return 1;
+               }
+       }
+
+       va_end(ap);
+       return 0;
+}
+
+/*
  * amd_flush_filter={on,off}. Forcibly Enable or disable the TLB flush
  * filter on AMD 64-bit processors.
  */
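
The erratum arguments consumed by cpu_has_amd_erratum() are supplied by
the new xen/include/asm-x86/amd.h (see the diffstat above).  A minimal
sketch of the shape those definitions plausibly take; the macro bodies
below are assumptions consistent with how the function walks its
va_list, not text copied from the header:

    /* Sketch only: an erratum expands to "osvw flag[, osvw id],
     * model ranges..., 0", matching the va_list walk above.
     * AMD_MODEL_RANGE() is assumed to pack a family plus a
     * model/stepping interval into a single int. */
    #define AMD_LEGACY_ERRATUM(range...)        0 /* no OSVW */, range, 0
    #define AMD_OSVW_ERRATUM(osvw_id, range...) 1, osvw_id, range, 0

    /* Erratum #170 as used by svm_asid_init() below (illustrative): */
    #define AMD_ERRATUM_170 \
        AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0x67, 0xf))
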
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/cpu/amd.h
--- a/xen/arch/x86/cpu/amd.h    Fri Jul 02 18:04:54 2010 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,103 +0,0 @@
-/*
- * amd.h - AMD processor specific definitions
- */
-
-#ifndef __AMD_H__
-#define __AMD_H__
-
-#include <asm/cpufeature.h>
-
-/* CPUID masked for use by AMD-V Extended Migration */
-
-#define X86_FEATURE_BITPOS(_feature_) ((_feature_) % 32)
-#define __bit(_x_) (1U << X86_FEATURE_BITPOS(_x_))
-
-/* Family 0Fh, Revision C */
-#define AMD_FEATURES_K8_REV_C_ECX  0
-#define AMD_FEATURES_K8_REV_C_EDX (                                    \
-       __bit(X86_FEATURE_FPU)      | __bit(X86_FEATURE_VME)   |        \
-       __bit(X86_FEATURE_DE)       | __bit(X86_FEATURE_PSE)   |        \
-       __bit(X86_FEATURE_TSC)      | __bit(X86_FEATURE_MSR)   |        \
-       __bit(X86_FEATURE_PAE)      | __bit(X86_FEATURE_MCE)   |        \
-       __bit(X86_FEATURE_CX8)      | __bit(X86_FEATURE_APIC)  |        \
-       __bit(X86_FEATURE_SEP)      | __bit(X86_FEATURE_MTRR)  |        \
-       __bit(X86_FEATURE_PGE)      | __bit(X86_FEATURE_MCA)   |        \
-       __bit(X86_FEATURE_CMOV)     | __bit(X86_FEATURE_PAT)   |        \
-       __bit(X86_FEATURE_PSE36)    | __bit(X86_FEATURE_CLFLSH)|        \
-       __bit(X86_FEATURE_MMX)      | __bit(X86_FEATURE_FXSR)  |        \
-       __bit(X86_FEATURE_XMM)      | __bit(X86_FEATURE_XMM2))
-#define AMD_EXTFEATURES_K8_REV_C_ECX  0 
-#define AMD_EXTFEATURES_K8_REV_C_EDX  (                                       \
-       __bit(X86_FEATURE_FPU)      | __bit(X86_FEATURE_VME)   |        \
-       __bit(X86_FEATURE_DE)       | __bit(X86_FEATURE_PSE)   |        \
-       __bit(X86_FEATURE_TSC)      | __bit(X86_FEATURE_MSR)   |        \
-       __bit(X86_FEATURE_PAE)      | __bit(X86_FEATURE_MCE)   |        \
-       __bit(X86_FEATURE_CX8)      | __bit(X86_FEATURE_APIC)  |        \
-       __bit(X86_FEATURE_SYSCALL)  | __bit(X86_FEATURE_MTRR)  |        \
-       __bit(X86_FEATURE_PGE)      | __bit(X86_FEATURE_MCA)   |        \
-       __bit(X86_FEATURE_CMOV)     | __bit(X86_FEATURE_PAT)   |        \
-       __bit(X86_FEATURE_PSE36)    | __bit(X86_FEATURE_NX)    |        \
-       __bit(X86_FEATURE_MMXEXT)   | __bit(X86_FEATURE_MMX)   |        \
-       __bit(X86_FEATURE_FXSR)     | __bit(X86_FEATURE_LM)    |        \
-       __bit(X86_FEATURE_3DNOWEXT) | __bit(X86_FEATURE_3DNOW))
-
-/* Family 0Fh, Revision D */
-#define AMD_FEATURES_K8_REV_D_ECX         AMD_FEATURES_K8_REV_C_ECX
-#define AMD_FEATURES_K8_REV_D_EDX         AMD_FEATURES_K8_REV_C_EDX
-#define AMD_EXTFEATURES_K8_REV_D_ECX     (AMD_EXTFEATURES_K8_REV_C_ECX |\
-       __bit(X86_FEATURE_LAHF_LM))
-#define AMD_EXTFEATURES_K8_REV_D_EDX     (AMD_EXTFEATURES_K8_REV_C_EDX |\
-       __bit(X86_FEATURE_FFXSR))
-
-/* Family 0Fh, Revision E */
-#define AMD_FEATURES_K8_REV_E_ECX        (AMD_FEATURES_K8_REV_D_ECX |  \
-       __bit(X86_FEATURE_XMM3))
-#define AMD_FEATURES_K8_REV_E_EDX        (AMD_FEATURES_K8_REV_D_EDX |  \
-       __bit(X86_FEATURE_HT))
-#define AMD_EXTFEATURES_K8_REV_E_ECX     (AMD_EXTFEATURES_K8_REV_D_ECX |\
-       __bit(X86_FEATURE_CMP_LEGACY)) 
-#define AMD_EXTFEATURES_K8_REV_E_EDX      AMD_EXTFEATURES_K8_REV_D_EDX
-
-/* Family 0Fh, Revision F */
-#define AMD_FEATURES_K8_REV_F_ECX        (AMD_FEATURES_K8_REV_E_ECX |  \
-       __bit(X86_FEATURE_CX16))
-#define AMD_FEATURES_K8_REV_F_EDX         AMD_FEATURES_K8_REV_E_EDX
-#define AMD_EXTFEATURES_K8_REV_F_ECX     (AMD_EXTFEATURES_K8_REV_E_ECX |\
-       __bit(X86_FEATURE_SVME) | __bit(X86_FEATURE_EXTAPICSPACE) |     \
-       __bit(X86_FEATURE_ALTMOVCR))
-#define AMD_EXTFEATURES_K8_REV_F_EDX     (AMD_EXTFEATURES_K8_REV_E_EDX |\
-       __bit(X86_FEATURE_RDTSCP))
-
-/* Family 0Fh, Revision G */
-#define AMD_FEATURES_K8_REV_G_ECX         AMD_FEATURES_K8_REV_F_ECX
-#define AMD_FEATURES_K8_REV_G_EDX         AMD_FEATURES_K8_REV_F_EDX
-#define AMD_EXTFEATURES_K8_REV_G_ECX     (AMD_EXTFEATURES_K8_REV_F_ECX |\
-       __bit(X86_FEATURE_3DNOWPF))
-#define AMD_EXTFEATURES_K8_REV_G_EDX      AMD_EXTFEATURES_K8_REV_F_EDX
-
-/* Family 10h, Revision B */
-#define AMD_FEATURES_FAM10h_REV_B_ECX    (AMD_FEATURES_K8_REV_F_ECX |  \
-       __bit(X86_FEATURE_POPCNT) | __bit(X86_FEATURE_MWAIT))
-#define AMD_FEATURES_FAM10h_REV_B_EDX     AMD_FEATURES_K8_REV_F_EDX
-#define AMD_EXTFEATURES_FAM10h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\
-       __bit(X86_FEATURE_ABM) | __bit(X86_FEATURE_SSE4A) |             \
-       __bit(X86_FEATURE_MISALIGNSSE) | __bit(X86_FEATURE_OSVW) |      \
-       __bit(X86_FEATURE_IBS))
-#define AMD_EXTFEATURES_FAM10h_REV_B_EDX (AMD_EXTFEATURES_K8_REV_F_EDX |\
-       __bit(X86_FEATURE_PAGE1GB))
-
-/* Family 10h, Revision C */
-#define AMD_FEATURES_FAM10h_REV_C_ECX     AMD_FEATURES_FAM10h_REV_B_ECX
-#define AMD_FEATURES_FAM10h_REV_C_EDX     AMD_FEATURES_FAM10h_REV_B_EDX
-#define AMD_EXTFEATURES_FAM10h_REV_C_ECX (AMD_EXTFEATURES_FAM10h_REV_B_ECX |\
-       __bit(X86_FEATURE_SKINIT) | __bit(X86_FEATURE_WDT))
-#define AMD_EXTFEATURES_FAM10h_REV_C_EDX  AMD_EXTFEATURES_FAM10h_REV_B_EDX
-
-/* Family 11h, Revision B */
-#define AMD_FEATURES_FAM11h_REV_B_ECX     AMD_FEATURES_K8_REV_G_ECX
-#define AMD_FEATURES_FAM11h_REV_B_EDX     AMD_FEATURES_K8_REV_G_EDX
-#define AMD_EXTFEATURES_FAM11h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_G_ECX |\
-       __bit(X86_FEATURE_SKINIT))
-#define AMD_EXTFEATURES_FAM11h_REV_B_EDX  AMD_EXTFEATURES_K8_REV_G_EDX
-
-#endif /* __AMD_H__ */
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/domctl.c     Tue Jul 06 11:54:40 2010 +0100
@@ -34,7 +34,6 @@
 #include <public/mem_event.h>
 #include <asm/mem_sharing.h>
 
-#ifdef XEN_GDBSX_CONFIG                    
 #ifdef XEN_KDB_CONFIG
 #include "../kdb/include/kdbdefs.h"
 #include "../kdb/include/kdbproto.h"
@@ -43,8 +42,9 @@ typedef unsigned char kdbbyt_t;
 typedef unsigned char kdbbyt_t;
 extern int dbg_rw_mem(kdbva_t, kdbbyt_t *, int, domid_t, int, uint64_t);
 #endif
-static int 
-gdbsx_guest_mem_io(domid_t domid, struct xen_domctl_gdbsx_memio *iop)
+
+static int gdbsx_guest_mem_io(
+    domid_t domid, struct xen_domctl_gdbsx_memio *iop)
 {   
     ulong l_uva = (ulong)iop->uva;
     iop->remain = dbg_rw_mem(
@@ -52,7 +52,6 @@ gdbsx_guest_mem_io(domid_t domid, struct
         iop->gwr, iop->pgd3val);
     return (iop->remain ? -EFAULT : 0);
 }
-#endif  /* XEN_GDBSX_CONFIG */
 
 long arch_do_domctl(
     struct xen_domctl *domctl,
@@ -1309,7 +1308,6 @@ long arch_do_domctl(
     }
     break;
 
-#ifdef XEN_GDBSX_CONFIG
     case XEN_DOMCTL_gdbsx_guestmemio:
     {
         struct domain *d;
@@ -1418,7 +1416,6 @@ long arch_do_domctl(
         rcu_unlock_domain(d);
     }
     break;
-#endif /* XEN_GDBSX_CONFIG */
 
 #ifdef __x86_64__
     case XEN_DOMCTL_mem_event_op:
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/genapic/x2apic.c
--- a/xen/arch/x86/genapic/x2apic.c     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/genapic/x2apic.c     Tue Jul 06 11:54:40 2010 +0100
@@ -33,6 +33,11 @@ static int  x2apic_phys; /* By default w
 static int  x2apic_phys; /* By default we use logical cluster mode. */
 boolean_param("x2apic_phys", x2apic_phys);
 
+int x2apic_cmdline_disable(void)
+{
+    return (x2apic == 0);
+}
+
 static int probe_x2apic_phys(void)
 {
     return x2apic && x2apic_phys && x2apic_is_available() &&
@@ -54,6 +59,20 @@ const struct genapic apic_x2apic_cluster
     APIC_INIT("x2apic_cluster", probe_x2apic_cluster),
     GENAPIC_X2APIC_CLUSTER
 };
+
+const struct genapic *apic_x2apic_probe(void)
+{
+    if ( !x2apic || !x2apic_is_available() )
+        return NULL;
+
+    if ( !iommu_supports_eim() )
+        return NULL;
+
+    if ( x2apic_phys )
+        return &apic_x2apic_phys;
+    else
+        return &apic_x2apic_cluster;
+}
 
 void init_apic_ldr_x2apic_phys(void)
 {
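
For reference, apic_x2apic_probe() is consumed by enable_bsp_x2apic()
in apic.c above; a minimal usage sketch (the fallback message is
illustrative, not from the patch):

    const struct genapic *g = apic_x2apic_probe();

    if ( g != NULL )
        genapic = g;   /* &apic_x2apic_phys iff x2apic_phys was given */
    else
        /* NULL: x2apic=0, the CPU lacks x2APIC, or no EIM support. */
        printk("Keeping the current genapic driver.\n");
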
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/mtrr.c
--- a/xen/arch/x86/hvm/mtrr.c   Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/mtrr.c   Tue Jul 06 11:54:40 2010 +0100
@@ -707,7 +707,7 @@ HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save
                           1, HVMSR_PER_VCPU);
 
 uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
-                           uint8_t *ipat, int direct_mmio)
+                           uint8_t *ipat, bool_t direct_mmio)
 {
     uint8_t gmtrr_mtype, hmtrr_mtype;
     uint32_t type;
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/svm/asid.c
--- a/xen/arch/x86/hvm/svm/asid.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/svm/asid.c       Tue Jul 06 11:54:40 2010 +0100
@@ -21,14 +21,14 @@
 #include <xen/lib.h>
 #include <xen/perfc.h>
 #include <asm/hvm/svm/asid.h>
+#include <asm/amd.h>
 
 void svm_asid_init(struct cpuinfo_x86 *c)
 {
     int nasids = 0;
 
     /* Check for erratum #170, and leave ASIDs disabled if it's present. */
-    if ( (c->x86 == 0x10) ||
-         ((c->x86 == 0xf) && (c->x86_model >= 0x68) && (c->x86_mask >= 1)) )
+    if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_170) )
         nasids = cpuid_ebx(0x8000000A);
 
     hvm_asid_init(nasids);
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Tue Jul 06 11:54:40 2010 +0100
@@ -34,6 +34,7 @@
 #include <asm/regs.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
+#include <asm/amd.h>
 #include <asm/types.h>
 #include <asm/debugreg.h>
 #include <asm/msr.h>
@@ -846,8 +847,8 @@ static void svm_init_erratum_383(struct 
 {
     uint64_t msr_content;
 
-    /* only family 10h is affected */
-    if ( c->x86 != 0x10 )
+    /* check whether CPU is affected */
+    if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_383) )
         return;
 
     /* use safe methods to be compatible with nested virtualization */
@@ -1492,9 +1493,7 @@ asmlinkage void svm_vmexit_handler(struc
         if ( (inst_len = __get_instruction_length(v, INSTR_INT3)) == 0 )
             break;
         __update_guest_eip(regs, inst_len);
-#ifdef XEN_GDBSX_CONFIG
         current->arch.gdbsx_vcpu_event = TRAP_int3;
-#endif
         domain_pause_for_debugger();
         break;
 
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Tue Jul 06 11:54:40 2010 +0100
@@ -1064,8 +1064,10 @@ void vmx_do_resume(struct vcpu *v)
          *  1: flushing cache (wbinvd) when the guest is scheduled out if
          *     there is no wbinvd exit, or
          *  2: execute wbinvd on all dirty pCPUs when guest wbinvd exits.
+         * If the VT-d engine can force snooping, neither is needed.
          */
-        if ( has_arch_pdevs(v->domain) && !cpu_has_wbinvd_exiting )
+        if ( has_arch_pdevs(v->domain) && !iommu_snoop
+                && !cpu_has_wbinvd_exiting )
         {
             int cpu = v->arch.hvm_vmx.active_cpu;
             if ( cpu != -1 )
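
Both VMX hunks in this merge gate the cache-flush machinery on the same
condition; restated as a single predicate (a sketch, not code from the
patch):

    /* wbinvd bookkeeping is only needed when a passthrough device may
     * issue non-snooped DMA (no VT-d snoop control) and the CPU cannot
     * intercept guest wbinvd. */
    bool_t need_wbinvd_care = has_arch_pdevs(v->domain) &&
                              !iommu_snoop &&
                              !cpu_has_wbinvd_exiting;
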
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Tue Jul 06 11:54:40 2010 +0100
@@ -2089,6 +2089,9 @@ static void vmx_wbinvd_intercept(void)
     if ( !has_arch_mmios(current->domain) )
         return;
 
+    if ( iommu_snoop )
+        return;
+
     if ( cpu_has_wbinvd_exiting )
         on_each_cpu(wbinvd_ipi, NULL, 1);
     else
@@ -2406,9 +2409,7 @@ asmlinkage void vmx_vmexit_handler(struc
                 goto exit_and_crash;
             inst_len = __get_instruction_length(); /* Safe: INT3 */
             __update_guest_eip(inst_len);
-#ifdef XEN_GDBSX_CONFIG
             current->arch.gdbsx_vcpu_event = TRAP_int3;
-#endif
             domain_pause_for_debugger();
             break;
         case TRAP_no_device:
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vpmu.c
--- a/xen/arch/x86/hvm/vmx/vpmu.c       Fri Jul 02 18:04:54 2010 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,119 +0,0 @@
-/*
- * vpmu.c: PMU virtualization for HVM domain.
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Author: Haitao Shan <haitao.shan@xxxxxxxxx>
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <asm/regs.h>
-#include <asm/types.h>
-#include <asm/msr.h>
-#include <asm/hvm/support.h>
-#include <asm/hvm/vmx/vmx.h>
-#include <asm/hvm/vmx/vmcs.h>
-#include <public/sched.h>
-#include <public/hvm/save.h>
-#include <asm/hvm/vmx/vpmu.h>
-
-static int __read_mostly opt_vpmu_enabled;
-boolean_param("vpmu", opt_vpmu_enabled);
-
-int vpmu_do_wrmsr(struct cpu_user_regs *regs)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
-    if ( vpmu->arch_vpmu_ops )
-        return vpmu->arch_vpmu_ops->do_wrmsr(regs);
-    return 0;
-}
-
-int vpmu_do_rdmsr(struct cpu_user_regs *regs)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
-    if ( vpmu->arch_vpmu_ops )
-        return vpmu->arch_vpmu_ops->do_rdmsr(regs);
-    return 0;
-}
-
-int vpmu_do_interrupt(struct cpu_user_regs *regs)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
-    if ( vpmu->arch_vpmu_ops )
-        return vpmu->arch_vpmu_ops->do_interrupt(regs);
-    return 0;
-}
-
-void vpmu_save(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( vpmu->arch_vpmu_ops )
-        vpmu->arch_vpmu_ops->arch_vpmu_save(v);
-}
-
-void vpmu_load(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( vpmu->arch_vpmu_ops )
-        vpmu->arch_vpmu_ops->arch_vpmu_load(v);
-}
-
-extern struct arch_vpmu_ops core2_vpmu_ops;
-void vpmu_initialise(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( !opt_vpmu_enabled )
-        return;
-
-    if ( vpmu->flags & VPMU_CONTEXT_ALLOCATED )
-        vpmu_destroy(v);
-
-    if ( current_cpu_data.x86 == 6 )
-    {
-        switch ( current_cpu_data.x86_model )
-        {
-        case 15:
-        case 23:
-        case 26:
-        case 29:
-            vpmu->arch_vpmu_ops = &core2_vpmu_ops;
-            break;
-        }
-    }
-
-    if ( vpmu->arch_vpmu_ops != NULL )
-    {
-        vpmu->flags = 0;
-        vpmu->context = NULL;
-        vpmu->arch_vpmu_ops->arch_vpmu_initialise(v);
-    }
-}
-
-void vpmu_destroy(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( vpmu->arch_vpmu_ops )
-        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
-}
-
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/i8259.c      Tue Jul 06 11:54:40 2010 +0100
@@ -173,6 +173,26 @@ int i8259A_irq_pending(unsigned int irq)
     spin_unlock_irqrestore(&i8259A_lock, flags);
 
     return ret;
+}
+
+void mask_8259A(void)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&i8259A_lock, flags);
+    outb(0xff, 0xA1);
+    outb(0xff, 0x21);
+    spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void unmask_8259A(void)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&i8259A_lock, flags);
+    outb(cached_A1, 0xA1);
+    outb(cached_21, 0x21);
+    spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
 /*
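
mask_8259A() and unmask_8259A() program OCW1, the interrupt mask
register, through the PIC data ports (0x21 master, 0xA1 slave);
unmasking restores the cached mask bytes rather than unconditionally
clearing them.  They exist to bracket the interrupt-remapping switch in
apic.c, roughly (a sketch of the calling pattern assembled from the
hunks above):

    mask_8259A();                         /* OCW1 = 0xff on both PICs */
    mask_IO_APIC_setup(ioapic_entries);   /* mask every saved RTE     */

    iommu_enable_IR();                    /* switch delivery mode     */

    restore_IO_APIC_setup(ioapic_entries);
    unmask_8259A();                       /* restore cached_21/_A1    */
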
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/io_apic.c    Tue Jul 06 11:54:40 2010 +0100
@@ -134,6 +134,126 @@ static void __init replace_pin_at_irq(un
             break;
         entry = irq_2_pin + entry->next;
     }
+}
+
+struct IO_APIC_route_entry **alloc_ioapic_entries(void)
+{
+    int apic;
+    struct IO_APIC_route_entry **ioapic_entries;
+
+    ioapic_entries = xmalloc_array(struct IO_APIC_route_entry *, nr_ioapics);
+    if (!ioapic_entries)
+        return 0;
+
+    for (apic = 0; apic < nr_ioapics; apic++) {
+        ioapic_entries[apic] =
+            xmalloc_array(struct IO_APIC_route_entry,
+                          nr_ioapic_registers[apic]);
+        if (!ioapic_entries[apic])
+            goto nomem;
+    }
+
+    return ioapic_entries;
+
+nomem:
+    while (--apic >= 0)
+        xfree(ioapic_entries[apic]);
+    xfree(ioapic_entries);
+
+    return 0;
+}
+
+/*
+ * Saves all the IO-APIC RTEs.
+ */
+int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
+{
+    int apic, pin;
+
+    if (!ioapic_entries)
+        return -ENOMEM;
+
+    for (apic = 0; apic < nr_ioapics; apic++) {
+        if (!ioapic_entries[apic])
+            return -ENOMEM;
+
+        for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+            *(((int *)&ioapic_entries[apic][pin])+0) =
+                __io_apic_read(apic, 0x10+pin*2);
+            *(((int *)&ioapic_entries[apic][pin])+1) =
+                __io_apic_read(apic, 0x11+pin*2);
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Mask all IO APIC entries.
+ */
+void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
+{
+    int apic, pin;
+
+    if (!ioapic_entries)
+        return;
+
+    for (apic = 0; apic < nr_ioapics; apic++) {
+        if (!ioapic_entries[apic])
+            break;
+
+        for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+            struct IO_APIC_route_entry entry;
+            unsigned long flags;
+
+            entry = ioapic_entries[apic][pin];
+            if (!entry.mask) {
+                entry.mask = 1;
+
+                spin_lock_irqsave(&ioapic_lock, flags);
+                __io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+                __io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+                spin_unlock_irqrestore(&ioapic_lock, flags);
+            }
+        }
+    }
+}
+
+/*
+ * Restore IO-APIC entries which were saved in ioapic_entries.
+ */
+int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
+{
+    int apic, pin;
+    unsigned long flags;
+    struct IO_APIC_route_entry entry;
+
+    if (!ioapic_entries)
+        return -ENOMEM;
+
+    for (apic = 0; apic < nr_ioapics; apic++) {
+        if (!ioapic_entries[apic])
+            return -ENOMEM;
+
+        for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+            entry = ioapic_entries[apic][pin];
+            spin_lock_irqsave(&ioapic_lock, flags);
+            __io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+            __io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+            spin_unlock_irqrestore(&ioapic_lock, flags);
+        }
+    }
+
+    return 0;
+}
+
+void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
+{
+    int apic;
+
+    for (apic = 0; apic < nr_ioapics; apic++)
+        xfree(ioapic_entries[apic]);
+
+    xfree(ioapic_entries);
 }
 
 static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
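
The save/restore helpers above access each 64-bit RTE as two 32-bit
registers at 0x10+2*pin and 0x11+2*pin.  A union expresses the same
access pattern without the pointer casts; read_rte() here is a
hypothetical helper for illustration, not part of the patch:

    static struct IO_APIC_route_entry read_rte(int apic, int pin)
    {
        union {
            struct IO_APIC_route_entry entry;
            uint32_t word[2];
        } u;

        u.word[0] = __io_apic_read(apic, 0x10 + pin * 2); /* low half  */
        u.word[1] = __io_apic_read(apic, 0x11 + pin * 2); /* high half */
        return u.entry;
    }
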
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Tue Jul 06 11:54:40 2010 +0100
@@ -118,6 +118,74 @@ static int ept_set_middle_entry(struct d
     return 1;
 }
 
+/* Free the EPT sub-tree behind an entry. */
+void ept_free_entry(struct domain *d, ept_entry_t *ept_entry, int level)
+{
+    /* End if the entry is a leaf entry. */
+    if ( level == 0 || !is_epte_present(ept_entry) ||
+         is_epte_superpage(ept_entry) )
+        return;
+
+    if ( level > 1 )
+    {
+        ept_entry_t *epte = map_domain_page(ept_entry->mfn);
+        for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+            ept_free_entry(d, epte + i, level - 1);
+        unmap_domain_page(epte);
+    }
+
+    d->arch.p2m->free_page(d, mfn_to_page(ept_entry->mfn));
+}
+
+static int ept_split_super_page(struct domain *d, ept_entry_t *ept_entry,
+                                int level, int target)
+{
+    ept_entry_t new_ept, *table;
+    uint64_t trunk;
+    int rv = 1;
+
+    /* End if the entry is a leaf entry or reaches the target level. */
+    if ( level == 0 || level == target )
+        return rv;
+
+    ASSERT(is_epte_superpage(ept_entry));
+
+    if ( !ept_set_middle_entry(d, &new_ept) )
+        return 0;
+
+    table = map_domain_page(new_ept.mfn);
+    trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
+
+    for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+    {
+        ept_entry_t *epte = table + i;
+
+        epte->emt = ept_entry->emt;
+        epte->ipat = ept_entry->ipat;
+        epte->sp = (level > 1) ? 1 : 0;
+        epte->avail1 = ept_entry->avail1;
+        epte->avail2 = 0;
+        epte->mfn = ept_entry->mfn + i * trunk;
+
+        ept_p2m_type_to_flags(epte, epte->avail1);
+
+        if ( (level - 1) == target )
+            continue;
+
+        ASSERT(is_epte_superpage(epte));
+
+        if ( !(rv = ept_split_super_page(d, epte, level - 1, target)) )
+            break;
+    }
+
+    unmap_domain_page(table);
+
+    /* Even on failure we should install the newly allocated EPT page. */
+    *ept_entry = new_ept;
+
+    return rv;
+}
+
 /* Take the currently mapped table, find the corresponding gfn entry,
  * and map the next table, if available.  If the entry is empty
  * and read_only is set, 
@@ -134,13 +202,18 @@ static int ept_set_middle_entry(struct d
  */
 static int ept_next_level(struct domain *d, bool_t read_only,
                           ept_entry_t **table, unsigned long *gfn_remainder,
-                          u32 shift)
-{
+                          int next_level)
+{
+    unsigned long mfn;
     ept_entry_t *ept_entry;
-    ept_entry_t *next;
-    u32 index;
+    u32 shift, index;
+
+    shift = next_level * EPT_TABLE_ORDER;
 
     index = *gfn_remainder >> shift;
+
+    /* index must fall within the page */
+    ASSERT(index < EPT_PAGETABLE_ENTRIES);
 
     ept_entry = (*table) + index;
 
@@ -159,69 +232,12 @@ static int ept_next_level(struct domain 
     /* The only time sp would be set here is if we had hit a superpage */
     if ( is_epte_superpage(ept_entry) )
         return GUEST_TABLE_SUPER_PAGE;
-    else
-    {
-        *gfn_remainder &= (1UL << shift) - 1;
-        next = map_domain_page(ept_entry->mfn);
-        unmap_domain_page(*table);
-        *table = next;
-        return GUEST_TABLE_NORMAL_PAGE;
-    }
-}
-
-/* It's super page before and we should break down it now. */
-static int ept_split_large_page(struct domain *d,
-                                ept_entry_t **table, u32 *index,
-                                unsigned long gfn, int level)
-{
-    ept_entry_t *prev_table = *table;
-    ept_entry_t *split_table = NULL;
-    ept_entry_t *split_entry = NULL;
-    ept_entry_t *ept_entry = (*table) + (*index);
-    ept_entry_t temp_ept_entry;
-    unsigned long s_gfn, s_mfn;
-    unsigned long offset, trunk;
-    int i;
-
-    /* alloc new page for new ept middle level entry which is
-     * before a leaf super entry
-     */
-
-    if ( !ept_set_middle_entry(d, &temp_ept_entry) )
-        return 0;
-
-    /* split the super page to small next level pages */
-    split_table = map_domain_page(temp_ept_entry.mfn);
-    offset = gfn & ((1UL << (level * EPT_TABLE_ORDER)) - 1);
-    trunk = (1UL << ((level-1) * EPT_TABLE_ORDER));
-
-    for ( i = 0; i < (1UL << EPT_TABLE_ORDER); i++ )
-    {
-        s_gfn = gfn - offset + i * trunk;
-        s_mfn = ept_entry->mfn + i * trunk;
-
-        split_entry = split_table + i;
-        split_entry->emt = ept_entry->emt;
-        split_entry->ipat = ept_entry->ipat;
-
-        split_entry->sp = (level > 1) ? 1 : 0;
-
-        split_entry->mfn = s_mfn;
-
-        split_entry->avail1 = ept_entry->avail1;
-        split_entry->avail2 = 0;
-        /* last step */
-        split_entry->r = split_entry->w = split_entry->x = 1;
-        ept_p2m_type_to_flags(split_entry, ept_entry->avail1);
-    }
-
-    *ept_entry = temp_ept_entry;
-    
-    *index = offset / trunk;
-    *table = split_table;
-    unmap_domain_page(prev_table);
-
-    return 1;
+
+    mfn = ept_entry->mfn;
+    unmap_domain_page(*table);
+    *table = map_domain_page(mfn);
+    *gfn_remainder &= (1UL << shift) - 1;
+    return GUEST_TABLE_NORMAL_PAGE;
 }
 
 /*
@@ -229,56 +245,64 @@ static int ept_split_large_page(struct d
  * by observing whether any gfn->mfn translations are modified.
  */
 static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
               unsigned int order, p2m_type_t p2mt)
 {
-    ept_entry_t *table = NULL;
+    ept_entry_t *table, *ept_entry;
     unsigned long gfn_remainder = gfn;
     unsigned long offset = 0;
-    ept_entry_t *ept_entry = NULL;
     u32 index;
-    int i;
+    int i, target = order / EPT_TABLE_ORDER;
     int rv = 0;
     int ret = 0;
-    int split_level = 0;
-    int walk_level = order / EPT_TABLE_ORDER;
-    int direct_mmio = (p2mt == p2m_mmio_direct);
+    bool_t direct_mmio = (p2mt == p2m_mmio_direct);
     uint8_t ipat = 0;
     int need_modify_vtd_table = 1;
     int needs_sync = 1;
 
-    if (  order != 0 )
-        if ( (gfn & ((1UL << order) - 1)) )
-            return 1;
+    /*
+     * The caller must ensure:
+     * 1. gfn and mfn are valid and aligned to the order boundary.
+     * 2. gfn does not exceed the guest physical address width.
+     * 3. order is valid (a multiple of EPT_TABLE_ORDER).
+     */
+    if ( ((gfn | mfn_x(mfn)) & ((1UL << order) - 1)) ||
+         (gfn >> ((ept_get_wl(d) + 1) * EPT_TABLE_ORDER)) ||
+         (order % EPT_TABLE_ORDER) )
+        return 0;
+
+    ASSERT((target == 2 && hvm_hap_has_1gb(d)) ||
+           (target == 1 && hvm_hap_has_2mb(d)) ||
+           (target == 0));
 
     table = map_domain_page(ept_get_asr(d));
 
     ASSERT(table != NULL);
 
-    for ( i = ept_get_wl(d); i > walk_level; i-- )
-    {
-        ret = ept_next_level(d, 0, &table, &gfn_remainder, i * EPT_TABLE_ORDER);
+    for ( i = ept_get_wl(d); i > target; i-- )
+    {
+        ret = ept_next_level(d, 0, &table, &gfn_remainder, i);
         if ( !ret )
             goto out;
         else if ( ret != GUEST_TABLE_NORMAL_PAGE )
             break;
     }
 
-    /* If order == 0, we should only get POD if we have a POD superpage.
-     * If i > walk_level, we need to split the page; otherwise,
-     * just behave as normal. */
-    ASSERT(ret != GUEST_TABLE_POD_PAGE || i != walk_level);
-
-    index = gfn_remainder >> ( i ?  (i * EPT_TABLE_ORDER): order);
-    offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
-
-    split_level = i;
+    ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target);
+
+    index = gfn_remainder >> (i * EPT_TABLE_ORDER);
+    offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
 
     ept_entry = table + index;
 
-    if ( i == walk_level )
-    {
-        /* We reached the level we're looking for */
+    /*
+     * At this point we must be at a leaf EPT entry,
+     * with i == target or i > target.
+     */
+
+    if ( i == target )
+    {
+        /* We reached the target level. */
 
         /* No need to flush if the old entry wasn't valid */
         if ( !is_epte_present(ept_entry) )
@@ -291,15 +315,14 @@ ept_set_entry(struct domain *d, unsigned
                                                 direct_mmio);
             ept_entry->ipat = ipat;
             ept_entry->sp = order ? 1 : 0;
+            ept_entry->avail1 = p2mt;
+            ept_entry->avail2 = 0;
 
             if ( ept_entry->mfn == mfn_x(mfn) )
                 need_modify_vtd_table = 0;
             else
                 ept_entry->mfn = mfn_x(mfn);
 
-            ept_entry->avail1 = p2mt;
-            ept_entry->avail2 = 0;
-
             ept_p2m_type_to_flags(ept_entry, p2mt);
         }
         else
@@ -307,32 +330,51 @@ ept_set_entry(struct domain *d, unsigned
     }
     else
     {
-        int level;
-        ept_entry_t *split_ept_entry;
-
-        for ( level = split_level; level > walk_level ; level-- )
-        {
-            rv = ept_split_large_page(d, &table, &index, gfn, level);
-            if ( !rv )
-                goto out;
-        }
-
-        split_ept_entry = table + index;
-        split_ept_entry->avail1 = p2mt;
-        ept_p2m_type_to_flags(split_ept_entry, p2mt);
-        split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat,
-                                                  direct_mmio);
-        split_ept_entry->ipat = ipat;
-
-        if ( split_ept_entry->mfn == mfn_x(mfn) )
-            need_modify_vtd_table = 0;
-        else
-            split_ept_entry->mfn = mfn_x(mfn);
+        /* We need to split the original page. */
+        ept_entry_t split_ept_entry;
+
+        ASSERT(is_epte_superpage(ept_entry));
+
+        split_ept_entry = *ept_entry;
+
+        if ( !ept_split_super_page(d, &split_ept_entry, i, target) )
+        {
+            ept_free_entry(d, &split_ept_entry, i);
+            goto out;
+        }
+
+        /* Now install the newly split EPT sub-tree. */
+        /* NB: ensure the domain is paused and there is no in-flight VT-d DMA. */
+        *ept_entry = split_ept_entry;
+
+        /* Then move down to the level where we want to make the real changes. */
+        for ( ; i > target; i-- )
+            ept_next_level(d, 0, &table, &gfn_remainder, i);
+
+        ASSERT(i == target);
+
+        index = gfn_remainder >> (i * EPT_TABLE_ORDER);
+        offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
+
+        ept_entry = table + index;
+
+        ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio);
+        ept_entry->ipat = ipat;
+        ept_entry->sp = i ? 1 : 0;
+        ept_entry->avail1 = p2mt;
+        ept_entry->avail2 = 0;
+
+        if ( ept_entry->mfn == mfn_x(mfn) )
+             need_modify_vtd_table = 0;
+        else /* the caller should take care of the previous page */
+            ept_entry->mfn = mfn_x(mfn);
+
+        ept_p2m_type_to_flags(ept_entry, p2mt);
     }
 
     /* Track the highest gfn for which we have ever had a valid mapping */
-    if ( mfn_valid(mfn_x(mfn))
-         && (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
+    if ( mfn_valid(mfn_x(mfn)) &&
+         (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
         d->arch.p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
 
     /* Success */
@@ -354,11 +396,11 @@ out:
                 for ( i = 0; i < (1 << order); i++ )
                     iommu_map_page(
                         d, gfn - offset + i, mfn_x(mfn) - offset + i,
-                        IOMMUF_readable|IOMMUF_writable);
+                        IOMMUF_readable | IOMMUF_writable);
             }
             else if ( !order )
                 iommu_map_page(
-                    d, gfn, mfn_x(mfn), IOMMUF_readable|IOMMUF_writable);
+                    d, gfn, mfn_x(mfn), IOMMUF_readable | IOMMUF_writable);
         }
         else
         {
@@ -398,8 +440,7 @@ static mfn_t ept_get_entry(struct domain
     for ( i = ept_get_wl(d); i > 0; i-- )
     {
     retry:
-        ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                             i * EPT_TABLE_ORDER);
+        ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
         if ( !ret )
             goto out;
         else if ( ret == GUEST_TABLE_POD_PAGE )
@@ -486,8 +527,7 @@ static ept_entry_t ept_get_entry_content
 
     for ( i = ept_get_wl(d); i > 0; i-- )
     {
-        ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                             i * EPT_TABLE_ORDER);
+        ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
         if ( !ret || ret == GUEST_TABLE_POD_PAGE )
             goto out;
         else if ( ret == GUEST_TABLE_SUPER_PAGE )
@@ -559,7 +599,7 @@ static mfn_t ept_get_entry_current(unsig
     return ept_get_entry(current->domain, gfn, t, q);
 }
 
-/* 
+/*
  * To test if the new emt type is the same with old,
  * return 1 to not to reset ept entry.
  */
@@ -569,14 +609,14 @@ static int need_modify_ept_entry(struct 
 {
     uint8_t ipat;
     uint8_t emt;
-    int direct_mmio = (p2mt == p2m_mmio_direct);
+    bool_t direct_mmio = (p2mt == p2m_mmio_direct);
 
     emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio);
 
     if ( (emt == o_emt) && (ipat == o_ipat) )
         return 0;
 
-    return 1; 
+    return 1;
 }
 
 void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn,
@@ -710,8 +750,7 @@ static void ept_dump_p2m_table(unsigned 
 
             for ( i = ept_get_wl(d); i > 0; i-- )
             {
-                ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                                     i * EPT_TABLE_ORDER);
+                ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
                 if ( ret != GUEST_TABLE_NORMAL_PAGE )
                     break;
             }
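
Throughout the rewritten walk, target = order / EPT_TABLE_ORDER names
the level at which the new mapping is installed.  Assuming the usual
EPT_TABLE_ORDER of 9 (512 entries per level), the mapping is:

    /* order  0 -> target 0 : 4 KiB leaf page
     * order  9 -> target 1 : 2 MiB superpage (needs hvm_hap_has_2mb())
     * order 18 -> target 2 : 1 GiB superpage (needs hvm_hap_has_1gb())
     * Orders that are not multiples of EPT_TABLE_ORDER are rejected. */
    int target = order / EPT_TABLE_ORDER;
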
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/setup.c      Tue Jul 06 11:54:40 2010 +0100
@@ -909,6 +909,9 @@ void __init __start_xen(unsigned long mb
 
     tboot_probe();
 
+    /* Check if x2APIC is already enabled in BIOS */
+    check_x2apic_preenabled();
+
     /* Unmap the first page of CPU0's stack. */
     memguard_guard_stack(cpu0_stack);
 
@@ -926,9 +929,6 @@ void __init __start_xen(unsigned long mb
     generic_apic_probe();
 
     acpi_boot_init();
-
-    if ( x2apic_is_available() )
-        enable_x2apic();
 
     init_cpu_to_node();
 
@@ -941,6 +941,9 @@ void __init __start_xen(unsigned long mb
 #endif
 
     init_apic_mappings();
+
+    if ( x2apic_is_available() )
+        enable_x2apic();
 
     init_IRQ();
 
diff -r ce278fdaced3 -r db6234d3eafb xen/common/memory.c
--- a/xen/common/memory.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/common/memory.c       Tue Jul 06 11:54:40 2010 +0100
@@ -545,6 +545,8 @@ long do_memory_op(unsigned long cmd, XEN
         }
 
         args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));
+        if (reservation.mem_flags & XENMEMF_exact_node_request)
+            args.memflags |= MEMF_exact_node;
 
         if ( op == XENMEM_populate_physmap
              && (reservation.mem_flags & XENMEMF_populate_on_demand) )
diff -r ce278fdaced3 -r db6234d3eafb xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/common/page_alloc.c   Tue Jul 06 11:54:40 2010 +0100
@@ -300,11 +300,15 @@ static struct page_info *alloc_heap_page
     unsigned int i, j, zone = 0;
     unsigned int num_nodes = num_online_nodes();
     unsigned long request = 1UL << order;
+    bool_t exact_node_request = !!(memflags & MEMF_exact_node);
     cpumask_t extra_cpus_mask, mask;
     struct page_info *pg;
 
     if ( node == NUMA_NO_NODE )
+    {
         node = cpu_to_node(smp_processor_id());
+        exact_node_request = 0;
+    }
 
     ASSERT(node >= 0);
     ASSERT(zone_lo <= zone_hi);
@@ -345,6 +349,9 @@ static struct page_info *alloc_heap_page
                     goto found;
         } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
 
+        if ( exact_node_request )
+            goto not_found;
+
         /* Pick next node, wrapping around if needed. */
         node = next_node(node, node_online_map);
         if (node == MAX_NUMNODES)
@@ -360,6 +367,7 @@ static struct page_info *alloc_heap_page
         return pg;
     }
 
+ not_found:
     /* No suitable memory blocks. Fail the request. */
     spin_unlock(&heap_lock);
     return NULL;
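
Together with the memory.c hunk above, this gives callers a strict NUMA
placement mode.  A hypothetical caller-side sketch (field population
abbreviated; XENMEMF_exact_node_request is the flag added by this
merge):

    struct xen_memory_reservation res = {
        /* ... extent_start, nr_extents, extent_order, domid ... */
        .mem_flags = XENMEMF_node(2) | XENMEMF_exact_node_request,
    };
    /* do_memory_op() turns the flag into MEMF_exact_node, so
     * alloc_heap_pages() takes the new not_found exit instead of
     * falling back to the next online node when node 2 runs dry. */
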
diff -r ce278fdaced3 -r db6234d3eafb xen/common/trace.c
--- a/xen/common/trace.c        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/common/trace.c        Tue Jul 06 11:54:40 2010 +0100
@@ -50,16 +50,15 @@ static struct t_info *t_info;
 static struct t_info *t_info;
 #define T_INFO_PAGES 2  /* Size fixed at 2 pages for now. */
 #define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE))
-/* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t */
-#define T_INFO_FIRST_OFFSET ((sizeof(int16_t) + NR_CPUS * sizeof(int16_t) + 1) / sizeof(uint32_t))
 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static int data_size;
+static u32 data_size;
+static u32 t_info_first_offset __read_mostly;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */
-static int t_buf_highwater;
+static u32 t_buf_highwater;
 
 /* Number of records lost due to per-CPU trace buffer being full. */
 static DEFINE_PER_CPU(unsigned long, lost_records);
@@ -75,13 +74,37 @@ static cpumask_t tb_cpu_mask = CPU_MASK_
 /* which tracing events are enabled */
 static u32 tb_event_mask = TRC_ALL;
 
+/* Return the number of elements _type necessary to store at least _x bytes of data
+ * i.e., sizeof(_type) * ans >= _x. */
+#define fit_to_type(_type, _x) (((_x)+sizeof(_type)-1) / sizeof(_type))
+
+static void calc_tinfo_first_offset(void)
+{
+    int offset_in_bytes;
+    
+    offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
+
+    t_info_first_offset = fit_to_type(uint32_t, offset_in_bytes);
+
+    gdprintk(XENLOG_INFO, "%s: NR_CPUs %d, offset_in_bytes %d, t_info_first_offset %u\n",
+           __func__, NR_CPUS, offset_in_bytes, (unsigned)t_info_first_offset);
+}
+
 /**
  * check_tbuf_size - check to make sure that the proposed size will fit
- * in the currently sized struct t_info.
- */
-static inline int check_tbuf_size(int size)
-{
-    return (num_online_cpus() * size + T_INFO_FIRST_OFFSET) > (T_INFO_SIZE / sizeof(uint32_t));
+ * in the currently sized struct t_info and allows prod and cons to
+ * reach double the value without overflow.
+ */
+static int check_tbuf_size(u32 pages)
+{
+    struct t_buf dummy;
+    typeof(dummy.prod) size;
+    
+    size = ((typeof(dummy.prod))pages)  * PAGE_SIZE;
+    
+    return (size / PAGE_SIZE != pages)
+           || (size + size < size)
+           || (num_online_cpus() * pages + t_info_first_offset > T_INFO_SIZE / sizeof(uint32_t));
 }
 
 /**
@@ -100,7 +123,7 @@ static int alloc_trace_bufs(void)
     unsigned long nr_pages;
     /* Start after a fixed-size array of NR_CPUS */
     uint32_t *t_info_mfn_list = (uint32_t *)t_info;
-    int offset = T_INFO_FIRST_OFFSET;
+    int offset = t_info_first_offset;
 
     BUG_ON(check_tbuf_size(opt_tbuf_size));
 
@@ -115,7 +138,7 @@ static int alloc_trace_bufs(void)
     }
 
     t_info->tbuf_size = opt_tbuf_size;
-    printk("tbuf_size %d\n", t_info->tbuf_size);
+    printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size);
 
     nr_pages = opt_tbuf_size;
     order = get_order_from_pages(nr_pages);
@@ -140,7 +163,7 @@ static int alloc_trace_bufs(void)
 
         spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
 
-        buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
+        per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf;
         buf->cons = buf->prod = 0;
         per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
 
@@ -172,7 +195,7 @@ static int alloc_trace_bufs(void)
             /* Write list first, then write per-cpu offset. */
             wmb();
             t_info->mfn_offset[cpu]=offset;
-            printk("p%d mfn %"PRIx32" offset %d\n",
+            printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
                    cpu, mfn, offset);
             offset+=i;
         }
@@ -191,6 +214,7 @@ out_dealloc:
         spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
         if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
         {
+            per_cpu(t_bufs, cpu) = NULL;
             ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
             free_xenheap_pages(rawbuf, order);
         }
@@ -293,6 +317,10 @@ void __init init_trace_bufs(void)
 void __init init_trace_bufs(void)
 {
     int i;
+
+    /* Calculate offset in u32 of first mfn */
+    calc_tinfo_first_offset();
+
+    /* t_info size fixed at 2 pages for now.  That should be big enough / small enough
      * until it's worth making it dynamic. */
     t_info = alloc_xenheap_pages(1, 0);
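
The remaining hunks harden the ring arithmetic against a corrupt or
malicious consumer.  The buffer keeps prod and cons in the doubled
range [0, 2*data_size) so that completely full and completely empty
states stay distinguishable; a self-contained sketch of the invariant
(illustrative names, not patch code):

    /* Doubled-index ring: indices advance modulo 2*size, payload
     * offsets are taken modulo size.  The result is 0 when empty and
     * size when full, so no slot is wasted on disambiguation. */
    static inline uint32_t unconsumed(uint32_t prod, uint32_t cons,
                                      uint32_t size)
    {
        int32_t x = prod - cons;

        if ( x < 0 )
            x += 2 * size;
        return x;   /* in [0, size] whenever prod/cons are sane */
    }
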
@@ -405,19 +433,39 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
     return rc;
 }
 
-static inline int calc_rec_size(int cycles, int extra) 
-{
-    int rec_size;
-    rec_size = 4;
+static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra) 
+{
+    unsigned int rec_size = 4;
+
     if ( cycles )
         rec_size += 8;
     rec_size += extra;
     return rec_size;
 }
 
-static inline int calc_unconsumed_bytes(struct t_buf *buf)
-{
-    int x = buf->prod - buf->cons;
+static inline bool_t bogus(u32 prod, u32 cons)
+{
+    if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) ||
+         unlikely(cons & 3) || unlikely(cons >= 2 * data_size) )
+    {
+        tb_init_done = 0;
+        printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n",
+               smp_processor_id(), prod, cons);
+        return 1;
+    }
+    return 0;
+}
+
+static inline u32 calc_unconsumed_bytes(const struct t_buf *buf)
+{
+    u32 prod = buf->prod, cons = buf->cons;
+    s32 x;
+
+    barrier(); /* must read buf->prod and buf->cons only once */
+    if ( bogus(prod, cons) )
+        return data_size;
+
+    x = prod - cons;
     if ( x < 0 )
         x += 2*data_size;
 
@@ -427,9 +475,16 @@ static inline int calc_unconsumed_bytes(
     return x;
 }
 
-static inline int calc_bytes_to_wrap(struct t_buf *buf)
-{
-    int x = data_size - buf->prod;
+static inline u32 calc_bytes_to_wrap(const struct t_buf *buf)
+{
+    u32 prod = buf->prod, cons = buf->cons;
+    s32 x;
+
+    barrier(); /* must read buf->prod and buf->cons only once */
+    if ( bogus(prod, cons) )
+        return 0;
+
+    x = data_size - prod;
     if ( x <= 0 )
         x += data_size;
 
@@ -439,54 +494,60 @@ static inline int calc_bytes_to_wrap(str
     return x;
 }
 
-static inline int calc_bytes_avail(struct t_buf *buf)
+static inline u32 calc_bytes_avail(const struct t_buf *buf)
 {
     return data_size - calc_unconsumed_bytes(buf);
 }
 
-static inline struct t_rec *
-next_record(struct t_buf *buf)
-{
-    int x = buf->prod;
+static inline struct t_rec *next_record(const struct t_buf *buf,
+                                        uint32_t *next)
+{
+    u32 x = buf->prod, cons = buf->cons;
+
+    barrier(); /* must read buf->prod and buf->cons only once */
+    *next = x;
+    if ( !tb_init_done || bogus(x, cons) )
+        return NULL;
+
     if ( x >= data_size )
         x -= data_size;
 
-    ASSERT(x >= 0);
     ASSERT(x < data_size);
 
     return (struct t_rec *)&this_cpu(t_data)[x];
 }
 
-static inline int __insert_record(struct t_buf *buf,
-                                  unsigned long event,
-                                  int extra,
-                                  int cycles,
-                                  int rec_size,
-                                  unsigned char *extra_data)
+static inline void __insert_record(struct t_buf *buf,
+                                   unsigned long event,
+                                   unsigned int extra,
+                                   bool_t cycles,
+                                   unsigned int rec_size,
+                                   const void *extra_data)
 {
     struct t_rec *rec;
     unsigned char *dst;
-    unsigned long extra_word = extra/sizeof(u32);
-    int local_rec_size = calc_rec_size(cycles, extra);
+    unsigned int extra_word = extra / sizeof(u32);
+    unsigned int local_rec_size = calc_rec_size(cycles, extra);
     uint32_t next;
 
     BUG_ON(local_rec_size != rec_size);
     BUG_ON(extra & 3);
 
+    rec = next_record(buf, &next);
+    if ( !rec )
+        return;
     /* Double-check once more that we have enough space.
      * Don't bugcheck here, in case the userland tool is doing
      * something stupid. */
-    if ( calc_bytes_avail(buf) < rec_size )
-    {
-        printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n",
-               __func__,
-               calc_bytes_avail(buf),
-               data_size, buf->prod, buf->cons, data_size, rec_size);
-        return 0;
-    }
-    rmb();
-
-    rec = next_record(buf);
+    if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
+    {
+        if ( printk_ratelimit() )
+            printk(XENLOG_WARNING
+                   "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
+                   __func__, data_size, next, buf->cons, rec_size);
+        return;
+    }
+
     rec->event = event;
     rec->extra_u32 = extra_word;
     dst = (unsigned char *)rec->u.nocycles.extra_u32;
@@ -503,21 +564,19 @@ static inline int __insert_record(struct
 
     wmb();
 
-    next = buf->prod + rec_size;
+    next += rec_size;
     if ( next >= 2*data_size )
         next -= 2*data_size;
-    ASSERT(next >= 0);
     ASSERT(next < 2*data_size);
     buf->prod = next;
-
-    return rec_size;
-}
-
-static inline int insert_wrap_record(struct t_buf *buf, int size)
-{
-    int space_left = calc_bytes_to_wrap(buf);
-    unsigned long extra_space = space_left - sizeof(u32);
-    int cycles = 0;
+}
+
+static inline void insert_wrap_record(struct t_buf *buf,
+                                      unsigned int size)
+{
+    u32 space_left = calc_bytes_to_wrap(buf);
+    unsigned int extra_space = space_left - sizeof(u32);
+    bool_t cycles = 0;
 
     BUG_ON(space_left > size);
 
@@ -529,17 +588,13 @@ static inline int insert_wrap_record(str
         ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
     }
 
-    return __insert_record(buf,
-                    TRC_TRACE_WRAP_BUFFER,
-                    extra_space,
-                    cycles,
-                    space_left,
-                    NULL);
+    __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles,
+                    space_left, NULL);
 }
 
 #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
 
-static inline int insert_lost_records(struct t_buf *buf)
+static inline void insert_lost_records(struct t_buf *buf)
 {
     struct {
         u32 lost_records;
@@ -554,12 +609,8 @@ static inline int insert_lost_records(st
 
     this_cpu(lost_records) = 0;
 
-    return __insert_record(buf,
-                           TRC_LOST_RECORDS,
-                           sizeof(ed),
-                           1 /* cycles */,
-                           LOST_REC_SIZE,
-                           (unsigned char *)&ed);
+    __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */,
+                    LOST_REC_SIZE, &ed);
 }
 
 /*
@@ -581,13 +632,15 @@ static DECLARE_TASKLET(trace_notify_dom0
  * failure, otherwise 0.  Failure occurs only if the trace buffers are not yet
  * initialised.
  */
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
+void __trace_var(u32 event, bool_t cycles, unsigned int extra,
+                 const void *extra_data)
 {
     struct t_buf *buf;
-    unsigned long flags, bytes_to_tail, bytes_to_wrap;
-    int rec_size, total_size;
-    int extra_word;
-    int started_below_highwater = 0;
+    unsigned long flags;
+    u32 bytes_to_tail, bytes_to_wrap;
+    unsigned int rec_size, total_size;
+    unsigned int extra_word;
+    bool_t started_below_highwater;
 
     if( !tb_init_done )
         return;
@@ -626,7 +679,11 @@ void __trace_var(u32 event, int cycles, 
     buf = this_cpu(t_bufs);
 
     if ( unlikely(!buf) )
+    {
+        /* Make gcc happy */
+        started_below_highwater = 0;
         goto unlock;
+    }
 
     started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
 
@@ -707,8 +764,9 @@ unlock:
     spin_unlock_irqrestore(&this_cpu(t_lock), flags);
 
     /* Notify trace buffer consumer that we've crossed the high water mark. */
-    if ( started_below_highwater &&
-         (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
+    if ( likely(buf!=NULL)
+         && started_below_highwater
+         && (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
         tasklet_schedule(&trace_notify_dom0_tasklet);
 }
 
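The tightened __trace_var() prototype above (bool_t cycles, const void
*extra_data) lets callers hand over any 4-byte-aligned payload without
casts.  A minimal sketch of a caller under the new signature; the event
number and wrapper name are illustrative, not part of this changeset:

    /* Hypothetical caller: log two 32-bit values with a TSC stamp. */
    static void trace_example(u32 a, u32 b)
    {
        u32 d[2] = { a, b };

        /* 'extra' must be a multiple of 4: see BUG_ON(extra & 3). */
        __trace_var(0x1f001 /* illustrative event */, 1 /* cycles */,
                    sizeof(d), d);
    }
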
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/dmar.c        Tue Jul 06 11:54:40 2010 +0100
@@ -32,6 +32,7 @@
 #include "dmar.h"
 #include "iommu.h"
 #include "extern.h"
+#include "vtd.h"
 
 #undef PREFIX
 #define PREFIX VTDPREFIX "ACPI DMAR:"
@@ -378,7 +379,6 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
     struct acpi_table_drhd * drhd = (struct acpi_table_drhd *)header;
     void *dev_scope_start, *dev_scope_end;
     struct acpi_drhd_unit *dmaru;
-    void *addr;
     int ret;
     static int include_all = 0;
 
@@ -397,8 +397,9 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
         dprintk(VTDPREFIX, "  dmaru->address = %"PRIx64"\n",
                 dmaru->address);
 
-    addr = map_to_nocache_virt(0, drhd->address);
-    dmaru->ecap = dmar_readq(addr, DMAR_ECAP_REG);
+    ret = iommu_alloc(dmaru);
+    if ( ret )
+        goto out;
 
     dev_scope_start = (void *)(drhd + 1);
     dev_scope_end = ((void *)drhd) + header->length;
@@ -420,7 +421,7 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
     }
 
     if ( ret )
-        xfree(dmaru);
+        goto out;
     else if ( force_iommu || dmaru->include_all )
         acpi_register_drhd_unit(dmaru);
     else
@@ -451,14 +452,15 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
 
         if ( invalid_cnt )
         {
-            xfree(dmaru);
-
             if ( iommu_workaround_bios_bug &&
                  invalid_cnt == dmaru->scope.devices_cnt )
             {
                 dprintk(XENLOG_WARNING VTDPREFIX,
                     "  Workaround BIOS bug: ignore the DRHD due to all "
                     "devices under its scope are not PCI discoverable!\n");
+
+                iommu_free(dmaru);
+                xfree(dmaru);
             }
             else
             {
@@ -474,6 +476,12 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
             acpi_register_drhd_unit(dmaru);
     }
 
+out:
+    if ( ret )
+    {
+        iommu_free(dmaru);
+        xfree(dmaru);
+    }
     return ret;
 }
 
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/dmar.h
--- a/xen/drivers/passthrough/vtd/dmar.h        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/dmar.h        Tue Jul 06 11:54:40 2010 +0100
@@ -50,7 +50,6 @@ struct acpi_drhd_unit {
     struct dmar_scope scope;            /* must be first member of struct */
     struct list_head list;
     u64    address;                     /* register base address of the unit */
-    u64    ecap;
     u8     include_all:1;
     struct iommu *iommu;
     struct list_head ioapic_list;
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/extern.h
--- a/xen/drivers/passthrough/vtd/extern.h      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/extern.h      Tue Jul 06 11:54:40 2010 +0100
@@ -33,7 +33,7 @@ extern struct keyhandler dump_iommu_info
 
 int enable_qinval(struct iommu *iommu);
 void disable_qinval(struct iommu *iommu);
-int enable_intremap(struct iommu *iommu);
+int enable_intremap(struct iommu *iommu, int eim);
 void disable_intremap(struct iommu *iommu);
 int queue_invalidate_context(struct iommu *iommu,
     u16 did, u16 source_id, u8 function_mask, u8 granu);
@@ -44,6 +44,7 @@ int invalidate_sync(struct iommu *iommu)
 int invalidate_sync(struct iommu *iommu);
 int iommu_flush_iec_global(struct iommu *iommu);
 int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx);
+void clear_fault_bits(struct iommu *iommu);
 struct iommu * ioapic_to_iommu(unsigned int apic_id);
 struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id);
 struct acpi_drhd_unit * iommu_to_drhd(struct iommu *iommu);
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/intremap.c    Tue Jul 06 11:54:40 2010 +0100
@@ -134,18 +134,26 @@ int iommu_supports_eim(void)
     if ( !iommu_enabled || !iommu_qinval || !iommu_intremap )
         return 0;
 
+    if ( list_empty(&acpi_drhd_units) )
+    {
+        dprintk(XENLOG_WARNING VTDPREFIX, "VT-d is not supported\n");
+        return 0;
+    }
+
     /* We MUST have a DRHD unit for each IOAPIC. */
     for ( apic = 0; apic < nr_ioapics; apic++ )
         if ( !ioapic_to_drhd(IO_APIC_ID(apic)) )
+    {
+            dprintk(XENLOG_WARNING VTDPREFIX,
+                    "There is not a DRHD for IOAPIC 0x%x (id: 0x%x)!\n",
+                    apic, IO_APIC_ID(apic));
             return 0;
-
-    if ( list_empty(&acpi_drhd_units) )
-        return 0;
+    }
 
     for_each_drhd_unit ( drhd )
-        if ( !ecap_queued_inval(drhd->ecap) ||
-             !ecap_intr_remap(drhd->ecap) ||
-             !ecap_eim(drhd->ecap) )
+        if ( !ecap_queued_inval(drhd->iommu->ecap) ||
+             !ecap_intr_remap(drhd->iommu->ecap) ||
+             !ecap_eim(drhd->iommu->ecap) )
             return 0;
 
     return 1;
@@ -706,7 +714,7 @@ void msi_msg_write_remap_rte(
 }
 #endif
 
-int enable_intremap(struct iommu *iommu)
+int enable_intremap(struct iommu *iommu, int eim)
 {
     struct acpi_drhd_unit *drhd;
     struct ir_ctrl *ir_ctrl;
@@ -716,10 +724,25 @@ int enable_intremap(struct iommu *iommu)
     ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
 
     ir_ctrl = iommu_ir_ctrl(iommu);
+    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+
+    /* Return if already enabled by Xen */
+    if ( (sts & DMA_GSTS_IRES) && ir_ctrl->iremap_maddr )
+        return 0;
+
+    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+    if ( !(sts & DMA_GSTS_QIES) )
+    {
+        dprintk(XENLOG_ERR VTDPREFIX,
+                "Queued invalidation is not enabled, should not enable "
+                "interrupt remapping\n");
+        return -EINVAL;
+    }
+
     if ( ir_ctrl->iremap_maddr == 0 )
     {
         drhd = iommu_to_drhd(iommu);
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR );
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR);
         if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
@@ -732,7 +755,7 @@ int enable_intremap(struct iommu *iommu)
 #ifdef CONFIG_X86
     /* set extended interrupt mode bit */
     ir_ctrl->iremap_maddr |=
-            x2apic_enabled ? (1 << IRTA_REG_EIME_SHIFT) : 0;
+            eim ? (1 << IRTA_REG_EIME_SHIFT) : 0;
 #endif
     spin_lock_irqsave(&iommu->register_lock, flags);
 
@@ -769,13 +792,95 @@ void disable_intremap(struct iommu *iomm
     u32 sts;
     unsigned long flags;
 
-    ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
+    if ( !ecap_intr_remap(iommu->ecap) )
+        return;
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+    if ( !(sts & DMA_GSTS_IRES) )
+        goto out;
+
     dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_IRE));
 
     IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                   !(sts & DMA_GSTS_IRES), sts);
+out:
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
+
+/*
+ * This function is used to enable interrupt remapping when
+ * enabling x2apic.
+ */
+int iommu_enable_IR(void)
+{
+    struct acpi_drhd_unit *drhd;
+    struct iommu *iommu;
+
+    if ( !iommu_supports_eim() )
+        return -1;
+
+    for_each_drhd_unit ( drhd )
+    {
+        struct qi_ctrl *qi_ctrl = NULL;
+
+        iommu = drhd->iommu;
+        qi_ctrl = iommu_qi_ctrl(iommu);
+
+        /* Clear previous faults */
+        clear_fault_bits(iommu);
+
+        /*
+         * Disable interrupt remapping and queued invalidation if
+         * already enabled by BIOS
+         */
+        disable_intremap(iommu);
+        disable_qinval(iommu);
+    }
+
+    /* Enable queue invalidation */
+    for_each_drhd_unit ( drhd )
+    {
+        iommu = drhd->iommu;
+        if ( enable_qinval(iommu) != 0 )
+        {
+            dprintk(XENLOG_INFO VTDPREFIX,
+                    "Failed to enable Queued Invalidation!\n");
+            return -1;
+        }
+    }
+
+    /* Enable interrupt remapping */
+    for_each_drhd_unit ( drhd )
+    {
+        iommu = drhd->iommu;
+        if ( enable_intremap(iommu, 1) )
+        {
+            dprintk(XENLOG_INFO VTDPREFIX,
+                    "Failed to enable Interrupt Remapping!\n");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Check if interrupt remapping is enabled or not
+ * return 1: enabled
+ * return 0: not enabled
+ */
+int intremap_enabled(void)
+{
+    struct acpi_drhd_unit *drhd;
+    u32 sts;
+
+    for_each_drhd_unit ( drhd )
+    {
+        sts = dmar_readl(drhd->iommu->reg, DMAR_GSTS_REG);
+        if ( !(sts & DMA_GSTS_IRES) )
+            return 0;
+    }
+
+    return 1;
+}
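
With iommu_enable_IR() and intremap_enabled() in place, the x2APIC
bring-up code gets a simple contract: enable remapping on every DRHD,
then verify that the hardware actually latched it.  A hedged sketch of
the expected call order; the surrounding caller is illustrative, not
part of this changeset:

    /* Illustrative x2APIC enable path. */
    if ( iommu_enable_IR() != 0 )
        return;                 /* EIM unsupported or enabling failed */
    if ( !intremap_enabled() )
        return;                 /* hardware did not report DMA_GSTS_IRES */
    /* ... now safe to switch the local APICs into x2APIC mode ... */
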
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c       Tue Jul 06 11:54:40 2010 +0100
@@ -144,14 +144,17 @@ struct iommu_flush *iommu_get_flush(stru
     return iommu ? &iommu->intel->flush : NULL;
 }
 
-static unsigned int clflush_size;
 static int iommus_incoherent;
 static void __iommu_flush_cache(void *addr, unsigned int size)
 {
     int i;
+    static unsigned int clflush_size = 0;
 
     if ( !iommus_incoherent )
         return;
+
+    if ( clflush_size == 0 )
+        clflush_size = get_cache_line_size();
 
     for ( i = 0; i < size; i += clflush_size )
         cacheline_flush((char *)addr + i);
@@ -1037,7 +1040,7 @@ static int iommu_set_interrupt(struct io
     return irq;
 }
 
-static int __init iommu_alloc(struct acpi_drhd_unit *drhd)
+int __init iommu_alloc(struct acpi_drhd_unit *drhd)
 {
     struct iommu *iommu;
     unsigned long sagaw, nr_dom;
@@ -1131,7 +1134,7 @@ static int __init iommu_alloc(struct acp
     return 0;
 }
 
-static void __init iommu_free(struct acpi_drhd_unit *drhd)
+void __init iommu_free(struct acpi_drhd_unit *drhd)
 {
     struct iommu *iommu = drhd->iommu;
 
@@ -1787,7 +1790,7 @@ static void setup_dom0_devices(struct do
     spin_unlock(&pcidevs_lock);
 }
 
-static void clear_fault_bits(struct iommu *iommu)
+void clear_fault_bits(struct iommu *iommu)
 {
     u64 val;
     unsigned long flags;
@@ -1831,24 +1834,20 @@ static int init_vtd_hw(void)
         spin_lock_irqsave(&iommu->register_lock, flags);
         dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
         spin_unlock_irqrestore(&iommu->register_lock, flags);
-
-        /* initialize flush functions */
-        flush = iommu_get_flush(iommu);
-        flush->context = flush_context_reg;
-        flush->iotlb = flush_iotlb_reg;
-    }
-
-    if ( iommu_qinval )
-    {
-        for_each_drhd_unit ( drhd )
-        {
-            iommu = drhd->iommu;
-            if ( enable_qinval(iommu) != 0 )
-            {
-                dprintk(XENLOG_INFO VTDPREFIX,
-                        "Failed to enable Queued Invalidation!\n");
-                break;
-            }
+    }
+
+    for_each_drhd_unit ( drhd )
+    {
+        iommu = drhd->iommu;
+        /*
+         * If queued invalidation is not enabled, use register-based
+         * invalidation.
+         */
+        if ( enable_qinval(iommu) != 0 )
+        {
+            flush = iommu_get_flush(iommu);
+            flush->context = flush_context_reg;
+            flush->iotlb = flush_iotlb_reg;
         }
     }
 
@@ -1874,9 +1873,9 @@ static int init_vtd_hw(void)
         for_each_drhd_unit ( drhd )
         {
             iommu = drhd->iommu;
-            if ( enable_intremap(iommu) != 0 )
+            if ( enable_intremap(iommu, 0) != 0 )
             {
-                dprintk(XENLOG_INFO VTDPREFIX,
+                dprintk(XENLOG_WARNING VTDPREFIX,
                         "Failed to enable Interrupt Remapping!\n");
                 break;
             }
@@ -1943,8 +1942,6 @@ int __init intel_vtd_setup(void)
 
     platform_quirks();
 
-    clflush_size = get_cache_line_size();
-
     irq_to_iommu = xmalloc_array(struct iommu*, nr_irqs);
     BUG_ON(!irq_to_iommu);
     memset(irq_to_iommu, 0, nr_irqs * sizeof(struct iommu*));
@@ -1958,9 +1955,6 @@ int __init intel_vtd_setup(void)
      */
     for_each_drhd_unit ( drhd )
     {
-        if ( iommu_alloc(drhd) != 0 )
-            goto error;
-
         iommu = drhd->iommu;
 
         if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) )
@@ -2000,8 +1994,6 @@ int __init intel_vtd_setup(void)
     return 0;
 
  error:
-    for_each_drhd_unit ( drhd )
-        iommu_free(drhd);
     iommu_enabled = 0;
     iommu_snoop = 0;
     iommu_passthrough = 0;
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/qinval.c      Tue Jul 06 11:54:40 2010 +0100
@@ -437,10 +437,16 @@ int enable_qinval(struct iommu *iommu)
     u32 sts;
     unsigned long flags;
 
+    if ( !ecap_queued_inval(iommu->ecap) || !iommu_qinval )
+        return -ENOENT;
+
     qi_ctrl = iommu_qi_ctrl(iommu);
     flush = iommu_get_flush(iommu);
 
-    ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
+    /* Return if already enabled by Xen */
+    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+    if ( (sts & DMA_GSTS_QIES) && qi_ctrl->qinval_maddr )
+        return 0;
 
     if ( qi_ctrl->qinval_maddr == 0 )
     {
@@ -488,14 +494,19 @@ void disable_qinval(struct iommu *iommu)
     u32 sts;
     unsigned long flags;
 
-    ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
+    if ( !ecap_queued_inval(iommu->ecap) )
+        return;
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+    if ( !(sts & DMA_GSTS_QIES) )
+        goto out;
+
     dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_QIE));
 
     /* Make sure hardware complete it */
     IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                   !(sts & DMA_GSTS_QIES), sts);
-    spin_unlock_irqrestore(&iommu->register_lock, flags);
-}
+out:
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/vtd.h
--- a/xen/drivers/passthrough/vtd/vtd.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/vtd.h Tue Jul 06 11:54:40 2010 +0100
@@ -108,4 +108,7 @@ void iommu_flush_cache_entry(void *addr,
 void iommu_flush_cache_entry(void *addr, unsigned int size);
 void iommu_flush_cache_page(void *addr, unsigned long npages);
 
+int iommu_alloc(struct acpi_drhd_unit *drhd);
+void iommu_free(struct acpi_drhd_unit *drhd);
+
 #endif // _VTD_H_
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/amd.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/amd.h Tue Jul 06 11:54:40 2010 +0100
@@ -0,0 +1,138 @@
+/*
+ * amd.h - AMD processor specific definitions
+ */
+
+#ifndef __AMD_H__
+#define __AMD_H__
+
+#include <asm/cpufeature.h>
+
+/* CPUID masked for use by AMD-V Extended Migration */
+
+#define X86_FEATURE_BITPOS(_feature_) ((_feature_) % 32)
+#define __bit(_x_) (1U << X86_FEATURE_BITPOS(_x_))
+
+/* Family 0Fh, Revision C */
+#define AMD_FEATURES_K8_REV_C_ECX  0
+#define AMD_FEATURES_K8_REV_C_EDX (                                    \
+       __bit(X86_FEATURE_FPU)      | __bit(X86_FEATURE_VME)   |        \
+       __bit(X86_FEATURE_DE)       | __bit(X86_FEATURE_PSE)   |        \
+       __bit(X86_FEATURE_TSC)      | __bit(X86_FEATURE_MSR)   |        \
+       __bit(X86_FEATURE_PAE)      | __bit(X86_FEATURE_MCE)   |        \
+       __bit(X86_FEATURE_CX8)      | __bit(X86_FEATURE_APIC)  |        \
+       __bit(X86_FEATURE_SEP)      | __bit(X86_FEATURE_MTRR)  |        \
+       __bit(X86_FEATURE_PGE)      | __bit(X86_FEATURE_MCA)   |        \
+       __bit(X86_FEATURE_CMOV)     | __bit(X86_FEATURE_PAT)   |        \
+       __bit(X86_FEATURE_PSE36)    | __bit(X86_FEATURE_CLFLSH)|        \
+       __bit(X86_FEATURE_MMX)      | __bit(X86_FEATURE_FXSR)  |        \
+       __bit(X86_FEATURE_XMM)      | __bit(X86_FEATURE_XMM2))
+#define AMD_EXTFEATURES_K8_REV_C_ECX  0 
+#define AMD_EXTFEATURES_K8_REV_C_EDX  (                                       \
+       __bit(X86_FEATURE_FPU)      | __bit(X86_FEATURE_VME)   |        \
+       __bit(X86_FEATURE_DE)       | __bit(X86_FEATURE_PSE)   |        \
+       __bit(X86_FEATURE_TSC)      | __bit(X86_FEATURE_MSR)   |        \
+       __bit(X86_FEATURE_PAE)      | __bit(X86_FEATURE_MCE)   |        \
+       __bit(X86_FEATURE_CX8)      | __bit(X86_FEATURE_APIC)  |        \
+       __bit(X86_FEATURE_SYSCALL)  | __bit(X86_FEATURE_MTRR)  |        \
+       __bit(X86_FEATURE_PGE)      | __bit(X86_FEATURE_MCA)   |        \
+       __bit(X86_FEATURE_CMOV)     | __bit(X86_FEATURE_PAT)   |        \
+       __bit(X86_FEATURE_PSE36)    | __bit(X86_FEATURE_NX)    |        \
+       __bit(X86_FEATURE_MMXEXT)   | __bit(X86_FEATURE_MMX)   |        \
+       __bit(X86_FEATURE_FXSR)     | __bit(X86_FEATURE_LM)    |        \
+       __bit(X86_FEATURE_3DNOWEXT) | __bit(X86_FEATURE_3DNOW))
+
+/* Family 0Fh, Revision D */
+#define AMD_FEATURES_K8_REV_D_ECX         AMD_FEATURES_K8_REV_C_ECX
+#define AMD_FEATURES_K8_REV_D_EDX         AMD_FEATURES_K8_REV_C_EDX
+#define AMD_EXTFEATURES_K8_REV_D_ECX     (AMD_EXTFEATURES_K8_REV_C_ECX |\
+       __bit(X86_FEATURE_LAHF_LM))
+#define AMD_EXTFEATURES_K8_REV_D_EDX     (AMD_EXTFEATURES_K8_REV_C_EDX |\
+       __bit(X86_FEATURE_FFXSR))
+
+/* Family 0Fh, Revision E */
+#define AMD_FEATURES_K8_REV_E_ECX        (AMD_FEATURES_K8_REV_D_ECX |  \
+       __bit(X86_FEATURE_XMM3))
+#define AMD_FEATURES_K8_REV_E_EDX        (AMD_FEATURES_K8_REV_D_EDX |  \
+       __bit(X86_FEATURE_HT))
+#define AMD_EXTFEATURES_K8_REV_E_ECX     (AMD_EXTFEATURES_K8_REV_D_ECX |\
+       __bit(X86_FEATURE_CMP_LEGACY)) 
+#define AMD_EXTFEATURES_K8_REV_E_EDX      AMD_EXTFEATURES_K8_REV_D_EDX
+
+/* Family 0Fh, Revision F */
+#define AMD_FEATURES_K8_REV_F_ECX        (AMD_FEATURES_K8_REV_E_ECX |  \
+       __bit(X86_FEATURE_CX16))
+#define AMD_FEATURES_K8_REV_F_EDX         AMD_FEATURES_K8_REV_E_EDX
+#define AMD_EXTFEATURES_K8_REV_F_ECX     (AMD_EXTFEATURES_K8_REV_E_ECX |\
+       __bit(X86_FEATURE_SVME) | __bit(X86_FEATURE_EXTAPICSPACE) |     \
+       __bit(X86_FEATURE_ALTMOVCR))
+#define AMD_EXTFEATURES_K8_REV_F_EDX     (AMD_EXTFEATURES_K8_REV_E_EDX |\
+       __bit(X86_FEATURE_RDTSCP))
+
+/* Family 0Fh, Revision G */
+#define AMD_FEATURES_K8_REV_G_ECX         AMD_FEATURES_K8_REV_F_ECX
+#define AMD_FEATURES_K8_REV_G_EDX         AMD_FEATURES_K8_REV_F_EDX
+#define AMD_EXTFEATURES_K8_REV_G_ECX     (AMD_EXTFEATURES_K8_REV_F_ECX |\
+       __bit(X86_FEATURE_3DNOWPF))
+#define AMD_EXTFEATURES_K8_REV_G_EDX      AMD_EXTFEATURES_K8_REV_F_EDX
+
+/* Family 10h, Revision B */
+#define AMD_FEATURES_FAM10h_REV_B_ECX    (AMD_FEATURES_K8_REV_F_ECX |  \
+       __bit(X86_FEATURE_POPCNT) | __bit(X86_FEATURE_MWAIT))
+#define AMD_FEATURES_FAM10h_REV_B_EDX     AMD_FEATURES_K8_REV_F_EDX
+#define AMD_EXTFEATURES_FAM10h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\
+       __bit(X86_FEATURE_ABM) | __bit(X86_FEATURE_SSE4A) |             \
+       __bit(X86_FEATURE_MISALIGNSSE) | __bit(X86_FEATURE_OSVW) |      \
+       __bit(X86_FEATURE_IBS))
+#define AMD_EXTFEATURES_FAM10h_REV_B_EDX (AMD_EXTFEATURES_K8_REV_F_EDX |\
+       __bit(X86_FEATURE_PAGE1GB))
+
+/* Family 10h, Revision C */
+#define AMD_FEATURES_FAM10h_REV_C_ECX     AMD_FEATURES_FAM10h_REV_B_ECX
+#define AMD_FEATURES_FAM10h_REV_C_EDX     AMD_FEATURES_FAM10h_REV_B_EDX
+#define AMD_EXTFEATURES_FAM10h_REV_C_ECX (AMD_EXTFEATURES_FAM10h_REV_B_ECX |\
+       __bit(X86_FEATURE_SKINIT) | __bit(X86_FEATURE_WDT))
+#define AMD_EXTFEATURES_FAM10h_REV_C_EDX  AMD_EXTFEATURES_FAM10h_REV_B_EDX
+
+/* Family 11h, Revision B */
+#define AMD_FEATURES_FAM11h_REV_B_ECX     AMD_FEATURES_K8_REV_G_ECX
+#define AMD_FEATURES_FAM11h_REV_B_EDX     AMD_FEATURES_K8_REV_G_EDX
+#define AMD_EXTFEATURES_FAM11h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_G_ECX |\
+       __bit(X86_FEATURE_SKINIT))
+#define AMD_EXTFEATURES_FAM11h_REV_B_EDX  AMD_EXTFEATURES_K8_REV_G_EDX
+
+/* AMD errata checking
+ *
+ * Errata are defined using the AMD_LEGACY_ERRATUM() or AMD_OSVW_ERRATUM()
+ * macros. The latter is intended for newer errata that have an OSVW id
+ * assigned, which it takes as first argument. Both take a variable number
+ * of family-specific model-stepping ranges created by AMD_MODEL_RANGE().
+ *
+ * Example 1:
+ * #define AMD_ERRATUM_319                                              \
+ *   AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),      \
+ *                      AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),      \
+ *                      AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0))
+ * Example 2:
+ * #define AMD_ERRATUM_400                                              \
+ *   AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),    \
+ *                       AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf))
+ *   
+ */
+
+#define AMD_LEGACY_ERRATUM(...)         0 /* legacy */, __VA_ARGS__, 0
+#define AMD_OSVW_ERRATUM(osvw_id, ...)  1 /* osvw */, osvw_id, __VA_ARGS__, 0
+#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end)              \
+    ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+#define AMD_MODEL_RANGE_FAMILY(range)   (((range) >> 24) & 0xff)
+#define AMD_MODEL_RANGE_START(range)    (((range) >> 12) & 0xfff)
+#define AMD_MODEL_RANGE_END(range)      ((range) & 0xfff)
+
+#define AMD_ERRATUM_170                                                 \
+    AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0x67, 0xf))
+
+#define AMD_ERRATUM_383                                                 \
+    AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf),    \
+                       AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0x1, 0x0))
+
+int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);
+#endif /* __AMD_H__ */
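
Each AMD_ERRATUM_* descriptor expands to the argument list that
cpu_has_amd_erratum() consumes: an OSVW flag, an optional OSVW id, the
model ranges, and a 0 terminator.  A minimal sketch of the intended call
pattern, with an illustrative workaround hook:

    #include <asm/amd.h>

    /* Sketch: gate a fix on erratum 383 for the given CPU. */
    static void maybe_apply_workarounds(struct cpuinfo_x86 *c)
    {
        if ( cpu_has_amd_erratum(c, AMD_ERRATUM_383) )
        {
            /* ... arm the erratum 383 workaround here ... */
        }
    }
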
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/apic.h
--- a/xen/include/asm-x86/apic.h        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/apic.h        Tue Jul 06 11:54:40 2010 +0100
@@ -25,6 +25,8 @@ extern int x2apic_enabled;
 extern int x2apic_enabled;
 extern int directed_eoi_enabled;
 
+extern void check_x2apic_preenabled(void);
+extern int x2apic_cmdline_disable(void);
 extern void enable_x2apic(void);
 
 static __inline int x2apic_is_available(void)
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/debugger.h
--- a/xen/include/asm-x86/debugger.h    Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/debugger.h    Tue Jul 06 11:54:40 2010 +0100
@@ -68,10 +68,8 @@ static inline int debugger_trap_entry(
     if ( guest_kernel_mode(v, regs) && v->domain->debugger_attached &&
          ((vector == TRAP_int3) || (vector == TRAP_debug)) )
     {
-#ifdef XEN_GDBSX_CONFIG
         if ( vector != TRAP_debug ) /* domain pause is good enough */
             current->arch.gdbsx_vcpu_event = vector;
-#endif
         domain_pause_for_debugger();
         return 1;
     }
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/domain.h      Tue Jul 06 11:54:40 2010 +0100
@@ -415,9 +415,7 @@ struct arch_vcpu
     struct mapcache_vcpu mapcache;
 #endif
 
-#if XEN_GDBSX_CONFIG
     uint32_t gdbsx_vcpu_event;
-#endif 
 
     /* A secondary copy of the vcpu time info. */
     XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/genapic.h
--- a/xen/include/asm-x86/genapic.h     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/genapic.h     Tue Jul 06 11:54:40 2010 +0100
@@ -70,6 +70,7 @@ cpumask_t vector_allocation_domain_flat(
        .send_IPI_mask = send_IPI_mask_flat, \
        .send_IPI_self = send_IPI_self_flat
 
+const struct genapic *apic_x2apic_probe(void);
 void init_apic_ldr_x2apic_phys(void);
 void init_apic_ldr_x2apic_cluster(void);
 void clustered_apic_check_x2apic(void);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/io_apic.h
--- a/xen/include/asm-x86/io_apic.h     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/io_apic.h     Tue Jul 06 11:54:40 2010 +0100
@@ -199,6 +199,12 @@ extern void ioapic_suspend(void);
 extern void ioapic_suspend(void);
 extern void ioapic_resume(void);
 
+extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
+extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
+extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+
 #else  /* !CONFIG_X86_IO_APIC */
 static inline void init_ioapic_mappings(void) {}
 static inline void ioapic_suspend(void) {}
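
These five helpers support the usual quiesce/switch/restore pattern
around an interrupt-delivery mode change.  A hedged sketch of how they
compose, assuming save_IO_APIC_setup() returns 0 on success and with
error handling trimmed:

    struct IO_APIC_route_entry **entries = alloc_ioapic_entries();

    if ( entries && save_IO_APIC_setup(entries) == 0 )
    {
        mask_IO_APIC_setup(entries);      /* quiesce the IO-APIC RTEs */
        /* ... enable interrupt remapping / x2APIC here ... */
        restore_IO_APIC_setup(entries);   /* replay the saved RTEs */
    }
    free_ioapic_entries(entries);
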
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/irq.h Tue Jul 06 11:54:40 2010 +0100
@@ -91,6 +91,8 @@ void disable_8259A_irq(unsigned int irq)
 void disable_8259A_irq(unsigned int irq);
 void enable_8259A_irq(unsigned int irq);
 int i8259A_irq_pending(unsigned int irq);
+void mask_8259A(void);
+void unmask_8259A(void);
 void init_8259A(int aeoi);
 int i8259A_suspend(void);
 int i8259A_resume(void);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h   Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/msr-index.h   Tue Jul 06 11:54:40 2010 +0100
@@ -251,6 +251,10 @@
 /* AMD Microcode MSRs */
 #define MSR_AMD_PATCHLEVEL             0x0000008b
 #define MSR_AMD_PATCHLOADER            0xc0010020
+
+/* AMD OS Visible Workaround MSRs */
+#define MSR_AMD_OSVW_ID_LENGTH          0xc0010140
+#define MSR_AMD_OSVW_STATUS             0xc0010141
 
 /* K6 MSRs */
 #define MSR_K6_EFER                    0xc0000080
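
The OSVW pair behaves as a bitmap indexed by erratum id: the length MSR
bounds the valid ids, and a set bit in the status MSRs means the erratum
applies to this part.  A minimal sketch of the query, assuming rdmsrl()
and a caller-supplied osvw_id:

    uint64_t osvw_len, osvw_bits;

    rdmsrl(MSR_AMD_OSVW_ID_LENGTH, osvw_len);
    if ( osvw_id < osvw_len )
    {
        /* 64 status bits per MSR, starting at MSR_AMD_OSVW_STATUS. */
        rdmsrl(MSR_AMD_OSVW_STATUS + (osvw_id >> 6), osvw_bits);
        if ( osvw_bits & (1ULL << (osvw_id & 0x3f)) )
        {
            /* Erratum present; apply the workaround. */
        }
    }
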
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/mtrr.h
--- a/xen/include/asm-x86/mtrr.h        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/mtrr.h        Tue Jul 06 11:54:40 2010 +0100
@@ -65,7 +65,7 @@ extern u32 get_pat_flags(struct vcpu *v,
 extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr,
                   paddr_t spaddr, uint8_t gmtrr_mtype);
 extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn,
-                                  mfn_t mfn, uint8_t *ipat, int direct_mmio);
+                                  mfn_t mfn, uint8_t *ipat, bool_t direct_mmio);
 extern void ept_change_entry_emt_with_range(
     struct domain *d, unsigned long start_gfn, unsigned long end_gfn);
 extern unsigned char pat_type_2_pte_flags(unsigned char pat_type);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/public/io/ring.h
--- a/xen/include/public/io/ring.h      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/public/io/ring.h      Tue Jul 06 11:54:40 2010 +0100
@@ -103,8 +103,16 @@ struct __name##_sring {                 
 struct __name##_sring {                                                 \
     RING_IDX req_prod, req_event;                                       \
     RING_IDX rsp_prod, rsp_event;                                       \
-    uint8_t  netfront_smartpoll_active;                                 \
-    uint8_t  pad[47];                                                   \
+    union {                                                             \
+        struct {                                                        \
+            uint8_t smartpoll_active;                                   \
+        } netif;                                                        \
+        struct {                                                        \
+            uint8_t msg;                                                \
+        } tapif_user;                                                   \
+        uint8_t pvt_pad[4];                                             \
+    } private;                                                          \
+    uint8_t __pad[44];                                                  \
     union __name##_sring_entry ring[1]; /* variable-length */           \
 };                                                                      \
                                                                         \
@@ -148,7 +156,8 @@ typedef struct __name##_back_ring __name
 #define SHARED_RING_INIT(_s) do {                                       \
     (_s)->req_prod  = (_s)->rsp_prod  = 0;                              \
     (_s)->req_event = (_s)->rsp_event = 1;                              \
-    (void)memset((_s)->pad, 0, sizeof((_s)->pad));                      \
+    (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \
+    (void)memset((_s)->__pad, 0, sizeof((_s)->__pad));                  \
 } while(0)
 
 #define FRONT_RING_INIT(_r, _s, __size) do {                            \
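
The reshuffle keeps the shared-ring header the same size: the single
smart-poll byte plus 47 pad bytes become a 4-byte per-interface union
plus 44 reserved bytes, so existing layouts are unchanged while netfront
and blktap each get a named slot.  A sketch against a hypothetical ring
instantiated just for illustration:

    /* Assumes public/io/ring.h; 'example' is a made-up ring name. */
    struct example_req { uint32_t id; };
    struct example_rsp { uint32_t id; };
    DEFINE_RING_TYPES(example, struct example_req, struct example_rsp);

    static void mark_smartpoll(struct example_sring *sring)
    {
        /* Replaces the old anonymous sring->netfront_smartpoll_active. */
        sring->private.netif.smartpoll_active = 1;
    }
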
diff -r ce278fdaced3 -r db6234d3eafb xen/include/public/memory.h
--- a/xen/include/public/memory.h       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/public/memory.h       Tue Jul 06 11:54:40 2010 +0100
@@ -52,6 +52,9 @@
 #define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
 /* Flag to populate physmap with populate-on-demand entries */
 #define XENMEMF_populate_on_demand (1<<16)
+/* Flag to request allocation only from the node specified */
+#define XENMEMF_exact_node_request  (1<<17)
+#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
 #endif
 
 struct xen_memory_reservation {
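
XENMEMF_exact_node() combines a node preference with the new strict bit,
turning "prefer node n" into "node n or fail".  A sketch of a reservation
using it; the field values are illustrative:

    struct xen_memory_reservation res = {
        .nr_extents   = 16,
        .extent_order = 0,
        .mem_flags    = XENMEMF_exact_node(1),  /* node 1 only */
        .domid        = DOMID_SELF,
    };
    /* set_xen_guest_handle(res.extent_start, pfn_array); then issue
     * HYPERVISOR_memory_op(XENMEM_populate_physmap, &res). */
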
diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/xen/iommu.h   Tue Jul 06 11:54:40 2010 +0100
@@ -58,6 +58,8 @@ struct iommu {
 
 int iommu_setup(void);
 int iommu_supports_eim(void);
+int iommu_enable_IR(void);
+int intremap_enabled(void);
 
 int iommu_add_device(struct pci_dev *pdev);
 int iommu_remove_device(struct pci_dev *pdev);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/xen/mm.h      Tue Jul 06 11:54:40 2010 +0100
@@ -82,6 +82,8 @@ int assign_pages(
 #define  MEMF_tmem        (1U<<_MEMF_tmem)
 #define _MEMF_no_dma      3
 #define  MEMF_no_dma      (1U<<_MEMF_no_dma)
+#define _MEMF_exact_node  4
+#define  MEMF_exact_node  (1U<<_MEMF_exact_node)
 #define _MEMF_node        8
 #define  MEMF_node(n)     ((((n)+1)&0xff)<<_MEMF_node)
 #define _MEMF_bits        24
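
MEMF_exact_node is the hypervisor-internal counterpart of the guest
flag above: combined with MEMF_node() it pins an allocation to the node
instead of merely steering it.  A one-line sketch, with 'd' and 'node'
assumed from context:

    /* Allocate one page strictly from 'node' on behalf of domain 'd'. */
    struct page_info *pg =
        alloc_domheap_pages(d, 0, MEMF_node(node) | MEMF_exact_node);
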
diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/trace.h
--- a/xen/include/xen/trace.h   Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/xen/trace.h   Tue Jul 06 11:54:40 2010 +0100
@@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op
 
 int trace_will_trace_event(u32 event);
 
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
+void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *);
 
 static inline void trace_var(u32 event, int cycles, int extra,
                                unsigned char *extra_data)
@@ -57,7 +57,7 @@ static inline void trace_var(u32 event, 
         {                                                       \
             u32 _d[1];                                          \
             _d[0] = d1;                                         \
-            __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -68,7 +68,7 @@ static inline void trace_var(u32 event, 
             u32 _d[2];                                          \
             _d[0] = d1;                                         \
             _d[1] = d2;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -80,7 +80,7 @@ static inline void trace_var(u32 event, 
             _d[0] = d1;                                         \
             _d[1] = d2;                                         \
             _d[2] = d3;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -93,7 +93,7 @@ static inline void trace_var(u32 event, 
             _d[1] = d2;                                         \
             _d[2] = d3;                                         \
             _d[3] = d4;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -107,7 +107,7 @@ static inline void trace_var(u32 event, 
             _d[2] = d3;                                         \
             _d[3] = d4;                                         \
             _d[4] = d5;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
 
@@ -122,7 +122,7 @@ static inline void trace_var(u32 event, 
             _d[3] = d4;                                         \
             _d[4] = d5;                                         \
             _d[5] = d6;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog