
[Xen-changelog] [xen-unstable] vtd: Enable queued invalidation method if such HW support is detected



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1200995331 0
# Node ID cc5bb500df5feda0755b865134c47f3fe9cec46d
# Parent  80e177b12fd202a4a1df67bdd5cb0f42d515cc05
vtd: Enable queued invalidation method if such HW support is
detected.  Otherwise, register invalidation method is used.

Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx>
---
 xen/arch/x86/hvm/vmx/vtd/Makefile      |    1 
 xen/arch/x86/hvm/vmx/vtd/extern.h      |   55 +++
 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c |  165 ++++++++---
 xen/arch/x86/hvm/vmx/vtd/qinval.c      |  456 +++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/vmx/vtd/vtd.h         |   54 +++
 xen/include/asm-x86/iommu.h            |    4 
 6 files changed, 686 insertions(+), 49 deletions(-)
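
The heart of the change is a per-IOMMU table of flush callbacks: the old
__iommu_flush_context()/__iommu_flush_iotlb() become flush_context_reg()/
flush_iotlb_reg(), and qinval_setup() later swaps in flush_context_qi()/
flush_iotlb_qi() when the extended capability register advertises queued
invalidation.  The stand-alone sketch below models that dispatch outside of
Xen; the struct layout, the ecap_queued_inval() bit position and the
callback signatures are simplified assumptions for illustration, not the
hypervisor's actual definitions.

    #include <stdio.h>

    /* Simplified stand-ins; the real patch uses struct iommu, struct
     * iommu_flush and the iommu_get_flush() accessor, with richer
     * callback signatures. */
    struct iommu_flush {
        int (*context)(void *iommu, unsigned int did); /* context-cache flush */
        int (*iotlb)(void *iommu, unsigned int did);   /* IOTLB flush */
    };

    struct iommu {
        unsigned long long ecap;   /* extended capability register value */
        struct iommu_flush flush;  /* per-IOMMU flush method table */
    };

    /* Assumed ECAP bit for Queued Invalidation support, illustration only. */
    #define ecap_queued_inval(e)  (((e) >> 1) & 1)

    static int flush_context_reg(void *i, unsigned int did)
    { (void)i; (void)did; puts("context flush via CCMD register"); return 0; }

    static int flush_iotlb_reg(void *i, unsigned int did)
    { (void)i; (void)did; puts("IOTLB flush via IOTLB register"); return 0; }

    static int flush_context_qi(void *i, unsigned int did)
    { (void)i; (void)did; puts("context flush via invalidation queue"); return 0; }

    static int flush_iotlb_qi(void *i, unsigned int did)
    { (void)i; (void)did; puts("IOTLB flush via invalidation queue"); return 0; }

    /* Mirrors init_vtd_hw() + qinval_setup(): default to the register-based
     * methods, switch to queued invalidation only if ECAP advertises it. */
    static void setup_flush_methods(struct iommu *iommu)
    {
        iommu->flush.context = flush_context_reg;
        iommu->flush.iotlb   = flush_iotlb_reg;
        if ( ecap_queued_inval(iommu->ecap) )
        {
            iommu->flush.context = flush_context_qi;
            iommu->flush.iotlb   = flush_iotlb_qi;
        }
    }

    int main(void)
    {
        struct iommu legacy = { .ecap = 0x0 }, qi = { .ecap = 0x2 };

        setup_flush_methods(&legacy);
        setup_flush_methods(&qi);
        legacy.flush.iotlb(&legacy, 0);    /* register path */
        qi.flush.iotlb(&qi, 0);            /* queued path */
        return 0;
    }

With such a table in place, callers like iommu_flush_iotlb_psi() never need
to know which invalidation method the hardware provides.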

diff -r 80e177b12fd2 -r cc5bb500df5f xen/arch/x86/hvm/vmx/vtd/Makefile
--- a/xen/arch/x86/hvm/vmx/vtd/Makefile Tue Jan 22 09:46:33 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/Makefile Tue Jan 22 09:48:51 2008 +0000
@@ -2,3 +2,4 @@ obj-y += dmar.o
 obj-y += dmar.o
 obj-y += utils.o
 obj-y += io.o
+obj-y += qinval.o
diff -r 80e177b12fd2 -r cc5bb500df5f xen/arch/x86/hvm/vmx/vtd/extern.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/extern.h Tue Jan 22 09:48:51 2008 +0000
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx>
+ * Copyright (C) Weidong Han <weidong.han@xxxxxxxxx>
+ */
+
+#ifndef _VTD_EXTERN_H_
+#define _VTD_EXTERN_H_
+
+#include "dmar.h"
+
+extern int iommu_setup_done;
+extern int vtd2_thurley_enabled;
+extern int vtd2_qinval_enabled;
+
+extern spinlock_t ioapic_lock;
+extern struct qi_ctrl *qi_ctrl;
+extern struct ir_ctrl *ir_ctrl;
+
+void print_iommu_regs(struct acpi_drhd_unit *drhd);
+void print_vtd_entries(struct domain *d, struct iommu *iommu,
+                       int bus, int devfn, unsigned long gmfn);
+
+int qinval_setup(struct iommu *iommu);
+int queue_invalidate_context(struct iommu *iommu,
+    u16 did, u16 source_id, u8 function_mask, u8 granu);
+int queue_invalidate_iotlb(struct iommu *iommu,
+    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr);
+int queue_invalidate_iec(struct iommu *iommu,
+    u8 granu, u8 im, u16 iidx);
+int invalidate_sync(struct iommu *iommu);
+int iommu_flush_iec_global(struct iommu *iommu);
+int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx);
+void gsi_remapping(unsigned int gsi);
+void print_iommu_regs(struct acpi_drhd_unit *drhd);
+int vtd_hw_check(void);
+struct iommu * ioapic_to_iommu(unsigned int apic_id);
+struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id);
+void clear_fault_bits(struct iommu *iommu);
+
+#endif // _VTD_EXTERN_H_
diff -r 80e177b12fd2 -r cc5bb500df5f xen/arch/x86/hvm/vmx/vtd/intel-iommu.c
--- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    Tue Jan 22 09:46:33 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    Tue Jan 22 09:48:51 2008 +0000
@@ -34,12 +34,9 @@
 #include "pci-direct.h"
 #include "pci_regs.h"
 #include "msi.h"
+#include "extern.h"
 
 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
-
-extern void print_iommu_regs(struct acpi_drhd_unit *drhd);
-extern void print_vtd_entries(struct domain *d, int bus, int devfn,
-                              unsigned long gmfn);
 
 static spinlock_t domid_bitmap_lock;    /* protect domain id bitmap */
 static int domid_bitmap_size;           /* domain id bitmap size in bit */
@@ -304,11 +301,12 @@ static void iommu_flush_write_buffer(str
 }
 
 /* return value determines if we need a write buffer flush */
-static int __iommu_flush_context(
-    struct iommu *iommu,
+static int flush_context_reg(
+    void *_iommu,
     u16 did, u16 source_id, u8 function_mask, u64 type,
     int non_present_entry_flush)
 {
+    struct iommu *iommu = (struct iommu *) _iommu;
     u64 val = 0;
     unsigned long flag;
     unsigned long start_time;
@@ -367,14 +365,16 @@ static int inline iommu_flush_context_gl
 static int inline iommu_flush_context_global(
     struct iommu *iommu, int non_present_entry_flush)
 {
-    return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
+    struct iommu_flush *flush = iommu_get_flush(iommu);
+    return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
                                  non_present_entry_flush);
 }
 
 static int inline iommu_flush_context_domain(
     struct iommu *iommu, u16 did, int non_present_entry_flush)
 {
-    return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
+    struct iommu_flush *flush = iommu_get_flush(iommu);
+    return flush->context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
                                  non_present_entry_flush);
 }
 
@@ -382,16 +382,18 @@ static int inline iommu_flush_context_de
     struct iommu *iommu, u16 did, u16 source_id,
     u8 function_mask, int non_present_entry_flush)
 {
-    return __iommu_flush_context(iommu, did, source_id, function_mask,
+    struct iommu_flush *flush = iommu_get_flush(iommu);
+    return flush->context(iommu, did, source_id, function_mask,
                                  DMA_CCMD_DEVICE_INVL,
                                  non_present_entry_flush);
 }
 
 /* return value determines if we need a write buffer flush */
-static int __iommu_flush_iotlb(struct iommu *iommu, u16 did,
+static int flush_iotlb_reg(void *_iommu, u16 did,
                                u64 addr, unsigned int size_order, u64 type,
                                int non_present_entry_flush)
 {
+    struct iommu *iommu = (struct iommu *) _iommu;
     int tlb_offset = ecap_iotlb_offset(iommu->ecap);
     u64 val = 0, val_iva = 0;
     unsigned long flag;
@@ -467,14 +469,16 @@ static int inline iommu_flush_iotlb_glob
 static int inline iommu_flush_iotlb_global(struct iommu *iommu,
                                            int non_present_entry_flush)
 {
-    return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
+    struct iommu_flush *flush = iommu_get_flush(iommu);
+    return flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
                                non_present_entry_flush);
 }
 
 static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
                                         int non_present_entry_flush)
 {
-    return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
+    struct iommu_flush *flush = iommu_get_flush(iommu);
+    return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
                                non_present_entry_flush);
 }
 
@@ -498,6 +502,7 @@ static int inline iommu_flush_iotlb_psi(
     u64 addr, unsigned int pages, int non_present_entry_flush)
 {
     unsigned int align;
+    struct iommu_flush *flush = iommu_get_flush(iommu);
 
     BUG_ON(addr & (~PAGE_MASK_4K));
     BUG_ON(pages == 0);
@@ -520,7 +525,7 @@ static int inline iommu_flush_iotlb_psi(
     addr >>= PAGE_SHIFT_4K + align;
     addr <<= PAGE_SHIFT_4K + align;
 
-    return __iommu_flush_iotlb(iommu, did, addr, align,
+    return flush->iotlb(iommu, did, addr, align,
                                DMA_TLB_PSI_FLUSH, non_present_entry_flush);
 }
 
@@ -701,7 +706,7 @@ static int iommu_enable_translation(stru
     unsigned long flags;
 
     dprintk(XENLOG_INFO VTDPREFIX,
-            "iommu_enable_translation: enabling vt-d translation\n");
+            "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
     spin_lock_irqsave(&iommu->register_lock, flags);
     iommu->gcmd |= DMA_GCMD_TE;
     dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
@@ -746,14 +751,47 @@ static int iommu_page_fault_do_one(struc
                                    u8 fault_reason, u16 source_id, u32 addr)
 {
     dprintk(XENLOG_WARNING VTDPREFIX,
-            "iommu_page_fault:%s: DEVICE %x:%x.%x addr %x REASON %x\n",
-            (type ? "DMA Read" : "DMA Write"),
-            (source_id >> 8), PCI_SLOT(source_id & 0xFF),
-            PCI_FUNC(source_id & 0xFF), addr, fault_reason);
-
-    print_vtd_entries(current->domain, (source_id >> 8),(source_id & 0xff),
-                      (addr >> PAGE_SHIFT)); 
+            "iommu_fault:%s: %x:%x.%x addr %x REASON %x iommu->reg = %p\n",
+            (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
+            PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
+            fault_reason, iommu->reg);
+
+    if (fault_reason < 0x20) 
+        print_vtd_entries(current->domain, iommu, (source_id >> 8),
+                          (source_id & 0xff), (addr >> PAGE_SHIFT)); 
+
     return 0;
+}
+
+static void iommu_fault_status(u32 fault_status)
+{
+    if (fault_status & DMA_FSTS_PFO)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Fault Overflow\n");
+    else
+    if (fault_status & DMA_FSTS_PPF)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Primary Pending Fault\n");
+    else
+    if (fault_status & DMA_FSTS_AFO)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Advanced Fault Overflow\n");
+    else
+    if (fault_status & DMA_FSTS_APF)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Advanced Pending Fault\n");
+    else
+    if (fault_status & DMA_FSTS_IQE)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Invalidation Queue Error\n");
+    else
+    if (fault_status & DMA_FSTS_ICE)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Invalidation Completion Error\n");
+    else
+    if (fault_status & DMA_FSTS_ITE)
+        dprintk(XENLOG_ERR VTDPREFIX,
+            "iommu_fault_status: Invalidation Time-out Error\n");
 }
 
 #define PRIMARY_FAULT_REG_LEN (16)
@@ -771,6 +809,8 @@ static void iommu_page_fault(int vector,
     spin_lock_irqsave(&iommu->register_lock, flags);
     fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
     spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+    iommu_fault_status(fault_status);
 
     /* FIXME: ignore advanced fault log */
     if ( !(fault_status & DMA_FSTS_PPF) )
@@ -936,6 +976,8 @@ struct iommu *iommu_alloc(void *hw_data)
 {
     struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
     struct iommu *iommu;
+    struct qi_ctrl *qi_ctrl;
+    struct ir_ctrl *ir_ctrl;
 
     if ( nr_iommus > MAX_IOMMUS )
     {
@@ -951,9 +993,10 @@ struct iommu *iommu_alloc(void *hw_data)
 
     set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
     iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
-    dprintk(XENLOG_INFO VTDPREFIX,
-            "iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
-            iommu->reg, drhd->address);
+
+    printk("iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
+           iommu->reg, drhd->address);
+
     nr_iommus++;
 
     if ( !iommu->reg )
@@ -965,8 +1008,18 @@ struct iommu *iommu_alloc(void *hw_data)
     iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
     iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
 
+    printk("iommu_alloc: cap = %"PRIx64"\n",iommu->cap);
+    printk("iommu_alloc: ecap = %"PRIx64"\n", iommu->ecap);
+
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
+
+    qi_ctrl = iommu_qi_ctrl(iommu);
+    spin_lock_init(&qi_ctrl->qinval_lock);
+    spin_lock_init(&qi_ctrl->qinval_poll_lock);
+
+    ir_ctrl = iommu_ir_ctrl(iommu);
+    spin_lock_init(&ir_ctrl->iremap_lock);
 
     drhd->iommu = iommu;
     return iommu;
@@ -1071,8 +1124,10 @@ static int domain_context_mapping_one(
 
     if ( ecap_pass_thru(iommu->ecap) )
         context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+#ifdef CONTEXT_PASSTHRU
     else
     {
+#endif
         if ( !hd->pgd )
         {
             struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
@@ -1087,7 +1142,9 @@ static int domain_context_mapping_one(
  
         context_set_address_root(*context, virt_to_maddr(hd->pgd));
         context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
-    }
+#ifdef CONTEXT_PASSTHRU
+    }
+#endif
 
     context_set_fault_enable(*context);
     context_set_present(*context);
@@ -1462,7 +1519,6 @@ void iommu_domain_teardown(struct domain
                 if ( pgd[0].val != 0 )
                     free_xenheap_page((void*)maddr_to_virt(
                         dma_pte_addr(pgd[0])));
-
                 free_xenheap_page((void *)hd->pgd);
             }
             break;
@@ -1503,9 +1559,11 @@ int iommu_map_page(struct domain *d, pad
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
 
+#ifdef CONTEXT_PASSTHRU
     /* do nothing if dom0 and iommu supports pass thru */
     if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
         return 0;
+#endif
 
     pg = addr_to_dma_page(d, gfn << PAGE_SHIFT_4K);
     if ( !pg )
@@ -1538,9 +1596,11 @@ int iommu_unmap_page(struct domain *d, d
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
 
+#ifdef CONTEXT_PASSTHRU
     /* do nothing if dom0 and iommu supports pass thru */
     if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
         return 0;
+#endif
 
     dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K);
 
@@ -1711,7 +1771,7 @@ void __init setup_dom0_devices(void)
                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 }
 
-void clear_fault_bit(struct iommu *iommu)
+void clear_fault_bits(struct iommu *iommu)
 {
     u64 val;
 
@@ -1722,13 +1782,15 @@ void clear_fault_bit(struct iommu *iommu
         iommu->reg,
         cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
         val);
-    dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
+    dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
 }
 
 static int init_vtd_hw(void)
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
+    struct iommu_flush *flush = NULL;
+    int vector;
     int ret;
 
     for_each_drhd_unit ( drhd )
@@ -1740,29 +1802,37 @@ static int init_vtd_hw(void)
             gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
             return -EIO;
         }
-    }
-
-    return 0;
-}
-
-static int enable_vtd_translation(void)
-{
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
-    int vector = 0;
-
-    for_each_drhd_unit ( drhd )
-    {
-        iommu = drhd->iommu;
+
         vector = iommu_set_interrupt(iommu);
         dma_msi_data_init(iommu, vector);
         dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
         iommu->vector = vector;
-        clear_fault_bit(iommu);
+        clear_fault_bits(iommu);
+        dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
+
+        /* initialize flush functions */
+        flush = iommu_get_flush(iommu);
+        flush->context = flush_context_reg;
+        flush->iotlb = flush_iotlb_reg;
+
+        if ( qinval_setup(iommu) != 0 )
+            dprintk(XENLOG_ERR VTDPREFIX,
+                    "Queued Invalidation hardware not found\n");
+    }
+    return 0;
+}
+
+static int enable_vtd_translation(void)
+{
+    struct acpi_drhd_unit *drhd;
+    struct iommu *iommu;
+
+    for_each_drhd_unit ( drhd )
+    {
+        iommu = drhd->iommu;
         if ( iommu_enable_translation(iommu) )
             return -EIO;
     }
-
     return 0;
 }
 
@@ -1792,9 +1862,6 @@ int iommu_setup(void)
 
     spin_lock_init(&domid_bitmap_lock);
     INIT_LIST_HEAD(&hd->pdev_list);
-
-    /* start from scratch */
-    iommu_flush_all();
 
     /* setup clflush size */
     x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8;
@@ -1815,12 +1882,12 @@ int iommu_setup(void)
     for ( i = 0; i < max_page; i++ )
         iommu_map_page(dom0, i, i);
 
+    enable_vtd_translation();
     if ( init_vtd_hw() )
         goto error;
     setup_dom0_devices();
     setup_dom0_rmrr();
-    if ( enable_vtd_translation() )
-        goto error;
+    iommu_flush_all();
 
     return 0;
 
diff -r 80e177b12fd2 -r cc5bb500df5f xen/arch/x86/hvm/vmx/vtd/qinval.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/qinval.c Tue Jan 22 09:48:51 2008 +0000
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx>
+ * Copyright (C) Xiaohui Xin <xiaohui.xin@xxxxxxxxx>
+ */
+
+
+#include <xen/init.h>
+#include <xen/irq.h>
+#include <xen/spinlock.h>
+#include <xen/sched.h>
+#include <xen/xmalloc.h>
+#include <xen/domain_page.h>
+#include <asm/delay.h>
+#include <asm/string.h>
+#include <asm/iommu.h>
+#include <asm/hvm/vmx/intel-iommu.h>
+#include "dmar.h"
+#include "vtd.h"
+#include "pci-direct.h"
+#include "pci_regs.h"
+#include "msi.h"
+#include "extern.h"
+
+static void print_qi_regs(struct iommu *iommu)
+{
+    u64 val;
+
+    val = dmar_readq(iommu->reg, DMAR_IQA_REG);
+    printk("DMAR_IQA_REG = %"PRIx64"\n", val);
+
+    val = dmar_readq(iommu->reg, DMAR_IQH_REG);
+    printk("DMAR_IQH_REG = %"PRIx64"\n", val);
+
+    val = dmar_readq(iommu->reg, DMAR_IQT_REG);
+    printk("DMAR_IQT_REG = %"PRIx64"\n", val);
+}
+
+static int qinval_next_index(struct iommu *iommu)
+{
+    u64 val;
+    val = dmar_readq(iommu->reg, DMAR_IQT_REG);
+    return (val >> 4);
+}
+
+static int qinval_update_qtail(struct iommu *iommu, int index)
+{
+    u64 val;
+
+    /* Need an ASSERT to ensure that we hold the register lock */
+    val = (index < (QINVAL_ENTRY_NR-1)) ? (index + 1) : 0;
+    dmar_writeq(iommu->reg, DMAR_IQT_REG, (val << 4));
+    return 0;
+}
+
+static int gen_cc_inv_dsc(struct iommu *iommu, int index,
+    u16 did, u16 source_id, u8 function_mask, u8 granu)
+{
+    u64 *ptr64;
+    unsigned long flags;
+    struct qinval_entry * qinval_entry = NULL;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
+    qinval_entry->q.cc_inv_dsc.lo.granu = granu;
+    qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.cc_inv_dsc.lo.did = did;
+    qinval_entry->q.cc_inv_dsc.lo.sid = source_id;
+    qinval_entry->q.cc_inv_dsc.lo.fm = function_mask;
+    qinval_entry->q.cc_inv_dsc.lo.res_2 = 0;
+    qinval_entry->q.cc_inv_dsc.hi.res = 0;
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+
+    ptr64 = (u64 *)qinval_entry;
+    return 0;
+}
+
+int queue_invalidate_context(struct iommu *iommu,
+    u16 did, u16 source_id, u8 function_mask, u8 granu)
+{
+    int ret = -1;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    if (index == -1)
+        return -EBUSY;
+    ret = gen_cc_inv_dsc(iommu, index, did, source_id,
+                         function_mask, granu);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+static int gen_iotlb_inv_dsc(struct iommu *iommu, int index,
+    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
+{
+    unsigned long flags;
+    struct qinval_entry * qinval_entry = NULL;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+
+    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
+    qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
+    qinval_entry->q.iotlb_inv_dsc.lo.dr = 0;
+    qinval_entry->q.iotlb_inv_dsc.lo.dw = 0;
+    qinval_entry->q.iotlb_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.iotlb_inv_dsc.lo.did = did;
+    qinval_entry->q.iotlb_inv_dsc.lo.res_2 = 0;
+
+    qinval_entry->q.iotlb_inv_dsc.hi.am = am;
+    qinval_entry->q.iotlb_inv_dsc.hi.ih = ih;
+    qinval_entry->q.iotlb_inv_dsc.hi.res_1 = 0;
+    qinval_entry->q.iotlb_inv_dsc.hi.addr = addr;
+
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    return 0;
+}
+
+int queue_invalidate_iotlb(struct iommu *iommu,
+    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
+{
+    int ret = -1;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+
+    index = qinval_next_index(iommu);
+    ret = gen_iotlb_inv_dsc(iommu, index, granu, dr, dw, did,
+                            am, ih, addr);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+static int gen_wait_dsc(struct iommu *iommu, int index,
+    u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
+{
+    u64 *ptr64;
+    unsigned long flags;
+    struct qinval_entry * qinval_entry = NULL;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
+    qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
+    qinval_entry->q.inv_wait_dsc.lo.sw = sw;
+    qinval_entry->q.inv_wait_dsc.lo.fn = fn;
+    qinval_entry->q.inv_wait_dsc.lo.res_1 = 0;
+    qinval_entry->q.inv_wait_dsc.lo.sdata = sdata;
+    qinval_entry->q.inv_wait_dsc.hi.res_1 = 0;
+    qinval_entry->q.inv_wait_dsc.hi.saddr = virt_to_maddr(saddr) >> 2;
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    ptr64 = (u64 *)qinval_entry;
+    return 0;
+}
+
+static int queue_invalidate_wait(struct iommu *iommu,
+    u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
+{
+    unsigned long flags;
+    unsigned long start_time;
+    int index = -1;
+    int ret = -1;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    spin_lock_irqsave(&qi_ctrl->qinval_poll_lock, flags);
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    if (*saddr == 1)
+        *saddr = 0;
+    ret = gen_wait_dsc(iommu, index, iflag, sw, fn, sdata, saddr);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+    /* Now we don't support interrupt method */
+    if ( sw )
+    {
+        /* All wait descriptors write the same data to the same status address */
+        start_time = jiffies;
+        while ( *saddr != 1 ) {
+            if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT)) {
+                print_qi_regs(iommu);
+                panic("queue invalidate wait descriptor was not executed\n");
+            }
+            cpu_relax();
+        }
+    }
+    spin_unlock_irqrestore(&qi_ctrl->qinval_poll_lock, flags);
+    return ret;
+}
+
+int invalidate_sync(struct iommu *iommu)
+{
+    int ret = -1;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if (qi_ctrl->qinval)
+    {
+        ret = queue_invalidate_wait(iommu,
+            0, 1, 1, 1, &qi_ctrl->qinval_poll_status);
+        return ret;
+    }
+    return 0;
+}
+
+static int gen_dev_iotlb_inv_dsc(struct iommu *iommu, int index,
+    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
+{
+    unsigned long flags;
+    struct qinval_entry * qinval_entry = NULL;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+
+    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_2 = 0;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = sid;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0;
+
+    qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
+    qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr;
+
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    return 0;
+}
+
+int queue_invalidate_device_iotlb(struct iommu *iommu,
+    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
+{
+    int ret = -1;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    ret = gen_dev_iotlb_inv_dsc(iommu, index, max_invs_pend,
+                                sid, size, addr);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+static int gen_iec_inv_dsc(struct iommu *iommu, int index,
+    u8 granu, u8 im, u16 iidx)
+{
+    unsigned long flags;
+    struct qinval_entry * qinval_entry = NULL;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+
+    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
+    qinval_entry->q.iec_inv_dsc.lo.granu = granu;
+    qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.iec_inv_dsc.lo.im = im;
+    qinval_entry->q.iec_inv_dsc.lo.iidx = iidx;
+    qinval_entry->q.iec_inv_dsc.lo.res_2 = 0;
+    qinval_entry->q.iec_inv_dsc.hi.res = 0;
+
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    return 0;
+}
+
+int queue_invalidate_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
+{
+    int ret;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    ret = gen_iec_inv_dsc(iommu, index, granu, im, iidx);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+u64 iec_cap;
+int __iommu_flush_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
+{
+    int ret;
+    ret = queue_invalidate_iec(iommu, granu, im, iidx);
+    ret |= invalidate_sync(iommu);
+
+    /*
+     * Reading a VT-d architecture register ensures that
+     * draining happens in an implementation-independent way.
+     */
+    iec_cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
+    return ret;
+}
+
+int iommu_flush_iec_global(struct iommu *iommu)
+{
+    return __iommu_flush_iec(iommu, IEC_GLOBAL_INVL, 0, 0);
+}
+
+int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx)
+{
+   return __iommu_flush_iec(iommu, IEC_INDEX_INVL, im, iidx);
+}
+
+static int flush_context_qi(
+    void *_iommu, u16 did, u16 sid, u8 fm, u64 type,
+    int non_present_entry_flush)
+{
+    int ret = 0;
+    struct iommu *iommu = (struct iommu *)_iommu;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    /*
+     * In the non-present entry flush case, if hardware doesn't cache
+     * non-present entries we do nothing; if hardware does cache non-present
+     * entries, we flush the entries of domain 0 (domain id 0 is used to
+     * cache any non-present entries).
+     */
+    if ( non_present_entry_flush )
+    {
+        if ( !cap_caching_mode(iommu->cap) )
+            return 1;
+        else
+            did = 0;
+    }
+
+    if (qi_ctrl->qinval)
+    {
+        ret = queue_invalidate_context(iommu, did, sid, fm,
+                                       type >> DMA_CCMD_INVL_GRANU_OFFSET);
+        ret |= invalidate_sync(iommu);
+    }
+    return ret;
+}
+
+static int flush_iotlb_qi(
+    void *_iommu, u16 did,
+    u64 addr, unsigned int size_order, u64 type,
+    int non_present_entry_flush)
+{
+    u8 dr = 0, dw = 0;
+    int ret = 0;
+    struct iommu *iommu = (struct iommu *)_iommu;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    /*
+     * In the non-present entry flush case, if hardware doesn't cache
+     * non-present entries we do nothing; if hardware does cache non-present
+     * entries, we flush the entries of domain 0 (domain id 0 is used to
+     * cache any non-present entries).
+     */
+    if ( non_present_entry_flush )
+    {
+        if ( !cap_caching_mode(iommu->cap) )
+            return 1;
+        else
+            did = 0;
+    }
+
+    if (qi_ctrl->qinval) {
+        /* use queued invalidation */
+        if (cap_write_drain(iommu->cap))
+            dw = 1;
+        if (cap_read_drain(iommu->cap))
+            dr = 1;
+        /* Need to consider the ih bit later */
+        ret = queue_invalidate_iotlb(iommu,
+                  (type >> DMA_TLB_FLUSH_GRANU_OFFSET), dr,
+                  dw, did, (u8)size_order, 0, addr);
+        ret |= invalidate_sync(iommu);
+    }
+    return ret;
+}
+
+int qinval_setup(struct iommu *iommu)
+{
+    unsigned long start_time;
+    u64 paddr;
+    u32 status = 0;
+    struct qi_ctrl *qi_ctrl;
+    struct iommu_flush *flush;
+
+    qi_ctrl = iommu_qi_ctrl(iommu);
+    flush = iommu_get_flush(iommu);
+
+    if ( !ecap_queued_inval(iommu->ecap) )
+        return -ENODEV;
+
+    if (qi_ctrl->qinval == NULL) {
+        qi_ctrl->qinval = alloc_xenheap_page();
+        if (qi_ctrl->qinval == NULL)
+            panic("Cannot allocate memory for qi_ctrl->qinval\n");
+        memset((u8*)qi_ctrl->qinval, 0, PAGE_SIZE_4K);
+        flush->context = flush_context_qi;
+        flush->iotlb = flush_iotlb_qi;
+    }
+    paddr = virt_to_maddr(qi_ctrl->qinval);
+
+    /* Set up the Invalidation Queue Address (IQA) register with the
+     * address of the page we just allocated.  The QS field at
+     * bits[2:0] indicates that the queue size is one 4KB page,
+     * i.e. 256 entries.  The Queue Head (IQH) and Queue Tail (IQT)
+     * registers are automatically reset to 0 by a write
+     * to the IQA register.
+     */
+    dmar_writeq(iommu->reg, DMAR_IQA_REG, paddr);
+
+    /* enable queued invalidation hardware */
+    iommu->gcmd |= DMA_GCMD_QIE;
+    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+    /* Make sure hardware complete it */
+    start_time = jiffies;
+    while (1) {
+        status = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+        if (status & DMA_GSTS_QIES)
+            break;
+        if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
+            panic("Cannot set QIE field for queue invalidation\n");
+        cpu_relax();
+    }
+    status = 0;
+    return status;
+}
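
The queue bookkeeping above hinges on each invalidation descriptor being 16
bytes: qinval_next_index() shifts the IQT value right by 4 to recover an
entry index, qinval_update_qtail() shifts the next index left by 4 before
writing the register back, and a single 4KB queue page therefore holds 256
entries, matching the IQA comment in qinval_setup().  A small self-contained
sketch of that arithmetic follows; QINVAL_ENTRY_SIZE and QINVAL_PAGE_SIZE
are assumed stand-ins for the real constants.

    #include <assert.h>
    #include <stdio.h>

    #define QINVAL_ENTRY_SIZE  16    /* bytes per invalidation descriptor */
    #define QINVAL_PAGE_SIZE   4096  /* one 4KB queue page */
    #define QINVAL_ENTRY_NR    (QINVAL_PAGE_SIZE / QINVAL_ENTRY_SIZE) /* 256 */

    /* Model of the IQT register: hardware holds the byte offset of the tail,
     * so software converts between entry index and register value with
     * <<4 / >>4. */
    static unsigned long iqt_reg;

    static int qinval_next_index(void)
    {
        return (int)(iqt_reg >> 4);              /* byte offset -> index */
    }

    static void qinval_update_qtail(int index)
    {
        int next = (index < QINVAL_ENTRY_NR - 1) ? index + 1 : 0;  /* wrap */
        iqt_reg = (unsigned long)next << 4;      /* index -> byte offset */
    }

    int main(void)
    {
        assert(QINVAL_ENTRY_NR == 256);
        for (int i = 0; i < 300; i++)            /* exercise the wrap-around */
            qinval_update_qtail(qinval_next_index());
        printf("tail index after 300 submissions: %d\n", qinval_next_index());
        return 0;
    }
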
diff -r 80e177b12fd2 -r cc5bb500df5f xen/arch/x86/hvm/vmx/vtd/vtd.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/vtd.h    Tue Jan 22 09:48:51 2008 +0000
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx>
+ * Copyright (C) Weidong Han <weidong.han@xxxxxxxxx>
+ */
+
+#ifndef _VTD_H_
+#define _VTD_H_
+
+#include <xen/list.h>
+#include <asm/iommu.h>
+
+#define VTDPREFIX "[VT-D]" 
+
+#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
+#define time_after(a,b)         \
+        (typecheck(unsigned long, a) && \
+         typecheck(unsigned long, b) && \
+         ((long)(b) - (long)(a) < 0))
+
+struct IO_APIC_route_remap_entry {
+    union {
+        u64 val;
+        struct {
+            u64 vector:8,
+            delivery_mode:3,
+            index_15:1,
+            delivery_status:1,
+            polarity:1,
+            irr:1,
+            trigger:1,
+            mask:1,
+            reserved:31,
+            format:1,
+            index_0_14:15;
+        };
+    };
+};
+
+#endif // _VTD_H_
diff -r 80e177b12fd2 -r cc5bb500df5f xen/include/asm-x86/iommu.h
--- a/xen/include/asm-x86/iommu.h       Tue Jan 22 09:46:33 2008 +0000
+++ b/xen/include/asm-x86/iommu.h       Tue Jan 22 09:48:51 2008 +0000
@@ -31,6 +31,9 @@ extern int vtd_enabled;
 
 #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
 #define domain_vmx_iommu(d)     (&d->arch.hvm_domain.hvm_iommu.vmx_iommu)
+#define iommu_qi_ctrl(iommu)    (&(iommu->intel.qi_ctrl))
+#define iommu_ir_ctrl(iommu)    (&(iommu->intel.ir_ctrl))
+#define iommu_get_flush(iommu)  (&(iommu->intel.flush))
 
 /*
  * The PCI interface treats multi-function devices as independent
@@ -61,6 +64,7 @@ struct iommu {
     spinlock_t register_lock; /* protect iommu register handling */
     struct root_entry *root_entry; /* virtual address */
     unsigned int vector;
+    struct intel_iommu intel;
 };
 
 int iommu_setup(void);
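
Queued invalidation also changes how completion is detected: instead of
polling a command register bit, invalidate_sync() appends an Invalidation
Wait Descriptor whose status-write flag asks the IOMMU to store a known
value into a software-owned status word, and queue_invalidate_wait() then
spins on that word (with a DMAR_OPERATION_TIMEOUT panic as a backstop).
The sketch below models that handshake in plain C; fake_iommu_execute()
stands in for the hardware, which in reality drains the queue
asynchronously, and the names here are illustrative rather than Xen's.

    #include <stdio.h>

    static volatile unsigned int poll_status;    /* qi_ctrl->qinval_poll_status */

    struct wait_dsc {
        unsigned int sdata;                      /* data the IOMMU will write */
        volatile unsigned int *saddr;            /* where it will write it */
    };

    /* Stand-in for the hardware draining the queue and completing the wait. */
    static void fake_iommu_execute(struct wait_dsc *w)
    {
        /* ... earlier context/IOTLB descriptors would be processed here ... */
        *w->saddr = w->sdata;                    /* completion notification */
    }

    static int invalidate_sync_model(void)
    {
        struct wait_dsc w = { .sdata = 1, .saddr = &poll_status };

        *w.saddr = 0;                            /* arm the status word */
        fake_iommu_execute(&w);                  /* modeled inline here */
        while ( *w.saddr != 1 )                  /* poll loop, as in
                                                    queue_invalidate_wait() */
            ;                                    /* cpu_relax() + timeout in Xen */
        return 0;
    }

    int main(void)
    {
        invalidate_sync_model();
        puts("wait descriptor completed");
        return 0;
    }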
