[Xen-changelog] [xen-unstable] Intel vt-d specific changes in arch/x86/hvm/vmx/vtd.
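As background for the page-table code in intel-iommu.c below: the patch sizes a domain's VT-d page table from an "adjusted guest address width" (AGAW), i.e. 12 bits of page offset plus a whole number of 9-bit levels. The standalone sketch that follows is not part of the changeset; it only mirrors the arithmetic of the LEVEL_STRIDE, width_to_agaw() and agaw_to_level() macros and the guestwidth_to_adjustwidth() expression introduced by the patch.

    #include <stdio.h>

    #define LEVEL_STRIDE 9    /* address bits resolved per page-table level */

    /* Same rounding as the patch's guestwidth_to_adjustwidth() expression. */
    static int guestwidth_to_adjustwidth(int gaw)
    {
        int r = (gaw - 12) % LEVEL_STRIDE;       /* 12-bit page offset first */
        int agaw = (r == 0) ? gaw : gaw + LEVEL_STRIDE - r;
        return (agaw > 64) ? 64 : agaw;
    }

    int main(void)
    {
        int gaw   = 48;                              /* e.g. a 48-bit guest address width */
        int width = guestwidth_to_adjustwidth(gaw);  /* 48: already 12 + a multiple of 9 */
        int agaw  = (width - 30) / LEVEL_STRIDE;     /* width_to_agaw(48) == 2 */
        int level = agaw + 2;                        /* agaw_to_level(2)  == 4 */

        printf("gaw %d -> adjusted width %d, agaw %d, %d-level table\n",
               gaw, width, agaw, level);
        return 0;
    }

iommu_domain_init() below performs the same computation and then checks the chosen agaw against the widths the hardware reports in cap_sagaw().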
# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1189784449 -3600
# Node ID f4bbd3f327e4308aa2aebf5484fc32d1d1ff4b41
# Parent acfa9290746f9c00e30dca7a62e9f7a96702b3b5
Intel vt-d specific changes in arch/x86/hvm/vmx/vtd.

Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx>
Signed-off-by: Guy Zana <guy@xxxxxxxxxxxx>
---
 xen/arch/x86/hvm/vmx/vtd/Makefile      |    4
 xen/arch/x86/hvm/vmx/vtd/dmar.c        |  494 ++++++++
 xen/arch/x86/hvm/vmx/vtd/dmar.h        |   90 +
 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c | 1927 +++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/vmx/vtd/io.c          |  120 ++
 xen/arch/x86/hvm/vmx/vtd/msi.h         |  128 ++
 xen/arch/x86/hvm/vmx/vtd/pci-direct.h  |   48
 xen/arch/x86/hvm/vmx/vtd/pci_regs.h    |  449 +++++++
 xen/arch/x86/hvm/vmx/vtd/utils.c       |  302 +++++
 9 files changed, 3562 insertions(+)

diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/Makefile	Fri Sep 14 16:40:49 2007 +0100
@@ -0,0 +1,4 @@
+obj-y += intel-iommu.o
+obj-y += dmar.o
+obj-y += utils.o
+obj-y += io.o
diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/dmar.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/dmar.c	Fri Sep 14 16:40:49 2007 +0100
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Ashok Raj <ashok.raj@xxxxxxxxx>
+ * Copyright (C) Shaohua Li <shaohua.li@xxxxxxxxx>
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> - adapted to xen
+ */
+
+#include <xen/init.h>
+#include <xen/bitmap.h>
+#include <xen/kernel.h>
+#include <xen/acpi.h>
+#include <xen/mm.h>
+#include <xen/xmalloc.h>
+#include <asm/string.h>
+#include "dmar.h"
+#include "pci-direct.h"
+#include "pci_regs.h"
+
+#undef PREFIX
+#define PREFIX VTDPREFIX "ACPI DMAR:"
+#define DEBUG
+
+#define MIN_SCOPE_LEN (sizeof(struct acpi_pci_path) + sizeof(struct acpi_dev_scope))
+
+LIST_HEAD(acpi_drhd_units);
+LIST_HEAD(acpi_rmrr_units);
+LIST_HEAD(acpi_atsr_units);
+LIST_HEAD(acpi_ioapic_units);
+
+u8 dmar_host_address_width;
+
+static int __init acpi_register_drhd_unit(struct acpi_drhd_unit *drhd)
+{
+    /*
+     * add INCLUDE_ALL at the tail, so scan the list will find it at
+     * the very end.
+ */ + if (drhd->include_all) + list_add_tail(&drhd->list, &acpi_drhd_units); + else + list_add(&drhd->list, &acpi_drhd_units); + return 0; +} + +static int __init acpi_register_rmrr_unit(struct acpi_rmrr_unit *rmrr) +{ + list_add(&rmrr->list, &acpi_rmrr_units); + return 0; +} + +static int acpi_pci_device_match(struct pci_dev *devices, int cnt, + struct pci_dev *dev) +{ + int i; + + for (i = 0; i < cnt; i++) { + if ((dev->bus == devices->bus) && + (dev->devfn == devices->devfn)) + return 1; + devices++; + } + return 0; +} + +static int __init acpi_register_atsr_unit(struct acpi_atsr_unit *atsr) +{ + /* + * add ALL_PORTS at the tail, so scan the list will find it at + * the very end. + */ + if (atsr->all_ports) + list_add_tail(&atsr->list, &acpi_atsr_units); + else + list_add(&atsr->list, &acpi_atsr_units); + return 0; +} + +struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev) +{ + struct acpi_drhd_unit *drhd; + struct acpi_drhd_unit *include_all_drhd; + + include_all_drhd = NULL; + list_for_each_entry(drhd, &acpi_drhd_units, list) { + if (drhd->include_all) + include_all_drhd = drhd; + if (acpi_pci_device_match(drhd->devices, + drhd->devices_cnt, dev)) + { + gdprintk(XENLOG_INFO VTDPREFIX, + "acpi_find_matched_drhd_unit: drhd->address = %lx\n", + drhd->address); + return drhd; + } + } + + if (include_all_drhd) { + gdprintk(XENLOG_INFO VTDPREFIX, + "acpi_find_matched_drhd_unit:include_all_drhd->addr = %lx\n", + include_all_drhd->address); + return include_all_drhd;; + } + + return(NULL); +} + +struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev) +{ + struct acpi_rmrr_unit *rmrr; + + list_for_each_entry(rmrr, &acpi_rmrr_units, list) { + if (acpi_pci_device_match(rmrr->devices, + rmrr->devices_cnt, dev)) + goto out; + } + rmrr = NULL; +out: + return rmrr; +} + +struct acpi_atsr_unit * acpi_find_matched_atsr_unit(struct pci_dev *dev) +{ + struct acpi_atsr_unit *atsru; + struct acpi_atsr_unit *all_ports_atsru; + + all_ports_atsru = NULL; + list_for_each_entry(atsru, &acpi_atsr_units, list) { + if (atsru->all_ports) + all_ports_atsru = atsru; + if (acpi_pci_device_match(atsru->devices, atsru->devices_cnt, dev)) + return atsru; + } + if (all_ports_atsru) { + gdprintk(XENLOG_INFO VTDPREFIX, + "acpi_find_matched_atsr_unit: all_ports_atsru\n"); + return all_ports_atsru;; + } + return(NULL); +} + +static int __init acpi_parse_dev_scope(void *start, void *end, int *cnt, + struct pci_dev **devices) +{ + struct acpi_dev_scope *scope; + u8 bus, sub_bus, sec_bus; + struct acpi_pci_path *path; + struct acpi_ioapic_unit *acpi_ioapic_unit = NULL; + int count, dev_count=0; + struct pci_dev *pdev; + u8 dev, func; + u32 l; + void *tmp; + + *cnt = 0; + tmp = start; + while (start < end) { + scope = start; + if (scope->length < MIN_SCOPE_LEN || + (scope->dev_type != ACPI_DEV_ENDPOINT && + scope->dev_type != ACPI_DEV_P2PBRIDGE)) { + printk(KERN_WARNING PREFIX "Invalid device scope\n"); + return -EINVAL; + } + (*cnt)++; + start += scope->length; + } + + start = tmp; + while (start < end) { + scope = start; + path = (struct acpi_pci_path *)(scope + 1); + count = (scope->length - sizeof(struct acpi_dev_scope)) + /sizeof(struct acpi_pci_path); + bus = scope->start_bus; + + while (--count) { + bus = read_pci_config_byte(bus, path->dev, + path->fn, PCI_SECONDARY_BUS); + path++; + } + + if (scope->dev_type == ACPI_DEV_ENDPOINT) { + printk(KERN_WARNING PREFIX + "found endpoint: bdf = %x:%x:%x\n", bus, path->dev, path->fn); + dev_count++; + } else if (scope->dev_type == 
ACPI_DEV_P2PBRIDGE) { + printk(KERN_WARNING PREFIX + "found bridge: bdf = %x:%x:%x\n", bus, path->dev, path->fn); + + sec_bus = read_pci_config_byte(bus, path->dev, + path->fn, PCI_SECONDARY_BUS); + sub_bus = read_pci_config_byte(bus, path->dev, + path->fn, PCI_SUBORDINATE_BUS); + while (sec_bus <= sub_bus) { + for (dev = 0; dev < 32; dev++) { + for (func = 0; func < 8; func++) { + l = read_pci_config(sec_bus, dev, func, PCI_VENDOR_ID); + + /* some broken boards return 0 or ~0 if a slot is empty: */ + if (l == 0xffffffff || l == 0x00000000 || + l == 0x0000ffff || l == 0xffff0000) + break; + dev_count++; + } + } + sec_bus++; + } + } else if (scope->dev_type == ACPI_DEV_IOAPIC) { + printk(KERN_WARNING PREFIX + "found IOAPIC: bdf = %x:%x:%x\n", bus, path->dev, path->fn); + dev_count++; + } else { + printk(KERN_WARNING PREFIX + "found MSI HPET: bdf = %x:%x:%x\n", bus, path->dev, path->fn); + dev_count++; + } + + start += scope->length; + } + + *cnt = dev_count; + *devices = xmalloc_array(struct pci_dev, *cnt); + if (!*devices) + return -ENOMEM; + memset(*devices, 0, sizeof(struct pci_dev) * (*cnt)); + + pdev = *devices; + start = tmp; + while (start < end) { + scope = start; + path = (struct acpi_pci_path *)(scope + 1); + count = (scope->length - sizeof(struct acpi_dev_scope)) + /sizeof(struct acpi_pci_path); + bus = scope->start_bus; + + while (--count) { + bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SECONDARY_BUS); + path++; + } + + if (scope->dev_type == ACPI_DEV_ENDPOINT) { + printk(KERN_WARNING PREFIX + "found endpoint: bdf = %x:%x:%x\n", bus, path->dev, path->fn); + + pdev->bus = bus; + pdev->devfn = PCI_DEVFN(path->dev, path->fn); + pdev++; + } else if (scope->dev_type == ACPI_DEV_P2PBRIDGE) { + printk(KERN_WARNING PREFIX + "found bridge: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn); + + sec_bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SECONDARY_BUS); + sub_bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SUBORDINATE_BUS); + + while (sec_bus <= sub_bus) { + for (dev = 0; dev < 32; dev++) { + for (func = 0; func < 8; func++) { + l = read_pci_config(sec_bus, dev, func, PCI_VENDOR_ID); + + /* some broken boards return 0 or ~0 if a slot is empty: */ + if (l == 0xffffffff || l == 0x00000000 || + l == 0x0000ffff || l == 0xffff0000) + break; + + pdev->bus = sec_bus; + pdev->devfn = PCI_DEVFN(dev, func); + pdev++; + } + } + sec_bus++; + } + } else if (scope->dev_type == ACPI_DEV_IOAPIC) { + acpi_ioapic_unit = xmalloc(struct acpi_ioapic_unit); + acpi_ioapic_unit->apic_id = scope->enum_id; + acpi_ioapic_unit->ioapic.bdf.bus = bus; + acpi_ioapic_unit->ioapic.bdf.dev = path->dev; + acpi_ioapic_unit->ioapic.bdf.func = path->fn; + list_add(&acpi_ioapic_unit->list, &acpi_ioapic_units); + printk(KERN_WARNING PREFIX + "found IOAPIC: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn); + } else { + printk(KERN_WARNING PREFIX + "found MSI HPET: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn); + } + + start += scope->length; + } + + return 0; +} + +static int __init +acpi_parse_one_drhd(struct acpi_dmar_entry_header *header) +{ + struct acpi_table_drhd * drhd = (struct acpi_table_drhd *)header; + struct acpi_drhd_unit *dmaru; + int ret = 0; + static int include_all; + + dmaru = xmalloc(struct acpi_drhd_unit); + if (!dmaru) + return -ENOMEM; + memset(dmaru, 0, sizeof(struct acpi_drhd_unit)); + + dmaru->address = drhd->address; + dmaru->include_all = drhd->flags & 1; /* BIT0: INCLUDE_ALL */ + printk(KERN_WARNING PREFIX "dmaru->address = %lx\n", 
dmaru->address); + + if (!dmaru->include_all) { + ret = acpi_parse_dev_scope((void *)(drhd + 1), + ((void *)drhd) + header->length, + &dmaru->devices_cnt, &dmaru->devices); + } + else { + printk(KERN_WARNING PREFIX "found INCLUDE_ALL\n"); + /* Only allow one INCLUDE_ALL */ + if (include_all) { + printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL " + "device scope is allowed\n"); + ret = -EINVAL; + } + include_all = 1; + } + + if (ret) + xfree(dmaru); + else + acpi_register_drhd_unit(dmaru); + return ret; +} + +static int __init +acpi_parse_one_rmrr(struct acpi_dmar_entry_header *header) +{ + struct acpi_table_rmrr *rmrr = (struct acpi_table_rmrr *)header; + struct acpi_rmrr_unit *rmrru; + int ret = 0; + + rmrru = xmalloc(struct acpi_rmrr_unit); + if (!rmrru) + return -ENOMEM; + memset(rmrru, 0, sizeof(struct acpi_rmrr_unit)); + +#ifdef VTD_DEBUG + gdprintk(XENLOG_INFO VTDPREFIX, + "acpi_parse_one_rmrr: base = %lx end = %lx\n", + rmrr->base_address, rmrr->end_address); +#endif + + rmrru->base_address = rmrr->base_address; + rmrru->end_address = rmrr->end_address; + ret = acpi_parse_dev_scope((void *)(rmrr + 1), + ((void*)rmrr) + header->length, + &rmrru->devices_cnt, &rmrru->devices); + + if (ret || (rmrru->devices_cnt == 0)) + xfree(rmrru); + else + acpi_register_rmrr_unit(rmrru); + return ret; +} + +static int __init +acpi_parse_one_atsr(struct acpi_dmar_entry_header *header) +{ + struct acpi_table_atsr *atsr = (struct acpi_table_atsr *)header; + struct acpi_atsr_unit *atsru; + int ret = 0; + static int all_ports; + + atsru = xmalloc(struct acpi_atsr_unit); + if (!atsru) + return -ENOMEM; + memset(atsru, 0, sizeof(struct acpi_atsr_unit)); + + atsru->all_ports = atsr->flags & 1; /* BIT0: ALL_PORTS */ + if (!atsru->all_ports) { + ret = acpi_parse_dev_scope((void *)(atsr + 1), + ((void *)atsr) + header->length, + &atsru->devices_cnt, &atsru->devices); + } + else { + printk(KERN_WARNING PREFIX "found ALL_PORTS\n"); + /* Only allow one ALL_PORTS */ + if (all_ports) { + printk(KERN_WARNING PREFIX "Only one ALL_PORTS " + "device scope is allowed\n"); + ret = -EINVAL; + } + all_ports = 1; + } + + if (ret) + xfree(atsr); + else + acpi_register_atsr_unit(atsru); + return ret; +} + +static void __init +acpi_table_print_dmar_entry(struct acpi_dmar_entry_header *header) +{ + struct acpi_table_drhd *drhd; + struct acpi_table_rmrr *rmrr; + + switch (header->type) { + case ACPI_DMAR_DRHD: + drhd = (struct acpi_table_drhd *)header; + break; + case ACPI_DMAR_RMRR: + rmrr = (struct acpi_table_rmrr *)header; + break; + } +} + +static int __init +acpi_parse_dmar(unsigned long phys_addr, unsigned long size) +{ + struct acpi_table_dmar *dmar = NULL; + struct acpi_dmar_entry_header *entry_header; + int ret = 0; + + if (!phys_addr || !size) + return -EINVAL; + + dmar = (struct acpi_table_dmar *)__acpi_map_table(phys_addr, size); + if (!dmar) { + printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); + return -ENODEV; + } + + if (!dmar->haw) { + printk (KERN_WARNING PREFIX "Zero: Invalid DMAR haw\n"); + return -EINVAL; + } + + dmar_host_address_width = dmar->haw; + printk (KERN_INFO PREFIX "Host address width %d\n", + dmar_host_address_width); + + entry_header = (struct acpi_dmar_entry_header *)(dmar + 1); + while (((unsigned long)entry_header) < (((unsigned long)dmar) + size)) { + acpi_table_print_dmar_entry(entry_header); + + switch (entry_header->type) { + case ACPI_DMAR_DRHD: + printk (KERN_INFO PREFIX "found ACPI_DMAR_DRHD\n"); + ret = acpi_parse_one_drhd(entry_header); + break; + case ACPI_DMAR_RMRR: + printk 
(KERN_INFO PREFIX "found ACPI_DMAR_RMRR\n"); + ret = acpi_parse_one_rmrr(entry_header); + break; + case ACPI_DMAR_ATSR: + printk (KERN_INFO PREFIX "found ACPI_DMAR_RMRR\n"); + ret = acpi_parse_one_atsr(entry_header); + break; + default: + printk(KERN_WARNING PREFIX "Unknown DMAR structure type\n"); + ret = -EINVAL; + break; + } + if (ret) + break; + + entry_header = ((void *)entry_header + entry_header->length); + } + return ret; +} + +int acpi_dmar_init(void) +{ + acpi_table_parse(ACPI_DMAR, acpi_parse_dmar); + if (list_empty(&acpi_drhd_units)) { + printk(KERN_ERR PREFIX "No DMAR devices found\n"); + return -ENODEV; + } else + vtd_enabled = 1; + return 0; +} diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/dmar.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/dmar.h Fri Sep 14 16:40:49 2007 +0100 @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) Ashok Raj <ashok.raj@xxxxxxxxx> + * Copyright (C) Shaohua Li <shaohua.li@xxxxxxxxx> + */ + +#ifndef _DMAR_H_ +#define _DMAR_H_ + +#include <xen/list.h> +#include <asm/iommu.h> + +extern u8 dmar_host_address_width; + +struct acpi_drhd_unit { + struct list_head list; + unsigned long address; /* register base address of the unit */ + struct pci_dev *devices; /* target devices */ + int devices_cnt; + u8 include_all:1; + struct iommu *iommu; +}; + +struct acpi_rmrr_unit { + struct list_head list; + unsigned long base_address; + unsigned long end_address; + struct pci_dev *devices; /* target devices */ + int devices_cnt; + u8 allow_all:1; +}; + +struct acpi_atsr_unit { + struct list_head list; + struct pci_dev *devices; /* target devices */ + int devices_cnt; + u8 all_ports:1; +}; + +#define for_each_iommu(domain, iommu) \ + list_for_each_entry(iommu, \ + &(domain->arch.hvm_domain.hvm_iommu.iommu_list), list) + +#define for_each_pdev(domain, pdev) \ + list_for_each_entry(pdev, \ + &(domain->arch.hvm_domain.hvm_iommu.pdev_list), list) + +#define for_each_drhd_unit(drhd) \ + list_for_each_entry(drhd, &acpi_drhd_units, list) +#define for_each_rmrr_device(rmrr, pdev) \ + list_for_each_entry(rmrr, &acpi_rmrr_units, list) { \ + int _i; \ + for (_i = 0; _i < rmrr->devices_cnt; _i++) { \ + pdev = &(rmrr->devices[_i]); +#define end_for_each_rmrr_device(rmrr, pdev) \ + } \ + } + +struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev); +struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev); + +/* This one is for interrupt remapping */ +struct acpi_ioapic_unit { + struct list_head list; + int apic_id; + union { + u16 info; + struct { + u16 bus: 8, + dev: 5, + func: 3; + }bdf; + }ioapic; +}; + +#endif // _DMAR_H_ diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c Fri Sep 14 
16:40:49 2007 +0100 @@ -0,0 +1,1927 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) Ashok Raj <ashok.raj@xxxxxxxxx> + * Copyright (C) Shaohua Li <shaohua.li@xxxxxxxxx> + * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> - adapted to xen + */ + +#include <xen/init.h> +#include <xen/irq.h> +#include <xen/spinlock.h> +#include <xen/sched.h> +#include <xen/xmalloc.h> +#include <xen/domain_page.h> +#include <asm/delay.h> +#include <asm/string.h> +#include <asm/iommu.h> +#include <asm/hvm/vmx/intel-iommu.h> +#include "dmar.h" +#include "pci-direct.h" +#include "pci_regs.h" +#include "msi.h" + +extern void print_iommu_regs(struct acpi_drhd_unit *drhd); +extern void print_vtd_entries(struct domain *d, int bus, int devfn, + unsigned long gmfn); +extern void (*interrupt[])(void); + +#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */ + +#define time_after(a,b) \ + (typecheck(unsigned long, a) && \ + typecheck(unsigned long, b) && \ + ((long)(b) - (long)(a) < 0)) + +unsigned int x86_clflush_size; +void clflush_cache_range(void *adr, int size) +{ + int i; + for (i = 0; i < size; i += x86_clflush_size) + clflush(adr + i); +} + +static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(addr, size); +} + +#define iommu_flush_cache_entry(iommu, addr) \ + __iommu_flush_cache(iommu, addr, 8) +#define iommu_flush_cache_page(iommu, addr) \ + __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K) + +int nr_iommus; +/* context entry handling */ +static struct context_entry * device_to_context_entry(struct iommu *iommu, + u8 bus, u8 devfn) +{ + struct root_entry *root; + struct context_entry *context; + unsigned long phy_addr; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + root = &iommu->root_entry[bus]; + if (!root_present(*root)) { + phy_addr = (unsigned long) alloc_xenheap_page(); + if (!phy_addr) { + spin_unlock_irqrestore(&iommu->lock, flags); + return NULL; + } + memset((void *) phy_addr, 0, PAGE_SIZE); + iommu_flush_cache_page(iommu, (void *)phy_addr); + phy_addr = virt_to_maddr((void *)phy_addr); + set_root_value(*root, phy_addr); + set_root_present(*root); + iommu_flush_cache_entry(iommu, root); + } + phy_addr = (unsigned long) get_context_addr(*root); + context = (struct context_entry *)maddr_to_virt(phy_addr); + spin_unlock_irqrestore(&iommu->lock, flags); + return &context[devfn]; +} + +static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn) +{ + struct root_entry *root; + struct context_entry *context; + unsigned long phy_addr; + int ret; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + root = &iommu->root_entry[bus]; + if (!root_present(*root)) { + ret = 0; + goto out; + } + phy_addr = get_context_addr(*root); + context = (struct context_entry *)maddr_to_virt(phy_addr); + ret = 
context_present(context[devfn]); +out: + spin_unlock_irqrestore(&iommu->lock, flags); + return ret; +} + +/* page table handling */ +#define LEVEL_STRIDE (9) +#define LEVEL_MASK ((1 << LEVEL_STRIDE) - 1) +#define agaw_to_level(val) ((val) + 2) +#define agaw_to_width(val) (30 + val * LEVEL_STRIDE) +#define width_to_agaw(w) ((w - 30)/LEVEL_STRIDE) +#define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) +#define address_level_offset(addr, level) \ + ((addr >> level_to_offset_bits(level)) & LEVEL_MASK) +#define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l)) +#define level_size(l) (1 << level_to_offset_bits(l)) +#define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l)) +static struct dma_pte * addr_to_dma_pte(struct domain *domain, u64 addr) +{ + struct hvm_iommu *hd = domain_hvm_iommu(domain); + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + int addr_width = agaw_to_width(hd->agaw); + struct dma_pte *parent, *pte = NULL, *pgd; + int level = agaw_to_level(hd->agaw); + int offset; + unsigned long flags; + + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); + iommu = drhd->iommu; + + addr &= (((u64)1) << addr_width) - 1; + spin_lock_irqsave(&hd->mapping_lock, flags); + if (!hd->pgd) { + pgd = (struct dma_pte *)alloc_xenheap_page(); + if (!pgd && !hd->pgd) { + spin_unlock_irqrestore(&hd->mapping_lock, flags); + return NULL; + } + memset((u8*)pgd, 0, PAGE_SIZE); + if (!hd->pgd) + hd->pgd = pgd; + else /* somebody is fast */ + free_xenheap_page((void *) pgd); + } + parent = hd->pgd; + while (level > 0) { + u8 *tmp; + offset = address_level_offset(addr, level); + pte = &parent[offset]; + if (level == 1) + break; + if (dma_pte_addr(*pte) == 0) { + tmp = alloc_xenheap_page(); + if (tmp == NULL) + gdprintk(XENLOG_ERR VTDPREFIX, + "addr_to_dma_pte: tmp == NULL\n"); + + memset(tmp, 0, PAGE_SIZE); + iommu_flush_cache_page(iommu, tmp); + + if (!tmp && dma_pte_addr(*pte) == 0) { + spin_unlock_irqrestore(&hd->mapping_lock, flags); + return NULL; + } + if (dma_pte_addr(*pte) == 0) { + dma_set_pte_addr(*pte, + virt_to_maddr(tmp)); + /* + * high level table always sets r/w, last level + * page table control read/write + */ + dma_set_pte_readable(*pte); + dma_set_pte_writable(*pte); + iommu_flush_cache_entry(iommu, pte); + } else /* somebody is fast */ + free_xenheap_page(tmp); + } + parent = maddr_to_virt(dma_pte_addr(*pte)); + level--; + } + spin_unlock_irqrestore(&hd->mapping_lock, flags); + return pte; +} + +/* return address's pte at specific level */ +static struct dma_pte *dma_addr_level_pte(struct domain *domain, u64 addr, + int level) +{ + struct hvm_iommu *hd = domain_hvm_iommu(domain); + struct dma_pte *parent, *pte = NULL; + int total = agaw_to_level(hd->agaw); + int offset; + + parent = hd->pgd; + while (level <= total) { + offset = address_level_offset(addr, total); + pte = &parent[offset]; + if (level == total) + return pte; + + if (dma_pte_addr(*pte) == 0) + break; + parent = maddr_to_virt(dma_pte_addr(*pte)); + total--; + } + return NULL; +} + +static void iommu_flush_write_buffer(struct iommu *iommu) +{ + u32 val; + unsigned long flag; + unsigned long start_time; + + if (!cap_rwbf(iommu->cap)) + return; + val = iommu->gcmd | DMA_GCMD_WBF; + + spin_lock_irqsave(&iommu->register_lock, flag); + dmar_writel(iommu->reg, DMAR_GCMD_REG, val); + + /* Make sure hardware complete it */ + start_time = jiffies; + while (1) { + val = dmar_readl(iommu->reg, DMAR_GSTS_REG); + if (!(val & DMA_GSTS_WBFS)) + break; + if (time_after(jiffies, start_time + 
DMAR_OPERATION_TIMEOUT)) + panic("DMAR hardware is malfunctional, please disable IOMMU\n"); + cpu_relax(); + } + spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +/* return value determine if we need a write buffer flush */ +static int __iommu_flush_context(struct iommu *iommu, + u16 did, u16 source_id, u8 function_mask, u64 type, + int non_present_entry_flush) +{ + u64 val = 0; + unsigned long flag; + unsigned long start_time; + + /* + * In the non-present entry flush case, if hardware doesn't cache + * non-present entry we do nothing and if hardware cache non-present + * entry, we flush entries of domain 0 (the domain id is used to cache + * any non-present entries) + */ + if (non_present_entry_flush) { + if (!cap_caching_mode(iommu->cap)) + return 1; + else + did = 0; + } + + /* use register invalidation */ + switch (type) + { + case DMA_CCMD_GLOBAL_INVL: + val = DMA_CCMD_GLOBAL_INVL; + break; + case DMA_CCMD_DOMAIN_INVL: + val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); + break; + case DMA_CCMD_DEVICE_INVL: + val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) + |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask); + break; + default: + BUG(); + } + val |= DMA_CCMD_ICC; + + spin_lock_irqsave(&iommu->register_lock, flag); + dmar_writeq(iommu->reg, DMAR_CCMD_REG, val); + + /* Make sure hardware complete it */ + start_time = jiffies; + while (1) { + val = dmar_readq(iommu->reg, DMAR_CCMD_REG); + if (!(val & DMA_CCMD_ICC)) + break; + if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT)) + panic("DMAR hardware is malfunctional, please disable IOMMU\n"); + cpu_relax(); + } + spin_unlock_irqrestore(&iommu->register_lock, flag); + /* flush context entry will implictly flush write buffer */ + return 0; +} + +static int inline iommu_flush_context_global(struct iommu *iommu, + int non_present_entry_flush) +{ + return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, + non_present_entry_flush); +} + +static int inline iommu_flush_context_domain(struct iommu *iommu, u16 did, + int non_present_entry_flush) +{ + return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL, + non_present_entry_flush); +} + +static int inline iommu_flush_context_device(struct iommu *iommu, + u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush) +{ + return __iommu_flush_context(iommu, did, source_id, function_mask, + DMA_CCMD_DEVICE_INVL, non_present_entry_flush); +} + +/* return value determine if we need a write buffer flush */ +static int __iommu_flush_iotlb(struct iommu *iommu, u16 did, + u64 addr, unsigned int size_order, u64 type, + int non_present_entry_flush) +{ + int tlb_offset = ecap_iotlb_offset(iommu->ecap); + u64 val = 0, val_iva = 0; + unsigned long flag; + unsigned long start_time; + + /* + * In the non-present entry flush case, if hardware doesn't cache + * non-present entry we do nothing and if hardware cache non-present + * entry, we flush entries of domain 0 (the domain id is used to cache + * any non-present entries) + */ + if (non_present_entry_flush) { + if (!cap_caching_mode(iommu->cap)) + return 1; + else + did = 0; + } + + /* use register invalidation */ + switch (type) { + case DMA_TLB_GLOBAL_FLUSH: + /* global flush doesn't need set IVA_REG */ + val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; + break; + case DMA_TLB_DSI_FLUSH: + val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); + break; + case DMA_TLB_PSI_FLUSH: + val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); + /* Note: always flush non-leaf currently */ + val_iva = size_order | addr; + break; + 
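+        /*
+         * Illustrative comment, not in the original changeset:
+         * iommu_flush_iotlb_psi() below passes size_order as a power-of-two
+         * page count and addr already aligned to that size, so a flush of
+         * four pages at a 16KB-aligned address A is encoded here as
+         * val_iva = A | 2 (2^2 pages).
+         */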
default: + BUG(); + } + /* Note: set drain read/write */ +#if 0 + /* + * This is probably to be super secure.. Looks like we can + * ignore it without any impact. + */ + if (cap_read_drain(iommu->cap)) + val |= DMA_TLB_READ_DRAIN; +#endif + if (cap_write_drain(iommu->cap)) + val |= DMA_TLB_WRITE_DRAIN; + + spin_lock_irqsave(&iommu->register_lock, flag); + /* Note: Only uses first TLB reg currently */ + if (val_iva) + dmar_writeq(iommu->reg, tlb_offset, val_iva); + dmar_writeq(iommu->reg, tlb_offset + 8, val); + + /* Make sure hardware complete it */ + start_time = jiffies; + while (1) { + val = dmar_readq(iommu->reg, tlb_offset + 8); + if (!(val & DMA_TLB_IVT)) + break; + if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT)) + panic("DMAR hardware is malfunctional, please disable IOMMU\n"); + cpu_relax(); + } + spin_unlock_irqrestore(&iommu->register_lock, flag); + + /* check IOTLB invalidation granularity */ + if (DMA_TLB_IAIG(val) == 0) + printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n"); + if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) + printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n", + (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val)); + /* flush context entry will implictly flush write buffer */ + return 0; +} + +static int inline iommu_flush_iotlb_global(struct iommu *iommu, + int non_present_entry_flush) +{ + return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, + non_present_entry_flush); +} + +static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did, + int non_present_entry_flush) +{ + return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, + non_present_entry_flush); +} + +static int inline get_alignment(u64 base, unsigned int size) +{ + int t = 0; + u64 end; + + end = base + size - 1; + while (base != end) { + t++; + base >>= 1; + end >>= 1; + } + return t; +} + +static int inline iommu_flush_iotlb_psi(struct iommu *iommu, u16 did, + u64 addr, unsigned int pages, int non_present_entry_flush) +{ + unsigned int align; + + BUG_ON(addr & (~PAGE_MASK_4K)); + BUG_ON(pages == 0); + + /* Fallback to domain selective flush if no PSI support */ + if (!cap_pgsel_inv(iommu->cap)) + return iommu_flush_iotlb_dsi(iommu, did, + non_present_entry_flush); + + /* + * PSI requires page size is 2 ^ x, and the base address is naturally + * aligned to the size + */ + align = get_alignment(addr >> PAGE_SHIFT_4K, pages); + /* Fallback to domain selective flush if size is too big */ + if (align > cap_max_amask_val(iommu->cap)) + return iommu_flush_iotlb_dsi(iommu, did, + non_present_entry_flush); + + addr >>= PAGE_SHIFT_4K + align; + addr <<= PAGE_SHIFT_4K + align; + + return __iommu_flush_iotlb(iommu, did, addr, align, + DMA_TLB_PSI_FLUSH, non_present_entry_flush); +} + +void flush_all(void) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + int i = 0; + + wbinvd(); + for_each_drhd_unit(drhd) { + iommu = drhd->iommu; + iommu_flush_context_global(iommu, 0); + iommu_flush_iotlb_global(iommu, 0); + i++; + } +} + +/* clear one page's page table */ +static void dma_pte_clear_one(struct domain *domain, u64 addr) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + struct dma_pte *pte = NULL; + + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); + + /* get last level pte */ + pte = dma_addr_level_pte(domain, addr, 1); + + if (pte) { + dma_clear_pte(*pte); + iommu_flush_cache_entry(drhd->iommu, pte); + + for_each_drhd_unit(drhd) { + iommu = drhd->iommu; + if (cap_caching_mode(iommu->cap)) + { + 
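+            /*
+             * Illustrative comment, not in the original changeset: on
+             * caching-mode hardware even not-present entries may be cached
+             * (see the comment in __iommu_flush_iotlb() above), so clearing
+             * a PTE still requires a page-selective IOTLB flush; otherwise
+             * flushing the write buffer is sufficient.
+             */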
iommu_flush_iotlb_psi(iommu, domain->domain_id, addr, 1, 0); + } + else if (cap_rwbf(iommu->cap)) + iommu_flush_write_buffer(iommu); + } + } +} + +/* clear last level pte, a tlb flush should be followed */ +static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end) +{ + struct hvm_iommu *hd = domain_hvm_iommu(domain); + int addr_width = agaw_to_width(hd->agaw); + + start &= (((u64)1) << addr_width) - 1; + end &= (((u64)1) << addr_width) - 1; + /* in case it's partial page */ + start = PAGE_ALIGN_4K(start); + end &= PAGE_MASK_4K; + + /* we don't need lock here, nobody else touches the iova range */ + while (start < end) { + dma_pte_clear_one(domain, start); + start += PAGE_SIZE_4K; + } +} + +/* free page table pages. last level pte should already be cleared */ +// static void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end) +void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end) +{ + struct acpi_drhd_unit *drhd; + struct hvm_iommu *hd = domain_hvm_iommu(domain); + struct iommu *iommu; + int addr_width = agaw_to_width(hd->agaw); + struct dma_pte *pte; + int total = agaw_to_level(hd->agaw); + int level; + u32 tmp; + + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); + iommu = drhd->iommu; + + start &= (((u64)1) << addr_width) - 1; + end &= (((u64)1) << addr_width) - 1; + + /* we don't need lock here, nobody else touches the iova range */ + level = 2; + while (level <= total) { + tmp = align_to_level(start, level); + if (tmp >= end || (tmp + level_size(level) > end)) + return; + + while (tmp < end) { + pte = dma_addr_level_pte(domain, tmp, level); + if (pte) { + free_xenheap_page((void *) maddr_to_virt(dma_pte_addr(*pte))); + dma_clear_pte(*pte); + iommu_flush_cache_entry(iommu, pte); + } + tmp += level_size(level); + } + level++; + } + /* free pgd */ + if (start == 0 && end == ((((u64)1) << addr_width) - 1)) { + free_xenheap_page((void *)hd->pgd); + hd->pgd = NULL; + } +} + +/* iommu handling */ +static int iommu_set_root_entry(struct iommu *iommu) +{ + void *addr; + u32 cmd, sts; + struct root_entry *root; + unsigned long flags; + + if (iommu == NULL) + gdprintk(XENLOG_ERR VTDPREFIX, + "iommu_set_root_entry: iommu == NULL\n"); + + spin_lock_irqsave(&iommu->lock, flags); + if (!iommu->root_entry) { + spin_unlock_irqrestore(&iommu->lock, flags); + root = (struct root_entry *)alloc_xenheap_page(); + memset((u8*)root, 0, PAGE_SIZE); + iommu_flush_cache_page(iommu, root); + spin_lock_irqsave(&iommu->lock, flags); + + if (!root && !iommu->root_entry) { + spin_unlock_irqrestore(&iommu->lock, flags); + return -ENOMEM; + } + + if (!iommu->root_entry) + iommu->root_entry = root; + else /* somebody is fast */ + free_xenheap_page((void *)root); + } + spin_unlock_irqrestore(&iommu->lock, flags); + + addr = iommu->root_entry; + spin_lock_irqsave(&iommu->register_lock, flags); + dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr)); + cmd = iommu->gcmd | DMA_GCMD_SRTP; + dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd); + + /* Make sure hardware complete it */ + while (1) { + sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); + if (sts & DMA_GSTS_RTPS) + break; + cpu_relax(); + } + spin_unlock_irqrestore(&iommu->register_lock, flags); + + return 0; +} + +static int iommu_enable_translation(struct iommu *iommu) +{ + u32 sts; + unsigned long flags; + + dprintk(XENLOG_INFO VTDPREFIX, + "iommu_enable_translation: enabling vt-d translation\n"); + spin_lock_irqsave(&iommu->register_lock, flags); + iommu->gcmd |= DMA_GCMD_TE; + dmar_writel(iommu->reg, 
DMAR_GCMD_REG, iommu->gcmd); + /* Make sure hardware complete it */ + while (1) { + sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); + if (sts & DMA_GSTS_TES) { + break; + } + cpu_relax(); + } + spin_unlock_irqrestore(&iommu->register_lock, flags); + return 0; +} + +int iommu_disable_translation(struct iommu *iommu) +{ + u32 sts; + unsigned long flags; + + spin_lock_irqsave(&iommu->register_lock, flags); + iommu->gcmd &= ~ DMA_GCMD_TE; + dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd); + + /* Make sure hardware complete it */ + while(1) { + sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); + if (!(sts & DMA_GSTS_TES)) + break; + cpu_relax(); + } + spin_unlock_irqrestore(&iommu->register_lock, flags); + return 0; +} + +static struct iommu *vector_to_iommu[NR_VECTORS]; +static int iommu_page_fault_do_one(struct iommu *iommu, int type, + u8 fault_reason, u16 source_id, u32 addr) +{ + dprintk(XENLOG_WARNING VTDPREFIX, + "iommu_page_fault:%s: DEVICE %x:%x.%x addr %x REASON %x\n", + (type ? "DMA Read" : "DMA Write"), + (source_id >> 8), PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, fault_reason); + + print_vtd_entries(current->domain, (source_id >> 8),(source_id & 0xff), + (addr >> PAGE_SHIFT)); + return 0; +} + +#define PRIMARY_FAULT_REG_LEN (16) +static void iommu_page_fault(int vector, void *dev_id, + struct cpu_user_regs *regs) +{ + struct iommu *iommu = dev_id; + int reg, fault_index; + u32 fault_status; + unsigned long flags; + + dprintk(XENLOG_WARNING VTDPREFIX, + "iommu_page_fault: iommu->reg = %p\n", iommu->reg); + + spin_lock_irqsave(&iommu->register_lock, flags); + fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + /* FIXME: ignore advanced fault log */ + if (!(fault_status & DMA_FSTS_PPF)) + return; + fault_index = dma_fsts_fault_record_index(fault_status); + reg = cap_fault_reg_offset(iommu->cap); + while (1) { + u8 fault_reason; + u16 source_id; + u32 guest_addr; + int type; + u32 data; + + /* highest 32 bits */ + spin_lock_irqsave(&iommu->register_lock, flags); + data = dmar_readl(iommu->reg, reg + + fault_index * PRIMARY_FAULT_REG_LEN + 12); + if (!(data & DMA_FRCD_F)) { + spin_unlock_irqrestore(&iommu->register_lock, flags); + break; + } + + fault_reason = dma_frcd_fault_reason(data); + type = dma_frcd_type(data); + + data = dmar_readl(iommu->reg, reg + + fault_index * PRIMARY_FAULT_REG_LEN + 8); + source_id = dma_frcd_source_id(data); + + guest_addr = dmar_readq(iommu->reg, reg + + fault_index * PRIMARY_FAULT_REG_LEN); + guest_addr = dma_frcd_page_addr(guest_addr); + /* clear the fault */ + dmar_writel(iommu->reg, reg + + fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + iommu_page_fault_do_one(iommu, type, fault_reason, + source_id, guest_addr); + + fault_index++; + if (fault_index > cap_num_fault_regs(iommu->cap)) + fault_index = 0; + } + /* clear primary fault overflow */ + if (fault_status & DMA_FSTS_PFO) { + spin_lock_irqsave(&iommu->register_lock, flags); + dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO); + spin_unlock_irqrestore(&iommu->register_lock, flags); + } + return; +} + +static void dma_msi_unmask(unsigned int vector) +{ + struct iommu *iommu = vector_to_iommu[vector]; + unsigned long flags; + + /* unmask it */ + spin_lock_irqsave(&iommu->register_lock, flags); + dmar_writel(iommu->reg, DMAR_FECTL_REG, 0); + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + +static void dma_msi_mask(unsigned int vector) +{ + 
unsigned long flags; + struct iommu *iommu = vector_to_iommu[vector]; + + /* mask it */ + spin_lock_irqsave(&iommu->register_lock, flags); + dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM); + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + +static unsigned int dma_msi_startup(unsigned int vector) +{ + dma_msi_unmask(vector); + return 0; +} + +static void dma_msi_end(unsigned int vector) +{ + dma_msi_unmask(vector); + ack_APIC_irq(); +} + +static void dma_msi_data_init(struct iommu *iommu, int vector) +{ + u32 msi_data = 0; + unsigned long flags; + + /* Fixed, edge, assert mode. Follow MSI setting */ + msi_data |= vector & 0xff; + msi_data |= 1 << 14; + + spin_lock_irqsave(&iommu->register_lock, flags); + dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data); + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + +static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu) +{ + u64 msi_address; + unsigned long flags; + + /* Physical, dedicated cpu. Follow MSI setting */ + msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8)); + msi_address |= MSI_PHYSICAL_MODE << 2; + msi_address |= MSI_REDIRECTION_HINT_MODE << 3; + msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT; + + spin_lock_irqsave(&iommu->register_lock, flags); + dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address); + dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32)); + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + +static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest) +{ + struct iommu *iommu = vector_to_iommu[vector]; + dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest))); +} + +static struct hw_interrupt_type dma_msi_type = { + .typename = "DMA_MSI", + .startup = dma_msi_startup, + .shutdown = dma_msi_mask, + .enable = dma_msi_unmask, + .disable = dma_msi_mask, + .ack = dma_msi_mask, + .end = dma_msi_end, + .set_affinity = dma_msi_set_affinity, +}; + +int iommu_set_interrupt(struct iommu *iommu) +{ + int vector, ret; + unsigned long flags; + + vector = assign_irq_vector(AUTO_ASSIGN); + vector_to_iommu[vector] = iommu; + + /* VT-d fault is a MSI, make irq == vector */ + irq_vector[vector] = vector; + vector_irq[vector] = vector; + + if (!vector) { + gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n"); + return -EINVAL; + } + + spin_lock_irqsave(&irq_desc[vector].lock, flags); + irq_desc[vector].handler = &dma_msi_type; + spin_unlock_irqrestore(&irq_desc[vector].lock, flags); + set_intr_gate(vector, interrupt[vector]); + ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu); + if (ret) + gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n"); + return vector; +} + +struct iommu *iommu_alloc(void *hw_data) +{ + struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data; + struct iommu *iommu; + + if (nr_iommus > MAX_IOMMUS) { + gdprintk(XENLOG_ERR VTDPREFIX, + "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus); + return NULL; + } + + iommu = xmalloc(struct iommu); + if (!iommu) + return NULL; + memset(iommu, 0, sizeof(struct iommu)); + + set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address); + iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus); + dprintk(XENLOG_INFO VTDPREFIX, + "iommu_alloc: iommu->reg = %p drhd->address = %lx\n", + iommu->reg, drhd->address); + nr_iommus++; + + if (!iommu->reg) { + printk(KERN_ERR VTDPREFIX "IOMMU: can't mapping the region\n"); + goto error; + } + + iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG); + iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG); 
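+    /*
+     * Illustrative comment, not in the original changeset: the cap_*() and
+     * ecap_*() accessors used throughout this file (cap_caching_mode(),
+     * cap_rwbf(), cap_sagaw(), ecap_coherent(), ecap_pass_thru(), ...)
+     * decode the two read-only registers cached here, presumably via the
+     * definitions in asm/hvm/vmx/intel-iommu.h included above.
+     */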
+ + spin_lock_init(&iommu->lock); + spin_lock_init(&iommu->register_lock); + + drhd->iommu = iommu; + return iommu; +error: + xfree(iommu); + return NULL; +} + +static void free_iommu(struct iommu *iommu) +{ + if (!iommu) + return; + if (iommu->root_entry) + free_xenheap_page((void *)iommu->root_entry); + if (iommu->reg) + iounmap(iommu->reg); + free_irq(iommu->vector); + xfree(iommu); +} + +#define guestwidth_to_adjustwidth(gaw) ({ \ + int agaw; \ + int r = (gaw - 12) % 9; \ + if (r == 0) \ + agaw = gaw; \ + else \ + agaw = gaw + 9 - r; \ + if (agaw > 64) \ + agaw = 64; \ + agaw; }) +int iommu_domain_init(struct domain *domain) +{ + struct hvm_iommu *hd = domain_hvm_iommu(domain); + struct iommu *iommu = NULL; + int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH; + int adjust_width, agaw; + unsigned long sagaw; + struct acpi_drhd_unit *drhd; + + if (list_empty(&acpi_drhd_units)) + return 0; + spin_lock_init(&hd->mapping_lock); + spin_lock_init(&hd->iommu_list_lock); + INIT_LIST_HEAD(&hd->pdev_list); + + for_each_drhd_unit(drhd) { + if (drhd->iommu) + iommu = drhd->iommu; + else + iommu = iommu_alloc(drhd); + } + + /* calculate AGAW */ + if (guest_width > cap_mgaw(iommu->cap)) + guest_width = cap_mgaw(iommu->cap); + adjust_width = guestwidth_to_adjustwidth(guest_width); + agaw = width_to_agaw(adjust_width); + /* FIXME: hardware doesn't support it, choose a bigger one? */ + sagaw = cap_sagaw(iommu->cap); + if (!test_bit(agaw, &sagaw)) { + gdprintk(XENLOG_ERR VTDPREFIX, + "IOMMU: hardware doesn't support the agaw\n"); + agaw = find_next_bit(&sagaw, 5, agaw); + if (agaw >= 5) + return -ENODEV; + } + hd->agaw = agaw; + return 0; +} + +static int domain_context_mapping_one( + struct domain *domain, + struct iommu *iommu, + u8 bus, u8 devfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(domain); + struct context_entry *context; + unsigned long flags; + int ret = 0; + + context = device_to_context_entry(iommu, bus, devfn); + if (!context) { + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_mapping_one:context == NULL:bdf = %x:%x:%x \n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + return -ENOMEM; + } + spin_lock_irqsave(&iommu->lock, flags); + if (context_present(*context)) { + spin_unlock_irqrestore(&iommu->lock, flags); + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_mapping_one:context present:bdf=%x:%x:%x\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + return 0; + } + +#ifdef VTD_DEBUG + dprintk(XENLOG_INFO VTDPREFIX, + "context_mapping_one_1-%x:%x:%x-*context = %lx %lx\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn), context->hi, context->lo); +#endif + + /* + * domain_id 0 is not valid on Intel's IOMMU, force domain_id to + * be 1 based as required by intel's iommu hw. 
+ */ + context_set_domain_id(*context, domain->domain_id); + context_set_address_width(*context, hd->agaw); + + if (ecap_pass_thru(iommu->ecap)) + context_set_translation_type(*context, CONTEXT_TT_PASS_THRU); + else { + context_set_address_root(*context, virt_to_maddr(hd->pgd)); + context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); + } + + context_set_fault_enable(*context); + context_set_present(*context); + iommu_flush_cache_entry(iommu, context); + +#ifdef VTD_DEBUG + dprintk(XENLOG_INFO VTDPREFIX, + "context_mapping_one_2-%x:%x:%x-*context=%lx %lx hd->pgd = %p\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + context->hi, context->lo, hd->pgd); +#endif + + if (iommu_flush_context_device(iommu, domain->domain_id, + (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1)) + iommu_flush_write_buffer(iommu); + else + iommu_flush_iotlb_dsi(iommu, domain->domain_id, 0); + spin_unlock_irqrestore(&iommu->lock, flags); + return ret; +} + +static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap) +{ + u8 id; + int ttl = 48; + + while (ttl--) { + pos = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos); + if (pos < 0x40) + break; + pos &= ~3; + id = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + pos + PCI_CAP_LIST_ID); + + if (id == 0xff) + break; + if (id == cap) + return pos; + pos += PCI_CAP_LIST_NEXT; + } + return 0; +} + +#define PCI_BASE_CLASS_BRIDGE 0x06 +#define PCI_CLASS_BRIDGE_PCI 0x0604 + +#define DEV_TYPE_PCIe_ENDPOINT 1 +#define DEV_TYPE_PCI_BRIDGE 2 +#define DEV_TYPE_PCI 3 + +int pdev_type(struct pci_dev *dev) +{ + u16 class_device; + u16 status; + + class_device = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE); + if (class_device == PCI_CLASS_BRIDGE_PCI) + return DEV_TYPE_PCI_BRIDGE; + + status = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), PCI_STATUS); + + if (!(status & PCI_STATUS_CAP_LIST)) + return DEV_TYPE_PCI; + + if (__pci_find_next_cap(dev->bus, dev->devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP)) + return DEV_TYPE_PCIe_ENDPOINT; + + return DEV_TYPE_PCI; +} + +#define MAX_BUSES 256 +struct pci_dev bus2bridge[MAX_BUSES]; + +static int domain_context_mapping( + struct domain *domain, + struct iommu *iommu, + struct pci_dev *pdev) +{ + int ret = 0; + int dev, func, sec_bus, sub_bus; + u32 type; + + type = pdev_type(pdev); + if (type == DEV_TYPE_PCI_BRIDGE) { + sec_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS); + + if (bus2bridge[sec_bus].bus == 0) { + bus2bridge[sec_bus].bus = pdev->bus; + bus2bridge[sec_bus].devfn = pdev->devfn; + } + + sub_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS); + + if (sec_bus != sub_bus) { + dprintk(XENLOG_INFO VTDPREFIX, + "context_mapping: nested PCI bridge not supported\n"); + dprintk(XENLOG_INFO VTDPREFIX, + " bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n", + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + sec_bus, sub_bus); + } + } + + if (type == DEV_TYPE_PCIe_ENDPOINT) { + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_mapping:PCIe : bdf = %x:%x:%x\n", + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + ret = domain_context_mapping_one(domain, iommu, + (u8)(pdev->bus), (u8) (pdev->devfn)); + } + + /* PCI devices */ + if (type == DEV_TYPE_PCI) { + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_mapping:PCI: bdf = %x:%x:%x\n", + pdev->bus, PCI_SLOT(pdev->devfn), 
PCI_FUNC(pdev->devfn)); + + if (pdev->bus == 0) + ret = domain_context_mapping_one(domain, iommu, + (u8)(pdev->bus), (u8) (pdev->devfn)); + else { + if (bus2bridge[pdev->bus].bus != 0) + gdprintk(XENLOG_ERR VTDPREFIX, + "domain_context_mapping:bus2bridge[pdev->bus].bus==0\n"); + + ret = domain_context_mapping_one(domain, iommu, + (u8)(bus2bridge[pdev->bus].bus), + (u8)(bus2bridge[pdev->bus].devfn)); + + /* now map everything behind the PCI bridge */ + for (dev = 0; dev < 32; dev++) { + for (func = 0; func < 8; func++) { + ret = domain_context_mapping_one(domain, iommu, + pdev->bus, (u8)PCI_DEVFN(dev, func)); + if (ret) + return ret; + } + } + } + } + return ret; +} + +static int domain_context_unmap_one( + struct domain *domain, + struct iommu *iommu, + u8 bus, u8 devfn) +{ + struct context_entry *context; + unsigned long flags; + + context = device_to_context_entry(iommu, bus, devfn); + if (!context) { + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + return -ENOMEM; + } + spin_lock_irqsave(&iommu->lock, flags); + if (!context_present(*context)) { + spin_unlock_irqrestore(&iommu->lock, flags); + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_unmap_one-%x:%x:%x- context NOT present:return\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + return 0; + } + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_unmap_one_1:bdf = %x:%x:%x\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + + context_clear_present(*context); + context_clear_entry(*context); + iommu_flush_cache_entry(iommu, context); + iommu_flush_context_global(iommu, 0); + iommu_flush_iotlb_global(iommu, 0); + spin_unlock_irqrestore(&iommu->lock, flags); + + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_unmap_one_2:bdf = %x:%x:%x\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + + return 0; +} + +static int domain_context_unmap( + struct domain *domain, + struct iommu *iommu, + struct pci_dev *pdev) +{ + int ret = 0; + int dev, func, sec_bus, sub_bus; + u32 type; + + type = pdev_type(pdev); + if (type == DEV_TYPE_PCI_BRIDGE) { + sec_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS); + sub_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS); + + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_unmap:BRIDGE:%x:%x:%x sec_bus=%x sub_bus=%x\n", + pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), sec_bus, sub_bus); + } + + if (type == DEV_TYPE_PCIe_ENDPOINT) { + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_unmap:PCIe : bdf = %x:%x:%x\n", + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + ret = domain_context_unmap_one(domain, iommu, + (u8)(pdev->bus), (u8) (pdev->devfn)); + } + + /* PCI devices */ + if (type == DEV_TYPE_PCI) { + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_unmap:PCI: bdf = %x:%x:%x\n", + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + if (pdev->bus == 0) + ret = domain_context_unmap_one(domain, iommu, + (u8)(pdev->bus), (u8) (pdev->devfn)); + else { + if (bus2bridge[pdev->bus].bus != 0) + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_mapping:bus2bridge[pdev->bus].bus==0\n"); + + ret = domain_context_unmap_one(domain, iommu, + (u8)(bus2bridge[pdev->bus].bus), + (u8)(bus2bridge[pdev->bus].devfn)); + + /* now map everything behind the PCI bridge */ + for (dev = 0; dev < 32; dev++) { + for (func = 0; func < 8; func++) { + ret = domain_context_unmap_one(domain, iommu, + pdev->bus, 
(u8)PCI_DEVFN(dev, func)); + if (ret) + return ret; + } + } + } + } + return ret; +} + +void reassign_device_ownership( + struct domain *source, + struct domain *target, + u8 bus, u8 devfn) +{ + struct hvm_iommu *source_hd = domain_hvm_iommu(source); + struct hvm_iommu *target_hd = domain_hvm_iommu(target); + struct pci_dev *pdev; + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + int status; + unsigned long flags; + + gdprintk(XENLOG_ERR VTDPREFIX, + "reassign_device-%x:%x:%x- source = %d target = %d\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + source->domain_id, target->domain_id); + + for_each_pdev(source, pdev) { + if ( (pdev->bus != bus) || (pdev->devfn != devfn) ) + continue; + + pdev->bus = bus; + pdev->devfn = devfn; + drhd = acpi_find_matched_drhd_unit(pdev); + iommu = drhd->iommu; + domain_context_unmap(source, iommu, pdev); + + /* + * move pci device from the source domain to target domain. + */ + spin_lock_irqsave(&source_hd->iommu_list_lock, flags); + spin_lock_irqsave(&target_hd->iommu_list_lock, flags); + list_move(&pdev->list, &target_hd->pdev_list); + spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags); + spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags); + + status = domain_context_mapping(target, iommu, pdev); + if (status != 0) + gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n"); + + /* + * We are done. + */ + break; + } +} + +void return_devices_to_dom0(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct pci_dev *pdev; + + while (!list_empty(&hd->pdev_list)) { + pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list); + dprintk(XENLOG_INFO VTDPREFIX, + "return_devices_to_dom0: bdf = %x:%x:%x\n", + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn); + } + +#ifdef VTD_DEBUG + for_each_pdev(dom0, pdev) { + dprintk(XENLOG_INFO VTDPREFIX, + "return_devices_to_dom0:%x: bdf = %x:%x:%x\n", + dom0->domain_id, pdev->bus, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + } +#endif +} + +void iommu_domain_teardown(struct domain *d) +{ + if (list_empty(&acpi_drhd_units)) + return; + +#if CONFIG_PAGING_LEVELS == 3 + { + struct hvm_iommu *hd = domain_hvm_iommu(d); + int level = agaw_to_level(hd->agaw); + struct dma_pte *pgd = NULL; + + switch (level) + { + case VTD_PAGE_TABLE_LEVEL_3: + if ( hd->pgd ) + free_xenheap_page((void *)hd->pgd); + break; + case VTD_PAGE_TABLE_LEVEL_4: + if ( hd->pgd ) + { + pgd = hd->pgd; + if ( pgd[0].val != 0 ) + free_xenheap_page((void*)maddr_to_virt( + dma_pte_addr(pgd[0]))); + } + break; + default: + gdprintk(XENLOG_ERR VTDPREFIX, + "Unsupported p2m table sharing level!\n"); + break; + } + } +#endif + return_devices_to_dom0(d); +} + +static int domain_context_mapped(struct domain *domain, struct pci_dev *pdev) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + int ret; + + for_each_drhd_unit(drhd) { + iommu = drhd->iommu; + ret = device_context_mapped(iommu, pdev->bus, pdev->devfn); + if (ret) + return ret; + } + return 0; +} + +int iommu_map_page(struct domain *d, paddr_t gfn, paddr_t mfn) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + struct dma_pte *pte = NULL; + + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); + iommu = drhd->iommu; + + /* do nothing if dom0 and iommu supports pass thru */ + if (ecap_pass_thru(iommu->ecap) && (d->domain_id == 0)) + return 0; + + pte = addr_to_dma_pte(d, gfn << PAGE_SHIFT_4K); + if (!pte) + return -ENOMEM; + dma_set_pte_addr(*pte, mfn << 
PAGE_SHIFT_4K); + dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE); + iommu_flush_cache_entry(iommu, pte); + + for_each_drhd_unit(drhd) { + iommu = drhd->iommu; + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, d->domain_id, + gfn << PAGE_SHIFT_4K, 1, 0); + else if (cap_rwbf(iommu->cap)) + iommu_flush_write_buffer(iommu); + } + return 0; +} + +int iommu_unmap_page(struct domain *d, dma_addr_t gfn) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + struct dma_pte *pte = NULL; + + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); + iommu = drhd->iommu; + + /* do nothing if dom0 and iommu supports pass thru */ + if (ecap_pass_thru(iommu->ecap) && (d->domain_id == 0)) + return 0; + + /* get last level pte */ + pte = dma_addr_level_pte(d, gfn << PAGE_SHIFT_4K, 1); + dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K); + + return 0; +} + +int iommu_page_mapping(struct domain *domain, dma_addr_t iova, + void *hpa, size_t size, int prot) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + unsigned long start_pfn, end_pfn; + struct dma_pte *pte = NULL; + int index; + + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); + iommu = drhd->iommu; + if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) + return -EINVAL; + iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K; + start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K); + end_pfn = (unsigned long) + ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K); + index = 0; + while (start_pfn < end_pfn) { + pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index); + if (!pte) + return -ENOMEM; + dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K); + dma_set_pte_prot(*pte, prot); + iommu_flush_cache_entry(iommu, pte); + start_pfn++; + index++; + } + + for_each_drhd_unit(drhd) { + iommu = drhd->iommu; + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, domain->domain_id, iova, size, 0); + else if (cap_rwbf(iommu->cap)) + iommu_flush_write_buffer(iommu); + } + return 0; +} + +int iommu_page_unmapping(struct domain *domain, dma_addr_t addr, size_t size) +{ + struct dma_pte *pte = NULL; + + /* get last level pte */ + pte = dma_addr_level_pte(domain, addr, 1); + dma_pte_clear_range(domain, addr, addr + size); + + return 0; +} + +void iommu_flush(struct domain *d, dma_addr_t gfn, u64 *p2m_entry) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu = NULL; + struct dma_pte *pte = (struct dma_pte *) p2m_entry; + + for_each_drhd_unit(drhd) { + iommu = drhd->iommu; + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, d->domain_id, + gfn << PAGE_SHIFT_4K, 1, 0); + else if (cap_rwbf(iommu->cap)) + iommu_flush_write_buffer(iommu); + } + iommu_flush_cache_entry(iommu, pte); +} + +int +prepare_device(struct domain *domain, struct pci_dev dev) +{ + return 0; +} + +static int iommu_prepare_rmrr_dev( + struct domain *d, + struct acpi_rmrr_unit *rmrr, + struct pci_dev *pdev) +{ + struct acpi_drhd_unit *drhd; + unsigned long size; + int ret; + + /* page table init */ + size = rmrr->end_address - rmrr->base_address + 1; + ret = iommu_page_mapping(d, rmrr->base_address, + (void *)rmrr->base_address, size, + DMA_PTE_READ|DMA_PTE_WRITE); + if (ret) + return ret; + + if (domain_context_mapped(d, pdev) == 0) { + drhd = acpi_find_matched_drhd_unit(pdev); + ret = domain_context_mapping(d, drhd->iommu, pdev); + if (!ret) + return 0; + } + return ret; +} + +void __init setup_dom0_devices(void) +{ + struct hvm_iommu *hd = domain_hvm_iommu(dom0); + struct acpi_drhd_unit *drhd; + struct 
pci_dev *pdev; + int bus, dev, func; + u32 l; + u8 hdr_type; + int ret; + +#ifdef DEBUG_VTD_CONTEXT_ENTRY + for (bus = 0; bus < 256; bus++) { + for (dev = 0; dev < 32; dev++) { + for (func = 0; func < 8; func++) { + struct context_entry *context; + struct pci_dev device; + + device.bus = bus; + device.devfn = PCI_DEVFN(dev, func); + drhd = acpi_find_matched_drhd_unit(&device); + context = device_to_context_entry(drhd->iommu, + bus, PCI_DEVFN(dev, func)); + if ((context->lo != 0) || (context->hi != 0)) + dprintk(XENLOG_INFO VTDPREFIX, + "setup_dom0_devices-%x:%x:%x- context not 0\n", + bus, dev, func); + } + } + } +#endif + + for (bus = 0; bus < 256; bus++) { + for (dev = 0; dev < 32; dev++) { + for (func = 0; func < 8; func++) { + l = read_pci_config(bus, dev, func, PCI_VENDOR_ID); + /* some broken boards return 0 or ~0 if a slot is empty: */ + if (l == 0xffffffff || l == 0x00000000 || + l == 0x0000ffff || l == 0xffff0000) + continue; + pdev = xmalloc(struct pci_dev); + pdev->bus = bus; + pdev->devfn = PCI_DEVFN(dev, func); + list_add_tail(&pdev->list, &hd->pdev_list); + + drhd = acpi_find_matched_drhd_unit(pdev); + ret = domain_context_mapping(dom0, drhd->iommu, pdev); + if (ret != 0) + gdprintk(XENLOG_ERR VTDPREFIX, + "domain_context_mapping failed\n"); + + hdr_type = read_pci_config(bus, dev, func, PCI_HEADER_TYPE); + // if ((hdr_type & 0x8) == 0) + // break; + } + } + } + for_each_pdev(dom0, pdev) { + dprintk(XENLOG_INFO VTDPREFIX, + "setup_dom0_devices: bdf = %x:%x:%x\n", + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + } +} + +void clear_fault_bit(struct iommu *iommu) +{ + u64 val; + + val = dmar_readq( + iommu->reg, + cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8); + dmar_writeq( + iommu->reg, + cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8, + val); + dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO); +} + +/* + * Called from ACPI discovery code, once all DMAR's and RMRR's are done + * scanning, we need to run through and initialize as much of it as necessary + */ +int vtd_enable = 1; +static void setup_vtd_enable(char *s) +{ + if ( !strcmp(s, "0") ) + vtd_enable = 0; + else if ( !strcmp(s, "1") ) + vtd_enable = 1; + else + dprintk(XENLOG_INFO VTDPREFIX, + "Unknown vtd_enable value specified: '%s'\n", s); + dprintk(XENLOG_INFO VTDPREFIX, "vtd_enable = %x\n", vtd_enable); +} +custom_param("vtd", setup_vtd_enable); + +static int init_vtd_hw(void) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + int ret; + + for_each_drhd_unit(drhd) { + iommu = drhd->iommu; + ret = iommu_set_root_entry(iommu); + if (ret) { + gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n"); + return -EIO; + } + } + return 0; +} + +static int enable_vtd_translation(void) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + int vector = 0; + + for_each_drhd_unit(drhd) { + iommu = drhd->iommu; + vector = iommu_set_interrupt(iommu); + dma_msi_data_init(iommu, vector); + dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map))); + iommu->vector = vector; + clear_fault_bit(iommu); + if (vtd_enable && iommu_enable_translation(iommu)) + return -EIO; + } + return 0; +} + +static void setup_dom0_rmrr(void) +{ + struct acpi_rmrr_unit *rmrr; + struct pci_dev *pdev; + int ret; + + for_each_rmrr_device(rmrr, pdev) + ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev); + if (ret) + gdprintk(XENLOG_ERR VTDPREFIX, + "IOMMU: mapping reserved region failed\n"); + end_for_each_rmrr_device(rmrr, pdev) +} + +int iommu_setup(void) +{ + struct hvm_iommu *hd = 
domain_hvm_iommu(dom0); + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + + if (list_empty(&acpi_drhd_units)) + return 0; + + INIT_LIST_HEAD(&hd->pdev_list); + + /* start from scratch */ + flush_all(); + + /* setup clflush size */ + x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8; + + /* + * allocate IO page directory page for the domain. + */ + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); + iommu = drhd->iommu; + + hd->pgd = (struct dma_pte *)alloc_xenheap_page(); + memset((u8*)hd->pgd, 0, PAGE_SIZE); + + if (init_vtd_hw()) + goto error; + setup_dom0_devices(); + setup_dom0_rmrr(); + if (enable_vtd_translation()) + goto error; + + return 0; + +error: + printk("iommu_setup() failed\n"); + for_each_drhd_unit(drhd) { + iommu = drhd->iommu; + free_iommu(iommu); + } + return -EIO; +} + +int assign_device(struct domain *d, u8 bus, u8 devfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct acpi_rmrr_unit *rmrr; + struct pci_dev *pdev; + int ret = 0; + + if (list_empty(&acpi_drhd_units)) + return ret; + + dprintk(XENLOG_INFO VTDPREFIX, + "assign_device: bus = %x dev = %x func = %x\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + + reassign_device_ownership(dom0, d, bus, devfn); + + /* setup rmrr identify mapping just once per domain */ + if (list_empty(&hd->pdev_list)) + for_each_rmrr_device(rmrr, pdev) + ret = iommu_prepare_rmrr_dev(d, rmrr, pdev); + if (ret) + gdprintk(XENLOG_ERR VTDPREFIX, + "IOMMU: mapping reserved region failed\n"); + end_for_each_rmrr_device(rmrr, pdev) + return ret; +} + +void iommu_set_pgd(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + unsigned long p2m_table; + + if (hd->pgd) { + gdprintk(XENLOG_INFO VTDPREFIX, + "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd); + hd->pgd = NULL; + } + p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table)); + +#if CONFIG_PAGING_LEVELS == 3 + if ( !hd->pgd ) + { + int level = agaw_to_level(hd->agaw); + struct dma_pte *pmd = NULL; + struct dma_pte *pgd = NULL; + struct dma_pte *pte = NULL; + l3_pgentry_t *l3e; + unsigned long flags; + int i; + + spin_lock_irqsave(&hd->mapping_lock, flags); + if (!hd->pgd) { + pgd = (struct dma_pte *)alloc_xenheap_page(); + memset((u8*)pgd, 0, PAGE_SIZE); + if (!hd->pgd) + hd->pgd = pgd; + else /* somebody is fast */ + free_xenheap_page((void *) pgd); + } + + l3e = map_domain_page(p2m_table); + switch(level) + { + case VTD_PAGE_TABLE_LEVEL_3: /* Weybridge */ + /* We only support 8 entries for the PAE L3 p2m table */ + for ( i = 0; i < 8 ; i++ ) + { + /* Don't create new L2 entry, use ones from p2m table */ + pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW; + } + break; + + case VTD_PAGE_TABLE_LEVEL_4: /* Stoakley */ + /* We allocate one more page for the top vtd page table. 
*/ + pmd = (struct dma_pte *)alloc_xenheap_page(); + memset((u8*)pmd, 0, PAGE_SIZE); + pte = &pgd[0]; + dma_set_pte_addr(*pte, virt_to_maddr(pmd)); + dma_set_pte_readable(*pte); + dma_set_pte_writable(*pte); + + for ( i = 0; i < 8; i++ ) + { + /* Don't create new L2 entry, use ones from p2m table */ + pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW; + } + break; + default: + gdprintk(XENLOG_ERR VTDPREFIX, + "iommu_set_pgd:Unsupported p2m table sharing level!\n"); + break; + } + unmap_domain_page(l3e); + spin_unlock_irqrestore(&hd->mapping_lock, flags); + } +#elif CONFIG_PAGING_LEVELS == 4 + if ( !hd->pgd ) + { + int level = agaw_to_level(hd->agaw); + l3_pgentry_t *l3e; + mfn_t pgd_mfn; + + switch (level) + { + case VTD_PAGE_TABLE_LEVEL_3: + l3e = map_domain_page(p2m_table); + if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) + { + gdprintk(XENLOG_ERR VTDPREFIX, + "iommu_set_pgd: second level wasn't there\n"); + unmap_domain_page(l3e); + return; + } + pgd_mfn = _mfn(l3e_get_pfn(*l3e)); + unmap_domain_page(l3e); + hd->pgd = maddr_to_virt(pagetable_get_paddr( + pagetable_from_mfn(pgd_mfn))); + break; + + case VTD_PAGE_TABLE_LEVEL_4: + pgd_mfn = _mfn(p2m_table); + hd->pgd = maddr_to_virt(pagetable_get_paddr( + pagetable_from_mfn(pgd_mfn))); + break; + default: + gdprintk(XENLOG_ERR VTDPREFIX, + "iommu_set_pgd:Unsupported p2m table sharing level!\n"); + break; + } + } +#endif + gdprintk(XENLOG_INFO VTDPREFIX, + "iommu_set_pgd: hd->pgd = %p\n", hd->pgd); +} + + +u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS]; +int iommu_suspend(void) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + int i = 0; + + if (!vtd_enable) + return 0; + + flush_all(); + for_each_drhd_unit(drhd) { + iommu = drhd->iommu; + iommu_state[DMAR_RTADDR_REG * i] = + (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG); + iommu_state[DMAR_FECTL_REG * i] = + (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG); + iommu_state[DMAR_FEDATA_REG * i] = + (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG); + iommu_state[DMAR_FEADDR_REG * i] = + (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG); + iommu_state[DMAR_FEUADDR_REG * i] = + (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG); + iommu_state[DMAR_PLMBASE_REG * i] = + (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG); + iommu_state[DMAR_PLMLIMIT_REG * i] = + (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG); + iommu_state[DMAR_PHMBASE_REG * i] = + (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG); + iommu_state[DMAR_PHMLIMIT_REG * i] = + (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG); + i++; + } + + return 0; +} + +int iommu_resume(void) +{ + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + int i = 0; + + if (!vtd_enable) + return 0; + + flush_all(); + + init_vtd_hw(); + for_each_drhd_unit(drhd) { + iommu = drhd->iommu; + dmar_writeq( iommu->reg, DMAR_RTADDR_REG, + (u64) iommu_state[DMAR_RTADDR_REG * i]); + dmar_writel(iommu->reg, DMAR_FECTL_REG, + (u32) iommu_state[DMAR_FECTL_REG * i]); + dmar_writel(iommu->reg, DMAR_FEDATA_REG, + (u32) iommu_state[DMAR_FEDATA_REG * i]); + dmar_writel(iommu->reg, DMAR_FEADDR_REG, + (u32) iommu_state[DMAR_FEADDR_REG * i]); + dmar_writel(iommu->reg, DMAR_FEUADDR_REG, + (u32) iommu_state[DMAR_FEUADDR_REG * i]); + dmar_writel(iommu->reg, DMAR_PLMBASE_REG, + (u32) iommu_state[DMAR_PLMBASE_REG * i]); + dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG, + (u32) iommu_state[DMAR_PLMLIMIT_REG * i]); + dmar_writeq(iommu->reg, DMAR_PHMBASE_REG, + (u64) iommu_state[DMAR_PHMBASE_REG * i]); + dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG, + (u64) iommu_state[DMAR_PHMLIMIT_REG * i]); + + if 
(iommu_enable_translation(iommu)) + return -EIO; + i++; + } + return 0; +} diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/io.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/io.c Fri Sep 14 16:40:49 2007 +0100 @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> + * Copyright (C) Xiaohui Xin <xiaohui.xin@xxxxxxxxx> + */ + +#include <xen/init.h> +#include <xen/config.h> +#include <xen/init.h> +#include <xen/mm.h> +#include <xen/lib.h> +#include <xen/errno.h> +#include <xen/trace.h> +#include <xen/event.h> +#include <xen/hypercall.h> +#include <asm/current.h> +#include <asm/cpufeature.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include <asm/apic.h> +#include <asm/paging.h> +#include <asm/shadow.h> +#include <asm/p2m.h> +#include <asm/hvm/hvm.h> +#include <asm/hvm/support.h> +#include <asm/hvm/vpt.h> +#include <asm/hvm/vpic.h> +#include <asm/hvm/vlapic.h> +#include <public/sched.h> +#include <xen/iocap.h> +#include <public/hvm/ioreq.h> + +int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq) +{ + uint32_t device, intx; + uint32_t link, isa_irq; + struct hvm_irq *hvm_irq; + + if (!vtd_enabled || (d == dom0)) + return 0; + + if (d->arch.hvm_domain.irq.mirq[mirq].valid) + { + device = d->arch.hvm_domain.irq.mirq[mirq].device; + intx = d->arch.hvm_domain.irq.mirq[mirq].intx; + link = hvm_pci_intx_link(device, intx); + hvm_irq = &d->arch.hvm_domain.irq; + isa_irq = hvm_irq->pci_link.route[link]; + + if ( !d->arch.hvm_domain.irq.girq[isa_irq].valid ) + { + d->arch.hvm_domain.irq.girq[isa_irq].valid = 1; + d->arch.hvm_domain.irq.girq[isa_irq].device = device; + d->arch.hvm_domain.irq.girq[isa_irq].intx = intx; + d->arch.hvm_domain.irq.girq[isa_irq].machine_gsi = mirq; + } + + if ( !test_and_set_bit(mirq, d->arch.hvm_domain.irq.dirq_mask) ) + { + vcpu_kick(d->vcpu[0]); + return 1; + } + else + dprintk(XENLOG_INFO, "Want to pending mirq, but failed\n"); + } + return 0; +} + +void hvm_dpci_eoi(unsigned int guest_gsi, union vioapic_redir_entry *ent) +{ + struct domain *d = current->domain; + uint32_t device, intx, machine_gsi; + irq_desc_t *desc; + + if (d->arch.hvm_domain.irq.girq[guest_gsi].valid) + { + device = d->arch.hvm_domain.irq.girq[guest_gsi].device; + intx = d->arch.hvm_domain.irq.girq[guest_gsi].intx; + machine_gsi = d->arch.hvm_domain.irq.girq[guest_gsi].machine_gsi; + gdprintk(XENLOG_INFO, "hvm_dpci_eoi:: device %x intx %x\n", + device, intx); + hvm_pci_intx_deassert(d, device, intx); + if ( (ent == NULL) || (ent && ent->fields.mask == 0) ) { + desc = &irq_desc[irq_to_vector(machine_gsi)]; + desc->handler->end(irq_to_vector(machine_gsi)); + } + } +} + +int release_devices(struct domain *d) +{ + struct hvm_domain *hd = &d->arch.hvm_domain; + uint32_t i; + int ret = 0; + + if (!vtd_enabled) + return ret; + 
+ /* unbind irq */ + for (i = 0; i < NR_IRQS; i++) { + if (hd->irq.mirq[i].valid) + ret = pirq_guest_unbind(d, i); + } + iommu_domain_teardown(d); + return ret; +} diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/msi.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/msi.h Fri Sep 14 16:40:49 2007 +0100 @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2003-2004 Intel + * Copyright (C) Tom Long Nguyen (tom.l.nguyen@xxxxxxxxx) + */ + +#ifndef MSI_H +#define MSI_H + +/* + * Assume the maximum number of hot plug slots supported by the system is about + * ten. The worstcase is that each of these slots is hot-added with a device, + * which has two MSI/MSI-X capable functions. To avoid any MSI-X driver, which + * attempts to request all available vectors, NR_HP_RESERVED_VECTORS is defined + * as below to ensure at least one message is assigned to each detected MSI/ + * MSI-X device function. + */ +#define NR_HP_RESERVED_VECTORS 20 + +extern int vector_irq[NR_VECTORS]; +extern void (*interrupt[NR_IRQS])(void); +extern int pci_vector_resources(int last, int nr_released); + +/* + * MSI-X Address Register + */ +#define PCI_MSIX_FLAGS_QSIZE 0x7FF +#define PCI_MSIX_FLAGS_ENABLE (1 << 15) +#define PCI_MSIX_FLAGS_BIRMASK (7 << 0) +#define PCI_MSIX_FLAGS_BITMASK (1 << 0) + +#define PCI_MSIX_ENTRY_SIZE 16 +#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0 +#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4 +#define PCI_MSIX_ENTRY_DATA_OFFSET 8 +#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12 + +#define msi_control_reg(base) (base + PCI_MSI_FLAGS) +#define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO) +#define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI) +#define msi_data_reg(base, is64bit) \ + ( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 ) +#define msi_mask_bits_reg(base, is64bit) \ + ( (is64bit == 1) ? 
base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4) +#define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE +#define multi_msi_capable(control) \ + (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1)) +#define multi_msi_enable(control, num) \ + control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE); +#define is_64bit_address(control) (control & PCI_MSI_FLAGS_64BIT) +#define is_mask_bit_support(control) (control & PCI_MSI_FLAGS_MASKBIT) +#define msi_enable(control, num) multi_msi_enable(control, num); \ + control |= PCI_MSI_FLAGS_ENABLE + +#define msix_table_offset_reg(base) (base + 0x04) +#define msix_pba_offset_reg(base) (base + 0x08) +#define msix_enable(control) control |= PCI_MSIX_FLAGS_ENABLE +#define msix_disable(control) control &= ~PCI_MSIX_FLAGS_ENABLE +#define msix_table_size(control) ((control & PCI_MSIX_FLAGS_QSIZE)+1) +#define multi_msix_capable msix_table_size +#define msix_unmask(address) (address & ~PCI_MSIX_FLAGS_BITMASK) +#define msix_mask(address) (address | PCI_MSIX_FLAGS_BITMASK) +#define msix_is_pending(address) (address & PCI_MSIX_FLAGS_PENDMASK) + +/* + * MSI Defined Data Structures + */ +#define MSI_ADDRESS_HEADER 0xfee +#define MSI_ADDRESS_HEADER_SHIFT 12 +#define MSI_ADDRESS_HEADER_MASK 0xfff000 +#define MSI_ADDRESS_DEST_ID_MASK 0xfff0000f +#define MSI_TARGET_CPU_MASK 0xff +#define MSI_TARGET_CPU_SHIFT 12 +#define MSI_DELIVERY_MODE 0 +#define MSI_LEVEL_MODE 1 /* Edge always assert */ +#define MSI_TRIGGER_MODE 0 /* MSI is edge sensitive */ +#define MSI_PHYSICAL_MODE 0 +#define MSI_LOGICAL_MODE 1 +#define MSI_REDIRECTION_HINT_MODE 0 + +#define __LITTLE_ENDIAN_BITFIELD 1 + +struct msg_data { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u32 vector : 8; + __u32 delivery_mode : 3; /* 000b: FIXED | 001b: lowest prior */ + __u32 reserved_1 : 3; + __u32 level : 1; /* 0: deassert | 1: assert */ + __u32 trigger : 1; /* 0: edge | 1: level */ + __u32 reserved_2 : 16; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u32 reserved_2 : 16; + __u32 trigger : 1; /* 0: edge | 1: level */ + __u32 level : 1; /* 0: deassert | 1: assert */ + __u32 reserved_1 : 3; + __u32 delivery_mode : 3; /* 000b: FIXED | 001b: lowest prior */ + __u32 vector : 8; +#else +#error "Bitfield endianness not defined! Check your byteorder.h" +#endif +} __attribute__ ((packed)); + +struct msg_address { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u32 reserved_1 : 2; + __u32 dest_mode : 1; /*0:physic | 1:logic */ + __u32 redirection_hint: 1; /*0: dedicated CPU + 1: lowest priority */ + __u32 reserved_2 : 4; + __u32 dest_id : 24; /* Destination ID */ +#elif defined(__BIG_ENDIAN_BITFIELD) + __u32 dest_id : 24; /* Destination ID */ + __u32 reserved_2 : 4; + __u32 redirection_hint: 1; /*0: dedicated CPU + 1: lowest priority */ + __u32 dest_mode : 1; /*0:physic | 1:logic */ + __u32 reserved_1 : 2; +#else +#error "Bitfield endianness not defined! Check your byteorder.h" +#endif + }u; + __u32 value; + }lo_address; + __u32 hi_address; +} __attribute__ ((packed)); + +#endif /* MSI_H */ diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/pci-direct.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/pci-direct.h Fri Sep 14 16:40:49 2007 +0100 @@ -0,0 +1,48 @@ +#ifndef ASM_PCI_DIRECT_H +#define ASM_PCI_DIRECT_H 1 + +#include <xen/types.h> +#include <asm/io.h> + +/* Direct PCI access. This is used for PCI accesses in early boot before + the PCI subsystem works. */ + +#define PDprintk(x...) 
+ +static inline u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) +{ + u32 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inl(0xcfc); + if (v != 0xffffffff) + PDprintk("%x reading 4 from %x: %x\n", slot, offset, v); + return v; +} + +static inline u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) +{ + u8 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inb(0xcfc + (offset&3)); + PDprintk("%x reading 1 from %x: %x\n", slot, offset, v); + return v; +} + +static inline u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) +{ + u16 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inw(0xcfc + (offset&2)); + PDprintk("%x reading 2 from %x: %x\n", slot, offset, v); + return v; +} + +static inline void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, + u32 val) +{ + PDprintk("%x writing to %x: %x\n", slot, offset, val); + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outl(val, 0xcfc); +} + +#endif diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/pci_regs.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/pci_regs.h Fri Sep 14 16:40:49 2007 +0100 @@ -0,0 +1,449 @@ +/* + * pci_regs.h + * + * PCI standard defines + * Copyright 1994, Drew Eckhardt + * Copyright 1997--1999 Martin Mares <mj@xxxxxx> + * + * For more information, please consult the following manuals (look at + * http://www.pcisig.com/ for how to get them): + * + * PCI BIOS Specification + * PCI Local Bus Specification + * PCI to PCI Bridge Specification + * PCI System Design Guide + */ + +#ifndef LINUX_PCI_REGS_H +#define LINUX_PCI_REGS_H + +/* + * Under PCI, each device has 256 bytes of configuration address space, + * of which the first 64 bytes are standardized as follows: + */ +#define PCI_VENDOR_ID 0x00 /* 16 bits */ +#define PCI_DEVICE_ID 0x02 /* 16 bits */ +#define PCI_COMMAND 0x04 /* 16 bits */ +#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */ +#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */ +#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */ +#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */ +#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ +#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ +#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ +#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ +#define PCI_COMMAND_SERR 0x100 /* Enable SERR */ +#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ +#define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */ + +#define PCI_STATUS 0x06 /* 16 bits */ +#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ +#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ +#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ +#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ +#define PCI_STATUS_PARITY 0x100 /* Detected parity error */ +#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */ +#define PCI_STATUS_DEVSEL_FAST 0x000 +#define PCI_STATUS_DEVSEL_MEDIUM 0x200 +#define PCI_STATUS_DEVSEL_SLOW 0x400 +#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */ +#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */ +#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */ +#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */ +#define 
PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */ + +#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8 revision */ +#define PCI_REVISION_ID 0x08 /* Revision ID */ +#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */ +#define PCI_CLASS_DEVICE 0x0a /* Device class */ + +#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */ +#define PCI_LATENCY_TIMER 0x0d /* 8 bits */ +#define PCI_HEADER_TYPE 0x0e /* 8 bits */ +#define PCI_HEADER_TYPE_NORMAL 0 +#define PCI_HEADER_TYPE_BRIDGE 1 +#define PCI_HEADER_TYPE_CARDBUS 2 + +#define PCI_BIST 0x0f /* 8 bits */ +#define PCI_BIST_CODE_MASK 0x0f /* Return result */ +#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */ +#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */ + +/* + * Base addresses specify locations in memory or I/O space. + * Decoded size can be determined by writing a value of + * 0xffffffff to the register, and reading it back. Only + * 1 bits are decoded. + */ +#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */ +#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */ +#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */ +#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */ +#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */ +#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */ +#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */ +#define PCI_BASE_ADDRESS_SPACE_IO 0x01 +#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00 +#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06 +#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */ +#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */ +#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */ +#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */ +#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL) +#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL) +/* bit 1 is reserved if address_space = 1 */ + +/* Header type 0 (normal devices) */ +#define PCI_CARDBUS_CIS 0x28 +#define PCI_SUBSYSTEM_VENDOR_ID 0x2c +#define PCI_SUBSYSTEM_ID 0x2e +#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ +#define PCI_ROM_ADDRESS_ENABLE 0x01 +#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) + +#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ + +/* 0x35-0x3b are reserved */ +#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */ +#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */ +#define PCI_MIN_GNT 0x3e /* 8 bits */ +#define PCI_MAX_LAT 0x3f /* 8 bits */ + +/* Header type 1 (PCI-to-PCI bridges) */ +#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */ +#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ +#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ +#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ +#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ +#define PCI_IO_LIMIT 0x1d +#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ +#define PCI_IO_RANGE_TYPE_16 0x00 +#define PCI_IO_RANGE_TYPE_32 0x01 +#define PCI_IO_RANGE_MASK (~0x0fUL) +#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */ +#define PCI_MEMORY_BASE 0x20 /* Memory range behind */ +#define PCI_MEMORY_LIMIT 0x22 +#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL +#define PCI_MEMORY_RANGE_MASK (~0x0fUL) +#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */ +#define PCI_PREF_MEMORY_LIMIT 0x26 +#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL +#define PCI_PREF_RANGE_TYPE_32 0x00 +#define PCI_PREF_RANGE_TYPE_64 0x01 +#define PCI_PREF_RANGE_MASK (~0x0fUL) +#define 
PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */ +#define PCI_PREF_LIMIT_UPPER32 0x2c +#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */ +#define PCI_IO_LIMIT_UPPER16 0x32 +/* 0x34 same as for htype 0 */ +/* 0x35-0x3b is reserved */ +#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */ +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_BRIDGE_CONTROL 0x3e +#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */ +#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */ +#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */ +#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */ +#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */ +#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */ +#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */ + +/* Header type 2 (CardBus bridges) */ +#define PCI_CB_CAPABILITY_LIST 0x14 +/* 0x15 reserved */ +#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */ +#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */ +#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */ +#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */ +#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */ +#define PCI_CB_MEMORY_BASE_0 0x1c +#define PCI_CB_MEMORY_LIMIT_0 0x20 +#define PCI_CB_MEMORY_BASE_1 0x24 +#define PCI_CB_MEMORY_LIMIT_1 0x28 +#define PCI_CB_IO_BASE_0 0x2c +#define PCI_CB_IO_BASE_0_HI 0x2e +#define PCI_CB_IO_LIMIT_0 0x30 +#define PCI_CB_IO_LIMIT_0_HI 0x32 +#define PCI_CB_IO_BASE_1 0x34 +#define PCI_CB_IO_BASE_1_HI 0x36 +#define PCI_CB_IO_LIMIT_1 0x38 +#define PCI_CB_IO_LIMIT_1_HI 0x3a +#define PCI_CB_IO_RANGE_MASK (~0x03UL) +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_CB_BRIDGE_CONTROL 0x3e +#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */ +#define PCI_CB_BRIDGE_CTL_SERR 0x02 +#define PCI_CB_BRIDGE_CTL_ISA 0x04 +#define PCI_CB_BRIDGE_CTL_VGA 0x08 +#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20 +#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */ +#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200 +#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400 +#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40 +#define PCI_CB_SUBSYSTEM_ID 0x42 +#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */ +/* 0x48-0x7f reserved */ + +/* Capability lists */ + +#define PCI_CAP_LIST_ID 0 /* Capability ID */ +#define PCI_CAP_ID_PM 0x01 /* Power Management */ +#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */ +#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */ +#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */ +#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ +#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ +#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ +#define PCI_CAP_ID_HT_IRQCONF 0x08 /* HyperTransport IRQ Configuration */ +#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ +#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ +#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ +#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ +#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ +#define PCI_CAP_SIZEOF 4 + +/* Power Management Registers */ + +#define PCI_PM_PMC 2 /* PM Capabilities 
Register */ +#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */ +#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */ +#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */ +#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */ +#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxilliary power support mask */ +#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */ +#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */ +#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */ +#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */ +#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */ +#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */ +#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ +#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ +#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ +#define PCI_PM_CTRL 4 /* PM control and status register */ +#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ +#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ +#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ +#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ +#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */ +#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */ +#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */ +#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */ +#define PCI_PM_DATA_REGISTER 7 /* (??) */ +#define PCI_PM_SIZEOF 8 + +/* AGP registers */ + +#define PCI_AGP_VERSION 2 /* BCD version number */ +#define PCI_AGP_RFU 3 /* Rest of capability flags */ +#define PCI_AGP_STATUS 4 /* Status register */ +#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */ +#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */ +#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */ +#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */ +#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */ +#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */ +#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */ +#define PCI_AGP_COMMAND 8 /* Control register */ +#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ +#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ +#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ +#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ +#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ +#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ +#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ +#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ +#define PCI_AGP_SIZEOF 12 + +/* Vital Product Data */ + +#define PCI_VPD_ADDR 2 /* Address to access (15 bits!) 
*/ +#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */ +#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */ +#define PCI_VPD_DATA 4 /* 32-bits of data returned here */ + +/* Slot Identification */ + +#define PCI_SID_ESR 2 /* Expansion Slot Register */ +#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */ +#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ +#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ + +/* Message Signalled Interrupts registers */ + +#define PCI_MSI_FLAGS 2 /* Various flags */ +#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */ +#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */ +#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */ +#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */ +#define PCI_MSI_FLAGS_MASKBIT 0x100 /* 64-bit mask bits allowed */ +#define PCI_MSI_RFU 3 /* Rest of capability flags */ +#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ +#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ +#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ +#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ +#define PCI_MSI_MASK_BIT 16 /* Mask bits register */ + +/* CompactPCI Hotswap Register */ + +#define PCI_CHSWP_CSR 2 /* Control and Status Register */ +#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */ +#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */ +#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */ +#define PCI_CHSWP_LOO 0x08 /* LED On / Off */ +#define PCI_CHSWP_PI 0x30 /* Programming Interface */ +#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ +#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ + +/* PCI-X registers */ + +#define PCI_X_CMD 2 /* Modes & Features */ +#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */ +#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */ +#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */ +#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ +#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ +#define PCI_X_STATUS 4 /* PCI-X capabilities */ +#define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ +#define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ +#define PCI_X_STATUS_64BIT 0x00010000 /* 64-bit device */ +#define PCI_X_STATUS_133MHZ 0x00020000 /* 133 MHz capable */ +#define PCI_X_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */ +#define PCI_X_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */ +#define PCI_X_STATUS_COMPLEX 0x00100000 /* Device Complexity */ +#define PCI_X_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */ +#define PCI_X_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */ +#define PCI_X_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */ +#define PCI_X_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */ +#define PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */ +#define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */ + +/* PCI Express capability registers */ + +#define PCI_EXP_FLAGS 2 /* Capabilities register */ +#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ +#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ +#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ +#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ +#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ +#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ 
+#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ +#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ +#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ +#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ +#define PCI_EXP_DEVCAP 4 /* Device capabilities */ +#define PCI_EXP_DEVCAP_PAYLOAD 0x07 /* Max_Payload_Size */ +#define PCI_EXP_DEVCAP_PHANTOM 0x18 /* Phantom functions */ +#define PCI_EXP_DEVCAP_EXT_TAG 0x20 /* Extended tags */ +#define PCI_EXP_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */ +#define PCI_EXP_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */ +#define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */ +#define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */ +#define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */ +#define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ +#define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ +#define PCI_EXP_DEVCTL 8 /* Device Control */ +#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ +#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ +#define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ +#define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ +#define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ +#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ +#define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ +#define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ +#define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */ +#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ +#define PCI_EXP_DEVSTA 10 /* Device Status */ +#define PCI_EXP_DEVSTA_CED 0x01 /* Correctable Error Detected */ +#define PCI_EXP_DEVSTA_NFED 0x02 /* Non-Fatal Error Detected */ +#define PCI_EXP_DEVSTA_FED 0x04 /* Fatal Error Detected */ +#define PCI_EXP_DEVSTA_URD 0x08 /* Unsupported Request Detected */ +#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ +#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ +#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ +#define PCI_EXP_LNKCTL 16 /* Link Control */ +#define PCI_EXP_LNKSTA 18 /* Link Status */ +#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_RTCTL 28 /* Root Control */ +#define PCI_EXP_RTCTL_SECEE 0x01 /* System Error on Correctable Error */ +#define PCI_EXP_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */ +#define PCI_EXP_RTCTL_SEFEE 0x04 /* System Error on Fatal Error */ +#define PCI_EXP_RTCTL_PMEIE 0x08 /* PME Interrupt Enable */ +#define PCI_EXP_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */ +#define PCI_EXP_RTCAP 30 /* Root Capabilities */ +#define PCI_EXP_RTSTA 32 /* Root Status */ + +/* Extended Capabilities (PCI-X 2.0 and Express) */ +#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) +#define PCI_EXT_CAP_VER(header) ((header >> 16) & 0xf) +#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) + +#define PCI_EXT_CAP_ID_ERR 1 +#define PCI_EXT_CAP_ID_VC 2 +#define PCI_EXT_CAP_ID_DSN 3 +#define PCI_EXT_CAP_ID_PWR 4 + +/* Advanced Error Reporting */ +#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ +#define PCI_ERR_UNC_TRAIN 0x00000001 /* Training */ +#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ +#define 
PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */ +#define PCI_ERR_UNC_FCP 0x00002000 /* Flow Control Protocol */ +#define PCI_ERR_UNC_COMP_TIME 0x00004000 /* Completion Timeout */ +#define PCI_ERR_UNC_COMP_ABORT 0x00008000 /* Completer Abort */ +#define PCI_ERR_UNC_UNX_COMP 0x00010000 /* Unexpected Completion */ +#define PCI_ERR_UNC_RX_OVER 0x00020000 /* Receiver Overflow */ +#define PCI_ERR_UNC_MALF_TLP 0x00040000 /* Malformed TLP */ +#define PCI_ERR_UNC_ECRC 0x00080000 /* ECRC Error Status */ +#define PCI_ERR_UNC_UNSUP 0x00100000 /* Unsupported Request */ +#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ + /* Same bits as above */ +#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ +#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ +#define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */ +#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ +#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */ +#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */ +#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ +#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ +#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ +#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ +#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ +#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ +#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ +#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ +#define PCI_ERR_ROOT_STATUS 48 +#define PCI_ERR_ROOT_COR_SRC 52 +#define PCI_ERR_ROOT_SRC 54 + +/* Virtual Channel */ +#define PCI_VC_PORT_REG1 4 +#define PCI_VC_PORT_REG2 8 +#define PCI_VC_PORT_CTRL 12 +#define PCI_VC_PORT_STATUS 14 +#define PCI_VC_RES_CAP 16 +#define PCI_VC_RES_CTRL 20 +#define PCI_VC_RES_STATUS 26 + +/* Power Budgeting */ +#define PCI_PWR_DSR 4 /* Data Select Register */ +#define PCI_PWR_DATA 8 /* Data Register */ +#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ +#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ +#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ +#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ +#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ +#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ +#define PCI_PWR_CAP 12 /* Capability */ +#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ + +#endif /* LINUX_PCI_REGS_H */ diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/utils.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/vmx/vtd/utils.c Fri Sep 14 16:40:49 2007 +0100 @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> + */ + +#include <xen/init.h> +#include <xen/bitmap.h> +#include <xen/irq.h> +#include <xen/spinlock.h> +#include <xen/sched.h> +#include <asm/delay.h> +#include <asm/iommu.h> +#include <asm/hvm/vmx/intel-iommu.h> +#include "dmar.h" +#include "pci-direct.h" +#include "pci_regs.h" +#include "msi.h" + +#include <xen/mm.h> +#include <xen/xmalloc.h> + +#if defined(__x86_64__) +void print_iommu_regs(struct acpi_drhd_unit *drhd) +{ + struct iommu *iommu = drhd->iommu; + + printk("---- print_iommu_regs ----\n"); + printk("print_iommu_regs: drhd->address = %lx\n", drhd->address); + printk("print_iommu_regs: DMAR_VER_REG = %x\n", + dmar_readl(iommu->reg,DMAR_VER_REG)); + printk("print_iommu_regs: DMAR_CAP_REG = %lx\n", + dmar_readq(iommu->reg,DMAR_CAP_REG)); + printk("print_iommu_regs: n_fault_reg = %lx\n", + cap_num_fault_regs(dmar_readq(iommu->reg, DMAR_CAP_REG))); + printk("print_iommu_regs: fault_recording_offset_l = %lx\n", + cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG))); + printk("print_iommu_regs: fault_recording_offset_h = %lx\n", + cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8); + printk("print_iommu_regs: fault_recording_reg_l = %lx\n", + dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)))); + printk("print_iommu_regs: fault_recording_reg_h = %lx\n", + dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8)); + printk("print_iommu_regs: DMAR_ECAP_REG = %lx\n", + dmar_readq(iommu->reg,DMAR_ECAP_REG)); + printk("print_iommu_regs: DMAR_GCMD_REG = %x\n", + dmar_readl(iommu->reg,DMAR_GCMD_REG)); + printk("print_iommu_regs: DMAR_GSTS_REG = %x\n", + dmar_readl(iommu->reg,DMAR_GSTS_REG)); + printk("print_iommu_regs: DMAR_RTADDR_REG = %lx\n", + dmar_readq(iommu->reg,DMAR_RTADDR_REG)); + printk("print_iommu_regs: DMAR_CCMD_REG = %lx\n", + dmar_readq(iommu->reg,DMAR_CCMD_REG)); + printk("print_iommu_regs: DMAR_FSTS_REG = %x\n", + dmar_readl(iommu->reg,DMAR_FSTS_REG)); + printk("print_iommu_regs: DMAR_FECTL_REG = %x\n", + dmar_readl(iommu->reg,DMAR_FECTL_REG)); + printk("print_iommu_regs: DMAR_FEDATA_REG = %x\n", + dmar_readl(iommu->reg,DMAR_FEDATA_REG)); + printk("print_iommu_regs: DMAR_FEADDR_REG = %x\n", + dmar_readl(iommu->reg,DMAR_FEADDR_REG)); + printk("print_iommu_regs: DMAR_FEUADDR_REG = %x\n", + dmar_readl(iommu->reg,DMAR_FEUADDR_REG)); +} + +void print_vtd_entries(struct domain *d, int bus, int devfn, + unsigned long gmfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + struct context_entry *ctxt_entry; + struct root_entry *root_entry; + u64 *l3, *l2, *l1; + u32 l3_index, l2_index, l1_index; + u32 i = 0; + + printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x devfn = %x, gmfn = %lx\n", d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), devfn, gmfn); + + for_each_drhd_unit(drhd) { + printk("---- print_vtd_entries %d ----\n", i++); + + if (hd->pgd == NULL) { + printk(" hg->pgd == NULL\n"); + return; + } + + iommu = drhd->iommu; + root_entry = iommu->root_entry; + printk(" hd->pgd = %p virt_to_maddr(hd->pgd) = %lx\n", + hd->pgd, virt_to_maddr(hd->pgd)); + + printk(" root_entry = %p\n", root_entry); + if (root_entry == NULL) { + printk(" root_entry == NULL\n"); + return; + 
} + + printk(" root_entry[%x] = %lx\n", bus, root_entry[bus].val); + printk(" maddr_to_virt(root_entry[%x]) = %p\n", + bus, maddr_to_virt(root_entry[bus].val)); + + if (root_entry[bus].val == 0) { + printk(" root_entry[%x].lo == 0\n", bus); + return; + } + + ctxt_entry = maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT); + if (ctxt_entry == NULL) { + printk(" ctxt_entry == NULL\n"); + return; + } + + if (ctxt_entry[devfn].lo == 0) { + printk(" ctxt_entry[%x].lo == 0\n", devfn); + return; + } + + printk(" context = %p\n", ctxt_entry); + printk(" context[%x] = %lx %lx\n", + devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo); + printk(" maddr_to_virt(context[%x].lo) = %p\n", + devfn, maddr_to_virt(ctxt_entry[devfn].lo)); + printk(" context[%x] = %lx\n", devfn, ctxt_entry[devfn].lo); + + l3 = maddr_to_virt(ctxt_entry[devfn].lo); + l3 = (u64*)(((u64) l3 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); + printk(" l3 = %p\n", l3); + if (l3 == NULL) return; + + l3_index = (gmfn >> 9 >> 9) & 0x1ff; + printk(" l3_index = %x\n", l3_index); + printk(" l3[%x] = %lx\n", l3_index, l3[l3_index]); + + l2 = maddr_to_virt(l3[l3_index]); + l2 = (u64*)(((u64) l2 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); + printk(" l2 = %p\n", l2); + if (l2 == NULL) return; + + l2_index = (gmfn >> 9) & 0x1ff; + printk(" gmfn = %lx\n", gmfn); + printk(" gmfn >> 9= %lx\n", gmfn >> 9); + printk(" l2_index = %x\n", l2_index); + printk(" l2[%x] = %lx\n", l2_index, l2[l2_index]); + + l1 = maddr_to_virt(l2[l2_index]); + l1 = (u64*)(((u64) l1 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); + if (l1 == NULL) return; + l1_index = gmfn & 0x1ff; + printk(" l1 = %p\n", l1); + printk(" l1_index = %x\n", l1_index); + printk(" l1[%x] = %lx\n", l1_index, l1[l1_index]); + } +} + +#else // !m64 + +void print_iommu_regs(struct acpi_drhd_unit *drhd) +{ + struct iommu *iommu = drhd->iommu; + + printk("---- print_iommu_regs ----\n"); + printk("print_iommu_regs: drhd->address = %lx\n", drhd->address); + printk("print_iommu_regs: DMAR_VER_REG = %x\n", + dmar_readl(iommu->reg,DMAR_VER_REG)); + printk("print_iommu_regs: DMAR_CAP_REG = %llx\n", + dmar_readq(iommu->reg,DMAR_CAP_REG)); + printk("print_iommu_regs: n_fault_reg = %llx\n", + cap_num_fault_regs(dmar_readq(iommu->reg, DMAR_CAP_REG))); + printk("print_iommu_regs: fault_recording_offset_l = %llx\n", + cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG))); + printk("print_iommu_regs: fault_recording_offset_h = %llx\n", + cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8); + printk("print_iommu_regs: fault_recording_reg_l = %llx\n", + dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)))); + printk("print_iommu_regs: fault_recording_reg_h = %llx\n", + dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8)); + printk("print_iommu_regs: DMAR_ECAP_REG = %llx\n", + dmar_readq(iommu->reg,DMAR_ECAP_REG)); + printk("print_iommu_regs: DMAR_GCMD_REG = %x\n", + dmar_readl(iommu->reg,DMAR_GCMD_REG)); + printk("print_iommu_regs: DMAR_GSTS_REG = %x\n", + dmar_readl(iommu->reg,DMAR_GSTS_REG)); + printk("print_iommu_regs: DMAR_RTADDR_REG = %llx\n", + dmar_readq(iommu->reg,DMAR_RTADDR_REG)); + printk("print_iommu_regs: DMAR_CCMD_REG = %llx\n", + dmar_readq(iommu->reg,DMAR_CCMD_REG)); + printk("print_iommu_regs: DMAR_FSTS_REG = %x\n", + dmar_readl(iommu->reg,DMAR_FSTS_REG)); + printk("print_iommu_regs: DMAR_FECTL_REG = %x\n", + dmar_readl(iommu->reg,DMAR_FECTL_REG)); + printk("print_iommu_regs: DMAR_FEDATA_REG = %x\n", + 
dmar_readl(iommu->reg,DMAR_FEDATA_REG)); + printk("print_iommu_regs: DMAR_FEADDR_REG = %x\n", + dmar_readl(iommu->reg,DMAR_FEADDR_REG)); + printk("print_iommu_regs: DMAR_FEUADDR_REG = %x\n", + dmar_readl(iommu->reg,DMAR_FEUADDR_REG)); +} + +void print_vtd_entries(struct domain *d, int bus, int devfn, + unsigned long gmfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct acpi_drhd_unit *drhd; + struct iommu *iommu; + struct context_entry *ctxt_entry; + struct root_entry *root_entry; + u64 *l3, *l2, *l1; + u32 l3_index, l2_index, l1_index; + u32 i = 0; + + printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x devfn = %x, gmfn = %lx\n", d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), devfn, gmfn); + + for_each_drhd_unit(drhd) { + printk("---- print_vtd_entries %d ----\n", i++); + + if (hd->pgd == NULL) { + printk(" hg->pgd == NULL\n"); + return; + } + + iommu = drhd->iommu; + root_entry = iommu->root_entry; + printk(" d->pgd = %p virt_to_maddr(hd->pgd) = %lx\n", + hd->pgd, virt_to_maddr(hd->pgd)); + + printk(" root_entry = %p\n", root_entry); + if (root_entry == NULL) { + printk(" root_entry == NULL\n"); + return; + } + + printk(" root_entry[%x] = %llx\n", bus, root_entry[bus].val); + printk(" maddr_to_virt(root_entry[%x]) = %p\n", + bus, maddr_to_virt(root_entry[bus].val)); + + if (root_entry[bus].val == 0) { + printk(" root_entry[%x].lo == 0\n", bus); + return; + } + + ctxt_entry = maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT); + if (ctxt_entry == NULL) { + printk(" ctxt_entry == NULL\n"); + return; + } + + if (ctxt_entry[devfn].lo == 0) { + printk(" ctxt_entry[%x].lo == 0\n", devfn); + return; + } + + printk(" context = %p\n", ctxt_entry); + printk(" context[%x] = %llx %llx\n", + devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo); + printk(" maddr_to_virt(context[%x].lo) = %p\n", + devfn, maddr_to_virt(ctxt_entry[devfn].lo)); + printk(" context[%x] = %llx\n", devfn, ctxt_entry[devfn].lo); + + l3 = maddr_to_virt(ctxt_entry[devfn].lo); + l3 = (u64*)(((u32) l3 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); + printk(" l3 = %p\n", l3); + if (l3 == NULL) return; + + l3_index = (gmfn >> 9 >> 9) & 0x1ff; + printk(" l3_index = %x\n", l3_index); + printk(" l3[%x] = %llx\n", l3_index, l3[l3_index]); + + l2 = maddr_to_virt(l3[l3_index]); + l2 = (u64*)(((u32) l2 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); + printk(" l2 = %p\n", l2); + if (l2 == NULL) return; + + l2_index = (gmfn >> 9) & 0x1ff; + printk(" gmfn = %lx\n", gmfn); + printk(" gmfn >> 9= %lx\n", gmfn >> 9); + printk(" l2_index = %x\n", l2_index); + printk(" l2[%x] = %llx\n", l2_index, l2[l2_index]); + + l1 = maddr_to_virt(l2[l2_index]); + l1 = (u64*)(((u32) l1 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); + if (l1 == NULL) return; + l1_index = gmfn & 0x1ff; + printk(" l1 = %p\n", l1); + printk(" l1_index = %x\n", l1_index); + printk(" l1[%x] = %llx\n", l1_index, l1[l1_index]); + } +} +#endif // !m64 _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog