[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] Re: [PATCH 07/11] Xen/x86/PCI: Add support for the Xen PCI subsystem
On Thu, May 07, 2009 at 02:45:33PM -0700, Jeremy Fitzhardinge wrote: > From: Alex Nixon <alex.nixon@xxxxxxxxxx> > > Impact: add core of Xen PCI support > > On boot, the system will search to see if a Xen iommu/pci subsystem is > available. If the kernel detects it's running in a domain rather than > on bare hardware, this subsystem will be used. Otherwise, it falls > back to using hardware as usual. > > The frontend stub lives in arch/x86/pci-xen.c, alongside other > sub-arch PCI init code (e.g. olpc.c) > > (All subsequent fixes, API changes and swiotlb operations folded in.) > > Signed-off-by: Alex Nixon <alex.nixon@xxxxxxxxxx> > Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx> > Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx> > Reviewed-by: "H. Peter Anvin" <hpa@xxxxxxxxx> > --- > arch/x86/Kconfig | 4 + > arch/x86/include/asm/pci_x86.h | 1 + > arch/x86/include/asm/xen/iommu.h | 12 ++ > arch/x86/kernel/pci-dma.c | 3 + > arch/x86/pci/Makefile | 1 + > arch/x86/pci/init.c | 6 + > arch/x86/pci/xen.c | 52 +++++++ > drivers/pci/Makefile | 2 + > drivers/pci/xen-iommu.c | 302 > ++++++++++++++++++++++++++++++++++++++ > 9 files changed, 383 insertions(+), 0 deletions(-) > create mode 100644 arch/x86/include/asm/xen/iommu.h > create mode 100644 arch/x86/pci/xen.c > create mode 100644 drivers/pci/xen-iommu.c > > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig > index 4b34082..4a62659 100644 > --- a/arch/x86/Kconfig > +++ b/arch/x86/Kconfig > @@ -1830,6 +1830,10 @@ config PCI_OLPC > def_bool y > depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) > > +config PCI_XEN > + def_bool y > + depends on XEN_PCI_PASSTHROUGH || XEN_DOM0_PCI > + > config PCI_DOMAINS > def_bool y > depends on PCI > diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h > index 5401ca2..34f03a4 100644 > --- a/arch/x86/include/asm/pci_x86.h > +++ b/arch/x86/include/asm/pci_x86.h > @@ -107,6 +107,7 @@ extern int pci_direct_probe(void); > extern void 
pci_direct_init(int type); > extern void pci_pcbios_init(void); > extern int pci_olpc_init(void); > +extern int pci_xen_init(void); > extern void __init dmi_check_pciprobe(void); > extern void __init dmi_check_skip_isa_align(void); > > diff --git a/arch/x86/include/asm/xen/iommu.h > b/arch/x86/include/asm/xen/iommu.h > new file mode 100644 > index 0000000..75df312 > --- /dev/null > +++ b/arch/x86/include/asm/xen/iommu.h > @@ -0,0 +1,12 @@ > +#ifndef ASM_X86__XEN_IOMMU_H > + > +#ifdef CONFIG_PCI_XEN > +extern void xen_iommu_init(void); > +#else > +static inline void xen_iommu_init(void) > +{ > +} > +#endif > + > +#endif > + > diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c > index 745579b..2fffc22 100644 > --- a/arch/x86/kernel/pci-dma.c > +++ b/arch/x86/kernel/pci-dma.c > @@ -10,6 +10,7 @@ > #include <asm/gart.h> > #include <asm/calgary.h> > #include <asm/amd_iommu.h> > +#include <asm/xen/iommu.h> > > static int forbid_dac __read_mostly; > > @@ -275,6 +276,8 @@ static int __init pci_iommu_init(void) > dma_debug_add_bus(&pci_bus_type); > #endif > > + xen_iommu_init(); > + > calgary_iommu_init(); > > intel_iommu_init(); > diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile > index d49202e..64182c5 100644 > --- a/arch/x86/pci/Makefile > +++ b/arch/x86/pci/Makefile > @@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o > obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o > obj-$(CONFIG_PCI_DIRECT) += direct.o > obj-$(CONFIG_PCI_OLPC) += olpc.o > +obj-$(CONFIG_PCI_XEN) += xen.o > > obj-y += fixup.o > obj-$(CONFIG_ACPI) += acpi.o > diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c > index 25a1f8e..4e2f90a 100644 > --- a/arch/x86/pci/init.c > +++ b/arch/x86/pci/init.c > @@ -15,10 +15,16 @@ static __init int pci_arch_init(void) > if (!(pci_probe & PCI_PROBE_NOEARLY)) > pci_mmcfg_early_init(); > > +#ifdef CONFIG_PCI_XEN > + if (!pci_xen_init()) > + return 0; > +#endif > + > #ifdef CONFIG_PCI_OLPC > if (!pci_olpc_init()) 
> return 0; /* skip additional checks if it's an XO */ > #endif > + > #ifdef CONFIG_PCI_BIOS > pci_pcbios_init(); > #endif > diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c > new file mode 100644 > index 0000000..76f803f > --- /dev/null > +++ b/arch/x86/pci/xen.c > @@ -0,0 +1,52 @@ > +/* > + * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux > + * x86 PCI core to support the Xen PCI Frontend > + * > + * Author: Ryan Wilson <hap9@xxxxxxxxxxxxxx> > + */ > +#include <linux/module.h> > +#include <linux/init.h> > +#include <linux/pci.h> > +#include <linux/acpi.h> > + > +#include <asm/pci_x86.h> > + > +#include <asm/xen/hypervisor.h> > + > +static int xen_pcifront_enable_irq(struct pci_dev *dev) > +{ > + return 0; > +} > + > +extern int isapnp_disable; > + > +int __init pci_xen_init(void) > +{ > + if (!xen_pv_domain() || xen_initial_domain()) > + return -ENODEV; > + > + printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n"); > + > + pcibios_set_cache_line_size(); > + > + pcibios_enable_irq = xen_pcifront_enable_irq; > + pcibios_disable_irq = NULL; > + > +#ifdef CONFIG_ACPI > + /* Keep ACPI out of the picture */ > + acpi_noirq = 1; > +#endif > + > +#ifdef CONFIG_ISAPNP > + /* Stop isapnp from probing */ > + isapnp_disable = 1; > +#endif > + > + /* Ensure a device still gets scanned even if it's fn number > + * is non-zero. 
> + */ > + pci_scan_all_fns = 1; > + > + return 0; > +} > + > diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile > index ba6af16..8db0cb5 100644 > --- a/drivers/pci/Makefile > +++ b/drivers/pci/Makefile > @@ -27,6 +27,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o > # Build Intel IOMMU support > obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o > > +# Build Xen IOMMU support > +obj-$(CONFIG_PCI_XEN) += xen-iommu.o > obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o > > obj-$(CONFIG_PCI_IOV) += iov.o > diff --git a/drivers/pci/xen-iommu.c b/drivers/pci/xen-iommu.c > new file mode 100644 > index 0000000..e7a22f1 > --- /dev/null > +++ b/drivers/pci/xen-iommu.c > @@ -0,0 +1,302 @@ > +#include <linux/types.h> > +#include <linux/mm.h> > +#include <linux/string.h> > +#include <linux/pci.h> > +#include <linux/module.h> > +#include <linux/version.h> > +#include <linux/scatterlist.h> > +#include <linux/io.h> > +#include <linux/bug.h> > + > +#include <xen/interface/xen.h> > +#include <xen/grant_table.h> > +#include <xen/page.h> > +#include <xen/xen-ops.h> > + > +#include <asm/iommu.h> > +#include <asm/swiotlb.h> > +#include <asm/tlbflush.h> > + > +#define IOMMU_BUG_ON(test) \ > +do { \ > + if (unlikely(test)) { \ > + printk(KERN_ALERT "Fatal DMA error! " \ > + "Please use 'swiotlb=force'\n"); \ > + BUG(); \ > + } \ > +} while (0) > + > +/* Print address range with message */ > +#define PAR(msg, addr, size) \ > +do { \ > + printk(msg "[%#llx - %#llx]\n", \ > + (unsigned long long)addr, \ > + (unsigned long long)addr + size); \ > +} while (0) > + > +struct dma_coherent_mem { > + void *virt_base; > + u32 device_base; > + int size; > + int flags; > + unsigned long *bitmap; > +}; > + > +static inline int address_needs_mapping(struct device *hwdev, > + dma_addr_t addr) > +{ > + dma_addr_t mask = 0xffffffff; You can use DMA_32BIT_MASK here. 
> + int ret; > + > + /* If the device has a mask, use it, otherwise default to 32 bits */ > + if (hwdev && hwdev->dma_mask) > + mask = *hwdev->dma_mask; I think the check for a valid hwdev->dma_mask is not necessary. Other IOMMU drivers also don't check for this. > + > + ret = (addr & ~mask) != 0; > + > + if (ret) { > + printk(KERN_ERR "dma address needs mapping\n"); > + printk(KERN_ERR "mask: %#llx\n address: [%#llx]\n", mask, addr); > + } > + return ret; > +} > + > +static int check_pages_physically_contiguous(unsigned long pfn, > + unsigned int offset, > + size_t length) > +{ > + unsigned long next_mfn; > + int i; > + int nr_pages; > + > + next_mfn = pfn_to_mfn(pfn); > + nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT; > + > + for (i = 1; i < nr_pages; i++) { > + if (pfn_to_mfn(++pfn) != ++next_mfn) > + return 0; > + } > + return 1; > +} > + > +static int range_straddles_page_boundary(phys_addr_t p, size_t size) > +{ > + unsigned long pfn = PFN_DOWN(p); > + unsigned int offset = p & ~PAGE_MASK; > + > + if (offset + size <= PAGE_SIZE) > + return 0; You can use iommu_num_pages here from lib/iommu_helpers.c > + if (check_pages_physically_contiguous(pfn, offset, size)) > + return 0; > + return 1; > +} > + > +static inline void xen_dma_unmap_page(struct page *page) > +{ > + /* Xen TODO: 2.6.18 xen calls __gnttab_dma_unmap_page here > + * to deal with foreign pages. We'll need similar logic here at > + * some point. > + */ > +} > + > +/* Gets dma address of a page */ > +static inline dma_addr_t xen_dma_map_page(struct page *page) > +{ > + /* Xen TODO: 2.6.18 xen calls __gnttab_dma_map_page here to deal > + * with foreign pages. We'll need similar logic here at some > + * point. 
> + */ > + return ((dma_addr_t)pfn_to_mfn(page_to_pfn(page))) << PAGE_SHIFT; > +} > + > +static int xen_map_sg(struct device *hwdev, struct scatterlist *sg, > + int nents, int direction) > +{ > + struct scatterlist *s; > + struct page *page; > + int i, rc; > + > + BUG_ON(direction == DMA_NONE); > + WARN_ON(nents == 0 || sg[0].length == 0); > + > + for_each_sg(sg, s, nents, i) { > + BUG_ON(!sg_page(s)); > + page = sg_page(s); > + s->dma_address = xen_dma_map_page(page) + s->offset; > + s->dma_length = s->length; > + IOMMU_BUG_ON(range_straddles_page_boundary( > + page_to_phys(page), s->length)); I have a question on this. How do you make sure that the memory to map does not cross page boundarys? I have a stats counter for x-page requests in amd iommu code and around 10% of the requests are actually x-page for me. > + } > + > + rc = nents; > + > + flush_write_buffers(); > + return rc; > +} > + > +static void xen_unmap_sg(struct device *hwdev, struct scatterlist *sg, > + int nents, int direction) > +{ > + struct scatterlist *s; > + struct page *page; > + int i; > + > + for_each_sg(sg, s, nents, i) { > + page = pfn_to_page(mfn_to_pfn(PFN_DOWN(s->dma_address))); > + xen_dma_unmap_page(page); > + } > +} > + > +static void *xen_alloc_coherent(struct device *dev, size_t size, > + dma_addr_t *dma_handle, gfp_t gfp) > +{ > + void *ret; > + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; > + unsigned int order = get_order(size); > + unsigned long vstart; > + u64 mask; > + > + /* ignore region specifiers */ > + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); > + > + if (mem) { > + int page = bitmap_find_free_region(mem->bitmap, mem->size, > + order); Can you use iommu_area_alloc here? 
> + if (page >= 0) { > + *dma_handle = mem->device_base + (page << PAGE_SHIFT); > + ret = mem->virt_base + (page << PAGE_SHIFT); > + memset(ret, 0, size); > + return ret; > + } > + if (mem->flags & DMA_MEMORY_EXCLUSIVE) > + return NULL; > + } > + > + if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) DMA_32BIT_MASK > + gfp |= GFP_DMA; > + > + vstart = __get_free_pages(gfp, order); > + ret = (void *)vstart; > + > + if (dev != NULL && dev->coherent_dma_mask) > + mask = dev->coherent_dma_mask; > + else > + mask = 0xffffffff; DMA_32BIT_MASK > + > + if (ret != NULL) { > + if (xen_create_contiguous_region(vstart, order, > + fls64(mask)) != 0) { > + free_pages(vstart, order); > + return NULL; > + } > + memset(ret, 0, size); > + *dma_handle = virt_to_machine(ret).maddr; > + } > + return ret; > +} > + > +static void xen_free_coherent(struct device *dev, size_t size, > + void *vaddr, dma_addr_t dma_addr) > +{ > + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; > + int order = get_order(size); > + > + if (mem && vaddr >= mem->virt_base && > + vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { > + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; > + bitmap_release_region(mem->bitmap, page, order); iommu_area_free > + } else { > + xen_destroy_contiguous_region((unsigned long)vaddr, order); > + free_pages((unsigned long)vaddr, order); > + } > +} > + > +static dma_addr_t xen_swiotlb_map_single(struct device *dev, phys_addr_t > paddr, > + size_t size, int direction) > +{ > + dma_addr_t dma; > + BUG_ON(direction == DMA_NONE); > + > + WARN_ON(size == 0); > + dma = swiotlb_map_single(dev, phys_to_virt(paddr), size, direction); > + > + flush_write_buffers(); > + return dma; > +} > + > +static dma_addr_t xen_map_single(struct device *dev, phys_addr_t paddr, > + size_t size, int direction) > +{ > + struct page *page; > + dma_addr_t dma; > + > + BUG_ON(direction == DMA_NONE); > + > + WARN_ON(size == 0); > + page = pfn_to_page(PFN_DOWN(paddr)); > + > + dma = 
xen_dma_map_page(page) + offset_in_page(paddr); > + > + IOMMU_BUG_ON(address_needs_mapping(dev, dma)); > + IOMMU_BUG_ON(range_straddles_page_boundary(paddr, size)); > + flush_write_buffers(); > + return dma; > +} > + > +static void xen_unmap_single(struct device *dev, dma_addr_t dma_addr, > + size_t size, int direction) > +{ > + BUG_ON(direction == DMA_NONE); > + xen_dma_unmap_page(pfn_to_page(mfn_to_pfn(PFN_DOWN(dma_addr)))); > +} > + > +static struct dma_mapping_ops xen_dma_ops = { > + .dma_supported = NULL, > + > + .alloc_coherent = xen_alloc_coherent, > + .free_coherent = xen_free_coherent, > + > + .map_single = xen_map_single, > + .unmap_single = xen_unmap_single, > + > + .map_sg = xen_map_sg, > + .unmap_sg = xen_unmap_sg, > + > + .mapping_error = NULL, > + > + .is_phys = 0, > +}; > + > +static struct dma_mapping_ops xen_swiotlb_dma_ops = { > + .dma_supported = swiotlb_dma_supported, > + > + .alloc_coherent = xen_alloc_coherent, > + .free_coherent = xen_free_coherent, > + > + .map_single = xen_swiotlb_map_single, /* swiotlb_map_single has a > different prototype */ > + .unmap_single = swiotlb_unmap_single, > + > + .map_sg = swiotlb_map_sg, > + .unmap_sg = swiotlb_unmap_sg, > + > + .mapping_error = swiotlb_dma_mapping_error, > + > + .is_phys = 0, > +}; > + > +void __init xen_iommu_init(void) > +{ > + if (!xen_pv_domain()) > + return; > + > + printk(KERN_INFO "Xen: Initializing Xen DMA ops\n"); > + > + force_iommu = 0; > + dma_ops = &xen_dma_ops; > + > + if (swiotlb) { > + printk(KERN_INFO "Xen: Enabling DMA fallback to swiotlb\n"); > + dma_ops = &xen_swiotlb_dma_ops; > + } > +} > + > -- > 1.6.0.6 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx 
http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |