[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merge.
# HG changeset patch # User adsharma@xxxxxxxxxxxxxxxxxxxx # Node ID 1ae656509f021f7436cd6813c9b50c395d29c3bf # Parent e3d811cca4e1d385a793cc515d72c8e671fd6267 # Parent 26c03c17c418ba106ebda01502713da2fc9d28c6 Merge. diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Tue Aug 16 18:09:07 2005 @@ -130,6 +130,7 @@ # CONFIG_X86_REBOOTFIXUPS is not set CONFIG_MICROCODE=y CONFIG_X86_CPUID=y +CONFIG_SWIOTLB=y # # Firmware Drivers diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Tue Aug 16 18:09:07 2005 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12.3-xen0 -# Mon Aug 15 11:36:25 2005 +# Linux kernel version: 2.6.12.4-xen0 +# Mon Aug 15 18:57:19 2005 # CONFIG_XEN=y CONFIG_ARCH_XEN=y @@ -52,6 +52,7 @@ # CONFIG_IKCONFIG is not set # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set CONFIG_PRINTK=y CONFIG_BUG=y @@ -122,6 +123,7 @@ # CONFIG_X86_MSR is not set # CONFIG_GART_IOMMU is not set CONFIG_DUMMY_IOMMU=y +CONFIG_SWIOTLB=y # CONFIG_X86_MCE is not set # @@ -163,6 +165,7 @@ CONFIG_STANDALONE=y # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set # # Memory Technology Devices (MTD) @@ -1060,7 +1063,22 @@ # Kernel hacking # # CONFIG_PRINTK_TIME is not set -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=15 +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# 
CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_DEBUG_STACKOVERFLOW is not set +# CONFIG_KPROBES is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_4KSTACKS is not set CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y +# CONFIG_CHECKING is not set +# CONFIG_INIT_DEBUG is not set diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Tue Aug 16 18:09:07 2005 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-xenU -# Tue Aug 2 23:56:13 2005 +# Linux kernel version: 2.6.12.4-xenU +# Mon Aug 15 19:25:22 2005 # CONFIG_XEN=y CONFIG_ARCH_XEN=y @@ -30,7 +30,7 @@ # CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y -CONFIG_BROKEN_ON_SMP=y +CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 # @@ -48,8 +48,10 @@ CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y # CONFIG_IKCONFIG is not set +# CONFIG_CPUSETS is not set # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -74,6 +76,7 @@ CONFIG_MODVERSIONS=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y CONFIG_XENARCH="x86_64" CONFIG_X86=y CONFIG_MMU=y @@ -86,12 +89,15 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_X86_GOOD_APIC=y # CONFIG_HPET_TIMER is not set -# CONFIG_SMP is not set +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +# CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set # CONFIG_MICROCODE is not set CONFIG_X86_CPUID=y # CONFIG_NUMA is not set # CONFIG_MTRR is not set +CONFIG_HAVE_DEC_LOCK=y # CONFIG_X86_LOCAL_APIC is not set # CONFIG_X86_IO_APIC is not set # CONFIG_PCI is not set @@ -114,7 +120,11 @@ # CONFIG_GENERIC_CPU is not set 
CONFIG_X86_L1_CACHE_BYTES=128 # CONFIG_X86_TSC is not set +CONFIG_X86_XEN_GENAPIC=y # CONFIG_X86_MSR is not set +CONFIG_X86_HT=y +# CONFIG_K8_NUMA is not set +# CONFIG_NUMA_EMU is not set CONFIG_DUMMY_IOMMU=y # CONFIG_X86_MCE is not set @@ -157,6 +167,7 @@ CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y +# CONFIG_DEBUG_DRIVER is not set # # Block devices @@ -559,7 +570,6 @@ # # Old SIR device drivers # -# CONFIG_IRPORT_SIR is not set # # Old Serial dongle support @@ -861,17 +871,7 @@ # Security options # # CONFIG_KEYS is not set -CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y -CONFIG_SECURITY_CAPABILITIES=y -# CONFIG_SECURITY_SECLVL is not set -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 -CONFIG_SECURITY_SELINUX_DISABLE=y -CONFIG_SECURITY_SELINUX_DEVELOP=y -CONFIG_SECURITY_SELINUX_AVC_STATS=y -CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_SECURITY is not set # # Cryptographic options @@ -919,5 +919,19 @@ # Kernel hacking # # CONFIG_PRINTK_TIME is not set -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=15 +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_DEBUG_STACKOVERFLOW is not set +# CONFIG_KPROBES is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_4KSTACKS is not set +# CONFIG_INIT_DEBUG is not set diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Tue Aug 16 18:09:07 2005 @@ -137,6 +137,7 @@ # CONFIG_X86_REBOOTFIXUPS is not set CONFIG_MICROCODE=m 
CONFIG_X86_CPUID=m +CONFIG_SWIOTLB=y # # Firmware Drivers diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Tue Aug 16 18:09:07 2005 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12.3-xen0 -# Mon Aug 15 19:46:39 2005 +# Linux kernel version: 2.6.12.4-xen +# Mon Aug 15 19:54:11 2005 # CONFIG_XEN=y CONFIG_ARCH_XEN=y @@ -35,6 +35,7 @@ # CONFIG_CLEAN_COMPILE is not set CONFIG_BROKEN=y CONFIG_BROKEN_ON_SMP=y +CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 # @@ -50,8 +51,10 @@ CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y # CONFIG_IKCONFIG is not set +# CONFIG_CPUSETS is not set # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -76,6 +79,7 @@ # CONFIG_MODVERSIONS is not set CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y CONFIG_XENARCH="x86_64" CONFIG_X86=y CONFIG_MMU=y @@ -88,12 +92,15 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_X86_GOOD_APIC=y # CONFIG_HPET_TIMER is not set -# CONFIG_SMP is not set +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +# CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set CONFIG_MICROCODE=y # CONFIG_X86_CPUID is not set # CONFIG_NUMA is not set # CONFIG_MTRR is not set +CONFIG_HAVE_DEC_LOCK=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y CONFIG_PCI=y @@ -120,8 +127,12 @@ # CONFIG_X86_TSC is not set CONFIG_X86_XEN_GENAPIC=y # CONFIG_X86_MSR is not set +CONFIG_X86_HT=y +# CONFIG_K8_NUMA is not set +# CONFIG_NUMA_EMU is not set # CONFIG_GART_IOMMU is not set CONFIG_DUMMY_IOMMU=y +CONFIG_SWIOTLB=y # CONFIG_X86_MCE is not set # @@ -163,6 +174,7 @@ CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y +# CONFIG_DEBUG_DRIVER is not set # # Memory Technology Devices (MTD) @@ -214,7 +226,6 @@ 
CONFIG_MTD_ROM=m CONFIG_MTD_ABSENT=m # CONFIG_MTD_OBSOLETE_CHIPS is not set -# CONFIG_MTD_XIP is not set # # Mapping drivers for chip access @@ -2395,7 +2406,21 @@ # Kernel hacking # # CONFIG_PRINTK_TIME is not set -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=15 +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_DEBUG_STACKOVERFLOW is not set +# CONFIG_KPROBES is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_4KSTACKS is not set CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y +# CONFIG_INIT_DEBUG is not set diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/Kconfig --- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Tue Aug 16 18:09:07 2005 @@ -533,6 +533,11 @@ with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. 
+config SWIOTLB + bool + depends on PCI + default y + source "drivers/firmware/Kconfig" choice diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Tue Aug 16 18:09:07 2005 @@ -44,6 +44,7 @@ c-obj-$(CONFIG_EFI) += efi.o efi_stub.o c-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o +c-obj-$(CONFIG_SWIOTLB) += swiotlb.o EXTRA_AFLAGS := -traditional diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Tue Aug 16 18:09:07 2005 @@ -23,6 +23,103 @@ int flags; unsigned long *bitmap; }; + +static void iommu_bug(void) +{ + printk(KERN_ALERT "Fatal DMA error! Please use 'swiotlb=force'\n"); + BUG(); +} + +#define IOMMU_BUG_ON(test) do { if (unlikely(test)) iommu_bug(); } while(0) + +int +dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + int i, rc; + + BUG_ON(direction == DMA_NONE); + + if (swiotlb) { + rc = swiotlb_map_sg(hwdev, sg, nents, direction); + } else { + for (i = 0; i < nents; i++ ) { + sg[i].dma_address = + page_to_phys(sg[i].page) + sg[i].offset; + sg[i].dma_length = sg[i].length; + BUG_ON(!sg[i].page); + IOMMU_BUG_ON(address_needs_mapping( + hwdev, sg[i].dma_address)); + } + rc = nents; + } + + flush_write_buffers(); + return rc; +} +EXPORT_SYMBOL(dma_map_sg); + +void +dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + if (swiotlb) + swiotlb_unmap_sg(hwdev, sg, nents, direction); +} +EXPORT_SYMBOL(dma_unmap_sg); + +dma_addr_t +dma_map_page(struct device *dev, struct page *page, unsigned long offset, + size_t size, enum 
dma_data_direction direction) +{ + dma_addr_t dma_addr; + + BUG_ON(direction == DMA_NONE); + + if (swiotlb) { + dma_addr = swiotlb_map_page( + dev, page, offset, size, direction); + } else { + dma_addr = page_to_phys(page) + offset; + IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr)); + } + + return dma_addr; +} +EXPORT_SYMBOL(dma_map_page); + +void +dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + if (swiotlb) + swiotlb_unmap_page(dev, dma_address, size, direction); +} +EXPORT_SYMBOL(dma_unmap_page); + +int +dma_mapping_error(dma_addr_t dma_addr) +{ + if (swiotlb) + return swiotlb_dma_mapping_error(dma_addr); + return 0; +} +EXPORT_SYMBOL(dma_mapping_error); + +int +dma_supported(struct device *dev, u64 mask) +{ + if (swiotlb) + return swiotlb_dma_supported(dev, mask); + /* + * By default we'll BUG when an infeasible DMA is requested, and + * request swiotlb=force (see IOMMU_BUG_ON). + */ + return 1; +} +EXPORT_SYMBOL(dma_supported); void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, unsigned int __nocast gfp) @@ -54,13 +151,14 @@ ret = (void *)vstart; if (ret != NULL) { - xen_contig_memory(vstart, order); + xen_create_contiguous_region(vstart, order); memset(ret, 0, size); *dma_handle = virt_to_bus(ret); } return ret; } +EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) @@ -72,9 +170,12 @@ int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; bitmap_release_region(mem->bitmap, page, order); - } else + } else { + xen_destroy_contiguous_region((unsigned long)vaddr, order); free_pages((unsigned long)vaddr, order); -} + } +} +EXPORT_SYMBOL(dma_free_coherent); int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, dma_addr_t device_addr, size_t size, int flags) @@ -153,46 +254,20 @@ } EXPORT_SYMBOL(dma_mark_declared_memory_occupied); -static 
LIST_HEAD(dma_map_head); -static DEFINE_SPINLOCK(dma_map_lock); -struct dma_map_entry { - struct list_head list; - dma_addr_t dma; - char *bounce, *host; - size_t size; -}; -#define DMA_MAP_MATCHES(e,d) (((e)->dma<=(d)) && (((e)->dma+(e)->size)>(d))) - dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, enum dma_data_direction direction) { - struct dma_map_entry *ent; - void *bnc; dma_addr_t dma; - unsigned long flags; - - BUG_ON(direction == DMA_NONE); - - /* - * Even if size is sub-page, the buffer may still straddle a page - * boundary. Take into account buffer start offset. All other calls are - * conservative and always search the dma_map list if it's non-empty. - */ - if ((((unsigned int)ptr & ~PAGE_MASK) + size) <= PAGE_SIZE) { + + BUG_ON(direction == DMA_NONE); + + if (swiotlb) { + dma = swiotlb_map_single(dev, ptr, size, direction); + } else { dma = virt_to_bus(ptr); - } else { - BUG_ON((bnc = dma_alloc_coherent(dev, size, &dma, GFP_ATOMIC)) == NULL); - BUG_ON((ent = kmalloc(sizeof(*ent), GFP_ATOMIC)) == NULL); - if (direction != DMA_FROM_DEVICE) - memcpy(bnc, ptr, size); - ent->dma = dma; - ent->bounce = bnc; - ent->host = ptr; - ent->size = size; - spin_lock_irqsave(&dma_map_lock, flags); - list_add(&ent->list, &dma_map_head); - spin_unlock_irqrestore(&dma_map_lock, flags); + IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size)); + IOMMU_BUG_ON(address_needs_mapping(dev, dma)); } flush_write_buffers(); @@ -204,30 +279,9 @@ dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction) { - struct dma_map_entry *ent; - unsigned long flags; - - BUG_ON(direction == DMA_NONE); - - /* Fast-path check: are there any multi-page DMA mappings? 
*/ - if (!list_empty(&dma_map_head)) { - spin_lock_irqsave(&dma_map_lock, flags); - list_for_each_entry ( ent, &dma_map_head, list ) { - if (DMA_MAP_MATCHES(ent, dma_addr)) { - list_del(&ent->list); - break; - } - } - spin_unlock_irqrestore(&dma_map_lock, flags); - if (&ent->list != &dma_map_head) { - BUG_ON(dma_addr != ent->dma); - BUG_ON(size != ent->size); - if (direction != DMA_TO_DEVICE) - memcpy(ent->host, ent->bounce, size); - dma_free_coherent(dev, size, ent->bounce, ent->dma); - kfree(ent); - } - } + BUG_ON(direction == DMA_NONE); + if (swiotlb) + swiotlb_unmap_single(dev, dma_addr, size, direction); } EXPORT_SYMBOL(dma_unmap_single); @@ -235,23 +289,8 @@ dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { - struct dma_map_entry *ent; - unsigned long flags, off; - - /* Fast-path check: are there any multi-page DMA mappings? */ - if (!list_empty(&dma_map_head)) { - spin_lock_irqsave(&dma_map_lock, flags); - list_for_each_entry ( ent, &dma_map_head, list ) - if (DMA_MAP_MATCHES(ent, dma_handle)) - break; - spin_unlock_irqrestore(&dma_map_lock, flags); - if (&ent->list != &dma_map_head) { - off = dma_handle - ent->dma; - BUG_ON((off + size) > ent->size); - /*if (direction != DMA_TO_DEVICE)*/ - memcpy(ent->host+off, ent->bounce+off, size); - } - } + if (swiotlb) + swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction); } EXPORT_SYMBOL(dma_sync_single_for_cpu); @@ -259,24 +298,17 @@ dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { - struct dma_map_entry *ent; - unsigned long flags, off; - - /* Fast-path check: are there any multi-page DMA mappings? 
*/ - if (!list_empty(&dma_map_head)) { - spin_lock_irqsave(&dma_map_lock, flags); - list_for_each_entry ( ent, &dma_map_head, list ) - if (DMA_MAP_MATCHES(ent, dma_handle)) - break; - spin_unlock_irqrestore(&dma_map_lock, flags); - if (&ent->list != &dma_map_head) { - off = dma_handle - ent->dma; - BUG_ON((off + size) > ent->size); - /*if (direction != DMA_FROM_DEVICE)*/ - memcpy(ent->bounce+off, ent->host+off, size); - } - } - - flush_write_buffers(); + if (swiotlb) + swiotlb_sync_single_for_device(dev, dma_handle, size, direction); } EXPORT_SYMBOL(dma_sync_single_for_device); + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Tue Aug 16 18:09:07 2005 @@ -540,16 +540,13 @@ EXPORT_SYMBOL(profile_pc); #endif -/* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick - */ -static inline void do_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { s64 delta, delta_cpu; int cpu = smp_processor_id(); struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); + + write_seqlock(&xtime_lock); do { get_time_values_from_xen(); @@ -572,7 +569,6 @@ for (cpu = 0; cpu < num_online_cpus(); cpu++) printk(" %d: %lld\n", cpu, per_cpu(processed_system_time, cpu)); - return; } /* System-wide jiffy work. */ @@ -582,7 +578,18 @@ do_timer(regs); } - /* Local CPU jiffy work. */ + if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) { + update_wallclock(); + clock_was_set(); + } + + write_sequnlock(&xtime_lock); + + /* + * Local CPU jiffy work. 
No need to hold xtime_lock, and I'm not sure + * if there is risk of deadlock if we do (since update_process_times + * may do scheduler rebalancing work and thus acquire runqueue locks). + */ while (delta_cpu >= NS_PER_TICK) { delta_cpu -= NS_PER_TICK; per_cpu(processed_system_time, cpu) += NS_PER_TICK; @@ -590,29 +597,6 @@ profile_tick(CPU_PROFILING, regs); } - if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) { - update_wallclock(); - clock_was_set(); - } -} - -/* - * This is the same as the above, except we _also_ save the current - * Time Stamp Counter value at the time of the timer interrupt, so that - * we later on can estimate the time of day more exactly. - */ -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - /* - * Here we are in the timer irq handler. We just have irqs locally - * disabled but we don't know if the timer_bh is running on the other - * CPU. We need to avoid to SMP race with it. NOTE: we don' t need - * the irq version of write_lock because as just said we have irq - * locally disabled. -arca - */ - write_seqlock(&xtime_lock); - do_timer_interrupt(irq, NULL, regs); - write_sequnlock(&xtime_lock); return IRQ_HANDLED; } diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Tue Aug 16 18:09:07 2005 @@ -263,12 +263,9 @@ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } -void xen_contig_memory(unsigned long vstart, unsigned int order) -{ - /* - * Ensure multi-page extents are contiguous in machine memory. This code - * could be cleaned up some, and the number of hypercalls reduced. - */ +/* Ensure multi-page extents are contiguous in machine memory. 
*/ +void xen_create_contiguous_region(unsigned long vstart, unsigned int order) +{ pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -312,6 +309,49 @@ balloon_unlock(flags); } +void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long mfn, i, flags; + + scrub_pages(vstart, 1 << order); + + balloon_lock(flags); + + /* 1. Zap current PTEs, giving away the underlying pages. */ + for (i = 0; i < (1<<order); i++) { + pgd = pgd_offset_k(vstart + (i*PAGE_SIZE)); + pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE))); + pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE))); + pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); + mfn = pte_mfn(*pte); + BUG_ON(HYPERVISOR_update_va_mapping( + vstart + (i*PAGE_SIZE), __pte_ma(0), 0)); + phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = + INVALID_P2M_ENTRY; + BUG_ON(HYPERVISOR_dom_mem_op( + MEMOP_decrease_reservation, &mfn, 1, 0) != 1); + } + + /* 2. Map new pages in place of old pages. 
*/ + for (i = 0; i < (1<<order); i++) { + BUG_ON(HYPERVISOR_dom_mem_op( + MEMOP_increase_reservation, &mfn, 1, 0) != 1); + BUG_ON(HYPERVISOR_update_va_mapping( + vstart + (i*PAGE_SIZE), + __pte_ma((mfn<<PAGE_SHIFT)|__PAGE_KERNEL), 0)); + xen_machphys_update(mfn, (__pa(vstart)>>PAGE_SHIFT)+i); + phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = mfn; + } + + flush_tlb_all(); + + balloon_unlock(flags); +} + #ifdef CONFIG_XEN_PHYSDEV_ACCESS unsigned long allocate_empty_lowmem_region(unsigned long pages) diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Tue Aug 16 18:09:07 2005 @@ -41,6 +41,12 @@ #include <asm/sections.h> #include <asm-xen/hypervisor.h> +#if defined(CONFIG_SWIOTLB) +extern void swiotlb_init(void); +int swiotlb; +EXPORT_SYMBOL(swiotlb); +#endif + unsigned int __VMALLOC_RESERVE = 128 << 20; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -630,6 +636,10 @@ int tmp; int bad_ppro; unsigned long pfn; + +#if defined(CONFIG_SWIOTLB) + swiotlb_init(); +#endif #ifndef CONFIG_DISCONTIGMEM if (!mem_map) diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Tue Aug 16 18:09:07 2005 @@ -332,10 +332,10 @@ for (i = 0; i < size; i += PAGE_SIZE) { if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) { /* Fill in the PTE pointers. 
*/ - generic_page_range(mm, start_address, - address-start_address, - direct_remap_area_pte_fn, &w); - + generic_page_range(mm, start_address, + address - start_address, + direct_remap_area_pte_fn, &w); + w = u; if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) return -EFAULT; v = u; @@ -355,9 +355,8 @@ if (v != u) { /* get the ptep's filled in */ - generic_page_range(mm, start_address, - address-start_address, - direct_remap_area_pte_fn, &w); + generic_page_range(mm, start_address, address - start_address, + direct_remap_area_pte_fn, &w); if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)) return -EFAULT; } @@ -370,32 +369,34 @@ EXPORT_SYMBOL(direct_remap_area_pages); int create_lookup_pte_addr(struct mm_struct *mm, - unsigned long address, - unsigned long *ptep) -{ - int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) - { - unsigned long *ptep = (unsigned long *)data; - if (ptep) *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT) - | ((unsigned long)pte & ~PAGE_MASK); - return 0; - } - - return generic_page_range(mm, address, PAGE_SIZE, f, ptep); + unsigned long address, + unsigned long *ptep) +{ + int f(pte_t *pte, struct page *pte_page, unsigned long addr, + void *data) { + unsigned long *ptep = (unsigned long *)data; + if (ptep) + *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << + PAGE_SHIFT) | + ((unsigned long)pte & ~PAGE_MASK); + return 0; + } + + return generic_page_range(mm, address, PAGE_SIZE, f, ptep); } EXPORT_SYMBOL(create_lookup_pte_addr); int touch_pte_range(struct mm_struct *mm, - unsigned long address, - unsigned long size) -{ - int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) - { - return 0; - } - - return generic_page_range(mm, address, size, f, NULL); + unsigned long address, + unsigned long size) +{ + int f(pte_t *pte, struct page *pte_page, unsigned long addr, + void *data) { + return 0; + } + + return generic_page_range(mm, address, size, f, NULL); } 
EXPORT_SYMBOL(touch_pte_range); diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Tue Aug 16 18:09:07 2005 @@ -277,7 +277,7 @@ #ifdef CONFIG_X86_PAE /* this gives us a page below 4GB */ - xen_contig_memory((unsigned long)pgd, 0); + xen_create_contiguous_region((unsigned long)pgd, 0); #endif if (!HAVE_SHARED_KERNEL_PMD) diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig --- a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig Tue Aug 16 18:09:07 2005 @@ -329,12 +329,12 @@ # need this always enabled with GART_IOMMU for the VIA workaround config SWIOTLB bool - depends on GART_IOMMU + depends on PCI default y config DUMMY_IOMMU bool - depends on !GART_IOMMU && !SWIOTLB + depends on !GART_IOMMU default y help Don't use IOMMU code. 
This will cause problems when you have more than 4GB diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Tue Aug 16 18:09:07 2005 @@ -36,8 +36,9 @@ #obj-$(CONFIG_CPU_FREQ) += cpufreq/ #obj-$(CONFIG_EARLY_PRINTK) += early_printk.o #obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o -obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o -#obj-$(CONFIG_SWIOTLB) += swiotlb.o +obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o +i386-obj-$(CONFIG_DUMMY_IOMMU) += pci-dma.o +i386-obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o @@ -49,7 +50,7 @@ bootflag-y += ../../../i386/kernel/bootflag.o cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../../i386/kernel/cpuid.o topology-y += ../../../i386/mach-default/topology.o -swiotlb-$(CONFIG_SWIOTLB) += ../../../ia64/lib/swiotlb.o +#swiotlb-$(CONFIG_SWIOTLB) += ../../../ia64/lib/swiotlb.o microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../../i386/kernel/microcode.o intel_cacheinfo-y += ../../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c Tue Aug 16 18:09:07 2005 @@ -61,6 +61,7 @@ EXPORT_SYMBOL(dma_free_coherent); #endif +#if 0 int dma_supported(struct device *hwdev, u64 mask) { /* @@ -76,6 +77,7 @@ return 1; } EXPORT_SYMBOL(dma_supported); +#endif int dma_get_cache_alignment(void) { diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 16 18:09:07 2005 @@ -42,10 +42,6 @@ #ifndef 
Dprintk #define Dprintk(x...) -#endif - -#ifdef CONFIG_GART_IOMMU -extern int swiotlb; #endif extern char _stext[]; @@ -790,8 +786,6 @@ return 1; } -extern int swiotlb_force; - static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; @@ -800,14 +794,9 @@ int codesize, reservedpages, datasize, initsize; int tmp; -#ifdef CONFIG_SWIOTLB - if (swiotlb_force) - swiotlb = 1; - if (!iommu_aperture && - (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu)) - swiotlb = 1; - if (swiotlb) - swiotlb_init(); +#if defined(CONFIG_SWIOTLB) + extern void swiotlb_init(void); + swiotlb_init(); #endif /* How many end-of-memory variables you have, grandma! */ diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Aug 16 18:09:07 2005 @@ -23,6 +23,9 @@ blkif_be_driver_status_t be_st; printk(KERN_INFO "Initialising Xen block tap device\n"); +#ifdef CONFIG_XEN_BLKDEV_GRANT + printk(KERN_INFO "Block tap is using grant tables.\n"); +#endif DPRINTK(" tap - Backend connection init:\n"); diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Tue Aug 16 18:09:07 2005 @@ -85,6 +85,11 @@ spinlock_t blk_ring_lock; atomic_t refcnt; struct work_struct work; +#ifdef CONFIG_XEN_BLKDEV_GRANT + u16 shmem_handle; + memory_t shmem_vaddr; + grant_ref_t shmem_ref; +#endif } blkif_t; blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Tue Aug 16 18:09:07 2005 @@ -9,6 +9,7 @@ */ 
#include "blktap.h" +#include <asm-xen/evtchn.h> static char *blkif_state_name[] = { [BLKIF_STATE_CLOSED] = "closed", @@ -48,12 +49,21 @@ blkif_t *blkif = (blkif_t *)arg; ctrl_msg_t cmsg; blkif_be_disconnect_t disc; +#ifdef CONFIG_XEN_BLKDEV_GRANT + struct gnttab_unmap_grant_ref op; +#endif /* * These can't be done in blkif_disconnect() because at that point there * may be outstanding requests at the disc whose asynchronous responses * must still be notified to the remote driver. */ +#ifdef CONFIG_XEN_BLKDEV_GRANT + op.host_addr = blkif->shmem_vaddr; + op.handle = blkif->shmem_handle; + op.dev_bus_addr = 0; + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); +#endif vfree(blkif->blk_ring.sring); /* Construct the deferred response message. */ @@ -177,8 +187,12 @@ unsigned int evtchn = connect->evtchn; unsigned long shmem_frame = connect->shmem_frame; struct vm_struct *vma; +#ifdef CONFIG_XEN_BLKDEV_GRANT + int ref = connect->shmem_ref; +#else pgprot_t prot; int error; +#endif blkif_t *blkif; blkif_sring_t *sring; @@ -199,24 +213,46 @@ return; } - prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); +#ifndef CONFIG_XEN_BLKDEV_GRANT + prot = __pgprot(_KERNPG_TABLE); error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), shmem_frame<<PAGE_SHIFT, PAGE_SIZE, prot, domid); if ( error != 0 ) { - WPRINTK("BE_CONNECT: error! 
(%d)\n", error); if ( error == -ENOMEM ) connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - else if ( error == -EFAULT ) { + else if ( error == -EFAULT ) connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; - WPRINTK("BE_CONNECT: MAPPING error!\n"); - } else connect->status = BLKIF_BE_STATUS_ERROR; vfree(vma->addr); return; } +#else + { /* Map: Use the Grant table reference */ + struct gnttab_map_grant_ref op; + op.host_addr = VMALLOC_VMADDR(vma->addr); + op.flags = GNTMAP_host_map; + op.ref = ref; + op.dom = domid; + + BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); + + handle = op.handle; + + if (op.handle < 0) { + DPRINTK(" Grant table operation failure !\n"); + connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; + vfree(vma->addr); + return; + } + + blkif->shmem_ref = ref; + blkif->shmem_handle = handle; + blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr); + } +#endif if ( blkif->status != DISCONNECTED ) { diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Tue Aug 16 18:09:07 2005 @@ -21,6 +21,9 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm-xen/xen-public/io/blkif.h> /* for control ring. */ +#ifdef CONFIG_XEN_BLKDEV_GRANT +#include <asm-xen/xen-public/grant_table.h> +#endif #include "blktap.h" @@ -42,6 +45,7 @@ /* local prototypes */ static int blktap_read_fe_ring(void); static int blktap_read_be_ring(void); + /* -------[ mmap region ]--------------------------------------------- */ /* @@ -73,7 +77,28 @@ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ ((_seg) * PAGE_SIZE)) - +/* -------[ grant handles ]------------------------------------------- */ + +#ifdef CONFIG_XEN_BLKDEV_GRANT +/* When using grant tables to map a frame for device access then the + * handle returned must be used to unmap the frame. 
This is needed to + * drop the ref count on the frame. + */ +struct grant_handle_pair +{ + u16 kernel; + u16 user; +}; +static struct grant_handle_pair pending_grant_handles[MMAP_PAGES]; +#define pending_handle(_idx, _i) \ + (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)]) +#define BLKTAP_INVALID_HANDLE(_g) \ + (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF)) +#define BLKTAP_INVALIDATE_HANDLE(_g) do { \ + (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \ + } while(0) + +#endif /* -------[ blktap vm ops ]------------------------------------------- */ @@ -348,9 +373,43 @@ /*-----[ Data to/from user space ]----------------------------------------*/ - static void fast_flush_area(int idx, int nr_pages) { +#ifdef CONFIG_XEN_BLKDEV_GRANT + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; + unsigned int i, op = 0; + struct grant_handle_pair *handle; + unsigned long ptep; + + for (i=0; i<nr_pages; i++) + { + handle = &pending_handle(idx, i); + if (!BLKTAP_INVALID_HANDLE(handle)) + { + + unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i); + unmap[op].dev_bus_addr = 0; + unmap[op].handle = handle->kernel; + op++; + + if (create_lookup_pte_addr(blktap_vma->vm_mm, + MMAP_VADDR(user_vstart, idx, i), + &ptep) !=0) { + DPRINTK("Couldn't get a pte addr!\n"); + return; + } + unmap[op].host_addr = ptep; + unmap[op].dev_bus_addr = 0; + unmap[op].handle = handle->user; + op++; + + BLKTAP_INVALIDATE_HANDLE(handle); + } + } + if ( unlikely(HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, op))) + BUG(); +#else multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i; @@ -363,21 +422,22 @@ mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) BUG(); -} - - -extern int __direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long size, - mmu_update_t *v); +#endif +} + int blktap_write_fe_ring(blkif_request_t *req) { 
blkif_request_t *target; - int i; + int i, ret = 0; +#ifdef CONFIG_XEN_BLKDEV_GRANT + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; + int op; +#else unsigned long remap_prot; multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST+1]; mmu_update_t mmu[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +#endif /* * This is called to pass a request from the real frontend domain's @@ -394,18 +454,109 @@ return 0; } - remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; flush_cache_all(); /* a noop on intel... */ target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt); memcpy(target, req, sizeof(*req)); /* Map the foreign pages directly in to the application */ +#ifdef CONFIG_XEN_BLKDEV_GRANT + op = 0; + for (i=0; i<target->nr_segments; i++) { + + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long ptep; + + uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i); + kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i); + + /* Map the remote page to kernel. */ + map[op].host_addr = kvaddr; + map[op].dom = ID_TO_DOM(req->id); + map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]); + map[op].flags = GNTMAP_host_map; + /* This needs a bit more thought in terms of interposition: + * If we want to be able to modify pages during write using + * grant table mappings, the guest will either need to allow + * it, or we'll need to incur a copy. */ + if (req->operation == BLKIF_OP_WRITE) + map[op].flags |= GNTMAP_readonly; + op++; + + /* Now map it to user. */ + ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); + if (ret) + { + DPRINTK("Couldn't get a pte addr!\n"); + goto fail; + } + + map[op].host_addr = ptep; + map[op].dom = ID_TO_DOM(req->id); + map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]); + map[op].flags = GNTMAP_host_map | GNTMAP_application_map + | GNTMAP_contains_pte; + /* Above interposition comment applies here as well. 
*/ + if (req->operation == BLKIF_OP_WRITE) + map[op].flags |= GNTMAP_readonly; + op++; + } + + if ( unlikely(HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, map, op))) + BUG(); + + op = 0; + for (i=0; i<(target->nr_segments*2); i+=2) { + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long offset; + int cancel = 0; + + uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i/2); + kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i/2); + + if ( unlikely(map[i].handle < 0) ) { + DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle); + ret = map[i].handle; + cancel = 1; + } + + if ( unlikely(map[i+1].handle < 0) ) { + DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle); + ret = map[i+1].handle; + cancel = 1; + } + + if (cancel) + goto fail; + + /* Set the necessary mappings in p2m and in the VM_FOREIGN + * vm_area_struct to allow user vaddr -> struct page lookups + * to work. This is needed for direct IO to foreign pages. */ + phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] = + FOREIGN_FRAME(map[i].dev_bus_addr); + + offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; + ((struct page **)blktap_vma->vm_private_data)[offset] = + pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + + /* Save handles for unmapping later. 
*/ + pending_handle(ID_TO_IDX(req->id), i/2).kernel = map[i].handle; + pending_handle(ID_TO_IDX(req->id), i/2).user = map[i+1].handle; + } + +#else + + remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; + for (i=0; i<target->nr_segments; i++) { unsigned long buf; unsigned long uvaddr; unsigned long kvaddr; unsigned long offset; + unsigned long ptep; buf = target->frame_and_sects[i] & PAGE_MASK; uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i); @@ -421,10 +572,14 @@ phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] = FOREIGN_FRAME(buf >> PAGE_SHIFT); - __direct_remap_area_pages(blktap_vma->vm_mm, - uvaddr, - PAGE_SIZE, - &mmu[i]); + ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); + if (ret) + { + DPRINTK("error getting pte\n"); + goto fail; + } + + mmu[i].ptr = ptep; mmu[i].val = (target->frame_and_sects[i] & PAGE_MASK) | pgprot_val(blktap_vma->vm_page_prot); @@ -448,16 +603,17 @@ if ( unlikely(mcl[i].result != 0) ) { DPRINTK("invalid buffer -- could not remap it\n"); - fast_flush_area(ID_TO_IDX(req->id), target->nr_segments); - return -1; + ret = mcl[i].result; + goto fail; } } if ( unlikely(mcl[i].result != 0) ) { DPRINTK("direct remapping of pages to /dev/blktap failed.\n"); - return -1; - } - + ret = mcl[i].result; + goto fail; + } +#endif /* CONFIG_XEN_BLKDEV_GRANT */ /* Mark mapped pages as reserved: */ for ( i = 0; i < target->nr_segments; i++ ) @@ -472,6 +628,10 @@ blktap_ufe_ring.req_prod_pvt++; return 0; + + fail: + fast_flush_area(ID_TO_IDX(req->id), target->nr_segments); + return ret; } int blktap_write_be_ring(blkif_response_t *rsp) @@ -538,11 +698,10 @@ map[offset] = NULL; } - + fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages); zap_page_range(blktap_vma, MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), 0), ar->nr_pages << PAGE_SHIFT, NULL); - fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages); write_resp_to_fe_ring(blkif, resp_s); blktap_ufe_ring.rsp_cons = i + 1; kick_fe_domain(blkif); @@ -616,10 +775,16 @@ int 
blktap_init(void) { - int err; + int err, i, j; if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) BUG(); + +#ifdef CONFIG_XEN_BLKDEV_GRANT + for (i=0; i<MAX_PENDING_REQS ; i++) + for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++) + BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j)); +#endif err = misc_register(&blktap_miscdev); if ( err != 0 ) diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h Tue Aug 16 18:09:07 2005 @@ -1,11 +1,33 @@ #ifndef _ASM_I386_DMA_MAPPING_H #define _ASM_I386_DMA_MAPPING_H +/* + * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for + * documentation. + */ + +#include <linux/config.h> #include <linux/mm.h> - #include <asm/cache.h> #include <asm/io.h> #include <asm/scatterlist.h> +#include <asm-i386/swiotlb.h> + +static inline int +address_needs_mapping(struct device *hwdev, dma_addr_t addr) +{ + dma_addr_t mask = 0xffffffff; + /* If the device has a mask, use it, otherwise default to 32 bits */ + if (hwdev && hwdev->dma_mask) + mask = *hwdev->dma_mask; + return (addr & ~mask) != 0; +} + +static inline int +range_straddles_page_boundary(void *p, size_t size) +{ + return ((((unsigned long)p & ~PAGE_MASK) + size) > PAGE_SIZE); +} #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) @@ -24,46 +46,18 @@ dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction); -static inline int -dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - int i; +extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, enum dma_data_direction direction); +extern void dma_unmap_sg(struct device *hwdev, struct scatterlist 
*sg, + int nents, enum dma_data_direction direction); - BUG_ON(direction == DMA_NONE); +extern dma_addr_t +dma_map_page(struct device *dev, struct page *page, unsigned long offset, + size_t size, enum dma_data_direction direction); - for (i = 0; i < nents; i++ ) { - BUG_ON(!sg[i].page); - - sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset; - } - - flush_write_buffers(); - return nents; -} - -static inline dma_addr_t -dma_map_page(struct device *dev, struct page *page, unsigned long offset, - size_t size, enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); - return page_to_phys(page) + offset; -} - -static inline void +extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - - -static inline void -dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} + enum dma_data_direction direction); extern void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, @@ -93,34 +87,25 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction) { + if (swiotlb) + swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction); + flush_write_buffers(); } static inline void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction) { + if (swiotlb) + swiotlb_sync_sg_for_device(dev,sg,nelems,direction); flush_write_buffers(); } -static inline int -dma_mapping_error(dma_addr_t dma_addr) -{ - return 0; -} +extern int +dma_mapping_error(dma_addr_t dma_addr); -static inline int -dma_supported(struct device *dev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. 
- */ - if(mask < 0x00ffffff) - return 0; - - return 1; -} +extern int +dma_supported(struct device *dev, u64 mask); static inline int dma_set_mask(struct device *dev, u64 mask) @@ -133,6 +118,7 @@ return 0; } +#ifdef __i386__ static inline int dma_get_cache_alignment(void) { @@ -140,6 +126,9 @@ * maximum possible, to be safe */ return (1 << L1_CACHE_SHIFT_MAX); } +#else +extern int dma_get_cache_alignment(void); +#endif #define dma_is_consistent(d) (1) diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h Tue Aug 16 18:09:07 2005 @@ -43,11 +43,8 @@ struct pci_dev; -/* The PCI address space does equal the physical memory - * address space. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) +/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */ +#define PCI_DMA_BUS_IS_PHYS (0) /* pci_unmap_{page,single} is a nop so... */ #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h Tue Aug 16 18:09:07 2005 @@ -1,89 +1,1 @@ -#ifndef _X8664_DMA_MAPPING_H -#define _X8664_DMA_MAPPING_H 1 - -/* - * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for - * documentation. - */ - -#include <linux/config.h> - -#include <asm/scatterlist.h> -#include <asm/io.h> -#include <asm/swiotlb.h> - -extern dma_addr_t bad_dma_address; -#define dma_mapping_error(x) \ - (swiotlb ? 
swiotlb_dma_mapping_error(x) : ((x) == bad_dma_address)) - -void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned gfp); -void dma_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle); - -extern dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size, - enum dma_data_direction direction); -extern void dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size, - enum dma_data_direction direction); - -#define dma_map_page(dev,page,offset,size,dir) \ - dma_map_single((dev), page_address(page)+(offset), (size), (dir)) - -extern void -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction); - -extern void -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction); - -static inline void dma_sync_sg_for_cpu(struct device *hwdev, - struct scatterlist *sg, - int nelems, int direction) -{ - if (direction == DMA_NONE) - out_of_line_bug(); - - if (swiotlb) - return swiotlb_sync_sg_for_cpu(hwdev,sg,nelems,direction); - - flush_write_buffers(); -} - -static inline void dma_sync_sg_for_device(struct device *hwdev, - struct scatterlist *sg, - int nelems, int direction) -{ - if (direction == DMA_NONE) - out_of_line_bug(); - - if (swiotlb) - return swiotlb_sync_sg_for_device(hwdev,sg,nelems,direction); - - flush_write_buffers(); -} - -extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); -extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); - -#define dma_unmap_page dma_unmap_single - -extern int dma_supported(struct device *hwdev, u64 mask); -extern int dma_get_cache_alignment(void); -#define dma_is_consistent(h) 1 - -static inline int dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - *dev->dma_mask = mask; - return 0; 
-} - -static inline void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir) -{ - flush_write_buffers(); -} -#endif +#include <asm-i386/dma-mapping.h> diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h Tue Aug 16 18:09:07 2005 @@ -79,7 +79,9 @@ #else /* No IOMMU */ -#define PCI_DMA_BUS_IS_PHYS 1 +/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */ +#define PCI_DMA_BUS_IS_PHYS (0) + #define pci_dac_dma_supported(pci_dev, mask) 1 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Tue Aug 16 18:09:07 2005 @@ -134,7 +134,8 @@ #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) #endif /* linux < 2.6.0 */ -void xen_contig_memory(unsigned long vstart, unsigned int order); +void xen_create_contiguous_region(unsigned long vstart, unsigned int order); +void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); #ifdef CONFIG_XEN_PHYSDEV_ACCESS /* Allocate a contiguous empty region of low memory. Return virtual start. 
*/ diff -r e3d811cca4e1 -r 1ae656509f02 tools/console/daemon/io.c --- a/tools/console/daemon/io.c Tue Aug 16 04:15:23 2005 +++ b/tools/console/daemon/io.c Tue Aug 16 18:09:07 2005 @@ -87,6 +87,7 @@ { int domid; int tty_fd; + bool is_dead; struct buffer buffer; struct domain *next; }; @@ -156,10 +157,12 @@ dom->domid = domid; dom->tty_fd = domain_create_tty(dom); + dom->is_dead = false; dom->buffer.data = 0; dom->buffer.size = 0; dom->buffer.capacity = 0; dom->buffer.max_capacity = 0; + dom->next = 0; dolog(LOG_DEBUG, "New domain %d", domid); @@ -206,6 +209,16 @@ } } +static void remove_dead_domains(struct domain *dom) +{ + if (dom == NULL) return; + remove_dead_domains(dom->next); + + if (dom->is_dead) { + remove_domain(dom); + } +} + static void handle_tty_read(struct domain *dom) { ssize_t len; @@ -224,7 +237,7 @@ if (domain_is_valid(dom->domid)) { dom->tty_fd = domain_create_tty(dom); } else { - remove_domain(dom); + dom->is_dead = true; } } else if (domain_is_valid(dom->domid)) { msg.u.control.msg.length = len; @@ -235,7 +248,7 @@ } } else { close(dom->tty_fd); - remove_domain(dom); + dom->is_dead = true; } } @@ -250,7 +263,7 @@ if (domain_is_valid(dom->domid)) { dom->tty_fd = domain_create_tty(dom); } else { - remove_domain(dom); + dom->is_dead = true; } } else { buffer_advance(&dom->buffer, len); @@ -316,6 +329,7 @@ ret = select(max_fd + 1, &readfds, &writefds, 0, &tv); if (tv.tv_sec == 1 && (++num_of_writes % 100) == 0) { +#if 0 /* FIXME */ /* This is a nasty hack. xcs does not handle the control channels filling up well at all. 
We'll @@ -325,6 +339,7 @@ going away */ tv.tv_usec = 1000; select(0, 0, 0, 0, &tv); +#endif } enum_domains(); @@ -333,13 +348,15 @@ } for (d = dom_head; d; d = d->next) { - if (FD_ISSET(d->tty_fd, &readfds)) { + if (!d->is_dead && FD_ISSET(d->tty_fd, &readfds)) { handle_tty_read(d); } - if (FD_ISSET(d->tty_fd, &writefds)) { + if (!d->is_dead && FD_ISSET(d->tty_fd, &writefds)) { handle_tty_write(d); } } + + remove_dead_domains(dom_head); } while (ret > -1); } diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/Domain.ml --- a/tools/debugger/pdb/Domain.ml Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/Domain.ml Tue Aug 16 18:09:07 2005 @@ -36,6 +36,7 @@ Printf.sprintf "{domain} domain: %d, vcpu: %d" ctx.domain ctx.vcpu +external read_register : context_t -> int -> int32 = "dom_read_register" external read_registers : context_t -> registers = "dom_read_registers" external write_register : context_t -> register -> int32 -> unit = "dom_write_register" diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/Domain.mli --- a/tools/debugger/pdb/Domain.mli Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/Domain.mli Tue Aug 16 18:09:07 2005 @@ -22,6 +22,7 @@ val string_of_context : context_t -> string +val read_register : context_t -> int -> int32 val read_registers : context_t -> registers val write_register : context_t -> register -> int32 -> unit val read_memory : context_t -> int32 -> int -> int list diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/Makefile --- a/tools/debugger/pdb/Makefile Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/Makefile Tue Aug 16 18:09:07 2005 @@ -33,7 +33,8 @@ LIBS += unix str # bc = byte-code, dc = debug byte-code -all : patches dc +# patches = patch linux domU source code +all : dc SOURCES += pdb_caml_xc.c SOURCES += pdb_caml_domain.c pdb_caml_process.c diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/PDB.ml --- a/tools/debugger/pdb/PDB.ml Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/PDB.ml Tue Aug 16 18:09:07 
2005 @@ -219,6 +219,17 @@ (***************************************************************************) +let read_register ctx register = (* register is int32 because of sscanf *) + match ctx with + | Void -> 0l (* default for startup *) + | Domain d -> Domain.read_register d register + | Process p -> + begin + Process.read_register p register; + raise No_reply + end + | _ -> raise (Unimplemented "read registers") + let read_registers ctx = match ctx with | Void -> Intel.null_registers (* default for startup *) @@ -278,14 +289,42 @@ let insert_memory_breakpoint ctx addr len = match ctx with | Domain d -> Domain.insert_memory_breakpoint d addr len - | Process p -> Process.insert_memory_breakpoint p addr len + | Process p -> + begin + Process.insert_memory_breakpoint p addr len; + raise No_reply + end | _ -> raise (Unimplemented "insert memory breakpoint") let remove_memory_breakpoint ctx addr len = match ctx with | Domain d -> Domain.remove_memory_breakpoint d addr len - | Process p -> Process.remove_memory_breakpoint p addr len + | Process p -> + begin + Process.remove_memory_breakpoint p addr len; + raise No_reply + end | _ -> raise (Unimplemented "remove memory breakpoint") + +let insert_watchpoint ctx kind addr len = + match ctx with +(* | Domain d -> Domain.insert_watchpoint d kind addr len TODO *) + | Process p -> + begin + Process.insert_watchpoint p kind addr len; + raise No_reply + end + | _ -> raise (Unimplemented "insert watchpoint") + +let remove_watchpoint ctx kind addr len = + match ctx with +(* | Domain d -> Domain.remove_watchpoint d kind addr len TODO *) + | Process p -> + begin + Process.remove_watchpoint p kind addr len; + raise No_reply + end + | _ -> raise (Unimplemented "remove watchpoint") let pause ctx = diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/Process.ml --- a/tools/debugger/pdb/Process.ml Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/Process.ml Tue Aug 16 18:09:07 2005 @@ -54,6 +54,7 @@ proc_ctx.ring <- 
Xen_domain.get_ring dom_ctx; _attach_debugger proc_ctx +external read_register : context_t -> int -> unit = "proc_read_register" external read_registers : context_t -> unit = "proc_read_registers" external write_register : context_t -> register -> int32 -> unit = "proc_write_register" @@ -69,6 +70,10 @@ "proc_insert_memory_breakpoint" external remove_memory_breakpoint : context_t -> int32 -> int -> unit = "proc_remove_memory_breakpoint" +external insert_watchpoint : context_t -> int -> int32 -> int -> unit = + "proc_insert_watchpoint" +external remove_watchpoint : context_t -> int -> int32 -> int -> unit = + "proc_remove_watchpoint" let pause ctx = pause_target ctx diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/Process.mli --- a/tools/debugger/pdb/Process.mli Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/Process.mli Tue Aug 16 18:09:07 2005 @@ -26,7 +26,7 @@ val detach_debugger : context_t -> unit val pause : context_t -> unit - +val read_register : context_t -> int -> unit val read_registers : context_t -> unit val write_register : context_t -> register -> int32 -> unit val read_memory : context_t -> int32 -> int -> unit @@ -37,3 +37,5 @@ val insert_memory_breakpoint : context_t -> int32 -> int -> unit val remove_memory_breakpoint : context_t -> int32 -> int -> unit +val insert_watchpoint : context_t -> int -> int32 -> int -> unit +val remove_watchpoint : context_t -> int -> int32 -> int -> unit diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/debugger.ml --- a/tools/debugger/pdb/debugger.ml Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/debugger.ml Tue Aug 16 18:09:07 2005 @@ -53,10 +53,20 @@ PDB.step ctx; raise No_reply +(** + Read Register Command. + return register as a 4-byte value. + *) +let gdb_read_register ctx command = + let read_reg register = + (Printf.sprintf "%08lx" (Util.flip_int32 (PDB.read_register ctx register))) + in + Scanf.sscanf command "p%x" read_reg + (** Read Registers Command. 
- returns 16 4-byte registers in a particular defined by gdb. + returns 16 4-byte registers in a particular format defined by gdb. *) let gdb_read_registers ctx = let regs = PDB.read_registers ctx in @@ -100,7 +110,7 @@ with Failure s -> "E02" in - Scanf.sscanf command "m%lx,%d" read_mem + Scanf.sscanf command "m%lx,%x" read_mem @@ -218,16 +228,24 @@ (** Insert Breakpoint or Watchpoint Packet *) + +let bwc_watch_write = 102 (* from pdb_module.h *) +let bwc_watch_read = 103 +let bwc_watch_access = 104 + let gdb_insert_bwcpoint ctx command = let insert cmd addr length = try match cmd with | 0 -> PDB.insert_memory_breakpoint ctx addr length; "OK" + | 2 -> PDB.insert_watchpoint ctx bwc_watch_write addr length; "OK" + | 3 -> PDB.insert_watchpoint ctx bwc_watch_read addr length; "OK" + | 4 -> PDB.insert_watchpoint ctx bwc_watch_access addr length; "OK" | _ -> "" with Failure s -> "E03" in - Scanf.sscanf command "Z%d,%lx,%d" insert + Scanf.sscanf command "Z%d,%lx,%x" insert (** Remove Breakpoint or Watchpoint Packet @@ -237,6 +255,9 @@ try match cmd with | 0 -> PDB.remove_memory_breakpoint ctx addr length; "OK" + | 2 -> PDB.remove_watchpoint ctx bwc_watch_write addr length; "OK" + | 3 -> PDB.remove_watchpoint ctx bwc_watch_read addr length; "OK" + | 4 -> PDB.remove_watchpoint ctx bwc_watch_access addr length; "OK" | _ -> "" with Failure s -> "E04" @@ -260,6 +281,7 @@ | 'k' -> gdb_kill () | 'm' -> gdb_read_memory ctx command | 'M' -> gdb_write_memory ctx command + | 'p' -> gdb_read_register ctx command | 'P' -> gdb_write_register ctx command | 'q' -> gdb_query command | 's' -> gdb_step ctx @@ -270,7 +292,7 @@ | 'Z' -> gdb_insert_bwcpoint ctx command | _ -> print_endline (Printf.sprintf "unknown gdb command [%s]" command); - "E02" + "" with Unimplemented s -> print_endline (Printf.sprintf "loser. 
unimplemented command [%s][%s]" diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/linux-2.6-module/debug.c --- a/tools/debugger/pdb/linux-2.6-module/debug.c Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/linux-2.6-module/debug.c Tue Aug 16 18:09:07 2005 @@ -9,33 +9,143 @@ #include <asm-i386/kdebug.h> #include <asm-xen/asm-i386/processor.h> #include <asm-xen/asm-i386/ptrace.h> +#include <asm-xen/asm-i386/tlbflush.h> #include <asm-xen/xen-public/xen.h> #include "pdb_module.h" #include "pdb_debug.h" -#define BWC_DEBUG 1 -#define BWC_INT3 3 + +static int pdb_debug_fn (struct pt_regs *regs, long error_code, + unsigned int condition); +static int pdb_int3_fn (struct pt_regs *regs, long error_code); +static int pdb_page_fault_fn (struct pt_regs *regs, long error_code, + unsigned int condition); + +/***********************************************************************/ + typedef struct bwcpoint /* break/watch/catch point */ { struct list_head list; memory_t address; - u32 domain; + int length; + + u8 type; /* BWC_??? */ + u8 mode; /* for BWC_PAGE, the current protection mode */ u32 process; - u8 old_value; /* old value for software bkpt */ - u8 type; /* BWC_??? */ + u8 error; /* error occured when enabling: don't disable. 
*/ + + /* original values */ + u8 orig_bkpt; /* single byte breakpoint */ + pte_t orig_pte; + + struct list_head watchpt_read_list; /* read watchpoints on this page */ + struct list_head watchpt_write_list; /* write */ + struct list_head watchpt_access_list; /* access */ + struct list_head watchpt_disabled_list; /* disabled */ + + struct bwcpoint *parent; /* watchpoint: bwc_watch (the page) */ + struct bwcpoint *watchpoint; /* bwc_watch_step: original watchpoint */ } bwcpoint_t, *bwcpoint_p; -static bwcpoint_t bwcpoint_list; +static struct list_head bwcpoint_list = LIST_HEAD_INIT(bwcpoint_list); + +#define _pdb_bwcpoint_alloc(_var) \ +{ \ + if ( (_var = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL)) == NULL ) \ + printk("error: unable to allocate memory %d\n", __LINE__); \ + else { \ + memset(_var, 0, sizeof(bwcpoint_t)); \ + INIT_LIST_HEAD(&_var->watchpt_read_list); \ + INIT_LIST_HEAD(&_var->watchpt_write_list); \ + INIT_LIST_HEAD(&_var->watchpt_access_list); \ + INIT_LIST_HEAD(&_var->watchpt_disabled_list); \ + } \ +} + +/***********************************************************************/ + +static void _pdb_bwc_print_list (struct list_head *, char *, int); + +static void +_pdb_bwc_print (bwcpoint_p bwc, char *label, int level) +{ + printk("%s%03d 0x%08lx:0x%02x %c\n", label, bwc->type, + bwc->address, bwc->length, bwc->error ? 
'e' : '-'); + + if ( !list_empty(&bwc->watchpt_read_list) ) + _pdb_bwc_print_list(&bwc->watchpt_read_list, "r", level); + if ( !list_empty(&bwc->watchpt_write_list) ) + _pdb_bwc_print_list(&bwc->watchpt_write_list, "w", level); + if ( !list_empty(&bwc->watchpt_access_list) ) + _pdb_bwc_print_list(&bwc->watchpt_access_list, "a", level); + if ( !list_empty(&bwc->watchpt_disabled_list) ) + _pdb_bwc_print_list(&bwc->watchpt_disabled_list, "d", level); +} + +static void +_pdb_bwc_print_list (struct list_head *bwc_list, char *label, int level) +{ + struct list_head *ptr; + int counter = 0; + + list_for_each(ptr, bwc_list) + { + bwcpoint_p bwc = list_entry(ptr, bwcpoint_t, list); + printk(" %s[%02d]%s ", level > 0 ? " " : "", counter++, + level > 0 ? "" : " "); + _pdb_bwc_print(bwc, label, level+1); + } + + if (counter == 0) + { + printk(" empty list\n"); + } +} void -pdb_initialize_bwcpoint (void) -{ - memset((void *) &bwcpoint_list, 0, sizeof(bwcpoint_t)); - INIT_LIST_HEAD(&bwcpoint_list.list); - - return; -} - +pdb_bwc_print_list (void) +{ + _pdb_bwc_print_list(&bwcpoint_list, " ", 0); +} + +bwcpoint_p +pdb_search_watchpoint (u32 process, memory_t address) +{ + bwcpoint_p bwc_watch = (bwcpoint_p) 0; + bwcpoint_p bwc_entry = (bwcpoint_p) 0; + struct list_head *ptr; + + list_for_each(ptr, &bwcpoint_list) /* find bwc page entry */ + { + bwc_watch = list_entry(ptr, bwcpoint_t, list); + if (bwc_watch->address == (address & PAGE_MASK)) break; + } + + if ( !bwc_watch ) + { + return (bwcpoint_p) 0; + } + +#define __pdb_search_watchpoint_list(__list) \ + list_for_each(ptr, (__list)) \ + { \ + bwc_entry = list_entry(ptr, bwcpoint_t, list); \ + if ( bwc_entry->process == process && \ + bwc_entry->address <= address && \ + bwc_entry->address + bwc_entry->length > address ) \ + return bwc_entry; \ + } + + __pdb_search_watchpoint_list(&bwc_watch->watchpt_read_list); + __pdb_search_watchpoint_list(&bwc_watch->watchpt_write_list); + 
__pdb_search_watchpoint_list(&bwc_watch->watchpt_access_list); + +#undef __pdb_search_watchpoint_list + + return (bwcpoint_p) 0; +} + +/*************************************************************/ int pdb_suspend (struct task_struct *target) @@ -134,6 +244,35 @@ *(unsigned long *) stack = value; return; +} + +int +pdb_read_register (struct task_struct *target, pdb_op_rd_reg_p op) +{ + int rc = 0; + + switch (op->reg) + { + case 0: op->value = _pdb_get_register(target, LINUX_EAX); break; + case 1: op->value = _pdb_get_register(target, LINUX_ECX); break; + case 2: op->value = _pdb_get_register(target, LINUX_EDX); break; + case 3: op->value = _pdb_get_register(target, LINUX_EBX); break; + case 4: op->value = _pdb_get_register(target, LINUX_ESP); break; + case 5: op->value = _pdb_get_register(target, LINUX_EBP); break; + case 6: op->value = _pdb_get_register(target, LINUX_ESI); break; + case 7: op->value = _pdb_get_register(target, LINUX_EDI); break; + case 8: op->value = _pdb_get_register(target, LINUX_EIP); break; + case 9: op->value = _pdb_get_register(target, LINUX_EFL); break; + + case 10: op->value = _pdb_get_register(target, LINUX_CS); break; + case 11: op->value = _pdb_get_register(target, LINUX_SS); break; + case 12: op->value = _pdb_get_register(target, LINUX_DS); break; + case 13: op->value = _pdb_get_register(target, LINUX_ES); break; + case 14: op->value = _pdb_get_register(target, LINUX_FS); break; + case 15: op->value = _pdb_get_register(target, LINUX_GS); break; + } + + return rc; } int @@ -209,18 +348,14 @@ eflags |= X86_EFLAGS_TF; _pdb_set_register(target, LINUX_EFL, eflags); - bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL); - if ( bkpt == NULL ) - { - printk("error: unable to allocation memory\n"); - return -1; - } + _pdb_bwcpoint_alloc(bkpt); + if ( bkpt == NULL ) return -1; bkpt->process = target->pid; bkpt->address = 0; bkpt->type = BWC_DEBUG; - list_add(&bkpt->list, &bwcpoint_list.list); + list_add_tail(&bkpt->list, &bwcpoint_list); 
wake_up_process(target); @@ -237,31 +372,27 @@ printk("insert breakpoint %d:%lx len: %d\n", target->pid, address, length); - bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL); - if ( bkpt == NULL ) - { - printk("error: unable to allocation memory\n"); + if ( length != 1 ) + { + printk("error: breakpoint length should be 1\n"); return -1; } - if ( length != 1 ) - { - printk("error: breakpoint length should be 1\n"); - kfree(bkpt); - return -1; - } + _pdb_bwcpoint_alloc(bkpt); + if ( bkpt == NULL ) return -1; bkpt->process = target->pid; bkpt->address = address; bkpt->type = BWC_INT3; - pdb_access_memory(target, address, &bkpt->old_value, 1, 0); - pdb_access_memory(target, address, &breakpoint_opcode, 1, 1); + pdb_access_memory(target, address, &bkpt->orig_bkpt, 1, PDB_MEM_READ); + pdb_access_memory(target, address, &breakpoint_opcode, 1, PDB_MEM_WRITE); - list_add(&bkpt->list, &bwcpoint_list.list); + list_add_tail(&bkpt->list, &bwcpoint_list); printk("breakpoint_set %d:%lx OLD: 0x%x\n", - target->pid, address, bkpt->old_value); + target->pid, address, bkpt->orig_bkpt); + pdb_bwc_print_list(); return rc; } @@ -276,7 +407,7 @@ printk ("remove breakpoint %d:%lx\n", target->pid, address); struct list_head *entry; - list_for_each(entry, &bwcpoint_list.list) + list_for_each(entry, &bwcpoint_list) { bkpt = list_entry(entry, bwcpoint_t, list); if ( target->pid == bkpt->process && @@ -285,17 +416,223 @@ break; } - if (bkpt == &bwcpoint_list || bkpt == NULL) + if (entry == &bwcpoint_list) { printk ("error: no breakpoint found\n"); return -1; } + pdb_access_memory(target, address, &bkpt->orig_bkpt, 1, PDB_MEM_WRITE); + list_del(&bkpt->list); - - pdb_access_memory(target, address, &bkpt->old_value, 1, 1); - kfree(bkpt); + + pdb_bwc_print_list(); + + return rc; +} + +#define PDB_PTE_UPDATE 1 +#define PDB_PTE_RESTORE 2 + +int +pdb_change_pte (struct task_struct *target, bwcpoint_p bwc, int mode) +{ + int rc = 0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep; + + pgd = 
pgd_offset(target->mm, bwc->address); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return -1; + + pud = pud_offset(pgd, bwc->address); + if (pud_none(*pud) || unlikely(pud_bad(*pud))) return -2; + + pmd = pmd_offset(pud, bwc->address); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) return -3; + + ptep = pte_offset_map(pmd, bwc->address); + if (!ptep) return -4; + + switch ( mode ) + { + case PDB_PTE_UPDATE: /* added or removed a watchpoint. update pte. */ + { + pte_t new_pte; + + if ( pte_val(bwc->parent->orig_pte) == 0 ) /* new watchpoint page */ + { + bwc->parent->orig_pte = *ptep; + } + + new_pte = bwc->parent->orig_pte; + + if ( !list_empty(&bwc->parent->watchpt_read_list) || + !list_empty(&bwc->parent->watchpt_access_list) ) + { + new_pte = pte_rdprotect(new_pte); + } + + if ( !list_empty(&bwc->parent->watchpt_write_list) || + !list_empty(&bwc->parent->watchpt_access_list) ) + { + new_pte = pte_wrprotect(new_pte); + } + + if ( pte_val(new_pte) != pte_val(*ptep) ) + { + *ptep = new_pte; + flush_tlb_mm(target->mm); + } + break; + } + case PDB_PTE_RESTORE : /* suspend watchpoint by restoring original pte */ + { + *ptep = bwc->parent->orig_pte; + flush_tlb_mm(target->mm); + break; + } + default : + { + printk("(linux) unknown mode %d %d\n", mode, __LINE__); + break; + } + } + + pte_unmap(ptep); /* can i flush the tlb before pte_unmap? 
*/ + + return rc; +} + +int +pdb_insert_watchpoint (struct task_struct *target, pdb_op_watchpt_p watchpt) +{ + int rc = 0; + + bwcpoint_p bwc_watch; + bwcpoint_p bwc_entry; + struct list_head *ptr; + unsigned long page = watchpt->address & PAGE_MASK; + struct list_head *watchpoint_list; + + printk("insert watchpoint: %d %x %x\n", + watchpt->type, watchpt->address, watchpt->length); + + list_for_each(ptr, &bwcpoint_list) /* find existing bwc page entry */ + { + bwc_watch = list_entry(ptr, bwcpoint_t, list); + + if (bwc_watch->address == page) goto got_bwc_watch; + } + + _pdb_bwcpoint_alloc(bwc_watch); /* create new bwc:watch */ + if ( bwc_watch == NULL ) return -1; + + bwc_watch->type = BWC_WATCH; + bwc_watch->process = target->pid; + bwc_watch->address = page; + + list_add_tail(&bwc_watch->list, &bwcpoint_list); + + got_bwc_watch: + + switch (watchpt->type) + { + case BWC_WATCH_READ: + watchpoint_list = &bwc_watch->watchpt_read_list; break; + case BWC_WATCH_WRITE: + watchpoint_list = &bwc_watch->watchpt_write_list; break; + case BWC_WATCH_ACCESS: + watchpoint_list = &bwc_watch->watchpt_access_list; break; + default: + printk("unknown type %d\n", watchpt->type); return -2; + } + + _pdb_bwcpoint_alloc(bwc_entry); /* create new bwc:entry */ + if ( bwc_entry == NULL ) return -1; + + bwc_entry->process = target->pid; + bwc_entry->address = watchpt->address; + bwc_entry->length = watchpt->length; + bwc_entry->type = watchpt->type; + bwc_entry->parent = bwc_watch; + + list_add_tail(&bwc_entry->list, watchpoint_list); + pdb_change_pte(target, bwc_entry, PDB_PTE_UPDATE); + + pdb_bwc_print_list(); + + return rc; +} + +int +pdb_remove_watchpoint (struct task_struct *target, pdb_op_watchpt_p watchpt) +{ + int rc = 0; + bwcpoint_p bwc_watch = (bwcpoint_p) NULL; + bwcpoint_p bwc_entry = (bwcpoint_p) NULL; + unsigned long page = watchpt->address & PAGE_MASK; + struct list_head *ptr; + struct list_head *watchpoint_list; + + printk("remove watchpoint: %d %x %x\n", + watchpt->type, 
watchpt->address, watchpt->length); + + list_for_each(ptr, &bwcpoint_list) /* find bwc page entry */ + { + bwc_watch = list_entry(ptr, bwcpoint_t, list); + if (bwc_watch->address == page) break; + } + + if ( !bwc_watch ) + { + printk("(linux) delete watchpoint: can't find bwc page 0x%08x\n", + watchpt->address); + return -1; + } + + switch (watchpt->type) + { + case BWC_WATCH_READ: + watchpoint_list = &bwc_watch->watchpt_read_list; break; + case BWC_WATCH_WRITE: + watchpoint_list = &bwc_watch->watchpt_write_list; break; + case BWC_WATCH_ACCESS: + watchpoint_list = &bwc_watch->watchpt_access_list; break; + default: + printk("unknown type %d\n", watchpt->type); return -2; + } + + list_for_each(ptr, watchpoint_list) /* find watchpoint */ + { + bwc_entry = list_entry(ptr, bwcpoint_t, list); + if ( bwc_entry->address == watchpt->address && + bwc_entry->length == watchpt->length ) break; + } + + if ( !bwc_entry ) /* or ptr == watchpoint_list */ + { + printk("(linux) delete watchpoint: can't find watchpoint 0x%08x\n", + watchpt->address); + return -1; + } + + list_del(&bwc_entry->list); + pdb_change_pte(target, bwc_entry, PDB_PTE_UPDATE); + kfree(bwc_entry); + + + if ( list_empty(&bwc_watch->watchpt_read_list) && + list_empty(&bwc_watch->watchpt_write_list) && + list_empty(&bwc_watch->watchpt_access_list) ) + { + list_del(&bwc_watch->list); + kfree(bwc_watch); + } + + pdb_bwc_print_list(); return rc; } @@ -312,16 +649,24 @@ switch (val) { case DIE_DEBUG: - if (pdb_debug_fn(args->regs, args->trapnr, args->err)) + if ( pdb_debug_fn(args->regs, args->trapnr, args->err) ) return NOTIFY_STOP; break; case DIE_TRAP: - if (args->trapnr == 3 && pdb_int3_fn(args->regs, args->err)) + if ( args->trapnr == 3 && pdb_int3_fn(args->regs, args->err) ) return NOTIFY_STOP; break; case DIE_INT3: /* without kprobes, we should never see DIE_INT3 */ + if ( pdb_int3_fn(args->regs, args->err) ) + return NOTIFY_STOP; + break; + case DIE_PAGE_FAULT: + if ( pdb_page_fault_fn(args->regs, 
args->trapnr, args->err) ) + return NOTIFY_STOP; + break; case DIE_GPF: - case DIE_PAGE_FAULT: + printk("---------------GPF\n"); + break; default: break; } @@ -330,70 +675,110 @@ } -int +static int pdb_debug_fn (struct pt_regs *regs, long error_code, unsigned int condition) { pdb_response_t resp; bwcpoint_p bkpt = NULL; - struct list_head *entry; - list_for_each(entry, &bwcpoint_list.list) + + printk("pdb_debug_fn\n"); + + list_for_each(entry, &bwcpoint_list) { bkpt = list_entry(entry, bwcpoint_t, list); if ( current->pid == bkpt->process && - bkpt->type == BWC_DEBUG ) + (bkpt->type == BWC_DEBUG || /* single step */ + bkpt->type == BWC_WATCH_STEP)) /* single step over watchpoint */ break; } - if (bkpt == &bwcpoint_list || bkpt == NULL) + if (entry == &bwcpoint_list) { printk("not my debug 0x%x 0x%lx\n", current->pid, regs->eip); return 0; } - list_del(&bkpt->list); - pdb_suspend(current); - printk("(pdb) debug pid: %d, eip: 0x%08lx\n", current->pid, regs->eip); + printk("(pdb) %s pid: %d, eip: 0x%08lx\n", + bkpt->type == BWC_DEBUG ? 
"debug" : "watch-step", + current->pid, regs->eip); regs->eflags &= ~X86_EFLAGS_TF; set_tsk_thread_flag(current, TIF_SINGLESTEP); - resp.operation = PDB_OPCODE_STEP; + switch (bkpt->type) + { + case BWC_DEBUG: + resp.operation = PDB_OPCODE_STEP; + break; + case BWC_WATCH_STEP: + { + struct list_head *watchpoint_list; + bwcpoint_p watch_page = bkpt->watchpoint->parent; + + switch (bkpt->watchpoint->type) + { + case BWC_WATCH_READ: + watchpoint_list = &watch_page->watchpt_read_list; break; + case BWC_WATCH_WRITE: + watchpoint_list = &watch_page->watchpt_write_list; break; + case BWC_WATCH_ACCESS: + watchpoint_list = &watch_page->watchpt_access_list; break; + default: + printk("unknown type %d\n", bkpt->watchpoint->type); return 0; + } + + resp.operation = PDB_OPCODE_WATCHPOINT; + list_del_init(&bkpt->watchpoint->list); + list_add_tail(&bkpt->watchpoint->list, watchpoint_list); + pdb_change_pte(current, bkpt->watchpoint, PDB_PTE_UPDATE); + pdb_bwc_print_list(); + break; + } + default: + printk("unknown breakpoint type %d %d\n", __LINE__, bkpt->type); + return 0; + } + resp.process = current->pid; resp.status = PDB_RESPONSE_OKAY; pdb_send_response(&resp); + list_del(&bkpt->list); + kfree(bkpt); + return 1; } -int +static int pdb_int3_fn (struct pt_regs *regs, long error_code) { pdb_response_t resp; bwcpoint_p bkpt = NULL; + memory_t address = regs->eip - 1; struct list_head *entry; - list_for_each(entry, &bwcpoint_list.list) + list_for_each(entry, &bwcpoint_list) { bkpt = list_entry(entry, bwcpoint_t, list); if ( current->pid == bkpt->process && - regs->eip == bkpt->address && + address == bkpt->address && bkpt->type == BWC_INT3 ) break; } - if (bkpt == &bwcpoint_list || bkpt == NULL) - { - printk("not my int3 bkpt 0x%x 0x%lx\n", current->pid, regs->eip); + if (entry == &bwcpoint_list) + { + printk("not my int3 bkpt 0x%x 0x%lx\n", current->pid, address); return 0; } - printk("(pdb) int3 pid: %d, eip: 0x%08lx\n", current->pid, regs->eip); + printk("(pdb) int3 pid: %d, 
eip: 0x%08lx\n", current->pid, address); pdb_suspend(current); @@ -405,6 +790,54 @@ return 1; } + +static int +pdb_page_fault_fn (struct pt_regs *regs, long error_code, + unsigned int condition) +{ + unsigned long cr2; + unsigned long cr3; + bwcpoint_p bwc; + bwcpoint_p watchpt; + bwcpoint_p bkpt; + + __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); + __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : ); + + bwc = pdb_search_watchpoint(current->pid, cr2); + if ( !bwc ) + { + return 0; /* not mine */ + } + + printk("page_fault cr2:%08lx err:%lx eip:%08lx\n", + cr2, error_code, regs->eip); + + /* disable the watchpoint */ + watchpt = bwc->watchpoint; + list_del_init(&bwc->list); + list_add_tail(&bwc->list, &bwc->parent->watchpt_disabled_list); + pdb_change_pte(current, bwc, PDB_PTE_RESTORE); + + /* single step the faulting instruction */ + regs->eflags |= X86_EFLAGS_TF; + + /* create a bwcpoint entry so we know what to do once we regain control */ + _pdb_bwcpoint_alloc(bkpt); + if ( bkpt == NULL ) return -1; + + bkpt->process = current->pid; + bkpt->address = 0; + bkpt->type = BWC_WATCH_STEP; + bkpt->watchpoint = bwc; + + /* add to head so we see it first the next time we break */ + list_add(&bkpt->list, &bwcpoint_list); + + pdb_bwc_print_list(); + return 1; +} + /* * Local variables: diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/linux-2.6-module/module.c --- a/tools/debugger/pdb/linux-2.6-module/module.c Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/linux-2.6-module/module.c Tue Aug 16 18:09:07 2005 @@ -98,6 +98,11 @@ printk("(linux) detach 0x%x\n", request->process); resp.status = PDB_RESPONSE_OKAY; break; + case PDB_OPCODE_RD_REG : + resp.u.rd_reg.reg = request->u.rd_reg.reg; + pdb_read_register(target, &resp.u.rd_reg); + resp.status = PDB_RESPONSE_OKAY; + break; case PDB_OPCODE_RD_REGS : pdb_read_registers(target, &resp.u.rd_regs); resp.status = PDB_RESPONSE_OKAY; @@ -108,14 +113,16 @@ break; case PDB_OPCODE_RD_MEM : 
pdb_access_memory(target, request->u.rd_mem.address, - &resp.u.rd_mem.data, request->u.rd_mem.length, 0); + &resp.u.rd_mem.data, request->u.rd_mem.length, + PDB_MEM_READ); resp.u.rd_mem.address = request->u.rd_mem.address; resp.u.rd_mem.length = request->u.rd_mem.length; resp.status = PDB_RESPONSE_OKAY; break; case PDB_OPCODE_WR_MEM : pdb_access_memory(target, request->u.wr_mem.address, - &request->u.wr_mem.data, request->u.wr_mem.length, 1); + &request->u.wr_mem.data, request->u.wr_mem.length, + PDB_MEM_WRITE); resp.status = PDB_RESPONSE_OKAY; break; case PDB_OPCODE_CONTINUE : @@ -135,6 +142,14 @@ case PDB_OPCODE_CLR_BKPT : pdb_remove_memory_breakpoint(target, request->u.bkpt.address, request->u.bkpt.length); + resp.status = PDB_RESPONSE_OKAY; + break; + case PDB_OPCODE_SET_WATCHPT : + pdb_insert_watchpoint(target, &request->u.watchpt); + resp.status = PDB_RESPONSE_OKAY; + break; + case PDB_OPCODE_CLR_WATCHPT : + pdb_remove_watchpoint(target, &request->u.watchpt); resp.status = PDB_RESPONSE_OKAY; break; default: @@ -248,8 +263,6 @@ pdb_sring_t *sring; printk("----\npdb initialize %s %s\n", __DATE__, __TIME__); - - pdb_initialize_bwcpoint(); /* if ( xen_start_info.flags & SIF_INITDOMAIN ) diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/linux-2.6-module/pdb_debug.h --- a/tools/debugger/pdb/linux-2.6-module/pdb_debug.h Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/linux-2.6-module/pdb_debug.h Tue Aug 16 18:09:07 2005 @@ -6,6 +6,7 @@ void pdb_initialize_bwcpoint (void); int pdb_suspend (struct task_struct *target); int pdb_resume (struct task_struct *target); +int pdb_read_register (struct task_struct *target, pdb_op_rd_reg_p op); int pdb_read_registers (struct task_struct *target, pdb_op_rd_regs_p op); int pdb_write_register (struct task_struct *target, pdb_op_wr_reg_p op); int pdb_read_memory (struct task_struct *target, pdb_op_rd_mem_req_p req, @@ -20,13 +21,13 @@ memory_t address, u32 length); int pdb_remove_memory_breakpoint (struct task_struct 
*target, memory_t address, u32 length); +int pdb_insert_watchpoint (struct task_struct *target, + pdb_op_watchpt_p watchpt); +int pdb_remove_watchpoint (struct task_struct *target, + pdb_op_watchpt_p watchpt); int pdb_exceptions_notify (struct notifier_block *self, unsigned long val, void *data); - -int pdb_debug_fn (struct pt_regs *regs, long error_code, - unsigned int condition); -int pdb_int3_fn (struct pt_regs *regs, long error_code); /* module.c */ void pdb_send_response (pdb_response_t *response); diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/linux-2.6-module/pdb_module.h --- a/tools/debugger/pdb/linux-2.6-module/pdb_module.h Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/linux-2.6-module/pdb_module.h Tue Aug 16 18:09:07 2005 @@ -14,20 +14,27 @@ #define PDB_OPCODE_DETACH 3 -#define PDB_OPCODE_RD_REGS 4 +#define PDB_OPCODE_RD_REG 4 +typedef struct pdb_op_rd_reg +{ + u32 reg; + u32 value; +} pdb_op_rd_reg_t, *pdb_op_rd_reg_p; + +#define PDB_OPCODE_RD_REGS 5 typedef struct pdb_op_rd_regs { u32 reg[GDB_REGISTER_FRAME_SIZE]; } pdb_op_rd_regs_t, *pdb_op_rd_regs_p; -#define PDB_OPCODE_WR_REG 5 +#define PDB_OPCODE_WR_REG 6 typedef struct pdb_op_wr_reg { u32 reg; u32 value; } pdb_op_wr_reg_t, *pdb_op_wr_reg_p; -#define PDB_OPCODE_RD_MEM 6 +#define PDB_OPCODE_RD_MEM 7 typedef struct pdb_op_rd_mem_req { u32 address; @@ -41,7 +48,7 @@ u8 data[1024]; } pdb_op_rd_mem_resp_t, *pdb_op_rd_mem_resp_p; -#define PDB_OPCODE_WR_MEM 7 +#define PDB_OPCODE_WR_MEM 8 typedef struct pdb_op_wr_mem { u32 address; @@ -49,16 +56,33 @@ u8 data[1024]; /* arbitrary */ } pdb_op_wr_mem_t, *pdb_op_wr_mem_p; -#define PDB_OPCODE_CONTINUE 8 -#define PDB_OPCODE_STEP 9 +#define PDB_OPCODE_CONTINUE 9 +#define PDB_OPCODE_STEP 10 -#define PDB_OPCODE_SET_BKPT 10 -#define PDB_OPCODE_CLR_BKPT 11 +#define PDB_OPCODE_SET_BKPT 11 +#define PDB_OPCODE_CLR_BKPT 12 typedef struct pdb_op_bkpt { u32 address; u32 length; } pdb_op_bkpt_t, *pdb_op_bkpt_p; + +#define PDB_OPCODE_SET_WATCHPT 13 +#define 
PDB_OPCODE_CLR_WATCHPT 14 +#define PDB_OPCODE_WATCHPOINT 15 +typedef struct pdb_op_watchpt +{ +#define BWC_DEBUG 1 +#define BWC_INT3 3 +#define BWC_WATCH 100 /* pdb: watchpoint page */ +#define BWC_WATCH_STEP 101 /* pdb: watchpoint single step */ +#define BWC_WATCH_WRITE 102 +#define BWC_WATCH_READ 103 +#define BWC_WATCH_ACCESS 104 + u32 type; + u32 address; + u32 length; +} pdb_op_watchpt_t, *pdb_op_watchpt_p; typedef struct @@ -68,10 +92,12 @@ union { pdb_op_attach_t attach; + pdb_op_rd_reg_t rd_reg; pdb_op_wr_reg_t wr_reg; pdb_op_rd_mem_req_t rd_mem; pdb_op_wr_mem_t wr_mem; pdb_op_bkpt_t bkpt; + pdb_op_watchpt_t watchpt; } u; } pdb_request_t, *pdb_request_p; @@ -87,6 +113,7 @@ s16 status; /* PDB_RESPONSE_??? */ union { + pdb_op_rd_reg_t rd_reg; pdb_op_rd_regs_t rd_regs; pdb_op_rd_mem_resp_t rd_mem; } u; @@ -94,6 +121,11 @@ DEFINE_RING_TYPES(pdb, pdb_request_t, pdb_response_t); + + +/* from access_process_vm */ +#define PDB_MEM_READ 0 +#define PDB_MEM_WRITE 1 #endif diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch --- a/tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch Tue Aug 16 18:09:07 2005 @@ -1,7 +1,15 @@ diff -u linux-2.6.12/arch/xen/i386/kernel/i386_ksyms.c linux-2.6.12-pdb/arch/xen/i386/kernel/i386_ksyms.c --- linux-2.6.12/arch/xen/i386/kernel/i386_ksyms.c 2005-07-31 22:36:50.000000000 +0100 +++ linux-2.6.12-pdb/arch/xen/i386/kernel/i386_ksyms.c 2005-08-01 10:57:31.000000000 +0100 -@@ -172,6 +172,7 @@ +@@ -151,6 +151,7 @@ + /* TLB flushing */ + EXPORT_SYMBOL(flush_tlb_page); + #endif ++EXPORT_SYMBOL(flush_tlb_mm); + + #ifdef CONFIG_X86_IO_APIC + EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); +@@ -172,6 +173,7 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback); EXPORT_SYMBOL(register_die_notifier); diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/pdb_caml_domain.c --- a/tools/debugger/pdb/pdb_caml_domain.c Tue Aug 16 04:15:23 2005 +++ 
b/tools/debugger/pdb/pdb_caml_domain.c Tue Aug 16 18:09:07 2005 @@ -41,6 +41,54 @@ /****************************************************************************/ + +/* + * dom_read_register : context_t -> int -> int32 + */ +value +dom_read_register (value context, value reg) +{ + CAMLparam2(context, reg); + CAMLlocal1(result); + + int my_reg = Int_val(reg); + cpu_user_regs_t *regs; + context_t ctx; + + decode_context(&ctx, context); + + if ( xendebug_read_registers(xc_handle, ctx.domain, ctx.vcpu, &regs) ) + { + printf("(pdb) read registers error!\n"); fflush(stdout); + failwith("read registers error"); + } + + dump_regs(regs); + + result = caml_alloc_tuple(16); + + switch (my_reg) + { + case GDB_EAX: result = caml_copy_int32(regs->eax); break; + case GDB_ECX: result = caml_copy_int32(regs->ecx); break; + case GDB_EDX: result = caml_copy_int32(regs->edx); break; + case GDB_EBX: result = caml_copy_int32(regs->ebx); break; + case GDB_ESP: result = caml_copy_int32(regs->esp); break; + case GDB_EBP: result = caml_copy_int32(regs->ebp); break; + case GDB_ESI: result = caml_copy_int32(regs->esi); break; + case GDB_EDI: result = caml_copy_int32(regs->edi); break; + case GDB_EIP: result = caml_copy_int32(regs->eip); break; + case GDB_EFL: result = caml_copy_int32(regs->eflags); break; + case GDB_CS: result = caml_copy_int32(regs->cs); break; + case GDB_SS: result = caml_copy_int32(regs->ss); break; + case GDB_DS: result = caml_copy_int32(regs->ds); break; + case GDB_ES: result = caml_copy_int32(regs->es); break; + case GDB_FS: result = caml_copy_int32(regs->fs); break; + case GDB_GS: result = caml_copy_int32(regs->gs); break; + } + + CAMLreturn(result); +} /* * dom_read_registers : context_t -> int32 diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/pdb_caml_process.c --- a/tools/debugger/pdb/pdb_caml_process.c Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/pdb_caml_process.c Tue Aug 16 18:09:07 2005 @@ -113,6 +113,12 @@ case PDB_OPCODE_DETACH : break; + case 
PDB_OPCODE_RD_REG : + { + sprintf(&msg[0], "%08x", _flip(resp->u.rd_reg.value)); + break; + } + case PDB_OPCODE_RD_REGS : { int loop; @@ -161,16 +167,22 @@ } case PDB_OPCODE_SET_BKPT : - { - break; - } case PDB_OPCODE_CLR_BKPT : - { + case PDB_OPCODE_SET_WATCHPT : + case PDB_OPCODE_CLR_WATCHPT : + { + break; + } + + case PDB_OPCODE_WATCHPOINT : + { + sprintf(msg, "S05"); break; } default : - printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE\n"); + printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE %d\n", + resp->operation); break; } @@ -258,6 +270,32 @@ CAMLreturn(Val_unit); } + + +/* + * proc_read_register : context_t -> int -> unit + */ +value +proc_read_register (value context, value reg) +{ + CAMLparam1(context); + + pdb_request_t req; + context_t ctx; + int my_reg = Int_val(reg); + + decode_context(&ctx, context); + + req.operation = PDB_OPCODE_RD_REG; + req.process = ctx.process; + req.u.rd_reg.reg = my_reg; + req.u.rd_reg.value = 0; + + send_request (ctx.ring, ctx.evtchn, &req); + + CAMLreturn(Val_unit); +} + /* @@ -443,7 +481,7 @@ /* - * proc_insert_memory_breakpoint : context_t -> int32 -> int list -> unit + * proc_insert_memory_breakpoint : context_t -> int32 -> int -> unit */ value proc_insert_memory_breakpoint (value context, value address, value length) @@ -466,7 +504,7 @@ } /* - * proc_remove_memory_breakpoint : context_t -> int32 -> int list -> unit + * proc_remove_memory_breakpoint : context_t -> int32 -> int -> unit */ value proc_remove_memory_breakpoint (value context, value address, value length) @@ -482,6 +520,54 @@ req.process = ctx.process; req.u.bkpt.address = (memory_t) Int32_val(address); req.u.bkpt.length = Int_val(length); + + send_request(ctx.ring, ctx.evtchn, &req); + + CAMLreturn(Val_unit); +} + +/* + * proc_insert_watchpoint : context_t -> bwcpoint_t -> int32 -> int -> unit + */ +value +proc_insert_watchpoint (value context, value kind, value address, value length) +{ + CAMLparam3(context, address, length); + + context_t ctx; + 
pdb_request_t req; + + decode_context(&ctx, context); + + req.operation = PDB_OPCODE_SET_WATCHPT; + req.process = ctx.process; + req.u.watchpt.type = Int_val(kind); + req.u.watchpt.address = (memory_t) Int32_val(address); + req.u.watchpt.length = Int_val(length); + + send_request(ctx.ring, ctx.evtchn, &req); + + CAMLreturn(Val_unit); +} + +/* + * proc_remove_watchpoint : context_t -> bwcpoint_t -> int32 -> int -> unit + */ +value +proc_remove_watchpoint (value context, value kind, value address, value length) +{ + CAMLparam3(context, address, length); + + context_t ctx; + pdb_request_t req; + + decode_context(&ctx, context); + + req.operation = PDB_OPCODE_CLR_WATCHPT; + req.process = ctx.process; + req.u.watchpt.type = Int_val(kind); + req.u.watchpt.address = (memory_t) Int32_val(address); + req.u.watchpt.length = Int_val(length); send_request(ctx.ring, ctx.evtchn, &req); diff -r e3d811cca4e1 -r 1ae656509f02 tools/debugger/pdb/readme --- a/tools/debugger/pdb/readme Tue Aug 16 04:15:23 2005 +++ b/tools/debugger/pdb/readme Tue Aug 16 18:09:07 2005 @@ -1,9 +1,9 @@ -PDB 0.3 +PDB 0.3.3 http://www.cl.cam.ac.uk/netos/pdb Alex Ho -June 2005 +August 2005 This is the latest incarnation of the pervasive debugger. @@ -79,6 +79,11 @@ Process PDB can also debug a process running in a Linux 2.6 domain. + You will need to patch the Linux 2.6 domain U tree to export some + additional symbols for the pdb module + + % make -C linux-2.6-patches + After running PDB in domain 0, insert the pdb module in dom u: % insmod linux-2.6-module/pdb.ko @@ -87,7 +92,14 @@ (gdb) maint packet x context = process <domid> <pid> + Read, write, and access watchpoint should also work for processes, + use the "rwatch", "watch" and "awatch" gdb commands respectively. + + If you are having trouble with GDB 5.3 (i386-redhat-linux-gnu), + try GDB 6.3 (configured with --target=i386-linux-gnu). 
+ + To Do -- watchpoints +- watchpoints for domains - support for SMP diff -r e3d811cca4e1 -r 1ae656509f02 tools/examples/network-bridge --- a/tools/examples/network-bridge Tue Aug 16 04:15:23 2005 +++ b/tools/examples/network-bridge Tue Aug 16 18:09:07 2005 @@ -188,12 +188,13 @@ fi fi ip link set ${netdev} name p${netdev} - ip link set veth0 name eth0 + ip link set veth0 name ${netdev} ifconfig p${netdev} -arp down ifconfig p${netdev} hw ether fe:ff:ff:ff:ff:ff ifconfig ${netdev} hw ether ${mac} add_to_bridge ${bridge} vif0.0 add_to_bridge ${bridge} p${netdev} + ip link set ${bridge} up ip link set vif0.0 up ip link set p${netdev} up if ! ifup ${netdev} ; then diff -r e3d811cca4e1 -r 1ae656509f02 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Tue Aug 16 04:15:23 2005 +++ b/tools/python/xen/xend/XendDomainInfo.py Tue Aug 16 18:09:07 2005 @@ -583,7 +583,7 @@ self.create_channel() self.image.createImage() self.exportToDB() - if self.store_channel: + if self.store_channel and self.store_mfn >= 0: self.db.introduceDomain(self.id, self.store_mfn, self.store_channel) @@ -915,8 +915,7 @@ """ self.configure_fields() self.create_devices() - if self.image.ostype != 'vmx': - self.create_blkif() + self.create_blkif() def create_blkif(self): """Create the block device interface (blkif) for the vm. diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c --- /dev/null Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Tue Aug 16 18:09:07 2005 @@ -0,0 +1,653 @@ +/* + * Dynamic DMA mapping support. + * + * This implementation is a fallback for platforms that do not support + * I/O TLBs (aka DMA address translation hardware). 
+ * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@xxxxxxxxx> + * Copyright (C) 2000 Goutham Rao <goutham.rao@xxxxxxxxx> + * Copyright (C) 2000, 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@xxxxxxxxxx> + * Copyright (C) 2005 Keir Fraser <keir@xxxxxxxxxxxxx> + */ + +#include <linux/cache.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <asm/io.h> +#include <asm/pci.h> +#include <asm/dma.h> + +#define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1))) + +#define SG_ENT_PHYS_ADDRESS(sg) (page_to_phys((sg)->page) + (sg)->offset) + +/* + * Maximum allowable number of contiguous slabs to map, + * must be a power of 2. What is the appropriate value ? + * The complexity of {map,unmap}_single is linearly dependent on this value. + */ +#define IO_TLB_SEGSIZE 128 + +/* + * log of the size of each IO TLB slab. The number of slabs is command line + * controllable. + */ +#define IO_TLB_SHIFT 11 + +int swiotlb_force; + +/* + * Used to do a quick range check in swiotlb_unmap_single and + * swiotlb_sync_single_*, to see if the memory was in fact allocated by this + * API. + */ +static char *io_tlb_start, *io_tlb_end; + +/* + * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and + * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. + */ +static unsigned long io_tlb_nslabs; + +/* + * When the IOMMU overflows we return a fallback buffer. This sets the size. 
+ */ +static unsigned long io_tlb_overflow = 32*1024; + +void *io_tlb_overflow_buffer; + +/* + * This is a free list describing the number of free entries available from + * each index + */ +static unsigned int *io_tlb_list; +static unsigned int io_tlb_index; + +/* + * We need to save away the original address corresponding to a mapped entry + * for the sync operations. + */ +static struct phys_addr { + struct page *page; + unsigned int offset; +} *io_tlb_orig_addr; + +/* + * Protect the above data structures in the map and unmap calls + */ +static DEFINE_SPINLOCK(io_tlb_lock); + +static int __init +setup_io_tlb_npages(char *str) +{ + if (isdigit(*str)) { + io_tlb_nslabs = simple_strtoul(str, &str, 0) << + (PAGE_SHIFT - IO_TLB_SHIFT); + /* avoid tail segment of size < IO_TLB_SEGSIZE */ + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + if (*str == ',') + ++str; + /* + * NB. 'force' enables the swiotlb, but doesn't force its use for + * every DMA like it does on native Linux. + */ + if (!strcmp(str, "force")) + swiotlb_force = 1; + return 1; +} +__setup("swiotlb=", setup_io_tlb_npages); +/* make io_tlb_overflow tunable too? */ + +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the PCI DMA API. + */ +void +swiotlb_init_with_default_size (size_t default_size) +{ + unsigned long i; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> PAGE_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * + (1 << IO_TLB_SHIFT)); + if (!io_tlb_start) + panic("Cannot allocate SWIOTLB buffer"); + + xen_create_contiguous_region( + (unsigned long)io_tlb_start, + get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))); + + io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); + + /* + * Allocate and initialize the free list array. 
This array is used + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE + * between io_tlb_start and io_tlb_end. + */ + io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); + for (i = 0; i < io_tlb_nslabs; i++) + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_index = 0; + io_tlb_orig_addr = alloc_bootmem( + io_tlb_nslabs * sizeof(*io_tlb_orig_addr)); + + /* + * Get the overflow emergency buffer + */ + io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); + printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n", + virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end-1)); +} + +void +swiotlb_init(void) +{ + /* The user can forcibly enable swiotlb. */ + if (swiotlb_force) + swiotlb = 1; + + /* + * Otherwise, enable for domain 0 if the machine has 'lots of memory', + * which we take to mean more than 2GB. + */ + if (xen_start_info.flags & SIF_INITDOMAIN) { + dom0_op_t op; + op.cmd = DOM0_PHYSINFO; + if ((HYPERVISOR_dom0_op(&op) == 0) && + (op.u.physinfo.total_pages > 0x7ffff)) + swiotlb = 1; + } + + if (swiotlb) + swiotlb_init_with_default_size(64 * (1<<20)); +} + +static void +__sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir) +{ + if (PageHighMem(buffer.page)) { + size_t len, bytes; + char *dev, *host, *kmp; + len = size; + while (len != 0) { + if (((bytes = len) + buffer.offset) > PAGE_SIZE) + bytes = PAGE_SIZE - buffer.offset; + kmp = kmap_atomic(buffer.page, KM_SWIOTLB); + dev = dma_addr + size - len; + host = kmp + buffer.offset; + memcpy((dir == DMA_FROM_DEVICE) ? host : dev, + (dir == DMA_FROM_DEVICE) ? 
dev : host, + bytes); + kunmap_atomic(kmp, KM_SWIOTLB); + len -= bytes; + buffer.page++; + buffer.offset = 0; + } + } else { + char *host = (char *)phys_to_virt( + page_to_pseudophys(buffer.page)) + buffer.offset; + if (dir == DMA_FROM_DEVICE) + memcpy(host, dma_addr, size); + else if (dir == DMA_TO_DEVICE) + memcpy(dma_addr, host, size); + } +} + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ +static void * +map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir) +{ + unsigned long flags; + char *dma_addr; + unsigned int nslots, stride, index, wrap; + int i; + + /* + * For mappings greater than a page, we limit the stride (and + * hence alignment) to a page size. + */ + nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + if (size > PAGE_SIZE) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else + stride = 1; + + BUG_ON(!nslots); + + /* + * Find suitable number of IO TLB entries size that will fit this + * request and allocate a buffer from that IO TLB pool. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + wrap = index = ALIGN(io_tlb_index, stride); + + if (index >= io_tlb_nslabs) + wrap = index = 0; + + do { + /* + * If we find a slot that indicates we have 'nslots' + * number of contiguous buffers, we allocate the + * buffers from that slot and mark the entries as '0' + * indicating unavailable. + */ + if (io_tlb_list[index] >= nslots) { + int count = 0; + + for (i = index; i < (int)(index + nslots); i++) + io_tlb_list[i] = 0; + for (i = index - 1; + (OFFSET(i, IO_TLB_SEGSIZE) != + IO_TLB_SEGSIZE -1) && io_tlb_list[i]; + i--) + io_tlb_list[i] = ++count; + dma_addr = io_tlb_start + + (index << IO_TLB_SHIFT); + + /* + * Update the indices to avoid searching in + * the next round. + */ + io_tlb_index = + ((index + nslots) < io_tlb_nslabs + ? 
(index + nslots) : 0); + + goto found; + } + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + } while (index != wrap); + + spin_unlock_irqrestore(&io_tlb_lock, flags); + return NULL; + } + found: + spin_unlock_irqrestore(&io_tlb_lock, flags); + + /* + * Save away the mapping from the original address to the DMA address. + * This is needed when we sync the memory. Then we sync the buffer if + * needed. + */ + io_tlb_orig_addr[index] = buffer; + if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL)) + __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE); + + return dma_addr; +} + +/* + * dma_addr is the kernel virtual address of the bounce buffer to unmap. + */ +static void +unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +{ + unsigned long flags; + int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + struct phys_addr buffer = io_tlb_orig_addr[index]; + + /* + * First, sync the memory before unmapping the entry + */ + if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)) + __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE); + + /* + * Return the buffer to the free list by setting the corresponding + * entries to indicate the number of contigous entries available. + * While returning the entries to the free list, we merge the entries + * with slots below and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? 
+ io_tlb_list[index + nslots] : 0); + /* + * Step 1: return the slots to the free list, merging the + * slots with superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) + io_tlb_list[i] = ++count; + /* + * Step 2: merge the returned slots with the preceding slots, + * if available (non zero) + */ + for (i = index - 1; + (OFFSET(i, IO_TLB_SEGSIZE) != + IO_TLB_SEGSIZE -1) && io_tlb_list[i]; + i--) + io_tlb_list[i] = ++count; + } + spin_unlock_irqrestore(&io_tlb_lock, flags); +} + +static void +sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +{ + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + struct phys_addr buffer = io_tlb_orig_addr[index]; + BUG_ON((dir != DMA_FROM_DEVICE) && (dir != DMA_TO_DEVICE)); + __sync_single(buffer, dma_addr, size, dir); +} + +static void +swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) +{ + /* + * Ran out of IOMMU space for this operation. This is very bad. + * Unfortunately the drivers cannot handle this operation properly. + * unless they check for pci_dma_mapping_error (most don't) + * When the mapping is small enough return a static buffer to limit + * the damage, or panic when the transfer is too big. + */ + printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at " + "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?"); + + if (size > io_tlb_overflow && do_panic) { + if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic("PCI-DMA: Memory would be corrupted\n"); + if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic("PCI-DMA: Random memory would be DMAed\n"); + } +} + +/* + * Map a single buffer of the indicated size for DMA in streaming mode. The + * PCI address to use is returned. + * + * Once the device is given the dma address, the device owns this memory until + * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. 
+ */ +dma_addr_t +swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) +{ + dma_addr_t dev_addr = virt_to_bus(ptr); + void *map; + struct phys_addr buffer; + + BUG_ON(dir == DMA_NONE); + + /* + * If the pointer passed in happens to be in the device's DMA window, + * we can safely return the device addr and not worry about bounce + * buffering it. + */ + if (!range_straddles_page_boundary(ptr, size) && + !address_needs_mapping(hwdev, dev_addr)) + return dev_addr; + + /* + * Oh well, have to allocate and map a bounce buffer. + */ + buffer.page = virt_to_page(ptr); + buffer.offset = (unsigned long)ptr & ~PAGE_MASK; + map = map_single(hwdev, buffer, size, dir); + if (!map) { + swiotlb_full(hwdev, size, dir, 1); + map = io_tlb_overflow_buffer; + } + + dev_addr = virt_to_bus(map); + + /* + * Ensure that the address returned is DMA'ble + */ + if (address_needs_mapping(hwdev, dev_addr)) + panic("map_single: bounce buffer is not DMA'ble"); + + return dev_addr; +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size must + * match what was provided for in a previous swiotlb_map_single call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guaranteed to see + * whatever the device wrote there. + */ +void +swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, + int dir) +{ + char *dma_addr = bus_to_virt(dev_addr); + + BUG_ON(dir == DMA_NONE); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + unmap_single(hwdev, dma_addr, size, dir); +} + +/* + * Make physical memory consistent for a single streaming mode DMA translation + * after a transfer. + * + * If you perform a swiotlb_map_single() but wish to interrogate the buffer + * using the cpu, yet do not wish to teardown the PCI dma mapping, you must + * call this function before doing so. 
At the next point you give the PCI dma + * address back to the card, you must first perform a + * swiotlb_dma_sync_for_device, and then the device again owns the buffer + */ +void +swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) +{ + char *dma_addr = bus_to_virt(dev_addr); + + BUG_ON(dir == DMA_NONE); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + sync_single(hwdev, dma_addr, size, dir); +} + +void +swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) +{ + char *dma_addr = bus_to_virt(dev_addr); + + BUG_ON(dir == DMA_NONE); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + sync_single(hwdev, dma_addr, size, dir); +} + +/* + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * This is the scatter-gather version of the above swiotlb_map_single + * interface. Here the scatter gather list elements are each tagged with the + * appropriate dma address and length. They are obtained via + * sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for swiotlb_map_single are the + * same here. + */ +int +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, + int dir) +{ + struct phys_addr buffer; + dma_addr_t dev_addr; + char *map; + int i; + + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) { + dev_addr = SG_ENT_PHYS_ADDRESS(sg); + if (address_needs_mapping(hwdev, dev_addr)) { + buffer.page = sg->page; + buffer.offset = sg->offset; + map = map_single(hwdev, buffer, sg->length, dir); + if (!map) { + /* Don't panic here, we expect map_sg users + to do proper error handling. 
*/ + swiotlb_full(hwdev, sg->length, dir, 0); + swiotlb_unmap_sg(hwdev, sg - i, i, dir); + sg[0].dma_length = 0; + return 0; + } + sg->dma_address = (dma_addr_t)virt_to_bus(map); + } else + sg->dma_address = dev_addr; + sg->dma_length = sg->length; + } + return nelems; +} + +/* + * Unmap a set of streaming mode DMA translations. Again, cpu read rules + * concerning calls here are the same as for swiotlb_unmap_single() above. + */ +void +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, + int dir) +{ + int i; + + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) + if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) + unmap_single(hwdev, + (void *)bus_to_virt(sg->dma_address), + sg->dma_length, dir); +} + +/* + * Make physical memory consistent for a set of streaming mode DMA translations + * after a transfer. + * + * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules + * and usage. + */ +void +swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, int dir) +{ + int i; + + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) + if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) + sync_single(hwdev, + (void *)bus_to_virt(sg->dma_address), + sg->dma_length, dir); +} + +void +swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, int dir) +{ + int i; + + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) + if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) + sync_single(hwdev, + (void *)bus_to_virt(sg->dma_address), + sg->dma_length, dir); +} + +dma_addr_t +swiotlb_map_page(struct device *hwdev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + struct phys_addr buffer; + dma_addr_t dev_addr; + char *map; + + dev_addr = page_to_phys(page) + offset; + if (address_needs_mapping(hwdev, dev_addr)) { + buffer.page = page; + buffer.offset = offset; + map = map_single(hwdev, buffer, size, 
direction); + if (!map) { + swiotlb_full(hwdev, size, direction, 1); + map = io_tlb_overflow_buffer; + } + dev_addr = (dma_addr_t)virt_to_bus(map); + } + + return dev_addr; +} + +void +swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address, + size_t size, enum dma_data_direction direction) +{ + char *dma_addr = bus_to_virt(dma_address); + + BUG_ON(direction == DMA_NONE); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + unmap_single(hwdev, dma_addr, size, direction); +} + +int +swiotlb_dma_mapping_error(dma_addr_t dma_addr) +{ + return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); +} + +/* + * Return whether the given PCI device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during PCI bus mastering, then you would pass 0x00ffffff as the mask to + * this function. + */ +int +swiotlb_dma_supported (struct device *hwdev, u64 mask) +{ + return (mask >= 0xffffffffUL); +} + +EXPORT_SYMBOL(swiotlb_init); +EXPORT_SYMBOL(swiotlb_map_single); +EXPORT_SYMBOL(swiotlb_unmap_single); +EXPORT_SYMBOL(swiotlb_map_sg); +EXPORT_SYMBOL(swiotlb_unmap_sg); +EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); +EXPORT_SYMBOL(swiotlb_sync_single_for_device); +EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); +EXPORT_SYMBOL(swiotlb_sync_sg_for_device); +EXPORT_SYMBOL(swiotlb_map_page); +EXPORT_SYMBOL(swiotlb_unmap_page); +EXPORT_SYMBOL(swiotlb_dma_mapping_error); +EXPORT_SYMBOL(swiotlb_dma_supported); + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-i386/kmap_types.h --- /dev/null Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/kmap_types.h Tue Aug 16 18:09:07 2005 @@ -0,0 +1,32 @@ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +#include <linux/config.h> + +#ifdef CONFIG_DEBUG_HIGHMEM +# define D(n) 
__KM_FENCE_##n , +#else +# define D(n) +#endif + +enum km_type { +D(0) KM_BOUNCE_READ, +D(1) KM_SKB_SUNRPC_DATA, +D(2) KM_SKB_DATA_SOFTIRQ, +D(3) KM_USER0, +D(4) KM_USER1, +D(5) KM_BIO_SRC_IRQ, +D(6) KM_BIO_DST_IRQ, +D(7) KM_PTE0, +D(8) KM_PTE1, +D(9) KM_IRQ0, +D(10) KM_IRQ1, +D(11) KM_SOFTIRQ0, +D(12) KM_SOFTIRQ1, +D(13) KM_SWIOTLB, +D(14) KM_TYPE_NR +}; + +#undef D + +#endif diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-i386/scatterlist.h --- /dev/null Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/scatterlist.h Tue Aug 16 18:09:07 2005 @@ -0,0 +1,22 @@ +#ifndef _I386_SCATTERLIST_H +#define _I386_SCATTERLIST_H + +struct scatterlist { + struct page *page; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; + unsigned int dma_length; +}; + +/* These macros should be used after a pci_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns. 
+ */ +#define sg_dma_address(sg) ((sg)->dma_address) +#define sg_dma_len(sg) ((sg)->dma_length) + +#define ISA_DMA_THRESHOLD (0x00ffffff) + +#endif /* !(_I386_SCATTERLIST_H) */ diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/include/asm-xen/asm-i386/swiotlb.h --- /dev/null Tue Aug 16 04:15:23 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/swiotlb.h Tue Aug 16 18:09:07 2005 @@ -0,0 +1,42 @@ +#ifndef _ASM_SWIOTLB_H +#define _ASM_SWIOTLB_H 1 + +#include <linux/config.h> + +/* SWIOTLB interface */ + +extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, + int dir); +extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_single_for_cpu(struct device *hwdev, + dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_single_for_device(struct device *hwdev, + dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_sg_for_cpu(struct device *hwdev, + struct scatterlist *sg, int nelems, + int dir); +extern void swiotlb_sync_sg_for_device(struct device *hwdev, + struct scatterlist *sg, int nelems, + int dir); +extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); +extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); +extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr); +extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction); +extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address, + size_t size, enum dma_data_direction direction); +extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); + +#ifdef CONFIG_SWIOTLB +extern int swiotlb; +#else +#define swiotlb 0 +#endif + +#endif diff -r e3d811cca4e1 -r 1ae656509f02 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c --- 
a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c Tue Aug 16 04:15:23 2005 +++ /dev/null Tue Aug 16 18:09:07 2005 @@ -1,336 +0,0 @@ -/* - * Dynamic DMA mapping support. - */ - -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/pci.h> -#include <linux/module.h> -#include <asm/io.h> -#include <asm-xen/balloon.h> - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scatter-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -int dma_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - int i; - - BUG_ON(direction == DMA_NONE); - for (i = 0; i < nents; i++ ) { - struct scatterlist *s = &sg[i]; - BUG_ON(!s->page); - s->dma_address = virt_to_bus(page_address(s->page) +s->offset); - s->dma_length = s->length; - } - return nents; -} - -EXPORT_SYMBOL(dma_map_sg); - -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. 
- */ -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, int dir) -{ - int i; - for (i = 0; i < nents; i++) { - struct scatterlist *s = &sg[i]; - BUG_ON(s->page == NULL); - BUG_ON(s->dma_address == 0); - dma_unmap_single(dev, s->dma_address, s->dma_length, dir); - } -} - -EXPORT_SYMBOL(dma_unmap_sg); - -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned gfp) -{ - void *ret; - unsigned int order = get_order(size); - unsigned long vstart; - - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - ret = mem->virt_base + (page << PAGE_SHIFT); - memset(ret, 0, size); - return ret; - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - return NULL; - } - - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - - vstart = __get_free_pages(gfp, order); - ret = (void *)vstart; - if (ret == NULL) - return ret; - - xen_contig_memory(vstart, order); - - memset(ret, 0, size); - *dma_handle = virt_to_bus(ret); - - return ret; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; - int order = get_order(size); - - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - } else - free_pages((unsigned long)vaddr, order); -} -EXPORT_SYMBOL(dma_free_coherent); - -#if 0 -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base; - int pages = size >> PAGE_SHIFT; - int bitmap_size = (pages + 31)/32; - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); - dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - memset(dev->dma_mem->bitmap, 0, bitmap_size); - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem->bitmap); - out: - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if(!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> 
PAGE_SHIFT; - int pos, err; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); -#endif - -static LIST_HEAD(dma_map_head); -static DEFINE_SPINLOCK(dma_map_lock); -struct dma_map_entry { - struct list_head list; - dma_addr_t dma; - char *bounce, *host; - size_t size; -}; -#define DMA_MAP_MATCHES(e,d) (((e)->dma<=(d)) && (((e)->dma+(e)->size)>(d))) - -dma_addr_t -dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction) -{ - struct dma_map_entry *ent; - void *bnc; - dma_addr_t dma; - unsigned long flags; - - if (direction == DMA_NONE) - out_of_line_bug(); - - /* - * Even if size is sub-page, the buffer may still straddle a page - * boundary. Take into account buffer start offset. All other calls are - * conservative and always search the dma_map list if it's non-empty. - */ - if (((((unsigned long)ptr) & ~PAGE_MASK) + size) <= PAGE_SIZE) { - dma = virt_to_bus(ptr); - } else { - BUG_ON((bnc = dma_alloc_coherent(dev, size, &dma, GFP_ATOMIC)) == NULL); - BUG_ON((ent = kmalloc(sizeof(*ent), GFP_ATOMIC)) == NULL); - if (direction != DMA_FROM_DEVICE) - memcpy(bnc, ptr, size); - ent->dma = dma; - ent->bounce = bnc; - ent->host = ptr; - ent->size = size; - spin_lock_irqsave(&dma_map_lock, flags); - list_add(&ent->list, &dma_map_head); - spin_unlock_irqrestore(&dma_map_lock, flags); - } - - if ((dma+size) & ~*dev->dma_mask) - out_of_line_bug(); - return dma; -} -EXPORT_SYMBOL(dma_map_single); - -void -dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction) -{ - struct dma_map_entry *ent; - unsigned long flags; - - if (direction == DMA_NONE) - out_of_line_bug(); - - /* Fast-path check: are there any multi-page DMA mappings? 
*/ - if (!list_empty(&dma_map_head)) { - spin_lock_irqsave(&dma_map_lock, flags); - list_for_each_entry ( ent, &dma_map_head, list ) { - if (DMA_MAP_MATCHES(ent, dma_addr)) { - list_del(&ent->list); - break; - } - } - spin_unlock_irqrestore(&dma_map_lock, flags); - if (&ent->list != &dma_map_head) { - BUG_ON(dma_addr != ent->dma); - BUG_ON(size != ent->size); - if (direction != DMA_TO_DEVICE) - memcpy(ent->host, ent->bounce, size); - dma_free_coherent(dev, size, ent->bounce, ent->dma); - kfree(ent); - } - } -} -EXPORT_SYMBOL(dma_unmap_single); - -void -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - struct dma_map_entry *ent; - unsigned long flags, off; - - /* Fast-path check: are there any multi-page DMA mappings? */ - if (!list_empty(&dma_map_head)) { - spin_lock_irqsave(&dma_map_lock, flags); - list_for_each_entry ( ent, &dma_map_head, list ) - if (DMA_MAP_MATCHES(ent, dma_handle)) - break; - spin_unlock_irqrestore(&dma_map_lock, flags); - if (&ent->list != &dma_map_head) { - off = dma_handle - ent->dma; - BUG_ON((off + size) > ent->size); - /*if (direction != DMA_TO_DEVICE)*/ - memcpy(ent->host+off, ent->bounce+off, size); - } - } -} -EXPORT_SYMBOL(dma_sync_single_for_cpu); - -void -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - struct dma_map_entry *ent; - unsigned long flags, off; - - /* Fast-path check: are there any multi-page DMA mappings? 
*/ - if (!list_empty(&dma_map_head)) { - spin_lock_irqsave(&dma_map_lock, flags); - list_for_each_entry ( ent, &dma_map_head, list ) - if (DMA_MAP_MATCHES(ent, dma_handle)) - break; - spin_unlock_irqrestore(&dma_map_lock, flags); - if (&ent->list != &dma_map_head) { - off = dma_handle - ent->dma; - BUG_ON((off + size) > ent->size); - /*if (direction != DMA_FROM_DEVICE)*/ - memcpy(ent->bounce+off, ent->host+off, size); - } - } - - flush_write_buffers(); -} -EXPORT_SYMBOL(dma_sync_single_for_device); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |