[Xen-changelog] [linux-2.6.18-xen] merge with linux-2.6.18-xen.hg
# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1185545750 21600
# Node ID b0bf9ba32bfe341af07da97d57572659c920fd30
# Parent  e6974eba1baec726ca1a97d076f0bf8428096bc2
# Parent  88a17da7f3362126182423100a9d7d4c0d854139
merge with linux-2.6.18-xen.hg
---
 arch/i386/kernel/swiotlb.c                 |  745 -----------------------------
 arch/i386/Kconfig                          |    4
 arch/i386/Makefile                         |    1
 arch/i386/kernel/Makefile                  |   26 -
 arch/i386/kernel/acpi/Makefile             |    4
 arch/i386/kernel/acpi/sleep-xen.c          |  129 +++++
 arch/i386/kernel/cpu/Makefile              |    4
 arch/i386/kernel/cpu/mtrr/Makefile         |    8
 arch/i386/kernel/time-xen.c                |    4
 arch/i386/mm/Makefile                      |    9
 arch/i386/mm/fault-xen.c                   |   33 -
 arch/i386/mm/ioremap-xen.c                 |    2
 arch/i386/pci/Makefile                     |    5
 arch/i386/power/cpu.c                      |    3
 arch/powerpc/platforms/xen/hcall.c         |  146 +++++
 arch/powerpc/platforms/xen/setup.c         |   13
 arch/x86_64/Kconfig                        |    4
 arch/x86_64/Makefile                       |    1
 arch/x86_64/ia32/Makefile                  |   13
 arch/x86_64/kernel/Makefile                |   18
 arch/x86_64/kernel/acpi/Makefile           |    4
 arch/x86_64/kernel/acpi/sleep-xen.c        |  162 ++++++
 arch/x86_64/kernel/head-xen.S              |   15
 arch/x86_64/kernel/suspend.c               |    2
 arch/x86_64/mm/Makefile                    |   13
 arch/x86_64/pci/Makefile                   |    7
 buildconfigs/linux-defconfig_xen_x86_32    |    7
 buildconfigs/linux-defconfig_xen_x86_64    |    7
 drivers/acpi/Kconfig                       |    6
 drivers/acpi/hardware/hwsleep.c            |    5
 drivers/acpi/sleep/main.c                  |    9
 drivers/acpi/sleep/poweroff.c              |    2
 drivers/xen/balloon/balloon.c              |   12
 drivers/xen/blkfront/blkfront.c            |    2
 drivers/xen/blktap/blktap.c                |   12
 drivers/xen/char/mem.c                     |   31 -
 drivers/xen/core/gnttab.c                  |    8
 drivers/xen/core/machine_reboot.c          |    1
 drivers/xen/fbfront/xenfb.c                |    1
 drivers/xen/fbfront/xenkbd.c               |    1
 drivers/xen/netfront/netfront.c            |   58 +-
 drivers/xen/netfront/netfront.h            |    4
 drivers/xen/pciback/conf_space_header.c    |   18
 drivers/xen/pcifront/xenbus.c              |    1
 drivers/xen/privcmd/privcmd.c              |    1
 drivers/xen/xenbus/xenbus_probe.c          |   59 +-
 drivers/xen/xenbus/xenbus_probe_backend.c  |    1
 drivers/xen/xenbus/xenbus_xs.c             |    6
 include/asm-i386/acpi.h                    |    4
 include/asm-i386/mach-xen/asm/gnttab_dma.h |    2
 include/asm-i386/mach-xen/asm/io.h         |    3
 include/asm-powerpc/io.h                   |    3
 include/asm-x86_64/acpi.h                  |    4
 include/asm-x86_64/mach-xen/asm/io.h       |    3
 include/xen/interface/platform.h           |   12
 lib/Makefile                               |    3
 lib/swiotlb-xen.c                          |  745 +++++++++++++++++++++++++++++
 scripts/Makefile.build                     |   12
 scripts/Makefile.lib                       |    6
 scripts/Makefile.xen.awk                   |   34 +
 60 files changed, 1528 insertions(+), 930 deletions(-)

diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/Kconfig
--- a/arch/i386/Kconfig	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/Kconfig	Fri Jul 27 08:15:50 2007 -0600
@@ -832,9 +832,7 @@ menu "Power management options (ACPI, AP
 menu "Power management options (ACPI, APM)"
 	depends on !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
 
-if !X86_XEN
-source kernel/power/Kconfig
-endif
+source "kernel/power/Kconfig"
 
 source "drivers/acpi/Kconfig"
 
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/Makefile
--- a/arch/i386/Makefile	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/Makefile	Fri Jul 27 08:15:50 2007 -0600
@@ -116,7 +116,6 @@ PHONY += zImage bzImage compressed zlilo
 
 ifdef CONFIG_XEN
 CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
-head-y := arch/i386/kernel/head-xen.o arch/i386/kernel/init_task-xen.o
 boot := arch/i386/boot-xen
 .PHONY: vmlinuz
 all: vmlinuz
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/kernel/Makefile
--- a/arch/i386/kernel/Makefile	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/kernel/Makefile	Fri Jul 27 08:15:50 2007 -0600
@@ -43,12 +43,7 @@ EXTRA_AFLAGS   := -traditional
 EXTRA_AFLAGS   := -traditional
 
 obj-$(CONFIG_SCx200)		+= scx200.o
-
-ifdef CONFIG_XEN
-vsyscall_note := vsyscall-note-xen.o
-else
-vsyscall_note := vsyscall-note.o
-endif
+obj-$(CONFIG_XEN)		+= fixup.o
 
 # vsyscall.o contains the vsyscall DSO images as __initdata.
 # We must build both images before we can assemble it.
@@ -71,7 +66,7 @@ SYSCFLAGS_vsyscall-int80.so = $(vsyscall
 $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
 $(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
-		      $(obj)/vsyscall-%.o $(obj)/$(vsyscall_note) FORCE
+		      $(obj)/vsyscall-%.o $(obj)/vsyscall-note.o FORCE
 	$(call if_changed,syscall)
 
 # We also create a special relocatable object that should mirror the symbol
@@ -83,20 +78,11 @@ extra-y += vsyscall-syms.o
 SYSCFLAGS_vsyscall-syms.o = -r
 $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
-			$(obj)/vsyscall-sysenter.o $(obj)/$(vsyscall_note) FORCE
+			$(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
 	$(call if_changed,syscall)
 
+early_printk-y		+= ../../x86_64/kernel/early_printk.o
 k8-y			+= ../../x86_64/kernel/k8.o
 
-ifdef CONFIG_XEN
-include $(srctree)/scripts/Makefile.xen
-
-obj-y += fixup.o
-microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
-n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o
-
-obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
-obj-y := $(call cherrypickxen, $(obj-y))
-extra-y := $(call cherrypickxen, $(extra-y))
-%/head-xen.o %/head-xen.s: EXTRA_AFLAGS :=
-endif
+disabled-obj-$(CONFIG_XEN) := i8259.o reboot.o smpboot.o trampoline.o
+%/head.o %/head.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/kernel/acpi/Makefile
--- a/arch/i386/kernel/acpi/Makefile	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/kernel/acpi/Makefile	Fri Jul 27 08:15:50 2007 -0600
@@ -6,7 +6,3 @@ obj-y += cstate.o processor.o
 obj-y += cstate.o processor.o
 endif
 
-ifdef CONFIG_XEN
-include $(srctree)/scripts/Makefile.xen
-obj-y := $(call cherrypickxen, $(obj-y), $(src))
-endif
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/kernel/acpi/sleep-xen.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/acpi/sleep-xen.c	Fri Jul 27 08:15:50 2007 -0600
@@ -0,0 +1,129 @@
+/*
+ * sleep.c - x86-specific ACPI sleep support.
+ *
+ *  Copyright (C) 2001-2003 Patrick Mochel
+ *  Copyright (C) 2001-2003 Pavel Machek <pavel@xxxxxxx>
+ */
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/dmi.h>
+#include <linux/cpumask.h>
+
+#include <asm/smp.h>
+
+/* address in low memory of the wakeup routine. */
+unsigned long acpi_wakeup_address = 0;
+unsigned long acpi_video_flags;
+extern char wakeup_start, wakeup_end;
+
+extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
+
+/**
+ * acpi_save_state_mem - save kernel state
+ *
+ * Create an identity mapped page table and copy the wakeup routine to
+ * low memory.
+ */
+int acpi_save_state_mem(void)
+{
+#ifndef CONFIG_ACPI_PV_SLEEP
+	if (!acpi_wakeup_address)
+		return 1;
+	memcpy((void *)acpi_wakeup_address, &wakeup_start,
+	       &wakeup_end - &wakeup_start);
+	acpi_copy_wakeup_routine(acpi_wakeup_address);
+#endif
+	return 0;
+}
+
+/*
+ * acpi_restore_state - undo effects of acpi_save_state_mem
+ */
+void acpi_restore_state_mem(void)
+{
+}
+
+/**
+ * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ *
+ * We allocate a page from the first 1MB of memory for the wakeup
+ * routine for when we come back from a sleep state. The
+ * runtime allocator allows specification of <16MB pages, but not
+ * <1MB pages.
+ */
+void __init acpi_reserve_bootmem(void)
+{
+	if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) {
+		printk(KERN_ERR
+		       "ACPI: Wakeup code way too big, S3 disabled.\n");
+		return;
+	}
+
+	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
+	if (!acpi_wakeup_address)
+		printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
+}
+
+static int __init acpi_sleep_setup(char *str)
+{
+	while ((str != NULL) && (*str != '\0')) {
+		if (strncmp(str, "s3_bios", 7) == 0)
+			acpi_video_flags = 1;
+		if (strncmp(str, "s3_mode", 7) == 0)
+			acpi_video_flags |= 2;
+		str = strchr(str, ',');
+		if (str != NULL)
+			str += strspn(str, ", \t");
+	}
+	return 1;
+}
+
+__setup("acpi_sleep=", acpi_sleep_setup);
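For illustration, the acpi_sleep= handler above walks a comma-separated option list
with strncmp/strchr/strspn: "s3_bios" sets bit 0 of the video flags, "s3_mode" sets
bit 1. A stand-alone user-space sketch of the same loop (the parse_acpi_sleep helper
is hypothetical, not part of this patch) shows the resulting flag values:

#include <stdio.h>
#include <string.h>

/* Re-creation of acpi_sleep_setup()'s option loop for illustration only. */
static unsigned long parse_acpi_sleep(const char *arg)
{
	unsigned long flags = 0;
	const char *str = arg;

	while (str != NULL && *str != '\0') {
		if (strncmp(str, "s3_bios", 7) == 0)
			flags = 1;		/* assignment, as in the patch */
		if (strncmp(str, "s3_mode", 7) == 0)
			flags |= 2;
		str = strchr(str, ',');		/* advance to next option */
		if (str != NULL)
			str += strspn(str, ", \t");
	}
	return flags;
}

int main(void)
{
	/* acpi_sleep=s3_bios,s3_mode -> flags == 3 */
	printf("%lu\n", parse_acpi_sleep("s3_bios,s3_mode"));
	return 0;
}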
+
+static __init int reset_videomode_after_s3(struct dmi_system_id *d)
+{
+	acpi_video_flags |= 2;
+	return 0;
+}
+
+static __initdata struct dmi_system_id acpisleep_dmi_table[] = {
+	{	/* Reset video mode after returning from ACPI S3 sleep */
+		.callback = reset_videomode_after_s3,
+		.ident = "Toshiba Satellite 4030cdt",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
+		},
+	},
+	{}
+};
+
+static int __init acpisleep_dmi_init(void)
+{
+	dmi_check_system(acpisleep_dmi_table);
+	return 0;
+}
+
+core_initcall(acpisleep_dmi_init);
+
+#ifdef CONFIG_ACPI_PV_SLEEP
+#include <asm/hypervisor.h>
+#include <xen/interface/platform.h>
+int acpi_notify_hypervisor_state(u8 sleep_state,
+				 u32 pm1a_cnt, u32 pm1b_cnt)
+{
+	struct xen_platform_op op = {
+		.cmd = XENPF_enter_acpi_sleep,
+		.interface_version = XENPF_INTERFACE_VERSION,
+		.u = {
+			.enter_acpi_sleep = {
+				.pm1a_cnt_val = (u16)pm1a_cnt,
+				.pm1b_cnt_val = (u16)pm1b_cnt,
+				.sleep_state = sleep_state,
+			},
+		},
+	};
+
+	return HYPERVISOR_platform_op(&op);
+}
+#endif /* CONFIG_ACPI_PV_SLEEP */
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/kernel/cpu/Makefile
--- a/arch/i386/kernel/cpu/Makefile	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/kernel/cpu/Makefile	Fri Jul 27 08:15:50 2007 -0600
@@ -18,7 +18,3 @@ obj-$(CONFIG_MTRR)	+= mtrr/
 obj-$(CONFIG_MTRR)	+= mtrr/
 obj-$(CONFIG_CPU_FREQ)	+= cpufreq/
 
-ifdef CONFIG_XEN
-include $(srctree)/scripts/Makefile.xen
-obj-y := $(call cherrypickxen, $(obj-y), $(src))
-endif
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/kernel/cpu/mtrr/Makefile
--- a/arch/i386/kernel/cpu/mtrr/Makefile	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/kernel/cpu/mtrr/Makefile	Fri Jul 27 08:15:50 2007 -0600
@@ -3,10 +3,4 @@ obj-y		+= cyrix.o
 obj-y		+= cyrix.o
 obj-y		+= centaur.o
 
-ifdef CONFIG_XEN
-include $(srctree)/scripts/Makefile.xen
-n-obj-xen := generic.o state.o amd.o cyrix.o centaur.o
-
-obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
-obj-y := $(call cherrypickxen, $(obj-y))
-endif
+obj-$(CONFIG_XEN) := main.o if.o
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/kernel/swiotlb.c
--- a/arch/i386/kernel/swiotlb.c	Thu Jul 26 14:33:50 2007 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,745 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- *
- * This implementation is a fallback for platforms that do not support
- * I/O TLBs (aka DMA address translation hardware).
- * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@xxxxxxxxx> - * Copyright (C) 2000 Goutham Rao <goutham.rao@xxxxxxxxx> - * Copyright (C) 2000, 2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@xxxxxxxxxx> - * Copyright (C) 2005 Keir Fraser <keir@xxxxxxxxxxxxx> - */ - -#include <linux/cache.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ctype.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/highmem.h> -#include <asm/io.h> -#include <asm/pci.h> -#include <asm/dma.h> -#include <asm/uaccess.h> -#include <xen/gnttab.h> -#include <xen/interface/memory.h> -#include <asm-i386/mach-xen/asm/gnttab_dma.h> - -int swiotlb; -EXPORT_SYMBOL(swiotlb); - -#define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1))) - -/* - * Maximum allowable number of contiguous slabs to map, - * must be a power of 2. What is the appropriate value ? - * The complexity of {map,unmap}_single is linearly dependent on this value. - */ -#define IO_TLB_SEGSIZE 128 - -/* - * log of the size of each IO TLB slab. The number of slabs is command line - * controllable. - */ -#define IO_TLB_SHIFT 11 - -int swiotlb_force; - -static char *iotlb_virt_start; -static unsigned long iotlb_nslabs; - -/* - * Used to do a quick range check in swiotlb_unmap_single and - * swiotlb_sync_single_*, to see if the memory was in fact allocated by this - * API. - */ -static unsigned long iotlb_pfn_start, iotlb_pfn_end; - -/* Does the given dma address reside within the swiotlb aperture? */ -static inline int in_swiotlb_aperture(dma_addr_t dev_addr) -{ - unsigned long pfn = mfn_to_local_pfn(dev_addr >> PAGE_SHIFT); - return (pfn_valid(pfn) - && (pfn >= iotlb_pfn_start) - && (pfn < iotlb_pfn_end)); -} - -/* - * When the IOMMU overflows we return a fallback buffer. This sets the size. - */ -static unsigned long io_tlb_overflow = 32*1024; - -void *io_tlb_overflow_buffer; - -/* - * This is a free list describing the number of free entries available from - * each index - */ -static unsigned int *io_tlb_list; -static unsigned int io_tlb_index; - -/* - * We need to save away the original address corresponding to a mapped entry - * for the sync operations. - */ -static struct phys_addr { - struct page *page; - unsigned int offset; -} *io_tlb_orig_addr; - -/* - * Protect the above data structures in the map and unmap calls - */ -static DEFINE_SPINLOCK(io_tlb_lock); - -static unsigned int dma_bits; -static unsigned int __initdata max_dma_bits = 32; -static int __init -setup_dma_bits(char *str) -{ - max_dma_bits = simple_strtoul(str, NULL, 0); - return 0; -} -__setup("dma_bits=", setup_dma_bits); - -static int __init -setup_io_tlb_npages(char *str) -{ - /* Unlike ia64, the size is aperture in megabytes, not 'slabs'! */ - if (isdigit(*str)) { - iotlb_nslabs = simple_strtoul(str, &str, 0) << - (20 - IO_TLB_SHIFT); - iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE); - /* Round up to power of two (xen_create_contiguous_region). */ - while (iotlb_nslabs & (iotlb_nslabs-1)) - iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1); - } - if (*str == ',') - ++str; - /* - * NB. 'force' enables the swiotlb, but doesn't force its use for - * every DMA like it does on native Linux. 'off' forcibly disables - * use of the swiotlb. 
- */ - if (!strcmp(str, "force")) - swiotlb_force = 1; - else if (!strcmp(str, "off")) - swiotlb_force = -1; - return 1; -} -__setup("swiotlb=", setup_io_tlb_npages); -/* make io_tlb_overflow tunable too? */ - -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the PCI DMA API. - */ -void -swiotlb_init_with_default_size (size_t default_size) -{ - unsigned long i, bytes; - int rc; - - if (!iotlb_nslabs) { - iotlb_nslabs = (default_size >> IO_TLB_SHIFT); - iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE); - /* Round up to power of two (xen_create_contiguous_region). */ - while (iotlb_nslabs & (iotlb_nslabs-1)) - iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1); - } - - bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT); - - /* - * Get IO TLB memory from the low pages - */ - iotlb_virt_start = alloc_bootmem_low_pages(bytes); - if (!iotlb_virt_start) - panic("Cannot allocate SWIOTLB buffer!\n"); - - dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; - for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) { - do { - rc = xen_create_contiguous_region( - (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT), - get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), - dma_bits); - } while (rc && dma_bits++ < max_dma_bits); - if (rc) { - if (i == 0) - panic("No suitable physical memory available for SWIOTLB buffer!\n" - "Use dom0_mem Xen boot parameter to reserve\n" - "some DMA memory (e.g., dom0_mem=-128M).\n"); - iotlb_nslabs = i; - i <<= IO_TLB_SHIFT; - free_bootmem(__pa(iotlb_virt_start + i), bytes - i); - bytes = i; - for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) { - unsigned int bits = fls64(virt_to_bus(iotlb_virt_start + i - 1)); - - if (bits > dma_bits) - dma_bits = bits; - } - break; - } - } - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE. - */ - io_tlb_list = alloc_bootmem(iotlb_nslabs * sizeof(int)); - for (i = 0; i < iotlb_nslabs; i++) - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_index = 0; - io_tlb_orig_addr = alloc_bootmem( - iotlb_nslabs * sizeof(*io_tlb_orig_addr)); - - /* - * Get the overflow emergency buffer - */ - io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); - if (!io_tlb_overflow_buffer) - panic("Cannot allocate SWIOTLB overflow buffer!\n"); - - do { - rc = xen_create_contiguous_region( - (unsigned long)io_tlb_overflow_buffer, - get_order(io_tlb_overflow), - dma_bits); - } while (rc && dma_bits++ < max_dma_bits); - if (rc) - panic("No suitable physical memory available for SWIOTLB overflow buffer!\n"); - - iotlb_pfn_start = __pa(iotlb_virt_start) >> PAGE_SHIFT; - iotlb_pfn_end = iotlb_pfn_start + (bytes >> PAGE_SHIFT); - - printk(KERN_INFO "Software IO TLB enabled: \n" - " Aperture: %lu megabytes\n" - " Kernel range: %p - %p\n" - " Address size: %u bits\n", - bytes >> 20, - iotlb_virt_start, iotlb_virt_start + bytes, - dma_bits); -} - -void -swiotlb_init(void) -{ - long ram_end; - size_t defsz = 64 * (1 << 20); /* 64MB default size */ - - if (swiotlb_force == 1) { - swiotlb = 1; - } else if ((swiotlb_force != -1) && - is_running_on_xen() && - is_initial_xendomain()) { - /* Domain 0 always has a swiotlb. */ - ram_end = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL); - if (ram_end <= 0x7ffff) - defsz = 2 * (1 << 20); /* 2MB on <2GB on systems. 
*/ - swiotlb = 1; - } - - if (swiotlb) - swiotlb_init_with_default_size(defsz); - else - printk(KERN_INFO "Software IO TLB disabled\n"); -} - -/* - * We use __copy_to_user_inatomic to transfer to the host buffer because the - * buffer may be mapped read-only (e.g, in blkback driver) but lower-level - * drivers map the buffer for DMA_BIDIRECTIONAL access. This causes an - * unnecessary copy from the aperture to the host buffer, and a page fault. - */ -static void -__sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir) -{ - if (PageHighMem(buffer.page)) { - size_t len, bytes; - char *dev, *host, *kmp; - len = size; - while (len != 0) { - unsigned long flags; - - if (((bytes = len) + buffer.offset) > PAGE_SIZE) - bytes = PAGE_SIZE - buffer.offset; - local_irq_save(flags); /* protects KM_BOUNCE_READ */ - kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ); - dev = dma_addr + size - len; - host = kmp + buffer.offset; - if (dir == DMA_FROM_DEVICE) { - if (__copy_to_user_inatomic(host, dev, bytes)) - /* inaccessible */; - } else - memcpy(dev, host, bytes); - kunmap_atomic(kmp, KM_BOUNCE_READ); - local_irq_restore(flags); - len -= bytes; - buffer.page++; - buffer.offset = 0; - } - } else { - char *host = (char *)phys_to_virt( - page_to_pseudophys(buffer.page)) + buffer.offset; - if (dir == DMA_FROM_DEVICE) { - if (__copy_to_user_inatomic(host, dma_addr, size)) - /* inaccessible */; - } else if (dir == DMA_TO_DEVICE) - memcpy(dma_addr, host, size); - } -} - -/* - * Allocates bounce buffer and returns its kernel virtual address. - */ -static void * -map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir) -{ - unsigned long flags; - char *dma_addr; - unsigned int nslots, stride, index, wrap; - struct phys_addr slot_buf; - int i; - - /* - * For mappings greater than a page, we limit the stride (and - * hence alignment) to a page size. - */ - nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (size > PAGE_SIZE) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = 1; - - BUG_ON(!nslots); - - /* - * Find suitable number of IO TLB entries size that will fit this - * request and allocate a buffer from that IO TLB pool. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - wrap = index = ALIGN(io_tlb_index, stride); - - if (index >= iotlb_nslabs) - wrap = index = 0; - - do { - /* - * If we find a slot that indicates we have 'nslots' - * number of contiguous buffers, we allocate the - * buffers from that slot and mark the entries as '0' - * indicating unavailable. - */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int)(index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; - (OFFSET(i, IO_TLB_SEGSIZE) != - IO_TLB_SEGSIZE -1) && io_tlb_list[i]; - i--) - io_tlb_list[i] = ++count; - dma_addr = iotlb_virt_start + - (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in - * the next round. - */ - io_tlb_index = - ((index + nslots) < iotlb_nslabs - ? (index + nslots) : 0); - - goto found; - } - index += stride; - if (index >= iotlb_nslabs) - index = 0; - } while (index != wrap); - - spin_unlock_irqrestore(&io_tlb_lock, flags); - return NULL; - } - found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - - /* - * Save away the mapping from the original address to the DMA address. - * This is needed when we sync the memory. Then we sync the buffer if - * needed. 
- */ - slot_buf = buffer; - for (i = 0; i < nslots; i++) { - slot_buf.page += slot_buf.offset >> PAGE_SHIFT; - slot_buf.offset &= PAGE_SIZE - 1; - io_tlb_orig_addr[index+i] = slot_buf; - slot_buf.offset += 1 << IO_TLB_SHIFT; - } - if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL)) - __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE); - - return dma_addr; -} - -struct phys_addr dma_addr_to_phys_addr(char *dma_addr) -{ - int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT; - struct phys_addr buffer = io_tlb_orig_addr[index]; - buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1); - buffer.page += buffer.offset >> PAGE_SHIFT; - buffer.offset &= PAGE_SIZE - 1; - return buffer; -} - -/* - * dma_addr is the kernel virtual address of the bounce buffer to unmap. - */ -static void -unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) -{ - unsigned long flags; - int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT; - struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr); - - /* - * First, sync the memory before unmapping the entry - */ - if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)) - __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE); - - /* - * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contigous entries available. - * While returning the entries to the free list, we merge the entries - * with slots below and above the pool being returned. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? - io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the - * slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) - io_tlb_list[i] = ++count; - /* - * Step 2: merge the returned slots with the preceding slots, - * if available (non zero) - */ - for (i = index - 1; - (OFFSET(i, IO_TLB_SEGSIZE) != - IO_TLB_SEGSIZE -1) && io_tlb_list[i]; - i--) - io_tlb_list[i] = ++count; - } - spin_unlock_irqrestore(&io_tlb_lock, flags); -} - -static void -sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir) -{ - struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr); - BUG_ON((dir != DMA_FROM_DEVICE) && (dir != DMA_TO_DEVICE)); - __sync_single(buffer, dma_addr, size, dir); -} - -static void -swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) -{ - /* - * Ran out of IOMMU space for this operation. This is very bad. - * Unfortunately the drivers cannot handle this operation properly. - * unless they check for pci_dma_mapping_error (most don't) - * When the mapping is small enough return a static buffer to limit - * the damage, or panic when the transfer is too big. - */ - printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at " - "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?"); - - if (size > io_tlb_overflow && do_panic) { - if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic("PCI-DMA: Memory would be corrupted\n"); - if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic("PCI-DMA: Random memory would be DMAed\n"); - } -} - -/* - * Map a single buffer of the indicated size for DMA in streaming mode. The - * PCI address to use is returned. - * - * Once the device is given the dma address, the device owns this memory until - * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. 
- */ -dma_addr_t -swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) -{ - dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) + - offset_in_page(ptr); - void *map; - struct phys_addr buffer; - - BUG_ON(dir == DMA_NONE); - - /* - * If the pointer passed in happens to be in the device's DMA window, - * we can safely return the device addr and not worry about bounce - * buffering it. - */ - if (!range_straddles_page_boundary(__pa(ptr), size) && - !address_needs_mapping(hwdev, dev_addr)) - return dev_addr; - - /* - * Oh well, have to allocate and map a bounce buffer. - */ - gnttab_dma_unmap_page(dev_addr); - buffer.page = virt_to_page(ptr); - buffer.offset = (unsigned long)ptr & ~PAGE_MASK; - map = map_single(hwdev, buffer, size, dir); - if (!map) { - swiotlb_full(hwdev, size, dir, 1); - map = io_tlb_overflow_buffer; - } - - dev_addr = virt_to_bus(map); - return dev_addr; -} - -/* - * Unmap a single streaming mode DMA translation. The dma_addr and size must - * match what was provided for in a previous swiotlb_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. - */ -void -swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, - int dir) -{ - BUG_ON(dir == DMA_NONE); - if (in_swiotlb_aperture(dev_addr)) - unmap_single(hwdev, bus_to_virt(dev_addr), size, dir); - else - gnttab_dma_unmap_page(dev_addr); -} - -/* - * Make physical memory consistent for a single streaming mode DMA translation - * after a transfer. - * - * If you perform a swiotlb_map_single() but wish to interrogate the buffer - * using the cpu, yet do not wish to teardown the PCI dma mapping, you must - * call this function before doing so. At the next point you give the PCI dma - * address back to the card, you must first perform a - * swiotlb_dma_sync_for_device, and then the device again owns the buffer - */ -void -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) -{ - BUG_ON(dir == DMA_NONE); - if (in_swiotlb_aperture(dev_addr)) - sync_single(hwdev, bus_to_virt(dev_addr), size, dir); -} - -void -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) -{ - BUG_ON(dir == DMA_NONE); - if (in_swiotlb_aperture(dev_addr)) - sync_single(hwdev, bus_to_virt(dev_addr), size, dir); -} - -/* - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * This is the scatter-gather version of the above swiotlb_map_single - * interface. Here the scatter gather list elements are each tagged with the - * appropriate dma address and length. They are obtained via - * sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for swiotlb_map_single are the - * same here. 
- */ -int -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, - int dir) -{ - struct phys_addr buffer; - dma_addr_t dev_addr; - char *map; - int i; - - BUG_ON(dir == DMA_NONE); - - for (i = 0; i < nelems; i++, sg++) { - dev_addr = gnttab_dma_map_page(sg->page) + sg->offset; - - if (range_straddles_page_boundary(page_to_pseudophys(sg->page) - + sg->offset, sg->length) - || address_needs_mapping(hwdev, dev_addr)) { - gnttab_dma_unmap_page(dev_addr); - buffer.page = sg->page; - buffer.offset = sg->offset; - map = map_single(hwdev, buffer, sg->length, dir); - if (!map) { - /* Don't panic here, we expect map_sg users - to do proper error handling. */ - swiotlb_full(hwdev, sg->length, dir, 0); - swiotlb_unmap_sg(hwdev, sg - i, i, dir); - sg[0].dma_length = 0; - return 0; - } - sg->dma_address = (dma_addr_t)virt_to_bus(map); - } else - sg->dma_address = dev_addr; - sg->dma_length = sg->length; - } - return nelems; -} - -/* - * Unmap a set of streaming mode DMA translations. Again, cpu read rules - * concerning calls here are the same as for swiotlb_unmap_single() above. - */ -void -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, - int dir) -{ - int i; - - BUG_ON(dir == DMA_NONE); - - for (i = 0; i < nelems; i++, sg++) - if (in_swiotlb_aperture(sg->dma_address)) - unmap_single(hwdev, - (void *)bus_to_virt(sg->dma_address), - sg->dma_length, dir); - else - gnttab_dma_unmap_page(sg->dma_address); -} - -/* - * Make physical memory consistent for a set of streaming mode DMA translations - * after a transfer. - * - * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules - * and usage. - */ -void -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir) -{ - int i; - - BUG_ON(dir == DMA_NONE); - - for (i = 0; i < nelems; i++, sg++) - if (in_swiotlb_aperture(sg->dma_address)) - sync_single(hwdev, - (void *)bus_to_virt(sg->dma_address), - sg->dma_length, dir); -} - -void -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir) -{ - int i; - - BUG_ON(dir == DMA_NONE); - - for (i = 0; i < nelems; i++, sg++) - if (in_swiotlb_aperture(sg->dma_address)) - sync_single(hwdev, - (void *)bus_to_virt(sg->dma_address), - sg->dma_length, dir); -} - -#ifdef CONFIG_HIGHMEM - -dma_addr_t -swiotlb_map_page(struct device *hwdev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - struct phys_addr buffer; - dma_addr_t dev_addr; - char *map; - - dev_addr = gnttab_dma_map_page(page) + offset; - if (address_needs_mapping(hwdev, dev_addr)) { - gnttab_dma_unmap_page(dev_addr); - buffer.page = page; - buffer.offset = offset; - map = map_single(hwdev, buffer, size, direction); - if (!map) { - swiotlb_full(hwdev, size, direction, 1); - map = io_tlb_overflow_buffer; - } - dev_addr = (dma_addr_t)virt_to_bus(map); - } - - return dev_addr; -} - -void -swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address, - size_t size, enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); - if (in_swiotlb_aperture(dma_address)) - unmap_single(hwdev, bus_to_virt(dma_address), size, direction); - else - gnttab_dma_unmap_page(dma_address); -} - -#endif - -int -swiotlb_dma_mapping_error(dma_addr_t dma_addr) -{ - return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); -} - -/* - * Return whether the given PCI device DMA address mask can be supported - * properly. 
For example, if your device can only drive the low 24-bits
- * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
- * this function.
- */
-int
-swiotlb_dma_supported (struct device *hwdev, u64 mask)
-{
-	return (mask >= ((1UL << dma_bits) - 1));
-}
-
-EXPORT_SYMBOL(swiotlb_init);
-EXPORT_SYMBOL(swiotlb_map_single);
-EXPORT_SYMBOL(swiotlb_unmap_single);
-EXPORT_SYMBOL(swiotlb_map_sg);
-EXPORT_SYMBOL(swiotlb_unmap_sg);
-EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
-EXPORT_SYMBOL(swiotlb_sync_single_for_device);
-EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
-EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
-EXPORT_SYMBOL(swiotlb_dma_mapping_error);
-EXPORT_SYMBOL(swiotlb_dma_supported);
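The mask check in the deleted swiotlb_dma_supported() is plain arithmetic: a device
mask is acceptable once it covers ((1UL << dma_bits) - 1), where dma_bits records how
high in machine memory the bounce buffers actually landed. A stand-alone sketch (the
dma_bits value of 31 is an assumed example, not taken from the patch):

#include <stdio.h>

/* Illustration of the swiotlb_dma_supported() test; dma_bits = 31 is an
 * assumed example value, computed at init time in the real code. */
static const unsigned int dma_bits = 31;

static int dma_supported(unsigned long long mask)
{
	return mask >= ((1ULL << dma_bits) - 1);
}

int main(void)
{
	printf("24-bit mask ok? %d\n", dma_supported(0x00ffffffULL)); /* 0 */
	printf("32-bit mask ok? %d\n", dma_supported(0xffffffffULL)); /* 1 */
	return 0;
}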
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/kernel/time-xen.c
--- a/arch/i386/kernel/time-xen.c	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/kernel/time-xen.c	Fri Jul 27 08:15:50 2007 -0600
@@ -867,9 +867,9 @@ static int timer_resume(struct sys_devic
 	return 0;
 }
 
+void time_resume(void);
 static struct sysdev_class timer_sysclass = {
-	.resume = timer_resume,
-	.suspend = timer_suspend,
+	.resume = time_resume,
 	set_kset_name("timer"),
 };
 
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/mm/Makefile
--- a/arch/i386/mm/Makefile	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/mm/Makefile	Fri Jul 27 08:15:50 2007 -0600
@@ -8,11 +8,4 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpag
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_HIGHMEM) += highmem.o
 obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o
-
-ifdef CONFIG_XEN
-include $(srctree)/scripts/Makefile.xen
-
-obj-y += hypervisor.o
-
-obj-y := $(call cherrypickxen, $(obj-y))
-endif
+obj-$(CONFIG_XEN) += hypervisor.o
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/mm/fault-xen.c
--- a/arch/i386/mm/fault-xen.c	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/mm/fault-xen.c	Fri Jul 27 08:15:50 2007 -0600
@@ -739,18 +739,31 @@ void vmalloc_sync_all(void)
 	 * problematic: insync can only get set bits added, and updates to
 	 * start are only improving performance (without affecting correctness
 	 * if undone).
-	 */
-	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+	 * XEN: To work on PAE, we need to iterate over PMDs rather than PGDs.
+	 *      This change works just fine with 2-level paging too.
+	 */
+#define sync_index(a) ((a) >> PMD_SHIFT)
+	static DECLARE_BITMAP(insync, PTRS_PER_PGD*PTRS_PER_PMD);
 	static unsigned long start = TASK_SIZE;
 	unsigned long address;
 
 	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
-	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
-		if (!test_bit(pgd_index(address), insync)) {
+	for (address = start;
+	     address >= TASK_SIZE && address < hypervisor_virt_start;
+	     address += 1UL << PMD_SHIFT) {
+		if (!test_bit(sync_index(address), insync)) {
 			unsigned long flags;
 			struct page *page;
 
 			spin_lock_irqsave(&pgd_lock, flags);
+			/*
+			 * XEN: vmalloc_sync_one() failure path logic assumes
+			 *      pgd_list is non-empty.
+			 */
+			if (unlikely(!pgd_list)) {
+				spin_unlock_irqrestore(&pgd_lock, flags);
+				return;
+			}
 			for (page = pgd_list; page; page =
 					(struct page *)page->index)
 				if (!vmalloc_sync_one(page_address(page),
@@ -760,10 +773,10 @@ void vmalloc_sync_all(void)
 			}
 			spin_unlock_irqrestore(&pgd_lock, flags);
 			if (!page)
-				set_bit(pgd_index(address), insync);
+				set_bit(sync_index(address), insync);
 		}
-		if (address == start && test_bit(pgd_index(address), insync))
-			start = address + PGDIR_SIZE;
-	}
-}
-#endif
+		if (address == start && test_bit(sync_index(address), insync))
+			start = address + (1UL << PMD_SHIFT);
+	}
+}
+#endif
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/mm/ioremap-xen.c
--- a/arch/i386/mm/ioremap-xen.c	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/mm/ioremap-xen.c	Fri Jul 27 08:15:50 2007 -0600
@@ -74,7 +74,7 @@ static int __direct_remap_pfn_range(stru
 
 	/*
 	 * Fill in the machine address: PTE ptr is done later by
-	 * __direct_remap_area_pages().
+	 * apply_to_page_range().
 	 */
 	v->val = __pte_val(pfn_pte_ma(mfn, prot));
 
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/pci/Makefile
--- a/arch/i386/pci/Makefile	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/pci/Makefile	Fri Jul 27 08:15:50 2007 -0600
@@ -16,8 +16,3 @@ pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o
 pci-$(CONFIG_X86_NUMAQ)	:= numa.o irq.o
 
 obj-y			+= $(pci-y) common.o
-
-ifdef CONFIG_XEN
-include $(srctree)/scripts/Makefile.xen
-obj-y := $(call cherrypickxen, $(obj-y))
-endif
diff -r e6974eba1bae -r b0bf9ba32bfe arch/i386/power/cpu.c
--- a/arch/i386/power/cpu.c	Thu Jul 26 14:33:50 2007 -0600
+++ b/arch/i386/power/cpu.c	Fri Jul 27 08:15:50 2007 -0600
@@ -62,11 +62,12 @@ static void do_fpu_end(void)
 
 static void fix_processor_context(void)
 {
+#ifndef CONFIG_X86_NO_TSS
 	int cpu = smp_processor_id();
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
 
 	set_tss_desc(cpu,t);	/* This just modifies memory; should not be
				   necessary. But... This is necessary, because
				   386 hardware has concept of busy TSS or some
				   similar stupidity.
*/ - +#endif load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */ diff -r e6974eba1bae -r b0bf9ba32bfe arch/powerpc/platforms/xen/hcall.c --- a/arch/powerpc/platforms/xen/hcall.c Thu Jul 26 14:33:50 2007 -0600 +++ b/arch/powerpc/platforms/xen/hcall.c Fri Jul 27 08:15:50 2007 -0600 @@ -34,6 +34,7 @@ #include <xen/interface/event_channel.h> #include <xen/interface/physdev.h> #include <xen/interface/vcpu.h> +#include <xen/interface/acm_ops.h> #include <xen/interface/kexec.h> #include <xen/public/privcmd.h> #include <asm/hypercall.h> @@ -680,6 +681,145 @@ static int xenppc_privcmd_event_channel_ return ret; } +static int xenppc_acmcmd_op(privcmd_hypercall_t *hypercall) +{ + xen_acmctl_t kern_op; + xen_acmctl_t __user *user_op = (xen_acmctl_t __user *)hypercall->arg[0]; + void *op_desc; + void *desc = NULL, *desc2 = NULL, *desc3 = NULL, *desc4 = NULL; + int ret = 0; + + if (copy_from_user(&kern_op, user_op, sizeof(xen_acmctl_t))) + return -EFAULT; + + if (kern_op.interface_version != ACM_INTERFACE_VERSION) { + printk(KERN_WARNING "%s: %s %x != %x\n", __func__, current->comm, + kern_op.interface_version, ACM_INTERFACE_VERSION); + return -EACCES; + } + + op_desc = xencomm_map(&kern_op, sizeof(xen_acmctl_t)); + if (op_desc == NULL) + return -ENOMEM; + + switch (kern_op.cmd) { + case ACMOP_setpolicy: + desc = xencomm_map( + xen_guest_handle(kern_op.u.setpolicy.pushcache), + kern_op.u.setpolicy.pushcache_size); + + if (desc == NULL) + ret = -ENOMEM; + + set_xen_guest_handle(kern_op.u.setpolicy.pushcache, + desc); + break; + case ACMOP_getpolicy: + desc = xencomm_map( + xen_guest_handle(kern_op.u.getpolicy.pullcache), + kern_op.u.getpolicy.pullcache_size); + + if (desc == NULL) + ret = -ENOMEM; + + set_xen_guest_handle(kern_op.u.getpolicy.pullcache, + desc); + break; + case ACMOP_dumpstats: + desc = xencomm_map( + xen_guest_handle(kern_op.u.dumpstats.pullcache), + kern_op.u.dumpstats.pullcache_size); + + if (desc == NULL) + ret = -ENOMEM; + + set_xen_guest_handle(kern_op.u.dumpstats.pullcache, + desc); + break; + case ACMOP_getssid: + desc = xencomm_map( + xen_guest_handle(kern_op.u.getssid.ssidbuf), + kern_op.u.getssid.ssidbuf_size); + + if (desc == NULL) + ret = -ENOMEM; + + set_xen_guest_handle(kern_op.u.getssid.ssidbuf, + desc); + break; + case ACMOP_getdecision: + break; + case ACMOP_chgpolicy: + desc = xencomm_map( + xen_guest_handle(kern_op.u.change_policy.policy_pushcache), + kern_op.u.change_policy.policy_pushcache_size); + desc2 = xencomm_map( + xen_guest_handle(kern_op.u.change_policy.del_array), + kern_op.u.change_policy.delarray_size); + desc3 = xencomm_map( + xen_guest_handle(kern_op.u.change_policy.chg_array), + kern_op.u.change_policy.chgarray_size); + desc4 = xencomm_map( + xen_guest_handle(kern_op.u.change_policy.err_array), + kern_op.u.change_policy.errarray_size); + + if (desc == NULL || desc2 == NULL || + desc3 == NULL || desc4 == NULL) { + ret = -ENOMEM; + goto out; + } + + set_xen_guest_handle(kern_op.u.change_policy.policy_pushcache, + desc); + set_xen_guest_handle(kern_op.u.change_policy.del_array, + desc2); + set_xen_guest_handle(kern_op.u.change_policy.chg_array, + desc3); + set_xen_guest_handle(kern_op.u.change_policy.err_array, + desc4); + break; + case ACMOP_relabeldoms: + desc = xencomm_map( + xen_guest_handle(kern_op.u.relabel_doms.relabel_map), + kern_op.u.relabel_doms.relabel_map_size); + desc2 = xencomm_map( + xen_guest_handle(kern_op.u.relabel_doms.err_array), + kern_op.u.relabel_doms.errarray_size); + + if (desc == NULL || 
desc2 == NULL) { + ret = -ENOMEM; + goto out; + } + + set_xen_guest_handle(kern_op.u.relabel_doms.relabel_map, + desc); + set_xen_guest_handle(kern_op.u.relabel_doms.err_array, + desc2); + break; + default: + printk(KERN_ERR "%s: unknown/unsupported acmctl cmd %d\n", + __func__, kern_op.cmd); + return -ENOSYS; + } + + if (ret) + goto out; /* error mapping the nested pointer */ + + ret = plpar_hcall_norets(XEN_MARK(hypercall->op),op_desc); + + if (copy_to_user(user_op, &kern_op, sizeof(xen_acmctl_t))) + ret = -EFAULT; + +out: + xencomm_free(desc); + xencomm_free(desc2); + xencomm_free(desc3); + xencomm_free(desc4); + xencomm_free(op_desc); + return ret; +} + + /* The PowerPC hypervisor runs in a separate address space from Linux * kernel/userspace, i.e. real mode. We must therefore translate userspace * pointers to something the hypervisor can make sense of. */ @@ -698,11 +838,11 @@ int privcmd_hypercall(privcmd_hypercall_ return xenppc_privcmd_version(hypercall); case __HYPERVISOR_event_channel_op: return xenppc_privcmd_event_channel_op(hypercall); + case __HYPERVISOR_acm_op: + return xenppc_acmcmd_op(hypercall); default: printk(KERN_ERR "%s: unknown hcall (%ld)\n", __func__, hypercall->op); - /* fallthru */ - /* below are the hcalls we know will fail and its ok */ - case __HYPERVISOR_acm_op: + /* maybe we'll get lucky and the hcall needs no translation. */ return plpar_hcall_norets(XEN_MARK(hypercall->op), hypercall->arg[0], hypercall->arg[1], diff -r e6974eba1bae -r b0bf9ba32bfe arch/powerpc/platforms/xen/setup.c --- a/arch/powerpc/platforms/xen/setup.c Thu Jul 26 14:33:50 2007 -0600 +++ b/arch/powerpc/platforms/xen/setup.c Fri Jul 27 08:15:50 2007 -0600 @@ -39,6 +39,7 @@ #include <asm/machdep.h> #include <asm/hypervisor.h> #include <asm/time.h> +#include <asm/pmc.h> #include "setup.h" #ifdef DEBUG @@ -287,6 +288,17 @@ unsigned int xenppc_get_irq(struct pt_re * is nothing for do_IRQ() to do. * XXX This means we aren't using IRQ stacks. 
*/ return NO_IRQ; +} + +static void xenppc_enable_pmcs(void) +{ + unsigned long set, reset; + + power4_enable_pmcs(); + + set = 1UL << 63; + reset = 0; + plpar_hcall_norets(H_PERFMON, set, reset); } #ifdef CONFIG_KEXEC @@ -316,6 +328,7 @@ define_machine(xen) { .calibrate_decr = generic_calibrate_decr, .progress = xenppc_progress, .power_save = xenppc_power_save, + .enable_pmcs = xenppc_enable_pmcs, #ifdef CONFIG_KEXEC .machine_kexec = xen_machine_kexec, .machine_kexec_prepare = xen_machine_kexec_prepare, diff -r e6974eba1bae -r b0bf9ba32bfe arch/x86_64/Kconfig --- a/arch/x86_64/Kconfig Thu Jul 26 14:33:50 2007 -0600 +++ b/arch/x86_64/Kconfig Fri Jul 27 08:15:50 2007 -0600 @@ -594,9 +594,7 @@ menu "Power management options" menu "Power management options" depends on !XEN_UNPRIVILEGED_GUEST -if !X86_64_XEN -source kernel/power/Kconfig -endif +source "kernel/power/Kconfig" source "drivers/acpi/Kconfig" diff -r e6974eba1bae -r b0bf9ba32bfe arch/x86_64/Makefile --- a/arch/x86_64/Makefile Thu Jul 26 14:33:50 2007 -0600 +++ b/arch/x86_64/Makefile Fri Jul 27 08:15:50 2007 -0600 @@ -80,7 +80,6 @@ PHONY += bzImage bzlilo install archmrpr ifdef CONFIG_XEN CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS) -head-y := arch/x86_64/kernel/head-xen.o arch/x86_64/kernel/head64-xen.o arch/x86_64/kernel/init_task.o LDFLAGS_vmlinux := -e startup_64 boot := arch/i386/boot-xen .PHONY: vmlinuz diff -r e6974eba1bae -r b0bf9ba32bfe arch/x86_64/ia32/Makefile --- a/arch/x86_64/ia32/Makefile Thu Jul 26 14:33:50 2007 -0600 +++ b/arch/x86_64/ia32/Makefile Fri Jul 27 08:15:50 2007 -0600 @@ -37,15 +37,8 @@ AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 ifdef CONFIG_XEN AFLAGS_vsyscall-int80.o = -m32 -Wa,-32 -Iarch/i386/kernel -CFLAGS_syscall32-xen.o += -DUSE_INT80 -AFLAGS_syscall32_syscall-xen.o += -DUSE_INT80 +CFLAGS_syscall32.o += -DUSE_INT80 +AFLAGS_syscall32_syscall.o += -DUSE_INT80 -$(obj)/syscall32_syscall-xen.o: \ - $(foreach F,int80 sysenter syscall,$(obj)/vsyscall-$F.so) - -targets := $(foreach F,int80 sysenter syscall,vsyscall-$F.o vsyscall-$F.so) - -include $(srctree)/scripts/Makefile.xen - -obj-y := $(call cherrypickxen, $(obj-y)) +$(obj)/syscall32_syscall.o: $(obj)/vsyscall-int80.so endif diff -r e6974eba1bae -r b0bf9ba32bfe arch/x86_64/kernel/Makefile --- a/arch/x86_64/kernel/Makefile Thu Jul 26 14:33:50 2007 -0600 +++ b/arch/x86_64/kernel/Makefile Fri Jul 27 08:15:50 2007 -0600 @@ -57,18 +57,8 @@ msr-$(subst m,y,$(CONFIG_X86_MSR)) += . 
msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o alternative-y += ../../i386/kernel/alternative.o -ifdef CONFIG_XEN -time-y += ../../i386/kernel/time-xen.o -pci-dma-y += ../../i386/kernel/pci-dma-xen.o -microcode-$(subst m,y,$(CONFIG_MICROCODE)) := ../../i386/kernel/microcode-xen.o -quirks-y := ../../i386/kernel/quirks-xen.o +time-$(CONFIG_XEN) += ../../i386/kernel/time.o +pci-dma-$(CONFIG_XEN) += ../../i386/kernel/pci-dma.o -n-obj-xen := i8259.o reboot.o i8237.o smpboot.o trampoline.o - -include $(srctree)/scripts/Makefile.xen - -obj-y := $(call filterxen, $(obj-y), $(n-obj-xen)) -obj-y := $(call cherrypickxen, $(obj-y)) -extra-y := $(call cherrypickxen, $(extra-y)) -%/head-xen.o %/head-xen.s: EXTRA_AFLAGS := -endif +disabled-obj-$(CONFIG_XEN) := i8259.o reboot.o smpboot.o trampoline.o +%/head.o %/head.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := diff -r e6974eba1bae -r b0bf9ba32bfe arch/x86_64/kernel/acpi/Makefile --- a/arch/x86_64/kernel/acpi/Makefile Thu Jul 26 14:33:50 2007 -0600 +++ b/arch/x86_64/kernel/acpi/Makefile Fri Jul 27 08:15:50 2007 -0600 @@ -8,3 +8,7 @@ endif endif boot-$(CONFIG_XEN) := ../../../i386/kernel/acpi/boot-xen.o +ifdef CONFIG_XEN +include $(srctree)/scripts/Makefile.xen +obj-y := $(call cherrypickxen, $(obj-y)) +endif diff -r e6974eba1bae -r b0bf9ba32bfe arch/x86_64/kernel/acpi/sleep-xen.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/x86_64/kernel/acpi/sleep-xen.c Fri Jul 27 08:15:50 2007 -0600 @@ -0,0 +1,162 @@ +/* + * acpi.c - Architecture-Specific Low-Level ACPI Support + * + * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx> + * Copyright (C) 2001 Jun Nakajima <jun.nakajima@xxxxxxxxx> + * Copyright (C) 2001 Patrick Mochel <mochel@xxxxxxxx> + * Copyright (C) 2002 Andi Kleen, SuSE Labs (x86-64 port) + * Copyright (C) 2003 Pavel Machek, SuSE Labs + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/stddef.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/bootmem.h> +#include <linux/acpi.h> +#include <linux/cpumask.h> + +#include <asm/mpspec.h> +#include <asm/io.h> +#include <asm/apic.h> +#include <asm/apicdef.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/io_apic.h> +#include <asm/proto.h> +#include <asm/tlbflush.h> + +/* -------------------------------------------------------------------------- + Low-Level Sleep Support + -------------------------------------------------------------------------- */ + +#ifdef CONFIG_ACPI_SLEEP + +/* address in low memory of the wakeup routine. 
*/ +unsigned long acpi_wakeup_address = 0; +unsigned long acpi_video_flags; +extern char wakeup_start, wakeup_end; + +extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); + +static pgd_t low_ptr; + +static void init_low_mapping(void) +{ + pgd_t *slot0 = pgd_offset(current->mm, 0UL); + low_ptr = *slot0; + set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET)); + WARN_ON(num_online_cpus() != 1); + local_flush_tlb(); +} + +/** + * acpi_save_state_mem - save kernel state + * + * Create an identity mapped page table and copy the wakeup routine to + * low memory. + */ +int acpi_save_state_mem(void) +{ +#ifndef CONFIG_ACPI_PV_SLEEP + init_low_mapping(); + + memcpy((void *)acpi_wakeup_address, &wakeup_start, + &wakeup_end - &wakeup_start); + acpi_copy_wakeup_routine(acpi_wakeup_address); +#endif + return 0; +} + +/* + * acpi_restore_state + */ +void acpi_restore_state_mem(void) +{ +#ifndef CONFIG_ACPI_PV_SLEEP + set_pgd(pgd_offset(current->mm, 0UL), low_ptr); + local_flush_tlb(); +#endif +} + +/** + * acpi_reserve_bootmem - do _very_ early ACPI initialisation + * + * We allocate a page in low memory for the wakeup + * routine for when we come back from a sleep state. The + * runtime allocator allows specification of <16M pages, but not + * <1M pages. + */ +void __init acpi_reserve_bootmem(void) +{ + acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); + if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) + printk(KERN_CRIT + "ACPI: Wakeup code way too big, will crash on attempt to suspend\n"); +} + +static int __init acpi_sleep_setup(char *str) +{ + while ((str != NULL) && (*str != '\0')) { + if (strncmp(str, "s3_bios", 7) == 0) + acpi_video_flags = 1; + if (strncmp(str, "s3_mode", 7) == 0) + acpi_video_flags |= 2; + str = strchr(str, ','); + if (str != NULL) + str += strspn(str, ", \t"); + } + + return 1; +} + +__setup("acpi_sleep=", acpi_sleep_setup); + +#ifdef CONFIG_ACPI_PV_SLEEP +#include <asm/hypervisor.h> +#include <xen/interface/platform.h> +int acpi_notify_hypervisor_state(u8 sleep_state, + u32 pm1a_cnt, u32 pm1b_cnt) +{ + struct xen_platform_op op = { + .cmd = XENPF_enter_acpi_sleep, + .interface_version = XENPF_INTERFACE_VERSION, + .u = { + .enter_acpi_sleep = { + .pm1a_cnt_val = (u16)pm1a_cnt, + .pm1b_cnt_val = (u16)pm1b_cnt, + .sleep_state = sleep_state, + }, + }, + }; + + return HYPERVISOR_platform_op(&op); +} +#endif /* CONFIG_ACPI_PV_SLEEP */ + +#endif /*CONFIG_ACPI_SLEEP */ + +void acpi_pci_link_exit(void) +{ +} diff -r e6974eba1bae -r b0bf9ba32bfe arch/x86_64/kernel/head-xen.S --- a/arch/x86_64/kernel/head-xen.S Thu Jul 26 14:33:50 2007 -0600 +++ b/arch/x86_64/kernel/head-xen.S Fri Jul 27 08:15:50 2007 -0600 @@ -37,6 +37,13 @@ startup_64: pushq $0 # fake return address jmp x86_64_start_kernel +#ifdef CONFIG_ACPI_SLEEP +.org 0xf00 + .globl pGDT32 +pGDT32: + .word gdt_end-cpu_gdt_table-1 + .long cpu_gdt_table-__START_KERNEL_map +#endif ENTRY(stext) ENTRY(_stext) @@ -95,6 +102,14 @@ NEXT_PAGE(hypercall_page) CFI_ENDPROC #undef NEXT_PAGE + + .data +/* Just dummy symbol to allow compilation. 
Not used in sleep path */ +#ifdef CONFIG_ACPI_SLEEP + .align PAGE_SIZE +ENTRY(wakeup_level4_pgt) + .fill 512,8,0 +#endif .data diff -r e6974eba1bae -r b0bf9ba32bfe arch/x86_64/kernel/suspend.c --- a/arch/x86_64/kernel/suspend.c Thu Jul 26 14:33:50 2007 -0600 +++ b/arch/x86_64/kernel/suspend.c Fri Jul 27 08:15:50 2007 -0600 @@ -114,12 +114,14 @@ void restore_processor_state(void) void fix_processor_context(void) { +#ifndef CONFIG_X86_NO_TSS int cpu = smp_processor_id(); struct tss_struct *t = &per_cpu(init_tss, cpu); set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ cpu_gdt(cpu)[GDT_ENTRY_TSS].type = 9; +#endif syscall_init(); /* This sets MSR_*STAR and related */ load_TR_desc(); /* This does ltr */ diff -r e6974eba1bae -r b0bf9ba32bfe arch/x86_64/mm/Makefile --- a/arch/x86_64/mm/Makefile Thu Jul 26 14:33:50 2007 -0600 +++ b/arch/x86_64/mm/Makefile Fri Jul 27 08:15:50 2007 -0600 @@ -7,15 +7,8 @@ obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_K8_NUMA) += k8topology.o obj-$(CONFIG_ACPI_NUMA) += srat.o +obj-$(CONFIG_XEN) += hypervisor.o hugetlbpage-y = ../../i386/mm/hugetlbpage.o - -ifdef CONFIG_XEN -include $(srctree)/scripts/Makefile.xen - -ioremap-y += ../../i386/mm/ioremap-xen.o -hypervisor-y += ../../i386/mm/hypervisor.o -obj-y += hypervisor.o - -obj-y := $(call cherrypickxen, $(obj-y)) -endif +ioremap-$(CONFIG_XEN) := ../../i386/mm/ioremap.o +hypervisor-y := ../../i386/mm/hypervisor.o diff -r e6974eba1bae -r b0bf9ba32bfe arch/x86_64/pci/Makefile --- a/arch/x86_64/pci/Makefile Thu Jul 26 14:33:50 2007 -0600 +++ b/arch/x86_64/pci/Makefile Fri Jul 27 08:15:50 2007 -0600 @@ -28,10 +28,3 @@ fixup-y += ../../i386/pci/fixup.o fixup-y += ../../i386/pci/fixup.o i386-y += ../../i386/pci/i386.o init-y += ../../i386/pci/init.o - -ifdef CONFIG_XEN -irq-y := ../../i386/pci/irq-xen.o -include $(srctree)/scripts/Makefile.xen - -obj-y := $(call cherrypickxen, $(obj-y)) -endif diff -r e6974eba1bae -r b0bf9ba32bfe buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Thu Jul 26 14:33:50 2007 -0600 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Fri Jul 27 08:15:50 2007 -0600 @@ -202,11 +202,18 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # Power management options (ACPI, APM) # CONFIG_PM=y +# CONFIG_PM_LEGACY is not set +CONFIG_PM_DEBUG=y +# CONFIG_SOFTWARE_SUSPEND is not set +CONFIG_SUSPEND_SMP=y # # ACPI (Advanced Configuration and Power Interface) Support # CONFIG_ACPI=y +CONFIG_ACPI_SLEEP=y +CONFIG_ACPI_SLEEP_PROC_FS=y +# CONFIG_ACPI_SLEEP_PROC_SLEEP is not set CONFIG_ACPI_AC=m CONFIG_ACPI_BATTERY=m CONFIG_ACPI_BUTTON=m diff -r e6974eba1bae -r b0bf9ba32bfe buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Thu Jul 26 14:33:50 2007 -0600 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Fri Jul 27 08:15:50 2007 -0600 @@ -161,11 +161,18 @@ CONFIG_GENERIC_PENDING_IRQ=y # Power management options # CONFIG_PM=y +# CONFIG_PM_LEGACY is not set +CONFIG_PM_DEBUG=y +# CONFIG_SOFTWARE_SUSPEND is not set +CONFIG_SUSPEND_SMP=y # # ACPI (Advanced Configuration and Power Interface) Support # CONFIG_ACPI=y +CONFIG_ACPI_SLEEP=y +CONFIG_ACPI_SLEEP_PROC_FS=y +# CONFIG_ACPI_SLEEP_PROC_SLEEP is not set CONFIG_ACPI_AC=m CONFIG_ACPI_BATTERY=m CONFIG_ACPI_BUTTON=m diff -r e6974eba1bae -r b0bf9ba32bfe drivers/acpi/Kconfig --- a/drivers/acpi/Kconfig Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/acpi/Kconfig Fri Jul 27 
08:15:50 2007 -0600
@@ -45,7 +45,7 @@ if ACPI
 
 config ACPI_SLEEP
 	bool "Sleep States"
-	depends on X86 && (!SMP || SUSPEND_SMP) && !XEN
+	depends on X86 && (!SMP || SUSPEND_SMP)
 	depends on PM
 	default y
 	---help---
@@ -363,6 +363,10 @@ config ACPI_SBS
 	  A "Smart Battery" is quite old and quite rare compared
 	  to today's ACPI "Control Method" battery.
 
+config ACPI_PV_SLEEP
+	bool
+	depends on X86 && XEN
+	default y
 
 endif	# ACPI
 
 endmenu
diff -r e6974eba1bae -r b0bf9ba32bfe drivers/acpi/hardware/hwsleep.c
--- a/drivers/acpi/hardware/hwsleep.c	Thu Jul 26 14:33:50 2007 -0600
+++ b/drivers/acpi/hardware/hwsleep.c	Fri Jul 27 08:15:50 2007 -0600
@@ -327,6 +327,7 @@ acpi_status asmlinkage acpi_enter_sleep_
 
 	ACPI_FLUSH_CPU_CACHE();
 
+#ifndef CONFIG_ACPI_PV_SLEEP
 	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
 					ACPI_REGISTER_PM1A_CONTROL,
 					PM1Acontrol);
@@ -337,6 +338,10 @@ acpi_status asmlinkage acpi_enter_sleep_
 	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
 					ACPI_REGISTER_PM1B_CONTROL,
 					PM1Bcontrol);
+#else
+	status = acpi_notify_hypervisor_state(sleep_state,
+					      PM1Acontrol, PM1Bcontrol);
+#endif
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
diff -r e6974eba1bae -r b0bf9ba32bfe drivers/acpi/sleep/main.c
--- a/drivers/acpi/sleep/main.c	Thu Jul 26 14:33:50 2007 -0600
+++ b/drivers/acpi/sleep/main.c	Fri Jul 27 08:15:50 2007 -0600
@@ -91,7 +91,14 @@ static int acpi_pm_enter(suspend_state_t
 		break;
 
 	case PM_SUSPEND_MEM:
+#ifdef CONFIG_ACPI_PV_SLEEP
+		/* Hypervisor will save and restore CPU context, so we can
+		 * skip low level housekeeping here.
+		 */
+		acpi_enter_sleep_state(acpi_state);
+#else
 		do_suspend_lowlevel();
+#endif
 		break;
 
 	case PM_SUSPEND_DISK:
@@ -145,10 +152,12 @@ static int acpi_pm_finish(suspend_state_
 	/* reset firmware waking vector */
 	acpi_set_firmware_waking_vector((acpi_physical_address) 0);
 
+#ifndef CONFIG_ACPI_PV_SLEEP
 	if (init_8259A_after_S1) {
 		printk("Broken toshiba laptop -> kicking interrupts\n");
 		init_8259A(0);
 	}
+#endif
 
 	return 0;
 }
diff -r e6974eba1bae -r b0bf9ba32bfe drivers/acpi/sleep/poweroff.c
--- a/drivers/acpi/sleep/poweroff.c	Thu Jul 26 14:33:50 2007 -0600
+++ b/drivers/acpi/sleep/poweroff.c	Fri Jul 27 08:15:50 2007 -0600
@@ -20,6 +20,7 @@ int acpi_sleep_prepare(u32 acpi_state)
 int acpi_sleep_prepare(u32 acpi_state)
 {
 #ifdef CONFIG_ACPI_SLEEP
+#ifndef CONFIG_ACPI_PV_SLEEP
 	/* do we have a wakeup address for S2 and S3? */
 	if (acpi_state == ACPI_STATE_S3) {
 		if (!acpi_wakeup_address) {
@@ -30,6 +31,7 @@ int acpi_sleep_prepare(u32 acpi_state)
 			    acpi_wakeup_address));
 	}
 
+#endif
 	ACPI_FLUSH_CPU_CACHE();
 	acpi_enable_wakeup_device_prep(acpi_state);
 #endif
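Taken together, the three ACPI hunks above collapse PV S3 entry into a single
hypercall: acpi_pm_enter() calls acpi_enter_sleep_state(), whose PM1 control
register writes are diverted to acpi_notify_hypervisor_state(), which issues
XENPF_enter_acpi_sleep. A condensed sketch of that final step (a restatement of
the patched code, simplified, with error handling elided):

/* Sketch: the PV S3 entry step wired up by this patch. */
#include <xen/interface/platform.h>
#include <asm/hypervisor.h>

static int pv_enter_s3(u8 sleep_state, u32 pm1a_cnt, u32 pm1b_cnt)
{
	struct xen_platform_op op = {
		.cmd = XENPF_enter_acpi_sleep,
		.interface_version = XENPF_INTERFACE_VERSION,
	};

	/* Instead of writing the PM1A/PM1B control registers directly,
	 * hand the values to Xen and let it execute the sleep. */
	op.u.enter_acpi_sleep.pm1a_cnt_val = (u16)pm1a_cnt;
	op.u.enter_acpi_sleep.pm1b_cnt_val = (u16)pm1b_cnt;
	op.u.enter_acpi_sleep.sleep_state  = sleep_state;

	return HYPERVISOR_platform_op(&op);
}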
*/ static LIST_HEAD(ballooned_pages); @@ -119,7 +127,7 @@ static void balloon_append(struct page * if (PageHighMem(page)) { list_add_tail(PAGE_TO_LIST(page), &ballooned_pages); bs.balloon_high++; - totalhigh_pages--; + dec_totalhigh_pages(); } else { list_add(PAGE_TO_LIST(page), &ballooned_pages); bs.balloon_low++; @@ -139,7 +147,7 @@ static struct page *balloon_retrieve(voi if (PageHighMem(page)) { bs.balloon_high--; - totalhigh_pages++; + inc_totalhigh_pages(); } else bs.balloon_low--; diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/blkfront/blkfront.c --- a/drivers/xen/blkfront/blkfront.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/blkfront/blkfront.c Fri Jul 27 08:15:50 2007 -0600 @@ -879,7 +879,7 @@ static struct xenbus_device_id blkfront_ { "vbd" }, { "" } }; - +MODULE_ALIAS("xen:vbd"); static struct xenbus_driver blkfront = { .name = "vbd", diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/blktap/blktap.c --- a/drivers/xen/blktap/blktap.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/blktap/blktap.c Fri Jul 27 08:15:50 2007 -0600 @@ -653,6 +653,7 @@ static int blktap_mmap(struct file *filp struct page **map; int i; tap_blkif_t *info = filp->private_data; + int ret; if (info == NULL) { WPRINTK("blktap: mmap, retrieving idx failed\n"); @@ -674,9 +675,14 @@ static int blktap_mmap(struct file *filp info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT); /* Map the ring pages to the start of the region and reserve it. */ - if (remap_pfn_range(vma, vma->vm_start, - __pa(info->ufe_ring.sring) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) { + if (xen_feature(XENFEAT_auto_translated_physmap)) + ret = vm_insert_page(vma, vma->vm_start, + virt_to_page(info->ufe_ring.sring)); + else + ret = remap_pfn_range(vma, vma->vm_start, + __pa(info->ufe_ring.sring) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot); + if (ret) { WPRINTK("Mapping user ring failed!\n"); goto fail; } diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/char/mem.c --- a/drivers/xen/char/mem.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/char/mem.c Fri Jul 27 08:15:50 2007 -0600 @@ -26,12 +26,13 @@ #include <asm/io.h> #include <asm/hypervisor.h> -#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE -static inline int valid_phys_addr_range(unsigned long addr, size_t count) +static inline int uncached_access(struct file *file) { - return 1; + if (file->f_flags & O_SYNC) + return 1; + /* Xen sets correct MTRR type on non-RAM for us. */ + return 0; } -#endif /* * This function reads the *physical* memory. 
The f_pos points directly to the @@ -44,9 +45,6 @@ static ssize_t read_mem(struct file * fi ssize_t read = 0, sz; void __iomem *v; - if (!valid_phys_addr_range(p, count)) - return -EFAULT; - while (count > 0) { /* * Handle first page in case it's not aligned @@ -58,7 +56,7 @@ static ssize_t read_mem(struct file * fi sz = min_t(unsigned long, sz, count); - v = xen_xlate_dev_mem_ptr(p, sz); + v = ioremap(p, sz); if (IS_ERR(v) || v == NULL) { /* * Some programs (e.g., dmidecode) groove off into @@ -75,7 +73,7 @@ static ssize_t read_mem(struct file * fi } ignored = copy_to_user(buf, v, sz); - xen_xlate_dev_mem_ptr_unmap(v); + iounmap(v); if (ignored) return -EFAULT; buf += sz; @@ -95,9 +93,6 @@ static ssize_t write_mem(struct file * f ssize_t written = 0, sz; void __iomem *v; - if (!valid_phys_addr_range(p, count)) - return -EFAULT; - while (count > 0) { /* * Handle first page in case it's not aligned @@ -109,7 +104,7 @@ static ssize_t write_mem(struct file * f sz = min_t(unsigned long, sz, count); - v = xen_xlate_dev_mem_ptr(p, sz); + v = ioremap(p, sz); if (v == NULL) break; if (IS_ERR(v)) { @@ -119,7 +114,7 @@ static ssize_t write_mem(struct file * f } ignored = copy_from_user(v, buf, sz); - xen_xlate_dev_mem_ptr_unmap(v); + iounmap(v); if (ignored) { written += sz - ignored; if (written) @@ -137,14 +132,6 @@ static ssize_t write_mem(struct file * f } #ifndef ARCH_HAS_DEV_MEM_MMAP_MEM -static inline int uncached_access(struct file *file) -{ - if (file->f_flags & O_SYNC) - return 1; - /* Xen sets correct MTRR type on non-RAM for us. */ - return 0; -} - static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma) { size_t size = vma->vm_end - vma->vm_start; diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/core/gnttab.c --- a/drivers/xen/core/gnttab.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/core/gnttab.c Fri Jul 27 08:15:50 2007 -0600 @@ -65,8 +65,6 @@ static struct grant_entry *shared; static struct gnttab_free_callback *gnttab_free_callback_list; -static DEFINE_SEQLOCK(gnttab_dma_lock); - static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) @@ -186,7 +184,7 @@ int gnttab_end_foreign_access_ref(grant_ nflags = shared[ref].flags; do { if ((flags = nflags) & (GTF_reading|GTF_writing)) { - printk(KERN_ALERT "WARNING: g.e. still in use!\n"); + printk(KERN_DEBUG "WARNING: g.e. still in use!\n"); return 0; } } while ((nflags = synch_cmpxchg_subword(&shared[ref].flags, flags, 0)) != @@ -206,7 +204,7 @@ void gnttab_end_foreign_access(grant_ref } else { /* XXX This needs to be fixed so that the ref and page are placed on a list to be freed up later. */ - printk(KERN_WARNING + printk(KERN_DEBUG "WARNING: leaking g.e. 
and page still in use!\n"); } } @@ -430,6 +428,8 @@ static inline unsigned int max_nr_grant_ #ifdef CONFIG_XEN +static DEFINE_SEQLOCK(gnttab_dma_lock); + #ifdef CONFIG_X86 static int map_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/core/machine_reboot.c --- a/drivers/xen/core/machine_reboot.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/core/machine_reboot.c Fri Jul 27 08:15:50 2007 -0600 @@ -13,7 +13,6 @@ #include <asm/hypervisor.h> #include <xen/xenbus.h> #include <linux/cpu.h> -#include <linux/kthread.h> #include <xen/gnttab.h> #include <xen/xencons.h> #include <xen/cpu_hotplug.h> diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/fbfront/xenfb.c --- a/drivers/xen/fbfront/xenfb.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/fbfront/xenfb.c Fri Jul 27 08:15:50 2007 -0600 @@ -718,6 +718,7 @@ static struct xenbus_device_id xenfb_ids { "vfb" }, { "" } }; +MODULE_ALIAS("xen:vfb"); static struct xenbus_driver xenfb = { .name = "vfb", diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/fbfront/xenkbd.c --- a/drivers/xen/fbfront/xenkbd.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/fbfront/xenkbd.c Fri Jul 27 08:15:50 2007 -0600 @@ -299,6 +299,7 @@ static struct xenbus_device_id xenkbd_id { "vkbd" }, { "" } }; +MODULE_ALIAS("xen:vkbd"); static struct xenbus_driver xenkbd = { .name = "vkbd", diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/netfront/netfront.c --- a/drivers/xen/netfront/netfront.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/netfront/netfront.c Fri Jul 27 08:15:50 2007 -0600 @@ -65,10 +65,6 @@ #include <xen/gnttab.h> #include <xen/hypercall.h> -#ifdef HAVE_XEN_PLATFORM_COMPAT_H -#include <xen/platform-compat.h> -#endif - struct netfront_cb { struct page *page; unsigned offset; @@ -1540,7 +1536,7 @@ static void netif_release_tx_bufs(struct } } -static void netif_release_rx_bufs(struct netfront_info *np) +static void netif_release_rx_bufs_flip(struct netfront_info *np) { struct mmu_update *mmu = np->rx_mmu; struct multicall_entry *mcl = np->rx_mcl; @@ -1549,11 +1545,6 @@ static void netif_release_rx_bufs(struct unsigned long mfn; int xfer = 0, noxfer = 0, unused = 0; int id, ref, rc; - - if (np->copying_receiver) { - WPRINTK("%s: fix me for copying receiver.\n", __FUNCTION__); - return; - } skb_queue_head_init(&free_list); @@ -1601,7 +1592,7 @@ static void netif_release_rx_bufs(struct xfer++; } - IPRINTK("%s: %d xfer, %d noxfer, %d unused\n", + DPRINTK("%s: %d xfer, %d noxfer, %d unused\n", __FUNCTION__, xfer, noxfer, unused); if (xfer) { @@ -1628,6 +1619,45 @@ static void netif_release_rx_bufs(struct spin_unlock_bh(&np->rx_lock); } +static void netif_release_rx_bufs_copy(struct netfront_info *np) +{ + struct sk_buff *skb; + int i, ref; + int busy = 0, inuse = 0; + + spin_lock_bh(&np->rx_lock); + + for (i = 0; i < NET_RX_RING_SIZE; i++) { + ref = np->grant_rx_ref[i]; + + if (ref == GRANT_INVALID_REF) + continue; + + inuse++; + + skb = np->rx_skbs[i]; + + if (!gnttab_end_foreign_access_ref(ref, 0)) + { + busy++; + continue; + } + + gnttab_release_grant_reference(&np->gref_rx_head, ref); + np->grant_rx_ref[i] = GRANT_INVALID_REF; + add_id_to_freelist(np->rx_skbs, i); + + skb_shinfo(skb)->nr_frags = 0; + dev_kfree_skb(skb); + } + + if (busy) + DPRINTK("%s: Unable to release %d of %d inuse grant references out of %ld total.\n", + __FUNCTION__, busy, inuse, NET_RX_RING_SIZE); + + spin_unlock_bh(&np->rx_lock); +} + static int network_close(struct net_device *dev) { struct netfront_info *np = 
netdev_priv(dev); @@ -1821,7 +1851,10 @@ static void netif_uninit(struct net_devi { struct netfront_info *np = netdev_priv(dev); netif_release_tx_bufs(np); - netif_release_rx_bufs(np); + if (np->copying_receiver) + netif_release_rx_bufs_copy(np); + else + netif_release_rx_bufs_flip(np); gnttab_free_grant_references(np->gref_tx_head); gnttab_free_grant_references(np->gref_rx_head); } @@ -2118,6 +2151,7 @@ static struct xenbus_device_id netfront_ { "vif" }, { "" } }; +MODULE_ALIAS("xen:vif"); static struct xenbus_driver netfront = { diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/netfront/netfront.h --- a/drivers/xen/netfront/netfront.h Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/netfront/netfront.h Fri Jul 27 08:15:50 2007 -0600 @@ -44,6 +44,10 @@ #include <xen/xenbus.h> +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include <xen/platform-compat.h> +#endif + /* * Function pointer table for hooks into a network acceleration * plugin. These are called at appropriate points from the netfront diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/pciback/conf_space_header.c --- a/drivers/xen/pciback/conf_space_header.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/pciback/conf_space_header.c Fri Jul 27 08:15:50 2007 -0600 @@ -75,8 +75,15 @@ static int rom_write(struct pci_dev *dev */ if (value == ~PCI_ROM_ADDRESS_ENABLE) bar->which = 1; - else + else { + u32 tmpval; + pci_read_config_dword(dev, offset, &tmpval); + if (tmpval != bar->val && value == bar->val) { + /* Allow restoration of bar value. */ + pci_write_config_dword(dev, offset, bar->val); + } bar->which = 0; + } /* Do we need to support enabling/disabling the rom address here? */ @@ -102,8 +109,15 @@ static int bar_write(struct pci_dev *dev */ if (value == ~0) bar->which = 1; - else + else { + u32 tmpval; + pci_read_config_dword(dev, offset, &tmpval); + if (tmpval != bar->val && value == bar->val) { + /* Allow restoration of bar value. 
*/ + pci_write_config_dword(dev, offset, bar->val); + } bar->which = 0; + } return 0; } diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/pcifront/xenbus.c --- a/drivers/xen/pcifront/xenbus.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/pcifront/xenbus.c Fri Jul 27 08:15:50 2007 -0600 @@ -273,6 +273,7 @@ static struct xenbus_device_id xenpci_id {"pci"}, {{0}}, }; +MODULE_ALIAS("xen:pci"); static struct xenbus_driver xenbus_pcifront_driver = { .name = "pcifront", diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/privcmd/privcmd.c --- a/drivers/xen/privcmd/privcmd.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/privcmd/privcmd.c Fri Jul 27 08:15:50 2007 -0600 @@ -18,7 +18,6 @@ #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/seq_file.h> -#include <linux/kthread.h> #include <asm/hypervisor.h> #include <asm/pgalloc.h> diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/xenbus/xenbus_probe.c --- a/drivers/xen/xenbus/xenbus_probe.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/xenbus/xenbus_probe.c Fri Jul 27 08:15:50 2007 -0600 @@ -42,7 +42,6 @@ #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/notifier.h> -#include <linux/kthread.h> #include <linux/mutex.h> #include <asm/io.h> @@ -166,6 +165,30 @@ static int read_backend_details(struct x return read_otherend_details(xendev, "backend-id", "backend"); } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +static int xenbus_uevent_frontend(struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size) +{ + struct xenbus_device *xdev; + int length = 0, i = 0; + + if (dev == NULL) + return -ENODEV; + xdev = to_xenbus_device(dev); + if (xdev == NULL) + return -ENODEV; + + /* stuff we want to pass to /sbin/hotplug */ + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_TYPE=%s", xdev->devicetype); + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_PATH=%s", xdev->nodename); + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "MODALIAS=xen:%s", xdev->devicetype); + + return 0; +} +#endif /* Bus type for frontend drivers. */ static struct xen_bus_type xenbus_frontend = { @@ -181,6 +204,7 @@ static struct xen_bus_type xenbus_fronte .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, + .uevent = xenbus_uevent_frontend, #endif }, .dev = { @@ -210,6 +234,7 @@ static void otherend_changed(struct xenb DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state), dev->otherend_watch.node, vec[XS_WATCH_PATH]); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) /* * Ignore xenbus transitions during shutdown. This prevents us doing * work that can fail e.g., when the rootfs is gone. @@ -223,6 +248,7 @@ static void otherend_changed(struct xenb xenbus_frontend_closed(dev); return; } +#endif if (drv->otherend_changed) drv->otherend_changed(dev, state); @@ -1008,7 +1034,7 @@ static int is_disconnected_device(struct return 0; xendrv = to_xenbus_driver(dev->driver); - return (xendev->state != XenbusStateConnected || + return (xendev->state < XenbusStateConnected || (xendrv->is_ready && !xendrv->is_ready(xendev))); } @@ -1033,10 +1059,13 @@ static int print_device_status(struct de /* Information only: is this too noisy? 
*/ printk(KERN_INFO "XENBUS: Device with no driver: %s\n", xendev->nodename); - } else if (xendev->state != XenbusStateConnected) { + } else if (xendev->state < XenbusStateConnected) { + enum xenbus_state rstate = XenbusStateUnknown; + if (xendev->otherend) + rstate = xenbus_read_driver_state(xendev->otherend); printk(KERN_WARNING "XENBUS: Timeout connecting " - "to device: %s (state %d)\n", - xendev->nodename, xendev->state); + "to device: %s (local state %d, remote state %d)\n", + xendev->nodename, xendev->state, rstate); } return 0; @@ -1046,7 +1075,7 @@ static int ready_to_wait_for_devices; static int ready_to_wait_for_devices; /* - * On a 10 second timeout, wait for all devices currently configured. We need + * On a 30-second timeout, wait for all devices currently configured. We need * to do this to guarantee that the filesystems and / or network devices * needed for boot are available, before we can allow the boot to proceed. * @@ -1061,17 +1090,29 @@ static int ready_to_wait_for_devices; */ static void wait_for_devices(struct xenbus_driver *xendrv) { - unsigned long timeout = jiffies + 10*HZ; + unsigned long start = jiffies; struct device_driver *drv = xendrv ? &xendrv->driver : NULL; + unsigned int seconds_waited = 0; if (!ready_to_wait_for_devices || !is_running_on_xen()) return; while (exists_disconnected_device(drv)) { - if (time_after(jiffies, timeout)) - break; + if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { + if (!seconds_waited) + printk(KERN_WARNING "XENBUS: Waiting for " + "devices to initialise: "); + seconds_waited += 5; + printk("%us...", 30 - seconds_waited); + if (seconds_waited == 30) + break; + } + schedule_timeout_interruptible(HZ/10); } + + if (seconds_waited) + printk("\n"); bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, print_device_status); diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/xenbus/xenbus_probe_backend.c --- a/drivers/xen/xenbus/xenbus_probe_backend.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/xenbus/xenbus_probe_backend.c Fri Jul 27 08:15:50 2007 -0600 @@ -42,7 +42,6 @@ #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/notifier.h> -#include <linux/kthread.h> #include <asm/io.h> #include <asm/page.h> diff -r e6974eba1bae -r b0bf9ba32bfe drivers/xen/xenbus/xenbus_xs.c --- a/drivers/xen/xenbus/xenbus_xs.c Thu Jul 26 14:33:50 2007 -0600 +++ b/drivers/xen/xenbus/xenbus_xs.c Fri Jul 27 08:15:50 2007 -0600 @@ -51,6 +51,10 @@ #include <xen/platform-compat.h> #endif +#ifndef PF_NOFREEZE /* Old kernel (pre-2.6.6). 
*/ +#define PF_NOFREEZE 0 +#endif + struct xs_stored_msg { struct list_head list; @@ -802,8 +806,8 @@ static int process_msg(void) msg->u.watch.vec = split(body, msg->hdr.len, &msg->u.watch.vec_size); if (IS_ERR(msg->u.watch.vec)) { + err = PTR_ERR(msg->u.watch.vec); kfree(msg); - err = PTR_ERR(msg->u.watch.vec); goto out; } diff -r e6974eba1bae -r b0bf9ba32bfe include/asm-i386/acpi.h --- a/include/asm-i386/acpi.h Thu Jul 26 14:33:50 2007 -0600 +++ b/include/asm-i386/acpi.h Fri Jul 27 08:15:50 2007 -0600 @@ -177,6 +177,10 @@ extern unsigned long acpi_wakeup_address /* early initialization routine */ extern void acpi_reserve_bootmem(void); +#ifdef CONFIG_ACPI_PV_SLEEP +extern int acpi_notify_hypervisor_state(u8 sleep_state, + u32 pm1a_cnt, u32 pm1b_cnt); +#endif /* CONFIG_ACPI_PV_SLEEP */ #endif /*CONFIG_ACPI_SLEEP*/ extern u8 x86_acpiid_to_apicid[]; diff -r e6974eba1bae -r b0bf9ba32bfe include/asm-i386/mach-xen/asm/gnttab_dma.h --- a/include/asm-i386/mach-xen/asm/gnttab_dma.h Thu Jul 26 14:33:50 2007 -0600 +++ b/include/asm-i386/mach-xen/asm/gnttab_dma.h Fri Jul 27 08:15:50 2007 -0600 @@ -30,7 +30,7 @@ static inline maddr_t gnttab_dma_map_pag static inline maddr_t gnttab_dma_map_page(struct page *page) { __gnttab_dma_map_page(page); - return page_to_bus(page); + return ((maddr_t)pfn_to_mfn(page_to_pfn(page)) << PAGE_SHIFT); } static inline void gnttab_dma_unmap_page(maddr_t maddr) diff -r e6974eba1bae -r b0bf9ba32bfe include/asm-i386/mach-xen/asm/io.h --- a/include/asm-i386/mach-xen/asm/io.h Thu Jul 26 14:33:50 2007 -0600 +++ b/include/asm-i386/mach-xen/asm/io.h Fri Jul 27 08:15:50 2007 -0600 @@ -53,8 +53,7 @@ * Convert a physical pointer to a virtual kernel pointer for /dev/mem * access */ -#define xen_xlate_dev_mem_ptr(p, sz) ioremap(p, sz) -#define xen_xlate_dev_mem_ptr_unmap(p) iounmap(p) +#define xlate_dev_mem_ptr(p) __va(p) /* * Convert a virtual cached pointer to an uncached pointer diff -r e6974eba1bae -r b0bf9ba32bfe include/asm-powerpc/io.h --- a/include/asm-powerpc/io.h Thu Jul 26 14:33:50 2007 -0600 +++ b/include/asm-powerpc/io.h Fri Jul 27 08:15:50 2007 -0600 @@ -457,9 +457,6 @@ out: */ #define xlate_dev_mem_ptr(p) __va(p) -#define xen_xlate_dev_mem_ptr(p,sz) xlate_dev_mem_ptr(p) -#define xen_xlate_dev_mem_ptr_unmap(p) - /* * Convert a virtual cached pointer to an uncached pointer */ diff -r e6974eba1bae -r b0bf9ba32bfe include/asm-x86_64/acpi.h --- a/include/asm-x86_64/acpi.h Thu Jul 26 14:33:50 2007 -0600 +++ b/include/asm-x86_64/acpi.h Fri Jul 27 08:15:50 2007 -0600 @@ -153,6 +153,10 @@ extern unsigned long acpi_wakeup_address /* early initialization routine */ extern void acpi_reserve_bootmem(void); +#ifdef CONFIG_ACPI_PV_SLEEP +extern int acpi_notify_hypervisor_state(u8 sleep_state, + u32 pm1a_cnt, u32 pm1b_cnt); +#endif /* CONFIG_ACPI_PV_SLEEP */ #endif /*CONFIG_ACPI_SLEEP*/ #define boot_cpu_physical_apicid boot_cpu_id diff -r e6974eba1bae -r b0bf9ba32bfe include/asm-x86_64/mach-xen/asm/io.h --- a/include/asm-x86_64/mach-xen/asm/io.h Thu Jul 26 14:33:50 2007 -0600 +++ b/include/asm-x86_64/mach-xen/asm/io.h Fri Jul 27 08:15:50 2007 -0600 @@ -315,8 +315,7 @@ extern int iommu_bio_merge; * Convert a physical pointer to a virtual kernel pointer for /dev/mem * access */ -#define xen_xlate_dev_mem_ptr(p, sz) ioremap(p, sz) -#define xen_xlate_dev_mem_ptr_unmap(p) iounmap(p) +#define xlate_dev_mem_ptr(p) __va(p) /* * Convert a virtual cached pointer to an uncached pointer diff -r e6974eba1bae -r b0bf9ba32bfe include/xen/interface/platform.h --- a/include/xen/interface/platform.h 
Thu Jul 26 14:33:50 2007 -0600 +++ b/include/xen/interface/platform.h Fri Jul 27 08:15:50 2007 -0600 @@ -153,6 +153,17 @@ typedef struct xenpf_firmware_info xenpf typedef struct xenpf_firmware_info xenpf_firmware_info_t; DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t); +#define XENPF_enter_acpi_sleep 51 +struct xenpf_enter_acpi_sleep { + /* IN variables */ + uint16_t pm1a_cnt_val; /* PM1a control value. */ + uint16_t pm1b_cnt_val; /* PM1b control value. */ + uint32_t sleep_state; /* Which state to enter (Sn). */ + uint32_t flags; /* Must be zero. */ +}; +typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t); + struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ @@ -164,6 +175,7 @@ struct xen_platform_op { struct xenpf_microcode_update microcode; struct xenpf_platform_quirk platform_quirk; struct xenpf_firmware_info firmware_info; + struct xenpf_enter_acpi_sleep enter_acpi_sleep; uint8_t pad[128]; } u; }; diff -r e6974eba1bae -r b0bf9ba32bfe lib/Makefile --- a/lib/Makefile Thu Jul 26 14:33:50 2007 -0600 +++ b/lib/Makefile Fri Jul 27 08:15:50 2007 -0600 @@ -52,9 +52,6 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o -ifeq ($(CONFIG_X86),y) -swiotlb-$(CONFIG_XEN) := ../arch/i386/kernel/swiotlb.o -endif ifeq ($(CONFIG_IA64),y) swiotlb-$(CONFIG_XEN) := ../arch/ia64/xen/swiotlb.o endif diff -r e6974eba1bae -r b0bf9ba32bfe lib/swiotlb-xen.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/swiotlb-xen.c Fri Jul 27 08:15:50 2007 -0600 @@ -0,0 +1,745 @@ +/* + * Dynamic DMA mapping support. + * + * This implementation is a fallback for platforms that do not support + * I/O TLBs (aka DMA address translation hardware). + * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@xxxxxxxxx> + * Copyright (C) 2000 Goutham Rao <goutham.rao@xxxxxxxxx> + * Copyright (C) 2000, 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@xxxxxxxxxx> + * Copyright (C) 2005 Keir Fraser <keir@xxxxxxxxxxxxx> + */ + +#include <linux/cache.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <asm/io.h> +#include <asm/pci.h> +#include <asm/dma.h> +#include <asm/uaccess.h> +#include <xen/gnttab.h> +#include <xen/interface/memory.h> +#include <asm-i386/mach-xen/asm/gnttab_dma.h> + +int swiotlb; +EXPORT_SYMBOL(swiotlb); + +#define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1))) + +/* + * Maximum allowable number of contiguous slabs to map, + * must be a power of 2. What is the appropriate value ? + * The complexity of {map,unmap}_single is linearly dependent on this value. + */ +#define IO_TLB_SEGSIZE 128 + +/* + * log of the size of each IO TLB slab. The number of slabs is command line + * controllable. + */ +#define IO_TLB_SHIFT 11 + +int swiotlb_force; + +static char *iotlb_virt_start; +static unsigned long iotlb_nslabs; + +/* + * Used to do a quick range check in swiotlb_unmap_single and + * swiotlb_sync_single_*, to see if the memory was in fact allocated by this + * API. + */ +static unsigned long iotlb_pfn_start, iotlb_pfn_end; + +/* Does the given dma address reside within the swiotlb aperture? 
*/ +static inline int in_swiotlb_aperture(dma_addr_t dev_addr) +{ + unsigned long pfn = mfn_to_local_pfn(dev_addr >> PAGE_SHIFT); + return (pfn_valid(pfn) + && (pfn >= iotlb_pfn_start) + && (pfn < iotlb_pfn_end)); +} + +/* + * When the IOMMU overflows we return a fallback buffer. This sets the size. + */ +static unsigned long io_tlb_overflow = 32*1024; + +void *io_tlb_overflow_buffer; + +/* + * This is a free list describing the number of free entries available from + * each index + */ +static unsigned int *io_tlb_list; +static unsigned int io_tlb_index; + +/* + * We need to save away the original address corresponding to a mapped entry + * for the sync operations. + */ +static struct phys_addr { + struct page *page; + unsigned int offset; +} *io_tlb_orig_addr; + +/* + * Protect the above data structures in the map and unmap calls + */ +static DEFINE_SPINLOCK(io_tlb_lock); + +static unsigned int dma_bits; +static unsigned int __initdata max_dma_bits = 32; +static int __init +setup_dma_bits(char *str) +{ + max_dma_bits = simple_strtoul(str, NULL, 0); + return 0; +} +__setup("dma_bits=", setup_dma_bits); + +static int __init +setup_io_tlb_npages(char *str) +{ + /* Unlike ia64, the size is aperture in megabytes, not 'slabs'! */ + if (isdigit(*str)) { + iotlb_nslabs = simple_strtoul(str, &str, 0) << + (20 - IO_TLB_SHIFT); + iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE); + /* Round up to power of two (xen_create_contiguous_region). */ + while (iotlb_nslabs & (iotlb_nslabs-1)) + iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1); + } + if (*str == ',') + ++str; + /* + * NB. 'force' enables the swiotlb, but doesn't force its use for + * every DMA like it does on native Linux. 'off' forcibly disables + * use of the swiotlb. + */ + if (!strcmp(str, "force")) + swiotlb_force = 1; + else if (!strcmp(str, "off")) + swiotlb_force = -1; + return 1; +} +__setup("swiotlb=", setup_io_tlb_npages); +/* make io_tlb_overflow tunable too? */ + +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the PCI DMA API. + */ +void +swiotlb_init_with_default_size (size_t default_size) +{ + unsigned long i, bytes; + int rc; + + if (!iotlb_nslabs) { + iotlb_nslabs = (default_size >> IO_TLB_SHIFT); + iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE); + /* Round up to power of two (xen_create_contiguous_region). 
*/ + while (iotlb_nslabs & (iotlb_nslabs-1)) + iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1); + } + + bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT); + + /* + * Get IO TLB memory from the low pages + */ + iotlb_virt_start = alloc_bootmem_low_pages(bytes); + if (!iotlb_virt_start) + panic("Cannot allocate SWIOTLB buffer!\n"); + + dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; + for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) { + do { + rc = xen_create_contiguous_region( + (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT), + get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), + dma_bits); + } while (rc && dma_bits++ < max_dma_bits); + if (rc) { + if (i == 0) + panic("No suitable physical memory available for SWIOTLB buffer!\n" + "Use dom0_mem Xen boot parameter to reserve\n" + "some DMA memory (e.g., dom0_mem=-128M).\n"); + iotlb_nslabs = i; + i <<= IO_TLB_SHIFT; + free_bootmem(__pa(iotlb_virt_start + i), bytes - i); + bytes = i; + for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) { + unsigned int bits = fls64(virt_to_bus(iotlb_virt_start + i - 1)); + + if (bits > dma_bits) + dma_bits = bits; + } + break; + } + } + + /* + * Allocate and initialize the free list array. This array is used + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE. + */ + io_tlb_list = alloc_bootmem(iotlb_nslabs * sizeof(int)); + for (i = 0; i < iotlb_nslabs; i++) + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_index = 0; + io_tlb_orig_addr = alloc_bootmem( + iotlb_nslabs * sizeof(*io_tlb_orig_addr)); + + /* + * Get the overflow emergency buffer + */ + io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); + if (!io_tlb_overflow_buffer) + panic("Cannot allocate SWIOTLB overflow buffer!\n"); + + do { + rc = xen_create_contiguous_region( + (unsigned long)io_tlb_overflow_buffer, + get_order(io_tlb_overflow), + dma_bits); + } while (rc && dma_bits++ < max_dma_bits); + if (rc) + panic("No suitable physical memory available for SWIOTLB overflow buffer!\n"); + + iotlb_pfn_start = __pa(iotlb_virt_start) >> PAGE_SHIFT; + iotlb_pfn_end = iotlb_pfn_start + (bytes >> PAGE_SHIFT); + + printk(KERN_INFO "Software IO TLB enabled:\n" + " Aperture: %lu megabytes\n" + " Kernel range: %p - %p\n" + " Address size: %u bits\n", + bytes >> 20, + iotlb_virt_start, iotlb_virt_start + bytes, + dma_bits); +} + +void +swiotlb_init(void) +{ + long ram_end; + size_t defsz = 64 * (1 << 20); /* 64MB default size */ + + if (swiotlb_force == 1) { + swiotlb = 1; + } else if ((swiotlb_force != -1) && + is_running_on_xen() && + is_initial_xendomain()) { + /* Domain 0 always has a swiotlb. */ + ram_end = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL); + if (ram_end <= 0x7ffff) + defsz = 2 * (1 << 20); /* 2MB on systems with <2GB RAM. */ + swiotlb = 1; + } + + if (swiotlb) + swiotlb_init_with_default_size(defsz); + else + printk(KERN_INFO "Software IO TLB disabled\n"); +} + +/* + * We use __copy_to_user_inatomic to transfer to the host buffer because the + * buffer may be mapped read-only (e.g., in blkback driver) but lower-level + * drivers map the buffer for DMA_BIDIRECTIONAL access. This causes an + * unnecessary copy from the aperture to the host buffer, and a page fault. 
+ */ +static void +__sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir) +{ + if (PageHighMem(buffer.page)) { + size_t len, bytes; + char *dev, *host, *kmp; + len = size; + while (len != 0) { + unsigned long flags; + + if (((bytes = len) + buffer.offset) > PAGE_SIZE) + bytes = PAGE_SIZE - buffer.offset; + local_irq_save(flags); /* protects KM_BOUNCE_READ */ + kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ); + dev = dma_addr + size - len; + host = kmp + buffer.offset; + if (dir == DMA_FROM_DEVICE) { + if (__copy_to_user_inatomic(host, dev, bytes)) + /* inaccessible */; + } else + memcpy(dev, host, bytes); + kunmap_atomic(kmp, KM_BOUNCE_READ); + local_irq_restore(flags); + len -= bytes; + buffer.page++; + buffer.offset = 0; + } + } else { + char *host = (char *)phys_to_virt( + page_to_pseudophys(buffer.page)) + buffer.offset; + if (dir == DMA_FROM_DEVICE) { + if (__copy_to_user_inatomic(host, dma_addr, size)) + /* inaccessible */; + } else if (dir == DMA_TO_DEVICE) + memcpy(dma_addr, host, size); + } +} + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ +static void * +map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir) +{ + unsigned long flags; + char *dma_addr; + unsigned int nslots, stride, index, wrap; + struct phys_addr slot_buf; + int i; + + /* + * For mappings greater than a page, we limit the stride (and + * hence alignment) to a page size. + */ + nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + if (size > PAGE_SIZE) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else + stride = 1; + + BUG_ON(!nslots); + + /* + * Find suitable number of IO TLB entries size that will fit this + * request and allocate a buffer from that IO TLB pool. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + wrap = index = ALIGN(io_tlb_index, stride); + + if (index >= iotlb_nslabs) + wrap = index = 0; + + do { + /* + * If we find a slot that indicates we have 'nslots' + * number of contiguous buffers, we allocate the + * buffers from that slot and mark the entries as '0' + * indicating unavailable. + */ + if (io_tlb_list[index] >= nslots) { + int count = 0; + + for (i = index; i < (int)(index + nslots); i++) + io_tlb_list[i] = 0; + for (i = index - 1; + (OFFSET(i, IO_TLB_SEGSIZE) != + IO_TLB_SEGSIZE -1) && io_tlb_list[i]; + i--) + io_tlb_list[i] = ++count; + dma_addr = iotlb_virt_start + + (index << IO_TLB_SHIFT); + + /* + * Update the indices to avoid searching in + * the next round. + */ + io_tlb_index = + ((index + nslots) < iotlb_nslabs + ? (index + nslots) : 0); + + goto found; + } + index += stride; + if (index >= iotlb_nslabs) + index = 0; + } while (index != wrap); + + spin_unlock_irqrestore(&io_tlb_lock, flags); + return NULL; + } + found: + spin_unlock_irqrestore(&io_tlb_lock, flags); + + /* + * Save away the mapping from the original address to the DMA address. + * This is needed when we sync the memory. Then we sync the buffer if + * needed. 
+ */ + slot_buf = buffer; + for (i = 0; i < nslots; i++) { + slot_buf.page += slot_buf.offset >> PAGE_SHIFT; + slot_buf.offset &= PAGE_SIZE - 1; + io_tlb_orig_addr[index+i] = slot_buf; + slot_buf.offset += 1 << IO_TLB_SHIFT; + } + if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL)) + __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE); + + return dma_addr; +} + +static struct phys_addr dma_addr_to_phys_addr(char *dma_addr) +{ + int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT; + struct phys_addr buffer = io_tlb_orig_addr[index]; + buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1); + buffer.page += buffer.offset >> PAGE_SHIFT; + buffer.offset &= PAGE_SIZE - 1; + return buffer; +} + +/* + * dma_addr is the kernel virtual address of the bounce buffer to unmap. + */ +static void +unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +{ + unsigned long flags; + int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT; + struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr); + + /* + * First, sync the memory before unmapping the entry + */ + if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)) + __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE); + + /* + * Return the buffer to the free list by setting the corresponding + * entries to indicate the number of contiguous entries available. + * While returning the entries to the free list, we merge the entries + * with slots below and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? + io_tlb_list[index + nslots] : 0); + /* + * Step 1: return the slots to the free list, merging the + * slots with succeeding slots + */ + for (i = index + nslots - 1; i >= index; i--) + io_tlb_list[i] = ++count; + /* + * Step 2: merge the returned slots with the preceding slots, + * if available (non-zero) + */ + for (i = index - 1; + (OFFSET(i, IO_TLB_SEGSIZE) != + IO_TLB_SEGSIZE -1) && io_tlb_list[i]; + i--) + io_tlb_list[i] = ++count; + } + spin_unlock_irqrestore(&io_tlb_lock, flags); +} + +static void +sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +{ + struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr); + BUG_ON((dir != DMA_FROM_DEVICE) && (dir != DMA_TO_DEVICE)); + __sync_single(buffer, dma_addr, size, dir); +} + +static void +swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) +{ + /* + * Ran out of IOMMU space for this operation. This is very bad. + * Unfortunately the drivers cannot handle this operation properly, + * unless they check for pci_dma_mapping_error (most don't). + * When the mapping is small enough return a static buffer to limit + * the damage, or panic when the transfer is too big. + */ + printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at " + "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?"); + + if (size > io_tlb_overflow && do_panic) { + if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic("PCI-DMA: Memory would be corrupted\n"); + if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic("PCI-DMA: Random memory would be DMAed\n"); + } +} + +/* + * Map a single buffer of the indicated size for DMA in streaming mode. The + * PCI address to use is returned. + * + * Once the device is given the dma address, the device owns this memory until + * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. 
+ */ +dma_addr_t +swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) +{ + dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) + + offset_in_page(ptr); + void *map; + struct phys_addr buffer; + + BUG_ON(dir == DMA_NONE); + + /* + * If the pointer passed in happens to be in the device's DMA window, + * we can safely return the device addr and not worry about bounce + * buffering it. + */ + if (!range_straddles_page_boundary(__pa(ptr), size) && + !address_needs_mapping(hwdev, dev_addr)) + return dev_addr; + + /* + * Oh well, have to allocate and map a bounce buffer. + */ + gnttab_dma_unmap_page(dev_addr); + buffer.page = virt_to_page(ptr); + buffer.offset = (unsigned long)ptr & ~PAGE_MASK; + map = map_single(hwdev, buffer, size, dir); + if (!map) { + swiotlb_full(hwdev, size, dir, 1); + map = io_tlb_overflow_buffer; + } + + dev_addr = virt_to_bus(map); + return dev_addr; +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size must + * match what was provided for in a previous swiotlb_map_single call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guaranteed to see + * whatever the device wrote there. + */ +void +swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, + int dir) +{ + BUG_ON(dir == DMA_NONE); + if (in_swiotlb_aperture(dev_addr)) + unmap_single(hwdev, bus_to_virt(dev_addr), size, dir); + else + gnttab_dma_unmap_page(dev_addr); +} + +/* + * Make physical memory consistent for a single streaming mode DMA translation + * after a transfer. + * + * If you perform a swiotlb_map_single() but wish to interrogate the buffer + * using the cpu, yet do not wish to tear down the PCI dma mapping, you must + * call this function before doing so. At the next point you give the PCI dma + * address back to the card, you must first perform a + * swiotlb_dma_sync_for_device, and then the device again owns the buffer. + */ +void +swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) +{ + BUG_ON(dir == DMA_NONE); + if (in_swiotlb_aperture(dev_addr)) + sync_single(hwdev, bus_to_virt(dev_addr), size, dir); +} + +void +swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) +{ + BUG_ON(dir == DMA_NONE); + if (in_swiotlb_aperture(dev_addr)) + sync_single(hwdev, bus_to_virt(dev_addr), size, dir); +} + +/* + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * This is the scatter-gather version of the above swiotlb_map_single + * interface. Here the scatter gather list elements are each tagged with the + * appropriate dma address and length. They are obtained via + * sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for swiotlb_map_single are the + * same here. 
+ */ +int +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, + int dir) +{ + struct phys_addr buffer; + dma_addr_t dev_addr; + char *map; + int i; + + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) { + dev_addr = gnttab_dma_map_page(sg->page) + sg->offset; + + if (range_straddles_page_boundary(page_to_pseudophys(sg->page) + + sg->offset, sg->length) + || address_needs_mapping(hwdev, dev_addr)) { + gnttab_dma_unmap_page(dev_addr); + buffer.page = sg->page; + buffer.offset = sg->offset; + map = map_single(hwdev, buffer, sg->length, dir); + if (!map) { + /* Don't panic here, we expect map_sg users + to do proper error handling. */ + swiotlb_full(hwdev, sg->length, dir, 0); + swiotlb_unmap_sg(hwdev, sg - i, i, dir); + sg[0].dma_length = 0; + return 0; + } + sg->dma_address = (dma_addr_t)virt_to_bus(map); + } else + sg->dma_address = dev_addr; + sg->dma_length = sg->length; + } + return nelems; +} + +/* + * Unmap a set of streaming mode DMA translations. Again, cpu read rules + * concerning calls here are the same as for swiotlb_unmap_single() above. + */ +void +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, + int dir) +{ + int i; + + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) + if (in_swiotlb_aperture(sg->dma_address)) + unmap_single(hwdev, + (void *)bus_to_virt(sg->dma_address), + sg->dma_length, dir); + else + gnttab_dma_unmap_page(sg->dma_address); +} + +/* + * Make physical memory consistent for a set of streaming mode DMA translations + * after a transfer. + * + * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules + * and usage. + */ +void +swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, int dir) +{ + int i; + + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) + if (in_swiotlb_aperture(sg->dma_address)) + sync_single(hwdev, + (void *)bus_to_virt(sg->dma_address), + sg->dma_length, dir); +} + +void +swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, int dir) +{ + int i; + + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) + if (in_swiotlb_aperture(sg->dma_address)) + sync_single(hwdev, + (void *)bus_to_virt(sg->dma_address), + sg->dma_length, dir); +} + +#ifdef CONFIG_HIGHMEM + +dma_addr_t +swiotlb_map_page(struct device *hwdev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + struct phys_addr buffer; + dma_addr_t dev_addr; + char *map; + + dev_addr = gnttab_dma_map_page(page) + offset; + if (address_needs_mapping(hwdev, dev_addr)) { + gnttab_dma_unmap_page(dev_addr); + buffer.page = page; + buffer.offset = offset; + map = map_single(hwdev, buffer, size, direction); + if (!map) { + swiotlb_full(hwdev, size, direction, 1); + map = io_tlb_overflow_buffer; + } + dev_addr = (dma_addr_t)virt_to_bus(map); + } + + return dev_addr; +} + +void +swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address, + size_t size, enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + if (in_swiotlb_aperture(dma_address)) + unmap_single(hwdev, bus_to_virt(dma_address), size, direction); + else + gnttab_dma_unmap_page(dma_address); +} + +#endif + +int +swiotlb_dma_mapping_error(dma_addr_t dma_addr) +{ + return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); +} + +/* + * Return whether the given PCI device DMA address mask can be supported + * properly. 
For example, if your device can only drive the low 24-bits + * during PCI bus mastering, then you would pass 0x00ffffff as the mask to + * this function. + */ +int +swiotlb_dma_supported (struct device *hwdev, u64 mask) +{ + return (mask >= ((1UL << dma_bits) - 1)); +} + +EXPORT_SYMBOL(swiotlb_init); +EXPORT_SYMBOL(swiotlb_map_single); +EXPORT_SYMBOL(swiotlb_unmap_single); +EXPORT_SYMBOL(swiotlb_map_sg); +EXPORT_SYMBOL(swiotlb_unmap_sg); +EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); +EXPORT_SYMBOL(swiotlb_sync_single_for_device); +EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); +EXPORT_SYMBOL(swiotlb_sync_sg_for_device); +EXPORT_SYMBOL(swiotlb_dma_mapping_error); +EXPORT_SYMBOL(swiotlb_dma_supported); diff -r e6974eba1bae -r b0bf9ba32bfe scripts/Makefile.build --- a/scripts/Makefile.build Thu Jul 26 14:33:50 2007 -0600 +++ b/scripts/Makefile.build Fri Jul 27 08:15:50 2007 -0600 @@ -66,6 +66,18 @@ endif ifndef obj $(warning kbuild: Makefile.build is included improperly) +endif + +ifeq ($(CONFIG_XEN),y) +$(objtree)/scripts/Makefile.xen: $(srctree)/scripts/Makefile.xen.awk $(srctree)/scripts/Makefile.build + @echo ' Updating $@' + @$(AWK) -f $< $(filter-out $<,$^) >$@ + +xen-src-single-used-m := $(patsubst $(srctree)/%,%,$(wildcard $(addprefix $(srctree)/,$(single-used-m:.o=-xen.c)))) +xen-single-used-m := $(xen-src-single-used-m:-xen.c=.o) +single-used-m := $(filter-out $(xen-single-used-m),$(single-used-m)) + +-include $(objtree)/scripts/Makefile.xen endif # =========================================================================== diff -r e6974eba1bae -r b0bf9ba32bfe scripts/Makefile.lib --- a/scripts/Makefile.lib Thu Jul 26 14:33:50 2007 -0600 +++ b/scripts/Makefile.lib Fri Jul 27 08:15:50 2007 -0600 @@ -12,6 +12,12 @@ obj-m := $(filter-out $(obj-y),$(obj-m)) # Filter out objects already built-in lib-y := $(filter-out $(obj-y), $(sort $(lib-y) $(lib-m))) + +# Remove objects forcibly disabled + +obj-y := $(filter-out $(disabled-obj-y),$(obj-y)) +obj-m := $(filter-out $(disabled-obj-y),$(obj-m)) +lib-y := $(filter-out $(disabled-obj-y),$(lib-y)) # Handle objects in subdirs diff -r e6974eba1bae -r b0bf9ba32bfe scripts/Makefile.xen.awk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/Makefile.xen.awk Fri Jul 27 08:15:50 2007 -0600 @@ -0,0 +1,34 @@ +BEGIN { + is_rule = 0 +} + +/^[[:space:]]*#/ { + next +} + +/^[[:space:]]*$/ { + if (is_rule) + print("") + is_rule = 0 + next +} + +/:[[:space:]]*%\.[cS][[:space:]]/ { + line = gensub(/%.([cS])/, "%-xen.\\1", "g", $0) + line = gensub(/(single-used-m)/, "xen-\\1", "g", line) + print line + is_rule = 1 + next +} + +/^[^\t]$/ { + if (is_rule) + print("") + is_rule = 0 + next +} + +is_rule { + print $0 + next +} _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog