[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merge.
# HG changeset patch # User adsharma@xxxxxxxxxxxxxxxxxxxx # Node ID 3a8f27c6d56c7632db60b81e6a2d2aa529c2749c # Parent 483ac5017c9c9a5cf3de5f50e219b08d9bd5b581 # Parent 2052ce3345c1441f96729bc59120b3352a995112 Merge. diff -r 483ac5017c9c -r 3a8f27c6d56c Makefile --- a/Makefile Sat Aug 20 00:47:24 2005 +++ b/Makefile Mon Aug 22 18:00:37 2005 @@ -172,6 +172,12 @@ rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/miniterm rm -rf $(D)/boot/*xen* rm -rf $(D)/lib/modules/*xen* + rm -rf $(D)/usr/bin/cpuperf-perfcntr $(D)/usr/bin/cpuperf-xen + rm -rf $(D)/usr/bin/xc_shadow + rm -rf $(D)/usr/share/xen $(D)/usr/libexec/xen + rm -rf $(D)/usr/share/man/man1/xen* + rm -rf $(D)/usr/share/man/man8/xen* + rm -rf $(D)/usr/lib/xen # Legacy targets for compatibility linux24: diff -r 483ac5017c9c -r 3a8f27c6d56c buildconfigs/Rules.mk --- a/buildconfigs/Rules.mk Sat Aug 20 00:47:24 2005 +++ b/buildconfigs/Rules.mk Mon Aug 22 18:00:37 2005 @@ -66,6 +66,7 @@ PATCHDIRS := $(wildcard patches/*-*) +ifneq ($(PATCHDIRS),) -include $(patsubst %,%/.makedep,$(PATCHDIRS)) $(patsubst patches/%,patches/%/.makedep,$(PATCHDIRS)): patches/%/.makedep: @@ -80,6 +81,7 @@ ([ -d patches/$* ] && \ for i in patches/$*/*.patch ; do ( cd $(@D) ; patch -p1 <../$$i || exit 1 ) ; done) || true touch $@ # update timestamp to avoid rebuild +endif %-build: $(MAKE) -f buildconfigs/mk.$* build diff -r 483ac5017c9c -r 3a8f27c6d56c extras/mini-os/include/time.h --- a/extras/mini-os/include/time.h Sat Aug 20 00:47:24 2005 +++ b/extras/mini-os/include/time.h Mon Aug 22 18:00:37 2005 @@ -28,7 +28,7 @@ * of real time into system time */ typedef s64 s_time_t; -#define NOW() ((s_time_t)get_s_time()) +#define NOW() ((s_time_t)monotonic_clock()) #define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL ) #define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL ) #define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL ) @@ -36,7 +36,8 @@ #define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL ) #define Time_Max ((s_time_t) 0x7fffffffffffffffLL) #define 
FOREVER Time_Max - +#define NSEC_TO_USEC(_nsec) (_nsec / 1000UL) +#define NSEC_TO_SEC(_nsec) (_nsec / 1000000000ULL) /* wall clock time */ typedef long time_t; @@ -44,6 +45,11 @@ struct timeval { time_t tv_sec; /* seconds */ suseconds_t tv_usec; /* microseconds */ +}; + +struct timespec { + time_t ts_sec; + long ts_nsec; }; diff -r 483ac5017c9c -r 3a8f27c6d56c extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Sat Aug 20 00:47:24 2005 +++ b/extras/mini-os/kernel.c Mon Aug 22 18:00:37 2005 @@ -132,20 +132,6 @@ i = 0; for ( ; ; ) { - if(i >= 1000) - { - { - unsigned long saved; - __asm__ ("movl %%esp, %0" - :"=r"(saved) /* y is output operand */ - /* x is input operand */); -// :"a"); /* %eax is clobbered register */ - printk("ESP=0x%lx\n", saved); - } - - printk("1000 bloks\n"); - i=0; - } // HYPERVISOR_yield(); block(1); i++; diff -r 483ac5017c9c -r 3a8f27c6d56c extras/mini-os/time.c --- a/extras/mini-os/time.c Sat Aug 20 00:47:24 2005 +++ b/extras/mini-os/time.c Mon Aug 22 18:00:37 2005 @@ -43,19 +43,20 @@ * Time functions *************************************************************************/ -/* Cached *multiplier* to convert TSC counts to microseconds. - * (see the equation below). - * Equal to 2^32 * (1 / (clocks per usec) ). - * Initialized in time_init. - */ -static unsigned long fast_gettimeoffset_quotient; - - /* These are peridically updated in shared_info, and then copied here. */ -static u32 shadow_tsc_stamp; -static s64 shadow_system_time; -static u32 shadow_time_version; -static struct timeval shadow_tv; +struct shadow_time_info { + u64 tsc_timestamp; /* TSC at last update of time vals. */ + u64 system_timestamp; /* Time, in nanosecs, since boot. 
*/ + u32 tsc_to_nsec_mul; + u32 tsc_to_usec_mul; + int tsc_shift; + u32 version; +}; +static struct timespec shadow_ts; +static u32 shadow_ts_version; + +static struct shadow_time_info shadow; + #ifndef rmb #define rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") @@ -63,116 +64,150 @@ #define HANDLE_USEC_OVERFLOW(_tv) \ do { \ - while ( (_tv).tv_usec >= 1000000 ) \ + while ( (_tv)->tv_usec >= 1000000 ) \ { \ - (_tv).tv_usec -= 1000000; \ - (_tv).tv_sec++; \ + (_tv)->tv_usec -= 1000000; \ + (_tv)->tv_sec++; \ } \ } while ( 0 ) +static inline int time_values_up_to_date(void) +{ + struct vcpu_time_info *src = &HYPERVISOR_shared_info->vcpu_time[0]; + + return (shadow.version == src->version); +} + + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + */ +static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) +{ + u64 product; +#ifdef __i386__ + u32 tmp1, tmp2; +#endif + + if ( shift < 0 ) + delta >>= -shift; + else + delta <<= shift; + +#ifdef __i386__ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "add %4,%%eax ; " + "xor %5,%5 ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); +#else + __asm__ ( + "mul %%rdx ; shrd $32,%%rdx,%%rax" + : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); +#endif + + return product; +} + + +static unsigned long get_nsec_offset(void) +{ + u64 now, delta; + rdtscll(now); + delta = now - shadow.tsc_timestamp; + return scale_delta(delta, shadow.tsc_to_nsec_mul, shadow.tsc_shift); +} + + static void get_time_values_from_xen(void) { - do { - shadow_time_version = HYPERVISOR_shared_info->time_version2; - rmb(); - shadow_tv.tv_sec = HYPERVISOR_shared_info->wc_sec; - shadow_tv.tv_usec = HYPERVISOR_shared_info->wc_usec; - shadow_tsc_stamp = (u32)HYPERVISOR_shared_info->tsc_timestamp; - shadow_system_time = HYPERVISOR_shared_info->system_time; 
- rmb(); - } - while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 ); -} - - -#define TIME_VALUES_UP_TO_DATE \ - (shadow_time_version == HYPERVISOR_shared_info->time_version2) - -static u32 get_time_delta_usecs(void) -{ - register unsigned long eax, edx; - - /* Read the Time Stamp Counter */ - - rdtsc(eax,edx); - - /* .. relative to previous jiffy (32 bits is enough) */ - eax -= shadow_tsc_stamp; - - /* - * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient - * = (tsc_low delta) * (usecs_per_clock) - * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) - * - * Using a mull instead of a divl saves up to 31 clock cycles - * in the critical path. - */ - - __asm__("mull %2" - :"=a" (eax), "=d" (edx) - :"rm" (fast_gettimeoffset_quotient), - "0" (eax)); - - /* our adjusted time offset in microseconds */ - return edx; -} - -s64 get_s_time (void) -{ - u64 u_delta; - s64 ret; - - again: - - u_delta = get_time_delta_usecs(); - ret = shadow_system_time + (1000 * u_delta); - - if ( unlikely(!TIME_VALUES_UP_TO_DATE) ) - { - /* - * We may have blocked for a long time, rendering our calculations - * invalid (e.g. the time delta may have overflowed). Detect that - * and recalculate with fresh values. - */ - get_time_values_from_xen(); - goto again; - } - - return ret; -} + struct vcpu_time_info *src = &HYPERVISOR_shared_info->vcpu_time[0]; + + do { + shadow.version = src->version; + rmb(); + shadow.tsc_timestamp = src->tsc_timestamp; + shadow.system_timestamp = src->system_time; + shadow.tsc_to_nsec_mul = src->tsc_to_system_mul; + shadow.tsc_shift = src->tsc_shift; + rmb(); + } + while ((src->version & 1) | (shadow.version ^ src->version)); + + shadow.tsc_to_usec_mul = shadow.tsc_to_nsec_mul / 1000; +} + + + + +/* monotonic_clock(): returns # of nanoseconds passed since time_init() + * Note: This function is required to return accurate + * time even in the absence of multiple timer ticks. 
+ */ +u64 monotonic_clock(void) +{ + u64 time; + u32 local_time_version; + + do { + local_time_version = shadow.version; + rmb(); + time = shadow.system_timestamp + get_nsec_offset(); + if (!time_values_up_to_date()) + get_time_values_from_xen(); + rmb(); + } while (local_time_version != shadow.version); + + return time; +} + +static void update_wallclock(void) +{ + shared_info_t *s = HYPERVISOR_shared_info; + + do { + shadow_ts_version = s->wc_version; + rmb(); + shadow_ts.ts_sec = s->wc_sec; + shadow_ts.ts_nsec = s->wc_nsec; + rmb(); + } + while ((s->wc_version & 1) | (shadow_ts_version ^ s->wc_version)); +} + void gettimeofday(struct timeval *tv) { - struct timeval _tv; - - do { - get_time_values_from_xen(); - _tv.tv_usec = get_time_delta_usecs(); - _tv.tv_sec = shadow_tv.tv_sec; - _tv.tv_usec += shadow_tv.tv_usec; - } - while ( unlikely(!TIME_VALUES_UP_TO_DATE) ); - - HANDLE_USEC_OVERFLOW(_tv); - *tv = _tv; -} + u64 nsec = monotonic_clock(); + nsec += shadow_ts.ts_nsec; + + + tv->tv_sec = shadow_ts.ts_sec; + tv->tv_sec += NSEC_TO_SEC(nsec); + tv->tv_usec = NSEC_TO_USEC(nsec % 1000000000UL); +} + static void print_current_time(void) { - struct timeval tv; - - get_time_values_from_xen(); + struct timeval tv; gettimeofday(&tv); printk("T(s=%ld us=%ld)\n", tv.tv_sec, tv.tv_usec); } + void block(u32 millisecs) { struct timeval tv; gettimeofday(&tv); - //printk("tv.tv_sec=%ld, tv.tv_usec=%ld, shadow_system_time=%lld\n", tv.tv_sec, tv.tv_usec, shadow_system_time ); - HYPERVISOR_set_timer_op(get_s_time() + 1000000LL * (s64) millisecs); + HYPERVISOR_set_timer_op(monotonic_clock() + 1000000LL * (s64) millisecs); HYPERVISOR_block(); } @@ -185,7 +220,7 @@ static int i; get_time_values_from_xen(); - + update_wallclock(); i++; if (i >= 1000) { print_current_time(); @@ -197,24 +232,5 @@ void init_time(void) { - u64 __cpu_khz; - unsigned long cpu_khz; - - __cpu_khz = HYPERVISOR_shared_info->cpu_freq; - - cpu_khz = (u32) (__cpu_khz/1000); - - printk("Xen reported: %lu.%03lu MHz 
processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz = - (2^32 * 1 / (clocks/us)) */ - { - unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (fast_gettimeoffset_quotient), "=d" (edx) - :"r" (cpu_khz), - "0" (eax), "1" (edx)); - } - bind_virq(VIRQ_TIMER, &timer_handler); } diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/Kconfig --- a/linux-2.6-xen-sparse/arch/xen/Kconfig Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/Kconfig Mon Aug 22 18:00:37 2005 @@ -61,15 +61,6 @@ with the blktap. This option will be removed as the block drivers are modified to use grant tables. -config XEN_BLKDEV_GRANT - bool "Grant table substrate for block drivers" - depends on !XEN_BLKDEV_TAP_BE - default y - help - This introduces the use of grant tables as a data exhange mechanism - between the frontend and backend block drivers. This currently - conflicts with the block tap. - config XEN_NETDEV_BACKEND bool "Network-device backend driver" depends on XEN_PHYSDEV_ACCESS diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Mon Aug 22 18:00:37 2005 @@ -14,7 +14,6 @@ CONFIG_XEN_PHYSDEV_ACCESS=y CONFIG_XEN_BLKDEV_BACKEND=y # CONFIG_XEN_BLKDEV_TAP_BE is not set -CONFIG_XEN_BLKDEV_GRANT=y CONFIG_XEN_NETDEV_BACKEND=y CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Mon Aug 22 18:00:37 2005 @@ -14,7 +14,6 @@ CONFIG_XEN_PHYSDEV_ACCESS=y CONFIG_XEN_BLKDEV_BACKEND=y # CONFIG_XEN_BLKDEV_TAP_BE is not set -CONFIG_XEN_BLKDEV_GRANT=y 
CONFIG_XEN_NETDEV_BACKEND=y CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Mon Aug 22 18:00:37 2005 @@ -12,7 +12,6 @@ # # CONFIG_XEN_PRIVILEGED_GUEST is not set # CONFIG_XEN_PHYSDEV_ACCESS is not set -CONFIG_XEN_BLKDEV_GRANT=y CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y CONFIG_XEN_NETDEV_GRANT_TX=y diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Mon Aug 22 18:00:37 2005 @@ -12,7 +12,6 @@ # # CONFIG_XEN_PRIVILEGED_GUEST is not set # CONFIG_XEN_PHYSDEV_ACCESS is not set -CONFIG_XEN_BLKDEV_GRANT=y CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y CONFIG_XEN_NETDEV_GRANT_TX=y diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Mon Aug 22 18:00:37 2005 @@ -14,7 +14,6 @@ CONFIG_XEN_PHYSDEV_ACCESS=y CONFIG_XEN_BLKDEV_BACKEND=y # CONFIG_XEN_BLKDEV_TAP_BE is not set -CONFIG_XEN_BLKDEV_GRANT=y CONFIG_XEN_NETDEV_BACKEND=y CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Mon Aug 22 18:00:37 2005 @@ -14,7 +14,6 @@ CONFIG_XEN_PHYSDEV_ACCESS=y CONFIG_XEN_BLKDEV_BACKEND=y # CONFIG_XEN_BLKDEV_TAP_BE is not set -CONFIG_XEN_BLKDEV_GRANT=y 
CONFIG_XEN_NETDEV_BACKEND=y CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Mon Aug 22 18:00:37 2005 @@ -923,7 +923,8 @@ ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT; boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt); - printk("boot error: %ld\n", boot_error); + if (boot_error) + printk("boot error: %ld\n", boot_error); if (!boot_error) { /* diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Mon Aug 22 18:00:37 2005 @@ -43,20 +43,18 @@ #define IO_TLB_SHIFT 11 int swiotlb_force; +static char *iotlb_virt_start; +static unsigned long iotlb_nslabs; /* * Used to do a quick range check in swiotlb_unmap_single and * swiotlb_sync_single_*, to see if the memory was in fact allocated by this * API. */ -static char *iotlb_virt_start, *iotlb_virt_end; -static dma_addr_t iotlb_bus_start, iotlb_bus_end; - -/* - * The number of IO TLB blocks (in groups of 64) betweeen iotlb_virt_start and - * iotlb_virt_end. This is command line adjustable via setup_io_tlb_npages. - */ -static unsigned long iotlb_nslabs; +static dma_addr_t iotlb_bus_start, iotlb_bus_mask; + +/* Does the given dma address reside within the swiotlb aperture? */ +#define in_swiotlb_aperture(a) (!(((a) ^ iotlb_bus_start) & iotlb_bus_mask)) /* * When the IOMMU overflows we return a fallback buffer. This sets the size. @@ -94,6 +92,9 @@ iotlb_nslabs = simple_strtoul(str, &str, 0) << (20 - IO_TLB_SHIFT); iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE); + /* Round up to power of two (xen_create_contiguous_region). 
*/ + while (iotlb_nslabs & (iotlb_nslabs-1)) + iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1); } if (*str == ',') ++str; @@ -120,6 +121,9 @@ if (!iotlb_nslabs) { iotlb_nslabs = (default_size >> IO_TLB_SHIFT); iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE); + /* Round up to power of two (xen_create_contiguous_region). */ + while (iotlb_nslabs & (iotlb_nslabs-1)) + iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1); } bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT); @@ -133,17 +137,12 @@ "Use dom0_mem Xen boot parameter to reserve\n" "some DMA memory (e.g., dom0_mem=-128M).\n"); - for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) - xen_create_contiguous_region( - (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT), - get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT)); - - iotlb_virt_end = iotlb_virt_start + bytes; + xen_create_contiguous_region( + (unsigned long)iotlb_virt_start, get_order(bytes)); /* * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between iotlb_virt_start and iotlb_virt_end. + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE. 
*/ io_tlb_list = alloc_bootmem(iotlb_nslabs * sizeof(int)); for (i = 0; i < iotlb_nslabs; i++) @@ -156,15 +155,19 @@ * Get the overflow emergency buffer */ io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); + iotlb_bus_start = virt_to_bus(iotlb_virt_start); - iotlb_bus_end = iotlb_bus_start + bytes; + iotlb_bus_mask = ~(dma_addr_t)(bytes - 1); + printk(KERN_INFO "Software IO TLB enabled: \n" " Aperture: %lu megabytes\n" " Bus range: 0x%016lx - 0x%016lx\n" " Kernel range: 0x%016lx - 0x%016lx\n", bytes >> 20, - (unsigned long)iotlb_bus_start, (unsigned long)iotlb_bus_end, - (unsigned long)iotlb_virt_start, (unsigned long)iotlb_virt_end); + (unsigned long)iotlb_bus_start, + (unsigned long)iotlb_bus_start + bytes, + (unsigned long)iotlb_virt_start, + (unsigned long)iotlb_virt_start + bytes); } void @@ -444,7 +447,7 @@ int dir) { BUG_ON(dir == DMA_NONE); - if ((dev_addr >= iotlb_bus_start) && (dev_addr < iotlb_bus_end)) + if (in_swiotlb_aperture(dev_addr)) unmap_single(hwdev, bus_to_virt(dev_addr), size, dir); } @@ -463,7 +466,7 @@ size_t size, int dir) { BUG_ON(dir == DMA_NONE); - if ((dev_addr >= iotlb_bus_start) && (dev_addr < iotlb_bus_end)) + if (in_swiotlb_aperture(dev_addr)) sync_single(hwdev, bus_to_virt(dev_addr), size, dir); } @@ -472,7 +475,7 @@ size_t size, int dir) { BUG_ON(dir == DMA_NONE); - if ((dev_addr >= iotlb_bus_start) && (dev_addr < iotlb_bus_end)) + if (in_swiotlb_aperture(dev_addr)) sync_single(hwdev, bus_to_virt(dev_addr), size, dir); } @@ -610,7 +613,7 @@ size_t size, enum dma_data_direction direction) { BUG_ON(direction == DMA_NONE); - if ((dma_address >= iotlb_bus_start) && (dma_address < iotlb_bus_end)) + if (in_swiotlb_aperture(dma_address)) unmap_single(hwdev, bus_to_virt(dma_address), size, direction); } diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c 
Mon Aug 22 18:00:37 2005 @@ -405,54 +405,6 @@ balloon_unlock(flags); } - -unsigned long allocate_empty_lowmem_region(unsigned long pages) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long *pfn_array; - unsigned long vstart; - unsigned long i; - unsigned int order = get_order(pages*PAGE_SIZE); - - vstart = __get_free_pages(GFP_KERNEL, order); - if (vstart == 0) - return 0UL; - - scrub_pages(vstart, 1 << order); - - pfn_array = vmalloc((1<<order) * sizeof(*pfn_array)); - BUG_ON(pfn_array == NULL); - - for (i = 0; i < (1<<order); i++) { - pgd = pgd_offset_k( (vstart + (i*PAGE_SIZE))); - pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE))); - pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE))); - pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); - pfn_array[i] = pte_mfn(*pte); -#ifdef CONFIG_X86_64 - xen_l1_entry_update(pte, __pte(0)); -#else - BUG_ON(HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE), - __pte_ma(0), 0)); -#endif - phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = - INVALID_P2M_ENTRY; - } - - flush_tlb_all(); - - balloon_put_pages(pfn_array, 1 << order); - - vfree(pfn_array); - - return vstart; -} - -EXPORT_SYMBOL(allocate_empty_lowmem_region); - /* * Local variables: * c-file-style: "linux" diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/i386/mm/init.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Mon Aug 22 18:00:37 2005 @@ -352,13 +352,6 @@ swapper_pg_dir = pgd_base; init_mm.pgd = pgd_base; -#ifdef CONFIG_X86_PAE - int i; - /* Init entries of the first-level page table to the zero page */ - for (i = 0; i < PTRS_PER_PGD; i++) - set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); -#endif - /* Enable PSE if available */ if (cpu_has_pse) { set_in_cr4(X86_CR4_PSE); @@ -383,17 +376,6 @@ page_table_range_init(vaddr, 0, pgd_base); permanent_kmaps_init(pgd_base); - -#if 0 /* def CONFIG_X86_PAE */ - /* - * Add low 
memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. - */ - set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]); -#endif } #if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND) diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile --- a/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Mon Aug 22 18:00:37 2005 @@ -17,7 +17,7 @@ c-pci-$(CONFIG_X86_VISWS) := visws.o fixup.o pci-$(CONFIG_X86_VISWS) := c-pci-$(CONFIG_X86_NUMAQ) := numa.o -pci-$(CONFIG_X86_NUMAQ) := irq.o +l-pci-$(CONFIG_X86_NUMAQ) := irq.o obj-y += $(pci-y) c-obj-y += $(c-pci-y) common.o @@ -27,6 +27,7 @@ $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)): @ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@ -obj-y += $(c-obj-y) $(l-pci-y) +# Make sure irq.o gets linked in before common.o +obj-y += $(patsubst common.o,$(l-pci-y) common.o,$(c-obj-y)) clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link)) diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/kernel/reboot.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Mon Aug 22 18:00:37 2005 @@ -143,13 +143,8 @@ #define usbif_resume() do{}while(0) #endif -#ifdef CONFIG_XEN_BLKDEV_GRANT extern int gnttab_suspend(void); extern int gnttab_resume(void); -#else -#define gnttab_suspend() do{}while(0) -#define gnttab_resume() do{}while(0) -#endif #ifdef CONFIG_SMP extern void smp_suspend(void); diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile Mon Aug 22 18:00:37 2005 @@ -36,8 +36,8 @@ $(call 
if_changed,syscall) AFLAGS_vsyscall-int80.o = -m32 -I$(obj) -AFLAGS_vsyscall-sysenter.o = -m32 -AFLAGS_vsyscall-syscall.o = -m32 +AFLAGS_vsyscall-sysenter.o = -m32 -I$(obj) +AFLAGS_vsyscall-syscall.o = -m32 -I$(obj) CFLAGS_ia32_ioctl.o += -Ifs/ s-link := vsyscall-syscall.o vsyscall-sysenter.o vsyscall-sigreturn.o @@ -48,13 +48,11 @@ $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)): @ln -fsn $(srctree)/arch/x86_64/ia32/$(notdir $@) $@ -$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S ../../i386/kernel/vsyscall-note.S -$(obj)/vsyscall-sysenter.S: $(obj)/vsyscall-sigreturn.S -$(obj)/vsyscall-syscall.S: $(obj)/vsyscall-sigreturn.S +$(obj)/vsyscall-int80.o $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-syscall.o: \ + $(obj)/vsyscall-sigreturn.S $(obj)/../../i386/kernel/vsyscall-note.S -../../i386/kernel/vsyscall-note.S: - @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $(srctree)/arch/xen/i386/kernel/$(notdir $@) - make -C arch/xen/i386/kernel vsyscall-note.S +$(obj)/../../i386/kernel/vsyscall-note.S: + @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@ obj-y += $(c-obj-y) $(s-obj-y) diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c Mon Aug 22 18:00:37 2005 @@ -210,15 +210,16 @@ __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); } -extern void __set_fixmap_user (enum fixed_addresses, unsigned long, pgprot_t); - +#ifdef CONFIG_XEN static void __init map_vsyscall_user(void) { + extern void __set_fixmap_user(enum fixed_addresses, unsigned long, pgprot_t); extern char __vsyscall_0; unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); __set_fixmap_user(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); } +#endif static int __init vsyscall_init(void) { @@ -227,7 +228,10 @@ BUG_ON((unsigned long) &vtime 
!= VSYSCALL_ADDR(__NR_vtime)); BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); map_vsyscall(); - map_vsyscall_user(); /* establish tranlation for user address space */ +#ifdef CONFIG_XEN + map_vsyscall_user(); + sysctl_vsyscall = 0; /* disable vgettimeofay() */ +#endif #ifdef CONFIG_SYSCTL register_sysctl_table(kernel_root_table2, 0); #endif diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile Mon Aug 22 18:00:37 2005 @@ -30,8 +30,9 @@ $(patsubst %.o,$(obj)/%.c,$(c-i386-obj-y)): @ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@ -obj-y += $(c-i386-obj-y) $(c-obj-y) -obj-y += $(c-xen-obj-y) +# Make sure irq.o gets linked in before common.o +obj-y += $(patsubst common.o,$(c-xen-obj-y) common.o,$(c-i386-obj-y)) +obj-y += $(c-obj-y) clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link)) clean-files += $(patsubst %.o,%.c,$(c-i386-obj-y) $(c-i386-obj-)) diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Mon Aug 22 18:00:37 2005 @@ -83,12 +83,15 @@ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) /* Use the private and mapping fields of struct page as a list. 
*/ -#define PAGE_TO_LIST(p) ( (struct list_head *)&p->private ) -#define LIST_TO_PAGE(l) ( list_entry( ((unsigned long *)l), \ - struct page, private ) ) -#define UNLIST_PAGE(p) do { list_del(PAGE_TO_LIST(p)); \ - p->mapping = NULL; \ - p->private = 0; } while(0) +#define PAGE_TO_LIST(p) ((struct list_head *)&p->private) +#define LIST_TO_PAGE(l) \ + (list_entry(((unsigned long *)l), struct page, private)) +#define UNLIST_PAGE(p) \ + do { \ + list_del(PAGE_TO_LIST(p)); \ + p->mapping = NULL; \ + p->private = 0; \ + } while(0) #else /* There's a dedicated list field in struct page we can use. */ #define PAGE_TO_LIST(p) ( &p->list ) @@ -104,56 +107,53 @@ #endif #define IPRINTK(fmt, args...) \ - printk(KERN_INFO "xen_mem: " fmt, ##args) + printk(KERN_INFO "xen_mem: " fmt, ##args) #define WPRINTK(fmt, args...) \ - printk(KERN_WARNING "xen_mem: " fmt, ##args) + printk(KERN_WARNING "xen_mem: " fmt, ##args) /* balloon_append: add the given page to the balloon. */ static void balloon_append(struct page *page) { - /* Low memory is re-populated first, so highmem pages go at list tail. */ - if ( PageHighMem(page) ) - { - list_add_tail(PAGE_TO_LIST(page), &ballooned_pages); - balloon_high++; - } - else - { - list_add(PAGE_TO_LIST(page), &ballooned_pages); - balloon_low++; - } + /* Lowmem is re-populated first, so highmem pages go at list tail. */ + if (PageHighMem(page)) { + list_add_tail(PAGE_TO_LIST(page), &ballooned_pages); + balloon_high++; + } else { + list_add(PAGE_TO_LIST(page), &ballooned_pages); + balloon_low++; + } } /* balloon_retrieve: rescue a page from the balloon, if it is not empty. 
*/ static struct page *balloon_retrieve(void) { - struct page *page; - - if ( list_empty(&ballooned_pages) ) - return NULL; - - page = LIST_TO_PAGE(ballooned_pages.next); - UNLIST_PAGE(page); - - if ( PageHighMem(page) ) - balloon_high--; - else - balloon_low--; - - return page; + struct page *page; + + if (list_empty(&ballooned_pages)) + return NULL; + + page = LIST_TO_PAGE(ballooned_pages.next); + UNLIST_PAGE(page); + + if (PageHighMem(page)) + balloon_high--; + else + balloon_low--; + + return page; } static void balloon_alarm(unsigned long unused) { - schedule_work(&balloon_worker); + schedule_work(&balloon_worker); } static unsigned long current_target(void) { - unsigned long target = min(target_pages, hard_limit); - if ( target > (current_pages + balloon_low + balloon_high) ) - target = current_pages + balloon_low + balloon_high; - return target; + unsigned long target = min(target_pages, hard_limit); + if (target > (current_pages + balloon_low + balloon_high)) + target = current_pages + balloon_low + balloon_high; + return target; } /* @@ -164,161 +164,147 @@ */ static void balloon_process(void *unused) { - unsigned long *mfn_list, pfn, i, flags; - struct page *page; - long credit, debt, rc; - void *v; - - down(&balloon_mutex); + unsigned long *mfn_list, pfn, i, flags; + struct page *page; + long credit, debt, rc; + void *v; + + down(&balloon_mutex); retry: - mfn_list = NULL; - - if ( (credit = current_target() - current_pages) > 0 ) - { - mfn_list = (unsigned long *)vmalloc(credit * sizeof(*mfn_list)); - if ( mfn_list == NULL ) - goto out; - - balloon_lock(flags); - rc = HYPERVISOR_dom_mem_op( - MEMOP_increase_reservation, mfn_list, credit, 0); - balloon_unlock(flags); - if ( rc < credit ) - { - /* We hit the Xen hard limit: reprobe. 
*/ - if ( HYPERVISOR_dom_mem_op( - MEMOP_decrease_reservation, mfn_list, rc, 0) != rc ) - BUG(); - hard_limit = current_pages + rc - driver_pages; - vfree(mfn_list); - goto retry; - } - - for ( i = 0; i < credit; i++ ) - { - if ( (page = balloon_retrieve()) == NULL ) - BUG(); - - pfn = page - mem_map; - if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY ) - BUG(); - - /* Update P->M and M->P tables. */ - phys_to_machine_mapping[pfn] = mfn_list[i]; - xen_machphys_update(mfn_list[i], pfn); + mfn_list = NULL; + + if ((credit = current_target() - current_pages) > 0) { + mfn_list = vmalloc(credit * sizeof(*mfn_list)); + if (mfn_list == NULL) + goto out; + + balloon_lock(flags); + rc = HYPERVISOR_dom_mem_op( + MEMOP_increase_reservation, mfn_list, credit, 0); + balloon_unlock(flags); + if (rc < credit) { + /* We hit the Xen hard limit: reprobe. */ + BUG_ON(HYPERVISOR_dom_mem_op( + MEMOP_decrease_reservation, + mfn_list, rc, 0) != rc); + hard_limit = current_pages + rc - driver_pages; + vfree(mfn_list); + goto retry; + } + + for (i = 0; i < credit; i++) { + page = balloon_retrieve(); + BUG_ON(page == NULL); + + pfn = page - mem_map; + if (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY) + BUG(); + + /* Update P->M and M->P tables. */ + phys_to_machine_mapping[pfn] = mfn_list[i]; + xen_machphys_update(mfn_list[i], pfn); - /* Link back into the page tables if it's not a highmem page. */ - if ( pfn < max_low_pfn ) - { - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)__va(pfn << PAGE_SHIFT), - pfn_pte_ma(mfn_list[i], PAGE_KERNEL), 0)); - } - - /* Finally, relinquish the memory back to the system allocator. 
*/ - ClearPageReserved(page); - set_page_count(page, 1); - __free_page(page); - } - - current_pages += credit; - } - else if ( credit < 0 ) - { - debt = -credit; - - mfn_list = (unsigned long *)vmalloc(debt * sizeof(*mfn_list)); - if ( mfn_list == NULL ) - goto out; - - for ( i = 0; i < debt; i++ ) - { - if ( (page = alloc_page(GFP_HIGHUSER)) == NULL ) - { - debt = i; - break; - } - - pfn = page - mem_map; - mfn_list[i] = phys_to_machine_mapping[pfn]; - - if ( !PageHighMem(page) ) - { - v = phys_to_virt(pfn << PAGE_SHIFT); - scrub_pages(v, 1); - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)v, __pte_ma(0), 0)); - } + /* Link back into the page tables if not highmem. */ + if (pfn < max_low_pfn) + BUG_ON(HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + pfn_pte_ma(mfn_list[i], PAGE_KERNEL), + 0)); + + /* Relinquish the page back to the allocator. */ + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); + } + + current_pages += credit; + } else if (credit < 0) { + debt = -credit; + + mfn_list = vmalloc(debt * sizeof(*mfn_list)); + if (mfn_list == NULL) + goto out; + + for (i = 0; i < debt; i++) { + if ((page = alloc_page(GFP_HIGHUSER)) == NULL) { + debt = i; + break; + } + + pfn = page - mem_map; + mfn_list[i] = phys_to_machine_mapping[pfn]; + + if (!PageHighMem(page)) { + v = phys_to_virt(pfn << PAGE_SHIFT); + scrub_pages(v, 1); + BUG_ON(HYPERVISOR_update_va_mapping( + (unsigned long)v, __pte_ma(0), 0)); + } #ifdef CONFIG_XEN_SCRUB_PAGES - else - { - v = kmap(page); - scrub_pages(v, 1); - kunmap(page); - } + else { + v = kmap(page); + scrub_pages(v, 1); + kunmap(page); + } #endif - } - - /* Ensure that ballooned highmem pages don't have cached mappings. */ - kmap_flush_unused(); - flush_tlb_all(); - - /* No more mappings: invalidate pages in P2M and add to balloon. 
*/ - for ( i = 0; i < debt; i++ ) - { - pfn = mfn_to_pfn(mfn_list[i]); - phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY; - balloon_append(pfn_to_page(pfn)); - } - - if ( HYPERVISOR_dom_mem_op( - MEMOP_decrease_reservation, mfn_list, debt, 0) != debt ) - BUG(); - - current_pages -= debt; - } + } + + /* Ensure that ballooned highmem pages don't have kmaps. */ + kmap_flush_unused(); + flush_tlb_all(); + + /* No more mappings: invalidate P2M and add to balloon. */ + for (i = 0; i < debt; i++) { + pfn = mfn_to_pfn(mfn_list[i]); + phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY; + balloon_append(pfn_to_page(pfn)); + } + + BUG_ON(HYPERVISOR_dom_mem_op( + MEMOP_decrease_reservation,mfn_list, debt, 0) != debt); + + current_pages -= debt; + } out: - if ( mfn_list != NULL ) - vfree(mfn_list); - - /* Schedule more work if there is some still to be done. */ - if ( current_target() != current_pages ) - mod_timer(&balloon_timer, jiffies + HZ); - - up(&balloon_mutex); + if (mfn_list != NULL) + vfree(mfn_list); + + /* Schedule more work if there is some still to be done. */ + if (current_target() != current_pages) + mod_timer(&balloon_timer, jiffies + HZ); + + up(&balloon_mutex); } /* Resets the Xen limit, sets new target, and kicks off processing. */ static void set_new_target(unsigned long target) { - /* No need for lock. Not read-modify-write updates. */ - hard_limit = ~0UL; - target_pages = target; - schedule_work(&balloon_worker); + /* No need for lock. Not read-modify-write updates. 
*/ + hard_limit = ~0UL; + target_pages = target; + schedule_work(&balloon_worker); } static struct xenbus_watch target_watch = { - .node = "memory/target" + .node = "memory/target" }; /* React to a change in the target key */ static void watch_target(struct xenbus_watch *watch, const char *node) { - unsigned long new_target; - int err; - - err = xenbus_scanf("memory", "target", "%lu", &new_target); + unsigned long new_target; + int err; + + err = xenbus_scanf("memory", "target", "%lu", &new_target); + if (err != 1) { + printk(KERN_ERR "Unable to read memory/target\n"); + return; + } - if(err != 1) - { - printk(KERN_ERR "Unable to read memory/target\n"); - return; - } - - set_new_target(new_target >> PAGE_SHIFT); + set_new_target(new_target >> PAGE_SHIFT); } @@ -329,141 +315,185 @@ unsigned long event, void *data) { - int err; - - BUG_ON(down_trylock(&xenbus_lock) == 0); - - err = register_xenbus_watch(&target_watch); - - if (err) { - printk(KERN_ERR "Failed to set balloon watcher\n"); - } - - return NOTIFY_DONE; + int err; + + BUG_ON(down_trylock(&xenbus_lock) == 0); + + err = register_xenbus_watch(&target_watch); + if (err) + printk(KERN_ERR "Failed to set balloon watcher\n"); + + return NOTIFY_DONE; } static int balloon_write(struct file *file, const char __user *buffer, unsigned long count, void *data) { - char memstring[64], *endchar; - unsigned long long target_bytes; - - if ( !capable(CAP_SYS_ADMIN) ) - return -EPERM; - - if ( count <= 1 ) - return -EBADMSG; /* runt */ - if ( count > sizeof(memstring) ) - return -EFBIG; /* too long */ - - if ( copy_from_user(memstring, buffer, count) ) - return -EFAULT; - memstring[sizeof(memstring)-1] = '\0'; - - target_bytes = memparse(memstring, &endchar); - set_new_target(target_bytes >> PAGE_SHIFT); - - return count; + char memstring[64], *endchar; + unsigned long long target_bytes; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (count <= 1) + return -EBADMSG; /* runt */ + if (count > sizeof(memstring)) + 
return -EFBIG; /* too long */ + + if (copy_from_user(memstring, buffer, count)) + return -EFAULT; + memstring[sizeof(memstring)-1] = '\0'; + + target_bytes = memparse(memstring, &endchar); + set_new_target(target_bytes >> PAGE_SHIFT); + + return count; } static int balloon_read(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len; - - len = sprintf( - page, - "Current allocation: %8lu kB\n" - "Requested target: %8lu kB\n" - "Low-mem balloon: %8lu kB\n" - "High-mem balloon: %8lu kB\n" - "Xen hard limit: ", - PAGES2KB(current_pages), PAGES2KB(target_pages), - PAGES2KB(balloon_low), PAGES2KB(balloon_high)); - - if ( hard_limit != ~0UL ) - len += sprintf( - page + len, - "%8lu kB (inc. %8lu kB driver headroom)\n", - PAGES2KB(hard_limit), PAGES2KB(driver_pages)); - else - len += sprintf( - page + len, - " ??? kB\n"); - - *eof = 1; - return len; + int len; + + len = sprintf( + page, + "Current allocation: %8lu kB\n" + "Requested target: %8lu kB\n" + "Low-mem balloon: %8lu kB\n" + "High-mem balloon: %8lu kB\n" + "Xen hard limit: ", + PAGES2KB(current_pages), PAGES2KB(target_pages), + PAGES2KB(balloon_low), PAGES2KB(balloon_high)); + + if (hard_limit != ~0UL) { + len += sprintf( + page + len, + "%8lu kB (inc. %8lu kB driver headroom)\n", + PAGES2KB(hard_limit), PAGES2KB(driver_pages)); + } else { + len += sprintf( + page + len, + " ??? 
kB\n"); + } + + *eof = 1; + return len; } static struct notifier_block xenstore_notifier; static int __init balloon_init(void) { - unsigned long pfn; - struct page *page; - - IPRINTK("Initialising balloon driver.\n"); - - current_pages = min(xen_start_info.nr_pages, max_pfn); - target_pages = current_pages; - balloon_low = 0; - balloon_high = 0; - driver_pages = 0UL; - hard_limit = ~0UL; - - init_timer(&balloon_timer); - balloon_timer.data = 0; - balloon_timer.function = balloon_alarm; + unsigned long pfn; + struct page *page; + + IPRINTK("Initialising balloon driver.\n"); + + current_pages = min(xen_start_info.nr_pages, max_pfn); + target_pages = current_pages; + balloon_low = 0; + balloon_high = 0; + driver_pages = 0UL; + hard_limit = ~0UL; + + init_timer(&balloon_timer); + balloon_timer.data = 0; + balloon_timer.function = balloon_alarm; - if ( (balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL ) - { - WPRINTK("Unable to create /proc/xen/balloon.\n"); - return -1; - } - - balloon_pde->read_proc = balloon_read; - balloon_pde->write_proc = balloon_write; + if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) { + WPRINTK("Unable to create /proc/xen/balloon.\n"); + return -1; + } + + balloon_pde->read_proc = balloon_read; + balloon_pde->write_proc = balloon_write; - /* Initialise the balloon with excess memory space. */ - for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ ) - { - page = &mem_map[pfn]; - if ( !PageReserved(page) ) - balloon_append(page); - } - - target_watch.callback = watch_target; - xenstore_notifier.notifier_call = balloon_init_watcher; - - register_xenstore_notifier(&xenstore_notifier); + /* Initialise the balloon with excess memory space. 
*/ + for (pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++) { + page = &mem_map[pfn]; + if (!PageReserved(page)) + balloon_append(page); + } + + target_watch.callback = watch_target; + xenstore_notifier.notifier_call = balloon_init_watcher; + + register_xenstore_notifier(&xenstore_notifier); - return 0; + return 0; } subsys_initcall(balloon_init); void balloon_update_driver_allowance(long delta) { - unsigned long flags; - balloon_lock(flags); - driver_pages += delta; /* non-atomic update */ - balloon_unlock(flags); -} - -void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns) -{ - unsigned long flags; - - balloon_lock(flags); - if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, - mfn_list, nr_mfns, 0) != nr_mfns ) - BUG(); - current_pages -= nr_mfns; /* non-atomic update */ - balloon_unlock(flags); - - schedule_work(&balloon_worker); + unsigned long flags; + balloon_lock(flags); + driver_pages += delta; /* non-atomic update */ + balloon_unlock(flags); +} + +struct page *balloon_alloc_empty_page_range(unsigned long nr_pages) +{ + int f(pte_t *pte, struct page *pte_page, + unsigned long addr, void *data) + { + unsigned long mfn = pte_mfn(*pte); + set_pte(pte, __pte_ma(0)); + phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] = + INVALID_P2M_ENTRY; + BUG_ON(HYPERVISOR_dom_mem_op( + MEMOP_decrease_reservation, &mfn, 1, 0) != 1); + return 0; + } + + unsigned long vstart, flags; + unsigned int order = get_order(nr_pages * PAGE_SIZE); + + vstart = __get_free_pages(GFP_KERNEL, order); + if (vstart == 0) + return NULL; + + scrub_pages(vstart, 1 << order); + + balloon_lock(flags); + BUG_ON(generic_page_range( + &init_mm, vstart, PAGE_SIZE << order, f, NULL) != 0); + current_pages -= 1UL << order; + balloon_unlock(flags); + + schedule_work(&balloon_worker); + + flush_tlb_all(); + + return virt_to_page(vstart); +} + +void balloon_dealloc_empty_page_range( + struct page *page, unsigned long nr_pages) +{ + unsigned long i, flags; + unsigned int order = 
get_order(nr_pages * PAGE_SIZE); + + balloon_lock(flags); + for (i = 0; i < (1UL << order); i++) + balloon_append(page + i); + balloon_unlock(flags); + + schedule_work(&balloon_worker); } EXPORT_SYMBOL(balloon_update_driver_allowance); -EXPORT_SYMBOL(balloon_put_pages); +EXPORT_SYMBOL(balloon_alloc_empty_page_range); +EXPORT_SYMBOL(balloon_dealloc_empty_page_range); + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Mon Aug 22 18:00:37 2005 @@ -80,7 +80,6 @@ } #endif -#ifdef CONFIG_XEN_BLKDEV_GRANT /* When using grant tables to map a frame for device access then the * handle returned must be used to unmap the frame. This is needed to * drop the ref count on the frame. 
@@ -89,7 +88,6 @@ #define pending_handle(_idx, _i) \ (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)]) #define BLKBACK_INVALID_HANDLE (0xFFFF) -#endif #ifdef CONFIG_XEN_BLKDEV_TAP_BE /* @@ -110,7 +108,6 @@ static void fast_flush_area(int idx, int nr_pages) { -#ifdef CONFIG_XEN_BLKDEV_GRANT struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; u16 handle; @@ -129,21 +126,6 @@ if ( unlikely(HYPERVISOR_grant_table_op( GNTTABOP_unmap_grant_ref, unmap, invcount))) BUG(); -#else - - multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int i; - - for ( i = 0; i < nr_pages; i++ ) - { - MULTI_update_va_mapping(mcl+i, MMAP_VADDR(idx, i), - __pte(0), 0); - } - - mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; - if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) - BUG(); -#endif } @@ -367,12 +349,7 @@ unsigned long fas = 0; int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; pending_req_t *pending_req; -#ifdef CONFIG_XEN_BLKDEV_GRANT struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -#else - unsigned long remap_prot; - multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -#endif struct phys_req preq; struct { unsigned long buf; unsigned int nsec; @@ -399,7 +376,6 @@ preq.sector_number = req->sector_number; preq.nr_sects = 0; -#ifdef CONFIG_XEN_BLKDEV_GRANT for ( i = 0; i < nseg; i++ ) { fas = req->frame_and_sects[i]; @@ -431,25 +407,15 @@ } phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = - FOREIGN_FRAME(map[i].dev_bus_addr); + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT); pending_handle(pending_idx, i) = map[i].handle; } -#endif for ( i = 0; i < nseg; i++ ) { fas = req->frame_and_sects[i]; -#ifdef CONFIG_XEN_BLKDEV_GRANT - seg[i].buf = (map[i].dev_bus_addr << PAGE_SHIFT) | - (blkif_first_sect(fas) << 9); -#else - seg[i].buf = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9); - seg[i].nsec = blkif_last_sect(fas) - 
blkif_first_sect(fas) + 1; - if ( seg[i].nsec <= 0 ) - goto bad_descriptor; - preq.nr_sects += seg[i].nsec; -#endif + seg[i].buf = map[i].dev_bus_addr | (blkif_first_sect(fas) << 9); } if ( vbd_translate(&preq, blkif, operation) != 0 ) @@ -459,40 +425,6 @@ preq.sector_number + preq.nr_sects, preq.dev); goto bad_descriptor; } - -#ifndef CONFIG_XEN_BLKDEV_GRANT - if ( operation == READ ) - remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; - else - remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED; - - - for ( i = 0; i < nseg; i++ ) - { - MULTI_update_va_mapping_otherdomain( - mcl+i, MMAP_VADDR(pending_idx, i), - pfn_pte_ma(seg[i].buf >> PAGE_SHIFT, __pgprot(remap_prot)), - 0, blkif->domid); -#ifdef CONFIG_XEN_BLKDEV_TAP_BE - if ( blkif->is_blktap ) - mcl[i].args[MULTI_UVMDOMID_INDEX] = ID_TO_DOM(req->id); -#endif - phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = - FOREIGN_FRAME(seg[i].buf >> PAGE_SHIFT); - } - - BUG_ON(HYPERVISOR_multicall(mcl, nseg) != 0); - - for ( i = 0; i < nseg; i++ ) - { - if ( unlikely(mcl[i].result != 0) ) - { - DPRINTK("invalid buffer -- could not remap it\n"); - fast_flush_area(pending_idx, nseg); - goto bad_descriptor; - } - } -#endif /* end ifndef CONFIG_XEN_BLKDEV_GRANT */ pending_req = &pending_reqs[pending_idx]; pending_req->blkif = blkif; @@ -637,6 +569,7 @@ static int __init blkif_init(void) { int i; + struct page *page; if ( !(xen_start_info.flags & SIF_INITDOMAIN) && !(xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) @@ -644,8 +577,9 @@ blkif_interface_init(); - if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) - BUG(); + page = balloon_alloc_empty_page_range(MMAP_PAGES); + BUG_ON(page == NULL); + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); pending_cons = 0; pending_prod = MAX_PENDING_REQS; @@ -667,10 +601,7 @@ blkif_xenbus_init(); -#ifdef CONFIG_XEN_BLKDEV_GRANT memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES ); - printk(KERN_ALERT "Blkif 
backend is using grant tables.\n"); -#endif #ifdef CONFIG_XEN_BLKDEV_TAP_BE printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n"); diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/drivers/xen/blkback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Mon Aug 22 18:00:37 2005 @@ -17,9 +17,7 @@ #include <asm-xen/hypervisor.h> #include <asm-xen/xen-public/io/blkif.h> #include <asm-xen/xen-public/io/ring.h> -#ifdef CONFIG_XEN_BLKDEV_GRANT #include <asm-xen/gnttab.h> -#endif #if 0 #define ASSERT(_p) \ @@ -53,7 +51,7 @@ rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs.*/ spinlock_t vbd_lock; /* Protects VBD mapping. */ /* Private fields. */ - enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; + enum { DISCONNECTED, CONNECTED } status; /* * DISCONNECT response is deferred until pending requests are ack'ed. * We therefore need to store the id from the original request. 
@@ -69,11 +67,9 @@ atomic_t refcnt; struct work_struct work; -#ifdef CONFIG_XEN_BLKDEV_GRANT u16 shmem_handle; unsigned long shmem_vaddr; grant_ref_t shmem_ref; -#endif } blkif_t; void blkif_create(blkif_be_create_t *create); diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/drivers/xen/blkback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Mon Aug 22 18:00:37 2005 @@ -7,7 +7,6 @@ */ #include "common.h" -#include <asm-xen/ctrl_if.h> #include <asm-xen/evtchn.h> #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) @@ -48,19 +47,6 @@ return blkif; } -#ifndef CONFIG_XEN_BLKDEV_GRANT -static int map_frontend_page(blkif_t *blkif, unsigned long localaddr, - unsigned long shared_page) -{ - return direct_remap_area_pages(&init_mm, localaddr, - shared_page<<PAGE_SHIFT, PAGE_SIZE, - __pgprot(_KERNPG_TABLE), blkif->domid); -} - -static void unmap_frontend_page(blkif_t *blkif) -{ -} -#else static int map_frontend_page(blkif_t *blkif, unsigned long localaddr, unsigned long shared_page) { @@ -92,7 +78,6 @@ op.dev_bus_addr = 0; BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); } -#endif /* CONFIG_XEN_BLKDEV_GRANT */ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) { @@ -138,52 +123,6 @@ return 0; } -static void __blkif_disconnect_complete(void *arg) -{ - blkif_t *blkif = (blkif_t *)arg; - ctrl_msg_t cmsg; - blkif_be_disconnect_t disc; - - /* - * These can't be done in blkif_disconnect() because at that point there - * may be outstanding requests at the disc whose asynchronous responses - * must still be notified to the remote driver. - */ - unmap_frontend_page(blkif); - vfree(blkif->blk_ring.sring); - - /* Construct the deferred response message. 
*/ - cmsg.type = CMSG_BLKIF_BE; - cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT; - cmsg.id = blkif->disconnect_rspid; - cmsg.length = sizeof(blkif_be_disconnect_t); - disc.domid = blkif->domid; - disc.blkif_handle = blkif->handle; - disc.status = BLKIF_BE_STATUS_OKAY; - memcpy(cmsg.msg, &disc, sizeof(disc)); - - /* - * Make sure message is constructed /before/ status change, because - * after the status change the 'blkif' structure could be deallocated at - * any time. Also make sure we send the response /after/ status change, - * as otherwise a subsequent CONNECT request could spuriously fail if - * another CPU doesn't see the status change yet. - */ - mb(); - BUG_ON(blkif->status != DISCONNECTING); - blkif->status = DISCONNECTED; - mb(); - - /* Send the successful response. */ - ctrl_if_send_response(&cmsg); -} - -void blkif_disconnect_complete(blkif_t *blkif) -{ - INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif); - schedule_work(&blkif->work); -} - void free_blkif(blkif_t *blkif) { blkif_t **pblkif; diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Aug 22 18:00:37 2005 @@ -75,16 +75,6 @@ if (vbd_is_active(be->vbd)) return; -#ifndef CONFIG_XEN_BLKDEV_GRANT - err = xenbus_gather(be->frontpath, "shared-frame", "%lu", &sharedmfn, - "event-channel", "%u", &evtchn, NULL); - if (err) { - xenbus_dev_error(be->dev, err, - "reading %s/shared-frame and event-channel", - be->frontpath); - return; - } -#else err = xenbus_gather(be->frontpath, "grant-id", "%lu", &sharedmfn, "event-channel", "%u", &evtchn, NULL); if (err) { @@ -93,7 +83,6 @@ be->frontpath); return; } -#endif /* Domains must use same shared frame for all vbds. 
*/ if (be->blkif->status == CONNECTED && @@ -182,14 +171,18 @@ "frontend-id", "%li", &be->frontend_id, "frontend", NULL, &frontend, NULL); - if (err == -ENOENT || err == -ERANGE || + if (XENBUS_EXIST_ERR(err) || strlen(frontend) == 0 || !xenbus_exists(frontend, "")) { - if (frontend) - kfree(frontend); /* If we can't get a frontend path and a frontend-id, * then our bus-id is no longer valid and we need to * destroy the backend device. */ + goto device_fail; + } + if (err < 0) { + xenbus_dev_error(dev, err, + "reading %s/frontend or frontend-id", + dev->nodename); goto device_fail; } @@ -199,6 +192,7 @@ if (be->frontpath) kfree(be->frontpath); be->frontpath = frontend; + frontend = NULL; be->watch.node = be->frontpath; be->watch.callback = frontend_changed; err = register_xenbus_watch(&be->watch); @@ -206,14 +200,13 @@ be->watch.node = NULL; goto device_fail; } - } else - kfree(frontend); + } err = xenbus_scanf(dev->nodename, "physical-device", "%li", &pdev); - if (err == -ENOENT || err == -ERANGE) + if (XENBUS_EXIST_ERR(err)) goto out; if (err < 0) { - xenbus_dev_error(dev, err, "Reading physical-device"); + xenbus_dev_error(dev, err, "reading physical-device"); goto device_fail; } if (be->pdev && be->pdev != pdev) { @@ -253,12 +246,14 @@ frontend_changed(&be->watch, be->frontpath); } + out: + if (frontend) + kfree(frontend); return; device_fail: device_unregister(&be->dev->dev); - out: - return; + goto out; } static int blkback_probe(struct xenbus_device *dev, diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Mon Aug 22 18:00:37 2005 @@ -55,10 +55,8 @@ #include <scsi/scsi.h> #include <asm-xen/evtchn.h> #include <asm-xen/xenbus.h> -#ifdef CONFIG_XEN_BLKDEV_GRANT #include <asm-xen/xen-public/grant_table.h> #include <asm-xen/gnttab.h> -#endif typedef unsigned char byte; /* from 
linux/ide.h */ @@ -78,14 +76,12 @@ #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) -#ifdef CONFIG_XEN_BLKDEV_GRANT static domid_t rdomid = 0; static grant_ref_t gref_head, gref_terminal; #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) #define GRANTREF_INVALID (1<<15) static int shmem_ref; -#endif static struct blk_shadow { blkif_request_t req; @@ -131,30 +127,14 @@ static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r) { -#ifndef CONFIG_XEN_BLKDEV_GRANT - int i; -#endif s->req = *r; - -#ifndef CONFIG_XEN_BLKDEV_GRANT - for ( i = 0; i < r->nr_segments; i++ ) - s->req.frame_and_sects[i] = machine_to_phys(r->frame_and_sects[i]); -#endif } static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s) { -#ifndef CONFIG_XEN_BLKDEV_GRANT - int i; -#endif *r = s->req; - -#ifndef CONFIG_XEN_BLKDEV_GRANT - for ( i = 0; i < s->req.nr_segments; i++ ) - r->frame_and_sects[i] = phys_to_machine(s->req.frame_and_sects[i]); -#endif } @@ -256,9 +236,7 @@ int idx; unsigned long id; unsigned int fsect, lsect; -#ifdef CONFIG_XEN_BLKDEV_GRANT int ref; -#endif if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) return 1; @@ -284,7 +262,6 @@ buffer_ma = page_to_phys(bvec->bv_page); fsect = bvec->bv_offset >> 9; lsect = fsect + (bvec->bv_len >> 9) - 1; -#ifdef CONFIG_XEN_BLKDEV_GRANT /* install a grant reference. 
*/ ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); ASSERT( ref != -ENOSPC ); @@ -300,11 +277,6 @@ ring_req->frame_and_sects[ring_req->nr_segments++] = blkif_fas_from_gref(ref, fsect, lsect); - -#else - ring_req->frame_and_sects[ring_req->nr_segments++] = - blkif_fas(buffer_ma, fsect, lsect); -#endif } } @@ -711,9 +683,7 @@ blkif_request_t *req; struct buffer_head *bh; unsigned int fsect, lsect; -#ifdef CONFIG_XEN_BLKDEV_GRANT int ref; -#endif fsect = (buffer_ma & ~PAGE_MASK) >> 9; lsect = fsect + nr_sectors - 1; @@ -762,7 +732,6 @@ bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request; blk_shadow[req->id].request = (unsigned long)id; -#ifdef CONFIG_XEN_BLKDEV_GRANT /* install a grant reference. */ ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); ASSERT( ref != -ENOSPC ); @@ -778,10 +747,6 @@ req->frame_and_sects[req->nr_segments] = blkif_fas_from_gref(ref, fsect, lsect); -#else - req->frame_and_sects[req->nr_segments] = - blkif_fas(buffer_ma, fsect, lsect); -#endif if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST ) sg_next_sect += nr_sectors; else @@ -819,7 +784,6 @@ req->sector_number = (blkif_sector_t)sector_number; req->handle = handle; req->nr_segments = 1; -#ifdef CONFIG_XEN_BLKDEV_GRANT /* install a grant reference. */ ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); ASSERT( ref != -ENOSPC ); @@ -833,9 +797,6 @@ blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT; req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect); -#else - req->frame_and_sects[0] = blkif_fas(buffer_ma, fsect, lsect); -#endif /* Keep a private copy so we can reissue requests when recovering. */ pickle_request(&blk_shadow[xid], req); @@ -1015,9 +976,7 @@ int i; blkif_request_t *req; struct blk_shadow *copy; -#ifdef CONFIG_XEN_BLKDEV_GRANT int j; -#endif /* Stage 1: Make a safe copy of the shadow state. 
*/ copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL); @@ -1047,7 +1006,6 @@ req->id = GET_ID_FROM_FREELIST(); memcpy(&blk_shadow[req->id], &copy[i], sizeof(copy[i])); -#ifdef CONFIG_XEN_BLKDEV_GRANT /* Rewrite any grant references invalidated by suspend/resume. */ for ( j = 0; j < req->nr_segments; j++ ) { @@ -1061,7 +1019,6 @@ req->frame_and_sects[j] &= ~GRANTREF_INVALID; } blk_shadow[req->id].req = *req; -#endif blk_ring.req_prod_pvt++; } @@ -1085,9 +1042,7 @@ int err = 0; blkif_evtchn = evtchn; -#ifdef CONFIG_XEN_BLKDEV_GRANT rdomid = domid; -#endif err = bind_evtchn_to_irqhandler( blkif_evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL); @@ -1134,25 +1089,24 @@ "info", "%u", &binfo, "sector-size", "%lu", &sector_size, NULL); - - if (err) + if (err) { xenbus_dev_error(info->dev, err, "reading backend fields"); - else { - xlvbd_add(sectors, info->vdevice, info->handle, binfo, - sector_size); - info->connected = 1; - - /* First to connect? blkif is now connected. */ - if (blkif_vbds_connected++ == 0) - blkif_state = BLKIF_STATE_CONNECTED; - - xenbus_dev_ok(info->dev); - - /* Kick pending requests. */ - spin_lock_irq(&blkif_io_lock); - kick_pending_request_queues(); - spin_unlock_irq(&blkif_io_lock); - } + return; + } + + xlvbd_add(sectors, info->vdevice, info->handle, binfo, sector_size); + info->connected = 1; + + /* First to connect? blkif is now connected. */ + if (blkif_vbds_connected++ == 0) + blkif_state = BLKIF_STATE_CONNECTED; + + xenbus_dev_ok(info->dev); + + /* Kick pending requests.
*/ + spin_lock_irq(&blkif_io_lock); + kick_pending_request_queues(); + spin_unlock_irq(&blkif_io_lock); } static int setup_blkring(struct xenbus_device *dev, unsigned int backend_id) @@ -1169,7 +1123,6 @@ SHARED_RING_INIT(sring); FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE); -#ifdef CONFIG_XEN_BLKDEV_GRANT shmem_ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); ASSERT(shmem_ref != -ENOSPC); @@ -1177,7 +1130,6 @@ backend_id, virt_to_mfn(blk_ring.sring), 0); -#endif op.u.alloc_unbound.dom = backend_id; err = HYPERVISOR_event_channel_op(&op); @@ -1199,36 +1151,28 @@ const char *message; int err, backend_id; - backend = xenbus_read(dev->nodename, "backend", NULL); - if (IS_ERR(backend)) { - err = PTR_ERR(backend); - if (err == -ENOENT) - goto out; - xenbus_dev_error(dev, err, "reading %s/backend", + backend = NULL; + err = xenbus_gather(dev->nodename, + "backend-id", "%i", &backend_id, + "backend", NULL, &backend, + NULL); + if (XENBUS_EXIST_ERR(err)) + goto out; + if (backend && strlen(backend) == 0) { + err = -ENOENT; + goto out; + } + if (err < 0) { + xenbus_dev_error(dev, err, "reading %s/backend or backend-id", dev->nodename); goto out; } - if (strlen(backend) == 0) { - err = -ENOENT; - goto free_backend; - } - - /* FIXME: This driver can't handle backends on different - * domains. Check and fail gracefully. */ - err = xenbus_scanf(dev->nodename, "backend-id", "%i", &backend_id); - if (err == -ENOENT) - goto free_backend; - if (err < 0) { - xenbus_dev_error(dev, err, "reading %s/backend-id", - dev->nodename); - goto free_backend; - } /* First device? We create shared ring, alloc event channel. 
*/ if (blkif_vbds == 0) { err = setup_blkring(dev, backend_id); if (err) - goto free_backend; + goto out; } err = xenbus_transaction_start(dev->nodename); @@ -1237,20 +1181,11 @@ goto destroy_blkring; } -#ifdef CONFIG_XEN_BLKDEV_GRANT err = xenbus_printf(dev->nodename, "grant-id","%u", shmem_ref); if (err) { message = "writing grant-id"; goto abort_transaction; } -#else - err = xenbus_printf(dev->nodename, "shared-frame", "%lu", - virt_to_mfn(blk_ring.sring)); - if (err) { - message = "writing shared-frame"; - goto abort_transaction; - } -#endif err = xenbus_printf(dev->nodename, "event-channel", "%u", blkif_evtchn); if (err) { @@ -1258,9 +1193,11 @@ goto abort_transaction; } - info->watch.node = info->backend = backend; + info->backend = backend; + backend = NULL; + + info->watch.node = info->backend; info->watch.callback = watch_for_status; - err = register_xenbus_watch(&info->watch); if (err) { message = "registering watch on backend"; @@ -1272,20 +1209,20 @@ xenbus_dev_error(dev, err, "completing transaction"); goto destroy_blkring; } - return 0; - -abort_transaction: + + out: + if (backend) + kfree(backend); + return err; + + abort_transaction: xenbus_transaction_end(1); /* Have to do this *outside* transaction. */ xenbus_dev_error(dev, err, "%s", message); -destroy_blkring: + destroy_blkring: if (blkif_vbds == 0) blkif_free(); -free_backend: - kfree(backend); -out: - printk("%s:%u = %i\n", __FILE__, __LINE__, err); - return err; + goto out; } /* Setup supplies the backend dir, virtual device. @@ -1301,7 +1238,7 @@ /* FIXME: Use dynamic device id if this is not set. */ err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice); - if (err == -ENOENT) + if (XENBUS_EXIST_ERR(err)) return err; if (err < 0) { xenbus_dev_error(dev, err, "reading virtual-device"); @@ -1316,6 +1253,7 @@ info->dev = dev; info->vdevice = vdevice; info->connected = 0; + /* Front end dir is a number, which is used as the id. 
*/ info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); dev->data = info; @@ -1425,13 +1363,10 @@ { int i; -#ifdef CONFIG_XEN_BLKDEV_GRANT /* A grant for every ring slot, plus one for the ring itself. */ - if ( 0 > gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1, - &gref_head, &gref_terminal) ) + if (gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1, + &gref_head, &gref_terminal) < 0) return 1; - printk(KERN_ALERT "Blkif frontend is using grant tables.\n"); -#endif if ( (xen_start_info.flags & SIF_INITDOMAIN) || (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) @@ -1455,20 +1390,7 @@ static void blkif_completion(struct blk_shadow *s) { int i; -#ifdef CONFIG_XEN_BLKDEV_GRANT for ( i = 0; i < s->req.nr_segments; i++ ) gnttab_release_grant_reference( &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i])); -#else - /* This is a hack to get the dirty logging bits set */ - if ( s->req.operation == BLKIF_OP_READ ) - { - for ( i = 0; i < s->req.nr_segments; i++ ) - { - unsigned long pfn = s->req.frame_and_sects[i] >> PAGE_SHIFT; - unsigned long mfn = phys_to_machine_mapping[pfn]; - xen_machphys_update(mfn, pfn); - } - } -#endif -} +} diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Mon Aug 22 18:00:37 2005 @@ -5,7 +5,6 @@ * Control interface between the driver and a character device. * * Copyright (c) 2004, Andrew Warfield - * */ #include <linux/config.h> @@ -535,8 +534,8 @@ /* Set the necessary mappings in p2m and in the VM_FOREIGN * vm_area_struct to allow user vaddr -> struct page lookups * to work. This is needed for direct IO to foreign pages. 
*/ - phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] = - FOREIGN_FRAME(map[i].dev_bus_addr); + phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] = + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT); offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; ((struct page **)blktap_vma->vm_private_data)[offset] = @@ -776,9 +775,11 @@ int blktap_init(void) { int err, i, j; - - if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) - BUG(); + struct page *page; + + page = balloon_alloc_empty_page_range(MMAP_PAGES); + BUG_ON(page == NULL); + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); #ifdef CONFIG_XEN_BLKDEV_GRANT for (i=0; i<MAX_PENDING_REQS ; i++) diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Mon Aug 22 18:00:37 2005 @@ -768,7 +768,7 @@ continue; } phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] = - FOREIGN_FRAME(mop->dev_bus_addr); + FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT); grant_tx_ref[pending_idx] = mop->handle; #else if ( unlikely(mcl[0].result != 0) ) @@ -968,8 +968,9 @@ netif_interface_init(); - mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS); - BUG_ON(mmap_vstart == 0); + page = balloon_alloc_empty_page_range(MAX_PENDING_REQS); + BUG_ON(page == NULL); + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); for ( i = 0; i < MAX_PENDING_REQS; i++ ) { diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c --- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Mon Aug 22 18:00:37 2005 @@ -1027,13 +1027,15 @@ static int __init usbif_init(void) { int i; + struct page *page; if ( !(xen_start_info.flags & SIF_INITDOMAIN) && !(xen_start_info.flags & SIF_USB_BE_DOMAIN) ) return 0; - - if ( 
(mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) - BUG(); + + page = balloon_alloc_empty_page_range(MMAP_PAGES); + BUG_ON(page == NULL); + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); pending_cons = 0; pending_prod = MAX_PENDING_REQS; diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Mon Aug 22 18:00:37 2005 @@ -507,14 +507,14 @@ int vcpu, vcpu_guest_context_t *ctxt) { int ret; - unsigned long ign1, ign2; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)), - "2" (ctxt) - : __syscall_clobber ); + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) + : "0" ((unsigned long)__HYPERVISOR_sched_op), + "D" ((unsigned long)SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)), + "S" ((unsigned long)ctxt) + : __syscall_clobber ); return ret; } diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/include/asm-xen/balloon.h --- a/linux-2.6-xen-sparse/include/asm-xen/balloon.h Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/balloon.h Mon Aug 22 18:00:37 2005 @@ -35,10 +35,19 @@ * Inform the balloon driver that it should allow some slop for device-driver * memory activities. */ -extern void balloon_update_driver_allowance(long delta); +extern void +balloon_update_driver_allowance( + long delta); -/* Give up unmapped pages to the balloon driver. */ -extern void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns); +/* Allocate an empty low-memory page range. */ +extern struct page * +balloon_alloc_empty_page_range( + unsigned long nr_pages); + +/* Deallocate an empty page range, adding to the balloon. 
*/ +extern void +balloon_dealloc_empty_page_range( + struct page *page, unsigned long nr_pages); /* * Prevent the balloon driver from changing the memory reservation during diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/include/asm-xen/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Mon Aug 22 18:00:37 2005 @@ -137,9 +137,6 @@ void xen_create_contiguous_region(unsigned long vstart, unsigned int order); void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); -/* Allocate a contiguous empty region of low memory. Return virtual start. */ -unsigned long allocate_empty_lowmem_region(unsigned long pages); - #include <asm/hypercall.h> #if defined(CONFIG_X86_64) diff -r 483ac5017c9c -r 3a8f27c6d56c linux-2.6-xen-sparse/include/asm-xen/xenbus.h --- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Sat Aug 20 00:47:24 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Mon Aug 22 18:00:37 2005 @@ -128,4 +128,14 @@ void xenbus_suspend(void); void xenbus_resume(void); +#define XENBUS_IS_ERR_READ(str) ({ \ + if (!IS_ERR(str) && strlen(str) == 0) { \ + kfree(str); \ + str = ERR_PTR(-ERANGE); \ + } \ + IS_ERR(str); \ +}) + +#define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE) + #endif /* _ASM_XEN_XENBUS_H */ diff -r 483ac5017c9c -r 3a8f27c6d56c tools/firmware/vmxassist/vm86.c --- a/tools/firmware/vmxassist/vm86.c Sat Aug 20 00:47:24 2005 +++ b/tools/firmware/vmxassist/vm86.c Mon Aug 22 18:00:37 2005 @@ -38,7 +38,7 @@ enum vm86_mode mode; #ifdef DEBUG -int traceset = 0; +int traceset = 0xff; char *states[] = { "<VM86_REAL>", @@ -446,6 +446,8 @@ #endif if (getreg(regs, modrm) & CR0_PE) set_mode(regs, VM86_REAL_TO_PROTECTED); + else + set_mode(regs, VM86_REAL); break; case 3: @@ -603,7 +605,9 @@ { switch (newmode) { case VM86_REAL: - if (mode == VM86_PROTECTED_TO_REAL) { + if ((mode == VM86_PROTECTED_TO_REAL) || + (mode == 
VM86_REAL_TO_PROTECTED)) { + regs->eflags &= ~EFLAGS_TF; real_mode(regs); break; } else if (mode == VM86_REAL) { diff -r 483ac5017c9c -r 3a8f27c6d56c tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Sat Aug 20 00:47:24 2005 +++ b/tools/python/xen/xm/create.py Mon Aug 22 18:00:37 2005 @@ -688,7 +688,9 @@ dom0_cur_alloc = get_dom0_alloc() dom0_new_alloc = dom0_cur_alloc - (domU_need_mem - free_mem) - if free_mem < domU_need_mem and dom0_new_alloc >= dom0_min_mem: + if free_mem < domU_need_mem and dom0_new_alloc < dom0_min_mem: + ret = 1 + if free_mem < domU_need_mem and ret == 0: server.xend_domain_mem_target_set(0, dom0_new_alloc) @@ -734,7 +736,8 @@ dom0_min_mem = xroot.get_dom0_min_mem() if dom0_min_mem != 0: if balloon_out(dom0_min_mem, opts): - return + print >>sys.stderr, "error: cannot allocate enough memory for domain" + sys.exit(1) dom = make_domain(opts, config) if opts.vals.console_autoconnect: diff -r 483ac5017c9c -r 3a8f27c6d56c tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Sat Aug 20 00:47:24 2005 +++ b/tools/python/xen/xm/main.py Mon Aug 22 18:00:37 2005 @@ -434,7 +434,7 @@ arg_check(args, 6, "sedf") dom = args[0] - v = map(int, args[1:5]) + v = map(int, args[1:6]) from xen.xend.XendClient import server server.xend_domain_cpu_sedf_set(dom, *v) diff -r 483ac5017c9c -r 3a8f27c6d56c xen/arch/ia64/grant_table.c --- a/xen/arch/ia64/grant_table.c Sat Aug 20 00:47:24 2005 +++ b/xen/arch/ia64/grant_table.c Mon Aug 22 18:00:37 2005 @@ -546,15 +546,6 @@ { frame = act->frame; } - else if ( frame == GNTUNMAP_DEV_FROM_VIRT ) - { - if ( !( flags & GNTMAP_device_map ) ) - PIN_FAIL(unmap_out, GNTST_bad_dev_addr, - "Bad frame number: frame not mapped for dev access.\n"); - frame = act->frame; - - /* Frame will be unmapped for device access below if virt addr okay. */ - } else { if ( unlikely(frame != act->frame) ) @@ -615,15 +606,6 @@ act->pin -= (flags & GNTMAP_readonly) ? 
GNTPIN_hstr_inc : GNTPIN_hstw_inc; - - if ( frame == GNTUNMAP_DEV_FROM_VIRT ) - { - act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc - : GNTPIN_devw_inc; - - map->ref_and_flags &= ~GNTMAP_device_map; - (void)__put_user(0, &uop->dev_bus_addr); - } rc = 0; *va = virt; diff -r 483ac5017c9c -r 3a8f27c6d56c xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Sat Aug 20 00:47:24 2005 +++ b/xen/arch/x86/domain_build.c Mon Aug 22 18:00:37 2005 @@ -69,11 +69,21 @@ #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) #define round_pgdown(_p) ((_p)&PAGE_MASK) -static struct pfn_info *alloc_largest(struct domain *d, unsigned long max) +static struct pfn_info *alloc_chunk(struct domain *d, unsigned long max_pages) { struct pfn_info *page; - unsigned int order = get_order(max * PAGE_SIZE); - if ( (max & (max-1)) != 0 ) + unsigned int order; + /* + * Allocate up to 2MB at a time: + * 1. This prevents overflow of get_order() when allocating more than + * 4GB to domain 0 on a PAE machine. + * 2. It prevents allocating very large chunks from DMA pools before + * the >4GB pool is fully depleted. + */ + if ( max_pages > (2UL << (20 - PAGE_SHIFT)) ) + max_pages = 2UL << (20 - PAGE_SHIFT); + order = get_order(max_pages << PAGE_SHIFT); + if ( (max_pages & (max_pages-1)) != 0 ) order--; while ( (page = alloc_domheap_pages(d, order, 0)) == NULL ) if ( order-- == 0 ) @@ -608,7 +618,7 @@ } while ( pfn < nr_pages ) { - if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL ) + if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL ) panic("Not enough RAM for DOM0 reservation.\n"); while ( pfn < d->tot_pages ) { diff -r 483ac5017c9c -r 3a8f27c6d56c xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Sat Aug 20 00:47:24 2005 +++ b/xen/arch/x86/mm.c Mon Aug 22 18:00:37 2005 @@ -2176,7 +2176,7 @@ } } - *(unsigned long *)va = req.val; + *(intpte_t *)va = req.val; okay = 1; if ( shadow_mode_enabled(d) ) @@ -2386,7 +2386,7 @@ } /* Delete pagetable entry. 
*/ - if ( unlikely(__put_user(0, (unsigned long *)va))) + if ( unlikely(__put_user(0, (intpte_t *)va))) { DPRINTK("Cannot delete PTE entry at %p.\n", va); put_page_type(page); @@ -2446,12 +2446,11 @@ int clear_grant_va_mapping(unsigned long addr, unsigned long frame) { - l1_pgentry_t *pl1e; - unsigned long _ol1e; + l1_pgentry_t *pl1e, ol1e; pl1e = &linear_pg_table[l1_linear_offset(addr)]; - if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) ) + if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) ) { DPRINTK("Could not find PTE entry for address %lx\n", addr); return GNTST_general_error; @@ -2461,15 +2460,15 @@ * Check that the virtual address supplied is actually mapped to * frame. */ - if ( unlikely((_ol1e >> PAGE_SHIFT) != frame )) + if ( unlikely(l1e_get_pfn(ol1e) != frame) ) { DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n", - _ol1e, addr, frame); + l1e_get_pfn(ol1e), addr, frame); return GNTST_general_error; } /* Delete pagetable entry. */ - if ( unlikely(__put_user(0, (unsigned long *)pl1e))) + if ( unlikely(__put_user(0, &pl1e->l1)) ) { DPRINTK("Cannot delete PTE entry at %p.\n", (unsigned long *)pl1e); return GNTST_general_error; diff -r 483ac5017c9c -r 3a8f27c6d56c xen/common/grant_table.c --- a/xen/common/grant_table.c Sat Aug 20 00:47:24 2005 +++ b/xen/common/grant_table.c Mon Aug 22 18:00:37 2005 @@ -70,13 +70,13 @@ static int __gnttab_activate_grant_ref( - struct domain *mapping_d, /* IN */ + struct domain *mapping_d, /* IN */ struct vcpu *mapping_ed, - struct domain *granting_d, - grant_ref_t ref, - u16 dev_hst_ro_flags, - unsigned long addr, - unsigned long *pframe ) /* OUT */ + struct domain *granting_d, + grant_ref_t ref, + u16 dev_hst_ro_flags, + u64 addr, + unsigned long *pframe ) /* OUT */ { domid_t sdom; u16 sflags; @@ -336,14 +336,15 @@ gnttab_map_grant_ref_t *uop, unsigned long *va) { - domid_t dom; - grant_ref_t ref; - struct domain *ld, *rd; - struct vcpu *led; - u16 dev_hst_ro_flags; - int handle; - unsigned long 
frame = 0, addr; - int rc; + domid_t dom; + grant_ref_t ref; + struct domain *ld, *rd; + struct vcpu *led; + u16 dev_hst_ro_flags; + int handle; + u64 addr; + unsigned long frame = 0; + int rc; led = current; ld = led->domain; @@ -363,7 +364,7 @@ (!(dev_hst_ro_flags & GNTMAP_contains_pte) && unlikely(!__addr_ok(addr))) ) ) { - DPRINTK("Bad virtual address (%lx) or flags (%x).\n", + DPRINTK("Bad virtual address (%"PRIx64") or flags (%"PRIx16").\n", addr, dev_hst_ro_flags); (void)__put_user(GNTST_bad_virt_addr, &uop->handle); return GNTST_bad_gntref; @@ -450,7 +451,7 @@ = (ref << MAPTRACK_REF_SHIFT) | (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK); - (void)__put_user(frame, &uop->dev_bus_addr); + (void)__put_user((u64)frame << PAGE_SHIFT, &uop->dev_bus_addr); if ( ( dev_hst_ro_flags & GNTMAP_host_map ) && !( dev_hst_ro_flags & GNTMAP_contains_pte) ) @@ -492,28 +493,30 @@ gnttab_unmap_grant_ref_t *uop, unsigned long *va) { - domid_t dom; - grant_ref_t ref; - u16 handle; - struct domain *ld, *rd; - + domid_t dom; + grant_ref_t ref; + u16 handle; + struct domain *ld, *rd; active_grant_entry_t *act; - grant_entry_t *sha; + grant_entry_t *sha; grant_mapping_t *map; - u16 flags; - s16 rc = 1; - unsigned long frame, addr; + u16 flags; + s16 rc = 1; + u64 addr, dev_bus_addr; + unsigned long frame; ld = current->domain; /* Bitwise-OR avoids short-circuiting which screws control flow. 
*/ if ( unlikely(__get_user(addr, &uop->host_addr) | - __get_user(frame, &uop->dev_bus_addr) | + __get_user(dev_bus_addr, &uop->dev_bus_addr) | __get_user(handle, &uop->handle)) ) { DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n"); return -EFAULT; /* don't set status */ } + + frame = (unsigned long)(dev_bus_addr >> PAGE_SHIFT); map = &ld->grant_table->maptrack[handle]; @@ -552,15 +555,6 @@ if ( frame == 0 ) { frame = act->frame; - } - else if ( frame == GNTUNMAP_DEV_FROM_VIRT ) - { - if ( !( flags & GNTMAP_device_map ) ) - PIN_FAIL(unmap_out, GNTST_bad_dev_addr, - "Bad frame number: frame not mapped for dev access.\n"); - frame = act->frame; - - /* Frame will be unmapped for device access below if virt addr okay. */ } else { @@ -596,15 +590,6 @@ act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc : GNTPIN_hstw_inc; - - if ( frame == GNTUNMAP_DEV_FROM_VIRT ) - { - act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc - : GNTPIN_devw_inc; - - map->ref_and_flags &= ~GNTMAP_device_map; - (void)__put_user(0, &uop->dev_bus_addr); - } rc = 0; if ( !( flags & GNTMAP_contains_pte) ) diff -r 483ac5017c9c -r 3a8f27c6d56c xen/include/asm-x86/x86_32/page-3level.h --- a/xen/include/asm-x86/x86_32/page-3level.h Sat Aug 20 00:47:24 2005 +++ b/xen/include/asm-x86/x86_32/page-3level.h Mon Aug 22 18:00:37 2005 @@ -63,7 +63,7 @@ /* Extract flags into 32-bit integer, or turn 32-bit flags into a pte mask. 
*/ #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF)) -#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 40) | ((x) & 0xFFF)) +#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF)) #define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */ #define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */ diff -r 483ac5017c9c -r 3a8f27c6d56c xen/include/asm-x86/x86_32/uaccess.h --- a/xen/include/asm-x86/x86_32/uaccess.h Sat Aug 20 00:47:24 2005 +++ b/xen/include/asm-x86/x86_32/uaccess.h Mon Aug 22 18:00:37 2005 @@ -22,7 +22,11 @@ #define array_access_ok(addr,count,size) \ (likely(count < (~0UL/size)) && access_ok(addr,count*size)) +/* Undefined function to catch size mismatches on 64-bit get_user/put_user. */ +extern void __uaccess_var_not_u64(void); + #define __put_user_u64(x, addr, retval, errret) \ + if (sizeof(x) != 8) __uaccess_var_not_u64(); \ __asm__ __volatile__( \ "1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ @@ -52,6 +56,7 @@ } while (0) #define __get_user_u64(x, addr, retval, errret) \ + if (sizeof(x) != 8) __uaccess_var_not_u64(); \ __asm__ __volatile__( \ "1: movl 0(%2),%%eax\n" \ "2: movl 4(%2),%%edx\n" \ diff -r 483ac5017c9c -r 3a8f27c6d56c xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Sat Aug 20 00:47:24 2005 +++ b/xen/include/public/dom0_ops.h Mon Aug 22 18:00:37 2005 @@ -19,7 +19,7 @@ * This makes sure that old versions of dom0 tools will stop working in a * well-defined way (rather than crashing the machine, for instance). 
*/ -#define DOM0_INTERFACE_VERSION 0xAAAA100F +#define DOM0_INTERFACE_VERSION 0xAAAA1010 /************************************************************************/ diff -r 483ac5017c9c -r 3a8f27c6d56c xen/include/public/grant_table.h --- a/xen/include/public/grant_table.h Sat Aug 20 00:47:24 2005 +++ b/xen/include/public/grant_table.h Mon Aug 22 18:00:37 2005 @@ -183,8 +183,6 @@ s16 status; /* GNTST_* */ } gnttab_unmap_grant_ref_t; -#define GNTUNMAP_DEV_FROM_VIRT (~0U) - /* * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least * <nr_frames> pages. The frame addresses are written to the <frame_list>. diff -r 483ac5017c9c -r 3a8f27c6d56c xen/include/public/io/blkif.h --- a/xen/include/public/io/blkif.h Sat Aug 20 00:47:24 2005 +++ b/xen/include/public/io/blkif.h Mon Aug 22 18:00:37 2005 @@ -36,11 +36,7 @@ unsigned long id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ /* @f_a_s[4:0]=last_sect ; @f_a_s[9:5]=first_sect */ -#ifdef CONFIG_XEN_BLKDEV_GRANT /* @f_a_s[:16]= grant reference (16 bits) */ -#else - /* @f_a_s[:12]=@frame: machine page frame number. */ -#endif /* @first_sect: first sector in frame to transfer (inclusive). */ /* @last_sect: last sector in frame to transfer (inclusive). 
*/ unsigned long frame_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -50,10 +46,8 @@ #define blkif_first_sect(_fas) (((_fas)>>5)&31) #define blkif_last_sect(_fas) ((_fas)&31) -#ifdef CONFIG_XEN_BLKDEV_GRANT #define blkif_fas_from_gref(_gref, _fs, _ls) (((_gref)<<16)|((_fs)<<5)|(_ls)) #define blkif_gref_from_fas(_fas) ((_fas)>>16) -#endif typedef struct blkif_response { unsigned long id; /* copied from request */ diff -r 483ac5017c9c -r 3a8f27c6d56c xen/include/public/physdev.h --- a/xen/include/public/physdev.h Sat Aug 20 00:47:24 2005 +++ b/xen/include/public/physdev.h Mon Aug 22 18:00:37 2005 @@ -27,8 +27,8 @@ typedef struct physdevop_set_iobitmap { /* IN */ - char *bitmap; - u32 nr_ports; + u8 *bitmap; + u32 nr_ports; } physdevop_set_iobitmap_t; typedef struct physdevop_apic { _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |