[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merge in minor ia64 Makefile change from xen-unstable
# HG changeset patch # User djm@xxxxxxxxxxxxxxx # Node ID b53a65034532e790184c7d68a350879a5c6b2eec # Parent a4196568095c0551fa41dba7be6a57b008346b4d # Parent 55a5ad2f028d55758c6193b4fec970ee46a60ec1 Merge in minor ia64 Makefile change from xen-unstable diff -r a4196568095c -r b53a65034532 .hgignore --- a/.hgignore Fri Jul 29 18:52:33 2005 +++ b/.hgignore Fri Jul 29 20:25:03 2005 @@ -87,6 +87,11 @@ ^tools/cmdline/.*$ ^tools/cmdline/xen/.*$ ^tools/debugger/pdb/pdb$ +^tools/debugger/pdb/linux-[0-9.]*-module/.*\.ko$ +^tools/debugger/pdb/linux-[0-9.]*-module/.*\.mod.c$ +^tools/debugger/pdb/linux-[0-9.]*-module/\..*\.cmd$ +^tools/debugger/pdb/linux-[0-9.]*-module/.tmp_versions/.*$ +^tools/debugger/pdb/._bcdi/.*$ ^tools/firmware/acpi/acpigen$ ^tools/firmware/.*\.bin$ ^tools/firmware/.*\.sym$ @@ -157,10 +162,6 @@ ^xen/arch/x86/asm-offsets\.s$ ^xen/arch/x86/boot/mkelf32$ ^xen/ddb/.*$ -^xen/drivers/pci/classlist\.h$ -^xen/drivers/pci/devlist\.h$ -^xen/drivers/pci/gen-devlist$ -^xen/figlet/figlet$ ^xen/include/asm$ ^xen/include/asm-.*/asm-offsets\.h$ ^xen/include/hypervisor-ifs/arch$ @@ -170,8 +171,8 @@ ^xen/include/xen/banner\.h$ ^xen/include/xen/compile\.h$ ^xen/tags$ -^xen/tools/elf-reloc$ ^xen/tools/figlet/figlet$ +^xen/tools/symbols$ ^xen/xen$ ^xen/xen-syms$ ^xen/xen\..*$ diff -r a4196568095c -r b53a65034532 Makefile --- a/Makefile Fri Jul 29 18:52:33 2005 +++ b/Makefile Fri Jul 29 20:25:03 2005 @@ -163,7 +163,7 @@ uninstall: DESTDIR= uninstall: D=$(DESTDIR) uninstall: - [ -d $(D)/etc/xen ] && mv -f $(D)/etc/xen $(D)/etc/xen.old-$(date +%s) + [ -d $(D)/etc/xen ] && mv -f $(D)/etc/xen $(D)/etc/xen.old-`date +%s` rm -rf $(D)/etc/init.d/xend* rm -rf $(D)/usr/$(LIBDIR)/libxc* $(D)/usr/$(LIBDIR)/libxutil* rm -rf $(D)/usr/$(LIBDIR)/python/xen $(D)/usr/include/xen diff -r a4196568095c -r b53a65034532 buildconfigs/Rules.mk --- a/buildconfigs/Rules.mk Fri Jul 29 18:52:33 2005 +++ b/buildconfigs/Rules.mk Fri Jul 29 20:25:03 2005 @@ -101,7 +101,7 @@ rm -rf tmp-$@ %-mrproper: %-mrproper-extra - rm -rf pristine-$* ref-$* $*.tar.bz2 + rm -rf pristine-$(*)* ref-$(*)* $*.tar.bz2 rm -rf $*-xen.patch netbsd-%-mrproper-extra: @@ -111,5 +111,12 @@ %-mrproper-extra: @: # do nothing +config-update-pae: +ifeq ($(XEN_TARGET_X86_PAE),y) + sed -e 's!^CONFIG_HIGHMEM4G=y$$!\# CONFIG_HIGHMEM4G is not set!;s!^\# CONFIG_HIGHMEM64G is not set$$!CONFIG_HIGHMEM64G=y!' $(CONFIG_FILE) > $(CONFIG_FILE)- && mv $(CONFIG_FILE)- $(CONFIG_FILE) +else + @: # do nothing yet +endif + # never delete any intermediate files. .SECONDARY: diff -r a4196568095c -r b53a65034532 buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Fri Jul 29 18:52:33 2005 +++ b/buildconfigs/mk.linux-2.6-xen Fri Jul 29 20:25:03 2005 @@ -32,6 +32,8 @@ cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \ || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig_$(XEN_TARGET_ARCH) \ $(LINUX_DIR)/.config + # See if we need to munge config to enable PAE + $(MAKE) CONFIG_FILE=$(LINUX_DIR)/.config -f buildconfigs/Rules.mk config-update-pae # Patch kernel Makefile to set EXTRAVERSION ( cd $(LINUX_DIR) ; \ sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \ diff -r a4196568095c -r b53a65034532 buildconfigs/mk.linux-2.6-xen0 --- a/buildconfigs/mk.linux-2.6-xen0 Fri Jul 29 18:52:33 2005 +++ b/buildconfigs/mk.linux-2.6-xen0 Fri Jul 29 20:25:03 2005 @@ -32,6 +32,8 @@ cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \ || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig_$(XEN_TARGET_ARCH) \ $(LINUX_DIR)/.config + # See if we need to munge config to enable PAE + $(MAKE) CONFIG_FILE=$(LINUX_DIR)/.config -f buildconfigs/Rules.mk config-update-pae # Patch kernel Makefile to set EXTRAVERSION ( cd $(LINUX_DIR) ; \ sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \ diff -r a4196568095c -r b53a65034532 buildconfigs/mk.linux-2.6-xenU --- a/buildconfigs/mk.linux-2.6-xenU Fri Jul 29 18:52:33 2005 +++ b/buildconfigs/mk.linux-2.6-xenU Fri Jul 29 20:25:03 2005 @@ -32,6 +32,8 @@ cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \ || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig_$(XEN_TARGET_ARCH) \ $(LINUX_DIR)/.config + # See if we need to munge config to enable PAE + $(MAKE) CONFIG_FILE=$(LINUX_DIR)/.config -f buildconfigs/Rules.mk config-update-pae # Patch kernel Makefile to set EXTRAVERSION ( cd $(LINUX_DIR) ; \ sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \ diff -r a4196568095c -r b53a65034532 docs/misc/shype4xen_readme.txt --- a/docs/misc/shype4xen_readme.txt Fri Jul 29 18:52:33 2005 +++ b/docs/misc/shype4xen_readme.txt Fri Jul 29 20:25:03 2005 @@ -567,4 +567,22 @@ Our policy interface enables managers to create a single binary policy file in a trusted environment and distributed it to multiple systems for enforcement. +5. Booting with a binary policy: +******************************** +The grub configuration file can be adapted to boot the hypervisor with an +already active policy. To do this, a binary policy file - this can be +the same file as used by the policy_tool - should be placed into the boot +partition. The following entry from the grub configuration file shows how +a binary policy can be added to the system during boot time. Note that the +binary policy must be of the same type that the hypervisor was compiled +for. The policy module line should also only be added as the last module +line if XEN was compiled with the access control module (ACM). + +title XEN0 3.0 Devel + kernel /xen.gz dom0_mem=400000 + module /vmlinuz-2.6.12-xen0 root=/dev/hda2 ro console=tty0 + module /initrd-2.6.12-xen0.img + module /xen_sample_policy.bin + + ====================end-of file======================================= diff -r a4196568095c -r b53a65034532 docs/src/user.tex --- a/docs/src/user.tex Fri Jul 29 18:52:33 2005 +++ b/docs/src/user.tex Fri Jul 29 20:25:03 2005 @@ -930,12 +930,12 @@ \subsection{Setting memory footprints from dom0} The machine administrator can request that a domain alter its memory -footprint using the \path{xm balloon} command. For instance, we can +footprint using the \path{xm set-mem} command. For instance, we can request that our example ttylinux domain reduce its memory footprint to 32 megabytes. \begin{verbatim} -# xm balloon ttylinux 32 +# xm set-mem ttylinux 32 \end{verbatim} We can now see the result of this in the output of \path{xm list}: @@ -951,16 +951,16 @@ can restore the domain to its original size using the command line: \begin{verbatim} -# xm balloon ttylinux 64 +# xm set-mem ttylinux 64 \end{verbatim} \subsection{Setting memory footprints from within a domain} -The virtual file \path{/proc/xen/memory\_target} allows the owner of a +The virtual file \path{/proc/xen/balloon} allows the owner of a domain to adjust their own memory footprint. Reading the file -(e.g. \path{cat /proc/xen/memory\_target}) prints out the current +(e.g. \path{cat /proc/xen/balloon}) prints out the current memory footprint of the domain. Writing the file -(e.g. \path{echo new\_target > /proc/xen/memory\_target}) requests +(e.g. \path{echo new\_target > /proc/xen/balloon}) requests that the kernel adjust the domain's memory footprint to a new value. \subsection{Setting memory limits} @@ -1285,7 +1285,7 @@ The available commands are as follows: \begin{description} -\item[balloon] Request a domain to adjust its memory footprint. +\item[set-mem] Request a domain to adjust its memory footprint. \item[create] Create a new domain. \item[destroy] Kill a domain immediately. \item[list] List running domains. diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/Kconfig --- a/linux-2.6-xen-sparse/arch/xen/Kconfig Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/Kconfig Fri Jul 29 20:25:03 2005 @@ -96,6 +96,20 @@ network interfaces within another guest OS. Unless you are building a dedicated device-driver domain, or your master control domain (domain 0), then you almost certainly want to say Y here. + +config XEN_NETDEV_GRANT_TX + bool "Grant table substrate for net drivers tx path (DANGEROUS)" + default n + help + This introduces the use of grant tables as a data exhange mechanism + between the frontend and backend network drivers. + +config XEN_NETDEV_GRANT_RX + bool "Grant table substrate for net drivers rx path (DANGEROUS)" + default n + help + This introduces the use of grant tables as a data exhange mechanism + between the frontend and backend network drivers. config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER bool "Pipelined transmitter (DANGEROUS)" diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Fri Jul 29 20:25:03 2005 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.12-xen0 -# Sat Jul 9 09:19:47 2005 +# Mon Jul 25 09:48:34 2005 # CONFIG_XEN=y CONFIG_ARCH_XEN=y @@ -18,6 +18,8 @@ CONFIG_XEN_NETDEV_BACKEND=y CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y +# CONFIG_XEN_NETDEV_GRANT_TX is not set +# CONFIG_XEN_NETDEV_GRANT_RX is not set # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set # CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_SHADOW_MODE is not set @@ -176,38 +178,12 @@ # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set - -# -# Kernel hacking -# -# CONFIG_PRINTK_TIME is not set -CONFIG_DEBUG_KERNEL=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SCHEDSTATS is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_PREEMPT is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_SPINLOCK_SLEEP is not set -# CONFIG_DEBUG_KOBJECT is not set -# CONFIG_DEBUG_HIGHMEM is not set -CONFIG_DEBUG_BUGVERBOSE=y -# CONFIG_DEBUG_INFO is not set -# CONFIG_DEBUG_FS is not set -# CONFIG_FRAME_POINTER is not set -CONFIG_EARLY_PRINTK=y -# CONFIG_DEBUG_STACKOVERFLOW is not set -# CONFIG_KPROBES is not set -# CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_DEBUG_PAGEALLOC is not set -# CONFIG_4KSTACKS is not set -CONFIG_X86_FIND_SMP_CONFIG=y -CONFIG_X86_MPPARSE=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_X86_BIOS_REBOOT=y CONFIG_PC=y CONFIG_SECCOMP=y +CONFIG_EARLY_PRINTK=y # # Executable file formats @@ -1274,3 +1250,29 @@ CONFIG_CRC32=y CONFIG_LIBCRC32C=y CONFIG_ZLIB_INFLATE=y + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_FRAME_POINTER is not set +# CONFIG_DEBUG_STACKOVERFLOW is not set +# CONFIG_KPROBES is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_4KSTACKS is not set +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Fri Jul 29 20:25:03 2005 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.12-xenU -# Sun Jul 10 17:32:04 2005 +# Mon Jul 25 10:06:06 2005 # CONFIG_XEN=y CONFIG_ARCH_XEN=y @@ -15,6 +15,8 @@ CONFIG_XEN_BLKDEV_GRANT=y CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y +# CONFIG_XEN_NETDEV_GRANT_TX is not set +# CONFIG_XEN_NETDEV_GRANT_RX is not set # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set # CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_SHADOW_MODE is not set diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Fri Jul 29 20:25:03 2005 @@ -19,7 +19,7 @@ s-obj-y := obj-y += cpu/ -obj-y += timers/ +#obj-y += timers/ obj-$(CONFIG_ACPI_BOOT) += acpi/ #c-obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o c-obj-$(CONFIG_MCA) += mca.o diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Fri Jul 29 20:25:03 2005 @@ -613,8 +613,6 @@ xorl %edx,%edx # error code 0 movl %esp,%eax # pt_regs pointer call do_debug - testl %eax,%eax - jnz restore_all jmp ret_from_exception #if 0 /* XEN */ @@ -669,8 +667,6 @@ xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_int3 - testl %eax,%eax - jnz restore_all jmp ret_from_exception ENTRY(overflow) diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Fri Jul 29 20:25:03 2005 @@ -9,6 +9,9 @@ .ascii ",PAE=yes" #else .ascii ",PAE=no" +#endif +#ifdef CONFIG_XEN_SHADOW_MODE + .ascii ",SHADOW=translate" #endif .ascii ",LOADER=generic" .byte 0 diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c Fri Jul 29 20:25:03 2005 @@ -263,7 +263,6 @@ void fixup_irqs(cpumask_t map) { unsigned int irq; - static int warned; for (irq = 0; irq < NR_IRQS; irq++) { cpumask_t mask; diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c Fri Jul 29 20:25:03 2005 @@ -748,8 +748,10 @@ || (mpf->mpf_specification == 4)) ) { smp_found_config = 1; +#ifndef CONFIG_XEN printk(KERN_INFO "found SMP MP-table at %08lx\n", virt_to_phys(mpf)); + reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); if (mpf->mpf_physptr) { /* * We cannot access to MPC table to compute @@ -766,6 +768,10 @@ size = end - mpf->mpf_physptr; reserve_bootmem(mpf->mpf_physptr, size); } +#else + printk(KERN_INFO "found SMP MP-table at %08lx\n", + ((unsigned long)bp - (unsigned long)isa_bus_to_virt(base)) + base); +#endif mpf_found = mpf; return 1; @@ -809,9 +815,11 @@ * MP1.4 SPEC states to only scan first 1K of 4K EBDA. */ +#ifndef CONFIG_XEN address = get_bios_ebda(); if (address) smp_scan_config(address, 0x400); +#endif } /* -------------------------------------------------------------------------- diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Fri Jul 29 20:25:03 2005 @@ -1060,6 +1060,7 @@ } } +#ifndef CONFIG_XEN /* * workaround for Dell systems that neglect to reserve EBDA */ @@ -1070,6 +1071,7 @@ if (addr) reserve_bootmem(addr, PAGE_SIZE); } +#endif #ifndef CONFIG_DISCONTIGMEM void __init setup_bootmem_allocator(void); @@ -1152,6 +1154,13 @@ reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) + bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); +#ifndef CONFIG_XEN + /* + * reserve physical page 0 - it's a special BIOS page on many boxes, + * enabling clean reboots, SMP operation, laptop functions. + */ + reserve_bootmem(0, PAGE_SIZE); + /* reserve EBDA region, it's a 4K region */ reserve_ebda_region(); @@ -1176,6 +1185,7 @@ */ acpi_reserve_bootmem(); #endif +#endif /* !CONFIG_XEN */ #ifdef CONFIG_BLK_DEV_INITRD if (xen_start_info.mod_start) { diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Fri Jul 29 20:25:03 2005 @@ -104,24 +104,16 @@ struct timer_opts *cur_timer = &timer_tsc; /* These are peridically updated in shared_info, and then copied here. */ -u32 shadow_tsc_stamp; -u64 shadow_system_time; -static u32 shadow_time_version; +struct shadow_time_info { + u64 tsc_timestamp; /* TSC at last update of time vals. */ + u64 system_timestamp; /* Time, in nanosecs, since boot. */ + u32 tsc_to_nsec_mul; + u32 tsc_to_usec_mul; + int tsc_shift; + u32 version; +}; +static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); static struct timeval shadow_tv; - -/* - * We use this to ensure that gettimeofday() is monotonically increasing. We - * only break this guarantee if the wall clock jumps backwards "a long way". - */ -static struct timeval last_seen_tv = {0,0}; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST -/* Periodically propagate synchronised time base to the RTC and to Xen. */ -static long last_rtc_update, last_update_to_xen; -#endif - -/* Periodically take synchronised time base from Xen, if we need it. */ -static long last_update_from_xen; /* UTC seconds when last read Xen clock. */ /* Keep track of last time we did processing/updating of jiffies and xtime. */ static u64 processed_system_time; /* System time (ns) at last processing. */ @@ -164,26 +156,149 @@ #define INDEPENDENT_WALLCLOCK() \ (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN)) +int tsc_disable __initdata = 0; + +static void delay_tsc(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do + { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + +struct timer_opts timer_tsc = { + .name = "tsc", + .delay = delay_tsc, +}; + +static inline u32 down_shift(u64 time, int shift) +{ + if ( shift < 0 ) + return (u32)(time >> -shift); + return (u32)((u32)time << shift); +} + +/* + * 32-bit multiplication of integer multiplicand and fractional multiplier + * yielding 32-bit integer product. + */ +static inline u32 mul_frac(u32 multiplicand, u32 multiplier) +{ + u32 product_int, product_frac; + __asm__ ( + "mul %3" + : "=a" (product_frac), "=d" (product_int) + : "0" (multiplicand), "r" (multiplier) ); + return product_int; +} + +void init_cpu_khz(void) +{ + u64 __cpu_khz = 1000000ULL << 32; + struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_time[0]; + do_div(__cpu_khz, info->tsc_to_system_mul); + cpu_khz = down_shift(__cpu_khz, -info->tsc_shift); + printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); +} + +static u64 get_nsec_offset(struct shadow_time_info *shadow) +{ + u64 now; + u32 delta; + rdtscll(now); + delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift); + return mul_frac(delta, shadow->tsc_to_nsec_mul); +} + +static unsigned long get_usec_offset(struct shadow_time_info *shadow) +{ + u64 now; + u32 delta; + rdtscll(now); + delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift); + return mul_frac(delta, shadow->tsc_to_usec_mul); +} + +static void update_wallclock(void) +{ + shared_info_t *s = HYPERVISOR_shared_info; + long wtm_nsec, xtime_nsec; + time_t wtm_sec, xtime_sec; + u64 tmp, usec; + + shadow_tv.tv_sec = s->wc_sec; + shadow_tv.tv_usec = s->wc_usec; + + if (INDEPENDENT_WALLCLOCK()) + return; + + if ((time_status & STA_UNSYNC) != 0) + return; + + /* Adjust wall-clock time base based on wall_jiffies ticks. */ + usec = processed_system_time; + do_div(usec, 1000); + usec += (u64)shadow_tv.tv_sec * 1000000ULL; + usec += (u64)shadow_tv.tv_usec; + usec -= (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ); + + /* Split wallclock base into seconds and nanoseconds. */ + tmp = usec; + xtime_nsec = do_div(tmp, 1000000) * 1000ULL; + xtime_sec = (time_t)tmp; + + wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec); + wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec); + + set_normalized_timespec(&xtime, xtime_sec, xtime_nsec); + set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); +} + /* * Reads a consistent set of time-base values from Xen, into a shadow data * area. Must be called with the xtime_lock held for writing. */ static void __get_time_values_from_xen(void) { - shared_info_t *s = HYPERVISOR_shared_info; + shared_info_t *s = HYPERVISOR_shared_info; + struct vcpu_time_info *src; + struct shadow_time_info *dst; + + src = &s->vcpu_time[smp_processor_id()]; + dst = &per_cpu(shadow_time, smp_processor_id()); do { - shadow_time_version = s->time_version2; + dst->version = src->time_version2; rmb(); - shadow_tv.tv_sec = s->wc_sec; - shadow_tv.tv_usec = s->wc_usec; - shadow_tsc_stamp = (u32)s->tsc_timestamp; - shadow_system_time = s->system_time; + dst->tsc_timestamp = src->tsc_timestamp; + dst->system_timestamp = src->system_time; + dst->tsc_to_nsec_mul = src->tsc_to_system_mul; + dst->tsc_shift = src->tsc_shift; rmb(); } - while (shadow_time_version != s->time_version1); - - cur_timer->mark_offset(); + while (dst->version != src->time_version1); + + dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000; + + if ((shadow_tv.tv_sec != s->wc_sec) || + (shadow_tv.tv_usec != s->wc_usec)) + update_wallclock(); +} + +static inline int time_values_up_to_date(int cpu) +{ + struct vcpu_time_info *src; + struct shadow_time_info *dst; + + src = &HYPERVISOR_shared_info->vcpu_time[smp_processor_id()]; + dst = &per_cpu(shadow_time, smp_processor_id()); + + return (dst->version == src->time_version2); } #define TIME_VALUES_UP_TO_DATE \ @@ -229,13 +344,18 @@ unsigned long max_ntp_tick; unsigned long flags; s64 nsec; + unsigned int cpu; + struct shadow_time_info *shadow; + + cpu = get_cpu(); + shadow = &per_cpu(shadow_time, cpu); do { unsigned long lost; seq = read_seqbegin(&xtime_lock); - usec = cur_timer->get_offset(); + usec = get_usec_offset(shadow); lost = jiffies - wall_jiffies; /* @@ -256,11 +376,11 @@ sec = xtime.tv_sec; usec += (xtime.tv_nsec / NSEC_PER_USEC); - nsec = shadow_system_time - processed_system_time; + nsec = shadow->system_timestamp - processed_system_time; __normalize_time(&sec, &nsec); usec += (long)nsec / NSEC_PER_USEC; - if (unlikely(!TIME_VALUES_UP_TO_DATE)) { + if (unlikely(!time_values_up_to_date(cpu))) { /* * We may have blocked for a long time, * rendering our calculations invalid @@ -275,19 +395,11 @@ } } while (read_seqretry(&xtime_lock, seq)); + put_cpu(); + while (usec >= USEC_PER_SEC) { usec -= USEC_PER_SEC; sec++; - } - - /* Ensure that time-of-day is monotonically increasing. */ - if ((sec < last_seen_tv.tv_sec) || - ((sec == last_seen_tv.tv_sec) && (usec < last_seen_tv.tv_usec))) { - sec = last_seen_tv.tv_sec; - usec = last_seen_tv.tv_usec; - } else { - last_seen_tv.tv_sec = sec; - last_seen_tv.tv_usec = usec; } tv->tv_sec = sec; @@ -302,12 +414,17 @@ long wtm_nsec; s64 nsec; struct timespec xentime; + unsigned int cpu; + struct shadow_time_info *shadow; if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; if (!INDEPENDENT_WALLCLOCK()) return 0; /* Silent failure? */ + + cpu = get_cpu(); + shadow = &per_cpu(shadow_time, cpu); write_seqlock_irq(&xtime_lock); @@ -317,9 +434,8 @@ * be stale, so we can retry with fresh ones. */ again: - nsec = (s64)tv->tv_nsec - - ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC); - if (unlikely(!TIME_VALUES_UP_TO_DATE)) { + nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow); + if (unlikely(!time_values_up_to_date(cpu))) { __get_time_values_from_xen(); goto again; } @@ -335,7 +451,7 @@ */ nsec -= (jiffies - wall_jiffies) * TICK_NSEC; - nsec -= (shadow_system_time - processed_system_time); + nsec -= (shadow->system_timestamp - processed_system_time); __normalize_time(&sec, &nsec); wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); @@ -349,23 +465,20 @@ time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; - /* Reset all our running time counts. They make no sense now. */ - last_seen_tv.tv_sec = 0; - last_update_from_xen = 0; - #ifdef CONFIG_XEN_PRIVILEGED_GUEST if (xen_start_info.flags & SIF_INITDOMAIN) { dom0_op_t op; - last_rtc_update = last_update_to_xen = 0; op.cmd = DOM0_SETTIME; op.u.settime.secs = xentime.tv_sec; op.u.settime.usecs = xentime.tv_nsec / NSEC_PER_USEC; - op.u.settime.system_time = shadow_system_time; + op.u.settime.system_time = shadow->system_timestamp; write_sequnlock_irq(&xtime_lock); HYPERVISOR_dom0_op(&op); } else #endif write_sequnlock_irq(&xtime_lock); + + put_cpu(); clock_was_set(); return 0; @@ -403,9 +516,30 @@ */ unsigned long long monotonic_clock(void) { - return cur_timer->monotonic_clock(); + int cpu = get_cpu(); + struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); + s64 off; + unsigned long flags; + + for ( ; ; ) { + off = get_nsec_offset(shadow); + if (time_values_up_to_date(cpu)) + break; + write_seqlock_irqsave(&xtime_lock, flags); + __get_time_values_from_xen(); + write_sequnlock_irqrestore(&xtime_lock, flags); + } + + put_cpu(); + + return shadow->system_timestamp + off; } EXPORT_SYMBOL(monotonic_clock); + +unsigned long long sched_clock(void) +{ + return monotonic_clock(); +} #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) unsigned long profile_pc(struct pt_regs *regs) @@ -427,27 +561,26 @@ static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - time_t wtm_sec, sec; - s64 delta, delta_cpu, nsec; - long sec_diff, wtm_nsec; + s64 delta, delta_cpu; int cpu = smp_processor_id(); + struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); do { __get_time_values_from_xen(); - delta = delta_cpu = (s64)shadow_system_time + - ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC); + delta = delta_cpu = + shadow->system_timestamp + get_nsec_offset(shadow); delta -= processed_system_time; delta_cpu -= per_cpu(processed_system_time, cpu); } - while (!TIME_VALUES_UP_TO_DATE); + while (!time_values_up_to_date(cpu)); if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) { printk("Timer ISR/%d: Time went backwards: " "delta=%lld cpu_delta=%lld shadow=%lld " "off=%lld processed=%lld cpu_processed=%lld\n", - cpu, delta, delta_cpu, shadow_system_time, - ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), + cpu, delta, delta_cpu, shadow->system_timestamp, + (s64)get_nsec_offset(shadow), processed_system_time, per_cpu(processed_system_time, cpu)); for (cpu = 0; cpu < num_online_cpus(); cpu++) @@ -470,76 +603,6 @@ update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING, regs); } - - if (cpu != 0) - return; - - /* - * Take synchronised time from Xen once a minute if we're not - * synchronised ourselves, and we haven't chosen to keep an independent - * time base. - */ - if (!INDEPENDENT_WALLCLOCK() && - ((time_status & STA_UNSYNC) != 0) && - (xtime.tv_sec > (last_update_from_xen + 60))) { - /* Adjust shadow for jiffies that haven't updated xtime yet. */ - shadow_tv.tv_usec -= - (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ); - HANDLE_USEC_UNDERFLOW(shadow_tv); - - /* - * Reset our running time counts if they are invalidated by - * a warp backwards of more than 500ms. - */ - sec_diff = xtime.tv_sec - shadow_tv.tv_sec; - if (unlikely(abs(sec_diff) > 1) || - unlikely(((sec_diff * USEC_PER_SEC) + - (xtime.tv_nsec / NSEC_PER_USEC) - - shadow_tv.tv_usec) > 500000)) { -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - last_rtc_update = last_update_to_xen = 0; -#endif - last_seen_tv.tv_sec = 0; - } - - /* Update our unsynchronised xtime appropriately. */ - sec = shadow_tv.tv_sec; - nsec = shadow_tv.tv_usec * NSEC_PER_USEC; - - __normalize_time(&sec, &nsec); - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - last_update_from_xen = sec; - } - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if (!(xen_start_info.flags & SIF_INITDOMAIN)) - return; - - /* Send synchronised time to Xen approximately every minute. */ - if (((time_status & STA_UNSYNC) == 0) && - (xtime.tv_sec > (last_update_to_xen + 60))) { - dom0_op_t op; - struct timeval tv; - - tv.tv_sec = xtime.tv_sec; - tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; - tv.tv_usec += (jiffies - wall_jiffies) * (USEC_PER_SEC/HZ); - HANDLE_USEC_OVERFLOW(tv); - - op.cmd = DOM0_SETTIME; - op.u.settime.secs = tv.tv_sec; - op.u.settime.usecs = tv.tv_usec; - op.u.settime.system_time = shadow_system_time; - HYPERVISOR_dom0_op(&op); - - last_update_to_xen = xtime.tv_sec; - } -#endif } /* @@ -731,12 +794,10 @@ xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); - processed_system_time = shadow_system_time; + processed_system_time = per_cpu(shadow_time, 0).system_timestamp; per_cpu(processed_system_time, 0) = processed_system_time; - if (timer_tsc_init.init(NULL) != 0) - BUG(); - printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); + init_cpu_khz(); #if defined(__x86_64__) vxtime.mode = VXTIME_TSC; @@ -807,21 +868,15 @@ /* No locking required. We are only CPU running, and interrupts are off. */ void time_resume(void) { - if (timer_tsc_init.init(NULL) != 0) - BUG(); + init_cpu_khz(); /* Get timebases for new environment. */ __get_time_values_from_xen(); /* Reset our own concept of passage of system time. */ - processed_system_time = shadow_system_time; + processed_system_time = + per_cpu(shadow_time, smp_processor_id()).system_timestamp; per_cpu(processed_system_time, 0) = processed_system_time; - - /* Accept a warp in UTC (wall-clock) time. */ - last_seen_tv.tv_sec = 0; - - /* Make sure we resync UTC time with Xen on next timer interrupt. */ - last_update_from_xen = 0; } #ifdef CONFIG_SMP @@ -832,7 +887,8 @@ do { seq = read_seqbegin(&xtime_lock); - per_cpu(processed_system_time, cpu) = shadow_system_time; + per_cpu(processed_system_time, cpu) = + per_cpu(shadow_time, cpu).system_timestamp; } while (read_seqretry(&xtime_lock, seq)); per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER); @@ -861,3 +917,13 @@ return 0; } __initcall(xen_sysctl_init); + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Fri Jul 29 20:25:03 2005 @@ -90,7 +90,9 @@ static int kstack_depth_to_print = 24; struct notifier_block *i386die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +DEFINE_SPINLOCK(die_notifier_lock); +EXPORT_SYMBOL(die_notifier_lock); +EXPORT_SYMBOL(i386die_chain); int register_die_notifier(struct notifier_block *nb) { diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Fri Jul 29 20:25:03 2005 @@ -296,7 +296,7 @@ /* 2. Get a new contiguous memory extent. */ BUG_ON(HYPERVISOR_dom_mem_op( - MEMOP_increase_reservation, &mfn, 1, order) != 1); + MEMOP_increase_reservation, &mfn, 1, order | (32<<8)) != 1); /* 3. Map the new extent in place of old pages. */ for (i = 0; i < (1<<order); i++) { diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c --- a/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c Fri Jul 29 20:25:03 2005 @@ -20,8 +20,6 @@ #include <linux/acpi.h> #include "pci.h" - -#define DBG printk #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) #define PIRQ_VERSION 0x0100 diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Fri Jul 29 20:25:03 2005 @@ -166,8 +166,14 @@ u16 flags; flags = shared[ref].flags; +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + /* + * But can't flags == (GTF_accept_transfer | GTF_transfer_completed) + * if gnttab_donate executes without interruption??? + */ +#else ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed)); - +#endif /* * If a transfer is committed then wait for the frame address to appear. * Otherwise invalidate the grant entry against future use. diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Jul 29 20:25:03 2005 @@ -172,8 +172,8 @@ { static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - static char *restart_argv[] = { "/sbin/shutdown", "-r", "now", NULL }; - static char *poweroff_argv[] = { "/sbin/halt", "-p", NULL }; + static char *restart_argv[] = { "/sbin/reboot", NULL }; + static char *poweroff_argv[] = { "/sbin/poweroff", NULL }; extern asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void *arg); @@ -187,7 +187,7 @@ switch ( shutting_down ) { case CMSG_SHUTDOWN_POWEROFF: - if ( execve("/sbin/halt", poweroff_argv, envp) < 0 ) + if ( execve("/sbin/poweroff", poweroff_argv, envp) < 0 ) { sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, @@ -197,7 +197,7 @@ break; case CMSG_SHUTDOWN_REBOOT: - if ( execve("/sbin/shutdown", restart_argv, envp) < 0 ) + if ( execve("/sbin/reboot", restart_argv, envp) < 0 ) { sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Fri Jul 29 20:25:03 2005 @@ -15,7 +15,7 @@ ptrace.o quirks.o syscall.o bootflag.o i386-obj-y := time.o -obj-y += ../../i386/kernel/timers/ +#obj-y += ../../i386/kernel/timers/ s-obj-y := diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Fri Jul 29 20:25:03 2005 @@ -145,7 +145,7 @@ __pte(0), 0); } - mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL; + mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) BUG(); #endif @@ -550,7 +550,7 @@ 0, blkif->domid); #ifdef CONFIG_XEN_BLKDEV_TAP_BE if ( blkif->is_blktap ) - mcl[i].args[3] = ID_TO_DOM(req->id); + mcl[i].args[MULTI_UVMDOMID_INDEX] = ID_TO_DOM(req->id); #endif phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = FOREIGN_FRAME(seg[i].buf >> PAGE_SHIFT); diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Jul 29 20:25:03 2005 @@ -1428,8 +1428,9 @@ int i; #ifdef CONFIG_XEN_BLKDEV_GRANT - if ( 0 > gnttab_alloc_grant_references( MAXIMUM_OUTSTANDING_BLOCK_REQS, - &gref_head, &gref_terminal )) + /* A grant for every ring slot, plus one for the ring itself. */ + if ( 0 > gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1, + &gref_head, &gref_terminal) ) return 1; printk(KERN_ALERT "Blkif frontend is using grant tables.\n"); #endif diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/netback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri Jul 29 20:25:03 2005 @@ -50,6 +50,9 @@ /* Private indexes into shared ring. */ NETIF_RING_IDX rx_req_cons; NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */ +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */ +#endif NETIF_RING_IDX tx_req_cons; NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */ diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri Jul 29 20:25:03 2005 @@ -18,6 +18,24 @@ #include <linux/delay.h> #endif +#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX) +#include <asm-xen/xen-public/grant_table.h> +#include <asm-xen/gnttab.h> +#ifdef GRANT_DEBUG +static void +dump_packet(int tag, u32 addr, unsigned char *p) +{ + int i; + + printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr); + for (i = 0; i < 20; i++) { + printk("%02x", p[i]); + } + printk("\n"); +} +#endif +#endif + static void netif_idx_release(u16 pending_idx); static void netif_page_release(struct page *page); static void make_tx_response(netif_t *netif, @@ -41,7 +59,9 @@ static struct sk_buff_head rx_queue; static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1]; static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE]; +#ifndef CONFIG_XEN_NETDEV_GRANT_RX static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE]; +#endif static unsigned char rx_notify[NR_EVENT_CHANNELS]; /* Don't currently gate addition of an interface to the tx scheduling list. */ @@ -68,7 +88,20 @@ static PEND_RING_IDX dealloc_prod, dealloc_cons; static struct sk_buff_head tx_queue; + +#ifdef CONFIG_XEN_NETDEV_GRANT_TX +static u16 grant_tx_ref[MAX_PENDING_REQS]; +#endif +#ifdef CONFIG_XEN_NETDEV_GRANT_RX +static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS]; +#endif +#ifndef CONFIG_XEN_NETDEV_GRANT_TX static multicall_entry_t tx_mcl[MAX_PENDING_REQS]; +#endif + +#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX) +#define GRANT_INVALID_REF (0xFFFF) +#endif static struct list_head net_schedule_list; static spinlock_t net_schedule_list_lock; @@ -91,6 +124,7 @@ return mfn; } +#ifndef CONFIG_XEN_NETDEV_GRANT_RX static void free_mfn(unsigned long mfn) { unsigned long flags; @@ -102,6 +136,7 @@ BUG(); spin_unlock_irqrestore(&mfn_lock, flags); } +#endif static inline void maybe_schedule_tx_action(void) { @@ -160,7 +195,17 @@ dev_kfree_skb(skb); skb = nskb; } - +#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef DEBUG_GRANT + printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n", + netif->rx->req_prod, + netif->rx_req_cons, + netif->rx->ring[ + MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id, + netif->rx->ring[ + MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref); +#endif +#endif netif->rx_req_cons++; netif_get(netif); @@ -201,7 +246,11 @@ u16 size, id, evtchn; multicall_entry_t *mcl; mmu_update_t *mmu; +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + gnttab_donate_t *gop; +#else struct mmuext_op *mmuext; +#endif unsigned long vdata, mdata, new_mfn; struct sk_buff_head rxq; struct sk_buff *skb; @@ -212,7 +261,12 @@ mcl = rx_mcl; mmu = rx_mmu; +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + gop = grant_rx_op; +#else mmuext = rx_mmuext; +#endif + while ( (skb = skb_dequeue(&rx_queue)) != NULL ) { netif = netdev_priv(skb->dev); @@ -228,7 +282,6 @@ skb_queue_head(&rx_queue, skb); break; } - /* * Set the new P2M table entry before reassigning the old data page. * Heed the comment in pgtable-2level.h:pte_page(). :-) @@ -239,6 +292,14 @@ pfn_pte_ma(new_mfn, PAGE_KERNEL), 0); mcl++; +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + gop->mfn = mdata >> PAGE_SHIFT; + gop->domid = netif->domid; + gop->handle = netif->rx->ring[ + MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref; + netif->rx_resp_prod_copy++; + gop++; +#else mcl->op = __HYPERVISOR_mmuext_op; mcl->args[0] = (unsigned long)mmuext; mcl->args[1] = 1; @@ -249,13 +310,16 @@ mmuext->cmd = MMUEXT_REASSIGN_PAGE; mmuext->mfn = mdata >> PAGE_SHIFT; mmuext++; - +#endif mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; mmu->val = __pa(vdata) >> PAGE_SHIFT; mmu++; __skb_queue_tail(&rxq, skb); +#ifdef DEBUG_GRANT + dump_packet('a', mdata, vdata); +#endif /* Filled the batch queue? */ if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) ) break; @@ -271,12 +335,24 @@ mcl->args[3] = DOMID_SELF; mcl++; - mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL; +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; +#else + mcl[-3].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; +#endif if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) ) BUG(); mcl = rx_mcl; +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate, + grant_rx_op, gop - grant_rx_op))) { + BUG(); + } + gop = grant_rx_op; +#else mmuext = rx_mmuext; +#endif while ( (skb = __skb_dequeue(&rxq)) != NULL ) { netif = netdev_priv(skb->dev); @@ -284,9 +360,12 @@ /* Rederive the machine addresses. */ new_mfn = mcl[0].args[1] >> PAGE_SHIFT; +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + mdata = (unsigned long)skb->data & ~PAGE_MASK; +#else mdata = ((mmuext[0].mfn << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK)); - +#endif atomic_set(&(skb_shinfo(skb)->dataref), 1); skb_shinfo(skb)->nr_frags = 0; skb_shinfo(skb)->frag_list = NULL; @@ -299,13 +378,16 @@ /* Check the reassignment error code. */ status = NETIF_RSP_OKAY; +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + BUG_ON(gop->status != 0); +#else if ( unlikely(mcl[1].result != 0) ) { DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid); free_mfn(mdata >> PAGE_SHIFT); status = NETIF_RSP_ERROR; } - +#endif evtchn = netif->evtchn; id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id; if ( make_rx_response(netif, id, status, mdata, @@ -318,9 +400,13 @@ netif_put(netif); dev_kfree_skb(skb); - +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + mcl++; + gop++; +#else mcl += 2; mmuext += 1; +#endif } while ( notify_nr != 0 ) @@ -404,21 +490,39 @@ netif_schedule_work(netif); } -static void net_tx_action(unsigned long unused) -{ - struct list_head *ent; - struct sk_buff *skb; +inline static void net_tx_action_dealloc(void) +{ +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + gnttab_unmap_grant_ref_t unmap_ops[MAX_PENDING_REQS]; + gnttab_unmap_grant_ref_t *gop; +#else + multicall_entry_t *mcl; +#endif + u16 pending_idx; + PEND_RING_IDX dc, dp; netif_t *netif; - netif_tx_request_t txreq; - u16 pending_idx; - NETIF_RING_IDX i; - multicall_entry_t *mcl; - PEND_RING_IDX dc, dp; - unsigned int data_len; - - if ( (dc = dealloc_cons) == (dp = dealloc_prod) ) - goto skip_dealloc; - + + dc = dealloc_cons; + dp = dealloc_prod; + +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + /* + * Free up any grants we have finished using + */ + gop = unmap_ops; + while (dc != dp) { + pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; + gop->host_virt_addr = MMAP_VADDR(pending_idx); + gop->dev_bus_addr = 0; + gop->handle = grant_tx_ref[pending_idx]; + grant_tx_ref[pending_idx] = GRANT_INVALID_REF; + gop++; + } + if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + unmap_ops, gop - unmap_ops))) { + BUG(); + } +#else mcl = tx_mcl; while ( dc != dp ) { @@ -428,15 +532,18 @@ mcl++; } - mcl[-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL; + mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) ) BUG(); mcl = tx_mcl; +#endif while ( dealloc_cons != dp ) { +#ifndef CONFIG_XEN_NETDEV_GRANT_TX /* The update_va_mapping() must not fail. */ BUG_ON(mcl[0].result != 0); +#endif pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)]; @@ -460,11 +567,38 @@ netif_put(netif); +#ifndef CONFIG_XEN_NETDEV_GRANT_TX mcl++; - } - - skip_dealloc: +#endif + } + +} + +/* Called after netfront has transmitted */ +static void net_tx_action(unsigned long unused) +{ + struct list_head *ent; + struct sk_buff *skb; + netif_t *netif; + netif_tx_request_t txreq; + u16 pending_idx; + NETIF_RING_IDX i; +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + gnttab_map_grant_ref_t map_ops[MAX_PENDING_REQS]; + gnttab_map_grant_ref_t *mop; +#else + multicall_entry_t *mcl; +#endif + unsigned int data_len; + + if ( dealloc_cons != dealloc_prod ) + net_tx_action_dealloc(); + +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + mop = map_ops; +#else mcl = tx_mcl; +#endif while ( (NR_PENDING_REQS < MAX_PENDING_REQS) && !list_empty(&net_schedule_list) ) { @@ -486,7 +620,6 @@ rmb(); /* Ensure that we see the request before we copy it. */ memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req, sizeof(txreq)); - /* Credit-based scheduling. */ if ( txreq.size > netif->remaining_credit ) { @@ -566,12 +699,20 @@ /* Packets passed to netif_rx() must have some headroom. */ skb_reserve(skb, 16); - +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + mop->host_virt_addr = MMAP_VADDR(pending_idx); + mop->dom = netif->domid; + mop->ref = txreq.addr >> PAGE_SHIFT; + mop->flags = GNTMAP_host_map | GNTMAP_readonly; + mop++; +#else MULTI_update_va_mapping_otherdomain( mcl, MMAP_VADDR(pending_idx), pfn_pte_ma(txreq.addr >> PAGE_SHIFT, PAGE_KERNEL), 0, netif->domid); + mcl++; +#endif memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq)); pending_tx_info[pending_idx].netif = netif; @@ -581,11 +722,26 @@ pending_cons++; +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + if ((mop - map_ops) >= ARRAY_SIZE(map_ops)) + break; +#else /* Filled the batch queue? */ if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) ) break; - } - +#endif + } + +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + if (mop == map_ops) { + return; + } + if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + map_ops, mop - map_ops))) { + BUG(); + } + mop = map_ops; +#else if ( mcl == tx_mcl ) return; @@ -593,6 +749,7 @@ BUG(); mcl = tx_mcl; +#endif while ( (skb = __skb_dequeue(&tx_queue)) != NULL ) { pending_idx = *((u16 *)skb->data); @@ -600,6 +757,20 @@ memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq)); /* Check the remap error code. */ +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + if (unlikely(mop->dev_bus_addr == 0)) { + printk(KERN_ALERT "#### netback grant fails\n"); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + kfree_skb(skb); + mop++; + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + continue; + } + phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] = + FOREIGN_FRAME(mop->dev_bus_addr); + grant_tx_ref[pending_idx] = mop->handle; +#else if ( unlikely(mcl[0].result != 0) ) { DPRINTK("Bad page frame\n"); @@ -613,6 +784,7 @@ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] = FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT); +#endif data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size; @@ -620,7 +792,6 @@ memcpy(skb->data, (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)), data_len); - if ( data_len < txreq.size ) { /* Append the packet payload as a fragment. */ @@ -654,7 +825,11 @@ netif_rx(skb); netif->dev->last_rx = jiffies; +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + mop++; +#else mcl++; +#endif } } @@ -774,6 +949,12 @@ return 0; printk("Initialising Xen netif backend\n"); +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + printk("#### netback tx using grant tables\n"); +#endif +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + printk("#### netback rx using grant tables\n"); +#endif /* We can increase reservation by this much in net_rx_action(). */ balloon_update_driver_allowance(NETIF_RX_RING_SIZE); diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Jul 29 20:25:03 2005 @@ -54,6 +54,25 @@ #include <asm/page.h> #include <asm/uaccess.h> +#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX) +#include <asm-xen/xen-public/grant_table.h> +#include <asm-xen/gnttab.h> +#ifdef GRANT_DEBUG +static void +dump_packet(int tag, u32 addr, u32 ap) +{ + unsigned char *p = (unsigned char *)ap; + int i; + + printk(KERN_ALERT "#### rx_poll %c %08x ", tag & 0xff, addr); + for (i = 0; i < 20; i++) { + printk("%02x", p[i]); + } + printk("\n"); +} +#endif +#endif + #ifndef __GFP_NOWARN #define __GFP_NOWARN 0 #endif @@ -82,6 +101,21 @@ #define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */ #endif +#ifdef CONFIG_XEN_NETDEV_GRANT_TX +static grant_ref_t gref_tx_head, gref_tx_terminal; +static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1]; +#endif + +#ifdef CONFIG_XEN_NETDEV_GRANT_RX +static grant_ref_t gref_rx_head, gref_rx_terminal; +static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1]; +#endif + +#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX) +static domid_t rdomid = 0; +#define GRANT_INVALID_REF (0xFFFF) +#endif + static void network_tx_buf_gc(struct net_device *dev); static void network_alloc_rx_buffers(struct net_device *dev); @@ -322,6 +356,14 @@ for (i = np->tx_resp_cons; i != prod; i++) { id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id; skb = np->tx_skbs[id]; +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) { + printk(KERN_ALERT "netfront: query foreign access\n"); + } + gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly); + gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]); + grant_tx_ref[id] = GRANT_INVALID_REF; +#endif ADD_ID_TO_FREELIST(np->tx_skbs, id); dev_kfree_skb_irq(skb); } @@ -356,6 +398,9 @@ struct sk_buff *skb; int i, batch_target; NETIF_RING_IDX req_prod = np->rx->req_prod; +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + int ref; +#endif if (unlikely(np->backend_state != BEST_CONNECTED)) return; @@ -388,7 +433,16 @@ np->rx_skbs[id] = skb; np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id; - +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + if ((ref = gnttab_claim_grant_reference(&gref_rx_head, gref_rx_terminal)) < 0) { + printk(KERN_ALERT "#### netfront can't claim rx reference\n"); + BUG(); + } + grant_rx_ref[id] = ref; + gnttab_grant_foreign_transfer_ref(ref, rdomid, + virt_to_machine(skb->head) >> PAGE_SHIFT); + np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref; +#endif rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT; /* Remove this page from pseudo phys map before passing back to Xen. */ @@ -400,7 +454,7 @@ } /* After all PTEs have been zapped we blow away stale TLB entries. */ - rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL; + rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; /* Give away a batch of pages. */ rx_mcl[i].op = __HYPERVISOR_dom_mem_op; @@ -436,6 +490,10 @@ struct net_private *np = netdev_priv(dev); netif_tx_request_t *tx; NETIF_RING_IDX i; +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + unsigned int ref; + unsigned long mfn; +#endif if (unlikely(np->tx_full)) { printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name); @@ -470,7 +528,18 @@ tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req; tx->id = id; +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal)) < 0) { + printk(KERN_ALERT "#### netfront can't claim tx grant reference\n"); + BUG(); + } + mfn = virt_to_machine(skb->data) >> PAGE_SHIFT; + gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly); + tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK); + grant_tx_ref[id] = ref; +#else tx->addr = virt_to_machine(skb->data); +#endif tx->size = skb->len; tx->csum_blank = (skb->ip_summed == CHECKSUM_HW); @@ -530,6 +599,10 @@ int work_done, budget, more_to_do = 1; struct sk_buff_head rxq; unsigned long flags; +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + unsigned long mfn; + grant_ref_t ref; +#endif spin_lock(&np->rx_lock); @@ -542,7 +615,6 @@ if ((budget = *pbudget) > dev->quota) budget = dev->quota; - rp = np->rx->resp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ @@ -550,7 +622,6 @@ (i != rp) && (work_done < budget); i++, work_done++) { rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp; - /* * An error here is very odd. Usually indicates a backend bug, * low-memory condition, or that we didn't have reservation headroom. @@ -565,11 +636,23 @@ continue; } +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + ref = grant_rx_ref[rx->id]; + grant_rx_ref[rx->id] = GRANT_INVALID_REF; + + mfn = gnttab_end_foreign_transfer(ref); + gnttab_release_grant_reference(&gref_rx_head, ref); +#endif + skb = np->rx_skbs[rx->id]; ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); /* NB. We handle skb overflow later. */ +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + skb->data = skb->head + rx->addr; +#else skb->data = skb->head + (rx->addr & ~PAGE_MASK); +#endif skb->len = rx->status; skb->tail = skb->data + skb->len; @@ -580,16 +663,32 @@ np->stats.rx_bytes += rx->status; /* Remap the page. */ +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE; +#else mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE; +#endif mmu->val = __pa(skb->head) >> PAGE_SHIFT; mmu++; +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + MULTI_update_va_mapping(mcl, (unsigned long)skb->head, + pfn_pte_ma(mfn, PAGE_KERNEL), 0); +#else MULTI_update_va_mapping(mcl, (unsigned long)skb->head, pfn_pte_ma(rx->addr >> PAGE_SHIFT, PAGE_KERNEL), 0); +#endif mcl++; phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + mfn; +#else rx->addr >> PAGE_SHIFT; - +#endif +#ifdef GRANT_DEBUG + printk(KERN_ALERT "#### rx_poll enqueue vdata=%08x mfn=%08x ref=%04x\n", + skb->data, mfn, ref); +#endif __skb_queue_tail(&rxq, skb); } @@ -608,6 +707,11 @@ } while ((skb = __skb_dequeue(&rxq)) != NULL) { +#ifdef GRANT_DEBUG + printk(KERN_ALERT "#### rx_poll dequeue vdata=%08x mfn=%08x\n", + skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT); + dump_packet('d', skb->data, (unsigned long)skb->data); +#endif /* * Enough room in skbuff for the data we were passed? Also, Linux * expects at least 16 bytes headroom in each receive buffer. @@ -615,6 +719,7 @@ if (unlikely(skb->tail > skb->end) || unlikely((skb->data - skb->head) < 16)) { nskb = NULL; + /* Only copy the packet if it fits in the current MTU. */ if (skb->len <= (dev->mtu + ETH_HLEN)) { @@ -646,7 +751,6 @@ /* Set the shared-info area, which is hidden behind the real data. */ init_skb_shinfo(skb); - /* Ethernet-specific work. Delayed to here as it peeks the header. */ skb->protocol = eth_type_trans(skb, dev); @@ -919,6 +1023,9 @@ network_connect(dev, status); np->evtchn = status->evtchn; np->irq = bind_evtchn_to_irq(np->evtchn); +#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX) + rdomid = status->domid; +#endif (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev); netctrl_connected_count(); (void)send_fake_arp(dev); @@ -962,10 +1069,18 @@ np->rx_max_target = RX_MAX_TARGET; /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ - for (i = 0; i <= NETIF_TX_RING_SIZE; i++) + for (i = 0; i <= NETIF_TX_RING_SIZE; i++) { np->tx_skbs[i] = (void *)((unsigned long) i+1); - for (i = 0; i <= NETIF_RX_RING_SIZE; i++) +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + grant_tx_ref[i] = GRANT_INVALID_REF; +#endif + } + for (i = 0; i <= NETIF_RX_RING_SIZE; i++) { np->rx_skbs[i] = (void *)((unsigned long) i+1); +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + grant_rx_ref[i] = GRANT_INVALID_REF; +#endif + } dev->open = network_open; dev->hard_start_xmit = network_start_xmit; @@ -1267,6 +1382,22 @@ if (xen_start_info.flags & SIF_INITDOMAIN) return 0; +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE, + &gref_tx_head, &gref_tx_terminal) < 0) { + printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); + return 1; + } + printk(KERN_ALERT "#### netfront tx using grant tables\n"); +#endif +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE, + &gref_rx_head, &gref_rx_terminal) < 0) { + printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); + return 1; + } + printk(KERN_ALERT "#### netfront rx using grant tables\n"); +#endif if ((err = xennet_proc_init()) != 0) return err; @@ -1284,6 +1415,16 @@ DPRINTK("< err=%d\n", err); return err; +} + +static void netif_exit(void) +{ +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head); +#endif +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head); +#endif } static void vif_suspend(struct net_private *np) @@ -1478,3 +1619,4 @@ #endif module_init(netif_init); +module_exit(netif_exit); diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Fri Jul 29 20:25:03 2005 @@ -200,27 +200,29 @@ case IOCTL_PRIVCMD_INITDOMAIN_STORE: { extern int do_xenbus_probe(void*); + unsigned long page; if (xen_start_info.store_evtchn != 0) { - ret = -EINVAL; + ret = xen_start_info.store_mfn; break; } /* Allocate page. */ - xen_start_info.store_page = get_zeroed_page(GFP_KERNEL); - if (!xen_start_info.store_page) { + page = get_zeroed_page(GFP_KERNEL); + if (!page) { ret = -ENOMEM; break; } /* We don't refcnt properly, so set reserved on page. * (this allocation is permanent) */ - SetPageReserved(virt_to_page(xen_start_info.store_page)); + SetPageReserved(virt_to_page(page)); /* Initial connect. Setup channel and page. */ xen_start_info.store_evtchn = data; - ret = pfn_to_mfn(virt_to_phys((void *)xen_start_info.store_page) >> - PAGE_SHIFT); + xen_start_info.store_mfn = pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); + ret = xen_start_info.store_mfn; /* We'll return then this will wait for daemon to answer */ kthread_run(do_xenbus_probe, NULL, "xenbus_probe"); diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c --- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Fri Jul 29 20:25:03 2005 @@ -193,7 +193,7 @@ __pte(0), 0); } - mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL; + mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) BUG(); } @@ -651,7 +651,7 @@ { MULTI_update_va_mapping_otherdomain( mcl+i, MMAP_VADDR(pending_idx, i), - pfn_pte_ma(buffer_mach >> PAGE_SHIFT, remap_prot), + pfn_pte_ma((buffer_mach + offset) >> PAGE_SHIFT, remap_prot), 0, up->domid); phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Fri Jul 29 20:25:03 2005 @@ -47,6 +47,17 @@ DECLARE_WAIT_QUEUE_HEAD(xb_waitq); +static inline struct ringbuf_head *outbuf(void) +{ + return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT); +} + +static inline struct ringbuf_head *inbuf(void) +{ + return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT) + + PAGE_SIZE/2; +} + static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs) { wake_up(&xb_waitq); @@ -108,9 +119,10 @@ return avail != 0; } -int xb_write(struct ringbuf_head *out, const void *data, unsigned len) +int xb_write(const void *data, unsigned len) { struct ringbuf_head h; + struct ringbuf_head *out = outbuf(); do { void *dst; @@ -141,24 +153,26 @@ return 0; } -int xs_input_avail(struct ringbuf_head *in) +int xs_input_avail(void) { unsigned int avail; + struct ringbuf_head *in = inbuf(); get_input_chunk(in, in->buf, &avail); return avail != 0; } -int xb_read(struct ringbuf_head *in, void *data, unsigned len) +int xb_read(void *data, unsigned len) { struct ringbuf_head h; + struct ringbuf_head *in = inbuf(); int was_full; while (len != 0) { unsigned int avail; const char *src; - wait_event(xb_waitq, xs_input_avail(in)); + wait_event(xb_waitq, xs_input_avail()); h = *in; mb(); if (!check_buffer(&h)) { @@ -182,14 +196,14 @@ } /* If we left something, wake watch thread to deal with it. */ - if (xs_input_avail(in)) + if (xs_input_avail()) wake_up(&xb_waitq); return 0; } /* Set up interrpt handler off store event channel. */ -int xb_init_comms(void **in, void **out) +int xb_init_comms(void) { int err, irq; @@ -202,7 +216,9 @@ return err; } - *out = (void *)xen_start_info.store_page; - *in = (void *)xen_start_info.store_page + PAGE_SIZE/2; + /* FIXME zero out page -- domain builder should probably do this*/ + memset(machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT), + 0, PAGE_SIZE); + return 0; } diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Fri Jul 29 20:25:03 2005 @@ -2,13 +2,12 @@ #ifndef _XENBUS_COMMS_H #define _XENBUS_COMMS_H int xs_init(void); -int xb_init_comms(void **in, void **out); +int xb_init_comms(void); /* Low level routines. */ -struct ringbuf_head; -int xb_write(struct ringbuf_head *out, const void *data, unsigned len); -int xb_read(struct ringbuf_head *in, void *data, unsigned len); -int xs_input_avail(struct ringbuf_head *in); +int xb_write(const void *data, unsigned len); +int xb_read(void *data, unsigned len); +int xs_input_avail(void); extern wait_queue_head_t xb_waitq; #endif /* _XENBUS_COMMS_H */ diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Jul 29 20:25:03 2005 @@ -1,6 +1,5 @@ /****************************************************************************** * Talks to Xen Store to figure out what devices we have. - * Currently experiment code, but when I grow up I'll be a bus driver! * * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 Mike Wray, Hewlett-Packard @@ -26,6 +25,8 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ +#define DEBUG + #include <asm-xen/hypervisor.h> #include <asm-xen/xenbus.h> #include <linux/kernel.h> @@ -36,151 +37,7 @@ #include <stdarg.h> #include "xenbus_comms.h" -/* Name of field containing device type. */ -#define XENBUS_DEVICE_TYPE "type" - -#define DEBUG - -#ifdef DEBUG -#define dprintf(_fmt, _args...) \ -printk(KERN_INFO __stringify(KBUILD_MODNAME) " [DBG] %s" _fmt, __FUNCTION__, ##_args) -#else -#define dprintf(_fmt, _args...) do { } while(0) -#endif - -static int xs_init_done = 0; - -/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ -int xenbus_gather(const char *dir, ...) -{ - va_list ap; - const char *name; - int ret = 0; - - va_start(ap, dir); - while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { - const char *fmt = va_arg(ap, char *); - void *result = va_arg(ap, void *); - char *p; - - p = xenbus_read(dir, name, NULL); - if (IS_ERR(p)) { - ret = PTR_ERR(p); - break; - } - if (sscanf(p, fmt, result) == 0) - ret = -EINVAL; - kfree(p); - } - va_end(ap); - return ret; -} - -/* Return the path to dir with /name appended. - * If name is null or empty returns a copy of dir. - */ -char *xenbus_path(const char *dir, const char *name) -{ - char *ret; - int len; - - len = strlen(dir) + 1; - if (name) - len += strlen(name) + 1; - ret = kmalloc(len, GFP_KERNEL); - if (ret == NULL) - return NULL; - strcpy(ret, dir); - if (name) { - strcat(ret, "/"); - strcat(ret, name); - } - return ret; -} - #define streq(a, b) (strcmp((a), (b)) == 0) - -char *xenbus_read(const char *dir, const char *name, unsigned int *data_n) -{ - int err = 0; - char *data = NULL; - char *path = xenbus_path(dir, name); - int n = 0; - - if (!path) { - err = -ENOMEM; - goto out; - } - data = xs_read(path, &n); - if (IS_ERR(data)) { - err = PTR_ERR(data); - if (err == -EISDIR) - err = -ENOENT; - } else if (n == 0) { - err = -ENOENT; - kfree(data); - } - kfree(path); - out: - if (data_n) - *data_n = n; - return (err ? ERR_PTR(err) : data); -} - -int xenbus_write(const char *dir, const char *name, const char *data, int data_n) -{ - int err = 0; - char *path = xenbus_path(dir, name); - - if (!path) - return -ENOMEM; - err = xs_write(path, data, data_n, O_CREAT); - kfree(path); - return err; -} - -int xenbus_read_string(const char *dir, const char *name, char **val) -{ - int err = 0; - - *val = xenbus_read(dir, name, NULL); - if (IS_ERR(*val)) { - err = PTR_ERR(*val); - *val = NULL; - } - return err; -} - -int xenbus_write_string(const char *dir, const char *name, const char *val) -{ - return xenbus_write(dir, name, val, strlen(val)); -} - -int xenbus_read_ulong(const char *dir, const char *name, unsigned long *val) -{ - return xenbus_gather(dir, name, "%lu", val, NULL); -} - -int xenbus_write_ulong(const char *dir, const char *name, unsigned long val) -{ - char data[32] = {}; - - snprintf(data, sizeof(data), "%lu", val); - return xenbus_write(dir, name, data, strlen(data)); -} - -int xenbus_read_long(const char *dir, const char *name, long *val) -{ - return xenbus_gather(dir, name, "%li", val, NULL); -} - -int xenbus_write_long(const char *dir, const char *name, long val) -{ - char data[32] = {}; - - snprintf(data, sizeof(data), "%li", val); - return xenbus_write(dir, name, data, strlen(data)); -} /* If something in array of ids matches this device, return it. */ static const struct xenbus_device_id * @@ -190,10 +47,13 @@ if (!streq(arr->devicetype, dev->devicetype)) continue; - if (streq(arr->subtype, "") || - streq(arr->subtype, dev->subtype)) { + /* If they don't care what subtype, it's a match. */ + if (streq(arr->subtype, "")) return arr; - } + + /* If they care, device must have (same) subtype. */ + if (dev->subtype && streq(arr->subtype, dev->subtype)) + return arr; } return NULL; } @@ -214,86 +74,19 @@ .match = xenbus_match, }; - -/* Bus type for backend drivers. */ -static struct bus_type xenback_type = { - .name = "xenback", - .match = xenbus_match, -}; - -struct xenbus_for_dev { - int (*fn)(struct xenbus_device *, void *); - void *data; -}; - -static int for_dev(struct device *_dev, void *_data) -{ - struct xenbus_device *dev = to_xenbus_device(_dev); - struct xenbus_for_dev *data = _data; - dev = to_xenbus_device(_dev); - return data->fn(dev, data->data); -} - -int xenbus_for_each_dev(struct xenbus_device * start, void * data, - int (*fn)(struct xenbus_device *, void *)) -{ - struct xenbus_for_dev for_data = { - .fn = fn, - .data = data, - }; - if (!fn) - return -EINVAL; - printk("%s> data=%p fn=%p for_data=%p\n", __FUNCTION__, - data, fn, &for_data); - return bus_for_each_dev(&xenbus_type, - (start ? &start->dev : NULL), - &for_data, for_dev); -} - -struct xenbus_for_drv { - int (*fn)(struct xenbus_driver *, void *); - void *data; -}; - -static int for_drv(struct device_driver *_drv, void *_data) -{ - struct xenbus_driver *drv = to_xenbus_driver(_drv); - struct xenbus_for_drv *data = _data; - return data->fn(drv, data->data); -} - -int xenbus_for_each_drv(struct xenbus_driver * start, void * data, - int (*fn)(struct xenbus_driver *, void *)) -{ - struct xenbus_for_drv for_data = { - .fn = fn, - .data = data, - }; - if (!fn) - return -EINVAL; - return bus_for_each_drv(&xenbus_type, - (start ? &start->driver: NULL), - &for_data, for_drv); -} - static int xenbus_dev_probe(struct device *_dev) { struct xenbus_device *dev = to_xenbus_device(_dev); struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); const struct xenbus_device_id *id; - printk("Probing device '%s'\n", _dev->bus_id); - if (!drv->probe) { - printk("'%s' no probefn\n", _dev->bus_id); + if (!drv->probe) return -ENODEV; - } id = match_device(drv->ids, dev); - if (!id) { - printk("'%s' no id match\n", _dev->bus_id); + if (!id) return -ENODEV; - } - printk("probing '%s' fn %p\n", _dev->bus_id, drv->probe); + return drv->probe(dev, id); } @@ -309,76 +102,99 @@ int xenbus_register_driver(struct xenbus_driver *drv) { - int err = 0; - - printk("%s> frontend driver %p %s\n", __FUNCTION__, - drv, drv->name); + int err; + drv->driver.name = drv->name; drv->driver.bus = &xenbus_type; drv->driver.owner = drv->owner; drv->driver.probe = xenbus_dev_probe; drv->driver.remove = xenbus_dev_remove; + down(&xenbus_lock); err = driver_register(&drv->driver); - if (err == 0 && xs_init_done && drv->connect) { - printk("%s> connecting driver %p %s\n", __FUNCTION__, - drv, drv->name); - drv->connect(drv); - } + up(&xenbus_lock); return err; } void xenbus_unregister_driver(struct xenbus_driver *drv) { + down(&xenbus_lock); driver_unregister(&drv->driver); -} - -static int xenbus_probe_device(const char *dir, const char *name, const char *devicetype) + up(&xenbus_lock); +} + +struct xb_find_info +{ + struct xenbus_device *dev; + const char *busid; +}; + +static int cmp_dev(struct device *dev, void *data) +{ + struct xb_find_info *info = data; + + if (streq(dev->bus_id, info->busid)) { + info->dev = container_of(get_device(dev), + struct xenbus_device, dev); + return 1; + } + return 0; +} + +/* FIXME: device_find is fixed in 2.6.13-rc2 according to Greg KH --RR */ +struct xenbus_device *xenbus_device_find(const char *busid) +{ + struct xb_find_info info = { .dev = NULL, .busid = busid }; + + bus_for_each_dev(&xenbus_type, NULL, &info, cmp_dev); + return info.dev; +} + + +static void xenbus_release_device(struct device *dev) +{ + if (dev) { + struct xenbus_device *xendev = to_xenbus_device(dev); + + kfree(xendev->subtype); + kfree(xendev); + } +} +/* devices/<typename>/<name> */ +static int xenbus_probe_device(const char *dirpath, const char *devicetype, + const char *name) { int err; struct xenbus_device *xendev; - unsigned int xendev_n; - char *nodename; - - dprintf("> dir=%s name=%s\n", dir, name); - nodename = xenbus_path(dir, name); - if (!nodename) + unsigned int stringlen; + + /* Nodename: /device/<typename>/<name>/ */ + stringlen = strlen(dirpath) + strlen(devicetype) + strlen(name) + 3; + /* Typename */ + stringlen += strlen(devicetype) + 1; + xendev = kmalloc(sizeof(*xendev) + stringlen, GFP_KERNEL); + if (!xendev) return -ENOMEM; - - /* FIXME: This could be a rescan. Don't re-register existing devices. */ - - /* Add space for the strings. */ - xendev_n = sizeof(*xendev) + strlen(nodename) + strlen(devicetype) + 2; - xendev = kmalloc(xendev_n, GFP_KERNEL); - if (!xendev) { - err = -ENOMEM; - goto free_nodename; - } - memset(xendev, 0, xendev_n); - + memset(xendev, 0, sizeof(*xendev)); + + /* Copy the strings into the extra space. */ + xendev->nodename = (char *)(xendev + 1); + sprintf(xendev->nodename, "%s/%s/%s", dirpath, devicetype, name); + xendev->devicetype = xendev->nodename + strlen(xendev->nodename) + 1; + strcpy(xendev->devicetype, devicetype); + + /* FIXME: look for "subtype" field. */ snprintf(xendev->dev.bus_id, BUS_ID_SIZE, "%s-%s", devicetype, name); xendev->dev.bus = &xenbus_type; - - xendev->id = simple_strtol(name, NULL, 0); - - /* Copy the strings into the extra space. */ - xendev->nodename = (char *)(xendev + 1); - strcpy(xendev->nodename, nodename); - xendev->devicetype = xendev->nodename + strlen(xendev->nodename) + 1; - strcpy(xendev->devicetype, devicetype); + xendev->dev.release = xenbus_release_device; /* Register with generic device framework. */ - printk("XENBUS: Registering device %s\n", xendev->dev.bus_id); err = device_register(&xendev->dev); if (err) { printk("XENBUS: Registering device %s: error %i\n", xendev->dev.bus_id, err); kfree(xendev); } - -free_nodename: - kfree(nodename); - dprintf("< err=%i\n", err); return err; } @@ -386,30 +202,19 @@ { int err = 0; char **dir; - char *path; unsigned int dir_n = 0; int i; - dprintf("> dirpath=%s typename=%s\n", dirpath, typename); - path = xenbus_path(dirpath, typename); - if (!path) - return -ENOMEM; - - dir = xs_directory(path, &dir_n); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - goto out; - } + dir = xenbus_directory(dirpath, typename, &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); for (i = 0; i < dir_n; i++) { - err = xenbus_probe_device(path, dir[i], typename); + err = xenbus_probe_device(dirpath, typename, dir[i]); if (err) break; } kfree(dir); -out: - kfree(path); - dprintf("< err=%i\n", err); return err; } @@ -419,198 +224,103 @@ char **dir; unsigned int i, dir_n; - dprintf("> path=%s\n", path); - down(&xs_lock); - dir = xs_directory(path, &dir_n); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - goto unlock; - } + dir = xenbus_directory(path, "", &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); + for (i = 0; i < dir_n; i++) { err = xenbus_probe_device_type(path, dir[i]); if (err) break; } kfree(dir); -unlock: - up(&xs_lock); - dprintf("< err=%i\n", err); return err; } - -static int xenbus_probe_backend(const char *dir, const char *name) -{ - int err = 0; - struct xenbus_device *xendev = NULL; - unsigned int xendev_n = 0; - char *nodename = NULL, *devicetype = NULL; - unsigned int devicetype_n = 0; - - dprintf("> dir=%s name=%s\n", dir, name); - nodename = xenbus_path(dir, name); - if (!nodename) - return -ENOMEM; - - devicetype = xenbus_read(nodename, XENBUS_DEVICE_TYPE, &devicetype_n); - if (IS_ERR(devicetype)) { - err = PTR_ERR(devicetype); - goto free_nodename; - } - - dprintf("> devicetype='%s'\n", devicetype); - /* FIXME: This could be a rescan. Don't re-register existing devices. */ - - /* Add space for the strings. */ - xendev_n = sizeof(*xendev) + strlen(nodename) + strlen(devicetype) + 2; - xendev = kmalloc(xendev_n, GFP_KERNEL); - if (!xendev) { - err = -ENOMEM; - goto free_devicetype; - } - memset(xendev, 0, xendev_n); - - snprintf(xendev->dev.bus_id, BUS_ID_SIZE, "%s", devicetype); - xendev->dev.bus = &xenback_type; - - /* Copy the strings into the extra space. */ - xendev->nodename = (char *)(xendev + 1); - strcpy(xendev->nodename, nodename); - xendev->devicetype = xendev->nodename + strlen(xendev->nodename) + 1; - strcpy(xendev->devicetype, devicetype); - - /* Register with generic device framework. */ - printk("XENBUS: Registering backend %s\n", xendev->dev.bus_id); - err = device_register(&xendev->dev); - if (err) { - printk("XENBUS: Registering device %s: error %i\n", - xendev->dev.bus_id, err); - kfree(xendev); - } - -free_devicetype: - kfree(devicetype); -free_nodename: - kfree(nodename); - dprintf("< err=%i\n", err); - return err; -} - -static int xenbus_probe_backends(const char *path) -{ - int err = 0; - char **dir; - unsigned int i, dir_n; - - dprintf("> path=%s\n", path); - down(&xs_lock); - dir = xs_directory(path, &dir_n); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - goto unlock; - } - for (i = 0; i < dir_n; i++) { - err = xenbus_probe_backend(path, dir[i]); - if (err) - break; - } - kfree(dir); -unlock: - up(&xs_lock); - dprintf("< err=%i\n", err); - return err; -} - -int xenbus_register_backend(struct xenbus_driver *drv) -{ - int err = 0; - - printk("%s> backend driver %p %s\n", __FUNCTION__, - drv, drv->name); - drv->driver.name = drv->name; - drv->driver.bus = &xenback_type; - drv->driver.owner = drv->owner; - drv->driver.probe = xenbus_dev_probe; - drv->driver.remove = xenbus_dev_remove; - - err = driver_register(&drv->driver); - if (err == 0 && xs_init_done && drv->connect) { - printk("%s> connecting driver %p %s\n", __FUNCTION__, - drv, drv->name); - drv->connect(drv); - } - return err; -} - -void xenbus_unregister_backend(struct xenbus_driver *drv) -{ - driver_unregister(&drv->driver); -} - -int xenbus_for_each_backend(struct xenbus_driver * start, void * data, - int (*fn)(struct xenbus_driver *, void *)) -{ - struct xenbus_for_drv for_data = { - .fn = fn, - .data = data, - }; - if (!fn) - return -EINVAL; - return bus_for_each_drv(&xenback_type, - (start ? &start->driver: NULL), - &for_data, for_drv); -} - -static int xenbus_driver_connect(struct xenbus_driver *drv, void *data) -{ - printk("%s> driver %p %s\n", __FUNCTION__, drv, drv->name); - if (drv->connect) { - printk("%s> connecting driver %p %s\n", __FUNCTION__, - drv, drv->name); - drv->connect(drv); - } - printk("%s< driver %p %s\n", __FUNCTION__, drv, drv->name); - return 0; -} - +static unsigned int char_count(const char *str, char c) +{ + unsigned int i, ret = 0; + + for (i = 0; str[i]; i++) + if (str[i] == c) + ret++; + return ret; +} + +static void dev_changed(struct xenbus_watch *watch, const char *node) +{ + char busid[BUS_ID_SIZE]; + int exists; + struct xenbus_device *dev; + char *p; + + /* Node is of form device/<type>/<identifier>[/...] */ + if (char_count(node, '/') != 2) + return; + + /* Created or deleted? */ + exists = xenbus_exists(node, ""); + + p = strchr(node, '/') + 1; + if (strlen(p) + 1 > BUS_ID_SIZE) { + printk("Device for node %s is too big!\n", node); + return; + } + /* Bus ID is name with / changed to - */ + strcpy(busid, p); + *strchr(busid, '/') = '-'; + + dev = xenbus_device_find(busid); + printk("xenbus: device %s %s\n", busid, dev ? "exists" : "new"); + if (dev && !exists) { + printk("xenbus: Unregistering device %s\n", busid); + /* FIXME: free? */ + device_unregister(&dev->dev); + } else if (!dev && exists) { + printk("xenbus: Adding device %s\n", busid); + /* Hack bus id back into two strings. */ + *strrchr(busid, '-') = '\0'; + xenbus_probe_device("device", busid, busid+strlen(busid)+1); + } else + printk("xenbus: strange, %s already %s\n", busid, + exists ? "exists" : "gone"); + if (dev) + put_device(&dev->dev); +} + +/* We watch for devices appearing and vanishing. */ +static struct xenbus_watch dev_watch = { + /* FIXME: Ideally we'd only watch for changes 2 levels deep... */ + .node = "device", + .callback = dev_changed, +}; /* called from a thread in privcmd/privcmd.c */ int do_xenbus_probe(void *unused) { int err = 0; - printk("%s> xs_init_done=%d\n", __FUNCTION__, xs_init_done); - if (xs_init_done) - goto exit; /* Initialize xenstore comms unless already done. */ printk("store_evtchn = %i\n", xen_start_info.store_evtchn); err = xs_init(); if (err) { printk("XENBUS: Error initializing xenstore comms:" " %i\n", err); - goto exit; - } - xs_init_done = 1; - - /* Notify drivers that xenstore has connected. */ - printk("%s> connect drivers...\n", __FUNCTION__); - xenbus_for_each_drv(NULL, NULL, xenbus_driver_connect); - printk("%s> connect backends...\n", __FUNCTION__); - xenbus_for_each_backend(NULL, NULL, xenbus_driver_connect); - - /* Enumerate devices and backends in xenstore. */ + return err; + } + + down(&xenbus_lock); + /* Enumerate devices in xenstore. */ xenbus_probe_devices("device"); - xenbus_probe_backends("backend"); - -exit: - printk("%s< err=%d\n", __FUNCTION__, err); - return err; + /* Watch for changes. */ + register_xenbus_watch(&dev_watch); + up(&xenbus_lock); + return 0; } static int __init xenbus_probe_init(void) { bus_register(&xenbus_type); - bus_register(&xenback_type); if (!xen_start_info.store_evtchn) return 0; diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Fri Jul 29 20:25:03 2005 @@ -2,7 +2,7 @@ * xenbus_xs.c * * This is the kernel equivalent of the "xs" library. We don't need everything - * and we use xenbus_comms to communication. + * and we use xenbus_comms for communication. * * Copyright (C) 2005 Rusty Russell, IBM Corporation * @@ -43,9 +43,9 @@ #define streq(a, b) (strcmp((a), (b)) == 0) -static void *xs_in, *xs_out; +static char printf_buffer[4096]; static LIST_HEAD(watches); -DECLARE_MUTEX(xs_lock); +DECLARE_MUTEX(xenbus_lock); static int get_error(const char *errorstring) { @@ -68,7 +68,7 @@ void *ret; int err; - err = xb_read(xs_in, &msg, sizeof(msg)); + err = xb_read(&msg, sizeof(msg)); if (err) return ERR_PTR(err); @@ -76,7 +76,7 @@ if (!ret) return ERR_PTR(-ENOMEM); - err = xb_read(xs_in, ret, msg.len); + err = xb_read(ret, msg.len); if (err) { kfree(ret); return ERR_PTR(err); @@ -90,18 +90,17 @@ } /* Emergency write. */ -void xs_debug_write(const char *str, unsigned int count) +void xenbus_debug_write(const char *str, unsigned int count) { struct xsd_sockmsg msg; - void *out = (void *)xen_start_info.store_page; msg.type = XS_DEBUG; msg.len = sizeof("print") + count + 1; - xb_write(out, &msg, sizeof(msg)); - xb_write(out, "print", sizeof("print")); - xb_write(out, str, count); - xb_write(out, "", 1); + xb_write(&msg, sizeof(msg)); + xb_write("print", sizeof("print")); + xb_write(str, count); + xb_write("", 1); } /* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ @@ -115,19 +114,19 @@ unsigned int i; int err; - WARN_ON(down_trylock(&xs_lock) == 0); + WARN_ON(down_trylock(&xenbus_lock) == 0); msg.type = type; msg.len = 0; for (i = 0; i < num_vecs; i++) msg.len += iovec[i].iov_len; - err = xb_write(xs_out, &msg, sizeof(msg)); + err = xb_write(&msg, sizeof(msg)); if (err) return ERR_PTR(err); for (i = 0; i < num_vecs; i++) { - err = xb_write(xs_out, iovec[i].iov_base, iovec[i].iov_len);; + err = xb_write(iovec[i].iov_base, iovec[i].iov_len);; if (err) return ERR_PTR(err); } @@ -182,12 +181,28 @@ return num; } -char **xs_directory(const char *path, unsigned int *num) +/* Return the path to dir with /name appended. */ +static char *join(const char *dir, const char *name) +{ + static char buffer[4096]; + + BUG_ON(down_trylock(&xenbus_lock) == 0); + BUG_ON(strlen(dir) + strlen("/") + strlen(name) + 1 > sizeof(buffer)); + + strcpy(buffer, dir); + if (!streq(name, "")) { + strcat(buffer, "/"); + strcat(buffer, name); + } + return buffer; +} + +char **xenbus_directory(const char *dir, const char *node, unsigned int *num) { char *strings, *p, **ret; unsigned int len; - strings = xs_single(XS_DIRECTORY, path, &len); + strings = xs_single(XS_DIRECTORY, join(dir, node), &len); if (IS_ERR(strings)) return (char **)strings; @@ -210,67 +225,37 @@ } /* Check if a path exists. Return 1 if it does. */ -int xs_exists(const char *path) -{ - char **dir; +int xenbus_exists(const char *dir, const char *node) +{ + char **d; int dir_n; - dir = xs_directory(path, &dir_n); - if (IS_ERR(dir)) + d = xenbus_directory(dir, node, &dir_n); + if (IS_ERR(d)) return 0; - kfree(dir); + kfree(d); return 1; } - -/* Make a directory, creating dirs on the path to it if necessary. - * Return 0 on success, error code otherwise. - */ -int xs_mkdirs(const char *path) -{ - int err = 0; - char s[strlen(path) + 1], *p = s; - - if (xs_exists(path)) - goto out; - strcpy(p, path); - if (*p == '/') - p++; - for (;;) { - p = strchr(p, '/'); - if (p) - *p = '\0'; - if (!xs_exists(s)) { - err = xs_mkdir(s); - if (err) - goto out; - } - if (!p) - break; - *p++ = '/'; - } - out: - return err; -} - /* Get the value of a single file. * Returns a kmalloced value: call free() on it after use. * len indicates length in bytes. */ -void *xs_read(const char *path, unsigned int *len) -{ - return xs_single(XS_READ, path, len); +void *xenbus_read(const char *dir, const char *node, unsigned int *len) +{ + return xs_single(XS_READ, join(dir, node), len); } /* Write the value of a single file. * Returns -err on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL. */ -int xs_write(const char *path, - const void *data, unsigned int len, int createflags) -{ - const char *flags; +int xenbus_write(const char *dir, const char *node, + const char *string, int createflags) +{ + const char *flags, *path; struct kvec iovec[3]; + path = join(dir, node); /* Format: Flags (as string), path, data. */ if (createflags == 0) flags = XS_WRITE_NONE; @@ -285,22 +270,22 @@ iovec[0].iov_len = strlen(path) + 1; iovec[1].iov_base = (void *)flags; iovec[1].iov_len = strlen(flags) + 1; - iovec[2].iov_base = (void *)data; - iovec[2].iov_len = len; + iovec[2].iov_base = (void *)string; + iovec[2].iov_len = strlen(string); return xs_error(xs_talkv(XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL)); } /* Create a new directory. */ -int xs_mkdir(const char *path) -{ - return xs_error(xs_single(XS_MKDIR, path, NULL)); +int xenbus_mkdir(const char *dir, const char *node) +{ + return xs_error(xs_single(XS_MKDIR, join(dir, node), NULL)); } /* Destroy a file or directory (directories must be empty). */ -int xs_rm(const char *path) -{ - return xs_error(xs_single(XS_RM, path, NULL)); +int xenbus_rm(const char *dir, const char *node) +{ + return xs_error(xs_single(XS_RM, join(dir, node), NULL)); } /* Start a transaction: changes by others will not be seen during this @@ -308,7 +293,7 @@ * Transaction only applies to the given subtree. * You can only have one transaction at any time. */ -int xs_transaction_start(const char *subtree) +int xenbus_transaction_start(const char *subtree) { return xs_error(xs_single(XS_TRANSACTION_START, subtree, NULL)); } @@ -316,7 +301,7 @@ /* End a transaction. * If abandon is true, transaction is discarded instead of committed. */ -int xs_transaction_end(int abort) +int xenbus_transaction_end(int abort) { char abortstr[2]; @@ -327,26 +312,109 @@ return xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL)); } -char *xs_get_domain_path(domid_t domid) -{ - char domid_str[32]; - - sprintf(domid_str, "%u", domid); - return xs_single(XS_GETDOMAINPATH, domid_str, NULL); -} - -static int xs_watch(const char *path, const char *token, unsigned int priority) -{ - char prio[32]; - struct kvec iov[3]; - - sprintf(prio, "%u", priority); +/* Single read and scanf: returns -errno or num scanned. */ +int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + char *val; + + val = xenbus_read(dir, node, NULL); + if (IS_ERR(val)) + return PTR_ERR(val); + + va_start(ap, fmt); + ret = vsscanf(val, fmt, ap); + va_end(ap); + kfree(val); + /* Distinctive errno. */ + if (ret == 0) + return -ERANGE; + return ret; +} + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + + BUG_ON(down_trylock(&xenbus_lock) == 0); + va_start(ap, fmt); + ret = vsnprintf(printf_buffer, sizeof(printf_buffer), fmt, ap); + va_end(ap); + + BUG_ON(ret > sizeof(printf_buffer)-1); + return xenbus_write(dir, node, printf_buffer, O_CREAT); +} + +/* Report a (negative) errno into the store, with explanation. */ +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...) +{ + va_list ap; + int ret; + unsigned int len; + + BUG_ON(down_trylock(&xenbus_lock) == 0); + + len = sprintf(printf_buffer, "%i ", -err); + va_start(ap, fmt); + ret = vsnprintf(printf_buffer+len, sizeof(printf_buffer)-len, fmt, ap); + va_end(ap); + + BUG_ON(len + ret > sizeof(printf_buffer)-1); + dev->has_error = 1; + if (xenbus_write(dev->nodename, "error", printf_buffer, O_CREAT) != 0) + printk("xenbus: failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); +} + +/* Clear any error. */ +void xenbus_dev_ok(struct xenbus_device *dev) +{ + if (dev->has_error) { + if (xenbus_rm(dev->nodename, "error") != 0) + printk("xenbus: failed to clear error node for %s\n", + dev->nodename); + else + dev->has_error = 0; + } +} + +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ +int xenbus_gather(const char *dir, ...) +{ + va_list ap; + const char *name; + int ret = 0; + + va_start(ap, dir); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + p = xenbus_read(dir, name, NULL); + if (IS_ERR(p)) { + ret = PTR_ERR(p); + break; + } + if (sscanf(p, fmt, result) == 0) + ret = -EINVAL; + kfree(p); + } + va_end(ap); + return ret; +} + +static int xs_watch(const char *path, const char *token) +{ + struct kvec iov[2]; + iov[0].iov_base = (void *)path; iov[0].iov_len = strlen(path) + 1; iov[1].iov_base = (void *)token; iov[1].iov_len = strlen(token) + 1; - iov[2].iov_base = prio; - iov[2].iov_len = strlen(prio) + 1; return xs_error(xs_talkv(XS_WATCH, iov, ARRAY_SIZE(iov), NULL)); } @@ -405,7 +473,7 @@ sprintf(token, "%lX", (long)watch); BUG_ON(find_watch(token)); - err = xs_watch(watch->node, token, watch->priority); + err = xs_watch(watch->node, token); if (!err) list_add(&watch->list, &watches); return err; @@ -423,45 +491,43 @@ list_del(&watch->list); if (err) - printk(KERN_WARNING "XENBUS Failed to release watch %s: %i\n", + printk(KERN_WARNING + "XENBUS Failed to release watch %s: %i\n", watch->node, err); } static int watch_thread(void *unused) { - for (;;) { char *token; char *node = NULL; - wait_event(xb_waitq, xs_input_avail(xs_in)); + wait_event(xb_waitq, xs_input_avail()); /* If this is a spurious wakeup caused by someone * doing an op, they'll hold the lock and the buffer * will be empty by the time we get there. */ - down(&xs_lock); - if (xs_input_avail(xs_in)) + down(&xenbus_lock); + if (xs_input_avail()) node = xs_read_watch(&token); if (node && !IS_ERR(node)) { struct xenbus_watch *w; int err; + err = xs_acknowledge_watch(token); + if (err) + printk(KERN_WARNING "XENBUS ack %s fail %i\n", + node, err); w = find_watch(token); BUG_ON(!w); w->callback(w, node); - /* FIXME: Only ack if it wasn't deleted. */ - err = xs_acknowledge_watch(token); - if (err) - printk(KERN_WARNING - "XENBUS acknowledge %s failed %i\n", - node, err); kfree(node); } else printk(KERN_WARNING "XENBUS xs_read_watch: %li\n", PTR_ERR(node)); - up(&xs_lock); + up(&xenbus_lock); } } @@ -470,7 +536,7 @@ int err; struct task_struct *watcher; - err = xb_init_comms(&xs_in, &xs_out); + err = xb_init_comms(); if (err) return err; diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h Fri Jul 29 20:25:03 2005 @@ -369,4 +369,6 @@ #endif /* __KERNEL__ */ -#endif +#define ARCH_HAS_DEV_MEM + +#endif diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/include/asm-xen/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Fri Jul 29 20:25:03 2005 @@ -97,8 +97,8 @@ void xen_pte_pin(unsigned long ptr); void xen_pte_unpin(unsigned long ptr); #else -#define xen_l1_entry_update(_p, _v) set_pte((_p), (pte_t){(_v)}) -#define xen_l2_entry_update(_p, _v) set_pgd((_p), (pgd_t){(_v)}) +#define xen_l1_entry_update(_p, _v) set_pte((_p), (_v)) +#define xen_l2_entry_update(_p, _v) set_pgd((_p), (_v)) #define xen_pgd_pin(_p) ((void)0) #define xen_pgd_unpin(_p) ((void)0) #define xen_pte_pin(_p) ((void)0) @@ -140,6 +140,14 @@ #endif #include <asm/hypercall.h> + +#if defined(CONFIG_X86_64) +#define MULTI_UVMFLAGS_INDEX 2 +#define MULTI_UVMDOMID_INDEX 3 +#else +#define MULTI_UVMFLAGS_INDEX 3 +#define MULTI_UVMDOMID_INDEX 4 +#endif static inline void MULTI_update_va_mapping( diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/include/asm-xen/xenbus.h --- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Fri Jul 29 20:25:03 2005 @@ -36,8 +36,8 @@ char *devicetype; char *subtype; char *nodename; - int id; struct device dev; + int has_error; void *data; }; @@ -58,21 +58,10 @@ char *name; struct module *owner; const struct xenbus_device_id *ids; - /* Called when xenstore is connected. */ - int (*connect) (struct xenbus_driver * drv); - - int (*probe) (struct xenbus_device * dev, const struct xenbus_device_id * id); - int (*remove) (struct xenbus_device * dev); - int (*configure)(struct xenbus_device * dev); - + int (*probe) (struct xenbus_device * dev, + const struct xenbus_device_id * id); + int (*remove) (struct xenbus_device * dev); struct device_driver driver; -}; - -struct xenbus_evtchn { - unsigned long dom1; - unsigned long port1; - unsigned long dom2; - unsigned long port2; }; static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) @@ -83,63 +72,47 @@ int xenbus_register_driver(struct xenbus_driver *drv); void xenbus_unregister_driver(struct xenbus_driver *drv); -int xenbus_register_backend(struct xenbus_driver *drv); -void xenbus_unregister_backend(struct xenbus_driver *drv); - -/* Iterator over xenbus devices (frontend). */ -int xenbus_for_each_dev(struct xenbus_device * start, void * data, - int (*fn)(struct xenbus_device *, void *)); - -/* Iterator over xenbus drivers (frontend). */ -int xenbus_for_each_drv(struct xenbus_driver * start, void * data, - int (*fn)(struct xenbus_driver *, void *)); - -/* Iterator over xenbus drivers (backend). */ -int xenbus_for_each_backend(struct xenbus_driver * start, void * data, - int (*fn)(struct xenbus_driver *, void *)); - /* Caller must hold this lock to call these functions: it's also held * across watch callbacks. */ -extern struct semaphore xs_lock; +extern struct semaphore xenbus_lock; -char **xs_directory(const char *path, unsigned int *num); -void *xs_read(const char *path, unsigned int *len); -int xs_write(const char *path, - const void *data, unsigned int len, int createflags); -int xs_mkdir(const char *path); -int xs_exists(const char *path); -int xs_mkdirs(const char *path); -int xs_rm(const char *path); -int xs_transaction_start(const char *subtree); -int xs_transaction_end(int abort); -char *xs_get_domain_path(domid_t domid); +char **xenbus_directory(const char *dir, const char *node, unsigned int *num); +void *xenbus_read(const char *dir, const char *node, unsigned int *len); +int xenbus_write(const char *dir, const char *node, + const char *string, int createflags); +int xenbus_mkdir(const char *dir, const char *node); +int xenbus_exists(const char *dir, const char *node); +int xenbus_rm(const char *dir, const char *node); +int xenbus_transaction_start(const char *subtree); +int xenbus_transaction_end(int abort); + +/* Single read and scanf: returns -errno or num scanned if > 0. */ +int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...) + __attribute__((format(scanf, 3, 4))); + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(const char *dir, const char *node, const char *fmt, ...) + __attribute__((format(printf, 3, 4))); + +/* Generic read function: NULL-terminated triples of name, + * sprintf-style type string, and pointer. Returns 0 or errno.*/ +int xenbus_gather(const char *dir, ...); + +/* Report a (negative) errno into the store, with explanation. */ +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,...); + +/* Clear any error. */ +void xenbus_dev_ok(struct xenbus_device *dev); /* Register callback to watch this node. */ struct xenbus_watch { struct list_head list; char *node; - unsigned int priority; void (*callback)(struct xenbus_watch *, const char *node); }; int register_xenbus_watch(struct xenbus_watch *watch); void unregister_xenbus_watch(struct xenbus_watch *watch); -/* Generic read function: NULL-terminated triples of name, - * sprintf-style type string, and pointer. */ -int xenbus_gather(const char *dir, ...); - -char *xenbus_path(const char *dir, const char *name); -char *xenbus_read(const char *dir, const char *name, unsigned int *data_n); -int xenbus_write(const char *dir, const char *name, - const char *data, int data_n); - -int xenbus_read_string(const char *dir, const char *name, char **val); -int xenbus_write_string(const char *dir, const char *name, const char *val); -int xenbus_read_ulong(const char *dir, const char *name, unsigned long *val); -int xenbus_write_ulong(const char *dir, const char *name, unsigned long val); -int xenbus_read_long(const char *dir, const char *name, long *val); -int xenbus_write_long(const char *dir, const char *name, long val); - #endif /* _ASM_XEN_XENBUS_H */ diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/include/linux/mm.h --- a/linux-2.6-xen-sparse/include/linux/mm.h Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/include/linux/mm.h Fri Jul 29 20:25:03 2005 @@ -816,10 +816,6 @@ extern int check_user_page_readable(struct mm_struct *mm, unsigned long address); int remap_pfn_range(struct vm_area_struct *, unsigned long, unsigned long, unsigned long, pgprot_t); -/* Allow arch override for mapping of device and I/O (non-RAM) pages. */ -#ifndef io_remap_pfn_range -#define io_remap_pfn_range remap_pfn_range -#endif #ifdef CONFIG_PROC_FS void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); diff -r a4196568095c -r b53a65034532 patches/linux-2.6.12/smp-alts.patch --- a/patches/linux-2.6.12/smp-alts.patch Fri Jul 29 18:52:33 2005 +++ b/patches/linux-2.6.12/smp-alts.patch Fri Jul 29 20:25:03 2005 @@ -1,7 +1,7 @@ -diff -Naur linux-2.6.11/arch/i386/Kconfig linux-2.6.11.post/arch/i386/Kconfig ---- linux-2.6.11/arch/i386/Kconfig 2005-03-02 07:37:49.000000000 +0000 -+++ linux-2.6.11.post/arch/i386/Kconfig 2005-06-10 13:42:35.000000000 +0100 -@@ -481,6 +481,19 @@ +diff -Naur linux-2.6.12/arch/i386/Kconfig linux-2.6.12.post/arch/i386/Kconfig +--- linux-2.6.12/arch/i386/Kconfig 2005-06-17 15:48:29.000000000 -0400 ++++ linux-2.6.12.post/arch/i386/Kconfig 2005-07-25 05:51:21.000000000 -0400 +@@ -487,6 +487,19 @@ If you don't know what to do here, say N. @@ -21,10 +21,10 @@ config NR_CPUS int "Maximum number of CPUs (2-255)" range 2 255 -diff -Naur linux-2.6.11/arch/i386/kernel/Makefile linux-2.6.11.post/arch/i386/kernel/Makefile ---- linux-2.6.11/arch/i386/kernel/Makefile 2005-03-02 07:37:49.000000000 +0000 -+++ linux-2.6.11.post/arch/i386/kernel/Makefile 2005-06-16 11:16:18.555332435 +0100 -@@ -32,6 +32,7 @@ +diff -Naur linux-2.6.12/arch/i386/kernel/Makefile linux-2.6.12.post/arch/i386/kernel/Makefile +--- linux-2.6.12/arch/i386/kernel/Makefile 2005-06-17 15:48:29.000000000 -0400 ++++ linux-2.6.12.post/arch/i386/kernel/Makefile 2005-07-25 05:51:21.000000000 -0400 +@@ -33,6 +33,7 @@ obj-$(CONFIG_HPET_TIMER) += time_hpet.o obj-$(CONFIG_EFI) += efi.o efi_stub.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o @@ -32,9 +32,9 @@ EXTRA_AFLAGS := -traditional -diff -Naur linux-2.6.11/arch/i386/kernel/smpalts.c linux-2.6.11.post/arch/i386/kernel/smpalts.c ---- linux-2.6.11/arch/i386/kernel/smpalts.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.11.post/arch/i386/kernel/smpalts.c 2005-06-16 11:23:39.300902424 +0100 +diff -Naur linux-2.6.12/arch/i386/kernel/smpalts.c linux-2.6.12.post/arch/i386/kernel/smpalts.c +--- linux-2.6.12/arch/i386/kernel/smpalts.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.12.post/arch/i386/kernel/smpalts.c 2005-07-25 05:51:21.000000000 -0400 @@ -0,0 +1,85 @@ +#include <linux/kernel.h> +#include <asm/system.h> @@ -121,10 +121,10 @@ + asm volatile ("jmp 1f\n1:"); + mb(); +} -diff -Naur linux-2.6.11/arch/i386/kernel/smpboot.c linux-2.6.11.post/arch/i386/kernel/smpboot.c ---- linux-2.6.11/arch/i386/kernel/smpboot.c 2005-03-02 07:38:09.000000000 +0000 -+++ linux-2.6.11.post/arch/i386/kernel/smpboot.c 2005-06-16 11:17:09.287064617 +0100 -@@ -1003,6 +1003,11 @@ +diff -Naur linux-2.6.12/arch/i386/kernel/smpboot.c linux-2.6.12.post/arch/i386/kernel/smpboot.c +--- linux-2.6.12/arch/i386/kernel/smpboot.c 2005-06-17 15:48:29.000000000 -0400 ++++ linux-2.6.12.post/arch/i386/kernel/smpboot.c 2005-07-25 05:51:21.000000000 -0400 +@@ -1001,6 +1001,11 @@ if (max_cpus <= cpucount+1) continue; @@ -136,7 +136,7 @@ if (do_boot_cpu(apicid)) printk("CPU #%d not responding - cannot use it.\n", apicid); -@@ -1118,6 +1123,11 @@ +@@ -1130,6 +1135,11 @@ return -EIO; } @@ -148,9 +148,9 @@ local_irq_enable(); /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); -diff -Naur linux-2.6.11/arch/i386/kernel/vmlinux.lds.S linux-2.6.11.post/arch/i386/kernel/vmlinux.lds.S ---- linux-2.6.11/arch/i386/kernel/vmlinux.lds.S 2005-03-02 07:38:37.000000000 +0000 -+++ linux-2.6.11.post/arch/i386/kernel/vmlinux.lds.S 2005-06-10 11:14:14.000000000 +0100 +diff -Naur linux-2.6.12/arch/i386/kernel/vmlinux.lds.S linux-2.6.12.post/arch/i386/kernel/vmlinux.lds.S +--- linux-2.6.12/arch/i386/kernel/vmlinux.lds.S 2005-06-17 15:48:29.000000000 -0400 ++++ linux-2.6.12.post/arch/i386/kernel/vmlinux.lds.S 2005-07-25 05:51:21.000000000 -0400 @@ -30,6 +30,13 @@ __ex_table : { *(__ex_table) } __stop___ex_table = .; @@ -165,9 +165,9 @@ RODATA /* writeable */ -diff -Naur linux-2.6.11/include/asm-i386/atomic.h linux-2.6.11.post/include/asm-i386/atomic.h ---- linux-2.6.11/include/asm-i386/atomic.h 2005-03-02 07:37:51.000000000 +0000 -+++ linux-2.6.11.post/include/asm-i386/atomic.h 2005-06-13 10:10:39.000000000 +0100 +diff -Naur linux-2.6.12/include/asm-i386/atomic.h linux-2.6.12.post/include/asm-i386/atomic.h +--- linux-2.6.12/include/asm-i386/atomic.h 2005-06-17 15:48:29.000000000 -0400 ++++ linux-2.6.12.post/include/asm-i386/atomic.h 2005-07-25 05:51:21.000000000 -0400 @@ -4,18 +4,13 @@ #include <linux/config.h> #include <linux/compiler.h> @@ -188,9 +188,9 @@ /* * Make sure gcc doesn't try to be clever and move things around * on us. We need to use _exactly_ the address the user gave us, -diff -Naur linux-2.6.11/include/asm-i386/bitops.h linux-2.6.11.post/include/asm-i386/bitops.h ---- linux-2.6.11/include/asm-i386/bitops.h 2005-03-02 07:38:12.000000000 +0000 -+++ linux-2.6.11.post/include/asm-i386/bitops.h 2005-06-13 10:11:54.000000000 +0100 +diff -Naur linux-2.6.12/include/asm-i386/bitops.h linux-2.6.12.post/include/asm-i386/bitops.h +--- linux-2.6.12/include/asm-i386/bitops.h 2005-06-17 15:48:29.000000000 -0400 ++++ linux-2.6.12.post/include/asm-i386/bitops.h 2005-07-25 05:51:21.000000000 -0400 @@ -7,6 +7,7 @@ #include <linux/config.h> @@ -266,9 +266,9 @@ "btcl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) :"Ir" (nr) : "memory"); -diff -Naur linux-2.6.11/include/asm-i386/rwsem.h linux-2.6.11.post/include/asm-i386/rwsem.h ---- linux-2.6.11/include/asm-i386/rwsem.h 2005-03-02 07:38:08.000000000 +0000 -+++ linux-2.6.11.post/include/asm-i386/rwsem.h 2005-06-13 10:13:06.000000000 +0100 +diff -Naur linux-2.6.12/include/asm-i386/rwsem.h linux-2.6.12.post/include/asm-i386/rwsem.h +--- linux-2.6.12/include/asm-i386/rwsem.h 2005-06-17 15:48:29.000000000 -0400 ++++ linux-2.6.12.post/include/asm-i386/rwsem.h 2005-07-25 05:51:21.000000000 -0400 @@ -40,6 +40,7 @@ #include <linux/list.h> @@ -349,9 +349,9 @@ : "+r"(tmp), "=m"(sem->count) : "r"(sem), "m"(sem->count) : "memory"); -diff -Naur linux-2.6.11/include/asm-i386/smp_alt.h linux-2.6.11.post/include/asm-i386/smp_alt.h ---- linux-2.6.11/include/asm-i386/smp_alt.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.11.post/include/asm-i386/smp_alt.h 2005-06-16 11:16:50.109433206 +0100 +diff -Naur linux-2.6.12/include/asm-i386/smp_alt.h linux-2.6.12.post/include/asm-i386/smp_alt.h +--- linux-2.6.12/include/asm-i386/smp_alt.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.12.post/include/asm-i386/smp_alt.h 2005-07-25 05:51:21.000000000 -0400 @@ -0,0 +1,32 @@ +#ifndef __ASM_SMP_ALT_H__ +#define __ASM_SMP_ALT_H__ @@ -385,9 +385,9 @@ +#endif + +#endif /* __ASM_SMP_ALT_H__ */ -diff -Naur linux-2.6.11/include/asm-i386/spinlock.h linux-2.6.11.post/include/asm-i386/spinlock.h ---- linux-2.6.11/include/asm-i386/spinlock.h 2005-03-02 07:37:50.000000000 +0000 -+++ linux-2.6.11.post/include/asm-i386/spinlock.h 2005-06-13 14:13:52.000000000 +0100 +diff -Naur linux-2.6.12/include/asm-i386/spinlock.h linux-2.6.12.post/include/asm-i386/spinlock.h +--- linux-2.6.12/include/asm-i386/spinlock.h 2005-06-17 15:48:29.000000000 -0400 ++++ linux-2.6.12.post/include/asm-i386/spinlock.h 2005-07-25 05:51:21.000000000 -0400 @@ -6,6 +6,7 @@ #include <asm/page.h> #include <linux/config.h> @@ -467,9 +467,9 @@ static inline int _raw_read_trylock(rwlock_t *lock) { -diff -Naur linux-2.6.11/include/asm-i386/system.h linux-2.6.11.post/include/asm-i386/system.h ---- linux-2.6.11/include/asm-i386/system.h 2005-03-02 07:37:30.000000000 +0000 -+++ linux-2.6.11.post/include/asm-i386/system.h 2005-06-15 13:21:40.000000000 +0100 +diff -Naur linux-2.6.12/include/asm-i386/system.h linux-2.6.12.post/include/asm-i386/system.h +--- linux-2.6.12/include/asm-i386/system.h 2005-06-17 15:48:29.000000000 -0400 ++++ linux-2.6.12.post/include/asm-i386/system.h 2005-07-25 05:51:21.000000000 -0400 @@ -5,7 +5,7 @@ #include <linux/kernel.h> #include <asm/segment.h> diff -r a4196568095c -r b53a65034532 tools/debugger/gdb/gdbbuild --- a/tools/debugger/gdb/gdbbuild Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/gdb/gdbbuild Fri Jul 29 20:25:03 2005 @@ -1,6 +1,6 @@ #!/bin/sh -XENROOT=`bk root` +XENROOT=`hg root` export XENROOT cd $XENROOT/tools/debugger/gdb diff -r a4196568095c -r b53a65034532 tools/debugger/libxendebug/xendebug.c --- a/tools/debugger/libxendebug/xendebug.c Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/libxendebug/xendebug.c Fri Jul 29 20:25:03 2005 @@ -42,7 +42,6 @@ struct list_head list; memory_t address; u32 domain; - u16 vcpu; u8 old_value; /* old value for software bkpt */ } bwcpoint_t, *bwcpoint_p; diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/PDB.ml --- a/tools/debugger/pdb/PDB.ml Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/PDB.ml Fri Jul 29 20:25:03 2005 @@ -7,9 +7,12 @@ * @version 1 *) +open Util + exception Unimplemented of string exception Unknown_context of string exception Unknown_domain +exception Unknown_process type context_t = | Void @@ -44,6 +47,31 @@ let delete_context key = Hashtbl.remove hash key + + +(** + find_process : Locate the socket associated with the context(s) + matching a particular (domain, process id) pair. if there are multiple + contexts (there shouldn't be), then return the first one. + *) + +let find_process dom pid = + let find key ctx list = + match ctx with + | Process p -> + if (((Process.get_domain p) = dom) && + ((Process.get_process p) = pid)) + then + key :: list + else + list + | _ -> list + in + let sock_list = Hashtbl.fold find hash [] in + match sock_list with + | hd::tl -> hd + | [] -> raise Unknown_process + (** find_domain : Locate the socket associated with the context(s) @@ -98,18 +126,25 @@ begin let xdom_sock = find_xen_domain_context (Process.get_domain p) in let xdom_ctx = find_context xdom_sock in - match xdom_ctx with - | Xen_domain d -> - Process.attach_debugger p d - | _ -> failwith ("context has wrong xen domain type") + begin + match xdom_ctx with + | Xen_domain d -> + Process.attach_debugger p d + | _ -> failwith ("context has wrong xen domain type") + end; + raise No_reply end | _ -> raise (Unimplemented "attach debugger") let detach_debugger ctx = match ctx with - | Domain d -> Domain.detach_debugger (Domain.get_domain d) - (Domain.get_vcpu d) - | Process p -> Process.detach_debugger p + | Domain d -> + Domain.detach_debugger (Domain.get_domain d) + (Domain.get_vcpu d); + "OK" + | Process p -> + Process.detach_debugger p; + raise No_reply | _ -> raise (Unimplemented "detach debugger") @@ -158,8 +193,8 @@ match params with | dom::pid::_ -> let p = Process(Process.new_context dom pid) in - attach_debugger p; - Hashtbl.replace hash key p + Hashtbl.replace hash key p; + attach_debugger p | _ -> failwith "bogus parameters to process context" end | "xen domain" @@ -188,26 +223,42 @@ match ctx with | Void -> Intel.null_registers (* default for startup *) | Domain d -> Domain.read_registers d - | Process p -> Process.read_registers p + | Process p -> + begin + Process.read_registers p; + raise No_reply + end | _ -> raise (Unimplemented "read registers") let write_register ctx register value = match ctx with | Domain d -> Domain.write_register d register value - | Process p -> Process.write_register p register value + | Process p -> + begin + Process.write_register p register value; + raise No_reply + end | _ -> raise (Unimplemented "write register") let read_memory ctx addr len = match ctx with | Domain d -> Domain.read_memory d addr len - | Process p -> Process.read_memory p addr len + | Process p -> + begin + Process.read_memory p addr len; + raise No_reply + end | _ -> raise (Unimplemented "read memory") let write_memory ctx addr values = match ctx with | Domain d -> Domain.write_memory d addr values - | Process p -> Process.write_memory p addr values + | Process p -> + begin + Process.write_memory p addr values; + raise No_reply + end | _ -> raise (Unimplemented "write memory") diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/Process.ml --- a/tools/debugger/pdb/Process.ml Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/Process.ml Fri Jul 29 20:25:03 2005 @@ -54,10 +54,10 @@ proc_ctx.ring <- Xen_domain.get_ring dom_ctx; _attach_debugger proc_ctx -external read_registers : context_t -> registers = "proc_read_registers" +external read_registers : context_t -> unit = "proc_read_registers" external write_register : context_t -> register -> int32 -> unit = "proc_write_register" -external read_memory : context_t -> int32 -> int -> int list = +external read_memory : context_t -> int32 -> int -> unit = "proc_read_memory" external write_memory : context_t -> int32 -> int list -> unit = "proc_write_memory" diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/Process.mli --- a/tools/debugger/pdb/Process.mli Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/Process.mli Fri Jul 29 20:25:03 2005 @@ -27,9 +27,9 @@ val pause : context_t -> unit -val read_registers : context_t -> registers +val read_registers : context_t -> unit val write_register : context_t -> register -> int32 -> unit -val read_memory : context_t -> int32 -> int -> int list +val read_memory : context_t -> int32 -> int -> unit val write_memory : context_t -> int32 -> int list -> unit val continue : context_t -> unit diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/Util.ml --- a/tools/debugger/pdb/Util.ml Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/Util.ml Fri Jul 29 20:25:03 2005 @@ -154,3 +154,12 @@ * BUG NEED TO LISTEN FOR REPLY +/- AND POSSIBLY RE-TRANSMIT *) + +(** A few debugger commands such as step 's' and continue 'c' do + * not immediately return a response to the debugger. In these + * cases we raise No_reply instead. + * This is also used by some contexts (such as Linux processes) + * which utilize an asynchronous request / response protocol when + * communicating with their respective backends. + *) +exception No_reply diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/Xen_domain.ml --- a/tools/debugger/pdb/Xen_domain.ml Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/Xen_domain.ml Fri Jul 29 20:25:03 2005 @@ -40,4 +40,4 @@ let string_of_context ctx = Printf.sprintf "{xen domain assist} domain: %d" ctx.domain -external process_response : int32 -> unit = "process_handle_response" +external process_response : int32 -> int * int * string = "process_handle_response" diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/Xen_domain.mli --- a/tools/debugger/pdb/Xen_domain.mli Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/Xen_domain.mli Fri Jul 29 20:25:03 2005 @@ -21,5 +21,5 @@ val string_of_context : context_t -> string -val process_response : int32 -> unit +val process_response : int32 -> int * int * string diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/debugger.ml --- a/tools/debugger/pdb/debugger.ml Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/debugger.ml Fri Jul 29 20:25:03 2005 @@ -12,12 +12,6 @@ open Util open Str -(** a few debugger commands such as step 's' and continue 'c' do - * not immediately return a response to the debugger. in these - * cases we raise No_reply instead. - *) -exception No_reply - let initialize_debugger () = () @@ -31,8 +25,7 @@ hash. It will be cleaned up with the socket is closed. *) let gdb_detach ctx = - PDB.detach_debugger ctx; - raise No_reply + PDB.detach_debugger ctx (** Kill Command @@ -295,12 +288,17 @@ let channel = Evtchn.read fd in let ctx = find_context fd in + let (dom, pid, str) = begin match ctx with | Xen_domain d -> Xen_domain.process_response (Xen_domain.get_ring d) | _ -> failwith ("process_xen_domain called without Xen_domain context") - end; - + end + in + let sock = PDB.find_process dom pid in + print_endline (Printf.sprintf "(linux) dom:%d pid:%d %s %s" + dom pid str (Util.get_connection_info sock)); + Util.send_reply sock str; Evtchn.unmask fd channel (* allow next virq *) diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/linux-2.6-module/Makefile --- a/tools/debugger/pdb/linux-2.6-module/Makefile Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/linux-2.6-module/Makefile Fri Jul 29 20:25:03 2005 @@ -1,4 +1,4 @@ -XEN_ROOT=../../../.. +XEN_ROOT = ../../../.. LINUX_DIR = linux-2.6.12-xenU KDIR = $(XEN_ROOT)/$(LINUX_DIR) diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/linux-2.6-module/debug.c --- a/tools/debugger/pdb/linux-2.6-module/debug.c Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/linux-2.6-module/debug.c Fri Jul 29 20:25:03 2005 @@ -3,49 +3,54 @@ * pdb debug functionality for processes. */ - #include <linux/module.h> +#include <linux/mm.h> #include <linux/sched.h> +#include <asm-i386/kdebug.h> +#include <asm-xen/asm-i386/processor.h> #include <asm-xen/asm-i386/ptrace.h> #include <asm-xen/xen-public/xen.h> - #include "pdb_module.h" - -EXPORT_SYMBOL(pdb_attach); -EXPORT_SYMBOL(pdb_detach); - -int -pdb_attach (int pid) -{ - struct task_struct *target; +#include "pdb_debug.h" + +#define BWC_DEBUG 1 +#define BWC_INT3 3 +typedef struct bwcpoint /* break/watch/catch point */ +{ + struct list_head list; + memory_t address; + u32 domain; + u32 process; + u8 old_value; /* old value for software bkpt */ + u8 type; /* BWC_??? */ +} bwcpoint_t, *bwcpoint_p; + +static bwcpoint_t bwcpoint_list; + +void +pdb_initialize_bwcpoint (void) +{ + memset((void *) &bwcpoint_list, 0, sizeof(bwcpoint_t)); + INIT_LIST_HEAD(&bwcpoint_list.list); + + return; +} + + +int +pdb_suspend (struct task_struct *target) +{ u32 rc = 0; - printk ("pdb attach: 0x%x\n", pid); - - read_lock(&tasklist_lock); - target = find_task_by_pid(pid); - if (target) - get_task_struct(target); - read_unlock(&tasklist_lock); - force_sig(SIGSTOP, target); /* force_sig_specific ??? */ return rc; } int -pdb_detach (int pid) -{ - int rc = 0; - struct task_struct *target; - - printk ("pdb detach: 0x%x\n", pid); - - read_lock(&tasklist_lock); - target = find_task_by_pid(pid); - if (target) - get_task_struct(target); - read_unlock(&tasklist_lock); +pdb_resume (struct task_struct *target) +{ + int rc = 0; wake_up_process(target); @@ -55,106 +60,350 @@ /* * from linux-2.6.11/arch/i386/kernel/ptrace.c::getreg() */ -int -pdb_read_register (int pid, pdb_op_rd_reg_p op, unsigned long *dest) -{ - int rc = 0; - struct task_struct *target; +static unsigned long +_pdb_get_register (struct task_struct *target, int reg) +{ + unsigned long result = ~0UL; unsigned long offset; unsigned char *stack = 0L; - *dest = ~0UL; - - read_lock(&tasklist_lock); - target = find_task_by_pid(pid); - if (target) - get_task_struct(target); - read_unlock(&tasklist_lock); - - switch (op->reg) - { - case FS: - *dest = target->thread.fs; - break; - case GS: - *dest = target->thread.gs; - break; - case DS: - case ES: - case SS: - case CS: - *dest = 0xffff; + switch (reg) + { + case LINUX_FS: + result = target->thread.fs; + break; + case LINUX_GS: + result = target->thread.gs; + break; + case LINUX_DS: + case LINUX_ES: + case LINUX_SS: + case LINUX_CS: + result = 0xffff; /* fall through */ default: - if (op->reg > GS) - op->reg -= 2; - - offset = op->reg * sizeof(long); + if (reg > LINUX_GS) + reg -= 2; + + offset = reg * sizeof(long); offset -= sizeof(struct pt_regs); stack = (unsigned char *)target->thread.esp0; stack += offset; - *dest &= *((int *)stack); - } - - /* - printk ("pdb read register: 0x%x %2d 0x%p 0x%lx\n", - pid, op->reg, stack, *dest); - */ - - return rc; + result &= *((int *)stack); + } + + return result; } /* * from linux-2.6.11/arch/i386/kernel/ptrace.c::putreg() */ -int -pdb_write_register (int pid, pdb_op_wr_reg_p op) -{ - int rc = 0; - struct task_struct *target; +static void +_pdb_set_register (struct task_struct *target, int reg, unsigned long val) +{ unsigned long offset; unsigned char *stack; - unsigned long value = op->value; - - /* - printk ("pdb write register: 0x%x %2d 0x%lx\n", pid, op->reg, value); - */ - - read_lock(&tasklist_lock); - target = find_task_by_pid(pid); - if (target) - get_task_struct(target); - read_unlock(&tasklist_lock); - - switch (op->reg) - { - case FS: + unsigned long value = val; + + switch (reg) + { + case LINUX_FS: target->thread.fs = value; - return rc; - case GS: + return; + case LINUX_GS: target->thread.gs = value; - return rc; - case DS: - case ES: + return; + case LINUX_DS: + case LINUX_ES: value &= 0xffff; break; - case SS: - case CS: + case LINUX_SS: + case LINUX_CS: value &= 0xffff; break; - case EFL: - break; - } - - if (op->reg > GS) - op->reg -= 2; - offset = op->reg * sizeof(long); + case LINUX_EFL: + break; + } + + if (reg > LINUX_GS) + reg -= 2; + offset = reg * sizeof(long); offset -= sizeof(struct pt_regs); stack = (unsigned char *)target->thread.esp0; stack += offset; - *(unsigned long *) stack = op->value; - - return rc; + *(unsigned long *) stack = value; + + return; +} + +int +pdb_read_registers (struct task_struct *target, pdb_op_rd_regs_p op) +{ + int rc = 0; + + op->reg[ 0] = _pdb_get_register(target, LINUX_EAX); + op->reg[ 1] = _pdb_get_register(target, LINUX_ECX); + op->reg[ 2] = _pdb_get_register(target, LINUX_EDX); + op->reg[ 3] = _pdb_get_register(target, LINUX_EBX); + op->reg[ 4] = _pdb_get_register(target, LINUX_ESP); + op->reg[ 5] = _pdb_get_register(target, LINUX_EBP); + op->reg[ 6] = _pdb_get_register(target, LINUX_ESI); + op->reg[ 7] = _pdb_get_register(target, LINUX_EDI); + op->reg[ 8] = _pdb_get_register(target, LINUX_EIP); + op->reg[ 9] = _pdb_get_register(target, LINUX_EFL); + + op->reg[10] = _pdb_get_register(target, LINUX_CS); + op->reg[11] = _pdb_get_register(target, LINUX_SS); + op->reg[12] = _pdb_get_register(target, LINUX_DS); + op->reg[13] = _pdb_get_register(target, LINUX_ES); + op->reg[14] = _pdb_get_register(target, LINUX_FS); + op->reg[15] = _pdb_get_register(target, LINUX_GS); + + return rc; +} + +int +pdb_write_register (struct task_struct *target, pdb_op_wr_reg_p op) +{ + int rc = 0; + + _pdb_set_register(target, op->reg, op->value); + + return rc; +} + +int +pdb_access_memory (struct task_struct *target, unsigned long address, + void *buffer, int length, int write) +{ + int rc = 0; + + access_process_vm(target, address, buffer, length, write); + + return rc; +} + +int +pdb_continue (struct task_struct *target) +{ + int rc = 0; + unsigned long eflags; + + eflags = _pdb_get_register(target, LINUX_EFL); + eflags &= ~X86_EFLAGS_TF; + _pdb_set_register(target, LINUX_EFL, eflags); + + wake_up_process(target); + + return rc; +} + +int +pdb_step (struct task_struct *target) +{ + int rc = 0; + unsigned long eflags; + bwcpoint_p bkpt; + + eflags = _pdb_get_register(target, LINUX_EFL); + eflags |= X86_EFLAGS_TF; + _pdb_set_register(target, LINUX_EFL, eflags); + + bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL); + if ( bkpt == NULL ) + { + printk("error: unable to allocation memory\n"); + return -1; + } + + bkpt->process = target->pid; + bkpt->address = 0; + bkpt->type = BWC_DEBUG; + + list_add(&bkpt->list, &bwcpoint_list.list); + + wake_up_process(target); + + return rc; +} + +int +pdb_insert_memory_breakpoint (struct task_struct *target, + memory_t address, u32 length) +{ + int rc = 0; + bwcpoint_p bkpt; + u8 breakpoint_opcode = 0xcc; + + printk("insert breakpoint %d:%lx len: %d\n", target->pid, address, length); + + bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL); + if ( bkpt == NULL ) + { + printk("error: unable to allocation memory\n"); + return -1; + } + + if ( length != 1 ) + { + printk("error: breakpoint length should be 1\n"); + kfree(bkpt); + return -1; + } + + bkpt->process = target->pid; + bkpt->address = address; + bkpt->type = BWC_INT3; + + pdb_access_memory(target, address, &bkpt->old_value, 1, 0); + pdb_access_memory(target, address, &breakpoint_opcode, 1, 1); + + list_add(&bkpt->list, &bwcpoint_list.list); + + printk("breakpoint_set %d:%lx OLD: 0x%x\n", + target->pid, address, bkpt->old_value); + + return rc; +} + +int +pdb_remove_memory_breakpoint (struct task_struct *target, + memory_t address, u32 length) +{ + int rc = 0; + bwcpoint_p bkpt = NULL; + + printk ("remove breakpoint %d:%lx\n", target->pid, address); + + struct list_head *entry; + list_for_each(entry, &bwcpoint_list.list) + { + bkpt = list_entry(entry, bwcpoint_t, list); + if ( target->pid == bkpt->process && + address == bkpt->address && + bkpt->type == BWC_INT3 ) + break; + } + + if (bkpt == &bwcpoint_list || bkpt == NULL) + { + printk ("error: no breakpoint found\n"); + return -1; + } + + list_del(&bkpt->list); + + pdb_access_memory(target, address, &bkpt->old_value, 1, 1); + + kfree(bkpt); + + return rc; +} + + +/***************************************************************/ + +int +pdb_exceptions_notify (struct notifier_block *self, unsigned long val, + void *data) +{ + struct die_args *args = (struct die_args *)data; + + switch (val) + { + case DIE_DEBUG: + if (pdb_debug_fn(args->regs, args->trapnr, args->err)) + return NOTIFY_STOP; + break; + case DIE_TRAP: + if (args->trapnr == 3 && pdb_int3_fn(args->regs, args->err)) + return NOTIFY_STOP; + break; + case DIE_INT3: /* without kprobes, we should never see DIE_INT3 */ + case DIE_GPF: + case DIE_PAGE_FAULT: + default: + break; + } + + return NOTIFY_DONE; +} + + +int +pdb_debug_fn (struct pt_regs *regs, long error_code, + unsigned int condition) +{ + pdb_response_t resp; + bwcpoint_p bkpt = NULL; + + struct list_head *entry; + list_for_each(entry, &bwcpoint_list.list) + { + bkpt = list_entry(entry, bwcpoint_t, list); + if ( current->pid == bkpt->process && + bkpt->type == BWC_DEBUG ) + break; + } + + if (bkpt == &bwcpoint_list || bkpt == NULL) + { + printk("not my debug 0x%x 0x%lx\n", current->pid, regs->eip); + return 0; + } + + list_del(&bkpt->list); + + pdb_suspend(current); + + printk("(pdb) debug pid: %d, eip: 0x%08lx\n", current->pid, regs->eip); + + regs->eflags &= ~X86_EFLAGS_TF; + set_tsk_thread_flag(current, TIF_SINGLESTEP); + + resp.operation = PDB_OPCODE_STEP; + resp.process = current->pid; + resp.status = PDB_RESPONSE_OKAY; + + pdb_send_response(&resp); + + return 1; +} + + +int +pdb_int3_fn (struct pt_regs *regs, long error_code) +{ + pdb_response_t resp; + bwcpoint_p bkpt = NULL; + + struct list_head *entry; + list_for_each(entry, &bwcpoint_list.list) + { + bkpt = list_entry(entry, bwcpoint_t, list); + if ( current->pid == bkpt->process && + regs->eip == bkpt->address && + bkpt->type == BWC_INT3 ) + break; + } + + if (bkpt == &bwcpoint_list || bkpt == NULL) + { + printk("not my int3 bkpt 0x%x 0x%lx\n", current->pid, regs->eip); + return 0; + } + + printk("(pdb) int3 pid: %d, eip: 0x%08lx\n", current->pid, regs->eip); + + pdb_suspend(current); + + resp.operation = PDB_OPCODE_CONTINUE; + resp.process = current->pid; + resp.status = PDB_RESPONSE_OKAY; + + pdb_send_response(&resp); + + return 1; } /* diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/linux-2.6-module/module.c --- a/tools/debugger/pdb/linux-2.6-module/module.c Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/linux-2.6-module/module.c Fri Jul 29 20:25:03 2005 @@ -11,6 +11,8 @@ #include <linux/module.h> #include <linux/interrupt.h> +#include <asm-i386/kdebug.h> + #include <asm-xen/evtchn.h> #include <asm-xen/ctrl_if.h> #include <asm-xen/hypervisor.h> @@ -20,17 +22,23 @@ #include <asm-xen/xen-public/io/ring.h> #include "pdb_module.h" +#include "pdb_debug.h" #define PDB_RING_SIZE __RING_SIZE((pdb_sring_t *)0, PAGE_SIZE) static pdb_back_ring_t pdb_ring; static unsigned int pdb_evtchn; static unsigned int pdb_irq; +static unsigned int pdb_domain; + +/* work queue */ +static void pdb_work_handler(void *unused); +static DECLARE_WORK(pdb_deferred_work, pdb_work_handler, NULL); /* * send response to a pdb request */ -static void +void pdb_send_response (pdb_response_t *response) { pdb_response_t *resp; @@ -38,6 +46,7 @@ resp = RING_GET_RESPONSE(&pdb_ring, pdb_ring.rsp_prod_pvt); memcpy(resp, response, sizeof(pdb_response_t)); + resp->domain = pdb_domain; wmb(); /* Ensure other side can see the response fields. */ pdb_ring.rsp_prod_pvt++; @@ -53,42 +62,98 @@ pdb_process_request (pdb_request_t *request) { pdb_response_t resp; + struct task_struct *target; + + read_lock(&tasklist_lock); + target = find_task_by_pid(request->process); + if (target) + get_task_struct(target); + read_unlock(&tasklist_lock); + + resp.operation = request->operation; + resp.process = request->process; + + if (!target) + { + printk ("(linux) target not found 0x%x\n", request->process); + resp.status = PDB_RESPONSE_ERROR; + goto response; + } switch (request->operation) { + case PDB_OPCODE_PAUSE : + pdb_suspend(target); + resp.status = PDB_RESPONSE_OKAY; + break; case PDB_OPCODE_ATTACH : - pdb_attach(request->process); + pdb_suspend(target); + pdb_domain = request->u.attach.domain; + printk("(linux) attach dom:0x%x pid:0x%x\n", + pdb_domain, request->process); resp.status = PDB_RESPONSE_OKAY; break; case PDB_OPCODE_DETACH : - pdb_detach(request->process); - resp.status = PDB_RESPONSE_OKAY; - break; - case PDB_OPCODE_RD_REG : - pdb_read_register(request->process, &request->u.rd_reg, - (unsigned long *)&resp.value); + pdb_resume(target); + printk("(linux) detach 0x%x\n", request->process); + resp.status = PDB_RESPONSE_OKAY; + break; + case PDB_OPCODE_RD_REGS : + pdb_read_registers(target, &resp.u.rd_regs); resp.status = PDB_RESPONSE_OKAY; break; case PDB_OPCODE_WR_REG : - pdb_write_register(request->process, &request->u.wr_reg); + pdb_write_register(target, &request->u.wr_reg); + resp.status = PDB_RESPONSE_OKAY; + break; + case PDB_OPCODE_RD_MEM : + pdb_access_memory(target, request->u.rd_mem.address, + &resp.u.rd_mem.data, request->u.rd_mem.length, 0); + resp.u.rd_mem.address = request->u.rd_mem.address; + resp.u.rd_mem.length = request->u.rd_mem.length; + resp.status = PDB_RESPONSE_OKAY; + break; + case PDB_OPCODE_WR_MEM : + pdb_access_memory(target, request->u.wr_mem.address, + &request->u.wr_mem.data, request->u.wr_mem.length, 1); + resp.status = PDB_RESPONSE_OKAY; + break; + case PDB_OPCODE_CONTINUE : + pdb_continue(target); + goto no_response; + break; + case PDB_OPCODE_STEP : + pdb_step(target); + resp.status = PDB_RESPONSE_OKAY; + goto no_response; + break; + case PDB_OPCODE_SET_BKPT : + pdb_insert_memory_breakpoint(target, request->u.bkpt.address, + request->u.bkpt.length); + resp.status = PDB_RESPONSE_OKAY; + break; + case PDB_OPCODE_CLR_BKPT : + pdb_remove_memory_breakpoint(target, request->u.bkpt.address, + request->u.bkpt.length); resp.status = PDB_RESPONSE_OKAY; break; default: printk("(pdb) unknown request operation %d\n", request->operation); resp.status = PDB_RESPONSE_ERROR; } - - resp.operation = request->operation; - + + response: pdb_send_response (&resp); + + no_response: return; } /* - * receive a pdb request - */ -static irqreturn_t -pdb_interrupt (int irq, void *dev_id, struct pt_regs *ptregs) + * work queue + */ +static void +pdb_work_handler (void *unused) { pdb_request_t *req; RING_IDX i, rp; @@ -105,10 +170,18 @@ } pdb_ring.req_cons = i; +} + +/* + * receive a pdb request + */ +static irqreturn_t +pdb_interrupt (int irq, void *dev_id, struct pt_regs *ptregs) +{ + schedule_work(&pdb_deferred_work); return IRQ_HANDLED; } - static void pdb_send_connection_status(int status, memory_t ring) @@ -135,8 +208,6 @@ static void pdb_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) { -printk ("pdb ctrlif rx\n"); - switch (msg->subtype) { case CMSG_DEBUG_CONNECTION_STATUS: @@ -160,17 +231,34 @@ return; } + +/********************************************************************/ + +static struct notifier_block pdb_exceptions_nb = +{ + .notifier_call = pdb_exceptions_notify, + .priority = 0x1 /* low priority */ +}; + + static int __init -pdb_initialize(void) -{ +pdb_initialize (void) +{ + int err; pdb_sring_t *sring; printk("----\npdb initialize %s %s\n", __DATE__, __TIME__); + + pdb_initialize_bwcpoint(); /* if ( xen_start_info.flags & SIF_INITDOMAIN ) return 1; */ + + pdb_evtchn = 0; + pdb_irq = 0; + pdb_domain = 0; (void)ctrl_if_register_receiver(CMSG_DEBUG, pdb_ctrlif_rx, CALLBACK_IN_BLOCKING_CONTEXT); @@ -184,12 +272,21 @@ pdb_send_connection_status(PDB_CONNECTION_STATUS_UP, virt_to_machine(pdb_ring.sring) >> PAGE_SHIFT); - return 0; -} + /* handler for int1 & int3 */ + err = register_die_notifier(&pdb_exceptions_nb); + + return err; +} + +extern struct notifier_block *i386die_chain; +extern spinlock_t die_notifier_lock; static void __exit pdb_terminate(void) { + int err = 0; + unsigned long flags; + printk("pdb cleanup\n"); (void)ctrl_if_unregister_receiver(CMSG_DEBUG, pdb_ctrlif_rx); @@ -207,6 +304,12 @@ } pdb_send_connection_status(PDB_CONNECTION_STATUS_DOWN, 0); + + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_unregister(&i386die_chain, &pdb_exceptions_nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + + return; } diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/linux-2.6-module/pdb_module.h --- a/tools/debugger/pdb/linux-2.6-module/pdb_module.h Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/linux-2.6-module/pdb_module.h Fri Jul 29 20:25:03 2005 @@ -1,34 +1,80 @@ -#ifndef __XEN_PDB_H_ -#define __XEN_PDB_H_ +#ifndef __PDB_MODULE_H_ +#define __PDB_MODULE_H_ -#define PDB_OPCODE_ATTACH 1 -#define PDB_OPCODE_DETACH 2 +#include "../pdb_caml_xen.h" -#define PDB_OPCODE_RD_REG 3 -typedef struct pdb_op_rd_reg +#define PDB_OPCODE_PAUSE 1 + +#define PDB_OPCODE_ATTACH 2 +typedef struct pdb_op_attach { - u32 reg; -} pdb_op_rd_reg_t, *pdb_op_rd_reg_p; + u32 domain; +} pdb_op_attach_t, *pdb_op_attach_p; -#define PDB_OPCODE_WR_REG 4 +#define PDB_OPCODE_DETACH 3 + +#define PDB_OPCODE_RD_REGS 4 +typedef struct pdb_op_rd_regs +{ + u32 reg[GDB_REGISTER_FRAME_SIZE]; +} pdb_op_rd_regs_t, *pdb_op_rd_regs_p; + +#define PDB_OPCODE_WR_REG 5 typedef struct pdb_op_wr_reg { u32 reg; u32 value; } pdb_op_wr_reg_t, *pdb_op_wr_reg_p; +#define PDB_OPCODE_RD_MEM 6 +typedef struct pdb_op_rd_mem_req +{ + u32 address; + u32 length; +} pdb_op_rd_mem_req_t, *pdb_op_rd_mem_req_p; + +typedef struct pdb_op_rd_mem_resp +{ + u32 address; + u32 length; + u8 data[1024]; +} pdb_op_rd_mem_resp_t, *pdb_op_rd_mem_resp_p; + +#define PDB_OPCODE_WR_MEM 7 +typedef struct pdb_op_wr_mem +{ + u32 address; + u32 length; + u8 data[1024]; /* arbitrary */ +} pdb_op_wr_mem_t, *pdb_op_wr_mem_p; + +#define PDB_OPCODE_CONTINUE 8 +#define PDB_OPCODE_STEP 9 + +#define PDB_OPCODE_SET_BKPT 10 +#define PDB_OPCODE_CLR_BKPT 11 +typedef struct pdb_op_bkpt +{ + u32 address; + u32 length; +} pdb_op_bkpt_t, *pdb_op_bkpt_p; + + typedef struct { u8 operation; /* PDB_OPCODE_??? */ - u32 domain; u32 process; union { - pdb_op_rd_reg_t rd_reg; - pdb_op_wr_reg_t wr_reg; + pdb_op_attach_t attach; + pdb_op_wr_reg_t wr_reg; + pdb_op_rd_mem_req_t rd_mem; + pdb_op_wr_mem_t wr_mem; + pdb_op_bkpt_t bkpt; } u; } pdb_request_t, *pdb_request_p; + #define PDB_RESPONSE_OKAY 0 @@ -36,19 +82,18 @@ typedef struct { u8 operation; /* copied from request */ + u32 domain; + u32 process; s16 status; /* PDB_RESPONSE_??? */ - u32 value; + union + { + pdb_op_rd_regs_t rd_regs; + pdb_op_rd_mem_resp_t rd_mem; + } u; } pdb_response_t, *pdb_response_p; DEFINE_RING_TYPES(pdb, pdb_request_t, pdb_response_t); - - -int pdb_attach (int pid); -int pdb_detach (int pid); -int pdb_read_register (int pid, pdb_op_rd_reg_p op, unsigned long *dest); -int pdb_write_register (int pid, pdb_op_wr_reg_p op); - #endif diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/pdb_caml_process.c --- a/tools/debugger/pdb/pdb_caml_process.c Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/pdb_caml_process.c Fri Jul 29 20:25:03 2005 @@ -66,49 +66,127 @@ } /* - * read a response from a pdb domain backend. + * process_handle_response : int32 -> int * int * string * - * grabs the response off a ring. - */ -static void -read_response (pdb_front_ring_t *pdb_ring, pdb_response_p response) -{ - RING_IDX loop, rp; - - rp = pdb_ring->sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ - - for ( loop = pdb_ring->rsp_cons; loop != rp; loop++ ) + * A backend domain has notified pdb (via an event channel) + * that a command has finished. + * We read the result from the channel and formulate a response + * as a single string. Also return the domain and process. + */ + +static inline unsigned int +_flip (unsigned int orig) +{ + return (((orig << 24) & 0xff000000) | ((orig << 8) & 0x00ff0000) | + ((orig >> 8) & 0x0000ff00) | ((orig >> 24) & 0x000000ff)); +} + +value +process_handle_response (value ring) +{ + CAMLparam1(ring); + CAMLlocal2(result, str); + + RING_IDX rp; + pdb_response_p resp; + pdb_front_ring_t *my_ring = (pdb_front_ring_t *)Int32_val(ring); + char msg[2048]; + int msglen; + + memset(msg, 0, sizeof(msg)); + + rp = my_ring->sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + /* default response is OK unless the command has something + more interesting to say */ + sprintf(msg, "OK"); + + if (my_ring->rsp_cons != rp) { - pdb_response_p resp; - - resp = RING_GET_RESPONSE(pdb_ring, loop); - memcpy(response, resp, sizeof(pdb_response_t)); - - /* - printf ("got response %x %x %x\n", response->operation, - response->status, response->value); - */ + resp = RING_GET_RESPONSE(my_ring, my_ring->rsp_cons); + + switch (resp->operation) + { + case PDB_OPCODE_PAUSE : + case PDB_OPCODE_ATTACH : + case PDB_OPCODE_DETACH : + break; + + case PDB_OPCODE_RD_REGS : + { + int loop; + pdb_op_rd_regs_p regs = &resp->u.rd_regs; + + for (loop = 0; loop < GDB_REGISTER_FRAME_SIZE * 8; loop += 8) + { + sprintf(&msg[loop], "%08x", _flip(regs->reg[loop >> 3])); + } + + break; + } + case PDB_OPCODE_WR_REG : + { + /* should check the return status */ + break; + } + + case PDB_OPCODE_RD_MEM : + { + int loop; + pdb_op_rd_mem_resp_p mem = &resp->u.rd_mem; + + for (loop = 0; loop < mem->length; loop ++) + { + sprintf(&msg[loop * 2], "%02x", mem->data[loop]); + } + break; + } + case PDB_OPCODE_WR_MEM : + { + /* should check the return status */ + break; + } + + /* this is equivalent to process_xen_virq */ + case PDB_OPCODE_CONTINUE : + { + sprintf(msg, "S05"); + break; + } + case PDB_OPCODE_STEP : + { + sprintf(msg, "S05"); + break; + } + + case PDB_OPCODE_SET_BKPT : + { + break; + } + case PDB_OPCODE_CLR_BKPT : + { + break; + } + + default : + printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE\n"); + break; + } + + my_ring->rsp_cons++; } - pdb_ring->rsp_cons = loop; -} - -/* - * process_handle_response : int32 -> unit - */ - -value -process_handle_response (value ring) -{ - CAMLparam1(ring); - - pdb_front_ring_t *my_ring = (pdb_front_ring_t *)Int32_val(ring); - pdb_response_t resp; - - if ( my_ring ) - read_response(my_ring, &resp); - - CAMLreturn(Val_unit); + + msglen = strlen(msg); + result = caml_alloc(3,0); + str = alloc_string(msglen); + memmove(&Byte(str,0), msg, msglen); + + Store_field(result, 0, Val_int(resp->domain)); + Store_field(result, 1, Val_int(resp->process)); + Store_field(result, 2, str); + + CAMLreturn(result); } /* @@ -120,27 +198,14 @@ CAMLparam1(context); context_t ctx; pdb_request_t req; - pdb_response_t resp; - - decode_context(&ctx, context); - - printf("(pdb) attach process [%d.%d] %d %p\n", ctx.domain, ctx.process, - ctx.evtchn, ctx.ring); - fflush(stdout); + + decode_context(&ctx, context); req.operation = PDB_OPCODE_ATTACH; - req.domain = ctx.domain; + req.u.attach.domain = ctx.domain; req.process = ctx.process; send_request (ctx.ring, ctx.evtchn, &req); - - printf("awaiting response\n"); - fflush(stdout); - - read_response (ctx.ring, &resp); - - printf("response %d %d\n", resp.operation, resp.status); - fflush(stdout); CAMLreturn(Val_unit); } @@ -163,7 +228,6 @@ fflush(stdout); req.operation = PDB_OPCODE_DETACH; - req.domain = ctx.domain; req.process = ctx.process; send_request (ctx.ring, ctx.evtchn, &req); @@ -180,67 +244,41 @@ { CAMLparam1(context); context_t ctx; + pdb_request_t req; decode_context(&ctx, context); printf("(pdb) pause target %d %d\n", ctx.domain, ctx.process); fflush(stdout); - CAMLreturn(Val_unit); -} - - -/* - * proc_read_registers : context_t -> int32 + req.operation = PDB_OPCODE_PAUSE; + req.process = ctx.process; + + send_request (ctx.ring, ctx.evtchn, &req); + + CAMLreturn(Val_unit); +} + + +/* + * proc_read_registers : context_t -> unit */ value proc_read_registers (value context) { CAMLparam1(context); - CAMLlocal1(result); - - u32 regs[REGISTER_FRAME_SIZE]; - - pdb_request_t req; - context_t ctx; - int loop; - - decode_context(&ctx, context); - - req.operation = PDB_OPCODE_RD_REG; - req.domain = ctx.domain; - req.process = ctx.process; - - for (loop = 0; loop < REGISTER_FRAME_SIZE; loop++) - { - pdb_response_t resp; - - req.u.rd_reg.reg = loop; - send_request(ctx.ring, ctx.evtchn, &req); - read_response(ctx.ring, &resp); - regs[loop] = resp.value; - } - - result = caml_alloc_tuple(16); - - Store_field(result, 0, caml_copy_int32(regs[LINUX_EAX])); - Store_field(result, 1, caml_copy_int32(regs[LINUX_ECX])); - Store_field(result, 2, caml_copy_int32(regs[LINUX_EDX])); - Store_field(result, 3, caml_copy_int32(regs[LINUX_EBX])); - Store_field(result, 4, caml_copy_int32(regs[LINUX_ESP])); - Store_field(result, 5, caml_copy_int32(regs[LINUX_EBP])); - Store_field(result, 6, caml_copy_int32(regs[LINUX_ESI])); - Store_field(result, 7, caml_copy_int32(regs[LINUX_EDI])); - Store_field(result, 8, caml_copy_int32(regs[LINUX_EIP])); - Store_field(result, 9, caml_copy_int32(regs[LINUX_EFL])); - Store_field(result, 10, caml_copy_int32(regs[LINUX_CS])); /* 16 */ - Store_field(result, 11, caml_copy_int32(regs[LINUX_SS])); /* 16 */ - Store_field(result, 12, caml_copy_int32(regs[LINUX_DS])); /* 16 */ - Store_field(result, 13, caml_copy_int32(regs[LINUX_ES])); /* 16 */ - Store_field(result, 14, caml_copy_int32(regs[LINUX_FS])); /* 16 */ - Store_field(result, 15, caml_copy_int32(regs[LINUX_GS])); /* 16 */ - - CAMLreturn(result); + + pdb_request_t req; + context_t ctx; + + decode_context(&ctx, context); + + req.operation = PDB_OPCODE_RD_REGS; + req.process = ctx.process; + + send_request (ctx.ring, ctx.evtchn, &req); + + CAMLreturn(Val_unit); } @@ -257,12 +295,10 @@ context_t ctx; pdb_request_t req; - pdb_response_t resp; decode_context(&ctx, context); req.operation = PDB_OPCODE_WR_REG; - req.domain = ctx.domain; req.process = ctx.process; req.u.wr_reg.value = my_newval; @@ -290,71 +326,34 @@ } send_request(ctx.ring, ctx.evtchn, &req); - read_response(ctx.ring, &resp); - - CAMLreturn(Val_unit); -} - - -/* - * proc_read_memory : context_t -> int32 -> int -> int + + CAMLreturn(Val_unit); +} + + +/* + * proc_read_memory : context_t -> int32 -> int -> unit */ value proc_read_memory (value context, value address, value length) { CAMLparam3(context, address, length); - CAMLlocal2(result, temp); - - context_t ctx; - int loop; - char *buffer; - /* memory_t my_address = Int32_val(address); */ - u32 my_length = Int_val(length); - - printf ("(pdb) read memory\n"); - - decode_context(&ctx, context); - - buffer = malloc(my_length); - if ( buffer == NULL ) - { - printf("(pdb) read memory: malloc failed.\n"); fflush(stdout); - failwith("read memory error"); - } - - /* - if ( xendebug_read_memory(xc_handle, ctx.domain, ctx.vcpu, - my_address, my_length, buffer) ) - { - printf("(pdb) read memory error!\n"); fflush(stdout); - failwith("read memory error"); - } - */ - - memset(buffer, 0xff, my_length); - - result = caml_alloc(2,0); - if ( my_length > 0 ) /* car */ - { - Store_field(result, 0, Val_int(buffer[my_length - 1] & 0xff)); - } - else - - { - Store_field(result, 0, Val_int(0)); - } - Store_field(result, 1, Val_int(0)); /* cdr */ - - for (loop = 1; loop < my_length; loop++) - { - temp = result; - result = caml_alloc(2,0); - Store_field(result, 0, Val_int(buffer[my_length - loop - 1] & 0xff)); - Store_field(result, 1, temp); - } - - CAMLreturn(result); -} + + context_t ctx; + pdb_request_t req; + + decode_context(&ctx, context); + + req.operation = PDB_OPCODE_RD_MEM; + req.process = ctx.process; + req.u.rd_mem.address = Int32_val(address); + req.u.rd_mem.length = Int_val(length); + + send_request(ctx.ring, ctx.evtchn, &req); + + CAMLreturn(Val_unit); +} + /* * proc_write_memory : context_t -> int32 -> int list -> unit @@ -366,50 +365,37 @@ CAMLlocal1(node); context_t ctx; - - char buffer[4096]; /* a big buffer */ - memory_t my_address; + pdb_request_t req; u32 length = 0; - printf ("(pdb) write memory\n"); - - decode_context(&ctx, context); + decode_context(&ctx, context); + + req.operation = PDB_OPCODE_WR_MEM; + req.process = ctx.process; node = val_list; if ( Int_val(node) == 0 ) /* gdb functionalty test uses empty list */ { - CAMLreturn(Val_unit); + req.u.wr_mem.address = Int32_val(address); + req.u.wr_mem.length = 0; } - - while ( Int_val(Field(node,1)) != 0 ) + else { - buffer[length++] = Int_val(Field(node, 0)); - node = Field(node,1); + while ( Int_val(Field(node,1)) != 0 ) + { + req.u.wr_mem.data[length++] = Int_val(Field(node, 0)); + node = Field(node,1); + } + req.u.wr_mem.data[length++] = Int_val(Field(node, 0)); + + req.u.wr_mem.address = Int32_val(address); + req.u.wr_mem.length = length; } - buffer[length++] = Int_val(Field(node, 0)); - - my_address = (memory_t) Int32_val(address); - - /* - if ( xendebug_write_memory(xc_handle, ctx.domain, ctx.vcpu, - my_address, length, buffer) ) - { - printf("(pdb) write memory error!\n"); fflush(stdout); - failwith("write memory error"); - } - */ - { - int loop; - for (loop = 0; loop < length; loop++) - { - printf (" %02x", buffer[loop]); - } - printf ("\n"); - } - - CAMLreturn(Val_unit); -} - + + send_request(ctx.ring, ctx.evtchn, &req); + + CAMLreturn(Val_unit); +} /* @@ -421,17 +407,14 @@ CAMLparam1(context); context_t ctx; - - decode_context(&ctx, context); - - /* - if ( xendebug_continue(xc_handle, ctx.domain, ctx.vcpu) ) - { - printf("(pdb) continue\n"); fflush(stdout); - failwith("continue"); - } - */ - printf ("CONTINUE\n"); + pdb_request_t req; + + decode_context(&ctx, context); + + req.operation = PDB_OPCODE_CONTINUE; + req.process = ctx.process; + + send_request(ctx.ring, ctx.evtchn, &req); CAMLreturn(Val_unit); } @@ -445,17 +428,14 @@ CAMLparam1(context); context_t ctx; - - decode_context(&ctx, context); - - /* - if ( xendebug_step(xc_handle, ctx.domain, ctx.vcpu) ) - { - printf("(pdb) step\n"); fflush(stdout); - failwith("step"); - } - */ - printf ("STEP\n"); + pdb_request_t req; + + decode_context(&ctx, context); + + req.operation = PDB_OPCODE_STEP; + req.process = ctx.process; + + send_request(ctx.ring, ctx.evtchn, &req); CAMLreturn(Val_unit); } @@ -471,22 +451,16 @@ CAMLparam3(context, address, length); context_t ctx; - memory_t my_address = (memory_t) Int32_val(address); - int my_length = Int_val(length); - - decode_context(&ctx, context); - - printf ("(pdb) insert memory breakpoint 0x%lx %d\n", - my_address, my_length); - - /* - if ( xendebug_insert_memory_breakpoint(xc_handle, ctx.domain, ctx.vcpu, - my_address, my_length) ) - { - printf("(pdb) error: insert memory breakpoint\n"); fflush(stdout); - failwith("insert memory breakpoint"); - } - */ + pdb_request_t req; + + decode_context(&ctx, context); + + req.operation = PDB_OPCODE_SET_BKPT; + req.process = ctx.process; + req.u.bkpt.address = (memory_t) Int32_val(address); + req.u.bkpt.length = Int_val(length); + + send_request(ctx.ring, ctx.evtchn, &req); CAMLreturn(Val_unit); } @@ -500,24 +474,16 @@ CAMLparam3(context, address, length); context_t ctx; - - memory_t my_address = (memory_t) Int32_val(address); - int my_length = Int_val(length); - - printf ("(pdb) remove memory breakpoint 0x%lx %d\n", - my_address, my_length); - - decode_context(&ctx, context); - - /* - if ( xendebug_remove_memory_breakpoint(xc_handle, - ctx.domain, ctx.vcpu, - my_address, my_length) ) - { - printf("(pdb) error: remove memory breakpoint\n"); fflush(stdout); - failwith("remove memory breakpoint"); - } - */ + pdb_request_t req; + + decode_context(&ctx, context); + + req.operation = PDB_OPCODE_CLR_BKPT; + req.process = ctx.process; + req.u.bkpt.address = (memory_t) Int32_val(address); + req.u.bkpt.length = Int_val(length); + + send_request(ctx.ring, ctx.evtchn, &req); CAMLreturn(Val_unit); } diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/pdb_caml_xcs.c --- a/tools/debugger/pdb/pdb_caml_xcs.c Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/pdb_caml_xcs.c Fri Jul 29 20:25:03 2005 @@ -201,7 +201,7 @@ ret = connect(control_fd, (struct sockaddr *)&addr, len); if (ret < 0) { - printf("error connecting to xcs(ctrl)! (%d)\n", errno); + printf("error connecting to xcs (ctrl)! (%d)\n", errno); goto ctrl_fd_fail; } @@ -235,7 +235,7 @@ ret = connect(data_fd, (struct sockaddr *)&addr, len); if (ret < 0) { - printf("error connecting to xcs(data)! (%d)\n", errno); + printf("error connecting to xcs (data)! (%d)\n", errno); goto data_fd_fail; } diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/pdb_caml_xen.h --- a/tools/debugger/pdb/pdb_caml_xen.h Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/pdb_caml_xen.h Fri Jul 29 20:25:03 2005 @@ -10,11 +10,12 @@ #ifndef _PDB_CAML_XEN_DEFINED_ #define _PDB_CAML_XEN_DEFINED_ -enum gdb_registers { GDB_EAX, GDB_ECX, GDB_EDX, GDB_EBX, - GDB_ESP, GDB_EBP, GDB_ESI, GDB_EDI, - GDB_EIP, GDB_EFL, - GDB_CS, GDB_SS, GDB_DS, GDB_ES, - GDB_FS, GDB_GS }; +enum gdb_registers { /* 32 */ GDB_EAX, GDB_ECX, GDB_EDX, GDB_EBX, + GDB_ESP, GDB_EBP, GDB_ESI, GDB_EDI, + GDB_EIP, GDB_EFL, + /* 16 */ GDB_CS, GDB_SS, GDB_DS, GDB_ES, + GDB_FS, GDB_GS }; +#define GDB_REGISTER_FRAME_SIZE 16 /* this order comes from linux-2.6.11/include/asm-i386/ptrace.h */ enum x86_registers { LINUX_EBX, LINUX_ECX, LINUX_EDX, LINUX_ESI, LINUX_EDI, @@ -24,7 +25,11 @@ #define REGISTER_FRAME_SIZE 17 +/* hack: this is also included from the pdb linux module which + has PAGE_SIZE defined */ +#ifndef PAGE_SIZE #define PAGE_SIZE 4096 +#endif extern int xc_handle; diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/readme --- a/tools/debugger/pdb/readme Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/readme Fri Jul 29 20:25:03 2005 @@ -31,7 +31,7 @@ Build the target domains with debugging symbols. make CONFIG_DEBUG_INFO=true CONFIG_FRAME_POINTER=false linux-2.6-xenU-build - You can also change linux-2.6.11-xenU/Makefile + You can also change linux-2.6.12-xenU/Makefile CONFIG_CC_OPTIMIZE_FOR_SIZE from -O2 to -O - Build PDB @@ -46,7 +46,7 @@ domain-0.xeno# ./pdb <port> - Run GDB - hostname% gdb <xeno.bk>/dist/install/boot/vmlinux-syms-2.6.11.11-xenU + hostname% gdb <xeno.bk>/dist/install/boot/vmlinux-syms-2.6.12-xenU (gdb) target remote domain-0.xeno:<port> @@ -76,9 +76,18 @@ continue print +Process + + PDB can also debug a process running in a Linux 2.6 domain. + After running PDB in domain 0, insert the pdb module in dom u: + + % insmod linux-2.6-module/pdb.ko + + Load GDB with the appropriate symbols, and attach with + + (gdb) maint packet x context = process <domid> <pid> To Do - watchpoints - support for SMP -- support for user applications diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/server.ml --- a/tools/debugger/pdb/server.ml Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/server.ml Fri Jul 29 20:25:03 2005 @@ -9,7 +9,7 @@ open Unix open Buffer - +open Util (** * connection_t: The state for each connection. @@ -98,7 +98,7 @@ (String.escaped reply)); Util.send_reply sock reply with - Debugger.No_reply -> + Util.No_reply -> print_endline (Printf.sprintf "[%s] %s -> null" (Util.get_connection_info sock) (String.escaped command)) diff -r a4196568095c -r b53a65034532 tools/examples/xend-config.sxp --- a/tools/examples/xend-config.sxp Fri Jul 29 18:52:33 2005 +++ b/tools/examples/xend-config.sxp Fri Jul 29 20:25:03 2005 @@ -44,3 +44,11 @@ # Setup script for enbd-backed block devices (block-enbd block-enbd) +# Dom0 will balloon out when needed to free memory for domU. +# dom0-min-mem is the lowest memory level (in MB) dom0 will get down to. +# If dom0-min-mem=0, dom0 will never balloon out. +(dom0-min-mem 0) + +# In SMP system, dom0 will use only CPUs in range [1,dom0-cpus] +# If dom0-cpus = 0, dom0 will take all cpus available +(dom0-cpus 0) diff -r a4196568095c -r b53a65034532 tools/firmware/rombios/rombios.c --- a/tools/firmware/rombios/rombios.c Fri Jul 29 18:52:33 2005 +++ b/tools/firmware/rombios/rombios.c Fri Jul 29 20:25:03 2005 @@ -28,6 +28,11 @@ #define VMXASSIST #undef VMXTEST + +// Xen full virtualization does not handle unaligned IO with page crossing. +// Disable 32-bit PIO as a workaround. +#define NO_PIO32 + // ROM BIOS compatability entry points: // =================================== @@ -2248,6 +2253,9 @@ Bit16u cylinders, heads, spt, blksize; Bit8u translation, removable, mode; + // default mode to PIO16 + mode = ATA_MODE_PIO16; + //Temporary values to do the transfer write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD); write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16); @@ -2256,7 +2264,10 @@ BX_PANIC("ata-detect: Failed to detect ATA device\n"); removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0; +#ifndef NO_PIO32 mode = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : ATA_MODE_PIO16; +#endif + blksize = read_word(get_SS(),buffer+10); cylinders = read_word(get_SS(),buffer+(1*2)); // word 1 @@ -2346,6 +2357,9 @@ Bit8u type, removable, mode; Bit16u blksize; + // default mode to PIO16 + mode = ATA_MODE_PIO16; + //Temporary values to do the transfer write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_CDROM); write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16); @@ -2355,7 +2369,9 @@ type = read_byte(get_SS(),buffer+1) & 0x1f; removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0; +#ifndef NO_PIO32 mode = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : ATA_MODE_PIO16; +#endif blksize = 2048; write_byte(ebda_seg,&EbdaData->ata.devices[device].device, type); diff -r a4196568095c -r b53a65034532 tools/ioemu/monitor.c --- a/tools/ioemu/monitor.c Fri Jul 29 18:52:33 2005 +++ b/tools/ioemu/monitor.c Fri Jul 29 20:25:03 2005 @@ -514,21 +514,45 @@ str_allocated[nb_args] = str; add_str: if (nb_args >= MAX_ARGS) { -#if 0 error_args: -#endif term_printf("%s: too many arguments\n", cmdname); goto fail; } args[nb_args++] = str; } break; + case '-': + { + int has_option; + /* option */ + + c = *typestr++; + if (c == '\0') + goto bad_type; + while (isspace(*p)) + p++; + has_option = 0; + if (*p == '-') { + p++; + if (*p != c) { + term_printf("%s: unsupported option -%c\n", + cmdname, *p); + goto fail; + } + p++; + has_option = 1; + } + if (nb_args >= MAX_ARGS) + goto error_args; + args[nb_args++] = (void *)has_option; + } + break; /* TODO: add more commands we need here to support vmx device model */ case '/': case 'i': - case '-': default: - term_printf("%s: unknown type '%c', we only support quit command now.\n", cmdname, c); + bad_type: + term_printf("%s: unknown type '%c',not support now.\n", cmdname, c); goto fail; } } diff -r a4196568095c -r b53a65034532 tools/ioemu/target-i386-dm/qemu-dm.debug --- a/tools/ioemu/target-i386-dm/qemu-dm.debug Fri Jul 29 18:52:33 2005 +++ b/tools/ioemu/target-i386-dm/qemu-dm.debug Fri Jul 29 20:25:03 2005 @@ -2,4 +2,4 @@ echo $* > /tmp/args echo $DISPLAY >> /tmp/args -exec /usr/bin/qemu-dm $* +exec /usr/lib/xen/bin/qemu-dm $* diff -r a4196568095c -r b53a65034532 tools/ioemu/vnc.c --- a/tools/ioemu/vnc.c Fri Jul 29 18:52:33 2005 +++ b/tools/ioemu/vnc.c Fri Jul 29 20:25:03 2005 @@ -393,7 +393,28 @@ keycode>>=8; } } else if(down) { - kbd_put_keysym(keySym); + int qemu_keysym = 0; + if (keySym <= 128) { /* normal ascii */ + qemu_keysym = keySym; + } else { + switch(keySym) { + case XK_Up: qemu_keysym = QEMU_KEY_UP; break; + case XK_Down: qemu_keysym = QEMU_KEY_DOWN; break; + case XK_Left: qemu_keysym = QEMU_KEY_LEFT; break; + case XK_Right: qemu_keysym = QEMU_KEY_RIGHT; break; + case XK_Home: qemu_keysym = QEMU_KEY_HOME; break; + case XK_End: qemu_keysym = QEMU_KEY_END; break; + case XK_Page_Up: qemu_keysym = QEMU_KEY_PAGEUP; break; + case XK_Page_Down: qemu_keysym = QEMU_KEY_PAGEDOWN; break; + case XK_BackSpace: qemu_keysym = QEMU_KEY_BACKSPACE; break; + case XK_Delete: qemu_keysym = QEMU_KEY_DELETE; break; + case XK_Return: + case XK_Linefeed: qemu_keysym = keySym; break; + default: break; + } + } + if (qemu_keysym != 0) + kbd_put_keysym(qemu_keysym); } if(down) { if(keySym==XK_Control_L) diff -r a4196568095c -r b53a65034532 tools/libxc/Makefile --- a/tools/libxc/Makefile Fri Jul 29 18:52:33 2005 +++ b/tools/libxc/Makefile Fri Jul 29 20:25:03 2005 @@ -19,18 +19,22 @@ SRCS += xc_domain.c SRCS += xc_evtchn.c SRCS += xc_gnttab.c -SRCS += xc_load_aout9.c SRCS += xc_load_bin.c SRCS += xc_load_elf.c SRCS += xc_linux_build.c -SRCS += xc_linux_restore.c -SRCS += xc_linux_save.c SRCS += xc_misc.c SRCS += xc_physdev.c SRCS += xc_private.c +ifeq ($(XEN_TARGET_ARCH),ia64) +SRCS += xc_ia64_stubs.c +else +SRCS += xc_load_aout9.c +SRCS += xc_linux_restore.c +SRCS += xc_linux_save.c +SRCS += xc_vmx_build.c SRCS += xc_ptrace.c SRCS += xc_ptrace_core.c -SRCS += xc_vmx_build.c +endif CFLAGS += -Wall CFLAGS += -Werror diff -r a4196568095c -r b53a65034532 tools/libxc/xc.h --- a/tools/libxc/xc.h Fri Jul 29 18:52:33 2005 +++ b/tools/libxc/xc.h Fri Jul 29 20:25:03 2005 @@ -27,6 +27,14 @@ #include <xen/sched_ctl.h> #include <xen/acm.h> +#ifdef __ia64__ +#define XC_PAGE_SHIFT 14 +#else +#define XC_PAGE_SHIFT 12 +#endif +#define XC_PAGE_SIZE (1UL << XC_PAGE_SHIFT) +#define XC_PAGE_MASK (~(XC_PAGE_SIZE-1)) + /* * DEFINITIONS FOR CPU BARRIERS */ @@ -39,6 +47,11 @@ #define mb() __asm__ __volatile__ ( "mfence" : : : "memory") #define rmb() __asm__ __volatile__ ( "lfence" : : : "memory") #define wmb() __asm__ __volatile__ ( "" : : : "memory") +#elif defined(__ia64__) +/* FIXME */ +#define mb() +#define rmb() +#define wmb() #else #error "Define barriers" #endif @@ -462,6 +475,9 @@ int xc_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf, unsigned long max_pfns); +int xc_ia64_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf, + unsigned int start_page, unsigned int nr_pages); + /*\ * GRANT TABLE FUNCTIONS \*/ diff -r a4196568095c -r b53a65034532 tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Fri Jul 29 18:52:33 2005 +++ b/tools/libxc/xc_domain.c Fri Jul 29 20:25:03 2005 @@ -264,10 +264,11 @@ unsigned int mem_kb) { int err; + unsigned int npages = mem_kb / (PAGE_SIZE/1024); err = do_dom_mem_op(xc_handle, MEMOP_increase_reservation, NULL, - mem_kb / 4, 0, domid); - if (err == mem_kb / 4) + npages, 0, domid); + if (err == npages) return 0; if (err > 0) { diff -r a4196568095c -r b53a65034532 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Fri Jul 29 18:52:33 2005 +++ b/tools/libxc/xc_linux_build.c Fri Jul 29 20:25:03 2005 @@ -8,7 +8,7 @@ #define ELFSIZE 32 #endif -#if defined(__x86_64__) +#if defined(__x86_64__) || defined(__ia64__) #define ELFSIZE 64 #endif @@ -34,6 +34,10 @@ #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) #define round_pgdown(_p) ((_p)&PAGE_MASK) + +#ifdef __ia64__ +#define probe_aout9(image,image_size,load_funcs) 1 +#endif static int probeimageformat(char *image, unsigned long image_size, @@ -258,6 +262,67 @@ } #endif +#ifdef __ia64__ +#include <asm/fpu.h> /* for FPSR_DEFAULT */ +static int setup_guest(int xc_handle, + u32 dom, + char *image, unsigned long image_size, + gzFile initrd_gfd, unsigned long initrd_len, + unsigned long nr_pages, + unsigned long *pvsi, unsigned long *pvke, + unsigned long *pvss, vcpu_guest_context_t *ctxt, + const char *cmdline, + unsigned long shared_info_frame, + unsigned int control_evtchn, + unsigned long flags, + unsigned int vcpus, + unsigned int store_evtchn, unsigned long *store_mfn) +{ + unsigned long *page_array = NULL; + struct load_funcs load_funcs; + struct domain_setup_info dsi; + unsigned long start_page; + int rc; + + rc = probeimageformat(image, image_size, &load_funcs); + if ( rc != 0 ) + goto error_out; + + memset(&dsi, 0, sizeof(struct domain_setup_info)); + + rc = (load_funcs.parseimage)(image, image_size, &dsi); + if ( rc != 0 ) + goto error_out; + + dsi.v_start = round_pgdown(dsi.v_start); + dsi.v_end = round_pgup(dsi.v_end); + + start_page = dsi.v_start >> PAGE_SHIFT; + nr_pages = (dsi.v_end - dsi.v_start) >> PAGE_SHIFT; + if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ) + { + PERROR("Could not allocate memory"); + goto error_out; + } + + if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, start_page, nr_pages) != nr_pages ) + { + PERROR("Could not get the page frame list"); + goto error_out; + } + + (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array, + &dsi); + + *pvke = dsi.v_kernentry; + return 0; + + error_out: + if ( page_array != NULL ) + free(page_array); + return -1; +} +#else /* x86 */ static int setup_guest(int xc_handle, u32 dom, char *image, unsigned long image_size, @@ -500,6 +565,8 @@ goto error_out; #endif + *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT]; + start_info = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]); @@ -511,7 +578,7 @@ start_info->nr_pt_frames = nr_pt_pages; start_info->mfn_list = vphysmap_start; start_info->domain_controller_evtchn = control_evtchn; - start_info->store_page = vstoreinfo_start; + start_info->store_mfn = *store_mfn; start_info->store_evtchn = store_evtchn; if ( initrd_len != 0 ) { @@ -521,9 +588,6 @@ strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE); start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0'; munmap(start_info, PAGE_SIZE); - - /* Tell our caller where we told domain store page was. */ - *store_mfn = page_array[((vstoreinfo_start-dsi.v_start)>>PAGE_SHIFT)]; /* shared_info page starts its life empty. */ shared_info = xc_map_foreign_range( @@ -558,6 +622,7 @@ free(page_array); return -1; } +#endif int xc_linux_build(int xc_handle, u32 domid, @@ -628,7 +693,11 @@ } if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) || +#ifdef __ia64__ + 0 ) +#else (ctxt->ctrlreg[3] != 0) ) +#endif { ERROR("Domain is already constructed"); goto error_out; @@ -653,6 +722,18 @@ if ( image != NULL ) free(image); +#ifdef __ia64__ + /* based on new_thread in xen/arch/ia64/domain.c */ + ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */ + ctxt->regs.cr_iip = vkern_entry; + ctxt->regs.cr_ifs = 1UL << 63; + ctxt->regs.ar_fpsr = FPSR_DEFAULT; + /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should move here */ + ctxt->vcpu.privregs = 0; + ctxt->shared.domain_controller_evtchn = control_evtchn; + ctxt->shared.flags = flags; + i = 0; /* silence unused variable warning */ +#else /* x86 */ /* * Initial register values: * DS,ES,FS,GS = FLAT_KERNEL_DS @@ -707,6 +788,7 @@ ctxt->failsafe_callback_eip = 0; ctxt->syscall_callback_eip = 0; #endif +#endif /* x86 */ memset( &launch_op, 0, sizeof(launch_op) ); diff -r a4196568095c -r b53a65034532 tools/libxc/xc_load_elf.c --- a/tools/libxc/xc_load_elf.c Fri Jul 29 18:52:33 2005 +++ b/tools/libxc/xc_load_elf.c Fri Jul 29 20:25:03 2005 @@ -7,7 +7,7 @@ #if defined(__i386__) #define ELFSIZE 32 #endif -#if defined(__x86_64__) +#if defined(__x86_64__) || defined(__ia64__) #define ELFSIZE 64 #endif @@ -122,8 +122,12 @@ } if ( guestinfo == NULL ) { +#ifdef __ia64__ + guestinfo = ""; +#else ERROR("Not a Xen-ELF image: '__xen_guest' section not found."); return -EINVAL; +#endif } for ( h = 0; h < ehdr->e_phnum; h++ ) diff -r a4196568095c -r b53a65034532 tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Fri Jul 29 18:52:33 2005 +++ b/tools/libxc/xc_private.c Fri Jul 29 20:25:03 2005 @@ -256,6 +256,37 @@ return (ret < 0) ? -1 : op.u.getmemlist.num_pfns; } +#ifdef __ia64__ +int xc_ia64_get_pfn_list(int xc_handle, + u32 domid, + unsigned long *pfn_buf, + unsigned int start_page, + unsigned int nr_pages) +{ + dom0_op_t op; + int ret; + + op.cmd = DOM0_GETMEMLIST; + op.u.getmemlist.domain = (domid_t)domid; + op.u.getmemlist.max_pfns = ((unsigned long)start_page << 32) | nr_pages; + op.u.getmemlist.buffer = pfn_buf; + + if ( mlock(pfn_buf, nr_pages * sizeof(unsigned long)) != 0 ) + { + PERROR("Could not lock pfn list buffer"); + return -1; + } + + /* XXX Hack to put pages in TLB, hypervisor should be able to handle this */ + memset(pfn_buf, 0, nr_pages * sizeof(unsigned long)); + ret = do_dom0_op(xc_handle, &op); + + (void)munlock(pfn_buf, nr_pages * sizeof(unsigned long)); + + return (ret < 0) ? -1 : op.u.getmemlist.num_pfns; +} +#endif + long xc_get_tot_pages(int xc_handle, u32 domid) { dom0_op_t op; diff -r a4196568095c -r b53a65034532 tools/libxc/xc_private.h --- a/tools/libxc/xc_private.h Fri Jul 29 18:52:33 2005 +++ b/tools/libxc/xc_private.h Fri Jul 29 20:25:03 2005 @@ -55,7 +55,7 @@ #define L4_PAGETABLE_ENTRIES 512 #endif -#define PAGE_SHIFT L1_PAGETABLE_SHIFT +#define PAGE_SHIFT XC_PAGE_SHIFT #define PAGE_SIZE (1UL << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) diff -r a4196568095c -r b53a65034532 tools/libxc/xc_ptrace.c --- a/tools/libxc/xc_ptrace.c Fri Jul 29 18:52:33 2005 +++ b/tools/libxc/xc_ptrace.c Fri Jul 29 20:25:03 2005 @@ -202,7 +202,7 @@ } if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */ goto error_out; - if (ctxt[cpu].flags & VGCF_VMX_GUEST) + if (ctxt[cpu].flags & VGCF_VMX_GUEST && paging_enabled(&ctxt[cpu])) page = page_array[page >> PAGE_SHIFT] << PAGE_SHIFT; if (page != page_phys[cpu] || perm != prev_perm[cpu]) { diff -r a4196568095c -r b53a65034532 tools/misc/Makefile --- a/tools/misc/Makefile Fri Jul 29 18:52:33 2005 +++ b/tools/misc/Makefile Fri Jul 29 20:25:03 2005 @@ -22,7 +22,9 @@ build: $(TARGETS) $(MAKE) -C miniterm $(MAKE) -C cpuperf +ifneq ($(XEN_TARGET_ARCH),ia64) $(MAKE) -C mbootpack +endif $(MAKE) -C lomount install: build diff -r a4196568095c -r b53a65034532 tools/misc/policyprocessor/XmlToBinInterface.java --- a/tools/misc/policyprocessor/XmlToBinInterface.java Fri Jul 29 18:52:33 2005 +++ b/tools/misc/policyprocessor/XmlToBinInterface.java Fri Jul 29 20:25:03 2005 @@ -123,7 +123,7 @@ final short binaryBufferHeaderSz = (3 * u32Size + 4* u16Size); /* copied directlty from policy_ops.h */ - final int POLICY_INTERFACE_VERSION = 0xAAAA0002; + final int POLICY_INTERFACE_VERSION = 0xAAAA0003; /* copied directly from acm.h */ final int ACM_MAGIC = 0x0001debc; diff -r a4196568095c -r b53a65034532 tools/python/setup.py --- a/tools/python/setup.py Fri Jul 29 18:52:33 2005 +++ b/tools/python/setup.py Fri Jul 29 20:25:03 2005 @@ -51,6 +51,7 @@ 'xen.xend.xenstore', 'xen.xm', 'xen.web', + 'xen.sv' ], ext_package = "xen.lowlevel", ext_modules = [ xc, xu, xs ] diff -r a4196568095c -r b53a65034532 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/lowlevel/xc/xc.c Fri Jul 29 20:25:03 2005 @@ -242,7 +242,7 @@ "paused", info[i].paused, "blocked", info[i].blocked, "running", info[i].running, - "mem_kb", info[i].nr_pages*4, + "mem_kb", info[i].nr_pages*(XC_PAGE_SIZE/1024), "cpu_time", info[i].cpu_time, "maxmem_kb", info[i].max_memkb, "ssidref", info[i].ssidref, @@ -813,6 +813,22 @@ return zero; } +static PyObject *pyxc_init_store(PyObject *self, PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + + int remote_port; + + static char *kwd_list[] = { "remote_port", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, + &remote_port) ) + return NULL; + + return PyInt_FromLong(xc_init_store(xc->xc_handle, remote_port)); +} + static PyMethodDef pyxc_methods[] = { { "handle", @@ -1088,6 +1104,13 @@ " mem_kb [long]: .\n" "Returns: [int] 0 on success; -1 on error.\n" }, + { "init_store", + (PyCFunction)pyxc_init_store, + METH_VARARGS | METH_KEYWORDS, "\n" + "Initialize the store event channel and return the store page mfn.\n" + " remote_port [int]: store event channel port number.\n" + "Returns: [int] mfn on success; <0 on error.\n" }, + { NULL, NULL, 0, NULL } }; diff -r a4196568095c -r b53a65034532 tools/python/xen/lowlevel/xs/xs.c --- a/tools/python/xen/lowlevel/xs/xs.c Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/lowlevel/xs/xs.c Fri Jul 29 20:25:03 2005 @@ -367,7 +367,7 @@ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path, &priority, &token)) goto exit; - xsval = xs_watch(xh, path, token, priority); + xsval = xs_watch(xh, path, token); val = pyvalue_int(xsval); exit: return val; diff -r a4196568095c -r b53a65034532 tools/python/xen/lowlevel/xu/xu.c --- a/tools/python/xen/lowlevel/xu/xu.c Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/lowlevel/xu/xu.c Fri Jul 29 20:25:03 2005 @@ -45,9 +45,6 @@ #define EVTCHN_BIND _IO('E', 2) /* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ #define EVTCHN_UNBIND _IO('E', 3) - -/* Size of a machine page frame. */ -#define PAGE_SIZE 4096 /* Set the close-on-exec flag on a file descriptor. Doesn't currently bother * to check for errors. */ diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/xend/XendCheckpoint.py Fri Jul 29 20:25:03 2005 @@ -17,7 +17,6 @@ from XendLogging import log SIGNATURE = "LinuxGuestRecord" -PAGE_SIZE = 4096 PATH_XC_SAVE = "/usr/libexec/xen/xc_save" PATH_XC_RESTORE = "/usr/libexec/xen/xc_restore" diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/xend/XendDomain.py Fri Jul 29 20:25:03 2005 @@ -112,6 +112,11 @@ else: self._delete_domain(domid) self.refresh(cleanup=True) + + dom0 = self.domain_lookup(0) + if not dom0: + dom0 = self.domain_unknown(0) + dom0.dom0_init_store() def close(self): pass @@ -218,10 +223,6 @@ if cleanup: self.reap() doms = self.xen_domains() - # Add entries for any domains we don't know about. - for id in doms.keys(): - if id not in self.domains: - self.domain_lookup(id) # Remove entries for domains that no longer exist. # Update entries for existing domains. do_domain_restarts = False @@ -331,22 +332,25 @@ self.update_domain(id) return self.domains.get(id) + def domain_unknown(self, id): + try: + info = self.xen_domain(id) + if info: + uuid = getUuid() + log.info( + "Creating entry for unknown domain: id=%d uuid=%s", + id, uuid) + db = self.dbmap.addChild(uuid) + dominfo = XendDomainInfo.recreate(db, info) + dominfo.setdom(id) + self._add_domain(dominfo) + return dominfo + except Exception, ex: + log.exception("Error creating domain info: id=%d", id) + return None + def domain_lookup(self, id): - dominfo = self.domains.get(id) - if not dominfo: - try: - info = self.xen_domain(id) - if info: - uuid = getUuid() - log.info( - "Creating entry for unknown domain: id=%d uuid=%s", - id, uuid) - db = self.dbmap.addChild(uuid) - dominfo = XendDomainInfo.recreate(db, info) - self._add_domain(dominfo) - except Exception, ex: - log.exception("Error creating domain info: id=%d", id) - return dominfo + return self.domains.get(id) def domain_lookup_by_name(self, name): dominfo = self.domains.get_by_name(name) diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/xend/XendDomainInfo.py Fri Jul 29 20:25:03 2005 @@ -31,12 +31,6 @@ from xen.xend.uuid import getUuid from xen.xend.xenstore import DBVar - -"""Flag for a block device backend domain.""" -SIF_BLK_BE_DOMAIN = (1<<4) - -"""Flag for a net device backend domain.""" -SIF_NET_BE_DOMAIN = (1<<5) """Shutdown code for poweroff.""" DOMAIN_POWEROFF = 0 @@ -170,13 +164,13 @@ """ dom = info['dom'] vm = cls(db) + vm.setdom(dom) db.readDB() vm.importFromDB() config = vm.config log.debug('info=' + str(info)) log.debug('config=' + prettyprintstring(config)) - vm.setdom(dom) vm.memory = info['mem_kb']/1024 if config: @@ -289,6 +283,7 @@ def importFromDB(self): self.db.importFromDB(self, fields=self.__exports__) + self.store_channel = self.eventChannel("store_channel") def setdom(self, dom): """Set the domain id. @@ -989,6 +984,15 @@ return 0 return timeout - (time.time() - self.shutdown_pending['start']) + def dom0_init_store(self): + if not self.store_channel: + self.store_channel = self.eventChannel("store_channel") + self.store_mfn = xc.init_store(self.store_channel.port2) + if self.store_mfn >= 0: + self.db.introduceDomain(self.id, self.store_mfn, + self.store_channel) + self.exportToDB(save=True, sync=True) + def vm_field_ignore(vm, config, val, index): """Dummy config field handler used for fields with built-in handling. diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/XendRoot.py --- a/tools/python/xen/xend/XendRoot.py Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/xend/XendRoot.py Fri Jul 29 20:25:03 2005 @@ -75,6 +75,10 @@ """Default port xend serves consoles at. """ console_port_base_default = '9600' + + dom0_min_mem_default = '0' + + dom0_cpus_default = '0' components = {} @@ -329,6 +333,12 @@ def get_vif_antispoof(self): return self.get_config_bool('vif-antispoof', 'yes') + def get_dom0_min_mem(self): + return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default) + + def get_dom0_cpus(self): + return self.get_config_int('dom0-cpus', self.dom0_cpus_default) + def instance(): """Get an instance of XendRoot. Use this instead of the constructor. diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/xend/image.py Fri Jul 29 20:25:03 2005 @@ -7,6 +7,12 @@ from xen.xend.xenstore import DBVar from xen.xend.server import channel + +"""Flag for a block device backend domain.""" +SIF_BLK_BE_DOMAIN = (1<<4) + +"""Flag for a net device backend domain.""" +SIF_NET_BE_DOMAIN = (1<<5) class ImageHandler: """Abstract base class for image handlers. @@ -130,7 +136,13 @@ # xc.domain_setuuid(dom, uuid) xc.domain_setcpuweight(dom, cpu_weight) xc.domain_setmaxmem(dom, mem_kb) - xc.domain_memory_increase_reservation(dom, mem_kb) + + try: + xc.domain_memory_increase_reservation(dom, mem_kb) + except: + xc.domain_destroy(dom) + raise + if cpu != -1: xc.domain_pincpu(dom, 0, 1<<int(cpu)) return dom @@ -284,18 +296,19 @@ ret.append("%s" % v) # Handle hd img related options - device = sxp.child(self.vm.config, 'device') - vbdinfo = sxp.child(device, 'vbd') - if not vbdinfo: - raise VmError("vmx: missing vbd configuration") - uname = sxp.child_value(vbdinfo, 'uname') - vbddev = sxp.child_value(vbdinfo, 'dev') - (vbdtype, vbdparam) = string.split(uname, ':', 1) - vbddev_list = ['hda', 'hdb', 'hdc', 'hdd'] - if vbdtype != 'file' or vbddev not in vbddev_list: - raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd") - ret.append("-%s" % vbddev) - ret.append("%s" % vbdparam) + devices = sxp.children(self.vm.config, 'device') + for device in devices: + vbdinfo = sxp.child(device, 'vbd') + if not vbdinfo: + raise VmError("vmx: missing vbd configuration") + uname = sxp.child_value(vbdinfo, 'uname') + vbddev = sxp.child_value(vbdinfo, 'dev') + (vbdtype, vbdparam) = string.split(uname, ':', 1) + vbddev_list = ['hda', 'hdb', 'hdc', 'hdd'] + if vbdtype != 'file' or vbddev not in vbddev_list: + raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd") + ret.append("-%s" % vbddev) + ret.append("%s" % vbdparam) # Handle graphics library related options vnc = sxp.child_value(self.vm.config, 'vnc') @@ -352,8 +365,9 @@ def destroy(self): channel.eventChannelClose(self.device_channel) - os.system("kill -KILL" - + " %d" % self.pid) + import signal + os.kill(self.pid, signal.SIGKILL) + (pid, status) = os.waitpid(self.pid, 0) def getDomainMemory(self, mem_mb): return (mem_mb * 1024) + self.getPageTableSize(mem_mb) diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/server/SrvDaemon.py --- a/tools/python/xen/xend/server/SrvDaemon.py Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/xend/server/SrvDaemon.py Fri Jul 29 20:25:03 2005 @@ -5,7 +5,6 @@ ########################################################### import os -import os.path import signal import sys import threading @@ -16,6 +15,7 @@ import StringIO import traceback import time +import glob from xen.lowlevel import xu @@ -25,6 +25,7 @@ from xen.xend.XendError import XendError from xen.xend.server import SrvServer from xen.xend.XendLogging import log +from xen.xend import XendRoot; xroot = XendRoot.instance() import channel import controller @@ -184,9 +185,13 @@ log.info("Started xenstored, pid=%d", pid) else: # Child - if XEND_DAEMONIZE and (not XENSTORED_DEBUG): + if XEND_DAEMONIZE: self.daemonize() - os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork") + if XENSTORED_DEBUG: + os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork", + "-T", "/var/log/xenstored-trace.log") + else: + os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork") def daemonize(self): if not XEND_DAEMONIZE: return @@ -323,6 +328,7 @@ return self.cleanup(kill=True) def run(self): + _enforce_dom0_cpus() try: log.info("Xend Daemon started") self.createFactories() @@ -359,6 +365,32 @@ #sys.exit(rc) os._exit(rc) +def _enforce_dom0_cpus(): + dn = xroot.get_dom0_cpus() + + for d in glob.glob("/sys/devices/system/cpu/cpu*"): + cpu = int(os.path.basename(d)[3:]) + if (dn == 0) or (cpu < dn): + v = "1" + else: + v = "0" + try: + f = open("%s/online" %d, "r+") + c = f.read(1) + if (c != v): + if v == "0": + log.info("dom0 is trying to give back cpu %d", cpu) + else: + log.info("dom0 is trying to take cpu %d", cpu) + f.seek(0) + f.write(v) + f.close() + log.info("dom0 successfully enforced cpu %d", cpu) + else: + f.close() + except: + pass + def instance(): global inst try: diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/server/channel.py --- a/tools/python/xen/xend/server/channel.py Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/xend/server/channel.py Fri Jul 29 20:25:03 2005 @@ -42,16 +42,16 @@ @param port2 """ try: - dom1 = int(db['dom1']) + dom1 = int(db['dom1'].getData()) except: pass try: - dom2 = int(db['dom2']) + dom2 = int(db['dom2'].getData()) except: pass try: - port1 = int(db['port1']) + port1 = int(db['port1'].getData()) except: pass try: - port2 = int(db['port2']) + port2 = int(db['port2'].getData()) except: pass evtchn = cls.interdomain(dom1, dom2, port1=port1, port2=port2) return evtchn diff -r a4196568095c -r b53a65034532 tools/python/xen/xend/xenstore/xsnode.py --- a/tools/python/xen/xend/xenstore/xsnode.py Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/xend/xenstore/xsnode.py Fri Jul 29 20:25:03 2005 @@ -350,7 +350,13 @@ self.watchThread = None def introduceDomain(self, dom, page, evtchn, path): - self.getxs().introduce_domain(dom, page, evtchn.port1, path) + try: + self.getxs().introduce_domain(dom, page, evtchn.port1, path) + except RuntimeError, ex: + if ex.args[0] == errno.EISCONN: + return None + else: + raise def releaseDomain(self, dom): self.getxs().release_domain(dom) diff -r a4196568095c -r b53a65034532 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/xm/create.py Fri Jul 29 20:25:03 2005 @@ -1,4 +1,5 @@ # Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx> +# Copyright (C) 2005 Nguyen Anh Quynh <aquynh@xxxxxxxxx> """Domain creation. """ @@ -7,10 +8,13 @@ import sys import socket +import xen.lowlevel.xc + from xen.xend import sxp from xen.xend import PrettyPrint from xen.xend.XendClient import server, XendError from xen.xend.XendBootloader import bootloader +from xen.xend import XendRoot; xroot = XendRoot.instance() from xen.util import blkif from xen.util import console_client @@ -644,6 +648,36 @@ % (dom, console_port)) return (dom, console_port) +def get_dom0_alloc(): + """Return current allocation memory of dom0 (in MB). Return 0 on error""" + PROC_XEN_BALLOON = "/proc/xen/balloon" + + f = open(PROC_XEN_BALLOON, "r") + line = f.readline() + for x in line.split(): + for n in x: + if not n.isdigit(): + break + else: + f.close() + return int(x)/1024 + f.close() + return 0 + +def balloon_out(dom0_min_mem, opts): + """Balloon out to get memory for domU, if necessarily""" + SLACK = 4 + + xc = xen.lowlevel.xc.new() + pinfo = xc.physinfo() + free_mem = pinfo['free_pages']/256 + if free_mem < opts.vals.memory + SLACK: + need_mem = opts.vals.memory + SLACK - free_mem + cur_alloc = get_dom0_alloc() + if cur_alloc - need_mem >= dom0_min_mem: + server.xend_domain_mem_target_set(0, cur_alloc - need_mem) + del xc + def main(argv): opts = gopts args = opts.parse(argv) @@ -671,6 +705,10 @@ if opts.vals.dryrun: PrettyPrint.prettyprint(config) else: + dom0_min_mem = xroot.get_dom0_min_mem() + if dom0_min_mem != 0: + balloon_out(dom0_min_mem, opts) + (dom, console) = make_domain(opts, config) if opts.vals.console_autoconnect: path = "/var/lib/xend/console-%s" % console diff -r a4196568095c -r b53a65034532 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/xm/main.py Fri Jul 29 20:25:03 2005 @@ -11,6 +11,13 @@ from xen.xend import PrettyPrint from xen.xend import sxp +# this is a nasty place to stick this in, but required because +# log file access is set up via a 5 deep import chain. This +# ensures the user sees a useful message instead of a stack trace +if os.getuid() != 0: + print "xm requires root access to execute, please try again as root" + sys.exit(1) + from xen.xend.XendClient import XendError, server from xen.xend.XendClient import main as xend_client_main from xen.xm import create, destroy, migrate, shutdown, sysrq @@ -390,7 +397,7 @@ d['dom'] = int(sxp.child_value(info, 'id', '-1')) d['name'] = sxp.child_value(info, 'name', '??') d['mem'] = int(sxp.child_value(info, 'memory', '0')) - d['cpu'] = int(sxp.child_value(info, 'cpu', '0')) + d['cpu'] = str(sxp.child_value(info, 'cpu', '0')) d['vcpus'] = int(sxp.child_value(info, 'vcpus', '0')) d['state'] = sxp.child_value(info, 'state', '??') d['cpu_time'] = float(sxp.child_value(info, 'cpu_time', '0')) @@ -399,12 +406,14 @@ d['port'] = sxp.child_value(console, 'console_port') else: d['port'] = '' + if d['vcpus'] > 1: + d['cpu'] = '-' if ((int(sxp.child_value(info, 'ssidref', '0'))) != 0): d['ssidref1'] = int(sxp.child_value(info, 'ssidref', '0')) & 0xffff d['ssidref2'] = (int(sxp.child_value(info, 'ssidref', '0')) >> 16) & 0xffff - print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3d %(vcpus)5d %(state)5s %(cpu_time)7.1f %(port)4s s:%(ssidref2)02x/p:%(ssidref1)02x" % d) + print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3s %(vcpus)5d %(state)5s %(cpu_time)7.1f %(port)4s s:%(ssidref2)02x/p:%(ssidref1)02x" % d) else: - print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3d %(vcpus)5d %(state)5s %(cpu_time)7.1f %(port)4s" % d) + print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3s %(vcpus)5d %(state)5s %(cpu_time)7.1f %(port)4s" % d) def show_vcpus(self, doms): print 'Name Id VCPU CPU CPUMAP' @@ -554,9 +563,9 @@ xm.prog(ProgMaxmem) -class ProgBalloon(Prog): - group = 'domain' - name = 'balloon' +class ProgSetMem(Prog): + group = 'domain' + name = 'set-mem' info = """Set the domain's memory footprint using the balloon driver.""" def help(self, args): @@ -570,7 +579,7 @@ mem_target = int_unit(args[2], 'm') server.xend_domain_mem_target_set(dom, mem_target) -xm.prog(ProgBalloon) +xm.prog(ProgSetMem) class ProgVcpuhotplug(Prog): group = 'domain' diff -r a4196568095c -r b53a65034532 tools/xcs/xcs.h --- a/tools/xcs/xcs.h Fri Jul 29 18:52:33 2005 +++ b/tools/xcs/xcs.h Fri Jul 29 20:25:03 2005 @@ -37,7 +37,7 @@ /* ------[ Other required defines ]----------------------------------------*/ /* Size of a machine page frame. */ -#define PAGE_SIZE 4096 +#define PAGE_SIZE XC_PAGE_SIZE #ifndef timersub /* XOPEN and __BSD don't cooperate well... */ #define timersub(a, b, result) \ diff -r a4196568095c -r b53a65034532 tools/xenstore/Makefile --- a/tools/xenstore/Makefile Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/Makefile Fri Jul 29 20:25:03 2005 @@ -86,9 +86,9 @@ $(TESTENV) ./xs_random --fail /tmp/xs_random 10000 $(RANDSEED) stresstest: xs_stress xs_watch_stress xenstored_test - rm -rf $(TESTDIR)/store + rm -rf $(TESTDIR)/store $(TESTDIR)/transactions export $(TESTENV); PID=`./xenstored_test --output-pid --trace-file=/tmp/trace`; ./xs_stress 5000; ret=$$?; kill $$PID; exit $$ret - rm -rf $(TESTDIR)/store + rm -rf $(TESTDIR)/store $(TESTDIR)/transactions export $(TESTENV); PID=`./xenstored_test --output-pid`; ./xs_watch_stress; ret=$$?; kill $$PID; exit $$ret xs_dom0_test: xs_dom0_test.o utils.o diff -r a4196568095c -r b53a65034532 tools/xenstore/TODO --- a/tools/xenstore/TODO Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/TODO Fri Jul 29 20:25:03 2005 @@ -2,8 +2,9 @@ are omissions of important but necessary things. It is up to the reader to fill in the blanks. -- Remove calls to system() from daemon - Timeout failed watch responses -- Dynamic nodes +- Dynamic/supply nodes - Persistant storage of introductions, watches and transactions, so daemon can restart - Remove assumption that rename doesn't fail +- Multi-root transactions, for setting up front and back ends at same time. + diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/02directory.sh --- a/tools/xenstore/testsuite/02directory.sh Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/testsuite/02directory.sh Fri Jul 29 20:25:03 2005 @@ -1,22 +1,23 @@ #! /bin/sh -# Root directory has nothing in it. -[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "" ] +# Root directory has only tool dir in it. +[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "tool" ] # Create a file. [ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ] # Directory shows it. -[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "test" ] +[ "`echo -e 'dir /' | ./xs_test 2>&1 | sort`" = "test +tool" ] # Make a new directory. [ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ] # Check it's there. -DIR="`echo -e 'dir /' | ./xs_test 2>&1`" -[ "$DIR" = "test -dir" ] || [ "$DIR" = "dir -test" ] +DIR="`echo -e 'dir /' | ./xs_test 2>&1 | sort`" +[ "$DIR" = "dir +test +tool" ] # Check it's empty. [ "`echo -e 'dir /dir' | ./xs_test 2>&1`" = "" ] diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/07watch.sh --- a/tools/xenstore/testsuite/07watch.sh Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/testsuite/07watch.sh Fri Jul 29 20:25:03 2005 @@ -3,45 +3,52 @@ # Watch something, write to it, check watch has fired. [ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ] -[ "`echo -e '1 watch /test token 100 -2 write /test create contents2 +[ "`echo -e '1 watch /test token +2 async write /test create contents2 1 waitwatch 1 ackwatch token' | ./xs_test 2>&1`" = "1:/test:token" ] # Check that reads don't set it off. -[ "`echo -e '1 watch /test token 100 +[ "`echo -e '1 watch /test token 2 read /test 1 waitwatch' | ./xs_test 2>&1`" = "2:contents2 1:waitwatch timeout" ] # mkdir, setperm and rm should (also tests watching dirs) [ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ] -[ "`echo -e '1 watch /dir token 100 -2 mkdir /dir/newdir +[ "`echo -e '1 watch /dir token +2 async mkdir /dir/newdir 1 waitwatch 1 ackwatch token -2 setperm /dir/newdir 0 READ +asyncwait +2 async setperm /dir/newdir 0 READ 1 waitwatch 1 ackwatch token -2 rm /dir/newdir +asyncwait +2 async rm /dir/newdir 1 waitwatch 1 ackwatch token' | ./xs_test 2>&1`" = "1:/dir/newdir:token 1:/dir/newdir:token 1:/dir/newdir:token" ] +# We don't get a watch from our own commands. +[ "`echo -e 'watch /dir token +mkdir /dir/newdir +waitwatch' | ./xs_test 2>&1`" = "waitwatch timeout" ] + # ignore watches while doing commands, should work. -[ "`echo -e 'watch /dir token 100 -write /dir/test create contents +[ "`echo -e 'watch /dir token +1 async write /dir/test create contents read /dir/test waitwatch ackwatch token' | ./xs_test 2>&1`" = "contents /dir/test:token" ] -# watch priority /test. -[ "`echo -e '1 watch /dir token1 1 -3 watch /dir token3 3 -2 watch /dir token2 2 -write /dir/test create contents +# watch priority test: all simultaneous +[ "`echo -e '1 watch /dir token1 +3 watch /dir token3 +2 watch /dir token2 +async write /dir/test create contents 3 waitwatch 3 ackwatch token3 2 waitwatch @@ -52,9 +59,9 @@ 1:/dir/test:token1" ] # If one dies (without acking), the other should still get ack. -[ "`echo -e '1 watch /dir token1 0 -2 watch /dir token2 1 -write /dir/test create contents +[ "`echo -e '1 watch /dir token1 +2 watch /dir token2 +async write /dir/test create contents 2 waitwatch 2 close 1 waitwatch @@ -62,51 +69,52 @@ 1:/dir/test:token1" ] # If one dies (without reading at all), the other should still get ack. -[ "`echo -e '1 watch /dir token1 0 -2 watch /dir token2 1 -write /dir/test create contents +[ "`echo -e '1 watch /dir token1 +2 watch /dir token2 +async write /dir/test create contents 2 close 1 waitwatch 1 ackwatch token1' | ./xs_test 2>&1`" = "1:/dir/test:token1" ] # unwatch -[ "`echo -e '1 watch /dir token1 0 +[ "`echo -e '1 watch /dir token1 1 unwatch /dir token1 -1 watch /dir token2 0 -2 write /dir/test2 create contents +1 watch /dir token2 +2 async write /dir/test2 create contents 1 waitwatch 1 unwatch /dir token2' | ./xs_test 2>&1`" = "1:/dir/test2:token2" ] # unwatch while watch pending. Next watcher gets the event. -[ "`echo -e '1 watch /dir token1 0 -2 watch /dir token2 1 -write /dir/test create contents +[ "`echo -e '1 watch /dir token1 +2 watch /dir token2 +async write /dir/test create contents 2 unwatch /dir token2 1 waitwatch 1 ackwatch token1' | ./xs_test 2>&1`" = "1:/dir/test:token1" ] # unwatch while watch pending. Should clear this so we get next event. -[ "`echo -e '1 watch /dir token1 0 -write /dir/test create contents +[ "`echo -e '1 watch /dir token1 +async write /dir/test create contents 1 unwatch /dir token1 -1 watch /dir/test token2 0 -write /dir/test none contents2 +1 watch /dir/test token2 +asyncwait +async write /dir/test none contents2 1 waitwatch 1 ackwatch token2' | ./xs_test 2>&1`" = "1:/dir/test:token2" ] # check we only get notified once. -[ "`echo -e '1 watch /test token 100 -2 write /test create contents2 +[ "`echo -e '1 watch /test token +2 async write /test create contents2 1 waitwatch 1 ackwatch token 1 waitwatch' | ./xs_test 2>&1`" = "1:/test:token 1:waitwatch timeout" ] # watches are queued in order. -[ "`echo -e '1 watch / token 100 -2 write /test1 create contents -2 write /test2 create contents -2 write /test3 create contents +[ "`echo -e '1 watch / token +async 2 write /test1 create contents +async 2 write /test2 create contents +async 2 write /test3 create contents 1 waitwatch 1 ackwatch token 1 waitwatch @@ -117,9 +125,9 @@ 1:/test3:token" ] # Creation of subpaths should be covered correctly. -[ "`echo -e '1 watch / token 100 -2 write /test/subnode create contents2 -2 write /test/subnode/subnode create contents2 +[ "`echo -e '1 watch / token +2 async write /test/subnode create contents2 +2 async write /test/subnode/subnode create contents2 1 waitwatch 1 ackwatch token 1 waitwatch @@ -129,23 +137,23 @@ 1:waitwatch timeout" ] # Watch event must have happened before we registered interest. -[ "`echo -e '1 watch / token 100 -2 write /test/subnode create contents2 -2 watch / token2 0 +[ "`echo -e '1 watch / token +2 async write /test/subnode create contents2 +1 watch / token2 0 1 waitwatch 1 ackwatch token -2 waitwatch' | ./xs_test 2>&1`" = "1:/test/subnode:token -2:waitwatch timeout" ] +1 waitwatch' | ./xs_test 2>&1`" = "1:/test/subnode:token +1:waitwatch timeout" ] # Rm fires notification on child. -[ "`echo -e '1 watch /test/subnode token 100 -2 rm /test +[ "`echo -e '1 watch /test/subnode token +2 async rm /test 1 waitwatch 1 ackwatch token' | ./xs_test 2>&1`" = "1:/test/subnode:token" ] # Watch should not double-send after we ack, even if we did something in between. -[ "`echo -e '1 watch /test2 token 100 -2 write /test2/foo create contents2 +[ "`echo -e '1 watch /test2 token +2 async write /test2/foo create contents2 1 waitwatch 1 read /test2/foo 1 ackwatch token diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/08transaction.sh --- a/tools/xenstore/testsuite/08transaction.sh Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/testsuite/08transaction.sh Fri Jul 29 20:25:03 2005 @@ -1,79 +1,93 @@ #! /bin/sh # Test transactions. +echo mkdir /test | ./xs_test + # Simple transaction: create a file inside transaction. -[ "`echo -e '1 start / -1 write /entry1 create contents -2 dir / -1 dir / +[ "`echo -e '1 start /test +1 write /test/entry1 create contents +2 dir /test +1 dir /test 1 commit -2 read /entry1' | ./xs_test`" = "1:entry1 +2 read /test/entry1' | ./xs_test`" = "1:entry1 2:contents" ] -echo rm /entry1 | ./xs_test +echo rm /test/entry1 | ./xs_test # Create a file and abort transaction. -[ "`echo -e '1 start / -1 write /entry1 create contents -2 dir / -1 dir / +[ "`echo -e '1 start /test +1 write /test/entry1 create contents +2 dir /test +1 dir /test 1 abort -2 dir /' | ./xs_test`" = "1:entry1" ] +2 dir /test' | ./xs_test`" = "1:entry1" ] -echo write /entry1 create contents | ./xs_test +echo write /test/entry1 create contents | ./xs_test # Delete in transaction, commit -[ "`echo -e '1 start / -1 rm /entry1 -2 dir / -1 dir / +[ "`echo -e '1 start /test +1 rm /test/entry1 +2 dir /test +1 dir /test 1 commit -2 dir /' | ./xs_test`" = "2:entry1" ] +2 dir /test' | ./xs_test`" = "2:entry1" ] # Delete in transaction, abort. -echo write /entry1 create contents | ./xs_test -[ "`echo -e '1 start / -1 rm /entry1 -2 dir / -1 dir / +echo write /test/entry1 create contents | ./xs_test +[ "`echo -e '1 start /test +1 rm /test/entry1 +2 dir /test +1 dir /test 1 abort -2 dir /' | ./xs_test`" = "2:entry1 +2 dir /test' | ./xs_test`" = "2:entry1 2:entry1" ] # Transactions can take as long as the want... -[ "`echo -e 'start / +[ "`echo -e 'start /test sleep 1 -rm /entry1 +rm /test/entry1 commit -dir /' | ./xs_test`" = "" ] +dir /test' | ./xs_test --no-timeout`" = "" ] # ... as long as noone is waiting. -[ "`echo -e '1 start / -2 mkdir /dir -1 mkdir /dir -1 dir / -1 commit' | ./xs_test 2>&1`" = "1:dir +[ "`echo -e '1 start /test +2 mkdir /test/dir +1 mkdir /test/dir +1 dir /test +1 commit' | ./xs_test --no-timeout 2>&1`" = "1:dir FATAL: 1: commit: Connection timed out" ] # Events inside transactions don't trigger watches until (successful) commit. -[ "`echo -e '1 watch / token 100 -2 start / -2 mkdir /dir/sub +[ "`echo -e '1 watch /test token +2 start /test +2 mkdir /test/dir/sub 1 waitwatch' | ./xs_test 2>&1`" = "1:waitwatch timeout" ] -[ "`echo -e '1 watch / token 100 -2 start / -2 mkdir /dir/sub +[ "`echo -e '1 watch /test token +2 start /test +2 mkdir /test/dir/sub 2 abort 1 waitwatch' | ./xs_test 2>&1`" = "1:waitwatch timeout" ] -[ "`echo -e '1 watch / token 100 -2 start / -2 mkdir /dir/sub -2 commit +[ "`echo -e '1 watch /test token +2 start /test +2 mkdir /test/dir/sub +2 async commit 1 waitwatch -1 ackwatch token' | ./xs_test 2>&1`" = "1:/dir/sub:token" ] +1 ackwatch token' | ./xs_test 2>&1`" = "1:/test/dir/sub:token" ] # Rm inside transaction works like rm outside: children get notified. -[ "`echo -e '1 watch /dir/sub token 100 -2 start / -2 rm /dir -2 commit +[ "`echo -e '1 watch /test/dir/sub token +2 start /test +2 rm /test/dir +2 async commit 1 waitwatch -1 ackwatch token' | ./xs_test 2>&1`" = "1:/dir/sub:token" ] +1 ackwatch token' | ./xs_test 2>&1`" = "1:/test/dir/sub:token" ] + +# Multiple events from single transaction don't trigger assert +[ "`echo -e '1 watch /test token +2 start /test +2 write /test/1 create contents +2 write /test/2 create contents +2 async commit +1 waitwatch +1 ackwatch token +1 waitwatch +1 ackwatch token' | ./xs_test 2>&1`" = "1:/test/1:token +1:/test/2:token" ] diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/09domain.sh --- a/tools/xenstore/testsuite/09domain.sh Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/testsuite/09domain.sh Fri Jul 29 20:25:03 2005 @@ -4,8 +4,9 @@ # Create a domain, write an entry. [ "`echo -e 'introduce 1 100 7 /my/home 1 write /entry1 create contents -dir /' | ./xs_test 2>&1`" = "handle is 1 -entry1" ] +dir /' | ./xs_test 2>&1 | sort`" = "entry1 +handle is 1 +tool" ] # Release that domain. [ "`echo -e 'release 1' | ./xs_test`" = "" ] diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/10domain-homedir.sh --- a/tools/xenstore/testsuite/10domain-homedir.sh Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/testsuite/10domain-homedir.sh Fri Jul 29 20:25:03 2005 @@ -13,8 +13,8 @@ # Place a watch using a relative path: expect relative answer. [ "`echo 'introduce 1 100 7 /home 1 mkdir foo -1 watch foo token 0 -write /home/foo/bar create contents +1 watch foo token +async write /home/foo/bar create contents 1 waitwatch 1 ackwatch token' | ./xs_test 2>&1`" = "handle is 1 1:foo/bar:token" ] diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/11domain-watch.sh --- a/tools/xenstore/testsuite/11domain-watch.sh Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/testsuite/11domain-watch.sh Fri Jul 29 20:25:03 2005 @@ -6,42 +6,46 @@ [ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ] [ "`echo -e 'introduce 1 100 7 /my/home -1 watch /test token 100 -write /test create contents2 +1 watch /test token +async write /test create contents2 1 waitwatch 1 ackwatch token 1 unwatch /test token +asyncwait release 1' | ./xs_test 2>&1`" = "handle is 1 1:/test:token" ] # ignore watches while doing commands, should work. [ "`echo -e 'introduce 1 100 7 /my/home -1 watch /dir token 100 -1 write /dir/test create contents -1 read /dir/test +1 watch /dir token +async write /dir/test create contents +1 write /dir/test2 create contents2 +1 write /dir/test3 create contents3 +1 write /dir/test4 create contents4 1 waitwatch 1 ackwatch token +asyncwait release 1' | ./xs_test 2>&1`" = "handle is 1 -1:contents 1:/dir/test:token" ] # unwatch [ "`echo -e 'introduce 1 100 7 /my/home -1 watch /dir token1 0 +1 watch /dir token1 1 unwatch /dir token1 -1 watch /dir token2 0 -2 write /dir/test2 create contents +1 watch /dir token2 +async 2 write /dir/test2 create contents 1 waitwatch 1 unwatch /dir token2 +asyncwait release 1' | ./xs_test 2>&1`" = "handle is 1 1:/dir/test2:token2" ] # unwatch while watch pending. [ "`echo -e 'introduce 1 100 7 /my/home introduce 2 101 8 /my/secondhome -1 watch /dir token1 0 -2 watch /dir token2 1 -write /dir/test create contents +1 watch /dir token1 +2 watch /dir token2 +3 async write /dir/test create contents 2 unwatch /dir token2 1 waitwatch 1 ackwatch token1 diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/12readonly.sh --- a/tools/xenstore/testsuite/12readonly.sh Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/testsuite/12readonly.sh Fri Jul 29 20:25:03 2005 @@ -4,16 +4,17 @@ [ "`echo 'write /test create contents' | ./xs_test 2>&1`" = "" ] # These are all valid. -[ "`echo 'dir / -read /test +[ "`echo dir / | ./xs_test --readonly 2>&1 | sort`" = "test +tool" ] + +[ "`echo 'read /test getperm /test -watch /test token 0 +watch /test token unwatch /test token start / commit start / -abort' | ./xs_test --readonly 2>&1`" = "test -contents +abort' | ./xs_test --readonly 2>&1`" = "contents 0 READ" ] # These don't work @@ -26,7 +27,7 @@ # Check that watches work like normal. set -m -[ "`echo 'watch / token 0 +[ "`echo 'watch / token waitwatch ackwatch token' | ./xs_test --readonly 2>&1`" = "/test:token" ] & @@ -35,6 +36,3 @@ echo Readonly wait test failed: $? exit 1 fi - - - diff -r a4196568095c -r b53a65034532 tools/xenstore/testsuite/test.sh --- a/tools/xenstore/testsuite/test.sh Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/testsuite/test.sh Fri Jul 29 20:25:03 2005 @@ -9,7 +9,7 @@ mkdir $XENSTORED_ROOTDIR # Weird failures with this. if type valgrind >/dev/null 2>&1; then - valgrind -q --logfile-fd=3 ./xenstored_test --output-pid --no-fork 3>testsuite/tmp/vgout > /tmp/pid 2> testsuite/tmp/xenstored_errors & + valgrind -q --logfile-fd=3 ./xenstored_test --output-pid --trace-file=testsuite/tmp/trace --no-fork 3>testsuite/tmp/vgout > /tmp/pid 2> testsuite/tmp/xenstored_errors & while [ ! -s /tmp/pid ]; do sleep 0; done PID=`cat /tmp/pid` rm /tmp/pid @@ -33,12 +33,17 @@ fi } +MATCH=${1:-"*"} for f in testsuite/[0-9]*.sh; do + case `basename $f` in $MATCH) RUN=1;; esac + [ -n "$RUN" ] || continue if run_test $f; then echo Test $f passed... else echo Test $f failed, running verbosely... - run_test $f -x + run_test $f -x || true + # That will have filled the screen, repeat message. + echo Test $f failed exit 1 fi done diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xenstored_core.c Fri Jul 29 20:25:03 2005 @@ -51,7 +51,7 @@ #include "xenstored_domain.h" static bool verbose; -static LIST_HEAD(connections); +LIST_HEAD(connections); static int tracefd = -1; #ifdef TESTING @@ -111,6 +111,8 @@ str = talloc_vasprintf(NULL, fmt, arglist); va_end(arglist); + trace("xenstored corruption: connection id %i: err %s: %s", + conn ? (int)conn->id : -1, strerror(saved_errno), str); eprintf("xenstored corruption: connection id %i: err %s: %s", conn ? (int)conn->id : -1, strerror(saved_errno), str); #ifdef TESTING @@ -230,6 +232,21 @@ write(tracefd, ")\n", 2); } +void trace(const char *fmt, ...) +{ + va_list arglist; + char *str; + + if (tracefd < 0) + return; + + va_start(arglist, fmt); + str = talloc_vasprintf(NULL, fmt, arglist); + va_end(arglist); + write(tracefd, str, strlen(str)); + talloc_free(str); +} + static bool write_message(struct connection *conn) { int ret; @@ -253,7 +270,7 @@ out->used = 0; /* Second write might block if non-zero. */ - if (out->hdr.msg.len) + if (out->hdr.msg.len && !conn->domain) return true; } @@ -318,7 +335,7 @@ list_for_each_entry(i, &connections, list) { if (i->domain) continue; - if (!i->blocked) + if (i->state == OK) FD_SET(i->fd, inset); if (i->out) FD_SET(i->fd, outset); @@ -454,8 +471,7 @@ return i; } -/* Returns "false", meaning "connection is not blocked". */ -bool send_reply(struct connection *conn, enum xsd_sockmsg_type type, +void send_reply(struct connection *conn, enum xsd_sockmsg_type type, const void *data, unsigned int len) { struct buffered_data *bdata; @@ -476,16 +492,15 @@ conn->waiting_reply = bdata; } else conn->out = bdata; - return false; } /* Some routines (write, mkdir, etc) just need a non-error return */ -bool send_ack(struct connection *conn, enum xsd_sockmsg_type type) -{ - return send_reply(conn, type, "OK", sizeof("OK")); -} - -bool send_error(struct connection *conn, int error) +void send_ack(struct connection *conn, enum xsd_sockmsg_type type) +{ + send_reply(conn, type, "OK", sizeof("OK")); +} + +void send_error(struct connection *conn, int error) { unsigned int i; @@ -494,7 +509,7 @@ corrupt(conn, "Unknown error %i (%s)", error, strerror(error)); - return send_reply(conn, XS_ERROR, xsd_errors[i].errstring, + send_reply(conn, XS_ERROR, xsd_errors[i].errstring, strlen(xsd_errors[i].errstring) + 1); } @@ -780,7 +795,7 @@ return false; } -static bool send_directory(struct connection *conn, const char *node) +static void send_directory(struct connection *conn, const char *node) { char *path, *reply = talloc_strdup(node, ""); unsigned int reply_len = 0; @@ -788,13 +803,17 @@ struct dirent *dirent; node = canonicalize(conn, node); - if (!check_node_perms(conn, node, XS_PERM_READ)) - return send_error(conn, errno); + if (!check_node_perms(conn, node, XS_PERM_READ)) { + send_error(conn, errno); + return; + } path = node_dir(conn->transaction, node); dir = talloc_opendir(path); - if (!dir) - return send_error(conn, errno); + if (!dir) { + send_error(conn, errno); + return; + } while ((dirent = readdir(*dir)) != NULL) { int len = strlen(dirent->d_name) + 1; @@ -807,32 +826,35 @@ reply_len += len; } - return send_reply(conn, XS_DIRECTORY, reply, reply_len); -} - -static bool do_read(struct connection *conn, const char *node) + send_reply(conn, XS_DIRECTORY, reply, reply_len); +} + +static void do_read(struct connection *conn, const char *node) { char *value; unsigned int size; int *fd; node = canonicalize(conn, node); - if (!check_node_perms(conn, node, XS_PERM_READ)) - return send_error(conn, errno); + if (!check_node_perms(conn, node, XS_PERM_READ)) { + send_error(conn, errno); + return; + } fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0); if (!fd) { /* Data file doesn't exist? We call that a directory */ if (errno == ENOENT) errno = EISDIR; - return send_error(conn, errno); + send_error(conn, errno); + return; } value = read_all(fd, &size); if (!value) - return send_error(conn, errno); - - return send_reply(conn, XS_READ, value, size); + send_error(conn, errno); + else + send_reply(conn, XS_READ, value, size); } /* Create a new directory. Optionally put data in it (if data != NULL) */ @@ -876,7 +898,7 @@ } /* path, flags, data... */ -static bool do_write(struct connection *conn, struct buffered_data *in) +static void do_write(struct connection *conn, struct buffered_data *in) { unsigned int offset, datalen; char *vec[2]; @@ -885,16 +907,20 @@ struct stat st; /* Extra "strings" can be created by binary data. */ - if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) - return send_error(conn, EINVAL); + if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) { + send_error(conn, EINVAL); + return; + } node = canonicalize(conn, vec[0]); if (/*suppress error on write outside transaction*/ 0 && - !within_transaction(conn->transaction, node)) - return send_error(conn, EROFS); + !within_transaction(conn->transaction, node)) { + send_error(conn, EROFS); + return; + } if (transaction_block(conn, node)) - return true; + return; offset = strlen(vec[0]) + strlen(vec[1]) + 2; datalen = in->used - offset; @@ -905,196 +931,243 @@ mode = XS_PERM_WRITE|XS_PERM_CREATE; else if (streq(vec[1], XS_WRITE_CREATE_EXCL)) mode = XS_PERM_WRITE|XS_PERM_CREATE; - else - return send_error(conn, EINVAL); - - if (!check_node_perms(conn, node, mode)) - return send_error(conn, errno); + else { + send_error(conn, EINVAL); + return; + } + + if (!check_node_perms(conn, node, mode)) { + send_error(conn, errno); + return; + } if (lstat(node_dir(conn->transaction, node), &st) != 0) { /* Does not exist... */ - if (errno != ENOENT) - return send_error(conn, errno); + if (errno != ENOENT) { + send_error(conn, errno); + return; + } /* Not going to create it? */ - if (!(mode & XS_PERM_CREATE)) - return send_error(conn, ENOENT); - - if (!new_directory(conn, node, in->buffer + offset, datalen)) - return send_error(conn, errno); + if (!(mode & XS_PERM_CREATE)) { + send_error(conn, ENOENT); + return; + } + + if (!new_directory(conn, node, in->buffer + offset, datalen)) { + send_error(conn, errno); + return; + } } else { /* Exists... */ - if (streq(vec[1], XS_WRITE_CREATE_EXCL)) - return send_error(conn, EEXIST); + if (streq(vec[1], XS_WRITE_CREATE_EXCL)) { + send_error(conn, EEXIST); + return; + } tmppath = tempfile(node_datafile(conn->transaction, node), in->buffer + offset, datalen); - if (!tmppath) - return send_error(conn, errno); + if (!tmppath) { + send_error(conn, errno); + return; + } commit_tempfile(tmppath); } add_change_node(conn->transaction, node, false); + fire_watches(conn, node, false); send_ack(conn, XS_WRITE); - fire_watches(conn->transaction, node, false); - return false; -} - -static bool do_mkdir(struct connection *conn, const char *node) +} + +static void do_mkdir(struct connection *conn, const char *node) { node = canonicalize(conn, node); - if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_CREATE)) - return send_error(conn, errno); - - if (!within_transaction(conn->transaction, node)) - return send_error(conn, EROFS); + if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_CREATE)) { + send_error(conn, errno); + return; + } + + if (!within_transaction(conn->transaction, node)) { + send_error(conn, EROFS); + return; + } if (transaction_block(conn, node)) - return true; - - if (!new_directory(conn, node, NULL, 0)) - return send_error(conn, errno); + return; + + if (!new_directory(conn, node, NULL, 0)) { + send_error(conn, errno); + return; + } add_change_node(conn->transaction, node, false); + fire_watches(conn, node, false); send_ack(conn, XS_MKDIR); - fire_watches(conn->transaction, node, false); - return false; -} - -static bool do_rm(struct connection *conn, const char *node) +} + +static void do_rm(struct connection *conn, const char *node) { char *tmppath, *path; node = canonicalize(conn, node); - if (!check_node_perms(conn, node, XS_PERM_WRITE)) - return send_error(conn, errno); - - if (!within_transaction(conn->transaction, node)) - return send_error(conn, EROFS); + if (!check_node_perms(conn, node, XS_PERM_WRITE)) { + send_error(conn, errno); + return; + } + + if (!within_transaction(conn->transaction, node)) { + send_error(conn, EROFS); + return; + } if (transaction_block(conn, node)) - return true; - - if (streq(node, "/")) - return send_error(conn, EINVAL); + return; + + if (streq(node, "/")) { + send_error(conn, EINVAL); + return; + } /* We move the directory to temporary name, destructor cleans up. */ path = node_dir(conn->transaction, node); tmppath = talloc_asprintf(node, "%s.tmp", path); talloc_set_destructor(tmppath, destroy_path); - if (rename(path, tmppath) != 0) - return send_error(conn, errno); + if (rename(path, tmppath) != 0) { + send_error(conn, errno); + return; + } add_change_node(conn->transaction, node, true); + fire_watches(conn, node, true); send_ack(conn, XS_RM); - fire_watches(conn->transaction, node, true); - return false; -} - -static bool do_get_perms(struct connection *conn, const char *node) +} + +static void do_get_perms(struct connection *conn, const char *node) { struct xs_permissions *perms; char *strings; unsigned int len, num; node = canonicalize(conn, node); - if (!check_node_perms(conn, node, XS_PERM_READ)) - return send_error(conn, errno); + if (!check_node_perms(conn, node, XS_PERM_READ)) { + send_error(conn, errno); + return; + } perms = get_perms(conn->transaction, node, &num); - if (!perms) - return send_error(conn, errno); + if (!perms) { + send_error(conn, errno); + return; + } strings = perms_to_strings(node, perms, num, &len); if (!strings) - return send_error(conn, errno); - - return send_reply(conn, XS_GET_PERMS, strings, len); -} - -static bool do_set_perms(struct connection *conn, struct buffered_data *in) + send_error(conn, errno); + else + send_reply(conn, XS_GET_PERMS, strings, len); +} + +static void do_set_perms(struct connection *conn, struct buffered_data *in) { unsigned int num; char *node; struct xs_permissions *perms; num = xs_count_strings(in->buffer, in->used); - if (num < 2) - return send_error(conn, EINVAL); + if (num < 2) { + send_error(conn, EINVAL); + return; + } /* First arg is node name. */ node = canonicalize(conn, in->buffer); in->buffer += strlen(in->buffer) + 1; num--; - if (!within_transaction(conn->transaction, node)) - return send_error(conn, EROFS); + if (!within_transaction(conn->transaction, node)) { + send_error(conn, EROFS); + return; + } if (transaction_block(conn, node)) - return true; + return; /* We must own node to do this (tools can do this too). */ - if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER)) - return send_error(conn, errno); + if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER)) { + send_error(conn, errno); + return; + } perms = talloc_array(node, struct xs_permissions, num); - if (!xs_strings_to_perms(perms, num, in->buffer)) - return send_error(conn, errno); - - if (!set_perms(conn->transaction, node, perms, num)) - return send_error(conn, errno); + if (!xs_strings_to_perms(perms, num, in->buffer)) { + send_error(conn, errno); + return; + } + + if (!set_perms(conn->transaction, node, perms, num)) { + send_error(conn, errno); + return; + } + add_change_node(conn->transaction, node, false); + fire_watches(conn, node, false); send_ack(conn, XS_SET_PERMS); - fire_watches(conn->transaction, node, false); - return false; } /* Process "in" for conn: "in" will vanish after this conversation, so * we can talloc off it for temporary variables. May free "conn". - * Returns true if can't complete due to block. */ -static bool process_message(struct connection *conn, struct buffered_data *in) +static void process_message(struct connection *conn, struct buffered_data *in) { switch (in->hdr.msg.type) { case XS_DIRECTORY: - return send_directory(conn, onearg(in)); + send_directory(conn, onearg(in)); + break; case XS_READ: - return do_read(conn, onearg(in)); + do_read(conn, onearg(in)); + break; case XS_WRITE: - return do_write(conn, in); + do_write(conn, in); + break; case XS_MKDIR: - return do_mkdir(conn, onearg(in)); + do_mkdir(conn, onearg(in)); + break; case XS_RM: - return do_rm(conn, onearg(in)); + do_rm(conn, onearg(in)); + break; case XS_GET_PERMS: - return do_get_perms(conn, onearg(in)); + do_get_perms(conn, onearg(in)); + break; case XS_SET_PERMS: - return do_set_perms(conn, in); + do_set_perms(conn, in); + break; case XS_SHUTDOWN: /* FIXME: Implement gentle shutdown too. */ /* Only tools can do this. */ - if (conn->id != 0) - return send_error(conn, EACCES); - if (!conn->can_write) - return send_error(conn, EROFS); + if (conn->id != 0) { + send_error(conn, EACCES); + break; + } + if (!conn->can_write) { + send_error(conn, EROFS); + break; + } send_ack(conn, XS_SHUTDOWN); /* Everything hangs off auto-free context, freed at exit. */ exit(0); case XS_DEBUG: - if (streq(in->buffer, "print")) { + if (streq(in->buffer, "print")) xprintf("debug: %s", in->buffer + get_string(in, 0)); - return false; - } #ifdef TESTING /* For testing, we allow them to set id. */ if (streq(in->buffer, "setid")) { @@ -1107,37 +1180,44 @@ failtest = true; } #endif /* TESTING */ - return false; + break; case XS_WATCH: - return do_watch(conn, in); + do_watch(conn, in); + break; case XS_WATCH_ACK: - return do_watch_ack(conn, onearg(in)); + do_watch_ack(conn, onearg(in)); + break; case XS_UNWATCH: - return do_unwatch(conn, in); + do_unwatch(conn, in); + break; case XS_TRANSACTION_START: - return do_transaction_start(conn, onearg(in)); + do_transaction_start(conn, onearg(in)); + break; case XS_TRANSACTION_END: - return do_transaction_end(conn, onearg(in)); + do_transaction_end(conn, onearg(in)); + break; case XS_INTRODUCE: - return do_introduce(conn, in); + do_introduce(conn, in); + break; case XS_RELEASE: - return do_release(conn, onearg(in)); + do_release(conn, onearg(in)); + break; case XS_GETDOMAINPATH: - return do_get_domain_path(conn, onearg(in)); + do_get_domain_path(conn, onearg(in)); + break; case XS_WATCH_EVENT: default: eprintf("Client unknown operation %i", in->hdr.msg.type); send_error(conn, ENOSYS); - return false; } } @@ -1151,6 +1231,8 @@ struct buffered_data *in = NULL; enum xsd_sockmsg_type type = conn->in->hdr.msg.type; jmp_buf talloc_fail; + + assert(conn->state == OK); /* For simplicity, we kill the connection on OOM. */ talloc_set_fail_handler(out_of_mem, &talloc_fail); @@ -1166,7 +1248,7 @@ /* We might get a command while waiting for an ack: this means * the other end discarded it: we will re-transmit. */ if (type != XS_WATCH_ACK) - conn->waiting_for_ack = false; + conn->waiting_for_ack = NULL; /* Careful: process_message may free connection. We detach * "in" beforehand and allocate the new buffer to avoid @@ -1174,7 +1256,9 @@ */ in = talloc_steal(talloc_autofree_context(), conn->in); conn->in = new_buffer(conn); - if (process_message(conn, in)) { + process_message(conn, in); + + if (conn->state == BLOCKED) { /* Blocked by transaction: queue for re-xmit. */ talloc_free(conn->in); conn->in = in; @@ -1197,7 +1281,7 @@ int bytes; struct buffered_data *in; - assert(!conn->blocked); + assert(conn->state == OK); in = conn->in; /* Not finished header yet? */ @@ -1254,13 +1338,17 @@ struct connection *i, *tmp; list_for_each_entry_safe(i, tmp, &connections, list) { - if (!i->blocked) - continue; - - if (!transaction_covering_node(i->blocked)) { - talloc_free(i->blocked); - i->blocked = NULL; - consider_message(i); + switch (i->state) { + case BLOCKED: + if (!transaction_covering_node(i->blocked_by)) { + talloc_free(i->blocked_by); + i->blocked_by = NULL; + i->state = OK; + consider_message(i); + } + break; + case OK: + break; } } @@ -1281,7 +1369,8 @@ if (!new) return NULL; - new->blocked = false; + new->state = OK; + new->blocked_by = NULL; new->out = new->waiting_reply = NULL; new->fd = -1; new->id = 0; @@ -1290,6 +1379,7 @@ new->write = write; new->read = read; new->can_write = true; + INIT_LIST_HEAD(&new->watches); talloc_set_fail_handler(out_of_mem, &talloc_fail); if (setjmp(talloc_fail)) { @@ -1358,12 +1448,14 @@ list_for_each_entry(i, &connections, list) { printf("Connection %p:\n", i); + printf(" state = %s\n", + i->state == OK ? "OK" + : i->state == BLOCKED ? "BLOCKED" + : "INVALID"); if (i->id) printf(" id = %i\n", i->id); - if (i->blocked) - printf(" blocked on = %s\n", i->blocked); - if (i->waiting_for_ack) - printf(" waiting_for_ack TRUE\n"); + if (i->blocked_by) + printf(" blocked on = %s\n", i->blocked_by); if (!i->in->inhdr || i->in->used) printf(" got %i bytes of %s\n", i->in->used, i->in->inhdr ? "header" : "data"); @@ -1385,6 +1477,44 @@ } } #endif + +static void setup_structure(void) +{ + struct xs_permissions perms = { .id = 0, .perms = XS_PERM_READ }; + char *root, *dir, *permfile; + + /* Create root directory, with permissions. */ + if (mkdir(xs_daemon_store(), 0750) != 0) { + if (errno != EEXIST) + barf_perror("Could not create root %s", + xs_daemon_store()); + return; + } + root = talloc_strdup(talloc_autofree_context(), "/"); + if (!set_perms(NULL, root, &perms, 1)) + barf_perror("Could not create permissions in root"); + + /* Create tool directory, with xenstored subdir. */ + dir = talloc_asprintf(root, "%s/%s", xs_daemon_store(), "tool"); + if (mkdir(dir, 0750) != 0) + barf_perror("Making dir %s", dir); + + permfile = talloc_strdup(root, "/tool"); + if (!set_perms(NULL, permfile, &perms, 1)) + barf_perror("Could not create permissions on %s", permfile); + + dir = talloc_asprintf(root, "%s/%s", dir, "xenstored"); + if (mkdir(dir, 0750) != 0) + barf_perror("Making dir %s", dir); + + permfile = talloc_strdup(root, "/tool/xenstored"); + if (!set_perms(NULL, permfile, &perms, 1)) + barf_perror("Could not create permissions on %s", permfile); + talloc_free(root); + if (mkdir(xs_daemon_transactions(), 0750) != 0) + barf_perror("Could not create transaction dir %s", + xs_daemon_transactions()); +} static struct option options[] = { { "no-fork", 0, NULL, 'N' }, { "verbose", 0, NULL, 'V' }, @@ -1461,21 +1591,13 @@ barf_perror("Could not listen on sockets"); /* If we're the first, create .perms file for root. */ - if (mkdir(xs_daemon_store(), 0750) == 0) { - struct xs_permissions perms; - char *root = talloc_strdup(talloc_autofree_context(), "/"); - - perms.id = 0; - perms.perms = XS_PERM_READ; - if (!set_perms(NULL, root, &perms, 1)) - barf_perror("Could not create permissions in root"); - talloc_free(root); - mkdir(xs_daemon_transactions(), 0750); - } else if (errno != EEXIST) - barf_perror("Could not create root %s", xs_daemon_store()); + setup_structure(); /* Listen to hypervisor. */ event_fd = domain_init(); + + /* Restore existing connections. */ + restore_existing_connections(); /* Debugging: daemonize() closes standard fds, so dup here. */ tmpout = dup(STDOUT_FILENO); diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_core.h --- a/tools/xenstore/xenstored_core.h Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xenstored_core.h Fri Jul 29 20:25:03 2005 @@ -47,6 +47,14 @@ typedef int connwritefn_t(struct connection *, const void *, unsigned int); typedef int connreadfn_t(struct connection *, void *, unsigned int); +enum state +{ + /* Blocked by transaction. */ + BLOCKED, + /* Completed */ + OK, +}; + struct connection { struct list_head list; @@ -57,14 +65,17 @@ /* Who am I? 0 for socket connections. */ domid_t id; - /* Are we blocked waiting for a transaction to end? Contains node. */ - char *blocked; + /* Blocked on transaction? */ + enum state state; + + /* Node we are waiting for (if state == BLOCKED) */ + char *blocked_by; /* Is this a read-only connection? */ bool can_write; /* Are we waiting for a watch event ack? */ - bool waiting_for_ack; + struct watch *waiting_for_ack; /* Buffered incoming data. */ struct buffered_data *in; @@ -81,10 +92,14 @@ /* The domain I'm associated with, if any. */ struct domain *domain; + /* My watches. */ + struct list_head watches; + /* Methods for communicating over this connection: write can be NULL */ connwritefn_t *write; connreadfn_t *read; }; +extern struct list_head connections; /* Return length of string (including nul) at this offset. */ unsigned int get_string(const struct buffered_data *data, @@ -100,14 +115,14 @@ /* Create a new buffer with lifetime of context. */ struct buffered_data *new_buffer(void *ctx); -bool send_reply(struct connection *conn, enum xsd_sockmsg_type type, - const void *data, unsigned int len); +void send_reply(struct connection *conn, enum xsd_sockmsg_type type, + const void *data, unsigned int len); /* Some routines (write, mkdir, etc) just need a non-error return */ -bool send_ack(struct connection *conn, enum xsd_sockmsg_type type); +void send_ack(struct connection *conn, enum xsd_sockmsg_type type); /* Send an error: error is usually "errno". */ -bool send_error(struct connection *conn, int error); +void send_error(struct connection *conn, int error); /* Canonicalize this path if possible. */ char *canonicalize(struct connection *conn, const char *node); @@ -147,5 +162,6 @@ void trace_create(const void *data, const char *type); void trace_destroy(const void *data, const char *type); void trace_watch_timeout(const struct connection *conn, const char *node, const char *token); +void trace(const char *fmt, ...); #endif /* _XENSTORED_CORE_H */ diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_domain.c --- a/tools/xenstore/xenstored_domain.c Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xenstored_domain.c Fri Jul 29 20:25:03 2005 @@ -239,7 +239,8 @@ * careful that handle_input/handle_output can destroy conn. */ while ((domain = find_domain(port)) != NULL) { - if (!domain->conn->blocked && buffer_has_input(domain->input)) + if (domain->conn->state == OK + && buffer_has_input(domain->input)) handle_input(domain->conn); else if (domain->conn->out && buffer_has_output_room(domain->output)) @@ -254,34 +255,21 @@ #endif } -/* domid, mfn, evtchn, path */ -bool do_introduce(struct connection *conn, struct buffered_data *in) +static struct domain *new_domain(void *context, domid_t domid, + unsigned long mfn, int port, + const char *path) { struct domain *domain; - char *vec[4]; - - if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) - return send_error(conn, EINVAL); - - if (conn->id != 0) - return send_error(conn, EACCES); - - if (!conn->can_write) - return send_error(conn, EROFS); - - /* Hang domain off "in" until we're finished. */ - domain = talloc(in, struct domain); - domain->domid = atoi(vec[0]); - domain->port = atoi(vec[2]); - if ((domain->port <= 0) || !is_valid_nodename(vec[3])) - return send_error(conn, EINVAL); - domain->path = talloc_strdup(domain, vec[3]); + domain = talloc(context, struct domain); + domain->port = 0; + domain->domid = domid; + domain->path = talloc_strdup(domain, path); domain->page = xc_map_foreign_range(*xc_handle, domain->domid, getpagesize(), PROT_READ|PROT_WRITE, - atol(vec[1])); + mfn); if (!domain->page) - return send_error(conn, errno); + return NULL; list_add(&domain->list, &domains); talloc_set_destructor(domain, destroy_domain); @@ -291,15 +279,52 @@ domain->output = domain->page + getpagesize()/2; /* Tell kernel we're interested in this event. */ - if (ioctl(eventchn_fd, EVENTCHN_BIND, domain->port) != 0) - return send_error(conn, errno); - + if (ioctl(eventchn_fd, EVENTCHN_BIND, port) != 0) + return NULL; + + domain->port = port; domain->conn = new_connection(writechn, readchn); domain->conn->domain = domain; - + return domain; +} + +/* domid, mfn, evtchn, path */ +void do_introduce(struct connection *conn, struct buffered_data *in) +{ + struct domain *domain; + char *vec[4]; + + if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) { + send_error(conn, EINVAL); + return; + } + + if (conn->id != 0) { + send_error(conn, EACCES); + return; + } + + if (!conn->can_write) { + send_error(conn, EROFS); + return; + } + + /* Sanity check args. */ + if ((atoi(vec[2]) <= 0) || !is_valid_nodename(vec[3])) { + send_error(conn, EINVAL); + return; + } + /* Hang domain off "in" until we're finished. */ + domain = new_domain(in, atoi(vec[0]), atol(vec[1]), atol(vec[2]), + vec[3]); + if (!domain) { + send_error(conn, errno); + return; + } + + /* Now domain belongs to its connection. */ talloc_steal(domain->conn, domain); - - return send_ack(conn, XS_INTRODUCE); + send_ack(conn, XS_INTRODUCE); } static struct domain *find_domain_by_domid(domid_t domid) @@ -314,39 +339,51 @@ } /* domid */ -bool do_release(struct connection *conn, const char *domid_str) +void do_release(struct connection *conn, const char *domid_str) { struct domain *domain; domid_t domid; - if (!domid_str) - return send_error(conn, EINVAL); + if (!domid_str) { + send_error(conn, EINVAL); + return; + } domid = atoi(domid_str); - if (!domid) - return send_error(conn, EINVAL); - - if (conn->id != 0) - return send_error(conn, EACCES); + if (!domid) { + send_error(conn, EINVAL); + return; + } + + if (conn->id != 0) { + send_error(conn, EACCES); + return; + } domain = find_domain_by_domid(domid); - if (!domain) - return send_error(conn, ENOENT); - - if (!domain->conn) - return send_error(conn, EINVAL); + if (!domain) { + send_error(conn, ENOENT); + return; + } + + if (!domain->conn) { + send_error(conn, EINVAL); + return; + } talloc_free(domain->conn); - return send_ack(conn, XS_RELEASE); -} - -bool do_get_domain_path(struct connection *conn, const char *domid_str) + send_ack(conn, XS_RELEASE); +} + +void do_get_domain_path(struct connection *conn, const char *domid_str) { struct domain *domain; domid_t domid; - if (!domid_str) - return send_error(conn, EINVAL); + if (!domid_str) { + send_error(conn, EINVAL); + return; + } domid = atoi(domid_str); if (domid == DOMID_SELF) @@ -354,11 +391,11 @@ else domain = find_domain_by_domid(domid); - if (!domain) - return send_error(conn, ENOENT); - - return send_reply(conn, XS_GETDOMAINPATH, domain->path, - strlen(domain->path) + 1); + if (!domain) + send_error(conn, ENOENT); + else + send_reply(conn, XS_GETDOMAINPATH, domain->path, + strlen(domain->path) + 1); } static int close_xc_handle(void *_handle) @@ -373,6 +410,11 @@ if (!conn->domain) return NULL; return conn->domain->path; +} + +/* Restore existing connections. */ +void restore_existing_connections(void) +{ } /* Returns the event channel handle. */ diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_domain.h --- a/tools/xenstore/xenstored_domain.h Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xenstored_domain.h Fri Jul 29 20:25:03 2005 @@ -23,13 +23,13 @@ void handle_event(int event_fd); /* domid, mfn, eventchn, path */ -bool do_introduce(struct connection *conn, struct buffered_data *in); +void do_introduce(struct connection *conn, struct buffered_data *in); /* domid */ -bool do_release(struct connection *conn, const char *domid_str); +void do_release(struct connection *conn, const char *domid_str); /* domid */ -bool do_get_domain_path(struct connection *conn, const char *domid_str); +void do_get_domain_path(struct connection *conn, const char *domid_str); /* Returns the event channel handle */ int domain_init(void); @@ -37,4 +37,7 @@ /* Returns the implicit path of a connection (only domains have this) */ const char *get_implicit_path(const struct connection *conn); +/* Read existing connection information from store. */ +void restore_existing_connections(void); + #endif /* _XENSTORED_DOMAIN_H */ diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_transaction.c --- a/tools/xenstore/xenstored_transaction.c Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xenstored_transaction.c Fri Jul 29 20:25:03 2005 @@ -114,7 +114,8 @@ trans = transaction_covering_node(node); if (trans) { start_transaction_timeout(trans); - conn->blocked = talloc_strdup(conn, node); + conn->state = BLOCKED; + conn->blocked_by = talloc_strdup(conn, node); return true; } return false; @@ -239,20 +240,24 @@ return true; } -bool do_transaction_start(struct connection *conn, const char *node) +void do_transaction_start(struct connection *conn, const char *node) { struct transaction *transaction; char *dir; - if (conn->transaction) - return send_error(conn, EBUSY); + if (conn->transaction) { + send_error(conn, EBUSY); + return; + } node = canonicalize(conn, node); - if (!check_node_perms(conn, node, XS_PERM_READ)) - return send_error(conn, errno); + if (!check_node_perms(conn, node, XS_PERM_READ)) { + send_error(conn, errno); + return; + } if (transaction_block(conn, node)) - return true; + return; dir = node_dir_outside_transaction(node); @@ -270,18 +275,19 @@ talloc_set_destructor(transaction, destroy_transaction); trace_create(transaction, "transaction"); - if (!copy_dir(dir, transaction->divert)) - return send_error(conn, errno); + if (!copy_dir(dir, transaction->divert)) { + send_error(conn, errno); + return; + } talloc_steal(conn, transaction); conn->transaction = transaction; - return send_ack(transaction->conn, XS_TRANSACTION_START); + send_ack(transaction->conn, XS_TRANSACTION_START); } static bool commit_transaction(struct transaction *trans) { char *tmp, *dir; - struct changed_node *i; /* Move: orig -> .old, repl -> orig. Cleanup deletes .old. */ dir = node_dir_outside_transaction(trans->node); @@ -294,39 +300,44 @@ trans->divert, dir); trans->divert = tmp; - - /* Fire off the watches for everything that changed. */ - list_for_each_entry(i, &trans->changes, list) - fire_watches(NULL, i->node, i->recurse); return true; } -bool do_transaction_end(struct connection *conn, const char *arg) -{ - if (!arg || (!streq(arg, "T") && !streq(arg, "F"))) - return send_error(conn, EINVAL); - - if (!conn->transaction) - return send_error(conn, ENOENT); +void do_transaction_end(struct connection *conn, const char *arg) +{ + struct changed_node *i; + struct transaction *trans; + + if (!arg || (!streq(arg, "T") && !streq(arg, "F"))) { + send_error(conn, EINVAL); + return; + } + + if (!conn->transaction) { + send_error(conn, ENOENT); + return; + } + + /* Set to NULL so fire_watches sends events. */ + trans = conn->transaction; + conn->transaction = NULL; + /* Attach transaction to arg for auto-cleanup */ + talloc_steal(arg, trans); if (streq(arg, "T")) { - if (conn->transaction->destined_to_fail) { + if (trans->destined_to_fail) { send_error(conn, ETIMEDOUT); - goto failed; + return; } - if (!commit_transaction(conn->transaction)) { + if (!commit_transaction(trans)) { send_error(conn, errno); - goto failed; + return; } - } - - talloc_free(conn->transaction); - conn->transaction = NULL; - return send_ack(conn, XS_TRANSACTION_END); - -failed: - talloc_free(conn->transaction); - conn->transaction = NULL; - return false; -} - + + /* Fire off the watches for everything that changed. */ + list_for_each_entry(i, &trans->changes, list) + fire_watches(conn, i->node, i->recurse); + } + send_ack(conn, XS_TRANSACTION_END); +} + diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_transaction.h --- a/tools/xenstore/xenstored_transaction.h Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xenstored_transaction.h Fri Jul 29 20:25:03 2005 @@ -22,8 +22,8 @@ struct transaction; -bool do_transaction_start(struct connection *conn, const char *node); -bool do_transaction_end(struct connection *conn, const char *arg); +void do_transaction_start(struct connection *conn, const char *node); +void do_transaction_end(struct connection *conn, const char *arg); /* Is node covered by this transaction? */ bool within_transaction(struct transaction *trans, const char *node); diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_watch.c --- a/tools/xenstore/xenstored_watch.c Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xenstored_watch.c Fri Jul 29 20:25:03 2005 @@ -33,76 +33,43 @@ #include "xenstored_domain.h" /* FIXME: time out unacked watches. */ - -/* We create this if anyone is interested "node", then we pass it from - * watch to watch as each connection acks it. - */ struct watch_event { - /* The watch we are firing for (watch->events) */ + /* The events on this watch. */ struct list_head list; - /* Watches we need to fire for (watches[0]->events == this). */ - struct watch **watches; - unsigned int num_watches; - - struct timeval timeout; - - /* Name of node which changed. */ - char *node; - - /* For remove, we trigger on all the children of this node too. */ - bool recurse; + /* Data to send (node\0token\0). */ + unsigned int len; + char *data; }; struct watch { + /* Watches on this connection */ struct list_head list; - unsigned int priority; /* Current outstanding events applying to this watch. */ struct list_head events; /* Is this relative to connnection's implicit path? */ - bool relative; + const char *relative_path; char *token; char *node; - struct connection *conn; }; -static LIST_HEAD(watches); - -static struct watch_event *get_first_event(struct connection *conn) -{ - struct watch *watch; - struct watch_event *event; - - /* Find first watch with an event. */ - list_for_each_entry(watch, &watches, list) { - if (watch->conn != conn) - continue; - - event = list_top(&watch->events, struct watch_event, list); - if (event) - return event; - } - return NULL; -} /* Look through our watches: if any of them have an event, queue it. */ void queue_next_event(struct connection *conn) { struct watch_event *event; - const char *node; - char *buffer; - unsigned int len; + struct watch *watch; /* We had a reply queued already? Send it: other end will * discard watch. */ if (conn->waiting_reply) { conn->out = conn->waiting_reply; conn->waiting_reply = NULL; - conn->waiting_for_ack = false; + conn->waiting_for_ack = NULL; return; } @@ -110,170 +77,83 @@ if (conn->waiting_for_ack) return; - event = get_first_event(conn); - if (!event) - return; - - /* If we decide to cancel, we will reset this. */ - conn->waiting_for_ack = true; - - /* If we deleted /foo and they're watching /foo/bar, that's what we - * tell them has changed. */ - if (!is_child(event->node, event->watches[0]->node)) { - assert(event->recurse); - node = event->watches[0]->node; - } else - node = event->node; - - /* If watch placed using relative path, give them relative answer. */ - if (event->watches[0]->relative) { - node += strlen(get_implicit_path(conn)); - if (node[0] == '/') /* Could be "". */ + list_for_each_entry(watch, &conn->watches, list) { + event = list_top(&watch->events, struct watch_event, list); + if (event) { + conn->waiting_for_ack = watch; + send_reply(conn,XS_WATCH_EVENT,event->data,event->len); + break; + } + } +} + +static int destroy_watch_event(void *_event) +{ + struct watch_event *event = _event; + + trace_destroy(event, "watch_event"); + return 0; +} + +static void add_event(struct watch *watch, const char *node) +{ + struct watch_event *event; + + if (watch->relative_path) { + node += strlen(watch->relative_path); + if (*node == '/') /* Could be "" */ node++; } - /* Create reply from path and token */ - len = strlen(node) + 1 + strlen(event->watches[0]->token) + 1; - buffer = talloc_array(conn, char, len); - strcpy(buffer, node); - strcpy(buffer+strlen(node)+1, event->watches[0]->token); - send_reply(conn, XS_WATCH_EVENT, buffer, len); - talloc_free(buffer); -} - -static struct watch **find_watches(const char *node, bool recurse, - unsigned int *num) -{ - struct watch *i; - struct watch **ret = NULL; - - *num = 0; - - /* We include children too if this is an rm. */ - list_for_each_entry(i, &watches, list) { - if (is_child(node, i->node) || - (recurse && is_child(i->node, node))) { - (*num)++; - ret = talloc_realloc(node, ret, struct watch *, *num); - ret[*num - 1] = i; - } - } - return ret; + event = talloc(watch, struct watch_event); + event->len = strlen(node) + 1 + strlen(watch->token) + 1; + event->data = talloc_array(event, char, event->len); + strcpy(event->data, node); + strcpy(event->data + strlen(node) + 1, watch->token); + talloc_set_destructor(event, destroy_watch_event); + list_add_tail(&event->list, &watch->events); + trace_create(event, "watch_event"); } /* FIXME: we fail to fire on out of memory. Should drop connections. */ -void fire_watches(struct transaction *trans, const char *node, bool recurse) -{ - struct watch **watches; - struct watch_event *event; - unsigned int num_watches; +void fire_watches(struct connection *conn, const char *node, bool recurse) +{ + struct connection *i; + struct watch *watch; /* During transactions, don't fire watches. */ - if (trans) - return; - - watches = find_watches(node, recurse, &num_watches); - if (!watches) - return; - - /* Create and fill in info about event. */ - event = talloc(talloc_autofree_context(), struct watch_event); - event->node = talloc_strdup(event, node); - - /* Tie event to this watch. */ - event->watches = watches; - talloc_steal(event, watches); - event->num_watches = num_watches; - event->recurse = recurse; - list_add_tail(&event->list, &watches[0]->events); - - /* Warn if not finished after thirty seconds. */ - gettimeofday(&event->timeout, NULL); - event->timeout.tv_sec += 30; - - /* If connection not doing anything, queue this. */ - if (!watches[0]->conn->out) - queue_next_event(watches[0]->conn); -} - -/* We're done with this event: see if anyone else wants it. */ -static void move_event_onwards(struct watch_event *event) -{ - list_del(&event->list); - - event->num_watches--; - event->watches++; - if (!event->num_watches) { - talloc_free(event); - return; - } - - list_add_tail(&event->list, &event->watches[0]->events); - - /* If connection not doing anything, queue this. */ - if (!event->watches[0]->conn->out) - queue_next_event(event->watches[0]->conn); -} - -static void remove_watch_from_events(struct watch *dying_watch) -{ - struct watch *watch; - struct watch_event *event; - unsigned int i; - - list_for_each_entry(watch, &watches, list) { - list_for_each_entry(event, &watch->events, list) { - for (i = 0; i < event->num_watches; i++) { - if (event->watches[i] != dying_watch) - continue; - - assert(i != 0); - memmove(event->watches+i, - event->watches+i+1, - (event->num_watches - (i+1)) - * sizeof(struct watch *)); - event->num_watches--; - } + if (conn->transaction) + return; + + /* Create an event for each watch. Don't send to self. */ + list_for_each_entry(i, &connections, list) { + if (i == conn) + continue; + + list_for_each_entry(watch, &i->watches, list) { + if (is_child(node, watch->node)) + add_event(watch, node); + else if (recurse && is_child(watch->node, node)) + add_event(watch, watch->node); + else + continue; + /* If connection not doing anything, queue this. */ + if (!i->out) + queue_next_event(i); } } } static int destroy_watch(void *_watch) { - struct watch *watch = _watch; - struct watch_event *event; - - /* If we have pending events, pass them on to others. */ - while ((event = list_top(&watch->events, struct watch_event, list))) - move_event_onwards(event); - - /* Remove from global list. */ - list_del(&watch->list); - - /* Other events which match this watch must be cleared. */ - remove_watch_from_events(watch); - - trace_destroy(watch, "watch"); + trace_destroy(_watch, "watch"); return 0; } -/* We keep watches in priority order. */ -static void insert_watch(struct watch *watch) -{ - struct watch *i; - - list_for_each_entry(i, &watches, list) { - if (i->priority <= watch->priority) { - list_add_tail(&watch->list, &i->list); - return; - } - } - - list_add_tail(&watch->list, &watches); -} - void shortest_watch_ack_timeout(struct timeval *tv) { + (void)tv; +#if 0 /* FIXME */ struct watch *watch; list_for_each_entry(watch, &watches, list) { @@ -285,10 +165,12 @@ *tv = i->timeout; } } +#endif } void check_watch_ack_timeout(void) { +#if 0 struct watch *watch; struct timeval now; @@ -308,77 +190,97 @@ } } } -} - -bool do_watch(struct connection *conn, struct buffered_data *in) -{ - struct watch *watch; - char *vec[3]; +#endif +} + +void do_watch(struct connection *conn, struct buffered_data *in) +{ + struct watch *watch; + char *vec[2]; bool relative; - if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec)) - return send_error(conn, EINVAL); + if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec)) { + send_error(conn, EINVAL); + return; + } relative = !strstarts(vec[0], "/"); vec[0] = canonicalize(conn, vec[0]); - if (!check_node_perms(conn, vec[0], XS_PERM_READ)) - return send_error(conn, errno); + if (!check_node_perms(conn, vec[0], XS_PERM_READ)) { + send_error(conn, errno); + return; + } watch = talloc(conn, struct watch); watch->node = talloc_strdup(watch, vec[0]); watch->token = talloc_strdup(watch, vec[1]); - watch->conn = conn; - watch->priority = strtoul(vec[2], NULL, 0); - watch->relative = relative; + if (relative) + watch->relative_path = get_implicit_path(conn); + else + watch->relative_path = NULL; + INIT_LIST_HEAD(&watch->events); - insert_watch(watch); + list_add_tail(&watch->list, &conn->watches); + trace_create(watch, "watch"); talloc_set_destructor(watch, destroy_watch); - trace_create(watch, "watch"); - return send_ack(conn, XS_WATCH); -} - -bool do_watch_ack(struct connection *conn, const char *token) + send_ack(conn, XS_WATCH); +} + +void do_watch_ack(struct connection *conn, const char *token) { struct watch_event *event; - if (!token) - return send_error(conn, EINVAL); - - if (!conn->waiting_for_ack) - return send_error(conn, ENOENT); - - event = get_first_event(conn); - if (!streq(event->watches[0]->token, token)) - return send_error(conn, EINVAL); - - move_event_onwards(event); - conn->waiting_for_ack = false; - return send_ack(conn, XS_WATCH_ACK); -} - -bool do_unwatch(struct connection *conn, struct buffered_data *in) + if (!token) { + send_error(conn, EINVAL); + return; + } + + if (!conn->waiting_for_ack) { + send_error(conn, ENOENT); + return; + } + + if (!streq(conn->waiting_for_ack->token, token)) { + /* They're confused: this will cause us to send event again */ + conn->waiting_for_ack = NULL; + send_error(conn, EINVAL); + return; + } + + /* Remove event: after ack sent, core will call queue_next_event */ + event = list_top(&conn->waiting_for_ack->events, struct watch_event, + list); + list_del(&event->list); + talloc_free(event); + + conn->waiting_for_ack = NULL; + send_ack(conn, XS_WATCH_ACK); +} + +void do_unwatch(struct connection *conn, struct buffered_data *in) { struct watch *watch; char *node, *vec[2]; - if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec)) - return send_error(conn, EINVAL); + if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec)) { + send_error(conn, EINVAL); + return; + } /* We don't need to worry if we're waiting for an ack for the * watch we're deleting: conn->waiting_for_ack was reset by * this command in consider_message anyway. */ node = canonicalize(conn, vec[0]); - list_for_each_entry(watch, &watches, list) { - if (watch->conn != conn) - continue; - + list_for_each_entry(watch, &conn->watches, list) { if (streq(watch->node, node) && streq(watch->token, vec[1])) { + list_del(&watch->list); talloc_free(watch); - return send_ack(conn, XS_UNWATCH); - } - } - return send_error(conn, ENOENT); + send_ack(conn, XS_UNWATCH); + return; + } + } + send_error(conn, ENOENT); } #ifdef TESTING @@ -387,15 +289,16 @@ struct watch *watch; struct watch_event *event; - /* Find first watch with an event. */ - list_for_each_entry(watch, &watches, list) { - if (watch->conn != conn) - continue; - - printf(" watch on %s token %s prio %i\n", - watch->node, watch->token, watch->priority); + if (conn->waiting_for_ack) + printf(" waiting_for_ack for watch on %s token %s\n", + conn->waiting_for_ack->node, + conn->waiting_for_ack->token); + + list_for_each_entry(watch, &conn->watches, list) { + printf(" watch on %s token %s\n", + watch->node, watch->token); list_for_each_entry(event, &watch->events, list) - printf(" event: %s\n", event->node); + printf(" event: %s\n", event->data); } } #endif diff -r a4196568095c -r b53a65034532 tools/xenstore/xenstored_watch.h --- a/tools/xenstore/xenstored_watch.h Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xenstored_watch.h Fri Jul 29 20:25:03 2005 @@ -22,9 +22,9 @@ #include "xenstored_core.h" -bool do_watch(struct connection *conn, struct buffered_data *in); -bool do_watch_ack(struct connection *conn, const char *token); -bool do_unwatch(struct connection *conn, struct buffered_data *in); +void do_watch(struct connection *conn, struct buffered_data *in); +void do_watch_ack(struct connection *conn, const char *token); +void do_unwatch(struct connection *conn, struct buffered_data *in); /* Is this a watch event message for this connection? */ bool is_watch_event(struct connection *conn, struct buffered_data *out); @@ -32,8 +32,9 @@ /* Look through our watches: if any of them have an event, queue it. */ void queue_next_event(struct connection *conn); -/* Fire all watches: recurse means all the children are effected (ie. rm) */ -void fire_watches(struct transaction *trans, const char *node, bool recurse); +/* Fire all watches: recurse means all the children are effected (ie. rm). + */ +void fire_watches(struct connection *conn, const char *node, bool recurse); /* Find shortest timeout: if any, reduce tv (may already be set). */ void shortest_watch_ack_timeout(struct timeval *tv); diff -r a4196568095c -r b53a65034532 tools/xenstore/xs.c --- a/tools/xenstore/xs.c Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xs.c Fri Jul 29 20:25:03 2005 @@ -401,22 +401,16 @@ /* Watch a node for changes (poll on fd to detect, or call read_watch()). * When the node (or any child) changes, fd will become readable. * Token is returned when watch is read, to allow matching. - * Priority indicates order if multiple watchers: higher is first. * Returns false on failure. */ -bool xs_watch(struct xs_handle *h, const char *path, const char *token, - unsigned int priority) -{ - char prio[MAX_STRLEN(priority)]; - struct iovec iov[3]; - - sprintf(prio, "%u", priority); +bool xs_watch(struct xs_handle *h, const char *path, const char *token) +{ + struct iovec iov[2]; + iov[0].iov_base = (void *)path; iov[0].iov_len = strlen(path) + 1; iov[1].iov_base = (void *)token; iov[1].iov_len = strlen(token) + 1; - iov[2].iov_base = prio; - iov[2].iov_len = strlen(prio) + 1; return xs_bool(xs_talkv(h, XS_WATCH, iov, ARRAY_SIZE(iov), NULL)); } diff -r a4196568095c -r b53a65034532 tools/xenstore/xs.h --- a/tools/xenstore/xs.h Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xs.h Fri Jul 29 20:25:03 2005 @@ -82,11 +82,9 @@ /* Watch a node for changes (poll on fd to detect, or call read_watch()). * When the node (or any child) changes, fd will become readable. * Token is returned when watch is read, to allow matching. - * Priority indicates order if multiple watchers: higher is first. * Returns false on failure. */ -bool xs_watch(struct xs_handle *h, const char *path, const char *token, - unsigned int priority); +bool xs_watch(struct xs_handle *h, const char *path, const char *token); /* Return the FD to poll on to see if a watch has fired. */ int xs_fileno(struct xs_handle *h); diff -r a4196568095c -r b53a65034532 tools/xenstore/xs_random.c --- a/tools/xenstore/xs_random.c Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xs_random.c Fri Jul 29 20:25:03 2005 @@ -987,6 +987,8 @@ char *cmd = talloc_asprintf(NULL, "echo -n r0 > %s/.perms", dir); if (mkdir(dir, 0700) != 0) barf_perror("Creating directory %s", dir); + if (mkdir(talloc_asprintf(cmd, "%s/tool", dir), 0700) != 0) + barf_perror("Creating directory %s/tool", dir); do_command(cmd); talloc_free(cmd); } @@ -1211,6 +1213,10 @@ char *nodename; bool ret = false; + /* Ignore tool/ dir. */ + if (streq(node, "/tool")) + return true; + /* FILE backend expects talloc'ed pointer. */ nodename = talloc_strdup(NULL, node); permsa = a->get_perms(ah, nodename, &numpermsa); diff -r a4196568095c -r b53a65034532 tools/xenstore/xs_test.c --- a/tools/xenstore/xs_test.c Fri Jul 29 18:52:33 2005 +++ b/tools/xenstore/xs_test.c Fri Jul 29 20:25:03 2005 @@ -20,6 +20,7 @@ #include <stdio.h> #include <stdlib.h> #include <sys/types.h> +#include <sys/wait.h> #include <sys/stat.h> #include <fcntl.h> #include <signal.h> @@ -33,6 +34,10 @@ #define XSTEST static struct xs_handle *handles[10] = { NULL }; +static unsigned int children; + +static bool timeout = true; +static bool readonly = false; struct ringbuf_head { @@ -80,6 +85,14 @@ return buf + h->read; } +static int output_avail(struct ringbuf_head *out) +{ + unsigned int avail; + + get_output_chunk(out, out->buf, &avail); + return avail != 0; +} + static void update_output_chunk(struct ringbuf_head *h, uint32_t len) { h->write += len; @@ -99,10 +112,12 @@ void *data, unsigned int len) { unsigned int avail; + int was_full; if (!check_buffer(in)) barf("Corrupt buffer"); + was_full = !output_avail(in); while (len) { const void *src = get_input_chunk(in, in->buf, &avail); if (avail > len) @@ -114,7 +129,8 @@ } /* Tell other end we read something. */ - kill(daemon_pid, SIGUSR2); + if (was_full) + kill(daemon_pid, SIGUSR2); return true; } @@ -173,7 +189,9 @@ " getperm <path>\n" " setperm <path> <id> <flags> ...\n" " shutdown\n" - " watch <path> <token> <prio>\n" + " watch <path> <token>\n" + " async <command>...\n" + " asyncwait\n" " waitwatch\n" " ackwatch <token>\n" " unwatch <path> <token>\n" @@ -186,22 +204,34 @@ " dump\n"); } +static int argpos(const char *line, unsigned int num) +{ + unsigned int i, len = 0, off = 0; + + for (i = 0; i <= num; i++) { + off += len; + off += strspn(line + off, " \t\n"); + len = strcspn(line + off, " \t\n"); + if (!len) + return off; + } + return off; +} + static char *arg(char *line, unsigned int num) { static char *args[10]; - unsigned int i, len = 0; - - for (i = 0; i <= num; i++) { - line += len; - line += strspn(line, " \t\n"); - len = strcspn(line, " \t\n"); - if (!len) - barf("Can't get arg %u", num); - } + unsigned int off, len; + + off = argpos(line, num); + len = strcspn(line + off, " \t\n"); + + if (!len) + barf("Can't get arg %u", num); free(args[num]); args[num] = malloc(len + 1); - memcpy(args[num], line, len); + memcpy(args[num], line+off, len); args[num][len] = '\0'; return args[num]; } @@ -360,10 +390,9 @@ failed(handle); } -static void do_watch(unsigned int handle, const char *node, const char *token, - const char *pri) -{ - if (!xs_watch(handles[handle], node, token, atoi(pri))) +static void do_watch(unsigned int handle, const char *node, const char *token) +{ + if (!xs_watch(handles[handle], node, token)) failed(handle); } @@ -386,6 +415,82 @@ { if (!xs_acknowledge_watch(handles[handle], token)) failed(handle); +} + +static bool wait_for_input(unsigned int handle) +{ + unsigned int i; + for (i = 0; i < ARRAY_SIZE(handles); i++) { + int fd; + + if (!handles[i] || i == handle) + continue; + + fd = xs_fileno(handles[i]); + if (fd == -2) { + unsigned int avail; + get_input_chunk(in, in->buf, &avail); + if (avail != 0) + return true; + } else { + struct timeval tv = {.tv_sec = 0, .tv_usec = 0 }; + fd_set set; + + FD_ZERO(&set); + FD_SET(fd, &set); + if (select(fd+1, &set, NULL, NULL,&tv)) + return true; + } + } + return false; +} + + +/* Async wait for watch on handle */ +static void do_command(unsigned int default_handle, char *line); +static void do_async(unsigned int handle, char *line) +{ + int child; + unsigned int i; + children++; + if ((child = fork()) != 0) { + /* Wait until *something* happens, which indicates + * child has created an event. V. sloppy, but we can't + * select on fake domain connections. + */ + while (!wait_for_input(handle)); + return; + } + + /* Don't keep other handles open in parent. */ + for (i = 0; i < ARRAY_SIZE(handles); i++) { + if (handles[i] && i != handle) { + xs_daemon_close(handles[i]); + handles[i] = NULL; + } + } + + do_command(handle, line + argpos(line, 1)); + exit(0); +} + +static void do_asyncwait(unsigned int handle) +{ + int status; + + if (handle) + barf("handle has no meaning with asyncwait"); + + if (children == 0) + barf("No children to wait for!"); + + if (waitpid(0, &status, 0) > 0) { + if (!WIFEXITED(status)) + barf("async died"); + if (WEXITSTATUS(status)) + exit(WEXITSTATUS(status)); + } + children--; } static void do_unwatch(unsigned int handle, const char *node, const char *token) @@ -533,23 +638,106 @@ free(subdirs); } +static int handle; + +static void alarmed(int sig __attribute__((unused))) +{ + if (handle) { + char handlename[10]; + sprintf(handlename, "%u:", handle); + write(STDOUT_FILENO, handlename, strlen(handlename)); + } + write(STDOUT_FILENO, command, strlen(command)); + write(STDOUT_FILENO, " timeout\n", strlen(" timeout\n")); + exit(1); +} + +static void do_command(unsigned int default_handle, char *line) +{ + char *endp; + + if (strspn(line, " \n") == strlen(line)) + return; + if (strstarts(line, "#")) + return; + + handle = strtoul(line, &endp, 10); + if (endp != line) + memmove(line, endp+1, strlen(endp)); + else + handle = default_handle; + + if (!handles[handle]) { + if (readonly) + handles[handle] = xs_daemon_open_readonly(); + else + handles[handle] = xs_daemon_open(); + if (!handles[handle]) + barf_perror("Opening connection to daemon"); + } + command = arg(line, 0); + + if (timeout) + alarm(1); + + if (streq(command, "dir")) + do_dir(handle, arg(line, 1)); + else if (streq(command, "read")) + do_read(handle, arg(line, 1)); + else if (streq(command, "write")) + do_write(handle, + arg(line, 1), arg(line, 2), arg(line, 3)); + else if (streq(command, "setid")) + do_setid(handle, arg(line, 1)); + else if (streq(command, "mkdir")) + do_mkdir(handle, arg(line, 1)); + else if (streq(command, "rm")) + do_rm(handle, arg(line, 1)); + else if (streq(command, "getperm")) + do_getperm(handle, arg(line, 1)); + else if (streq(command, "setperm")) + do_setperm(handle, arg(line, 1), line); + else if (streq(command, "shutdown")) + do_shutdown(handle); + else if (streq(command, "watch")) + do_watch(handle, arg(line, 1), arg(line, 2)); + else if (streq(command, "waitwatch")) + do_waitwatch(handle); + else if (streq(command, "async")) + do_async(handle, line); + else if (streq(command, "asyncwait")) + do_asyncwait(handle); + else if (streq(command, "ackwatch")) + do_ackwatch(handle, arg(line, 1)); + else if (streq(command, "unwatch")) + do_unwatch(handle, arg(line, 1), arg(line, 2)); + else if (streq(command, "close")) { + xs_daemon_close(handles[handle]); + handles[handle] = NULL; + } else if (streq(command, "start")) + do_start(handle, arg(line, 1)); + else if (streq(command, "commit")) + do_end(handle, false); + else if (streq(command, "abort")) + do_end(handle, true); + else if (streq(command, "introduce")) + do_introduce(handle, arg(line, 1), arg(line, 2), + arg(line, 3), arg(line, 4)); + else if (streq(command, "release")) + do_release(handle, arg(line, 1)); + else if (streq(command, "dump")) + dump(handle); + else if (streq(command, "sleep")) + sleep(atoi(arg(line, 1))); + else + barf("Unknown command %s", command); + fflush(stdout); + alarm(0); +} + int main(int argc, char *argv[]) { char line[1024]; - bool readonly = false, timeout = true; - int handle; - - static void alarmed(int sig __attribute__((unused))) - { - if (handle) { - char handlename[10]; - sprintf(handlename, "%u:", handle); - write(STDOUT_FILENO, handlename, strlen(handlename)); - } - write(STDOUT_FILENO, command, strlen(command)); - write(STDOUT_FILENO, " timeout\n", strlen(" timeout\n")); - exit(1); - } if (argc > 1 && streq(argv[1], "--readonly")) { readonly = true; @@ -557,7 +745,7 @@ argv++; } - if (argc > 1 && streq(argv[1], "--notimeout")) { + if (argc > 1 && streq(argv[1], "--no-timeout")) { timeout = false; argc--; argv++; @@ -570,81 +758,10 @@ ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head); signal(SIGALRM, alarmed); - while (fgets(line, sizeof(line), stdin)) { - char *endp; - - if (strspn(line, " \n") == strlen(line)) - continue; - if (strstarts(line, "#")) - continue; - - handle = strtoul(line, &endp, 10); - if (endp != line) - memmove(line, endp+1, strlen(endp)); - else - handle = 0; - - if (!handles[handle]) { - if (readonly) - handles[handle] = xs_daemon_open_readonly(); - else - handles[handle] = xs_daemon_open(); - if (!handles[handle]) - barf_perror("Opening connection to daemon"); - } - command = arg(line, 0); - - if (timeout) - alarm(5); - if (streq(command, "dir")) - do_dir(handle, arg(line, 1)); - else if (streq(command, "read")) - do_read(handle, arg(line, 1)); - else if (streq(command, "write")) - do_write(handle, - arg(line, 1), arg(line, 2), arg(line, 3)); - else if (streq(command, "setid")) - do_setid(handle, arg(line, 1)); - else if (streq(command, "mkdir")) - do_mkdir(handle, arg(line, 1)); - else if (streq(command, "rm")) - do_rm(handle, arg(line, 1)); - else if (streq(command, "getperm")) - do_getperm(handle, arg(line, 1)); - else if (streq(command, "setperm")) - do_setperm(handle, arg(line, 1), line); - else if (streq(command, "shutdown")) - do_shutdown(handle); - else if (streq(command, "watch")) - do_watch(handle, arg(line, 1), arg(line, 2), arg(line, 3)); - else if (streq(command, "waitwatch")) - do_waitwatch(handle); - else if (streq(command, "ackwatch")) - do_ackwatch(handle, arg(line, 1)); - else if (streq(command, "unwatch")) - do_unwatch(handle, arg(line, 1), arg(line, 2)); - else if (streq(command, "close")) { - xs_daemon_close(handles[handle]); - handles[handle] = NULL; - } else if (streq(command, "start")) - do_start(handle, arg(line, 1)); - else if (streq(command, "commit")) - do_end(handle, false); - else if (streq(command, "abort")) - do_end(handle, true); - else if (streq(command, "introduce")) - do_introduce(handle, arg(line, 1), arg(line, 2), - arg(line, 3), arg(line, 4)); - else if (streq(command, "release")) - do_release(handle, arg(line, 1)); - else if (streq(command, "dump")) - dump(handle); - else if (streq(command, "sleep")) - sleep(atoi(arg(line, 1))); - else - barf("Unknown command %s", command); - fflush(stdout); - alarm(0); - } + while (fgets(line, sizeof(line), stdin)) + do_command(0, line); + + while (children) + do_asyncwait(0); return 0; } diff -r a4196568095c -r b53a65034532 xen/Makefile --- a/xen/Makefile Fri Jul 29 18:52:33 2005 +++ b/xen/Makefile Fri Jul 29 20:25:03 2005 @@ -50,10 +50,10 @@ $(MAKE) -C arch/$(TARGET_ARCH) clean rm -f include/asm *.o $(TARGET)* *~ core rm -f include/asm-*/asm-offsets.h - rm -f tools/figlet/*.o tools/figlet/figlet rm -f include/xen/acm_policy.h $(TARGET): delete-unfresh-files + $(MAKE) -C tools $(MAKE) include/xen/compile.h $(MAKE) include/xen/acm_policy.h [ -e include/asm ] || ln -sf asm-$(TARGET_ARCH) include/asm @@ -71,7 +71,6 @@ delete-unfresh-files: @if [ ! -r include/xen/compile.h -o -O include/xen/compile.h ]; then \ rm -f include/xen/{banner,compile}.h; \ - $(MAKE) -C arch/$(TARGET_ARCH) delete-unfresh-files; \ fi # acm_policy.h contains security policy for Xen @@ -96,21 +95,16 @@ -e 's/@@whoami@@/$(shell whoami)/g' \ -e 's/@@domain@@/$(shell ([ -x /bin/dnsdomainname ] && /bin/dnsdomainname) || ([ -x /bin/domainname ] && /bin/domainname || echo [unknown]))/g' \ -e 's/@@hostname@@/$(shell hostname)/g' \ - -e 's/@@compiler@@/$(shell $(CC) $(CFLAGS) -v 2>&1 | tail -1)/g' \ + -e 's/@@compiler@@/$(shell $(CC) $(CFLAGS) -v 2>&1 | tail -n 1)/g' \ -e 's/@@version@@/$(XEN_VERSION)/g' \ -e 's/@@subversion@@/$(XEN_SUBVERSION)/g' \ -e 's/@@extraversion@@/$(XEN_EXTRAVERSION)/g' \ - -e 's!@@changeset@@!$(shell (hg parents | awk -F: '/^changeset/{CS=$$3};{FS="date:[ ]+"}/^date/{D=$$2}; END {print D, CS}') 2>/dev/null || (head -6 ChangeLog | awk -F: '/^changeset/{CS=$$3};{FS="date:[ ]+"}/^date/{D=$$2}; END {print D, CS}') 2>/dev/null || echo information unavailable)!g' \ + -e 's!@@changeset@@!$(shell (hg parents | awk -F: '/^changeset/{CS=$$3};{FS="date:[ ]+"}/^date/{D=$$2}; END {print D, CS}') 2>/dev/null || (head -n 6 ChangeLog | awk -F: '/^changeset/{CS=$$3};{FS="date:[ ]+"}/^date/{D=$$2}; END {print D, CS}') 2>/dev/null || echo information unavailable)!g' \ < include/xen/compile.h.in > $@.new @cat include/xen/banner.h >> $@.new @mv -f $@.new $@ -tools/figlet/figlet: tools/figlet/figlet.o - $(HOSTCC) -o $@ $< -tools/figlet/figlet.o: tools/figlet/figlet.c - $(HOSTCC) -o $@ -c $< - -include/xen/banner.h: tools/figlet/figlet tools/figlet/xen.flf +include/xen/banner.h: tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) > $@.new @mv -f $@.new $@ @@ -147,4 +141,4 @@ $(all_sources) > cscope.files cscope -k -b -q MAP: - nm $(TARGET) | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map + $(NM) $(TARGET) | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map diff -r a4196568095c -r b53a65034532 xen/acm/acm_core.c --- a/xen/acm/acm_core.c Fri Jul 29 18:52:33 2005 +++ b/xen/acm/acm_core.c Fri Jul 29 20:25:03 2005 @@ -5,6 +5,9 @@ * * Author: * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * + * Contributors: + * Stefan Berger <stefanb@xxxxxxxxxxxxxx> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -25,6 +28,7 @@ #include <xen/lib.h> #include <xen/delay.h> #include <xen/sched.h> +#include <xen/multiboot.h> #include <acm/acm_hooks.h> #include <acm/acm_endian.h> @@ -81,9 +85,68 @@ acm_bin_pol.secondary_binary_policy = secondary; } +static int +acm_setup(unsigned int *initrdidx, + const multiboot_info_t *mbi, + unsigned long initial_images_start) +{ + int i; + module_t *mod = (module_t *)__va(mbi->mods_addr); + int rc = ACM_OK; + + if (mbi->mods_count > 1) + *initrdidx = 1; + + /* + * Try all modules and see whichever could be the binary policy. + * Adjust the initrdidx if module[1] is the binary policy. + */ + for (i = mbi->mods_count-1; i >= 1; i--) { + struct acm_policy_buffer *pol; + char *_policy_start; + unsigned long _policy_len; +#if defined(__i386__) + _policy_start = (char *)(initial_images_start + (mod[i].mod_start-mod[0].mod_start)); +#elif defined(__x86_64__) + _policy_start = __va(initial_images_start + (mod[i].mod_start-mod[0].mod_start)); +#else +#error Architecture unsupported by sHype +#endif + _policy_len = mod[i].mod_end - mod[i].mod_start; + if (_policy_len < sizeof(struct acm_policy_buffer)) + continue; /* not a policy */ + + pol = (struct acm_policy_buffer *)_policy_start; + if (ntohl(pol->magic) == ACM_MAGIC) { + rc = acm_set_policy((void *)_policy_start, + (u16)_policy_len, + ACM_USE_SECURITY_POLICY, + 0); + if (rc == ACM_OK) { + printf("Policy len 0x%lx, start at %p.\n",_policy_len,_policy_start); + if (i == 1) { + if (mbi->mods_count > 2) { + *initrdidx = 2; + } else { + *initrdidx = 0; + } + } else { + *initrdidx = 1; + } + break; + } else { + printk("Invalid policy. %d.th module line.\n", i+1); + } + } /* end if a binary policy definition, i.e., (ntohl(pol->magic) == ACM_MAGIC ) */ + } + return rc; +} + int -acm_init(void) +acm_init(unsigned int *initrdidx, + const multiboot_info_t *mbi, + unsigned long initial_images_start) { int ret = -EINVAL; @@ -127,10 +190,12 @@ if (ret != ACM_OK) return -EINVAL; + acm_setup(initrdidx, mbi, initial_images_start); printk("%s: Enforcing Primary %s, Secondary %s.\n", __func__, ACM_POLICY_NAME(acm_bin_pol.primary_policy_code), ACM_POLICY_NAME(acm_bin_pol.secondary_policy_code)); - return ACM_OK; -} + return ret; +} + #endif diff -r a4196568095c -r b53a65034532 xen/acm/acm_policy.c --- a/xen/acm/acm_policy.c Fri Jul 29 18:52:33 2005 +++ b/xen/acm/acm_policy.c Fri Jul 29 20:25:03 2005 @@ -33,7 +33,7 @@ #include <acm/acm_endian.h> int -acm_set_policy(void *buf, u16 buf_size, u16 policy) +acm_set_policy(void *buf, u16 buf_size, u16 policy, int isuserbuffer) { u8 *policy_buffer = NULL; struct acm_policy_buffer *pol; @@ -53,16 +53,21 @@ /* 1. copy buffer from domain */ if ((policy_buffer = xmalloc_array(u8, buf_size)) == NULL) goto error_free; - if (copy_from_user(policy_buffer, buf, buf_size)) { - printk("%s: Error copying!\n",__func__); - goto error_free; + if (isuserbuffer) { + if (copy_from_user(policy_buffer, buf, buf_size)) { + printk("%s: Error copying!\n",__func__); + goto error_free; + } + } else { + memcpy(policy_buffer, buf, buf_size); } /* 2. some sanity checking */ pol = (struct acm_policy_buffer *)policy_buffer; if ((ntohl(pol->magic) != ACM_MAGIC) || (ntohs(pol->primary_policy_code) != acm_bin_pol.primary_policy_code) || - (ntohs(pol->secondary_policy_code) != acm_bin_pol.secondary_policy_code)) { + (ntohs(pol->secondary_policy_code) != acm_bin_pol.secondary_policy_code) || + (ntohl(pol->policyversion) != POLICY_INTERFACE_VERSION)) { printkd("%s: Wrong policy magics!\n", __func__); goto error_free; } diff -r a4196568095c -r b53a65034532 xen/arch/ia64/Makefile --- a/xen/arch/ia64/Makefile Fri Jul 29 18:52:33 2005 +++ b/xen/arch/ia64/Makefile Fri Jul 29 20:25:03 2005 @@ -82,9 +82,4 @@ rm -f asm-xsi-offsets.s $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h rm -f lib/*.o -# setup.o contains bits of compile.h so it must be blown away -delete-unfresh-files: - echo any unfresh-files to delete for ia64\? -# rm -f setup.o - -.PHONY: default clean delete-unfresh-files +.PHONY: default clean diff -r a4196568095c -r b53a65034532 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/Makefile Fri Jul 29 20:25:03 2005 @@ -13,11 +13,18 @@ OBJS := $(subst cpu/cyrix.o,,$(OBJS)) OBJS := $(subst cpu/rise.o,,$(OBJS)) OBJS := $(subst cpu/transmeta.o,,$(OBJS)) -OBJS := $(subst shadow32.o,,$(OBJS)) -else -OBJS := $(subst shadow.o,,$(OBJS)) -OBJS := $(subst shadow_public.o,,$(OBJS)) -OBJS := $(subst shadow_xxx.o,,$(OBJS)) +endif + +OBJS := $(patsubst shadow%.o,,$(OBJS)) # drop all +ifeq ($(TARGET_SUBARCH),x86_64) + OBJS += shadow.o shadow_public.o # x86_64: new code +endif +ifeq ($(TARGET_SUBARCH),x86_32) + ifneq ($(pae),n) + OBJS += shadow.o shadow_public.o # x86_32p: new code + else + OBJS += shadow32.o # x86_32: old code + endif endif OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS)) @@ -37,6 +44,15 @@ $(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(TARGET_SUBARCH)/xen.lds $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \ boot/$(TARGET_SUBARCH).o $(ALL_OBJS) -o $@ + $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S + $(MAKE) $(BASEDIR)/xen-syms.o + $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \ + boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@ + $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S + $(MAKE) $(BASEDIR)/xen-syms.o + $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \ + boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@ + rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS) $(CC) $(CFLAGS) -S -o $@ $< @@ -53,7 +69,4 @@ rm -f genapic/*.o genapic/*~ genapic/core rm -f cpu/*.o cpu/*~ cpu/core -delete-unfresh-files: - # nothing - -.PHONY: default clean delete-unfresh-files +.PHONY: default clean diff -r a4196568095c -r b53a65034532 xen/arch/x86/acpi/boot.c --- a/xen/arch/x86/acpi/boot.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/acpi/boot.c Fri Jul 29 20:25:03 2005 @@ -382,7 +382,7 @@ return -1; } -#ifdef CONFIG_X86_64 +#if 0/*def CONFIG_X86_64*/ vxtime.hpet_address = hpet_tbl->addr.addrl | ((long) hpet_tbl->addr.addrh << 32); diff -r a4196568095c -r b53a65034532 xen/arch/x86/apic.c --- a/xen/arch/x86/apic.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/apic.c Fri Jul 29 20:25:03 2005 @@ -723,16 +723,8 @@ static void __init setup_APIC_timer(unsigned int clocks) { unsigned long flags; - local_irq_save(flags); - - /* - * Wait for IRQ0's slice: - */ - wait_timer_tick(); - __setup_APIC_LVTT(clocks); - local_irq_restore(flags); } diff -r a4196568095c -r b53a65034532 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/domain.c Fri Jul 29 20:25:03 2005 @@ -373,6 +373,14 @@ out: free_vmcs(vmcs); + if(v->arch.arch_vmx.io_bitmap_a != 0) { + free_xenheap_pages(v->arch.arch_vmx.io_bitmap_a, get_order(0x1000)); + v->arch.arch_vmx.io_bitmap_a = 0; + } + if(v->arch.arch_vmx.io_bitmap_b != 0) { + free_xenheap_pages(v->arch.arch_vmx.io_bitmap_b, get_order(0x1000)); + v->arch.arch_vmx.io_bitmap_b = 0; + } v->arch.arch_vmx.vmcs = 0; return error; } @@ -417,12 +425,12 @@ /* Ensure real hardware interrupts are enabled. */ v->arch.guest_context.user_regs.eflags |= EF_IE; - } else { - __vmwrite(GUEST_RFLAGS, v->arch.guest_context.user_regs.eflags); - if (v->arch.guest_context.user_regs.eflags & EF_TF) - __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); - else - __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); + } + else if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) + { + return modify_vmcs( + &v->arch.arch_vmx, + &v->arch.guest_context.user_regs); } if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) @@ -926,6 +934,14 @@ BUG_ON(v->arch.arch_vmx.vmcs == NULL); free_vmcs(v->arch.arch_vmx.vmcs); + if(v->arch.arch_vmx.io_bitmap_a != 0) { + free_xenheap_pages(v->arch.arch_vmx.io_bitmap_a, get_order(0x1000)); + v->arch.arch_vmx.io_bitmap_a = 0; + } + if(v->arch.arch_vmx.io_bitmap_b != 0) { + free_xenheap_pages(v->arch.arch_vmx.io_bitmap_b, get_order(0x1000)); + v->arch.arch_vmx.io_bitmap_b = 0; + } v->arch.arch_vmx.vmcs = 0; free_monitor_pagetable(v); diff -r a4196568095c -r b53a65034532 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/domain_build.c Fri Jul 29 20:25:03 2005 @@ -63,7 +63,7 @@ unsigned int order = get_order(max * PAGE_SIZE); if ( (max & (max-1)) != 0 ) order--; - while ( (page = alloc_domheap_pages(d, order)) == NULL ) + while ( (page = alloc_domheap_pages(d, order, 0)) == NULL ) if ( order-- == 0 ) break; return page; @@ -165,6 +165,8 @@ xen_pae ? "yes" : "no", dom0_pae ? "yes" : "no"); return -EINVAL; } + if (strstr(dsi.xen_section_string, "SHADOW=translate")) + opt_dom0_translate = 1; /* Align load address to 4MB boundary. */ dsi.v_start &= ~((1UL<<22)-1); @@ -618,11 +620,13 @@ if ( opt_dom0_shadow || opt_dom0_translate ) { + printk("dom0: shadow enable\n"); shadow_mode_enable(d, (opt_dom0_translate ? SHM_enable | SHM_refcounts | SHM_translate : SHM_enable)); if ( opt_dom0_translate ) { + printk("dom0: shadow translate\n"); #if defined(__i386__) && defined(CONFIG_X86_PAE) printk("FIXME: PAE code needed here: %s:%d (%s)\n", __FILE__, __LINE__, __FUNCTION__); @@ -655,6 +659,7 @@ } update_pagetables(v); /* XXX SMP */ + printk("dom0: shadow setup done\n"); } return 0; diff -r a4196568095c -r b53a65034532 xen/arch/x86/genapic/es7000plat.c --- a/xen/arch/x86/genapic/es7000plat.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/genapic/es7000plat.c Fri Jul 29 20:25:03 2005 @@ -136,7 +136,19 @@ es7000_plat = 0; } else { printk("\nEnabling ES7000 specific features...\n"); - es7000_plat = 1; + /* + * Determine the generation of the ES7000 currently running. + * + * es7000_plat = 0 if the machine is NOT a Unisys ES7000 box + * es7000_plat = 1 if the machine is a 5xx ES7000 box + * es7000_plat = 2 if the machine is a x86_64 ES7000 box + * + */ + if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) + es7000_plat = 2; + else + es7000_plat = 1; + ioapic_renumber_irq = es7000_rename_gsi; } return es7000_plat; @@ -286,7 +298,7 @@ void __init es7000_sw_apic() { - if (es7000_plat) { + if (es7000_plat == 1) { int mip_status; struct mip_reg es7000_mip_reg; diff -r a4196568095c -r b53a65034532 xen/arch/x86/i8259.c --- a/xen/arch/x86/i8259.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/i8259.c Fri Jul 29 20:25:03 2005 @@ -19,7 +19,7 @@ #include <asm/bitops.h> #include <xen/delay.h> #include <asm/apic.h> - +#include <io_ports.h> /* * Common place to define all x86 IRQ vectors @@ -395,9 +395,9 @@ /* Set the clock to HZ Hz */ #define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */ #define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ) - outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ - outb_p(LATCH & 0xff , 0x40); /* LSB */ - outb(LATCH >> 8 , 0x40); /* MSB */ + outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(LATCH & 0xff, PIT_CH0); /* LSB */ + outb(LATCH >> 8, PIT_CH0); /* MSB */ setup_irq(2, &cascade); } diff -r a4196568095c -r b53a65034532 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/io_apic.c Fri Jul 29 20:25:03 2005 @@ -956,6 +956,13 @@ unsigned long flags; /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15)) + return; + + /* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ @@ -981,10 +988,6 @@ mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; } - /* Don't check I/O APIC IDs for some xAPIC systems. They have - * no meaning without the serial APIC bus. */ - if (NO_IOAPIC_CHECK) - continue; /* * Sanity check, is the ID really free? Every APIC in a * system must have a unique ID or we get lots of nice diff -r a4196568095c -r b53a65034532 xen/arch/x86/mpparse.c --- a/xen/arch/x86/mpparse.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/mpparse.c Fri Jul 29 20:25:03 2005 @@ -913,7 +913,10 @@ mp_ioapics[idx].mpc_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) + mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + else + mp_ioapics[idx].mpc_apicid = id; mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); /* @@ -995,9 +998,9 @@ Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); /* - * ES7000 has no legacy identity mappings - */ - if (es7000_plat) + * Older generations of ES7000 have no legacy identity mappings + */ + if (es7000_plat == 1) return; /* @@ -1053,11 +1056,20 @@ } } +#define MAX_GSI_NUM 4096 + int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; + static int pci_irq = 16; + /* + * Mapping between Global System Interrups, which + * represent all possible interrupts, and IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; #ifdef CONFIG_ACPI_BUS /* Don't set up the ACPI SCI because it's already set up */ @@ -1092,10 +1104,25 @@ if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); - return gsi; + return gsi_to_irq[gsi]; } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); + + if (edge_level) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. + */ + int irq = gsi; + if (gsi < MAX_GSI_NUM) { + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } else { + printk(KERN_ERR "GSI %u is too high\n", gsi); + return gsi; + } + } io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, diff -r a4196568095c -r b53a65034532 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/setup.c Fri Jul 29 20:25:03 2005 @@ -197,7 +197,12 @@ set_in_cr4(X86_CR4_OSXMMEXCPT); if ( opt_nosmp ) + { max_cpus = 0; + smp_num_siblings = 1; + boot_cpu_data.x86_num_cores = 1; + } + smp_prepare_cpus(max_cpus); /* We aren't hotplug-capable yet. */ @@ -245,6 +250,8 @@ module_t *mod = (module_t *)__va(mbi->mods_addr); unsigned long firsthole_start, nr_pages; unsigned long initial_images_start, initial_images_end; + unsigned long _initrd_start = 0, _initrd_len = 0; + unsigned int initrdidx = 1; struct e820entry e820_raw[E820MAX]; int i, e820_raw_nr = 0, bytes = 0; struct ns16550_defaults ns16550 = { @@ -411,7 +418,7 @@ shadow_mode_init(); /* initialize access control security module */ - acm_init(); + acm_init(&initrdidx, mbi, initial_images_start); /* Create initial domain 0. */ dom0 = do_createdomain(0, 0); @@ -450,6 +457,13 @@ } } + if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) ) + { + _initrd_start = initial_images_start + + (mod[initrdidx].mod_start - mod[0].mod_start); + _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start; + } + /* * We're going to setup domain0 using the module(s) that we stashed safely * above our heap. The second module, if present, is an initrd ramdisk. @@ -457,11 +471,8 @@ if ( construct_dom0(dom0, initial_images_start, mod[0].mod_end-mod[0].mod_start, - (mbi->mods_count == 1) ? 0 : - initial_images_start + - (mod[1].mod_start-mod[0].mod_start), - (mbi->mods_count == 1) ? 0 : - mod[mbi->mods_count-1].mod_end - mod[1].mod_start, + _initrd_start, + _initrd_len, cmdline) != 0) panic("Could not set up DOM0 guest OS\n"); diff -r a4196568095c -r b53a65034532 xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/shadow.c Fri Jul 29 20:25:03 2005 @@ -41,7 +41,13 @@ static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn); #endif -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS == 3 +#include <asm/shadow_64.h> +static unsigned long shadow_l3_table( + struct domain *d, unsigned long gpfn, unsigned long gmfn); +#endif + +#if CONFIG_PAGING_LEVELS == 4 #include <asm/shadow_64.h> static unsigned long shadow_l4_table( struct domain *d, unsigned long gpfn, unsigned long gmfn); @@ -1069,6 +1075,11 @@ int is_l1_shadow = ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) == PGT_l1_shadow); +#if CONFIG_PAGING_LEVELS == 4 + is_l1_shadow |= + ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) == + PGT_fl1_shadow); +#endif match = l1e_from_pfn(readonly_gmfn, flags); @@ -1684,7 +1695,7 @@ if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) ) { - if ( shadow_mode_page_writable(d, l1e_get_pfn(gpte)) ) + if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) ) { allow_writes = 1; l1e_add_flags(gpte, _PAGE_RW); @@ -1833,7 +1844,7 @@ unsigned long gpfn = __mfn_to_gpfn(d, gmfn); unsigned long smfn, old_smfn; -#if defined (__i386__) +#if CONFIG_PAGING_LEVELS == 2 unsigned long hl2mfn; #endif @@ -1890,7 +1901,7 @@ v->arch.shadow_vtable = map_domain_page(smfn); } -#if defined (__i386__) +#if CONFIG_PAGING_LEVELS == 2 /* * arch.hl2_vtable */ @@ -1935,6 +1946,10 @@ // XXX - maybe this can be optimized somewhat?? local_flush_tlb(); } +#endif + +#if CONFIG_PAGING_LEVELS == 3 + /* FIXME: PAE code to be written */ #endif } @@ -2427,6 +2442,7 @@ struct domain *d, unsigned long gpfn, unsigned long gmfn) { BUG(); /* not implemenated yet */ + return 42; } #endif @@ -2581,7 +2597,7 @@ * shadow_set_lxe should be put in shadow.h */ static void shadow_set_l2e_64(unsigned long va, l2_pgentry_t sl2e, - int create_l2_shadow) + int create_l2_shadow, int put_ref_check) { struct vcpu *v = current; l4_pgentry_t sl4e; @@ -2608,6 +2624,17 @@ printk("For non VMX shadow, create_l1_shadow:%d\n", create_l2_shadow); } shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va)); + + } + + if ( put_ref_check ) { + l2_pgentry_t tmp_sl2e; + if ( __shadow_get_l2e(v, va, &tmp_sl2e) ) { + if ( l2e_get_flags(tmp_sl2e) & _PAGE_PRESENT ) + if ( l2e_get_pfn(tmp_sl2e) == l2e_get_pfn(sl2e) ) { + put_shadow_ref(l2e_get_pfn(sl2e)); + } + } } @@ -2681,7 +2708,7 @@ l1_pgentry_t old_sl1e; l2_pgentry_t sl2e; unsigned long nx = 0; - + int put_ref_check = 0; /* Check if gpfn is 2M aligned */ /* Update guest l2e */ @@ -2712,6 +2739,7 @@ l2e_get_pfn(sl2e) == l1_mfn) { ESH_LOG("sl2e PRSENT bit is set: %lx, l1_mfn = %lx\n", l2e_get_pfn(sl2e), l1_mfn); } else { + put_ref_check = 1; if (!get_shadow_ref(l1_mfn)) BUG(); } @@ -2735,7 +2763,7 @@ ESH_LOG("<%s>: sl2e = %lx\n", __func__, l2e_get_intpte(sl2e)); /* Map the page to l2*/ - shadow_set_l2e_64(va, sl2e, 1); + shadow_set_l2e_64(va, sl2e, 1, put_ref_check); if (l2e_get_flags(gl2e) & _PAGE_NX) l2e_add_flags(tmp_l2e, _PAGE_NX); @@ -2900,10 +2928,14 @@ static void shadow_invlpg_64(struct vcpu *v, unsigned long va) { struct domain *d = v->domain; - //l1_pgentry_64_t gl1e, sl1e; - l1_pgentry_t sl1e; + l1_pgentry_t sl1e, old_sl1e; shadow_lock(d); + + if ( __shadow_get_l1e(v, va, &old_sl1e) ) + if ( l1e_get_flags(old_sl1e) & _PAGE_PRESENT ) + put_page_from_l1e(old_sl1e, d); + sl1e = l1e_empty(); __shadow_set_l1e(v, va, &sl1e); diff -r a4196568095c -r b53a65034532 xen/arch/x86/shadow32.c --- a/xen/arch/x86/shadow32.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/shadow32.c Fri Jul 29 20:25:03 2005 @@ -2612,7 +2612,7 @@ if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) ) { - if ( shadow_mode_page_writable(d, l1e_get_pfn(gpte)) ) + if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) ) { allow_writes = 1; l1e_add_flags(gpte, _PAGE_RW); diff -r a4196568095c -r b53a65034532 xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/smpboot.c Fri Jul 29 20:25:03 2005 @@ -40,6 +40,7 @@ #include <xen/sched.h> #include <xen/irq.h> #include <xen/delay.h> +#include <xen/softirq.h> #include <asm/current.h> #include <asm/mc146818rtc.h> #include <asm/desc.h> @@ -406,6 +407,7 @@ */ if (cpu_has_tsc && cpu_khz) synchronize_tsc_ap(); + calibrate_tsc_ap(); } int cpucount; @@ -464,6 +466,8 @@ /* We can take interrupts now: we're officially "up". */ local_irq_enable(); + + init_percpu_time(); wmb(); startup_cpu_idle_loop(); @@ -1149,6 +1153,7 @@ */ if (cpu_has_tsc && cpucount && cpu_khz) synchronize_tsc_bp(); + calibrate_tsc_bp(); } /* These are wrappers to interface to the new boot process. Someone @@ -1167,22 +1172,21 @@ int __devinit __cpu_up(unsigned int cpu) { /* This only works at boot for x86. See "rewrite" above. */ - if (cpu_isset(cpu, smp_commenced_mask)) { - local_irq_enable(); + if (cpu_isset(cpu, smp_commenced_mask)) return -ENOSYS; - } /* In case one didn't come up */ - if (!cpu_isset(cpu, cpu_callin_map)) { - local_irq_enable(); + if (!cpu_isset(cpu, cpu_callin_map)) return -EIO; - } - - local_irq_enable(); + /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); - while (!cpu_isset(cpu, cpu_online_map)) + while (!cpu_isset(cpu, cpu_online_map)) { mb(); + if (softirq_pending(0)) + do_softirq(); + } + return 0; } diff -r a4196568095c -r b53a65034532 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/time.c Fri Jul 29 20:25:03 2005 @@ -1,16 +1,12 @@ -/**************************************************************************** - * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge - * (C) 2002-2003 University of Cambridge - **************************************************************************** - * - * File: i386/time.c - * Author: Rolf Neugebar & Keir Fraser - */ - -/* - * linux/arch/i386/kernel/time.c - * - * Copyright (C) 1991, 1992, 1995 Linus Torvalds +/****************************************************************************** + * arch/x86/time.c + * + * Per-CPU time calibration and management. + * + * Copyright (c) 2002-2005, K A Fraser + * + * Portions from Linux are: + * Copyright (c) 1991, 1992, 1995 Linus Torvalds */ #include <xen/config.h> @@ -31,29 +27,84 @@ #include <asm/processor.h> #include <asm/fixmap.h> #include <asm/mc146818rtc.h> - -/* GLOBAL */ +#include <asm/div64.h> +#include <asm/hpet.h> +#include <io_ports.h> + +/* opt_hpet_force: If true, force HPET configuration via PCI space. */ +/* NB. This is a gross hack. Mainly useful for HPET testing. */ +static int opt_hpet_force = 0; +boolean_param("hpet_force", opt_hpet_force); + +#define EPOCH MILLISECS(1000) + unsigned long cpu_khz; /* CPU clock frequency in kHz. */ +unsigned long hpet_address; spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; int timer_ack = 0; unsigned long volatile jiffies; - -/* PRIVATE */ -static unsigned int rdtsc_bitshift; /* Which 32 bits of TSC do we use? */ -static u64 cpu_freq; /* CPU frequency (Hz) */ -static u32 st_scale_f; /* Cycles -> ns, fractional part */ -static u32 st_scale_i; /* Cycles -> ns, integer part */ -static u32 shifted_tsc_irq; /* CPU0's TSC at last 'time update' */ -static u64 full_tsc_irq; /* ...ditto, but all 64 bits */ -static s_time_t stime_irq; /* System time at last 'time update' */ -static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */ -static rwlock_t time_lock = RW_LOCK_UNLOCKED; +static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */ + +struct time_scale { + int shift; + u32 mul_frac; +}; + +struct cpu_time { + u64 local_tsc_stamp; + s_time_t stime_local_stamp; + s_time_t stime_master_stamp; + struct time_scale tsc_scale; + struct ac_timer calibration_timer; +} __cacheline_aligned; + +static struct cpu_time cpu_time[NR_CPUS]; + +/* Protected by platform_timer_lock. */ +static s_time_t stime_platform_stamp; +static u64 platform_timer_stamp; +static struct time_scale platform_timer_scale; +static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED; +static u64 (*read_platform_count)(void); + +static inline u32 down_shift(u64 time, int shift) +{ + if ( shift < 0 ) + return (u32)(time >> -shift); + return (u32)((u32)time << shift); +} + +/* + * 32-bit division of integer dividend and integer divisor yielding + * 32-bit fractional quotient. + */ +static inline u32 div_frac(u32 dividend, u32 divisor) +{ + u32 quotient, remainder; + ASSERT(dividend < divisor); + __asm__ ( + "div %4" + : "=a" (quotient), "=d" (remainder) + : "0" (0), "1" (dividend), "r" (divisor) ); + return quotient; +} + +/* + * 32-bit multiplication of multiplicand and fractional multiplier + * yielding 32-bit product (radix point at same position as in multiplicand). + */ +static inline u32 mul_frac(u32 multiplicand, u32 multiplier) +{ + u32 product_int, product_frac; + __asm__ ( + "mul %3" + : "=a" (product_frac), "=d" (product_int) + : "0" (multiplicand), "r" (multiplier) ); + return product_int; +} void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs) { - write_lock_irq(&time_lock); - -#ifdef CONFIG_X86_IO_APIC if ( timer_ack ) { extern spinlock_t i8259A_lock; @@ -63,30 +114,9 @@ inb(0x20); spin_unlock(&i8259A_lock); } -#endif - /* - * Updates TSC timestamp (used to interpolate passage of time between - * interrupts). - */ - rdtscll(full_tsc_irq); - shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift); - /* Update jiffies counter. */ (*(unsigned long *)&jiffies)++; - - /* Update wall time. */ - wc_usec += 1000000/HZ; - if ( wc_usec >= 1000000 ) - { - wc_usec -= 1000000; - wc_sec++; - } - - /* Updates system time (nanoseconds since boot). */ - stime_irq += MILLISECS(1000/HZ); - - write_unlock_irq(&time_lock); /* Rough hack to allow accurate timers to sort-of-work with no APIC. */ if ( !cpu_has_apic ) @@ -103,9 +133,9 @@ #define CALIBRATE_FRAC 20 /* calibrate over 50ms */ #define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC) -static unsigned long __init calibrate_tsc(void) -{ - u64 start, end, diff; +static u64 calibrate_boot_tsc(void) +{ + u64 start, end; unsigned long count; /* Set the Gate high, disable speaker */ @@ -118,9 +148,9 @@ * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB) * to begin countdown. */ - outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ - outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ - outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ + outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */ + outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */ + outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */ rdtscll(start); for ( count = 0; (inb(0x61) & 0x20) == 0; count++ ) @@ -131,15 +161,368 @@ if ( count == 0 ) return 0; - diff = end - start; - -#if defined(__i386__) - /* If quotient doesn't fit in 32 bits then we return error (zero). */ - if ( diff & ~0xffffffffULL ) + return ((end - start) * (u64)CALIBRATE_FRAC); +} + +static void set_time_scale(struct time_scale *ts, u64 ticks_per_sec) +{ + u64 tps64 = ticks_per_sec; + u32 tps32; + int shift = 0; + + while ( tps64 > (MILLISECS(1000)*2) ) + { + tps64 >>= 1; + shift--; + } + + tps32 = (u32)tps64; + while ( tps32 < (u32)MILLISECS(1000) ) + { + tps32 <<= 1; + shift++; + } + + ts->mul_frac = div_frac(MILLISECS(1000), tps32); + ts->shift = shift; +} + +static atomic_t tsc_calibrate_gang = ATOMIC_INIT(0); +static unsigned int tsc_calibrate_status = 0; + +void calibrate_tsc_bp(void) +{ + while ( atomic_read(&tsc_calibrate_gang) != (num_booting_cpus() - 1) ) + mb(); + + outb(CALIBRATE_LATCH & 0xff, PIT_CH2); + outb(CALIBRATE_LATCH >> 8, PIT_CH2); + + tsc_calibrate_status = 1; + wmb(); + + while ( (inb(0x61) & 0x20) == 0 ) + continue; + + tsc_calibrate_status = 2; + wmb(); + + while ( atomic_read(&tsc_calibrate_gang) != 0 ) + mb(); +} + +void calibrate_tsc_ap(void) +{ + u64 t1, t2, ticks_per_sec; + + atomic_inc(&tsc_calibrate_gang); + + while ( tsc_calibrate_status < 1 ) + mb(); + + rdtscll(t1); + + while ( tsc_calibrate_status < 2 ) + mb(); + + rdtscll(t2); + + ticks_per_sec = (t2 - t1) * (u64)CALIBRATE_FRAC; + set_time_scale(&cpu_time[smp_processor_id()].tsc_scale, ticks_per_sec); + + atomic_dec(&tsc_calibrate_gang); +} + +static char *freq_string(u64 freq) +{ + static char s[20]; + unsigned int x, y; + y = (unsigned int)do_div(freq, 1000000) / 1000; + x = (unsigned int)freq; + sprintf(s, "%u.%03uMHz", x, y); + return s; +} + +/************************************************************ + * PLATFORM TIMER 1: PROGRAMMABLE INTERVAL TIMER (LEGACY PIT) + */ + +/* Protected by platform_timer_lock. */ +static u64 pit_counter64; +static u16 pit_stamp; +static struct ac_timer pit_overflow_timer; + +static u16 pit_read_counter(void) +{ + u16 count; + ASSERT(spin_is_locked(&platform_timer_lock)); + outb(0x80, PIT_MODE); + count = inb(PIT_CH2); + count |= inb(PIT_CH2) << 8; + return count; +} + +static void pit_overflow(void *unused) +{ + u16 counter; + + spin_lock(&platform_timer_lock); + counter = pit_read_counter(); + pit_counter64 += (u16)(pit_stamp - counter); + pit_stamp = counter; + spin_unlock(&platform_timer_lock); + + set_ac_timer(&pit_overflow_timer, NOW() + MILLISECS(20)); +} + +static u64 read_pit_count(void) +{ + return pit_counter64 + (u16)(pit_stamp - pit_read_counter()); +} + +static int init_pit(void) +{ + read_platform_count = read_pit_count; + + init_ac_timer(&pit_overflow_timer, pit_overflow, NULL, 0); + pit_overflow(NULL); + platform_timer_stamp = pit_counter64; + set_time_scale(&platform_timer_scale, CLOCK_TICK_RATE); + + printk("Platform timer is %s PIT\n", freq_string(CLOCK_TICK_RATE)); + + return 1; +} + +/************************************************************ + * PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET) + */ + +/* Protected by platform_timer_lock. */ +static u64 hpet_counter64, hpet_overflow_period; +static u32 hpet_stamp; +static struct ac_timer hpet_overflow_timer; + +static void hpet_overflow(void *unused) +{ + u32 counter; + + spin_lock(&platform_timer_lock); + counter = hpet_read32(HPET_COUNTER); + hpet_counter64 += (u32)(counter - hpet_stamp); + hpet_stamp = counter; + spin_unlock(&platform_timer_lock); + + set_ac_timer(&hpet_overflow_timer, NOW() + hpet_overflow_period); +} + +static u64 read_hpet_count(void) +{ + return hpet_counter64 + (u32)(hpet_read32(HPET_COUNTER) - hpet_stamp); +} + +static int init_hpet(void) +{ + u64 hpet_rate; + u32 hpet_id, hpet_period, cfg; + int i; + + if ( (hpet_address == 0) && opt_hpet_force ) + { + outl(0x800038a0, 0xcf8); + outl(0xff000001, 0xcfc); + outl(0x800038a0, 0xcf8); + hpet_address = inl(0xcfc) & 0xfffffffe; + printk("WARNING: Forcibly enabled HPET at %#lx.\n", hpet_address); + } + + if ( hpet_address == 0 ) return 0; -#endif - - return (unsigned long)diff; + + set_fixmap_nocache(FIX_HPET_BASE, hpet_address); + + hpet_id = hpet_read32(HPET_ID); + if ( hpet_id == 0 ) + { + printk("BAD HPET vendor id.\n"); + return 0; + } + + /* Check for sane period (100ps <= period <= 100ns). */ + hpet_period = hpet_read32(HPET_PERIOD); + if ( (hpet_period > 100000000) || (hpet_period < 100000) ) + { + printk("BAD HPET period %u.\n", hpet_period); + return 0; + } + + cfg = hpet_read32(HPET_CFG); + cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); + hpet_write32(cfg, HPET_CFG); + + for ( i = 0; i <= ((hpet_id >> 8) & 31); i++ ) + { + cfg = hpet_read32(HPET_T0_CFG + i*0x20); + cfg &= ~HPET_TN_ENABLE; + hpet_write32(cfg & ~HPET_TN_ENABLE, HPET_T0_CFG); + } + + cfg = hpet_read32(HPET_CFG); + cfg |= HPET_CFG_ENABLE; + hpet_write32(cfg, HPET_CFG); + + read_platform_count = read_hpet_count; + + hpet_rate = 1000000000000000ULL; /* 10^15 */ + (void)do_div(hpet_rate, hpet_period); + set_time_scale(&platform_timer_scale, hpet_rate); + + /* Trigger overflow avoidance roughly when counter increments 2^31. */ + if ( (hpet_rate >> 31) != 0 ) + { + hpet_overflow_period = MILLISECS(1000); + (void)do_div(hpet_overflow_period, (u32)(hpet_rate >> 31) + 1); + } + else + { + hpet_overflow_period = MILLISECS(1000) << 31; + (void)do_div(hpet_overflow_period, (u32)hpet_rate); + } + + init_ac_timer(&hpet_overflow_timer, hpet_overflow, NULL, 0); + hpet_overflow(NULL); + platform_timer_stamp = hpet_counter64; + + printk("Platform timer is %s HPET\n", freq_string(hpet_rate)); + + return 1; +} + +/************************************************************ + * PLATFORM TIMER 3: IBM 'CYCLONE' TIMER + */ + +int use_cyclone; + +/* + * Although the counter is read via a 64-bit register, I believe it is actually + * a 40-bit counter. Since this will wrap, I read only the low 32 bits and + * periodically fold into a 64-bit software counter, just as for PIT and HPET. + */ +#define CYCLONE_CBAR_ADDR 0xFEB00CD0 +#define CYCLONE_PMCC_OFFSET 0x51A0 +#define CYCLONE_MPMC_OFFSET 0x51D0 +#define CYCLONE_MPCS_OFFSET 0x51A8 +#define CYCLONE_TIMER_FREQ 100000000 + +/* Protected by platform_timer_lock. */ +static u64 cyclone_counter64; +static u32 cyclone_stamp; +static struct ac_timer cyclone_overflow_timer; +static volatile u32 *cyclone_timer; /* Cyclone MPMC0 register */ + +static void cyclone_overflow(void *unused) +{ + u32 counter; + + spin_lock(&platform_timer_lock); + counter = *cyclone_timer; + cyclone_counter64 += (u32)(counter - cyclone_stamp); + cyclone_stamp = counter; + spin_unlock(&platform_timer_lock); + + set_ac_timer(&cyclone_overflow_timer, NOW() + MILLISECS(20000)); +} + +static u64 read_cyclone_count(void) +{ + return cyclone_counter64 + (u32)(*cyclone_timer - cyclone_stamp); +} + +static volatile u32 *map_cyclone_reg(unsigned long regaddr) +{ + unsigned long pageaddr = regaddr & PAGE_MASK; + unsigned long offset = regaddr & ~PAGE_MASK; + set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); + return (volatile u32 *)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); +} + +static int init_cyclone(void) +{ + u32 base; + + if ( !use_cyclone ) + return 0; + + /* Find base address. */ + base = *(map_cyclone_reg(CYCLONE_CBAR_ADDR)); + if ( base == 0 ) + { + printk(KERN_ERR "Cyclone: Could not find valid CBAR value.\n"); + return 0; + } + + /* Enable timer and map the counter register. */ + *(map_cyclone_reg(base + CYCLONE_PMCC_OFFSET)) = 1; + *(map_cyclone_reg(base + CYCLONE_MPCS_OFFSET)) = 1; + cyclone_timer = map_cyclone_reg(base + CYCLONE_MPMC_OFFSET); + + read_platform_count = read_cyclone_count; + + init_ac_timer(&cyclone_overflow_timer, cyclone_overflow, NULL, 0); + cyclone_overflow(NULL); + platform_timer_stamp = cyclone_counter64; + set_time_scale(&platform_timer_scale, CYCLONE_TIMER_FREQ); + + printk("Platform timer is %s IBM Cyclone\n", + freq_string(CYCLONE_TIMER_FREQ)); + + return 1; +} + +/************************************************************ + * GENERIC PLATFORM TIMER INFRASTRUCTURE + */ + +static s_time_t __read_platform_stime(u64 platform_time) +{ + u64 diff64 = platform_time - platform_timer_stamp; + u32 diff = down_shift(diff64, platform_timer_scale.shift); + ASSERT(spin_is_locked(&platform_timer_lock)); + return (stime_platform_stamp + + (u64)mul_frac(diff, platform_timer_scale.mul_frac)); +} + +static s_time_t read_platform_stime(void) +{ + u64 counter; + s_time_t stime; + + spin_lock(&platform_timer_lock); + counter = read_platform_count(); + stime = __read_platform_stime(counter); + spin_unlock(&platform_timer_lock); + + return stime; +} + +static void platform_time_calibration(void) +{ + u64 counter; + s_time_t stamp; + + spin_lock(&platform_timer_lock); + counter = read_platform_count(); + stamp = __read_platform_stime(counter); + stime_platform_stamp = stamp; + platform_timer_stamp = counter; + spin_unlock(&platform_timer_lock); +} + +static void init_platform_timer(void) +{ + if ( !init_cyclone() && !init_hpet() ) + BUG_ON(!init_pit()); } @@ -233,140 +616,226 @@ * System Time ***************************************************************************/ -static inline u64 get_time_delta(void) -{ - s32 delta_tsc; - u32 low; - u64 delta, tsc; - - ASSERT(st_scale_f || st_scale_i); +s_time_t get_s_time(void) +{ + struct cpu_time *t = &cpu_time[smp_processor_id()]; + u64 tsc; + u32 delta; + s_time_t now; rdtscll(tsc); - low = (u32)(tsc >> rdtsc_bitshift); - delta_tsc = (s32)(low - shifted_tsc_irq); - if ( unlikely(delta_tsc < 0) ) delta_tsc = 0; - delta = ((u64)delta_tsc * st_scale_f); - delta >>= 32; - delta += ((u64)delta_tsc * st_scale_i); - - return delta; -} - -s_time_t get_s_time(void) -{ - s_time_t now; - unsigned long flags; - - read_lock_irqsave(&time_lock, flags); - - now = stime_irq + get_time_delta(); - - /* Ensure that the returned system time is monotonically increasing. */ - { - static s_time_t prev_now = 0; - if ( unlikely(now < prev_now) ) - now = prev_now; - prev_now = now; - } - - read_unlock_irqrestore(&time_lock, flags); - - return now; + delta = down_shift(tsc - t->local_tsc_stamp, t->tsc_scale.shift); + now = t->stime_local_stamp + (u64)mul_frac(delta, t->tsc_scale.mul_frac); + + return now; } static inline void __update_dom_time(struct vcpu *v) { - struct domain *d = v->domain; - shared_info_t *si = d->shared_info; - - spin_lock(&d->time_lock); - - si->time_version1++; + struct cpu_time *t = &cpu_time[smp_processor_id()]; + struct vcpu_time_info *u = &v->domain->shared_info->vcpu_time[v->vcpu_id]; + + u->time_version1++; wmb(); - si->cpu_freq = cpu_freq; - si->tsc_timestamp = full_tsc_irq; - si->system_time = stime_irq; - si->wc_sec = wc_sec; - si->wc_usec = wc_usec; + u->tsc_timestamp = t->local_tsc_stamp; + u->system_time = t->stime_local_stamp; + u->tsc_to_system_mul = t->tsc_scale.mul_frac; + u->tsc_shift = (s8)t->tsc_scale.shift; wmb(); - si->time_version2++; - - spin_unlock(&d->time_lock); + u->time_version2++; + + /* Should only do this during do_settime(). */ + v->domain->shared_info->wc_sec = wc_sec; + v->domain->shared_info->wc_usec = wc_usec; } void update_dom_time(struct vcpu *v) { - unsigned long flags; - - if ( v->domain->shared_info->tsc_timestamp != full_tsc_irq ) - { - read_lock_irqsave(&time_lock, flags); + if ( v->domain->shared_info->vcpu_time[v->vcpu_id].tsc_timestamp != + cpu_time[smp_processor_id()].local_tsc_stamp ) __update_dom_time(v); - read_unlock_irqrestore(&time_lock, flags); - } } /* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */ void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base) { - s64 delta; - long _usecs = (long)usecs; - - write_lock_irq(&time_lock); - - delta = (s64)(stime_irq - system_time_base); - - _usecs += (long)(delta/1000); - while ( _usecs >= 1000000 ) - { - _usecs -= 1000000; - secs++; - } - - wc_sec = secs; - wc_usec = _usecs; - - /* Others will pick up the change at the next tick. */ + u64 x, base_usecs; + u32 y; + + base_usecs = system_time_base; + do_div(base_usecs, 1000); + + x = (secs * 1000000ULL) + (u64)usecs + base_usecs; + y = do_div(x, 1000000); + + wc_sec = (unsigned long)x; + wc_usec = (unsigned long)y; + __update_dom_time(current); - send_guest_virq(current, VIRQ_TIMER); - - write_unlock_irq(&time_lock); -} - +} + +static void local_time_calibration(void *unused) +{ + unsigned int cpu = smp_processor_id(); + + /* + * System timestamps, extrapolated from local and master oscillators, + * taken during this calibration and the previous calibration. + */ + s_time_t prev_local_stime, curr_local_stime; + s_time_t prev_master_stime, curr_master_stime; + + /* TSC timestamps taken during this calibration and prev calibration. */ + u64 prev_tsc, curr_tsc; + + /* + * System time and TSC ticks elapsed during the previous calibration + * 'epoch'. These values are down-shifted to fit in 32 bits. + */ + u64 stime_elapsed64, tsc_elapsed64; + u32 stime_elapsed32, tsc_elapsed32; + + /* The accumulated error in the local estimate. */ + u64 local_stime_err; + + /* Error correction to slow down a fast local clock. */ + u32 error_factor = 0; + + /* Calculated TSC shift to ensure 32-bit scale multiplier. */ + int tsc_shift = 0; + + /* The overall calibration scale multiplier. */ + u32 calibration_mul_frac; + + prev_tsc = cpu_time[cpu].local_tsc_stamp; + prev_local_stime = cpu_time[cpu].stime_local_stamp; + prev_master_stime = cpu_time[cpu].stime_master_stamp; + + /* Disable IRQs to get 'instantaneous' current timestamps. */ + local_irq_disable(); + rdtscll(curr_tsc); + curr_local_stime = get_s_time(); + curr_master_stime = read_platform_stime(); + local_irq_enable(); + +#if 0 + printk("PRE%d: tsc=%lld stime=%lld master=%lld\n", + cpu, prev_tsc, prev_local_stime, prev_master_stime); + printk("CUR%d: tsc=%lld stime=%lld master=%lld -> %lld\n", + cpu, curr_tsc, curr_local_stime, curr_master_stime, + curr_master_stime - curr_local_stime); +#endif + + /* Local time warps forward if it lags behind master time. */ + if ( curr_local_stime < curr_master_stime ) + curr_local_stime = curr_master_stime; + + stime_elapsed64 = curr_master_stime - prev_master_stime; + tsc_elapsed64 = curr_tsc - prev_tsc; + + /* + * Calculate error-correction factor. This only slows down a fast local + * clock (slow clocks are warped forwards). The scale factor is clamped + * to >= 0.5. + */ + if ( curr_local_stime != curr_master_stime ) + { + local_stime_err = curr_local_stime - curr_master_stime; + if ( local_stime_err > EPOCH ) + local_stime_err = EPOCH; + error_factor = div_frac(EPOCH, EPOCH + (u32)local_stime_err); + } + + /* + * We require 0 < stime_elapsed < 2^31. + * This allows us to binary shift a 32-bit tsc_elapsed such that: + * stime_elapsed < tsc_elapsed <= 2*stime_elapsed + */ + while ( ((u32)stime_elapsed64 != stime_elapsed64) || + ((s32)stime_elapsed64 < 0) ) + { + stime_elapsed64 >>= 1; + tsc_elapsed64 >>= 1; + } + + /* stime_master_diff now fits in a 32-bit word. */ + stime_elapsed32 = (u32)stime_elapsed64; + + /* tsc_elapsed <= 2*stime_elapsed */ + while ( tsc_elapsed64 > (stime_elapsed32 * 2) ) + { + tsc_elapsed64 >>= 1; + tsc_shift--; + } + + /* Local difference must now fit in 32 bits. */ + ASSERT((u32)tsc_elapsed64 == tsc_elapsed64); + tsc_elapsed32 = (u32)tsc_elapsed64; + + /* tsc_elapsed > stime_elapsed */ + ASSERT(tsc_elapsed32 != 0); + while ( tsc_elapsed32 <= stime_elapsed32 ) + { + tsc_elapsed32 <<= 1; + tsc_shift++; + } + + calibration_mul_frac = div_frac(stime_elapsed32, tsc_elapsed32); + if ( error_factor != 0 ) + calibration_mul_frac = mul_frac(calibration_mul_frac, error_factor); + +#if 0 + printk("---%d: %08x %08x %d\n", cpu, + error_factor, calibration_mul_frac, tsc_shift); +#endif + + /* Record new timestamp information. */ + cpu_time[cpu].tsc_scale.mul_frac = calibration_mul_frac; + cpu_time[cpu].tsc_scale.shift = tsc_shift; + cpu_time[cpu].local_tsc_stamp = curr_tsc; + cpu_time[cpu].stime_local_stamp = curr_local_stime; + cpu_time[cpu].stime_master_stamp = curr_master_stime; + + set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH); + + if ( cpu == 0 ) + platform_time_calibration(); +} + +void init_percpu_time(void) +{ + unsigned int cpu = smp_processor_id(); + unsigned long flags; + s_time_t now; + + local_irq_save(flags); + rdtscll(cpu_time[cpu].local_tsc_stamp); + now = (cpu == 0) ? 0 : read_platform_stime(); + local_irq_restore(flags); + + cpu_time[cpu].stime_master_stamp = now; + cpu_time[cpu].stime_local_stamp = now; + + init_ac_timer(&cpu_time[cpu].calibration_timer, + local_time_calibration, NULL, cpu); + set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH); +} /* Late init function (after all CPUs are booted). */ -int __init init_xen_time() -{ - u64 scale; - unsigned int cpu_ghz; - - cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL); - for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 ) - continue; - - scale = 1000000000LL << (32 + rdtsc_bitshift); - scale /= cpu_freq; - st_scale_f = scale & 0xffffffff; - st_scale_i = scale >> 32; +int __init init_xen_time(void) +{ + wc_sec = get_cmos_time(); local_irq_disable(); - /* System time ticks from zero. */ - rdtscll(full_tsc_irq); - stime_irq = (s_time_t)0; - shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift); - - /* Wallclock time starts as the initial RTC time. */ - wc_sec = get_cmos_time(); + init_percpu_time(); + + stime_platform_stamp = 0; + init_platform_timer(); local_irq_enable(); - - printk("Time init:\n"); - printk(".... cpu_freq: %08X:%08X\n", (u32)(cpu_freq>>32),(u32)cpu_freq); - printk(".... scale: %08X:%08X\n", (u32)(scale>>32),(u32)scale); - printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_usec); return 0; } @@ -375,15 +844,12 @@ /* Early init function. */ void __init early_time_init(void) { - unsigned long ticks_per_frac = calibrate_tsc(); - - if ( !ticks_per_frac ) - panic("Error calibrating TSC\n"); - - cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC); - - cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC; - + u64 tmp = calibrate_boot_tsc(); + + set_time_scale(&cpu_time[0].tsc_scale, tmp); + + do_div(tmp, 1000); + cpu_khz = (unsigned long)tmp; printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); diff -r a4196568095c -r b53a65034532 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/traps.c Fri Jul 29 20:25:03 2005 @@ -40,6 +40,7 @@ #include <xen/perfc.h> #include <xen/softirq.h> #include <xen/domain_page.h> +#include <xen/symbols.h> #include <asm/shadow.h> #include <asm/system.h> #include <asm/io.h> @@ -100,7 +101,7 @@ static int debug_stack_lines = 20; integer_param("debug_stack_lines", debug_stack_lines); -static inline int kernel_text_address(unsigned long addr) +int is_kernel_text(unsigned long addr) { extern char _stext, _etext; if (addr >= (unsigned long) &_stext && @@ -110,6 +111,12 @@ } +unsigned long kernel_text_end(void) +{ + extern char _etext; + return (unsigned long) &_etext; +} + void show_guest_stack(void) { int i; @@ -150,11 +157,12 @@ while ( ((long) stack & (STACK_SIZE-1)) != 0 ) { addr = *stack++; - if ( kernel_text_address(addr) ) + if ( is_kernel_text(addr) ) { if ( (i != 0) && ((i % 6) == 0) ) printk("\n "); - printk("[<%p>] ", _p(addr)); + printk("[<%p>]", _p(addr)); + print_symbol(" %s\n", addr); i++; } } @@ -177,10 +185,7 @@ if ( (i != 0) && ((i % 8) == 0) ) printk("\n "); addr = *stack++; - if ( kernel_text_address(addr) ) - printk("[%p] ", _p(addr)); - else - printk("%p ", _p(addr)); + printk("%p ", _p(addr)); } if ( i == 0 ) printk("Stack empty."); diff -r a4196568095c -r b53a65034532 xen/arch/x86/vmx.c --- a/xen/arch/x86/vmx.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/vmx.c Fri Jul 29 20:25:03 2005 @@ -38,7 +38,7 @@ #include <asm/vmx_vmcs.h> #include <asm/vmx_intercept.h> #include <asm/shadow.h> -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS >= 3 #include <asm/shadow_64.h> #endif @@ -94,12 +94,16 @@ msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \ break -#define CASE_WRITE_MSR(address) \ - case MSR_ ## address: \ - msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \ - if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)){ \ - set_bit(VMX_INDEX_MSR_ ## address, &msr->flags); \ - }\ +#define CASE_WRITE_MSR(address) \ + case MSR_ ## address: \ + { \ + msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \ + if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)) { \ + set_bit(VMX_INDEX_MSR_ ## address, &msr->flags); \ + } \ + wrmsrl(MSR_ ## address, msr_content); \ + set_bit(VMX_INDEX_MSR_ ## address, &host_state->flags); \ + } \ break #define IS_CANO_ADDRESS(add) 1 @@ -604,11 +608,6 @@ addr = (exit_qualification >> 16) & (0xffff); else addr = regs->edx & 0xffff; - - if (addr == 0x80) { - __update_guest_eip(inst_len); - return; - } vio = get_vio(d->domain, d->vcpu_id); if (vio == 0) { @@ -1261,6 +1260,7 @@ CASE_SET_REG(EBP, ebp); CASE_SET_REG(ESI, esi); CASE_SET_REG(EDI, edi); + CASE_EXTEND_SET_REG case REG_ESP: __vmwrite(GUEST_RSP, value); regs->esp = value; diff -r a4196568095c -r b53a65034532 xen/arch/x86/vmx_intercept.c --- a/xen/arch/x86/vmx_intercept.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/vmx_intercept.c Fri Jul 29 20:25:03 2005 @@ -24,10 +24,10 @@ #include <asm/vmx_virpit.h> #include <asm/vmx_intercept.h> #include <public/io/ioreq.h> - #include <xen/lib.h> #include <xen/sched.h> #include <asm/current.h> +#include <io_ports.h> #ifdef CONFIG_VMX @@ -175,7 +175,7 @@ p->port_mm) return 0; - if (p->addr == 0x43 && + if (p->addr == PIT_MODE && p->dir == 0 && /* write */ ((p->u.data >> 4) & 0x3) == 0 && /* latch command */ ((p->u.data >> 6) & 0x3) == (vpit->channel)) {/* right channel */ @@ -183,7 +183,7 @@ return 1; } - if (p->addr == (0x40 + vpit->channel) && + if (p->addr == (PIT_CH0 + vpit->channel) && p->dir == 1) { /* read */ p->u.data = pit_read_io(vpit); resume_pit_io(p); @@ -197,12 +197,23 @@ static void pit_timer_fn(void *data) { struct vmx_virpit_t *vpit = data; + s_time_t next; + int missed_ticks; + + missed_ticks = (NOW() - vpit->scheduled) / MILLISECS(vpit->period); /* Set the pending intr bit, and send evtchn notification to myself. */ if (test_and_set_bit(vpit->vector, vpit->intr_bitmap)) vpit->pending_intr_nr++; /* already set, then count the pending intr */ - set_ac_timer(&vpit->pit_timer, NOW() + MILLISECS(vpit->period)); + /* pick up missed timer tick */ + if ( missed_ticks > 0 ) { + vpit->pending_intr_nr+= missed_ticks; + vpit->scheduled += missed_ticks * MILLISECS(vpit->period); + } + next = vpit->scheduled + MILLISECS(vpit->period); + set_ac_timer(&vpit->pit_timer, next); + vpit->scheduled = next; } @@ -263,7 +274,8 @@ vpit->intr_bitmap = intr; - set_ac_timer(&vpit->pit_timer, NOW() + MILLISECS(vpit->period)); + vpit->scheduled = NOW() + MILLISECS(vpit->period); + set_ac_timer(&vpit->pit_timer, vpit->scheduled); /*restore the state*/ p->state = STATE_IORESP_READY; diff -r a4196568095c -r b53a65034532 xen/arch/x86/vmx_io.c --- a/xen/arch/x86/vmx_io.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/vmx_io.c Fri Jul 29 20:25:03 2005 @@ -39,14 +39,20 @@ #ifdef CONFIG_VMX #if defined (__i386__) -static void load_cpu_user_regs(struct cpu_user_regs *regs) +void load_cpu_user_regs(struct cpu_user_regs *regs) { /* * Write the guest register value into VMCS */ __vmwrite(GUEST_SS_SELECTOR, regs->ss); __vmwrite(GUEST_RSP, regs->esp); + __vmwrite(GUEST_RFLAGS, regs->eflags); + if (regs->eflags & EF_TF) + __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); + else + __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); + __vmwrite(GUEST_CS_SELECTOR, regs->cs); __vmwrite(GUEST_RIP, regs->eip); } @@ -175,11 +181,17 @@ } } #else -static void load_cpu_user_regs(struct cpu_user_regs *regs) +void load_cpu_user_regs(struct cpu_user_regs *regs) { __vmwrite(GUEST_SS_SELECTOR, regs->ss); __vmwrite(GUEST_RSP, regs->rsp); + __vmwrite(GUEST_RFLAGS, regs->rflags); + if (regs->rflags & EF_TF) + __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); + else + __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); + __vmwrite(GUEST_CS_SELECTOR, regs->cs); __vmwrite(GUEST_RIP, regs->rip); } diff -r a4196568095c -r b53a65034532 xen/arch/x86/vmx_platform.c --- a/xen/arch/x86/vmx_platform.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/vmx_platform.c Fri Jul 29 20:25:03 2005 @@ -32,7 +32,7 @@ #include <xen/lib.h> #include <xen/sched.h> #include <asm/current.h> -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS >= 3 #include <asm/shadow_64.h> #endif #ifdef CONFIG_VMX diff -r a4196568095c -r b53a65034532 xen/arch/x86/vmx_vmcs.c --- a/xen/arch/x86/vmx_vmcs.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/vmx_vmcs.c Fri Jul 29 20:25:03 2005 @@ -59,9 +59,11 @@ free_xenheap_pages(vmcs, order); } -static inline int construct_vmcs_controls(void) +static inline int construct_vmcs_controls(struct arch_vmx_struct *arch_vmx) { int error = 0; + void *io_bitmap_a; + void *io_bitmap_b; error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL, MONITOR_PIN_BASED_EXEC_CONTROLS); @@ -72,6 +74,20 @@ error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS); error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS); + + /* need to use 0x1000 instead of PAGE_SIZE */ + io_bitmap_a = (void*) alloc_xenheap_pages(get_order(0x1000)); + io_bitmap_b = (void*) alloc_xenheap_pages(get_order(0x1000)); + memset(io_bitmap_a, 0xff, 0x1000); + /* don't bother debug port access */ + clear_bit(PC_DEBUG_PORT, io_bitmap_a); + memset(io_bitmap_b, 0xff, 0x1000); + + error |= __vmwrite(IO_BITMAP_A, (u64) virt_to_phys(io_bitmap_a)); + error |= __vmwrite(IO_BITMAP_B, (u64) virt_to_phys(io_bitmap_b)); + + arch_vmx->io_bitmap_a = io_bitmap_a; + arch_vmx->io_bitmap_b = io_bitmap_b; return error; } @@ -190,10 +206,14 @@ vmx_setup_platform(v, regs); + __asm__ __volatile__ ("sidt (%0) \n" :: "a"(&desc) : "memory"); + host_env.idtr_limit = desc.size; + host_env.idtr_base = desc.address; + error |= __vmwrite(HOST_IDTR_BASE, host_env.idtr_base); + __asm__ __volatile__ ("sgdt (%0) \n" :: "a"(&desc) : "memory"); host_env.gdtr_limit = desc.size; host_env.gdtr_base = desc.address; - error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base); error |= __vmwrite(GUEST_LDTR_SELECTOR, 0); @@ -351,7 +371,6 @@ { int error = 0; unsigned long crn; - struct Xgt_desc_struct desc; /* Host Selectors */ host_env->ds_selector = __HYPERVISOR_DS; @@ -377,14 +396,7 @@ host_env->ds_base = 0; host_env->cs_base = 0; -/* Debug */ - __asm__ __volatile__ ("sidt (%0) \n" :: "a"(&desc) : "memory"); - host_env->idtr_limit = desc.size; - host_env->idtr_base = desc.address; - error |= __vmwrite(HOST_IDTR_BASE, host_env->idtr_base); - __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : ); - host_env->cr0 = crn; error |= __vmwrite(HOST_CR0, crn); /* same CR0 */ @@ -392,6 +404,7 @@ __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) : ); host_env->cr4 = crn; error |= __vmwrite(HOST_CR4, crn); + error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler); #ifdef __x86_64__ /* TBD: support cr8 for 64-bit guest */ @@ -435,7 +448,7 @@ (unsigned long) vmcs_phys_ptr); return -EINVAL; } - if ((error = construct_vmcs_controls())) { + if ((error = construct_vmcs_controls(arch_vmx))) { printk("construct_vmcs: construct_vmcs_controls failed\n"); return -EINVAL; } @@ -455,6 +468,35 @@ printk("construct_vmcs: setting Exception bitmap failed\n"); return -EINVAL; } + + if (regs->eflags & EF_TF) + __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); + else + __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); + + return 0; +} + +/* + * modify guest eflags and execption bitmap for gdb + */ +int modify_vmcs(struct arch_vmx_struct *arch_vmx, + struct cpu_user_regs *regs) +{ + int error; + u64 vmcs_phys_ptr, old, old_phys_ptr; + vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs); + + old_phys_ptr = virt_to_phys(&old); + __vmptrst(old_phys_ptr); + if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) { + printk("modify_vmcs: load_vmcs failed: VMCS = %lx\n", + (unsigned long) vmcs_phys_ptr); + return -EINVAL; + } + load_cpu_user_regs(regs); + + __vmptrld(old_phys_ptr); return 0; } diff -r a4196568095c -r b53a65034532 xen/arch/x86/x86_32/mm.c --- a/xen/arch/x86/x86_32/mm.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/x86_32/mm.c Fri Jul 29 20:25:03 2005 @@ -102,7 +102,7 @@ mpt_size = 4*1024*1024; for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) { - if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER)) == NULL ) + if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL ) panic("Not enough memory to bootstrap Xen.\n"); idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i] = l2e_from_page(pg, PAGE_HYPERVISOR | _PAGE_PSE); diff -r a4196568095c -r b53a65034532 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/x86_32/traps.c Fri Jul 29 20:25:03 2005 @@ -6,6 +6,7 @@ #include <xen/console.h> #include <xen/mm.h> #include <xen/irq.h> +#include <xen/symbols.h> #include <asm/current.h> #include <asm/flushtlb.h> #include <asm/vmx.h> @@ -63,10 +64,10 @@ } } - printk("CPU: %d\nEIP: %04lx:[<%08lx>] \nEFLAGS: %08lx " - "CONTEXT: %s\n", - smp_processor_id(), (unsigned long)0xffff & regs->cs, - eip, eflags, context); + printk("CPU: %d\nEIP: %04lx:[<%08lx>]", + smp_processor_id(), (unsigned long)0xffff & regs->cs, eip); + print_symbol(" %s\n", eip); + printk("EFLAGS: %08lx CONTEXT: %s\n", eflags, context); printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n", regs->eax, regs->ebx, regs->ecx, regs->edx); printk("esi: %08x edi: %08x ebp: %08x esp: %08lx\n", @@ -119,8 +120,10 @@ /* Find information saved during fault and dump it to the console. */ tss = &init_tss[cpu]; - printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n", - cpu, tss->cs, tss->eip, tss->eflags); + printk("CPU: %d\nEIP: %04x:[<%08x>]", + cpu, tss->cs, tss->eip); + print_symbol(" %s\n", tss->eip); + printk("EFLAGS: %08x\n", tss->eflags); printk("CR3: %08x\n", tss->__cr3); printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n", tss->eax, tss->ebx, tss->ecx, tss->edx); diff -r a4196568095c -r b53a65034532 xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/x86_64/entry.S Fri Jul 29 20:25:03 2005 @@ -587,6 +587,7 @@ .quad do_boot_vcpu .quad do_set_segment_base /* 25 */ .quad do_mmuext_op + .quad do_policy_op .rept NR_hypercalls-((.-hypercall_table)/4) .quad do_ni_hypercall .endr diff -r a4196568095c -r b53a65034532 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/x86_64/mm.c Fri Jul 29 20:25:03 2005 @@ -100,7 +100,7 @@ */ for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) ) { - pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER); + pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0); if ( pg == NULL ) panic("Not enough memory for m2p table\n"); map_pages_to_xen( diff -r a4196568095c -r b53a65034532 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Fri Jul 29 18:52:33 2005 +++ b/xen/arch/x86/x86_64/traps.c Fri Jul 29 20:25:03 2005 @@ -6,6 +6,7 @@ #include <xen/errno.h> #include <xen/mm.h> #include <xen/irq.h> +#include <xen/symbols.h> #include <xen/console.h> #include <xen/sched.h> #include <asm/current.h> @@ -14,8 +15,10 @@ void show_registers(struct cpu_user_regs *regs) { - printk("CPU: %d\nEIP: %04x:[<%016lx>] \nEFLAGS: %016lx\n", - smp_processor_id(), 0xffff & regs->cs, regs->rip, regs->eflags); + printk("CPU: %d\nEIP: %04x:[<%016lx>]", + smp_processor_id(), 0xffff & regs->cs, regs->rip); + print_symbol(" %s\n", regs->rip); + printk("EFLAGS: %016lx\n", regs->eflags); printk("rax: %016lx rbx: %016lx rcx: %016lx rdx: %016lx\n", regs->rax, regs->rbx, regs->rcx, regs->rdx); printk("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", diff -r a4196568095c -r b53a65034532 xen/common/ac_timer.c --- a/xen/common/ac_timer.c Fri Jul 29 18:52:33 2005 +++ b/xen/common/ac_timer.c Fri Jul 29 20:25:03 2005 @@ -202,7 +202,7 @@ do { heap = ac_timers[cpu].heap; now = NOW(); - + while ( (GET_HEAP_SIZE(heap) != 0) && ((t = heap[1])->expires < (now + TIMER_SLOP)) ) { diff -r a4196568095c -r b53a65034532 xen/common/dom_mem_ops.c --- a/xen/common/dom_mem_ops.c Fri Jul 29 18:52:33 2005 +++ b/xen/common/dom_mem_ops.c Fri Jul 29 20:25:03 2005 @@ -37,7 +37,8 @@ unsigned long *extent_list, unsigned long start_extent, unsigned int nr_extents, - unsigned int extent_order) + unsigned int extent_order, + unsigned int flags) { struct pfn_info *page; unsigned long i; @@ -56,7 +57,8 @@ { PREEMPT_CHECK(MEMOP_increase_reservation); - if ( unlikely((page = alloc_domheap_pages(d, extent_order)) == NULL) ) + if ( unlikely((page = alloc_domheap_pages(d, extent_order, + flags)) == NULL) ) { DPRINTK("Could not allocate a frame\n"); return i; @@ -131,10 +133,15 @@ { struct domain *d; unsigned long rc, start_extent; + unsigned int address_bits_order; /* Extract @start_extent from @op. */ start_extent = op >> START_EXTENT_SHIFT; op &= (1 << START_EXTENT_SHIFT) - 1; + + /* seperate extent_order and address_bits_order */ + address_bits_order = (extent_order >> 8) & 0xff; + extent_order &= 0xff; if ( unlikely(start_extent > nr_extents) ) return -EINVAL; @@ -150,7 +157,8 @@ { case MEMOP_increase_reservation: rc = alloc_dom_mem( - d, extent_list, start_extent, nr_extents, extent_order); + d, extent_list, start_extent, nr_extents, extent_order, + (address_bits_order <= 32) ? ALLOC_DOM_DMA : 0); break; case MEMOP_decrease_reservation: rc = free_dom_mem( diff -r a4196568095c -r b53a65034532 xen/common/domain.c --- a/xen/common/domain.c Fri Jul 29 18:52:33 2005 +++ b/xen/common/domain.c Fri Jul 29 20:25:03 2005 @@ -39,10 +39,8 @@ atomic_set(&d->refcnt, 1); atomic_set(&v->pausecnt, 0); - d->domain_id = dom_id; - v->processor = cpu; - - spin_lock_init(&d->time_lock); + d->domain_id = dom_id; + v->processor = cpu; spin_lock_init(&d->big_lock); diff -r a4196568095c -r b53a65034532 xen/common/grant_table.c --- a/xen/common/grant_table.c Fri Jul 29 18:52:33 2005 +++ b/xen/common/grant_table.c Fri Jul 29 20:25:03 2005 @@ -809,6 +809,146 @@ } #endif +static long +gnttab_donate(gnttab_donate_t *uop, unsigned int count) +{ + struct domain *d = current->domain; + struct domain *e; + struct pfn_info *page; + u32 _d, _nd, x, y; + int i; + int result = GNTST_okay; + + for (i = 0; i < count; i++) { + gnttab_donate_t *gop = &uop[i]; +#if GRANT_DEBUG + printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n", + i, gop->mfn, gop->domid, gop->handle); +#endif + page = &frame_table[gop->mfn]; + + if (unlikely(IS_XEN_HEAP_FRAME(page))) { + printk("gnttab_donate: xen heap frame mfn=%lx\n", (unsigned long) gop->mfn); + gop->status = GNTST_bad_virt_addr; + continue; + } + if (unlikely(!pfn_valid(page_to_pfn(page)))) { + printk("gnttab_donate: invalid pfn for mfn=%lx\n", (unsigned long) gop->mfn); + gop->status = GNTST_bad_virt_addr; + continue; + } + if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) { + printk("gnttab_donate: can't find domain %d\n", gop->domid); + gop->status = GNTST_bad_domain; + continue; + } + + spin_lock(&d->page_alloc_lock); + + /* + * The tricky bit: atomically release ownership while + * there is just one benign reference to the page + * (PGC_allocated). If that reference disappears then the + * deallocation routine will safely spin. + */ + _d = pickle_domptr(d); + _nd = page->u.inuse._domain; + y = page->count_info; + do { + x = y; + if (unlikely((x & (PGC_count_mask|PGC_allocated)) != + (1 | PGC_allocated)) || unlikely(_nd != _d)) { + printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p," + " caf=%08x, taf=%08x\n", (void *) page_to_pfn(page), + d, d->domain_id, unpickle_domptr(_nd), x, + page->u.inuse.type_info); + spin_unlock(&d->page_alloc_lock); + put_domain(e); + return 0; + } + __asm__ __volatile__( + LOCK_PREFIX "cmpxchg8b %2" + : "=d" (_nd), "=a" (y), + "=m" (*(volatile u64 *)(&page->count_info)) + : "0" (_d), "1" (x), "c" (NULL), "b" (x) ); + } while (unlikely(_nd != _d) || unlikely(y != x)); + + /* + * Unlink from 'd'. At least one reference remains (now + * anonymous), so noone else is spinning to try to delete + * this page from 'd'. + */ + d->tot_pages--; + list_del(&page->list); + + spin_unlock(&d->page_alloc_lock); + + spin_lock(&e->page_alloc_lock); + + /* + * Check that 'e' will accept the page and has reservation + * headroom. Also, a domain mustn't have PGC_allocated + * pages when it is dying. + */ +#ifdef GRANT_DEBUG + if (unlikely(e->tot_pages >= e->max_pages)) { + printk("gnttab_dontate: no headroom tot_pages=%d max_pages=%d\n", + e->tot_pages, e->max_pages); + spin_unlock(&e->page_alloc_lock); + put_domain(e); + result = GNTST_general_error; + break; + } + if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags))) { + printk("gnttab_donate: target domain is dying\n"); + spin_unlock(&e->page_alloc_lock); + put_domain(e); + result = GNTST_general_error; + break; + } + if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) { + printk("gnttab_donate: gnttab_prepare_for_transfer fails\n"); + spin_unlock(&e->page_alloc_lock); + put_domain(e); + result = GNTST_general_error; + break; + } +#else + ASSERT(e->tot_pages <= e->max_pages); + if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) || + unlikely(e->tot_pages == e->max_pages) || + unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) { + printk("gnttab_donate: Transferee has no reservation headroom (%d,%d), or " + "provided a bad grant ref (%08x), or is dying (%p).\n", + e->tot_pages, e->max_pages, gop->handle, e->d_flags); + spin_unlock(&e->page_alloc_lock); + put_domain(e); + result = GNTST_general_error; + break; + } +#endif + /* Okay, add the page to 'e'. */ + if (unlikely(e->tot_pages++ == 0)) { + get_knownalive_domain(e); + } + list_add_tail(&page->list, &e->page_list); + page_set_owner(page, e); + + spin_unlock(&e->page_alloc_lock); + + /* + * Transfer is all done: tell the guest about its new page + * frame. + */ + gnttab_notify_transfer(e, d, gop->handle, gop->mfn); + + put_domain(e); + + gop->status = GNTST_okay; + } + return result; +} + long do_grant_table_op( unsigned int cmd, void *uop, unsigned int count) @@ -843,6 +983,11 @@ rc = gnttab_dump_table((gnttab_dump_table_t *)uop); break; #endif + case GNTTABOP_donate: + if (unlikely(!array_access_ok(uop, count, sizeof(gnttab_donate_t)))) + goto out; + rc = gnttab_donate(uop, count); + break; default: rc = -ENOSYS; break; @@ -902,6 +1047,9 @@ for ( handle = 0; handle < lgt->maptrack_limit; handle++ ) { map = &lgt->maptrack[handle]; + + if ( map->domid != rd->domain_id ) + continue; if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) && ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly)))) diff -r a4196568095c -r b53a65034532 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Fri Jul 29 18:52:33 2005 +++ b/xen/common/page_alloc.c Fri Jul 29 20:25:03 2005 @@ -207,7 +207,13 @@ #define MEMZONE_XEN 0 #define MEMZONE_DOM 1 -#define NR_ZONES 2 +#define MEMZONE_DMADOM 2 +#define NR_ZONES 3 + + +#define MAX_DMADOM_PFN 0xFFFFF +#define pfn_dom_zone_type(_pfn) \ + (((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM) /* Up to 2^20 pages can be allocated at once. */ #define MAX_ORDER 20 @@ -236,7 +242,7 @@ if ( next_free ) map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */ if ( curr_free ) - free_heap_pages(MEMZONE_DOM, pfn_to_page(i), 0); + free_heap_pages(pfn_dom_zone_type(i), pfn_to_page(i), 0); } } @@ -351,10 +357,10 @@ void scrub_heap_pages(void) { void *p; - unsigned long pfn, flags; + unsigned long pfn; + int cpu = smp_processor_id(); printk("Scrubbing Free RAM: "); - watchdog_disable(); for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ ) { @@ -362,12 +368,15 @@ if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 ) printk("."); + if ( unlikely(softirq_pending(cpu)) ) + do_softirq(); + /* Quick lock-free check. */ if ( allocated_in_map(pfn) ) continue; - - spin_lock_irqsave(&heap_lock, flags); - + + spin_lock_irq(&heap_lock); + /* Re-check page status with lock held. */ if ( !allocated_in_map(pfn) ) { @@ -385,11 +394,10 @@ unmap_domain_page(p); } } - - spin_unlock_irqrestore(&heap_lock, flags); - } - - watchdog_enable(); + + spin_unlock_irq(&heap_lock); + } + printk("done.\n"); } @@ -472,14 +480,21 @@ { ASSERT(!in_irq()); - ps = round_pgup(ps); - pe = round_pgdown(pe); - - init_heap_pages(MEMZONE_DOM, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT); -} - - -struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order) + ps = round_pgup(ps) >> PAGE_SHIFT; + pe = round_pgdown(pe) >> PAGE_SHIFT; + + if (ps < MAX_DMADOM_PFN && pe > MAX_DMADOM_PFN) { + init_heap_pages(MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps); + init_heap_pages(MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN), + pe - MAX_DMADOM_PFN); + } + else + init_heap_pages(pfn_dom_zone_type(ps), pfn_to_page(ps), pe - ps); +} + + +struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order, + unsigned int flags) { struct pfn_info *pg; cpumask_t mask; @@ -487,8 +502,13 @@ ASSERT(!in_irq()); - if ( unlikely((pg = alloc_heap_pages(MEMZONE_DOM, order)) == NULL) ) - return NULL; + pg = NULL; + if (! (flags & ALLOC_DOM_DMA)) + pg = alloc_heap_pages(MEMZONE_DOM, order); + if (pg == NULL) { + if ( unlikely((pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL) ) + return NULL; + } mask = pg->u.free.cpumask; tlbflush_filter(mask, pg->tlbflush_timestamp); @@ -529,7 +549,7 @@ DPRINTK("...or the domain is dying (%d)\n", !!test_bit(_DOMF_dying, &d->domain_flags)); spin_unlock(&d->page_alloc_lock); - free_heap_pages(MEMZONE_DOM, pg, order); + free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order); return NULL; } @@ -594,7 +614,7 @@ if ( likely(!test_bit(_DOMF_dying, &d->domain_flags)) ) { - free_heap_pages(MEMZONE_DOM, pg, order); + free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order); } else { @@ -614,7 +634,7 @@ else { /* Freeing an anonymous domain-heap page. */ - free_heap_pages(MEMZONE_DOM, pg, order); + free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order); drop_dom_ref = 0; } @@ -625,7 +645,7 @@ unsigned long avail_domheap_pages(void) { - return avail[MEMZONE_DOM]; + return avail[MEMZONE_DOM] + avail[MEMZONE_DMADOM]; } @@ -674,7 +694,7 @@ p = map_domain_page(page_to_pfn(pg)); clear_page(p); unmap_domain_page(p); - free_heap_pages(MEMZONE_DOM, pg, 0); + free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, 0); } } while ( (NOW() - start) < MILLISECS(1) ); } diff -r a4196568095c -r b53a65034532 xen/common/policy_ops.c --- a/xen/common/policy_ops.c Fri Jul 29 18:52:33 2005 +++ b/xen/common/policy_ops.c Fri Jul 29 20:25:03 2005 @@ -36,11 +36,6 @@ } #else - -/* function prototypes defined in acm/acm_policy.c */ -int acm_set_policy(void *buf, u16 buf_size, u16 policy); -int acm_get_policy(void *buf, u16 buf_size); -int acm_dump_statistics(void *buf, u16 buf_size); typedef enum policyoperation { POLICY, /* access to policy interface (early drop) */ @@ -89,7 +84,8 @@ ret = acm_set_policy( op->u.setpolicy.pushcache, op->u.setpolicy.pushcache_size, - op->u.setpolicy.policy_type); + op->u.setpolicy.policy_type, + 1); if (ret == ACM_OK) ret = 0; else diff -r a4196568095c -r b53a65034532 xen/common/sched_sedf.c --- a/xen/common/sched_sedf.c Fri Jul 29 18:52:33 2005 +++ b/xen/common/sched_sedf.c Fri Jul 29 20:25:03 2005 @@ -609,15 +609,16 @@ inf->score[EXTRA_UTIL_Q] = (inf->period << 10) / inf->slice; else - /*give a domain w/ exweight = 1 as much as a domain with - util = 1/128*/ + /*conversion between realtime utilisation and extrawieght: + full (ie 100%) utilization is equivalent to 128 extraweight*/ inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight; } check_extra_queues: /* Adding a runnable domain to the right queue and removing blocked ones*/ if (sedf_runnable(d)) { /*add according to score: weighted round robin*/ - if (inf->status & (EXTRA_AWARE | EXTRA_WANT_PEN_Q)) + if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) || + ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q))) extraq_add_sort_update(d, i, oldscore); } else { @@ -627,12 +628,9 @@ /*make sure that we remove a blocked domain from the other extraq too*/ if (i == EXTRA_PEN_Q) { - if (extraq_on(d, EXTRA_UTIL_Q)) - extraq_del(d, EXTRA_UTIL_Q); - } - else { - if (extraq_on(d, EXTRA_PEN_Q)) - extraq_del(d, EXTRA_PEN_Q); + if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q); + } else { + if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q); } #endif } @@ -668,7 +666,8 @@ if (!list_empty(extraq[EXTRA_UTIL_Q])) { /*use elements from the normal extraqueue*/ runinf = list_entry(extraq[EXTRA_UTIL_Q]->next, - struct sedf_vcpu_info, extralist[EXTRA_UTIL_Q]); + struct sedf_vcpu_info, + extralist[EXTRA_UTIL_Q]); runinf->status |= EXTRA_RUN_UTIL; ret.task = runinf->vcpu; ret.time = EXTRA_QUANTUM; @@ -943,8 +942,7 @@ inf->status |= EXTRA_WANT_PEN_Q; /*(re-)add domain to the penalty extraq*/ - extraq_add_sort_update(inf->vcpu, - EXTRA_PEN_Q, 0); + extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0); } } /*give it a fresh slice in the next period!*/ @@ -1119,7 +1117,8 @@ s_time_t now = NOW(); struct sedf_vcpu_info* inf = EDOM_INFO(d); - PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id); + PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id, + d->vcpu_id); if (unlikely(is_idle_task(d->domain))) return; @@ -1145,7 +1144,7 @@ inf->block_tot++; #endif if (unlikely(now < PERIOD_BEGIN(inf))) { - PRINT(4,"extratime unblock\n"); + PRINT(4,"extratime unblock\n"); /* unblocking in extra-time! */ #if (EXTRA == EXTRA_BLOCK_WEIGHT) if (inf->status & EXTRA_WANT_PEN_Q) { @@ -1226,6 +1225,9 @@ /*check whether the awakened task needs to invoke the do_schedule routine. Try to avoid unnecessary runs but: Save approximation: Always switch to scheduler!*/ + ASSERT(d->processor >= 0); + ASSERT(d->processor < NR_CPUS); + ASSERT(schedule_data[d->processor].curr); if (should_switch(schedule_data[d->processor].curr, d, now)) cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ); } diff -r a4196568095c -r b53a65034532 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Fri Jul 29 18:52:33 2005 +++ b/xen/drivers/char/console.c Fri Jul 29 20:25:03 2005 @@ -635,8 +635,6 @@ debugtrace_bytes = bytes; - memset(debugtrace_buf, '\0', debugtrace_bytes); - return 0; } __initcall(debugtrace_init); diff -r a4196568095c -r b53a65034532 xen/include/acm/acm_core.h --- a/xen/include/acm/acm_core.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/acm/acm_core.h Fri Jul 29 20:25:03 2005 @@ -113,6 +113,9 @@ /* protos */ int acm_init_domain_ssid(domid_t id, ssidref_t ssidref); int acm_free_domain_ssid(struct acm_ssid_domain *ssid); +int acm_set_policy(void *buf, u16 buf_size, u16 policy, int isuserbuffer); +int acm_get_policy(void *buf, u16 buf_size); +int acm_dump_statistics(void *buf, u16 buf_size); #endif diff -r a4196568095c -r b53a65034532 xen/include/acm/acm_hooks.h --- a/xen/include/acm/acm_hooks.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/acm/acm_hooks.h Fri Jul 29 20:25:03 2005 @@ -24,6 +24,7 @@ #include <xen/lib.h> #include <xen/delay.h> #include <xen/sched.h> +#include <xen/multiboot.h> #include <public/acm.h> #include <acm/acm_core.h> #include <public/dom0_ops.h> @@ -136,7 +137,9 @@ { return 0; } static inline int acm_pre_grant_setup(domid_t id) { return 0; } -static inline int acm_init(void) +static inline int acm_init(unsigned int *initrdidx, + const multiboot_info_t *mbi, + unsigned long start) { return 0; } static inline void acm_post_domain0_create(domid_t domid) { return; } @@ -337,7 +340,9 @@ acm_post_domain_create(domid, ACM_DOM0_SSIDREF); } -extern int acm_init(void); +extern int acm_init(unsigned int *initrdidx, + const multiboot_info_t *mbi, + unsigned long start); #endif diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/apicdef.h --- a/xen/include/asm-x86/apicdef.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/apicdef.h Fri Jul 29 20:25:03 2005 @@ -108,10 +108,11 @@ #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) -#ifdef CONFIG_NUMA - #define MAX_IO_APICS 32 +/* These limits are dictated by ES7000 hardware. */ +#ifdef __i386__ + #define MAX_IO_APICS 65 #else - #define MAX_IO_APICS 8 + #define MAX_IO_APICS 129 #endif /* diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/config.h --- a/xen/include/asm-x86/config.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/config.h Fri Jul 29 20:25:03 2005 @@ -23,6 +23,7 @@ #define CONFIG_X86_LOCAL_APIC 1 #define CONFIG_X86_GOOD_APIC 1 #define CONFIG_X86_IO_APIC 1 +#define CONFIG_HPET_TIMER 1 /* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */ #define CONFIG_X86_L1_CACHE_SHIFT 7 diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/fixmap.h --- a/xen/include/asm-x86/fixmap.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/fixmap.h Fri Jul 29 20:25:03 2005 @@ -30,6 +30,8 @@ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, FIX_ACPI_BEGIN, FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, + FIX_HPET_BASE, + FIX_CYCLONE_TIMER, __end_of_fixed_addresses }; diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/genapic.h --- a/xen/include/asm-x86/genapic.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/genapic.h Fri Jul 29 20:25:03 2005 @@ -30,7 +30,6 @@ unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); unsigned long (*check_apicid_present)(int apicid); int no_balance_irq; - int no_ioapic_check; void (*init_apic_ldr)(void); physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); @@ -78,7 +77,6 @@ .int_delivery_mode = INT_DELIVERY_MODE, \ .int_dest_mode = INT_DEST_MODE, \ .no_balance_irq = NO_BALANCE_IRQ, \ - .no_ioapic_check = NO_IOAPIC_CHECK, \ .ESR_DISABLE = esr_disable, \ .apic_destination_logical = APIC_DEST_LOGICAL, \ APICFUNC(apic_id_registered), \ diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-bigsmp/mach_apic.h --- a/xen/include/asm-x86/mach-bigsmp/mach_apic.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/mach-bigsmp/mach_apic.h Fri Jul 29 20:25:03 2005 @@ -13,8 +13,6 @@ #define NO_BALANCE_IRQ (1) #define esr_disable (1) - -#define NO_IOAPIC_CHECK (0) static inline int apic_id_registered(void) { diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-default/mach_apic.h --- a/xen/include/asm-x86/mach-default/mach_apic.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/mach-default/mach_apic.h Fri Jul 29 20:25:03 2005 @@ -18,8 +18,6 @@ #define NO_BALANCE_IRQ (0) #define esr_disable (0) - -#define NO_IOAPIC_CHECK (0) #define INT_DELIVERY_MODE dest_LowestPrio #define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-es7000/mach_apic.h --- a/xen/include/asm-x86/mach-es7000/mach_apic.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/mach-es7000/mach_apic.h Fri Jul 29 20:25:03 2005 @@ -37,8 +37,6 @@ #define APIC_DEST_LOGICAL 0x0 #define WAKE_SECONDARY_VIA_INIT #endif - -#define NO_IOAPIC_CHECK (1) static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-generic/mach_apic.h --- a/xen/include/asm-x86/mach-generic/mach_apic.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/mach-generic/mach_apic.h Fri Jul 29 20:25:03 2005 @@ -5,7 +5,6 @@ #define esr_disable (genapic->ESR_DISABLE) #define NO_BALANCE_IRQ (genapic->no_balance_irq) -#define NO_IOAPIC_CHECK (genapic->no_ioapic_check) #define INT_DELIVERY_MODE (genapic->int_delivery_mode) #define INT_DEST_MODE (genapic->int_dest_mode) #undef APIC_DEST_LOGICAL diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-summit/mach_apic.h --- a/xen/include/asm-x86/mach-summit/mach_apic.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/mach-summit/mach_apic.h Fri Jul 29 20:25:03 2005 @@ -6,8 +6,6 @@ #define esr_disable (1) #define NO_BALANCE_IRQ (0) - -#define NO_IOAPIC_CHECK (1) /* Don't check I/O APIC ID for xAPIC */ /* In clustered mode, the high nibble of APIC ID is a cluster number. * The low nibble is a 4-bit bitmap. */ diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/mach-summit/mach_mpparse.h --- a/xen/include/asm-x86/mach-summit/mach_mpparse.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/mach-summit/mach_mpparse.h Fri Jul 29 20:25:03 2005 @@ -30,7 +30,7 @@ (!strncmp(productid, "VIGIL SMP", 9) || !strncmp(productid, "EXA", 3) || !strncmp(productid, "RUTHLESS SMP", 12))){ - /*use_cyclone = 1;*/ /*enable cyclone-timer*/ + use_cyclone = 1; /*enable cyclone-timer*/ setup_summit(); /*usb_early_handoff = 1;*/ return 1; @@ -44,7 +44,7 @@ if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERVIGIL", 8) || !strncmp(oem_table_id, "EXA", 3))){ - /*use_cyclone = 1;*/ /*enable cyclone-timer*/ + use_cyclone = 1; /*enable cyclone-timer*/ setup_summit(); /*usb_early_handoff = 1;*/ return 1; diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/shadow.h Fri Jul 29 20:25:03 2005 @@ -131,12 +131,12 @@ unsigned long pa, l2_pgentry_t l2e, struct domain_mmap_cache *cache); #if CONFIG_PAGING_LEVELS >= 3 +#include <asm/page-guest32.h> extern void shadow_l3_normal_pt_update(struct domain *d, unsigned long pa, l3_pgentry_t l3e, struct domain_mmap_cache *cache); #endif #if CONFIG_PAGING_LEVELS >= 4 -#include <asm/page-guest32.h> extern void shadow_l4_normal_pt_update(struct domain *d, unsigned long pa, l4_pgentry_t l4e, struct domain_mmap_cache *cache); @@ -631,82 +631,6 @@ } #endif -#if CONFIG_PAGING_LEVELS == 3 -/* dummy functions, PAE has no shadow support yet */ - -static inline void -__shadow_get_l2e( - struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e) -{ - BUG(); -} - -static inline void -__shadow_set_l2e( - struct vcpu *v, unsigned long va, l2_pgentry_t value) -{ - BUG(); -} - -static inline void -__guest_get_l2e( - struct vcpu *v, unsigned long va, l2_pgentry_t *pl2e) -{ - BUG(); -} - -static inline void -__guest_set_l2e( - struct vcpu *v, unsigned long va, l2_pgentry_t value) -{ - BUG(); -} - -static inline void shadow_drop_references( - struct domain *d, struct pfn_info *page) -{ - if ( likely(!shadow_mode_refcounts(d)) || - ((page->u.inuse.type_info & PGT_count_mask) == 0) ) - return; - BUG(); -} - -static inline void shadow_sync_and_drop_references( - struct domain *d, struct pfn_info *page) -{ - if ( likely(!shadow_mode_refcounts(d)) ) - return; - BUG(); -} - -static inline int l1pte_write_fault( - struct vcpu *v, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p, - unsigned long va) -{ - BUG(); - return 42; -} - -static inline int l1pte_read_fault( - struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p) -{ - BUG(); - return 42; -} - -void static inline -shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow) -{ - BUG(); -} - -static inline unsigned long gva_to_gpa(unsigned long gva) -{ - BUG(); - return 42; -} -#endif - /************************************************************************/ /* @@ -1691,8 +1615,10 @@ /************************************************************************/ static inline int -shadow_mode_page_writable(struct domain *d, unsigned long gpfn) -{ +shadow_mode_page_writable(unsigned long va, struct cpu_user_regs *regs, unsigned long gpfn) +{ + struct vcpu *v = current; + struct domain *d = v->domain; unsigned long mfn = __gpfn_to_mfn(d, gpfn); u32 type = frame_table[mfn].u.inuse.type_info & PGT_type_mask; @@ -1701,11 +1627,14 @@ type = shadow_max_pgtable_type(d, gpfn, NULL); if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && - (type == PGT_l1_page_table) ) + (type == PGT_l1_page_table) && + (va < HYPERVISOR_VIRT_START) && + KERNEL_MODE(v, regs) ) return 1; if ( shadow_mode_write_all(d) && - type && (type <= PGT_l4_page_table) ) + type && (type <= PGT_l4_page_table) && + KERNEL_MODE(v, regs) ) return 1; return 0; diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/shadow_64.h --- a/xen/include/asm-x86/shadow_64.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/shadow_64.h Fri Jul 29 20:25:03 2005 @@ -85,8 +85,10 @@ return (((va) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)); case 3: return (((va) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)); +#if CONFIG_PAGING_LEVELS >= 4 case 4: return (((va) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)); +#endif default: //printk("<table_offset_64> level %d is too big\n", level); return -1; diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/shadow_public.h --- a/xen/include/asm-x86/shadow_public.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/shadow_public.h Fri Jul 29 20:25:03 2005 @@ -21,7 +21,7 @@ #ifndef _XEN_SHADOW_PUBLIC_H #define _XEN_SHADOW_PUBLIC_H -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS >= 3 #define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned) extern int alloc_p2m_table(struct domain *d); @@ -30,10 +30,6 @@ struct domain *d, struct pfn_info *page); extern void shadow_drop_references( struct domain *d, struct pfn_info *page); - -extern void shadow_l4_normal_pt_update(struct domain *d, - unsigned long pa, l4_pgentry_t l4e, - struct domain_mmap_cache *cache); extern int shadow_set_guest_paging_levels(struct domain *d, int levels); @@ -56,4 +52,10 @@ }; #endif +#if CONFIG_PAGING_LEVELS >= 4 +extern void shadow_l4_normal_pt_update(struct domain *d, + unsigned long pa, l4_pgentry_t l4e, + struct domain_mmap_cache *cache); #endif + +#endif diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/time.h --- a/xen/include/asm-x86/time.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/time.h Fri Jul 29 20:25:03 2005 @@ -4,4 +4,7 @@ extern int timer_ack; +extern void calibrate_tsc_bp(void); +extern void calibrate_tsc_ap(void); + #endif /* __X86_TIME_H__ */ diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/types.h --- a/xen/include/asm-x86/types.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/types.h Fri Jul 29 20:25:03 2005 @@ -1,10 +1,9 @@ #ifndef __X86_TYPES_H__ #define __X86_TYPES_H__ -/* - * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the - * header files exported to user space - */ +#ifndef __ASSEMBLY__ + +#include <xen/config.h> typedef __signed__ char __s8; typedef unsigned char __u8; @@ -25,8 +24,6 @@ #endif #endif -#include <xen/config.h> - typedef signed char s8; typedef unsigned char u8; @@ -39,9 +36,6 @@ #if defined(__i386__) typedef signed long long s64; typedef unsigned long long u64; -#define BITS_PER_LONG 32 -#define BYTES_PER_LONG 4 -#define LONG_BYTEORDER 2 #if defined(CONFIG_X86_PAE) typedef u64 physaddr_t; #else @@ -50,12 +44,21 @@ #elif defined(__x86_64__) typedef signed long s64; typedef unsigned long u64; -#define BITS_PER_LONG 64 -#define BYTES_PER_LONG 8 -#define LONG_BYTEORDER 3 typedef u64 physaddr_t; #endif typedef unsigned long size_t; +#endif /* __ASSEMBLY__ */ + +#if defined(__i386__) +#define BITS_PER_LONG 32 +#define BYTES_PER_LONG 4 +#define LONG_BYTEORDER 2 +#elif defined(__x86_64__) +#define BITS_PER_LONG 64 +#define BYTES_PER_LONG 8 +#define LONG_BYTEORDER 3 +#endif + #endif /* __X86_TYPES_H__ */ diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/vmx.h --- a/xen/include/asm-x86/vmx.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/vmx.h Fri Jul 29 20:25:03 2005 @@ -61,6 +61,7 @@ CPU_BASED_INVDPG_EXITING | \ CPU_BASED_MWAIT_EXITING | \ CPU_BASED_MOV_DR_EXITING | \ + CPU_BASED_ACTIVATE_IO_BITMAP | \ CPU_BASED_UNCOND_IO_EXITING \ ) @@ -447,4 +448,8 @@ return get_sp(d)->sp_global.eport; } +/* Prototypes */ +void load_cpu_user_regs(struct cpu_user_regs *regs); +void store_cpu_user_regs(struct cpu_user_regs *regs); + #endif /* __ASM_X86_VMX_H__ */ diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/vmx_virpit.h --- a/xen/include/asm-x86/vmx_virpit.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/vmx_virpit.h Fri Jul 29 20:25:03 2005 @@ -19,6 +19,7 @@ /* for simulation of counter 0 in mode 2*/ int vector; /* the pit irq vector */ unsigned int period; /* the frequency. e.g. 10ms*/ + s_time_t scheduled; /* scheduled timer interrupt */ unsigned int channel; /* the pit channel, counter 0~2 */ u64 *intr_bitmap; unsigned int pending_intr_nr; /* the couner for pending timer interrupts */ diff -r a4196568095c -r b53a65034532 xen/include/asm-x86/vmx_vmcs.h --- a/xen/include/asm-x86/vmx_vmcs.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/asm-x86/vmx_vmcs.h Fri Jul 29 20:25:03 2005 @@ -69,6 +69,8 @@ unsigned long shadow_gs; }; +#define PC_DEBUG_PORT 0x80 + struct arch_vmx_struct { struct vmcs_struct *vmcs; /* VMCS pointer in virtual */ unsigned long flags; /* VMCS flags */ @@ -76,6 +78,7 @@ unsigned long cpu_cr3; unsigned long cpu_state; struct msr_state msr_content; + void *io_bitmap_a, *io_bitmap_b; }; #define vmx_schedule_tail(next) \ @@ -97,6 +100,8 @@ int store_vmcs(struct arch_vmx_struct *, u64); int construct_vmcs(struct arch_vmx_struct *, struct cpu_user_regs *, struct vcpu_guest_context *, int); +int modify_vmcs(struct arch_vmx_struct *arch_vmx, + struct cpu_user_regs *regs); #define VMCS_USE_HOST_ENV 1 #define VMCS_USE_SEPARATE_ENV 0 diff -r a4196568095c -r b53a65034532 xen/include/public/grant_table.h --- a/xen/include/public/grant_table.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/public/grant_table.h Fri Jul 29 20:25:03 2005 @@ -213,6 +213,19 @@ s16 status; /* GNTST_* */ } gnttab_dump_table_t; +/* + * GNTTABOP_donate_grant_ref: Donate <frame> to a foreign domain. The + * foreign domain has previously registered the details of the transfer. + * These can be identified from <handle>, a grant reference. + */ +#define GNTTABOP_donate 4 +typedef struct { + memory_t mfn; /* 0 */ + domid_t domid; /* 4 */ + u16 handle; /* 8 */ + s16 status; /* 10: GNTST_* */ + u32 __pad; +} gnttab_donate_t; /* 14 bytes */ /* * Bitfield values for update_pin_status.flags. diff -r a4196568095c -r b53a65034532 xen/include/public/io/netif.h --- a/xen/include/public/io/netif.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/public/io/netif.h Fri Jul 29 20:25:03 2005 @@ -23,10 +23,17 @@ typedef struct { u16 id; /* Echoed in response message. */ +#ifdef CONFIG_XEN_NETDEV_GRANT_RX + grant_ref_t gref; /* 2: Reference to incoming granted frame */ +#endif } netif_rx_request_t; typedef struct { +#ifdef CONFIG_XEN_NETDEV_GRANT_TX + u32 addr; /* 0: Offset in page of start of received packet */ +#else memory_t addr; /* Machine address of packet. */ +#endif u16 csum_valid:1; /* Protocol checksum is validated? */ u16 id:15; s16 status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */ diff -r a4196568095c -r b53a65034532 xen/include/public/xen.h --- a/xen/include/public/xen.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/public/xen.h Fri Jul 29 20:25:03 2005 @@ -329,12 +329,36 @@ #endif } vcpu_info_t; +typedef struct vcpu_time_info { + /* + * The following values are updated periodically (and not necessarily + * atomically!). The guest OS detects this because 'time_version1' is + * incremented just before updating these values, and 'time_version2' is + * incremented immediately after. See the Xen-specific Linux code for an + * example of how to read these values safely (arch/xen/kernel/time.c). + */ + u32 time_version1; + u32 time_version2; + u64 tsc_timestamp; /* TSC at last update of time vals. */ + u64 system_time; /* Time, in nanosecs, since boot. */ + /* + * Current system time: + * system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul + * CPU frequency (Hz): + * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift + */ + u32 tsc_to_system_mul; + s8 tsc_shift; +} vcpu_time_info_t; + /* * Xen/kernel shared data -- pointer provided in start_info. * NB. We expect that this struct is smaller than a page. */ typedef struct shared_info { vcpu_info_t vcpu_data[MAX_VIRT_CPUS]; + + vcpu_time_info_t vcpu_time[MAX_VIRT_CPUS]; u32 n_vcpu; @@ -373,33 +397,11 @@ u32 evtchn_mask[32]; /* - * Time: The following abstractions are exposed: System Time, Clock Time, - * Domain Virtual Time. Domains can access Cycle counter time directly. + * Wallclock time: updated only by control software. Guests should base + * their gettimeofday() syscall on this wallclock-base value. */ - u64 cpu_freq; /* CPU frequency (Hz). */ - - /* - * The following values are updated periodically (and not necessarily - * atomically!). The guest OS detects this because 'time_version1' is - * incremented just before updating these values, and 'time_version2' is - * incremented immediately after. See the Xen-specific Linux code for an - * example of how to read these values safely (arch/xen/kernel/time.c). - */ - u32 time_version1; - u32 time_version2; - tsc_timestamp_t tsc_timestamp; /* TSC at last update of time vals. */ - u64 system_time; /* Time, in nanosecs, since boot. */ u32 wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ u32 wc_usec; /* Usecs 00:00:00 UTC, Jan 1, 1970. */ - u64 domain_time; /* Domain virtual time, in nanosecs. */ - - /* - * Timeout values: - * Allow a domain to specify a timeout value in system time and - * domain virtual time. - */ - u64 wall_timeout; - u64 domain_timeout; arch_shared_info_t arch; @@ -444,7 +446,7 @@ memory_t mod_start; /* VIRTUAL address of pre-loaded module. */ memory_t mod_len; /* Size (bytes) of pre-loaded module. */ s8 cmd_line[MAX_GUEST_CMDLINE]; - memory_t store_page; /* VIRTUAL address of store page. */ + memory_t store_mfn; /* MACHINE page number of shared page. */ u16 store_evtchn; /* Event channel for store communication. */ } start_info_t; diff -r a4196568095c -r b53a65034532 xen/include/xen/mm.h --- a/xen/include/xen/mm.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/xen/mm.h Fri Jul 29 20:25:03 2005 @@ -33,11 +33,14 @@ /* Domain suballocator. These functions are *not* interrupt-safe.*/ void init_domheap_pages(physaddr_t ps, physaddr_t pe); -struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order); +struct pfn_info *alloc_domheap_pages( + struct domain *d, unsigned int order, unsigned int flags); void free_domheap_pages(struct pfn_info *pg, unsigned int order); unsigned long avail_domheap_pages(void); -#define alloc_domheap_page(d) (alloc_domheap_pages(d,0)) +#define alloc_domheap_page(d) (alloc_domheap_pages(d,0,0)) #define free_domheap_page(p) (free_domheap_pages(p,0)) + +#define ALLOC_DOM_DMA 1 /* Automatic page scrubbing for dead domains. */ extern struct list_head page_scrub_list; diff -r a4196568095c -r b53a65034532 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/xen/sched.h Fri Jul 29 20:25:03 2005 @@ -92,7 +92,6 @@ domid_t domain_id; shared_info_t *shared_info; /* shared data area */ - spinlock_t time_lock; spinlock_t big_lock; diff -r a4196568095c -r b53a65034532 xen/include/xen/time.h --- a/xen/include/xen/time.h Fri Jul 29 18:52:33 2005 +++ b/xen/include/xen/time.h Fri Jul 29 20:25:03 2005 @@ -30,7 +30,8 @@ #include <public/xen.h> #include <asm/time.h> -extern int init_xen_time(); +extern int init_xen_time(void); +extern void init_percpu_time(void); extern unsigned long cpu_khz; diff -r a4196568095c -r b53a65034532 xen/tools/Makefile --- a/xen/tools/Makefile Fri Jul 29 18:52:33 2005 +++ b/xen/tools/Makefile Fri Jul 29 20:25:03 2005 @@ -1,6 +1,13 @@ + +include $(BASEDIR)/../Config.mk default: $(MAKE) -C figlet + $(MAKE) symbols clean: - $(MAKE) -C figlet clean \ No newline at end of file + $(MAKE) -C figlet clean + rm -f *.o symbols + +symbols: symbols.c + $(HOSTCC) -o $@ $< diff -r a4196568095c -r b53a65034532 linux-2.6-xen-sparse/kernel/ptrace.c --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/linux-2.6-xen-sparse/kernel/ptrace.c Fri Jul 29 20:25:03 2005 @@ -0,0 +1,391 @@ +/* + * linux/kernel/ptrace.c + * + * (C) Copyright 1999 Linus Torvalds + * + * Common interfaces for "ptrace()" which we do not want + * to continually duplicate across every architecture. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/smp_lock.h> +#include <linux/ptrace.h> +#include <linux/security.h> +#include <linux/signal.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> + +/* + * ptrace a task: make the debugger its new parent and + * move it to the ptrace list. + * + * Must be called with the tasklist lock write-held. + */ +void __ptrace_link(task_t *child, task_t *new_parent) +{ + if (!list_empty(&child->ptrace_list)) + BUG(); + if (child->parent == new_parent) + return; + list_add(&child->ptrace_list, &child->parent->ptrace_children); + REMOVE_LINKS(child); + child->parent = new_parent; + SET_LINKS(child); +} + +/* + * Turn a tracing stop into a normal stop now, since with no tracer there + * would be no way to wake it up with SIGCONT or SIGKILL. If there was a + * signal sent that would resume the child, but didn't because it was in + * TASK_TRACED, resume it now. + * Requires that irqs be disabled. + */ +void ptrace_untrace(task_t *child) +{ + spin_lock(&child->sighand->siglock); + if (child->state == TASK_TRACED) { + if (child->signal->flags & SIGNAL_STOP_STOPPED) { + child->state = TASK_STOPPED; + } else { + signal_wake_up(child, 1); + } + } + spin_unlock(&child->sighand->siglock); +} + +/* + * unptrace a task: move it back to its original parent and + * remove it from the ptrace list. + * + * Must be called with the tasklist lock write-held. + */ +void __ptrace_unlink(task_t *child) +{ + if (!child->ptrace) + BUG(); + child->ptrace = 0; + if (!list_empty(&child->ptrace_list)) { + list_del_init(&child->ptrace_list); + REMOVE_LINKS(child); + child->parent = child->real_parent; + SET_LINKS(child); + } + + if (child->state == TASK_TRACED) + ptrace_untrace(child); +} + +/* + * Check that we have indeed attached to the thing.. + */ +int ptrace_check_attach(struct task_struct *child, int kill) +{ + int ret = -ESRCH; + + /* + * We take the read lock around doing both checks to close a + * possible race where someone else was tracing our child and + * detached between these two checks. After this locked check, + * we are sure that this is our traced child and that can only + * be changed by us so it's not changing right after this. + */ + read_lock(&tasklist_lock); + if ((child->ptrace & PT_PTRACED) && child->parent == current && + (!(child->ptrace & PT_ATTACHED) || child->real_parent != current) + && child->signal != NULL) { + ret = 0; + spin_lock_irq(&child->sighand->siglock); + if (child->state == TASK_STOPPED) { + child->state = TASK_TRACED; + } else if (child->state != TASK_TRACED && !kill) { + ret = -ESRCH; + } + spin_unlock_irq(&child->sighand->siglock); + } + read_unlock(&tasklist_lock); + + if (!ret && !kill) { + wait_task_inactive(child); + } + + /* All systems go.. */ + return ret; +} + +int ptrace_attach(struct task_struct *task) +{ + int retval; + task_lock(task); + retval = -EPERM; + if (task->pid <= 1) + goto bad; + if (task == current) + goto bad; + if (!task->mm) + goto bad; + if(((current->uid != task->euid) || + (current->uid != task->suid) || + (current->uid != task->uid) || + (current->gid != task->egid) || + (current->gid != task->sgid) || + (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) + goto bad; + smp_rmb(); + if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + goto bad; + /* the same process cannot be attached many times */ + if (task->ptrace & PT_PTRACED) + goto bad; + retval = security_ptrace(current, task); + if (retval) + goto bad; + + /* Go */ + task->ptrace |= PT_PTRACED | ((task->real_parent != current) + ? PT_ATTACHED : 0); + if (capable(CAP_SYS_PTRACE)) + task->ptrace |= PT_PTRACE_CAP; + task_unlock(task); + + write_lock_irq(&tasklist_lock); + __ptrace_link(task, current); + write_unlock_irq(&tasklist_lock); + + force_sig_specific(SIGSTOP, task); + return 0; + +bad: + task_unlock(task); + return retval; +} + +int ptrace_detach(struct task_struct *child, unsigned int data) +{ + if (!valid_signal(data)) + return -EIO; + + /* Architecture-specific hardware disable .. */ + ptrace_disable(child); + + /* .. re-parent .. */ + child->exit_code = data; + + write_lock_irq(&tasklist_lock); + __ptrace_unlink(child); + /* .. and wake it up. */ + if (child->exit_state != EXIT_ZOMBIE) + wake_up_process(child); + write_unlock_irq(&tasklist_lock); + + return 0; +} + +/* + * Access another process' address space. + * Source/target buffer must be kernel space, + * Do not walk the page table directly, use get_user_pages + */ + +int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) +{ + struct mm_struct *mm; + struct vm_area_struct *vma; + struct page *page; + void *old_buf = buf; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + down_read(&mm->mmap_sem); + /* ignore errors, just check how much was sucessfully transfered */ + while (len) { + int bytes, ret, offset; + void *maddr; + + ret = get_user_pages(tsk, mm, addr, 1, + write, 1, &page, &vma); + if (ret <= 0) + break; + + bytes = len; + offset = addr & (PAGE_SIZE-1); + if (bytes > PAGE_SIZE-offset) + bytes = PAGE_SIZE-offset; + + maddr = kmap(page); + if (write) { + copy_to_user_page(vma, page, addr, + maddr + offset, buf, bytes); + set_page_dirty_lock(page); + } else { + copy_from_user_page(vma, page, addr, + buf, maddr + offset, bytes); + } + kunmap(page); + page_cache_release(page); + len -= bytes; + buf += bytes; + addr += bytes; + } + up_read(&mm->mmap_sem); + mmput(mm); + + return buf - old_buf; +} +EXPORT_SYMBOL(access_process_vm); + +int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) +{ + int copied = 0; + + while (len > 0) { + char buf[128]; + int this_len, retval; + + this_len = (len > sizeof(buf)) ? sizeof(buf) : len; + retval = access_process_vm(tsk, src, buf, this_len, 0); + if (!retval) { + if (copied) + break; + return -EIO; + } + if (copy_to_user(dst, buf, retval)) + return -EFAULT; + copied += retval; + src += retval; + dst += retval; + len -= retval; + } + return copied; +} + +int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len) +{ + int copied = 0; + + while (len > 0) { + char buf[128]; + int this_len, retval; + + this_len = (len > sizeof(buf)) ? sizeof(buf) : len; + if (copy_from_user(buf, src, this_len)) + return -EFAULT; + retval = access_process_vm(tsk, dst, buf, this_len, 1); + if (!retval) { + if (copied) + break; + return -EIO; + } + copied += retval; + src += retval; + dst += retval; + len -= retval; + } + return copied; +} + +static int ptrace_setoptions(struct task_struct *child, long data) +{ + child->ptrace &= ~PT_TRACE_MASK; + + if (data & PTRACE_O_TRACESYSGOOD) + child->ptrace |= PT_TRACESYSGOOD; + + if (data & PTRACE_O_TRACEFORK) + child->ptrace |= PT_TRACE_FORK; + + if (data & PTRACE_O_TRACEVFORK) + child->ptrace |= PT_TRACE_VFORK; + + if (data & PTRACE_O_TRACECLONE) + child->ptrace |= PT_TRACE_CLONE; + + if (data & PTRACE_O_TRACEEXEC) + child->ptrace |= PT_TRACE_EXEC; + + if (data & PTRACE_O_TRACEVFORKDONE) + child->ptrace |= PT_TRACE_VFORK_DONE; + + if (data & PTRACE_O_TRACEEXIT) + child->ptrace |= PT_TRACE_EXIT; + + return (data & ~PTRACE_O_MASK) ? -EINVAL : 0; +} + +static int ptrace_getsiginfo(struct task_struct *child, siginfo_t __user * data) +{ + siginfo_t lastinfo; + int error = -ESRCH; + + read_lock(&tasklist_lock); + if (likely(child->sighand != NULL)) { + error = -EINVAL; + spin_lock_irq(&child->sighand->siglock); + if (likely(child->last_siginfo != NULL)) { + lastinfo = *child->last_siginfo; + error = 0; + } + spin_unlock_irq(&child->sighand->siglock); + } + read_unlock(&tasklist_lock); + if (!error) + return copy_siginfo_to_user(data, &lastinfo); + return error; +} + +static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data) +{ + siginfo_t newinfo; + int error = -ESRCH; + + if (copy_from_user(&newinfo, data, sizeof (siginfo_t))) + return -EFAULT; + + read_lock(&tasklist_lock); + if (likely(child->sighand != NULL)) { + error = -EINVAL; + spin_lock_irq(&child->sighand->siglock); + if (likely(child->last_siginfo != NULL)) { + *child->last_siginfo = newinfo; + error = 0; + } + spin_unlock_irq(&child->sighand->siglock); + } + read_unlock(&tasklist_lock); + return error; +} + +int ptrace_request(struct task_struct *child, long request, + long addr, long data) +{ + int ret = -EIO; + + switch (request) { +#ifdef PTRACE_OLDSETOPTIONS + case PTRACE_OLDSETOPTIONS: +#endif + case PTRACE_SETOPTIONS: + ret = ptrace_setoptions(child, data); + break; + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, (unsigned long __user *) data); + break; + case PTRACE_GETSIGINFO: + ret = ptrace_getsiginfo(child, (siginfo_t __user *) data); + break; + case PTRACE_SETSIGINFO: + ret = ptrace_setsiginfo(child, (siginfo_t __user *) data); + break; + default: + break; + } + + return ret; +} diff -r a4196568095c -r b53a65034532 tools/debugger/pdb/linux-2.6-module/pdb_debug.h --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/debugger/pdb/linux-2.6-module/pdb_debug.h Fri Jul 29 20:25:03 2005 @@ -0,0 +1,46 @@ + +#ifndef __PDB_DEBUG_H_ +#define __PDB_DEBUG_H_ + +/* debugger.c */ +void pdb_initialize_bwcpoint (void); +int pdb_suspend (struct task_struct *target); +int pdb_resume (struct task_struct *target); +int pdb_read_registers (struct task_struct *target, pdb_op_rd_regs_p op); +int pdb_write_register (struct task_struct *target, pdb_op_wr_reg_p op); +int pdb_read_memory (struct task_struct *target, pdb_op_rd_mem_req_p req, + pdb_op_rd_mem_resp_p resp); +int pdb_write_memory (struct task_struct *target, pdb_op_wr_mem_p op); +int pdb_access_memory (struct task_struct *target, unsigned long address, + void *buffer, int length, int write); +int pdb_continue (struct task_struct *target); +int pdb_step (struct task_struct *target); + +int pdb_insert_memory_breakpoint (struct task_struct *target, + memory_t address, u32 length); +int pdb_remove_memory_breakpoint (struct task_struct *target, + memory_t address, u32 length); + +int pdb_exceptions_notify (struct notifier_block *self, unsigned long val, + void *data); + +int pdb_debug_fn (struct pt_regs *regs, long error_code, + unsigned int condition); +int pdb_int3_fn (struct pt_regs *regs, long error_code); + +/* module.c */ +void pdb_send_response (pdb_response_t *response); + +#endif + + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ + diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/CreateDomain.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/CreateDomain.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,163 @@ +from xen.sv.Wizard import * +from xen.sv.util import * +from xen.sv.GenTabbed import PreTab + +from xen.xm.create import make_config, OptVals + +from xen.xend.XendClient import server + +class CreateDomain( Wizard ): + def __init__( self, urlWriter ): + + sheets = [ CreatePage0, + CreatePage1, + CreatePage2, + CreatePage3, + CreatePage4, + CreateFinish ] + + Wizard.__init__( self, urlWriter, "Create Domain", sheets ) + +class CreatePage0( Sheet ): + + def __init__( self, urlWriter ): + Sheet.__init__( self, urlWriter, "General", 0 ) + self.addControl( InputControl( 'name', 'VM Name', 'VM Name:', "[\\w|\\S]+", "You must enter a name in this field" ) ) + self.addControl( InputControl( 'memory', '64', 'Memory (Mb):', "[\\d]+", "You must enter a number in this field" ) ) + self.addControl( InputControl( 'cpu', '0', 'CPU:', "[\\d]+", "You must enter a number in this feild" ) ) + self.addControl( InputControl( 'cpu_weight', '1', 'CPU Weight:', "[\\d]+", "You must enter a number in this feild" ) ) + +class CreatePage1( Sheet ): + + def __init__( self, urlWriter ): + Sheet.__init__( self, urlWriter, "Setup Kernel Image", 1 ) +# For now we don't need to select a builder... +# self.addControl( ListControl( 'builder', [('linux', 'Linux'), ('netbsd', 'NetBSD')], 'Kernel Type:' ) ) + self.addControl( FileControl( 'kernel', '/boot/vmlinuz-2.6.9-xenU', 'Kernel Image:' ) ) + self.addControl( InputControl( 'extra', '', 'Kernel Command Line Parameters:' ) ) + +class CreatePage2( Sheet ): + + def __init__( self, urlWriter ): + Sheet.__init__( self, urlWriter, "Setup Virtual Block Device", 2 ) + self.addControl( InputControl( 'num_vbds', '1', 'Number of VBDs:', '[\\d]+', "You must enter a number in this field" ) ) + +class CreatePage3( Sheet ): + + def __init__( self, urlWriter ): + Sheet.__init__( self, urlWriter, "Setup Virtual Block Device", 3 ) + + def write_BODY( self, request, err ): + if not self.passback: self.parseForm( request ) + + previous_values = sxp2hash( string2sxp( self.passback ) ) #get the hash for quick reference + + num_vbds = previous_values.get( 'num_vbds' ) + + for i in range( int( num_vbds ) ): + self.addControl( InputControl( 'vbd%s_dom0' % i, 'phy:sda%s' % str(i + 1), 'Device %s name:' % i ) ) + self.addControl( InputControl( 'vbd%s_domU' % i, 'sda%s' % str(i + 1), 'Virtualized device %s:' % i ) ) + self.addControl( ListControl( 'vbd%s_mode' % i, [('w', 'Read + Write'), ('r', 'Read Only')], 'Device %s mode:' % i ) ) + + self.addControl( InputControl( 'root', '/dev/sda1', 'Root device (in VM):' ) ) + + Sheet.write_BODY( self, request, err ) + +class CreatePage4( Sheet ): + + def __init__( self, urlWriter ): + Sheet.__init__( self, urlWriter, "Network settings", 4 ) + self.addControl( ListControl( 'dhcp', [('off', 'No'), ('dhcp', 'Yes')], 'Use DHCP:' ) ) + self.addControl( InputControl( 'hostname', 'hostname', 'VM Hostname:' ) ) + self.addControl( InputControl( 'ip_addr', '1.2.3.4', 'VM IP Address:' ) ) + self.addControl( InputControl( 'ip_subnet', '255.255.255.0', 'VM Subnet Mask:' ) ) + self.addControl( InputControl( 'ip_gateway', '1.2.3.4', 'VM Gateway:' ) ) + self.addControl( InputControl( 'ip_nfs', '1.2.3.4', 'NFS Server:' ) ) + +class CreateFinish( Sheet ): + + def __init__( self, urlWriter ): + Sheet.__init__( self, urlWriter, "All Done", 5 ) + + def write_BODY( self, request, err ): + + if not self.passback: self.parseForm( request ) + + xend_sxp = self.translate_sxp( string2sxp( self.passback ) ) + + try: + dom_sxp = server.xend_domain_create( xend_sxp ) + success = "Your domain was successfully created.\n" + except: + success = "There was an error creating your domain.\nThe configuration used is as follows:\n" + dom_sxp = xend_sxp + + + + pt = PreTab( success + sxp2prettystring( dom_sxp ) ) + pt.write_BODY( request ) + + request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback ) + request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location ) + + def translate_sxp( self, fin_sxp ): + fin_hash = ssxp2hash( fin_sxp ) + + def get( key ): + ret = fin_hash.get( key ) + if ret: + return ret + else: + return "" + + vals = OptVals() + + vals.name = get( 'name' ) + vals.memory = get( 'memory' ) + vals.maxmem = get( 'maxmem' ) + vals.cpu = get( 'cpu' ) + vals.cpu_weight = get( 'cpu_weight' ) + + vals.builder = get( 'builder' ) + vals.kernel = get( 'kernel' ) + vals.root = get( 'root' ) + vals.extra = get( 'extra' ) + + #setup vbds + + vbds = [] + + for i in range( int( get( 'num_vbds' ) ) ): + vbds.append( ( get( 'vbd%s_dom0' % i ), get('vbd%s_domU' % i ), get( 'vbd%s_mode' % i ) ) ) + + vals.disk = vbds + + #misc + + vals.pci = [] + + vals.blkif = None + vals.netif = None + vals.restart = None + vals.console = None + vals.ramdisk = None + + #setup vifs + + vals.vif = [] + vals.nics = 1 + + ip = get( 'ip_addr' ) + nfs = get( 'ip_nfs' ) + gate = get( 'ip_gateway' ) + mask = get( 'ip_subnet' ) + host = get( 'hostname' ) + dhcp = get( 'dhcp' ) + + vals.cmdline_ip = "%s:%s:%s:%s:%s:eth0:%s" % (ip, nfs, gate, mask, host, dhcp) + + try: + return make_config( vals ) + except: + return [["Error creating domain config."]] + diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/Daemon.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/Daemon.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,110 @@ +########################################################### +## XenSV Web Control Interface Daemon +## Copyright (C) 2004, K A Fraser (University of Cambridge) +## Copyright (C) 2004, Mike Wray <mike.wray@xxxxxx> +## Copyright (C) 2004, Tom Wilkie <tw275@xxxxxxxxx> +########################################################### + +import os +import os.path +import sys +import re + +from xen.sv.params import * + +from twisted.internet import reactor +from twisted.web import static, server, script + +from xen.util.ip import _readline, _readlines + +class Daemon: + """The xend daemon. + """ + def __init__(self): + self.shutdown = 0 + self.traceon = 0 + + def daemon_pids(self): + pids = [] + pidex = '(?P<pid>\d+)' + pythonex = '(?P<python>\S*python\S*)' + cmdex = '(?P<cmd>.*)' + procre = re.compile('^\s*' + pidex + '\s*' + pythonex + '\s*' + cmdex + '$') + xendre = re.compile('^/usr/sbin/xend\s*(start|restart)\s*.*$') + procs = os.popen('ps -e -o pid,args 2>/dev/null') + for proc in procs: + pm = procre.match(proc) + if not pm: continue + xm = xendre.match(pm.group('cmd')) + if not xm: continue + #print 'pid=', pm.group('pid'), 'cmd=', pm.group('cmd') + pids.append(int(pm.group('pid'))) + return pids + + def new_cleanup(self, kill=0): + err = 0 + pids = self.daemon_pids() + if kill: + for pid in pids: + print "Killing daemon pid=%d" % pid + os.kill(pid, signal.SIGHUP) + elif pids: + err = 1 + print "Daemon already running: ", pids + return err + + def cleanup(self, kill=False): + # No cleanup to do if PID_FILE is empty. + if not os.path.isfile(PID_FILE) or not os.path.getsize(PID_FILE): + return 0 + # Read the pid of the previous invocation and search active process list. + pid = open(PID_FILE, 'r').read() + lines = _readlines(os.popen('ps ' + pid + ' 2>/dev/null')) + for line in lines: + if re.search('^ *' + pid + '.+xensv', line): + if not kill: + print "Daemon is already running (pid %d)" % int(pid) + return 1 + # Old daemon is still active: terminate it. + os.kill(int(pid), 1) + # Delete the stale PID_FILE. + os.remove(PID_FILE) + return 0 + + def start(self, trace=0): + if self.cleanup(kill=False): + return 1 + + # Fork -- parent writes PID_FILE and exits. + pid = os.fork() + if pid: + # Parent + pidfile = open(PID_FILE, 'w') + pidfile.write(str(pid)) + pidfile.close() + return 0 + # Child + self.run() + return 0 + + def stop(self): + return self.cleanup(kill=True) + + def run(self): + root = static.File( SV_ROOT ) + root.indexNames = [ 'Main.rpy' ] + root.processors = { '.rpy': script.ResourceScript } + reactor.listenTCP( SV_PORT, server.Site( root ) ) + reactor.run() + + def exit(self): + reactor.disconnectAll() + sys.exit(0) + +def instance(): + global inst + try: + inst + except: + inst = Daemon() + return inst diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/DomInfo.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/DomInfo.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,148 @@ +from xen.xend.XendClient import server +from xen.xend import PrettyPrint + +from xen.sv.HTMLBase import HTMLBase +from xen.sv.util import * +from xen.sv.GenTabbed import * + +DEBUG=1 + +class DomInfo( GenTabbed ): + + def __init__( self, urlWriter ): + + self.dom = 0; + + def tabUrlWriter( tab ): + return urlWriter( "&dom=%s%s" % ( self.dom, tab ) ) + + GenTabbed.__init__( self, "Domain Info", tabUrlWriter, [ 'General', 'SXP', 'Devices' ], [ DomGeneralTab, DomSXPTab, NullTab ] ) + + def write_BODY( self, request ): + dom = request.args.get('dom') + + if dom is None or len(dom) != 1: + request.write( "<p>Please Select a Domain</p>" ) + return None + else: + self.dom = dom[0] + + GenTabbed.write_BODY( self, request ) + + def write_MENU( self, request ): + pass + +class DomGeneralTab( CompositeTab ): + def __init__( self ): + CompositeTab.__init__( self, [ DomGenTab, DomActionTab ] ) + +class DomGenTab( GeneralTab ): + + def __init__( self ): + + titles = {} + + titles[ 'ID' ] = 'dom' + titles[ 'Name' ] = 'name' + titles[ 'CPU' ] = 'cpu' + titles[ 'Memory' ] = ( 'mem', memoryFormatter ) + titles[ 'State' ] = ( 'state', stateFormatter ) + titles[ 'Total CPU' ] = ( 'cpu_time', smallTimeFormatter ) + titles[ 'Up Time' ] = ( 'up_time', bigTimeFormatter ) + + GeneralTab.__init__( self, {}, titles ) + + def write_BODY( self, request ): + + self.dom = getVar('dom', request) + + if self.dom is None: + request.write( "<p>Please Select a Domain</p>" ) + return None + + self.dict = getDomInfoHash( self.dom ) + + GeneralTab.write_BODY( self, request ) + +class DomSXPTab( PreTab ): + + def __init__( self ): + self.dom = 0 + PreTab.__init__( self, "" ) + + + def write_BODY( self, request ): + self.dom = getVar('dom', request) + + if self.dom is None: + request.write( "<p>Please Select a Domain</p>" ) + return None + + try: + domInfo = server.xend_domain( self.dom ) + except: + domInfo = [["Error getting domain details."]] + + self.source = sxp2prettystring( domInfo ) + + PreTab.write_BODY( self, request ) + +class DomActionTab( ActionTab ): + + def __init__( self ): + actions = { "shutdown" : "shutdown", + "reboot" : "reboot", + "pause" : "pause", + "unpause" : "unpause", + "destroy" : "destroy" } + ActionTab.__init__( self, actions ) + + def op_shutdown( self, request ): + dom = getVar( 'dom', request ) + if not dom is None and dom != '0': + if DEBUG: print ">DomShutDown %s" % dom + try: + server.xend_domain_shutdown( int( dom ), "halt" ) + except: + pass + + def op_reboot( self, request ): + dom = getVar( 'dom', request ) + if not dom is None and dom != '0': + if DEBUG: print ">DomReboot %s" % dom + try: + server.xend_domain_shutdown( int( dom ), "reboot" ) + except: + pass + + def op_pause( self, request ): + dom = getVar( 'dom', request ) + if not dom is None and dom != '0': + if DEBUG: print ">DomPause %s" % dom + try: + server.xend_domain_pause( int( dom ) ) + except: + pass + + def op_unpause( self, request ): + dom = getVar( 'dom', request ) + if not dom is None and dom != '0': + if DEBUG: print ">DomUnpause %s" % dom + try: + server.xend_domain_unpause( int( dom ) ) + except: + pass + + def op_destroy( self, request ): + dom = getVar( 'dom', request ) + if not dom is None and dom != '0': + if DEBUG: print ">DomDestroy %s" % dom + try: + server.xend_domain_destroy( int( dom ), "halt" ) + except: + pass + + + + + diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/DomList.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/DomList.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,81 @@ +from xen.xend.XendClient import server +from xen.xend import sxp + +from xen.sv.HTMLBase import HTMLBase +from xen.sv.util import * + +class DomList( HTMLBase ): + + isLeaf = True + + def __init__( self, urlWriter ): + HTMLBase.__init__(self) + self.urlWriter = urlWriter + + def write_MENU( self, request ): + return self.write_BODY( request, head=True, long=False ) + + def write_BODY( self, request, head=True, long=True ): + + domains = [] + + try: + domains = server.xend_domains() + domains.sort() + except: + pass + + request.write( "\n<table style='border:0px solid white' cellspacing='0' cellpadding='0' border='0' width='100%'>\n" ) + + if head: + request.write( "<tr class='domainInfoHead'>" ) + self.write_DOMAIN_HEAD( request, long ) + request.write( "</tr>" ) + + odd = True + + if not domains is None: + for domain in domains: + if odd: + request.write( "<tr class='domainInfoOdd'>\n" ) + odd = False + else: + request.write( "<tr class='domainInfoEven'>\n" ) + odd = True + self.write_DOMAIN( request, getDomInfoHash( domain ), long ) + request.write( "</tr>\n" ) + else: + request.write( "<tr colspan='10'><p class='small'>Error getting domain list<br/>Perhaps XenD not running?</p></tr>") + + request.write( "</table>\n" ) + + def write_DOMAIN( self, request, domInfoHash, long=True ): + request.write( "<td class='domainInfo' align='center'>%(id)s</td>\n" % domInfoHash ) + + url = self.urlWriter( "&mod=info&dom=%(id)s" % domInfoHash ) + + request.write( "<td class='domainInfo' align='center'><a href='%s'>%s</a></td>\n" % ( url, domInfoHash['name'] ) ) + if long: + request.write( "<td class='domainInfo' align='center'>%(memory)5s</td>\n" % domInfoHash ) + request.write( "<td class='domainInfo' align='center'>%(cpu)2s</td>\n" % domInfoHash ) + request.write( "<td class='domainInfo' align='center'>%(state)5s</td>\n" % domInfoHash ) + if domInfoHash[ 'id' ] != "0": + request.write( "<td class='domainInfo' align='center'>" ) + + if domInfoHash[ 'state' ][ 2 ] == "-": + request.write( "<img src='images/small-pause.png' onclick='doOp2( \"pause\", \"%(dom)-4s\" )'>" % domInfoHash ) + else: + request.write( "<img src='images/small-unpause.png' onclick='doOp2( \"unpause\", \"%(dom)-4s\" )'>" % domInfoHash ) + + request.write( "<img src='images/small-destroy.png' onclick='doOp2( \"destroy\", \"%(dom)-4s\" )'></td>" % domInfoHash) + else: + request.write( "<td> </td>" ) + + def write_DOMAIN_HEAD( self, request, long=True ): + request.write( "<td class='domainInfoHead' align='center'>Domain</td>\n" ) + request.write( "<td class='domainInfoHead' align='center'>Name</td>\n" ) + if long: + request.write( "<td class='domainInfoHead' align='center'>Memory / Mb</td>\n" ) + request.write( "<td class='domainInfoHead' align='center'>CPU</td>\n" ) + request.write( "<td class='domainInfoHead' align='center'>State</td>\n" ) + request.write( "<td class='domainInfoHead' align='center'></td>\n" ) diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/GenTabbed.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/GenTabbed.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,135 @@ +import types + +from xen.sv.HTMLBase import HTMLBase +from xen.sv.TabView import TabView +from xen.sv.util import getVar + +class GenTabbed( HTMLBase ): + + def __init__( self, title, urlWriter, tabStrings, tabObjects ): + HTMLBase.__init__(self) + self.tabStrings = tabStrings + self.tabObjects = tabObjects + self.urlWriter = urlWriter + self.title = title + + def write_BODY( self, request, urlWriter = None ): + try: + tab = int( getVar( 'tab', request, 0 ) ) + except: + tab = 0 + + request.write( "<table style='' width='100%' border='0' cellspacing='0' cellpadding='0'>" ) + request.write( "<tr><td>" ) + request.write( "<p align='center'><u>%s</u></p>" % self.title ) + + TabView( tab, self.tabStrings, self.urlWriter ).write_BODY( request ) + + request.write( "</td></tr><tr><td>" ) + + try: + render_tab = self.tabObjects[ tab ] + render_tab().write_BODY( request ) + except: + request.write( "<p>Error Rendering Tab</p>" ) + + request.write( "</td></tr></table>" ) + + def perform( self, request ): + try: + tab = int( getVar( 'tab', request, 0 ) ) + except: + tab = 0; + + op_tab = self.tabObjects[ tab ] + + if op_tab: + op_tab().perform( request ) + +class PreTab( HTMLBase ): + + def __init__( self, source ): + HTMLBase.__init__( self ) + self.source = source + + def write_BODY( self, request ): + + request.write( "<div style='display: block; overflow: auto; border: 0px solid black; width: 540px; padding: 5px; z-index:0; align: center'><pre>" ) + + request.write( self.source ) + + request.write( "</pre></div>" ) + +class GeneralTab( HTMLBase ): + + def __init__( self, dict, titles ): + HTMLBase.__init__( self ) + self.dict = dict + self.titles = titles + + def write_BODY( self, request ): + + request.write( "<table width='100%' cellspacing='0' cellpadding='0' border='0'>" ) + + def writeAttr( niceName, attr, formatter=None ): + if type( attr ) is types.TupleType: + ( attr, formatter ) = attr + + if attr in self.dict: + if formatter: + temp = formatter( self.dict[ attr ] ) + else: + temp = str( self.dict[ attr ] ) + request.write( "<tr><td width='50%%'><p>%s:</p></td><td width='50%%'><p>%s</p></td></tr>" % ( niceName, temp ) ) + + for niceName, attr in self.titles.items(): + writeAttr( niceName, attr ) + + request.write( "</table>" ) + +class NullTab( HTMLBase ): + + def __init__( self ): + HTMLBase.__init__( self ) + self.title = "Null Tab" + + def __init__( self, title ): + HTMLBase.__init__( self ) + self.title = title + + def write_BODY( self, request ): + request.write( "<p>%s</p>" % self.title ) + +class ActionTab( HTMLBase ): + + def __init__( self, actions ): + self.actions = actions + HTMLBase.__init__( self ) + + def write_BODY( self, request ): + request.write( "<p align='center'><table cellspacing='3' cellpadding='2' border='0'><tr>" ) + + for ( command, text ) in self.actions.items(): + request.write( "<td style='border: 1px solid black; background-color: grey' onmouseover='buttonMouseOver( this )' onmouseout='buttonMouseOut( this )'>" ) + request.write( "<p><a href='javascript: doOp( \"%s\" );'>%s</a></p></td>" % (command, text) ) + + request.write("</table></p>") + +class CompositeTab( HTMLBase ): + + def __init__( self, tabs ): + HTMLBase.__init__( self ) + self.tabs = tabs + + def write_BODY( self, request ): + for tab in self.tabs: + request.write( "<br/>" ) + tab().write_BODY( request ) + + def perform( self, request ): + for tab in self.tabs: + tab().perform( request ) + + + + diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/HTMLBase.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/HTMLBase.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,62 @@ +from xen.sv.util import * + +class HTMLBase: + + isLeaf = True + + def __init__( self ): + pass + + def render_POST( self, request ): + self.perform( request ) + return self.render_GET( request ) + + def render_GET( self, request ): + self.write_TOP( request ) + self.write_BODY( request ) + self.write_BOTTOM( request ) + return '' + + def write_BODY( self, request ): + request.write( "BODY" ) + + def write_TOP( self, request ): + request.write( '<html><head><title>Xen</title><link rel="stylesheet" type="text/css" href="inc/style.css" />' ) + request.write( '<script src="inc/script.js"></script>' ) + request.write( '</head><body>' ) + request.write('<form method="post" action="%s">' % request.uri) + + def write_BOTTOM( self, request ): + request.write('<input type="hidden" name="op" value="">') + request.write('<input type="hidden" name="args" value="">') + request.write('</form>') + request.write( "</body></html>" ) + + def get_op_method(self, op): + """Get the method for an operation. + For operation 'foo' looks for 'op_foo'. + + op operation name + returns method or None + """ + op_method_name = 'op_' + op + return getattr(self, op_method_name, None) + + def perform(self, req): + """General operation handler for posted operations. + For operation 'foo' looks for a method op_foo and calls + it with op_foo(req). Replies with code 500 if op_foo + is not found. + + The method must return a list when req.use_sxp is true + and an HTML string otherwise (or list). + Methods may also return a Deferred (for incomplete processing). + + req request + """ + op = req.args.get('op') + if not op is None and len(op) == 1: + op = op[0] + op_method = self.get_op_method(op) + if op_method: + op_method( req ) diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/Main.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/Main.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,113 @@ +from xen.sv.HTMLBase import HTMLBase +from xen.sv.DomList import DomList +from xen.sv.NodeInfo import NodeInfo +from xen.sv.DomInfo import DomInfo +from xen.sv.CreateDomain import CreateDomain +from xen.sv.MigrateDomain import MigrateDomain +from xen.sv.SaveDomain import SaveDomain +from xen.sv.RestoreDomain import RestoreDomain + +from xen.xend.XendClient import server + +from xen.sv.util import getVar + +class Main( HTMLBase ): + + isLeaf = True + + def __init__( self, urlWriter = None ): + self.modules = { "node": NodeInfo, + "list": DomList, + "info": DomInfo, + "create": CreateDomain, + "migrate" : MigrateDomain, + "save" : SaveDomain, + "restore" : RestoreDomain } + + # ordered list of module menus to display + self.module_menus = [ "node", "create", "migrate", "save", + "restore", "list" ] + HTMLBase.__init__(self) + + def render_POST( self, request ): + + #decide what module post'd the action + + args = getVar( 'args', request ) + + mod = getVar( 'mod', request ) + + if not mod is None and args is None: + module = self.modules[ mod ] + #check module exists + if module: + module( self.mainUrlWriter ).perform( request ) + else: + self.perform( request ) + + return self.render_GET( request ) + + def mainUrlWriter( self, module ): + def fun( f ): + return "Main.rpy?mod=%s%s" % ( module, f ) + return fun + + def write_BODY( self, request ): + + request.write( "\n<table style='border:0px solid black; background: url(images/orb_01.jpg) no-repeat' cellspacing='0' cellpadding='0' border='0' width='780px' height='536px'>\n" ) + request.write( "<tr>\n" ) + request.write( " <td width='15px'> </td>" ) + request.write( " <td width='175px' align='center' valign'center'>" ) + request.write( " <table cellspacing='0' cellpadding='0' border='0' width='100%' height='100%'>" ) + request.write( " <tr><td height='140px' align='center' valign='bottom'><a href='http://www.cl.cam.ac.uk/Research/SRG/netos/xen/'>" ) + request.write( " <img src='images/xen.png' width='150' height='75' border='0'/></a><br/></td></tr>" ) + request.write( " <tr><td height='60px' align='center'><p class='small'>SV Web Interface<br/>(C) <a href='mailto:tw275@xxxxxxxxx'>Tom Wilkie</a> 2004</p></td></tr>") + request.write( " <tr><td align='center' valign='top'>" ) + + for modName in self.module_menus: + self.modules[modName]( self.mainUrlWriter( modName ) ).write_MENU( request ) + + request.write( " </td></tr>" ) + request.write( " </table>" ) + request.write( " " ) + request.write( " </td>\n" ) + request.write( " <td width='15px'> </td>" ) + request.write( " <td width='558px' align='left' valign='top'>" ) + request.write( " <table cellspacing='0' cellpadding='0' border='0' width='100%' height='100%'>" ) + request.write( " <tr><td height='20px'></td></tr>" ) + request.write( " <tr><td align='center' valign='top'>" ) + + modName = getVar('mod', request) + + if modName is None: + request.write( '<p>Please select a module</p>' ) + else: + module = self.modules[ modName ] + if module: + module( self.mainUrlWriter( modName ) ).write_BODY( request ) + else: + request.write( '<p>Invalid module. Please select another</p>' ) + + request.write( " </td></tr>" ) + request.write( " </table>" ) + request.write( " </td>\n" ) + request.write( " <td width='17px'> </td>" ) + request.write( "</tr>\n" ) + + request.write( "</table>\n" ) + + + def op_destroy( self, request ): + dom = getVar( 'dom', request ) + if not dom is None and dom != "0": + server.xend_domain_destroy( int( dom ), "halt" ) + + def op_pause( self, request ): + dom = getVar( 'dom', request ) + if not dom is None and dom != "0": + server.xend_domain_pause( int( dom ) ) + + def op_unpause( self, request ): + dom = getVar( 'dom', request ) + if not dom is None and dom != "0": + server.xend_domain_unpause( int( dom ) ) diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/MigrateDomain.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/MigrateDomain.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,74 @@ +from xen.sv.Wizard import * +from xen.sv.util import * +from xen.sv.GenTabbed import PreTab + +from xen.xm.create import make_config, OptVals + +from xen.xend.XendClient import server + +class MigrateDomain( Wizard ): + def __init__( self, urlWriter ): + + sheets = [ ChooseMigrateDomain, + DoMigrate ] + + Wizard.__init__( self, urlWriter, "Migrate Domain", sheets ) + + +class ChooseMigrateDomain( Sheet ): + def __init__( self, urlWriter ): + Sheet.__init__( self, urlWriter, "Configure Migration", 0) + domains = [] + domnames = [] + + try: + domains = server.xend_domains() + domains.sort() + except: + pass + + for i in domains: + if i != 'Domain-0': domnames.append((i,i)) + + self.addControl( ListControl('domid', + domnames, + 'Domain ID:') ) + self.addControl( TickControl('live', + 'True', + 'Live migrate:') ) + self.addControl( InputControl('rate', + '0', + 'Rate limit:') ) + self.addControl( InputControl( 'dest', 'myhost.mydomain', + 'Name or IP address:', + ".*") ) + +class DoMigrate( Sheet ): + def __init__(self, urlWriter ): + Sheet.__init__(self, urlWriter, "Migration Done", 1) + + def write_BODY( self, request, err ): + + if not self.passback: self.parseForm( request ) + +# print string2sxp(self.passback) + + config = ssxp2hash ( string2sxp( self.passback ) ) + + try: + print config + print config['domid'], config['dest'] + dom_sxp = server.xend_domain_migrate( config['domid'], + config['dest'], + config.get('live') == 'True', + config['rate'] ) + success = "Your domain was successfully Migrated.\n" + except Exception, e: + success = "There was an error migrating your domain\n" + dom_sxp = str(e) + + pt = PreTab( success + dom_sxp ) # sxp2prettystring( dom_sxp ) ) + pt.write_BODY( request ) + + request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback ) + request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location ) diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/NodeInfo.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/NodeInfo.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,63 @@ +from xen.xend.XendClient import server + +from xen.sv.util import * +from xen.sv.GenTabbed import * + +class NodeInfo( GenTabbed ): + + def __init__( self, urlWriter ): + GenTabbed.__init__( self, "Node Details", urlWriter, [ 'General', 'Dmesg', ], [ NodeGeneralTab, NodeDmesgTab ] ) + + def write_MENU( self, request ): + request.write( "<p class='small'><a href='%s'>Node details</a></p>" % self.urlWriter( '' ) ) + +class NodeGeneralTab( CompositeTab ): + def __init__( self ): + CompositeTab.__init__( self, [ NodeInfoTab, NodeActionTab ] ) + +class NodeInfoTab( GeneralTab ): + + def __init__( self ): + + nodeInfo = {} + try: + nodeInfo = sxp2hash( server.xend_node() ) + except: + nodeInfo[ 'system' ] = 'Error getting node info' + + dictTitles = {} + dictTitles[ 'System' ] = 'system' + dictTitles[ 'Hostname' ] = 'host' + dictTitles[ 'Release' ] = 'release' + dictTitles[ 'Version' ] ='version' + dictTitles[ 'Machine' ] = 'machine' + dictTitles[ 'Cores' ] = 'cores' + dictTitles[ 'Hyperthreading' ] = ( 'hyperthreads_per_core', hyperthreadFormatter ) + dictTitles[ 'CPU Speed' ] = ( 'cpu_mhz', cpuFormatter ) + dictTitles[ 'Memory' ] = ( 'memory', memoryFormatter ) + dictTitles[ 'Free Memory' ] = ( 'free_memory', memoryFormatter ) + + GeneralTab.__init__( self, dict=nodeInfo, titles=dictTitles ) + +class NodeDmesgTab( PreTab ): + + def __init__( self ): + try: + dmesg = server.xend_node_get_dmesg() + except: + dmesg = "Error getting node information: XenD not running?" + PreTab.__init__( self, dmesg ) + +class NodeActionTab( ActionTab ): + + def __init__( self ): + ActionTab.__init__( self, { "shutdown" : "shutdown", + "reboot" : "reboot" } ) + + def op_shutdown( self, request ): + if debug: print ">NodeShutDown" + server.xend_node_shutdown() + + def op_reboot( self, request ): + if debug: print ">NodeReboot" + server.xend_node_reboot() diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/RestoreDomain.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/RestoreDomain.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,46 @@ +from xen.sv.Wizard import * +from xen.sv.util import * +from xen.sv.GenTabbed import PreTab + +from xen.xm.create import make_config, OptVals + +from xen.xend.XendClient import server + +class RestoreDomain( Wizard ): + def __init__( self, urlWriter ): + + sheets = [ ChooseRestoreDomain, + DoRestore ] + + Wizard.__init__( self, urlWriter, "Restore Domain", sheets ) + + +class ChooseRestoreDomain( Sheet ): + def __init__( self, urlWriter ): + Sheet.__init__( self, urlWriter, "Configure Restore", 0) + + self.addControl( InputControl( 'file', '', + 'Suspend file name:', + ".*") ) + +class DoRestore( Sheet ): + def __init__(self, urlWriter ): + Sheet.__init__(self, urlWriter, "Restore Done", 1) + + def write_BODY( self, request, err ): + + if not self.passback: self.parseForm( request ) + config = ssxp2hash ( string2sxp( self.passback ) ) + + try: + dom_sxp = server.xend_domain_restore( config['file'] ) + success = "Your domain was successfully restored.\n" + except Exception, e: + success = "There was an error restoring your domain\n" + dom_sxp = str(e) + + pt = PreTab( success + sxp2prettystring( dom_sxp ) ) + pt.write_BODY( request ) + + request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback ) + request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location ) diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/SaveDomain.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/SaveDomain.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,62 @@ +from xen.sv.Wizard import * +from xen.sv.util import * +from xen.sv.GenTabbed import PreTab + +from xen.xm.create import make_config, OptVals + +from xen.xend.XendClient import server + +class SaveDomain( Wizard ): + def __init__( self, urlWriter ): + + sheets = [ ChooseSaveDomain, + DoSave ] + + Wizard.__init__( self, urlWriter, "Save Domain", sheets ) + + +class ChooseSaveDomain( Sheet ): + def __init__( self, urlWriter ): + Sheet.__init__( self, urlWriter, "Configure Save", 0) + + domains = [] + domnames = [] + + try: + domains = server.xend_domains() + domains.sort() + except: + pass + + for i in domains: + if i != 'Domain-0': domnames.append((i,i)) + + self.addControl( ListControl('domid', + domnames, + 'Domain ID:') ) + self.addControl( InputControl( 'file', '', + 'Suspend file name:', + ".*") ) + +class DoSave( Sheet ): + def __init__(self, urlWriter ): + Sheet.__init__(self, urlWriter, "Save Done", 1) + + def write_BODY( self, request, err ): + + if not self.passback: self.parseForm( request ) + config = ssxp2hash ( string2sxp( self.passback ) ) + + try: + dom_sxp = server.xend_domain_save( config['domid'], + config['file'] ) + success = "Your domain was successfully saved.\n" + except Exception, e: + success = "There was an error saving your domain\n" + dom_sxp = str(e) + + pt = PreTab( success + dom_sxp ) # sxp2prettystring( dom_sxp ) ) + pt.write_BODY( request ) + + request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback ) + request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location ) diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/TabView.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/TabView.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,26 @@ +from xen.sv.HTMLBase import HTMLBase + +class TabView( HTMLBase ): + + # tab - int, id into tabs of selected tab + # tabs - list of strings, tab names + # urlWriter - + def __init__( self, tab, tabs, urlWriter ): + HTMLBase.__init__(self) + self.tab = tab + self.tabs = tabs + self.urlWriter = urlWriter + + def write_BODY( self, request ): + request.write( "<table style='' border='0' cellspacing='3' cellpadding='2' align='center'>" ) + request.write( "<tr height='22'>" ) + + for i in range( len( self.tabs ) ): + if self.tab == i: + backgroundColor = "white" + else: + backgroundColor = "grey" + + request.write( "<td style='border:1px solid black; background-color: %s'><p align='center'><a href='%s'>%s</a></p></td>" % ( backgroundColor, self.urlWriter( "&tab=%s" % i ), self.tabs[ i ] ) ) + + request.write( "</tr></table>" ) diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/Wizard.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/Wizard.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,269 @@ +from xen.sv.util import * +from xen.sv.HTMLBase import HTMLBase +from xen.xend import sxp + +import re + +DEBUG = 0 + +class Wizard( HTMLBase ): + + def __init__( self, urlWriter, title, sheets ): + HTMLBase.__init__( self ) + self.title = title + self.sheets = sheets + self.urlWriter = urlWriter + + def write_MENU( self, request ): + request.write( "<p class='small'><a href='%s'>%s</a></p>" % (self.urlWriter( '' ), self.title) ) + + def write_BODY( self, request ): + + request.write( "<table width='100%' border='0' cellspacing='0' cellpadding='0'><tr><td>" ) + request.write( "<p align='center'><u>%s</u></p></td></tr><tr><td>" % self.title ) + + currSheet = getVar( 'sheet', request ) + + if not currSheet is None: + currSheet = int( currSheet ) + else: + currSheet = 0 + + sheet = self.sheets[ currSheet ]( self.urlWriter ) + + err = not sheet.validate( request ) + + if not err: + op = getVar( 'op', request ) + + if op == 'next': + currSheet += 1 + elif op == 'prev': + currSheet -= 1 + + sheet = self.sheets[ currSheet ]( self.urlWriter ) + + if getVar( 'visited-sheet%s' % currSheet, request ): + sheet.write_BODY( request, err ) + else: + sheet.write_BODY( request, False ) + + + request.write( "</td></tr><tr><td><table width='100%' border='0' cellspacing='0' cellpadding='0'><tr>" ) + request.write( "<td width='80%'></td><td width='20%' align='center'><p align='center'>" ) + if currSheet > 0: + request.write( "<img src='images/previous.png' onclick='doOp( \"prev\" )' onmouseover='update( \"wizText\", \"Previous\" )' onmouseout='update( \"wizText\", \" \" )'> " ) + if currSheet < ( len( self.sheets ) - 2 ): + request.write( "<img src='images/next.png' onclick='doOp( \"next\" )' onmouseover='update( \"wizText\", \"Next\" )' onmouseout='update( \"wizText\", \" \" )'>" ) + elif currSheet == ( len( self.sheets ) - 2 ): + request.write( "<img src='images/finish.png' onclick='doOp( \"next\" )' onmouseover='update( \"wizText\", \"Finish\" )' onmouseout='update( \"wizText\", \" \" )'>" ) + request.write( "</p><p align='center'><span id='wizText'></span></p></td></tr></table>" ) + request.write( "</td></tr></table>" ) + + def op_next( self, request ): + pass + + def op_prev( self, request ): + pass + + def op_finish( self, request ): + pass + +class Sheet( HTMLBase ): + + def __init__( self, urlWriter, title, location ): + HTMLBase.__init__( self ) + self.urlWriter = urlWriter + self.feilds = [] + self.title = title + self.location = location + self.passback = None + + def parseForm( self, request ): + do_not_parse = [ 'mod', 'op', 'sheet', 'passback' ] + + passed_back = request.args + + temp_passback = passed_back.get( "passback" ) + + if temp_passback is not None and len( temp_passback ) > 0: + temp_passback = temp_passback[ len( temp_passback )-1 ] + else: + temp_passback = "( )" + + last_passback = ssxp2hash( string2sxp( temp_passback ) ) #use special function - will work with no head on sxp + + if DEBUG: print last_passback + + for (key, value) in passed_back.items(): + if key not in do_not_parse: + last_passback[ key ] = value[ len( value ) - 1 ] + + self.passback = sxp2string( hash2sxp( last_passback ) ) #store the sxp + + if DEBUG: print self.passback + + def write_BODY( self, request, err ): + + if not self.passback: self.parseForm( request ) + + request.write( "<p>%s</p>" % self.title ) + + previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the hash for quick reference + + request.write( "<table width='100%' cellpadding='0' cellspacing='1' border='0'>" ) + + for (feild, control) in self.feilds: + control.write_Control( request, previous_values.get( feild ) ) + if err and not control.validate( previous_values.get( feild ) ): + control.write_Help( request ) + + request.write( "</table>" ) + + request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback ) + request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location ) + request.write( "<input type='hidden' name='visited-sheet%s' value='True'></p>" % self.location ) + + def addControl( self, control ): + self.feilds.append( [ control.getName(), control ] ) + + def validate( self, request ): + + if not self.passback: self.parseForm( request ) + + check = True + + previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the hash for quick reference + if DEBUG: print previous_values + + for (feild, control) in self.feilds: + if not control.validate( previous_values.get( feild ) ): + check = False + if DEBUG: print "> %s = %s" % (feild, previous_values.get( feild )) + + return check + +class SheetControl( HTMLBase ): + + def __init__( self, reg_exp = ".*" ): + HTMLBase.__init__( self ) + self.name = "" + self.reg_exp = reg_exp + + def write_Control( self, request, persistedValue ): + request.write( "<tr colspan='2'><td>%s</td></tr>" % persistedValue ) + + def write_Help( self, request ): + request.write( "<tr><td align='right' colspan='2'><p class='small'>Text must match pattern:" ) + request.write( " %s</p></td></tr>" % self.reg_exp ) + + def validate( self, persistedValue ): + if persistedValue is None: + persistedValue = "" + + return not re.compile( self.reg_exp ).match( persistedValue ) is None + + def getName( self ): + return self.name + + def setName( self, name ): + self.name = name + +class InputControl( SheetControl ): + + def __init__( self, name, defaultValue, humanText, reg_exp = ".*", help_text = "You must enter the appropriate details in this feild." ): + SheetControl.__init__( self, reg_exp ) + self.setName( name ) + + self.defaultValue = defaultValue + self.humanText = humanText + self.help_text = help_text + + def write_Control( self, request, persistedValue ): + if persistedValue is None: + persistedValue = self.defaultValue + + request.write( "<tr><td width='50%%'><p>%s</p></td><td width='50%%'><input size='40'type='text' name='%s' value=\"%s\"></td></tr>" % (self.humanText, self.getName(), persistedValue) ) + + def write_Help( self, request ): + request.write( "<tr><td align='right' colspan='2'><p class='small'>" ) + request.write( " %s</p></td></tr>" % self.help_text ) + +class TextControl( SheetControl ): + + def __init__( self, text ): + SheetControl.__init__( self ) + self.text = text + + def write_Control( self, request, persistedValue ): + request.write( "<tr><td colspan='2'><p>%s</p></td></tr>" % self.text ) + +class SmallTextControl( SheetControl ): + + def __init__( self, text ): + SheetControl.__init__( self ) + self.text = text + + def write_Control( self, request, persistedValue ): + request.write( "<tr><td colspan='2'><p class='small'>%s</p></tr></td>" % self.text ) + +class ListControl( SheetControl ): + + def __init__( self, name, options, humanText ): + SheetControl.__init__( self ) + self.setName( name ) + self.options = options + self.humanText = humanText + + def write_Control( self, request, persistedValue ): + request.write( "<tr><td width='50%%'><p>%s</p></td><td width='50%%'>" % self.humanText ) + request.write( "<select name='%s'>" % self.getName() ) + for (value, text) in self.options: + if value == persistedValue: + request.write( "<option value='%s' selected>%s\n" % (value, text) ) + else: + request.write( "<option value='%s'>%s\n" % (value, text) ) + request.write( "</select></td></tr>" ) + + def validate( self, persistedValue ): + for (value, text) in self.options: + if value == persistedValue: + return True + + return False + +class FileControl( InputControl ): + + def __init__( self, name, defaultValue, humanText, reg_exp = ".*", help_text = "You must enter the appropriate details in this feild." ): + InputControl.__init__( self, name, defaultValue, humanText ) + + def validate( self, persistedValue ): + if persistedValue is None: return False + try: + open( persistedValue ) + return True + except IOError, TypeError: + return False + + def write_Help( self, request ): + request.write( "<tr><td colspan='2' align='right'><p class='small'>File does not exist: you must enter a valid, absolute file path.</p></td></tr>" ) + +class TickControl( SheetControl ): + + def __init__( self, name, defaultValue, humanText ): + SheetControl.__init__( self ) + self.setName( name ) + self.defaultValue = defaultValue + self.humanText = humanText + + def write_Control( self, request, persistedValue ): + request.write( "<tr><td width='50%%'><p>%s</p></td><td width='50%%'>" % self.humanText ) + + if persistedValue == 'True': + request.write( "<input type='checkbox' name='%s' value='True' checked>" % self.getName() ) + else: + request.write( "<input type='checkbox' name='%s' value='True'>" % self.getName() ) + + request.write( "</select></td></tr>" ) + + diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/__init__.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/__init__.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,1 @@ + diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/params.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/params.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,3 @@ +SV_PORT = 8080 +SV_ROOT = "/var/lib/xen/sv/" +PID_FILE = "/var/run/xen-sv.pid" diff -r a4196568095c -r b53a65034532 tools/python/xen/sv/util.py --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/python/xen/sv/util.py Fri Jul 29 20:25:03 2005 @@ -0,0 +1,126 @@ +from xen.xend.XendClient import server +from xen.xend import sxp +from xen.xend import PrettyPrint + +import types + +def getDomInfoHash( domain ): + domInfoHash = {} + try: + domInfoHash = sxp2hash( server.xend_domain( domain ) ) + domInfoHash['dom'] = domain + except: + domInfoHash['name'] = "Error getting domain details" + return domInfoHash + +def sxp2hash( s ): + sxphash = {} + + for child in sxp.children( s ): + if isinstance( child, types.ListType ) and len( child ) > 1: + if isinstance( child[1], types.ListType ) and len( child ) > 1: + sxphash[ child[0] ] = sxp2hash( child[1] ) + else: + sxphash[ child[0] ] = child[1] + + return sxphash + +def ssxp2hash( s ): + sxphash = {} + + for i in s: + if isinstance( i, types.ListType ) and len( i ) > 1: + sxphash[ i[0] ] = i[1] + + return sxphash + +def hash2sxp( h ): + hashsxp = [] + + for (key, item) in h.items(): + hashsxp.append( [key, item] ) + + return hashsxp + +def string2sxp( string ): + pin = sxp.Parser() + pin.input( string ) + return pin.get_val() + +def sxp2string( sexp ): + return sxp.to_string( sexp ) + +def sxp2prettystring( sxp ): + class tmp: + def __init__( self ): + self.str = "" + def write( self, str ): + self.str = self.str + str + temp = tmp() + PrettyPrint.prettyprint( sxp, out=temp ) + return temp.str + +def getVar( var, request, default=None ): + + arg = request.args.get( var ) + + if arg is None: + return default + else: + return arg[ len( arg )-1 ] + +def bigTimeFormatter( time ): + time = float( time ) + weeks = time // 604800 + remainder = time % 604800 + days = remainder // 86400 + + remainder = remainder % 86400 + + hms = smallTimeFormatter( remainder ) + + return "%d weeks, %d days, %s" % ( weeks, days, hms ) + +def smallTimeFormatter( time ): + time = float( time ) + hours = time // 3600 + remainder = time % 3600 + mins = remainder // 60 + secs = time % 60 + return "%02d:%02d:%04.1f (hh:mm:ss.s)" % ( hours, mins, secs ) + +def stateFormatter( state ): + states = [ 'Running', 'Blocked', 'Paused', 'Shutdown', 'Crashed' ] + + stateStr = "" + + for i in range( len( state ) ): + if state[i] != "-": + stateStr += "%s, " % states[ i ] + + return stateStr + " (%s)" % state + +def memoryFormatter( mem ): + mem = int( mem ) + if mem >= 1024: + mem = float( mem ) / 1024 + return "%3.2fGb" % mem + else: + return "%7dMb" % mem + +def cpuFormatter( mhz ): + mhz = int( mhz ) + if mhz > 1000: + ghz = float( mhz ) / 1000.0 + return "%4.2fGHz" % ghz + else: + return "%4dMHz" % mhz + +def hyperthreadFormatter( threads ): + try: + if int( threads ) > 1: + return "Yes" + else: + return "No" + except: + return "No" diff -r a4196568095c -r b53a65034532 tools/sv/Makefile --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/sv/Makefile Fri Jul 29 20:25:03 2005 @@ -0,0 +1,2 @@ + +all: diff -r a4196568095c -r b53a65034532 tools/sv/images/destroy.png --- /dev/null Fri Jul 29 18:52:33 2005 +++ b/tools/sv/images/destroy.png Fri Jul 29 20:25:03 2005 @@ -0,0 +1,23 @@ +?PNG + + \ No newline at end of file +IHDR 6 6 ?EjÝ |