[Xen-changelog] merge with xen-unstable.hg
# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID c073ebdbde8c0f5c9437706b46c4a34f35033c0c
# Parent  9d52a66c74996a66adf5ee71a0d7f91bb880f7fb
# Parent  954f4dea9da6336aaa35d0706aed55fde7909644
merge with xen-unstable.hg
---
 linux-2.6-xen-sparse/drivers/xen/net_driver_util.c          |   58
 linux-2.6-xen-sparse/include/asm-x86_64/e820.h              |   63
 linux-2.6-xen-sparse/include/xen/net_driver_util.h          |   48
 tools/xenstore/xenstored_proc.h                             |   27
 .hgignore                                                   |    2
 extras/mini-os/Makefile                                     |    9
 extras/mini-os/lib/printf.c                                 |    4
 extras/mini-os/lib/string.c                                 |    4
 linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile         |    1
 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c             |    2
 linux-2.6-xen-sparse/drivers/xen/Makefile                   |    1
 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c          |    8
 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c          |    2
 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c           |    2
 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c        |    4
 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c             |    2
 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c            |    2
 linux-2.6-xen-sparse/drivers/xen/console/console.c          |   28
 linux-2.6-xen-sparse/drivers/xen/core/Makefile              |   11
 linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c         |  185 +
 linux-2.6-xen-sparse/drivers/xen/core/evtchn.c              |   31
 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c              |    3
 linux-2.6-xen-sparse/drivers/xen/core/reboot.c              |    9
 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c             |  215 --
 linux-2.6-xen-sparse/drivers/xen/netback/netback.c          |    4
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c           |   31
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c        |   56
 linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c           |    2
 linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c          |    4
 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c          |   23
 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c           |    6
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c     |    8
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c      |    4
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h |   63
 linux-2.6-xen-sparse/include/xen/cpu_hotplug.h              |   42
 linux-2.6-xen-sparse/include/xen/xenbus.h                   |    8
 patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch           |   13
 tools/libxc/Makefile                                        |    1
 tools/libxc/xc_csched.c                                     |   50
 tools/libxc/xc_linux_build.c                                |   13
 tools/libxc/xc_linux_restore.c                              |  122 -
 tools/libxc/xc_private.c                                    |   22
 tools/libxc/xc_ptrace.c                                     |   77
 tools/libxc/xc_ptrace.h                                     |    3
 tools/libxc/xc_ptrace_core.c                                |    7
 tools/libxc/xc_tbuf.c                                       |   56
 tools/libxc/xenctrl.h                                       |   11
 tools/libxc/xg_private.h                                    |   10
 tools/python/xen/lowlevel/xc/xc.c                           |   61
 tools/python/xen/lowlevel/xs/xs.c                           |   11
 tools/python/xen/xend/XendDomain.py                         |   22
 tools/python/xen/xend/XendDomainInfo.py                     |   14
 tools/python/xen/xend/balloon.py                            |   11
 tools/python/xen/xend/server/SrvDomain.py                   |   14
 tools/python/xen/xend/xenstore/xstransact.py                |   28
 tools/python/xen/xm/main.py                                 |   45
 tools/tests/test_x86_emulator.c                             |   67
 tools/xenstore/Makefile                                     |    8
 tools/xenstore/xenstored_core.c                             |    7
 tools/xenstore/xenstored_core.h                             |    8
 tools/xenstore/xenstored_domain.c                           |   37
 tools/xenstore/xenstored_linux.c                            |   69
 xen/arch/x86/domain_build.c                                 |    5
 xen/arch/x86/hvm/hvm.c                                      |   16
 xen/arch/x86/hvm/i8254.c                                    |  405 +--
 xen/arch/x86/hvm/intercept.c                                |   82
 xen/arch/x86/hvm/svm/intr.c                                 |   47
 xen/arch/x86/hvm/svm/svm.c                                  |   44
 xen/arch/x86/hvm/svm/vmcb.c                                 |   14
 xen/arch/x86/hvm/vmx/io.c                                   |   62
 xen/arch/x86/hvm/vmx/vmx.c                                  |   37
 xen/arch/x86/mm.c                                           |  129 +
 xen/arch/x86/traps.c                                        |    4
 xen/arch/x86/x86_emulate.c                                  |   81
 xen/common/Makefile                                         |    1
 xen/common/grant_table.c                                    |   15
 xen/common/kernel.c                                         |    5
 xen/common/sched_credit.c                                   | 1233 ++++++++++++
 xen/common/schedule.c                                       |    5
 xen/common/trace.c                                          |    6
 xen/include/asm-x86/domain.h                                |   12
 xen/include/asm-x86/hvm/domain.h                            |    6
 xen/include/asm-x86/hvm/svm/intr.h                          |    1
 xen/include/asm-x86/hvm/svm/svm.h                           |    1
 xen/include/asm-x86/hvm/vcpu.h                              |    3
 xen/include/asm-x86/hvm/vmx/vmx.h                           |    1
 xen/include/asm-x86/hvm/vpit.h                              |   67
 xen/include/asm-x86/string.h                                |  162 -
 xen/include/asm-x86/x86_emulate.h                           |   66
 xen/include/public/io/xenbus.h                              |   59
 xen/include/public/sched_ctl.h                              |    5
 xen/include/xen/sched-if.h                                  |    2
 xen/include/xen/softirq.h                                   |   13
 93 files changed, 2802 insertions(+), 1546 deletions(-)

diff -r 9d52a66c7499 -r c073ebdbde8c .hgignore
--- a/.hgignore Thu May 25 15:59:18 2006 -0600
+++ b/.hgignore Fri May 26 13:41:49 2006 -0600
@@ -14,7 +14,7 @@
 .*\.orig$
 .*\.rej$
 .*/a\.out$
-.*/cscope\.*$
+.*/cscope\..*$
 ^[^/]*\.bz2$
 ^TAGS$
 ^dist/.*$
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/Makefile
--- a/extras/mini-os/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/Makefile Fri May 26 13:41:49 2006 -0600
@@ -13,6 +13,7 @@ override CPPFLAGS := -Iinclude $(CPPFLAG
 override CPPFLAGS := -Iinclude $(CPPFLAGS)
 ASFLAGS = -D__ASSEMBLY__
 
+LDLIBS = -L. -lminios
 LDFLAGS := -N -T minios-$(TARGET_ARCH).lds
 
 ifeq ($(TARGET_ARCH),x86_32)
@@ -55,11 +56,11 @@ links:
 links:
 	[ -e include/xen ] || ln -sf ../../../xen/include/public include/xen
 
-libminios.a: $(OBJS) $(HEAD)
-	ar r libminios.a $(HEAD) $(OBJS)
+libminios.a: links $(OBJS) $(HEAD)
+	$(AR) r libminios.a $(HEAD) $(OBJS)
 
-$(TARGET): links libminios.a $(HEAD)
-	$(LD) $(LDFLAGS) $(HEAD) -L. -lminios -o $@.elf
+$(TARGET): libminios.a $(HEAD)
+	$(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf
 	gzip -f -9 -c $@.elf >$@.gz
 
 .PHONY: clean
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/lib/printf.c
--- a/extras/mini-os/lib/printf.c Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/lib/printf.c Fri May 26 13:41:49 2006 -0600
@@ -53,6 +53,8 @@
  *
  * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
  */
+
+#if !defined HAVE_LIBC
 
 #include <os.h>
 #include <types.h>
@@ -789,4 +791,4 @@ int sscanf(const char * buf, const char
     return i;
 }
 
-
+#endif
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/lib/string.c
--- a/extras/mini-os/lib/string.c Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/lib/string.c Fri May 26 13:41:49 2006 -0600
@@ -17,6 +17,8 @@
  * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
  ****************************************************************************
  */
+
+#if !defined HAVE_LIBC
 
 #include <os.h>
 #include <types.h>
@@ -153,3 +155,5 @@ char * strstr(const char * s1,const char
         }
         return NULL;
 }
+
+#endif
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile Fri May 26 13:41:49 2006 -0600
@@ -2,7 +2,6 @@ ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
 ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
 obj-y += util.o
 endif
-obj-$(CONFIG_XEN_IA64_DOM0_VP) += net_driver_util.o
 
 obj-y += core/
 #obj-y += char/
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Fri May 26 13:41:49 2006 -0600
@@ -329,7 +329,7 @@ out:
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                            XenbusState backend_state)
+                            enum xenbus_state backend_state)
 {
     struct tpm_private *tp = dev->data;
     DPRINTK("\n");
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Fri May 26 13:41:49 2006 -0600
@@ -1,5 +1,4 @@
-obj-y += net_driver_util.o
 obj-y += util.o
 
 obj-y += core/
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri May 26 13:41:49 2006 -0600
@@ -67,7 +67,7 @@ static DECLARE_MUTEX(balloon_mutex);
  * Also protects non-atomic updates of current_pages and driver_pages, and
  * balloon lists.
  */
-spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(balloon_lock);
 
 /* We aim for 'current allocation' == 'target allocation'. */
 static unsigned long current_pages;
@@ -360,6 +360,12 @@ static void balloon_process(void *unused
 /* Resets the Xen limit, sets new target, and kicks off processing. */
 static void set_new_target(unsigned long target)
 {
+    unsigned long min_target;
+
+    /* Do not allow target to reduce below 2% of maximum memory size. */
+    min_target = max_pfn / 50;
+    target = max(target, min_target);
+
     /* No need for lock. Not read-modify-write updates. */
     hard_limit   = ~0UL;
     target_pages = target;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Fri May 26 13:41:49 2006 -0600
@@ -82,7 +82,7 @@ typedef struct {
 static pending_req_t *pending_reqs;
 static struct list_head pending_free;
-static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pending_free_lock);
 static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
 
 #define BLKBACK_INVALID_HANDLE (~0)
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri May 26 13:41:49 2006 -0600
@@ -247,7 +247,7 @@ static void backend_changed(struct xenbu
  * Callback received when the frontend's state changes.
 */
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state)
+                             enum xenbus_state frontend_state)
 {
     struct backend_info *be = dev->data;
     int err;
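A pattern that recurs throughout the hunks above (and in the block, net, and console drivers below): statically-allocated locks move from the old SPIN_LOCK_UNLOCKED initialiser to the DEFINE_SPINLOCK() helper, which declares and initialises the lock in one macro and plays well with per-lock debugging instrumentation. A minimal sketch of the two forms; my_lock is a placeholder name, not from this patch:

    /* Old style: explicit struct initialiser. */
    static spinlock_t my_lock = SPIN_LOCK_UNLOCKED;

    /* New style used throughout this merge. */
    static DEFINE_SPINLOCK(my_lock);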
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri May 26 13:41:49 2006 -0600
@@ -247,7 +247,7 @@ fail:
  * Callback received when the backend's state changes.
 */
 static void backend_changed(struct xenbus_device *dev,
-                            XenbusState backend_state)
+                            enum xenbus_state backend_state)
 {
     struct blkfront_info *info = dev->data;
     struct block_device *bd;
@@ -434,7 +434,7 @@ int blkif_release(struct inode *inode, s
            have ignored this request initially, as the device was
            still mounted. */
         struct xenbus_device * dev = info->xbdev;
-        XenbusState state = xenbus_read_driver_state(dev->otherend);
+        enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
 
         if (state == XenbusStateClosing)
             blkfront_closing(dev);
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Fri May 26 13:41:49 2006 -0600
@@ -93,7 +93,7 @@ static struct block_device_operations xl
     .ioctl = blkif_ioctl,
 };
 
-spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(blkif_io_lock);
 
 static struct xlbd_major_info *
 xlbd_alloc_major_info(int major, int minor, int index)
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Fri May 26 13:41:49 2006 -0600
@@ -138,7 +138,7 @@ typedef struct {
  */
 static pending_req_t pending_reqs[MAX_PENDING_REQS];
 static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pend_prod_lock);
 /* NB. We use a different index type to differentiate from shared blk rings. */
 typedef unsigned int PEND_RING_IDX;
 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Fri May 26 13:41:49 2006 -0600
@@ -117,14 +117,17 @@ static int __init xencons_bufsz_setup(ch
 {
     unsigned int goal;
     goal = simple_strtoul(str, NULL, 0);
-    while (wbuf_size < goal)
-        wbuf_size <<= 1;
+    if (goal) {
+        goal = roundup_pow_of_two(goal);
+        if (wbuf_size < goal)
+            wbuf_size = goal;
+    }
     return 1;
 }
 __setup("xencons_bufsz=", xencons_bufsz_setup);
 
 /* This lock protects accesses to the common transmit buffer. */
-static spinlock_t xencons_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(xencons_lock);
 
 /* Common transmit-kick routine. */
 static void __xencons_tx_flush(void);
@@ -133,8 +136,7 @@ static struct tty_driver *xencons_driver
 
 /******************** Kernel console driver ********************************/
 
-static void kcons_write(
-    struct console *c, const char *s, unsigned int count)
+static void kcons_write(struct console *c, const char *s, unsigned int count)
 {
     int           i = 0;
     unsigned long flags;
@@ -155,14 +157,14 @@ static void kcons_write(
     spin_unlock_irqrestore(&xencons_lock, flags);
 }
 
-static void kcons_write_dom0(
-    struct console *c, const char *s, unsigned int count)
-{
-    int rc;
-
-    while ((count > 0) &&
-           ((rc = HYPERVISOR_console_io(
-            CONSOLEIO_write, count, (char *)s)) > 0)) {
+static void kcons_write_dom0(struct console *c, const char *s, unsigned int count)
+{
+
+    while (count > 0) {
+        int rc;
+        rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
+        if (rc <= 0)
+            break;
         count -= rc;
         s += rc;
     }
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile Fri May 26 13:41:49 2006 -0600
@@ -4,8 +4,9 @@
 
 obj-y := evtchn.o reboot.o gnttab.o features.o
 
-obj-$(CONFIG_PROC_FS) += xen_proc.o
-obj-$(CONFIG_NET)     += skbuff.o
-obj-$(CONFIG_SMP)     += smpboot.o
-obj-$(CONFIG_SYSFS)   += hypervisor_sysfs.o
-obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
+obj-$(CONFIG_PROC_FS)      += xen_proc.o
+obj-$(CONFIG_NET)          += skbuff.o
+obj-$(CONFIG_SMP)          += smpboot.o
+obj-$(CONFIG_HOTPLUG_CPU)  += cpu_hotplug.o
+obj-$(CONFIG_SYSFS)        += hypervisor_sysfs.o
+obj-$(CONFIG_XEN_SYSFS)    += xen_sysfs.o
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Fri May 26 13:41:49 2006 -0600
@@ -51,10 +51,10 @@
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
 */
-static spinlock_t irq_mapping_update_lock;
+static DEFINE_SPINLOCK(irq_mapping_update_lock);
 
 /* IRQ <-> event-channel mappings. */
-static int evtchn_to_irq[NR_EVENT_CHANNELS];
+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {[0 ...  NR_EVENT_CHANNELS-1] = -1};
 
 /* Packed IRQ information: binding type, sub-type index, and event channel. */
 static u32 irq_info[NR_IRQS];
@@ -91,13 +91,13 @@ static inline unsigned int type_from_irq
 }
 
 /* IRQ <-> VIRQ mapping. */
-DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]);
+DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
 
 /* IRQ <-> IPI mapping. */
 #ifndef NR_IPIS
 #define NR_IPIS 1
 #endif
-DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
 
 /* Reference counts for bindings to IRQs. */
 static int irq_bindcount[NR_IRQS];
@@ -751,7 +751,9 @@ void irq_resume(void)
         BUG_ON(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND);
 
     /* Secondary CPUs must have no VIRQ or IPI bindings. */
-    for (cpu = 1; cpu < NR_CPUS; cpu++) {
+    for_each_possible_cpu(cpu) {
+        if (cpu == 0)
+            continue;
         for (virq = 0; virq < NR_VIRQS; virq++)
             BUG_ON(per_cpu(virq_to_irq, cpu)[virq] != -1);
         for (ipi = 0; ipi < NR_IPIS; ipi++)
@@ -813,25 +815,12 @@ void __init xen_init_IRQ(void)
 void __init xen_init_IRQ(void)
 {
     int i;
-    int cpu;
-
-    spin_lock_init(&irq_mapping_update_lock);
 
     init_evtchn_cpu_bindings();
 
-    /* No VIRQ or IPI bindings. */
-    for (cpu = 0; cpu < NR_CPUS; cpu++) {
-        for (i = 0; i < NR_VIRQS; i++)
-            per_cpu(virq_to_irq, cpu)[i] = -1;
-        for (i = 0; i < NR_IPIS; i++)
-            per_cpu(ipi_to_irq, cpu)[i] = -1;
-    }
-
-    /* No event-channel -> IRQ mappings. */
-    for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-        evtchn_to_irq[i] = -1;
-        mask_evtchn(i); /* No event channels are 'live' right now. */
-    }
+    /* No event channels are 'live' right now. */
+    for (i = 0; i < NR_EVENT_CHANNELS; i++)
+        mask_evtchn(i);
 
     /* No IRQ -> event-channel mappings. */
     for (i = 0; i < NR_IRQS; i++)
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Fri May 26 13:41:49 2006 -0600
@@ -38,7 +38,6 @@
 #include <linux/vmalloc.h>
 #include <asm/pgtable.h>
 #include <xen/interface/xen.h>
-#include <asm/fixmap.h>
 #include <asm/uaccess.h>
 #include <xen/gnttab.h>
 #include <asm/synch_bitops.h>
@@ -81,7 +80,7 @@ static grant_ref_t gnttab_list[NR_GRANT_
 static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
 static int gnttab_free_count;
 static grant_ref_t gnttab_free_head;
-static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(gnttab_list_lock);
 
 static grant_entry_t *shared = NULL;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Fri May 26 13:41:49 2006 -0600
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <xen/gnttab.h>
 #include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
 
 #if defined(__i386__) || defined(__x86_64__)
 /*
@@ -80,14 +81,6 @@ static int shutting_down = SHUTDOWN_INVA
 static int shutting_down = SHUTDOWN_INVALID;
 static void __shutdown_handler(void *unused);
 static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
-
-#ifdef CONFIG_SMP
-int smp_suspend(void);
-void smp_resume(void);
-#else
-#define smp_suspend() (0)
-#define smp_resume()  ((void)0)
-#endif
 
 /* Ensure we run on the idle task page tables so that we will
    switch page tables before running user space. This is needed
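The evtchn.c hunks above replace runtime initialisation loops with GCC's range designated initialisers, so the mapping arrays already hold -1 before any code runs. A small self-contained sketch of the construct; the array name and size are illustrative:

    #define NR_ENTRIES 16

    /* "[0 ... NR_ENTRIES - 1]" is a GCC range-designator extension:
     * every element is -1 at compile time, so no init loop is needed. */
    static int map[NR_ENTRIES] = { [0 ... NR_ENTRIES - 1] = -1 };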
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Fri May 26 13:41:49 2006 -0600
@@ -23,6 +23,7 @@
 #include <asm/pgalloc.h>
 #include <xen/evtchn.h>
 #include <xen/interface/vcpu.h>
+#include <xen/cpu_hotplug.h>
 #include <xen/xenbus.h>
 
 #ifdef CONFIG_SMP_ALTERNATIVES
@@ -78,15 +79,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
 #elif !defined(CONFIG_X86_IO_APIC)
 unsigned int maxcpus = NR_CPUS;
 #endif
-
-/*
- * Set of CPUs that remote admin software will allow us to bring online.
- * Notified to us via xenbus.
- */
-static cpumask_t xenbus_allowed_cpumask;
-
-/* Set of CPUs that local admin will allow us to bring online. */
-static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
 
 void __init prefill_possible_map(void)
 {
@@ -167,17 +159,17 @@ static void cpu_bringup(void)
     cpu_idle();
 }
 
-static void vcpu_prepare(int vcpu)
+void cpu_initialize_context(unsigned int cpu)
 {
     vcpu_guest_context_t ctxt;
-    struct task_struct *idle = idle_task(vcpu);
+    struct task_struct *idle = idle_task(cpu);
 #ifdef __x86_64__
-    struct desc_ptr *gdt_descr = &cpu_gdt_descr[vcpu];
+    struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
 #else
-    struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, vcpu);
-#endif
-
-    if (vcpu == 0)
+    struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+#endif
+
+    if (cpu == 0)
         return;
 
     memset(&ctxt, 0, sizeof(ctxt));
@@ -226,10 +218,10 @@ static void vcpu_prepare(int vcpu)
 
     ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
 
-    ctxt.gs_base_kernel = (unsigned long)(cpu_pda(vcpu));
-#endif
-
-    BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt));
+    ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
+#endif
+
+    BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -304,10 +296,10 @@ void __init smp_prepare_cpus(unsigned in
         cpu_set(cpu, cpu_present_map);
 #endif
 
-        vcpu_prepare(cpu);
-    }
-
-    xenbus_allowed_cpumask = cpu_present_map;
+        cpu_initialize_context(cpu);
+    }
+
+    init_xenbus_allowed_cpumask();
 
     /* Currently, Xen gives no dynamic NUMA/HT info. */
     for (cpu = 1; cpu < NR_CPUS; cpu++) {
@@ -332,15 +324,6 @@ void __devinit smp_prepare_boot_cpu(void
     cpu_online_map   = cpumask_of_cpu(0);
 }
 
-static int local_cpu_hotplug_request(void)
-{
-    /*
-     * We assume a CPU hotplug request comes from local admin if it is made
-     * via a userspace process (i.e., one with a real mm_struct).
-     */
-    return (current->mm != NULL);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
@@ -355,141 +338,6 @@ static int __init initialize_cpu_present
 }
 core_initcall(initialize_cpu_present_map);
 
-static void vcpu_hotplug(unsigned int cpu)
-{
-    int err;
-    char dir[32], state[32];
-
-    if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
-        return;
-
-    sprintf(dir, "cpu/%d", cpu);
-    err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
-    if (err != 1) {
-        printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
-        return;
-    }
-
-    if (strcmp(state, "online") == 0) {
-        cpu_set(cpu, xenbus_allowed_cpumask);
-        (void)cpu_up(cpu);
-    } else if (strcmp(state, "offline") == 0) {
-        cpu_clear(cpu, xenbus_allowed_cpumask);
-        (void)cpu_down(cpu);
-    } else {
-        printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
-               state, cpu);
-    }
-}
-
-static void handle_vcpu_hotplug_event(
-    struct xenbus_watch *watch, const char **vec, unsigned int len)
-{
-    int cpu;
-    char *cpustr;
-    const char *node = vec[XS_WATCH_PATH];
-
-    if ((cpustr = strstr(node, "cpu/")) != NULL) {
-        sscanf(cpustr, "cpu/%d", &cpu);
-        vcpu_hotplug(cpu);
-    }
-}
-
-static int smpboot_cpu_notify(struct notifier_block *notifier,
-                              unsigned long action, void *hcpu)
-{
-    int cpu = (long)hcpu;
-
-    /*
-     * We do this in a callback notifier rather than __cpu_disable()
-     * because local_cpu_hotplug_request() does not work in the latter
-     * as it's always executed from within a stopmachine kthread.
-     */
-    if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
-        cpu_clear(cpu, local_allowed_cpumask);
-
-    return NOTIFY_OK;
-}
-
-static int setup_cpu_watcher(struct notifier_block *notifier,
-                             unsigned long event, void *data)
-{
-    int i;
-
-    static struct xenbus_watch cpu_watch = {
-        .node = "cpu",
-        .callback = handle_vcpu_hotplug_event,
-        .flags = XBWF_new_thread };
-    (void)register_xenbus_watch(&cpu_watch);
-
-    if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
-        for_each_cpu(i)
-            vcpu_hotplug(i);
-        printk(KERN_INFO "Brought up %ld CPUs\n",
-               (long)num_online_cpus());
-    }
-
-    return NOTIFY_DONE;
-}
-
-static int __init setup_vcpu_hotplug_event(void)
-{
-    static struct notifier_block hotplug_cpu = {
-        .notifier_call = smpboot_cpu_notify };
-    static struct notifier_block xsn_cpu = {
-        .notifier_call = setup_cpu_watcher };
-
-    register_cpu_notifier(&hotplug_cpu);
-    register_xenstore_notifier(&xsn_cpu);
-
-    return 0;
-}
-
-arch_initcall(setup_vcpu_hotplug_event);
-
-int smp_suspend(void)
-{
-    int i, err;
-
-    lock_cpu_hotplug();
-
-    /*
-     * Take all other CPUs offline. We hold the hotplug mutex to
-     * avoid other processes bringing up CPUs under our feet.
-     */
-    while (num_online_cpus() > 1) {
-        unlock_cpu_hotplug();
-        for_each_online_cpu(i) {
-            if (i == 0)
-                continue;
-            err = cpu_down(i);
-            if (err) {
-                printk(KERN_CRIT "Failed to take all CPUs "
-                       "down: %d.\n", err);
-                for_each_cpu(i)
-                    vcpu_hotplug(i);
-                return err;
-            }
-        }
-        lock_cpu_hotplug();
-    }
-
-    return 0;
-}
-
-void smp_resume(void)
-{
-    int i;
-
-    for_each_cpu(i)
-        vcpu_prepare(i);
-
-    unlock_cpu_hotplug();
-
-    for_each_cpu(i)
-        vcpu_hotplug(i);
-}
-
 static void remove_siblinginfo(int cpu)
 {
@@ -536,20 +384,6 @@ void __cpu_die(unsigned int cpu)
 
 #else /* !CONFIG_HOTPLUG_CPU */
 
-int smp_suspend(void)
-{
-    if (num_online_cpus() > 1) {
-        printk(KERN_WARNING "Can't suspend SMP guests "
-               "without CONFIG_HOTPLUG_CPU\n");
-        return -EOPNOTSUPP;
-    }
-    return 0;
-}
-
-void smp_resume(void)
-{
-}
-
 int __cpu_disable(void)
 {
     return -ENOSYS;
@@ -566,17 +400,9 @@ int __devinit __cpu_up(unsigned int cpu)
 {
     int rc;
 
-    if (local_cpu_hotplug_request()) {
-        cpu_set(cpu, local_allowed_cpumask);
-        if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
-            printk("%s: attempt to bring up CPU %u disallowed by "
-                   "remote admin.\n", __FUNCTION__, cpu);
-            return -EBUSY;
-        }
-    } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
-               !cpu_isset(cpu, xenbus_allowed_cpumask)) {
-        return -EBUSY;
-    }
+    rc = cpu_up_is_allowed(cpu);
+    if (rc)
+        return rc;
 
 #ifdef CONFIG_SMP_ALTERNATIVES
     if (num_online_cpus() == 1)
@@ -591,8 +417,7 @@ int __devinit __cpu_up(unsigned int cpu)
     cpu_set(cpu, cpu_online_map);
 
     rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
-    if (rc != 0)
-        BUG();
+    BUG_ON(rc);
 
     return 0;
 }
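The smpboot.c diff above moves the xenbus/hotplug policy code into the new drivers/xen/core/cpu_hotplug.c (185 lines added, per the diffstat), so __cpu_up() now delegates the admin-mask check to a single call. A hedged sketch of the new call shape; cpu_up_is_allowed()'s body is not shown in this digest, so the stub below is an assumption for illustration only:

    /* Declared in the new <xen/cpu_hotplug.h>; implemented in
     * cpu_hotplug.c (not shown in this merge digest). */
    int cpu_up_is_allowed(unsigned int cpu);

    int example_cpu_up(unsigned int cpu)
    {
        int rc = cpu_up_is_allowed(cpu);  /* e.g. -EBUSY if masked off */
        if (rc)
            return rc;
        /* ... architecture-specific bring-up continues here ... */
        return 0;
    }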
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri May 26 13:41:49 2006 -0600
@@ -99,7 +99,7 @@ static spinlock_t net_schedule_list_lock
 #define MAX_MFN_ALLOC 64
 static unsigned long mfn_list[MAX_MFN_ALLOC];
 static unsigned int alloc_index = 0;
-static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(mfn_lock);
 
 static unsigned long alloc_mfn(void)
 {
@@ -691,7 +691,7 @@ static void net_tx_action(unsigned long
 
 static void netif_idx_release(u16 pending_idx)
 {
-    static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
+    static DEFINE_SPINLOCK(_lock);
     unsigned long flags;
 
     spin_lock_irqsave(&_lock, flags);
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri May 26 13:41:49 2006 -0600
@@ -17,13 +17,10 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
-
 #include <stdarg.h>
 #include <linux/module.h>
 #include <xen/xenbus.h>
-#include <xen/net_driver_util.h>
 #include "common.h"
-
 
 #if 0
 #undef DPRINTK
@@ -31,22 +28,19 @@
     printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
 #endif
 
-
 struct backend_info {
     struct xenbus_device *dev;
     netif_t *netif;
     struct xenbus_watch backend_watch;
-    XenbusState frontend_state;
+    enum xenbus_state frontend_state;
 };
-
 
 static int connect_rings(struct backend_info *);
 static void connect(struct backend_info *);
 static void maybe_connect(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
                             unsigned int);
-
 
 static int netback_remove(struct xenbus_device *dev)
 {
@@ -191,7 +185,7 @@ static void backend_changed(struct xenbu
  * Callback received when the frontend's state changes.
 */
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state)
+                             enum xenbus_state frontend_state)
 {
     struct backend_info *be = dev->data;
 
@@ -273,6 +267,27 @@ static void xen_net_read_rate(struct xen
     kfree(ratestr);
 }
 
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+    char *s, *e, *macstr;
+    int i;
+
+    macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+    if (IS_ERR(macstr))
+        return PTR_ERR(macstr);
+
+    for (i = 0; i < ETH_ALEN; i++) {
+        mac[i] = simple_strtoul(s, &e, 16);
+        if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+            kfree(macstr);
+            return -ENOENT;
+        }
+        s = e+1;
+    }
+
+    kfree(macstr);
+    return 0;
+}
 
 static void connect(struct backend_info *be)
 {
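With net_driver_util.c deleted, the MAC-address parser is duplicated into netback and netfront as xen_net_read_mac() (above and below). A standalone user-space analogue of the same parsing loop, turning "aa:bb:cc:dd:ee:ff" into six bytes; parse_mac is a made-up name for illustration:

    #include <errno.h>
    #include <stdlib.h>

    #define ETH_ALEN 6

    static int parse_mac(const char *s, unsigned char mac[ETH_ALEN])
    {
        char *e;
        int i;

        for (i = 0; i < ETH_ALEN; i++) {
            mac[i] = (unsigned char)strtoul(s, &e, 16);
            /* Each octet must consume digits and be terminated by ':'
             * (or by NUL after the final octet). */
            if (s == e || *e != ((i == ETH_ALEN - 1) ? '\0' : ':'))
                return -ENOENT;
            s = e + 1;
        }
        return 0;
    }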
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri May 26 13:41:49 2006 -0600
@@ -60,7 +60,6 @@
 #include <asm/uaccess.h>
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
-#include <xen/net_driver_util.h>
 
 #define GRANT_INVALID_REF 0
 
@@ -88,12 +87,6 @@ struct netfront_info {
     unsigned int handle;
     unsigned int evtchn, irq;
 
-    /* What is the status of our connection to the remote backend? */
-#define BEST_CLOSED       0
-#define BEST_DISCONNECTED 1
-#define BEST_CONNECTED    2
-    unsigned int backend_state;
-
     /* Receive-ring batched refills. */
 #define RX_MIN_TARGET 8
@@ -143,14 +136,6 @@ static inline unsigned short get_id_from
     list[0] = list[id];
     return id;
 }
-
-#ifdef DEBUG
-static const char *be_state_name[] = {
-    [BEST_CLOSED]       = "closed",
-    [BEST_DISCONNECTED] = "disconnected",
-    [BEST_CONNECTED]    = "connected",
-};
-#endif
 
 #define DPRINTK(fmt, args...) pr_debug("netfront (%s:%d) " fmt, \
                                        __FUNCTION__, __LINE__, ##args)
@@ -247,6 +232,27 @@ static int netfront_resume(struct xenbus
     return talk_to_backend(dev, info);
 }
 
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+    char *s, *e, *macstr;
+    int i;
+
+    macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+    if (IS_ERR(macstr))
+        return PTR_ERR(macstr);
+
+    for (i = 0; i < ETH_ALEN; i++) {
+        mac[i] = simple_strtoul(s, &e, 16);
+        if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+            kfree(macstr);
+            return -ENOENT;
+        }
+        s = e+1;
+    }
+
+    kfree(macstr);
+    return 0;
+}
 
 /* Common code used when first setting up, and when resuming. */
 static int talk_to_backend(struct xenbus_device *dev,
@@ -342,7 +348,6 @@ static int setup_device(struct xenbus_de
     }
     memset(txs, 0, PAGE_SIZE);
     memset(rxs, 0, PAGE_SIZE);
-    info->backend_state = BEST_DISCONNECTED;
 
     SHARED_RING_INIT(txs);
     FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
@@ -384,7 +389,7 @@ static int setup_device(struct xenbus_de
  * Callback received when the backend's state changes.
 */
 static void backend_changed(struct xenbus_device *dev,
-                            XenbusState backend_state)
+                            enum xenbus_state backend_state)
 {
     DPRINTK("\n");
 
@@ -465,7 +470,7 @@ static void network_tx_buf_gc(struct net
     struct netfront_info *np = netdev_priv(dev);
     struct sk_buff *skb;
 
-    if (np->backend_state != BEST_CONNECTED)
+    if (unlikely(!netif_carrier_ok(dev)))
         return;
 
     do {
@@ -527,7 +532,7 @@ static void network_alloc_rx_buffers(str
     struct xen_memory_reservation reservation;
     grant_ref_t ref;
 
-    if (unlikely(np->backend_state != BEST_CONNECTED))
+    if (unlikely(!netif_carrier_ok(dev)))
         return;
 
     /*
@@ -662,7 +667,7 @@ static int network_start_xmit(struct sk_
 
     spin_lock_irq(&np->tx_lock);
 
-    if (np->backend_state != BEST_CONNECTED) {
+    if (unlikely(!netif_carrier_ok(dev))) {
         spin_unlock_irq(&np->tx_lock);
         goto drop;
     }
@@ -748,7 +753,7 @@ static int netif_poll(struct net_device
 
     spin_lock(&np->rx_lock);
 
-    if (np->backend_state != BEST_CONNECTED) {
+    if (unlikely(!netif_carrier_ok(dev))) {
         spin_unlock(&np->rx_lock);
         return 0;
     }
@@ -1041,7 +1046,7 @@ static void network_connect(struct net_d
      * domain a kick because we've probably just requeued some
      * packets.
      */
-    np->backend_state = BEST_CONNECTED;
+    netif_carrier_on(dev);
     notify_remote_via_irq(np->irq);
     network_tx_buf_gc(dev);
 
@@ -1055,7 +1060,7 @@ static void show_device(struct netfront_
     if (np) {
         IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
                 np->handle,
-                be_state_name[np->backend_state],
+                netif_carrier_ok(np->netdev) ? "on" : "off",
                 netif_running(np->netdev) ? "open" : "closed",
                 np->evtchn,
                 np->tx,
@@ -1241,9 +1246,10 @@ static struct net_device * __devinit cre
     }
 
     np                = netdev_priv(netdev);
-    np->backend_state = BEST_CLOSED;
     np->handle        = handle;
     np->xbdev         = dev;
+
+    netif_carrier_off(netdev);
 
     spin_lock_init(&np->tx_lock);
     spin_lock_init(&np->rx_lock);
@@ -1392,7 +1398,7 @@ static void netif_disconnect_backend(str
     /* Stop old i/f to prevent errors whilst we rebuild the state. */
     spin_lock_irq(&info->tx_lock);
     spin_lock(&info->rx_lock);
-    info->backend_state = BEST_DISCONNECTED;
+    netif_carrier_off(info->netdev);
     spin_unlock(&info->rx_lock);
     spin_unlock_irq(&info->tx_lock);
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Fri May 26 13:41:49 2006 -0600
@@ -166,7 +166,7 @@ static int pciback_attach(struct pciback
 }
 
 static void pciback_frontend_changed(struct xenbus_device *xdev,
-                                     XenbusState fe_state)
+                                     enum xenbus_state fe_state)
 {
     struct pciback_device *pdev = xdev->data;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c Fri May 26 13:41:49 2006 -0600
@@ -196,7 +196,7 @@ static int pcifront_try_disconnect(struc
 static int pcifront_try_disconnect(struct pcifront_device *pdev)
 {
     int err = 0;
-    XenbusState prev_state;
+    enum xenbus_state prev_state;
 
     spin_lock(&pdev->dev_lock);
 
@@ -214,7 +214,7 @@ static int pcifront_try_disconnect(struc
 }
 
 static void pcifront_backend_changed(struct xenbus_device *xdev,
-                                     XenbusState be_state)
+                                     enum xenbus_state be_state)
 {
     struct pcifront_device *pdev = xdev->data;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Fri May 26 13:41:49 2006 -0600
@@ -159,10 +159,6 @@ static int privcmd_ioctl(struct inode *i
         break;
 
     case IOCTL_PRIVCMD_MMAPBATCH: {
-#ifndef __ia64__
-        mmu_update_t u;
-        uint64_t ptep;
-#endif
         privcmd_mmapbatch_t m;
         struct vm_area_struct *vma = NULL;
         unsigned long __user *p;
@@ -200,24 +196,12 @@ static int privcmd_ioctl(struct inode *i
         for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
             if (get_user(mfn, p))
                 return -EFAULT;
-#ifdef __ia64__
+
             ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
-                                         mfn, 1 << PAGE_SHIFT,
+                                         mfn, PAGE_SIZE,
                                          vma->vm_page_prot, m.dom);
             if (ret < 0)
-                goto batch_err;
-#else
-
-            ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
-            if (ret)
-                goto batch_err;
-
-            u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
-            u.ptr = ptep;
-
-            if (HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0)
                 put_user(0xF0000000 | mfn, p);
-#endif
         }
 
         ret = 0;
@@ -283,6 +267,9 @@ static int __init privcmd_init(void)
     set_bit(__HYPERVISOR_mmuext_op,       hypercall_permission_map);
     set_bit(__HYPERVISOR_xen_version,     hypercall_permission_map);
     set_bit(__HYPERVISOR_sched_op,        hypercall_permission_map);
+    set_bit(__HYPERVISOR_sched_op_compat, hypercall_permission_map);
+    set_bit(__HYPERVISOR_event_channel_op_compat,
+            hypercall_permission_map);
 
     privcmd_intf = create_xen_proc_entry("privcmd", 0400);
     if (privcmd_intf != NULL)
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Fri May 26 13:41:49 2006 -0600
@@ -34,7 +34,7 @@ struct backend_info
 
     /* watch front end for changes */
     struct xenbus_watch backend_watch;
-    XenbusState frontend_state;
+    enum xenbus_state frontend_state;
 };
 
 static void maybe_connect(struct backend_info *be);
@@ -43,7 +43,7 @@ static void backend_changed(struct xenbu
 static void backend_changed(struct xenbus_watch *watch,
                             const char **vec, unsigned int len);
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state);
+                             enum xenbus_state frontend_state);
 
 static int tpmback_remove(struct xenbus_device *dev)
 {
@@ -129,7 +129,7 @@ static void backend_changed(struct xenbu
 
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state)
+                             enum xenbus_state frontend_state)
 {
     struct backend_info *be = dev->data;
     int err;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Fri May 26 13:41:49 2006 -0600
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path2);
 EXPORT_SYMBOL_GPL(xenbus_watch_path2);
 
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState state)
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
 {
     /* We check whether the state is currently set to the given value, and
        if not, then the state is set.  We don't want to unconditionally
@@ -269,9 +269,9 @@ int xenbus_free_evtchn(struct xenbus_dev
 }
 
-XenbusState xenbus_read_driver_state(const char *path)
-{
-    XenbusState result;
+enum xenbus_state xenbus_read_driver_state(const char *path)
+{
+    enum xenbus_state result;
 
     int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL);
     if (err)
         result = XenbusStateClosed;
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri May 26 13:41:49 2006 -0600
@@ -284,7 +284,7 @@ static void otherend_changed(struct xenb
     struct xenbus_device *dev =
         container_of(watch, struct xenbus_device, otherend_watch);
     struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
-    XenbusState state;
+    enum xenbus_state state;
 
     /* Protect us against watches firing on old details when the otherend
        details change, say immediately after a resume. */
@@ -539,7 +539,7 @@ static int xenbus_probe_node(struct xen_
     size_t stringlen;
     char *tmpstring;
 
-    XenbusState state = xenbus_read_driver_state(nodename);
+    enum xenbus_state state = xenbus_read_driver_state(nodename);
 
     if (state != XenbusStateInitialising) {
         /* Device is not new, so ignore it.  This can happen if a
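The other theme running through every frontend/backend driver above: the XenbusState typedef gives way to the plain enum xenbus_state tag in all callback signatures and state variables. A tiny sketch of the before/after shape; cb_old and cb_new are placeholder names, and the enum body is elided:

    struct xenbus_device;                      /* opaque for this sketch */
    enum xenbus_state { XenbusStateUnknown };  /* real values elided */
    typedef enum xenbus_state XenbusState;

    /* Before: the typedef'd name. */
    void cb_old(struct xenbus_device *dev, XenbusState state);

    /* After this merge: the bare enum tag, so public headers work in
     * plain C without requiring the typedef. */
    void cb_new(struct xenbus_device *dev, enum xenbus_state state);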
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Fri May 26 13:41:49 2006 -0600
@@ -75,7 +75,7 @@ struct xenbus_device {
     int otherend_id;
     struct xenbus_watch otherend_watch;
     struct device dev;
-    XenbusState state;
+    enum xenbus_state state;
     void *data;
 };
 
@@ -98,7 +98,7 @@ struct xenbus_driver {
     int (*probe)(struct xenbus_device *dev,
                  const struct xenbus_device_id *id);
     void (*otherend_changed)(struct xenbus_device *dev,
-                             XenbusState backend_state);
+                             enum xenbus_state backend_state);
     int (*remove)(struct xenbus_device *dev);
     int (*suspend)(struct xenbus_device *dev);
     int (*resume)(struct xenbus_device *dev);
@@ -207,7 +207,7 @@ int xenbus_watch_path2(struct xenbus_dev
  * Return 0 on success, or -errno on error.  On error, the device will switch
  * to XenbusStateClosing, and the error will be saved in the store.
 */
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState new_state);
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
 
 
 /**
@@ -273,7 +273,7 @@ int xenbus_free_evtchn(struct xenbus_dev
  * Return the state of the driver rooted at the given store path, or
  * XenbusStateClosed if no state can be read.
 */
-XenbusState xenbus_read_driver_state(const char *path);
+enum xenbus_state xenbus_read_driver_state(const char *path);
 
 
 /***
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/Makefile
--- a/tools/libxc/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/Makefile Fri May 26 13:41:49 2006 -0600
@@ -20,6 +20,7 @@ SRCS       += xc_physdev.c
 SRCS       += xc_physdev.c
 SRCS       += xc_private.c
 SRCS       += xc_sedf.c
+SRCS       += xc_csched.c
 SRCS       += xc_tbuf.c
 
 ifeq ($(patsubst x86%,x86,$(XEN_TARGET_ARCH)),x86)
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_linux_build.c Fri May 26 13:41:49 2006 -0600
@@ -268,21 +268,10 @@ static int setup_pg_tables_pae(int xc_ha
     l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
     l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
     uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
-    unsigned long ppt_alloc, count, nmfn;
+    unsigned long ppt_alloc, count;
 
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
-
-    if ( page_array[ppt_alloc] > 0xfffff )
-    {
-        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
-        if ( nmfn == 0 )
-        {
-            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
-            goto error_out;
-        }
-        page_array[ppt_alloc] = nmfn;
-    }
 
     alloc_pt(l3tab, vl3tab, pl3tab);
     vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_linux_restore.c Fri May 26 13:41:49 2006 -0600
@@ -331,25 +331,17 @@ int xc_linux_restore(int xc_handle, int
             ** A page table page - need to 'uncanonicalize' it, i.e.
             ** replace all the references to pfns with the corresponding
             ** mfns for the new domain.
-            **
-            ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
-            ** so we may need to update the p2m after the main loop.
-            ** Hence we defer canonicalization of L1s until then.
             */
-            if(pt_levels != 3 || pagetype != L1TAB) {
-
-                if(!uncanonicalize_pagetable(pagetype, page)) {
-                    /*
-                    ** Failing to uncanonicalize a page table can be ok
-                    ** under live migration since the pages type may have
-                    ** changed by now (and we'll get an update later).
-                    */
-                    DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
-                            pagetype >> 28, pfn, mfn);
-                    nraces++;
-                    continue;
-                }
-
+            if(!uncanonicalize_pagetable(pagetype, page)) {
+                /*
+                ** Failing to uncanonicalize a page table can be ok
+                ** under live migration since the pages type may have
+                ** changed by now (and we'll get an update later).
+                */
+                DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
+                        pagetype >> 28, pfn, mfn);
+                nraces++;
+                continue;
             }
 
         } else if(pagetype != NOTAB) {
@@ -397,100 +389,6 @@ int xc_linux_restore(int xc_handle, int
     }
 
     DPRINTF("Received all pages (%d races)\n", nraces);
-
-    if(pt_levels == 3) {
-
-        /*
-        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
-        ** is a little awkward and involves (a) finding all such PGDs and
-        ** replacing them with 'lowmem' versions; (b) upating the p2m[]
-        ** with the new info; and (c) canonicalizing all the L1s using the
-        ** (potentially updated) p2m[].
-        **
-        ** This is relatively slow (and currently involves two passes through
-        ** the pfn_type[] array), but at least seems to be correct. May wish
-        ** to consider more complex approaches to optimize this later.
-        */
-
-        int j, k;
-
-        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
-        for (i = 0; i < max_pfn; i++) {
-
-            if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
-
-                unsigned long new_mfn;
-                uint64_t l3ptes[4];
-                uint64_t *l3tab;
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                         PROT_READ, p2m[i]);
-
-                for(j = 0; j < 4; j++)
-                    l3ptes[j] = l3tab[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-                if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
-                    ERR("Couldn't get a page below 4GB :-(");
-                    goto out;
-                }
-
-                p2m[i] = new_mfn;
-                if (xc_add_mmu_update(xc_handle, mmu,
-                                      (((unsigned long long)new_mfn)
-                                       << PAGE_SHIFT) |
-                                      MMU_MACHPHYS_UPDATE, i)) {
-                    ERR("Couldn't m2p on PAE root pgdir");
-                    goto out;
-                }
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                         PROT_READ | PROT_WRITE, p2m[i]);
-
-                for(j = 0; j < 4; j++)
-                    l3tab[j] = l3ptes[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-            }
-        }
-
-        /* Second pass: find all L1TABs and uncanonicalize them */
-        j = 0;
-
-        for(i = 0; i < max_pfn; i++) {
-
-            if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
-                region_mfn[j] = p2m[i];
-                j++;
-            }
-
-            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
-
-                if (!(region_base = xc_map_foreign_batch(
-                          xc_handle, dom, PROT_READ | PROT_WRITE,
-                          region_mfn, j))) {
-                    ERR("map batch failed");
-                    goto out;
-                }
-
-                for(k = 0; k < j; k++) {
-                    if(!uncanonicalize_pagetable(L1TAB,
-                                                 region_base + k*PAGE_SIZE)) {
-                        ERR("failed uncanonicalize pt!");
-                        goto out;
-                    }
-                }
-
-                munmap(region_base, j*PAGE_SIZE);
-                j = 0;
-            }
-        }
-
-    }
 
     if (xc_finish_mmu_updates(xc_handle, mmu)) {
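The restore path above drops the two-pass PAE special case and now uncanonicalises every page-table page inline. Conceptually, uncanonicalising rewrites each present entry's domain-relative pfn to the machine frame (mfn) chosen for the new domain. A simplified sketch of that rewrite; the macros, types, and function name below are placeholders, not libxc's real definitions:

    #define PAGE_SHIFT   12
    #define PAGE_PRESENT 0x1UL

    static void uncanonicalize(unsigned long *pt, int nr,
                               const unsigned long *p2m /* pfn -> mfn */)
    {
        int i;
        for (i = 0; i < nr; i++) {
            unsigned long pfn, flags;
            if (!(pt[i] & PAGE_PRESENT))
                continue;
            pfn   = pt[i] >> PAGE_SHIFT;
            flags = pt[i] & ((1UL << PAGE_SHIFT) - 1);
            pt[i] = (p2m[pfn] << PAGE_SHIFT) | flags;
        }
    }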
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_private.c Fri May 26 13:41:49 2006 -0600
@@ -430,28 +430,6 @@ int xc_version(int xc_handle, int cmd, v
     return rc;
 }
 
-unsigned long xc_make_page_below_4G(
-    int xc_handle, uint32_t domid, unsigned long mfn)
-{
-    unsigned long new_mfn;
-
-    if ( xc_domain_memory_decrease_reservation(
-        xc_handle, domid, 1, 0, &mfn) != 0 )
-    {
-        fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
-        return 0;
-    }
-
-    if ( xc_domain_memory_increase_reservation(
-        xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
-    {
-        fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
-        return 0;
-    }
-
-    return new_mfn;
-}
-
 /*
  * Local variables:
  * mode: C
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace.c Fri May 26 13:41:49 2006 -0600
@@ -185,7 +185,7 @@ map_domain_va_32(
     void *guest_va,
     int perm)
 {
-    unsigned long l1p, p, va = (unsigned long)guest_va;
+    unsigned long l2e, l1e, l1p, p, va = (unsigned long)guest_va;
     uint32_t *l2, *l1;
     static void *v[MAX_VIRT_CPUS];
 
@@ -194,18 +194,20 @@ map_domain_va_32(
     if ( l2 == NULL )
         return NULL;
 
-    l1p = to_ma(cpu, l2[l2_table_offset(va)]);
+    l2e = l2[l2_table_offset_i386(va)];
     munmap(l2, PAGE_SIZE);
-    if ( !(l1p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
     l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ,
                               l1p >> PAGE_SHIFT);
     if ( l1 == NULL )
         return NULL;
 
-    p = to_ma(cpu, l1[l1_table_offset(va)]);
+    l1e = l1[l1_table_offset_i386(va)];
     munmap(l1, PAGE_SIZE);
-    if ( !(p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l1e & _PAGE_PRESENT) )
+        return NULL;
+    p = to_ma(cpu, l1e);
     if ( v[cpu] != NULL )
         munmap(v[cpu], PAGE_SIZE);
     v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE,
                                   perm, p >> PAGE_SHIFT);
@@ -223,7 +225,7 @@ map_domain_va_pae(
     void *guest_va,
     int perm)
 {
-    unsigned long l2p, l1p, p, va = (unsigned long)guest_va;
+    unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va;
     uint64_t *l3, *l2, *l1;
     static void *v[MAX_VIRT_CPUS];
 
@@ -232,26 +234,29 @@ map_domain_va_pae(
     if ( l3 == NULL )
        return NULL;
 
-    l2p = to_ma(cpu, l3[l3_table_offset_pae(va)]);
+    l3e = l3[l3_table_offset_pae(va)];
     munmap(l3, PAGE_SIZE);
-    if ( !(l2p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l3e & _PAGE_PRESENT) )
+        return NULL;
+    l2p = to_ma(cpu, l3e);
     l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ,
                               l2p >> PAGE_SHIFT);
     if ( l2 == NULL )
         return NULL;
 
-    l1p = to_ma(cpu, l2[l2_table_offset_pae(va)]);
+    l2e = l2[l2_table_offset_pae(va)];
     munmap(l2, PAGE_SIZE);
-    if ( !(l1p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
     l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm,
                               l1p >> PAGE_SHIFT);
     if ( l1 == NULL )
         return NULL;
 
-    p = to_ma(cpu, l1[l1_table_offset_pae(va)]);
+    l1e = l1[l1_table_offset_pae(va)];
     munmap(l1, PAGE_SIZE);
-    if ( !(p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l1e & _PAGE_PRESENT) )
+        return NULL;
+    p = to_ma(cpu, l1e);
     if ( v[cpu] != NULL )
         munmap(v[cpu], PAGE_SIZE);
     v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE,
                                   perm, p >> PAGE_SHIFT);
@@ -269,9 +274,10 @@ map_domain_va_64(
     void *guest_va,
     int perm)
 {
-    unsigned long l3p, l2p, l1p, l1e, p, va = (unsigned long)guest_va;
+    unsigned long l4e, l3e, l2e, l1e, l3p, l2p, l1p, p, va = (unsigned long)guest_va;
     uint64_t *l4, *l3, *l2, *l1;
     static void *v[MAX_VIRT_CPUS];
+
     if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
         return map_domain_va_32(xc_handle, cpu, guest_va, perm);
 
@@ -281,40 +287,41 @@ map_domain_va_64(
     if ( l4 == NULL )
         return NULL;
 
-    l3p = to_ma(cpu, l4[l4_table_offset(va)]);
+    l4e = l4[l4_table_offset(va)];
     munmap(l4, PAGE_SIZE);
-    if ( !(l3p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l4e & _PAGE_PRESENT) )
+        return NULL;
+    l3p = to_ma(cpu, l4e);
     l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ,
                               l3p >> PAGE_SHIFT);
     if ( l3 == NULL )
         return NULL;
 
-    l2p = to_ma(cpu, l3[l3_table_offset(va)]);
+    l3e = l3[l3_table_offset(va)];
     munmap(l3, PAGE_SIZE);
-    if ( !(l2p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l3e & _PAGE_PRESENT) )
+        return NULL;
+    l2p = to_ma(cpu, l3e);
     l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ,
                               l2p >> PAGE_SHIFT);
     if ( l2 == NULL )
         return NULL;
 
     l1 = NULL;
-    l1e = to_ma(cpu, l2[l2_table_offset(va)]);
-    if ( !(l1e & _PAGE_PRESENT) )
-    {
-        munmap(l2, PAGE_SIZE);
-        return NULL;
-    }
-    l1p = l1e >> PAGE_SHIFT;
-    if (l1e & 0x80)  { /* 2M pages */
+    l2e = l2[l2_table_offset(va)];
+    munmap(l2, PAGE_SIZE);
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
+    if (l2e & 0x80)  { /* 2M pages */
         p = to_ma(cpu, (l1p + l1_table_offset(va)) << PAGE_SHIFT);
     } else { /* 4K pages */
-        l1p = to_ma(cpu, l1p);
         l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm,
                                   l1p >> PAGE_SHIFT);
-        munmap(l2, PAGE_SIZE);
         if ( l1 == NULL )
             return NULL;
 
-        p = to_ma(cpu, l1[l1_table_offset(va)]);
+        l1e = l1[l1_table_offset(va)];
+        if ( !(l1e & _PAGE_PRESENT) )
+            return NULL;
+        p = to_ma(cpu, l1e);
     }
     if ( v[cpu] != NULL )
         munmap(v[cpu], PAGE_SIZE);
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace.h
--- a/tools/libxc/xc_ptrace.h Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace.h Fri May 26 13:41:49 2006 -0600
@@ -7,7 +7,6 @@
 #define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */
 #define X86_CR0_PG 0x80000000 /* Paging (RW) */
 #define BSD_PAGE_MASK (PAGE_SIZE-1)
-#define PDRSHIFT 22
 #define PSL_T 0x00000100 /* trace enable bit */
 
 #ifdef __x86_64__
@@ -162,8 +161,6 @@ struct gdb_regs {
 #endif
 
 #define printval(x) printf("%s = %lx\n", #x, (long)x);
-#define vtopdi(va) ((va) >> PDRSHIFT)
-#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
 #endif
 
 typedef void (*thr_ev_handler_t)(long);
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace_core.c Fri May 26 13:41:49 2006 -0600
@@ -3,6 +3,7 @@
 #include <sys/ptrace.h>
 #include <sys/wait.h>
 #include "xc_private.h"
+#include "xg_private.h"
 #include "xc_ptrace.h"
 #include <time.h>
 
@@ -54,7 +55,7 @@ map_domain_va_core(unsigned long domfd,
         }
         cr3_virt[cpu] = v;
     }
-    if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
+    if ((pde = cr3_virt[cpu][l2_table_offset_i386(va)]) == 0) /* logical address */
         return NULL;
     if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -70,7 +71,7 @@ map_domain_va_core(unsigned long domfd,
             return NULL;
         pde_virt[cpu] = v;
     }
-    if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
+    if ((page = pde_virt[cpu][l1_table_offset_i386(va)]) == 0) /* logical address */
         return NULL;
     if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -84,7 +85,7 @@ map_domain_va_core(unsigned long domfd,
                  map_mtop_offset(page_phys[cpu]));
        if (v == MAP_FAILED)
        {
-           printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, vtopti(va));
+           printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, l1_table_offset_i386(va));
            page_phys[cpu] = 0;
            return NULL;
        }
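The xc_ptrace.c rework above fixes the order of operations in the page-table walks: the present bit is now tested on the raw entry before to_ma() translates it, whereas the old code tested the already-translated value. A minimal sketch of one corrected step; names and the translation callback are simplified for illustration (the real to_ma() also takes the vcpu):

    #define PAGE_PRESENT 0x1UL

    /* One level of the walk: validate first, translate second. */
    static unsigned long walk_step(unsigned long entry,
                                   unsigned long (*to_ma)(unsigned long))
    {
        if (!(entry & PAGE_PRESENT))
            return 0;           /* caller treats 0 as "not mapped" */
        return to_ma(entry);
    }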
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_tbuf.c
--- a/tools/libxc/xc_tbuf.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_tbuf.c Fri May 26 13:41:49 2006 -0600
@@ -18,53 +18,57 @@
 
 static int tbuf_enable(int xc_handle, int enable)
 {
-  DECLARE_DOM0_OP;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  if (enable)
-    op.u.tbufcontrol.op = DOM0_TBUF_ENABLE;
-  else
-    op.u.tbufcontrol.op = DOM0_TBUF_DISABLE;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    if (enable)
+        op.u.tbufcontrol.op = DOM0_TBUF_ENABLE;
+    else
+        op.u.tbufcontrol.op = DOM0_TBUF_DISABLE;
 
-  return xc_dom0_op(xc_handle, &op);
+    return xc_dom0_op(xc_handle, &op);
 }
 
 int xc_tbuf_set_size(int xc_handle, unsigned long size)
 {
-  DECLARE_DOM0_OP;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  op.u.tbufcontrol.op = DOM0_TBUF_SET_SIZE;
-  op.u.tbufcontrol.size = size;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    op.u.tbufcontrol.op = DOM0_TBUF_SET_SIZE;
+    op.u.tbufcontrol.size = size;
 
-  return xc_dom0_op(xc_handle, &op);
+    return xc_dom0_op(xc_handle, &op);
 }
 
 int xc_tbuf_get_size(int xc_handle, unsigned long *size)
 {
-  int rc;
-  DECLARE_DOM0_OP;
+    int rc;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  op.u.tbufcontrol.op = DOM0_TBUF_GET_INFO;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    op.u.tbufcontrol.op = DOM0_TBUF_GET_INFO;
 
-  rc = xc_dom0_op(xc_handle, &op);
-  if (rc == 0)
-    *size = op.u.tbufcontrol.size;
-  return rc;
+    rc = xc_dom0_op(xc_handle, &op);
+    if (rc == 0)
+        *size = op.u.tbufcontrol.size;
+    return rc;
 }
 
 int xc_tbuf_enable(int xc_handle, size_t cnt, unsigned long *mfn,
-    unsigned long *size)
+                   unsigned long *size)
 {
     DECLARE_DOM0_OP;
     int rc;
 
-    if ( xc_tbuf_set_size(xc_handle, cnt) != 0 )
-        return -1;
+    /*
+     * Ignore errors (at least for now) as we get an error if size is already
+     * set (since trace buffers cannot be reallocated). If we really have no
+     * buffers at all then tbuf_enable() will fail, so this is safe.
+     */
+    (void)xc_tbuf_set_size(xc_handle, cnt);
 
     if ( tbuf_enable(xc_handle, 1) != 0 )
         return -1;
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xenctrl.h Fri May 26 13:41:49 2006 -0600
@@ -354,6 +354,14 @@ int xc_sedf_domain_get(int xc_handle,
                        uint64_t *latency, uint16_t *extratime,
                        uint16_t *weight);
 
+int xc_csched_domain_set(int xc_handle,
+                         uint32_t domid,
+                         struct csched_domain *sdom);
+
+int xc_csched_domain_get(int xc_handle,
+                         uint32_t domid,
+                         struct csched_domain *sdom);
+
 typedef evtchn_status_t xc_evtchn_status_t;
 
 /*
@@ -444,9 +452,6 @@ int xc_domain_iomem_permission(int xc_ha
                                unsigned long first_mfn,
                                unsigned long nr_mfns,
                                uint8_t allow_access);
-
-unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
-                                    unsigned long mfn);
 
 typedef dom0_perfc_desc_t xc_perfc_desc_t;
 /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xg_private.h Fri May 26 13:41:49 2006 -0600
@@ -48,6 +48,8 @@ unsigned long csum_page (void * page);
 #define L2_PAGETABLE_SHIFT_PAE   21
 #define L3_PAGETABLE_SHIFT_PAE   30
 
+#define L2_PAGETABLE_SHIFT_I386  22
+
 #if defined(__i386__)
 #define L1_PAGETABLE_SHIFT       12
 #define L2_PAGETABLE_SHIFT       22
@@ -61,6 +63,9 @@ unsigned long csum_page (void * page);
 #define L1_PAGETABLE_ENTRIES_PAE 512
 #define L2_PAGETABLE_ENTRIES_PAE 512
 #define L3_PAGETABLE_ENTRIES_PAE   4
+
+#define L1_PAGETABLE_ENTRIES_I386 1024
+#define L2_PAGETABLE_ENTRIES_I386 1024
 
 #if defined(__i386__)
 #define L1_PAGETABLE_ENTRIES   1024
@@ -95,6 +100,11 @@ typedef unsigned long l4_pgentry_t;
 #define l3_table_offset_pae(_a) \
   (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
 
+#define l1_table_offset_i386(_a) \
+  (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES_I386 - 1))
+#define l2_table_offset_i386(_a) \
+  (((_a) >> L2_PAGETABLE_SHIFT_I386) & (L2_PAGETABLE_ENTRIES_I386 - 1))
+
 #if defined(__i386__)
 #define l1_table_offset(_a) \
           (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri May 26 13:41:49 2006 -0600
@@ -716,6 +716,49 @@ static PyObject *pyxc_sedf_domain_get(Xc
                          "weight",    weight);
 }
 
+static PyObject *pyxc_csched_domain_set(XcObject *self,
+                                        PyObject *args,
+                                        PyObject *kwds)
+{
+    uint32_t domid;
+    uint16_t weight;
+    uint16_t cap;
+    static char *kwd_list[] = { "dom", "weight", "cap", NULL };
+    static char kwd_type[] = "I|HH";
+    struct csched_domain sdom;
+
+    weight = 0;
+    cap = (uint16_t)~0U;
+    if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list,
+                                     &domid, &weight, &cap) )
+        return NULL;
+
+    sdom.weight = weight;
+    sdom.cap = cap;
+
+    if ( xc_csched_domain_set(self->xc_handle, domid, &sdom) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_csched_domain_get(XcObject *self, PyObject *args)
+{
+    uint32_t domid;
+    struct csched_domain sdom;
+
+    if( !PyArg_ParseTuple(args, "I", &domid) )
+        return NULL;
+
+    if ( xc_csched_domain_get(self->xc_handle, domid, &sdom) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    return Py_BuildValue("{s:H,s:H}",
+                         "weight", sdom.weight,
+                         "cap",    sdom.cap);
+}
+
 static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args)
 {
     uint32_t dom;
@@ -1040,6 +1083,24 @@ static PyMethodDef pyxc_methods[] = {
       " slice     [long]: CPU reservation per period\n"
      " latency   [long]: domain's wakeup latency hint\n"
      " extratime [int]:  domain aware of extratime?\n"},
+
+    { "csched_domain_set",
+      (PyCFunction)pyxc_csched_domain_set,
+      METH_KEYWORDS, "\n"
+      "Set the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler.\n"
+      " domid     [int]:   domain id to set\n"
+      " weight    [short]: domain's scheduling weight\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "csched_domain_get",
+      (PyCFunction)pyxc_csched_domain_get,
+      METH_VARARGS, "\n"
+      "Get the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler.\n"
+      " domid     [int]:   domain id to get\n"
+      "Returns:   [dict]\n"
+      " weight    [short]: domain's scheduling weight\n"},
 
     { "evtchn_alloc_unbound",
       (PyCFunction)pyxc_evtchn_alloc_unbound,
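The new credit-scheduler plumbing runs from xen/common/sched_credit.c through libxc (xc_csched_domain_set/get, declared in the xenctrl.h hunk above) up to the Python bindings. A hedged usage sketch of the libxc side; the struct is re-declared here only to keep the sketch self-contained, and cap = ~0 mirrors the "leave the cap unset" default used by the Python wrapper above:

    #include <stdint.h>
    #include <stdio.h>

    struct csched_domain { uint16_t weight; uint16_t cap; };
    int xc_csched_domain_set(int xc_handle, uint32_t domid,
                             struct csched_domain *sdom);

    static int set_credit_weight(int xc_handle, uint32_t domid, uint16_t w)
    {
        struct csched_domain sdom = { .weight = w, .cap = (uint16_t)~0U };

        if (xc_csched_domain_set(xc_handle, domid, &sdom) != 0) {
            perror("xc_csched_domain_set");
            return -1;
        }
        return 0;
    }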
tools/python/xen/lowlevel/xs/xs.c --- a/tools/python/xen/lowlevel/xs/xs.c Thu May 25 15:59:18 2006 -0600 +++ b/tools/python/xen/lowlevel/xs/xs.c Fri May 26 13:41:49 2006 -0600 @@ -272,11 +272,12 @@ static PyObject *xspy_get_permissions(Xs if (perms) { PyObject *val = PyList_New(perms_n); - for (i = 0; i < perms_n; i++, perms++) { - PyObject *p = Py_BuildValue("{s:i,s:i,s:i}", - "dom", perms->id, - "read", perms->perms & XS_PERM_READ, - "write",perms->perms & XS_PERM_WRITE); + for (i = 0; i < perms_n; i++) { + PyObject *p = + Py_BuildValue("{s:i,s:i,s:i}", + "dom", perms[i].id, + "read", perms[i].perms & XS_PERM_READ, + "write", perms[i].perms & XS_PERM_WRITE); PyList_SetItem(val, i, p); } diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Thu May 25 15:59:18 2006 -0600 +++ b/tools/python/xen/xend/XendDomain.py Fri May 26 13:41:49 2006 -0600 @@ -522,6 +522,28 @@ class XendDomain: except Exception, ex: raise XendError(str(ex)) + def domain_csched_get(self, domid): + """Get credit scheduler parameters for a domain. + """ + dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + try: + return xc.csched_domain_get(dominfo.getDomid()) + except Exception, ex: + raise XendError(str(ex)) + + def domain_csched_set(self, domid, weight, cap): + """Set credit scheduler parameters for a domain. + """ + dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + try: + return xc.csched_domain_set(dominfo.getDomid(), weight, cap) + except Exception, ex: + raise XendError(str(ex)) + def domain_maxmem_set(self, domid, mem): """Set the memory limit for a domain. diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu May 25 15:59:18 2006 -0600 +++ b/tools/python/xen/xend/XendDomainInfo.py Fri May 26 13:41:49 2006 -0600 @@ -701,6 +701,16 @@ class XendDomainInfo: log.debug("Storing VM details: %s", to_store) self.writeVm(to_store) + self.setVmPermissions() + + + def setVmPermissions(self): + """Allow the guest domain to read its UUID. We don't allow it to + access any other entry, for security.""" + xstransact.SetPermissions('%s/uuid' % self.vmpath, + { 'dom' : self.domid, + 'read' : True, + 'write' : False }) def storeDomDetails(self): @@ -1535,6 +1545,10 @@ class XendDomainInfo: self.configure_bootloader() config = self.sxpr() + + if self.infoIsSet('cpus') and len(self.info['cpus']) != 0: + config.append(['cpus', reduce(lambda x, y: str(x) + "," + str(y), + self.info['cpus'])]) if self.readVm(RESTART_IN_PROGRESS): log.error('Xend failed during restart of domain %d. ' diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/balloon.py --- a/tools/python/xen/xend/balloon.py Thu May 25 15:59:18 2006 -0600 +++ b/tools/python/xen/xend/balloon.py Fri May 26 13:41:49 2006 -0600 @@ -32,6 +32,7 @@ BALLOON_OUT_SLACK = 1 # MiB. We need th BALLOON_OUT_SLACK = 1 # MiB. We need this because the physinfo details are # rounded. RETRY_LIMIT = 10 +RETRY_LIMIT_INCR = 5 ## # The time to sleep between retries grows linearly, using this value (in # seconds). 
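[Editor's sketch] The xs.c fix above switches from bumping the perms pointer to plain indexing while building the per-domain dicts. For reference, a C-side sketch of the same flattening; struct xs_permissions here is a stand-in mirroring the tools/xenstore definition (a domain id plus a READ/WRITE flag word), and the print format is illustrative:

    #include <stdio.h>

    /* Stand-in for the definitions in tools/xenstore headers. */
    enum xs_perm_type { XS_PERM_NONE = 0, XS_PERM_READ = 1, XS_PERM_WRITE = 2 };
    struct xs_permissions { unsigned int id; enum xs_perm_type perms; };

    static void dump_perms(const struct xs_permissions *perms, unsigned int n)
    {
        for (unsigned int i = 0; i < n; i++)   /* index; don't bump the base */
            printf("dom=%u read=%d write=%d\n", perms[i].id,
                   !!(perms[i].perms & XS_PERM_READ),
                   !!(perms[i].perms & XS_PERM_WRITE));
    }

    int main(void)
    {
        struct xs_permissions p[] = { { 0, XS_PERM_NONE },
                                      { 7, XS_PERM_READ } };
        dump_perms(p, 2);
        return 0;
    }
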
When the system is lightly loaded, memory should be scrubbed and @@ -118,7 +119,8 @@ def free(required): retries = 0 sleep_time = SLEEP_TIME_GROWTH last_new_alloc = None - while retries < RETRY_LIMIT: + rlimit = RETRY_LIMIT + while retries < rlimit: free_mem = xc.physinfo()['free_memory'] if free_mem >= need_mem: @@ -127,7 +129,9 @@ def free(required): return if retries == 0: - log.debug("Balloon: free %d; need %d.", free_mem, need_mem) + rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR + log.debug("Balloon: free %d; need %d; retries: %d.", + free_mem, need_mem, rlimit) if dom0_min_mem > 0: dom0_alloc = get_dom0_current_alloc() @@ -143,8 +147,9 @@ def free(required): # Continue to retry, waiting for ballooning. time.sleep(sleep_time) + if retries < 2 * RETRY_LIMIT: + sleep_time += SLEEP_TIME_GROWTH retries += 1 - sleep_time += SLEEP_TIME_GROWTH # Not enough memory; diagnose the problem. if dom0_min_mem == 0: diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/server/SrvDomain.py --- a/tools/python/xen/xend/server/SrvDomain.py Thu May 25 15:59:18 2006 -0600 +++ b/tools/python/xen/xend/server/SrvDomain.py Fri May 26 13:41:49 2006 -0600 @@ -129,6 +129,20 @@ class SrvDomain(SrvDir): ['latency', 'int'], ['extratime', 'int'], ['weight', 'int']]) + val = fn(req.args, {'dom': self.dom.domid}) + return val + + def op_domain_csched_get(self, _, req): + fn = FormFn(self.xd.domain_csched_get, + [['dom', 'int']]) + val = fn(req.args, {'dom': self.dom.domid}) + return val + + + def op_domain_csched_set(self, _, req): + fn = FormFn(self.xd.domain_csched_set, + [['dom', 'int'], + ['weight', 'int']]) val = fn(req.args, {'dom': self.dom.domid}) return val diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/xenstore/xstransact.py --- a/tools/python/xen/xend/xenstore/xstransact.py Thu May 25 15:59:18 2006 -0600 +++ b/tools/python/xen/xend/xenstore/xstransact.py Fri May 26 13:41:49 2006 -0600 @@ -221,6 +221,34 @@ class xstransact: xshandle().mkdir(self.transaction, self.prependPath(key)) + def get_permissions(self, *args): + """If no arguments are given, return the permissions at this + transaction's path. If one argument is given, treat that argument as + a subpath to this transaction's path, and return the permissions at + that path. Otherwise, treat each argument as a subpath to this + transaction's path, and return a list composed of the permissions at + each of those instead. + """ + if len(args) == 0: + return xshandle().get_permissions(self.transaction, self.path) + if len(args) == 1: + return self._get_permissions(args[0]) + ret = [] + for key in args: + ret.append(self._get_permissions(key)) + return ret + + + def _get_permissions(self, key): + path = self.prependPath(key) + try: + return xshandle().get_permissions(self.transaction, path) + except RuntimeError, ex: + raise RuntimeError(ex.args[0], + '%s, while getting permissions from %s' % + (ex.args[1], path)) + + def set_permissions(self, *args): if len(args) == 0: raise TypeError diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Thu May 25 15:59:18 2006 -0600 +++ b/tools/python/xen/xm/main.py Fri May 26 13:41:49 2006 -0600 @@ -99,6 +99,7 @@ sched_sedf_help = "sched-sedf [DOM] [OPT specifies another way of setting a domain's\n\ cpu period/slice." 
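[Editor's sketch] The balloon.free() change above makes the retry budget proportional to the shortfall and freezes the linearly growing sleep once retries pass 2*RETRY_LIMIT. The budget arithmetic restated in C with concrete numbers; units are assumed to be KiB, which is what the /1024 scaling to MiB suggests:

    #include <stdio.h>

    #define RETRY_LIMIT      10
    #define RETRY_LIMIT_INCR  5

    int main(void)
    {
        long need_mem = 262144, free_mem = 131072;  /* 256MiB needed, 128MiB free */
        long rlimit = RETRY_LIMIT;

        /* One extra block of RETRY_LIMIT_INCR retries per MiB of shortfall. */
        rlimit += ((need_mem - free_mem) / 1024) * RETRY_LIMIT_INCR;
        printf("retries allowed: %ld\n", rlimit);   /* 10 + 128*5 = 650 */
        return 0;
    }
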
+csched_help = "csched Set or get credit scheduler parameters" block_attach_help = """block-attach <DomId> <BackDev> <FrontDev> <Mode> [BackDomId] Create a new virtual block device""" block_detach_help = """block-detach <DomId> <DevId> Destroy a domain's virtual block device, @@ -174,6 +175,7 @@ host_commands = [ ] scheduler_commands = [ + "csched", "sched-bvt", "sched-bvt-ctxallow", "sched-sedf", @@ -735,6 +737,48 @@ def xm_sched_sedf(args): else: print_sedf(sedf_info) +def xm_csched(args): + usage_msg = """Csched: Set or get credit scheduler parameters + Usage: + + csched -d domain [-w weight] [-c cap] + """ + try: + opts, args = getopt.getopt(args[0:], "d:w:c:", + ["domain=", "weight=", "cap="]) + except getopt.GetoptError: + # print help information and exit: + print usage_msg + sys.exit(1) + + domain = None + weight = None + cap = None + + for o, a in opts: + if o == "-d": + domain = a + elif o == "-w": + weight = int(a) + elif o == "-c": + cap = int(a); + + if domain is None: + # place holder for system-wide scheduler parameters + print usage_msg + sys.exit(1) + + if weight is None and cap is None: + print server.xend.domain.csched_get(domain) + else: + if weight is None: + weight = int(0) + if cap is None: + cap = int(~0) + + err = server.xend.domain.csched_set(domain, weight, cap) + if err != 0: + print err def xm_info(args): arg_check(args, "info", 0) @@ -1032,6 +1076,7 @@ commands = { "sched-bvt": xm_sched_bvt, "sched-bvt-ctxallow": xm_sched_bvt_ctxallow, "sched-sedf": xm_sched_sedf, + "csched": xm_csched, # block "block-attach": xm_block_attach, "block-detach": xm_block_detach, diff -r 9d52a66c7499 -r c073ebdbde8c tools/tests/test_x86_emulator.c --- a/tools/tests/test_x86_emulator.c Thu May 25 15:59:18 2006 -0600 +++ b/tools/tests/test_x86_emulator.c Fri May 26 13:41:49 2006 -0600 @@ -17,7 +17,8 @@ static int read_any( static int read_any( unsigned long addr, unsigned long *val, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { switch ( bytes ) { @@ -32,7 +33,8 @@ static int write_any( static int write_any( unsigned long addr, unsigned long val, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { switch ( bytes ) { @@ -48,7 +50,8 @@ static int cmpxchg_any( unsigned long addr, unsigned long old, unsigned long new, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { switch ( bytes ) { @@ -65,34 +68,38 @@ static int cmpxchg8b_any( unsigned long old_lo, unsigned long old_hi, unsigned long new_lo, - unsigned long new_hi) + unsigned long new_hi, + struct x86_emulate_ctxt *ctxt) { ((unsigned long *)addr)[0] = new_lo; ((unsigned long *)addr)[1] = new_hi; return X86EMUL_CONTINUE; } -static struct x86_mem_emulator emulops = { +static struct x86_emulate_ops emulops = { read_any, write_any, read_any, write_any, cmpxchg_any, cmpxchg8b_any }; int main(int argc, char **argv) { + struct x86_emulate_ctxt ctxt; struct cpu_user_regs regs; char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */ unsigned int res = 0x7FFFFFFF; u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 }; - unsigned long cr2; int rc; + + ctxt.regs = ®s; + ctxt.mode = X86EMUL_MODE_PROT32; printf("%-40s", "Testing addl %%ecx,(%%eax)..."); instr[0] = 0x01; instr[1] = 0x08; regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - cr2 = (unsigned long)&res; + ctxt.cr2 = (unsigned long)&res; res = 0x7FFFFFFF; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 
0) || (res != 0x92345677) || (regs.eflags != 0xa94) || @@ -109,8 +116,8 @@ int main(int argc, char **argv) #else regs.ecx = 0x12345678UL; #endif - cr2 = (unsigned long)&res; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = (unsigned long)&res; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x92345677) || (regs.ecx != 0x8000000FUL) || @@ -124,8 +131,8 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = 0x92345677UL; regs.ecx = 0xAA; - cr2 = (unsigned long)&res; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = (unsigned long)&res; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x923456AA) || (regs.eflags != 0x244) || @@ -140,8 +147,8 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = 0xAABBCC77UL; regs.ecx = 0xFF; - cr2 = (unsigned long)&res; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = (unsigned long)&res; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x923456AA) || ((regs.eflags&0x240) != 0x200) || @@ -156,8 +163,8 @@ int main(int argc, char **argv) regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - cr2 = (unsigned long)&res; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = (unsigned long)&res; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x12345678) || (regs.eflags != 0x200) || @@ -173,8 +180,8 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = 0x923456AAUL; regs.ecx = 0xDDEEFF00L; - cr2 = (unsigned long)&res; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = (unsigned long)&res; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0xDDEEFF00) || (regs.eflags != 0x244) || @@ -192,8 +199,8 @@ int main(int argc, char **argv) regs.esi = (unsigned long)&res + 0; regs.edi = (unsigned long)&res + 2; regs.error_code = 0; /* read fault */ - cr2 = regs.esi; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = regs.esi; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x44554455) || (regs.eflags != 0x200) || @@ -210,8 +217,8 @@ int main(int argc, char **argv) regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)&res; - cr2 = regs.edi; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = regs.edi; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x2233445D) || ((regs.eflags&0x201) != 0x201) || @@ -228,8 +235,8 @@ int main(int argc, char **argv) regs.ecx = 0xCCCCFFFF; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)cmpxchg8b_res; - cr2 = regs.edi; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = regs.edi; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (cmpxchg8b_res[0] != 0x9999AAAA) || (cmpxchg8b_res[1] != 0xCCCCFFFF) || @@ -242,8 +249,8 @@ int main(int argc, char **argv) instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)cmpxchg8b_res; - cr2 = regs.edi; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + ctxt.cr2 = regs.edi; + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (cmpxchg8b_res[0] != 0x9999AAAA) || (cmpxchg8b_res[1] != 0xCCCCFFFF) || @@ -258,9 +265,9 @@ int main(int argc, char **argv) instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08; 
regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - cr2 = (unsigned long)&res; + ctxt.cr2 = (unsigned long)&res; res = 0x82; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x82) || (regs.ecx != 0xFFFFFF82) || @@ -273,9 +280,9 @@ int main(int argc, char **argv) instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - cr2 = (unsigned long)&res; + ctxt.cr2 = (unsigned long)&res; res = 0x1234aa82; - rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32); + rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || (res != 0x1234aa82) || (regs.ecx != 0xaa82) || diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/Makefile --- a/tools/xenstore/Makefile Thu May 25 15:59:18 2006 -0600 +++ b/tools/xenstore/Makefile Fri May 26 13:41:49 2006 -0600 @@ -27,6 +27,12 @@ CLIENTS += xenstore-write CLIENTS += xenstore-write CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS)) +XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o + +XENSTORED_Linux = xenstored_linux.o + +XENSTORED_OBJS += $(XENSTORED_$(OS)) + .PHONY: all all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls @@ -36,7 +42,7 @@ test_interleaved_transactions: test_inte .PHONY: testcode testcode: xs_test xenstored_test xs_random -xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o +xenstored: $(XENSTORED_OBJS) $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@ $(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Thu May 25 15:59:18 2006 -0600 +++ b/tools/xenstore/xenstored_core.c Fri May 26 13:41:49 2006 -0600 @@ -451,6 +451,11 @@ static struct node *read_node(struct con static bool write_node(struct connection *conn, const struct node *node) { + /* + * conn will be null when this is called from manual_node. + * tdb_context copes with this. + */ + TDB_DATA key, data; void *p; @@ -478,7 +483,7 @@ static bool write_node(struct connection /* TDB should set errno, but doesn't even set ecode AFAICT. */ if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) { - corrupt(conn, "Write of %s = %s failed", key, data); + corrupt(conn, "Write of %s failed", key.dptr); goto error; } return true; diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_core.h --- a/tools/xenstore/xenstored_core.h Thu May 25 15:59:18 2006 -0600 +++ b/tools/xenstore/xenstored_core.h Fri May 26 13:41:49 2006 -0600 @@ -19,6 +19,8 @@ #ifndef _XENSTORED_CORE_H #define _XENSTORED_CORE_H + +#include <xenctrl.h> #include <sys/types.h> #include <dirent.h> @@ -163,6 +165,12 @@ void trace(const char *fmt, ...); extern int event_fd; +/* Map the kernel's xenstore page. */ +void *xenbus_map(void); + +/* Return the event channel used by xenbus. 
*/ +evtchn_port_t xenbus_evtchn(void); + #endif /* _XENSTORED_CORE_H */ /* diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_domain.c --- a/tools/xenstore/xenstored_domain.c Thu May 25 15:59:18 2006 -0600 +++ b/tools/xenstore/xenstored_domain.c Fri May 26 13:41:49 2006 -0600 @@ -33,12 +33,11 @@ #include "talloc.h" #include "xenstored_core.h" #include "xenstored_domain.h" -#include "xenstored_proc.h" #include "xenstored_watch.h" #include "xenstored_test.h" #include <xenctrl.h> -#include <xen/linux/evtchn.h> +#include <xen/sys/evtchn.h> static int *xc_handle; static evtchn_port_t virq_port; @@ -476,44 +475,24 @@ void restore_existing_connections(void) static int dom0_init(void) { - int rc, fd; - evtchn_port_t port; - char str[20]; - struct domain *dom0; - - fd = open(XENSTORED_PROC_PORT, O_RDONLY); - if (fd == -1) + evtchn_port_t port; + struct domain *dom0; + + port = xenbus_evtchn(); + if (port == -1) return -1; - rc = read(fd, str, sizeof(str)); - if (rc == -1) - goto outfd; - str[rc] = '\0'; - port = strtoul(str, NULL, 0); - - close(fd); - dom0 = new_domain(NULL, 0, port); - fd = open(XENSTORED_PROC_KVA, O_RDWR); - if (fd == -1) + dom0->interface = xenbus_map(); + if (dom0->interface == NULL) return -1; - dom0->interface = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE, - MAP_SHARED, fd, 0); - if (dom0->interface == MAP_FAILED) - goto outfd; - - close(fd); - talloc_steal(dom0->conn, dom0); evtchn_notify(dom0->port); return 0; -outfd: - close(fd); - return -1; } diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/domain_build.c Fri May 26 13:41:49 2006 -0600 @@ -367,7 +367,10 @@ int construct_dom0(struct domain *d, if ( (1UL << order) > nr_pages ) panic("Domain 0 allocation is too small for kernel image.\n"); - /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. */ + /* + * Allocate from DMA pool: on i386 this ensures that our low-memory 1:1 + * mapping covers the allocation. 
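[Editor's sketch] dom0_init now reaches the kernel through xenbus_evtchn() and xenbus_map(), declared in xenstored_core.h above, with the Linux specifics collected in the new xenstored_linux.c (listed in the summary but not shown in this hunk). One plausible shape for those two functions, reconstructed from the inline code dom0_init stops doing; the /proc paths and the evtchn_port_t typedef are assumptions, not quotes from the patch:

    #include <fcntl.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/mman.h>

    #define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"   /* assumed path */
    #define XENSTORED_PROC_PORT "/proc/xen/xsd_port"  /* assumed path */

    typedef unsigned int evtchn_port_t;               /* stand-in */

    /* Return the event channel used by xenbus, or (evtchn_port_t)-1. */
    evtchn_port_t xenbus_evtchn(void)
    {
        char str[20];
        ssize_t rc;
        int fd = open(XENSTORED_PROC_PORT, O_RDONLY);

        if (fd == -1)
            return (evtchn_port_t)-1;
        rc = read(fd, str, sizeof(str) - 1);
        close(fd);
        if (rc == -1)
            return (evtchn_port_t)-1;
        str[rc] = '\0';
        return strtoul(str, NULL, 0);
    }

    /* Map the kernel's xenstore page, or return NULL. */
    void *xenbus_map(void)
    {
        void *addr;
        int fd = open(XENSTORED_PROC_KVA, O_RDWR);

        if (fd == -1)
            return NULL;
        addr = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
                    MAP_SHARED, fd, 0);
        close(fd);                     /* the mapping survives the close */
        return addr == MAP_FAILED ? NULL : addr;
    }
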
+ */ if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL ) panic("Not enough RAM for domain 0 allocation.\n"); alloc_spfn = page_to_mfn(page); diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/hvm/hvm.c Fri May 26 13:41:49 2006 -0600 @@ -185,8 +185,9 @@ void hvm_setup_platform(struct domain* d void hvm_setup_platform(struct domain* d) { struct hvm_domain *platform; - - if ( !hvm_guest(current) || (current->vcpu_id != 0) ) + struct vcpu *v=current; + + if ( !hvm_guest(v) || (v->vcpu_id != 0) ) return; if ( shadow_direct_map_init(d) == 0 ) @@ -208,7 +209,8 @@ void hvm_setup_platform(struct domain* d hvm_vioapic_init(d); } - pit_init(&platform->vpit, current); + init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, v->processor); + pit_init(v, cpu_khz); } void pic_irq_request(void *data, int level) @@ -238,6 +240,14 @@ void hvm_pic_assist(struct vcpu *v) } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs ); do_pic_irqs(pic, irqs); } +} + +u64 hvm_get_guest_time(struct vcpu *v) +{ + u64 host_tsc; + + rdtscll(host_tsc); + return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset; } int cpu_get_interrupt(struct vcpu *v, int *type) diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/i8254.c --- a/xen/arch/x86/hvm/i8254.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/hvm/i8254.c Fri May 26 13:41:49 2006 -0600 @@ -22,11 +22,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -/* Edwin Zhai <edwin.zhai@xxxxxxxxx> +/* Edwin Zhai <edwin.zhai@xxxxxxxxx>, Eddie Dong <eddie.dong@xxxxxxxxx> * Ported to xen: - * use actimer for intr generation; + * Add a new layer of periodic time on top of PIT; * move speaker io access to hypervisor; - * use new method for counter/intrs calculation */ #include <xen/config.h> @@ -42,184 +41,117 @@ #include <asm/hvm/vpit.h> #include <asm/current.h> -/*#define DEBUG_PIT*/ +/* Enable DEBUG_PIT may cause guest calibration inaccuracy */ +/* #define DEBUG_PIT */ #define RW_STATE_LSB 1 #define RW_STATE_MSB 2 #define RW_STATE_WORD0 3 #define RW_STATE_WORD1 4 -#ifndef NSEC_PER_SEC -#define NSEC_PER_SEC (1000000000ULL) -#endif - -#ifndef TIMER_SLOP -#define TIMER_SLOP (50*1000) /* ns */ -#endif - -static void pit_irq_timer_update(PITChannelState *s, s64 current_time); - -s_time_t hvm_get_clock(void) -{ - /* TODO: add pause/unpause support */ - return NOW(); +#define ticks_per_sec(v) (v->domain->arch.hvm_domain.tsc_frequency) +static int handle_pit_io(ioreq_t *p); +static int handle_speaker_io(ioreq_t *p); + +/* compute with 96 bit intermediate result: (a*b)/c */ +uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) +{ + union { + uint64_t ll; + struct { +#ifdef WORDS_BIGENDIAN + uint32_t high, low; +#else + uint32_t low, high; +#endif + } l; + } u, res; + uint64_t rl, rh; + + u.ll = a; + rl = (uint64_t)u.l.low * (uint64_t)b; + rh = (uint64_t)u.l.high * (uint64_t)b; + rh += (rl >> 32); + res.l.high = rh / c; + res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c; + return res.ll; +} + +/* + * get processor time. 
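[Editor's sketch] muldiv64() exists because the conversions below multiply a guest TSC delta by PIT_FREQ before dividing, and the naive 64-bit product overflows once the delta passes a couple of hours at gigahertz rates; splitting the multiplicand keeps the intermediate result within 96 bits. A sanity check of the algorithm (body copied from above, little-endian case only), plus the reload-value arithmetic that pit_load_count() further down turns into a period in ns:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PIT_FREQ 1193182u
    #define DIV_ROUND(x, y) (((x) + (y) / 2) / (y))

    /* Same algorithm as muldiv64() above; little-endian host assumed. */
    static uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
    {
        union { uint64_t ll; struct { uint32_t low, high; } l; } u, res;
        uint64_t rl, rh;

        u.ll = a;
        rl = (uint64_t)u.l.low * b;
        rh = (uint64_t)u.l.high * b + (rl >> 32);
        res.l.high = rh / c;
        res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
        return res.ll;
    }

    int main(void)
    {
        /* One second of a 2GHz TSC is exactly one second of PIT ticks. */
        assert(muldiv64(2000000000ull, PIT_FREQ, 2000000000u) == PIT_FREQ);

        /* pit_load_count(): a reload value of 0 means 65536 ticks, i.e.
         * the classic 18.2Hz DOS tick of roughly 54.9ms. */
        uint32_t period = DIV_ROUND(65536 * 1000000000ull, PIT_FREQ);
        printf("full-count period = %uns (~%.1fms)\n", period, period / 1e6);
        return 0;
    }
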
+ * unit: TSC + */ +int64_t hvm_get_clock(struct vcpu *v) +{ + uint64_t gtsc; + gtsc = hvm_get_guest_time(v); + return gtsc; } static int pit_get_count(PITChannelState *s) { - u64 d; - u64 counter; - - d = hvm_get_clock() - s->count_load_time; + uint64_t d; + int counter; + + d = muldiv64(hvm_get_clock(s->vcpu) - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu)); switch(s->mode) { case 0: case 1: case 4: case 5: - counter = (s->period - d) & 0xffff; + counter = (s->count - d) & 0xffff; break; case 3: /* XXX: may be incorrect for odd counts */ - counter = s->period - ((2 * d) % s->period); + counter = s->count - ((2 * d) % s->count); break; default: - /* mod 2 counter handle */ - d = hvm_get_clock() - s->hvm_time->count_point; - d += s->hvm_time->count_advance; - counter = s->period - (d % s->period); - break; - } - /* change from ns to pit counter */ - counter = DIV_ROUND( (counter * PIT_FREQ), NSEC_PER_SEC); + counter = s->count - (d % s->count); + break; + } return counter; } /* get pit output bit */ -static int pit_get_out1(PITChannelState *s, s64 current_time) -{ - u64 d; +static int pit_get_out1(PITChannelState *s, int64_t current_time) +{ + uint64_t d; int out; - d = current_time - s->count_load_time; + d = muldiv64(current_time - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu)); switch(s->mode) { default: case 0: - out = (d >= s->period); + out = (d >= s->count); break; case 1: - out = (d < s->period); + out = (d < s->count); break; case 2: - /* mod2 out is no meaning, since intr are generated in background */ - if ((d % s->period) == 0 && d != 0) + if ((d % s->count) == 0 && d != 0) out = 1; else out = 0; break; case 3: - out = (d % s->period) < ((s->period + 1) >> 1); + out = (d % s->count) < ((s->count + 1) >> 1); break; case 4: case 5: - out = (d == s->period); + out = (d == s->count); break; } return out; } -int pit_get_out(hvm_virpit *pit, int channel, s64 current_time) +int pit_get_out(PITState *pit, int channel, int64_t current_time) { PITChannelState *s = &pit->channels[channel]; return pit_get_out1(s, current_time); } -static __inline__ s64 missed_ticks(PITChannelState *s, s64 current_time) -{ - struct hvm_time_info *hvm_time = s->hvm_time; - struct domain *d = (void *) s - - offsetof(struct domain, arch.hvm_domain.vpit.channels[0]); - - /* ticks from current time(expected time) to NOW */ - int missed_ticks; - /* current_time is expected time for next intr, check if it's true - * (actimer has a TIMER_SLOP in advance) - */ - s64 missed_time = hvm_get_clock() + TIMER_SLOP - current_time; - - if (missed_time >= 0) { - missed_ticks = missed_time/(s_time_t)s->period + 1; - if (test_bit(_DOMF_debugging, &d->domain_flags)) { - hvm_time->pending_intr_nr++; - } else { - hvm_time->pending_intr_nr += missed_ticks; - } - s->next_transition_time = current_time + (missed_ticks ) * s->period; - } - - return s->next_transition_time; -} - -/* only rearm the actimer when return value > 0 - * -2: init state - * -1: the mode has expired - * 0: current VCPU is not running - * >0: the next fired time - */ -s64 pit_get_next_transition_time(PITChannelState *s, - s64 current_time) -{ - s64 d, next_time, base; - int period2; - struct hvm_time_info *hvm_time = s->hvm_time; - - d = current_time - s->count_load_time; - switch(s->mode) { - default: - case 0: - case 1: - if (d < s->period) - next_time = s->period; - else - return -1; - break; - case 2: - next_time = missed_ticks(s, current_time); - if ( !test_bit(_VCPUF_running, &(hvm_time->vcpu->vcpu_flags)) ) - return 0; - break; - case 3: - 
base = (d / s->period) * s->period; - period2 = ((s->period + 1) >> 1); - if ((d - base) < period2) - next_time = base + period2; - else - next_time = base + s->period; - break; - case 4: - case 5: - if (d < s->period) - next_time = s->period; - else if (d == s->period) - next_time = s->period + 1; - else - return -1; - break; - case 0xff: - return -2; /* for init state */ - break; - } - /* XXX: better solution: use a clock at PIT_FREQ Hz */ - if (next_time <= current_time){ -#ifdef DEBUG_PIT - printk("HVM_PIT:next_time <= current_time. next=0x%llx, current=0x%llx!\n",next_time, current_time); -#endif - next_time = current_time + 1; - } - return next_time; -} - /* val must be 0 or 1 */ -void pit_set_gate(hvm_virpit *pit, int channel, int val) +void pit_set_gate(PITState *pit, int channel, int val) { PITChannelState *s = &pit->channels[channel]; @@ -233,16 +165,16 @@ void pit_set_gate(hvm_virpit *pit, int c case 5: if (s->gate < val) { /* restart counting on rising edge */ - s->count_load_time = hvm_get_clock(); - pit_irq_timer_update(s, s->count_load_time); + s->count_load_time = hvm_get_clock(s->vcpu); +// pit_irq_timer_update(s, s->count_load_time); } break; case 2: case 3: if (s->gate < val) { /* restart counting on rising edge */ - s->count_load_time = hvm_get_clock(); - pit_irq_timer_update(s, s->count_load_time); + s->count_load_time = hvm_get_clock(s->vcpu); +// pit_irq_timer_update(s, s->count_load_time); } /* XXX: disable/enable counting */ break; @@ -250,7 +182,7 @@ void pit_set_gate(hvm_virpit *pit, int c s->gate = val; } -int pit_get_gate(hvm_virpit *pit, int channel) +int pit_get_gate(PITState *pit, int channel) { PITChannelState *s = &pit->channels[channel]; return s->gate; @@ -258,37 +190,37 @@ int pit_get_gate(hvm_virpit *pit, int ch static inline void pit_load_count(PITChannelState *s, int val) { + u32 period; if (val == 0) val = 0x10000; - - s->count_load_time = hvm_get_clock(); + s->count_load_time = hvm_get_clock(s->vcpu); s->count = val; - s->period = DIV_ROUND(((s->count) * NSEC_PER_SEC), PIT_FREQ); + period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ); #ifdef DEBUG_PIT - printk("HVM_PIT: pit-load-counter, count=0x%x,period=0x%u us,mode=%d, load_time=%lld\n", + printk("HVM_PIT: pit-load-counter(%p), count=0x%x, period=%uns mode=%d, load_time=%lld\n", + s, val, - s->period / 1000, + period, s->mode, - s->count_load_time); + (long long)s->count_load_time); #endif - if (s->mode == HVM_PIT_ACCEL_MODE) { - if (!s->hvm_time) { - printk("HVM_PIT:guest should only set mod 2 on channel 0!\n"); - return; - } - s->hvm_time->period_cycles = (u64)s->period * cpu_khz / 1000000L; - s->hvm_time->first_injected = 0; - - if (s->period < 900000) { /* < 0.9 ms */ - printk("HVM_PIT: guest programmed too small an count: %x\n", - s->count); - s->period = 1000000; - } - } - - pit_irq_timer_update(s, s->count_load_time); + switch (s->mode) { + case 2: + /* create periodic time */ + s->pt = create_periodic_time (s->vcpu, period, 0, 0); + break; + case 1: + /* create one shot time */ + s->pt = create_periodic_time (s->vcpu, period, 0, 1); +#ifdef DEBUG_PIT + printk("HVM_PIT: create one shot time.\n"); +#endif + break; + default: + break; + } } /* if already latched, do not latch again */ @@ -300,9 +232,9 @@ static void pit_latch_count(PITChannelSt } } -static void pit_ioport_write(void *opaque, u32 addr, u32 val) -{ - hvm_virpit *pit = opaque; +static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val) +{ + PITState *pit = opaque; int channel, access; PITChannelState *s; val &= 
0xff; @@ -321,7 +253,7 @@ static void pit_ioport_write(void *opaqu if (!(val & 0x10) && !s->status_latched) { /* status latch */ /* XXX: add BCD and null count */ - s->status = (pit_get_out1(s, hvm_get_clock()) << 7) | + s->status = (pit_get_out1(s, hvm_get_clock(s->vcpu)) << 7) | (s->rw_mode << 4) | (s->mode << 1) | s->bcd; @@ -366,9 +298,9 @@ static void pit_ioport_write(void *opaqu } } -static u32 pit_ioport_read(void *opaque, u32 addr) -{ - hvm_virpit *pit = opaque; +static uint32_t pit_ioport_read(void *opaque, uint32_t addr) +{ + PITState *pit = opaque; int ret, count; PITChannelState *s; @@ -419,84 +351,51 @@ static u32 pit_ioport_read(void *opaque, return ret; } -static void pit_irq_timer_update(PITChannelState *s, s64 current_time) -{ - s64 expire_time; - int irq_level; - struct vcpu *v = current; - struct hvm_virpic *pic= &v->domain->arch.hvm_domain.vpic; - - if (!s->hvm_time || s->mode == 0xff) - return; - - expire_time = pit_get_next_transition_time(s, current_time); - /* not generate intr by direct pic_set_irq in mod 2 - * XXX:mod 3 should be same as mod 2 - */ - if (s->mode != HVM_PIT_ACCEL_MODE) { - irq_level = pit_get_out1(s, current_time); - pic_set_irq(pic, s->irq, irq_level); - s->next_transition_time = expire_time; -#ifdef DEBUG_PIT - printk("HVM_PIT:irq_level=%d next_delay=%l ns\n", - irq_level, - (expire_time - current_time)); -#endif - } - - if (expire_time > 0) - set_timer(&(s->hvm_time->pit_timer), s->next_transition_time); - -} - -static void pit_irq_timer(void *data) -{ - PITChannelState *s = data; - - pit_irq_timer_update(s, s->next_transition_time); -} - static void pit_reset(void *opaque) { - hvm_virpit *pit = opaque; + PITState *pit = opaque; PITChannelState *s; int i; for(i = 0;i < 3; i++) { s = &pit->channels[i]; + if ( s -> pt ) { + destroy_periodic_time (s->pt); + s->pt = NULL; + } s->mode = 0xff; /* the init mode */ s->gate = (i != 2); pit_load_count(s, 0); } } -/* hvm_io_assist light-weight version, specific to PIT DM */ -static void resume_pit_io(ioreq_t *p) -{ - struct cpu_user_regs *regs = guest_cpu_user_regs(); - unsigned long old_eax = regs->eax; - p->state = STATE_INVALID; - - switch(p->size) { - case 1: - regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff); - break; - case 2: - regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff); - break; - case 4: - regs->eax = (p->u.data & 0xffffffff); - break; - default: - BUG(); - } +void pit_init(struct vcpu *v, unsigned long cpu_khz) +{ + PITState *pit = &v->domain->arch.hvm_domain.pl_time.vpit; + PITChannelState *s; + + s = &pit->channels[0]; + /* the timer 0 is connected to an IRQ */ + s->vcpu = v; + s++; s->vcpu = v; + s++; s->vcpu = v; + + register_portio_handler(PIT_BASE, 4, handle_pit_io); + /* register the speaker port */ + register_portio_handler(0x61, 1, handle_speaker_io); + ticks_per_sec(v) = cpu_khz * (int64_t)1000; +#ifdef DEBUG_PIT + printk("HVM_PIT: guest frequency =%lld\n", (long long)ticks_per_sec(v)); +#endif + pit_reset(pit); + return; } /* the intercept action for PIT DM retval:0--not handled; 1--handled */ -int handle_pit_io(ioreq_t *p) +static int handle_pit_io(ioreq_t *p) { struct vcpu *v = current; - struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); + struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit); if (p->size != 1 || p->pdata_valid || @@ -508,18 +407,18 @@ int handle_pit_io(ioreq_t *p) if (p->dir == 0) {/* write */ pit_ioport_write(vpit, p->addr, p->u.data); } else if (p->dir == 1) { /* read */ - p->u.data = pit_ioport_read(vpit, p->addr); - 
resume_pit_io(p); - } - - /* always return 1, since PIT sit in HV now */ + if ( (p->addr & 3) != 3 ) { + p->u.data = pit_ioport_read(vpit, p->addr); + } else { + printk("HVM_PIT: read A1:A0=3!\n"); + } + } return 1; } static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val) { - hvm_virpit *pit = opaque; - val &= 0xff; + PITState *pit = opaque; pit->speaker_data_on = (val >> 1) & 1; pit_set_gate(pit, 2, val & 1); } @@ -527,18 +426,18 @@ static uint32_t speaker_ioport_read(void static uint32_t speaker_ioport_read(void *opaque, uint32_t addr) { int out; - hvm_virpit *pit = opaque; - out = pit_get_out(pit, 2, hvm_get_clock()); + PITState *pit = opaque; + out = pit_get_out(pit, 2, hvm_get_clock(pit->channels[2].vcpu)); pit->dummy_refresh_clock ^= 1; return (pit->speaker_data_on << 1) | pit_get_gate(pit, 2) | (out << 5) | (pit->dummy_refresh_clock << 4); } -int handle_speaker_io(ioreq_t *p) +static int handle_speaker_io(ioreq_t *p) { struct vcpu *v = current; - struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); + struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit); if (p->size != 1 || p->pdata_valid || @@ -551,45 +450,7 @@ int handle_speaker_io(ioreq_t *p) speaker_ioport_write(vpit, p->addr, p->u.data); } else if (p->dir == 1) {/* read */ p->u.data = speaker_ioport_read(vpit, p->addr); - resume_pit_io(p); } return 1; } - -/* pick up missed timer ticks at deactive time */ -void pickup_deactive_ticks(struct hvm_virpit *vpit) -{ - s64 next_time; - PITChannelState *s = &(vpit->channels[0]); - if ( !active_timer(&(vpit->time_info.pit_timer)) ) { - next_time = pit_get_next_transition_time(s, s->next_transition_time); - if (next_time >= 0) - set_timer(&(s->hvm_time->pit_timer), s->next_transition_time); - } -} - -void pit_init(struct hvm_virpit *pit, struct vcpu *v) -{ - PITChannelState *s; - struct hvm_time_info *hvm_time; - - s = &pit->channels[0]; - /* the timer 0 is connected to an IRQ */ - s->irq = 0; - /* channel 0 need access the related time info for intr injection */ - hvm_time = s->hvm_time = &pit->time_info; - hvm_time->vcpu = v; - - init_timer(&(hvm_time->pit_timer), pit_irq_timer, s, v->processor); - - register_portio_handler(PIT_BASE, 4, handle_pit_io); - - /* register the speaker port */ - register_portio_handler(0x61, 1, handle_speaker_io); - - pit_reset(pit); - - return; - -} diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/hvm/intercept.c Fri May 26 13:41:49 2006 -0600 @@ -214,6 +214,88 @@ void hlt_timer_fn(void *data) evtchn_set_pending(v, iopacket_port(v)); } +static __inline__ void missed_ticks(struct periodic_time *pt) +{ + int missed_ticks; + + missed_ticks = (NOW() - pt->scheduled)/(s_time_t) pt->period; + if ( missed_ticks++ >= 0 ) { + if ( missed_ticks > 1000 ) { + /* TODO: Adjust guest time togther */ + pt->pending_intr_nr ++; + } + else { + pt->pending_intr_nr += missed_ticks; + } + pt->scheduled += missed_ticks * pt->period; + } +} + +/* hook function for the platform periodic time */ +void pt_timer_fn(void *data) +{ + struct vcpu *v = data; + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + + /* pick up missed timer tick */ + missed_ticks(pt); + if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) { + set_timer(&pt->timer, pt->scheduled); + } +} + +/* pick up missed timer ticks at deactive time */ +void pickup_deactive_ticks(struct periodic_time *pt) +{ + if ( !active_timer(&(pt->timer)) ) { + 
missed_ticks(pt); + set_timer(&pt->timer, pt->scheduled); + } +} + +/* + * period: fire frequency in ns. + */ +struct periodic_time * create_periodic_time( + struct vcpu *v, + u32 period, + char irq, + char one_shot) +{ + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + if ( pt->enabled ) { + if ( v->vcpu_id != 0 ) { + printk("HVM_PIT: start 2nd periodic time on non BSP!\n"); + } + stop_timer (&pt->timer); + pt->enabled = 0; + } + pt->pending_intr_nr = 0; + pt->first_injected = 0; + if (period < 900000) { /* < 0.9 ms */ + printk("HVM_PlatformTime: program too small period %u\n",period); + period = 900000; /* force to 0.9ms */ + } + pt->period = period; + pt->irq = irq; + pt->period_cycles = (u64)period * cpu_khz / 1000000L; + pt->one_shot = one_shot; + if ( one_shot ) { + printk("HVM_PL: No support for one shot platform time yet\n"); + } + pt->scheduled = NOW() + period; + set_timer (&pt->timer,pt->scheduled); + pt->enabled = 1; + return pt; +} + +void destroy_periodic_time(struct periodic_time *pt) +{ + if ( pt->enabled ) { + stop_timer(&pt->timer); + pt->enabled = 0; + } +} /* * Local variables: diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/hvm/svm/intr.c Fri May 26 13:41:49 2006 -0600 @@ -44,45 +44,33 @@ */ #define BSP_CPU(v) (!(v->vcpu_id)) -u64 svm_get_guest_time(struct vcpu *v) -{ - struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info); - u64 host_tsc; - - rdtscll(host_tsc); - return host_tsc + time_info->cache_tsc_offset; -} - void svm_set_guest_time(struct vcpu *v, u64 gtime) { - struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info); u64 host_tsc; rdtscll(host_tsc); - time_info->cache_tsc_offset = gtime - host_tsc; - v->arch.hvm_svm.vmcb->tsc_offset = time_info->cache_tsc_offset; + v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc; + v->arch.hvm_svm.vmcb->tsc_offset = v->arch.hvm_vcpu.cache_tsc_offset; } static inline void interrupt_post_injection(struct vcpu * v, int vector, int type) { - struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); - struct hvm_time_info *time_info = &vpit->time_info; + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); if ( is_pit_irq(v, vector, type) ) { - if ( !time_info->first_injected ) { - time_info->pending_intr_nr = 0; - time_info->last_pit_gtime = svm_get_guest_time(v); - time_info->first_injected = 1; + if ( !pt->first_injected ) { + pt->pending_intr_nr = 0; + pt->last_plt_gtime = hvm_get_guest_time(v); + pt->scheduled = NOW() + pt->period; + set_timer(&pt->timer, pt->scheduled); + pt->first_injected = 1; } else { - time_info->pending_intr_nr--; + pt->pending_intr_nr--; + pt->last_plt_gtime += pt->period_cycles; + svm_set_guest_time(v, pt->last_plt_gtime); } - time_info->count_advance = 0; - time_info->count_point = NOW(); - - time_info->last_pit_gtime += time_info->period_cycles; - svm_set_guest_time(v, time_info->last_pit_gtime); } switch(type) @@ -121,8 +109,7 @@ asmlinkage void svm_intr_assist(void) struct vcpu *v = current; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; struct hvm_domain *plat=&v->domain->arch.hvm_domain; - struct hvm_virpit *vpit = &plat->vpit; - struct hvm_time_info *time_info = &vpit->time_info; + struct periodic_time *pt = &plat->pl_time.periodic_tm; struct hvm_virpic *pic= &plat->vpic; int intr_type = VLAPIC_DELIV_MODE_EXT; int intr_vector = -1; @@ -174,9 +161,9 @@ asmlinkage void 
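[Editor's sketch] create_periodic_time() above clamps anything under 0.9ms to protect the host, and the missed_ticks() helper earlier in this intercept.c hunk converts oversleep into extra pending interrupts. The catch-up arithmetic with concrete numbers; times are in ns, as with Xen's s_time_t, and note that the post-increment also counts the tick that was actually due:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t period = 54925401;                 /* 18.2Hz PIT period   */
        int64_t scheduled = 1000000000;            /* next tick due at 1s */
        int64_t now = scheduled + 2 * period + 7;  /* woke up 2+ periods late */
        int64_t pending_intr_nr = 0;

        int missed = (now - scheduled) / period;   /* 2 */
        if (missed++ >= 0) {                       /* ...plus the due tick: 3 */
            if (missed > 1000)
                pending_intr_nr++;                 /* cap runaway catch-up */
            else
                pending_intr_nr += missed;
            scheduled += (int64_t)missed * period; /* now in the future again */
        }
        printf("inject %lld ticks, next at %lld\n",
               (long long)pending_intr_nr, (long long)scheduled);
        return 0;
    }
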
svm_intr_assist(void) if ( cpu_has_pending_irq(v) ) { intr_vector = cpu_get_interrupt(v, &intr_type); } - else if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) { - pic_set_irq(pic, 0, 0); - pic_set_irq(pic, 0, 1); + else if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) { + pic_set_irq(pic, pt->irq, 0); + pic_set_irq(pic, pt->irq, 1); intr_vector = cpu_get_interrupt(v, &intr_type); } } @@ -190,7 +177,7 @@ asmlinkage void svm_intr_assist(void) /* Re-injecting a PIT interruptt? */ if (re_injecting && is_pit_irq(v, intr_vector, intr_type)) { - ++time_info->pending_intr_nr; + ++pt->pending_intr_nr; } /* let's inject this interrupt */ TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0); diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Fri May 26 13:41:49 2006 -0600 @@ -51,13 +51,6 @@ #define SVM_EXTRA_DEBUG -#ifdef TRACE_BUFFER -static unsigned long trace_values[NR_CPUS][4]; -#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value -#else -#define TRACE_VMEXIT(index,value) ((void)0) -#endif - /* Useful define */ #define MAX_INST_SIZE 15 @@ -672,12 +665,11 @@ static void arch_svm_do_launch(struct vc static void svm_freeze_time(struct vcpu *v) { - struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info; + struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm; - if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) { - v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v); - time_info->count_advance += (NOW() - time_info->count_point); - stop_timer(&(time_info->pit_timer)); + if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) { + v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v); + stop_timer(&(pt->timer)); } } @@ -754,7 +746,7 @@ static void svm_relinquish_guest_resourc } } - kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer); + kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer); if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( @@ -784,10 +776,12 @@ void arch_svm_do_resume(struct vcpu *v) void svm_migrate_timers(struct vcpu *v) { - struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info; - - migrate_timer(&time_info->pit_timer, v->processor); - migrate_timer(&v->arch.hvm_svm.hlt_timer, v->processor); + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + + if ( pt->enabled ) { + migrate_timer( &pt->timer, v->processor ); + migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor ); + } if ( hvm_apic_support(v->domain) && VLAPIC( v )) migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor ); } @@ -816,7 +810,6 @@ static int svm_do_page_fault(unsigned lo return 1; handle_mmio(va, va); - TRACE_VMEXIT(2,2); return 1; } @@ -842,7 +835,6 @@ static int svm_do_page_fault(unsigned lo return 1; } - TRACE_VMEXIT (2,2); handle_mmio(va, gpa); return 1; @@ -854,8 +846,6 @@ static int svm_do_page_fault(unsigned lo /* Let's make sure that the Guest TLB is flushed */ set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags); } - - TRACE_VMEXIT (2,result); return result; } @@ -1901,14 +1891,8 @@ static inline void svm_do_msr_access(str regs->edx = 0; switch (regs->ecx) { case MSR_IA32_TIME_STAMP_COUNTER: - { - struct hvm_time_info *time_info; - - rdtscll(msr_content); - time_info = &v->domain->arch.hvm_domain.vpit.time_info; - msr_content += time_info->cache_tsc_offset; + msr_content 
= hvm_get_guest_time(v); break; - } case MSR_IA32_SYSENTER_CS: msr_content = vmcb->sysenter_cs; break; @@ -1975,7 +1959,7 @@ static inline void svm_vmexit_do_hlt(str static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb) { struct vcpu *v = current; - struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit; + struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm; s_time_t next_pit = -1, next_wakeup; __update_guest_eip(vmcb, 1); @@ -1985,7 +1969,7 @@ static inline void svm_vmexit_do_hlt(str return; if ( !v->vcpu_id ) - next_pit = get_pit_scheduled(v, vpit); + next_pit = get_scheduled(v, pt->irq, pt); next_wakeup = get_apictime_scheduled(v); if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) next_wakeup = next_pit; diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/hvm/svm/vmcb.c Fri May 26 13:41:49 2006 -0600 @@ -442,19 +442,17 @@ void svm_do_resume(struct vcpu *v) void svm_do_resume(struct vcpu *v) { struct domain *d = v->domain; - struct hvm_virpit *vpit = &d->arch.hvm_domain.vpit; - struct hvm_time_info *time_info = &vpit->time_info; + struct periodic_time *pt = &d->arch.hvm_domain.pl_time.periodic_tm; svm_stts(v); /* pick up the elapsed PIT ticks and re-enable pit_timer */ - if ( time_info->first_injected ) { - if ( v->domain->arch.hvm_domain.guest_time ) { - svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time); - time_info->count_point = NOW(); - v->domain->arch.hvm_domain.guest_time = 0; + if ( pt->enabled && pt->first_injected ) { + if ( v->arch.hvm_vcpu.guest_time ) { + svm_set_guest_time(v, v->arch.hvm_vcpu.guest_time); + v->arch.hvm_vcpu.guest_time = 0; } - pickup_deactive_ticks(vpit); + pickup_deactive_ticks(pt); } if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/io.c Fri May 26 13:41:49 2006 -0600 @@ -49,45 +49,33 @@ void __set_tsc_offset(u64 offset) #endif } -u64 get_guest_time(struct vcpu *v) -{ - struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info); - u64 host_tsc; - - rdtscll(host_tsc); - return host_tsc + time_info->cache_tsc_offset; -} - void set_guest_time(struct vcpu *v, u64 gtime) { - struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info); u64 host_tsc; rdtscll(host_tsc); - time_info->cache_tsc_offset = gtime - host_tsc; - __set_tsc_offset(time_info->cache_tsc_offset); + v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc; + __set_tsc_offset(v->arch.hvm_vcpu.cache_tsc_offset); } static inline void interrupt_post_injection(struct vcpu * v, int vector, int type) { - struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); - struct hvm_time_info *time_info = &vpit->time_info; + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); if ( is_pit_irq(v, vector, type) ) { - if ( !time_info->first_injected ) { - time_info->pending_intr_nr = 0; - time_info->last_pit_gtime = get_guest_time(v); - time_info->first_injected = 1; + if ( !pt->first_injected ) { + pt->pending_intr_nr = 0; + pt->last_plt_gtime = hvm_get_guest_time(v); + pt->scheduled = NOW() + pt->period; + set_timer(&pt->timer, pt->scheduled); + pt->first_injected = 1; } else { - time_info->pending_intr_nr--; - } - time_info->count_advance = 0; - time_info->count_point = NOW(); - - time_info->last_pit_gtime += 
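[Editor's sketch] Both the SVM and VMX interrupt_post_injection() paths now step last_plt_gtime by period_cycles per delivered tick and pin the guest TSC to it, so a guest that was descheduled sees time advance smoothly, one PIT period per injected interrupt, instead of jumping. A toy model of the cache_tsc_offset bookkeeping; host_tsc() stands in for rdtscll(), and in Xen the offset is additionally written into the VMCB/VMCS tsc_offset field:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t fake_host_tsc;                 /* stand-in for rdtscll() */
    static uint64_t host_tsc(void) { return fake_host_tsc; }

    static uint64_t cache_tsc_offset;              /* per-vcpu in Xen */

    static uint64_t get_guest_time(void) { return host_tsc() + cache_tsc_offset; }
    static void set_guest_time(uint64_t gtime) { cache_tsc_offset = gtime - host_tsc(); }

    int main(void)
    {
        const uint64_t period_cycles = 109850802;  /* ~54.9ms at 2GHz */
        uint64_t last_plt_gtime;

        fake_host_tsc = 1000000;
        set_guest_time(5000000);
        last_plt_gtime = get_guest_time();         /* 5000000 */

        /* The host TSC races ahead while the vcpu is descheduled... */
        fake_host_tsc += 5 * period_cycles;

        /* ...but each re-injected tick advances guest time by one period. */
        last_plt_gtime += period_cycles;
        set_guest_time(last_plt_gtime);
        printf("guest tsc: %llu\n", (unsigned long long)get_guest_time());
        return 0;
    }
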
time_info->period_cycles; - set_guest_time(v, time_info->last_pit_gtime); + pt->pending_intr_nr--; + pt->last_plt_gtime += pt->period_cycles; + set_guest_time(v, pt->last_plt_gtime); + } } switch(type) @@ -151,7 +139,7 @@ asmlinkage void vmx_intr_assist(void) unsigned long eflags; struct vcpu *v = current; struct hvm_domain *plat=&v->domain->arch.hvm_domain; - struct hvm_time_info *time_info = &plat->vpit.time_info; + struct periodic_time *pt = &plat->pl_time.periodic_tm; struct hvm_virpic *pic= &plat->vpic; unsigned int idtv_info_field; unsigned long inst_len; @@ -160,9 +148,9 @@ asmlinkage void vmx_intr_assist(void) if ( v->vcpu_id == 0 ) hvm_pic_assist(v); - if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) { - pic_set_irq(pic, 0, 0); - pic_set_irq(pic, 0, 1); + if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) { + pic_set_irq(pic, pt->irq, 0); + pic_set_irq(pic, pt->irq, 1); } has_ext_irq = cpu_has_pending_irq(v); @@ -232,19 +220,17 @@ void vmx_do_resume(struct vcpu *v) void vmx_do_resume(struct vcpu *v) { struct domain *d = v->domain; - struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit; - struct hvm_time_info *time_info = &vpit->time_info; + struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm; vmx_stts(); /* pick up the elapsed PIT ticks and re-enable pit_timer */ - if ( time_info->first_injected ) { - if ( v->domain->arch.hvm_domain.guest_time ) { - time_info->count_point = NOW(); - set_guest_time(v, v->domain->arch.hvm_domain.guest_time); - v->domain->arch.hvm_domain.guest_time = 0; - } - pickup_deactive_ticks(vpit); + if ( pt->enabled && pt->first_injected ) { + if ( v->arch.hvm_vcpu.guest_time ) { + set_guest_time(v, v->arch.hvm_vcpu.guest_time); + v->arch.hvm_vcpu.guest_time = 0; + } + pickup_deactive_ticks(pt); } if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Fri May 26 13:41:49 2006 -0600 @@ -47,7 +47,7 @@ #include <asm/hvm/vpic.h> #include <asm/hvm/vlapic.h> -static unsigned long trace_values[NR_CPUS][4]; +static unsigned long trace_values[NR_CPUS][5]; #define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value static void vmx_ctxt_switch_from(struct vcpu *v); @@ -102,7 +102,7 @@ static void vmx_relinquish_guest_resourc } } - kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer); + kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer); if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( @@ -358,12 +358,11 @@ static inline int long_mode_do_msr_write static void vmx_freeze_time(struct vcpu *v) { - struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info); + struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm; - if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) { - v->domain->arch.hvm_domain.guest_time = get_guest_time(v); - time_info->count_advance += (NOW() - time_info->count_point); - stop_timer(&(time_info->pit_timer)); + if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) { + v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v); + stop_timer(&(pt->timer)); } } @@ -393,10 +392,12 @@ int vmx_initialize_guest_resources(struc void vmx_migrate_timers(struct vcpu *v) { - struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info; - - migrate_timer(&time_info->pit_timer, v->processor); - 
migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor); + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + + if ( pt->enabled ) { + migrate_timer(&pt->timer, v->processor); + migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor); + } if ( hvm_apic_support(v->domain) && VLAPIC(v)) migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor); } @@ -1861,14 +1862,8 @@ static inline void vmx_do_msr_read(struc (unsigned long)regs->edx); switch (regs->ecx) { case MSR_IA32_TIME_STAMP_COUNTER: - { - struct hvm_time_info *time_info; - - rdtscll(msr_content); - time_info = &(v->domain->arch.hvm_domain.vpit.time_info); - msr_content += time_info->cache_tsc_offset; - break; - } + msr_content = hvm_get_guest_time(v); + break; case MSR_IA32_SYSENTER_CS: __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content); break; @@ -1941,11 +1936,11 @@ void vmx_vmexit_do_hlt(void) void vmx_vmexit_do_hlt(void) { struct vcpu *v=current; - struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); + struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); s_time_t next_pit=-1,next_wakeup; if ( !v->vcpu_id ) - next_pit = get_pit_scheduled(v,vpit); + next_pit = get_scheduled(v, pt->irq, pt); next_wakeup = get_apictime_scheduled(v); if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) next_wakeup = next_pit; diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/mm.c Fri May 26 13:41:49 2006 -0600 @@ -260,9 +260,42 @@ void share_xen_page_with_privileged_gues share_xen_page_with_guest(page, dom_xen, readonly); } +static void __write_ptbase(unsigned long mfn) +{ +#ifdef CONFIG_X86_PAE + if ( mfn >= 0x100000 ) + { + l3_pgentry_t *highmem_l3tab, *lowmem_l3tab; + struct vcpu *v = current; + unsigned long flags; + + /* Protects against re-entry and against __pae_flush_pgd(). */ + local_irq_save(flags); + + /* Pick an unused low-memory L3 cache slot. */ + v->arch.lowmem_l3tab_inuse ^= 1; + lowmem_l3tab = v->arch.lowmem_l3tab[v->arch.lowmem_l3tab_inuse]; + v->arch.lowmem_l3tab_high_mfn[v->arch.lowmem_l3tab_inuse] = mfn; + + /* Map the guest L3 table and copy to the chosen low-memory cache. */ + highmem_l3tab = map_domain_page(mfn); + memcpy(lowmem_l3tab, highmem_l3tab, sizeof(v->arch.lowmem_l3tab)); + unmap_domain_page(highmem_l3tab); + + /* Install the low-memory L3 table in CR3. */ + write_cr3(__pa(lowmem_l3tab)); + + local_irq_restore(flags); + return; + } +#endif + + write_cr3(mfn << PAGE_SHIFT); +} + void write_ptbase(struct vcpu *v) { - write_cr3(pagetable_get_paddr(v->arch.monitor_table)); + __write_ptbase(pagetable_get_pfn(v->arch.monitor_table)); } void invalidate_shadow_ldt(struct vcpu *v) @@ -401,6 +434,7 @@ static int get_page_and_type_from_pagenr return 1; } +#ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */ /* * We allow root tables to map each other (a.k.a. linear page tables). 
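[Editor's sketch] __write_ptbase() above is what lets PAE guest pgds live above 4GB (the old restriction is deleted from alloc_l3_table() later in this file): a high L3 table is copied into one of two per-vcpu low-memory L3 tables and CR3 points at the copy. Two slots are kept and toggled so that __pae_flush_pgd() can always identify the copy a CPU is actually running on. A toy model of the slot discipline, with map_domain_page()/memcpy reduced to an array copy:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct vcpu_l3 {
        uint64_t lowmem_l3tab[2][4];   /* two cached copies, 4 PAE entries */
        unsigned long high_mfn[2];     /* which guest L3 each slot shadows */
        int inuse;                     /* slot CR3 currently points at     */
    };

    static uint64_t *cache_l3(struct vcpu_l3 *v, unsigned long mfn,
                              const uint64_t guest_l3[4])
    {
        v->inuse ^= 1;                 /* pick the slot not in use */
        v->high_mfn[v->inuse] = mfn;
        memcpy(v->lowmem_l3tab[v->inuse], guest_l3, sizeof(uint64_t[4]));
        return v->lowmem_l3tab[v->inuse];   /* what CR3 would be loaded with */
    }

    int main(void)
    {
        struct vcpu_l3 v = { .inuse = 0 };
        uint64_t g1[4] = { 1, 2, 3, 4 }, g2[4] = { 5, 6, 7, 8 };

        assert(cache_l3(&v, 0x100001, g1) == v.lowmem_l3tab[1]);
        assert(cache_l3(&v, 0x100002, g2) == v.lowmem_l3tab[0]);
        /* The slot written second never clobbers the one just installed. */
        printf("ok\n");
        return 0;
    }
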
It * needs some special care with reference counts and access permissions: @@ -456,6 +490,7 @@ get_linear_pagetable( return 1; } +#endif /* !CONFIG_X86_PAE */ int get_page_from_l1e( @@ -564,10 +599,6 @@ get_page_from_l3e( rc = get_page_and_type_from_pagenr( l3e_get_pfn(l3e), PGT_l2_page_table | vaddr, d); -#if CONFIG_PAGING_LEVELS == 3 - if ( unlikely(!rc) ) - rc = get_linear_pagetable(l3e, pfn, d); -#endif return rc; } #endif /* 3 level */ @@ -773,6 +804,50 @@ static int create_pae_xen_mappings(l3_pg return 1; } +struct pae_flush_pgd { + unsigned long l3tab_mfn; + unsigned int l3tab_idx; + l3_pgentry_t nl3e; +}; + +static void __pae_flush_pgd(void *data) +{ + struct pae_flush_pgd *args = data; + struct vcpu *v = this_cpu(curr_vcpu); + int i = v->arch.lowmem_l3tab_inuse; + intpte_t _ol3e, _nl3e, _pl3e; + l3_pgentry_t *l3tab_ptr; + + ASSERT(!local_irq_is_enabled()); + + if ( v->arch.lowmem_l3tab_high_mfn[i] != args->l3tab_mfn ) + return; + + l3tab_ptr = &v->arch.lowmem_l3tab[i][args->l3tab_idx]; + + _ol3e = l3e_get_intpte(*l3tab_ptr); + _nl3e = l3e_get_intpte(args->nl3e); + _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e); + BUG_ON(_pl3e != _ol3e); +} + +/* Flush a pgdir update into low-memory caches. */ +static void pae_flush_pgd( + unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e) +{ + struct domain *d = page_get_owner(mfn_to_page(mfn)); + struct pae_flush_pgd args = { + .l3tab_mfn = mfn, + .l3tab_idx = idx, + .nl3e = nl3e }; + + /* If below 4GB then the pgdir is not shadowed in low memory. */ + if ( mfn < 0x100000 ) + return; + + on_selected_cpus(d->domain_dirty_cpumask, __pae_flush_pgd, &args, 1, 1); +} + static inline int l1_backptr( unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type) { @@ -787,6 +862,7 @@ static inline int l1_backptr( #elif CONFIG_X86_64 # define create_pae_xen_mappings(pl3e) (1) +# define pae_flush_pgd(mfn, idx, nl3e) ((void)0) static inline int l1_backptr( unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type) @@ -886,14 +962,6 @@ static int alloc_l3_table(struct page_in ASSERT(!shadow_mode_refcounts(d)); -#ifdef CONFIG_X86_PAE - if ( pfn >= 0x100000 ) - { - MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn); - return 0; - } -#endif - pl3e = map_domain_page(pfn); for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) { @@ -1240,6 +1308,8 @@ static int mod_l3_entry(l3_pgentry_t *pl okay = create_pae_xen_mappings(pl3e); BUG_ON(!okay); + + pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); put_page_from_l3e(ol3e, pfn); return 1; @@ -3109,7 +3179,7 @@ void ptwr_flush(struct domain *d, const if ( unlikely(d->arch.ptwr[which].vcpu != current) ) /* Don't use write_ptbase: it may switch to guest_user on x86/64! */ - write_cr3(pagetable_get_paddr( + __write_ptbase(pagetable_get_pfn( d->arch.ptwr[which].vcpu->arch.guest_table)); else TOGGLE_MODE(); @@ -3220,15 +3290,16 @@ static int ptwr_emulated_update( /* Turn a sub-word access into a full-word access. */ if ( bytes != sizeof(paddr_t) ) { - int rc; - paddr_t full; - unsigned int offset = addr & (sizeof(paddr_t)-1); + paddr_t full; + unsigned int offset = addr & (sizeof(paddr_t)-1); /* Align address; read full word. */ addr &= ~(sizeof(paddr_t)-1); - if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full, - sizeof(paddr_t))) ) - return rc; + if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) ) + { + propagate_page_fault(addr, 4); /* user mode, read fault */ + return X86EMUL_PROPAGATE_FAULT; + } /* Mask out bits provided by caller. 
*/ full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8)); /* Shift the caller value and OR in the missing bits. */ @@ -3306,7 +3377,8 @@ static int ptwr_emulated_write( static int ptwr_emulated_write( unsigned long addr, unsigned long val, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { return ptwr_emulated_update(addr, 0, val, bytes, 0); } @@ -3315,7 +3387,8 @@ static int ptwr_emulated_cmpxchg( unsigned long addr, unsigned long old, unsigned long new, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { return ptwr_emulated_update(addr, old, new, bytes, 1); } @@ -3325,7 +3398,8 @@ static int ptwr_emulated_cmpxchg8b( unsigned long old, unsigned long old_hi, unsigned long new, - unsigned long new_hi) + unsigned long new_hi, + struct x86_emulate_ctxt *ctxt) { if ( CONFIG_PAGING_LEVELS == 2 ) return X86EMUL_UNHANDLEABLE; @@ -3334,7 +3408,7 @@ static int ptwr_emulated_cmpxchg8b( addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1); } -static struct x86_mem_emulator ptwr_mem_emulator = { +static struct x86_emulate_ops ptwr_emulate_ops = { .read_std = x86_emulate_read_std, .write_std = x86_emulate_write_std, .read_emulated = x86_emulate_read_std, @@ -3353,6 +3427,7 @@ int ptwr_do_page_fault(struct domain *d, l2_pgentry_t *pl2e, l2e; int which, flags; unsigned long l2_idx; + struct x86_emulate_ctxt emul_ctxt; if ( unlikely(shadow_mode_enabled(d)) ) return 0; @@ -3507,8 +3582,10 @@ int ptwr_do_page_fault(struct domain *d, return EXCRET_fault_fixed; emulate: - if ( x86_emulate_memop(guest_cpu_user_regs(), addr, - &ptwr_mem_emulator, X86EMUL_MODE_HOST) ) + emul_ctxt.regs = guest_cpu_user_regs(); + emul_ctxt.cr2 = addr; + emul_ctxt.mode = X86EMUL_MODE_HOST; + if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) ) return 0; perfc_incrc(ptwr_emulations); return EXCRET_fault_fixed; diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/traps.c Fri May 26 13:41:49 2006 -0600 @@ -876,7 +876,7 @@ static int emulate_privileged_op(struct PAGE_FAULT(regs->edi, USER_WRITE_FAULT); break; } - regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes; + regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes; break; case 0x6e: /* OUTSB */ @@ -902,7 +902,7 @@ static int emulate_privileged_op(struct outl_user((u32)data, (u16)regs->edx, v, regs); break; } - regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes; + regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes; break; } diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/arch/x86/x86_emulate.c Fri May 26 13:41:49 2006 -0600 @@ -363,12 +363,13 @@ do{ __asm__ __volatile__ ( #endif /* __i386__ */ /* Fetch next part of the instruction being emulated. */ -#define insn_fetch(_type, _size, _eip) \ -({ unsigned long _x; \ - if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \ - goto done; \ - (_eip) += (_size); \ - (_type)_x; \ +#define insn_fetch(_type, _size, _eip) \ +({ unsigned long _x; \ + rc = ops->read_std((unsigned long)(_eip), &_x, (_size), ctxt); \ + if ( rc != 0 ) \ + goto done; \ + (_eip) += (_size); \ + (_type)_x; \ }) /* Access/update address held in a register, based on addressing mode. 
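[Editor's sketch] The -(int)op_bytes and -(int)dst.bytes casts in the traps.c hunk above and in the string-op emulation that follows all fix the same recurring bug: negating an unsigned int yields a large positive value, which then zero-extends instead of sign-extending when widened for 64-bit register arithmetic. A self-contained demonstration, assuming an LP64 target (32-bit int, 64-bit long):

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int op_bytes = 4;
        unsigned long edi = 0x100000;

        /* Buggy: -op_bytes is 0xFFFFFFFC (unsigned int), which zero-extends
         * to 0x00000000FFFFFFFC when widened to unsigned long. */
        unsigned long bad  = edi + -op_bytes;

        /* Fixed: cast to int first, so negation gives -4 and the widening
         * sign-extends to 0xFFFFFFFFFFFFFFFC; the addition really subtracts. */
        unsigned long good = edi + -(int)op_bytes;

        assert(bad  == 0x1000FFFFCUL);
        assert(good == 0xFFFFCUL);
        printf("bad=%#lx good=%#lx\n", bad, good);
        return 0;
    }
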
*/ @@ -426,12 +427,10 @@ decode_register( return p; } -int +int x86_emulate_memop( - struct cpu_user_regs *regs, - unsigned long cr2, - struct x86_mem_emulator *ops, - int mode) + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) { uint8_t b, d, sib, twobyte = 0, rex_prefix = 0; uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; @@ -439,9 +438,11 @@ x86_emulate_memop( unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i; int rc = 0; struct operand src, dst; + unsigned long cr2 = ctxt->cr2; + int mode = ctxt->mode; /* Shadow copy of register state. Committed on successful emulation. */ - struct cpu_user_regs _regs = *regs; + struct cpu_user_regs _regs = *ctxt->regs; switch ( mode ) { @@ -628,7 +629,7 @@ x86_emulate_memop( dst.bytes = (d & ByteOp) ? 1 : op_bytes; if ( !(d & Mov) && /* optimisation - avoid slow emulated read */ ((rc = ops->read_emulated((unsigned long)dst.ptr, - &dst.val, dst.bytes)) != 0) ) + &dst.val, dst.bytes, ctxt)) != 0) ) goto done; break; } @@ -670,7 +671,7 @@ x86_emulate_memop( src.type = OP_MEM; src.ptr = (unsigned long *)cr2; if ( (rc = ops->read_emulated((unsigned long)src.ptr, - &src.val, src.bytes)) != 0 ) + &src.val, src.bytes, ctxt)) != 0 ) goto done; src.orig_val = src.val; break; @@ -776,7 +777,7 @@ x86_emulate_memop( if ( mode == X86EMUL_MODE_PROT64 ) dst.bytes = 8; if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp), - &dst.val, dst.bytes)) != 0 ) + &dst.val, dst.bytes, ctxt)) != 0 ) goto done; register_address_increment(_regs.esp, dst.bytes); break; @@ -854,12 +855,12 @@ x86_emulate_memop( { dst.bytes = 8; if ( (rc = ops->read_std((unsigned long)dst.ptr, - &dst.val, 8)) != 0 ) + &dst.val, 8, ctxt)) != 0 ) goto done; } - register_address_increment(_regs.esp, -dst.bytes); + register_address_increment(_regs.esp, -(int)dst.bytes); if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp), - dst.val, dst.bytes)) != 0 ) + dst.val, dst.bytes, ctxt)) != 0 ) goto done; dst.val = dst.orig_val; /* skanky: disable writeback */ break; @@ -887,10 +888,11 @@ x86_emulate_memop( case OP_MEM: if ( lock_prefix ) rc = ops->cmpxchg_emulated( - (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes); + (unsigned long)dst.ptr, dst.orig_val, + dst.val, dst.bytes, ctxt); else rc = ops->write_emulated( - (unsigned long)dst.ptr, dst.val, dst.bytes); + (unsigned long)dst.ptr, dst.val, dst.bytes, ctxt); if ( rc != 0 ) goto done; default: @@ -899,7 +901,7 @@ x86_emulate_memop( } /* Commit shadow register state. */ - *regs = _regs; + *ctxt->regs = _regs; done: return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; @@ -911,11 +913,11 @@ x86_emulate_memop( { if ( _regs.ecx == 0 ) { - regs->eip = _regs.eip; + ctxt->regs->eip = _regs.eip; goto done; } _regs.ecx--; - _regs.eip = regs->eip; + _regs.eip = ctxt->regs->eip; } switch ( b ) { @@ -928,20 +930,21 @@ x86_emulate_memop( dst.ptr = (unsigned long *)cr2; if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds, _regs.esi), - &dst.val, dst.bytes)) != 0 ) + &dst.val, dst.bytes, ctxt)) != 0 ) goto done; } else { /* Read fault: source is special memory. */ dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi); - if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 ) + if ( (rc = ops->read_emulated(cr2, &dst.val, + dst.bytes, ctxt)) != 0 ) goto done; } register_address_increment( - _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + _regs.esi, (_regs.eflags & EFLG_DF) ? 
-(int)dst.bytes : dst.bytes); register_address_increment( - _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes); break; case 0xa6 ... 0xa7: /* cmps */ DPRINTF("Urk! I don't handle CMPS.\n"); @@ -952,16 +955,16 @@ x86_emulate_memop( dst.ptr = (unsigned long *)cr2; dst.val = _regs.eax; register_address_increment( - _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes); break; case 0xac ... 0xad: /* lods */ dst.type = OP_REG; dst.bytes = (d & ByteOp) ? 1 : op_bytes; dst.ptr = (unsigned long *)&_regs.eax; - if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 ) + if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 ) goto done; register_address_increment( - _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); + _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes); break; case 0xae ... 0xaf: /* scas */ DPRINTF("Urk! I don't handle SCAS.\n"); @@ -1074,8 +1077,8 @@ x86_emulate_memop( #if defined(__i386__) { unsigned long old_lo, old_hi; - if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4)) != 0) || - ((rc = ops->read_emulated(cr2+4, &old_hi, 4)) != 0) ) + if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4, ctxt)) != 0) || + ((rc = ops->read_emulated(cr2+4, &old_hi, 4, ctxt)) != 0) ) goto done; if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) ) { @@ -1090,8 +1093,8 @@ x86_emulate_memop( } else { - if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, - _regs.ebx, _regs.ecx)) != 0 ) + if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, _regs.ebx, + _regs.ecx, ctxt)) != 0 ) goto done; _regs.eflags |= EFLG_ZF; } @@ -1100,7 +1103,7 @@ x86_emulate_memop( #elif defined(__x86_64__) { unsigned long old, new; - if ( (rc = ops->read_emulated(cr2, &old, 8)) != 0 ) + if ( (rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0 ) goto done; if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) || ((uint32_t)(old>>32) != (uint32_t)_regs.edx) ) @@ -1112,7 +1115,7 @@ x86_emulate_memop( else { new = (_regs.ecx<<32)|(uint32_t)_regs.ebx; - if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8)) != 0 ) + if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8, ctxt)) != 0 ) goto done; _regs.eflags |= EFLG_ZF; } @@ -1136,7 +1139,8 @@ x86_emulate_read_std( x86_emulate_read_std( unsigned long addr, unsigned long *val, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { *val = 0; if ( copy_from_user((void *)val, (void *)addr, bytes) ) @@ -1151,7 +1155,8 @@ x86_emulate_write_std( x86_emulate_write_std( unsigned long addr, unsigned long val, - unsigned int bytes) + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { if ( copy_to_user((void *)addr, (void *)&val, bytes) ) { diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/Makefile --- a/xen/common/Makefile Thu May 25 15:59:18 2006 -0600 +++ b/xen/common/Makefile Fri May 26 13:41:49 2006 -0600 @@ -13,6 +13,7 @@ obj-y += page_alloc.o obj-y += page_alloc.o obj-y += rangeset.o obj-y += sched_bvt.o +obj-y += sched_credit.o obj-y += sched_sedf.o obj-y += schedule.o obj-y += softirq.o diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/grant_table.c --- a/xen/common/grant_table.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/common/grant_table.c Fri May 26 13:41:49 2006 -0600 @@ -505,15 +505,12 @@ gnttab_setup_table( goto out; } - if ( op.nr_frames <= NR_GRANT_FRAMES ) - { - ASSERT(d->grant_table != NULL); - op.status = GNTST_okay; - for ( i = 0; i < op.nr_frames; i++ ) - { 
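With the interface change above, a caller of x86_emulate_memop() now bundles the register file, the faulting linear address, and the execution mode into one struct x86_emulate_ctxt, and every x86_emulate_ops hook receives that context back. A sketch of the calling convention, modelled on the ptwr_do_page_fault() usage; my_read, my_ops, and handle_fault are illustrative names only, and a real user must supply every hook it can reach:

    /* Sketch against the declarations in xen/include/asm-x86/x86_emulate.h. */
    static int my_read(unsigned long addr, unsigned long *val,
                       unsigned int bytes, struct x86_emulate_ctxt *ctxt)
    {
        *val = 0; /* ... fetch from the special memory area ... */
        return 0; /* success; nonzero codes propagate out of the emulator */
    }

    static struct x86_emulate_ops my_ops = {
        .read_std       = x86_emulate_read_std,
        .write_std      = x86_emulate_write_std,
        .read_emulated  = my_read,
        /* .write_emulated, .cmpxchg_emulated, .cmpxchg8b_emulated ... */
    };

    int handle_fault(struct cpu_user_regs *regs, unsigned long cr2)
    {
        struct x86_emulate_ctxt ctxt = {
            .regs = regs,
            .cr2  = cr2,
            .mode = X86EMUL_MODE_HOST,
        };

        /* Returns -1 on failure, 0 on success; on success the committed
         * register state has been written back through ctxt.regs. */
        return x86_emulate_memop(&ctxt, &my_ops);
    }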
- gmfn = gnttab_shared_gmfn(d, d->grant_table, i); - (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1); - } + ASSERT(d->grant_table != NULL); + op.status = GNTST_okay; + for ( i = 0; i < op.nr_frames; i++ ) + { + gmfn = gnttab_shared_gmfn(d, d->grant_table, i); + (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1); } put_domain(d); diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/kernel.c --- a/xen/common/kernel.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/common/kernel.c Fri May 26 13:41:49 2006 -0600 @@ -191,12 +191,11 @@ long do_xen_version(int cmd, XEN_GUEST_H switch ( fi.submap_idx ) { case 0: - fi.submap = 0; + fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb); if ( shadow_mode_translate(current->domain) ) fi.submap |= (1U << XENFEAT_writable_page_tables) | - (1U << XENFEAT_auto_translated_physmap) | - (1U << XENFEAT_pae_pgdir_above_4gb); + (1U << XENFEAT_auto_translated_physmap); if ( supervisor_mode_kernel ) fi.submap |= 1U << XENFEAT_supervisor_mode_kernel; break; diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/schedule.c --- a/xen/common/schedule.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/common/schedule.c Fri May 26 13:41:49 2006 -0600 @@ -50,9 +50,11 @@ struct schedule_data schedule_data[NR_CP extern struct scheduler sched_bvt_def; extern struct scheduler sched_sedf_def; +extern struct scheduler sched_credit_def; static struct scheduler *schedulers[] = { &sched_bvt_def, &sched_sedf_def, + &sched_credit_def, NULL }; @@ -639,6 +641,8 @@ static void t_timer_fn(void *unused) page_scrub_schedule_work(); + SCHED_OP(tick, cpu); + set_timer(&t_timer[cpu], NOW() + MILLISECS(10)); } @@ -681,6 +685,7 @@ void __init scheduler_init(void) printk("Could not find scheduler: %s\n", opt_sched); printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name); + SCHED_OP(init); if ( idle_vcpu[0] != NULL ) { diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/trace.c --- a/xen/common/trace.c Thu May 25 15:59:18 2006 -0600 +++ b/xen/common/trace.c Fri May 26 13:41:49 2006 -0600 @@ -91,6 +91,7 @@ static int alloc_trace_bufs(void) if ( (rawbuf = alloc_xenheap_pages(order)) == NULL ) { printk("Xen trace buffers: memory allocation failed\n"); + opt_tbuf_size = 0; return -EINVAL; } @@ -135,10 +136,7 @@ static int tb_set_size(int size) opt_tbuf_size = size; if ( alloc_trace_bufs() != 0 ) - { - opt_tbuf_size = 0; - return -EINVAL; - } + return -EINVAL; printk("Xen trace buffers: initialized\n"); return 0; diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/asm-x86/domain.h Fri May 26 13:41:49 2006 -0600 @@ -120,6 +120,18 @@ struct arch_vcpu struct vcpu_guest_context guest_context __attribute__((__aligned__(16))); +#ifdef CONFIG_X86_PAE + /* + * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest + * supplies a >=4GB PAE L3 table. We need two because we cannot set up + * an L3 table while we are currently running on it (without using + * expensive atomic 64-bit operations). + */ + l3_pgentry_t lowmem_l3tab[2][4] __attribute__((__aligned__(32))); + unsigned long lowmem_l3tab_high_mfn[2]; /* The >=4GB MFN being shadowed. */ + unsigned int lowmem_l3tab_inuse; /* Which lowmem_l3tab is in use? 
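The do_xen_version() change above starts advertising XENFEAT_pae_pgdir_above_4gb to every guest, while the writable-pagetable and auto-translate bits stay conditional on shadow translate mode. On the guest side a feature is one bit in a 32-bit submap indexed by feat/32; a self-contained illustration (the bit values follow xen/include/public/features.h but are assumed here for the demo, and the actual hypercall to fetch the submap is elided):

    #include <stdint.h>
    #include <stdio.h>

    #define XENFEAT_writable_page_tables  0  /* values assumed for the demo */
    #define XENFEAT_pae_pgdir_above_4gb   4

    static int xen_feature(const uint32_t *submaps, unsigned int feat)
    {
        return (submaps[feat / 32] >> (feat % 32)) & 1;
    }

    int main(void)
    {
        /* Submap 0 as a non-translate-mode hypervisor would now report it. */
        uint32_t submaps[1] = { 1U << XENFEAT_pae_pgdir_above_4gb };

        printf("pae_pgdir_above_4gb:  %d\n",
               xen_feature(submaps, XENFEAT_pae_pgdir_above_4gb));
        printf("writable_page_tables: %d\n",
               xen_feature(submaps, XENFEAT_writable_page_tables));
        return 0;
    }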
*/ +#endif + unsigned long flags; /* TF_ */ void (*schedule_tail) (struct vcpu *); diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/domain.h --- a/xen/include/asm-x86/hvm/domain.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/asm-x86/hvm/domain.h Fri May 26 13:41:49 2006 -0600 @@ -35,9 +35,9 @@ struct hvm_domain { unsigned int nr_vcpus; unsigned int apic_enabled; unsigned int pae_enabled; - - struct hvm_virpit vpit; - u64 guest_time; + s64 tsc_frequency; + struct pl_time pl_time; + struct hvm_virpic vpic; struct hvm_vioapic vioapic; struct hvm_io_handler io_handler; diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/svm/intr.h --- a/xen/include/asm-x86/hvm/svm/intr.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/asm-x86/hvm/svm/intr.h Fri May 26 13:41:49 2006 -0600 @@ -21,7 +21,6 @@ #ifndef __ASM_X86_HVM_SVM_INTR_H__ #define __ASM_X86_HVM_SVM_INTR_H__ -extern void svm_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit); extern void svm_intr_assist(void); extern void svm_intr_assist_update(struct vcpu *v, int highest_vector); extern void svm_intr_assist_test_valid(struct vcpu *v, diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/svm/svm.h --- a/xen/include/asm-x86/hvm/svm/svm.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/asm-x86/hvm/svm/svm.h Fri May 26 13:41:49 2006 -0600 @@ -48,7 +48,6 @@ extern void svm_do_launch(struct vcpu *v extern void svm_do_launch(struct vcpu *v); extern void svm_do_resume(struct vcpu *v); extern void svm_set_guest_time(struct vcpu *v, u64 gtime); -extern u64 svm_get_guest_time(struct vcpu *v); extern void arch_svm_do_resume(struct vcpu *v); extern int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa); /* For debugging. Remove when no longer needed. */ diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vcpu.h --- a/xen/include/asm-x86/hvm/vcpu.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/asm-x86/hvm/vcpu.h Fri May 26 13:41:49 2006 -0600 @@ -32,6 +32,9 @@ struct hvm_vcpu { unsigned long ioflags; struct mmio_op mmio_op; struct vlapic *vlapic; + s64 cache_tsc_offset; + u64 guest_time; + /* For AP startup */ unsigned long init_sipi_sipi_state; diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Fri May 26 13:41:49 2006 -0600 @@ -34,7 +34,6 @@ extern void arch_vmx_do_launch(struct vc extern void arch_vmx_do_launch(struct vcpu *); extern void arch_vmx_do_resume(struct vcpu *); extern void set_guest_time(struct vcpu *v, u64 gtime); -extern u64 get_guest_time(struct vcpu *v); extern unsigned int cpu_rev; diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vpit.h --- a/xen/include/asm-x86/hvm/vpit.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/asm-x86/hvm/vpit.h Fri May 26 13:41:49 2006 -0600 @@ -29,9 +29,7 @@ #include <asm/hvm/vpic.h> #define PIT_FREQ 1193181 - -#define PIT_BASE 0x40 -#define HVM_PIT_ACCEL_MODE 2 +#define PIT_BASE 0x40 typedef struct PITChannelState { int count; /* can be 65536 */ @@ -48,47 +46,56 @@ typedef struct PITChannelState { u8 gate; /* timer start */ s64 count_load_time; /* irq handling */ - s64 next_transition_time; - int irq; - struct hvm_time_info *hvm_time; - u32 period; /* period(ns) based on count */ + struct vcpu *vcpu; + struct periodic_time *pt; } PITChannelState; - -struct hvm_time_info { - /* extra info for the mode 2 channel */ - struct timer pit_timer; - struct vcpu *vcpu; /* which vcpu the ac_timer bound to */ - u64 
period_cycles; /* pit frequency in cpu cycles */ - s_time_t count_advance; /* accumulated count advance since last fire */ - s_time_t count_point; /* last point accumulating count advance */ - unsigned int pending_intr_nr; /* the couner for pending timer interrupts */ - int first_injected; /* flag to prevent shadow window */ - s64 cache_tsc_offset; /* cache of VMCS TSC_OFFSET offset */ - u64 last_pit_gtime; /* guest time when last pit is injected */ + +/* + * Abstract layer of periodic time, one-shot or periodic. + */ +struct periodic_time { + char enabled; /* enabled */ + char one_shot; /* one-shot timer */ + char irq; + char first_injected; /* flag to prevent shadow window */ + u32 pending_intr_nr; /* the counter for pending timer interrupts */ + u32 period; /* period in ns */ + u64 period_cycles; /* period in cpu cycles */ + s_time_t scheduled; /* scheduled timer interrupt */ + u64 last_plt_gtime; /* platform time when last IRQ is injected */ + struct timer timer; /* ac_timer */ }; -typedef struct hvm_virpit { +typedef struct PITState { PITChannelState channels[3]; - struct hvm_time_info time_info; int speaker_data_on; int dummy_refresh_clock; -}hvm_virpit; +} PITState; +struct pl_time { /* platform time */ + struct periodic_time periodic_tm; + struct PITState vpit; + /* TODO: RTC/ACPI time */ +}; -static __inline__ s_time_t get_pit_scheduled( - struct vcpu *v, - struct hvm_virpit *vpit) +static __inline__ s_time_t get_scheduled( + struct vcpu *v, int irq, + struct periodic_time *pt) { - struct PITChannelState *s = &(vpit->channels[0]); - if ( is_irq_enabled(v, 0) ) { - return s->next_transition_time; + if ( is_irq_enabled(v, irq) ) { + return pt->scheduled; } else return -1; } /* to hook the ioreq packet to get the PIT initialization info */ -extern void pit_init(struct hvm_virpit *pit, struct vcpu *v); -extern void pickup_deactive_ticks(struct hvm_virpit *vpit); +extern void hvm_hooks_assist(struct vcpu *v); +extern void pickup_deactive_ticks(struct periodic_time *vpit); +extern u64 hvm_get_guest_time(struct vcpu *v); +extern struct periodic_time *create_periodic_time(struct vcpu *v, u32 period, char irq, char one_shot); +extern void destroy_periodic_time(struct periodic_time *pt); +void pit_init(struct vcpu *v, unsigned long cpu_khz); +void pt_timer_fn(void *data); #endif /* __ASM_X86_HVM_VPIT_H__ */ diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/string.h --- a/xen/include/asm-x86/string.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/asm-x86/string.h Fri May 26 13:41:49 2006 -0600 @@ -2,152 +2,6 @@ #define __X86_STRING_H__ #include <xen/config.h> - -#define __HAVE_ARCH_STRCPY -static inline char *strcpy(char *dest, const char *src) -{ - long d0, d1, d2; - __asm__ __volatile__ ( - "1: lodsb \n" - " stosb \n" - " test %%al,%%al \n" - " jne 1b \n" - : "=&S" (d0), "=&D" (d1), "=&a" (d2) - : "0" (src), "1" (dest) : "memory" ); - return dest; -} - -#define __HAVE_ARCH_STRNCPY -static inline char *strncpy(char *dest, const char *src, size_t count) -{ - long d0, d1, d2, d3; - __asm__ __volatile__ ( - "1: dec %2 \n" - " js 2f \n" - " lodsb \n" - " stosb \n" - " test %%al,%%al \n" - " jne 1b \n" - " rep ; stosb \n" - "2: \n" - : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) - : "0" (src), "1" (dest), "2" (count) : "memory" ); - return dest; -} - -#define __HAVE_ARCH_STRCAT -static inline char *strcat(char *dest, const char *src) -{ - long d0, d1, d2, d3; - __asm__ __volatile__ ( - " repne ; scasb \n" - " dec %1 \n" - "1: lodsb \n" - " stosb \n" - " test %%al,%%al \n" - " jne 1b 
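struct periodic_time above generalises the PIT-only hvm_time_info into a reusable source of periodic or one-shot guest interrupts, grouped with the PIT state under the new struct pl_time. Going by the declarations just above, a platform timer would be wired up roughly as follows; this is a sketch of the intended usage, not code from this changeset:

    /* Sketch: give vcpu v a 10ms tick on guest IRQ 0. */
    static struct periodic_time *tick;

    static void start_tick(struct vcpu *v)
    {
        /* period is in ns per the struct above; final 0 = not one-shot. */
        tick = create_periodic_time(v, 10000000, 0, 0);
    }

    static void stop_tick(void)
    {
        if ( tick != NULL )
        {
            destroy_periodic_time(tick);
            tick = NULL;
        }
    }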
\n" - : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL) : "memory" ); - return dest; -} - -#define __HAVE_ARCH_STRNCAT -static inline char *strncat(char *dest, const char *src, size_t count) -{ - long d0, d1, d2, d3; - __asm__ __volatile__ ( - " repne ; scasb \n" - " dec %1 \n" - " mov %8,%3 \n" - "1: dec %3 \n" - " js 2f \n" - " lodsb \n" - " stosb \n" - " test %%al,%%al \n" - " jne 1b \n" - "2: xor %%eax,%%eax\n" - " stosb" - : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL), "g" (count) - : "memory" ); - return dest; -} - -#define __HAVE_ARCH_STRCMP -static inline int strcmp(const char *cs, const char *ct) -{ - long d0, d1; - register int __res; - __asm__ __volatile__ ( - "1: lodsb \n" - " scasb \n" - " jne 2f \n" - " test %%al,%%al \n" - " jne 1b \n" - " xor %%eax,%%eax\n" - " jmp 3f \n" - "2: sbb %%eax,%%eax\n" - " or $1,%%al \n" - "3: \n" - : "=a" (__res), "=&S" (d0), "=&D" (d1) - : "1" (cs), "2" (ct) ); - return __res; -} - -#define __HAVE_ARCH_STRNCMP -static inline int strncmp(const char *cs, const char *ct, size_t count) -{ - long d0, d1, d2; - register int __res; - __asm__ __volatile__ ( - "1: dec %3 \n" - " js 2f \n" - " lodsb \n" - " scasb \n" - " jne 3f \n" - " test %%al,%%al \n" - " jne 1b \n" - "2: xor %%eax,%%eax\n" - " jmp 4f \n" - "3: sbb %%eax,%%eax\n" - " or $1,%%al \n" - "4: \n" - : "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) - : "1" (cs), "2" (ct), "3" (count) ); - return __res; -} - -#define __HAVE_ARCH_STRCHR -static inline char *strchr(const char *s, int c) -{ - long d0; - register char *__res; - __asm__ __volatile__ ( - " mov %%al,%%ah \n" - "1: lodsb \n" - " cmp %%ah,%%al \n" - " je 2f \n" - " test %%al,%%al \n" - " jne 1b \n" - " mov $1,%1 \n" - "2: mov %1,%0 \n" - " dec %0 \n" - : "=a" (__res), "=&S" (d0) : "1" (s), "0" (c) ); - return __res; -} - -#define __HAVE_ARCH_STRLEN -static inline size_t strlen(const char *s) -{ - long d0; - register int __res; - __asm__ __volatile__ ( - " repne ; scasb \n" - " notl %0 \n" - " decl %0 \n" - : "=c" (__res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffUL) ); - return __res; -} static inline void *__variable_memcpy(void *to, const void *from, size_t n) { @@ -258,22 +112,6 @@ extern void *memmove(void *dest, const v #define __HAVE_ARCH_MEMCMP #define memcmp __builtin_memcmp -#define __HAVE_ARCH_MEMCHR -static inline void *memchr(const void *cs, int c, size_t count) -{ - long d0; - register void *__res; - if ( count == 0 ) - return NULL; - __asm__ __volatile__ ( - " repne ; scasb\n" - " je 1f \n" - " mov $1,%0 \n" - "1: dec %0 \n" - : "=D" (__res), "=&c" (d0) : "a" (c), "0" (cs), "1" (count) ); - return __res; -} - static inline void *__memset_generic(void *s, char c, size_t count) { long d0, d1; diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/x86_emulate.h --- a/xen/include/asm-x86/x86_emulate.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/asm-x86/x86_emulate.h Fri May 26 13:41:49 2006 -0600 @@ -9,8 +9,10 @@ #ifndef __X86_EMULATE_H__ #define __X86_EMULATE_H__ -/* - * x86_mem_emulator: +struct x86_emulate_ctxt; + +/* + * x86_emulate_ops: * * These operations represent the instruction emulator's interface to memory. 
* There are two categories of operation: those that act on ordinary memory @@ -47,7 +49,7 @@ #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ #define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */ #define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */ -struct x86_mem_emulator +struct x86_emulate_ops { /* * read_std: Read bytes of standard (non-emulated/special) memory. @@ -59,7 +61,8 @@ struct x86_mem_emulator int (*read_std)( unsigned long addr, unsigned long *val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); /* * write_std: Write bytes of standard (non-emulated/special) memory. @@ -71,7 +74,8 @@ struct x86_mem_emulator int (*write_std)( unsigned long addr, unsigned long val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); /* * read_emulated: Read bytes from emulated/special memory area. @@ -82,7 +86,8 @@ struct x86_mem_emulator int (*read_emulated)( unsigned long addr, unsigned long *val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); /* * write_emulated: Read bytes from emulated/special memory area. @@ -93,7 +98,8 @@ struct x86_mem_emulator int (*write_emulated)( unsigned long addr, unsigned long val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); /* * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an @@ -107,11 +113,12 @@ struct x86_mem_emulator unsigned long addr, unsigned long old, unsigned long new, - unsigned int bytes); - - /* - * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an - * emulated/special memory area. + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); + + /* + * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an + * emulated/special memory area. * @addr: [IN ] Linear address to access. * @old: [IN ] Value expected to be current at @addr. * @new: [IN ] Value to write to @addr. @@ -126,7 +133,8 @@ struct x86_mem_emulator unsigned long old_lo, unsigned long old_hi, unsigned long new_lo, - unsigned long new_hi); + unsigned long new_hi, + struct x86_emulate_ctxt *ctxt); }; /* Standard reader/writer functions that callers may wish to use. */ @@ -134,14 +142,28 @@ x86_emulate_read_std( x86_emulate_read_std( unsigned long addr, unsigned long *val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); extern int x86_emulate_write_std( unsigned long addr, unsigned long val, - unsigned int bytes); + unsigned int bytes, + struct x86_emulate_ctxt *ctxt); struct cpu_user_regs; + +struct x86_emulate_ctxt +{ + /* Register state before/after emulation. */ + struct cpu_user_regs *regs; + + /* Linear faulting address (if emulating a page-faulting instruction). */ + unsigned long cr2; + + /* Emulated execution mode, represented by an X86EMUL_MODE value. */ + int mode; +}; /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ @@ -159,25 +181,19 @@ struct cpu_user_regs; /* * x86_emulate_memop: Emulate an instruction that faulted attempting to * read/write a 'special' memory area. - * @regs: Register state at time of fault. - * @cr2: Linear faulting address within an emulated/special memory area. - * @ops: Interface to access special memory. - * @mode: Emulated execution mode, represented by an X86EMUL_MODE value. * Returns -1 on failure, 0 on success. 
*/ -extern int +int x86_emulate_memop( - struct cpu_user_regs *regs, - unsigned long cr2, - struct x86_mem_emulator *ops, - int mode); + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops); /* * Given the 'reg' portion of a ModRM byte, and a register block, return a * pointer into the block that addresses the relevant register. * @highbyte_regs specifies whether to decode AH,CH,DH,BH. */ -extern void * +void * decode_register( uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs); diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/public/io/xenbus.h --- a/xen/include/public/io/xenbus.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/public/io/xenbus.h Fri May 26 13:41:49 2006 -0600 @@ -9,34 +9,37 @@ #ifndef _XEN_PUBLIC_IO_XENBUS_H #define _XEN_PUBLIC_IO_XENBUS_H -/* The state of either end of the Xenbus, i.e. the current communication - status of initialisation across the bus. States here imply nothing about - the state of the connection between the driver and the kernel's device - layers. */ -typedef enum -{ - XenbusStateUnknown = 0, - XenbusStateInitialising = 1, - XenbusStateInitWait = 2, /* Finished early initialisation, but waiting - for information from the peer or hotplug - scripts. */ - XenbusStateInitialised = 3, /* Initialised and waiting for a connection - from the peer. */ - XenbusStateConnected = 4, - XenbusStateClosing = 5, /* The device is being closed due to an error - or an unplug event. */ - XenbusStateClosed = 6 +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, -} XenbusState; + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6 +}; +typedef enum xenbus_state XenbusState; #endif /* _XEN_PUBLIC_IO_XENBUS_H */ - -/* - * Local variables: - * c-file-style: "linux" - * indent-tabs-mode: t - * c-indent-level: 8 - * c-basic-offset: 8 - * tab-width: 8 - * End: - */ diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/public/sched_ctl.h --- a/xen/include/public/sched_ctl.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/public/sched_ctl.h Fri May 26 13:41:49 2006 -0600 @@ -10,6 +10,7 @@ /* Scheduler types. */ #define SCHED_BVT 0 #define SCHED_SEDF 4 +#define SCHED_CREDIT 5 /* Set or get info? 
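The reworked enum xenbus_state above is what the two ends of a split driver trade through the store. A frontend typically folds backend transitions into a single switch; a minimal sketch, with frontend_connect() and frontend_close() as hypothetical driver hooks rather than anything defined in this patch:

    static void frontend_connect(void) { /* plumb rings, event channel... */ }
    static void frontend_close(void)   { /* tear down, release grants... */ }

    static void backend_changed(enum xenbus_state backend_state)
    {
        switch ( backend_state )
        {
        case XenbusStateInitialising:
        case XenbusStateInitWait:
        case XenbusStateInitialised:
            break; /* backend not ready yet; keep waiting */
        case XenbusStateConnected:
            frontend_connect();
            break;
        case XenbusStateClosing:
        case XenbusStateClosed:
            frontend_close();
            break;
        case XenbusStateUnknown:
        default:
            break;
        }
    }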
*/ #define SCHED_INFO_PUT 0 @@ -48,6 +49,10 @@ struct sched_adjdom_cmd { uint32_t extratime; uint32_t weight; } sedf; + struct csched_domain { + uint16_t weight; + uint16_t cap; + } credit; } u; }; diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/xen/sched-if.h --- a/xen/include/xen/sched-if.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/xen/sched-if.h Fri May 26 13:41:49 2006 -0600 @@ -58,6 +58,8 @@ struct scheduler { char *opt_name; /* option name for this scheduler */ unsigned int sched_id; /* ID for this scheduler */ + void (*init) (void); + void (*tick) (unsigned int cpu); int (*alloc_task) (struct vcpu *); void (*add_task) (struct vcpu *); void (*free_task) (struct domain *); diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/xen/softirq.h --- a/xen/include/xen/softirq.h Thu May 25 15:59:18 2006 -0600 +++ b/xen/include/xen/softirq.h Fri May 26 13:41:49 2006 -0600 @@ -26,6 +26,19 @@ asmlinkage void do_softirq(void); asmlinkage void do_softirq(void); extern void open_softirq(int nr, softirq_handler handler); +static inline void cpumask_raise_softirq(cpumask_t mask, unsigned int nr) +{ + int cpu; + + for_each_cpu_mask(cpu, mask) + { + if ( test_and_set_bit(nr, &softirq_pending(cpu)) ) + cpu_clear(cpu, mask); + } + + smp_send_event_check_mask(mask); +} + static inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr) { if ( !test_and_set_bit(nr, &softirq_pending(cpu)) ) diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c Fri May 26 13:41:49 2006 -0600 @@ -0,0 +1,185 @@ +#include <linux/config.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <xen/cpu_hotplug.h> +#include <xen/xenbus.h> + +/* + * Set of CPUs that remote admin software will allow us to bring online. + * Notified to us via xenbus. + */ +static cpumask_t xenbus_allowed_cpumask; + +/* Set of CPUs that local admin will allow us to bring online. */ +static cpumask_t local_allowed_cpumask = CPU_MASK_ALL; + +static int local_cpu_hotplug_request(void) +{ + /* + * We assume a CPU hotplug request comes from local admin if it is made + * via a userspace process (i.e., one with a real mm_struct). 
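cpumask_raise_softirq() above is the batched counterpart of cpu_raise_softirq(): CPUs that already had the softirq pending are filtered out of the mask, and only the remainder receive the event-check IPI. The credit scheduler's __runq_tickle() leans on this to prod several idle CPUs in one call. The filtering logic modelled in self-contained C, with an 8-bit mask standing in for cpumask_t:

    #include <stdio.h>

    /* Self-contained model: 8 CPUs, per-CPU softirq bitmaps. */
    static unsigned int pending[8];

    static unsigned int raise_batch(unsigned int mask, unsigned int nr)
    {
        unsigned int ipi_mask = mask;
        unsigned int cpu;

        for ( cpu = 0; cpu < 8; cpu++ )
        {
            if ( !(mask & (1u << cpu)) )
                continue;
            if ( pending[cpu] & (1u << nr) )   /* already pending: */
                ipi_mask &= ~(1u << cpu);      /* no IPI needed */
            pending[cpu] |= 1u << nr;          /* test_and_set_bit() */
        }
        return ipi_mask; /* stand-in for smp_send_event_check_mask() */
    }

    int main(void)
    {
        pending[1] = 1; /* CPU1 already has softirq 0 pending */
        printf("IPI mask: %#x\n", raise_batch(0x7, 0)); /* prints 0x5 */
        return 0;
    }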
+ */ + return (current->mm != NULL); +} + +static void vcpu_hotplug(unsigned int cpu) +{ + int err; + char dir[32], state[32]; + + if ((cpu >= NR_CPUS) || !cpu_possible(cpu)) + return; + + sprintf(dir, "cpu/%d", cpu); + err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state); + if (err != 1) { + printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); + return; + } + + if (strcmp(state, "online") == 0) { + cpu_set(cpu, xenbus_allowed_cpumask); + (void)cpu_up(cpu); + } else if (strcmp(state, "offline") == 0) { + cpu_clear(cpu, xenbus_allowed_cpumask); + (void)cpu_down(cpu); + } else { + printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", + state, cpu); + } +} + +static void handle_vcpu_hotplug_event( + struct xenbus_watch *watch, const char **vec, unsigned int len) +{ + int cpu; + char *cpustr; + const char *node = vec[XS_WATCH_PATH]; + + if ((cpustr = strstr(node, "cpu/")) != NULL) { + sscanf(cpustr, "cpu/%d", &cpu); + vcpu_hotplug(cpu); + } +} + +static int smpboot_cpu_notify(struct notifier_block *notifier, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + + /* + * We do this in a callback notifier rather than __cpu_disable() + * because local_cpu_hotplug_request() does not work in the latter + * as it's always executed from within a stopmachine kthread. + */ + if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request()) + cpu_clear(cpu, local_allowed_cpumask); + + return NOTIFY_OK; +} + +static int setup_cpu_watcher(struct notifier_block *notifier, + unsigned long event, void *data) +{ + int i; + + static struct xenbus_watch cpu_watch = { + .node = "cpu", + .callback = handle_vcpu_hotplug_event, + .flags = XBWF_new_thread }; + (void)register_xenbus_watch(&cpu_watch); + + if (!(xen_start_info->flags & SIF_INITDOMAIN)) { + for_each_cpu(i) + vcpu_hotplug(i); + printk(KERN_INFO "Brought up %ld CPUs\n", + (long)num_online_cpus()); + } + + return NOTIFY_DONE; +} + +static int __init setup_vcpu_hotplug_event(void) +{ + static struct notifier_block hotplug_cpu = { + .notifier_call = smpboot_cpu_notify }; + static struct notifier_block xsn_cpu = { + .notifier_call = setup_cpu_watcher }; + + register_cpu_notifier(&hotplug_cpu); + register_xenstore_notifier(&xsn_cpu); + + return 0; +} + +arch_initcall(setup_vcpu_hotplug_event); + +int smp_suspend(void) +{ + int i, err; + + lock_cpu_hotplug(); + + /* + * Take all other CPUs offline. We hold the hotplug mutex to + * avoid other processes bringing up CPUs under our feet. 
+ */ + while (num_online_cpus() > 1) { + unlock_cpu_hotplug(); + for_each_online_cpu(i) { + if (i == 0) + continue; + err = cpu_down(i); + if (err) { + printk(KERN_CRIT "Failed to take all CPUs " + "down: %d.\n", err); + for_each_cpu(i) + vcpu_hotplug(i); + return err; + } + } + lock_cpu_hotplug(); + } + + return 0; +} + +void smp_resume(void) +{ + int cpu; + + for_each_cpu(cpu) + cpu_initialize_context(cpu); + + unlock_cpu_hotplug(); + + for_each_cpu(cpu) + vcpu_hotplug(cpu); +} + +int cpu_up_is_allowed(unsigned int cpu) +{ + int rc = 0; + + if (local_cpu_hotplug_request()) { + cpu_set(cpu, local_allowed_cpumask); + if (!cpu_isset(cpu, xenbus_allowed_cpumask)) { + printk("%s: attempt to bring up CPU %u disallowed by " + "remote admin.\n", __FUNCTION__, cpu); + rc = -EBUSY; + } + } else if (!cpu_isset(cpu, local_allowed_cpumask) || + !cpu_isset(cpu, xenbus_allowed_cpumask)) { + rc = -EBUSY; + } + + return rc; +} + +void init_xenbus_allowed_cpumask(void) +{ + xenbus_allowed_cpumask = cpu_present_map; +} diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h Fri May 26 13:41:49 2006 -0600 @@ -0,0 +1,63 @@ +/* + * structures and definitions for the int 15, ax=e820 memory map + * scheme. + * + * In a nutshell, setup.S populates a scratch table in the + * empty_zero_block that contains a list of usable address/size + * duples. setup.c, this information is transferred into the e820map, + * and in init.c/numa.c, that new information is used to mark pages + * reserved or not. + */ +#ifndef __E820_HEADER +#define __E820_HEADER + +#include <linux/mmzone.h> + +#define E820MAP 0x2d0 /* our map */ +#define E820MAX 128 /* number of entries in E820MAP */ +#define E820NR 0x1e8 /* # entries in E820MAP */ + +#define E820_RAM 1 +#define E820_RESERVED 2 +#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */ +#define E820_NVS 4 + +#define HIGH_MEMORY (1024*1024) + +#define LOWMEMSIZE() (0x9f000) + +#ifndef __ASSEMBLY__ +struct e820entry { + u64 addr; /* start of memory segment */ + u64 size; /* size of memory segment */ + u32 type; /* type of memory segment */ +} __attribute__((packed)); + +struct e820map { + int nr_map; + struct e820entry map[E820MAX]; +}; + +extern unsigned long find_e820_area(unsigned long start, unsigned long end, + unsigned size); +extern void add_memory_region(unsigned long start, unsigned long size, + int type); +extern void setup_memory_region(void); +extern void contig_e820_setup(void); +extern unsigned long e820_end_of_ram(void); +extern void e820_reserve_resources(struct e820entry *e820, int nr_map); +extern void e820_print_map(char *who); +extern int e820_mapped(unsigned long start, unsigned long end, unsigned type); + +extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end); +extern void e820_setup_gap(struct e820entry *e820, int nr_map); +extern unsigned long e820_hole_size(unsigned long start_pfn, + unsigned long end_pfn); + +extern void __init parse_memopt(char *p, char **end); +extern void __init parse_memmapopt(char *p, char **end); + +extern struct e820map e820; +#endif/*!__ASSEMBLY__*/ + +#endif/*__E820_HEADER*/ diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/xen/cpu_hotplug.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h Fri May 26 13:41:49 2006 -0600 @@ -0,0 +1,42 @@ +#ifndef __XEN_CPU_HOTPLUG_H__ 
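The mach-xen e820.h above carries the standard packed e820entry layout that the x86_64 code walks to classify memory, e.g. in e820_end_of_ram(). Summing usable RAM out of such a map is a typical consumer; a self-contained example over a made-up three-entry map:

    #include <stdint.h>
    #include <stdio.h>

    #define E820_RAM      1
    #define E820_RESERVED 2

    struct e820entry {
        uint64_t addr;   /* start of memory segment */
        uint64_t size;   /* size of memory segment */
        uint32_t type;   /* type of memory segment */
    } __attribute__((packed));

    int main(void)
    {
        struct e820entry map[] = {
            { 0x0,      0x9f000,    E820_RAM      },
            { 0x9f000,  0x61000,    E820_RESERVED },
            { 0x100000, 0x3ff00000, E820_RAM      },
        };
        uint64_t ram = 0;
        unsigned int i;

        for ( i = 0; i < sizeof(map)/sizeof(map[0]); i++ )
            if ( map[i].type == E820_RAM )
                ram += map[i].size;

        printf("usable RAM: %llu bytes\n", (unsigned long long)ram);
        return 0;
    }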
+#define __XEN_CPU_HOTPLUG_H__ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/cpumask.h> + +#if defined(CONFIG_HOTPLUG_CPU) + +#if defined(CONFIG_X86) +void cpu_initialize_context(unsigned int cpu); +#else +#define cpu_initialize_context(cpu) ((void)0) +#endif + +int cpu_up_is_allowed(unsigned int cpu); +void init_xenbus_allowed_cpumask(void); +int smp_suspend(void); +void smp_resume(void); + +#else /* !defined(CONFIG_HOTPLUG_CPU) */ + +#define cpu_up_is_allowed(cpu) (1) +#define init_xenbus_allowed_cpumask() ((void)0) + +static inline int smp_suspend(void) +{ + if (num_online_cpus() > 1) { + printk(KERN_WARNING "Can't suspend SMP guests " + "without CONFIG_HOTPLUG_CPU\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static inline void smp_resume(void) +{ +} + +#endif /* !defined(CONFIG_HOTPLUG_CPU) */ + +#endif /* __XEN_CPU_HOTPLUG_H__ */ diff -r 9d52a66c7499 -r c073ebdbde8c patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch Fri May 26 13:41:49 2006 -0600 @@ -0,0 +1,18 @@ +diff -ru ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c +--- ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c 2006-05-02 22:38:44.000000000 +0100 ++++ ./drivers/ide/ide-lib.c 2006-05-24 18:37:05.000000000 +0100 +@@ -410,10 +410,10 @@ + { + u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ + +- if (!PCI_DMA_BUS_IS_PHYS) { +- addr = BLK_BOUNCE_ANY; +- } else if (on && drive->media == ide_disk) { +- if (HWIF(drive)->pci_dev) ++ if (on && drive->media == ide_disk) { ++ if (!PCI_DMA_BUS_IS_PHYS) ++ addr = BLK_BOUNCE_ANY; ++ else if (HWIF(drive)->pci_dev) + addr = HWIF(drive)->pci_dev->dma_mask; + } + diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_csched.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_csched.c Fri May 26 13:41:49 2006 -0600 @@ -0,0 +1,50 @@ +/**************************************************************************** + * (C) 2006 - Emmanuel Ackaouy - XenSource Inc. + **************************************************************************** + * + * File: xc_csched.c + * Author: Emmanuel Ackaouy + * + * Description: XC Interface to the credit scheduler + * + */ +#include "xc_private.h" + + +int +xc_csched_domain_set( + int xc_handle, + uint32_t domid, + struct csched_domain *sdom) +{ + DECLARE_DOM0_OP; + + op.cmd = DOM0_ADJUSTDOM; + op.u.adjustdom.domain = (domid_t) domid; + op.u.adjustdom.sched_id = SCHED_CREDIT; + op.u.adjustdom.direction = SCHED_INFO_PUT; + op.u.adjustdom.u.credit = *sdom; + + return do_dom0_op(xc_handle, &op); +} + +int +xc_csched_domain_get( + int xc_handle, + uint32_t domid, + struct csched_domain *sdom) +{ + DECLARE_DOM0_OP; + int err; + + op.cmd = DOM0_ADJUSTDOM; + op.u.adjustdom.domain = (domid_t) domid; + op.u.adjustdom.sched_id = SCHED_CREDIT; + op.u.adjustdom.direction = SCHED_INFO_GET; + + err = do_dom0_op(xc_handle, &op); + if ( err == 0 ) + *sdom = op.u.adjustdom.u.credit; + + return err; +} diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_linux.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/xenstore/xenstored_linux.c Fri May 26 13:41:49 2006 -0600 @@ -0,0 +1,69 @@ +/****************************************************************************** + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
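xc_csched_domain_set() and xc_csched_domain_get() below are the libxc entry points for the credit scheduler's two per-domain knobs: weight, a relative share (default 256 per CSCHED_DEFAULT_WEIGHT), and cap, an upper limit in percent of one physical CPU with 0 meaning uncapped. A management tool might use them as follows; a sketch, with the 512/50 values purely illustrative:

    /* Sketch: double domain's share and cap it at half a physical CPU. */
    int set_credit_params(int xc_handle, uint32_t domid)
    {
        struct csched_domain sdom;

        if ( xc_csched_domain_get(xc_handle, domid, &sdom) != 0 )
            return -1;

        sdom.weight = 512;   /* relative to the default of 256 */
        sdom.cap    = 50;    /* percent of one physical CPU */

        return xc_csched_domain_set(xc_handle, domid, &sdom);
    }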
+ * + * Copyright (C) 2005 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ + +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include "xenstored_core.h" + +#define XENSTORED_PROC_KVA "/proc/xen/xsd_kva" +#define XENSTORED_PROC_PORT "/proc/xen/xsd_port" + +evtchn_port_t xenbus_evtchn(void) +{ + int fd; + int rc; + evtchn_port_t port; + char str[20]; + + fd = open(XENSTORED_PROC_PORT, O_RDONLY); + if (fd == -1) + return -1; + + rc = read(fd, str, sizeof(str)); + if (rc == -1) + { + int err = errno; + close(fd); + errno = err; + return -1; + } + + str[rc] = '\0'; + port = strtoul(str, NULL, 0); + + close(fd); + return port; +} + +void *xenbus_map(void) +{ + int fd; + void *addr; + + fd = open(XENSTORED_PROC_KVA, O_RDWR); + if (fd == -1) + return NULL; + + addr = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE, + MAP_SHARED, fd, 0); + + if (addr == MAP_FAILED) + addr = NULL; + + close(fd); + + return addr; +} diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/sched_credit.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/common/sched_credit.c Fri May 26 13:41:49 2006 -0600 @@ -0,0 +1,1233 @@ +/**************************************************************************** + * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc. + **************************************************************************** + * + * File: common/csched_credit.c + * Author: Emmanuel Ackaouy + * + * Description: Credit-based SMP CPU scheduler + */ + +#include <xen/config.h> +#include <xen/init.h> +#include <xen/lib.h> +#include <xen/sched.h> +#include <xen/domain.h> +#include <xen/delay.h> +#include <xen/event.h> +#include <xen/time.h> +#include <xen/perfc.h> +#include <xen/sched-if.h> +#include <xen/softirq.h> +#include <asm/atomic.h> + + +/* + * CSCHED_STATS + * + * Manage very basic counters and stats. + * + * Useful for debugging live systems. The stats are displayed + * with runq dumps ('r' on the Xen console). 
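The CSCHED_STATS block that follows is an X-macro construction: CSCHED_STATS_EXPAND holds the single authoritative list of counter names and is instantiated once with a field-defining macro and once with a printing macro, so adding a counter is a one-line change. The same trick in miniature, compilable on its own:

    #include <stdio.h>

    #define STATS_EXPAND(_M) \
        _M(schedule)         \
        _M(vcpu_wake)        \
        _M(vcpu_migrate)

    #define STAT_DEFINE(_X)  unsigned int _X;
    #define STAT_PRINT(_X)   printf("\t%-16s = %u\n", #_X, stats._X);

    /* One expansion defines the fields... */
    static struct { STATS_EXPAND(STAT_DEFINE) } stats;

    int main(void)
    {
        stats.schedule++;          /* cf. CSCHED_STAT_CRANK(schedule) */
        stats.vcpu_wake += 2;
        STATS_EXPAND(STAT_PRINT)   /* ...another expansion prints them */
        return 0;
    }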
+ */ +#define CSCHED_STATS + + +/* + * Basic constants + */ +#define CSCHED_TICK 10 /* milliseconds */ +#define CSCHED_TSLICE 30 /* milliseconds */ +#define CSCHED_ACCT_NTICKS 3 +#define CSCHED_ACCT_PERIOD (CSCHED_ACCT_NTICKS * CSCHED_TICK) +#define CSCHED_DEFAULT_WEIGHT 256 + + +/* + * Priorities + */ +#define CSCHED_PRI_TS_UNDER -1 /* time-share w/ credits */ +#define CSCHED_PRI_TS_OVER -2 /* time-share w/o credits */ +#define CSCHED_PRI_IDLE -64 /* idle */ +#define CSCHED_PRI_TS_PARKED -65 /* time-share w/ capped credits */ + + +/* + * Useful macros + */ +#define CSCHED_PCPU(_c) ((struct csched_pcpu *)schedule_data[_c].sched_priv) +#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv) +#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv) +#define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq)) + + +/* + * Stats + */ +#ifdef CSCHED_STATS + +#define CSCHED_STAT(_X) (csched_priv.stats._X) +#define CSCHED_STAT_DEFINE(_X) uint32_t _X; +#define CSCHED_STAT_PRINTK(_X) \ + do \ + { \ + printk("\t%-30s = %u\n", #_X, CSCHED_STAT(_X)); \ + } while ( 0 ); + +#define CSCHED_STATS_EXPAND_SCHED(_MACRO) \ + _MACRO(vcpu_alloc) \ + _MACRO(vcpu_add) \ + _MACRO(vcpu_sleep) \ + _MACRO(vcpu_wake_running) \ + _MACRO(vcpu_wake_onrunq) \ + _MACRO(vcpu_wake_runnable) \ + _MACRO(vcpu_wake_not_runnable) \ + _MACRO(dom_free) \ + _MACRO(schedule) \ + _MACRO(tickle_local_idler) \ + _MACRO(tickle_local_over) \ + _MACRO(tickle_local_under) \ + _MACRO(tickle_local_other) \ + _MACRO(acct_run) \ + _MACRO(acct_no_work) \ + _MACRO(acct_balance) \ + _MACRO(acct_reorder) \ + _MACRO(acct_min_credit) \ + _MACRO(acct_vcpu_active) \ + _MACRO(acct_vcpu_idle) \ + _MACRO(acct_vcpu_credit_min) + +#define CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO) \ + _MACRO(vcpu_migrate) \ + _MACRO(load_balance_idle) \ + _MACRO(load_balance_over) \ + _MACRO(load_balance_other) \ + _MACRO(steal_trylock_failed) \ + _MACRO(steal_peer_down) \ + _MACRO(steal_peer_idle) \ + _MACRO(steal_peer_running) \ + _MACRO(steal_peer_pinned) \ + _MACRO(tickle_idlers_none) \ + _MACRO(tickle_idlers_some) + +#ifndef NDEBUG +#define CSCHED_STATS_EXPAND_CHECKS(_MACRO) \ + _MACRO(vcpu_check) +#else +#define CSCHED_STATS_EXPAND_CHECKS(_MACRO) +#endif + +#define CSCHED_STATS_EXPAND(_MACRO) \ + CSCHED_STATS_EXPAND_SCHED(_MACRO) \ + CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO) \ + CSCHED_STATS_EXPAND_CHECKS(_MACRO) + +#define CSCHED_STATS_RESET() \ + do \ + { \ + memset(&csched_priv.stats, 0, sizeof(csched_priv.stats)); \ + } while ( 0 ) + +#define CSCHED_STATS_DEFINE() \ + struct \ + { \ + CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \ + } stats + +#define CSCHED_STATS_PRINTK() \ + do \ + { \ + printk("stats:\n"); \ + CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \ + } while ( 0 ) + +#define CSCHED_STAT_CRANK(_X) (CSCHED_STAT(_X)++) + +#else /* CSCHED_STATS */ + +#define CSCHED_STATS_RESET() do {} while ( 0 ) +#define CSCHED_STATS_DEFINE() do {} while ( 0 ) +#define CSCHED_STATS_PRINTK() do {} while ( 0 ) +#define CSCHED_STAT_CRANK(_X) do {} while ( 0 ) + +#endif /* CSCHED_STATS */ + + +/* + * Physical CPU + */ +struct csched_pcpu { + struct list_head runq; + uint32_t runq_sort_last; +}; + +/* + * Virtual CPU + */ +struct csched_vcpu { + struct list_head runq_elem; + struct list_head active_vcpu_elem; + struct csched_dom *sdom; + struct vcpu *vcpu; + atomic_t credit; + int credit_last; + uint32_t credit_incr; + uint32_t state_active; + uint32_t state_idle; + int16_t pri; +}; + +/* + * Domain + */ +struct csched_dom { + struct list_head active_vcpu; + struct 
list_head active_sdom_elem; + struct domain *dom; + uint16_t active_vcpu_count; + uint16_t weight; + uint16_t cap; +}; + +/* + * System-wide private data + */ +struct csched_private { + spinlock_t lock; + struct list_head active_sdom; + uint32_t ncpus; + unsigned int master; + cpumask_t idlers; + uint32_t weight; + uint32_t credit; + int credit_balance; + uint32_t runq_sort; + CSCHED_STATS_DEFINE(); +}; + + +/* + * Global variables + */ +static struct csched_private csched_priv; + + + +static inline int +__vcpu_on_runq(struct csched_vcpu *svc) +{ + return !list_empty(&svc->runq_elem); +} + +static inline struct csched_vcpu * +__runq_elem(struct list_head *elem) +{ + return list_entry(elem, struct csched_vcpu, runq_elem); +} + +static inline void +__runq_insert(unsigned int cpu, struct csched_vcpu *svc) +{ + const struct list_head * const runq = RUNQ(cpu); + struct list_head *iter; + + BUG_ON( __vcpu_on_runq(svc) ); + BUG_ON( cpu != svc->vcpu->processor ); + + list_for_each( iter, runq ) + { + const struct csched_vcpu * const iter_svc = __runq_elem(iter); + if ( svc->pri > iter_svc->pri ) + break; + } + + list_add_tail(&svc->runq_elem, iter); +} + +static inline void +__runq_remove(struct csched_vcpu *svc) +{ + BUG_ON( !__vcpu_on_runq(svc) ); + list_del_init(&svc->runq_elem); +} + +static inline void +__runq_tickle(unsigned int cpu, struct csched_vcpu *new) +{ + struct csched_vcpu * const cur = CSCHED_VCPU(schedule_data[cpu].curr); + cpumask_t mask; + + ASSERT(cur); + cpus_clear(mask); + + /* If strictly higher priority than current VCPU, signal the CPU */ + if ( new->pri > cur->pri ) + { + if ( cur->pri == CSCHED_PRI_IDLE ) + CSCHED_STAT_CRANK(tickle_local_idler); + else if ( cur->pri == CSCHED_PRI_TS_OVER ) + CSCHED_STAT_CRANK(tickle_local_over); + else if ( cur->pri == CSCHED_PRI_TS_UNDER ) + CSCHED_STAT_CRANK(tickle_local_under); + else + CSCHED_STAT_CRANK(tickle_local_other); + + cpu_set(cpu, mask); + } + + /* + * If this CPU has at least two runnable VCPUs, we tickle any idlers to + * let them know there is runnable work in the system... + */ + if ( cur->pri > CSCHED_PRI_IDLE ) + { + if ( cpus_empty(csched_priv.idlers) ) + { + CSCHED_STAT_CRANK(tickle_idlers_none); + } + else + { + CSCHED_STAT_CRANK(tickle_idlers_some); + cpus_or(mask, mask, csched_priv.idlers); + } + } + + /* Send scheduler interrupts to designated CPUs */ + if ( !cpus_empty(mask) ) + cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ); +} + +static void +csched_pcpu_init(int cpu) +{ + struct csched_pcpu *spc; + unsigned long flags; + + spin_lock_irqsave(&csched_priv.lock, flags); + + /* Initialize/update system-wide config */ + csched_priv.credit += CSCHED_ACCT_PERIOD; + if ( csched_priv.ncpus <= cpu ) + csched_priv.ncpus = cpu + 1; + if ( csched_priv.master >= csched_priv.ncpus ) + csched_priv.master = cpu; + + /* Allocate per-PCPU info */ + spc = xmalloc(struct csched_pcpu); + BUG_ON( spc == NULL ); + INIT_LIST_HEAD(&spc->runq); + spc->runq_sort_last = csched_priv.runq_sort; + schedule_data[cpu].sched_priv = spc; + + /* Start off idling... 
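__runq_insert() above keeps each runq sorted with a single walk: advance past every entry whose priority is greater than or equal to the new VCPU's, then insert in front of the first strictly-lower entry, which also yields FIFO order among equal priorities. The identical walk over a minimal singly-linked list:

    #include <stdio.h>

    struct node { int pri; struct node *next; };

    /* Insert before the first element of strictly lower priority
     * (FIFO among equals), matching the walk in __runq_insert(). */
    static void insert_sorted(struct node **head, struct node *n)
    {
        struct node **iter = head;

        while ( *iter != NULL && n->pri <= (*iter)->pri )
            iter = &(*iter)->next;
        n->next = *iter;
        *iter = n;
    }

    int main(void)
    {
        struct node a = { -1, NULL }, b = { -2, NULL }, c = { -1, NULL };
        struct node *runq = NULL;
        struct node *p;

        insert_sorted(&runq, &a);   /* UNDER */
        insert_sorted(&runq, &b);   /* OVER  */
        insert_sorted(&runq, &c);   /* UNDER, queued behind a */

        for ( p = runq; p != NULL; p = p->next )
            printf("%d ", p->pri);  /* prints: -1 -1 -2 */
        printf("\n");
        return 0;
    }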
*/ + BUG_ON( !is_idle_vcpu(schedule_data[cpu].curr) ); + cpu_set(cpu, csched_priv.idlers); + + spin_unlock_irqrestore(&csched_priv.lock, flags); +} + +#ifndef NDEBUG +static inline void +__csched_vcpu_check(struct vcpu *vc) +{ + struct csched_vcpu * const svc = CSCHED_VCPU(vc); + struct csched_dom * const sdom = svc->sdom; + + BUG_ON( svc->vcpu != vc ); + BUG_ON( sdom != CSCHED_DOM(vc->domain) ); + if ( sdom ) + { + BUG_ON( is_idle_vcpu(vc) ); + BUG_ON( sdom->dom != vc->domain ); + } + else + { + BUG_ON( !is_idle_vcpu(vc) ); + } + + CSCHED_STAT_CRANK(vcpu_check); +} +#define CSCHED_VCPU_CHECK(_vc) (__csched_vcpu_check(_vc)) +#else +#define CSCHED_VCPU_CHECK(_vc) +#endif + +static inline int +__csched_vcpu_is_stealable(int local_cpu, struct vcpu *vc) +{ + /* + * Don't pick up work that's in the peer's scheduling tail. Also only pick + * up work that's allowed to run on our CPU. + */ + if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) ) + { + CSCHED_STAT_CRANK(steal_peer_running); + return 0; + } + + if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) ) + { + CSCHED_STAT_CRANK(steal_peer_pinned); + return 0; + } + + return 1; +} + +static void +csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec) +{ + struct csched_dom * const sdom = svc->sdom; + unsigned long flags; + + /* Update credits */ + atomic_sub(credit_dec, &svc->credit); + + /* Put this VCPU and domain back on the active list if it was idling */ + if ( list_empty(&svc->active_vcpu_elem) ) + { + spin_lock_irqsave(&csched_priv.lock, flags); + + if ( list_empty(&svc->active_vcpu_elem) ) + { + CSCHED_STAT_CRANK(acct_vcpu_active); + svc->state_active++; + + sdom->active_vcpu_count++; + list_add(&svc->active_vcpu_elem, &sdom->active_vcpu); + if ( list_empty(&sdom->active_sdom_elem) ) + { + list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom); + csched_priv.weight += sdom->weight; + } + } + + spin_unlock_irqrestore(&csched_priv.lock, flags); + } +} + +static inline void +__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc) +{ + struct csched_dom * const sdom = svc->sdom; + + BUG_ON( list_empty(&svc->active_vcpu_elem) ); + + CSCHED_STAT_CRANK(acct_vcpu_idle); + svc->state_idle++; + + sdom->active_vcpu_count--; + list_del_init(&svc->active_vcpu_elem); + if ( list_empty(&sdom->active_vcpu) ) + { + BUG_ON( csched_priv.weight < sdom->weight ); + list_del_init(&sdom->active_sdom_elem); + csched_priv.weight -= sdom->weight; + } + + atomic_set(&svc->credit, 0); +} + +static int +csched_vcpu_alloc(struct vcpu *vc) +{ + struct domain * const dom = vc->domain; + struct csched_dom *sdom; + struct csched_vcpu *svc; + int16_t pri; + + CSCHED_STAT_CRANK(vcpu_alloc); + + /* Allocate, if appropriate, per-domain info */ + if ( is_idle_vcpu(vc) ) + { + sdom = NULL; + pri = CSCHED_PRI_IDLE; + } + else if ( CSCHED_DOM(dom) ) + { + sdom = CSCHED_DOM(dom); + pri = CSCHED_PRI_TS_UNDER; + } + else + { + sdom = xmalloc(struct csched_dom); + if ( !sdom ) + return -1; + + /* Initialize credit and weight */ + INIT_LIST_HEAD(&sdom->active_vcpu); + sdom->active_vcpu_count = 0; + INIT_LIST_HEAD(&sdom->active_sdom_elem); + sdom->dom = dom; + sdom->weight = CSCHED_DEFAULT_WEIGHT; + sdom->cap = 0U; + dom->sched_priv = sdom; + pri = CSCHED_PRI_TS_UNDER; + } + + /* Allocate per-VCPU info */ + svc = xmalloc(struct csched_vcpu); + if ( !svc ) + return -1; + + INIT_LIST_HEAD(&svc->runq_elem); + INIT_LIST_HEAD(&svc->active_vcpu_elem); + svc->sdom = sdom; + svc->vcpu = vc; + atomic_set(&svc->credit, 0); + svc->credit_last = 0; + svc->credit_incr = 0U; + 
svc->state_active = 0U; + svc->state_idle = 0U; + svc->pri = pri; + vc->sched_priv = svc; + + CSCHED_VCPU_CHECK(vc); + + /* Attach fair-share VCPUs to the accounting list */ + if ( likely(sdom != NULL) ) + csched_vcpu_acct(svc, 0); + + return 0; +} + +static void +csched_vcpu_add(struct vcpu *vc) +{ + CSCHED_STAT_CRANK(vcpu_add); + + /* Allocate per-PCPU info */ + if ( unlikely(!CSCHED_PCPU(vc->processor)) ) + csched_pcpu_init(vc->processor); + + CSCHED_VCPU_CHECK(vc); +} + +static void +csched_vcpu_free(struct vcpu *vc) +{ + struct csched_vcpu * const svc = CSCHED_VCPU(vc); + struct csched_dom * const sdom = svc->sdom; + unsigned long flags; + + BUG_ON( sdom == NULL ); + BUG_ON( !list_empty(&svc->runq_elem) ); + + spin_lock_irqsave(&csched_priv.lock, flags); + + if ( !list_empty(&svc->active_vcpu_elem) ) + __csched_vcpu_acct_idle_locked(svc); + + spin_unlock_irqrestore(&csched_priv.lock, flags); + + xfree(svc); +} + +static void +csched_vcpu_sleep(struct vcpu *vc) +{ + struct csched_vcpu * const svc = CSCHED_VCPU(vc); + + CSCHED_STAT_CRANK(vcpu_sleep); + + BUG_ON( is_idle_vcpu(vc) ); + + if ( schedule_data[vc->processor].curr == vc ) + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + else if ( __vcpu_on_runq(svc) ) + __runq_remove(svc); +} + +static void +csched_vcpu_wake(struct vcpu *vc) +{ + struct csched_vcpu * const svc = CSCHED_VCPU(vc); + const unsigned int cpu = vc->processor; + + BUG_ON( is_idle_vcpu(vc) ); + + if ( unlikely(schedule_data[cpu].curr == vc) ) + { + CSCHED_STAT_CRANK(vcpu_wake_running); + return; + } + if ( unlikely(__vcpu_on_runq(svc)) ) + { + CSCHED_STAT_CRANK(vcpu_wake_onrunq); + return; + } + + if ( likely(vcpu_runnable(vc)) ) + CSCHED_STAT_CRANK(vcpu_wake_runnable); + else + CSCHED_STAT_CRANK(vcpu_wake_not_runnable); + + /* Put the VCPU on the runq and tickle CPUs */ + __runq_insert(cpu, svc); + __runq_tickle(cpu, svc); +} + +static int +csched_vcpu_set_affinity(struct vcpu *vc, cpumask_t *affinity) +{ + unsigned long flags; + int lcpu; + + if ( vc == current ) + { + /* No locking needed but also can't move on the spot... */ + if ( !cpu_isset(vc->processor, *affinity) ) + return -EBUSY; + + vc->cpu_affinity = *affinity; + } + else + { + /* Pause, modify, and unpause. */ + vcpu_pause(vc); + + vc->cpu_affinity = *affinity; + if ( !cpu_isset(vc->processor, vc->cpu_affinity) ) + { + /* + * We must grab the scheduler lock for the CPU currently owning + * this VCPU before changing its ownership. 
+ */ + vcpu_schedule_lock_irqsave(vc, flags); + lcpu = vc->processor; + + vc->processor = first_cpu(vc->cpu_affinity); + + spin_unlock_irqrestore(&schedule_data[lcpu].schedule_lock, flags); + } + + vcpu_unpause(vc); + } + + return 0; +} + +static int +csched_dom_cntl( + struct domain *d, + struct sched_adjdom_cmd *cmd) +{ + struct csched_dom * const sdom = CSCHED_DOM(d); + unsigned long flags; + + if ( cmd->direction == SCHED_INFO_GET ) + { + cmd->u.credit.weight = sdom->weight; + cmd->u.credit.cap = sdom->cap; + } + else + { + ASSERT( cmd->direction == SCHED_INFO_PUT ); + + spin_lock_irqsave(&csched_priv.lock, flags); + + if ( cmd->u.credit.weight != 0 ) + { + csched_priv.weight -= sdom->weight; + sdom->weight = cmd->u.credit.weight; + csched_priv.weight += sdom->weight; + } + + if ( cmd->u.credit.cap != (uint16_t)~0U ) + sdom->cap = cmd->u.credit.cap; + + spin_unlock_irqrestore(&csched_priv.lock, flags); + } + + return 0; +} + +static void +csched_dom_free(struct domain *dom) +{ + struct csched_dom * const sdom = CSCHED_DOM(dom); + int i; + + CSCHED_STAT_CRANK(dom_free); + + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + { + if ( dom->vcpu[i] ) + csched_vcpu_free(dom->vcpu[i]); + } + + xfree(sdom); +} + +/* + * This is a O(n) optimized sort of the runq. + * + * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk + * through the runq and move up any UNDERs that are preceded by OVERS. We + * remember the last UNDER to make the move up operation O(1). + */ +static void +csched_runq_sort(unsigned int cpu) +{ + struct csched_pcpu * const spc = CSCHED_PCPU(cpu); + struct list_head *runq, *elem, *next, *last_under; + struct csched_vcpu *svc_elem; + unsigned long flags; + int sort_epoch; + + sort_epoch = csched_priv.runq_sort; + if ( sort_epoch == spc->runq_sort_last ) + return; + + spc->runq_sort_last = sort_epoch; + + spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags); + + runq = &spc->runq; + elem = runq->next; + last_under = runq; + + while ( elem != runq ) + { + next = elem->next; + svc_elem = __runq_elem(elem); + + if ( svc_elem->pri == CSCHED_PRI_TS_UNDER ) + { + /* does elem need to move up the runq? 
*/ + if ( elem->prev != last_under ) + { + list_del(elem); + list_add(elem, last_under); + } + last_under = elem; + } + + elem = next; + } + + spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags); +} + +static void +csched_acct(void) +{ + unsigned long flags; + struct list_head *iter_vcpu, *next_vcpu; + struct list_head *iter_sdom, *next_sdom; + struct csched_vcpu *svc; + struct csched_dom *sdom; + uint32_t credit_total; + uint32_t weight_total; + uint32_t weight_left; + uint32_t credit_fair; + uint32_t credit_peak; + int credit_balance; + int credit_xtra; + int credit; + + + spin_lock_irqsave(&csched_priv.lock, flags); + + weight_total = csched_priv.weight; + credit_total = csched_priv.credit; + + /* Converge balance towards 0 when it drops negative */ + if ( csched_priv.credit_balance < 0 ) + { + credit_total -= csched_priv.credit_balance; + CSCHED_STAT_CRANK(acct_balance); + } + + if ( unlikely(weight_total == 0) ) + { + csched_priv.credit_balance = 0; + spin_unlock_irqrestore(&csched_priv.lock, flags); + CSCHED_STAT_CRANK(acct_no_work); + return; + } + + CSCHED_STAT_CRANK(acct_run); + + weight_left = weight_total; + credit_balance = 0; + credit_xtra = 0; + + list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom ) + { + sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem); + + BUG_ON( is_idle_domain(sdom->dom) ); + BUG_ON( sdom->active_vcpu_count == 0 ); + BUG_ON( sdom->weight == 0 ); + BUG_ON( sdom->weight > weight_left ); + + weight_left -= sdom->weight; + + /* + * A domain's fair share is computed using its weight in competition + * with that of all other active domains. + * + * At most, a domain can use credits to run all its active VCPUs + * for one full accounting period. We allow a domain to earn more + * only when the system-wide credit balance is negative. + */ + credit_peak = sdom->active_vcpu_count * CSCHED_ACCT_PERIOD; + if ( csched_priv.credit_balance < 0 ) + { + credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) + + (weight_total - 1) + ) / weight_total; + } + if ( sdom->cap != 0U ) + { + uint32_t credit_cap = ((sdom->cap * CSCHED_ACCT_PERIOD) + 99) / 100; + if ( credit_cap < credit_peak ) + credit_peak = credit_cap; + } + + credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1) + ) / weight_total; + + if ( credit_fair < credit_peak ) + { + credit_xtra = 1; + } + else + { + if ( weight_left != 0U ) + { + /* Give other domains a chance at unused credits */ + credit_total += ( ( ( credit_fair - credit_peak + ) * weight_total + ) + ( weight_left - 1 ) + ) / weight_left; + } + + if ( credit_xtra ) + { + /* + * Lazily keep domains with extra credits at the head of + * the queue to give others a chance at them in future + * accounting periods. + */ + CSCHED_STAT_CRANK(acct_reorder); + list_del(&sdom->active_sdom_elem); + list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom); + } + + credit_fair = credit_peak; + } + + /* Compute fair share per VCPU */ + credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 ) + ) / sdom->active_vcpu_count; + + + list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu ) + { + svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem); + BUG_ON( sdom != svc->sdom ); + + /* Increment credit */ + atomic_add(credit_fair, &svc->credit); + credit = atomic_read(&svc->credit); + + /* + * Recompute priority or, if VCPU is idling, remove it from + * the active list. 
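The fair-share computation in csched_acct() above is ceiling division of the period's total credit by relative weight, in the usual (x + d - 1) / d form, afterwards clipped against the peak a domain's active VCPUs could consume and against any cap. Worked numbers for two uncapped, single-VCPU domains over one 30ms accounting period (CSCHED_ACCT_PERIOD = 3 ticks * 10ms):

    #include <stdio.h>

    int main(void)
    {
        /* Two active domains, weights 256 and 512; 30 credits to share. */
        unsigned int credit_total = 30, weight_total = 256 + 512;

        unsigned int fair_a =
            (credit_total * 256 + weight_total - 1) / weight_total;
        unsigned int fair_b =
            (credit_total * 512 + weight_total - 1) / weight_total;

        printf("dom A fair share: %u\n", fair_a); /* 10 */
        printf("dom B fair share: %u\n", fair_b); /* 20 */
        return 0;
    }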
+
+static void
+csched_acct(void)
+{
+    unsigned long flags;
+    struct list_head *iter_vcpu, *next_vcpu;
+    struct list_head *iter_sdom, *next_sdom;
+    struct csched_vcpu *svc;
+    struct csched_dom *sdom;
+    uint32_t credit_total;
+    uint32_t weight_total;
+    uint32_t weight_left;
+    uint32_t credit_fair;
+    uint32_t credit_peak;
+    int credit_balance;
+    int credit_xtra;
+    int credit;
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    weight_total = csched_priv.weight;
+    credit_total = csched_priv.credit;
+
+    /* Converge balance towards 0 when it drops negative */
+    if ( csched_priv.credit_balance < 0 )
+    {
+        credit_total -= csched_priv.credit_balance;
+        CSCHED_STAT_CRANK(acct_balance);
+    }
+
+    if ( unlikely(weight_total == 0) )
+    {
+        csched_priv.credit_balance = 0;
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+        CSCHED_STAT_CRANK(acct_no_work);
+        return;
+    }
+
+    CSCHED_STAT_CRANK(acct_run);
+
+    weight_left = weight_total;
+    credit_balance = 0;
+    credit_xtra = 0;
+
+    list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+    {
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+        BUG_ON( is_idle_domain(sdom->dom) );
+        BUG_ON( sdom->active_vcpu_count == 0 );
+        BUG_ON( sdom->weight == 0 );
+        BUG_ON( sdom->weight > weight_left );
+
+        weight_left -= sdom->weight;
+
+        /*
+         * A domain's fair share is computed using its weight in competition
+         * with that of all other active domains.
+         *
+         * At most, a domain can use credits to run all its active VCPUs
+         * for one full accounting period. We allow a domain to earn more
+         * only when the system-wide credit balance is negative.
+         */
+        credit_peak = sdom->active_vcpu_count * CSCHED_ACCT_PERIOD;
+        if ( csched_priv.credit_balance < 0 )
+        {
+            credit_peak += ( ( -csched_priv.credit_balance * sdom->weight )
+                             + ( weight_total - 1 )
+                           ) / weight_total;
+        }
+        if ( sdom->cap != 0U )
+        {
+            uint32_t credit_cap = ((sdom->cap * CSCHED_ACCT_PERIOD) + 99) / 100;
+            if ( credit_cap < credit_peak )
+                credit_peak = credit_cap;
+        }
+
+        credit_fair = ( ( credit_total * sdom->weight )
+                        + ( weight_total - 1 )
+                      ) / weight_total;
+
+        if ( credit_fair < credit_peak )
+        {
+            credit_xtra = 1;
+        }
+        else
+        {
+            if ( weight_left != 0U )
+            {
+                /* Give other domains a chance at unused credits */
+                credit_total += ( ( ( credit_fair - credit_peak
+                                    ) * weight_total
+                                  ) + ( weight_left - 1 )
+                                ) / weight_left;
+            }
+
+            if ( credit_xtra )
+            {
+                /*
+                 * Lazily keep domains with extra credits at the head of
+                 * the queue to give others a chance at them in future
+                 * accounting periods.
+                 */
+                CSCHED_STAT_CRANK(acct_reorder);
+                list_del(&sdom->active_sdom_elem);
+                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+            }
+
+            credit_fair = credit_peak;
+        }
+
+        /* Compute fair share per VCPU */
+        credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
+                      ) / sdom->active_vcpu_count;
+
+        list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
+        {
+            svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
+            BUG_ON( sdom != svc->sdom );
+
+            /* Increment credit */
+            atomic_add(credit_fair, &svc->credit);
+            credit = atomic_read(&svc->credit);
+
+            /*
+             * Recompute priority or, if VCPU is idling, remove it from
+             * the active list.
+             */
+            if ( credit < 0 )
+            {
+                if ( sdom->cap == 0U )
+                    svc->pri = CSCHED_PRI_TS_OVER;
+                else
+                    svc->pri = CSCHED_PRI_TS_PARKED;
+
+                if ( credit < -CSCHED_TSLICE )
+                {
+                    CSCHED_STAT_CRANK(acct_min_credit);
+                    credit = -CSCHED_TSLICE;
+                    atomic_set(&svc->credit, credit);
+                }
+            }
+            else
+            {
+                svc->pri = CSCHED_PRI_TS_UNDER;
+
+                if ( credit > CSCHED_TSLICE )
+                    __csched_vcpu_acct_idle_locked(svc);
+            }
+
+            svc->credit_last = credit;
+            svc->credit_incr = credit_fair;
+            credit_balance += credit;
+        }
+    }
+
+    csched_priv.credit_balance = credit_balance;
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    /* Inform each CPU that its runq needs to be sorted */
+    csched_priv.runq_sort++;
+}
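The divisions above all round up in the recipient's favour (x * w / W, rounded up), and the cap is read as a percentage of one accounting period (30ms per the comment in csched_tick() below), so cap=50 yields (50 * 30 + 99) / 100 = 15 credits. Here is a compilable sketch of the fair-share formula with invented numbers; only the expression itself comes from csched_acct():

    /* Editorial sketch: credit split between two domains weighted 1:3.
     * The total credit value of 60 is made up for illustration. */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t fair_share(uint32_t credit_total, uint32_t weight,
                               uint32_t weight_total)
    {
        /* Same round-up division as csched_acct(). */
        return (credit_total * weight + (weight_total - 1)) / weight_total;
    }

    int main(void)
    {
        printf("%u %u\n",
               fair_share(60, 256, 1024),    /* 15 */
               fair_share(60, 768, 1024));   /* 45 */
        return 0;
    }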
+
+static void
+csched_tick(unsigned int cpu)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(current);
+    struct csched_dom * const sdom = svc->sdom;
+
+    /*
+     * Accounting for running VCPU
+     *
+     * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
+     */
+    if ( likely(sdom != NULL) )
+    {
+        csched_vcpu_acct(svc, CSCHED_TICK);
+    }
+
+    /*
+     * Accounting duty
+     *
+     * Note: Currently, this is always done by the master boot CPU. Eventually,
+     * we could distribute or at the very least cycle the duty.
+     */
+    if ( (csched_priv.master == cpu) &&
+         (schedule_data[cpu].tick % CSCHED_ACCT_NTICKS) == 0 )
+    {
+        csched_acct();
+    }
+
+    /*
+     * Check if runq needs to be sorted
+     *
+     * Every physical CPU resorts the runq after the accounting master has
+     * modified priorities. This is a special O(n) sort and runs at most
+     * once per accounting period (currently 30 milliseconds).
+     */
+    csched_runq_sort(cpu);
+}
+
+static struct csched_vcpu *
+csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
+{
+    struct list_head *iter;
+    struct csched_vcpu *speer;
+    struct vcpu *vc;
+
+    list_for_each( iter, &spc->runq )
+    {
+        speer = __runq_elem(iter);
+
+        /*
+         * If next available VCPU here is not of higher priority than ours,
+         * this PCPU is useless to us.
+         */
+        if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
+        {
+            CSCHED_STAT_CRANK(steal_peer_idle);
+            break;
+        }
+
+        /* Is this VCPU runnable on our PCPU? */
+        vc = speer->vcpu;
+        BUG_ON( is_idle_vcpu(vc) );
+
+        if ( __csched_vcpu_is_stealable(cpu, vc) )
+        {
+            /* We got a candidate. Grab it! */
+            __runq_remove(speer);
+            vc->processor = cpu;
+
+            return speer;
+        }
+    }
+
+    return NULL;
+}
+
+static struct csched_vcpu *
+csched_load_balance(int cpu, struct csched_vcpu *snext)
+{
+    struct csched_pcpu *spc;
+    struct csched_vcpu *speer;
+    int peer_cpu;
+
+    if ( snext->pri == CSCHED_PRI_IDLE )
+        CSCHED_STAT_CRANK(load_balance_idle);
+    else if ( snext->pri == CSCHED_PRI_TS_OVER )
+        CSCHED_STAT_CRANK(load_balance_over);
+    else
+        CSCHED_STAT_CRANK(load_balance_other);
+
+    peer_cpu = cpu;
+    BUG_ON( peer_cpu != snext->vcpu->processor );
+
+    while ( 1 )
+    {
+        /* For each PCPU in the system starting with our neighbour... */
+        peer_cpu = (peer_cpu + 1) % csched_priv.ncpus;
+        if ( peer_cpu == cpu )
+            break;
+
+        BUG_ON( peer_cpu >= csched_priv.ncpus );
+        BUG_ON( peer_cpu == cpu );
+
+        /*
+         * Get a hold of the scheduler lock for this peer CPU.
+         *
+         * Note: We don't spin on this lock but simply try it. Spinning could
+         * cause a deadlock if the peer CPU is also load balancing and trying
+         * to lock this CPU.
+         */
+        if ( spin_trylock(&schedule_data[peer_cpu].schedule_lock) )
+        {
+            spc = CSCHED_PCPU(peer_cpu);
+            if ( unlikely(spc == NULL) )
+            {
+                CSCHED_STAT_CRANK(steal_peer_down);
+                speer = NULL;
+            }
+            else
+            {
+                speer = csched_runq_steal(spc, cpu, snext->pri);
+            }
+
+            spin_unlock(&schedule_data[peer_cpu].schedule_lock);
+
+            /* Got one! */
+            if ( speer )
+            {
+                CSCHED_STAT_CRANK(vcpu_migrate);
+                return speer;
+            }
+        }
+        else
+        {
+            CSCHED_STAT_CRANK(steal_trylock_failed);
+        }
+    }
+
+    /* Failed to find more important work */
+    __runq_remove(snext);
+    return snext;
+}
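The trylock is the standard defence against AB-BA deadlock: two CPUs balancing toward each other, each holding its own schedule_lock, would spin forever if they blocked on the peer lock. An editorial sketch of the pattern with POSIX mutexes (not Xen code):

    /* Model of the trylock-and-back-off pattern in csched_load_balance(). */
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

    /* Balance from 'mine' toward 'peer', giving up if the peer is busy. */
    static int try_balance(pthread_mutex_t *mine, pthread_mutex_t *peer)
    {
        int stolen = 0;

        pthread_mutex_lock(mine);
        if ( pthread_mutex_trylock(peer) == 0 )  /* never spins: no deadlock */
        {
            stolen = 1;                          /* ...steal work here... */
            pthread_mutex_unlock(peer);
        }
        pthread_mutex_unlock(mine);
        return stolen;
    }

    int main(void)
    {
        printf("%d\n", try_balance(&lock_a, &lock_b));  /* prints 1 */
        return 0;
    }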
+
+/*
+ * This function is in the critical path. It is designed to be simple and
+ * fast for the common case.
+ */
+static struct task_slice
+csched_schedule(s_time_t now)
+{
+    const int cpu = smp_processor_id();
+    struct list_head * const runq = RUNQ(cpu);
+    struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+    struct csched_vcpu *snext;
+    struct task_slice ret;
+
+    CSCHED_STAT_CRANK(schedule);
+    CSCHED_VCPU_CHECK(current);
+
+    /*
+     * Select next runnable local VCPU (ie top of local runq)
+     */
+    if ( vcpu_runnable(current) )
+        __runq_insert(cpu, scurr);
+    else
+        BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
+
+    snext = __runq_elem(runq->next);
+
+    /*
+     * SMP Load balance:
+     *
+     * If the next highest priority local runnable VCPU has already eaten
+     * through its credits, look on other PCPUs to see if we have more
+     * urgent work... If not, csched_load_balance() will return snext, but
+     * already removed from the runq.
+     */
+    if ( snext->pri > CSCHED_PRI_TS_OVER )
+        __runq_remove(snext);
+    else
+        snext = csched_load_balance(cpu, snext);
+
+    /*
+     * Update idlers mask if necessary. When we're idling, other CPUs
+     * will tickle us when they get extra work.
+     */
+    if ( snext->pri == CSCHED_PRI_IDLE )
+    {
+        if ( !cpu_isset(cpu, csched_priv.idlers) )
+            cpu_set(cpu, csched_priv.idlers);
+    }
+    else if ( cpu_isset(cpu, csched_priv.idlers) )
+    {
+        cpu_clear(cpu, csched_priv.idlers);
+    }
+
+    /*
+     * Return task to run next...
+     */
+    ret.time = MILLISECS(CSCHED_TSLICE);
+    ret.task = snext->vcpu;
+
+    CSCHED_VCPU_CHECK(ret.task);
+    BUG_ON( !vcpu_runnable(ret.task) );
+
+    return ret;
+}
+
+static void
+csched_dump_vcpu(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+
+    printk("[%i.%i] pri=%i cpu=%i",
+           svc->vcpu->domain->domain_id,
+           svc->vcpu->vcpu_id,
+           svc->pri,
+           svc->vcpu->processor);
+
+    if ( sdom )
+    {
+        printk(" credit=%i (%d+%u) {a=%u i=%u w=%u}",
+               atomic_read(&svc->credit),
+               svc->credit_last,
+               svc->credit_incr,
+               svc->state_active,
+               svc->state_idle,
+               sdom->weight);
+    }
+
+    printk("\n");
+}
+
+static void
+csched_dump_pcpu(int cpu)
+{
+    struct list_head *runq, *iter;
+    struct csched_pcpu *spc;
+    struct csched_vcpu *svc;
+    int loop;
+
+    spc = CSCHED_PCPU(cpu);
+    runq = &spc->runq;
+
+    printk(" tick=%lu, sort=%d\n",
+           schedule_data[cpu].tick,
+           spc->runq_sort_last);
+
+    /* current VCPU */
+    svc = CSCHED_VCPU(schedule_data[cpu].curr);
+    if ( svc )
+    {
+        printk("\trun: ");
+        csched_dump_vcpu(svc);
+    }
+
+    loop = 0;
+    list_for_each( iter, runq )
+    {
+        svc = __runq_elem(iter);
+        if ( svc )
+        {
+            printk("\t%3d: ", ++loop);
+            csched_dump_vcpu(svc);
+        }
+    }
+}
+
+static void
+csched_dump(void)
+{
+    struct list_head *iter_sdom, *iter_svc;
+    int loop;
+
+    printk("info:\n"
+           "\tncpus = %u\n"
+           "\tmaster = %u\n"
+           "\tcredit = %u\n"
+           "\tcredit balance = %d\n"
+           "\tweight = %u\n"
+           "\trunq_sort = %u\n"
+           "\ttick = %dms\n"
+           "\ttslice = %dms\n"
+           "\taccounting period = %dms\n"
+           "\tdefault-weight = %d\n",
+           csched_priv.ncpus,
+           csched_priv.master,
+           csched_priv.credit,
+           csched_priv.credit_balance,
+           csched_priv.weight,
+           csched_priv.runq_sort,
+           CSCHED_TICK,
+           CSCHED_TSLICE,
+           CSCHED_ACCT_PERIOD,
+           CSCHED_DEFAULT_WEIGHT);
+
+    printk("idlers: 0x%lx\n", csched_priv.idlers.bits[0]);
+
+    CSCHED_STATS_PRINTK();
+
+    printk("active vcpus:\n");
+    loop = 0;
+    list_for_each( iter_sdom, &csched_priv.active_sdom )
+    {
+        struct csched_dom *sdom;
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+        list_for_each( iter_svc, &sdom->active_vcpu )
+        {
+            struct csched_vcpu *svc;
+            svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
+
+            printk("\t%3d: ", ++loop);
+            csched_dump_vcpu(svc);
+        }
+    }
+}
+
+static void
+csched_init(void)
+{
+    spin_lock_init(&csched_priv.lock);
+    INIT_LIST_HEAD(&csched_priv.active_sdom);
+    csched_priv.ncpus = 0;
+    csched_priv.master = UINT_MAX;
+    cpus_clear(csched_priv.idlers);
+    csched_priv.weight = 0U;
+    csched_priv.credit = 0U;
+    csched_priv.credit_balance = 0;
+    csched_priv.runq_sort = 0U;
+    CSCHED_STATS_RESET();
+}
+
+struct scheduler sched_credit_def = {
+    .name           = "SMP Credit Scheduler",
+    .opt_name       = "credit",
+    .sched_id       = SCHED_CREDIT,
+
+    .alloc_task     = csched_vcpu_alloc,
+    .add_task       = csched_vcpu_add,
+    .sleep          = csched_vcpu_sleep,
+    .wake           = csched_vcpu_wake,
+    .set_affinity   = csched_vcpu_set_affinity,
+
+    .adjdom         = csched_dom_cntl,
+    .free_task      = csched_dom_free,
+
+    .tick           = csched_tick,
+    .do_schedule    = csched_schedule,
+
+    .dump_cpu_state = csched_dump_pcpu,
+    .dump_settings  = csched_dump,
+    .init           = csched_init,
+};
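sched_credit_def is the file's only external interface: a table of hooks handed to the generic scheduler layer. Selection presumably works by matching a boot-time option string against .opt_name (hence "credit"); the standalone model below illustrates that lookup pattern and is not quoted from xen/common/schedule.c:

    /* Editorial model of scheduler selection by opt_name; the table
     * contents and the "bvt" entry are illustrative assumptions. */
    #include <stdio.h>
    #include <string.h>

    struct sched_entry {
        const char *opt_name;    /* mirrors struct scheduler's opt_name */
    };

    static struct sched_entry credit = { "credit" };
    static struct sched_entry bvt    = { "bvt" };

    static struct sched_entry *schedulers[] = { &bvt, &credit };

    static struct sched_entry *find_scheduler(const char *opt)
    {
        unsigned int i;

        for ( i = 0; i < sizeof(schedulers) / sizeof(schedulers[0]); i++ )
            if ( strcmp(schedulers[i]->opt_name, opt) == 0 )
                return schedulers[i];

        return NULL;    /* caller falls back to the default scheduler */
    }

    int main(void)
    {
        printf("%s\n", find_scheduler("credit")->opt_name);  /* credit */
        return 0;
    }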
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/net_driver_util.c
--- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c	Thu May 25 15:59:18 2006 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/if_ether.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <xen/net_driver_util.h>
-
-
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
-{
-	char *s;
-	int i;
-	char *e;
-	char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
-	if (IS_ERR(macstr))
-		return PTR_ERR(macstr);
-	s = macstr;
-	for (i = 0; i < ETH_ALEN; i++) {
-		mac[i] = simple_strtoul(s, &e, 16);
-		if (s == e || (e[0] != ':' && e[0] != 0)) {
-			kfree(macstr);
-			return -ENOENT;
-		}
-		s = &e[1];
-	}
-	kfree(macstr);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(xen_net_read_mac);
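The deleted helper read the device's "mac" store node and parsed it as colon-separated hex octets into an ETH_ALEN byte array, returning -ENOENT on malformed input. Netfront and netback both change in this same changeset (see the diffstat), presumably absorbing equivalent parsing. A hypothetical caller, shown only to document the contract of the removed function:

    /* Hypothetical probe-time caller (editorial illustration only). */
    static int example_probe(struct xenbus_device *dev)
    {
    	u8 mac[ETH_ALEN];
    	int err;

    	err = xen_net_read_mac(dev, mac);  /* parses "aa:bb:cc:dd:ee:ff" */
    	if (err)
    		return err;                /* -ENOENT on a malformed address */

    	/* ... hand mac[] to the network device ... */
    	return 0;
    }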
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/asm-x86_64/e820.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/e820.h	Thu May 25 15:59:18 2006 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-/*
- * structures and definitions for the int 15, ax=e820 memory map
- * scheme.
- *
- * In a nutshell, setup.S populates a scratch table in the
- * empty_zero_block that contains a list of usable address/size
- * duples. In setup.c, this information is transferred into the e820map,
- * and in init.c/numa.c, that new information is used to mark pages
- * reserved or not.
- */
-#ifndef __E820_HEADER
-#define __E820_HEADER
-
-#include <linux/mmzone.h>
-
-#define E820MAP	0x2d0	/* our map */
-#define E820MAX	128	/* number of entries in E820MAP */
-#define E820NR	0x1e8	/* # entries in E820MAP */
-
-#define E820_RAM	1
-#define E820_RESERVED	2
-#define E820_ACPI	3 /* usable as RAM once ACPI tables have been read */
-#define E820_NVS	4
-
-#define HIGH_MEMORY	(1024*1024)
-
-#define LOWMEMSIZE()	(0x9f000)
-
-#ifndef __ASSEMBLY__
-struct e820entry {
-	u64 addr;	/* start of memory segment */
-	u64 size;	/* size of memory segment */
-	u32 type;	/* type of memory segment */
-} __attribute__((packed));
-
-struct e820map {
-	int nr_map;
-	struct e820entry map[E820MAX];
-};
-
-extern unsigned long find_e820_area(unsigned long start, unsigned long end,
-				    unsigned size);
-extern void add_memory_region(unsigned long start, unsigned long size,
-			      int type);
-extern void setup_memory_region(void);
-extern void contig_e820_setup(void);
-extern unsigned long e820_end_of_ram(void);
-extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
-extern void e820_print_map(char *who);
-extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
-
-extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
-extern void e820_setup_gap(struct e820entry *e820, int nr_map);
-extern unsigned long e820_hole_size(unsigned long start_pfn,
-				    unsigned long end_pfn);
-
-extern void __init parse_memopt(char *p, char **end);
-extern void __init parse_memmapopt(char *p, char **end);
-
-extern struct e820map e820;
-#endif/*!__ASSEMBLY__*/
-
-#endif/*__E820_HEADER*/
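Since struct e820map above is just a counted array of (addr, size, type) entries, consumers walk it directly. An editorial sketch against the deleted header's types, not code from the patch:

    /* Illustration only: total usable RAM reported by an e820map. */
    static unsigned long long e820_ram_bytes(const struct e820map *m)
    {
    	unsigned long long total = 0;
    	int i;

    	for (i = 0; i < m->nr_map; i++)
    		if (m->map[i].type == E820_RAM)
    			total += m->map[i].size;

    	return total;
    }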
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/xen/net_driver_util.h
--- a/linux-2.6-xen-sparse/include/xen/net_driver_util.h	Thu May 25 15:59:18 2006 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _ASM_XEN_NET_DRIVER_UTIL_H
-#define _ASM_XEN_NET_DRIVER_UTIL_H
-
-
-#include <xen/xenbus.h>
-
-
-/**
- * Read the 'mac' node at the given device's node in the store, and parse
- * that as colon-separated octets, placing the result in the given mac array.
- * mac must be a preallocated array of length ETH_ALEN (as declared in
- * linux/if_ether.h). Return 0 on success, or -errno on error.
- */
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
-
-
-#endif /* _ASM_XEN_NET_DRIVER_UTIL_H */
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_proc.h
--- a/tools/xenstore/xenstored_proc.h	Thu May 25 15:59:18 2006 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-/*
-    Copyright (C) 2005 XenSource Ltd
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-*/
-
-#ifndef _XENSTORED_PROC_H
-#define _XENSTORED_PROC_H
-
-#define XENSTORED_PROC_KVA "/proc/xen/xsd_kva"
-#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
-
-
-#endif /* _XENSTORED_PROC_H */
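xenstored_proc.h only named the two /proc files through which a Linux dom0 exposes the xenstore interface; the new tools/xenstore/xenstored_linux.c in this changeset presumably takes over those paths behind a portability layer. The sketch below shows what a consumer of these files plausibly does (map the xenstore ring page, then read the event-channel port), assuming xsd_kva is mmap()able and xsd_port holds a decimal port number; it is an editorial illustration, not xenstored code:

    /* Illustration only: open the two /proc files named above. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"
    #define XENSTORED_PROC_PORT "/proc/xen/xsd_port"

    int main(void)
    {
    	char buf[16];
    	int fd, n, port;
    	void *ring;

    	fd = open(XENSTORED_PROC_KVA, O_RDWR);
    	if (fd < 0)
    		return 1;
    	ring = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
    		    MAP_SHARED, fd, 0);
    	close(fd);
    	if (ring == MAP_FAILED)
    		return 1;

    	fd = open(XENSTORED_PROC_PORT, O_RDONLY);
    	if (fd < 0)
    		return 1;
    	n = read(fd, buf, sizeof(buf) - 1);
    	close(fd);
    	if (n <= 0)
    		return 1;
    	buf[n] = '\0';
    	port = atoi(buf);

    	printf("ring mapped at %p, event channel port %d\n", ring, port);
    	return 0;
    }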
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog