[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Merge with xenppc-unstable.
# HG changeset patch # User kfraser@xxxxxxxxxxxxxxxxxxxxx # Node ID b60ea69932b1a4d10c3aae945a1ce1aa160c689b # Parent e4f1519b473f6cda34496e491646d8683a410cae # Parent 15304ad81c5068e0d4115623cf063c62add22407 Merge with xenppc-unstable. --- buildconfigs/linux-defconfig_xen0_ia64 | 3 buildconfigs/linux-defconfig_xen0_x86_32 | 1 buildconfigs/linux-defconfig_xen0_x86_64 | 1 buildconfigs/linux-defconfig_xen_ia64 | 3 buildconfigs/linux-defconfig_xen_x86_32 | 1 buildconfigs/linux-defconfig_xen_x86_64 | 1 linux-2.6-xen-sparse/drivers/xen/Kconfig | 11 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 9 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c | 23 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 50 linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 4 linux-2.6-xen-sparse/drivers/xen/pciback/Makefile | 1 linux-2.6-xen-sparse/drivers/xen/pciback/slot.c | 151 ++ linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h | 3 tools/Makefile | 2 tools/examples/xend-config.sxp | 2 tools/examples/xmexample.hvm | 4 tools/firmware/Makefile | 2 tools/ioemu/patches/domain-destroy | 10 tools/ioemu/patches/domain-reset | 10 tools/ioemu/patches/domain-timeoffset | 8 tools/ioemu/patches/qemu-target-i386-dm | 25 tools/ioemu/patches/series | 2 tools/ioemu/patches/vnc-display-find-unused | 101 + tools/ioemu/patches/xen-support-buffered-ioreqs | 77 + tools/ioemu/patches/xenstore-block-device-config | 45 tools/ioemu/patches/xenstore-write-vnc-port | 10 tools/ioemu/target-i386-dm/helper2.c | 107 + tools/ioemu/vl.c | 20 tools/ioemu/vl.h | 2 tools/ioemu/vnc.c | 17 tools/libxc/xc_hvm_build.c | 46 tools/libxc/xc_linux_build.c | 6 tools/pygrub/src/GrubConf.py | 72 - tools/pygrub/src/pygrub | 541 +++++++--- tools/python/xen/util/xmlrpclib2.py | 5 tools/python/xen/xend/XendDomain.py | 4 tools/python/xen/xend/image.py | 5 tools/python/xen/xend/sxp.py | 2 tools/python/xen/xend/tests/test_sxp.py | 21 tools/python/xen/xend/tests/xend-config.sxp | 132 ++ tools/python/xen/xm/create.py 
| 61 + tools/python/xen/xm/main.py | 10 tools/python/xen/xm/tests/test_create.py | 3 tools/xenstat/libxenstat/src/xenstat.c | 101 + xen/arch/ia64/xen/hypercall.c | 2 xen/arch/ia64/xen/xensetup.c | 3 xen/arch/powerpc/setup.c | 2 xen/arch/x86/domain.c | 2 xen/arch/x86/flushtlb.c | 4 xen/arch/x86/hvm/hvm.c | 58 - xen/arch/x86/hvm/intercept.c | 68 + xen/arch/x86/hvm/io.c | 89 - xen/arch/x86/hvm/platform.c | 54 xen/arch/x86/hvm/svm/svm.c | 6 xen/arch/x86/hvm/svm/vmcb.c | 34 xen/arch/x86/hvm/svm/x86_32/exits.S | 3 xen/arch/x86/hvm/svm/x86_64/exits.S | 2 xen/arch/x86/hvm/vlapic.c | 2 xen/arch/x86/hvm/vmx/io.c | 18 xen/arch/x86/hvm/vmx/vmcs.c | 12 xen/arch/x86/hvm/vmx/vmx.c | 131 +- xen/arch/x86/hvm/vmx/x86_32/exits.S | 3 xen/arch/x86/hvm/vmx/x86_64/exits.S | 2 xen/arch/x86/irq.c | 87 - xen/arch/x86/mm.c | 84 - xen/arch/x86/nmi.c | 38 xen/arch/x86/time.c | 64 - xen/arch/x86/x86_32/domain_page.c | 3 xen/arch/x86/x86_32/traps.c | 2 xen/arch/x86/x86_32/xen.lds.S | 3 xen/arch/x86/x86_64/xen.lds.S | 3 xen/common/domain.c | 11 xen/common/multicall.c | 4 xen/common/sched_bvt.c | 16 xen/common/sched_credit.c | 31 xen/common/sched_sedf.c | 21 xen/common/schedule.c | 65 - xen/common/timer.c | 86 - xen/common/trace.c | 17 xen/drivers/char/console.c | 1 xen/include/asm-ia64/linux-xen/asm/cache.h | 2 xen/include/asm-powerpc/cache.h | 2 xen/include/asm-powerpc/flushtlb.h | 15 xen/include/asm-x86/cache.h | 2 xen/include/asm-x86/current.h | 2 xen/include/asm-x86/e820.h | 1 xen/include/asm-x86/flushtlb.h | 16 xen/include/asm-x86/hvm/domain.h | 2 xen/include/asm-x86/hvm/hvm.h | 12 xen/include/asm-x86/hvm/io.h | 3 xen/include/asm-x86/hvm/support.h | 8 xen/include/asm-x86/hvm/vcpu.h | 2 xen/include/asm-x86/hvm/vmx/vmx.h | 359 +++--- xen/include/public/hvm/ioreq.h | 9 xen/include/xen/config.h | 1 xen/include/xen/event.h | 1 xen/include/xen/multicall.h | 5 xen/include/xen/percpu.h | 1 xen/include/xen/sched-if.h | 10 100 files changed, 2152 insertions(+), 1047 deletions(-) diff -r e4f1519b473f 
-r b60ea69932b1 buildconfigs/linux-defconfig_xen0_ia64 --- a/buildconfigs/linux-defconfig_xen0_ia64 Tue Aug 08 19:07:32 2006 -0500 +++ b/buildconfigs/linux-defconfig_xen0_ia64 Wed Aug 09 18:04:20 2006 +0100 @@ -1533,8 +1533,9 @@ CONFIG_XEN_BLKDEV_BACKEND=y # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set CONFIG_XEN_NETDEV_LOOPBACK=y CONFIG_XEN_PCIDEV_BACKEND=y -CONFIG_XEN_PCIDEV_BACKEND_VPCI=y +# CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set # CONFIG_XEN_PCIDEV_BACKEND_PASS is not set +CONFIG_XEN_PCIDEV_BACKEND_SLOT=y # CONFIG_XEN_PCIDEV_BE_DEBUG is not set # CONFIG_XEN_TPMDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y diff -r e4f1519b473f -r b60ea69932b1 buildconfigs/linux-defconfig_xen0_x86_32 --- a/buildconfigs/linux-defconfig_xen0_x86_32 Tue Aug 08 19:07:32 2006 -0500 +++ b/buildconfigs/linux-defconfig_xen0_x86_32 Wed Aug 09 18:04:20 2006 +0100 @@ -1320,6 +1320,7 @@ CONFIG_XEN_PCIDEV_BACKEND=y CONFIG_XEN_PCIDEV_BACKEND=y # CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set CONFIG_XEN_PCIDEV_BACKEND_PASS=y +# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set # CONFIG_XEN_PCIDEV_BE_DEBUG is not set CONFIG_XEN_BLKDEV_BACKEND=y CONFIG_XEN_BLKDEV_TAP=y diff -r e4f1519b473f -r b60ea69932b1 buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Tue Aug 08 19:07:32 2006 -0500 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Wed Aug 09 18:04:20 2006 +0100 @@ -1261,6 +1261,7 @@ CONFIG_XEN_PCIDEV_BACKEND=y CONFIG_XEN_PCIDEV_BACKEND=y # CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set CONFIG_XEN_PCIDEV_BACKEND_PASS=y +# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set # CONFIG_XEN_PCIDEV_BE_DEBUG is not set CONFIG_XEN_BLKDEV_BACKEND=y CONFIG_XEN_BLKDEV_TAP=y diff -r e4f1519b473f -r b60ea69932b1 buildconfigs/linux-defconfig_xen_ia64 --- a/buildconfigs/linux-defconfig_xen_ia64 Tue Aug 08 19:07:32 2006 -0500 +++ b/buildconfigs/linux-defconfig_xen_ia64 Wed Aug 09 18:04:20 2006 +0100 @@ -1539,8 +1539,9 @@ CONFIG_XEN_BLKDEV_BACKEND=y # 
CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set CONFIG_XEN_NETDEV_LOOPBACK=y CONFIG_XEN_PCIDEV_BACKEND=y -CONFIG_XEN_PCIDEV_BACKEND_VPCI=y +# CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set # CONFIG_XEN_PCIDEV_BACKEND_PASS is not set +CONFIG_XEN_PCIDEV_BACKEND_SLOT=y # CONFIG_XEN_PCIDEV_BE_DEBUG is not set # CONFIG_XEN_TPMDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y diff -r e4f1519b473f -r b60ea69932b1 buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Tue Aug 08 19:07:32 2006 -0500 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Wed Aug 09 18:04:20 2006 +0100 @@ -3021,6 +3021,7 @@ CONFIG_XEN_PCIDEV_BACKEND=m CONFIG_XEN_PCIDEV_BACKEND=m CONFIG_XEN_PCIDEV_BACKEND_VPCI=y # CONFIG_XEN_PCIDEV_BACKEND_PASS is not set +# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set # CONFIG_XEN_PCIDEV_BE_DEBUG is not set CONFIG_XEN_BLKDEV_BACKEND=y CONFIG_XEN_BLKDEV_TAP=y diff -r e4f1519b473f -r b60ea69932b1 buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Tue Aug 08 19:07:32 2006 -0500 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Wed Aug 09 18:04:20 2006 +0100 @@ -2853,6 +2853,7 @@ CONFIG_XEN_PCIDEV_BACKEND=m CONFIG_XEN_PCIDEV_BACKEND=m # CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set CONFIG_XEN_PCIDEV_BACKEND_PASS=y +# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set # CONFIG_XEN_PCIDEV_BE_DEBUG is not set CONFIG_XEN_BLKDEV_BACKEND=y CONFIG_XEN_BLKDEV_TAP=y diff -r e4f1519b473f -r b60ea69932b1 linux-2.6-xen-sparse/drivers/xen/Kconfig --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig Tue Aug 08 19:07:32 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig Wed Aug 09 18:04:20 2006 +0100 @@ -117,7 +117,7 @@ config XEN_PCIDEV_BACKEND_VPCI This PCI Backend hides the true PCI topology and makes the frontend think there is a single PCI bus with only the exported devices on it. For example, a device at 03:05.0 will be re-assigned to 00:00.0. A - second device at 02:1a.0 will be re-assigned to 00:01.0. 
+ second device at 02:1a.1 will be re-assigned to 00:01.1. config XEN_PCIDEV_BACKEND_PASS bool "Passthrough" @@ -129,6 +129,15 @@ config XEN_PCIDEV_BACKEND_PASS which depend on finding their hardward in certain bus/slot locations. +config XEN_PCIDEV_BACKEND_SLOT + bool "Slot" + ---help--- + This PCI Backend hides the true PCI topology and makes the frontend + think there is a single PCI bus with only the exported devices on it. + Contrary to the virtual PCI backend, a function becomes a new slot. + For example, a device at 03:05.2 will be re-assigned to 00:00.0. A + second device at 02:1a.1 will be re-assigned to 00:01.0. + endchoice config XEN_PCIDEV_BE_DEBUG diff -r e4f1519b473f -r b60ea69932b1 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Tue Aug 08 19:07:32 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Wed Aug 09 18:04:20 2006 +0100 @@ -563,10 +563,14 @@ struct page *balloon_alloc_empty_page_ra set_xen_guest_handle(reservation.extent_start, &gmfn); ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + if (ret == -ENOSYS) + goto err; BUG_ON(ret != 1); } else { ret = apply_to_page_range(&init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL); + if (ret == -ENOSYS) + goto err; BUG_ON(ret); } current_pages -= 1UL << order; @@ -583,6 +587,11 @@ struct page *balloon_alloc_empty_page_ra set_page_count(page + i, 1); return page; + + err: + free_pages(vstart, order); + balloon_unlock(flags); + return NULL; } void balloon_dealloc_empty_page_range( diff -r e4f1519b473f -r b60ea69932b1 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue Aug 08 19:07:32 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Wed Aug 09 18:04:20 2006 +0100 @@ -518,6 +518,19 @@ static int __init blkif_init(void) return -ENODEV; mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + +#ifdef 
CONFIG_XEN_IA64_DOM0_NON_VP + extern unsigned long alloc_empty_foreign_map_page_range( + unsigned long pages); + mmap_vstart = (unsigned long) + alloc_empty_foreign_map_page_range(mmap_pages); +#else /* ! ia64 */ + page = balloon_alloc_empty_page_range(mmap_pages); + if (page == NULL) + return -ENOMEM; + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); +#endif + pending_reqs = kmalloc(sizeof(pending_reqs[0]) * blkif_reqs, GFP_KERNEL); pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * @@ -534,16 +547,6 @@ static int __init blkif_init(void) blkif_interface_init(); -#ifdef CONFIG_XEN_IA64_DOM0_NON_VP - extern unsigned long alloc_empty_foreign_map_page_range( - unsigned long pages); - mmap_vstart = (unsigned long) - alloc_empty_foreign_map_page_range(mmap_pages); -#else /* ! ia64 */ - page = balloon_alloc_empty_page_range(mmap_pages); - BUG_ON(page == NULL); - mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); -#endif printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n", __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart); BUG_ON(mmap_vstart == 0); diff -r e4f1519b473f -r b60ea69932b1 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Aug 08 19:07:32 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Wed Aug 09 18:04:20 2006 +0100 @@ -709,29 +709,18 @@ static void make_response(blkif_t *blkif /****************************************************************** * misc small helpers */ -/* FIXME: Return ENOMEM properly on failure to allocate additional reqs. 
*/ -static void req_increase(void) +static int req_increase(void) { int i, j; struct page *page; unsigned long flags; + int ret; spin_lock_irqsave(&pending_free_lock, flags); + ret = -EINVAL; if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock) goto done; - - pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) * - blkif_reqs, GFP_KERNEL); - pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) * - mmap_pages, GFP_KERNEL); - - if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) { - kfree(pending_reqs[mmap_alloc]); - kfree(pending_addrs[mmap_alloc]); - WPRINTK("%s: out of memory\n", __FUNCTION__); - goto done; - } #ifdef __ia64__ extern unsigned long alloc_empty_foreign_map_page_range( @@ -740,7 +729,11 @@ static void req_increase(void) alloc_empty_foreign_map_page_range(mmap_pages); #else /* ! ia64 */ page = balloon_alloc_empty_page_range(mmap_pages); - BUG_ON(page == NULL); + ret = -ENOMEM; + if (page == NULL) { + printk("%s balloon_alloc_empty_page_range gave NULL\n", __FUNCTION__); + goto done; + } /* Pin all of the pages. 
*/ for (i=0; i<mmap_pages; i++) @@ -751,6 +744,23 @@ static void req_increase(void) mmap_start[mmap_alloc].mpage = page; #endif + + pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) * + blkif_reqs, GFP_KERNEL); + pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) * + mmap_pages, GFP_KERNEL); + + ret = -ENOMEM; + if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) { + kfree(pending_reqs[mmap_alloc]); + kfree(pending_addrs[mmap_alloc]); + WPRINTK("%s: out of memory\n", __FUNCTION__); + ret = -ENOMEM; + goto done; + } + + ret = 0; + DPRINTK("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n", __FUNCTION__, blkif_reqs, mmap_pages, mmap_start[mmap_alloc].start); @@ -774,7 +784,7 @@ static void req_increase(void) DPRINTK("# MMAPs increased to %d\n",mmap_alloc); done: spin_unlock_irqrestore(&pending_free_lock, flags); - + return ret; } static void mmap_req_del(int mmap) @@ -1394,7 +1404,13 @@ static int __init blkif_init(void) return -ENODEV; INIT_LIST_HEAD(&pending_free); - for(i = 0; i < 2; i++) req_increase(); + for(i = 0; i < 2; i++) { + ret = req_increase(); + if (ret) + break; + } + if (i == 0) + return ret; tap_blkif_interface_init(); diff -r e4f1519b473f -r b60ea69932b1 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Tue Aug 08 19:07:32 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Wed Aug 09 18:04:20 2006 +0100 @@ -1306,7 +1306,9 @@ static int __init netback_init(void) net_timer.function = net_alarm; page = balloon_alloc_empty_page_range(MAX_PENDING_REQS); - BUG_ON(page == NULL); + if (page == NULL) + return -ENOMEM; + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); for (i = 0; i < MAX_PENDING_REQS; i++) { diff -r e4f1519b473f -r b60ea69932b1 linux-2.6-xen-sparse/drivers/xen/pciback/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/pciback/Makefile Tue Aug 08 19:07:32 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/Makefile Wed Aug 09 18:04:20 
2006 +0100 @@ -7,6 +7,7 @@ pciback-y += conf_space.o conf_space_hea conf_space_capability_pm.o \ conf_space_quirks.o pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) diff -r e4f1519b473f -r b60ea69932b1 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h Tue Aug 08 19:07:32 2006 -0500 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h Wed Aug 09 18:04:20 2006 +0100 @@ -51,7 +51,8 @@ static inline void switch_mm(struct mm_s struct mmuext_op _op[2], *op = _op; if (likely(prev != next)) { - BUG_ON(!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)); + BUG_ON(!xen_feature(XENFEAT_writable_page_tables) && + !test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)); /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); diff -r e4f1519b473f -r b60ea69932b1 tools/Makefile --- a/tools/Makefile Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/Makefile Wed Aug 09 18:04:20 2006 +0100 @@ -8,7 +8,7 @@ SUBDIRS-y += examples SUBDIRS-y += examples SUBDIRS-y += xentrace SUBDIRS-$(CONFIG_XCUTILS) += xcutils -SUBDIRS-y += firmware +SUBDIRS-$(CONFIG_X86) += firmware SUBDIRS-y += security SUBDIRS-y += console SUBDIRS-y += xenmon diff -r e4f1519b473f -r b60ea69932b1 tools/examples/xend-config.sxp --- a/tools/examples/xend-config.sxp Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/examples/xend-config.sxp Wed Aug 09 18:04:20 2006 +0100 @@ -54,7 +54,7 @@ # (xend-relocation-hosts-allow '^localhost$ ^.*\.example\.org$') # #(xend-relocation-hosts-allow '') -(xend-relocation-hosts-allow '^localhost$') +(xend-relocation-hosts-allow '^localhost$ ^localhost\.localdomain$') # The limit (in kilobytes) on the size of the console buffer #(console-limit 1024) diff -r e4f1519b473f -r b60ea69932b1 tools/examples/xmexample.hvm --- 
a/tools/examples/xmexample.hvm Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/examples/xmexample.hvm Wed Aug 09 18:04:20 2006 +0100 @@ -130,6 +130,10 @@ vnc=1 #vncdisplay=1 #---------------------------------------------------------------------------- +# try to find an unused port for the VNC server, default = 1 +#vncunused=1 + +#---------------------------------------------------------------------------- # enable spawning vncviewer for domain's console # (only valid when vnc=1), default = 0 #vncconsole=0 diff -r e4f1519b473f -r b60ea69932b1 tools/firmware/Makefile --- a/tools/firmware/Makefile Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/firmware/Makefile Wed Aug 09 18:04:20 2006 +0100 @@ -30,7 +30,7 @@ all: .PHONY: install install: all [ -d $(INSTALL_DIR) ] || install -d -m0755 $(INSTALL_DIR) - install -m0644 $(TARGET) $(INSTALL_DIR) + [ ! -e $(TARGET) ] || install -m0644 $(TARGET) $(INSTALL_DIR) .PHONY: clean clean: diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/patches/domain-destroy --- a/tools/ioemu/patches/domain-destroy Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/ioemu/patches/domain-destroy Wed Aug 09 18:04:20 2006 +0100 @@ -1,7 +1,7 @@ Index: ioemu/monitor.c Index: ioemu/monitor.c =================================================================== ---- ioemu.orig/monitor.c 2006-08-06 02:22:01.487319736 +0100 -+++ ioemu/monitor.c 2006-08-06 02:23:02.269544103 +0100 +--- ioemu.orig/monitor.c 2006-08-08 11:27:48.555190337 +0100 ++++ ioemu/monitor.c 2006-08-08 11:27:53.984584612 +0100 @@ -308,6 +308,7 @@ static void do_quit(void) @@ -12,10 +12,10 @@ Index: ioemu/monitor.c Index: ioemu/target-i386-dm/helper2.c =================================================================== ---- ioemu.orig/target-i386-dm/helper2.c 2006-08-06 02:22:59.251880493 +0100 -+++ ioemu/target-i386-dm/helper2.c 2006-08-06 02:23:02.270543991 +0100 -@@ -483,5 +483,25 @@ - shared_page->vcpu_iodata[send_vcpu].dm_eport); +--- ioemu.orig/target-i386-dm/helper2.c 2006-08-08 11:27:53.063687351 
+0100 ++++ ioemu/target-i386-dm/helper2.c 2006-08-08 11:27:54.011581601 +0100 +@@ -488,5 +488,25 @@ + xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]); } } + destroy_hvm_domain(); @@ -42,8 +42,8 @@ Index: ioemu/target-i386-dm/helper2.c +} Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-06 02:22:59.255880047 +0100 -+++ ioemu/vl.h 2006-08-06 02:23:02.271543880 +0100 +--- ioemu.orig/vl.h 2006-08-08 11:27:53.067686905 +0100 ++++ ioemu/vl.h 2006-08-08 11:27:54.061576023 +0100 @@ -1189,4 +1189,7 @@ void kqemu_record_dump(void); diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/patches/domain-reset --- a/tools/ioemu/patches/domain-reset Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/ioemu/patches/domain-reset Wed Aug 09 18:04:20 2006 +0100 @@ -1,8 +1,8 @@ Index: ioemu/target-i386-dm/helper2.c Index: ioemu/target-i386-dm/helper2.c =================================================================== ---- ioemu.orig/target-i386-dm/helper2.c 2006-08-06 02:21:15.779415007 +0100 -+++ ioemu/target-i386-dm/helper2.c 2006-08-06 02:22:59.251880493 +0100 -@@ -123,6 +123,25 @@ +--- ioemu.orig/target-i386-dm/helper2.c 2006-08-08 11:27:45.566523765 +0100 ++++ ioemu/target-i386-dm/helper2.c 2006-08-08 11:27:53.063687351 +0100 +@@ -127,6 +127,25 @@ /* called from main_cpu_reset */ void cpu_reset(CPUX86State *env) { @@ -28,7 +28,7 @@ Index: ioemu/target-i386-dm/helper2.c } void cpu_x86_close(CPUX86State *env) -@@ -449,6 +468,10 @@ +@@ -455,6 +474,10 @@ if (vm_running) { if (shutdown_requested) break; @@ -41,8 +41,8 @@ Index: ioemu/target-i386-dm/helper2.c /* Wait up to 10 msec. 
*/ Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-06 02:22:56.618174081 +0100 -+++ ioemu/vl.c 2006-08-06 02:22:59.254880158 +0100 +--- ioemu.orig/vl.c 2006-08-08 11:27:52.994695048 +0100 ++++ ioemu/vl.c 2006-08-08 11:27:53.066687017 +0100 @@ -4948,7 +4948,7 @@ } QEMUResetEntry; @@ -54,8 +54,8 @@ Index: ioemu/vl.c Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-06 02:22:01.501318175 +0100 -+++ ioemu/vl.h 2006-08-06 02:22:59.255880047 +0100 +--- ioemu.orig/vl.h 2006-08-08 11:27:48.757167803 +0100 ++++ ioemu/vl.h 2006-08-08 11:27:53.067686905 +0100 @@ -130,6 +130,7 @@ void qemu_register_reset(QEMUResetHandler *func, void *opaque); diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/patches/domain-timeoffset --- a/tools/ioemu/patches/domain-timeoffset Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/ioemu/patches/domain-timeoffset Wed Aug 09 18:04:20 2006 +0100 @@ -1,7 +1,7 @@ Index: ioemu/hw/mc146818rtc.c Index: ioemu/hw/mc146818rtc.c =================================================================== ---- ioemu.orig/hw/mc146818rtc.c 2006-08-07 17:44:43.593604340 +0100 -+++ ioemu/hw/mc146818rtc.c 2006-08-07 17:44:47.594168708 +0100 +--- ioemu.orig/hw/mc146818rtc.c 2006-08-09 15:04:17.857242121 +0100 ++++ ioemu/hw/mc146818rtc.c 2006-08-09 15:04:24.588603423 +0100 @@ -178,10 +178,27 @@ } } @@ -46,8 +46,8 @@ Index: ioemu/hw/mc146818rtc.c static void rtc_copy_date(RTCState *s) Index: ioemu/hw/pc.c =================================================================== ---- ioemu.orig/hw/pc.c 2006-08-07 17:44:47.324198106 +0100 -+++ ioemu/hw/pc.c 2006-08-07 17:44:54.830380715 +0100 +--- ioemu.orig/hw/pc.c 2006-08-09 15:04:24.316629266 +0100 ++++ ioemu/hw/pc.c 2006-08-09 15:04:24.589603328 +0100 @@ -159,7 +159,7 @@ } @@ -117,8 +117,8 @@ Index: ioemu/hw/pc.c QEMUMachine pc_machine = { Index: ioemu/vl.c 
=================================================================== ---- ioemu.orig/vl.c 2006-08-07 17:44:47.464182863 +0100 -+++ ioemu/vl.c 2006-08-07 17:44:54.830380715 +0100 +--- ioemu.orig/vl.c 2006-08-09 15:04:24.457615869 +0100 ++++ ioemu/vl.c 2006-08-09 15:04:24.592603043 +0100 @@ -163,6 +163,8 @@ int xc_handle; @@ -174,8 +174,8 @@ Index: ioemu/vl.c if (usb_enabled) { Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-07 17:44:47.329197562 +0100 -+++ ioemu/vl.h 2006-08-07 17:44:54.830380715 +0100 +--- ioemu.orig/vl.h 2006-08-09 15:04:24.321628791 +0100 ++++ ioemu/vl.h 2006-08-09 15:04:24.593602948 +0100 @@ -575,7 +575,7 @@ int boot_device, DisplayState *ds, const char **fd_filename, int snapshot, diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/patches/qemu-target-i386-dm --- a/tools/ioemu/patches/qemu-target-i386-dm Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/ioemu/patches/qemu-target-i386-dm Wed Aug 09 18:04:20 2006 +0100 @@ -1,7 +1,7 @@ Index: ioemu/Makefile.target Index: ioemu/Makefile.target =================================================================== ---- ioemu.orig/Makefile.target 2006-08-06 02:14:09.794902973 +0100 -+++ ioemu/Makefile.target 2006-08-06 02:21:42.270461924 +0100 +--- ioemu.orig/Makefile.target 2006-08-08 11:24:33.479955101 +0100 ++++ ioemu/Makefile.target 2006-08-08 11:24:39.008338255 +0100 @@ -62,6 +62,8 @@ QEMU_SYSTEM=qemu-fast endif @@ -32,8 +32,8 @@ Index: ioemu/Makefile.target DEFINES += -DHAS_AUDIO Index: ioemu/configure =================================================================== ---- ioemu.orig/configure 2006-08-06 02:14:09.795902861 +0100 -+++ ioemu/configure 2006-08-06 02:15:01.771108621 +0100 +--- ioemu.orig/configure 2006-08-08 11:24:33.480954990 +0100 ++++ ioemu/configure 2006-08-08 11:24:38.122437102 +0100 @@ -373,6 +373,8 @@ if [ "$user" = "yes" ] ; then target_list="i386-user arm-user armeb-user sparc-user ppc-user mips-user mipsel-user 
$target_list" @@ -45,8 +45,8 @@ Index: ioemu/configure fi Index: ioemu/monitor.c =================================================================== ---- ioemu.orig/monitor.c 2006-08-06 02:14:49.574468309 +0100 -+++ ioemu/monitor.c 2006-08-06 02:21:16.172371202 +0100 +--- ioemu.orig/monitor.c 2006-08-08 11:24:33.484954543 +0100 ++++ ioemu/monitor.c 2006-08-08 11:24:39.253310921 +0100 @@ -1262,6 +1262,10 @@ "", "show profiling information", }, { "capture", "", do_info_capture, @@ -60,8 +60,8 @@ Index: ioemu/monitor.c Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-06 02:14:09.802902081 +0100 -+++ ioemu/vl.c 2006-08-06 02:21:16.369349244 +0100 +--- ioemu.orig/vl.c 2006-08-08 11:24:33.486954320 +0100 ++++ ioemu/vl.c 2006-08-08 11:24:39.454288496 +0100 @@ -87,7 +87,7 @@ #include "exec-all.h" @@ -98,8 +98,8 @@ Index: ioemu/vl.c { Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-06 02:13:56.733359091 +0100 -+++ ioemu/vl.h 2006-08-06 02:21:16.369349244 +0100 +--- ioemu.orig/vl.h 2006-08-08 11:24:31.082222636 +0100 ++++ ioemu/vl.h 2006-08-08 11:24:39.454288496 +0100 @@ -37,6 +37,8 @@ #include <unistd.h> #include <fcntl.h> @@ -132,7 +132,7 @@ Index: ioemu/target-i386-dm/cpu.h Index: ioemu/target-i386-dm/cpu.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/cpu.h 2006-08-06 02:21:16.023387810 +0100 ++++ ioemu/target-i386-dm/cpu.h 2006-08-08 11:24:39.099328102 +0100 @@ -0,0 +1,86 @@ +/* + * i386 virtual CPU header @@ -223,7 +223,7 @@ Index: ioemu/target-i386-dm/exec-dm.c Index: ioemu/target-i386-dm/exec-dm.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/exec-dm.c 2006-08-06 02:21:16.024387698 +0100 ++++ ioemu/target-i386-dm/exec-dm.c 2006-08-08 
11:24:39.099328102 +0100 @@ -0,0 +1,516 @@ +/* + * virtual page mapping and translated block handling @@ -744,8 +744,8 @@ Index: ioemu/target-i386-dm/helper2.c Index: ioemu/target-i386-dm/helper2.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/helper2.c 2006-08-06 02:21:15.779415007 +0100 -@@ -0,0 +1,464 @@ ++++ ioemu/target-i386-dm/helper2.c 2006-08-08 11:24:44.888682140 +0100 +@@ -0,0 +1,469 @@ +/* + * i386 helpers (without register variable usage) + * @@ -830,6 +830,10 @@ Index: ioemu/target-i386-dm/helper2.c +/* which vcpu we are serving */ +int send_vcpu = 0; + ++//the evtchn port for polling the notification, ++#define NR_CPUS 32 ++evtchn_port_t ioreq_local_port[NR_CPUS]; ++ +CPUX86State *cpu_x86_init(void) +{ + CPUX86State *env; @@ -861,7 +865,7 @@ Index: ioemu/target-i386-dm/helper2.c + fprintf(logfile, "bind interdomain ioctl error %d\n", errno); + return NULL; + } -+ shared_page->vcpu_iodata[i].dm_eport = rc; ++ ioreq_local_port[i] = rc; + } + } + @@ -913,8 +917,7 @@ Index: ioemu/target-i386-dm/helper2.c + + for (i = 0; i < vcpus; i++) { + req = &(shared_page->vcpu_iodata[i].vp_ioreq); -+ term_printf("vcpu %d: event port %d\n", i, -+ shared_page->vcpu_iodata[i].vp_eport); ++ term_printf("vcpu %d: event port %d\n", i, ioreq_local_port[i]); + term_printf(" req state: %x, pvalid: %x, addr: %"PRIx64", " + "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n", + req->state, req->pdata_valid, req->addr, @@ -933,6 +936,7 @@ Index: ioemu/target-i386-dm/helper2.c + + if (req->state == STATE_IOREQ_READY) { + req->state = STATE_IOREQ_INPROCESS; ++ rmb(); + return req; + } + @@ -955,7 +959,7 @@ Index: ioemu/target-i386-dm/helper2.c + port = xc_evtchn_pending(xce_handle); + if (port != -1) { + for ( i = 0; i < vcpus; i++ ) -+ if ( shared_page->vcpu_iodata[i].dm_eport == port ) ++ if ( ioreq_local_port[i] == port ) + break; + + if ( i == vcpus ) { @@ -1176,8 +1180,10 @@ 
Index: ioemu/target-i386-dm/helper2.c + } + + /* No state change if state = STATE_IORESP_HOOK */ -+ if (req->state == STATE_IOREQ_INPROCESS) ++ if (req->state == STATE_IOREQ_INPROCESS) { ++ mb(); + req->state = STATE_IORESP_READY; ++ } + env->send_event = 1; + } +} @@ -1204,8 +1210,7 @@ Index: ioemu/target-i386-dm/helper2.c + + if (env->send_event) { + env->send_event = 0; -+ xc_evtchn_notify(xce_handle, -+ shared_page->vcpu_iodata[send_vcpu].dm_eport); ++ xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]); + } + } + return 0; @@ -1213,7 +1218,7 @@ Index: ioemu/target-i386-dm/i8259-dm.c Index: ioemu/target-i386-dm/i8259-dm.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/i8259-dm.c 2006-08-06 02:15:01.777107952 +0100 ++++ ioemu/target-i386-dm/i8259-dm.c 2006-08-08 11:24:33.505952200 +0100 @@ -0,0 +1,107 @@ +/* Xen 8259 stub for interrupt controller emulation + * @@ -1325,7 +1330,7 @@ Index: ioemu/target-i386-dm/qemu-dm.debu Index: ioemu/target-i386-dm/qemu-dm.debug =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/qemu-dm.debug 2006-08-06 02:15:01.778107841 +0100 ++++ ioemu/target-i386-dm/qemu-dm.debug 2006-08-08 11:24:33.505952200 +0100 @@ -0,0 +1,5 @@ +#!/bin/sh + @@ -1335,7 +1340,7 @@ Index: ioemu/target-i386-dm/qemu-ifup Index: ioemu/target-i386-dm/qemu-ifup =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/qemu-ifup 2006-08-06 02:15:01.778107841 +0100 ++++ ioemu/target-i386-dm/qemu-ifup 2006-08-08 11:24:33.505952200 +0100 @@ -0,0 +1,10 @@ +#!/bin/sh + diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/patches/series --- a/tools/ioemu/patches/series Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/ioemu/patches/series Wed Aug 09 18:04:20 2006 +0100 @@ -34,8 +34,10 @@ 
vnc-start-vncviewer vnc-start-vncviewer vnc-title-domain-name vnc-access-monitor-vt +vnc-display-find-unused xenstore-block-device-config xenstore-write-vnc-port qemu-allow-disable-sdl qemu-fix-memset-args qemu-fix-write-to-disk-synchronous +xen-support-buffered-ioreqs diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/patches/xenstore-block-device-config --- a/tools/ioemu/patches/xenstore-block-device-config Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/ioemu/patches/xenstore-block-device-config Wed Aug 09 18:04:20 2006 +0100 @@ -1,7 +1,7 @@ Index: ioemu/Makefile.target Index: ioemu/Makefile.target =================================================================== ---- ioemu.orig/Makefile.target 2006-08-07 17:42:27.802386071 +0100 -+++ ioemu/Makefile.target 2006-08-07 17:42:28.683289358 +0100 +--- ioemu.orig/Makefile.target 2006-08-09 15:04:24.795583755 +0100 ++++ ioemu/Makefile.target 2006-08-09 15:04:25.373528824 +0100 @@ -358,6 +358,7 @@ VL_OBJS+= cirrus_vga.o mixeng.o parallel.o acpi.o piix_pci.o VL_OBJS+= usb-uhci.o @@ -13,7 +13,7 @@ Index: ioemu/xenstore.c Index: ioemu/xenstore.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/xenstore.c 2006-08-07 17:42:28.684289249 +0100 ++++ ioemu/xenstore.c 2006-08-09 15:04:25.374528729 +0100 @@ -0,0 +1,187 @@ +/* + * This file is subject to the terms and conditions of the GNU General @@ -204,9 +204,9 @@ Index: ioemu/xenstore.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-07 17:42:28.393320909 +0100 -+++ ioemu/vl.c 2006-08-07 17:42:28.687288922 +0100 -@@ -5242,9 +5242,11 @@ +--- ioemu.orig/vl.c 2006-08-09 15:04:25.312534622 +0100 ++++ ioemu/vl.c 2006-08-09 15:04:25.377528443 +0100 +@@ -5243,9 +5243,11 @@ "Standard options:\n" "-M machine select emulated machine (-M ? 
for list)\n" "-fda/-fdb file use 'file' as floppy disk 0/1 image\n" @@ -218,7 +218,7 @@ Index: ioemu/vl.c "-boot [a|c|d] boot on floppy (a), hard disk (c) or CD-ROM (d)\n" "-snapshot write to temporary files instead of disk image files\n" #ifdef TARGET_I386 -@@ -5370,11 +5372,13 @@ +@@ -5372,11 +5374,13 @@ QEMU_OPTION_M, QEMU_OPTION_fda, QEMU_OPTION_fdb, @@ -232,7 +232,7 @@ Index: ioemu/vl.c QEMU_OPTION_boot, QEMU_OPTION_snapshot, #ifdef TARGET_I386 -@@ -5445,11 +5449,13 @@ +@@ -5448,11 +5452,13 @@ { "M", HAS_ARG, QEMU_OPTION_M }, { "fda", HAS_ARG, QEMU_OPTION_fda }, { "fdb", HAS_ARG, QEMU_OPTION_fdb }, @@ -246,7 +246,7 @@ Index: ioemu/vl.c { "boot", HAS_ARG, QEMU_OPTION_boot }, { "snapshot", 0, QEMU_OPTION_snapshot }, #ifdef TARGET_I386 -@@ -5797,10 +5803,16 @@ +@@ -5801,10 +5807,16 @@ #ifdef CONFIG_GDBSTUB int use_gdbstub, gdbstub_port; #endif @@ -265,7 +265,7 @@ Index: ioemu/vl.c const char *kernel_filename, *kernel_cmdline; DisplayState *ds = &display_state; int cyls, heads, secs, translation; -@@ -5861,8 +5873,10 @@ +@@ -5865,8 +5877,10 @@ initrd_filename = NULL; for(i = 0; i < MAX_FD; i++) fd_filename[i] = NULL; @@ -276,8 +276,8 @@ Index: ioemu/vl.c ram_size = DEFAULT_RAM_SIZE * 1024 * 1024; vga_ram_size = VGA_RAM_SIZE; bios_size = BIOS_SIZE; -@@ -5875,11 +5889,13 @@ - vncviewer = 0; +@@ -5880,11 +5894,13 @@ + vncunused = 0; kernel_filename = NULL; kernel_cmdline = ""; +#ifndef CONFIG_DM @@ -290,7 +290,7 @@ Index: ioemu/vl.c cyls = heads = secs = 0; translation = BIOS_ATA_TRANSLATION_AUTO; pstrcpy(monitor_device, sizeof(monitor_device), "vc"); -@@ -5912,7 +5928,11 @@ +@@ -5917,7 +5933,11 @@ break; r = argv[optind]; if (r[0] != '-') { @@ -302,7 +302,7 @@ Index: ioemu/vl.c } else { const QEMUOption *popt; -@@ -5956,6 +5976,7 @@ +@@ -5961,6 +5981,7 @@ case QEMU_OPTION_initrd: initrd_filename = optarg; break; @@ -310,7 +310,7 @@ Index: ioemu/vl.c case QEMU_OPTION_hda: case QEMU_OPTION_hdb: case QEMU_OPTION_hdc: -@@ -5968,6 +5989,7 @@ +@@ -5973,6 +5994,7 @@ 
cdrom_index = -1; } break; @@ -318,7 +318,7 @@ Index: ioemu/vl.c case QEMU_OPTION_snapshot: snapshot = 1; break; -@@ -6020,11 +6042,13 @@ +@@ -6025,11 +6047,13 @@ case QEMU_OPTION_append: kernel_cmdline = optarg; break; @@ -332,7 +332,7 @@ Index: ioemu/vl.c case QEMU_OPTION_boot: boot_device = optarg[0]; if (boot_device != 'a' && -@@ -6274,12 +6298,18 @@ +@@ -6284,12 +6308,18 @@ } } @@ -351,7 +351,7 @@ Index: ioemu/vl.c if (!linux_boot && hd_filename[0] == '\0' && (cdrom_index >= 0 && hd_filename[cdrom_index] == '\0') && -@@ -6293,6 +6323,7 @@ +@@ -6303,6 +6333,7 @@ else boot_device = 'd'; } @@ -359,7 +359,7 @@ Index: ioemu/vl.c setvbuf(stdout, NULL, _IOLBF, 0); -@@ -6407,6 +6438,7 @@ +@@ -6417,6 +6448,7 @@ #endif /* !CONFIG_DM */ @@ -367,7 +367,7 @@ Index: ioemu/vl.c /* we always create the cdrom drive, even if no disk is there */ bdrv_init(); if (cdrom_index >= 0) { -@@ -6433,6 +6465,7 @@ +@@ -6443,6 +6475,7 @@ } } } @@ -375,7 +375,7 @@ Index: ioemu/vl.c /* we always create at least one floppy disk */ fd_table[0] = bdrv_new("fda"); -@@ -6511,6 +6544,8 @@ +@@ -6521,6 +6554,8 @@ } } @@ -386,8 +386,8 @@ Index: ioemu/vl.c kernel_filename, kernel_cmdline, initrd_filename, Index: ioemu/monitor.c =================================================================== ---- ioemu.orig/monitor.c 2006-08-07 17:42:27.132461888 +0100 -+++ ioemu/monitor.c 2006-08-07 17:42:28.688288814 +0100 +--- ioemu.orig/monitor.c 2006-08-09 15:04:24.105649313 +0100 ++++ ioemu/monitor.c 2006-08-09 15:04:25.379528253 +0100 @@ -24,6 +24,7 @@ #include "vl.h" #include "disas.h" @@ -416,8 +416,8 @@ Index: ioemu/monitor.c int i; Index: ioemu/block.c =================================================================== ---- ioemu.orig/block.c 2006-08-07 17:42:21.704076241 +0100 -+++ ioemu/block.c 2006-08-07 17:42:28.689288705 +0100 +--- ioemu.orig/block.c 2006-08-09 15:04:17.487277167 +0100 ++++ ioemu/block.c 2006-08-09 15:04:25.379528253 +0100 @@ -758,6 +758,7 @@ static void raw_close(BlockDriverState 
*bs) { @@ -428,8 +428,8 @@ Index: ioemu/block.c Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-07 17:42:28.394320800 +0100 -+++ ioemu/vl.h 2006-08-07 17:42:28.689288705 +0100 +--- ioemu.orig/vl.h 2006-08-09 15:04:25.313534527 +0100 ++++ ioemu/vl.h 2006-08-09 15:04:25.380528158 +0100 @@ -1184,6 +1184,8 @@ void term_print_help(void); void monitor_readline(const char *prompt, int is_password, @@ -455,8 +455,8 @@ Index: ioemu/vl.h extern char domain_name[]; Index: ioemu/hw/ide.c =================================================================== ---- ioemu.orig/hw/ide.c 2006-08-07 17:42:27.552414361 +0100 -+++ ioemu/hw/ide.c 2006-08-07 17:42:28.691288487 +0100 +--- ioemu.orig/hw/ide.c 2006-08-09 15:04:24.524609503 +0100 ++++ ioemu/hw/ide.c 2006-08-09 15:04:25.381528063 +0100 @@ -1158,6 +1158,7 @@ } else { ide_atapi_cmd_error(s, SENSE_NOT_READY, diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/patches/xenstore-write-vnc-port --- a/tools/ioemu/patches/xenstore-write-vnc-port Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/ioemu/patches/xenstore-write-vnc-port Wed Aug 09 18:04:20 2006 +0100 @@ -1,7 +1,7 @@ Index: ioemu/xenstore.c Index: ioemu/xenstore.c =================================================================== ---- ioemu.orig/xenstore.c 2006-08-07 17:42:28.684289249 +0100 -+++ ioemu/xenstore.c 2006-08-07 17:42:28.891266728 +0100 +--- ioemu.orig/xenstore.c 2006-08-09 15:04:25.374528729 +0100 ++++ ioemu/xenstore.c 2006-08-09 15:04:25.579509243 +0100 @@ -185,3 +185,31 @@ free(image); free(vec); @@ -36,10 +36,10 @@ Index: ioemu/xenstore.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-07 17:42:28.687288922 +0100 -+++ ioemu/vl.c 2006-08-07 17:42:28.894266401 +0100 -@@ -6501,6 +6501,7 @@ - vnc_display_init(ds, vnc_display); +--- ioemu.orig/vl.c 2006-08-09 15:04:25.377528443 +0100 ++++ ioemu/vl.c 2006-08-09 15:04:25.583508863 +0100 +@@ 
-6511,6 +6511,7 @@ + vnc_display = vnc_display_init(ds, vnc_display, vncunused); if (vncviewer) vnc_start_viewer(vnc_display); + xenstore_write_vncport(vnc_display); @@ -48,8 +48,8 @@ Index: ioemu/vl.c sdl_display_init(ds, full_screen); Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-07 17:42:28.689288705 +0100 -+++ ioemu/vl.h 2006-08-07 17:42:28.895266293 +0100 +--- ioemu.orig/vl.h 2006-08-09 15:04:25.380528158 +0100 ++++ ioemu/vl.h 2006-08-09 15:04:25.584508768 +0100 @@ -1203,6 +1203,7 @@ int xenstore_fd(void); void xenstore_process_event(void *opaque); diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/ioemu/target-i386-dm/helper2.c Wed Aug 09 18:04:20 2006 +0100 @@ -76,11 +76,19 @@ int xc_handle; shared_iopage_t *shared_page = NULL; +#define BUFFER_IO_MAX_DELAY 100 +buffered_iopage_t *buffered_io_page = NULL; +QEMUTimer *buffered_io_timer; + /* the evtchn fd for polling */ int xce_handle = -1; /* which vcpu we are serving */ int send_vcpu = 0; + +//the evtchn port for polling the notification, +#define NR_CPUS 32 +evtchn_port_t ioreq_local_port[NR_CPUS]; CPUX86State *cpu_x86_init(void) { @@ -113,7 +121,7 @@ CPUX86State *cpu_x86_init(void) fprintf(logfile, "bind interdomain ioctl error %d\n", errno); return NULL; } - shared_page->vcpu_iodata[i].dm_eport = rc; + ioreq_local_port[i] = rc; } } @@ -184,8 +192,7 @@ void sp_info() for (i = 0; i < vcpus; i++) { req = &(shared_page->vcpu_iodata[i].vp_ioreq); - term_printf("vcpu %d: event port %d\n", i, - shared_page->vcpu_iodata[i].vp_eport); + term_printf("vcpu %d: event port %d\n", i, ioreq_local_port[i]); term_printf(" req state: %x, pvalid: %x, addr: %"PRIx64", " "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n", req->state, req->pdata_valid, req->addr, @@ -204,6 +211,7 @@ static ioreq_t *__cpu_get_ioreq(int vcpu if (req->state == 
STATE_IOREQ_READY) { req->state = STATE_IOREQ_INPROCESS; + rmb(); return req; } @@ -226,7 +234,7 @@ static ioreq_t *cpu_get_ioreq(void) port = xc_evtchn_pending(xce_handle); if (port != -1) { for ( i = 0; i < vcpus; i++ ) - if ( shared_page->vcpu_iodata[i].dm_eport == port ) + if ( ioreq_local_port[i] == port ) break; if ( i == vcpus ) { @@ -415,40 +423,74 @@ void cpu_ioreq_xor(CPUState *env, ioreq_ req->u.data = tmp1; } +void __handle_ioreq(CPUState *env, ioreq_t *req) +{ + if (!req->pdata_valid && req->dir == IOREQ_WRITE && req->size != 4) + req->u.data &= (1UL << (8 * req->size)) - 1; + + switch (req->type) { + case IOREQ_TYPE_PIO: + cpu_ioreq_pio(env, req); + break; + case IOREQ_TYPE_COPY: + cpu_ioreq_move(env, req); + break; + case IOREQ_TYPE_AND: + cpu_ioreq_and(env, req); + break; + case IOREQ_TYPE_OR: + cpu_ioreq_or(env, req); + break; + case IOREQ_TYPE_XOR: + cpu_ioreq_xor(env, req); + break; + default: + hw_error("Invalid ioreq type 0x%x\n", req->type); + } +} + +void __handle_buffered_iopage(CPUState *env) +{ + ioreq_t *req = NULL; + + if (!buffered_io_page) + return; + + while (buffered_io_page->read_pointer != + buffered_io_page->write_pointer) { + req = &buffered_io_page->ioreq[buffered_io_page->read_pointer % + IOREQ_BUFFER_SLOT_NUM]; + + __handle_ioreq(env, req); + + mb(); + buffered_io_page->read_pointer++; + } +} + +void handle_buffered_io(void *opaque) +{ + CPUState *env = opaque; + + __handle_buffered_iopage(env); + qemu_mod_timer(buffered_io_timer, BUFFER_IO_MAX_DELAY + + qemu_get_clock(rt_clock)); +} + void cpu_handle_ioreq(void *opaque) { CPUState *env = opaque; ioreq_t *req = cpu_get_ioreq(); + handle_buffered_io(env); if (req) { - if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) { - if (req->size != 4) - req->u.data &= (1UL << (8 * req->size))-1; - } - - switch (req->type) { - case IOREQ_TYPE_PIO: - cpu_ioreq_pio(env, req); - break; - case IOREQ_TYPE_COPY: - cpu_ioreq_move(env, req); - break; - case IOREQ_TYPE_AND: - cpu_ioreq_and(env, 
req); - break; - case IOREQ_TYPE_OR: - cpu_ioreq_or(env, req); - break; - case IOREQ_TYPE_XOR: - cpu_ioreq_xor(env, req); - break; - default: - hw_error("Invalid ioreq type 0x%x\n", req->type); - } + __handle_ioreq(env, req); /* No state change if state = STATE_IORESP_HOOK */ - if (req->state == STATE_IOREQ_INPROCESS) + if (req->state == STATE_IOREQ_INPROCESS) { + mb(); req->state = STATE_IORESP_READY; + } env->send_event = 1; } } @@ -459,6 +501,10 @@ int main_loop(void) extern int shutdown_requested; CPUState *env = cpu_single_env; int evtchn_fd = xc_evtchn_fd(xce_handle); + + buffered_io_timer = qemu_new_timer(rt_clock, handle_buffered_io, + cpu_single_env); + qemu_mod_timer(buffered_io_timer, qemu_get_clock(rt_clock)); qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, env); @@ -479,8 +525,7 @@ int main_loop(void) if (env->send_event) { env->send_event = 0; - xc_evtchn_notify(xce_handle, - shared_page->vcpu_iodata[send_vcpu].dm_eport); + xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]); } } destroy_hvm_domain(); diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/ioemu/vl.c Wed Aug 09 18:04:20 2006 +0100 @@ -121,6 +121,7 @@ static DisplayState display_state; static DisplayState display_state; int nographic; int vncviewer; +int vncunused; const char* keyboard_layout = NULL; int64_t ticks_per_sec; int boot_device = 'c'; @@ -5344,6 +5345,7 @@ void help(void) "-loadvm file start right away with a saved state (loadvm in monitor)\n" "-vnc display start a VNC server on display\n" "-vncviewer start a vncviewer process for this domain\n" + "-vncunused bind the VNC server to an unused port\n" "-timeoffset time offset (in seconds) from local time\n" "-acpi disable or enable ACPI of HVM domain \n" "\n" @@ -5435,6 +5437,7 @@ enum { QEMU_OPTION_timeoffset, QEMU_OPTION_acpi, QEMU_OPTION_vncviewer, + QEMU_OPTION_vncunused, }; typedef struct QEMUOption { @@ -5512,6 +5515,7 @@ const QEMUOption 
qemu_options[] = { { "smp", HAS_ARG, QEMU_OPTION_smp }, { "vnc", HAS_ARG, QEMU_OPTION_vnc }, { "vncviewer", 0, QEMU_OPTION_vncviewer }, + { "vncunused", 0, QEMU_OPTION_vncunused }, /* temporary options */ { "usb", 0, QEMU_OPTION_usb }, @@ -5834,6 +5838,7 @@ int main(int argc, char **argv) unsigned long nr_pages; xen_pfn_t *page_array; extern void *shared_page; + extern void *buffered_io_page; char qemu_dm_logfilename[64]; @@ -5887,6 +5892,7 @@ int main(int argc, char **argv) snapshot = 0; nographic = 0; vncviewer = 0; + vncunused = 0; kernel_filename = NULL; kernel_cmdline = ""; #ifndef CONFIG_DM @@ -6294,6 +6300,11 @@ int main(int argc, char **argv) case QEMU_OPTION_vncviewer: vncviewer++; break; + case QEMU_OPTION_vncunused: + vncunused++; + if (vnc_display == -1) + vnc_display = -2; + break; } } } @@ -6378,11 +6389,16 @@ int main(int argc, char **argv) phys_ram_base = xc_map_foreign_batch(xc_handle, domid, PROT_READ|PROT_WRITE, page_array, - nr_pages - 1); + nr_pages - 3); if (phys_ram_base == 0) { fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno); exit(-1); } + + /* not yet add for IA64 */ + buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, + PROT_READ|PROT_WRITE, + page_array[nr_pages - 3]); shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, PROT_READ|PROT_WRITE, @@ -6498,7 +6514,7 @@ int main(int argc, char **argv) if (nographic) { dumb_display_init(ds); } else if (vnc_display != -1) { - vnc_display_init(ds, vnc_display); + vnc_display = vnc_display_init(ds, vnc_display, vncunused); if (vncviewer) vnc_start_viewer(vnc_display); xenstore_write_vncport(vnc_display); diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/ioemu/vl.h Wed Aug 09 18:04:20 2006 +0100 @@ -784,7 +784,7 @@ void cocoa_display_init(DisplayState *ds void cocoa_display_init(DisplayState *ds, int full_screen); /* vnc.c */ -void vnc_display_init(DisplayState *ds, int display); 
+int vnc_display_init(DisplayState *ds, int display, int find_unused); int vnc_start_viewer(int port); /* ide.c */ diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/vnc.c --- a/tools/ioemu/vnc.c Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/ioemu/vnc.c Wed Aug 09 18:04:20 2006 +0100 @@ -1183,7 +1183,7 @@ static void vnc_listen_read(void *opaque } } -void vnc_display_init(DisplayState *ds, int display) +int vnc_display_init(DisplayState *ds, int display, int find_unused) { struct sockaddr_in addr; int reuse_addr, ret; @@ -1213,10 +1213,6 @@ void vnc_display_init(DisplayState *ds, fprintf(stderr, "Could not create socket\n"); exit(1); } - - addr.sin_family = AF_INET; - addr.sin_port = htons(5900 + display); - memset(&addr.sin_addr, 0, sizeof(addr.sin_addr)); reuse_addr = 1; ret = setsockopt(vs->lsock, SOL_SOCKET, SO_REUSEADDR, @@ -1226,7 +1222,16 @@ void vnc_display_init(DisplayState *ds, exit(1); } + retry: + addr.sin_family = AF_INET; + addr.sin_port = htons(5900 + display); + memset(&addr.sin_addr, 0, sizeof(addr.sin_addr)); + if (bind(vs->lsock, (struct sockaddr *)&addr, sizeof(addr)) == -1) { + if (find_unused && errno == EADDRINUSE) { + display++; + goto retry; + } fprintf(stderr, "bind() failed\n"); exit(1); } @@ -1247,6 +1252,8 @@ void vnc_display_init(DisplayState *ds, vs->ds->dpy_refresh = vnc_dpy_refresh; vnc_dpy_resize(vs->ds, 640, 400); + + return display; } int vnc_start_viewer(int port) diff -r e4f1519b473f -r b60ea69932b1 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/libxc/xc_hvm_build.c Wed Aug 09 18:04:20 2006 +0100 @@ -26,6 +26,7 @@ #define E820_IO 16 #define E820_SHARED_PAGE 17 #define E820_XENSTORE 18 +#define E820_BUFFERED_IO 19 #define E820_MAP_PAGE 0x00090000 #define E820_MAP_NR_OFFSET 0x000001E8 @@ -96,7 +97,13 @@ static void build_e820map(void *e820_pag e820entry[nr_map].type = E820_RESERVED; nr_map++; -#define STATIC_PAGES 2 /* for ioreq_t and store_mfn */ +#define STATIC_PAGES 3 + /* 3 
static pages: + * - ioreq buffer. + * - xenstore. + * - shared_page. + */ + /* Most of the ram goes here */ e820entry[nr_map].addr = 0x100000; e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES * PAGE_SIZE; @@ -104,6 +111,12 @@ static void build_e820map(void *e820_pag nr_map++; /* Statically allocated special pages */ + + /* For buffered IO requests */ + e820entry[nr_map].addr = mem_size - 3 * PAGE_SIZE; + e820entry[nr_map].size = PAGE_SIZE; + e820entry[nr_map].type = E820_BUFFERED_IO; + nr_map++; /* For xenstore */ e820entry[nr_map].addr = mem_size - 2 * PAGE_SIZE; @@ -213,6 +226,9 @@ static int setup_guest(int xc_handle, unsigned long shared_page_frame = 0; shared_iopage_t *sp; + unsigned long ioreq_buffer_frame = 0; + void *ioreq_buffer_page; + memset(&dsi, 0, sizeof(struct domain_setup_info)); if ( (parseelfimage(image, image_size, &dsi)) != 0 ) @@ -294,27 +310,27 @@ static int setup_guest(int xc_handle, shared_info->vcpu_info[i].evtchn_upcall_mask = 1; munmap(shared_info, PAGE_SIZE); - /* Populate the event channel port in the shared page */ + /* Paranoia */ shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1]; if ( (sp = (shared_iopage_t *) xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, shared_page_frame)) == 0 ) goto error_out; memset(sp, 0, PAGE_SIZE); - - /* FIXME: how about if we overflow the page here? 
*/ - for ( i = 0; i < vcpus; i++ ) { - unsigned int vp_eport; - - vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0); - if ( vp_eport < 0 ) { - PERROR("Couldn't get unbound port from VMX guest.\n"); - goto error_out; - } - sp->vcpu_iodata[i].vp_eport = vp_eport; - } - munmap(sp, PAGE_SIZE); + + /* clean the buffered IO requests page */ + ioreq_buffer_frame = page_array[(v_end >> PAGE_SHIFT) - 3]; + ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, + ioreq_buffer_frame); + + if ( ioreq_buffer_page == NULL ) + goto error_out; + + memset(ioreq_buffer_page, 0, PAGE_SIZE); + + munmap(ioreq_buffer_page, PAGE_SIZE); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, (v_end >> PAGE_SHIFT) - 2); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); diff -r e4f1519b473f -r b60ea69932b1 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/libxc/xc_linux_build.c Wed Aug 09 18:04:20 2006 +0100 @@ -1373,10 +1373,10 @@ int xc_linux_build(int xc_handle, error_out: free(image); - if ( fd >= 0 ) + if ( initrd_info.type == INITRD_file && initrd_info.u.file_handle ) + gzclose(initrd_info.u.file_handle); + else if ( fd >= 0 ) close(fd); - if ( initrd_info.u.file_handle ) - gzclose(initrd_info.u.file_handle); return sts; } diff -r e4f1519b473f -r b60ea69932b1 tools/pygrub/src/GrubConf.py --- a/tools/pygrub/src/GrubConf.py Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/pygrub/src/GrubConf.py Wed Aug 09 18:04:20 2006 +0100 @@ -1,7 +1,7 @@ # # GrubConf.py - Simple grub.conf parsing # -# Copyright 2005 Red Hat, Inc. +# Copyright 2005-2006 Red Hat, Inc. 
# Jeremy Katz <katzj@xxxxxxxxxx> # # This software may be freely redistributed under the terms of the GNU @@ -16,7 +16,6 @@ import logging import logging def grub_split(s, maxsplit = -1): - """Split a grub option screen separated with either '=' or whitespace.""" eq = s.find('=') if eq == -1: return s.split(None, maxsplit) @@ -31,6 +30,12 @@ def grub_split(s, maxsplit = -1): return s.split('=', maxsplit) else: return s.split(None, maxsplit) + +def grub_exact_split(s, num): + ret = grub_split(s, num - 1) + if len(ret) < num: + return ret + [""] * (num - len(ret)) + return ret def get_path(s): """Returns a tuple of (GrubDiskPart, path) corresponding to string.""" @@ -75,25 +80,39 @@ class GrubDiskPart(object): class GrubImage(object): def __init__(self, lines): - self._root = self._initrd = self._kernel = self._args = None - for l in lines: - (com, arg) = grub_split(l, 1) - - if self.commands.has_key(com): - if self.commands[com] is not None: - exec("%s = r\"%s\"" %(self.commands[com], arg.strip())) - else: - logging.info("Ignored image directive %s" %(com,)) - else: - logging.warning("Unknown image directive %s" %(com,)) + self.reset(lines) def __repr__(self): return ("title: %s\n" " root: %s\n" " kernel: %s\n" " args: %s\n" - " initrd: %s" %(self.title, self.root, self.kernel, + " initrd: %s\n" %(self.title, self.root, self.kernel, self.args, self.initrd)) + + def reset(self, lines): + self._root = self._initrd = self._kernel = self._args = None + self.title = "" + self.lines = [] + map(self.set_from_line, lines) + + def set_from_line(self, line, replace = None): + (com, arg) = grub_exact_split(line, 2) + + if self.commands.has_key(com): + if self.commands[com] is not None: + exec("%s = r\"%s\"" %(self.commands[com], arg.strip())) + else: + logging.info("Ignored image directive %s" %(com,)) + else: + logging.warning("Unknown image directive %s" %(com,)) + + # now put the line in the list of lines + if replace is None: + self.lines.append(line) + else: + 
self.lines.pop(replace) + self.lines.insert(replace, line) def set_root(self, val): self._root = GrubDiskPart(val) @@ -137,6 +156,7 @@ class GrubConfigFile(object): self.filename = fn self.images = [] self.timeout = -1 + self._default = 0 if fn is not None: self.parse() @@ -164,7 +184,7 @@ class GrubConfigFile(object): # new image if l.startswith("title"): if len(img) > 0: - self.images.append(GrubImage(img)) + self.add_image(GrubImage(img)) img = [l] continue @@ -172,12 +192,7 @@ class GrubConfigFile(object): img.append(l) continue - try: - (com, arg) = grub_split(l, 1) - except ValueError: - com = l - arg = "" - + (com, arg) = grub_exact_split(l, 2) if self.commands.has_key(com): if self.commands[com] is not None: exec("%s = r\"%s\"" %(self.commands[com], arg.strip())) @@ -187,7 +202,20 @@ class GrubConfigFile(object): logging.warning("Unknown directive %s" %(com,)) if len(img) > 0: - self.images.append(GrubImage(img)) + self.add_image(GrubImage(img)) + + def set(self, line): + (com, arg) = grub_exact_split(line, 2) + if self.commands.has_key(com): + if self.commands[com] is not None: + exec("%s = r\"%s\"" %(self.commands[com], arg.strip())) + else: + logging.info("Ignored directive %s" %(com,)) + else: + logging.warning("Unknown directive %s" %(com,)) + + def add_image(self, image): + self.images.append(image) def _get_default(self): return self._default diff -r e4f1519b473f -r b60ea69932b1 tools/pygrub/src/pygrub --- a/tools/pygrub/src/pygrub Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/pygrub/src/pygrub Wed Aug 09 18:04:20 2006 +0100 @@ -2,7 +2,7 @@ # # pygrub - simple python-based bootloader for Xen # -# Copyright 2005 Red Hat, Inc. +# Copyright 2005-2006 Red Hat, Inc. 
# Jeremy Katz <katzj@xxxxxxxxxx> # # This software may be freely redistributed under the terms of the GNU @@ -14,9 +14,10 @@ # import os, sys, string, struct, tempfile +import copy import logging -import curses, _curses, curses.wrapper +import curses, _curses, curses.wrapper, curses.textpad, curses.ascii import getopt sys.path = [ '/usr/lib/python' ] + sys.path @@ -24,122 +25,387 @@ import grub.GrubConf import grub.GrubConf import grub.fsys -PYGRUB_VER = 0.3 - - -def draw_window(): - stdscr = curses.initscr() - if hasattr(curses, 'use_default_colors'): - curses.use_default_colors() - try: - curses.curs_set(0) - except _curses.error: - pass - - stdscr.addstr(1, 4, "pyGRUB version %s" %(PYGRUB_VER,)) - - win = curses.newwin(10, 74, 2, 1) - win.box() - win.refresh() - - stdscr.addstr(12, 5, "Use the U and D keys to select which entry is highlighted.") - stdscr.addstr(13, 5, "Press enter to boot the selected OS. 'e' to edit the") - stdscr.addstr(14, 5, "commands before booting, 'a' to modify the kernel arguments ") - stdscr.addstr(15, 5, "before booting, or 'c' for a command line.") - stdscr.addch(12, 13, curses.ACS_UARROW) - stdscr.addch(12, 19, curses.ACS_DARROW) - (y, x) = stdscr.getmaxyx() - stdscr.move(y - 1, x - 1) - - stdscr.refresh() - return (stdscr, win) - -def fill_entries(win, cfg, selected): - y = 0 - - for i in cfg.images: - if (0, y) > win.getmaxyx(): - break - if y == selected: - attr = curses.A_REVERSE - else: - attr = 0 - win.addstr(y + 1, 2, i.title.ljust(70), attr) - y += 1 - win.refresh() - -def select(win, line): - win.attron(curses.A_REVERSE) - win.redrawln(line + 1, 1) - win.refresh() +PYGRUB_VER = 0.4 def is_disk_image(file): fd = os.open(file, os.O_RDONLY) buf = os.read(fd, 512) os.close(fd) - if len(buf) >= 512 and struct.unpack("H", buf[0x1fe: 0x200]) == (0xaa55,): + if len(buf) >= 512 and \ + struct.unpack("H", buf[0x1fe: 0x200]) == (0xaa55,): return True return False SECTOR_SIZE=512 def get_active_offset(file): - """Find the offset for the 
start of the first active partition in the - disk image file.""" + """Find the offset for the start of the first active partition " + "in the disk image file.""" + fd = os.open(file, os.O_RDONLY) buf = os.read(fd, 512) for poff in (446, 462, 478, 494): # partition offsets # active partition has 0x80 as the first byte if struct.unpack("<c", buf[poff:poff+1]) == ('\x80',): - return struct.unpack("<L", buf[poff+8:poff+12])[0] * SECTOR_SIZE - return -1 - -def get_config(fn, isconfig = False): - if not os.access(fn, os.R_OK): - raise RuntimeError, "Unable to access %s" %(fn,) - - cf = grub.GrubConf.GrubConfigFile() - - if isconfig: - # set the config file and parse it - cf.filename = fn - cf.parse() - return cf - - offset = 0 - if is_disk_image(fn): - offset = get_active_offset(fn) - if offset == -1: - raise RuntimeError, "Unable to find active partition on disk" - - # open the image and read the grub config - fs = None - for fstype in grub.fsys.fstypes.values(): - if fstype.sniff_magic(fn, offset): - fs = fstype.open_fs(fn, offset) + return struct.unpack("<L", + buf[poff+8:poff+12])[0] * SECTOR_SIZE + + # if there's not a partition marked as active, fall back to + # the first partition + P1 = 446 + return struct.unpack("<L", buf[P1+8:P1+12])[0] * SECTOR_SIZE + +class GrubLineEditor(curses.textpad.Textbox): + def __init__(self, screen, startx, starty, line = ""): + screen.addstr(startx, starty, "> ") + screen.refresh() + win = curses.newwin(1, 74, startx, starty + 2) + curses.textpad.Textbox.__init__(self, win) + + self.line = list(line) + self.pos = len(line) + self.cancelled = False + self.show_text() + + def show_text(self): + """Show the text. 
One of our advantages over standard textboxes + is that we can handle lines longer than the window.""" + + self.win.clear() + if self.pos > 70: + if self.pos > 130: + off = 120 + else: + off = 55 + l = [ "<" ] + self.line[off:] + p = self.pos - off + else: + l = self.line[:70] + p = self.pos + self.win.addstr(0, 0, string.join(l, (""))) + if self.pos > 70: + self.win.addch(0, 0, curses.ACS_LARROW) + + self.win.move(0, p) + + def do_command(self, ch): + # we handle escape as well as moving the line around, so have + # to override some of the default handling + + self.lastcmd = ch + if ch == 27: # esc + self.cancelled = True + return 0 + elif curses.ascii.isprint(ch): + self.line.insert(self.pos, chr(ch)) + self.pos += 1 + elif ch == curses.ascii.SOH: # ^a + self.pos = 0 + elif ch in (curses.ascii.STX,curses.KEY_LEFT): + self.pos -= 1 + elif ch in (curses.ascii.BS,curses.KEY_BACKSPACE): + if self.pos > 0: + self.pos -= 1 + self.line.pop(self.pos) + elif ch == curses.ascii.EOT: # ^d + self.line.pop(self.pos) + elif ch == curses.ascii.ENQ: # ^e + self.pos = len(self.line) + elif ch in (curses.ascii.ACK, curses.KEY_RIGHT): + self.pos +=1 + elif ch == curses.ascii.VT: # ^k + self.line = self.line[:self.pos] + else: + return curses.textpad.Textbox.do_command(self, ch) + self.show_text() + return 1 + + def edit(self): + r = curses.textpad.Textbox.edit(self) + if self.cancelled: + return None + return string.join(self.line, "") + + +class Grub: + def __init__(self, file, isconfig = False): + self.screen = None + self.entry_win = None + self.text_win = None + if file: + self.read_config(file, isconfig) + + def draw_main_windows(self): + if self.screen is None: #only init stuff once + self.screen = curses.initscr() + self.screen.timeout(1000) + if hasattr(curses, 'use_default_colors'): + curses.use_default_colors() + try: + curses.curs_set(0) + except _curses.error: + pass + self.entry_win = curses.newwin(10, 74, 2, 1) + self.text_win = curses.newwin(10, 70, 12, 5) + + 
self.screen.clear() + self.screen.refresh() + + # create basic grub screen with a box of entries and a textbox + self.screen.addstr(1, 4, "pyGRUB version %s" %(PYGRUB_VER,)) + self.entry_win.box() + self.screen.refresh() + + def fill_entry_list(self): + self.entry_win.clear() + self.entry_win.box() + for y in range(0, len(self.cf.images)): + i = self.cf.images[y] + if (0, y) > self.entry_win.getmaxyx(): + break + if y == self.selected_image: + attr = curses.A_REVERSE + else: + attr = 0 + self.entry_win.addstr(y + 1, 2, i.title.ljust(70), attr) + self.entry_win.refresh() + + def edit_entry(self, origimg): + def draw(): + self.draw_main_windows() + + self.text_win.addstr(0, 0, "Use the U and D keys to select which entry is highlighted.") + self.text_win.addstr(1, 0, "Press 'b' to boot, 'e' to edit the selected command in the") + self.text_win.addstr(2, 0, "boot sequence, 'c' for a command-line, 'o' to open a new line") + self.text_win.addstr(3, 0, "after ('O' for before) the selected line, 'd' to remove the") + self.text_win.addstr(4, 0, "selected line, or escape to go back to the main menu.") + self.text_win.addch(0, 8, curses.ACS_UARROW) + self.text_win.addch(0, 14, curses.ACS_DARROW) + (y, x) = self.text_win.getmaxyx() + self.text_win.move(y - 1, x - 1) + self.text_win.refresh() + + curline = 1 + img = copy.deepcopy(origimg) + while 1: + draw() + self.entry_win.clear() + self.entry_win.box() + for idx in range(1, len(img.lines)): + # current line should be highlighted + attr = 0 + if idx == curline: + attr = curses.A_REVERSE + + # trim the line + l = img.lines[idx].ljust(70) + if len(l) > 70: + l = l[:69] + ">" + + self.entry_win.addstr(idx, 2, l, attr) + self.entry_win.refresh() + + c = self.screen.getch() + if c in (ord('q'), 27): # 27 == esc + break + elif c == curses.KEY_UP: + curline -= 1 + elif c == curses.KEY_DOWN: + curline += 1 + elif c == ord('b'): + self.isdone = True + break + elif c == ord('e'): + l = self.edit_line(img.lines[curline]) + if l is not 
None: + img.set_from_line(l, replace = curline) + elif c == ord('d'): + img.lines.pop(curline) + elif c == ord('o'): + img.lines.insert(curline+1, "") + curline += 1 + elif c == ord('O'): + img.lines.insert(curline, "") + elif c == ord('c'): + self.command_line_mode() + if self.isdone: + return + + # bound at the top and bottom + if curline < 1: + curline = 1 + elif curline >= len(img.lines): + curline = len(img.lines) - 1 + + if self.isdone: + origimg.reset(img.lines) + + def edit_line(self, line): + self.screen.clear() + self.screen.addstr(1, 2, "[ Minimal BASH-like line editing is supported. ") + self.screen.addstr(2, 2, " ESC at any time cancels. ENTER at any time accepts your changes. ]") + self.screen.refresh() + + t = GrubLineEditor(self.screen, 5, 2, line) + ret = t.edit() + if ret: + return ret + return None + + def command_line_mode(self): + self.screen.clear() + self.screen.addstr(1, 2, "[ Minimal BASH-like line editing is supported. ESC at any time ") + self.screen.addstr(2, 2, " exits. Typing 'boot' will boot with your entered commands. ] ") + self.screen.refresh() + + y = 5 + lines = [] + while 1: + t = GrubLineEditor(self.screen, y, 2) + ret = t.edit() + if ret: + if ret in ("quit", "return"): + break + elif ret != "boot": + y += 1 + lines.append(ret) + continue + + # if we got boot, then we want to boot the entered image + img = grub.GrubConf.GrubImage(lines) + self.cf.add_image(img) + self.selected_image = len(self.cf.images) - 1 + self.isdone = True + break + + # else, we cancelled and should just go back break - if fs is not None: - grubfile = None - for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", - "/grub/menu.lst", "/grub/grub.conf"): - if fs.file_exist(f): - grubfile = f - break - if grubfile is None: - raise RuntimeError, "we couldn't find /boot/grub{menu.lst,grub.conf} " + \ - "in the image provided. halt!" 
- f = fs.open_file(grubfile) - buf = f.read() - f.close() - fs.close() - # then parse the grub config - cf.parse(buf) - else: - raise RuntimeError, "Unable to read filesystem" - - return cf - + def read_config(self, fn, isConfig = False): + """Read the given file to parse the config. If isconfig, then + we're being given a raw config file rather than a disk image.""" + + if not os.access(fn, os.R_OK): + raise RuntimeError, "Unable to access %s" %(fn,) + + self.cf = grub.GrubConf.GrubConfigFile() + + if isConfig: + # set the config file and parse it + self.cf.filename = fn + self.cf.parse() + return + + offset = 0 + if is_disk_image(fn): + offset = get_active_offset(fn) + if offset == -1: + raise RuntimeError, "Unable to find active partition on disk" + + # open the image and read the grub config + fs = None + for fstype in grub.fsys.fstypes.values(): + if fstype.sniff_magic(fn, offset): + fs = fstype.open_fs(fn, offset) + break + + if fs is not None: + grubfile = None + for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", + "/grub/menu.lst", "/grub/grub.conf"): + if fs.file_exist(f): + grubfile = f + break + if grubfile is None: + raise RuntimeError, "we couldn't find grub config file in the image provided." + f = fs.open_file(grubfile) + buf = f.read() + f.close() + fs.close() + # then parse the grub config + self.cf.parse(buf) + else: + raise RuntimeError, "Unable to read filesystem" + + def run(self): + timeout = int(self.cf.timeout) + + self.selected_image = self.cf.default + self.isdone = False + while not self.isdone: + self.run_main(timeout) + timeout = -1 + + return self.selected_image + + def run_main(self, timeout = -1): + def draw(): + # set up the screen + self.draw_main_windows() + self.text_win.addstr(0, 0, "Use the U and D keys to select which entry is highlighted.") + self.text_win.addstr(1, 0, "Press enter to boot the selected OS. 
'e' to edit the") + self.text_win.addstr(2, 0, "commands before booting, 'a' to modify the kernel arguments ") + self.text_win.addstr(3, 0, "before booting, or 'c' for a command line.") + self.text_win.addch(0, 8, curses.ACS_UARROW) + self.text_win.addch(0, 14, curses.ACS_DARROW) + (y, x) = self.text_win.getmaxyx() + self.text_win.move(y - 1, x - 1) + self.text_win.refresh() + + # now loop until we hit the timeout or get a go from the user + mytime = 0 + while (timeout == -1 or mytime < int(timeout)): + draw() + if timeout != -1 and mytime != -1: + self.screen.addstr(20, 5, "Will boot selected entry in %2d seconds" + %(int(timeout) - mytime)) + else: + self.screen.addstr(20, 5, " " * 80) + + self.fill_entry_list() + c = self.screen.getch() + if mytime != -1: + mytime += 1 + + # handle keypresses + if c == ord('c'): + self.command_line_mode() + break + elif c == ord('a'): + # find the kernel line, edit it and then boot + img = self.cf.images[self.selected_image] + for line in img.lines: + if line.startswith("kernel"): + l = self.edit_line(line) + if l is not None: + img.set_from_line(l, replace = True) + self.isdone = True + break + break + elif c == ord('e'): + img = self.cf.images[self.selected_image] + self.edit_entry(img) + break + elif c in (curses.KEY_ENTER, ord('\n'), ord('\r')): + self.isdone = True + break + elif c == curses.KEY_UP: + mytime = -1 + self.selected_image -= 1 + elif c == curses.KEY_DOWN: + mytime = -1 + self.selected_image += 1 +# elif c in (ord('q'), 27): # 27 == esc +# self.selected_image = -1 +# self.isdone = True +# break + + # bound at the top and bottom + if self.selected_image < 0: + self.selected_image = 0 + elif self.selected_image >= len(self.cf.images): + self.selected_image = len(self.cf.images) - 1 + def get_entry_idx(cf, entry): # first, see if the given entry is numeric try: @@ -155,63 +421,12 @@ def get_entry_idx(cf, entry): return None -def main(cf = None): - mytime = 0 - timeout = int(cf.timeout) - - (stdscr, win) = 
draw_window() - stdscr.timeout(1000) - selected = cf.default - - while (timeout == -1 or mytime < int(timeout)): - if timeout != -1 and mytime != -1: - stdscr.addstr(20, 5, "Will boot selected entry in %2d seconds" - %(int(timeout) - mytime)) - else: - stdscr.addstr(20, 5, " " * 80) - - fill_entries(win, cf, selected) - c = stdscr.getch() - if mytime != -1: - mytime += 1 -# if c == ord('q'): -# selected = -1 -# break - if c == ord('c'): - # FIXME: needs to go to command line mode - continue - elif c == ord('a'): - # FIXME: needs to go to append mode - continue - elif c == ord('e'): - # FIXME: needs to go to edit mode - continue - elif c in (curses.KEY_ENTER, ord('\n'), ord('\r')): - break - elif c == curses.KEY_UP: - mytime = -1 - selected -= 1 - elif c == curses.KEY_DOWN: - mytime = -1 - selected += 1 - else: - pass - - # bound at the top and bottom - if selected < 0: - selected = 0 - elif selected >= len(cf.images): - selected = len(cf.images) - 1 - - if selected >= 0: - return selected - if __name__ == "__main__": sel = None def run_main(scr, *args): global sel - sel = main(cf) + sel = g.run() def usage(): print >> sys.stderr, "Usage: %s [-q|--quiet] [--output=] [--entry=] <image>" %(sys.argv[0],) @@ -253,24 +468,32 @@ if __name__ == "__main__": else: fd = os.open(output, os.O_WRONLY) - cf = get_config(file, isconfig) + g = Grub(file, isconfig) if interactive: curses.wrapper(run_main) else: - sel = cf.default + sel = g.cf.default # set the entry to boot as requested if entry is not None: - idx = get_entry_idx(cf, entry) - if idx is not None and idx > 0 and idx < len(cf.images): + idx = get_entry_idx(g.cf, entry) + if idx is not None and idx > 0 and idx < len(g.cf.images): sel = idx - img = cf.images[sel] + if sel == -1: + print "No kernel image selected!" 
+ sys.exit(1) + + img = g.cf.images[sel] print "Going to boot %s" %(img.title) print " kernel: %s" %(img.kernel[1],) if img.initrd: print " initrd: %s" %(img.initrd[1],) + if isconfig: + print " args: %s" %(img.args,) + sys.exit(0) + offset = 0 if is_disk_image(file): offset = get_active_offset(file) @@ -288,14 +511,14 @@ if __name__ == "__main__": raise RuntimeError, "Unable to open filesystem" kernel = fs.open_file(img.kernel[1],).read() - (tfd, fn) = tempfile.mkstemp(prefix="vmlinuz.") + (tfd, fn) = tempfile.mkstemp(prefix="vmlinuz.", dir="/var/lib/xen") os.write(tfd, kernel) os.close(tfd) sxp = "linux (kernel %s)" %(fn,) if img.initrd: initrd = fs.open_file(img.initrd[1],).read() - (tfd, fn) = tempfile.mkstemp(prefix="initrd.") + (tfd, fn) = tempfile.mkstemp(prefix="initrd.", dir="/var/lib/xen") os.write(tfd, initrd) os.close(tfd) sxp += "(ramdisk %s)" %(fn,) diff -r e4f1519b473f -r b60ea69932b1 tools/python/xen/util/xmlrpclib2.py --- a/tools/python/xen/util/xmlrpclib2.py Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/python/xen/util/xmlrpclib2.py Wed Aug 09 18:04:20 2006 +0100 @@ -138,6 +138,11 @@ class TCPXMLRPCServer(SocketServer.Threa def _marshaled_dispatch(self, data, dispatch_method = None): params, method = xmlrpclib.loads(data) + if False: + # Enable this block of code to exit immediately without sending + # a response. This allows you to test client-side crash handling. 
+ import sys + sys.exit(1) try: if dispatch_method is not None: response = dispatch_method(method, params) diff -r e4f1519b473f -r b60ea69932b1 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/python/xen/xend/XendDomain.py Wed Aug 09 18:04:20 2006 +0100 @@ -402,9 +402,9 @@ class XendDomain: val = dominfo.destroy() else: try: - val = xc.domain_destroy(domid) + val = xc.domain_destroy(int(domid)) except Exception, ex: - raise XendError(str(ex)) + raise XendInvalidDomain(str(domid)) return val def domain_migrate(self, domid, dst, live=False, resource=0, port=0): diff -r e4f1519b473f -r b60ea69932b1 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/python/xen/xend/image.py Wed Aug 09 18:04:20 2006 +0100 @@ -260,7 +260,7 @@ class HVMImageHandler(ImageHandler): if a == 'stdvga': a = 'std-vga' # Handle booleans gracefully - if a in ['localtime', 'std-vga', 'isa', 'usb']: + if a in ['localtime', 'std-vga', 'isa', 'usb', 'acpi']: if v != None: v = int(v) if v: ret.append("-%s" % a) else: @@ -307,6 +307,7 @@ class HVMImageHandler(ImageHandler): vnc = sxp.child_value(config, 'vnc') vncdisplay = sxp.child_value(config, 'vncdisplay', int(self.vm.getDomid())) + vncunused = sxp.child_value(config, 'vncunused') sdl = sxp.child_value(config, 'sdl') ret = [] nographic = sxp.child_value(config, 'nographic') @@ -315,6 +316,8 @@ class HVMImageHandler(ImageHandler): return ret if vnc: ret = ret + ['-vnc', '%d' % vncdisplay, '-k', 'en-us'] + if vncunused: + ret += ['-vncunused'] return ret def createDeviceModel(self): diff -r e4f1519b473f -r b60ea69932b1 tools/python/xen/xend/sxp.py --- a/tools/python/xen/xend/sxp.py Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/python/xen/xend/sxp.py Wed Aug 09 18:04:20 2006 +0100 @@ -291,7 +291,7 @@ class Parser: raise ParseError(self, "unexpected EOF") elif '0' <= c <= '7': octaldigit(c) - elif len(self.buf): + elif 
len(self.state.buf): octaldone() self.input_char(c) diff -r e4f1519b473f -r b60ea69932b1 tools/python/xen/xend/tests/test_sxp.py --- a/tools/python/xen/xend/tests/test_sxp.py Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/python/xen/xend/tests/test_sxp.py Wed Aug 09 18:04:20 2006 +0100 @@ -14,5 +14,26 @@ class test_sxp(unittest.TestCase): t('(String) (Thing)', [['String'], ['Thing']]) + def testParseFixed(self): + fin = file('../xen/xend/tests/xend-config.sxp', 'rb') + try: + config = xen.xend.sxp.parse(fin) + self.assertEqual( + xen.xend.sxp.child_value( + config, + 'xend-relocation-hosts-allow'), + '^localhost$ ^localhost\\.localdomain$') + finally: + fin.close() + + + def testParseConfigExample(self): + fin = file('../../examples/xend-config.sxp', 'rb') + try: + config = xen.xend.sxp.parse(fin) + finally: + fin.close() + + def test_suite(): return unittest.makeSuite(test_sxp) diff -r e4f1519b473f -r b60ea69932b1 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/python/xen/xm/create.py Wed Aug 09 18:04:20 2006 +0100 @@ -411,6 +411,11 @@ gopts.var('vncdisplay', val='', gopts.var('vncdisplay', val='', fn=set_value, default=None, use="""VNC display to use""") + +gopts.var('vncunused', val='', + fn=set_bool, default=1, + use="""Try to find an unused port for the VNC server. + Only valid when vnc=1.""") gopts.var('sdl', val='', fn=set_value, default=None, @@ -627,7 +632,7 @@ def configure_hvm(config_image, vals): """ args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb', 'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw', - 'vnc', 'vncdisplay', 'vncconsole', 'sdl', 'display', + 'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'sdl', 'display', 'acpi', 'apic', 'xauthority', 'usb', 'usbdevice' ] for a in args: if (vals.__dict__[a]): @@ -844,14 +849,58 @@ def choose_vnc_display(): vncpid = None +def daemonize(prog, args): + """Runs a program as a daemon with the list of arguments. 
Returns the PID + of the daemonized program, or returns 0 on error. + """ + r, w = os.pipe() + pid = os.fork() + + if pid == 0: + os.close(r) + w = os.fdopen(w, 'w') + os.setsid() + try: + pid2 = os.fork() + except: + pid2 = None + if pid2 == 0: + os.chdir("/") + for fd in range(0, 256): + try: + os.close(fd) + except: + pass + os.open("/dev/null", os.O_RDWR) + os.dup2(0, 1) + os.dup2(0, 2) + os.execvp(prog, args) + os._exit(1) + else: + w.write(str(pid2 or 0)) + w.close() + os._exit(0) + + os.close(w) + r = os.fdopen(r) + daemon_pid = int(r.read()) + r.close() + os.waitpid(pid, 0) + return daemon_pid + def spawn_vnc(display): + """Spawns a vncviewer that listens on the specified display. On success, + returns the port that the vncviewer is listening on and sets the global + vncpid. On failure, returns 0. Note that vncviewer is daemonized. + """ vncargs = (["vncviewer", "-log", "*:stdout:0", "-listen", "%d" % (VNC_BASE_PORT + display) ]) - global vncpid - vncpid = os.spawnvp(os.P_NOWAIT, "vncviewer", vncargs) - + global vncpid + vncpid = daemonize("vncviewer", vncargs) + if vncpid == 0: + return 0 return VNC_BASE_PORT + display - + def preprocess_vnc(vals): """If vnc was specified, spawn a vncviewer in listen mode and pass its address to the domain on the kernel command line. @@ -928,7 +977,7 @@ def make_domain(opts, config): import signal if vncpid: os.kill(vncpid, signal.SIGKILL) - raise ex + raise dom = sxp.child_value(dominfo, 'name') diff -r e4f1519b473f -r b60ea69932b1 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/python/xen/xm/main.py Wed Aug 09 18:04:20 2006 +0100 @@ -1257,6 +1257,16 @@ def main(argv=sys.argv): else: print >>sys.stderr, "Error: %s" % ex.faultString sys.exit(1) + except xmlrpclib.ProtocolError, ex: + if ex.errcode == -1: + print >>sys.stderr, ( + "Xend has probably crashed! Invalid or missing HTTP " + "status code.") + else: + print >>sys.stderr, ( + "Xend has probably crashed! 
ProtocolError(%d, %s)." % + (ex.errcode, ex.errmsg)) + sys.exit(1) except (ValueError, OverflowError): err("Invalid argument.") usage(argv[1]) diff -r e4f1519b473f -r b60ea69932b1 tools/python/xen/xm/tests/test_create.py --- a/tools/python/xen/xm/tests/test_create.py Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/python/xen/xm/tests/test_create.py Wed Aug 09 18:04:20 2006 +0100 @@ -179,7 +179,8 @@ ne2000=0 'extra' : ('VNC_VIEWER=%s:%d ' % (xen.xm.create.get_host_addr(), - xen.xm.create.VNC_BASE_PORT + 1)), + xen.xm.create.VNC_BASE_PORT + + xen.xm.create.choose_vnc_display())), 'vnc' : 1, 'vncviewer' : 1, diff -r e4f1519b473f -r b60ea69932b1 tools/xenstat/libxenstat/src/xenstat.c --- a/tools/xenstat/libxenstat/src/xenstat.c Tue Aug 08 19:07:32 2006 -0500 +++ b/tools/xenstat/libxenstat/src/xenstat.c Wed Aug 09 18:04:20 2006 +0100 @@ -103,7 +103,10 @@ struct xenstat_vbd { * Data-collection types */ /* Called to collect the information for the node and all the domains on - * it. When called, the domain information has already been collected. */ + * it. When called, the domain information has already been collected. + * Return status is 0 if fatal error occurs, 1 for success. Collectors + * may prune a domain from the list if it has been deleted between the + * time the list was setup and the time the colector is called */ typedef int (*xenstat_collect_func)(xenstat_node * node); /* Called to free the information collected by the collect function. 
The free * function will only be called on a xenstat_node if that node includes @@ -134,6 +137,7 @@ static void xenstat_uninit_xen_version(x static void xenstat_uninit_xen_version(xenstat_handle * handle); static void xenstat_uninit_vbds(xenstat_handle * handle); static char *xenstat_get_domain_name(xenstat_handle * handle, unsigned int domain_id); +static void xenstat_prune_domain(xenstat_node *node, unsigned int entry); static xenstat_collector collectors[] = { { XENSTAT_VCPU, xenstat_collect_vcpus, @@ -208,7 +212,7 @@ xenstat_node *xenstat_get_node(xenstat_h xenstat_node *node; dom0_physinfo_t physinfo; dom0_getdomaininfo_t domaininfo[DOMAIN_CHUNK_SIZE]; - unsigned int num_domains, new_domains; + unsigned int new_domains; unsigned int i; /* Create the node */ @@ -242,15 +246,17 @@ xenstat_node *xenstat_get_node(xenstat_h return NULL; } - num_domains = 0; + node->num_domains = 0; do { xenstat_domain *domain, *tmp; new_domains = xc_domain_getinfolist(handle->xc_handle, - num_domains, DOMAIN_CHUNK_SIZE, domaininfo); + node->num_domains, + DOMAIN_CHUNK_SIZE, + domaininfo); tmp = realloc(node->domains, - (num_domains + new_domains) + (node->num_domains + new_domains) * sizeof(xenstat_domain)); if (tmp == NULL) { free(node->domains); @@ -259,12 +265,29 @@ xenstat_node *xenstat_get_node(xenstat_h } node->domains = tmp; - domain = node->domains + num_domains; + domain = node->domains + node->num_domains; + + /* zero out newly allocated memory in case error occurs below */ + memset(domain, 0, new_domains * sizeof(xenstat_domain)); for (i = 0; i < new_domains; i++) { /* Fill in domain using domaininfo[i] */ domain->id = domaininfo[i].domain; - domain->name = xenstat_get_domain_name(handle, domaininfo[i].domain); + domain->name = xenstat_get_domain_name(handle, + domain->id); + if (domain->name == NULL) { + if (errno == ENOMEM) { + /* fatal error */ + xenstat_free_node(node); + return NULL; + } + else { + /* failed to get name -- this means the + domain is being destroyed 
so simply + ignore this entry */ + continue; + } + } domain->state = domaininfo[i].flags; domain->cpu_ns = domaininfo[i].cpu_time; domain->num_vcpus = (domaininfo[i].max_vcpu_id+1); @@ -284,10 +307,9 @@ xenstat_node *xenstat_get_node(xenstat_h domain->vbds = NULL; domain++; - } - num_domains += new_domains; + node->num_domains++; + } } while (new_domains == DOMAIN_CHUNK_SIZE); - node->num_domains = num_domains; /* Run all the extra data collectors requested */ node->flags = 0; @@ -495,10 +517,12 @@ xenstat_vbd *xenstat_domain_vbd(xenstat_ /* Collect information about VCPUs */ static int xenstat_collect_vcpus(xenstat_node * node) { - unsigned int i, vcpu; + unsigned int i, vcpu, inc_index; /* Fill in VCPU information */ - for (i = 0; i < node->num_domains; i++) { + for (i = 0; i < node->num_domains; i+=inc_index) { + inc_index = 1; /* default is to increment to next domain */ + node->domains[i].vcpus = malloc(node->domains[i].num_vcpus * sizeof(xenstat_vcpu)); if (node->domains[i].vcpus == NULL) @@ -509,11 +533,25 @@ static int xenstat_collect_vcpus(xenstat dom0_getvcpuinfo_t info; if (xc_vcpu_getinfo(node->handle->xc_handle, - node->domains[i].id, vcpu, &info) != 0) - return 0; - - node->domains[i].vcpus[vcpu].online = info.online; - node->domains[i].vcpus[vcpu].ns = info.cpu_time; + node->domains[i].id, vcpu, &info) != 0) { + if (errno == ENOMEM) { + /* fatal error */ + return 0; + } + else { + /* domain is in transition - remove + from list */ + xenstat_prune_domain(node, i); + + /* remember not to increment index! 
*/ + inc_index = 0; + break; + } + } + else { + node->domains[i].vcpus[vcpu].online = info.online; + node->domains[i].vcpus[vcpu].ns = info.cpu_time; + } } } return 1; @@ -884,13 +922,30 @@ static char *xenstat_get_domain_name(xen static char *xenstat_get_domain_name(xenstat_handle *handle, unsigned int domain_id) { char path[80]; - char *name; snprintf(path, sizeof(path),"/local/domain/%i/name", domain_id); - name = xs_read(handle->xshandle, XBT_NULL, path, NULL); - if (name == NULL) - name = strdup(" "); - - return name; + return xs_read(handle->xshandle, XBT_NULL, path, NULL); } + +/* Remove specified entry from list of domains */ +static void xenstat_prune_domain(xenstat_node *node, unsigned int entry) +{ + /* nothing to do if array is empty or entry is beyond end */ + if (node->num_domains == 0 || entry >= node->num_domains) + return; + + /* decrement count of domains */ + node->num_domains--; + + /* shift entries following specified entry up by one */ + if (entry < node->num_domains) { + xenstat_domain *domain = &node->domains[entry]; + memmove(domain,domain+1,node->num_domains-entry); + } + + /* zero out original last entry from node -- not + strictly necessary but safer! */ + memset(&node->domains[node->num_domains], 0, sizeof(xenstat_domain)); +} + diff -r e4f1519b473f -r b60ea69932b1 xen/arch/ia64/xen/hypercall.c --- a/xen/arch/ia64/xen/hypercall.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/ia64/xen/hypercall.c Wed Aug 09 18:04:20 2006 +0100 @@ -335,7 +335,7 @@ unsigned long hypercall_create_continuat unsigned long hypercall_create_continuation( unsigned int op, const char *format, ...) 
{ - struct mc_state *mcs = &mc_state[smp_processor_id()]; + struct mc_state *mcs = &this_cpu(mc_state); struct vcpu *v = current; const char *p = format; unsigned long arg; diff -r e4f1519b473f -r b60ea69932b1 xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/ia64/xen/xensetup.c Wed Aug 09 18:04:20 2006 +0100 @@ -423,13 +423,14 @@ void start_kernel(void) (xenheap_phys_end-__pa(heap_start)) >> 20, (xenheap_phys_end-__pa(heap_start)) >> 10); + late_setup_arch(&cmdline); + scheduler_init(); idle_vcpu[0] = (struct vcpu*) ia64_r13; idle_domain = domain_create(IDLE_DOMAIN_ID); if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) ) BUG(); - late_setup_arch(&cmdline); alloc_dom_xen_and_dom_io(); setup_per_cpu_areas(); mem_init(); diff -r e4f1519b473f -r b60ea69932b1 xen/arch/powerpc/setup.c --- a/xen/arch/powerpc/setup.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/powerpc/setup.c Wed Aug 09 18:04:20 2006 +0100 @@ -55,7 +55,7 @@ boolean_param("earlygdb", opt_earlygdb); boolean_param("earlygdb", opt_earlygdb); u32 tlbflush_clock = 1U; -u32 tlbflush_time[NR_CPUS]; +DEFINE_PER_CPU(u32, tlbflush_time); unsigned int watchdog_on; unsigned long wait_init_idle; diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/domain.c Wed Aug 09 18:04:20 2006 +0100 @@ -797,7 +797,7 @@ unsigned long hypercall_create_continuat unsigned long hypercall_create_continuation( unsigned int op, const char *format, ...) 
{ - struct mc_state *mcs = &mc_state[smp_processor_id()]; + struct mc_state *mcs = &this_cpu(mc_state); struct cpu_user_regs *regs; const char *p = format; unsigned long arg; diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/flushtlb.c --- a/xen/arch/x86/flushtlb.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/flushtlb.c Wed Aug 09 18:04:20 2006 +0100 @@ -20,7 +20,7 @@ #endif u32 tlbflush_clock = 1U; -u32 tlbflush_time[NR_CPUS]; +DEFINE_PER_CPU(u32, tlbflush_time); void write_cr3(unsigned long cr3) { @@ -71,7 +71,7 @@ void write_cr3(unsigned long cr3) * case, so really we are being ultra paranoid. */ - tlbflush_time[smp_processor_id()] = t2; + this_cpu(tlbflush_time) = t2; local_irq_restore(flags); } diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/hvm.c Wed Aug 09 18:04:20 2006 +0100 @@ -29,6 +29,7 @@ #include <xen/domain_page.h> #include <xen/hypercall.h> #include <xen/guest_access.h> +#include <xen/event.h> #include <asm/current.h> #include <asm/io.h> #include <asm/shadow.h> @@ -133,15 +134,28 @@ static void e820_map_io_shared_callback( } } -void hvm_map_io_shared_page(struct vcpu *v) -{ - unsigned long mfn = INVALID_MFN; +static void e820_map_buffered_io_callback(struct domain *d, + struct e820entry *e, + void *data) +{ + unsigned long *mfn = data; + if ( e->type == E820_BUFFERED_IO ) { + ASSERT(*mfn == INVALID_MFN); + *mfn = gmfn_to_mfn(d, e->addr >> PAGE_SHIFT); + } +} + +void hvm_map_io_shared_pages(struct vcpu *v) +{ + unsigned long mfn; void *p; struct domain *d = v->domain; - if ( d->arch.hvm_domain.shared_page_va ) + if ( d->arch.hvm_domain.shared_page_va || + d->arch.hvm_domain.buffered_io_va ) return; + mfn = INVALID_MFN; e820_foreach(d, e820_map_io_shared_callback, &mfn); if ( mfn == INVALID_MFN ) @@ -158,7 +172,38 @@ void hvm_map_io_shared_page(struct vcpu } d->arch.hvm_domain.shared_page_va = (unsigned long)p; -} + + mfn = INVALID_MFN; + e820_foreach(d, 
e820_map_buffered_io_callback, &mfn); + if ( mfn != INVALID_MFN ) { + p = map_domain_page_global(mfn); + if ( p ) + d->arch.hvm_domain.buffered_io_va = (unsigned long)p; + } +} + +void hvm_create_event_channels(struct vcpu *v) +{ + vcpu_iodata_t *p; + struct vcpu *o; + + if ( v->vcpu_id == 0 ) { + /* Ugly: create event channels for every vcpu when vcpu 0 + starts, so that they're available for ioemu to bind to. */ + for_each_vcpu(v->domain, o) { + p = get_vio(v->domain, o->vcpu_id); + o->arch.hvm_vcpu.xen_port = p->vp_eport = + alloc_unbound_xen_event_channel(o, 0); + DPRINTK("Allocated port %d for hvm.\n", o->arch.hvm_vcpu.xen_port); + } + } +} + +void hvm_release_assist_channel(struct vcpu *v) +{ + free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port); +} + void hvm_setup_platform(struct domain* d) { @@ -175,7 +220,6 @@ void hvm_setup_platform(struct domain* d } hvm_zap_iommu_pages(d); - hvm_map_io_shared_page(v); platform = &d->arch.hvm_domain; pic_init(&platform->vpic, pic_irq_request, &platform->interrupt_request); @@ -186,6 +230,8 @@ void hvm_setup_platform(struct domain* d spin_lock_init(&d->arch.hvm_domain.round_robin_lock); hvm_vioapic_init(d); } + + spin_lock_init(&d->arch.hvm_domain.buffered_io_lock); init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, v->processor); diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/intercept.c Wed Aug 09 18:04:20 2006 +0100 @@ -36,10 +36,24 @@ extern struct hvm_mmio_handler vioapic_m #define HVM_MMIO_HANDLER_NR 2 -struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] = +static struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] = { &vlapic_mmio_handler, &vioapic_mmio_handler +}; + +struct hvm_buffered_io_range { + unsigned long start_addr; + unsigned long length; +}; + +#define HVM_BUFFERED_IO_RANGE_NR 1 + +static struct hvm_buffered_io_range buffered_stdvga_range = {0xA0000, 
0x20000}; +static struct hvm_buffered_io_range +*hvm_buffered_io_ranges[HVM_BUFFERED_IO_RANGE_NR] = +{ + &buffered_stdvga_range }; static inline void hvm_mmio_access(struct vcpu *v, @@ -140,6 +154,56 @@ static inline void hvm_mmio_access(struc } } +int hvm_buffered_io_intercept(ioreq_t *p) +{ + struct vcpu *v = current; + spinlock_t *buffered_io_lock; + buffered_iopage_t *buffered_iopage = + (buffered_iopage_t *)(v->domain->arch.hvm_domain.buffered_io_va); + unsigned long tmp_write_pointer = 0; + int i; + + /* ignore READ ioreq_t! */ + if ( p->dir == IOREQ_READ ) + return 0; + + for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) { + if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr && + p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr + + hvm_buffered_io_ranges[i]->length ) + break; + } + + if ( i == HVM_BUFFERED_IO_RANGE_NR ) + return 0; + + buffered_io_lock = &v->domain->arch.hvm_domain.buffered_io_lock; + spin_lock(buffered_io_lock); + + if ( buffered_iopage->write_pointer - buffered_iopage->read_pointer == + (unsigned long)IOREQ_BUFFER_SLOT_NUM ) { + /* the queue is full. + * send the iopacket through the normal path. + * NOTE: The arithimetic operation could handle the situation for + * write_pointer overflow. 
+ */ + spin_unlock(buffered_io_lock); + return 0; + } + + tmp_write_pointer = buffered_iopage->write_pointer % IOREQ_BUFFER_SLOT_NUM; + + memcpy(&buffered_iopage->ioreq[tmp_write_pointer], p, sizeof(ioreq_t)); + + /*make the ioreq_t visible before write_pointer*/ + wmb(); + buffered_iopage->write_pointer++; + + spin_unlock(buffered_io_lock); + + return 1; +} + int hvm_mmio_intercept(ioreq_t *p) { struct vcpu *v = current; @@ -211,7 +275,7 @@ void hlt_timer_fn(void *data) { struct vcpu *v = data; - evtchn_set_pending(v, iopacket_port(v)); + hvm_prod_vcpu(v); } static __inline__ void missed_ticks(struct periodic_time *pt) diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/io.c Wed Aug 09 18:04:20 2006 +0100 @@ -687,84 +687,17 @@ void hvm_io_assist(struct vcpu *v) p = &vio->vp_ioreq; - /* clear IO wait HVM flag */ - if ( test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) { - if ( p->state == STATE_IORESP_READY ) { - p->state = STATE_INVALID; - clear_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags); - - if ( p->type == IOREQ_TYPE_PIO ) - hvm_pio_assist(regs, p, io_opp); - else - hvm_mmio_assist(regs, p, io_opp); - - /* Copy register changes back into current guest state. */ - hvm_load_cpu_guest_regs(v, regs); - memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES); - } - /* else an interrupt send event raced us */ - } -} - -/* - * On exit from hvm_wait_io, we're guaranteed not to be waiting on - * I/O response from the device model. - */ -void hvm_wait_io(void) -{ - struct vcpu *v = current; - struct domain *d = v->domain; - int port = iopacket_port(v); - - for ( ; ; ) - { - /* Clear master flag, selector flag, event flag each in turn. 
*/ - v->vcpu_info->evtchn_upcall_pending = 0; - clear_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - smp_mb__after_clear_bit(); - if ( test_and_clear_bit(port, &d->shared_info->evtchn_pending[0]) ) - hvm_io_assist(v); - - /* Need to wait for I/O responses? */ - if ( !test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) - break; - - do_sched_op_compat(SCHEDOP_block, 0); - } - - /* - * Re-set the selector and master flags in case any other notifications - * are pending. - */ - if ( d->shared_info->evtchn_pending[port/BITS_PER_LONG] ) - set_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - if ( v->vcpu_info->evtchn_pending_sel ) - v->vcpu_info->evtchn_upcall_pending = 1; -} - -void hvm_safe_block(void) -{ - struct vcpu *v = current; - struct domain *d = v->domain; - int port = iopacket_port(v); - - for ( ; ; ) - { - /* Clear master flag & selector flag so we will wake from block. */ - v->vcpu_info->evtchn_upcall_pending = 0; - clear_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - smp_mb__after_clear_bit(); - - /* Event pending already? */ - if ( test_bit(port, &d->shared_info->evtchn_pending[0]) ) - break; - - do_sched_op_compat(SCHEDOP_block, 0); - } - - /* Reflect pending event in selector and master flags. */ - set_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - v->vcpu_info->evtchn_upcall_pending = 1; + if ( p->state == STATE_IORESP_READY ) { + p->state = STATE_INVALID; + if ( p->type == IOREQ_TYPE_PIO ) + hvm_pio_assist(regs, p, io_opp); + else + hvm_mmio_assist(regs, p, io_opp); + + /* Copy register changes back into current guest state. 
*/ + hvm_load_cpu_guest_regs(v, regs); + memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES); + } } /* diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/platform.c Wed Aug 09 18:04:20 2006 +0100 @@ -669,6 +669,30 @@ int inst_copy_from_guest(unsigned char * return inst_len; } +static void hvm_send_assist_req(struct vcpu *v) +{ + ioreq_t *p; + + p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; + if ( unlikely(p->state != STATE_INVALID) ) { + /* This indicates a bug in the device model. Crash the + domain. */ + printf("Device model set bad IO state %d.\n", p->state); + domain_crash(v->domain); + return; + } + wmb(); + p->state = STATE_IOREQ_READY; + notify_via_xen_event_channel(v->arch.hvm_vcpu.xen_port); +} + + +/* Wake up a vcpu whihc is waiting for interrupts to come in */ +void hvm_prod_vcpu(struct vcpu *v) +{ + vcpu_unblock(v); +} + void send_pio_req(struct cpu_user_regs *regs, unsigned long port, unsigned long count, int size, long value, int dir, int pvalid) { @@ -682,13 +706,10 @@ void send_pio_req(struct cpu_user_regs * domain_crash_synchronous(); } - if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)) { - printf("HVM I/O has not yet completed\n"); - domain_crash_synchronous(); - } - set_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags); - p = &vio->vp_ioreq; + if ( p->state != STATE_INVALID ) + printf("WARNING: send pio with something already pending (%d)?\n", + p->state); p->dir = dir; p->pdata_valid = pvalid; @@ -714,10 +735,7 @@ void send_pio_req(struct cpu_user_regs * return; } - p->state = STATE_IOREQ_READY; - - evtchn_send(iopacket_port(v)); - hvm_wait_io(); + hvm_send_assist_req(v); } void send_mmio_req( @@ -739,12 +757,9 @@ void send_mmio_req( p = &vio->vp_ioreq; - if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)) { - printf("HVM I/O has not yet completed\n"); - domain_crash_synchronous(); - } - - set_bit(ARCH_HVM_IO_WAIT, 
&v->arch.hvm_vcpu.ioflags); + if ( p->state != STATE_INVALID ) + printf("WARNING: send mmio with something already pending (%d)?\n", + p->state); p->dir = dir; p->pdata_valid = pvalid; @@ -764,16 +779,13 @@ void send_mmio_req( } else p->u.data = value; - if (hvm_mmio_intercept(p)){ + if ( hvm_mmio_intercept(p) || hvm_buffered_io_intercept(p) ) { p->state = STATE_IORESP_READY; hvm_io_assist(v); return; } - p->state = STATE_IOREQ_READY; - - evtchn_send(iopacket_port(v)); - hvm_wait_io(); + hvm_send_assist_req(v); } static void mmio_operands(int type, unsigned long gpa, struct instruction *inst, diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/svm/svm.c Wed Aug 09 18:04:20 2006 +0100 @@ -25,6 +25,7 @@ #include <xen/sched.h> #include <xen/irq.h> #include <xen/softirq.h> +#include <xen/hypercall.h> #include <asm/current.h> #include <asm/io.h> #include <asm/shadow.h> @@ -808,6 +809,9 @@ static void svm_relinquish_guest_resourc if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( (void *)d->arch.hvm_domain.shared_page_va); + + if ( d->arch.hvm_domain.buffered_io_va ) + unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va); shadow_direct_map_clean(d); } @@ -2121,7 +2125,7 @@ static inline void svm_vmexit_do_hlt(str next_wakeup = next_pit; if ( next_wakeup != - 1 ) set_timer(¤t->arch.hvm_svm.hlt_timer, next_wakeup); - hvm_safe_block(); + do_sched_op_compat(SCHEDOP_block, 0); } diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/svm/vmcb.c Wed Aug 09 18:04:20 2006 +0100 @@ -370,18 +370,6 @@ void svm_do_launch(struct vcpu *v) if (v->vcpu_id == 0) hvm_setup_platform(v->domain); - if ( evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0 ) - { - printk("HVM domain bind port %d to vcpu %d failed!\n", - iopacket_port(v), v->vcpu_id); - 
domain_crash_synchronous(); - } - - HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x", iopacket_port(v)); - - clear_bit(iopacket_port(v), - &v->domain->shared_info->evtchn_mask[0]); - if (hvm_apic_support(v->domain)) vlapic_init(v); init_timer(&v->arch.hvm_svm.hlt_timer, @@ -439,10 +427,12 @@ void set_hsa_to_guest( struct arch_svm_s /* * Resume the guest. */ +/* XXX svm_do_resume and vmx_do_resume are remarkably similar; could + they be unified? */ void svm_do_resume(struct vcpu *v) { - struct domain *d = v->domain; - struct periodic_time *pt = &d->arch.hvm_domain.pl_time.periodic_tm; + struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm; + ioreq_t *p; svm_stts(v); @@ -455,12 +445,16 @@ void svm_do_resume(struct vcpu *v) pickup_deactive_ticks(pt); } - if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || - test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) - hvm_wait_io(); - - /* We can't resume the guest if we're waiting on I/O */ - ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)); + p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; + wait_on_xen_event_channel(v->arch.hvm.xen_port, + p->state != STATE_IOREQ_READY && + p->state != STATE_IOREQ_INPROCESS); + if ( p->state == STATE_IORESP_READY ) + hvm_io_assist(v); + if ( p->state != STATE_INVALID ) { + printf("Weird HVM iorequest state %d.\n", p->state); + domain_crash(v->domain); + } } void svm_launch_fail(unsigned long eflags) diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/svm/x86_32/exits.S --- a/xen/arch/x86/hvm/svm/x86_32/exits.S Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/svm/x86_32/exits.S Wed Aug 09 18:04:20 2006 +0100 @@ -132,6 +132,9 @@ ENTRY(svm_asm_do_resume) ENTRY(svm_asm_do_resume) svm_test_all_events: GET_CURRENT(%ebx) + pushl %ebx + call svm_do_resume + addl $4, %esp /*test_all_events:*/ xorl %ecx,%ecx notl %ecx diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/svm/x86_64/exits.S --- a/xen/arch/x86/hvm/svm/x86_64/exits.S Tue Aug 08 
19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/svm/x86_64/exits.S Wed Aug 09 18:04:20 2006 +0100 @@ -147,6 +147,8 @@ ENTRY(svm_asm_do_resume) ENTRY(svm_asm_do_resume) svm_test_all_events: GET_CURRENT(%rbx) + movq %rbx, %rdi + call svm_do_resume /*test_all_events:*/ cli # tests must not race interrupts /*test_softirqs:*/ diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/vlapic.c Wed Aug 09 18:04:20 2006 +0100 @@ -232,7 +232,7 @@ static int vlapic_accept_irq(struct vcpu "level trig mode for vector %d\n", vector); set_bit(vector, vlapic->regs + APIC_TMR); } - evtchn_set_pending(v, iopacket_port(v)); + hvm_prod_vcpu(v); result = 1; break; diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/vmx/io.c Wed Aug 09 18:04:20 2006 +0100 @@ -221,7 +221,7 @@ asmlinkage void vmx_intr_assist(void) void vmx_do_resume(struct vcpu *v) { - struct domain *d = v->domain; + ioreq_t *p; struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm; vmx_stts(); @@ -235,12 +235,16 @@ void vmx_do_resume(struct vcpu *v) pickup_deactive_ticks(pt); } - if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || - test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) - hvm_wait_io(); - - /* We can't resume the guest if we're waiting on I/O */ - ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)); + p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; + wait_on_xen_event_channel(v->arch.hvm.xen_port, + p->state != STATE_IOREQ_READY && + p->state != STATE_IOREQ_INPROCESS); + if ( p->state == STATE_IORESP_READY ) + hvm_io_assist(v); + if ( p->state != STATE_INVALID ) { + printf("Weird HVM iorequest state %d.\n", p->state); + domain_crash(v->domain); + } } /* diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Tue Aug 08 19:07:32 2006 
-0500 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Aug 09 18:04:20 2006 +0100 @@ -245,18 +245,6 @@ static void vmx_do_launch(struct vcpu *v if (v->vcpu_id == 0) hvm_setup_platform(v->domain); - if ( evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0 ) - { - printk("VMX domain bind port %d to vcpu %d failed!\n", - iopacket_port(v), v->vcpu_id); - domain_crash_synchronous(); - } - - HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x", iopacket_port(v)); - - clear_bit(iopacket_port(v), - &v->domain->shared_info->evtchn_mask[0]); - __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (cr0) : ); error |= __vmwrite(GUEST_CR0, cr0); diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Aug 09 18:04:20 2006 +0100 @@ -25,6 +25,7 @@ #include <xen/irq.h> #include <xen/softirq.h> #include <xen/domain_page.h> +#include <xen/hypercall.h> #include <asm/current.h> #include <asm/io.h> #include <asm/shadow.h> @@ -48,8 +49,8 @@ #include <asm/hvm/vpic.h> #include <asm/hvm/vlapic.h> -static unsigned long trace_values[NR_CPUS][5]; -#define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value +static DEFINE_PER_CPU(unsigned long, trace_values[5]); +#define TRACE_VMEXIT(index,value) this_cpu(trace_values)[index]=value static void vmx_ctxt_switch_from(struct vcpu *v); static void vmx_ctxt_switch_to(struct vcpu *v); @@ -141,6 +142,7 @@ static void vmx_relinquish_guest_resourc free_domheap_page(VLAPIC(v)->regs_page); xfree(VLAPIC(v)); } + hvm_release_assist_channel(v); } kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer); @@ -149,12 +151,15 @@ static void vmx_relinquish_guest_resourc unmap_domain_page_global( (void *)d->arch.hvm_domain.shared_page_va); + if ( d->arch.hvm_domain.buffered_io_va ) + unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va); + shadow_direct_map_clean(d); } #ifdef __x86_64__ -static struct vmx_msr_state percpu_msr[NR_CPUS]; +static 
DEFINE_PER_CPU(struct vmx_msr_state, percpu_msr); static u32 msr_data_index[VMX_MSR_COUNT] = { @@ -175,7 +180,7 @@ static void vmx_save_segments(struct vcp */ static void vmx_load_msrs(void) { - struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()]; + struct vmx_msr_state *host_state = &this_cpu(percpu_msr); int i; while ( host_state->flags ) @@ -188,7 +193,7 @@ static void vmx_load_msrs(void) static void vmx_save_init_msrs(void) { - struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()]; + struct vmx_msr_state *host_state = &this_cpu(percpu_msr); int i; for ( i = 0; i < VMX_MSR_COUNT; i++ ) @@ -277,7 +282,7 @@ static inline int long_mode_do_msr_write u64 msr_content = regs->eax | ((u64)regs->edx << 32); struct vcpu *v = current; struct vmx_msr_state *msr = &v->arch.hvm_vmx.msr_content; - struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()]; + struct vmx_msr_state *host_state = &this_cpu(percpu_msr); HVM_DBG_LOG(DBG_LEVEL_1, "msr 0x%lx msr_content 0x%"PRIx64"\n", (unsigned long)regs->ecx, msr_content); @@ -359,7 +364,7 @@ static void vmx_restore_msrs(struct vcpu unsigned long guest_flags ; guest_state = &v->arch.hvm_vmx.msr_content;; - host_state = &percpu_msr[smp_processor_id()]; + host_state = &this_cpu(percpu_msr); wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs); guest_flags = guest_state->flags; @@ -671,28 +676,6 @@ static int check_vmx_controls(u32 ctrls, return 1; } -/* Setup HVM interfaces */ -static void vmx_setup_hvm_funcs(void) -{ - if ( hvm_enabled ) - return; - - hvm_funcs.disable = stop_vmx; - - hvm_funcs.initialize_guest_resources = vmx_initialize_guest_resources; - hvm_funcs.relinquish_guest_resources = vmx_relinquish_guest_resources; - - hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs; - hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs; - - hvm_funcs.realmode = vmx_realmode; - hvm_funcs.paging_enabled = vmx_paging_enabled; - hvm_funcs.instruction_length = vmx_instruction_length; - 
hvm_funcs.get_guest_ctrl_reg = vmx_get_ctrl_reg; - - hvm_funcs.init_ap_context = vmx_init_ap_context; -} - static void vmx_init_hypercall_page(struct domain *d, void *hypercall_page) { char *p; @@ -713,6 +696,30 @@ static void vmx_init_hypercall_page(stru /* Don't support HYPERVISOR_iret at the moment */ *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */ +} + +/* Setup HVM interfaces */ +static void vmx_setup_hvm_funcs(void) +{ + if ( hvm_enabled ) + return; + + hvm_funcs.disable = stop_vmx; + + hvm_funcs.initialize_guest_resources = vmx_initialize_guest_resources; + hvm_funcs.relinquish_guest_resources = vmx_relinquish_guest_resources; + + hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs; + hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs; + + hvm_funcs.realmode = vmx_realmode; + hvm_funcs.paging_enabled = vmx_paging_enabled; + hvm_funcs.instruction_length = vmx_instruction_length; + hvm_funcs.get_guest_ctrl_reg = vmx_get_ctrl_reg; + + hvm_funcs.init_ap_context = vmx_init_ap_context; + + hvm_funcs.init_hypercall_page = vmx_init_hypercall_page; } int start_vmx(void) @@ -780,8 +787,6 @@ int start_vmx(void) vmx_save_init_msrs(); vmx_setup_hvm_funcs(); - - hvm_funcs.init_hypercall_page = vmx_init_hypercall_page; hvm_enabled = 1; @@ -2014,7 +2019,7 @@ void vmx_vmexit_do_hlt(void) next_wakeup = next_pit; if ( next_wakeup != - 1 ) set_timer(&current->arch.hvm_vmx.hlt_timer, next_wakeup); - hvm_safe_block(); + do_sched_op_compat(SCHEDOP_block, 0); } static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs) @@ -2128,12 +2133,10 @@ asmlinkage void vmx_vmexit_handler(struc asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs) { unsigned int exit_reason; - unsigned long exit_qualification, eip, inst_len = 0; + unsigned long exit_qualification, rip, inst_len = 0; struct vcpu *v = current; - int error; - - error = __vmread(VM_EXIT_REASON, &exit_reason); - BUG_ON(error); + + __vmread(VM_EXIT_REASON, &exit_reason); 
perfc_incra(vmexits, exit_reason); @@ -2172,11 +2175,9 @@ asmlinkage void vmx_vmexit_handler(struc domain_crash_synchronous(); } - __vmread(GUEST_RIP, &eip); TRACE_VMEXIT(0,exit_reason); - switch ( exit_reason ) - { + switch ( exit_reason ) { case EXIT_REASON_EXCEPTION_NMI: { /* @@ -2187,15 +2188,15 @@ asmlinkage void vmx_vmexit_handler(struc unsigned int vector; unsigned long va; - if (__vmread(VM_EXIT_INTR_INFO, &vector) - || !(vector & INTR_INFO_VALID_MASK)) - __hvm_bug(&regs); + if ( __vmread(VM_EXIT_INTR_INFO, &vector) || + !(vector & INTR_INFO_VALID_MASK) ) + domain_crash_synchronous(); vector &= INTR_INFO_VECTOR_MASK; TRACE_VMEXIT(1,vector); perfc_incra(cause_vector, vector); - switch (vector) { + switch ( vector ) { #ifdef XEN_DEBUGGER case TRAP_debug: { @@ -2236,7 +2237,7 @@ asmlinkage void vmx_vmexit_handler(struc { if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) ) domain_pause_for_debugger(); - else + else vmx_reflect_exception(v); break; } @@ -2260,7 +2261,7 @@ asmlinkage void vmx_vmexit_handler(struc (unsigned long)regs.ecx, (unsigned long)regs.edx, (unsigned long)regs.esi, (unsigned long)regs.edi); - if (!vmx_do_page_fault(va, &regs)) { + if ( !vmx_do_page_fault(va, &regs) ) { /* * Inject #PG using Interruption-Information Fields */ @@ -2281,6 +2282,9 @@ asmlinkage void vmx_vmexit_handler(struc } case EXIT_REASON_EXTERNAL_INTERRUPT: vmx_vmexit_do_extint(&regs); + break; + case EXIT_REASON_TRIPLE_FAULT: + domain_crash_synchronous(); break; case EXIT_REASON_PENDING_INTERRUPT: /* @@ -2296,7 +2300,7 @@ asmlinkage void vmx_vmexit_handler(struc v->arch.hvm_vcpu.u.vmx.exec_control); break; case EXIT_REASON_TASK_SWITCH: - __hvm_bug(&regs); + domain_crash_synchronous(); break; case EXIT_REASON_CPUID: vmx_vmexit_do_cpuid(&regs); @@ -2321,7 +2325,7 @@ asmlinkage void vmx_vmexit_handler(struc case EXIT_REASON_VMCALL: { __get_instruction_length(inst_len); - __vmread(GUEST_RIP, &eip); + __vmread(GUEST_RIP, &rip); __vmread(EXIT_QUALIFICATION, &exit_qualification); 
hvm_do_hypercall(&regs); @@ -2330,13 +2334,13 @@ asmlinkage void vmx_vmexit_handler(struc } case EXIT_REASON_CR_ACCESS: { - __vmread(GUEST_RIP, &eip); + __vmread(GUEST_RIP, &rip); __get_instruction_length(inst_len); __vmread(EXIT_QUALIFICATION, &exit_qualification); - HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx", - eip, inst_len, exit_qualification); - if (vmx_cr_access(exit_qualification, &regs)) + HVM_DBG_LOG(DBG_LEVEL_1, "rip = %lx, inst_len =%lx, exit_qualification = %lx", + rip, inst_len, exit_qualification); + if ( vmx_cr_access(exit_qualification, &regs) ) __update_guest_eip(inst_len); TRACE_VMEXIT(3,regs.error_code); TRACE_VMEXIT(4,exit_qualification); @@ -2360,13 +2364,14 @@ asmlinkage void vmx_vmexit_handler(struc __update_guest_eip(inst_len); break; case EXIT_REASON_MSR_WRITE: - __vmread(GUEST_RIP, &eip); vmx_do_msr_write(&regs); __get_instruction_length(inst_len); __update_guest_eip(inst_len); break; case EXIT_REASON_MWAIT_INSTRUCTION: - __hvm_bug(&regs); + case EXIT_REASON_MONITOR_INSTRUCTION: + case EXIT_REASON_PAUSE_INSTRUCTION: + domain_crash_synchronous(); break; case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: @@ -2375,15 +2380,15 @@ asmlinkage void vmx_vmexit_handler(struc case EXIT_REASON_VMREAD: case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: - case EXIT_REASON_VMOFF: - case EXIT_REASON_VMON: - /* Report invalid opcode exception when a VMX guest tries to execute + case EXIT_REASON_VMXOFF: + case EXIT_REASON_VMXON: + /* Report invalid opcode exception when a VMX guest tries to execute any of the VMX instructions */ vmx_inject_hw_exception(v, TRAP_invalid_op, VMX_DELIVER_NO_ERROR_CODE); break; default: - __hvm_bug(&regs); /* should not happen */ + domain_crash_synchronous(); /* should not happen */ } } @@ -2398,11 +2403,11 @@ asmlinkage void vmx_trace_vmentry (void) asmlinkage void vmx_trace_vmentry (void) { TRACE_5D(TRC_VMX_VMENTRY, - trace_values[smp_processor_id()][0], - trace_values[smp_processor_id()][1], - 
trace_values[smp_processor_id()][2], - trace_values[smp_processor_id()][3], - trace_values[smp_processor_id()][4]); + this_cpu(trace_values)[0], + this_cpu(trace_values)[1], + this_cpu(trace_values)[2], + this_cpu(trace_values)[3], + this_cpu(trace_values)[4]); TRACE_VMEXIT(0,9); TRACE_VMEXIT(1,9); TRACE_VMEXIT(2,9); diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/vmx/x86_32/exits.S --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S Wed Aug 09 18:04:20 2006 +0100 @@ -94,6 +94,9 @@ vmx_process_softirqs: ALIGN ENTRY(vmx_asm_do_vmentry) GET_CURRENT(%ebx) + pushl %ebx + call vmx_do_resume + addl $4, %esp cli # tests must not race interrupts movl VCPU_processor(%ebx),%eax diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Wed Aug 09 18:04:20 2006 +0100 @@ -105,6 +105,8 @@ vmx_process_softirqs: ALIGN ENTRY(vmx_asm_do_vmentry) GET_CURRENT(%rbx) + movq %rbx, %rdi + call vmx_do_resume cli # tests must not race interrupts movl VCPU_processor(%rbx),%eax diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/irq.c Wed Aug 09 18:04:20 2006 +0100 @@ -160,11 +160,12 @@ typedef struct { * Stack of interrupts awaiting EOI on each CPU. These must be popped in * order, as only the current highest-priority pending irq can be EOIed. */ -static struct { +struct pending_eoi { u8 vector; /* Vector awaiting EOI */ u8 ready; /* Ready for EOI now? 
*/ -} pending_eoi[NR_CPUS][NR_VECTORS] __cacheline_aligned; -#define pending_eoi_sp(cpu) (pending_eoi[cpu][NR_VECTORS-1].vector) +}; +static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_VECTORS]); +#define pending_eoi_sp(p) ((p)[NR_VECTORS-1].vector) static void __do_IRQ_guest(int vector) { @@ -172,7 +173,8 @@ static void __do_IRQ_guest(int vector) irq_desc_t *desc = &irq_desc[vector]; irq_guest_action_t *action = (irq_guest_action_t *)desc->action; struct domain *d; - int i, sp, cpu = smp_processor_id(); + int i, sp; + struct pending_eoi *peoi = this_cpu(pending_eoi); if ( unlikely(action->nr_guests == 0) ) { @@ -185,13 +187,13 @@ static void __do_IRQ_guest(int vector) if ( action->ack_type == ACKTYPE_EOI ) { - sp = pending_eoi_sp(cpu); - ASSERT((sp == 0) || (pending_eoi[cpu][sp-1].vector < vector)); + sp = pending_eoi_sp(peoi); + ASSERT((sp == 0) || (peoi[sp-1].vector < vector)); ASSERT(sp < (NR_VECTORS-1)); - pending_eoi[cpu][sp].vector = vector; - pending_eoi[cpu][sp].ready = 0; - pending_eoi_sp(cpu) = sp+1; - cpu_set(cpu, action->cpu_eoi_map); + peoi[sp].vector = vector; + peoi[sp].ready = 0; + pending_eoi_sp(peoi) = sp+1; + cpu_set(smp_processor_id(), action->cpu_eoi_map); } for ( i = 0; i < action->nr_guests; i++ ) @@ -207,43 +209,45 @@ static void __do_IRQ_guest(int vector) /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. 
*/ static void flush_ready_eoi(void *unused) { - irq_desc_t *desc; - int vector, sp, cpu = smp_processor_id(); + struct pending_eoi *peoi = this_cpu(pending_eoi); + irq_desc_t *desc; + int vector, sp; ASSERT(!local_irq_is_enabled()); - sp = pending_eoi_sp(cpu); - - while ( (--sp >= 0) && pending_eoi[cpu][sp].ready ) - { - vector = pending_eoi[cpu][sp].vector; + sp = pending_eoi_sp(peoi); + + while ( (--sp >= 0) && peoi[sp].ready ) + { + vector = peoi[sp].vector; desc = &irq_desc[vector]; spin_lock(&desc->lock); desc->handler->end(vector); spin_unlock(&desc->lock); } - pending_eoi_sp(cpu) = sp+1; + pending_eoi_sp(peoi) = sp+1; } static void __set_eoi_ready(irq_desc_t *desc) { irq_guest_action_t *action = (irq_guest_action_t *)desc->action; - int vector, sp, cpu = smp_processor_id(); + struct pending_eoi *peoi = this_cpu(pending_eoi); + int vector, sp; vector = desc - irq_desc; if ( !(desc->status & IRQ_GUEST) || (action->in_flight != 0) || - !cpu_test_and_clear(cpu, action->cpu_eoi_map) ) + !cpu_test_and_clear(smp_processor_id(), action->cpu_eoi_map) ) return; - sp = pending_eoi_sp(cpu); + sp = pending_eoi_sp(peoi); do { ASSERT(sp > 0); - } while ( pending_eoi[cpu][--sp].vector != vector ); - ASSERT(!pending_eoi[cpu][sp].ready); - pending_eoi[cpu][sp].ready = 1; + } while ( peoi[--sp].vector != vector ); + ASSERT(!peoi[sp].ready); + peoi[sp].ready = 1; } /* Mark specified IRQ as ready-for-EOI (if it really is) and attempt to EOI. 
*/ @@ -269,16 +273,17 @@ static void flush_all_pending_eoi(void * { irq_desc_t *desc; irq_guest_action_t *action; - int i, vector, sp, cpu = smp_processor_id(); + struct pending_eoi *peoi = this_cpu(pending_eoi); + int i, vector, sp; ASSERT(!local_irq_is_enabled()); - sp = pending_eoi_sp(cpu); + sp = pending_eoi_sp(peoi); while ( --sp >= 0 ) { - if ( pending_eoi[cpu][sp].ready ) + if ( peoi[sp].ready ) continue; - vector = pending_eoi[cpu][sp].vector; + vector = peoi[sp].vector; desc = &irq_desc[vector]; spin_lock(&desc->lock); action = (irq_guest_action_t *)desc->action; @@ -668,7 +673,7 @@ static int __init setup_dump_irqs(void) } __initcall(setup_dump_irqs); -static struct timer end_irq_timer[NR_CPUS]; +static DEFINE_PER_CPU(struct timer, end_irq_timer); /* * force_intack: Forcibly emit all pending EOIs on each CPU every second. @@ -677,22 +682,13 @@ static struct timer end_irq_timer[NR_CPU static void end_irq_timeout(void *unused) { - int cpu = smp_processor_id(); - local_irq_disable(); flush_all_pending_eoi(NULL); local_irq_enable(); on_selected_cpus(cpu_online_map, flush_ready_eoi, NULL, 1, 0); - set_timer(&end_irq_timer[cpu], NOW() + MILLISECS(1000)); -} - -static void __init __setup_irq_timeout(void *unused) -{ - int cpu = smp_processor_id(); - init_timer(&end_irq_timer[cpu], end_irq_timeout, NULL, cpu); - set_timer(&end_irq_timer[cpu], NOW() + MILLISECS(1000)); + set_timer(&this_cpu(end_irq_timer), NOW() + MILLISECS(1000)); } static int force_intack; @@ -700,8 +696,17 @@ boolean_param("force_intack", force_inta static int __init setup_irq_timeout(void) { - if ( force_intack ) - on_each_cpu(__setup_irq_timeout, NULL, 1, 1); + unsigned int cpu; + + if ( !force_intack ) + return 0; + + for_each_online_cpu ( cpu ) + { + init_timer(&per_cpu(end_irq_timer, cpu), end_irq_timeout, NULL, cpu); + set_timer(&per_cpu(end_irq_timer, cpu), NOW() + MILLISECS(1000)); + } + return 0; } __initcall(setup_irq_timeout); diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/mm.c 
--- a/xen/arch/x86/mm.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/mm.c Wed Aug 09 18:04:20 2006 +0100 @@ -139,20 +139,21 @@ static int mod_l1_entry(l1_pgentry_t *, static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t); /* Used to defer flushing of memory structures. */ -static struct { +struct percpu_mm_info { #define DOP_FLUSH_TLB (1<<0) /* Flush the local TLB. */ #define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */ #define DOP_RELOAD_LDT (1<<2) /* Reload the LDT shadow mapping. */ unsigned int deferred_ops; /* If non-NULL, specifies a foreign subject domain for some operations. */ struct domain *foreign; -} __cacheline_aligned percpu_info[NR_CPUS]; +}; +static DEFINE_PER_CPU(struct percpu_mm_info, percpu_mm_info); /* * Returns the current foreign domain; defaults to the currently-executing * domain if a foreign override hasn't been specified. */ -#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ?: current->domain) +#define FOREIGNDOM (this_cpu(percpu_mm_info).foreign ?: current->domain) /* Private domain structs for DOMID_XEN and DOMID_IO. */ static struct domain *dom_xen, *dom_io; @@ -189,8 +190,6 @@ void arch_init_memory(void) extern void subarch_init_memory(void); unsigned long i, pfn, rstart_pfn, rend_pfn; - - memset(percpu_info, 0, sizeof(percpu_info)); /* * Initialise our DOMID_XEN domain. @@ -378,7 +377,8 @@ void invalidate_shadow_ldt(struct vcpu * } /* Dispose of the (now possibly invalid) mappings from the TLB. */ - percpu_info[v->processor].deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT; + ASSERT(v->processor == smp_processor_id()); + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT; } @@ -1503,7 +1503,7 @@ void free_page_type(struct page_info *pa * (e.g., update_va_mapping()) or we could end up modifying a page * that is no longer a page table (and hence screw up ref counts). 
*/ - percpu_info[smp_processor_id()].deferred_ops |= DOP_FLUSH_ALL_TLBS; + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS; if ( unlikely(shadow_mode_enabled(owner)) ) { @@ -1781,7 +1781,8 @@ int new_guest_cr3(unsigned long mfn) /* Failure here is unrecoverable: the VCPU has no pagetable! */ MEM_LOG("Fatal error while installing new baseptr %lx", mfn); domain_crash(d); - percpu_info[v->processor].deferred_ops = 0; + ASSERT(v->processor == smp_processor_id()); + this_cpu(percpu_mm_info).deferred_ops = 0; return 0; } } @@ -1817,13 +1818,14 @@ int new_guest_cr3(unsigned long mfn) return 1; } -static void process_deferred_ops(unsigned int cpu) +static void process_deferred_ops(void) { unsigned int deferred_ops; struct domain *d = current->domain; - - deferred_ops = percpu_info[cpu].deferred_ops; - percpu_info[cpu].deferred_ops = 0; + struct percpu_mm_info *info = &this_cpu(percpu_mm_info); + + deferred_ops = info->deferred_ops; + info->deferred_ops = 0; if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) ) { @@ -1838,19 +1840,20 @@ static void process_deferred_ops(unsigne if ( deferred_ops & DOP_RELOAD_LDT ) (void)map_ldt_shadow_page(0); - if ( unlikely(percpu_info[cpu].foreign != NULL) ) - { - put_domain(percpu_info[cpu].foreign); - percpu_info[cpu].foreign = NULL; - } -} - -static int set_foreigndom(unsigned int cpu, domid_t domid) + if ( unlikely(info->foreign != NULL) ) + { + put_domain(info->foreign); + info->foreign = NULL; + } +} + +static int set_foreigndom(domid_t domid) { struct domain *e, *d = current->domain; + struct percpu_mm_info *info = &this_cpu(percpu_mm_info); int okay = 1; - ASSERT(percpu_info[cpu].foreign == NULL); + ASSERT(info->foreign == NULL); if ( likely(domid == DOMID_SELF) ) goto out; @@ -1867,7 +1870,7 @@ static int set_foreigndom(unsigned int c { case DOMID_IO: get_knownalive_domain(dom_io); - percpu_info[cpu].foreign = dom_io; + info->foreign = dom_io; break; default: MEM_LOG("Dom %u cannot set foreign dom", d->domain_id); 
@@ -1877,18 +1880,18 @@ static int set_foreigndom(unsigned int c } else { - percpu_info[cpu].foreign = e = find_domain_by_id(domid); + info->foreign = e = find_domain_by_id(domid); if ( e == NULL ) { switch ( domid ) { case DOMID_XEN: get_knownalive_domain(dom_xen); - percpu_info[cpu].foreign = dom_xen; + info->foreign = dom_xen; break; case DOMID_IO: get_knownalive_domain(dom_io); - percpu_info[cpu].foreign = dom_io; + info->foreign = dom_io; break; default: MEM_LOG("Unknown domain '%u'", domid); @@ -1928,7 +1931,7 @@ int do_mmuext_op( unsigned int foreigndom) { struct mmuext_op op; - int rc = 0, i = 0, okay, cpu = smp_processor_id(); + int rc = 0, i = 0, okay; unsigned long mfn, type; unsigned int done = 0; struct page_info *page; @@ -1946,7 +1949,7 @@ int do_mmuext_op( (void)copy_from_guest(&done, pdone, 1); } - if ( !set_foreigndom(cpu, foreigndom) ) + if ( !set_foreigndom(foreigndom) ) { rc = -ESRCH; goto out; @@ -2042,7 +2045,7 @@ int do_mmuext_op( case MMUEXT_NEW_BASEPTR: mfn = gmfn_to_mfn(current->domain, mfn); okay = new_guest_cr3(mfn); - percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; + this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB; break; #ifdef __x86_64__ @@ -2065,7 +2068,7 @@ int do_mmuext_op( #endif case MMUEXT_TLB_FLUSH_LOCAL: - percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB; + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB; break; case MMUEXT_INVLPG_LOCAL: @@ -2137,9 +2140,9 @@ int do_mmuext_op( v->arch.guest_context.ldt_base = ptr; v->arch.guest_context.ldt_ents = ents; load_LDT(v); - percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT; + this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT; if ( ents != 0 ) - percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT; + this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT; } break; } @@ -2160,7 +2163,7 @@ int do_mmuext_op( } out: - process_deferred_ops(cpu); + process_deferred_ops(); /* Add incremental work we have done to the @done output parameter. 
*/ done += i; @@ -2181,7 +2184,7 @@ int do_mmu_update( void *va; unsigned long gpfn, gmfn, mfn; struct page_info *page; - int rc = 0, okay = 1, i = 0, cpu = smp_processor_id(); + int rc = 0, okay = 1, i = 0; unsigned int cmd, done = 0; struct vcpu *v = current; struct domain *d = v->domain; @@ -2205,7 +2208,7 @@ int do_mmu_update( domain_mmap_cache_init(&mapcache); domain_mmap_cache_init(&sh_mapcache); - if ( !set_foreigndom(cpu, foreigndom) ) + if ( !set_foreigndom(foreigndom) ) { rc = -ESRCH; goto out; @@ -2396,7 +2399,7 @@ int do_mmu_update( domain_mmap_cache_destroy(&mapcache); domain_mmap_cache_destroy(&sh_mapcache); - process_deferred_ops(cpu); + process_deferred_ops(); /* Add incremental work we have done to the @done output parameter. */ done += i; @@ -2690,7 +2693,6 @@ int do_update_va_mapping(unsigned long v l1_pgentry_t val = l1e_from_intpte(val64); struct vcpu *v = current; struct domain *d = v->domain; - unsigned int cpu = smp_processor_id(); unsigned long vmask, bmap_ptr; cpumask_t pmask; int rc = 0; @@ -2713,9 +2715,10 @@ int do_update_va_mapping(unsigned long v if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) ) { - if ( unlikely(percpu_info[cpu].foreign && + if ( unlikely(this_cpu(percpu_mm_info).foreign && (shadow_mode_translate(d) || - shadow_mode_translate(percpu_info[cpu].foreign))) ) + shadow_mode_translate( + this_cpu(percpu_mm_info).foreign))) ) { /* * The foreign domain's pfn's are in a different namespace. 
There's @@ -2773,7 +2776,7 @@ int do_update_va_mapping(unsigned long v break; } - process_deferred_ops(cpu); + process_deferred_ops(); UNLOCK_BIGLOCK(d); @@ -2784,13 +2787,12 @@ int do_update_va_mapping_otherdomain(uns unsigned long flags, domid_t domid) { - unsigned int cpu = smp_processor_id(); int rc; if ( unlikely(!IS_PRIV(current->domain)) ) return -EPERM; - if ( !set_foreigndom(cpu, domid) ) + if ( !set_foreigndom(domid) ) return -ESRCH; rc = do_update_va_mapping(va, val64, flags); diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/nmi.c --- a/xen/arch/x86/nmi.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/nmi.c Wed Aug 09 18:04:20 2006 +0100 @@ -36,8 +36,8 @@ static unsigned int nmi_hz = HZ; static unsigned int nmi_hz = HZ; static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ static unsigned int nmi_p4_cccr_val; -static struct timer nmi_timer[NR_CPUS]; -static unsigned int nmi_timer_ticks[NR_CPUS]; +static DEFINE_PER_CPU(struct timer, nmi_timer); +static DEFINE_PER_CPU(unsigned int, nmi_timer_ticks); /* * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: @@ -132,9 +132,8 @@ int __init check_nmi_watchdog (void) static void nmi_timer_fn(void *unused) { - int cpu = smp_processor_id(); - nmi_timer_ticks[cpu]++; - set_timer(&nmi_timer[cpu], NOW() + MILLISECS(1000)); + this_cpu(nmi_timer_ticks)++; + set_timer(&this_cpu(nmi_timer), NOW() + MILLISECS(1000)); } static void disable_lapic_nmi_watchdog(void) @@ -340,9 +339,8 @@ void __pminit setup_apic_nmi_watchdog(vo nmi_active = 1; } -static unsigned int -last_irq_sums [NR_CPUS], - alert_counter [NR_CPUS]; +static DEFINE_PER_CPU(unsigned int, last_irq_sums); +static DEFINE_PER_CPU(unsigned int, alert_counter); static atomic_t watchdog_disable_count = ATOMIC_INIT(1); @@ -366,35 +364,35 @@ void watchdog_enable(void) */ for_each_online_cpu ( cpu ) { - init_timer(&nmi_timer[cpu], nmi_timer_fn, NULL, cpu); - set_timer(&nmi_timer[cpu], NOW()); + init_timer(&per_cpu(nmi_timer, cpu), 
nmi_timer_fn, NULL, cpu); + set_timer(&per_cpu(nmi_timer, cpu), NOW()); } } void nmi_watchdog_tick(struct cpu_user_regs * regs) { - int sum, cpu = smp_processor_id(); - - sum = nmi_timer_ticks[cpu]; - - if ( (last_irq_sums[cpu] == sum) && !atomic_read(&watchdog_disable_count) ) + unsigned int sum = this_cpu(nmi_timer_ticks); + + if ( (this_cpu(last_irq_sums) == sum) && + !atomic_read(&watchdog_disable_count) ) { /* * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds) * before doing the oops ... */ - alert_counter[cpu]++; - if ( alert_counter[cpu] == 5*nmi_hz ) + this_cpu(alert_counter)++; + if ( this_cpu(alert_counter) == 5*nmi_hz ) { console_force_unlock(); - printk("Watchdog timer detects that CPU%d is stuck!\n", cpu); + printk("Watchdog timer detects that CPU%d is stuck!\n", + smp_processor_id()); fatal_trap(TRAP_nmi, regs); } } else { - last_irq_sums[cpu] = sum; - alert_counter[cpu] = 0; + this_cpu(last_irq_sums) = sum; + this_cpu(alert_counter) = 0; } if ( nmi_perfctr_msr ) diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/time.c Wed Aug 09 18:04:20 2006 +0100 @@ -56,9 +56,9 @@ struct cpu_time { s_time_t stime_master_stamp; struct time_scale tsc_scale; struct timer calibration_timer; -} __cacheline_aligned; - -static struct cpu_time cpu_time[NR_CPUS]; +}; + +static DEFINE_PER_CPU(struct cpu_time, cpu_time); /* * Protected by platform_timer_lock, which must be acquired with interrupts @@ -263,7 +263,7 @@ void calibrate_tsc_ap(void) rdtscll(t2); ticks_per_sec = (t2 - t1) * (u64)CALIBRATE_FRAC; - set_time_scale(&cpu_time[smp_processor_id()].tsc_scale, ticks_per_sec); + set_time_scale(&this_cpu(cpu_time).tsc_scale, ticks_per_sec); atomic_dec(&tsc_calibrate_gang); } @@ -646,7 +646,7 @@ static unsigned long get_cmos_time(void) s_time_t get_s_time(void) { - struct cpu_time *t = &cpu_time[smp_processor_id()]; + struct cpu_time *t = &this_cpu(cpu_time); u64 tsc, delta; 
s_time_t now; @@ -675,7 +675,7 @@ static inline void __update_vcpu_system_ struct cpu_time *t; struct vcpu_time_info *u; - t = &cpu_time[smp_processor_id()]; + t = &this_cpu(cpu_time); u = &v->domain->shared_info->vcpu_info[v->vcpu_id].time; version_update_begin(&u->version); @@ -691,7 +691,7 @@ void update_vcpu_system_time(struct vcpu void update_vcpu_system_time(struct vcpu *v) { if ( v->domain->shared_info->vcpu_info[v->vcpu_id].time.tsc_timestamp != - cpu_time[smp_processor_id()].local_tsc_stamp ) + this_cpu(cpu_time).local_tsc_stamp ) __update_vcpu_system_time(v); } @@ -728,7 +728,7 @@ void do_settime(unsigned long secs, unsi static void local_time_calibration(void *unused) { - unsigned int cpu = smp_processor_id(); + struct cpu_time *t = &this_cpu(cpu_time); /* * System timestamps, extrapolated from local and master oscillators, @@ -759,9 +759,9 @@ static void local_time_calibration(void /* The overall calibration scale multiplier. */ u32 calibration_mul_frac; - prev_tsc = cpu_time[cpu].local_tsc_stamp; - prev_local_stime = cpu_time[cpu].stime_local_stamp; - prev_master_stime = cpu_time[cpu].stime_master_stamp; + prev_tsc = t->local_tsc_stamp; + prev_local_stime = t->stime_local_stamp; + prev_master_stime = t->stime_master_stamp; /* Disable IRQs to get 'instantaneous' current timestamps. 
*/ local_irq_disable(); @@ -772,9 +772,9 @@ static void local_time_calibration(void #if 0 printk("PRE%d: tsc=%lld stime=%lld master=%lld\n", - cpu, prev_tsc, prev_local_stime, prev_master_stime); + smp_processor_id(), prev_tsc, prev_local_stime, prev_master_stime); printk("CUR%d: tsc=%lld stime=%lld master=%lld -> %lld\n", - cpu, curr_tsc, curr_local_stime, curr_master_stime, + smp_processor_id(), curr_tsc, curr_local_stime, curr_master_stime, curr_master_stime - curr_local_stime); #endif @@ -844,41 +844,41 @@ static void local_time_calibration(void calibration_mul_frac = mul_frac(calibration_mul_frac, error_factor); #if 0 - printk("---%d: %08x %08x %d\n", cpu, + printk("---%d: %08x %08x %d\n", smp_processor_id(), error_factor, calibration_mul_frac, tsc_shift); #endif /* Record new timestamp information. */ - cpu_time[cpu].tsc_scale.mul_frac = calibration_mul_frac; - cpu_time[cpu].tsc_scale.shift = tsc_shift; - cpu_time[cpu].local_tsc_stamp = curr_tsc; - cpu_time[cpu].stime_local_stamp = curr_local_stime; - cpu_time[cpu].stime_master_stamp = curr_master_stime; + t->tsc_scale.mul_frac = calibration_mul_frac; + t->tsc_scale.shift = tsc_shift; + t->local_tsc_stamp = curr_tsc; + t->stime_local_stamp = curr_local_stime; + t->stime_master_stamp = curr_master_stime; out: - set_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH); - - if ( cpu == 0 ) + set_timer(&t->calibration_timer, NOW() + EPOCH); + + if ( smp_processor_id() == 0 ) platform_time_calibration(); } void init_percpu_time(void) { - unsigned int cpu = smp_processor_id(); + struct cpu_time *t = &this_cpu(cpu_time); unsigned long flags; s_time_t now; local_irq_save(flags); - rdtscll(cpu_time[cpu].local_tsc_stamp); - now = (cpu == 0) ? 0 : read_platform_stime(); + rdtscll(t->local_tsc_stamp); + now = (smp_processor_id() == 0) ? 
0 : read_platform_stime(); local_irq_restore(flags); - cpu_time[cpu].stime_master_stamp = now; - cpu_time[cpu].stime_local_stamp = now; - - init_timer(&cpu_time[cpu].calibration_timer, - local_time_calibration, NULL, cpu); - set_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH); + t->stime_master_stamp = now; + t->stime_local_stamp = now; + + init_timer(&t->calibration_timer, local_time_calibration, + NULL, smp_processor_id()); + set_timer(&t->calibration_timer, NOW() + EPOCH); } /* Late init function (after all CPUs are booted). */ @@ -904,7 +904,7 @@ void __init early_time_init(void) { u64 tmp = calibrate_boot_tsc(); - set_time_scale(&cpu_time[0].tsc_scale, tmp); + set_time_scale(&per_cpu(cpu_time, 0).tsc_scale, tmp); do_div(tmp, 1000); cpu_khz = (unsigned long)tmp; diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/x86_32/domain_page.c --- a/xen/arch/x86/x86_32/domain_page.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/x86_32/domain_page.c Wed Aug 09 18:04:20 2006 +0100 @@ -73,8 +73,7 @@ void *map_domain_page(unsigned long pfn) if ( unlikely(cache->epoch != cache->shadow_epoch[vcpu]) ) { cache->shadow_epoch[vcpu] = cache->epoch; - if ( NEED_FLUSH(tlbflush_time[smp_processor_id()], - cache->tlbflush_timestamp) ) + if ( NEED_FLUSH(this_cpu(tlbflush_time), cache->tlbflush_timestamp) ) { perfc_incrc(domain_page_tlb_flush); local_flush_tlb(); diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/x86_32/traps.c Wed Aug 09 18:04:20 2006 +0100 @@ -19,7 +19,7 @@ #include <public/callback.h> /* All CPUs have their own IDT to allow int80 direct trap. 
*/ -idt_entry_t *idt_tables[NR_CPUS] = { 0 }; +idt_entry_t *idt_tables[NR_CPUS] __read_mostly; void show_registers(struct cpu_user_regs *regs) { diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/x86_32/xen.lds.S --- a/xen/arch/x86/x86_32/xen.lds.S Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/x86_32/xen.lds.S Wed Aug 09 18:04:20 2006 +0100 @@ -46,6 +46,9 @@ SECTIONS CONSTRUCTORS } :text + . = ALIGN(128); + .data.read_mostly : { *(.data.read_mostly) } :text + . = ALIGN(4096); /* Init code and data */ __init_begin = .; .text.init : { *(.text.init) } :text diff -r e4f1519b473f -r b60ea69932b1 xen/arch/x86/x86_64/xen.lds.S --- a/xen/arch/x86/x86_64/xen.lds.S Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/arch/x86/x86_64/xen.lds.S Wed Aug 09 18:04:20 2006 +0100 @@ -44,6 +44,9 @@ SECTIONS CONSTRUCTORS } :text + . = ALIGN(128); + .data.read_mostly : { *(.data.read_mostly) } :text + . = ALIGN(4096); /* Init code and data */ __init_begin = .; .text.init : { *(.text.init) } :text diff -r e4f1519b473f -r b60ea69932b1 xen/common/domain.c --- a/xen/common/domain.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/common/domain.c Wed Aug 09 18:04:20 2006 +0100 @@ -21,6 +21,7 @@ #include <xen/hypercall.h> #include <xen/delay.h> #include <xen/shutdown.h> +#include <xen/percpu.h> #include <asm/debugger.h> #include <public/dom0_ops.h> #include <public/sched.h> @@ -33,7 +34,7 @@ struct domain *domain_list; struct domain *dom0; -struct vcpu *idle_vcpu[NR_CPUS]; +struct vcpu *idle_vcpu[NR_CPUS] __read_mostly; struct domain *alloc_domain(domid_t domid) { @@ -245,15 +246,15 @@ void __domain_crash_synchronous(void) } -static struct domain *domain_shuttingdown[NR_CPUS]; +static DEFINE_PER_CPU(struct domain *, domain_shuttingdown); static void domain_shutdown_finalise(void) { struct domain *d; struct vcpu *v; - d = domain_shuttingdown[smp_processor_id()]; - domain_shuttingdown[smp_processor_id()] = NULL; + d = this_cpu(domain_shuttingdown); + this_cpu(domain_shuttingdown) = NULL; BUG_ON(d == NULL); 
BUG_ON(d == current->domain); @@ -302,7 +303,7 @@ void domain_shutdown(struct domain *d, u vcpu_sleep_nosync(v); get_knownalive_domain(d); - domain_shuttingdown[smp_processor_id()] = d; + this_cpu(domain_shuttingdown) = d; raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ); } diff -r e4f1519b473f -r b60ea69932b1 xen/common/multicall.c --- a/xen/common/multicall.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/common/multicall.c Wed Aug 09 18:04:20 2006 +0100 @@ -14,13 +14,13 @@ #include <asm/current.h> #include <asm/hardirq.h> -struct mc_state mc_state[NR_CPUS]; +DEFINE_PER_CPU(struct mc_state, mc_state); long do_multicall( XEN_GUEST_HANDLE(multicall_entry_t) call_list, unsigned int nr_calls) { - struct mc_state *mcs = &mc_state[smp_processor_id()]; + struct mc_state *mcs = &this_cpu(mc_state); unsigned int i; if ( unlikely(__test_and_set_bit(_MCSF_in_multicall, &mcs->flags)) ) diff -r e4f1519b473f -r b60ea69932b1 xen/common/sched_bvt.c --- a/xen/common/sched_bvt.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/common/sched_bvt.c Wed Aug 09 18:04:20 2006 +0100 @@ -60,7 +60,8 @@ struct bvt_cpu_info #define BVT_INFO(p) ((struct bvt_dom_info *)(p)->sched_priv) #define EBVT_INFO(p) ((struct bvt_vcpu_info *)(p)->sched_priv) -#define CPU_INFO(cpu) ((struct bvt_cpu_info *)(schedule_data[cpu]).sched_priv) +#define CPU_INFO(cpu) \ + ((struct bvt_cpu_info *)(per_cpu(schedule_data, cpu).sched_priv)) #define RUNLIST(p) ((struct list_head *)&(EBVT_INFO(p)->run_list)) #define RUNQUEUE(cpu) ((struct list_head *)&(CPU_INFO(cpu)->runqueue)) #define CPU_SVT(cpu) (CPU_INFO(cpu)->svt) @@ -203,7 +204,8 @@ static int bvt_init_vcpu(struct vcpu *v) /* Allocate per-CPU context if this is the first domain to be added. 
*/ if ( CPU_INFO(v->processor) == NULL ) { - schedule_data[v->processor].sched_priv = xmalloc(struct bvt_cpu_info); + per_cpu(schedule_data, v->processor).sched_priv = + xmalloc(struct bvt_cpu_info); BUG_ON(CPU_INFO(v->processor) == NULL); INIT_LIST_HEAD(RUNQUEUE(v->processor)); CPU_SVT(v->processor) = 0; @@ -251,7 +253,7 @@ static void bvt_wake(struct vcpu *v) /* Deal with warping here. */ einf->evt = calc_evt(v, einf->avt); - curr = schedule_data[cpu].curr; + curr = per_cpu(schedule_data, cpu).curr; curr_evt = calc_evt(curr, calc_avt(curr, now)); /* Calculate the time the current domain would run assuming the second smallest evt is of the newly woken domain */ @@ -261,14 +263,14 @@ static void bvt_wake(struct vcpu *v) if ( is_idle_vcpu(curr) || (einf->evt <= curr_evt) ) cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); - else if ( schedule_data[cpu].s_timer.expires > r_time ) - set_timer(&schedule_data[cpu].s_timer, r_time); + else if ( per_cpu(schedule_data, cpu).s_timer.expires > r_time ) + set_timer(&per_cpu(schedule_data, cpu).s_timer, r_time); } static void bvt_sleep(struct vcpu *v) { - if ( schedule_data[v->processor].curr == v ) + if ( per_cpu(schedule_data, v->processor).curr == v ) cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ); else if ( __task_on_runqueue(v) ) __del_from_runqueue(v); @@ -418,7 +420,7 @@ static struct task_slice bvt_do_schedule * *and* the task the second lowest evt. * this code is O(n) but we expect n to be small. 
*/ - next_einf = EBVT_INFO(schedule_data[cpu].idle); + next_einf = EBVT_INFO(per_cpu(schedule_data, cpu).idle); next_prime_einf = NULL; next_evt = ~0U; diff -r e4f1519b473f -r b60ea69932b1 xen/common/sched_credit.c --- a/xen/common/sched_credit.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/common/sched_credit.c Wed Aug 09 18:04:20 2006 +0100 @@ -55,7 +55,8 @@ /* * Useful macros */ -#define CSCHED_PCPU(_c) ((struct csched_pcpu *)schedule_data[_c].sched_priv) +#define CSCHED_PCPU(_c) \ + ((struct csched_pcpu *)per_cpu(schedule_data, _c).sched_priv) #define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv) #define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv) #define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq)) @@ -253,7 +254,8 @@ static inline void static inline void __runq_tickle(unsigned int cpu, struct csched_vcpu *new) { - struct csched_vcpu * const cur = CSCHED_VCPU(schedule_data[cpu].curr); + struct csched_vcpu * const cur = + CSCHED_VCPU(per_cpu(schedule_data, cpu).curr); cpumask_t mask; ASSERT(cur); @@ -318,10 +320,10 @@ csched_pcpu_init(int cpu) INIT_LIST_HEAD(&spc->runq); spc->runq_sort_last = csched_priv.runq_sort; - schedule_data[cpu].sched_priv = spc; + per_cpu(schedule_data, cpu).sched_priv = spc; /* Start off idling... 
*/ - BUG_ON( !is_idle_vcpu(schedule_data[cpu].curr) ); + BUG_ON( !is_idle_vcpu(per_cpu(schedule_data, cpu).curr) ); cpu_set(cpu, csched_priv.idlers); spin_unlock_irqrestore(&csched_priv.lock, flags); @@ -533,7 +535,7 @@ csched_vcpu_sleep(struct vcpu *vc) BUG_ON( is_idle_vcpu(vc) ); - if ( schedule_data[vc->processor].curr == vc ) + if ( per_cpu(schedule_data, vc->processor).curr == vc ) cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); else if ( __vcpu_on_runq(svc) ) __runq_remove(svc); @@ -547,7 +549,7 @@ csched_vcpu_wake(struct vcpu *vc) BUG_ON( is_idle_vcpu(vc) ); - if ( unlikely(schedule_data[cpu].curr == vc) ) + if ( unlikely(per_cpu(schedule_data, cpu).curr == vc) ) { CSCHED_STAT_CRANK(vcpu_wake_running); return; @@ -599,7 +601,8 @@ csched_vcpu_set_affinity(struct vcpu *vc vc->processor = first_cpu(vc->cpu_affinity); - spin_unlock_irqrestore(&schedule_data[lcpu].schedule_lock, flags); + spin_unlock_irqrestore(&per_cpu(schedule_data, lcpu).schedule_lock, + flags); } vcpu_unpause(vc); @@ -685,7 +688,7 @@ csched_runq_sort(unsigned int cpu) spc->runq_sort_last = sort_epoch; - spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags); + spin_lock_irqsave(&per_cpu(schedule_data, cpu).schedule_lock, flags); runq = &spc->runq; elem = runq->next; @@ -710,7 +713,7 @@ csched_runq_sort(unsigned int cpu) elem = next; } - spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags); + spin_unlock_irqrestore(&per_cpu(schedule_data, cpu).schedule_lock, flags); } static void @@ -900,7 +903,7 @@ csched_tick(unsigned int cpu) * we could distribute or at the very least cycle the duty. */ if ( (csched_priv.master == cpu) && - (schedule_data[cpu].tick % CSCHED_ACCT_NTICKS) == 0 ) + (per_cpu(schedule_data, cpu).tick % CSCHED_ACCT_NTICKS) == 0 ) { csched_acct(); } @@ -984,7 +987,7 @@ csched_load_balance(int cpu, struct csch * cause a deadlock if the peer CPU is also load balancing and trying * to lock this CPU. 
*/ - if ( spin_trylock(&schedule_data[peer_cpu].schedule_lock) ) + if ( spin_trylock(&per_cpu(schedule_data, peer_cpu).schedule_lock) ) { spc = CSCHED_PCPU(peer_cpu); @@ -998,7 +1001,7 @@ csched_load_balance(int cpu, struct csch speer = csched_runq_steal(spc, cpu, snext->pri); } - spin_unlock(&schedule_data[peer_cpu].schedule_lock); + spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock); /* Got one! */ if ( speer ) @@ -1120,11 +1123,11 @@ csched_dump_pcpu(int cpu) runq = &spc->runq; printk(" tick=%lu, sort=%d\n", - schedule_data[cpu].tick, + per_cpu(schedule_data, cpu).tick, spc->runq_sort_last); /* current VCPU */ - svc = CSCHED_VCPU(schedule_data[cpu].curr); + svc = CSCHED_VCPU(per_cpu(schedule_data, cpu).curr); if ( svc ) { printk("\trun: "); diff -r e4f1519b473f -r b60ea69932b1 xen/common/sched_sedf.c --- a/xen/common/sched_sedf.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/common/sched_sedf.c Wed Aug 09 18:04:20 2006 +0100 @@ -113,13 +113,14 @@ struct sedf_cpu_info { }; #define EDOM_INFO(d) ((struct sedf_vcpu_info *)((d)->sched_priv)) -#define CPU_INFO(cpu) ((struct sedf_cpu_info *)schedule_data[cpu].sched_priv) +#define CPU_INFO(cpu) \ + ((struct sedf_cpu_info *)per_cpu(schedule_data, cpu).sched_priv) #define LIST(d) (&EDOM_INFO(d)->list) #define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i])) #define RUNQ(cpu) (&CPU_INFO(cpu)->runnableq) #define WAITQ(cpu) (&CPU_INFO(cpu)->waitq) #define EXTRAQ(cpu,i) (&(CPU_INFO(cpu)->extraq[i])) -#define IDLETASK(cpu) ((struct vcpu *)schedule_data[cpu].idle) +#define IDLETASK(cpu) ((struct vcpu *)per_cpu(schedule_data, cpu).idle) #define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period) @@ -348,11 +349,11 @@ static int sedf_init_vcpu(struct vcpu *v inf->vcpu = v; /* Allocate per-CPU context if this is the first domain to be added. 
*/ - if ( unlikely(schedule_data[v->processor].sched_priv == NULL) ) - { - schedule_data[v->processor].sched_priv = + if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) ) + { + per_cpu(schedule_data, v->processor).sched_priv = xmalloc(struct sedf_cpu_info); - BUG_ON(schedule_data[v->processor].sched_priv == NULL); + BUG_ON(per_cpu(schedule_data, v->processor).sched_priv == NULL); memset(CPU_INFO(v->processor), 0, sizeof(*CPU_INFO(v->processor))); INIT_LIST_HEAD(WAITQ(v->processor)); INIT_LIST_HEAD(RUNQ(v->processor)); @@ -847,7 +848,7 @@ static void sedf_sleep(struct vcpu *d) EDOM_INFO(d)->status |= SEDF_ASLEEP; - if ( schedule_data[d->processor].curr == d ) + if ( per_cpu(schedule_data, d->processor).curr == d ) { cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ); } @@ -1167,9 +1168,9 @@ void sedf_wake(struct vcpu *d) Save approximation: Always switch to scheduler!*/ ASSERT(d->processor >= 0); ASSERT(d->processor < NR_CPUS); - ASSERT(schedule_data[d->processor].curr); - - if ( should_switch(schedule_data[d->processor].curr, d, now) ) + ASSERT(per_cpu(schedule_data, d->processor).curr); + + if ( should_switch(per_cpu(schedule_data, d->processor).curr, d, now) ) cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ); } diff -r e4f1519b473f -r b60ea69932b1 xen/common/schedule.c --- a/xen/common/schedule.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/common/schedule.c Wed Aug 09 18:04:20 2006 +0100 @@ -46,7 +46,7 @@ static void poll_timer_fn(void *data); static void poll_timer_fn(void *data); /* This is global for now so that private implementations can reach it */ -struct schedule_data schedule_data[NR_CPUS]; +DEFINE_PER_CPU(struct schedule_data, schedule_data); extern struct scheduler sched_bvt_def; extern struct scheduler sched_sedf_def; @@ -67,13 +67,13 @@ static struct scheduler ops; : (typeof(ops.fn(__VA_ARGS__)))0 ) /* Per-CPU periodic timer sends an event to the currently-executing domain. 
*/ -static struct timer t_timer[NR_CPUS]; +static DEFINE_PER_CPU(struct timer, t_timer); static inline void vcpu_runstate_change( struct vcpu *v, int new_state, s_time_t new_entry_time) { ASSERT(v->runstate.state != new_state); - ASSERT(spin_is_locked(&schedule_data[v->processor].schedule_lock)); + ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock)); v->runstate.time[v->runstate.state] += new_entry_time - v->runstate.state_entry_time; @@ -107,8 +107,8 @@ int sched_init_vcpu(struct vcpu *v) if ( is_idle_vcpu(v) ) { - schedule_data[v->processor].curr = v; - schedule_data[v->processor].idle = v; + per_cpu(schedule_data, v->processor).curr = v; + per_cpu(schedule_data, v->processor).idle = v; set_bit(_VCPUF_running, &v->vcpu_flags); } @@ -500,19 +500,21 @@ long sched_adjdom(struct sched_adjdom_cm */ static void __enter_scheduler(void) { - struct vcpu *prev = current, *next = NULL; - int cpu = smp_processor_id(); - s_time_t now = NOW(); - struct task_slice next_slice; - s32 r_time; /* time for new dom to run */ + struct vcpu *prev = current, *next = NULL; + s_time_t now = NOW(); + struct schedule_data *sd; + struct task_slice next_slice; + s32 r_time; /* time for new dom to run */ ASSERT(!in_irq()); perfc_incrc(sched_run); - spin_lock_irq(&schedule_data[cpu].schedule_lock); - - stop_timer(&schedule_data[cpu].s_timer); + sd = &this_cpu(schedule_data); + + spin_lock_irq(&sd->schedule_lock); + + stop_timer(&sd->s_timer); /* get policy-specific decision on scheduling... 
*/ next_slice = ops.do_schedule(now); @@ -520,13 +522,13 @@ static void __enter_scheduler(void) r_time = next_slice.time; next = next_slice.task; - schedule_data[cpu].curr = next; + sd->curr = next; - set_timer(&schedule_data[cpu].s_timer, now + r_time); + set_timer(&sd->s_timer, now + r_time); if ( unlikely(prev == next) ) { - spin_unlock_irq(&schedule_data[cpu].schedule_lock); + spin_unlock_irq(&sd->schedule_lock); return continue_running(prev); } @@ -552,17 +554,17 @@ static void __enter_scheduler(void) ASSERT(!test_bit(_VCPUF_running, &next->vcpu_flags)); set_bit(_VCPUF_running, &next->vcpu_flags); - spin_unlock_irq(&schedule_data[cpu].schedule_lock); + spin_unlock_irq(&sd->schedule_lock); perfc_incrc(sched_ctx); - prev->sleep_tick = schedule_data[cpu].tick; + prev->sleep_tick = sd->tick; /* Ensure that the domain has an up-to-date time base. */ if ( !is_idle_vcpu(next) ) { update_vcpu_system_time(next); - if ( next->sleep_tick != schedule_data[cpu].tick ) + if ( next->sleep_tick != sd->tick ) send_timer_event(next); } @@ -591,10 +593,9 @@ static void s_timer_fn(void *unused) /* Periodic tick timer: send timer event to current domain */ static void t_timer_fn(void *unused) { - struct vcpu *v = current; - unsigned int cpu = smp_processor_id(); - - schedule_data[cpu].tick++; + struct vcpu *v = current; + + this_cpu(schedule_data).tick++; if ( !is_idle_vcpu(v) ) { @@ -604,9 +605,9 @@ static void t_timer_fn(void *unused) page_scrub_schedule_work(); - SCHED_OP(tick, cpu); - - set_timer(&t_timer[cpu], NOW() + MILLISECS(10)); + SCHED_OP(tick, smp_processor_id()); + + set_timer(&this_cpu(t_timer), NOW() + MILLISECS(10)); } /* Per-VCPU timer function: sends a virtual timer interrupt. 
*/ @@ -633,9 +634,9 @@ void __init scheduler_init(void) for ( i = 0; i < NR_CPUS; i++ ) { - spin_lock_init(&schedule_data[i].schedule_lock); - init_timer(&schedule_data[i].s_timer, s_timer_fn, NULL, i); - init_timer(&t_timer[i], t_timer_fn, NULL, i); + spin_lock_init(&per_cpu(schedule_data, i).schedule_lock); + init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i); + init_timer(&per_cpu(t_timer, i), t_timer_fn, NULL, i); } for ( i = 0; schedulers[i] != NULL; i++ ) @@ -676,10 +677,10 @@ void dump_runq(unsigned char key) for_each_online_cpu ( i ) { - spin_lock(&schedule_data[i].schedule_lock); + spin_lock(&per_cpu(schedule_data, i).schedule_lock); printk("CPU[%02d] ", i); - SCHED_OP(dump_cpu_state,i); - spin_unlock(&schedule_data[i].schedule_lock); + SCHED_OP(dump_cpu_state, i); + spin_unlock(&per_cpu(schedule_data, i).schedule_lock); } local_irq_restore(flags); diff -r e4f1519b473f -r b60ea69932b1 xen/common/timer.c --- a/xen/common/timer.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/common/timer.c Wed Aug 09 18:04:20 2006 +0100 @@ -17,6 +17,7 @@ #include <xen/softirq.h> #include <xen/timer.h> #include <xen/keyhandler.h> +#include <xen/percpu.h> #include <asm/system.h> #include <asm/desc.h> @@ -32,7 +33,7 @@ struct timers { struct timer *running; } __cacheline_aligned; -struct timers timers[NR_CPUS]; +static DEFINE_PER_CPU(struct timers, timers); extern int reprogram_timer(s_time_t timeout); @@ -149,7 +150,7 @@ static inline void __add_timer(struct ti static inline void __add_timer(struct timer *timer) { int cpu = timer->cpu; - if ( add_entry(&timers[cpu].heap, timer) ) + if ( add_entry(&per_cpu(timers, cpu).heap, timer) ) cpu_raise_softirq(cpu, TIMER_SOFTIRQ); } @@ -157,7 +158,7 @@ static inline void __stop_timer(struct t static inline void __stop_timer(struct timer *timer) { int cpu = timer->cpu; - if ( remove_entry(timers[cpu].heap, timer) ) + if ( remove_entry(per_cpu(timers, cpu).heap, timer) ) cpu_raise_softirq(cpu, TIMER_SOFTIRQ); } @@ -168,10 
+169,10 @@ static inline void timer_lock(struct tim for ( ; ; ) { cpu = timer->cpu; - spin_lock(&timers[cpu].lock); + spin_lock(&per_cpu(timers, cpu).lock); if ( likely(timer->cpu == cpu) ) break; - spin_unlock(&timers[cpu].lock); + spin_unlock(&per_cpu(timers, cpu).lock); } } @@ -182,7 +183,7 @@ static inline void timer_lock(struct tim static inline void timer_unlock(struct timer *timer) { - spin_unlock(&timers[timer->cpu].lock); + spin_unlock(&per_cpu(timers, timer->cpu).lock); } #define timer_unlock_irq(t) \ @@ -234,20 +235,20 @@ void migrate_timer(struct timer *timer, if ( old_cpu < new_cpu ) { - spin_lock_irqsave(&timers[old_cpu].lock, flags); - spin_lock(&timers[new_cpu].lock); + spin_lock_irqsave(&per_cpu(timers, old_cpu).lock, flags); + spin_lock(&per_cpu(timers, new_cpu).lock); } else { - spin_lock_irqsave(&timers[new_cpu].lock, flags); - spin_lock(&timers[old_cpu].lock); + spin_lock_irqsave(&per_cpu(timers, new_cpu).lock, flags); + spin_lock(&per_cpu(timers, old_cpu).lock); } if ( likely(timer->cpu == old_cpu) ) break; - spin_unlock(&timers[old_cpu].lock); - spin_unlock_irqrestore(&timers[new_cpu].lock, flags); + spin_unlock(&per_cpu(timers, old_cpu).lock); + spin_unlock_irqrestore(&per_cpu(timers, new_cpu).lock, flags); } if ( active_timer(timer) ) @@ -261,8 +262,8 @@ void migrate_timer(struct timer *timer, timer->cpu = new_cpu; } - spin_unlock(&timers[old_cpu].lock); - spin_unlock_irqrestore(&timers[new_cpu].lock, flags); + spin_unlock(&per_cpu(timers, old_cpu).lock); + spin_unlock_irqrestore(&per_cpu(timers, new_cpu).lock, flags); } @@ -271,7 +272,7 @@ void kill_timer(struct timer *timer) int cpu; unsigned long flags; - BUG_ON(timers[smp_processor_id()].running == timer); + BUG_ON(this_cpu(timers).running == timer); timer_lock_irqsave(timer, flags); @@ -282,23 +283,25 @@ void kill_timer(struct timer *timer) timer_unlock_irqrestore(timer, flags); for_each_online_cpu ( cpu ) - while ( timers[cpu].running == timer ) + while ( per_cpu(timers, cpu).running 
== timer ) cpu_relax(); } static void timer_softirq_action(void) { - int cpu = smp_processor_id(); - struct timer *t, **heap; - s_time_t now; - void (*fn)(void *); - void *data; - - spin_lock_irq(&timers[cpu].lock); + struct timer *t, **heap; + struct timers *ts; + s_time_t now; + void (*fn)(void *); + void *data; + + ts = &this_cpu(timers); + + spin_lock_irq(&ts->lock); do { - heap = timers[cpu].heap; + heap = ts->heap; now = NOW(); while ( (GET_HEAP_SIZE(heap) != 0) && @@ -306,24 +309,24 @@ static void timer_softirq_action(void) { remove_entry(heap, t); - timers[cpu].running = t; + ts->running = t; fn = t->function; data = t->data; - spin_unlock_irq(&timers[cpu].lock); + spin_unlock_irq(&ts->lock); (*fn)(data); - spin_lock_irq(&timers[cpu].lock); + spin_lock_irq(&ts->lock); /* Heap may have grown while the lock was released. */ - heap = timers[cpu].heap; + heap = ts->heap; } - timers[cpu].running = NULL; + ts->running = NULL; } while ( !reprogram_timer(GET_HEAP_SIZE(heap) ? heap[1]->expires : 0) ); - spin_unlock_irq(&timers[cpu].lock); + spin_unlock_irq(&ts->lock); } @@ -338,25 +341,28 @@ void process_pending_timers(void) static void dump_timerq(unsigned char key) { - struct timer *t; - unsigned long flags; - s_time_t now = NOW(); - int i, j; + struct timer *t; + struct timers *ts; + unsigned long flags; + s_time_t now = NOW(); + int i, j; printk("Dumping timer queues: NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now); for_each_online_cpu( i ) { + ts = &per_cpu(timers, i); + printk("CPU[%02d] ", i); - spin_lock_irqsave(&timers[i].lock, flags); - for ( j = 1; j <= GET_HEAP_SIZE(timers[i].heap); j++ ) + spin_lock_irqsave(&ts->lock, flags); + for ( j = 1; j <= GET_HEAP_SIZE(ts->heap); j++ ) { - t = timers[i].heap[j]; + t = ts->heap[j]; printk (" %d : %p ex=0x%08X%08X %p\n", j, t, (u32)(t->expires>>32), (u32)t->expires, t->data); } - spin_unlock_irqrestore(&timers[i].lock, flags); + spin_unlock_irqrestore(&ts->lock, flags); printk("\n"); } } @@ -378,8 +384,8 @@ void 
__init timer_init(void) for ( i = 0; i < NR_CPUS; i++ ) { - spin_lock_init(&timers[i].lock); - timers[i].heap = &dummy_heap; + spin_lock_init(&per_cpu(timers, i).lock); + per_cpu(timers, i).heap = &dummy_heap; } register_keyhandler('a', dump_timerq, "dump timer queues"); diff -r e4f1519b473f -r b60ea69932b1 xen/common/trace.c --- a/xen/common/trace.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/common/trace.c Wed Aug 09 18:04:20 2006 +0100 @@ -39,8 +39,8 @@ integer_param("tbuf_size", opt_tbuf_size integer_param("tbuf_size", opt_tbuf_size); /* Pointers to the meta-data objects for all system trace buffers */ -static struct t_buf *t_bufs[NR_CPUS]; -static struct t_rec *t_recs[NR_CPUS]; +static DEFINE_PER_CPU(struct t_buf *, t_bufs); +static DEFINE_PER_CPU(struct t_rec *, t_recs); static int nr_recs; /* High water mark for trace buffers; */ @@ -105,9 +105,10 @@ static int alloc_trace_bufs(void) for_each_online_cpu ( i ) { - buf = t_bufs[i] = (struct t_buf *)&rawbuf[i*opt_tbuf_size*PAGE_SIZE]; + buf = per_cpu(t_bufs, i) = (struct t_buf *) + &rawbuf[i*opt_tbuf_size*PAGE_SIZE]; buf->cons = buf->prod = 0; - t_recs[i] = (struct t_rec *)(buf + 1); + per_cpu(t_recs, i) = (struct t_rec *)(buf + 1); } t_buf_highwater = nr_recs >> 1; /* 50% high water */ @@ -186,7 +187,7 @@ int tb_control(dom0_tbufcontrol_t *tbc) case DOM0_TBUF_GET_INFO: tbc->cpu_mask = tb_cpu_mask; tbc->evt_mask = tb_event_mask; - tbc->buffer_mfn = opt_tbuf_size ? virt_to_mfn(t_bufs[0]) : 0UL; + tbc->buffer_mfn = opt_tbuf_size ? virt_to_mfn(per_cpu(t_bufs, 0)) : 0; tbc->size = opt_tbuf_size * PAGE_SIZE; break; case DOM0_TBUF_SET_CPU_MASK: @@ -258,7 +259,7 @@ void trace(u32 event, unsigned long d1, /* Read tb_init_done /before/ t_bufs. 
*/ rmb(); - buf = t_bufs[smp_processor_id()]; + buf = this_cpu(t_bufs); local_irq_save(flags); @@ -272,7 +273,7 @@ void trace(u32 event, unsigned long d1, if ( unlikely(this_cpu(lost_records) != 0) ) { - rec = &t_recs[smp_processor_id()][buf->prod % nr_recs]; + rec = &this_cpu(t_recs)[buf->prod % nr_recs]; memset(rec, 0, sizeof(*rec)); rec->cycles = (u64)get_cycles(); rec->event = TRC_LOST_RECORDS; @@ -283,7 +284,7 @@ void trace(u32 event, unsigned long d1, buf->prod++; } - rec = &t_recs[smp_processor_id()][buf->prod % nr_recs]; + rec = &this_cpu(t_recs)[buf->prod % nr_recs]; rec->cycles = (u64)get_cycles(); rec->event = event; rec->data[0] = d1; diff -r e4f1519b473f -r b60ea69932b1 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/drivers/char/console.c Wed Aug 09 18:04:20 2006 +0100 @@ -716,7 +716,6 @@ void panic(const char *fmt, ...) char buf[128]; unsigned long flags; static DEFINE_SPINLOCK(lock); - extern void machine_restart(char *); debugtrace_dump(); diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-ia64/linux-xen/asm/cache.h --- a/xen/include/asm-ia64/linux-xen/asm/cache.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-ia64/linux-xen/asm/cache.h Wed Aug 09 18:04:20 2006 +0100 @@ -32,4 +32,6 @@ #endif #endif +#define __read_mostly + #endif /* _ASM_IA64_CACHE_H */ diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-powerpc/cache.h --- a/xen/include/asm-powerpc/cache.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-powerpc/cache.h Wed Aug 09 18:04:20 2006 +0100 @@ -57,4 +57,6 @@ static __inline__ void synchronize_cache isync(); } +#define __read_mostly + #endif diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-powerpc/flushtlb.h --- a/xen/include/asm-powerpc/flushtlb.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-powerpc/flushtlb.h Wed Aug 09 18:04:20 2006 +0100 @@ -22,6 +22,7 @@ #define _ASM_FLUSHTLB_H_ #include <xen/config.h> +#include <xen/percpu.h> #include <xen/types.h> 
#include <asm/misc.h> @@ -30,7 +31,7 @@ extern u32 tlbflush_clock; #define tlbflush_current_time() tlbflush_clock /* Time at which each CPU's TLB was last flushed. */ -extern u32 tlbflush_time[NR_CPUS]; +DECLARE_PER_CPU(u32, tlbflush_time); static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp) { @@ -44,12 +45,12 @@ static inline int NEED_FLUSH(u32 cpu_sta * Filter the given set of CPUs, removing those that definitely flushed their * TLB since @page_timestamp. */ -#define tlbflush_filter(mask, page_timestamp) \ -do { \ - unsigned int cpu; \ - for_each_cpu_mask ( cpu, mask ) \ - if ( !NEED_FLUSH(tlbflush_time[cpu], page_timestamp) ) \ - cpu_clear(cpu, mask); \ +#define tlbflush_filter(mask, page_timestamp) \ +do { \ + unsigned int cpu; \ + for_each_cpu_mask ( cpu, mask ) \ + if ( !NEED_FLUSH(per_cpu(tlbflush_time, cpu), page_timestamp) ) \ + cpu_clear(cpu, mask); \ } while ( 0 ) diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-x86/cache.h --- a/xen/include/asm-x86/cache.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-x86/cache.h Wed Aug 09 18:04:20 2006 +0100 @@ -10,4 +10,6 @@ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define __read_mostly __attribute__((__section__(".data.read_mostly"))) + #endif diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-x86/current.h --- a/xen/include/asm-x86/current.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-x86/current.h Wed Aug 09 18:04:20 2006 +0100 @@ -8,6 +8,7 @@ #define __X86_CURRENT_H__ #include <xen/config.h> +#include <xen/percpu.h> #include <public/xen.h> #include <asm/page.h> @@ -53,7 +54,6 @@ static inline struct cpu_info *get_cpu_i #define schedule_tail(vcpu) (((vcpu)->arch.schedule_tail)(vcpu)) -#include <xen/percpu.h> /* * Which VCPU's state is currently running on each CPU? 
* This is not necesasrily the same as 'current' as a CPU may be diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-x86/e820.h --- a/xen/include/asm-x86/e820.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-x86/e820.h Wed Aug 09 18:04:20 2006 +0100 @@ -12,6 +12,7 @@ #define E820_IO 16 #define E820_SHARED_PAGE 17 #define E820_XENSTORE 18 +#define E820_BUFFERED_IO 19 #define E820_MAP_PAGE 0x00090000 #define E820_MAP_NR_OFFSET 0x000001E8 diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-x86/flushtlb.h --- a/xen/include/asm-x86/flushtlb.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-x86/flushtlb.h Wed Aug 09 18:04:20 2006 +0100 @@ -11,13 +11,15 @@ #define __FLUSHTLB_H__ #include <xen/config.h> +#include <xen/percpu.h> #include <xen/smp.h> +#include <xen/types.h> /* The current time as shown by the virtual TLB clock. */ extern u32 tlbflush_clock; /* Time at which each CPU's TLB was last flushed. */ -extern u32 tlbflush_time[NR_CPUS]; +DECLARE_PER_CPU(u32, tlbflush_time); #define tlbflush_current_time() tlbflush_clock @@ -47,12 +49,12 @@ static inline int NEED_FLUSH(u32 cpu_sta * Filter the given set of CPUs, removing those that definitely flushed their * TLB since @page_timestamp. 
*/ -#define tlbflush_filter(mask, page_timestamp) \ -do { \ - unsigned int cpu; \ - for_each_cpu_mask ( cpu, mask ) \ - if ( !NEED_FLUSH(tlbflush_time[cpu], page_timestamp) ) \ - cpu_clear(cpu, mask); \ +#define tlbflush_filter(mask, page_timestamp) \ +do { \ + unsigned int cpu; \ + for_each_cpu_mask ( cpu, mask ) \ + if ( !NEED_FLUSH(per_cpu(tlbflush_time, cpu), page_timestamp) ) \ + cpu_clear(cpu, mask); \ } while ( 0 ) extern void new_tlbflush_clock_period(void); diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-x86/hvm/domain.h --- a/xen/include/asm-x86/hvm/domain.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-x86/hvm/domain.h Wed Aug 09 18:04:20 2006 +0100 @@ -33,6 +33,8 @@ struct hvm_domain { unsigned long shared_page_va; + unsigned long buffered_io_va; + spinlock_t buffered_io_lock; s64 tsc_frequency; struct pl_time pl_time; diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-x86/hvm/hvm.h Wed Aug 09 18:04:20 2006 +0100 @@ -77,12 +77,20 @@ hvm_disable(void) hvm_funcs.disable(); } +void hvm_create_event_channels(struct vcpu *v); +void hvm_map_io_shared_pages(struct vcpu *v); + static inline int hvm_initialize_guest_resources(struct vcpu *v) { + int ret = 1; if ( hvm_funcs.initialize_guest_resources ) - return hvm_funcs.initialize_guest_resources(v); - return 0; + ret = hvm_funcs.initialize_guest_resources(v); + if ( ret == 1 ) { + hvm_map_io_shared_pages(v); + hvm_create_event_channels(v); + } + return ret; } static inline void diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-x86/hvm/io.h --- a/xen/include/asm-x86/hvm/io.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-x86/hvm/io.h Wed Aug 09 18:04:20 2006 +0100 @@ -150,13 +150,12 @@ static inline int irq_masked(unsigned lo #endif extern void handle_mmio(unsigned long, unsigned long); -extern void hvm_wait_io(void); -extern void hvm_safe_block(void); extern void hvm_io_assist(struct 
vcpu *v); extern void pic_irq_request(void *data, int level); extern void hvm_pic_assist(struct vcpu *v); extern int cpu_get_interrupt(struct vcpu *v, int *type); extern int cpu_has_pending_irq(struct vcpu *v); +extern void hvm_release_assist_channel(struct vcpu *v); // XXX - think about this, maybe use bit 30 of the mfn to signify an MMIO frame. #define mmio_space(gpa) (!VALID_MFN(get_mfn_from_gpfn((gpa) >> PAGE_SHIFT))) diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-x86/hvm/support.h Wed Aug 09 18:04:20 2006 +0100 @@ -42,11 +42,6 @@ static inline vcpu_iodata_t *get_vio(str static inline vcpu_iodata_t *get_vio(struct domain *d, unsigned long cpu) { return &get_sp(d)->vcpu_iodata[cpu]; -} - -static inline int iopacket_port(struct vcpu *v) -{ - return get_vio(v->domain, v->vcpu_id)->vp_eport; } /* XXX these are really VMX specific */ @@ -144,10 +139,13 @@ extern void hvm_setup_platform(struct do extern void hvm_setup_platform(struct domain* d); extern int hvm_mmio_intercept(ioreq_t *p); extern int hvm_io_intercept(ioreq_t *p, int type); +extern int hvm_buffered_io_intercept(ioreq_t *p); extern void hvm_hooks_assist(struct vcpu *v); extern void hvm_print_line(struct vcpu *v, const char c); extern void hlt_timer_fn(void *data); void hvm_do_hypercall(struct cpu_user_regs *pregs); +void hvm_prod_vcpu(struct vcpu *v); + #endif /* __ASM_X86_HVM_SUPPORT_H__ */ diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-x86/hvm/vcpu.h --- a/xen/include/asm-x86/hvm/vcpu.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/asm-x86/hvm/vcpu.h Wed Aug 09 18:04:20 2006 +0100 @@ -38,6 +38,8 @@ struct hvm_vcpu { /* For AP startup */ unsigned long init_sipi_sipi_state; + int xen_port; + /* Flags */ int flag_dr_dirty; diff -r e4f1519b473f -r b60ea69932b1 xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Tue Aug 08 19:07:32 2006 -0500 +++ 
b/xen/include/asm-x86/hvm/vmx/vmx.h Wed Aug 09 18:04:20 2006 +0100 @@ -40,82 +40,91 @@ extern unsigned int cpu_rev; * Need fill bits for SENTER */ -#define MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE 0x00000016 - -#define MONITOR_PIN_BASED_EXEC_CONTROLS \ - ( \ - MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE | \ - PIN_BASED_EXT_INTR_MASK | \ - PIN_BASED_NMI_EXITING \ - ) - -#define MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE 0x0401e172 - -#define _MONITOR_CPU_BASED_EXEC_CONTROLS \ - ( \ +#define MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE 0x00000016 + +#define MONITOR_PIN_BASED_EXEC_CONTROLS \ + ( \ + MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE | \ + PIN_BASED_EXT_INTR_MASK | \ + PIN_BASED_NMI_EXITING \ + ) + +#define MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE 0x0401e172 + +#define _MONITOR_CPU_BASED_EXEC_CONTROLS \ + ( \ MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE | \ - CPU_BASED_HLT_EXITING | \ - CPU_BASED_INVDPG_EXITING | \ - CPU_BASED_MWAIT_EXITING | \ - CPU_BASED_MOV_DR_EXITING | \ - CPU_BASED_ACTIVATE_IO_BITMAP | \ - CPU_BASED_USE_TSC_OFFSETING \ - ) - -#define MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \ - ( \ - CPU_BASED_CR8_LOAD_EXITING | \ - CPU_BASED_CR8_STORE_EXITING \ - ) - -#define MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE 0x0003edff - -#define MONITOR_VM_EXIT_CONTROLS_IA32E_MODE 0x00000200 - -#define _MONITOR_VM_EXIT_CONTROLS \ - ( \ - MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE |\ - VM_EXIT_ACK_INTR_ON_EXIT \ + CPU_BASED_HLT_EXITING | \ + CPU_BASED_INVDPG_EXITING | \ + CPU_BASED_MWAIT_EXITING | \ + CPU_BASED_MOV_DR_EXITING | \ + CPU_BASED_ACTIVATE_IO_BITMAP | \ + CPU_BASED_USE_TSC_OFFSETING \ + ) + +#define MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \ + ( \ + CPU_BASED_CR8_LOAD_EXITING | \ + CPU_BASED_CR8_STORE_EXITING \ + ) + +#define MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE 0x0003edff + +#define MONITOR_VM_EXIT_CONTROLS_IA32E_MODE 0x00000200 + +#define _MONITOR_VM_EXIT_CONTROLS \ + ( \ + MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE | \ + 
VM_EXIT_ACK_INTR_ON_EXIT \ ) #if defined (__x86_64__) -#define MONITOR_CPU_BASED_EXEC_CONTROLS \ - ( \ - _MONITOR_CPU_BASED_EXEC_CONTROLS | \ - MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \ - ) -#define MONITOR_VM_EXIT_CONTROLS \ - ( \ - _MONITOR_VM_EXIT_CONTROLS | \ - MONITOR_VM_EXIT_CONTROLS_IA32E_MODE \ +#define MONITOR_CPU_BASED_EXEC_CONTROLS \ + ( \ + _MONITOR_CPU_BASED_EXEC_CONTROLS | \ + MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \ + ) +#define MONITOR_VM_EXIT_CONTROLS \ + ( \ + _MONITOR_VM_EXIT_CONTROLS | \ + MONITOR_VM_EXIT_CONTROLS_IA32E_MODE \ ) #else -#define MONITOR_CPU_BASED_EXEC_CONTROLS \ - _MONITOR_CPU_BASED_EXEC_CONTROLS - -#define MONITOR_VM_EXIT_CONTROLS \ +#define MONITOR_CPU_BASED_EXEC_CONTROLS \ + _MONITOR_CPU_BASED_EXEC_CONTROLS + +#define MONITOR_VM_EXIT_CONTROLS \ _MONITOR_VM_EXIT_CONTROLS #endif -#define VM_ENTRY_CONTROLS_RESERVED_VALUE 0x000011ff -#define VM_ENTRY_CONTROLS_IA32E_MODE 0x00000200 -#define MONITOR_VM_ENTRY_CONTROLS VM_ENTRY_CONTROLS_RESERVED_VALUE +#define VM_ENTRY_CONTROLS_RESERVED_VALUE 0x000011ff +#define VM_ENTRY_CONTROLS_IA32E_MODE 0x00000200 + +#define MONITOR_VM_ENTRY_CONTROLS \ + VM_ENTRY_CONTROLS_RESERVED_VALUE + /* * Exit Reasons */ -#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 +#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 - +#define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_INIT 3 +#define EXIT_REASON_SIPI 4 +#define EXIT_REASON_IO_SMI 5 +#define EXIT_REASON_OTHER_SMI 6 #define EXIT_REASON_PENDING_INTERRUPT 7 #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 #define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 #define EXIT_REASON_INVLPG 14 #define EXIT_REASON_RDPMC 15 #define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_RSM 17 #define EXIT_REASON_VMCALL 18 #define EXIT_REASON_VMCLEAR 19 #define EXIT_REASON_VMLAUNCH 20 @@ -124,19 +133,24 @@ extern unsigned int cpu_rev; #define EXIT_REASON_VMREAD 23 #define 
EXIT_REASON_VMRESUME 24 #define EXIT_REASON_VMWRITE 25 -#define EXIT_REASON_VMOFF 26 -#define EXIT_REASON_VMON 27 +#define EXIT_REASON_VMXOFF 26 +#define EXIT_REASON_VMXON 27 #define EXIT_REASON_CR_ACCESS 28 #define EXIT_REASON_DR_ACCESS 29 #define EXIT_REASON_IO_INSTRUCTION 30 #define EXIT_REASON_MSR_READ 31 #define EXIT_REASON_MSR_WRITE 32 -#define EXIT_REASON_MWAIT_INSTRUCTION 36 #define EXIT_REASON_INVALID_GUEST_STATE 33 #define EXIT_REASON_MSR_LOADING 34 + +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 + #define EXIT_REASON_MACHINE_CHECK 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 /* * Interruption-information format @@ -146,9 +160,9 @@ extern unsigned int cpu_rev; #define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ #define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ -#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ -#define INTR_TYPE_HW_EXCEPTION (3 << 8) /* hardware exception */ -#define INTR_TYPE_SW_EXCEPTION (6 << 8) /* software exception */ +#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ +#define INTR_TYPE_HW_EXCEPTION (3 << 8) /* hardware exception */ +#define INTR_TYPE_SW_EXCEPTION (6 << 8) /* software exception */ /* * Exit Qualifications for MOV for Control Register Access @@ -156,33 +170,33 @@ extern unsigned int cpu_rev; #define CONTROL_REG_ACCESS_NUM 0xf /* 3:0, number of control register */ #define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */ #define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose register */ -#define LMSW_SOURCE_DATA (0xFFFF << 16) /* 16:31 lmsw source */ -#define REG_EAX (0 << 8) -#define REG_ECX (1 << 8) -#define REG_EDX (2 << 8) -#define REG_EBX (3 << 8) -#define REG_ESP (4 << 8) -#define REG_EBP (5 << 8) -#define REG_ESI (6 << 8) -#define REG_EDI (7 << 8) -#define REG_R8 (8 << 8) -#define REG_R9 (9 << 8) -#define REG_R10 (10 << 8) -#define REG_R11 (11 << 8) -#define REG_R12 (12 << 8) -#define REG_R13 (13 << 
8) -#define REG_R14 (14 << 8) -#define REG_R15 (15 << 8) +#define LMSW_SOURCE_DATA (0xFFFF << 16) /* 16:31 lmsw source */ +#define REG_EAX (0 << 8) +#define REG_ECX (1 << 8) +#define REG_EDX (2 << 8) +#define REG_EBX (3 << 8) +#define REG_ESP (4 << 8) +#define REG_EBP (5 << 8) +#define REG_ESI (6 << 8) +#define REG_EDI (7 << 8) +#define REG_R8 (8 << 8) +#define REG_R9 (9 << 8) +#define REG_R10 (10 << 8) +#define REG_R11 (11 << 8) +#define REG_R12 (12 << 8) +#define REG_R13 (13 << 8) +#define REG_R14 (14 << 8) +#define REG_R15 (15 << 8) /* * Exit Qualifications for MOV for Debug Register Access */ #define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug register */ #define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ -#define TYPE_MOV_TO_DR (0 << 4) +#define TYPE_MOV_TO_DR (0 << 4) #define TYPE_MOV_FROM_DR (1 << 4) #define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose register */ - + /* These bits in the CR4 are owned by the host */ #if CONFIG_PAGING_LEVELS >= 3 #define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE) @@ -212,7 +226,7 @@ static inline void __vmptrld(u64 addr) /* CF==1 or ZF==1 --> crash (ud2) */ "ja 1f ; ud2 ; 1:\n" : - : "a" (&addr) + : "a" (&addr) : "memory"); } @@ -221,7 +235,7 @@ static inline void __vmptrst(u64 addr) __asm__ __volatile__ ( VMPTRST_OPCODE MODRM_EAX_07 : - : "a" (&addr) + : "a" (&addr) : "memory"); } @@ -232,7 +246,7 @@ static inline void __vmpclear(u64 addr) /* CF==1 or ZF==1 --> crash (ud2) */ "ja 1f ; ud2 ; 1:\n" : - : "a" (&addr) + : "a" (&addr) : "memory"); } @@ -252,7 +266,7 @@ static always_inline int ___vmread( : "0" (0), "a" (field) : "memory"); - switch (size) { + switch ( size ) { case 1: *((u8 *) (ptr)) = ecx; break; @@ -274,43 +288,45 @@ static always_inline int ___vmread( } -static always_inline void __vmwrite_vcpu(struct vcpu *v, unsigned long field, unsigned long value) -{ - switch(field) { +static always_inline void __vmwrite_vcpu( + struct vcpu *v, unsigned long field, unsigned long value) +{ + 
switch ( field ) { case CR0_READ_SHADOW: - v->arch.hvm_vmx.cpu_shadow_cr0 = value; - break; + v->arch.hvm_vmx.cpu_shadow_cr0 = value; + break; case GUEST_CR0: - v->arch.hvm_vmx.cpu_cr0 = value; - break; + v->arch.hvm_vmx.cpu_cr0 = value; + break; case CPU_BASED_VM_EXEC_CONTROL: - v->arch.hvm_vmx.cpu_based_exec_control = value; - break; + v->arch.hvm_vmx.cpu_based_exec_control = value; + break; default: - printk("__vmwrite_cpu: invalid field %lx\n", field); - break; - } -} - -static always_inline void __vmread_vcpu(struct vcpu *v, unsigned long field, unsigned long *value) -{ - switch(field) { + printk("__vmwrite_cpu: invalid field %lx\n", field); + break; + } +} + +static always_inline void __vmread_vcpu( + struct vcpu *v, unsigned long field, unsigned long *value) +{ + switch ( field ) { case CR0_READ_SHADOW: - *value = v->arch.hvm_vmx.cpu_shadow_cr0; - break; + *value = v->arch.hvm_vmx.cpu_shadow_cr0; + break; case GUEST_CR0: - *value = v->arch.hvm_vmx.cpu_cr0; - break; + *value = v->arch.hvm_vmx.cpu_cr0; + break; case CPU_BASED_VM_EXEC_CONTROL: - *value = v->arch.hvm_vmx.cpu_based_exec_control; - break; + *value = v->arch.hvm_vmx.cpu_based_exec_control; + break; default: - printk("__vmread_cpu: invalid field %lx\n", field); - break; - } -} - -static inline int __vmwrite (unsigned long field, unsigned long value) + printk("__vmread_cpu: invalid field %lx\n", field); + break; + } +} + +static inline int __vmwrite(unsigned long field, unsigned long value) { struct vcpu *v = current; int rc; @@ -323,12 +339,12 @@ static inline int __vmwrite (unsigned lo : "0" (0), "a" (field) , "c" (value) : "memory"); - switch(field) { + switch ( field ) { case CR0_READ_SHADOW: case GUEST_CR0: case CPU_BASED_VM_EXEC_CONTROL: - __vmwrite_vcpu(v, field, value); - break; + __vmwrite_vcpu(v, field, value); + break; } return rc; @@ -336,31 +352,31 @@ static inline int __vmwrite (unsigned lo static inline int __vm_set_bit(unsigned long field, unsigned long mask) { - unsigned long tmp; - 
int err = 0; - - err |= __vmread(field, &tmp); - tmp |= mask; - err |= __vmwrite(field, tmp); - - return err; + unsigned long tmp; + int err = 0; + + err |= __vmread(field, &tmp); + tmp |= mask; + err |= __vmwrite(field, tmp); + + return err; } static inline int __vm_clear_bit(unsigned long field, unsigned long mask) { - unsigned long tmp; - int err = 0; - - err |= __vmread(field, &tmp); - tmp &= ~mask; - err |= __vmwrite(field, tmp); - - return err; + unsigned long tmp; + int err = 0; + + err |= __vmread(field, &tmp); + tmp &= ~mask; + err |= __vmwrite(field, tmp); + + return err; } static inline void __vmxoff (void) { - __asm__ __volatile__ ( VMXOFF_OPCODE + __asm__ __volatile__ ( VMXOFF_OPCODE ::: "memory"); } @@ -373,7 +389,7 @@ static inline int __vmxon (u64 addr) /* CF==1 or ZF==1 --> rc = -1 */ "setna %b0 ; neg %0" : "=q" (rc) - : "0" (0), "a" (&addr) + : "0" (0), "a" (&addr) : "memory"); return rc; @@ -390,9 +406,9 @@ static inline void vmx_stts(void) return; /* - * If the guest does not have TS enabled then we must cause and handle an - * exception on first use of the FPU. If the guest *does* have TS enabled - * then this is not necessary: no FPU activity can occur until the guest + * If the guest does not have TS enabled then we must cause and handle an + * exception on first use of the FPU. If the guest *does* have TS enabled + * then this is not necessary: no FPU activity can occur until the guest * clears CR0.TS, and we will initialise the FPU when that happens. 
*/ __vmread_vcpu(v, CR0_READ_SHADOW, &cr0); @@ -421,66 +437,64 @@ static inline int vmx_pgbit_test(struct return (cr0 & X86_CR0_PG); } -static inline int __vmx_inject_exception(struct vcpu *v, int trap, int type, +static inline void __vmx_inject_exception(struct vcpu *v, int trap, int type, int error_code, int ilen) { unsigned long intr_fields; /* Reflect it back into the guest */ intr_fields = (INTR_INFO_VALID_MASK | type | trap); - if (error_code != VMX_DELIVER_NO_ERROR_CODE) { + if ( error_code != VMX_DELIVER_NO_ERROR_CODE ) { __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); intr_fields |= INTR_INFO_DELIVER_CODE_MASK; } - if(ilen) + if ( ilen ) __vmwrite(VM_ENTRY_INSTRUCTION_LEN, ilen); __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields); - return 0; -} - -static inline int vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code) +} + +static inline void vmx_inject_hw_exception( + struct vcpu *v, int trap, int error_code) { v->arch.hvm_vmx.vector_injected = 1; - return __vmx_inject_exception(v, trap, INTR_TYPE_HW_EXCEPTION, - error_code, 0); -} - -static inline int vmx_inject_sw_exception(struct vcpu *v, int trap, int instruction_len) { - v->arch.hvm_vmx.vector_injected=1; - return __vmx_inject_exception(v, trap, INTR_TYPE_SW_EXCEPTION, - VMX_DELIVER_NO_ERROR_CODE, - instruction_len); -} - -static inline int vmx_inject_extint(struct vcpu *v, int trap, int error_code) + __vmx_inject_exception(v, trap, INTR_TYPE_HW_EXCEPTION, error_code, 0); +} + +static inline void vmx_inject_sw_exception( + struct vcpu *v, int trap, int instruction_len) +{ + v->arch.hvm_vmx.vector_injected = 1; + __vmx_inject_exception(v, trap, INTR_TYPE_SW_EXCEPTION, + VMX_DELIVER_NO_ERROR_CODE, + instruction_len); +} + +static inline void vmx_inject_extint(struct vcpu *v, int trap, int error_code) { __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code, 0); __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); - - return 0; -} - -static inline int vmx_reflect_exception(struct vcpu *v) 
+} + +static inline void vmx_reflect_exception(struct vcpu *v) { int error_code, intr_info, vector; __vmread(VM_EXIT_INTR_INFO, &intr_info); vector = intr_info & 0xff; - if (intr_info & INTR_INFO_DELIVER_CODE_MASK) + if ( intr_info & INTR_INFO_DELIVER_CODE_MASK ) __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code); else error_code = VMX_DELIVER_NO_ERROR_CODE; #ifndef NDEBUG { - unsigned long eip; - - __vmread(GUEST_RIP, &eip); - HVM_DBG_LOG(DBG_LEVEL_1, - "vmx_reflect_exception: eip = %lx, error_code = %x", - eip, error_code); + unsigned long rip; + + __vmread(GUEST_RIP, &rip); + HVM_DBG_LOG(DBG_LEVEL_1, "rip = %lx, error_code = %x", + rip, error_code); } #endif /* NDEBUG */ @@ -489,15 +503,14 @@ static inline int vmx_reflect_exception( 2.8.3, SW_EXCEPTION should be used for #BP and #OV, and HW_EXCPEPTION used for everything else. The main difference appears to be that for SW_EXCEPTION, the EIP/RIP is incremented - by VM_ENTER_INSTRUCTION_LEN bytes, whereas for HW_EXCEPTION, + by VM_ENTER_INSTRUCTION_LEN bytes, whereas for HW_EXCEPTION, it is not. 
*/ - if((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_SW_EXCEPTION) { - int ilen; - __vmread(VM_EXIT_INSTRUCTION_LEN, &ilen); - vmx_inject_sw_exception(v, vector, ilen); + if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_SW_EXCEPTION ) { + int ilen; + __vmread(VM_EXIT_INSTRUCTION_LEN, &ilen); + vmx_inject_sw_exception(v, vector, ilen); } else - vmx_inject_hw_exception(v, vector, error_code); - return 0; + vmx_inject_hw_exception(v, vector, error_code); } #endif /* __ASM_X86_HVM_VMX_VMX_H__ */ diff -r e4f1519b473f -r b60ea69932b1 xen/include/public/hvm/ioreq.h --- a/xen/include/public/hvm/ioreq.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/public/hvm/ioreq.h Wed Aug 09 18:04:20 2006 +0100 @@ -69,7 +69,6 @@ struct vcpu_iodata { struct ioreq vp_ioreq; /* Event channel port */ unsigned int vp_eport; /* VMX vcpu uses this to notify DM */ - unsigned int dm_eport; /* DM uses this to notify VMX vcpu */ }; typedef struct vcpu_iodata vcpu_iodata_t; @@ -78,6 +77,14 @@ struct shared_iopage { struct vcpu_iodata vcpu_iodata[1]; }; typedef struct shared_iopage shared_iopage_t; + +#define IOREQ_BUFFER_SLOT_NUM 80 +struct buffered_iopage { + unsigned long read_pointer; + unsigned long write_pointer; + ioreq_t ioreq[IOREQ_BUFFER_SLOT_NUM]; +}; /* sizeof this structure must be in one page */ +typedef struct buffered_iopage buffered_iopage_t; #endif /* _IOREQ_H_ */ diff -r e4f1519b473f -r b60ea69932b1 xen/include/xen/config.h --- a/xen/include/xen/config.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/xen/config.h Wed Aug 09 18:04:20 2006 +0100 @@ -50,6 +50,5 @@ #endif /* !__ASSEMBLY__ */ #define fastcall -#define __read_mostly #endif /* __XEN_CONFIG_H__ */ diff -r e4f1519b473f -r b60ea69932b1 xen/include/xen/event.h --- a/xen/include/xen/event.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/xen/event.h Wed Aug 09 18:04:20 2006 +0100 @@ -12,6 +12,7 @@ #include <xen/config.h> #include <xen/sched.h> #include <xen/smp.h> +#include <xen/softirq.h> #include 
<asm/bitops.h> #include <asm/event.h> diff -r e4f1519b473f -r b60ea69932b1 xen/include/xen/multicall.h --- a/xen/include/xen/multicall.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/xen/multicall.h Wed Aug 09 18:04:20 2006 +0100 @@ -5,6 +5,7 @@ #ifndef __XEN_MULTICALL_H__ #define __XEN_MULTICALL_H__ +#include <xen/percpu.h> #include <asm/multicall.h> #define _MCSF_in_multicall 0 @@ -14,8 +15,8 @@ struct mc_state { struct mc_state { unsigned long flags; struct multicall_entry call; -} __cacheline_aligned; +}; -extern struct mc_state mc_state[NR_CPUS]; +DECLARE_PER_CPU(struct mc_state, mc_state); #endif /* __XEN_MULTICALL_H__ */ diff -r e4f1519b473f -r b60ea69932b1 xen/include/xen/percpu.h --- a/xen/include/xen/percpu.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/xen/percpu.h Wed Aug 09 18:04:20 2006 +0100 @@ -2,7 +2,6 @@ #define __XEN_PERCPU_H__ #include <xen/config.h> -#include <xen/smp.h> #include <asm/percpu.h> /* Preferred on Xen. Also see arch-defined per_cpu(). */ diff -r e4f1519b473f -r b60ea69932b1 xen/include/xen/sched-if.h --- a/xen/include/xen/sched-if.h Tue Aug 08 19:07:32 2006 -0500 +++ b/xen/include/xen/sched-if.h Wed Aug 09 18:04:20 2006 +0100 @@ -8,6 +8,8 @@ #ifndef __XEN_SCHED_IF_H__ #define __XEN_SCHED_IF_H__ +#include <xen/percpu.h> + struct schedule_data { spinlock_t schedule_lock; /* spinlock protecting curr */ struct vcpu *curr; /* current task */ @@ -17,7 +19,7 @@ struct schedule_data { unsigned long tick; /* current periodic 'tick' */ } __cacheline_aligned; -extern struct schedule_data schedule_data[]; +DECLARE_PER_CPU(struct schedule_data, schedule_data); static inline void vcpu_schedule_lock(struct vcpu *v) { @@ -26,10 +28,10 @@ static inline void vcpu_schedule_lock(st for ( ; ; ) { cpu = v->processor; - spin_lock(&schedule_data[cpu].schedule_lock); + spin_lock(&per_cpu(schedule_data, cpu).schedule_lock); if ( likely(v->processor == cpu) ) break; - spin_unlock(&schedule_data[cpu].schedule_lock); + 
spin_unlock(&per_cpu(schedule_data, cpu).schedule_lock); } } @@ -40,7 +42,7 @@ static inline void vcpu_schedule_lock(st static inline void vcpu_schedule_unlock(struct vcpu *v) { - spin_unlock(&schedule_data[v->processor].schedule_lock); + spin_unlock(&per_cpu(schedule_data, v->processor).schedule_lock); } #define vcpu_schedule_unlock_irq(v) \ diff -r e4f1519b473f -r b60ea69932b1 linux-2.6-xen-sparse/drivers/xen/pciback/slot.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/slot.c Wed Aug 09 18:04:20 2006 +0100 @@ -0,0 +1,151 @@ +/* + * PCI Backend - Provides a Virtual PCI bus (with real devices) + * to the frontend + * + * Author: Ryan Wilson <hap9@xxxxxxxxxxxxxx> (vpci.c) + * Author: Tristan Gingold <tristan.gingold@xxxxxxxx>, from vpci.c + */ + +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include "pciback.h" + +/* There are at most 32 slots in a pci bus. */ +#define PCI_SLOT_MAX 32 + +#define PCI_BUS_NBR 2 + +struct slot_dev_data { + /* Access to dev_list must be protected by lock */ + struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX]; + spinlock_t lock; +}; + +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn) +{ + struct pci_dev *dev = NULL; + struct slot_dev_data *slot_dev = pdev->pci_dev_data; + unsigned long flags; + + if (domain != 0 || PCI_FUNC(devfn) != 0) + return NULL; + + if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR) + return NULL; + + spin_lock_irqsave(&slot_dev->lock, flags); + dev = slot_dev->slots[bus][PCI_SLOT(devfn)]; + spin_unlock_irqrestore(&slot_dev->lock, flags); + + return dev; +} + +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) +{ + int err = 0, slot, bus; + struct slot_dev_data *slot_dev = pdev->pci_dev_data; + unsigned long flags; + + if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { + err = -EFAULT; + 
xenbus_dev_fatal(pdev->xdev, err, + "Can't export bridges on the virtual PCI bus"); + goto out; + } + + spin_lock_irqsave(&slot_dev->lock, flags); + + /* Assign to a new slot on the virtual PCI bus */ + for (bus = 0; bus < PCI_BUS_NBR; bus++) + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + if (slot_dev->slots[bus][slot] == NULL) { + printk(KERN_INFO + "pciback: slot: %s: assign to virtual slot %d, bus %d\n", + pci_name(dev), slot, bus); + slot_dev->slots[bus][slot] = dev; + goto unlock; + } + } + + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "No more space on root virtual PCI bus"); + + unlock: + spin_unlock_irqrestore(&slot_dev->lock, flags); + out: + return err; +} + +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) +{ + int slot, bus; + struct slot_dev_data *slot_dev = pdev->pci_dev_data; + struct pci_dev *found_dev = NULL; + unsigned long flags; + + spin_lock_irqsave(&slot_dev->lock, flags); + + for (bus = 0; bus < PCI_BUS_NBR; bus++) + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + if (slot_dev->slots[bus][slot] == dev) { + slot_dev->slots[bus][slot] = NULL; + found_dev = dev; + goto out; + } + } + + out: + spin_unlock_irqrestore(&slot_dev->lock, flags); + + if (found_dev) + pcistub_put_pci_dev(found_dev); +} + +int pciback_init_devices(struct pciback_device *pdev) +{ + int slot, bus; + struct slot_dev_data *slot_dev; + + slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL); + if (!slot_dev) + return -ENOMEM; + + spin_lock_init(&slot_dev->lock); + + for (bus = 0; bus < PCI_BUS_NBR; bus++) + for (slot = 0; slot < PCI_SLOT_MAX; slot++) + slot_dev->slots[bus][slot] = NULL; + + pdev->pci_dev_data = slot_dev; + + return 0; +} + +int pciback_publish_pci_roots(struct pciback_device *pdev, + publish_pci_root_cb publish_cb) +{ + /* The Virtual PCI bus has only one root */ + return publish_cb(pdev, 0, 0); +} + +void pciback_release_devices(struct pciback_device *pdev) +{ + int slot, bus; + struct slot_dev_data *slot_dev = 
pdev->pci_dev_data; + struct pci_dev *dev; + + for (bus = 0; bus < PCI_BUS_NBR; bus++) + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + dev = slot_dev->slots[bus][slot]; + if (dev != NULL) + pcistub_put_pci_dev(dev); + } + + kfree(slot_dev); + pdev->pci_dev_data = NULL; +} diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/patches/vnc-display-find-unused --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ioemu/patches/vnc-display-find-unused Wed Aug 09 18:04:20 2006 +0100 @@ -0,0 +1,128 @@ +Index: ioemu/vnc.c +=================================================================== +--- ioemu.orig/vnc.c 2006-08-09 14:44:44.721942535 +0100 ++++ ioemu/vnc.c 2006-08-09 14:52:37.262165292 +0100 +@@ -1183,7 +1183,7 @@ + } + } + +-void vnc_display_init(DisplayState *ds, int display) ++int vnc_display_init(DisplayState *ds, int display, int find_unused) + { + struct sockaddr_in addr; + int reuse_addr, ret; +@@ -1214,10 +1214,6 @@ + exit(1); + } + +- addr.sin_family = AF_INET; +- addr.sin_port = htons(5900 + display); +- memset(&addr.sin_addr, 0, sizeof(addr.sin_addr)); +- + reuse_addr = 1; + ret = setsockopt(vs->lsock, SOL_SOCKET, SO_REUSEADDR, + (const char *)&reuse_addr, sizeof(reuse_addr)); +@@ -1226,7 +1222,16 @@ + exit(1); + } + ++ retry: ++ addr.sin_family = AF_INET; ++ addr.sin_port = htons(5900 + display); ++ memset(&addr.sin_addr, 0, sizeof(addr.sin_addr)); ++ + if (bind(vs->lsock, (struct sockaddr *)&addr, sizeof(addr)) == -1) { ++ if (find_unused && errno == EADDRINUSE) { ++ display++; ++ goto retry; ++ } + fprintf(stderr, "bind() failed\n"); + exit(1); + } +@@ -1247,6 +1252,8 @@ + vs->ds->dpy_refresh = vnc_dpy_refresh; + + vnc_dpy_resize(vs->ds, 640, 400); ++ ++ return display; + } + + int vnc_start_viewer(int port) +Index: ioemu/vl.c +=================================================================== +--- ioemu.orig/vl.c 2006-08-09 14:44:44.721942535 +0100 ++++ ioemu/vl.c 2006-08-09 14:52:06.783905832 +0100 +@@ -121,6 +121,7 @@ + static DisplayState 
display_state; + int nographic; + int vncviewer; ++int vncunused; + const char* keyboard_layout = NULL; + int64_t ticks_per_sec; + int boot_device = 'c'; +@@ -5342,6 +5343,7 @@ + "-loadvm file start right away with a saved state (loadvm in monitor)\n" + "-vnc display start a VNC server on display\n" + "-vncviewer start a vncviewer process for this domain\n" ++ "-vncunused bind the VNC server to an unused port\n" + "-timeoffset time offset (in seconds) from local time\n" + "-acpi disable or enable ACPI of HVM domain \n" + "\n" +@@ -5431,6 +5433,7 @@ + QEMU_OPTION_timeoffset, + QEMU_OPTION_acpi, + QEMU_OPTION_vncviewer, ++ QEMU_OPTION_vncunused, + }; + + typedef struct QEMUOption { +@@ -5506,6 +5509,7 @@ + { "smp", HAS_ARG, QEMU_OPTION_smp }, + { "vnc", HAS_ARG, QEMU_OPTION_vnc }, + { "vncviewer", 0, QEMU_OPTION_vncviewer }, ++ { "vncunused", 0, QEMU_OPTION_vncunused }, + + /* temporary options */ + { "usb", 0, QEMU_OPTION_usb }, +@@ -5873,6 +5877,7 @@ + snapshot = 0; + nographic = 0; + vncviewer = 0; ++ vncunused = 0; + kernel_filename = NULL; + kernel_cmdline = ""; + #ifdef TARGET_PPC +@@ -6270,6 +6275,11 @@ + case QEMU_OPTION_vncviewer: + vncviewer++; + break; ++ case QEMU_OPTION_vncunused: ++ vncunused++; ++ if (vnc_display == -1) ++ vnc_display = -2; ++ break; + } + } + } +@@ -6465,7 +6475,7 @@ + if (nographic) { + dumb_display_init(ds); + } else if (vnc_display != -1) { +- vnc_display_init(ds, vnc_display); ++ vnc_display = vnc_display_init(ds, vnc_display, vncunused); + if (vncviewer) + vnc_start_viewer(vnc_display); + } else { +Index: ioemu/vl.h +=================================================================== +--- ioemu.orig/vl.h 2006-08-09 14:44:44.721942535 +0100 ++++ ioemu/vl.h 2006-08-09 14:52:06.783905832 +0100 +@@ -784,7 +784,7 @@ + void cocoa_display_init(DisplayState *ds, int full_screen); + + /* vnc.c */ +-void vnc_display_init(DisplayState *ds, int display); ++int vnc_display_init(DisplayState *ds, int display, int find_unused); + int 
vnc_start_viewer(int port); + + /* ide.c */ diff -r e4f1519b473f -r b60ea69932b1 tools/ioemu/patches/xen-support-buffered-ioreqs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ioemu/patches/xen-support-buffered-ioreqs Wed Aug 09 18:04:20 2006 +0100 @@ -0,0 +1,150 @@ +Index: ioemu/vl.c +=================================================================== +--- ioemu.orig/vl.c 2006-08-09 15:04:25.583508863 +0100 ++++ ioemu/vl.c 2006-08-09 15:04:26.034465993 +0100 +@@ -5838,6 +5838,7 @@ + unsigned long nr_pages; + xen_pfn_t *page_array; + extern void *shared_page; ++ extern void *buffered_io_page; + + char qemu_dm_logfilename[64]; + +@@ -6388,12 +6389,17 @@ + + phys_ram_base = xc_map_foreign_batch(xc_handle, domid, + PROT_READ|PROT_WRITE, page_array, +- nr_pages - 1); ++ nr_pages - 3); + if (phys_ram_base == 0) { + fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno); + exit(-1); + } + ++ /* not yet add for IA64 */ ++ buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, ++ PROT_READ|PROT_WRITE, ++ page_array[nr_pages - 3]); ++ + shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, + PROT_READ|PROT_WRITE, + page_array[nr_pages - 1]); +Index: ioemu/target-i386-dm/helper2.c +=================================================================== +--- ioemu.orig/target-i386-dm/helper2.c 2006-08-09 15:04:24.105649313 +0100 ++++ ioemu/target-i386-dm/helper2.c 2006-08-09 15:04:26.040465422 +0100 +@@ -76,6 +76,10 @@ + + shared_iopage_t *shared_page = NULL; + ++#define BUFFER_IO_MAX_DELAY 100 ++buffered_iopage_t *buffered_io_page = NULL; ++QEMUTimer *buffered_io_timer; ++ + /* the evtchn fd for polling */ + int xce_handle = -1; + +@@ -419,36 +423,68 @@ + req->u.data = tmp1; + } + ++void __handle_ioreq(CPUState *env, ioreq_t *req) ++{ ++ if (!req->pdata_valid && req->dir == IOREQ_WRITE && req->size != 4) ++ req->u.data &= (1UL << (8 * req->size)) - 1; ++ ++ switch (req->type) { ++ case IOREQ_TYPE_PIO: ++ cpu_ioreq_pio(env, req); ++ 
break; ++ case IOREQ_TYPE_COPY: ++ cpu_ioreq_move(env, req); ++ break; ++ case IOREQ_TYPE_AND: ++ cpu_ioreq_and(env, req); ++ break; ++ case IOREQ_TYPE_OR: ++ cpu_ioreq_or(env, req); ++ break; ++ case IOREQ_TYPE_XOR: ++ cpu_ioreq_xor(env, req); ++ break; ++ default: ++ hw_error("Invalid ioreq type 0x%x\n", req->type); ++ } ++} ++ ++void __handle_buffered_iopage(CPUState *env) ++{ ++ ioreq_t *req = NULL; ++ ++ if (!buffered_io_page) ++ return; ++ ++ while (buffered_io_page->read_pointer != ++ buffered_io_page->write_pointer) { ++ req = &buffered_io_page->ioreq[buffered_io_page->read_pointer % ++ IOREQ_BUFFER_SLOT_NUM]; ++ ++ __handle_ioreq(env, req); ++ ++ mb(); ++ buffered_io_page->read_pointer++; ++ } ++} ++ ++void handle_buffered_io(void *opaque) ++{ ++ CPUState *env = opaque; ++ ++ __handle_buffered_iopage(env); ++ qemu_mod_timer(buffered_io_timer, BUFFER_IO_MAX_DELAY + ++ qemu_get_clock(rt_clock)); ++} ++ + void cpu_handle_ioreq(void *opaque) + { + CPUState *env = opaque; + ioreq_t *req = cpu_get_ioreq(); + ++ handle_buffered_io(env); + if (req) { +- if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) { +- if (req->size != 4) +- req->u.data &= (1UL << (8 * req->size))-1; +- } +- +- switch (req->type) { +- case IOREQ_TYPE_PIO: +- cpu_ioreq_pio(env, req); +- break; +- case IOREQ_TYPE_COPY: +- cpu_ioreq_move(env, req); +- break; +- case IOREQ_TYPE_AND: +- cpu_ioreq_and(env, req); +- break; +- case IOREQ_TYPE_OR: +- cpu_ioreq_or(env, req); +- break; +- case IOREQ_TYPE_XOR: +- cpu_ioreq_xor(env, req); +- break; +- default: +- hw_error("Invalid ioreq type 0x%x\n", req->type); +- } ++ __handle_ioreq(env, req); + + /* No state change if state = STATE_IORESP_HOOK */ + if (req->state == STATE_IOREQ_INPROCESS) { +@@ -466,6 +502,10 @@ + CPUState *env = cpu_single_env; + int evtchn_fd = xc_evtchn_fd(xce_handle); + ++ buffered_io_timer = qemu_new_timer(rt_clock, handle_buffered_io, ++ cpu_single_env); ++ qemu_mod_timer(buffered_io_timer, qemu_get_clock(rt_clock)); ++ + 
qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, env); + + env->send_event = 0; diff -r e4f1519b473f -r b60ea69932b1 tools/python/xen/xend/tests/xend-config.sxp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/python/xen/xend/tests/xend-config.sxp Wed Aug 09 18:04:20 2006 +0100 @@ -0,0 +1,132 @@ +# -*- sh -*- + +# +# Xend configuration file. +# + +# This example configuration is appropriate for an installation that +# utilizes a bridged network configuration. Access to xend via http +# is disabled. + +# Commented out entries show the default for that entry, unless otherwise +# specified. + +#(logfile /var/log/xend.log) +#(loglevel DEBUG) + +#(xend-http-server no) +#(xend-unix-server no) +#(xend-tcp-xmlrpc-server no) +#(xend-unix-xmlrpc-server yes) +#(xend-relocation-server no) +(xend-relocation-server yes) + +#(xend-unix-path /var/lib/xend/xend-socket) + +# Port xend should use for the HTTP interface, if xend-http-server is set. +#(xend-port 8000) + +# Port xend should use for the relocation interface, if xend-relocation-server +# is set. +#(xend-relocation-port 8002) + +# Address xend should listen on for HTTP connections, if xend-http-server is +# set. +# Specifying 'localhost' prevents remote connections. +# Specifying the empty string '' (the default) allows all connections. +#(xend-address '') +#(xend-address localhost) + +# Address xend should listen on for relocation-socket connections, if +# xend-relocation-server is set. +# Meaning and default as for xend-address above. +#(xend-relocation-address '') + +# The hosts allowed to talk to the relocation port. If this is empty (the +# default), then all connections are allowed (assuming that the connection +# arrives on a port and interface on which we are listening; see +# xend-relocation-port and xend-relocation-address above). Otherwise, this +# should be a space-separated sequence of regular expressions. 
Any host with +# a fully-qualified domain name or an IP address that matches one of these +# regular expressions will be accepted. +# +# For example: +# (xend-relocation-hosts-allow '^localhost$ ^.*\\.example\\.org$') +# +#(xend-relocation-hosts-allow '') +(xend-relocation-hosts-allow '^localhost$ ^localhost\\.localdomain$') + +# The limit (in kilobytes) on the size of the console buffer +#(console-limit 1024) + +## +# To bridge network traffic, like this: +# +# dom0: fake eth0 -> vif0.0 -+ +# | +# bridge -> real eth0 -> the network +# | +# domU: fake eth0 -> vifN.0 -+ +# +# use +# +# (network-script network-bridge) +# +# Your default ethernet device is used as the outgoing interface, by default. +# To use a different one (e.g. eth1) use +# +# (network-script 'network-bridge netdev=eth1') +# +# The bridge is named xenbr0, by default. To rename the bridge, use +# +# (network-script 'network-bridge bridge=<name>') +# +# It is possible to use the network-bridge script in more complicated +# scenarios, such as having two outgoing interfaces, with two bridges, and +# two fake interfaces per guest domain. To do things like this, write +# yourself a wrapper script, and call network-bridge from it, as appropriate. +# +(network-script network-bridge) + +# The script used to control virtual interfaces. This can be overridden on a +# per-vif basis when creating a domain or a configuring a new vif. The +# vif-bridge script is designed for use with the network-bridge script, or +# similar configurations. +# +# If you have overridden the bridge name using +# (network-script 'network-bridge bridge=<name>') then you may wish to do the +# same here. The bridge name can also be set when creating a domain or +# configuring a new vif, but a value specified here would act as a default. +# +# If you are using only one bridge, the vif-bridge script will discover that, +# so there is no need to specify it explicitly. 
+# +(vif-script vif-bridge) + + +## Use the following if network traffic is routed, as an alternative to the +# settings for bridged networking given above. +#(network-script network-route) +#(vif-script vif-route) + + +## Use the following if network traffic is routed with NAT, as an alternative +# to the settings for bridged networking given above. +#(network-script network-nat) +#(vif-script vif-nat) + + +# Dom0 will balloon out when needed to free memory for domU. +# dom0-min-mem is the lowest memory level (in MB) dom0 will get down to. +# If dom0-min-mem=0, dom0 will never balloon out. +(dom0-min-mem 196) + +# In SMP system, dom0 will use dom0-cpus # of CPUS +# If dom0-cpus = 0, dom0 will take all cpus available +(dom0-cpus 0) + +# Whether to enable core-dumps when domains crash. +#(enable-dump no) + +# The tool used for initiating virtual TPM migration +#(external-migration-tool '') _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |