[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] [merge] with xen-unstable
# HG changeset patch # User Christian Limpach <Christian.Limpach@xxxxxxxxxxxxx> # Date 1169218137 0 # Node ID 3c8bb086025ee18f077582a5343da631c67fbaca # Parent 8475a4e0425ed158923d9849a8e5a6821e8bdb34 # Parent 3157835b1d45f7175aba2b4a98cac93f527d6b10 [merge] with xen-unstable Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxxx> --- tools/python/xen/xend/XendRoot.py | 322 - xen/arch/ia64/linux/cmdline.c | 121 xen/include/asm-ia64/linux-null/linux/ioport.h | 1 xen/include/asm-ia64/linux-xen/asm/sn/sn_sal.h | 994 ---- xen/include/asm-ia64/linux/asm/machvec.h | 390 - xen/include/asm-ia64/linux/asm/pci.h | 161 Config.mk | 5 buildconfigs/linux-defconfig_xen0_ia64 | 3 buildconfigs/linux-defconfig_xen0_x86_32 | 6 buildconfigs/linux-defconfig_xen0_x86_64 | 6 buildconfigs/linux-defconfig_xenU_ia64 | 3 buildconfigs/linux-defconfig_xenU_x86_32 | 2 buildconfigs/linux-defconfig_xenU_x86_64 | 2 buildconfigs/linux-defconfig_xen_ia64 | 3 buildconfigs/linux-defconfig_xen_x86_32 | 2 buildconfigs/linux-defconfig_xen_x86_64 | 2 buildconfigs/mk.linux-2.6-xen | 4 extras/mini-os/Makefile | 2 extras/mini-os/include/x86/x86_32/hypercall-x86_32.h | 14 extras/mini-os/include/x86/x86_64/hypercall-x86_64.h | 14 linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c | 4 linux-2.6-xen-sparse/arch/ia64/Kconfig | 6 linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c | 1 linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S | 925 ++++ linux-2.6-xen-sparse/arch/ia64/kernel/gate.S | 24 linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S | 8 linux-2.6-xen-sparse/arch/ia64/kernel/patch.c | 4 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c | 10 linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S | 267 - linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c | 88 linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S | 49 linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S | 54 linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h | 11 linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S | 23 linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S | 7 
linux-2.6-xen-sparse/drivers/xen/Kconfig | 8 linux-2.6-xen-sparse/include/asm-ia64/hypercall.h | 108 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h | 3 linux-2.6-xen-sparse/include/asm-ia64/maddr.h | 1 linux-2.6-xen-sparse/include/asm-ia64/page.h | 1 linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h | 18 linux-2.6-xen-sparse/include/linux/skbuff.h | 3 linux-2.6-xen-sparse/net/core/skbuff.c | 41 tools/ioemu/hw/cirrus_vga.c | 52 tools/ioemu/hw/ide.c | 3 tools/ioemu/hw/pci.c | 3 tools/ioemu/target-i386-dm/helper2.c | 15 tools/ioemu/vl.c | 38 tools/libxc/Makefile | 2 tools/libxc/ia64/xc_ia64_hvm_build.c | 44 tools/libxc/xc_domain.c | 44 tools/libxc/xc_hvm_restore.c | 360 + tools/libxc/xc_hvm_save.c | 727 +++ tools/libxc/xenctrl.h | 24 tools/libxc/xenguest.h | 20 tools/pygrub/src/pygrub | 6 tools/python/setup.py | 13 tools/python/xen/lowlevel/scf/scf.c | 156 tools/python/xen/lowlevel/xc/xc.c | 22 tools/python/xen/util/xmlrpclib2.py | 1 tools/python/xen/web/httpserver.py | 12 tools/python/xen/xend/Vifctl.py | 4 tools/python/xen/xend/XendCheckpoint.py | 77 tools/python/xen/xend/XendConfig.py | 45 tools/python/xen/xend/XendDomain.py | 12 tools/python/xen/xend/XendDomainInfo.py | 64 tools/python/xen/xend/XendNode.py | 4 tools/python/xen/xend/XendOptions.py | 373 + tools/python/xen/xend/XendProtocol.py | 6 tools/python/xen/xend/balloon.py | 6 tools/python/xen/xend/image.py | 28 tools/python/xen/xend/osdep.py | 10 tools/python/xen/xend/server/DevController.py | 8 tools/python/xen/xend/server/SrvRoot.py | 2 tools/python/xen/xend/server/SrvServer.py | 58 tools/python/xen/xend/server/XMLRPCServer.py | 17 tools/python/xen/xend/server/netif.py | 13 tools/python/xen/xend/server/relocate.py | 14 tools/python/xen/xend/server/tests/test_controllers.py | 10 tools/python/xen/xend/server/tpmif.py | 8 tools/python/xen/xend/server/vfbif.py | 4 tools/python/xen/xm/create.py | 6 tools/python/xen/xm/main.py | 13 tools/python/xen/xm/opts.py | 8 tools/python/xen/xm/tests/test_create.py | 6 
tools/xcutils/xc_restore.c | 19 tools/xcutils/xc_save.c | 5 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 9 xen/arch/ia64/asm-offsets.c | 2 xen/arch/ia64/asm-xsi-offsets.c | 2 xen/arch/ia64/linux-xen/Makefile | 3 xen/arch/ia64/linux-xen/README.origin | 1 xen/arch/ia64/linux-xen/cmdline.c | 131 xen/arch/ia64/linux-xen/entry.S | 120 xen/arch/ia64/linux-xen/sn/Makefile | 1 xen/arch/ia64/linux-xen/sn/kernel/Makefile | 5 xen/arch/ia64/linux-xen/sn/kernel/README.origin | 12 xen/arch/ia64/linux-xen/sn/kernel/io_init.c | 783 +++ xen/arch/ia64/linux-xen/sn/kernel/iomv.c | 82 xen/arch/ia64/linux-xen/sn/kernel/irq.c | 542 ++ xen/arch/ia64/linux-xen/sn/kernel/setup.c | 808 +++ xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c | 548 ++ xen/arch/ia64/linux/Makefile | 6 xen/arch/ia64/linux/README.origin | 4 xen/arch/ia64/linux/dig/Makefile | 1 xen/arch/ia64/linux/dig/README.origin | 7 xen/arch/ia64/linux/dig/machvec.c | 3 xen/arch/ia64/linux/hp/Makefile | 1 xen/arch/ia64/linux/hp/zx1/Makefile | 1 xen/arch/ia64/linux/hp/zx1/README.origin | 7 xen/arch/ia64/linux/hp/zx1/hpzx1_machvec.c | 3 xen/arch/ia64/linux/io.c | 164 xen/arch/ia64/linux/sn/Makefile | 2 xen/arch/ia64/linux/sn/kernel/Makefile | 3 xen/arch/ia64/linux/sn/kernel/README.origin | 9 xen/arch/ia64/linux/sn/kernel/machvec.c | 11 xen/arch/ia64/linux/sn/kernel/pio_phys.S | 71 xen/arch/ia64/linux/sn/kernel/ptc_deadlock.S | 92 xen/arch/ia64/linux/sn/pci/Makefile | 1 xen/arch/ia64/linux/sn/pci/pcibr/Makefile | 1 xen/arch/ia64/linux/sn/pci/pcibr/README.origin | 7 xen/arch/ia64/linux/sn/pci/pcibr/pcibr_reg.c | 285 + xen/arch/ia64/vmx/mmio.c | 10 xen/arch/ia64/vmx/vmx_process.c | 23 xen/arch/ia64/vmx/vmx_vcpu.c | 45 xen/arch/ia64/vmx/vmx_virt.c | 2 xen/arch/ia64/xen/dom0_ops.c | 11 xen/arch/ia64/xen/dom_fw.c | 103 xen/arch/ia64/xen/domain.c | 149 xen/arch/ia64/xen/faults.c | 37 xen/arch/ia64/xen/hypercall.c | 172 xen/arch/ia64/xen/hyperprivop.S | 153 xen/arch/ia64/xen/irq.c | 7 xen/arch/ia64/xen/ivt.S | 458 +- 
xen/arch/ia64/xen/mm.c | 19 xen/arch/ia64/xen/tlb_track.c | 25 xen/arch/ia64/xen/vcpu.c | 75 xen/arch/ia64/xen/vhpt.c | 13 xen/arch/ia64/xen/xenasm.S | 22 xen/arch/ia64/xen/xencomm.c | 4 xen/arch/ia64/xen/xensetup.c | 6 xen/arch/x86/hvm/hpet.c | 24 xen/arch/x86/hvm/hvm.c | 11 xen/arch/x86/hvm/i8254.c | 153 xen/arch/x86/hvm/intercept.c | 303 + xen/arch/x86/hvm/rtc.c | 2 xen/arch/x86/hvm/vioapic.c | 132 xen/arch/x86/hvm/vlapic.c | 74 xen/arch/x86/hvm/vmx/vmx.c | 296 + xen/arch/x86/hvm/vpic.c | 83 xen/arch/x86/hvm/vpt.c | 10 xen/arch/x86/mm/shadow/common.c | 7 xen/arch/x86/mm/shadow/multi.c | 2 xen/common/domain.c | 1 xen/common/domctl.c | 73 xen/include/asm-ia64/config.h | 29 xen/include/asm-ia64/domain.h | 4 xen/include/asm-ia64/hypercall.h | 2 xen/include/asm-ia64/linux-null/asm/nmi.h | 1 xen/include/asm-ia64/linux-null/linux/dmapool.h | 1 xen/include/asm-ia64/linux-null/linux/rwsem.h | 1 xen/include/asm-ia64/linux-xen/asm/README.origin | 7 xen/include/asm-ia64/linux-xen/asm/machvec.h | 498 ++ xen/include/asm-ia64/linux-xen/asm/machvec_dig.h | 46 xen/include/asm-ia64/linux-xen/asm/machvec_hpzx1.h | 66 xen/include/asm-ia64/linux-xen/asm/machvec_sn2.h | 166 xen/include/asm-ia64/linux-xen/asm/page.h | 10 xen/include/asm-ia64/linux-xen/asm/pci.h | 185 xen/include/asm-ia64/linux-xen/asm/sn/README.origin | 16 xen/include/asm-ia64/linux-xen/asm/sn/addrs.h | 299 + xen/include/asm-ia64/linux-xen/asm/sn/arch.h | 92 xen/include/asm-ia64/linux-xen/asm/sn/hubdev.h | 95 xen/include/asm-ia64/linux-xen/asm/sn/intr.h | 73 xen/include/asm-ia64/linux-xen/asm/sn/io.h | 281 + xen/include/asm-ia64/linux-xen/asm/sn/nodepda.h | 87 xen/include/asm-ia64/linux-xen/asm/sn/pcibr_provider.h | 153 xen/include/asm-ia64/linux-xen/asm/sn/rw_mmr.h | 32 xen/include/asm-ia64/linux-xen/asm/sn/types.h | 28 xen/include/asm-ia64/linux-xen/asm/system.h | 1 xen/include/asm-ia64/linux-xen/asm/types.h | 8 xen/include/asm-ia64/linux-xen/linux/README.origin | 5 xen/include/asm-ia64/linux-xen/linux/device.h | 
489 ++ xen/include/asm-ia64/linux-xen/linux/kobject.h | 286 + xen/include/asm-ia64/linux-xen/linux/pci.h | 820 +++ xen/include/asm-ia64/linux/README.origin | 12 xen/include/asm-ia64/linux/asm/README.origin | 4 xen/include/asm-ia64/linux/asm/machvec_init.h | 32 xen/include/asm-ia64/linux/asm/sn/README.origin | 24 xen/include/asm-ia64/linux/asm/sn/geo.h | 132 xen/include/asm-ia64/linux/asm/sn/klconfig.h | 246 + xen/include/asm-ia64/linux/asm/sn/l1.h | 51 xen/include/asm-ia64/linux/asm/sn/leds.h | 33 xen/include/asm-ia64/linux/asm/sn/module.h | 127 xen/include/asm-ia64/linux/asm/sn/pcibus_provider_defs.h | 68 xen/include/asm-ia64/linux/asm/sn/pcidev.h | 83 xen/include/asm-ia64/linux/asm/sn/pda.h | 69 xen/include/asm-ia64/linux/asm/sn/pic.h | 261 + xen/include/asm-ia64/linux/asm/sn/shub_mmr.h | 502 ++ xen/include/asm-ia64/linux/asm/sn/shubio.h | 3358 +++++++++++++++ xen/include/asm-ia64/linux/asm/sn/simulator.h | 20 xen/include/asm-ia64/linux/asm/sn/sn_cpuid.h | 132 xen/include/asm-ia64/linux/asm/sn/sn_feature_sets.h | 51 xen/include/asm-ia64/linux/asm/sn/sn_sal.h | 1157 +++++ xen/include/asm-ia64/linux/asm/sn/tiocp.h | 257 + xen/include/asm-ia64/linux/asm/sn/xbow.h | 301 + xen/include/asm-ia64/linux/asm/sn/xwidgetdev.h | 70 xen/include/asm-ia64/linux/completion.h | 57 xen/include/asm-ia64/linux/ioport.h | 136 xen/include/asm-ia64/linux/klist.h | 61 xen/include/asm-ia64/linux/kref.h | 32 xen/include/asm-ia64/linux/mod_devicetable.h | 323 + xen/include/asm-ia64/linux/pci_ids.h | 2356 ++++++++++ xen/include/asm-ia64/linux/pci_regs.h | 488 ++ xen/include/asm-ia64/linux/pm.h | 279 + xen/include/asm-ia64/linux/sysfs.h | 206 xen/include/asm-ia64/multicall.h | 12 xen/include/asm-ia64/tlbflush.h | 2 xen/include/asm-ia64/vcpu.h | 1 xen/include/asm-ia64/vmx_vcpu.h | 2 xen/include/asm-ia64/xensystem.h | 1 xen/include/asm-ia64/xentypes.h | 19 xen/include/asm-x86/hvm/domain.h | 17 xen/include/asm-x86/hvm/hvm.h | 38 xen/include/asm-x86/hvm/support.h | 127 
xen/include/asm-x86/hvm/vpt.h | 2 xen/include/public/arch-ia64.h | 64 xen/include/public/arch-x86/xen.h | 64 xen/include/public/domctl.h | 16 xen/include/xlat.lst | 2 229 files changed, 23967 insertions(+), 3425 deletions(-) diff -r 8475a4e0425e -r 3c8bb086025e Config.mk --- a/Config.mk Thu Jan 18 15:18:07 2007 +0000 +++ b/Config.mk Fri Jan 19 14:48:57 2007 +0000 @@ -6,8 +6,11 @@ XEN_COMPILE_ARCH ?= $(shell uname -m XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \ -e s/ppc/powerpc/ -e s/i86pc/x86_32/) XEN_TARGET_ARCH ?= $(XEN_COMPILE_ARCH) -XEN_TARGET_X86_PAE ?= n XEN_OS ?= $(shell uname -s) + +ifeq ($(XEN_TARGET_ARCH),x86_32) +XEN_TARGET_X86_PAE ?= y +endif CONFIG_$(XEN_OS) := y diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen0_ia64 --- a/buildconfigs/linux-defconfig_xen0_ia64 Thu Jan 18 15:18:07 2007 +0000 +++ b/buildconfigs/linux-defconfig_xen0_ia64 Fri Jan 19 14:48:57 2007 +0000 @@ -1512,10 +1512,7 @@ CONFIG_CRYPTO_DES=y # Hardware crypto devices # # CONFIG_XEN_UTIL is not set -CONFIG_HAVE_ARCH_ALLOC_SKB=y -CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_XEN_BALLOON=y -CONFIG_XEN_SKBUFF=y # CONFIG_XEN_DEVMEM is not set CONFIG_XEN_REBOOT=y # CONFIG_XEN_SMPBOOT is not set diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen0_x86_32 --- a/buildconfigs/linux-defconfig_xen0_x86_32 Thu Jan 18 15:18:07 2007 +0000 +++ b/buildconfigs/linux-defconfig_xen0_x86_32 Fri Jan 19 14:48:57 2007 +0000 @@ -569,7 +569,7 @@ CONFIG_MEGARAID_NEWGEN=y # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set CONFIG_SCSI_SATA=y -# CONFIG_SCSI_SATA_AHCI is not set +CONFIG_SCSI_SATA_AHCI=y # CONFIG_SCSI_SATA_SVW is not set CONFIG_SCSI_ATA_PIIX=y # CONFIG_SCSI_SATA_MV is not set @@ -734,7 +734,7 @@ CONFIG_SK98LIN=y CONFIG_SK98LIN=y # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y -# CONFIG_BNX2 is not set +CONFIG_BNX2=y # # Ethernet (10000 Mbit) @@ -1413,8 +1413,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y 
CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT_030002=y -CONFIG_HAVE_ARCH_ALLOC_SKB=y -CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y CONFIG_NO_IDLE_HZ=y CONFIG_XEN_UTIL=y diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Thu Jan 18 15:18:07 2007 +0000 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Fri Jan 19 14:48:57 2007 +0000 @@ -517,7 +517,7 @@ CONFIG_MEGARAID_NEWGEN=y # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set CONFIG_SCSI_SATA=y -# CONFIG_SCSI_SATA_AHCI is not set +CONFIG_SCSI_SATA_AHCI=y # CONFIG_SCSI_SATA_SVW is not set CONFIG_SCSI_ATA_PIIX=y # CONFIG_SCSI_SATA_MV is not set @@ -683,7 +683,7 @@ CONFIG_SK98LIN=y CONFIG_SK98LIN=y # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y -# CONFIG_BNX2 is not set +CONFIG_BNX2=y # # Ethernet (10000 Mbit) @@ -1363,8 +1363,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT_030002=y -CONFIG_HAVE_ARCH_ALLOC_SKB=y -CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y CONFIG_NO_IDLE_HZ=y CONFIG_XEN_UTIL=y diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xenU_ia64 --- a/buildconfigs/linux-defconfig_xenU_ia64 Thu Jan 18 15:18:07 2007 +0000 +++ b/buildconfigs/linux-defconfig_xenU_ia64 Fri Jan 19 14:48:57 2007 +0000 @@ -1386,10 +1386,7 @@ CONFIG_CRYPTO_DES=y # Hardware crypto devices # # CONFIG_XEN_UTIL is not set -CONFIG_HAVE_ARCH_ALLOC_SKB=y -CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_XEN_BALLOON=y -CONFIG_XEN_SKBUFF=y # CONFIG_XEN_DEVMEM is not set CONFIG_XEN_REBOOT=y # CONFIG_XEN_SMPBOOT is not set diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xenU_x86_32 --- a/buildconfigs/linux-defconfig_xenU_x86_32 Thu Jan 18 15:18:07 2007 +0000 +++ b/buildconfigs/linux-defconfig_xenU_x86_32 Fri Jan 19 14:48:57 2007 +0000 @@ -922,8 
+922,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT_030002=y -CONFIG_HAVE_ARCH_ALLOC_SKB=y -CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y CONFIG_NO_IDLE_HZ=y CONFIG_XEN_UTIL=y diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xenU_x86_64 --- a/buildconfigs/linux-defconfig_xenU_x86_64 Thu Jan 18 15:18:07 2007 +0000 +++ b/buildconfigs/linux-defconfig_xenU_x86_64 Fri Jan 19 14:48:57 2007 +0000 @@ -1218,8 +1218,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT_030002=y -CONFIG_HAVE_ARCH_ALLOC_SKB=y -CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y CONFIG_NO_IDLE_HZ=y CONFIG_XEN_UTIL=y diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen_ia64 --- a/buildconfigs/linux-defconfig_xen_ia64 Thu Jan 18 15:18:07 2007 +0000 +++ b/buildconfigs/linux-defconfig_xen_ia64 Fri Jan 19 14:48:57 2007 +0000 @@ -1518,10 +1518,7 @@ CONFIG_CRYPTO_DES=y # Hardware crypto devices # # CONFIG_XEN_UTIL is not set -CONFIG_HAVE_ARCH_ALLOC_SKB=y -CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_XEN_BALLOON=y -CONFIG_XEN_SKBUFF=y # CONFIG_XEN_DEVMEM is not set CONFIG_XEN_REBOOT=y # CONFIG_XEN_SMPBOOT is not set diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Thu Jan 18 15:18:07 2007 +0000 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Fri Jan 19 14:48:57 2007 +0000 @@ -3272,8 +3272,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT_030002=y -CONFIG_HAVE_ARCH_ALLOC_SKB=y -CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y CONFIG_NO_IDLE_HZ=y CONFIG_XEN_UTIL=y diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 
Thu Jan 18 15:18:07 2007 +0000 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Fri Jan 19 14:48:57 2007 +0000 @@ -3103,8 +3103,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT_030002=y -CONFIG_HAVE_ARCH_ALLOC_SKB=y -CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y CONFIG_NO_IDLE_HZ=y CONFIG_XEN_UTIL=y diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Thu Jan 18 15:18:07 2007 +0000 +++ b/buildconfigs/mk.linux-2.6-xen Fri Jan 19 14:48:57 2007 +0000 @@ -8,6 +8,10 @@ LINUX_DIR = build-linux-$(LINUX_VER)- IMAGE_TARGET ?= vmlinuz INSTALL_BOOT_PATH ?= $(DESTDIR) + +ifeq ($(XEN_TARGET_ARCH),ia64) +INSTALL_BOOT_PATH := $(DESTDIR)/boot +endif LINUX_VER3 := $(LINUX_SERIES).$(word 3, $(subst ., ,$(LINUX_VER))) diff -r 8475a4e0425e -r 3c8bb086025e extras/mini-os/Makefile --- a/extras/mini-os/Makefile Thu Jan 18 15:18:07 2007 +0000 +++ b/extras/mini-os/Makefile Fri Jan 19 14:48:57 2007 +0000 @@ -9,7 +9,7 @@ XEN_ROOT = ../.. XEN_ROOT = ../.. 
include $(XEN_ROOT)/Config.mk -XEN_INTERFACE_VERSION := 0x00030204 +XEN_INTERFACE_VERSION := 0x00030205 export XEN_INTERFACE_VERSION # Set TARGET_ARCH diff -r 8475a4e0425e -r 3c8bb086025e extras/mini-os/include/x86/x86_32/hypercall-x86_32.h --- a/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h Thu Jan 18 15:18:07 2007 +0000 +++ b/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h Fri Jan 19 14:48:57 2007 +0000 @@ -303,6 +303,20 @@ HYPERVISOR_nmi_op( unsigned long arg) { return _hypercall2(int, nmi_op, op, arg); +} + +static inline int +HYPERVISOR_sysctl( + unsigned long op) +{ + return _hypercall1(int, sysctl, op); +} + +static inline int +HYPERVISOR_domctl( + unsigned long op) +{ + return _hypercall1(int, domctl, op); } #endif /* __HYPERCALL_X86_32_H__ */ diff -r 8475a4e0425e -r 3c8bb086025e extras/mini-os/include/x86/x86_64/hypercall-x86_64.h --- a/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h Thu Jan 18 15:18:07 2007 +0000 +++ b/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h Fri Jan 19 14:48:57 2007 +0000 @@ -303,6 +303,20 @@ HYPERVISOR_nmi_op( unsigned long arg) { return _hypercall2(int, nmi_op, op, arg); +} + +static inline int +HYPERVISOR_sysctl( + unsigned long op) +{ + return _hypercall1(int, sysctl, op); +} + +static inline int +HYPERVISOR_domctl( + unsigned long op) +{ + return _hypercall1(int, domctl, op); } #endif /* __HYPERCALL_X86_64_H__ */ diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c Fri Jan 19 14:48:57 2007 +0000 @@ -76,7 +76,9 @@ static void *syscall_page; int __init sysenter_setup(void) { - syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); + void *page = (void *)get_zeroed_page(GFP_ATOMIC); + + syscall_page = page; #ifdef CONFIG_XEN if (boot_cpu_has(X86_FEATURE_SEP)) { diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/Kconfig --- 
a/linux-2.6-xen-sparse/arch/ia64/Kconfig Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig Fri Jan 19 14:48:57 2007 +0000 @@ -579,12 +579,6 @@ config XEN_UTIL config XEN_UTIL default n -config HAVE_ARCH_ALLOC_SKB - default y - -config HAVE_ARCH_DEV_ALLOC_SKB - default y - config XEN_BALLOON default y diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c --- a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c Fri Jan 19 14:48:57 2007 +0000 @@ -287,7 +287,6 @@ void foo(void) DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha); DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir); DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled); - DEFINE_MAPPED_REG_OFS(XSI_INCOMPL_REGFR_OFS, incomplete_regframe); DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum); DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]); DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]); diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,925 @@ +/* + * This file contains the light-weight system call handlers (fsyscall-handlers). + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@xxxxxxxxxx> + * + * 25-Sep-03 davidm Implement fsys_rt_sigprocmask(). + * 18-Feb-03 louisk Implement fsys_gettimeofday(). + * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more, + * probably broke it along the way... ;-) + * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make + * it capable of using memory based clocks without falling back to C code. 
+ */ + +#include <asm/asmmacro.h> +#include <asm/errno.h> +#include <asm/asm-offsets.h> +#include <asm/percpu.h> +#include <asm/thread_info.h> +#include <asm/sal.h> +#include <asm/signal.h> +#include <asm/system.h> +#include <asm/unistd.h> + +#include "entry.h" + +/* + * See Documentation/ia64/fsys.txt for details on fsyscalls. + * + * On entry to an fsyscall handler: + * r10 = 0 (i.e., defaults to "successful syscall return") + * r11 = saved ar.pfs (a user-level value) + * r15 = system call number + * r16 = "current" task pointer (in normal kernel-mode, this is in r13) + * r32-r39 = system call arguments + * b6 = return address (a user-level value) + * ar.pfs = previous frame-state (a user-level value) + * PSR.be = cleared to zero (i.e., little-endian byte order is in effect) + * all other registers may contain values passed in from user-mode + * + * On return from an fsyscall handler: + * r11 = saved ar.pfs (as passed into the fsyscall handler) + * r15 = system call number (as passed into the fsyscall handler) + * r32-r39 = system call arguments (as passed into the fsyscall handler) + * b6 = return address (as passed into the fsyscall handler) + * ar.pfs = previous frame-state (as passed into the fsyscall handler) + */ + +ENTRY(fsys_ni_syscall) + .prologue + .altrp b6 + .body + mov r8=ENOSYS + mov r10=-1 + FSYS_RETURN +END(fsys_ni_syscall) + +ENTRY(fsys_getpid) + .prologue + .altrp b6 + .body + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + ld4 r9=[r9] + add r8=IA64_TASK_TGID_OFFSET,r16 + ;; + and r9=TIF_ALLWORK_MASK,r9 + ld4 r8=[r8] // r8 = current->tgid + ;; + cmp.ne p8,p0=0,r9 +(p8) br.spnt.many fsys_fallback_syscall + FSYS_RETURN +END(fsys_getpid) + +ENTRY(fsys_getppid) + .prologue + .altrp b6 + .body + add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 + ;; + ld8 r17=[r17] // r17 = current->group_leader + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + + ld4 r9=[r9] + add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent + ;; + and
r9=TIF_ALLWORK_MASK,r9 + +1: ld8 r18=[r17] // r18 = current->group_leader->real_parent + ;; + cmp.ne p8,p0=0,r9 + add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = &current->group_leader->real_parent->tgid + ;; + + /* + * The .acq is needed to ensure that the read of tgid has returned its data before + * we re-check "real_parent". + */ + ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid +#ifdef CONFIG_SMP + /* + * Re-read current->group_leader->real_parent. + */ + ld8 r19=[r17] // r19 = current->group_leader->real_parent +(p8) br.spnt.many fsys_fallback_syscall + ;; + cmp.ne p6,p0=r18,r19 // did real_parent change? + mov r19=0 // i must not leak kernel bits... +(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check + ;; + mov r17=0 // i must not leak kernel bits... + mov r18=0 // i must not leak kernel bits... +#else + mov r17=0 // i must not leak kernel bits... + mov r18=0 // i must not leak kernel bits... + mov r19=0 // i must not leak kernel bits... +#endif + FSYS_RETURN +END(fsys_getppid) + +ENTRY(fsys_set_tid_address) + .prologue + .altrp b6 + .body + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + ld4 r9=[r9] + tnat.z p6,p7=r32 // check argument register for being NaT + ;; + and r9=TIF_ALLWORK_MASK,r9 + add r8=IA64_TASK_PID_OFFSET,r16 + add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 + ;; + ld4 r8=[r8] + cmp.ne p8,p0=0,r9 + mov r17=-1 + ;; +(p6) st8 [r18]=r32 +(p7) st8 [r18]=r17 +(p8) br.spnt.many fsys_fallback_syscall + ;; + mov r17=0 // i must not leak kernel bits... + mov r18=0 // i must not leak kernel bits...
+ FSYS_RETURN +END(fsys_set_tid_address) + +/* + * Ensure that the time interpolator structure is compatible with the asm code + */ +#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \ + || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4 +#error fsys_gettimeofday incompatible with changes to struct time_interpolator +#endif +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_DIVIDE_BY_1000 0x4000 +#define CLOCK_ADD_MONOTONIC 0x8000 + +ENTRY(fsys_gettimeofday) + .prologue + .altrp b6 + .body + mov r31 = r32 + tnat.nz p6,p0 = r33 // guard against NaT argument +(p6) br.cond.spnt.few .fail_einval + mov r30 = CLOCK_DIVIDE_BY_1000 + ;; +.gettime: + // Register map + // Incoming r31 = pointer to address where to place result + // r30 = flags determining how time is processed + // r2,r3 = temp r4-r7 preserved + // r8 = result nanoseconds + // r9 = result seconds + // r10 = temporary storage for clock difference + // r11 = preserved: saved ar.pfs + // r12 = preserved: memory stack + // r13 = preserved: thread pointer + // r14 = address of mask / mask + // r15 = preserved: system call number + // r16 = preserved: current task pointer + // r17 = wall to monotonic use + // r18 = time_interpolator->offset + // r19 = address of wall_to_monotonic + // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address + // r21 = shift factor + // r22 = address of time interpolator->last_counter + // r23 = address of time_interpolator->last_cycle + // r24 = adress of time_interpolator->offset + // r25 = last_cycle value + // r26 = last_counter value + // r27 = pointer to xtime + // r28 = sequence number at the beginning of critcal section + // r29 = address of seqlock + // r30 = time processing flags / memory address + // r31 = pointer to result + // Predicates + // p6,p7 short term use + // p8 = timesource ar.itc + // p9 = timesource mmio64 + // p10 = timesource mmio32 + // 
p11 = timesource not to be handled by asm code + // p12 = memory time source ( = p9 | p10) + // p13 = do cmpxchg with time_interpolator_last_cycle + // p14 = Divide by 1000 + // p15 = Add monotonic + // + // Note that instructions are optimized for McKinley. McKinley can process two + // bundles simultaneously and therefore we continuously try to feed the CPU + // two bundles and then a stop. + tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure + mov pr = r30,0xc000 // Set predicates according to function + add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 + movl r20 = time_interpolator + ;; + ld8 r20 = [r20] // get pointer to time_interpolator structure + movl r29 = xtime_lock + ld4 r2 = [r2] // process work pending flags + movl r27 = xtime + ;; // only one bundle here + ld8 r21 = [r20] // first quad with control information + and r2 = TIF_ALLWORK_MASK,r2 +(p6) br.cond.spnt.few .fail_einval // deferred branch + ;; + add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20 + extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc + extr r8 = r21,0,16 // time_interpolator->source + cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled +(p6) br.cond.spnt.many fsys_fallback_syscall + ;; + cmp.eq p8,p12 = 0,r8 // Check for cpu timer + cmp.eq p9,p0 = 1,r8 // MMIO64 ? + extr r2 = r21,24,8 // time_interpolator->jitter + cmp.eq p10,p0 = 2,r8 // MMIO32 ? + cmp.ltu p11,p0 = 2,r8 // function or other clock +(p11) br.cond.spnt.many fsys_fallback_syscall + ;; + setf.sig f7 = r3 // Setup for scaling of counter +(p15) movl r19 = wall_to_monotonic +(p12) ld8 r30 = [r10] + cmp.ne p13,p0 = r2,r0 // need jitter compensation? + extr r21 = r21,16,8 // shift factor + ;; +.time_redo: + .pred.rel.mutex p8,p9,p10 + ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes +(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! + add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20 +(p9) ld8 r2 = [r30] // readq(ti->address). 
Could also have latency issues.. +(p10) ld4 r2 = [r30] // readw(ti->address) +(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20 + ;; // could be removed by moving the last add upward + ld8 r26 = [r22] // time_interpolator->last_counter +(p13) ld8 r25 = [r23] // time interpolator->last_cycle + add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20 +(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET + ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET + add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20 + ;; + ld8 r18 = [r24] // time_interpolator->offset + ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec +(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) + ;; + ld8 r14 = [r14] // time_interpolator->mask +(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared + sub r10 = r2,r26 // current_counter - last_counter + ;; +(p6) sub r10 = r25,r26 // time we got was less than last_cycle +(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg + ;; + and r10 = r10,r14 // Apply mask + ;; + setf.sig f8 = r10 + nop.i 123 + ;; +(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv +EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time + xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) +(p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs + ;; +(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET +(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo + // simulate tbit.nz.or p7,p0 = r28,0 + and r28 = ~1,r28 // Make sequence even to force retry if odd + getf.sig r2 = f8 + mf + add r8 = r8,r18 // Add time interpolator offset + ;; + ld4 r10 = [r29] // xtime_lock.sequence +(p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs + shr.u r2 = r2,r21 + ;; // overloaded 3 bundles! + // End critical section. + add r8 = r8,r2 // Add xtime.nsecs + cmp4.ne.or p7,p0 = r28,r10 +(p7) br.cond.dpnt.few .time_redo // sequence number changed ? 
+ // Now r8=tv->tv_nsec and r9=tv->tv_sec + mov r10 = r0 + movl r2 = 1000000000 + add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 +(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack + ;; +.time_normalize: + mov r21 = r8 + cmp.ge p6,p0 = r8,r2 +(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting some time + ;; +(p14) setf.sig f8 = r20 +(p6) sub r8 = r8,r2 +(p6) add r9 = 1,r9 // two nops before the branch. +(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod +(p6) br.cond.dpnt.few .time_normalize + ;; + // Divided by 8 though shift. Now divide by 125 + // The compiler was able to do that with a multiply + // and a shift and we do the same +EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles +(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it... + ;; + mov r8 = r0 +(p14) getf.sig r2 = f8 + ;; +(p14) shr.u r21 = r2, 4 + ;; +EX(.fail_efault, st8 [r31] = r9) +EX(.fail_efault, st8 [r23] = r21) + FSYS_RETURN +.fail_einval: + mov r8 = EINVAL + mov r10 = -1 + FSYS_RETURN +.fail_efault: + mov r8 = EFAULT + mov r10 = -1 + FSYS_RETURN +END(fsys_gettimeofday) + +ENTRY(fsys_clock_gettime) + .prologue + .altrp b6 + .body + cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 + // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC +(p6) br.spnt.few fsys_fallback_syscall + mov r31 = r33 + shl r30 = r32,15 + br.many .gettime +END(fsys_clock_gettime) + +/* + * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize). + */ +#if _NSIG_WORDS != 1 +# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1. +#endif +ENTRY(fsys_rt_sigprocmask) + .prologue + .altrp b6 + .body + + add r2=IA64_TASK_BLOCKED_OFFSET,r16 + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + cmp4.ltu p6,p0=SIG_SETMASK,r32 + + cmp.ne p15,p0=r0,r34 // oset != NULL? 
+ tnat.nz p8,p0=r34 + add r31=IA64_TASK_SIGHAND_OFFSET,r16 + ;; + ld8 r3=[r2] // read/prefetch current->blocked + ld4 r9=[r9] + tnat.nz.or p6,p0=r35 + + cmp.ne.or p6,p0=_NSIG_WORDS*8,r35 + tnat.nz.or p6,p0=r32 +(p6) br.spnt.few .fail_einval // fail with EINVAL + ;; +#ifdef CONFIG_SMP + ld8 r31=[r31] // r31 <- current->sighand +#endif + and r9=TIF_ALLWORK_MASK,r9 + tnat.nz.or p8,p0=r33 + ;; + cmp.ne p7,p0=0,r9 + cmp.eq p6,p0=r0,r33 // set == NULL? + add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock +(p8) br.spnt.few .fail_efault // fail with EFAULT +(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work... +(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask + + /* Argh, we actually have to do some work and _update_ the signal mask: */ + +EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set +EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set + mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1)) + ;; + + rsm psr.i // mask interrupt delivery + mov ar.ccv=0 + andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP + +#ifdef CONFIG_SMP + mov r17=1 + ;; + cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock + mov r8=EINVAL // default to EINVAL + ;; + ld8 r3=[r2] // re-read current->blocked now that we hold the lock + cmp4.ne p6,p0=r18,r0 +(p6) br.cond.spnt.many .lock_contention + ;; +#else + ld8 r3=[r2] // re-read current->blocked now that we hold the lock + mov r8=EINVAL // default to EINVAL +#endif + add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16 + add r19=IA64_TASK_SIGNAL_OFFSET,r16 + cmp4.eq p6,p0=SIG_BLOCK,r32 + ;; + ld8 r19=[r19] // r19 <- current->signal + cmp4.eq p7,p0=SIG_UNBLOCK,r32 + cmp4.eq p8,p0=SIG_SETMASK,r32 + ;; + ld8 r18=[r18] // r18 <- current->pending.signal + .pred.rel.mutex p6,p7,p8 +(p6) or r14=r3,r14 // SIG_BLOCK +(p7) andcm r14=r3,r14 // SIG_UNBLOCK + +(p8) mov r14=r14 // SIG_SETMASK +(p6) mov r8=0 // clear error code + // 
recalc_sigpending() + add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19 + + add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19 + ;; + ld4 r17=[r17] // r17 <- current->signal->group_stop_count +(p7) mov r8=0 // clear error code + + ld8 r19=[r19] // r19 <- current->signal->shared_pending + ;; + cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)? +(p8) mov r8=0 // clear error code + + or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending + ;; + // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked: + andcm r18=r18,r14 + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + +(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending + mov r19=0 // i must not leak kernel bits... +(p6) br.cond.dpnt.many .sig_pending + ;; + +1: ld4 r17=[r9] // r17 <- current->thread_info->flags + ;; + mov ar.ccv=r17 + and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING) + ;; + + st8 [r2]=r14 // update current->blocked with new mask + cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18 + ;; + cmp.ne p6,p0=r17,r8 // update failed? +(p6) br.cond.spnt.few 1b // yes -> retry + +#ifdef CONFIG_SMP + st4.rel [r31]=r0 // release the lock +#endif + ssm psr.i + ;; + + srlz.d // ensure psr.i is set again + mov r18=0 // i must not leak kernel bits... + +.store_mask: +EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset +EX(.fail_efault, (p15) st8 [r34]=r3) + mov r2=0 // i must not leak kernel bits... + mov r3=0 // i must not leak kernel bits... + mov r8=0 // return 0 + mov r9=0 // i must not leak kernel bits... + mov r14=0 // i must not leak kernel bits... + mov r17=0 // i must not leak kernel bits... + mov r31=0 // i must not leak kernel bits... 
+ FSYS_RETURN + +.sig_pending: +#ifdef CONFIG_SMP + st4.rel [r31]=r0 // release the lock +#endif + ssm psr.i + ;; + srlz.d + br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall + +#ifdef CONFIG_SMP +.lock_contention: + /* Rather than spinning here, fall back on doing a heavy-weight syscall. */ + ssm psr.i + ;; + srlz.d + br.sptk.many fsys_fallback_syscall +#endif +END(fsys_rt_sigprocmask) + +ENTRY(fsys_fallback_syscall) + .prologue + .altrp b6 + .body + /* + * We only get here from light-weight syscall handlers. Thus, we already + * know that r15 contains a valid syscall number. No need to re-check. + */ + adds r17=-1024,r15 + movl r14=sys_call_table + ;; +#ifdef CONFIG_XEN + movl r18=running_on_xen;; + ld4 r18=[r18];; + // p14 = running_on_xen + // p15 = !running_on_xen + cmp.ne p14,p15=r0,r18 + ;; +(p14) movl r18=XSI_PSR_I_ADDR;; +(p14) ld8 r18=[r18] +(p14) mov r29=1;; +(p14) st1 [r18]=r29 +(p15) rsm psr.i +#else + rsm psr.i +#endif + shladd r18=r17,3,r14 + ;; + ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point +#ifdef CONFIG_XEN +(p14) mov r27=r8 +(p14) XEN_HYPER_GET_PSR + ;; +(p14) mov r29=r8 +(p14) mov r8=r27 +(p15) mov r29=psr // read psr (12 cyc load latency) +#else + mov r29=psr // read psr (12 cyc load latency) +#endif + mov r27=ar.rsc + mov r21=ar.fpsr + mov r26=ar.pfs +END(fsys_fallback_syscall) + /* FALL THROUGH */ +GLOBAL_ENTRY(fsys_bubble_down) + .prologue + .altrp b6 + .body + /* + * We get here for syscalls that don't have a lightweight + * handler. For those, we need to bubble down into the kernel + * and that requires setting up a minimal pt_regs structure, + * and initializing the CPU state more or less as if an + * interruption had occurred. To make syscall-restarts work, + * we setup pt_regs such that cr_iip points to the second + * instruction in syscall_via_break. Decrementing the IP + * hence will restart the syscall via break and not + * decrementing IP will return us to the caller, as usual. 
+ * Note that we preserve the value of psr.pp rather than + * initializing it from dcr.pp. This makes it possible to + * distinguish fsyscall execution from other privileged + * execution. + * + * On entry: + * - normal fsyscall handler register usage, except + * that we also have: + * - r18: address of syscall entry point + * - r21: ar.fpsr + * - r26: ar.pfs + * - r27: ar.rsc + * - r29: psr + * + * We used to clear some PSR bits here but that requires slow + * serialization. Fortunately, that isn't really necessary. + * The rationale is as follows: we used to clear bits + * ~PSR_PRESERVED_BITS in PSR.L. Since + * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we + * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}. + * However, + * + * PSR.BE : already is turned off in __kernel_syscall_via_epc() + * PSR.AC : don't care (kernel normally turns PSR.AC on) + * PSR.I : already turned off by the time fsys_bubble_down gets + * invoked + * PSR.DFL: always 0 (kernel never turns it on) + * PSR.DFH: don't care --- kernel never touches f32-f127 on its own + * initiative + * PSR.DI : always 0 (kernel never turns it on) + * PSR.SI : always 0 (kernel never turns it on) + * PSR.DB : don't care --- kernel never enables kernel-level + * breakpoints + * PSR.TB : must be 0 already; if it wasn't zero on entry to + * __kernel_syscall_via_epc, the branch to fsys_bubble_down + * will trigger a taken branch; the taken-trap-handler then + * converts the syscall into a break-based system-call. + */ + /* + * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. + * The rest we have to synthesize. 
+ */ +# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \ + | (0x1 << IA64_PSR_RI_BIT) \ + | IA64_PSR_BN | IA64_PSR_I) + + invala // M0|1 + movl r14=ia64_ret_from_syscall // X + + nop.m 0 + movl r28=__kernel_syscall_via_break // X create cr.iip + ;; + + mov r2=r16 // A get task addr to addl-addressable register + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A + mov r31=pr // I0 save pr (2 cyc) + ;; + st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag + addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS + add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A + ;; + ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags + lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store + nop.i 0 + ;; + mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 + nop.m 0 + nop.i 0 + ;; + mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore + mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!) + nop.i 0 + ;; + mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS + movl r8=PSR_ONE_BITS // X + ;; + mov r25=ar.unat // M2 (5 cyc) save ar.unat + mov r19=b6 // I0 save b6 (2 cyc) + mov r20=r1 // A save caller's gp in r20 + ;; + or r29=r8,r29 // A construct cr.ipsr value to save + mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc) + addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack + + mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc) + cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 + br.call.sptk.many b7=ia64_syscall_setup // B + ;; + mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 + mov rp=r14 // I0 set the real return addr + and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A + ;; +#ifdef CONFIG_XEN + movl r14=running_on_xen;; + ld4 r14=[r14];; + // p14 = running_on_xen + // p15 = !running_on_xen + cmp.ne p14,p15=r0,r14 + ;; +(p14) movl r28=XSI_PSR_I_ADDR;; +(p14) ld8 r28=[r28];; +(p14) adds r28=-1,r28;; // event_pending +(p14) ld1 r14=[r28];; +(p14) cmp.ne.unc p13,p14=r14,r0;; +(p13) XEN_HYPER_SSM_I +(p14) adds r28=1,r28;; 
// event_mask +(p14) st1 [r28]=r0;; +(p15) ssm psr.i +#else + ssm psr.i // M2 we're on kernel stacks now, reenable irqs +#endif + cmp.eq p8,p0=r3,r0 // A +(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT + + nop.m 0 +(p8) br.call.sptk.many b6=b6 // B (ignore return address) + br.cond.spnt ia64_trace_syscall // B +END(fsys_bubble_down) + + .rodata + .align 8 + .globl fsyscall_table + + data8 fsys_bubble_down +fsyscall_table: + data8 fsys_ni_syscall + data8 0 // exit // 1025 + data8 0 // read + data8 0 // write + data8 0 // open + data8 0 // close + data8 0 // creat // 1030 + data8 0 // link + data8 0 // unlink + data8 0 // execve + data8 0 // chdir + data8 0 // fchdir // 1035 + data8 0 // utimes + data8 0 // mknod + data8 0 // chmod + data8 0 // chown + data8 0 // lseek // 1040 + data8 fsys_getpid // getpid + data8 fsys_getppid // getppid + data8 0 // mount + data8 0 // umount + data8 0 // setuid // 1045 + data8 0 // getuid + data8 0 // geteuid + data8 0 // ptrace + data8 0 // access + data8 0 // sync // 1050 + data8 0 // fsync + data8 0 // fdatasync + data8 0 // kill + data8 0 // rename + data8 0 // mkdir // 1055 + data8 0 // rmdir + data8 0 // dup + data8 0 // pipe + data8 0 // times + data8 0 // brk // 1060 + data8 0 // setgid + data8 0 // getgid + data8 0 // getegid + data8 0 // acct + data8 0 // ioctl // 1065 + data8 0 // fcntl + data8 0 // umask + data8 0 // chroot + data8 0 // ustat + data8 0 // dup2 // 1070 + data8 0 // setreuid + data8 0 // setregid + data8 0 // getresuid + data8 0 // setresuid + data8 0 // getresgid // 1075 + data8 0 // setresgid + data8 0 // getgroups + data8 0 // setgroups + data8 0 // getpgid + data8 0 // setpgid // 1080 + data8 0 // setsid + data8 0 // getsid + data8 0 // sethostname + data8 0 // setrlimit + data8 0 // getrlimit // 1085 + data8 0 // getrusage + data8 fsys_gettimeofday // gettimeofday + data8 0 // settimeofday + data8 0 // select + data8 0 // poll // 1090 + data8 0 // symlink + 
data8 0 // readlink + data8 0 // uselib + data8 0 // swapon + data8 0 // swapoff // 1095 + data8 0 // reboot + data8 0 // truncate + data8 0 // ftruncate + data8 0 // fchmod + data8 0 // fchown // 1100 + data8 0 // getpriority + data8 0 // setpriority + data8 0 // statfs + data8 0 // fstatfs + data8 0 // gettid // 1105 + data8 0 // semget + data8 0 // semop + data8 0 // semctl + data8 0 // msgget + data8 0 // msgsnd // 1110 + data8 0 // msgrcv + data8 0 // msgctl + data8 0 // shmget + data8 0 // shmat + data8 0 // shmdt // 1115 + data8 0 // shmctl + data8 0 // syslog + data8 0 // setitimer + data8 0 // getitimer + data8 0 // 1120 + data8 0 + data8 0 + data8 0 // vhangup + data8 0 // lchown + data8 0 // remap_file_pages // 1125 + data8 0 // wait4 + data8 0 // sysinfo + data8 0 // clone + data8 0 // setdomainname + data8 0 // newuname // 1130 + data8 0 // adjtimex + data8 0 + data8 0 // init_module + data8 0 // delete_module + data8 0 // 1135 + data8 0 + data8 0 // quotactl + data8 0 // bdflush + data8 0 // sysfs + data8 0 // personality // 1140 + data8 0 // afs_syscall + data8 0 // setfsuid + data8 0 // setfsgid + data8 0 // getdents + data8 0 // flock // 1145 + data8 0 // readv + data8 0 // writev + data8 0 // pread64 + data8 0 // pwrite64 + data8 0 // sysctl // 1150 + data8 0 // mmap + data8 0 // munmap + data8 0 // mlock + data8 0 // mlockall + data8 0 // mprotect // 1155 + data8 0 // mremap + data8 0 // msync + data8 0 // munlock + data8 0 // munlockall + data8 0 // sched_getparam // 1160 + data8 0 // sched_setparam + data8 0 // sched_getscheduler + data8 0 // sched_setscheduler + data8 0 // sched_yield + data8 0 // sched_get_priority_max // 1165 + data8 0 // sched_get_priority_min + data8 0 // sched_rr_get_interval + data8 0 // nanosleep + data8 0 // nfsservctl + data8 0 // prctl // 1170 + data8 0 // getpagesize + data8 0 // mmap2 + data8 0 // pciconfig_read + data8 0 // pciconfig_write + data8 0 // perfmonctl // 1175 + data8 0 // sigaltstack + data8 0 // 
rt_sigaction + data8 0 // rt_sigpending + data8 fsys_rt_sigprocmask // rt_sigprocmask + data8 0 // rt_sigqueueinfo // 1180 + data8 0 // rt_sigreturn + data8 0 // rt_sigsuspend + data8 0 // rt_sigtimedwait + data8 0 // getcwd + data8 0 // capget // 1185 + data8 0 // capset + data8 0 // sendfile + data8 0 + data8 0 + data8 0 // socket // 1190 + data8 0 // bind + data8 0 // connect + data8 0 // listen + data8 0 // accept + data8 0 // getsockname // 1195 + data8 0 // getpeername + data8 0 // socketpair + data8 0 // send + data8 0 // sendto + data8 0 // recv // 1200 + data8 0 // recvfrom + data8 0 // shutdown + data8 0 // setsockopt + data8 0 // getsockopt + data8 0 // sendmsg // 1205 + data8 0 // recvmsg + data8 0 // pivot_root + data8 0 // mincore + data8 0 // madvise + data8 0 // newstat // 1210 + data8 0 // newlstat + data8 0 // newfstat + data8 0 // clone2 + data8 0 // getdents64 + data8 0 // getunwind // 1215 + data8 0 // readahead + data8 0 // setxattr + data8 0 // lsetxattr + data8 0 // fsetxattr + data8 0 // getxattr // 1220 + data8 0 // lgetxattr + data8 0 // fgetxattr + data8 0 // listxattr + data8 0 // llistxattr + data8 0 // flistxattr // 1225 + data8 0 // removexattr + data8 0 // lremovexattr + data8 0 // fremovexattr + data8 0 // tkill + data8 0 // futex // 1230 + data8 0 // sched_setaffinity + data8 0 // sched_getaffinity + data8 fsys_set_tid_address // set_tid_address + data8 0 // fadvise64_64 + data8 0 // tgkill // 1235 + data8 0 // exit_group + data8 0 // lookup_dcookie + data8 0 // io_setup + data8 0 // io_destroy + data8 0 // io_getevents // 1240 + data8 0 // io_submit + data8 0 // io_cancel + data8 0 // epoll_create + data8 0 // epoll_ctl + data8 0 // epoll_wait // 1245 + data8 0 // restart_syscall + data8 0 // semtimedop + data8 0 // timer_create + data8 0 // timer_settime + data8 0 // timer_gettime // 1250 + data8 0 // timer_getoverrun + data8 0 // timer_delete + data8 0 // clock_settime + data8 fsys_clock_gettime // clock_gettime + + // fill in 
zeros for the remaining entries + .zero: + .space fsyscall_table + 8*NR_syscalls - .zero, 0 diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/kernel/gate.S --- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S Fri Jan 19 14:48:57 2007 +0000 @@ -55,18 +55,6 @@ #define LOAD_RUNNING_ON_XEN(reg) \ [1:] movl reg=0; \ .xdata4 ".data.patch.running_on_xen", 1b-. - - .section ".data.patch.brl_xen_rsm_be_i", "a" - .previous -#define BRL_COND_XEN_RSM_BE_I(pr) \ -[1:](pr)brl.cond.sptk 0; \ - .xdata4 ".data.patch.brl_xen_rsm_be_i", 1b-. - - .section ".data.patch.brl_xen_get_psr", "a" - .previous -#define BRL_COND_XEN_GET_PSR(pr) \ -[1:](pr)brl.cond.sptk 0; \ - .xdata4 ".data.patch.brl_xen_get_psr", 1b-. .section ".data.patch.brl_xen_ssm_i_0", "a" .previous @@ -155,9 +143,9 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) cmp.ne isXen,isRaw=r0,r30 ;; (isRaw) rsm psr.be | psr.i - BRL_COND_XEN_RSM_BE_I(isXen) - .global .vdso_rsm_be_i_ret -.vdso_rsm_be_i_ret: +(isXen) st1 [r22]=r20 +(isXen) rum psr.be + ;; #else rsm psr.be | psr.i // M2 (5 cyc to srlz.d) #endif @@ -170,9 +158,9 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) lfetch [r18] // M0|1 #ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT (isRaw) mov r29=psr - BRL_COND_XEN_GET_PSR(isXen) - .global .vdso_get_psr_ret -.vdso_get_psr_ret: +(isXen) XEN_HYPER_GET_PSR + ;; +(isXen) mov r29=r8 #else mov r29=psr // M2 (12 cyc) #endif diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S --- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S Fri Jan 19 14:48:57 2007 +0000 @@ -48,14 +48,6 @@ SECTIONS __start_gate_running_on_xen_patchlist = .; *(.data.patch.running_on_xen) __end_gate_running_on_xen_patchlist = .; - - __start_gate_brl_xen_rsm_be_i_patchlist = .; - *(.data.patch.brl_xen_rsm_be_i) - __end_gate_brl_xen_rsm_be_i_patchlist = .; - - 
__start_gate_brl_xen_get_psr_patchlist = .; - *(.data.patch.brl_xen_get_psr) - __end_gate_brl_xen_get_psr_patchlist = .; __start_gate_brl_xen_ssm_i_0_patchlist = .; *(.data.patch.brl_xen_ssm_i_0) diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/kernel/patch.c --- a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c Fri Jan 19 14:48:57 2007 +0000 @@ -236,13 +236,9 @@ static void static void patch_brl_in_vdso(void) { - EXTERN_PATCHLIST(xen_rsm_be_i); - EXTERN_PATCHLIST(xen_get_psr); EXTERN_PATCHLIST(xen_ssm_i_0); EXTERN_PATCHLIST(xen_ssm_i_1); - PATCH_BRL_SYMADDR(xen_rsm_be_i); - PATCH_BRL_SYMADDR(xen_get_psr); PATCH_BRL_SYMADDR(xen_ssm_i_0); PATCH_BRL_SYMADDR(xen_ssm_i_1); } diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Fri Jan 19 14:48:57 2007 +0000 @@ -76,10 +76,18 @@ EXPORT_SYMBOL(__per_cpu_offset); #endif #ifdef CONFIG_XEN +static void +xen_panic_hypercall(struct unw_frame_info *info, void *arg) +{ + current->thread.ksp = (__u64)info->sw - 16; + HYPERVISOR_shutdown(SHUTDOWN_crash); + /* we're never actually going to get here... */ +} + static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - HYPERVISOR_shutdown(SHUTDOWN_crash); + unw_init_running(xen_panic_hypercall, NULL); /* we're never actually going to get here... 
*/ return NOTIFY_DONE; } diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Fri Jan 19 14:48:57 2007 +0000 @@ -8,336 +8,110 @@ #include <asm/processor.h> #include <asm/asmmacro.h> -/* To clear vpsr.ic, vpsr.i needs to be cleared first */ -#define XEN_CLEAR_PSR_IC \ - mov r14=1; \ - movl r15=XSI_PSR_I_ADDR; \ - movl r2=XSI_PSR_IC; \ - ;; \ - ld8 r15=[r15]; \ - ld4 r3=[r2]; \ - ;; \ - ld1 r16=[r15]; \ - ;; \ - st1 [r15]=r14; \ - st4 [r2]=r0; \ - ;; - -/* First restore vpsr.ic, and then vpsr.i */ -#define XEN_RESTORE_PSR_IC \ - st4 [r2]=r3; \ - st1 [r15]=r16; \ - ;; +GLOBAL_ENTRY(xen_get_psr) + XEN_HYPER_GET_PSR + br.ret.sptk.many rp + ;; +END(xen_get_psr) GLOBAL_ENTRY(xen_get_ivr) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) mov r8=cr.ivr;; -(p7) br.ret.sptk.many rp - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_GET_IVR - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp ;; END(xen_get_ivr) GLOBAL_ENTRY(xen_get_tpr) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) mov r8=cr.tpr;; -(p7) br.ret.sptk.many rp - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_GET_TPR - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp ;; END(xen_get_tpr) GLOBAL_ENTRY(xen_set_tpr) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) mov cr.tpr=r32;; -(p7) br.ret.sptk.many rp - ;; mov r8=r32 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_SET_TPR - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp ;; END(xen_set_tpr) GLOBAL_ENTRY(xen_eoi) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) mov cr.eoi=r0;; -(p7) br.ret.sptk.many rp - ;; mov r8=r32 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_EOI - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp ;; END(xen_eoi) GLOBAL_ENTRY(xen_thash) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) thash r8=r32;; 
-(p7) br.ret.sptk.many rp - ;; mov r8=r32 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_THASH - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp ;; END(xen_thash) GLOBAL_ENTRY(xen_set_itm) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) mov cr.itm=r32;; -(p7) br.ret.sptk.many rp - ;; mov r8=r32 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_SET_ITM - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp ;; END(xen_set_itm) GLOBAL_ENTRY(xen_ptcga) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) ptc.ga r32,r33;; -(p7) br.ret.sptk.many rp - ;; mov r8=r32 mov r9=r33 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_PTC_GA - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp ;; END(xen_ptcga) GLOBAL_ENTRY(xen_get_rr) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) mov r8=rr[r32];; -(p7) br.ret.sptk.many rp - ;; mov r8=r32 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_GET_RR - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp ;; END(xen_get_rr) GLOBAL_ENTRY(xen_set_rr) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) mov rr[r32]=r33;; -(p7) br.ret.sptk.many rp - ;; mov r8=r32 mov r9=r33 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_SET_RR - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp ;; END(xen_set_rr) GLOBAL_ENTRY(xen_set_kr) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.ne p7,p0=r8,r0;; -(p7) br.cond.spnt.few 1f; - ;; - cmp.eq p7,p0=r8,r0 - adds r8=-1,r8;; -(p7) mov ar0=r9 -(p7) br.ret.sptk.many rp;; - cmp.eq p7,p0=r8,r0 - adds r8=-1,r8;; -(p7) mov ar1=r9 -(p7) br.ret.sptk.many rp;; - cmp.eq p7,p0=r8,r0 - adds r8=-1,r8;; -(p7) mov ar2=r9 -(p7) br.ret.sptk.many rp;; - cmp.eq p7,p0=r8,r0 - adds r8=-1,r8;; -(p7) mov ar3=r9 -(p7) br.ret.sptk.many rp;; - cmp.eq p7,p0=r8,r0 - adds r8=-1,r8;; -(p7) mov ar4=r9 -(p7) br.ret.sptk.many rp;; - cmp.eq p7,p0=r8,r0 - adds r8=-1,r8;; -(p7) mov ar5=r9 -(p7) br.ret.sptk.many rp;; - cmp.eq p7,p0=r8,r0 - adds r8=-1,r8;; -(p7) mov ar6=r9 -(p7) br.ret.sptk.many rp;; - cmp.eq p7,p0=r8,r0 
- adds r8=-1,r8;; -(p7) mov ar7=r9 -(p7) br.ret.sptk.many rp;; - -1: mov r8=r32 + mov r8=r32 mov r9=r33 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_SET_KR - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp END(xen_set_kr) GLOBAL_ENTRY(xen_fc) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) fc r32;; -(p7) br.ret.sptk.many rp - ;; mov r8=r32 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_FC - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp END(xen_fc) GLOBAL_ENTRY(xen_get_cpuid) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) mov r8=cpuid[r32];; -(p7) br.ret.sptk.many rp - ;; mov r8=r32 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_GET_CPUID - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp END(xen_get_cpuid) GLOBAL_ENTRY(xen_get_pmd) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) mov r8=pmd[r32];; -(p7) br.ret.sptk.many rp - ;; mov r8=r32 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_GET_PMD - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp END(xen_get_pmd) #ifdef CONFIG_IA32_SUPPORT GLOBAL_ENTRY(xen_get_eflag) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) mov r8=ar24;; -(p7) br.ret.sptk.many rp - ;; - mov r8=r32 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_GET_EFLAG - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp END(xen_get_eflag) // some bits aren't set if pl!=0, see SDM vol1 3.1.8 GLOBAL_ENTRY(xen_set_eflag) - movl r8=running_on_xen;; - ld4 r8=[r8];; - cmp.eq p7,p0=r8,r0;; -(p7) mov ar24=r32 -(p7) br.ret.sptk.many rp - ;; mov r8=r32 - ;; - XEN_CLEAR_PSR_IC - ;; XEN_HYPER_SET_EFLAG - ;; - XEN_RESTORE_PSR_IC - ;; br.ret.sptk.many rp END(xen_set_eflag) #endif @@ -355,27 +129,6 @@ END(xen_send_ipi) #ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT // Those are vdso specialized. // In fsys mode, call, ret can't be used. 
-GLOBAL_ENTRY(xen_rsm_be_i) - st1 [r22]=r20 - st4 [r23]=r0 - XEN_HYPER_RSM_BE - st4 [r23]=r20 - brl.cond.sptk .vdso_rsm_be_i_ret - ;; -END(xen_rsm_be_i) - -GLOBAL_ENTRY(xen_get_psr) - mov r31=r8 - mov r25=IA64_PSR_IC - st4 [r23]=r0 - XEN_HYPER_GET_PSR - ;; - st4 [r23]=r20 - or r29=r8,r25 // vpsr.ic was cleared for hyperprivop - mov r8=r31 - brl.cond.sptk .vdso_get_psr_ret - ;; -END(xen_get_psr) // see xen_ssm_i() in privop.h // r22 = &vcpu->vcpu_info->evtchn_upcall_mask @@ -395,7 +148,6 @@ END(xen_get_psr) (p14) cmp.ne.unc p11,p0=r0,r25; \ ;; \ (p11) st1 [r22]=r20; \ -(p11) st4 [r23]=r0; \ (p11) XEN_HYPER_SSM_I; GLOBAL_ENTRY(xen_ssm_i_0) @@ -409,4 +161,11 @@ GLOBAL_ENTRY(xen_ssm_i_1) brl.cond.sptk .vdso_ssm_i_1_ret ;; END(xen_ssm_i_1) + +GLOBAL_ENTRY(__hypercall) + mov r2=r37 + break 0x1000 + br.ret.sptk.many b0 + ;; +END(__hypercall) #endif diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Fri Jan 19 14:48:57 2007 +0000 @@ -25,7 +25,10 @@ #include <linux/bootmem.h> #include <linux/module.h> #include <linux/vmalloc.h> +#include <linux/efi.h> #include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/meminit.h> #include <asm/hypervisor.h> #include <asm/hypercall.h> #include <xen/interface/memory.h> @@ -45,6 +48,8 @@ static int p2m_expose_init(void); #else #define p2m_expose_init() (-ENOSYS) #endif + +EXPORT_SYMBOL(__hypercall); //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear() // move those to lib/contiguous_bitmap? 
@@ -56,13 +61,90 @@ static int p2m_expose_init(void); */ unsigned long *contiguous_bitmap; +#ifdef CONFIG_VIRTUAL_MEM_MAP +/* Following logic is stolen from create_mem_map_table() for virtual memmap */ +static int +create_contiguous_bitmap(u64 start, u64 end, void *arg) +{ + unsigned long address, start_page, end_page; + unsigned long bitmap_start, bitmap_end; + unsigned char *bitmap; + int node; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + bitmap_start = (unsigned long)contiguous_bitmap + + ((__pa(start) >> PAGE_SHIFT) >> 3); + bitmap_end = (unsigned long)contiguous_bitmap + + (((__pa(end) >> PAGE_SHIFT) + 2 * BITS_PER_LONG) >> 3); + + start_page = bitmap_start & PAGE_MASK; + end_page = PAGE_ALIGN(bitmap_end); + node = paddr_to_nid(__pa(start)); + + bitmap = alloc_bootmem_pages_node(NODE_DATA(node), + end_page - start_page); + BUG_ON(!bitmap); + memset(bitmap, 0, end_page - start_page); + + for (address = start_page; address < end_page; address += PAGE_SIZE) { + pgd = pgd_offset_k(address); + if (pgd_none(*pgd)) + pgd_populate(&init_mm, pgd, + alloc_bootmem_pages_node(NODE_DATA(node), + PAGE_SIZE)); + pud = pud_offset(pgd, address); + + if (pud_none(*pud)) + pud_populate(&init_mm, pud, + alloc_bootmem_pages_node(NODE_DATA(node), + PAGE_SIZE)); + pmd = pmd_offset(pud, address); + + if (pmd_none(*pmd)) + pmd_populate_kernel(&init_mm, pmd, + alloc_bootmem_pages_node + (NODE_DATA(node), PAGE_SIZE)); + pte = pte_offset_kernel(pmd, address); + + if (pte_none(*pte)) + set_pte(pte, + pfn_pte(__pa(bitmap + (address - start_page)) + >> PAGE_SHIFT, PAGE_KERNEL)); + } + return 0; +} +#endif + +static void +__contiguous_bitmap_init(unsigned long size) +{ + contiguous_bitmap = alloc_bootmem_pages(size); + BUG_ON(!contiguous_bitmap); + memset(contiguous_bitmap, 0, size); +} + void contiguous_bitmap_init(unsigned long end_pfn) { unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3; - contiguous_bitmap = alloc_bootmem_low_pages(size); - 
BUG_ON(!contiguous_bitmap); - memset(contiguous_bitmap, 0, size); +#ifndef CONFIG_VIRTUAL_MEM_MAP + __contiguous_bitmap_init(size); +#else + unsigned long max_gap = 0; + + efi_memmap_walk(find_largest_hole, (u64*)&max_gap); + if (max_gap < LARGE_GAP) { + __contiguous_bitmap_init(size); + } else { + unsigned long map_size = PAGE_ALIGN(size); + vmalloc_end -= map_size; + contiguous_bitmap = (unsigned long*)vmalloc_end; + efi_memmap_walk(create_contiguous_bitmap, NULL); + } +#endif } #if 0 diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Fri Jan 19 14:48:57 2007 +0000 @@ -59,12 +59,6 @@ GLOBAL_ENTRY(ia64_switch_to) shr.u r26=r20,IA64_GRANULE_SHIFT cmp.eq p7,p6=r25,in0 ;; -#ifdef CONFIG_XEN - movl r8=XSI_PSR_IC - ;; - st4 [r8]=r0 // force psr.ic off for hyperprivop(s) - ;; -#endif /* * If we've already mapped this task's page, we can skip doing it again. */ @@ -72,19 +66,13 @@ GLOBAL_ENTRY(ia64_switch_to) (p6) br.cond.dpnt .map ;; .done: -#ifdef CONFIG_XEN - // psr.ic already off + ld8 sp=[r21] // load kernel stack pointer of new task +#ifdef CONFIG_XEN // update "current" application register mov r8=IA64_KR_CURRENT mov r9=in0;; XEN_HYPER_SET_KR - ld8 sp=[r21] // load kernel stack pointer of new task - movl r27=XSI_PSR_IC - mov r8=1 - ;; - st4 [r27]=r8 // psr.ic back on -#else - ld8 sp=[r21] // load kernel stack pointer of new task +#else mov IA64_KR(CURRENT)=in0 // update "current" application register #endif mov r8=r13 // return pointer to previously running task @@ -99,7 +87,10 @@ GLOBAL_ENTRY(ia64_switch_to) .map: #ifdef CONFIG_XEN - // psr.ic already off + movl r25=XSI_PSR_IC // clear psr.ic + ;; + st4 [r25]=r0 + ;; #else rsm psr.ic // interrupts (psr.i) are already disabled here #endif @@ -132,7 +123,13 @@ GLOBAL_ENTRY(ia64_switch_to) #endif ;; itr.d dtr[r25]=r23 // wire in new mapping... 
-#ifndef CONFIG_XEN +#ifdef CONFIG_XEN + mov r9=1 + movl r8=XSI_PSR_IC + ;; + st4 [r8]=r9 + ;; +#else ssm psr.ic // reenable the psr.ic bit ;; srlz.d @@ -415,7 +412,16 @@ ENTRY(ia64_leave_syscall) (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 ;; ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs +#ifdef CONFIG_XEN +(pKStk) mov r21=r8 +(pKStk) XEN_HYPER_GET_PSR + ;; +(pKStk) mov r22=r8 +(pKStk) mov r8=r21 + ;; +#else (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled +#endif nop 0 ;; ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 @@ -645,7 +651,16 @@ GLOBAL_ENTRY(ia64_leave_kernel) adds r16=PT(CR_IPSR)+16,r12 adds r17=PT(CR_IIP)+16,r12 +#ifdef CONFIG_XEN +(pKStk) mov r29=r8 +(pKStk) XEN_HYPER_GET_PSR + ;; +(pKStk) mov r22=r8 +(pKStk) mov r8=r29 + ;; +#else (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled +#endif nop.i 0 nop.i 0 ;; diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Fri Jan 19 14:48:57 2007 +0000 @@ -709,11 +709,23 @@ ENTRY(page_fault) ;; #endif #ifdef CONFIG_XEN - br.cond.sptk.many xen_page_fault - ;; -done_xen_page_fault: -#endif + +#define MASK_TO_PEND_OFS (-1) + +(p15) movl r14=XSI_PSR_I_ADDR + ;; +(p15) ld8 r14=[r14] + ;; +(p15) st1 [r14]=r0,MASK_TO_PEND_OFS // if (p15) vpsr.i = 1 + ;; // if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0 +(p15) ld1 r14=[r14] // if (vcpu->vcpu_info->evtchn_upcall_pending) + ;; +(p15) cmp.ne p15,p0=r14,r0 + ;; +(p15) XEN_HYPER_SSM_I +#else (p15) ssm psr.i // restore psr.i +#endif movl r14=ia64_leave_kernel ;; SAVE_REST @@ -729,25 +741,6 @@ ENTRY(dkey_miss) ENTRY(dkey_miss) DBG_FAULT(7) FAULT(7) -#ifdef CONFIG_XEN - // Leaving this code inline above results in an IVT section overflow - // There is no particular reason for this code to be here... 
-xen_page_fault: -(p15) movl r3=XSI_PSR_I_ADDR - ;; -(p15) ld8 r3=[r3] - ;; -(p15) st1 [r3]=r0,-1 // if (p15) vpsr.i = 1 - mov r14=r0 - ;; -(p15) ld1 r14=[r3] // if (pending_events) - adds r3=8,r2 // re-set up second base pointer - ;; -(p15) cmp.ne p15,p0=r14,r0 - ;; - br.cond.sptk.many done_xen_page_fault - ;; -#endif END(dkey_miss) .org ia64_ivt+0x2000 @@ -1170,14 +1163,13 @@ 1: #ifdef CONFIG_XEN (p15) ld8 r16=[r16] // vpsr.i ;; -(p15) st1 [r16]=r0,-1 // if (p15) vpsr.i = 1 - mov r2=r0 - ;; -(p15) ld1 r2=[r16] // if (pending_events) - ;; - cmp.ne p6,p0=r2,r0 - ;; -(p6) ssm psr.i // do a real ssm psr.i +(p15) st1 [r16]=r0,MASK_TO_PEND_OFS // if (p15) vpsr.i = 1 + ;; // if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0 +(p15) ld1 r2=[r16] // if (vcpu->vcpu_info->evtchn_upcall_pending) + ;; +(p15) cmp.ne.unc p6,p0=r2,r0 + ;; +(p6) XEN_HYPER_SSM_I // do a real ssm psr.i #else (p15) ssm psr.i // M2 restore psr.i #endif diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h Fri Jan 19 14:48:57 2007 +0000 @@ -151,16 +151,7 @@ .mem.offset 8,0; st8.spill [r17]=r11,24; \ ;; \ /* xen special handling for possibly lazy cover */ \ - movl r8=XSI_INCOMPL_REGFR; \ - ;; \ - ld4 r30=[r8]; \ - ;; \ - /* set XSI_INCOMPL_REGFR 0 */ \ - st4 [r8]=r0; \ - cmp.eq p6,p7=r30,r0; \ - ;; /* not sure if this stop bit is necessary */ \ -(p6) adds r8=XSI_PRECOVER_IFS-XSI_INCOMPL_REGFR,r8; \ -(p7) adds r8=XSI_IFS-XSI_INCOMPL_REGFR,r8; \ + movl r8=XSI_PRECOVER_IFS; \ ;; \ ld8 r30=[r8]; \ ;; \ diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S Fri Jan 19 14:48:57 2007 +0000 @@ -33,7 +33,16 @@ 1: { mov loc4=ar.rsc // save RSE configuration ;; mov ar.rsc=0 // put RSE in 
enforced lazy, LE mode +#ifdef CONFIG_XEN + mov r9 = r8 + XEN_HYPER_GET_PSR + ;; + mov loc3 = r8 + mov r8 = r9 + ;; +#else mov loc3 = psr +#endif mov loc0 = rp .body mov r30 = in2 @@ -41,16 +50,16 @@ 1: { #ifdef CONFIG_XEN // this is low priority for paravirtualization, but is called // from the idle loop so confuses privop counting - movl r31=XSI_PSR_IC + movl r31=XSI_PSR_I_ADDR ;; -(p6) st4 [r31]=r0 + ld8 r31=[r31] + mov r22=1 ;; -(p7) adds r31=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r31 -(p7) mov r22=1 + st1 [r31]=r22 + ;; +(p6) movl r31=XSI_PSR_IC ;; -(p7) ld8 r31=[r31] - ;; -(p7) st1 [r31]=r22 +(p6) st4.rel [r31]=r0 ;; mov r31 = in3 mov b7 = loc2 diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Fri Jan 19 14:48:57 2007 +0000 @@ -38,17 +38,16 @@ END(early_xen_setup) /* Stub for suspend. Just force the stacked registers to be written in memory. */ GLOBAL_ENTRY(xencomm_arch_hypercall_suspend) - mov r15=r32 ;; - alloc r20=ar.pfs,0,0,0,0 + alloc r20=ar.pfs,0,0,6,0 mov r2=__HYPERVISOR_sched_op ;; /* We don't want to deal with RSE. 
*/ flushrs - mov r14=2 // SCHEDOP_shutdown + mov r33=r32 + mov r32=2 // SCHEDOP_shutdown ;; break 0x1000 ;; - mov ar.pfs=r20 br.ret.sptk.many b0 END(xencomm_arch_hypercall_suspend) diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/drivers/xen/Kconfig --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig Fri Jan 19 14:48:57 2007 +0000 @@ -238,14 +238,6 @@ config XEN_COMPAT_030002 endmenu -config HAVE_ARCH_ALLOC_SKB - bool - default y - -config HAVE_ARCH_DEV_ALLOC_SKB - bool - default y - config HAVE_IRQ_IGNORE_UNHANDLED bool default y diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/asm-ia64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Fri Jan 19 14:48:57 2007 +0000 @@ -39,6 +39,9 @@ #include <asm/xen/xcom_hcall.h> struct xencomm_handle; +extern unsigned long __hypercall(unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, + unsigned long a5, unsigned long cmd); /* * Assembler stubs for hyper-calls. 
@@ -47,115 +50,58 @@ struct xencomm_handle; #define _hypercall0(type, name) \ ({ \ long __res; \ - __asm__ __volatile__ (";;\n" \ - "mov r2=%1\n" \ - "break 0x1000 ;;\n" \ - "mov %0=r8 ;;\n" \ - : "=r" (__res) \ - : "J" (__HYPERVISOR_##name) \ - : "r2","r8", \ - "memory" ); \ + __res=__hypercall(0, 0, 0, 0, 0, __HYPERVISOR_##name); \ (type)__res; \ }) #define _hypercall1(type, name, a1) \ ({ \ long __res; \ - __asm__ __volatile__ (";;\n" \ - "mov r14=%2\n" \ - "mov r2=%1\n" \ - "break 0x1000 ;;\n" \ - "mov %0=r8 ;;\n" \ - : "=r" (__res) \ - : "J" (__HYPERVISOR_##name), \ - "rI" ((unsigned long)(a1)) \ - : "r14","r2","r8", \ - "memory" ); \ + __res = __hypercall((unsigned long)a1, \ + 0, 0, 0, 0, __HYPERVISOR_##name); \ (type)__res; \ }) #define _hypercall2(type, name, a1, a2) \ ({ \ long __res; \ - __asm__ __volatile__ (";;\n" \ - "mov r14=%2\n" \ - "mov r15=%3\n" \ - "mov r2=%1\n" \ - "break 0x1000 ;;\n" \ - "mov %0=r8 ;;\n" \ - : "=r" (__res) \ - : "J" (__HYPERVISOR_##name), \ - "rI" ((unsigned long)(a1)), \ - "rI" ((unsigned long)(a2)) \ - : "r14","r15","r2","r8", \ - "memory" ); \ + __res = __hypercall((unsigned long)a1, \ + (unsigned long)a2, \ + 0, 0, 0, __HYPERVISOR_##name); \ (type)__res; \ }) #define _hypercall3(type, name, a1, a2, a3) \ ({ \ long __res; \ - __asm__ __volatile__ (";;\n" \ - "mov r14=%2\n" \ - "mov r15=%3\n" \ - "mov r16=%4\n" \ - "mov r2=%1\n" \ - "break 0x1000 ;;\n" \ - "mov %0=r8 ;;\n" \ - : "=r" (__res) \ - : "J" (__HYPERVISOR_##name), \ - "rI" ((unsigned long)(a1)), \ - "rI" ((unsigned long)(a2)), \ - "rI" ((unsigned long)(a3)) \ - : "r14","r15","r16","r2","r8", \ - "memory" ); \ - (type)__res; \ + __res = __hypercall((unsigned long)a1, \ + (unsigned long)a2, \ + (unsigned long)a3, \ + 0, 0, __HYPERVISOR_##name); \ + (type)__res; \ }) #define _hypercall4(type, name, a1, a2, a3, a4) \ ({ \ long __res; \ - __asm__ __volatile__ (";;\n" \ - "mov r14=%2\n" \ - "mov r15=%3\n" \ - "mov r16=%4\n" \ - "mov r17=%5\n" \ - "mov r2=%1\n" \ - "break 
0x1000 ;;\n" \ - "mov %0=r8 ;;\n" \ - : "=r" (__res) \ - : "J" (__HYPERVISOR_##name), \ - "rI" ((unsigned long)(a1)), \ - "rI" ((unsigned long)(a2)), \ - "rI" ((unsigned long)(a3)), \ - "rI" ((unsigned long)(a4)) \ - : "r14","r15","r16","r2","r8", \ - "r17","memory" ); \ - (type)__res; \ + __res = __hypercall((unsigned long)a1, \ + (unsigned long)a2, \ + (unsigned long)a3, \ + (unsigned long)a4, \ + 0, __HYPERVISOR_##name); \ + (type)__res; \ }) #define _hypercall5(type, name, a1, a2, a3, a4, a5) \ ({ \ long __res; \ - __asm__ __volatile__ (";;\n" \ - "mov r14=%2\n" \ - "mov r15=%3\n" \ - "mov r16=%4\n" \ - "mov r17=%5\n" \ - "mov r18=%6\n" \ - "mov r2=%1\n" \ - "break 0x1000 ;;\n" \ - "mov %0=r8 ;;\n" \ - : "=r" (__res) \ - : "J" (__HYPERVISOR_##name), \ - "rI" ((unsigned long)(a1)), \ - "rI" ((unsigned long)(a2)), \ - "rI" ((unsigned long)(a3)), \ - "rI" ((unsigned long)(a4)), \ - "rI" ((unsigned long)(a5)) \ - : "r14","r15","r16","r2","r8", \ - "r17","r18","memory" ); \ - (type)__res; \ + __res = __hypercall((unsigned long)a1, \ + (unsigned long)a2, \ + (unsigned long)a3, \ + (unsigned long)a4, \ + (unsigned long)a5, \ + __HYPERVISOR_##name); \ + (type)__res; \ }) diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Fri Jan 19 14:48:57 2007 +0000 @@ -215,7 +215,8 @@ asmlinkage int xprintk(const char *fmt, #endif /* CONFIG_XEN || CONFIG_VMX_GUEST */ #ifdef CONFIG_XEN_PRIVILEGED_GUEST -#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN) +#define is_initial_xendomain() \ + (is_running_on_xen() ? 
xen_start_info->flags & SIF_INITDOMAIN : 0) #else #define is_initial_xendomain() 0 #endif diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/asm-ia64/maddr.h --- a/linux-2.6-xen-sparse/include/asm-ia64/maddr.h Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/include/asm-ia64/maddr.h Fri Jan 19 14:48:57 2007 +0000 @@ -68,7 +68,6 @@ static inline unsigned long static inline unsigned long mfn_to_local_pfn(unsigned long mfn) { - extern unsigned long max_mapnr; unsigned long pfn = mfn_to_pfn_for_dma(mfn); if (!pfn_valid(pfn)) return INVALID_P2M_ENTRY; diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/asm-ia64/page.h --- a/linux-2.6-xen-sparse/include/asm-ia64/page.h Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/include/asm-ia64/page.h Fri Jan 19 14:48:57 2007 +0000 @@ -119,6 +119,7 @@ extern struct page *vmem_map; #endif #ifdef CONFIG_FLATMEM +extern unsigned long max_mapnr; # define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn)) #elif defined(CONFIG_DISCONTIGMEM) extern unsigned long min_low_pfn; diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h --- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Fri Jan 19 14:48:57 2007 +0000 @@ -45,12 +45,10 @@ #define XEN_HYPER_GET_PMD break HYPERPRIVOP_GET_PMD #define XEN_HYPER_GET_EFLAG break HYPERPRIVOP_GET_EFLAG #define XEN_HYPER_SET_EFLAG break HYPERPRIVOP_SET_EFLAG -#define XEN_HYPER_RSM_BE break HYPERPRIVOP_RSM_BE #define XEN_HYPER_GET_PSR break HYPERPRIVOP_GET_PSR #define XSI_IFS (XSI_BASE + XSI_IFS_OFS) #define XSI_PRECOVER_IFS (XSI_BASE + XSI_PRECOVER_IFS_OFS) -#define XSI_INCOMPL_REGFR (XSI_BASE + XSI_INCOMPL_REGFR_OFS) #define XSI_IFA (XSI_BASE + XSI_IFA_OFS) #define XSI_ISR (XSI_BASE + XSI_ISR_OFS) #define XSI_IIM (XSI_BASE + XSI_IIM_OFS) @@ -123,8 +121,6 @@ extern void xen_set_eflag(unsigned long) * that we inline it */ 
#define xen_hyper_ssm_i() \ ({ \ - xen_set_virtual_psr_i(0); \ - xen_set_virtual_psr_ic(0); \ XEN_HYPER_SSM_I; \ }) @@ -139,8 +135,12 @@ extern void xen_set_eflag(unsigned long) #define xen_ssm_i() \ ({ \ int old = xen_get_virtual_psr_i(); \ - xen_set_virtual_psr_i(1); \ - if (!old && xen_get_virtual_pend()) xen_hyper_ssm_i(); \ + if (!old) { \ + if (xen_get_virtual_pend()) \ + xen_hyper_ssm_i(); \ + else \ + xen_set_virtual_psr_i(1); \ + } \ }) #define xen_ia64_intrin_local_irq_restore(x) \ @@ -182,6 +182,7 @@ extern void xen_set_eflag(unsigned long) * be properly handled by Xen, some are frequent enough that we use * hyperprivops for performance. */ +extern unsigned long xen_get_psr(void); extern unsigned long xen_get_ivr(void); extern unsigned long xen_get_tpr(void); extern void xen_set_itm(unsigned long); @@ -201,6 +202,11 @@ extern void xen_ptcga(unsigned long addr __u64 ia64_intri_res; \ \ switch(regnum) { \ + case _IA64_REG_PSR: \ + ia64_intri_res = (is_running_on_xen()) ? \ + xen_get_psr() : \ + __ia64_getreg(regnum); \ + break; \ case _IA64_REG_CR_IVR: \ ia64_intri_res = (is_running_on_xen()) ? 
\ xen_get_ivr() : \ diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/linux/skbuff.h --- a/linux-2.6-xen-sparse/include/linux/skbuff.h Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/include/linux/skbuff.h Fri Jan 19 14:48:57 2007 +0000 @@ -353,8 +353,7 @@ static inline struct sk_buff *alloc_skb_ extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, - gfp_t priority, - int fclone); + gfp_t priority); extern void kfree_skbmem(struct sk_buff *skb); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/net/core/skbuff.c --- a/linux-2.6-xen-sparse/net/core/skbuff.c Thu Jan 18 15:18:07 2007 +0000 +++ b/linux-2.6-xen-sparse/net/core/skbuff.c Fri Jan 19 14:48:57 2007 +0000 @@ -210,18 +210,14 @@ nodata: */ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, - gfp_t gfp_mask, - int fclone) -{ - kmem_cache_t *cache; - struct skb_shared_info *shinfo; + gfp_t gfp_mask) +{ struct sk_buff *skb; u8 *data; - cache = fclone ? 
skbuff_fclone_cache : skbuff_head_cache; - /* Get the HEAD */ - skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA); + skb = kmem_cache_alloc(skbuff_head_cache, + gfp_mask & ~__GFP_DMA); if (!skb) goto out; @@ -238,29 +234,18 @@ struct sk_buff *alloc_skb_from_cache(kme skb->data = data; skb->tail = data; skb->end = data + size; - /* make sure we initialize shinfo sequentially */ - shinfo = skb_shinfo(skb); - atomic_set(&shinfo->dataref, 1); - shinfo->nr_frags = 0; - shinfo->gso_size = 0; - shinfo->gso_segs = 0; - shinfo->gso_type = 0; - shinfo->ip6_frag_id = 0; - shinfo->frag_list = NULL; - - if (fclone) { - struct sk_buff *child = skb + 1; - atomic_t *fclone_ref = (atomic_t *) (child + 1); - - skb->fclone = SKB_FCLONE_ORIG; - atomic_set(fclone_ref, 1); - - child->fclone = SKB_FCLONE_UNAVAILABLE; - } + + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_segs = 0; + skb_shinfo(skb)->gso_type = 0; + skb_shinfo(skb)->ip6_frag_id = 0; + skb_shinfo(skb)->frag_list = NULL; out: return skb; nodata: - kmem_cache_free(cache, skb); + kmem_cache_free(skbuff_head_cache, skb); skb = NULL; goto out; } diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/hw/cirrus_vga.c --- a/tools/ioemu/hw/cirrus_vga.c Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/ioemu/hw/cirrus_vga.c Fri Jan 19 14:48:57 2007 +0000 @@ -3010,11 +3010,44 @@ static CPUWriteMemoryFunc *cirrus_mmio_w cirrus_mmio_writel, }; +void cirrus_stop_acc(CirrusVGAState *s) +{ + if (s->map_addr){ + int error; + s->map_addr = 0; + error = unset_vram_mapping(s->cirrus_lfb_addr, + s->cirrus_lfb_end); + fprintf(stderr, "cirrus_stop_acc:unset_vram_mapping.\n"); + + munmap(s->vram_ptr, VGA_RAM_SIZE); + } +} + +void cirrus_restart_acc(CirrusVGAState *s) +{ + if (s->cirrus_lfb_addr && s->cirrus_lfb_end) { + void *vram_pointer, *old_vram; + fprintf(stderr, "cirrus_vga_load:re-enable vga acc.lfb_addr=0x%lx, lfb_end=0x%lx.\n", + s->cirrus_lfb_addr, 
s->cirrus_lfb_end); + vram_pointer = set_vram_mapping(s->cirrus_lfb_addr ,s->cirrus_lfb_end); + if (!vram_pointer){ + fprintf(stderr, "cirrus_vga_load:NULL vram_pointer\n"); + } else { + old_vram = vga_update_vram((VGAState *)s, vram_pointer, + VGA_RAM_SIZE); + qemu_free(old_vram); + s->map_addr = s->cirrus_lfb_addr; + s->map_end = s->cirrus_lfb_end; + } + } +} + /* load/save state */ static void cirrus_vga_save(QEMUFile *f, void *opaque) { CirrusVGAState *s = opaque; + uint8_t vga_acc; qemu_put_be32s(f, &s->latch); qemu_put_8s(f, &s->sr_index); @@ -3049,11 +3082,20 @@ static void cirrus_vga_save(QEMUFile *f, qemu_put_be32s(f, &s->hw_cursor_y); /* XXX: we do not save the bitblt state - we assume we do not save the state when the blitter is active */ + + vga_acc = (!!s->map_addr); + qemu_put_8s(f, &vga_acc); + qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr); + qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end); + qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); + if (vga_acc) + cirrus_stop_acc(s); } static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id) { CirrusVGAState *s = opaque; + uint8_t vga_acc = 0; if (version_id != 1) return -EINVAL; @@ -3091,6 +3133,14 @@ static int cirrus_vga_load(QEMUFile *f, qemu_get_be32s(f, &s->hw_cursor_x); qemu_get_be32s(f, &s->hw_cursor_y); + + qemu_get_8s(f, &vga_acc); + qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_addr); + qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_end); + qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE); + if (vga_acc){ + cirrus_restart_acc(s); + } /* force refresh */ s->graphic_mode = -1; @@ -3297,6 +3347,8 @@ void pci_cirrus_vga_init(PCIBus *bus, Di ds, vga_ram_base, vga_ram_offset, vga_ram_size); cirrus_init_common(s, device_id, 1); + register_savevm("cirrus_vga_pci", 0, 1, generic_pci_save, generic_pci_load, d); + /* setup memory space */ /* memory #0 LFB */ /* memory #1 memory-mapped I/O */ diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/hw/ide.c --- a/tools/ioemu/hw/ide.c Thu Jan 18 15:18:07 2007 
+0000 +++ b/tools/ioemu/hw/ide.c Fri Jan 19 14:48:57 2007 +0000 @@ -2512,6 +2512,9 @@ void pci_piix3_ide_init(PCIBus *bus, Blo pic_set_irq_new, isa_pic, 15); ide_init_ioport(&d->ide_if[0], 0x1f0, 0x3f6); ide_init_ioport(&d->ide_if[2], 0x170, 0x376); + + register_savevm("ide_pci", 0, 1, generic_pci_save, generic_pci_load, d); + #ifdef DMA_MULTI_THREAD dma_create_thread(); #endif //DMA_MULTI_THREAD diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/hw/pci.c --- a/tools/ioemu/hw/pci.c Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/ioemu/hw/pci.c Fri Jan 19 14:48:57 2007 +0000 @@ -40,6 +40,8 @@ static int pci_irq_index; static int pci_irq_index; static PCIBus *first_bus; +static void pci_update_mappings(PCIDevice *d); + PCIBus *pci_register_bus(pci_set_irq_fn set_irq, void *pic, int devfn_min) { PCIBus *bus; @@ -71,6 +73,7 @@ int generic_pci_load(QEMUFile* f, void * return -EINVAL; qemu_get_buffer(f, s->config, 256); + pci_update_mappings(s); return 0; } diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/ioemu/target-i386-dm/helper2.c Fri Jan 19 14:48:57 2007 +0000 @@ -546,6 +546,7 @@ int main_loop(void) { extern int vm_running; extern int shutdown_requested; + extern int suspend_requested; CPUState *env = cpu_single_env; int evtchn_fd = xc_evtchn_fd(xce_handle); @@ -563,12 +564,24 @@ int main_loop(void) qemu_system_reset(); reset_requested = 0; } + if (suspend_requested) { + fprintf(logfile, "device model received suspend signal!\n"); + break; + } } /* Wait up to 10 msec. 
*/ main_loop_wait(10); } - destroy_hvm_domain(); + if (!suspend_requested) + destroy_hvm_domain(); + else { + char qemu_file[20]; + sprintf(qemu_file, "/tmp/xen.qemu-dm.%d", domid); + if (qemu_savevm(qemu_file) < 0) + fprintf(stderr, "qemu save fail.\n"); + } + return 0; } diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/vl.c --- a/tools/ioemu/vl.c Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/ioemu/vl.c Fri Jan 19 14:48:57 2007 +0000 @@ -4441,6 +4441,11 @@ int qemu_loadvm(const char *filename) qemu_fseek(f, cur_pos + record_len, SEEK_SET); } fclose(f); + + /* del tmp file */ + if (unlink(filename) == -1) + fprintf(stderr, "delete tmp qemu state file failed.\n"); + ret = 0; the_end: if (saved_vm_running) @@ -5027,6 +5032,7 @@ static QEMUResetEntry *first_reset_entry static QEMUResetEntry *first_reset_entry; int reset_requested; int shutdown_requested; +int suspend_requested; static int powerdown_requested; void qemu_register_reset(QEMUResetHandler *func, void *opaque) @@ -5806,6 +5812,14 @@ int set_mm_mapping(int xc_handle, uint32 } return 0; +} + +void suspend(int sig) +{ + fprintf(logfile, "suspend sig handler called with requested=%d!\n", suspend_requested); + if (sig != SIGUSR1) + fprintf(logfile, "suspend signal dismatch, get sig=%d!\n", sig); + suspend_requested = 1; } #if defined(__i386__) || defined(__x86_64__) @@ -6464,10 +6478,6 @@ int main(int argc, char **argv) } #if defined (__ia64__) - /* ram_size passed from xend has added on GFW memory, - so we must subtract it here */ - ram_size -= 16 * MEM_M; - if (ram_size > MMIO_START) ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. 
*/ #endif @@ -6718,6 +6728,26 @@ int main(int argc, char **argv) vm_start(); } } + + /* register signal for the suspend request when save */ + { + struct sigaction act; + sigset_t set; + act.sa_handler = suspend; + act.sa_flags = SA_RESTART; + sigemptyset(&act.sa_mask); + + sigaction(SIGUSR1, &act, NULL); + + /* control panel mask some signals when spawn qemu, need unmask here*/ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + sigaddset(&set, SIGTERM); + if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1) + fprintf(stderr, "unblock signal fail, possible issue for HVM save!\n"); + + } + main_loop(); quit_timers(); return 0; diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/Makefile --- a/tools/libxc/Makefile Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/libxc/Makefile Fri Jan 19 14:48:57 2007 +0000 @@ -27,7 +27,7 @@ GUEST_SRCS-$(CONFIG_X86) += xc_linux_bui GUEST_SRCS-$(CONFIG_X86) += xc_linux_build.c GUEST_SRCS-$(CONFIG_IA64) += xc_linux_build.c GUEST_SRCS-$(CONFIG_MIGRATE) += xc_linux_restore.c xc_linux_save.c -GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c +GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_restore.c xc_hvm_save.c -include $(XEN_TARGET_ARCH)/Makefile diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/ia64/xc_ia64_hvm_build.c --- a/tools/libxc/ia64/xc_ia64_hvm_build.c Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/libxc/ia64/xc_ia64_hvm_build.c Fri Jan 19 14:48:57 2007 +0000 @@ -569,18 +569,13 @@ setup_guest(int xc_handle, uint32_t dom, xen_pfn_t *pfn_list; shared_iopage_t *sp; void *ioreq_buffer_page; - // memsize equal to normal memory size(in configure file) + 16M - // dom_memsize will pass to xc_ia64_build_hob(), so must be subbed 16M - unsigned long dom_memsize = ((memsize - 16) << 20); - unsigned long nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT); - unsigned long normal_pages = nr_pages - GFW_PAGES; + unsigned long dom_memsize = memsize << 20; + unsigned long nr_pages = memsize << (20 - PAGE_SHIFT); unsigned long vcpus; int rc; - long i, j; + long i; 
DECLARE_DOMCTL; - // ROM size for guest firmware, ioreq page and xenstore page - nr_pages += 3; if ((image_size > 12 * MEM_M) || (image_size & (PAGE_SIZE - 1))) { PERROR("Guest firmware size is incorrect [%ld]?", image_size); @@ -598,20 +593,20 @@ setup_guest(int xc_handle, uint32_t dom, pfn_list[i] = i; // If normal memory > 3G. Reserve 3G ~ 4G for MMIO, GFW and others. - for (j = (MMIO_START >> PAGE_SHIFT); j < (dom_memsize >> PAGE_SHIFT); j++) - pfn_list[j] += ((1 * MEM_G) >> PAGE_SHIFT); + for (i = (MMIO_START >> PAGE_SHIFT); i < (dom_memsize >> PAGE_SHIFT); i++) + pfn_list[i] += ((1 * MEM_G) >> PAGE_SHIFT); // Allocate memory for VTI guest, up to VGA hole from 0xA0000-0xC0000. rc = xc_domain_memory_populate_physmap(xc_handle, dom, - (normal_pages > VGA_START_PAGE) ? - VGA_START_PAGE : normal_pages, + (nr_pages > VGA_START_PAGE) ? + VGA_START_PAGE : nr_pages, 0, 0, &pfn_list[0]); // We're not likely to attempt to create a domain with less than // 640k of memory, but test for completeness if (rc == 0 && nr_pages > VGA_END_PAGE) rc = xc_domain_memory_populate_physmap(xc_handle, dom, - normal_pages - VGA_END_PAGE, + nr_pages - VGA_END_PAGE, 0, 0, &pfn_list[VGA_END_PAGE]); if (rc != 0) { PERROR("Could not allocate normal memory for Vti guest.\n"); @@ -621,24 +616,22 @@ setup_guest(int xc_handle, uint32_t dom, // We allocate additional pfn for GFW and other three pages, so // the pfn_list is not contiguous. Due to this we must support // old interface xc_ia64_get_pfn_list(). 
- // Here i = (dom_memsize >> PAGE_SHIFT) - for (j = 0; i < nr_pages - 3; i++, j++) - pfn_list[i] = (GFW_START >> PAGE_SHIFT) + j; + for (i = 0; i < GFW_PAGES; i++) + pfn_list[i] = (GFW_START >> PAGE_SHIFT) + i; rc = xc_domain_memory_populate_physmap(xc_handle, dom, GFW_PAGES, - 0, 0, &pfn_list[normal_pages]); + 0, 0, &pfn_list[0]); if (rc != 0) { PERROR("Could not allocate GFW memory for Vti guest.\n"); goto error_out; } - // Here i = (dom_memsize >> PAGE_SHIFT) + GFW_PAGES - pfn_list[i] = IO_PAGE_START >> PAGE_SHIFT; - pfn_list[i+1] = STORE_PAGE_START >> PAGE_SHIFT; - pfn_list[i+2] = BUFFER_IO_PAGE_START >> PAGE_SHIFT; + pfn_list[0] = IO_PAGE_START >> PAGE_SHIFT; + pfn_list[1] = STORE_PAGE_START >> PAGE_SHIFT; + pfn_list[2] = BUFFER_IO_PAGE_START >> PAGE_SHIFT; rc = xc_domain_memory_populate_physmap(xc_handle, dom, 3, - 0, 0, &pfn_list[nr_pages - 3]); + 0, 0, &pfn_list[0]); if (rc != 0) { PERROR("Could not allocate IO page or store page or buffer io page.\n"); goto error_out; @@ -675,13 +668,12 @@ setup_guest(int xc_handle, uint32_t dom, goto error_out; } - xc_set_hvm_param(xc_handle, dom, - HVM_PARAM_STORE_PFN, pfn_list[nr_pages - 2]); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, pfn_list[1]); // Retrieve special pages like io, xenstore, etc. 
sp = (shared_iopage_t *)xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, - pfn_list[nr_pages - 3]); + pfn_list[0]); if (sp == 0) goto error_out; @@ -689,7 +681,7 @@ setup_guest(int xc_handle, uint32_t dom, munmap(sp, PAGE_SIZE); ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, - pfn_list[nr_pages - 1]); + pfn_list[2]); memset(ioreq_buffer_page,0,PAGE_SIZE); munmap(ioreq_buffer_page, PAGE_SIZE); free(pfn_list); diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/libxc/xc_domain.c Fri Jan 19 14:48:57 2007 +0000 @@ -233,6 +233,50 @@ int xc_domain_getinfolist(int xc_handle, unlock_pages(info, max_domains*sizeof(xc_domaininfo_t)); return ret; +} + +/* get info from hvm guest for save */ +int xc_domain_hvm_getcontext(int xc_handle, + uint32_t domid, + hvm_domain_context_t *hvm_ctxt) +{ + int rc; + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_gethvmcontext; + domctl.domain = (domid_t)domid; + set_xen_guest_handle(domctl.u.hvmcontext.ctxt, hvm_ctxt); + + if ( (rc = mlock(hvm_ctxt, sizeof(*hvm_ctxt))) != 0 ) + return rc; + + rc = do_domctl(xc_handle, &domctl); + + safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt)); + + return rc; +} + +/* set info to hvm guest for restore */ +int xc_domain_hvm_setcontext(int xc_handle, + uint32_t domid, + hvm_domain_context_t *hvm_ctxt) +{ + int rc; + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_sethvmcontext; + domctl.domain = domid; + set_xen_guest_handle(domctl.u.hvmcontext.ctxt, hvm_ctxt); + + if ( (rc = mlock(hvm_ctxt, sizeof(*hvm_ctxt))) != 0 ) + return rc; + + rc = do_domctl(xc_handle, &domctl); + + safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt)); + + return rc; } int xc_vcpu_getcontext(int xc_handle, diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xc_hvm_restore.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_hvm_restore.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,360 @@ 
+/****************************************************************************** + * xc_hvm_restore.c + * + * Restore the state of a HVM guest. + * + * Copyright (c) 2003, K A Fraser. + * Copyright (c) 2006 Intel Corporation + * rewritten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <stdlib.h> +#include <unistd.h> + +#include "xg_private.h" +#include "xg_save_restore.h" + +#include <xen/hvm/ioreq.h> +#include <xen/hvm/params.h> +#include <xen/hvm/e820.h> + +/* max mfn of the whole machine */ +static unsigned long max_mfn; + +/* virtual starting address of the hypervisor */ +static unsigned long hvirt_start; + +/* #levels of page tables used by the current guest */ +static unsigned int pt_levels; + +/* total number of pages used by the current guest */ +static unsigned long max_pfn; + +/* A table mapping each PFN to its new MFN. */ +static xen_pfn_t *p2m = NULL; + +static ssize_t +read_exact(int fd, void *buf, size_t count) +{ + int r = 0, s; + unsigned char *b = buf; + + while (r < count) { + s = read(fd, &b[r], count - r); + if ((s == -1) && (errno == EINTR)) + continue; + if (s <= 0) { + break; + } + r += s; + } + + return (r == count) ? 
1 : 0; +} + +int xc_hvm_restore(int xc_handle, int io_fd, + uint32_t dom, unsigned long nr_pfns, + unsigned int store_evtchn, unsigned long *store_mfn, + unsigned int console_evtchn, unsigned long *console_mfn, + unsigned int pae, unsigned int apic) +{ + DECLARE_DOMCTL; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + char *region_base; + + unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; + + xc_dominfo_t info; + unsigned int rc = 1, n, i; + uint32_t rec_len, nr_vcpus; + hvm_domain_context_t hvm_ctxt; + unsigned long long v_end, memsize; + unsigned long shared_page_nr; + + unsigned long mfn, pfn; + unsigned int prev_pc, this_pc; + int verify = 0; + + /* Types of the pfns in the current region */ + unsigned long region_pfn_type[MAX_BATCH_SIZE]; + + /* hvm guest mem size (Mb) */ + memsize = (unsigned long long)*store_mfn; + v_end = memsize << 20; + + DPRINTF("xc_hvm_restore:dom=%d, nr_pfns=0x%lx, store_evtchn=%d, *store_mfn=%ld, console_evtchn=%d, *console_mfn=%ld, pae=%u, apic=%u.\n", + dom, nr_pfns, store_evtchn, *store_mfn, console_evtchn, *console_mfn, pae, apic); + + max_pfn = nr_pfns; + + if(!get_platform_info(xc_handle, dom, + &max_mfn, &hvirt_start, &pt_levels)) { + ERROR("Unable to get platform info."); + return 1; + } + + DPRINTF("xc_hvm_restore start: max_pfn = %lx, max_mfn = %lx, hvirt_start=%lx, pt_levels=%d\n", + max_pfn, + max_mfn, + hvirt_start, + pt_levels); + + if (mlock(&ctxt, sizeof(ctxt))) { + /* needed for build dom0 op, but might as well do early */ + ERROR("Unable to mlock ctxt"); + return 1; + } + + + p2m = malloc(max_pfn * sizeof(xen_pfn_t)); + + if (p2m == NULL) { + ERROR("memory alloc failed"); + errno = ENOMEM; + goto out; + } + + /* Get the domain's shared-info frame. 
*/ + domctl.cmd = XEN_DOMCTL_getdomaininfo; + domctl.domain = (domid_t)dom; + if (xc_domctl(xc_handle, &domctl) < 0) { + ERROR("Could not get information on new domain"); + goto out; + } + shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; + + if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) { + errno = ENOMEM; + goto out; + } + + for ( i = 0; i < max_pfn; i++ ) + p2m[i] = i; + for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < max_pfn; i++ ) + p2m[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; + + /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */ + rc = xc_domain_memory_populate_physmap( + xc_handle, dom, (max_pfn > 0xa0) ? 0xa0 : max_pfn, + 0, 0, &p2m[0x00]); + if ( (rc == 0) && (max_pfn > 0xc0) ) + rc = xc_domain_memory_populate_physmap( + xc_handle, dom, max_pfn - 0xc0, 0, 0, &p2m[0xc0]); + if ( rc != 0 ) + { + PERROR("Could not allocate memory for HVM guest.\n"); + goto out; + } + + + /**********XXXXXXXXXXXXXXXX******************/ + if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { + ERROR("Could not get domain info"); + return 1; + } + + domctl.cmd = XEN_DOMCTL_getdomaininfo; + domctl.domain = (domid_t)dom; + if (xc_domctl(xc_handle, &domctl) < 0) { + ERROR("Could not get information on new domain"); + goto out; + } + + for ( i = 0; i < max_pfn; i++) + p2m[i] = i; + + prev_pc = 0; + + n = 0; + while (1) { + + int j; + + this_pc = (n * 100) / max_pfn; + if ( (this_pc - prev_pc) >= 5 ) + { + PPRINTF("\b\b\b\b%3d%%", this_pc); + prev_pc = this_pc; + } + + if (!read_exact(io_fd, &j, sizeof(int))) { + ERROR("HVM restore Error when reading batch size"); + goto out; + } + + PPRINTF("batch %d\n",j); + + if (j == -1) { + verify = 1; + DPRINTF("Entering page verify mode\n"); + continue; + } + + if (j == 0) + break; /* our work here is done */ + + if (j > MAX_BATCH_SIZE) { + ERROR("Max batch size exceeded. 
Giving up."); + goto out; + } + + if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { + ERROR("Error when reading region pfn types"); + goto out; + } + + region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_WRITE, region_pfn_type, j); + + for ( i = 0; i < j; i++ ) + { + void *page; + + pfn = region_pfn_type[i]; + if ( pfn > max_pfn ) + { + ERROR("pfn out of range"); + goto out; + } + + if ( pfn >= 0xa0 && pfn < 0xc0) { + ERROR("hvm restore:pfn in vga hole"); + goto out; + } + + + mfn = p2m[pfn]; + + /* In verify mode, we use a copy; otherwise we work in place */ + page = verify ? (void *)buf : (region_base + i*PAGE_SIZE); + + if (!read_exact(io_fd, page, PAGE_SIZE)) { + ERROR("Error when reading page (%x)", i); + goto out; + } + + if (verify) { + + int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); + + if (res) { + + int v; + + DPRINTF("************** pfn=%lx mfn=%lx gotcs=%08lx " + "actualcs=%08lx\n", pfn, p2m[pfn], + csum_page(region_base + i*PAGE_SIZE), + csum_page(buf)); + + for (v = 0; v < 4; v++) { + + unsigned long *p = (unsigned long *) + (region_base + i*PAGE_SIZE); + if (buf[v] != p[v]) + DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); + } + } + } + + } /* end of 'batch' for loop */ + munmap(region_base, j*PAGE_SIZE); + n+= j; /* crude stats */ + + }/*while 1*/ + +/* xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);*/ + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); + + if ( v_end > HVM_BELOW_4G_RAM_END ) + shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1; + else + shared_page_nr = (v_end >> PAGE_SHIFT) - 1; + + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr); + + /* caculate the store_mfn , wrong val cause hang when introduceDomain */ + 
*store_mfn = (v_end >> PAGE_SHIFT) - 2; + DPRINTF("hvm restore:calculate new store_mfn=0x%lx,v_end=0x%llx..\n", *store_mfn, v_end); + + /* restore hvm context including pic/pit/shpage */ + if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) { + ERROR("error read hvm context size!\n"); + goto out; + } + if (rec_len != sizeof(hvm_ctxt)) { + ERROR("hvm context size dismatch!\n"); + goto out; + } + + if (!read_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt))) { + ERROR("error read hvm context!\n"); + goto out; + } + + if (( rc = xc_domain_hvm_setcontext(xc_handle, dom, &hvm_ctxt))) { + ERROR("error set hvm context!\n"); + goto out; + } + + if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) { + ERROR("error read nr vcpu !\n"); + goto out; + } + DPRINTF("hvm restore:get nr_vcpus=%d.\n", nr_vcpus); + + for (i =0; i < nr_vcpus; i++) { + if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) { + ERROR("error read vcpu context size!\n"); + goto out; + } + if (rec_len != sizeof(ctxt)) { + ERROR("vcpu context size dismatch!\n"); + goto out; + } + + if (!read_exact(io_fd, &(ctxt), sizeof(ctxt))) { + ERROR("error read vcpu context.\n"); + goto out; + } + + if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) { + ERROR("Could not set vcpu context, rc=%d", rc); + goto out; + } + } + + rc = 0; + goto out; + + out: + if ( (rc != 0) && (dom != 0) ) + xc_domain_destroy(xc_handle, dom); + free(p2m); + + DPRINTF("Restore exit with rc=%d\n", rc); + + return rc; +} diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xc_hvm_save.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_hvm_save.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,727 @@ +/****************************************************************************** + * xc_hvm_save.c + * + * Save the state of a running HVM guest. + * + * Copyright (c) 2003, K A Fraser. 
+ * Copyright (c) 2006 Intel Corporation + * rewritten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <inttypes.h> +#include <time.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/time.h> + +#include "xc_private.h" +#include "xg_private.h" +#include "xg_save_restore.h" + +/* +** Default values for important tuning parameters. Can override by passing +** non-zero replacement values to xc_hvm_save(). +** +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. +** +*/ +#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ +#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ + +/* max mfn of the whole machine */ +static unsigned long max_mfn; + +/* virtual starting address of the hypervisor */ +static unsigned long hvirt_start; + +/* #levels of page tables used by the current guest */ +static unsigned int pt_levels; + +/* total number of pages used by the current guest */ +static unsigned long max_pfn; + +/* +** During (live) save/migrate, we maintain a number of bitmaps to track +** which pages we have to send, to fixup, and to skip. 
+*/ + +#define BITS_PER_LONG (sizeof(unsigned long) * 8) +#define BITMAP_SIZE ((max_pfn + BITS_PER_LONG - 1) / 8) + +#define BITMAP_ENTRY(_nr,_bmap) \ + ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] + +#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) + +static inline int test_bit (int nr, volatile void * addr) +{ + return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; +} + +static inline void clear_bit (int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); +} + +static inline int permute( int i, int nr, int order_nr ) +{ + /* Need a simple permutation function so that we scan pages in a + pseudo random order, enabling us to get a better estimate of + the domain's page dirtying rate as we go (there are often + contiguous ranges of pfns that have similar behaviour, and we + want to mix them up. */ + + /* e.g. nr->oder 15->4 16->4 17->5 */ + /* 512MB domain, 128k pages, order 17 */ + + /* + QPONMLKJIHGFEDCBA + QPONMLKJIH + GFEDCBA + */ + + /* + QPONMLKJIHGFEDCBA + EDCBA + QPONM + LKJIHGF + */ + + do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } + while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ + + return i; +} + +static uint64_t tv_to_us(struct timeval *new) +{ + return (new->tv_sec * 1000000) + new->tv_usec; +} + +static uint64_t llgettimeofday(void) +{ + struct timeval now; + gettimeofday(&now, NULL); + return tv_to_us(&now); +} + +static uint64_t tv_delta(struct timeval *new, struct timeval *old) +{ + return ((new->tv_sec - old->tv_sec)*1000000 ) + + (new->tv_usec - old->tv_usec); +} + + +#define RATE_IS_MAX() (0) +#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n)) +#define initialize_mbit_rate() + +static inline ssize_t write_exact(int fd, void *buf, size_t count) +{ + if(write(fd, buf, count) != count) + return 0; + return 1; +} + +static int print_stats(int xc_handle, uint32_t domid, int pages_sent, + xc_shadow_op_stats_t *stats, int print) +{ + static struct timeval 
wall_last; + static long long d0_cpu_last; + static long long d1_cpu_last; + + struct timeval wall_now; + long long wall_delta; + long long d0_cpu_now, d0_cpu_delta; + long long d1_cpu_now, d1_cpu_delta; + + gettimeofday(&wall_now, NULL); + + d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000; + d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000; + + if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) + DPRINTF("ARRHHH!!\n"); + + wall_delta = tv_delta(&wall_now,&wall_last)/1000; + + if (wall_delta == 0) wall_delta = 1; + + d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; + d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; + + if (print) + DPRINTF( + "delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " + "dirtied %dMb/s %" PRId32 " pages\n", + wall_delta, + (int)((d0_cpu_delta*100)/wall_delta), + (int)((d1_cpu_delta*100)/wall_delta), + (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))), + (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))), + stats->dirty_count); + + d0_cpu_last = d0_cpu_now; + d1_cpu_last = d1_cpu_now; + wall_last = wall_now; + + return 0; +} + +static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn, + unsigned long *arr, int runs) +{ + long long start, now; + xc_shadow_op_stats_t stats; + int j; + + start = llgettimeofday(); + + for (j = 0; j < runs; j++) { + int i; + + xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN, + arr, max_pfn, NULL, 0, NULL); + DPRINTF("#Flush\n"); + for ( i = 0; i < 40; i++ ) { + usleep(50000); + now = llgettimeofday(); + xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK, + NULL, 0, NULL, 0, &stats); + + DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n", + ((now-start)+500)/1000, + stats.fault_count, stats.dirty_count); + } + } + + return -1; +} + +static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, + int dom, xc_dominfo_t *info, + vcpu_guest_context_t *ctxt) +{ + int i = 0; + + if (!(*suspend)(dom)) { + 
ERROR("Suspend request failed"); + return -1; + } + + retry: + + if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) { + ERROR("Could not get domain info"); + return -1; + } + + if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt)) + ERROR("Could not get vcpu context"); + + + if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend) + return 0; // success + + if (info->paused) { + // try unpausing domain, wait, and retest + xc_domain_unpause( xc_handle, dom ); + + ERROR("Domain was paused. Wait and re-test."); + usleep(10000); // 10ms + + goto retry; + } + + + if( ++i < 100 ) { + ERROR("Retry suspend domain."); + usleep(10000); // 10ms + goto retry; + } + + ERROR("Unable to suspend domain."); + + return -1; +} + +int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags, int (*suspend)(int)) +{ + xc_dominfo_t info; + + int rc = 1, i, last_iter, iter = 0; + int live = (flags & XCFLAGS_LIVE); + int debug = (flags & XCFLAGS_DEBUG); + int sent_last_iter, skip_this_iter; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + /* A table containg the type of each PFN (/not/ MFN!). 
*/ + unsigned long *pfn_type = NULL; + unsigned long *pfn_batch = NULL; + + /* A copy of hvm domain context */ + hvm_domain_context_t hvm_ctxt; + + /* Live mapping of shared info structure */ + shared_info_t *live_shinfo = NULL; + + /* base of the region in which domain memory is mapped */ + unsigned char *region_base = NULL; + + uint32_t nr_pfns, rec_size, nr_vcpus; + unsigned long *page_array = NULL; + + /* power of 2 order of max_pfn */ + int order_nr; + + /* bitmap of pages: + - that should be sent this iteration (unless later marked as skip); + - to skip this iteration because already dirty; */ + unsigned long *to_send = NULL, *to_skip = NULL; + + xc_shadow_op_stats_t stats; + + unsigned long total_sent = 0; + + DPRINTF("xc_hvm_save:dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, live=%d, debug=%d.\n", + dom, max_iters, max_factor, flags, + live, debug); + + /* If no explicit control parameters given, use defaults */ + if(!max_iters) + max_iters = DEF_MAX_ITERS; + if(!max_factor) + max_factor = DEF_MAX_FACTOR; + + initialize_mbit_rate(); + + if(!get_platform_info(xc_handle, dom, + &max_mfn, &hvirt_start, &pt_levels)) { + ERROR("HVM:Unable to get platform info."); + return 1; + } + + if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { + ERROR("HVM:Could not get domain info"); + return 1; + } + nr_vcpus = info.nr_online_vcpus; + + if (mlock(&ctxt, sizeof(ctxt))) { + ERROR("HVM:Unable to mlock ctxt"); + return 1; + } + + /* Only have to worry about vcpu 0 even for SMP */ + if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { + ERROR("HVM:Could not get vcpu context"); + goto out; + } + shared_info_frame = info.shared_info_frame; + + /* A cheesy test to see whether the domain contains valid state. 
*/ + if (ctxt.ctrlreg[3] == 0) + { + ERROR("Domain is not in a valid HVM guest state"); + goto out; + } + + /* cheesy sanity check */ + if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) { + ERROR("Invalid HVM state record -- pfn count out of range: %lu", + (info.max_memkb >> (PAGE_SHIFT - 10))); + goto out; + } + + /* Map the shared info frame */ + if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, shared_info_frame))) { + ERROR("HVM:Couldn't map live_shinfo"); + goto out; + } + + max_pfn = live_shinfo->arch.max_pfn; + + DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages); + + /* nr_pfns: total pages excluding vga acc mem + * max_pfn: nr_pfns + 0x20 vga hole(0xa0~0xc0) + * getdomaininfo.tot_pages: all the allocated pages for this domain + */ + if (live) { + ERROR("hvm domain doesn't support live migration now.\n"); + goto out; + + if (xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL, 0, NULL) < 0) { + ERROR("Couldn't enable shadow mode"); + goto out; + } + + /* excludes vga acc mem */ + nr_pfns = info.nr_pages - 0x800; + + last_iter = 0; + DPRINTF("hvm domain live migration debug start: logdirty enable.\n"); + } else { + /* This is a non-live suspend. 
Issue the call back to get the + domain suspended */ + + last_iter = 1; + + /* suspend hvm domain */ + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) { + ERROR("HVM Domain appears not to have suspended"); + goto out; + } + nr_pfns = info.nr_pages; + DPRINTF("after suspend hvm domain nr_pages=0x%x.\n", nr_pfns); + } + + DPRINTF("after 1st handle hvm domain nr_pfns=0x%x, nr_pages=0x%lx, max_memkb=0x%lx, live=%d.\n", + nr_pfns, + info.nr_pages, + info.max_memkb, + live); + + nr_pfns = info.nr_pages; + + /*XXX: caculate the VGA hole*/ + max_pfn = nr_pfns + 0x20; + + skip_this_iter = 0;/*XXX*/ + /* pretend we sent all the pages last iteration */ + sent_last_iter = max_pfn; + + /* calculate the power of 2 order of max_pfn, e.g. + 15->4 16->4 17->5 */ + for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++) + continue; + + /* Setup to_send / to_fix and to_skip bitmaps */ + to_send = malloc(BITMAP_SIZE); + to_skip = malloc(BITMAP_SIZE); + + if (!to_send ||!to_skip) { + ERROR("Couldn't allocate to_send array"); + goto out; + } + + memset(to_send, 0xff, BITMAP_SIZE); + + if (lock_pages(to_send, BITMAP_SIZE)) { + ERROR("Unable to lock to_send"); + return 1; + } + + /* (to fix is local only) */ + if (lock_pages(to_skip, BITMAP_SIZE)) { + ERROR("Unable to lock to_skip"); + return 1; + } + + analysis_phase(xc_handle, dom, max_pfn, to_skip, 0); + + /* get all the HVM domain pfns */ + if ( (page_array = (unsigned long *) malloc (sizeof(unsigned long) * max_pfn)) == NULL) { + ERROR("HVM:malloc fail!\n"); + goto out; + } + + for ( i = 0; i < max_pfn; i++) + page_array[i] = i; + + + /* We want zeroed memory so use calloc rather than malloc. 
*/ + pfn_type = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type)); + pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch)); + + if ((pfn_type == NULL) || (pfn_batch == NULL)) { + ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); + errno = ENOMEM; + goto out; + } + + if (lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type))) { + ERROR("Unable to lock"); + goto out; + } + + /* Start writing out the saved-domain record. */ + if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { + ERROR("write: max_pfn"); + goto out; + } + + while(1) { + + unsigned int prev_pc, sent_this_iter, N, batch; + + iter++; + sent_this_iter = 0; + skip_this_iter = 0; + prev_pc = 0; + N=0; + + DPRINTF("Saving HVM domain memory pages: iter %d 0%%", iter); + + while( N < max_pfn ){ + + unsigned int this_pc = (N * 100) / max_pfn; + + if ((this_pc - prev_pc) >= 5) { + DPRINTF("\b\b\b\b%3d%%", this_pc); + prev_pc = this_pc; + } + + /* slightly wasteful to peek the whole array evey time, + but this is fast enough for the moment. */ + if (!last_iter && xc_shadow_control( + xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, + to_skip, max_pfn, NULL, 0, NULL) != max_pfn) { + ERROR("Error peeking HVM shadow bitmap"); + goto out; + } + + + /* load pfn_type[] with the mfn of all the pages we're doing in + this batch. */ + for (batch = 0; batch < MAX_BATCH_SIZE && N < max_pfn ; N++) { + + int n = permute(N, max_pfn, order_nr); + + if (debug) { + DPRINTF("%d pfn= %08lx mfn= %08lx %d \n", + iter, (unsigned long)n, page_array[n], + test_bit(n, to_send)); + } + + if (!last_iter && test_bit(n, to_send)&& test_bit(n, to_skip)) + skip_this_iter++; /* stats keeping */ + + if (!((test_bit(n, to_send) && !test_bit(n, to_skip)) || + (test_bit(n, to_send) && last_iter))) + continue; + + if (n >= 0xa0 && n < 0xc0) { +/* DPRINTF("get a vga hole pfn= %x.\n", n);*/ + continue; + } + /* + ** we get here if: + ** 1. page is marked to_send & hasn't already been re-dirtied + ** 2. 
(ignore to_skip in last iteration) + */ + + pfn_batch[batch] = n; + pfn_type[batch] = page_array[n]; + + batch++; + } + + if (batch == 0) + goto skip; /* vanishingly unlikely... */ + + /* map_foreign use pfns now !*/ + if ((region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ, pfn_batch, batch)) == 0) { + ERROR("map batch failed"); + goto out; + } + + /* write num of pfns */ + if(!write_exact(io_fd, &batch, sizeof(unsigned int))) { + ERROR("Error when writing to state file (2)"); + goto out; + } + + /* write all the pfns */ + if(!write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch)) { + ERROR("Error when writing to state file (3)"); + goto out; + } + + if (ratewrite(io_fd, region_base, PAGE_SIZE * batch) != PAGE_SIZE * batch) { + ERROR("ERROR when writting to state file (4)"); + goto out; + } + + + sent_this_iter += batch; + + munmap(region_base, batch*PAGE_SIZE); + + } /* end of this while loop for this iteration */ + + skip: + + total_sent += sent_this_iter; + + DPRINTF("\r %d: sent %d, skipped %d, ", + iter, sent_this_iter, skip_this_iter ); + + if (last_iter) { + print_stats( xc_handle, dom, sent_this_iter, &stats, 1); + + DPRINTF("Total pages sent= %ld (%.2fx)\n", + total_sent, ((float)total_sent)/max_pfn ); + } + + if (last_iter && debug){ + int minusone = -1; + memset(to_send, 0xff, BITMAP_SIZE); + debug = 0; + DPRINTF("Entering debug resend-all mode\n"); + + /* send "-1" to put receiver into debug mode */ + if(!write_exact(io_fd, &minusone, sizeof(int))) { + ERROR("Error when writing to state file (6)"); + goto out; + } + + continue; + } + + if (last_iter) break; + + if (live) { + + + if( + ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || + (iter >= max_iters) || + (sent_this_iter+skip_this_iter < 50) || + (total_sent > max_pfn*max_factor) ) { + + DPRINTF("Start last iteration for HVM domain\n"); + last_iter = 1; + + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, + &ctxt)) { + ERROR("Domain appears not to have 
suspended"); + goto out; + } + + DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n", + info.shared_info_frame, + (unsigned long)ctxt.user_regs.eip, + (unsigned long)ctxt.user_regs.edx); + } + + if (xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, + max_pfn, NULL, 0, &stats) != max_pfn) { + ERROR("Error flushing shadow PT"); + goto out; + } + + sent_last_iter = sent_this_iter; + + print_stats(xc_handle, dom, sent_this_iter, &stats, 1); + + } + + + } /* end of while 1 */ + + + DPRINTF("All HVM memory is saved\n"); + + /* Zero terminate */ + i = 0; + if (!write_exact(io_fd, &i, sizeof(int))) { + ERROR("Error when writing to state file (6)"); + goto out; + } + + /* save hvm hypervisor state including pic/pit/shpage */ + if (mlock(&hvm_ctxt, sizeof(hvm_ctxt))) { + ERROR("Unable to mlock ctxt"); + return 1; + } + + if (xc_domain_hvm_getcontext(xc_handle, dom, &hvm_ctxt)){ + ERROR("HVM:Could not get hvm context"); + goto out; + } + + rec_size = sizeof(hvm_ctxt); + if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) { + ERROR("error write hvm ctxt size"); + goto out; + } + + if ( !write_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt)) ) { + ERROR("write HVM info failed!\n"); + } + + /* save vcpu/vmcs context */ + if (!write_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) { + ERROR("error write nr vcpus"); + goto out; + } + + /*XXX: need a online map to exclude down cpu */ + for (i = 0; i < nr_vcpus; i++) { + + if (xc_vcpu_getcontext(xc_handle, dom, i, &ctxt)) { + ERROR("HVM:Could not get vcpu context"); + goto out; + } + + rec_size = sizeof(ctxt); + DPRINTF("write %d vcpucontext of total %d.\n", i, nr_vcpus); + if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) { + ERROR("error write vcpu ctxt size"); + goto out; + } + + if (!write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) { + ERROR("write vmcs failed!\n"); + goto out; + } + } + + /* Success! 
*/ + rc = 0; + + out: + + if (live) { + if(xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_OFF, + NULL, 0, NULL, 0, NULL) < 0) { + DPRINTF("Warning - couldn't disable shadow mode"); + } + } + + free(page_array); + + free(pfn_type); + free(pfn_batch); + free(to_send); + free(to_skip); + + return !!rc; +} diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/libxc/xenctrl.h Fri Jan 19 14:48:57 2007 +0000 @@ -313,6 +313,30 @@ int xc_domain_getinfolist(int xc_handle, xc_domaininfo_t *info); /** + * This function returns information about the context of a hvm domain + * @parm xc_handle a handle to an open hypervisor interface + * @parm domid the domain to get information from + * @parm hvm_ctxt a pointer to a structure to store the execution context of the + * hvm domain + * @return 0 on success, -1 on failure + */ +int xc_domain_hvm_getcontext(int xc_handle, + uint32_t domid, + hvm_domain_context_t *hvm_ctxt); + +/** + * This function will set the context for hvm domain + * + * @parm xc_handle a handle to an open hypervisor interface + * @parm domid the domain to set the hvm domain context for + * @parm hvm_ctxt pointer to the hvm context with the values to set + * @return 0 on success, -1 on failure + */ +int xc_domain_hvm_setcontext(int xc_handle, + uint32_t domid, + hvm_domain_context_t *hvm_ctxt); + +/** * This function returns information about the execution context of a * particular vcpu of a domain. 
* diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/libxc/xenguest.h Fri Jan 19 14:48:57 2007 +0000 @@ -11,6 +11,7 @@ #define XCFLAGS_LIVE 1 #define XCFLAGS_DEBUG 2 +#define XCFLAGS_HVM 4 /** @@ -25,6 +26,13 @@ int xc_linux_save(int xc_handle, int io_ uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, int (*suspend)(int domid)); +/** + * This function will save a hvm domain running unmodified guest. + * @return 0 on success, 1 on failure + */ +int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, + int (*suspend)(int domid)); /** * This function will restore a saved domain running Linux. @@ -41,6 +49,18 @@ int xc_linux_restore(int xc_handle, int unsigned long nr_pfns, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn); + +/** + * This function will restore a saved hvm domain running unmodified guest. + * + * @parm store_mfn pass mem size & returned with the mfn of the store page + * @return 0 on success, -1 on failure + */ +int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom, + unsigned long nr_pfns, unsigned int store_evtchn, + unsigned long *store_mfn, unsigned int console_evtchn, + unsigned long *console_mfn, + unsigned int pae, unsigned int apic); /** * This function will create a domain for a paravirtualized Linux diff -r 8475a4e0425e -r 3c8bb086025e tools/pygrub/src/pygrub --- a/tools/pygrub/src/pygrub Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/pygrub/src/pygrub Fri Jan 19 14:48:57 2007 +0000 @@ -503,7 +503,7 @@ def run_grub(file, entry, fs): # If nothing has been specified, look for a Solaris domU. If found, perform the # necessary tweaks. 
def sniff_solaris(fs, cfg): - if not fs.file_exists("/platform/i86xen/kernel/unix"): + if not fs.file_exists("/platform/i86xpv/kernel/unix"): return cfg # darned python @@ -516,10 +516,10 @@ def sniff_solaris(fs, cfg): longmode = True if not cfg["kernel"]: - cfg["kernel"] = "/platform/i86xen/kernel/unix" + cfg["kernel"] = "/platform/i86xpv/kernel/unix" cfg["ramdisk"] = "/platform/i86pc/boot_archive" if longmode: - cfg["kernel"] = "/platform/i86xen/kernel/amd64/unix" + cfg["kernel"] = "/platform/i86xpv/kernel/amd64/unix" cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive" # Unpleasant. Typically we'll have 'root=foo -k' or 'root=foo /kernel -k', diff -r 8475a4e0425e -r 3c8bb086025e tools/python/setup.py --- a/tools/python/setup.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/setup.py Fri Jan 19 14:48:57 2007 +0000 @@ -30,12 +30,23 @@ xs = Extension("xs", libraries = libraries, sources = [ "xen/lowlevel/xs/xs.c" ]) +scf = Extension("scf", + extra_compile_args = extra_compile_args, + include_dirs = include_dirs + [ "xen/lowlevel/scf" ], + library_dirs = library_dirs, + libraries = libraries, + sources = [ "xen/lowlevel/scf/scf.c" ]) + acm = Extension("acm", extra_compile_args = extra_compile_args, include_dirs = include_dirs + [ "xen/lowlevel/acm" ], library_dirs = library_dirs, libraries = libraries, sources = [ "xen/lowlevel/acm/acm.c" ]) + +modules = [ xc, xs, acm ] +if os.uname()[0] == 'SunOS': + modules.append(scf) setup(name = 'xen', version = '3.0', @@ -56,7 +67,7 @@ setup(name = 'xen', 'xen.xm.tests' ], ext_package = "xen.lowlevel", - ext_modules = [ xc, xs, acm ] + ext_modules = modules ) os.chdir('logging') diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/lowlevel/scf/scf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/python/xen/lowlevel/scf/scf.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,156 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files 
(the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <Python.h> + +#include <libscf.h> +#include <stdio.h> + +#define XEND_FMRI "svc:/system/xen/xend:default" +#define XEND_PG "config" + +static PyObject *scf_exc; + +static void * +scf_exception(const char *err, const char *value) +{ + int scferr = scf_error(); + const char *scfstrerr = scf_strerror(scferr); + PyObject *obj = Py_BuildValue("(isss)", scferr, err, scfstrerr, value); + PyErr_SetObject(scf_exc, obj); + return (NULL); +} + +static PyObject * +pyscf_get_bool(PyObject *o, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = { "name", NULL }; + scf_simple_prop_t *prop; + uint8_t *val; + char *name; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name)) + return (NULL); + + prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name); + + if (prop == NULL) + return (scf_exception("scf_simple_prop_get() failed", name)); + + if ((val = scf_simple_prop_next_boolean(prop)) == NULL) + return (scf_exception("scf_simple_prop_next_boolean() failed", + name)); + + if (*val) { + scf_simple_prop_free(prop); + Py_INCREF(Py_True); + return (Py_True); + } + + scf_simple_prop_free(prop); + Py_INCREF(Py_False); + return (Py_False); +} + +static PyObject * +pyscf_get_int(PyObject *o, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = { "name", NULL }; + scf_simple_prop_t *prop; + PyObject *obj; + int64_t *val; + char *name; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name)) + return (NULL); + + prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name); + + if (prop == NULL) + return (scf_exception("scf_simple_prop_get() failed", name)); + + if ((val = scf_simple_prop_next_integer(prop)) == NULL) + return (scf_exception("scf_simple_prop_next_integer() failed", + name)); + + obj = PyInt_FromLong((long)*val); + scf_simple_prop_free(prop); + return (obj); +} + +static PyObject * +pyscf_get_string(PyObject *o, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = { "name", NULL }; + 
scf_simple_prop_t *prop; + PyObject *obj; + char *name; + char *str; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name)) + return (NULL); + + prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name); + + if (prop == NULL) + return (scf_exception("scf_simple_prop_get() failed", name)); + + if ((str = scf_simple_prop_next_astring(prop)) == NULL) { + scf_simple_prop_free(prop); + return (scf_exception("scf_simple_prop_next_astring() failed", + name)); + } + + obj = PyString_FromString(str); + scf_simple_prop_free(prop); + return (obj); +} + +PyDoc_STRVAR(pyscf_get_bool__doc__, + "get_bool(name) - get the value of the named boolean property"); +PyDoc_STRVAR(pyscf_get_int__doc__, + "get_int(name) - get the value of the named integer property"); +PyDoc_STRVAR(pyscf_get_string__doc__, + "get_string(name) - get the value of the named string property"); + +static struct PyMethodDef pyscf_module_methods[] = { + { "get_bool", (PyCFunction) pyscf_get_bool, + METH_VARARGS|METH_KEYWORDS, pyscf_get_bool__doc__ }, + { "get_int", (PyCFunction) pyscf_get_int, + METH_VARARGS|METH_KEYWORDS, pyscf_get_int__doc__ }, + { "get_string", (PyCFunction) pyscf_get_string, + METH_VARARGS|METH_KEYWORDS, pyscf_get_string__doc__ }, + { NULL, NULL, 0, NULL } +}; + +PyMODINIT_FUNC +initscf(void) +{ + PyObject *m; + m = Py_InitModule("scf", pyscf_module_methods); + + scf_exc = PyErr_NewException("scf.error", NULL, NULL); + Py_INCREF(scf_exc); + PyModule_AddObject(m, "error", scf_exc); + PyModule_AddIntConstant(m, "SCF_ERROR_NOT_FOUND", SCF_ERROR_NOT_FOUND); +} diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/lowlevel/xc/xc.c Fri Jan 19 14:48:57 2007 +0000 @@ -158,6 +158,20 @@ static PyObject *pyxc_domain_destroy(XcO static PyObject *pyxc_domain_destroy(XcObject *self, PyObject *args) { return dom_op(self, args, xc_domain_destroy); +} + +static PyObject 
*pyxc_domain_shutdown(XcObject *self, PyObject *args) +{ + uint32_t dom, reason; + + if (!PyArg_ParseTuple(args, "ii", &dom, &reason)) + return NULL; + + if (xc_domain_shutdown(self->xc_handle, dom, reason) != 0) + return pyxc_error_to_exception(); + + Py_INCREF(zero); + return zero; } @@ -1027,6 +1041,14 @@ static PyMethodDef pyxc_methods[] = { METH_VARARGS, "\n" "Destroy a domain.\n" " dom [int]: Identifier of domain to be destroyed.\n\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + + { "domain_shutdown", + (PyCFunction)pyxc_domain_shutdown, + METH_VARARGS, "\n" + "Shutdown a domain.\n" + " dom [int, 0]: Domain identifier to use.\n" + " reason [int, 0]: Reason for shutdown.\n" "Returns: [int] 0 on success; -1 on error.\n" }, { "vcpu_setaffinity", diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/util/xmlrpclib2.py --- a/tools/python/xen/util/xmlrpclib2.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/util/xmlrpclib2.py Fri Jan 19 14:48:57 2007 +0000 @@ -256,6 +256,7 @@ class UnixXMLRPCRequestHandler(XMLRPCReq class UnixXMLRPCServer(TCPXMLRPCServer): address_family = socket.AF_UNIX + allow_address_reuse = True def __init__(self, addr, allowed, xenapi, logRequests = 1): mkdir.parents(os.path.dirname(addr), stat.S_IRWXU, True) diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/web/httpserver.py --- a/tools/python/xen/web/httpserver.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/web/httpserver.py Fri Jan 19 14:48:57 2007 +0000 @@ -294,8 +294,6 @@ class HttpServer: backlog = 5 - closed = False - def __init__(self, root, interface, port=8080): self.root = root self.interface = interface @@ -303,6 +301,7 @@ class HttpServer: # ready indicates when we are ready to begin accept connections # it should be set after a successful bind self.ready = False + self.closed = False def run(self): self.bind() @@ -316,7 +315,6 @@ class HttpServer: def stop(self): self.close() - def bind(self): self.socket = socket.socket(socket.AF_INET, 
socket.SOCK_STREAM) @@ -334,7 +332,10 @@ class HttpServer: def close(self): self.closed = True - try: + self.ready = False + try: + # shutdown socket explicitly to allow reuse + self.socket.shutdown(socket.SHUT_RDWR) self.socket.close() except: pass @@ -344,6 +345,9 @@ class HttpServer: def getResource(self, req): return self.root.getRequestResource(req) + + def shutdown(self): + self.close() class UnixHttpServer(HttpServer): diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/Vifctl.py --- a/tools/python/xen/xend/Vifctl.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/Vifctl.py Fri Jan 19 14:48:57 2007 +0000 @@ -20,7 +20,7 @@ """ import os -import XendRoot +import XendOptions def network(op): @@ -30,7 +30,7 @@ def network(op): """ if op not in ['start', 'stop']: raise ValueError('Invalid operation: ' + op) - script = XendRoot.instance().get_network_script() + script = XendOptions.instance().get_network_script() if script: script.insert(1, op) os.spawnv(os.P_WAIT, script[0], script) diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/XendCheckpoint.py Fri Jan 19 14:48:57 2007 +0000 @@ -22,11 +22,14 @@ from xen.xend.XendConstants import * from xen.xend.XendConstants import * SIGNATURE = "LinuxGuestRecord" +QEMU_SIGNATURE = "QemuDeviceModelRecord" +dm_batch = 512 XC_SAVE = "xc_save" XC_RESTORE = "xc_restore" sizeof_int = calcsize("i") +sizeof_unsigned_int = calcsize("I") sizeof_unsigned_long = calcsize("L") @@ -69,6 +72,11 @@ def save(fd, dominfo, network, live, dst "could not write guest state file: config len") write_exact(fd, config, "could not write guest state file: config") + image_cfg = dominfo.info.get('image', {}) + hvm = image_cfg.has_key('hvm') + + if hvm: + log.info("save hvm domain") # xc_save takes three customization parameters: maxit, max_f, and # flags the last controls whether or not save is 'live', 
while the # first two further customize behaviour when 'live' save is @@ -76,7 +84,7 @@ def save(fd, dominfo, network, live, dst # libxenguest; see the comments and/or code in xc_linux_save() for # more information. cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd), - str(dominfo.getDomid()), "0", "0", str(int(live)) ] + str(dominfo.getDomid()), "0", "0", str(int(live) | (int(hvm) << 2)) ] log.debug("[xc_save]: %s", string.join(cmd)) def saveInputHandler(line, tochild): @@ -90,11 +98,28 @@ def save(fd, dominfo, network, live, dst log.info("Domain %d suspended.", dominfo.getDomid()) dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3, domain_name) + #send signal to device model for save + if hvm: + log.info("release_devices for hvm domain") + dominfo._releaseDevices(True) tochild.write("done\n") tochild.flush() log.debug('Written done') forkHelper(cmd, fd, saveInputHandler, False) + + # put qemu device model state + if hvm: + write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature") + qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(), os.O_RDONLY) + while True: + buf = os.read(qemu_fd, dm_batch) + if len(buf): + write_exact(fd, buf, "could not write device model state") + else: + break + os.close(qemu_fd) + os.remove("/tmp/xen.qemu-dm.%d" % dominfo.getDomid()) dominfo.destroyDomain() try: @@ -149,19 +174,43 @@ def restore(xd, fd, dominfo = None, paus nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 + # if hvm, pass mem size to calculate the store_mfn + image_cfg = dominfo.info.get('image', {}) + is_hvm = image_cfg.has_key('hvm') + if is_hvm: + hvm = dominfo.info['memory_static_min'] + apic = dominfo.info['image']['hvm'].get('apic', 0) + pae = dominfo.info['image']['hvm'].get('pae', 0) + log.info("restore hvm domain %d, mem=%d, apic=%d, pae=%d", + dominfo.domid, hvm, apic, pae) + else: + hvm = 0 + apic = 0 + pae = 0 + try: l = read_exact(fd, sizeof_unsigned_long, "not a valid guest state file: pfn count read") max_pfn = unpack("L", l)[0] # native 
sizeof long + if max_pfn > 16*1024*1024: # XXX raise XendError( "not a valid guest state file: pfn count out of range") - balloon.free(xc.pages_to_kib(nr_pfns)) + shadow = dominfo.info['shadow_memory'] + log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, " + "nr_pfns=0x%x.", dominfo.info['shadow_memory'], + dominfo.info['memory_static_max'], + dominfo.info['memory_static_min'], nr_pfns) + + balloon.free(xc.pages_to_kib(nr_pfns) + shadow * 1024) + + shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow) + dominfo.info['shadow_memory'] = shadow_cur cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), fd, dominfo.getDomid(), max_pfn, - store_port, console_port]) + store_port, console_port, hvm, pae, apic]) log.debug("[xc_restore]: %s", string.join(cmd)) handler = RestoreInputHandler() @@ -171,10 +220,30 @@ def restore(xd, fd, dominfo = None, paus if handler.store_mfn is None or handler.console_mfn is None: raise XendError('Could not read store/console MFN') - os.read(fd, 1) # Wait for source to close connection dominfo.waitForDevices() # Wait for backends to set up if not paused: dominfo.unpause() + + # get qemu state and create a tmp file for dm restore + if is_hvm: + qemu_signature = read_exact(fd, len(QEMU_SIGNATURE), + "invalid device model signature read") + if qemu_signature != QEMU_SIGNATURE: + raise XendError("not a valid device model state: found '%s'" % + qemu_signature) + qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(), + os.O_WRONLY | os.O_CREAT | os.O_TRUNC) + while True: + buf = os.read(fd, dm_batch) + if len(buf): + write_exact(qemu_fd, buf, + "could not write dm state to tmp file") + else: + break + os.close(qemu_fd) + + + os.read(fd, 1) # Wait for source to close connection dominfo.completeRestore(handler.store_mfn, handler.console_mfn) diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Thu Jan 18 15:18:07 2007 +0000 +++ 
b/tools/python/xen/xend/XendConfig.py Fri Jan 19 14:48:57 2007 +0000 @@ -77,6 +77,25 @@ def scrub_password(data): return re.sub(r'\(vncpasswd\s+[^\)]+\)','(vncpasswd XXXXXX)', data) else: return data + +# +# CPU fields: +# +# vcpus_number -- the maximum number of vcpus that this domain may ever have. +# aka XendDomainInfo.getVCpuCount(). +# vcpus -- the legacy configuration name for above. +# max_vcpu_id -- vcpus_number - 1. This is given to us by Xen. +# +# cpus -- the list of pCPUs available to each vCPU. +# +# vcpu_avail: a bitmap telling the guest domain whether it may use each of +# its VCPUs. This is translated to +# <dompath>/cpu/<id>/availability = {online,offline} for use +# by the guest domain. +# online_vcpus -- the number of VCPUs currently up, as reported by Xen. This +# is changed by changing vcpu_avail, and waiting for the +# domain to respond. +# # Mapping from XendConfig configuration keys to the old @@ -185,7 +204,7 @@ LEGACY_CFG_TYPES = { 'uuid': str, 'name': str, 'vcpus': int, - 'vcpu_avail': int, + 'vcpu_avail': long, 'memory': int, 'shadow_memory': int, 'maxmem': int, @@ -355,9 +374,6 @@ class XendConfig(dict): 'cpu_weight': 256, 'cpu_cap': 0, 'vcpus_number': 1, - 'online_vcpus': 1, - 'max_vcpu_id': 0, - 'vcpu_avail': 1, 'console_refs': [], 'vif_refs': [], 'vbd_refs': [], @@ -389,7 +405,7 @@ class XendConfig(dict): event) def _vcpus_sanity_check(self): - if self.get('vcpus_number') != None: + if 'vcpus_number' in self and 'vcpu_avail' not in self: self['vcpu_avail'] = (1 << self['vcpus_number']) - 1 def _uuid_sanity_check(self): @@ -405,7 +421,7 @@ class XendConfig(dict): def _dominfo_to_xapi(self, dominfo): self['domid'] = dominfo['domid'] self['online_vcpus'] = dominfo['online_vcpus'] - self['max_vcpu_id'] = dominfo['max_vcpu_id'] + self['vcpus_number'] = dominfo['max_vcpu_id'] + 1 self['memory_dynamic_min'] = (dominfo['mem_kb'] + 1023)/1024 self['memory_dynamic_max'] = (dominfo['maxmem_kb'] + 1023)/1024 self['cpu_time'] = 
dominfo['cpu_time']/1e9 @@ -636,9 +652,6 @@ class XendConfig(dict): self['memory_dynamic_max'] = self['memory_static_max'] self['memory_dynamic_min'] = self['memory_static_min'] - # make sure max_vcpu_id is set correctly - self['max_vcpu_id'] = self['vcpus_number'] - 1 - # set device references in the configuration self['devices'] = cfg.get('devices', {}) @@ -720,13 +733,11 @@ class XendConfig(dict): _set_cfg_if_exists('on_xend_stop') _set_cfg_if_exists('on_xend_start') _set_cfg_if_exists('vcpu_avail') - _set_cfg_if_exists('max_vcpu_id') # needed for vcpuDomDetails _set_cfg_if_exists('cpu_weight') _set_cfg_if_exists('cpu_cap') # Parse and store runtime configuration _set_cfg_if_exists('start_time') - _set_cfg_if_exists('online_vcpus') _set_cfg_if_exists('cpu_time') _set_cfg_if_exists('shutdown_reason') _set_cfg_if_exists('up_time') @@ -1115,19 +1126,17 @@ class XendConfig(dict): # configuration log.debug("update_with_image_sxp(%s)" % scrub_password(image_sxp)) - kernel_args = "" + # user-specified args must come last: previous releases did this and + # some domU kernels rely upon the ordering. + kernel_args = sxp.child_value(image_sxp, 'args', '') # attempt to extract extra arguments from SXP config arg_ip = sxp.child_value(image_sxp, 'ip') if arg_ip and not re.search(r'ip=[^ ]+', kernel_args): - kernel_args += 'ip=%s ' % arg_ip + kernel_args = 'ip=%s ' % arg_ip + kernel_args arg_root = sxp.child_value(image_sxp, 'root') if arg_root and not re.search(r'root=', kernel_args): - kernel_args += 'root=%s ' % arg_root - - # user-specified args must come last: previous releases did this and - # some domU kernels rely upon the ordering. 
- kernel_args += sxp.child_value(image_sxp, 'args', '') + kernel_args = 'root=%s ' % arg_root + kernel_args if bootloader: self['_temp_using_bootloader'] = '1' diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/XendDomain.py Fri Jan 19 14:48:57 2007 +0000 @@ -32,7 +32,7 @@ import xen.lowlevel.xc import xen.lowlevel.xc -from xen.xend import XendRoot, XendCheckpoint, XendDomainInfo +from xen.xend import XendOptions, XendCheckpoint, XendDomainInfo from xen.xend.PrettyPrint import prettyprint from xen.xend.XendConfig import XendConfig from xen.xend.XendError import XendError, XendInvalidDomain, VmError @@ -51,7 +51,7 @@ from xen.xend import uuid from xen.xend import uuid xc = xen.lowlevel.xc.xc() -xroot = XendRoot.instance() +xoptions = XendOptions.instance() __all__ = [ "XendDomain" ] @@ -214,7 +214,7 @@ class XendDomain: @rtype: String @return: Path. """ - dom_path = xroot.get_xend_domains_path() + dom_path = xoptions.get_xend_domains_path() if domuuid: dom_path = os.path.join(dom_path, domuuid) return dom_path @@ -361,7 +361,7 @@ class XendDomain: def _setDom0CPUCount(self): """Sets the number of VCPUs dom0 has. Retreived from the - Xend configuration, L{XendRoot}. + Xend configuration, L{XendOptions}. @requires: Expects to be protected by domains_lock. 
@rtype: None @@ -369,7 +369,7 @@ class XendDomain: dom0 = self.privilegedDomain() # get max number of vcpus to use for dom0 from config - target = int(xroot.get_dom0_vcpus()) + target = int(xoptions.get_dom0_vcpus()) log.debug("number of vcpus to use is %d", target) # target == 0 means use all processors @@ -1164,7 +1164,7 @@ class XendDomain: dominfo.checkLiveMigrateMemory() if port == 0: - port = xroot.get_xend_relocation_port() + port = xoptions.get_xend_relocation_port() try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((dst, port)) diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/XendDomainInfo.py Fri Jan 19 14:48:57 2007 +0000 @@ -38,7 +38,7 @@ from xen.util import security from xen.util import security from xen.xend import balloon, sxp, uuid, image, arch, osdep -from xen.xend import XendRoot, XendNode, XendConfig +from xen.xend import XendOptions, XendNode, XendConfig from xen.xend.XendConfig import scrub_password from xen.xend.XendBootloader import bootloader @@ -54,29 +54,10 @@ BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp' BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp' xc = xen.lowlevel.xc.xc() -xroot = XendRoot.instance() +xoptions = XendOptions.instance() log = logging.getLogger("xend.XendDomainInfo") #log.setLevel(logging.TRACE) - - -# -# There are a number of CPU-related fields: -# -# vcpus: the number of virtual CPUs this domain is configured to use. -# vcpu_avail: a bitmap telling the guest domain whether it may use each of -# its VCPUs. This is translated to -# <dompath>/cpu/<id>/availability = {online,offline} for use -# by the guest domain. -# cpumap: a list of bitmaps, one for each VCPU, giving the physical -# CPUs that that VCPU may use. -# cpu: a configuration setting requesting that VCPU 0 is pinned to -# the specified physical CPU. -# -# vcpus and vcpu_avail settings persist with the VM (i.e. 
they are persistent -# across save, restore, migrate, and restart). The other settings are only -# specific to the domain, so are lost when the VM moves. -# def create(config): @@ -451,6 +432,16 @@ class XendDomainInfo: self._removeVm('xend/previous_restart_time') self.storeDom("control/shutdown", reason) + ## shutdown hypercall for hvm domain besides xenstore write + image_cfg = self.info.get('image', {}) + hvm = image_cfg.has_key('hvm') + if hvm: + for code in DOMAIN_SHUTDOWN_REASONS.keys(): + if DOMAIN_SHUTDOWN_REASONS[code] == reason: + break + xc.domain_shutdown(self.domid, code) + + def pause(self): """Pause domain @@ -614,7 +605,7 @@ class XendDomainInfo: ['name', self.info['name_label']], ['vcpu_count', self.info['vcpus_number']]] - for i in range(0, self.info['max_vcpu_id']+1): + for i in range(0, self.info['vcpus_number']): info = xc.vcpu_getinfo(self.domid, i) sxpr.append(['vcpu', @@ -739,7 +730,7 @@ class XendDomainInfo: 'domid': str(self.domid), 'vm': self.vmpath, 'name': self.info['name_label'], - 'console/limit': str(xroot.get_console_limit() * 1024), + 'console/limit': str(xoptions.get_console_limit() * 1024), 'memory/target': str(self.info['memory_static_min'] * 1024) } @@ -898,8 +889,9 @@ class XendDomainInfo: self._writeDom(self._vcpuDomDetails()) else: self.info['vcpus_number'] = vcpus - self.info['online_vcpus'] = vcpus xen.xend.XendDomain.instance().managed_config_save(self) + log.info("Set VCPU count on domain %s to %d", self.info['name_label'], + vcpus) def getLabel(self): return security.get_security_info(self.info, 'label') @@ -976,7 +968,7 @@ class XendDomainInfo: self.info['name_label'], self.domid) self._writeVm(LAST_SHUTDOWN_REASON, 'crash') - if xroot.get_enable_dump(): + if xoptions.get_enable_dump(): try: self.dumpCore() except XendError: @@ -1228,8 +1220,11 @@ class XendDomainInfo: if self.image: self.image.createDeviceModel() - def _releaseDevices(self): + def _releaseDevices(self, suspend = False): """Release all domain's 
devices. Nothrow guarantee.""" + if suspend and self.image: + self.image.destroy(suspend) + return while True: t = xstransact("%s/device" % self.dompath) @@ -1381,7 +1376,7 @@ class XendDomainInfo: # this is done prior to memory allocation to aide in memory # distribution for NUMA systems. if self.info['cpus'] is not None and len(self.info['cpus']) > 0: - for v in range(0, self.info['max_vcpu_id']+1): + for v in range(0, self.info['vcpus_number']): xc.vcpu_setaffinity(self.domid, v, self.info['cpus']) # Use architecture- and image-specific calculations to determine @@ -1395,6 +1390,7 @@ class XendDomainInfo: self.info['shadow_memory'] * 1024, self.info['memory_static_max'] * 1024) + log.debug("_initDomain:shadow_memory=0x%x, memory_static_max=0x%x, memory_static_min=0x%x.", self.info['shadow_memory'], self.info['memory_static_max'], self.info['memory_static_min'],) # Round shadow up to a multiple of a MiB, as shadow_mem_control # takes MiB and we must not round down and end up under-providing. shadow = ((shadow + 1023) / 1024) * 1024 @@ -1494,6 +1490,16 @@ class XendDomainInfo: self.console_mfn = console_mfn self._introduceDomain() + image_cfg = self.info.get('image', {}) + hvm = image_cfg.has_key('hvm') + if hvm: + self.image = image.create(self, + self.info, + self.info['image'], + self.info['devices']) + if self.image: + self.image.createDeviceModel(True) + self.image.register_shutdown_watch() self._storeDomDetails() self._registerWatches() self.refreshShutdown() @@ -2028,8 +2034,8 @@ class XendDomainInfo: # TODO: spec says that key is int, however, python does not allow # non-string keys to dictionaries. 
vcpu_util = {} - if 'max_vcpu_id' in self.info and self.domid != None: - for i in range(0, self.info['max_vcpu_id']+1): + if 'vcpus_number' in self.info and self.domid != None: + for i in range(0, self.info['vcpus_number']): info = xc.vcpu_getinfo(self.domid, i) vcpu_util[str(i)] = info['cpu_time']/1000000000.0 diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/XendNode.py Fri Jan 19 14:48:57 2007 +0000 @@ -24,7 +24,7 @@ from xen.util import Brctl from xen.xend import uuid from xen.xend.XendError import XendError, NetworkAlreadyConnected -from xen.xend.XendRoot import instance as xendroot +from xen.xend.XendOptions import instance as xendoptions from xen.xend.XendStorageRepository import XendStorageRepository from xen.xend.XendLogging import log from xen.xend.XendPIF import * @@ -45,7 +45,7 @@ class XendNode: """ self.xc = xen.lowlevel.xc.xc() - self.state_store = XendStateStore(xendroot().get_xend_state_path()) + self.state_store = XendStateStore(xendoptions().get_xend_state_path()) # load host state from XML file saved_host = self.state_store.load_state('host') diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendOptions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/python/xen/xend/XendOptions.py Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,373 @@ +#============================================================================ +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#============================================================================ +# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> +# Copyright (C) 2005 XenSource Ltd +#============================================================================ + +"""Xend root class. +Creates the servers and handles configuration. + +Other classes get config variables by importing this module, +using instance() to get a XendOptions instance, and then +the config functions (e.g. get_xend_port()) to get +configured values. +""" + +import os +import os.path +import string +import sys + +from xen.xend import sxp, osdep, XendLogging +from xen.xend.XendError import XendError + +if os.uname()[0] == 'SunOS': + from xen.lowlevel import scf + +class XendOptions: + """Configuration options.""" + + """Where network control scripts live.""" + network_script_dir = osdep.scripts_dir + + """Where block control scripts live.""" + block_script_dir = osdep.scripts_dir + + """Default path to the log file. """ + logfile_default = "/var/log/xen/xend.log" + + """Default level of information to be logged.""" + loglevel_default = 'DEBUG' + + """Default Xen-API server configuration. """ + xen_api_server_default = [['unix']] + + """Default for the flag indicating whether xend should run an http server + (deprecated).""" + xend_http_server_default = 'no' + + xend_tcp_xmlrpc_server_default = 'no' + + xend_unix_xmlrpc_server_default = 'yes' + + """Default interface address xend listens at. """ + xend_address_default = '' + + """Default for the flag indicating whether xend should run a relocation server.""" + xend_relocation_server_default = 'no' + + """Default interface address the xend relocation server listens at. 
""" + xend_relocation_address_default = '' + + """Default port xend serves HTTP at. """ + xend_port_default = 8000 + + """Default port xend serves relocation at. """ + xend_relocation_port_default = 8002 + + xend_relocation_hosts_allow_default = '' + + """Default for the flag indicating whether xend should run a unix-domain + server (deprecated).""" + xend_unix_server_default = 'no' + + """Default external migration tool """ + external_migration_tool_default = '' + + """Default path the unix-domain server listens at.""" + xend_unix_path_default = '/var/lib/xend/xend-socket' + + dom0_min_mem_default = 0 + + dom0_vcpus_default = 0 + + vncpasswd_default = None + + """Default interface to listen for VNC connections on""" + xend_vnc_listen_default = '127.0.0.1' + + """Default session storage path.""" + xend_domains_path_default = '/var/lib/xend/domains' + + """Default xend management state storage.""" + xend_state_path_default = '/var/lib/xend/state' + + """Default type of backend network interfaces""" + netback_type = osdep.netback_type + + """Default script to configure a backend network interface""" + vif_script = osdep.vif_script + + def __init__(self): + self.configure() + + def _logError(self, fmt, *args): + """Logging function to log to stderr. We use this for XendOptions log + messages because they may be logged before the logger has been + configured. Other components can safely use the logger. 
+ """ + print >>sys.stderr, "xend [ERROR]", fmt % args + + + def configure(self): + self.set_config() + XendLogging.init(self.get_config_string("logfile", + self.logfile_default), + self.get_config_string("loglevel", + self.loglevel_default)) + + def set_config(self): + raise NotImplementedError() + + def get_config_bool(self, name, val=None): + raise NotImplementedError() + + def get_config_int(self, name, val=None): + raise NotImplementedError() + + def get_config_string(self, name, val=None): + raise NotImplementedError() + + def get_xen_api_server(self): + raise NotImplementedError() + + def get_xend_http_server(self): + """Get the flag indicating whether xend should run an http server. + """ + return self.get_config_bool("xend-http-server", self.xend_http_server_default) + + def get_xend_tcp_xmlrpc_server(self): + return self.get_config_bool("xend-tcp-xmlrpc-server", + self.xend_tcp_xmlrpc_server_default) + + def get_xend_unix_xmlrpc_server(self): + return self.get_config_bool("xend-unix-xmlrpc-server", + self.xend_unix_xmlrpc_server_default) + + def get_xend_relocation_server(self): + """Get the flag indicating whether xend should run a relocation server. + """ + return self.get_config_bool("xend-relocation-server", + self.xend_relocation_server_default) + + def get_xend_port(self): + """Get the port xend listens at for its HTTP interface. + """ + return self.get_config_int('xend-port', self.xend_port_default) + + def get_xend_relocation_port(self): + """Get the port xend listens at for connection to its relocation server. + """ + return self.get_config_int('xend-relocation-port', + self.xend_relocation_port_default) + + def get_xend_relocation_hosts_allow(self): + return self.get_config_string("xend-relocation-hosts-allow", + self.xend_relocation_hosts_allow_default) + + def get_xend_address(self): + """Get the address xend listens at for its HTTP port. + This defaults to the empty string which allows all hosts to connect. 
+ If this is set to 'localhost' only the localhost will be able to connect + to the HTTP port. + """ + return self.get_config_string('xend-address', self.xend_address_default) + + def get_xend_relocation_address(self): + """Get the address xend listens at for its relocation server port. + This defaults to the empty string which allows all hosts to connect. + If this is set to 'localhost' only the localhost will be able to connect + to the relocation port. + """ + return self.get_config_string('xend-relocation-address', self.xend_relocation_address_default) + + def get_xend_unix_server(self): + """Get the flag indicating whether xend should run a unix-domain server. + """ + return self.get_config_bool("xend-unix-server", self.xend_unix_server_default) + + def get_xend_unix_path(self): + """Get the path the xend unix-domain server listens at. + """ + return self.get_config_string("xend-unix-path", self.xend_unix_path_default) + + def get_xend_domains_path(self): + """ Get the path for persistent domain configuration storage + """ + return self.get_config_string("xend-domains-path", self.xend_domains_path_default) + + def get_xend_state_path(self): + """ Get the path for persistent domain configuration storage + """ + return self.get_config_string("xend-state-path", self.xend_state_path_default) + + def get_network_script(self): + """@return the script used to alter the network configuration when + Xend starts and stops, or None if no such script is specified.""" + + s = self.get_config_string('network-script') + + if s: + result = s.split(" ") + result[0] = os.path.join(self.network_script_dir, result[0]) + return result + else: + return None + + def get_external_migration_tool(self): + """@return the name of the tool to handle virtual TPM migration.""" + return self.get_config_string('external-migration-tool', self.external_migration_tool_default) + + def get_enable_dump(self): + return self.get_config_bool('enable-dump', 'no') + + def get_vif_script(self): + return 
self.get_config_string('vif-script', self.vif_script) + + def get_dom0_min_mem(self): + return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default) + + def get_dom0_vcpus(self): + return self.get_config_int('dom0-cpus', self.dom0_vcpus_default) + + def get_console_limit(self): + return self.get_config_int('console-limit', 1024) + + def get_vnclisten_address(self): + return self.get_config_string('vnc-listen', self.xend_vnc_listen_default) + + def get_vncpasswd_default(self): + return self.get_config_string('vncpasswd', + self.vncpasswd_default) + +class XendOptionsFile(XendOptions): + + """Default path to the config file.""" + config_default = "/etc/xen/xend-config.sxp" + + """Environment variable used to override config_default.""" + config_var = "XEND_CONFIG" + + def set_config(self): + """If the config file exists, read it. If not, ignore it. + + The config file is a sequence of sxp forms. + """ + self.config_path = os.getenv(self.config_var, self.config_default) + if os.path.exists(self.config_path): + try: + fin = file(self.config_path, 'rb') + try: + config = sxp.parse(fin) + finally: + fin.close() + if config is None: + config = ['xend-config'] + else: + config.insert(0, 'xend-config') + self.config = config + except Exception, ex: + self._logError('Reading config file %s: %s', + self.config_path, str(ex)) + raise + else: + self._logError('Config file does not exist: %s', + self.config_path) + self.config = ['xend-config'] + + def get_config_value(self, name, val=None): + """Get the value of an atomic configuration element. 
+ + @param name: element name + @param val: default value (optional, defaults to None) + @return: value + """ + return sxp.child_value(self.config, name, val=val) + + def get_config_bool(self, name, val=None): + v = string.lower(str(self.get_config_value(name, val))) + if v in ['yes', 'y', '1', 'on', 'true', 't']: + return True + if v in ['no', 'n', '0', 'off', 'false', 'f']: + return False + raise XendError("invalid xend config %s: expected bool: %s" % (name, v)) + + def get_config_int(self, name, val=None): + v = self.get_config_value(name, val) + try: + return int(v) + except Exception: + raise XendError("invalid xend config %s: expected int: %s" % (name, v)) + + def get_config_string(self, name, val=None): + return self.get_config_value(name, val) + + def get_xen_api_server(self): + """Get the Xen-API server configuration. + """ + return self.get_config_value('xen-api-server', + self.xen_api_server_default) + +if os.uname()[0] == 'SunOS': + class XendOptionsSMF(XendOptions): + + def set_config(self): + pass + + def get_config_bool(self, name, val=None): + try: + return scf.get_bool(name) + except scf.error, e: + if e[0] == scf.SCF_ERROR_NOT_FOUND: + return val + else: + raise XendError("option %s: %s:%s" % (name, e[1], e[2])) + + def get_config_int(self, name, val=None): + try: + return scf.get_int(name) + except scf.error, e: + if e[0] == scf.SCF_ERROR_NOT_FOUND: + return val + else: + raise XendError("option %s: %s:%s" % (name, e[1], e[2])) + + def get_config_string(self, name, val=None): + try: + return scf.get_string(name) + except scf.error, e: + if e[0] == scf.SCF_ERROR_NOT_FOUND: + return val + else: + raise XendError("option %s: %s:%s" % (name, e[1], e[2])) + + def get_xen_api_server(self): + # When the new server is a supported configuration, we should + # expand this. + return [["unix"]] + +def instance(): + """Get an instance of XendOptions. + Use this instead of the constructor. 
+ """ + global inst + try: + inst + except: + if os.uname()[0] == 'SunOS': + inst = XendOptionsSMF() + else: + inst = XendOptionsFile() + return inst diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendProtocol.py --- a/tools/python/xen/xend/XendProtocol.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/XendProtocol.py Fri Jan 19 14:48:57 2007 +0000 @@ -24,7 +24,7 @@ from encode import * from encode import * from xen.xend import sxp -from xen.xend import XendRoot +from xen.xend import XendOptions DEBUG = 0 @@ -34,7 +34,7 @@ HTTP_NO_CONTENT = 2 HTTP_NO_CONTENT = 204 -xroot = XendRoot.instance() +xoptions = XendOptions.instance() class XendError(RuntimeError): @@ -218,7 +218,7 @@ class UnixXendClientProtocol(HttpXendCli def __init__(self, path=None): if path is None: - path = xroot.get_xend_unix_path() + path = xoptions.get_xend_unix_path() self.path = path def makeConnection(self, _): diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendRoot.py --- a/tools/python/xen/xend/XendRoot.py Thu Jan 18 15:18:07 2007 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,322 +0,0 @@ -#============================================================================ -# This library is free software; you can redistribute it and/or -# modify it under the terms of version 2.1 of the GNU Lesser General Public -# License as published by the Free Software Foundation. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. 
-# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -#============================================================================ -# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> -# Copyright (C) 2005 XenSource Ltd -#============================================================================ - -"""Xend root class. -Creates the servers and handles configuration. - -Other classes get config variables by importing this module, -using instance() to get a XendRoot instance, and then -the config functions (e.g. get_xend_port()) to get -configured values. -""" - -import os -import os.path -import string -import sys - -from xen.xend import sxp, osdep, XendLogging -from xen.xend.XendError import XendError - -class XendRoot: - """Root of the management classes.""" - - """Default path to the config file.""" - config_default = "/etc/xen/xend-config.sxp" - - """Environment variable used to override config_default.""" - config_var = "XEND_CONFIG" - - """Where network control scripts live.""" - network_script_dir = osdep.scripts_dir - - """Where block control scripts live.""" - block_script_dir = osdep.scripts_dir - - """Default path to the log file. """ - logfile_default = "/var/log/xen/xend.log" - - """Default level of information to be logged.""" - loglevel_default = 'DEBUG' - - """Default Xen-API server configuration. """ - xen_api_server_default = [['unix']] - - """Default for the flag indicating whether xend should run an http server - (deprecated).""" - xend_http_server_default = 'no' - - xend_tcp_xmlrpc_server_default = 'no' - - xend_unix_xmlrpc_server_default = 'yes' - - """Default interface address xend listens at. 
""" - xend_address_default = '' - - """Default for the flag indicating whether xend should run a relocation server.""" - xend_relocation_server_default = 'no' - - """Default interface address the xend relocation server listens at. """ - xend_relocation_address_default = '' - - """Default port xend serves HTTP at. """ - xend_port_default = '8000' - - """Default port xend serves relocation at. """ - xend_relocation_port_default = '8002' - - xend_relocation_hosts_allow_default = '' - - """Default for the flag indicating whether xend should run a unix-domain - server (deprecated).""" - xend_unix_server_default = 'no' - - """Default external migration tool """ - external_migration_tool_default = '' - - """Default path the unix-domain server listens at.""" - xend_unix_path_default = '/var/lib/xend/xend-socket' - - dom0_min_mem_default = '0' - - dom0_vcpus_default = '0' - - vncpasswd_default = None - - """Default interface to listen for VNC connections on""" - xend_vnc_listen_default = '127.0.0.1' - - """Default session storage path.""" - xend_domains_path_default = '/var/lib/xend/domains' - - """Default xend management state storage.""" - xend_state_path_default = '/var/lib/xend/state' - - components = {} - - def __init__(self): - self.config_path = None - self.config = None - self.configure() - - - def _logError(self, fmt, *args): - """Logging function to log to stderr. We use this for XendRoot log - messages because they may be logged before the logger has been - configured. Other components can safely use the logger. - """ - print >>sys.stderr, "xend [ERROR]", fmt % args - - - def configure(self): - self.set_config() - XendLogging.init(self.get_config_value("logfile", - self.logfile_default), - self.get_config_value("loglevel", - self.loglevel_default)) - - - def set_config(self): - """If the config file exists, read it. If not, ignore it. - - The config file is a sequence of sxp forms. 
- """ - self.config_path = os.getenv(self.config_var, self.config_default) - if os.path.exists(self.config_path): - try: - fin = file(self.config_path, 'rb') - try: - config = sxp.parse(fin) - finally: - fin.close() - if config is None: - config = ['xend-config'] - else: - config.insert(0, 'xend-config') - self.config = config - except Exception, ex: - self._logError('Reading config file %s: %s', - self.config_path, str(ex)) - raise - else: - self._logError('Config file does not exist: %s', - self.config_path) - self.config = ['xend-config'] - - def get_config(self, name=None): - """Get the configuration element with the given name, or - the whole configuration if no name is given. - - @param name: element name (optional) - @return: config or none - """ - if name is None: - val = self.config - else: - val = sxp.child(self.config, name) - return val - - def get_config_value(self, name, val=None): - """Get the value of an atomic configuration element. - - @param name: element name - @param val: default value (optional, defaults to None) - @return: value - """ - return sxp.child_value(self.config, name, val=val) - - def get_config_bool(self, name, val=None): - v = string.lower(str(self.get_config_value(name, val))) - if v in ['yes', 'y', '1', 'on', 'true', 't']: - return True - if v in ['no', 'n', '0', 'off', 'false', 'f']: - return False - raise XendError("invalid xend config %s: expected bool: %s" % (name, v)) - - def get_config_int(self, name, val=None): - v = self.get_config_value(name, val) - try: - return int(v) - except Exception: - raise XendError("invalid xend config %s: expected int: %s" % (name, v)) - - def get_xen_api_server(self): - """Get the Xen-API server configuration. - """ - return self.get_config_value('xen-api-server', - self.xen_api_server_default) - - def get_xend_http_server(self): - """Get the flag indicating whether xend should run an http server. 
- """ - return self.get_config_bool("xend-http-server", self.xend_http_server_default) - - def get_xend_tcp_xmlrpc_server(self): - return self.get_config_bool("xend-tcp-xmlrpc-server", - self.xend_tcp_xmlrpc_server_default) - - def get_xend_unix_xmlrpc_server(self): - return self.get_config_bool("xend-unix-xmlrpc-server", - self.xend_unix_xmlrpc_server_default) - - def get_xend_relocation_server(self): - """Get the flag indicating whether xend should run a relocation server. - """ - return self.get_config_bool("xend-relocation-server", - self.xend_relocation_server_default) - - def get_xend_port(self): - """Get the port xend listens at for its HTTP interface. - """ - return self.get_config_int('xend-port', self.xend_port_default) - - def get_xend_relocation_port(self): - """Get the port xend listens at for connection to its relocation server. - """ - return self.get_config_int('xend-relocation-port', - self.xend_relocation_port_default) - - def get_xend_relocation_hosts_allow(self): - return self.get_config_value("xend-relocation-hosts-allow", - self.xend_relocation_hosts_allow_default) - - def get_xend_address(self): - """Get the address xend listens at for its HTTP port. - This defaults to the empty string which allows all hosts to connect. - If this is set to 'localhost' only the localhost will be able to connect - to the HTTP port. - """ - return self.get_config_value('xend-address', self.xend_address_default) - - def get_xend_relocation_address(self): - """Get the address xend listens at for its relocation server port. - This defaults to the empty string which allows all hosts to connect. - If this is set to 'localhost' only the localhost will be able to connect - to the relocation port. - """ - return self.get_config_value('xend-relocation-address', self.xend_relocation_address_default) - - def get_xend_unix_server(self): - """Get the flag indicating whether xend should run a unix-domain server. 
- """ - return self.get_config_bool("xend-unix-server", self.xend_unix_server_default) - - def get_xend_unix_path(self): - """Get the path the xend unix-domain server listens at. - """ - return self.get_config_value("xend-unix-path", self.xend_unix_path_default) - - def get_xend_domains_path(self): - """ Get the path for persistent domain configuration storage - """ - return self.get_config_value("xend-domains-path", self.xend_domains_path_default) - - def get_xend_state_path(self): - """ Get the path for persistent domain configuration storage - """ - return self.get_config_value("xend-state-path", self.xend_state_path_default) - - def get_network_script(self): - """@return the script used to alter the network configuration when - Xend starts and stops, or None if no such script is specified.""" - - s = self.get_config_value('network-script') - - if s: - result = s.split(" ") - result[0] = os.path.join(self.network_script_dir, result[0]) - return result - else: - return None - - def get_external_migration_tool(self): - """@return the name of the tool to handle virtual TPM migration.""" - return self.get_config_value('external-migration-tool', self.external_migration_tool_default) - - def get_enable_dump(self): - return self.get_config_bool('enable-dump', 'no') - - def get_vif_script(self): - return self.get_config_value('vif-script', 'vif-bridge') - - def get_dom0_min_mem(self): - return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default) - - def get_dom0_vcpus(self): - return self.get_config_int('dom0-cpus', self.dom0_vcpus_default) - - def get_console_limit(self): - return self.get_config_int('console-limit', 1024) - - def get_vnclisten_address(self): - return self.get_config_value('vnc-listen', self.xend_vnc_listen_default) - - def get_vncpasswd_default(self): - return self.get_config_value('vncpasswd', - self.vncpasswd_default) - -def instance(): - """Get an instance of XendRoot. - Use this instead of the constructor. 
- """ - global inst - try: - inst - except: - inst = XendRoot() - return inst diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/balloon.py --- a/tools/python/xen/xend/balloon.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/balloon.py Fri Jan 19 14:48:57 2007 +0000 @@ -22,7 +22,7 @@ import xen.lowlevel.xc import xen.lowlevel.xc import XendDomain -import XendRoot +import XendOptions from XendLogging import log from XendError import VmError @@ -107,11 +107,11 @@ def free(need_mem): # usage, so we recheck the required alloc each time around the loop, but # track the last used value so that we don't trigger too many watches. - xroot = XendRoot.instance() + xoptions = XendOptions.instance() xc = xen.lowlevel.xc.xc() try: - dom0_min_mem = xroot.get_dom0_min_mem() * 1024 + dom0_min_mem = xoptions.get_dom0_min_mem() * 1024 retries = 0 sleep_time = SLEEP_TIME_GROWTH diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/image.py Fri Jan 19 14:48:57 2007 +0000 @@ -173,7 +173,7 @@ class ImageHandler: """Build the domain. Define in subclass.""" raise NotImplementedError() - def createDeviceModel(self): + def createDeviceModel(self, restore = False): """Create device model for the domain (define in subclass if needed).""" pass @@ -377,11 +377,12 @@ class HVMImageHandler(ImageHandler): # xm config file def parseDeviceModelArgs(self, imageConfig, deviceConfig): dmargs = [ 'boot', 'fda', 'fdb', 'soundhw', - 'localtime', 'serial', 'stdvga', 'isa', 'vcpus', + 'localtime', 'serial', 'stdvga', 'isa', 'acpi', 'usb', 'usbdevice', 'keymap' ] - ret = [] hvmDeviceConfig = imageConfig['hvm']['devices'] - + + ret = ['-vcpus', str(self.vm.getVCpuCount())] + for a in dmargs: v = hvmDeviceConfig.get(a) @@ -461,14 +462,14 @@ class HVMImageHandler(ImageHandler): vnclisten = imageConfig.get('vnclisten') if not(vnclisten): - vnclisten = (xen.xend.XendRoot.instance(). 
+ vnclisten = (xen.xend.XendOptions.instance(). get_vnclisten_address()) if vnclisten: ret += ['-vnclisten', vnclisten] vncpasswd = vncpasswd_vmconfig if vncpasswd is None: - vncpasswd = (xen.xend.XendRoot.instance(). + vncpasswd = (xen.xend.XendOptions.instance(). get_vncpasswd_default()) if vncpasswd is None: raise VmError('vncpasswd is not set up in ' + @@ -478,7 +479,7 @@ class HVMImageHandler(ImageHandler): return ret - def createDeviceModel(self): + def createDeviceModel(self, restore = False): if self.pid: return # Execute device model. @@ -487,6 +488,8 @@ class HVMImageHandler(ImageHandler): args = args + ([ "-d", "%d" % self.vm.getDomid(), "-m", "%s" % (self.getRequiredInitialReservation() / 1024)]) args = args + self.dmargs + if restore: + args = args + ([ "-loadvm", "/tmp/xen.qemu-dm.%d" % self.vm.getDomid() ]) env = dict(os.environ) if self.display: env['DISPLAY'] = self.display @@ -505,12 +508,16 @@ class HVMImageHandler(ImageHandler): self.register_reboot_feature_watch() self.pid = self.vm.gatherDom(('image/device-model-pid', int)) - def destroy(self): + def destroy(self, suspend = False): self.unregister_shutdown_watch() self.unregister_reboot_feature_watch(); if self.pid: try: - os.kill(self.pid, signal.SIGKILL) + sig = signal.SIGKILL + if suspend: + log.info("use sigusr1 to signal qemu %d", self.pid) + sig = signal.SIGUSR1 + os.kill(self.pid, sig) except OSError, exn: log.exception(exn) try: @@ -598,6 +605,9 @@ class IA64_HVM_ImageHandler(HVMImageHand # ROM size for guest firmware, ioreq page and xenstore page extra_pages = 1024 + 3 return mem_kb + extra_pages * page_kb + + def getRequiredInitialReservation(self): + return self.vm.getMemoryTarget() def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): # Explicit shadow memory is not a concept diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/osdep.py --- a/tools/python/xen/xend/osdep.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/osdep.py Fri Jan 19 14:48:57 2007 
+0000 @@ -33,9 +33,19 @@ _pygrub_path = { "SunOS": "/usr/lib/xen/bin/pygrub" } +_netback_type = { + "SunOS": "SUNW_mac" +} + +_vif_script = { + "SunOS": "vif-vnic" +} + def _get(var, default=None): return var.get(os.uname()[0], default) scripts_dir = _get(_scripts_dir, "/etc/xen/scripts") xend_autorestart = _get(_xend_autorestart) pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub") +netback_type = _get(_netback_type, "netfront") +vif_script = _get(_vif_script, "vif-bridge") diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/server/DevController.py Fri Jan 19 14:48:57 2007 +0000 @@ -19,7 +19,7 @@ from threading import Event from threading import Event import types -from xen.xend import sxp, XendRoot +from xen.xend import sxp, XendOptions from xen.xend.XendError import VmError from xen.xend.XendLogging import log @@ -50,7 +50,7 @@ xenbusState = { 'Closed' : 6, } -xroot = XendRoot.instance() +xoptions = XendOptions.instance() xenbusState.update(dict(zip(xenbusState.values(), xenbusState.keys()))) @@ -324,7 +324,7 @@ class DevController: Make sure that the migration has finished and only then return from the call. """ - tool = xroot.get_external_migration_tool() + tool = xoptions.get_external_migration_tool() if tool: log.info("Calling external migration tool for step %d" % step) fd = os.popen("%s -type %s -step %d -host %s -domname %s" % @@ -341,7 +341,7 @@ class DevController: """ Recover from device migration. The given step was the last one that was successfully executed. 
""" - tool = xroot.get_external_migration_tool() + tool = xoptions.get_external_migration_tool() if tool: log.info("Calling external migration tool") fd = os.popen("%s -type %s -step %d -host %s -domname %s -recover" % diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/SrvRoot.py --- a/tools/python/xen/xend/server/SrvRoot.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/server/SrvRoot.py Fri Jan 19 14:48:57 2007 +0000 @@ -25,7 +25,7 @@ class SrvRoot(SrvDir): """Server sub-components. Each entry is (name, class), where 'name' is the entry name and 'class' is the name of its class. """ - #todo Get this list from the XendRoot config. + #todo Get this list from the XendOptions config. subdirs = [ ('node', 'SrvNode' ), ('domain', 'SrvDomainDir' ), diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/SrvServer.py --- a/tools/python/xen/xend/server/SrvServer.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/server/SrvServer.py Fri Jan 19 14:48:57 2007 +0000 @@ -48,7 +48,7 @@ from threading import Thread from xen.web.httpserver import HttpServer, UnixHttpServer -from xen.xend import XendNode, XendRoot, XendAPI +from xen.xend import XendNode, XendOptions, XendAPI from xen.xend import Vifctl from xen.xend.XendLogging import log from xen.xend.XendClient import XEN_API_SOCKET @@ -57,7 +57,7 @@ from SrvRoot import SrvRoot from SrvRoot import SrvRoot from XMLRPCServer import XMLRPCServer -xroot = XendRoot.instance() +xoptions = XendOptions.instance() class XendServers: @@ -65,6 +65,7 @@ class XendServers: def __init__(self, root): self.servers = [] self.root = root + self.running = False self.cleaningUp = False self.reloadingConfig = False @@ -79,6 +80,7 @@ class XendServers: server.shutdown() except: pass + self.running = False def reloadConfig(self, signum = 0, frame = None): log.debug("SrvServer.reloadConfig()") @@ -107,12 +109,11 @@ class XendServers: if server.ready: continue - thread = Thread(target=server.run, 
name=server.__class__.__name__) - if isinstance(server, HttpServer): - thread.setDaemon(True) + thread = Thread(target=server.run, + name=server.__class__.__name__) + thread.setDaemon(True) thread.start() threads.append(thread) - # check for when all threads have initialized themselves and then # close the status pipe @@ -143,47 +144,32 @@ class XendServers: status.close() status = None - # Interruptible Thread.join - Python Bug #1167930 - # Replaces: for t in threads: t.join() - # Reason: The above will cause python signal handlers to be - # blocked so we're not able to catch SIGTERM in any - # way for cleanup - runningThreads = threads - while len(runningThreads) > 0: - try: - for t in threads: - t.join(1.0) - runningThreads = [t for t in threads - if t.isAlive() and not t.isDaemon()] - if self.cleaningUp and len(runningThreads) > 0: - log.debug("Waiting for %s." % - [x.getName() for x in runningThreads]) - except: - pass - + # loop to keep main thread alive until it receives a SIGTERM + self.running = True + while self.running: + time.sleep(100000000) + if self.reloadingConfig: log.info("Restarting all XML-RPC and Xen-API servers...") self.cleaningUp = False self.reloadingConfig = False - xroot.set_config() - new_servers = [x for x in self.servers - if isinstance(x, HttpServer)] - self.servers = new_servers + xoptions.set_config() + self.servers = [] _loadConfig(self, self.root, True) else: break def _loadConfig(servers, root, reload): - if not reload and xroot.get_xend_http_server(): + if xoptions.get_xend_http_server(): servers.add(HttpServer(root, - xroot.get_xend_address(), - xroot.get_xend_port())) - if not reload and xroot.get_xend_unix_server(): - path = xroot.get_xend_unix_path() + xoptions.get_xend_address(), + xoptions.get_xend_port())) + if xoptions.get_xend_unix_server(): + path = xoptions.get_xend_unix_path() log.info('unix path=' + path) servers.add(UnixHttpServer(root, path)) - api_cfg = xroot.get_xen_api_server() + api_cfg = 
xoptions.get_xen_api_server() if api_cfg: try: addrs = [(str(x[0]).split(':'), @@ -218,10 +204,10 @@ def _loadConfig(servers, root, reload): except TypeError, exn: log.error('Xen-API server configuration %s is invalid.', api_cfg) - if xroot.get_xend_tcp_xmlrpc_server(): + if xoptions.get_xend_tcp_xmlrpc_server(): servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False, True)) - if xroot.get_xend_unix_xmlrpc_server(): + if xoptions.get_xend_unix_xmlrpc_server(): servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False)) diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/XMLRPCServer.py --- a/tools/python/xen/xend/server/XMLRPCServer.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/server/XMLRPCServer.py Fri Jan 19 14:48:57 2007 +0000 @@ -179,21 +179,24 @@ class XMLRPCServer: # Custom runloop so we can cleanup when exiting. # ----------------------------------------------------------------- try: - self.server.socket.settimeout(1.0) while self.running: self.server.handle_request() finally: - self.cleanup() + self.shutdown() def cleanup(self): - log.debug("XMLRPCServer.cleanup()") - try: - self.server.socket.close() + log.debug('XMLRPCServer.cleanup()') + try: + if hasattr(self, 'server'): + # shutdown socket explicitly to allow reuse + self.server.socket.shutdown(socket.SHUT_RDWR) + self.server.socket.close() except Exception, exn: log.exception(exn) pass def shutdown(self): self.running = False - self.ready = False - + if self.ready: + self.ready = False + self.cleanup() diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/netif.py --- a/tools/python/xen/xend/server/netif.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/server/netif.py Fri Jan 19 14:48:57 2007 +0000 @@ -24,10 +24,10 @@ import random import random import re -from xen.xend import XendRoot +from xen.xend import XendOptions from xen.xend.server.DevController import DevController -xroot = XendRoot.instance() +xoptions = XendOptions.instance() def randomMAC(): 
"""Generate a random MAC address. @@ -138,8 +138,8 @@ class NetifController(DevController): def getDeviceDetails(self, config): """@see DevController.getDeviceDetails""" - script = os.path.join(xroot.network_script_dir, - config.get('script', xroot.get_vif_script())) + script = os.path.join(xoptions.network_script_dir, + config.get('script', xoptions.get_vif_script())) typ = config.get('type') bridge = config.get('bridge') mac = config.get('mac') @@ -150,9 +150,8 @@ class NetifController(DevController): devid = self.allocateDeviceID() - # The default type is 'netfront'. if not typ: - typ = 'netfront' + typ = xoptions.netback_type if not mac: mac = randomMAC() @@ -190,7 +189,7 @@ class NetifController(DevController): (script, ip, bridge, mac, typ, vifname, rate, uuid) = devinfo if script: - network_script_dir = xroot.network_script_dir + os.sep + network_script_dir = xoptions.network_script_dir + os.sep result['script'] = script.replace(network_script_dir, "") if ip: result['ip'] = ip diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/relocate.py --- a/tools/python/xen/xend/server/relocate.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/server/relocate.py Fri Jan 19 14:48:57 2007 +0000 @@ -24,7 +24,7 @@ from xen.web import protocol, tcp, unix from xen.xend import sxp from xen.xend import XendDomain -from xen.xend import XendRoot +from xen.xend import XendOptions from xen.xend.XendError import XendError from xen.xend.XendLogging import log @@ -114,15 +114,15 @@ class RelocationProtocol(protocol.Protoc def listenRelocation(): - xroot = XendRoot.instance() - if xroot.get_xend_unix_server(): + xoptions = XendOptions.instance() + if xoptions.get_xend_unix_server(): path = '/var/lib/xend/relocation-socket' unix.UnixListener(path, RelocationProtocol) - if xroot.get_xend_relocation_server(): - port = xroot.get_xend_relocation_port() - interface = xroot.get_xend_relocation_address() + if xoptions.get_xend_relocation_server(): + port = 
xoptions.get_xend_relocation_port() + interface = xoptions.get_xend_relocation_address() - hosts_allow = xroot.get_xend_relocation_hosts_allow() + hosts_allow = xoptions.get_xend_relocation_hosts_allow() if hosts_allow == '': hosts_allow = None else: diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/tests/test_controllers.py --- a/tools/python/xen/xend/server/tests/test_controllers.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/server/tests/test_controllers.py Fri Jan 19 14:48:57 2007 +0000 @@ -2,9 +2,9 @@ import re import re import unittest -import xen.xend.XendRoot +import xen.xend.XendOptions -xen.xend.XendRoot.XendRoot.config_default = '/dev/null' +xen.xend.XendOptions.XendOptions.config_default = '/dev/null' from xen.xend.server import netif @@ -13,7 +13,7 @@ FAKE_DEVID = 63 FAKE_DEVID = 63 -xroot = xen.xend.XendRoot.instance() +xoptions = xen.xend.XendOptions.instance() class test_controllers(unittest.TestCase): @@ -36,8 +36,8 @@ class test_controllers(unittest.TestCase self.assertEqual(backdets['handle'], str(FAKE_DEVID)) self.assertEqual(backdets['script'], - os.path.join(xroot.network_script_dir, - xroot.get_vif_script())) + os.path.join(xoptions.network_script_dir, + xoptions.get_vif_script())) self.assertValidMac(backdets['mac'], expectedMac) self.assertEqual(frontdets['handle'], str(FAKE_DEVID)) diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/tpmif.py --- a/tools/python/xen/xend/server/tpmif.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/server/tpmif.py Fri Jan 19 14:48:57 2007 +0000 @@ -20,7 +20,7 @@ """Support for virtual TPM interfaces.""" -from xen.xend import XendRoot +from xen.xend import XendOptions from xen.xend.XendLogging import log from xen.xend.XendError import XendError from xen.xend.XendConstants import DEV_MIGRATE_TEST, VTPM_DELETE_SCRIPT @@ -29,7 +29,7 @@ import os import os import re -xroot = XendRoot.instance() +xoptions = XendOptions.instance() def 
destroy_vtpmstate(name): if os.path.exists(VTPM_DELETE_SCRIPT): @@ -88,7 +88,7 @@ class TPMifController(DevController): def migrate(self, deviceConfig, network, dst, step, domName): """@see DevContoller.migrate""" if network: - tool = xroot.get_external_migration_tool() + tool = xoptions.get_external_migration_tool() if tool != '': log.info("Request to network-migrate device to %s. step=%d.", dst, step) @@ -116,7 +116,7 @@ class TPMifController(DevController): def recover_migrate(self, deviceConfig, network, dst, step, domName): """@see DevContoller.recover_migrate""" if network: - tool = xroot.get_external_migration_tool() + tool = xoptions.get_external_migration_tool() if tool != '': log.info("Request to recover network-migrated device. last good step=%d.", step) diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/vfbif.py --- a/tools/python/xen/xend/server/vfbif.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xend/server/vfbif.py Fri Jan 19 14:48:57 2007 +0000 @@ -52,7 +52,7 @@ class VfbifController(DevController): if config.has_key("vncpasswd"): passwd = config["vncpasswd"] else: - passwd = xen.xend.XendRoot.instance().get_vncpasswd_default() + passwd = xen.xend.XendOptions.instance().get_vncpasswd_default() if passwd: self.vm.storeVm("vncpasswd", passwd) log.debug("Stored a VNC password for vfb access") @@ -66,7 +66,7 @@ class VfbifController(DevController): elif config.has_key("vncdisplay"): args += ["--vncport", "%d" % (5900 + int(config["vncdisplay"]))] vnclisten = config.get("vnclisten", - xen.xend.XendRoot.instance().get_vnclisten_address()) + xen.xend.XendOptions.instance().get_vnclisten_address()) args += [ "--listen", vnclisten ] spawn_detached(args[0], args + std_args, os.environ) elif t == "sdl": diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xm/create.py Fri Jan 19 14:48:57 2007 +0000 @@ -189,6 +189,10 @@ 
gopts.var('vcpus', val='VCPUS', gopts.var('vcpus', val='VCPUS', fn=set_int, default=1, use="# of Virtual CPUS in domain.") + +gopts.var('vcpu_avail', val='VCPUS', + fn=set_long, default=None, + use="Bitmask for virtual CPUs to make available immediately.") gopts.var('cpu_cap', val='CAP', fn=set_int, default=None, @@ -740,7 +744,7 @@ def make_config(vals): map(add_conf, ['name', 'memory', 'maxmem', 'shadow_memory', 'restart', 'on_poweroff', - 'on_reboot', 'on_crash', 'vcpus', 'features', + 'on_reboot', 'on_crash', 'vcpus', 'vcpu_avail', 'features', 'on_xend_start', 'on_xend_stop']) if vals.uuid is not None: diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xm/main.py Fri Jan 19 14:48:57 2007 +0000 @@ -693,12 +693,15 @@ def parse_doms_info(info): up_time = time.time() - start_time return { - 'domid' : get_info('domid', str, ''), - 'name' : get_info('name', str, '??'), + 'domid' : get_info('domid', str, ''), + 'name' : get_info('name', str, '??'), 'mem' : get_info('memory_dynamic_min', int, 0), - 'vcpus' : get_info('online_vcpus', int, 0), - 'state' : get_info('state', str, ''), - 'cpu_time' : get_info('cpu_time', float, 0), + 'state' : get_info('state', str, ''), + 'cpu_time' : get_info('cpu_time', float, 0.0), + # VCPUs is the number online when the VM is up, or the number + # configured otherwise. 
+ 'vcpus' : get_info('online_vcpus', int, + get_info('vcpus', int, 0)), 'up_time' : up_time, 'seclabel' : security.get_security_printlabel(info), } diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/opts.py --- a/tools/python/xen/xm/opts.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xm/opts.py Fri Jan 19 14:48:57 2007 +0000 @@ -571,6 +571,14 @@ def set_int(opt, k, v): opt.opts.err('Invalid value: ' + str(v)) opt.set(v) +def set_long(opt, k, v): + """Set an option to a long integer value.""" + try: + v = long(v) + except: + opt.opts.err('Invalid value: ' + str(v)) + opt.set(v) + def set_float(opt, k, v): """Set an option to a float value.""" try: diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/tests/test_create.py --- a/tools/python/xen/xm/tests/test_create.py Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/python/xen/xm/tests/test_create.py Fri Jan 19 14:48:57 2007 +0000 @@ -3,9 +3,9 @@ import tempfile import tempfile import unittest -import xen.xend.XendRoot - -xen.xend.XendRoot.XendRoot.config_default = '/dev/null' +import xen.xend.XendOptions + +xen.xend.XendOptions.XendOptions.config_default = '/dev/null' import xen.xm.create diff -r 8475a4e0425e -r 3c8bb086025e tools/xcutils/xc_restore.c --- a/tools/xcutils/xc_restore.c Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/xcutils/xc_restore.c Fri Jan 19 14:48:57 2007 +0000 @@ -19,12 +19,13 @@ main(int argc, char **argv) main(int argc, char **argv) { unsigned int xc_fd, io_fd, domid, nr_pfns, store_evtchn, console_evtchn; + unsigned int hvm, pae, apic; int ret; unsigned long store_mfn, console_mfn; - if (argc != 6) + if (argc != 9) errx(1, - "usage: %s iofd domid nr_pfns store_evtchn console_evtchn", + "usage: %s iofd domid nr_pfns store_evtchn console_evtchn hvm pae apic", argv[0]); xc_fd = xc_interface_open(); @@ -36,9 +37,19 @@ main(int argc, char **argv) nr_pfns = atoi(argv[3]); store_evtchn = atoi(argv[4]); console_evtchn = atoi(argv[5]); + hvm = atoi(argv[6]); + pae = atoi(argv[7]); + apic = 
atoi(argv[8]); - ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn, - &store_mfn, console_evtchn, &console_mfn); + if (hvm) { + /* pass the memsize to xc_hvm_restore to find the store_mfn */ + store_mfn = hvm; + ret = xc_hvm_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn, + &store_mfn, console_evtchn, &console_mfn, pae, apic); + } else + ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn, + &store_mfn, console_evtchn, &console_mfn); + if (ret == 0) { printf("store-mfn %li\n", store_mfn); printf("console-mfn %li\n", console_mfn); diff -r 8475a4e0425e -r 3c8bb086025e tools/xcutils/xc_save.c --- a/tools/xcutils/xc_save.c Thu Jan 18 15:18:07 2007 +0000 +++ b/tools/xcutils/xc_save.c Fri Jan 19 14:48:57 2007 +0000 @@ -51,7 +51,10 @@ main(int argc, char **argv) max_f = atoi(argv[4]); flags = atoi(argv[5]); - ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend); + if (flags & XCFLAGS_HVM) + ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend); + else + ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend); xc_interface_close(xc_fd); diff -r 8475a4e0425e -r 3c8bb086025e unmodified_drivers/linux-2.6/platform-pci/platform-pci.c --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Thu Jan 18 15:18:07 2007 +0000 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Fri Jan 19 14:48:57 2007 +0000 @@ -182,12 +182,17 @@ static uint64_t get_callback_via(struct static uint64_t get_callback_via(struct pci_dev *pdev) { #ifdef __ia64__ - int irq; + int irq, rid; for (irq = 0; irq < 16; irq++) { if (isa_irq_to_vector(irq) == pdev->irq) return irq; } - return 0; + /* use Requester-ID as callback_irq */ + /* RID: '<#bus(8)><#dev(5)><#func(3)>' (cf. 
PCI-Express spec) */ + rid = ((pdev->bus->number & 0xff) << 8) | pdev->devfn; + printk(KERN_INFO DRV_NAME ":use Requester-ID(%04x) as callback irq\n", + rid); + return rid | IA64_CALLBACK_IRQ_RID; #else /* !__ia64__ */ if (pdev->irq < 16) return pdev->irq; /* ISA IRQ */ diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/asm-offsets.c --- a/xen/arch/ia64/asm-offsets.c Thu Jan 18 15:18:07 2007 +0000 +++ b/xen/arch/ia64/asm-offsets.c Fri Jan 19 14:48:57 2007 +0000 @@ -56,10 +56,12 @@ void foo(void) DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct vcpu, arch._thread.on_ustack)); DEFINE(IA64_VCPU_DOMAIN_OFFSET, offsetof (struct vcpu, domain)); + DEFINE(IA64_VCPU_HYPERCALL_CONTINUATION_OFS, offsetof (struct vcpu, arch.hypercall_continuation)); DEFINE(IA64_VCPU_META_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_rr0)); DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_saved_rr0)); DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, arch.breakimm)); DEFINE(IA64_VCPU_IVA_OFFSET, offsetof (struct vcpu, arch.iva)); + DEFINE(IA64_VCPU_EVENT_CALLBACK_IP_OFFSET, offsetof (struct vcpu, arch.event_callback_ip)); DEFINE(IA64_VCPU_IRR0_OFFSET, offsetof (struct vcpu, arch.irr[0])); DEFINE(IA64_VCPU_IRR3_OFFSET, offsetof (struct vcpu, arch.irr[3])); DEFINE(IA64_VCPU_INSVC3_OFFSET, offsetof (struct vcpu, arch.insvc[3])); diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/asm-xsi-offsets.c --- a/xen/arch/ia64/asm-xsi-offsets.c Thu Jan 18 15:18:07 2007 +0000 +++ b/xen/arch/ia64/asm-xsi-offsets.c Fri Jan 19 14:48:57 2007 +0000 @@ -62,7 +62,7 @@ void foo(void) DEFINE_MAPPED_REG_OFS(XSI_ITV_OFS, itv); DEFINE_MAPPED_REG_OFS(XSI_PTA_OFS, pta); DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled); - DEFINE_MAPPED_REG_OFS(XSI_INCOMPL_REGFR_OFS, incomplete_regframe); + DEFINE_MAPPED_REG_OFS(XSI_VPSR_PP_OFS, vpsr_pp); DEFINE_MAPPED_REG_OFS(XSI_METAPHYS_OFS, metaphysical_mode); DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum); 
DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]); diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/Makefile --- a/xen/arch/ia64/linux-xen/Makefile Thu Jan 18 15:18:07 2007 +0000 +++ b/xen/arch/ia64/linux-xen/Makefile Fri Jan 19 14:48:57 2007 +0000 @@ -1,3 +1,6 @@ obj-y += efi.o +subdir-y += sn + +obj-y += cmdline.o obj-y += efi.o obj-y += entry.o obj-y += irq_ia64.o diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/README.origin --- a/xen/arch/ia64/linux-xen/README.origin Thu Jan 18 15:18:07 2007 +0000 +++ b/xen/arch/ia64/linux-xen/README.origin Fri Jan 19 14:48:57 2007 +0000 @@ -5,6 +5,7 @@ # (e.g. with #ifdef XEN or XEN in a comment) so that they can be # easily updated to future versions of the corresponding Linux files. +cmdline.c -> linux/lib/cmdline.c efi.c -> linux/arch/ia64/kernel/efi.c entry.h -> linux/arch/ia64/kernel/entry.h entry.S -> linux/arch/ia64/kernel/entry.S diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/cmdline.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux-xen/cmdline.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,131 @@ +/* + * linux/lib/cmdline.c + * Helper functions generally used for parsing kernel command line + * and module options. + * + * Code and copyrights come from init/main.c and arch/i386/kernel/setup.c. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + * + * GNU Indent formatting options for this file: -kr -i8 -npsl -pcs + * + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/string.h> +#ifdef XEN +#include <xen/lib.h> +#endif + + +/** + * get_option - Parse integer from an option string + * @str: option string + * @pint: (output) integer value parsed from @str + * + * Read an int from an option string; if available accept a subsequent + * comma as well. 
+ * + * Return values: + * 0 : no int in string + * 1 : int found, no subsequent comma + * 2 : int found including a subsequent comma + */ + +int get_option (char **str, int *pint) +{ + char *cur = *str; + + if (!cur || !(*cur)) + return 0; +#ifndef XEN + *pint = simple_strtol (cur, str, 0); +#else + *pint = simple_strtol (cur, (const char**)str, 0); +#endif + if (cur == *str) + return 0; + if (**str == ',') { + (*str)++; + return 2; + } + + return 1; +} + +/** + * get_options - Parse a string into a list of integers + * @str: String to be parsed + * @nints: size of integer array + * @ints: integer array + * + * This function parses a string containing a comma-separated + * list of integers. The parse halts when the array is + * full, or when no more numbers can be retrieved from the + * string. + * + * Return value is the character in the string which caused + * the parse to end (typically a null terminator, if @str is + * completely parseable). + */ + +char *get_options(const char *str, int nints, int *ints) +{ + int res, i = 1; + + while (i < nints) { + res = get_option ((char **)&str, ints + i); + if (res == 0) + break; + i++; + if (res == 1) + break; + } + ints[0] = i - 1; + return (char *)str; +} + +/** + * memparse - parse a string with mem suffixes into a number + * @ptr: Where parse begins + * @retptr: (output) Pointer to next char after parse completes + * + * Parses a string into a number. The number stored at @ptr is + * potentially suffixed with %K (for kilobytes, or 1024 bytes), + * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or + * 1073741824). If the number is suffixed with K, M, or G, then + * the return value is the number multiplied by one kilobyte, one + * megabyte, or one gigabyte, respectively. 
+ */ + +unsigned long long memparse (char *ptr, char **retptr) +{ +#ifndef XEN + unsigned long long ret = simple_strtoull (ptr, retptr, 0); +#else + unsigned long long ret = simple_strtoull (ptr, (const char**)retptr, 0); +#endif + + switch (**retptr) { + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + (*retptr)++; + default: + break; + } + return ret; +} + + +EXPORT_SYMBOL(memparse); +EXPORT_SYMBOL(get_option); +EXPORT_SYMBOL(get_options); diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/entry.S --- a/xen/arch/ia64/linux-xen/entry.S Thu Jan 18 15:18:07 2007 +0000 +++ b/xen/arch/ia64/linux-xen/entry.S Fri Jan 19 14:48:57 2007 +0000 @@ -676,7 +676,9 @@ GLOBAL_ENTRY(ia64_ret_from_syscall) cmp.ge p6,p7=r8,r0 // syscall executed successfully? adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 mov r10=r0 // clear error indication in r10 +#ifndef XEN (p7) br.cond.spnt handle_syscall_error // handle potential syscall failure +#endif END(ia64_ret_from_syscall) // fall through /* @@ -764,7 +766,9 @@ ENTRY(ia64_leave_syscall) ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" nop.i 0 ;; +#ifndef XEN mov r16=ar.bsp // M2 get existing backing store pointer +#endif ld8 r18=[r2],PT(R9)-PT(B6) // load b6 #ifndef XEN (p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? 
@@ -814,7 +818,11 @@ ENTRY(ia64_leave_syscall) mov f8=f0 // F clear f8 ;; ld8.fill r12=[r2] // M0|1 restore r12 (sp) +#ifdef XEN + ld8.fill r2=[r3] // M0|1 +#else ld8.fill r15=[r3] // M0|1 restore r15 +#endif mov b6=r18 // I0 restore b6 #ifdef XEN @@ -827,7 +835,9 @@ ENTRY(ia64_leave_syscall) srlz.d // M0 ensure interruption collection is off (for cover) shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition +#ifndef XEN cover // B add current frame into dirty partition & set cr.ifs +#endif ;; (pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8 mov r19=ar.bsp // M2 get new backing store pointer @@ -893,23 +903,18 @@ GLOBAL_ENTRY(ia64_leave_kernel) .work_processed_kernel: #ifdef XEN ;; -(pUStk) ssm psr.i -(pUStk) br.call.sptk.many b0=do_softirq -(pUStk) rsm psr.i - ;; - alloc loc0=ar.pfs,0,1,1,0 - adds out0=16,r12 +(pUStk) ssm psr.i +(pUStk) br.call.sptk.many b0=do_softirq +(pUStk) ssm psr.i + ;; +(pUStk) br.call.sptk.many b0=reflect_event + ;; adds r7 = PT(EML_UNAT)+16,r12 ;; ld8 r7 = [r7] ;; -(pUStk) br.call.sptk.many b0=reflect_event -//(pUStk) br.call.sptk.many b0=deliver_pending_interrupt - ;; - mov ar.pfs=loc0 mov ar.unat=r7 /* load eml_unat */ mov r31=r0 - #else adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 @@ -1184,8 +1189,11 @@ skip_rbs_switch: mov cr.ipsr=r29 // M2 mov ar.pfs=r26 // I0 (pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise - +#ifdef XEN + mov cr.ifs=r30 // M2 +#else (p9) mov cr.ifs=r30 // M2 +#endif mov b0=r21 // I0 (pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise @@ -1195,7 +1203,11 @@ skip_rbs_switch: ;; (pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode nop 0 +#ifdef XEN +(pLvSys)mov r15=r0 +#else (pLvSys)mov r2=r0 +#endif mov ar.rsc=r27 // M2 mov pr=r31,-1 // I0 @@ -1459,7 +1471,89 @@ 1: mov gp=loc2 // restore gp br.ret.sptk.many rp END(unw_init_running) -#ifndef XEN +#ifdef XEN +GLOBAL_ENTRY(ia64_do_multicall_call) + movl r2=ia64_hypercall_table;; + shladd 
r2=r38,3,r2;; + ld8 r2=[r2];; + mov b6=r2 + br.sptk.many b6;; +END(ia64_do_multicall_call) + + + .rodata + .align 8 + .globl ia64_hypercall_table +ia64_hypercall_table: + data8 do_ni_hypercall /* do_set_trap_table *//* 0 */ + data8 do_ni_hypercall /* do_mmu_update */ + data8 do_ni_hypercall /* do_set_gdt */ + data8 do_ni_hypercall /* do_stack_switch */ + data8 do_ni_hypercall /* do_set_callbacks */ + data8 do_ni_hypercall /* do_fpu_taskswitch *//* 5 */ + data8 do_sched_op_compat + data8 do_ni_hypercall + data8 do_ni_hypercall /* do_set_debugreg */ + data8 do_ni_hypercall /* do_get_debugreg */ + data8 do_ni_hypercall /* do_update_descriptor * 10 */ + data8 do_ni_hypercall /* do_ni_hypercall */ + data8 do_memory_op + data8 do_multicall + data8 do_ni_hypercall /* do_update_va_mapping */ + data8 do_ni_hypercall /* do_set_timer_op */ /* 15 */ + data8 do_ni_hypercall + data8 do_xen_version + data8 do_console_io + data8 do_ni_hypercall + data8 do_grant_table_op /* 20 */ + data8 do_ni_hypercall /* do_vm_assist */ + data8 do_ni_hypercall /* do_update_va_mapping_othe */ + data8 do_ni_hypercall /* (x86 only) */ + data8 do_ni_hypercall /* do_vcpu_op */ + data8 do_ni_hypercall /* (x86_64 only) */ /* 25 */ + data8 do_ni_hypercall /* do_mmuext_op */ + data8 do_ni_hypercall /* do_acm_op */ + data8 do_ni_hypercall /* do_nmi_op */ + data8 do_sched_op + data8 do_callback_op /* */ /* 30 */ + data8 do_xenoprof_op /* */ + data8 do_event_channel_op + data8 do_physdev_op + data8 do_hvm_op /* */ + data8 do_sysctl /* */ /* 35 */ + data8 do_domctl /* */ + data8 do_ni_hypercall /* */ + data8 do_ni_hypercall /* */ + data8 do_ni_hypercall /* */ + data8 do_ni_hypercall /* */ /* 40 */ + data8 do_ni_hypercall /* */ + data8 do_ni_hypercall /* */ + data8 do_ni_hypercall /* */ + data8 do_ni_hypercall /* */ + data8 do_ni_hypercall /* */ /* 45 */ + data8 do_ni_hypercall /* */ + data8 do_ni_hypercall /* */ + data8 do_dom0vp_op /* dom0vp_op */ + data8 do_pirq_guest_eoi /* arch_1 */ + data8 
do_ni_hypercall /* arch_2 */ /* 50 */ + data8 do_ni_hypercall /* arch_3 */ + data8 do_ni_hypercall /* arch_4 */ + data8 do_ni_hypercall /* arch_5 */ + data8 do_ni_hypercall /* arch_6 */ + data8 do_ni_hypercall /* arch_7 */ /* 55 */ + data8 do_ni_hypercall + data8 do_ni_hypercall + data8 do_ni_hypercall + data8 do_ni_hypercall + data8 do_ni_hypercall /* 60 */ + data8 do_ni_hypercall + data8 do_ni_hypercall + data8 do_ni_hypercall + + // guard against failures to increase NR_hypercalls + .org ia64_hypercall_table + 8*NR_hypercalls + +#else .rodata .align 8 .globl sys_call_table diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux-xen/sn/Makefile Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,1 @@ +subdir-y += kernel diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux-xen/sn/kernel/Makefile Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,5 @@ +obj-y += sn2_smp.o +obj-y += setup.o +obj-y += iomv.o +obj-y += irq.o +obj-y += io_init.o diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/README.origin --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux-xen/sn/kernel/README.origin Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,12 @@ +# Source files in this directory are near-identical copies of linux-2.6.19 +# files: + +# NOTE: ALL changes to these files should be clearly marked +# (e.g. with #ifdef XEN or XEN in a comment) so that they can be +# easily updated to future versions of the corresponding Linux files. 
+ +io_init.c -> linux/arch/ia64/sn/kernel/io_init.c +iomv.c -> linux/arch/ia64/sn/kernel/iomv.c +irq.c -> linux/arch/ia64/sn/kernel/irq.c +setup.c -> linux/arch/ia64/sn/kernel/setup.c +sn2_smp.c -> linux/arch/ia64/sn/kernel/sn2/sn2_smp.c diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/io_init.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux-xen/sn/kernel/io_init.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,783 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/bootmem.h> +#include <linux/nodemask.h> +#ifdef XEN +#include <linux/init.h> +#endif +#include <asm/sn/types.h> +#include <asm/sn/addrs.h> +#include <asm/sn/sn_feature_sets.h> +#include <asm/sn/geo.h> +#include <asm/sn/io.h> +#include <asm/sn/l1.h> +#include <asm/sn/module.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#ifndef XEN +#include <asm/sn/pcidev.h> +#endif +#include <asm/sn/simulator.h> +#include <asm/sn/sn_sal.h> +#ifndef XEN +#include <asm/sn/tioca_provider.h> +#include <asm/sn/tioce_provider.h> +#endif +#ifdef XEN +#include "asm/sn/hubdev.h" +#include "asm/sn/xwidgetdev.h" +#else +#include "xtalk/hubdev.h" +#include "xtalk/xwidgetdev.h" +#endif + + +extern void sn_init_cpei_timer(void); +extern void register_sn_procfs(void); +#ifdef XEN +extern void sn_irq_lh_init(void); +#endif + +static struct list_head sn_sysdata_list; + +/* sysdata list struct */ +struct sysdata_el { + struct list_head entry; + void *sysdata; +}; + +struct slab_info { + struct hubdev_info hubdev; +}; + +struct brick { + moduleid_t id; /* Module ID of this module */ + struct slab_info slab_info[MAX_SLABS + 1]; +}; + +int sn_ioif_inited; /* SN I/O infrastructure initialized? 
*/ + +struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ + +#ifndef XEN +static int max_segment_number; /* Default highest segment number */ +static int max_pcibus_number = 255; /* Default highest pci bus number */ + +/* + * Hooks and struct for unsupported pci providers + */ + +static dma_addr_t +sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int type) +{ + return 0; +} + +static void +sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction) +{ + return; +} + +static void * +sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller) +{ + return NULL; +} + +static struct sn_pcibus_provider sn_pci_default_provider = { + .dma_map = sn_default_pci_map, + .dma_map_consistent = sn_default_pci_map, + .dma_unmap = sn_default_pci_unmap, + .bus_fixup = sn_default_pci_bus_fixup, +}; +#endif + +/* + * Retrieve the DMA Flush List given nasid, widget, and device. + * This list is needed to implement the WAR - Flush DMA data on PIO Reads. + */ +static inline u64 +sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, + u64 address) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST, + (u64) nasid, (u64) widget_num, + (u64) device_num, (u64) address, 0, 0, 0); + return ret_stuff.status; +} + +/* + * Retrieve the hub device info structure for the given nasid. + */ +static inline u64 sal_get_hubdev_info(u64 handle, u64 address) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_HUBDEV_INFO, + (u64) handle, (u64) address, 0, 0, 0, 0, 0); + return ret_stuff.v0; +} + +/* + * Retrieve the pci bus information given the bus number. 
+ */ +static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_PCIBUS_INFO, + (u64) segment, (u64) busnum, (u64) address, 0, 0, 0, 0); + return ret_stuff.v0; +} + +#ifndef XEN +/* + * Retrieve the pci device information given the bus and device|function number. + */ +static inline u64 +sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev, + u64 sn_irq_info) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_PCIDEV_INFO, + (u64) segment, (u64) bus_number, (u64) devfn, + (u64) pci_dev, + sn_irq_info, 0, 0); + return ret_stuff.v0; +} + +/* + * sn_pcidev_info_get() - Retrieve the pcidev_info struct for the specified + * device. + */ +inline struct pcidev_info * +sn_pcidev_info_get(struct pci_dev *dev) +{ + struct pcidev_info *pcidev; + + list_for_each_entry(pcidev, + &(SN_PCI_CONTROLLER(dev)->pcidev_info), pdi_list) { + if (pcidev->pdi_linux_pcidev == dev) { + return pcidev; + } + } + return NULL; +} + +/* Older PROM flush WAR + * + * 01/16/06 -- This war will be in place until a new official PROM is released. 
+ * Additionally note that the struct sn_flush_device_war also has to be + * removed from arch/ia64/sn/include/xtalk/hubdev.h + */ +static u8 war_implemented = 0; + +static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device, + struct sn_flush_device_common *common) +{ + struct sn_flush_device_war *war_list; + struct sn_flush_device_war *dev_entry; + struct ia64_sal_retval isrv = {0,0,0,0}; + + if (!war_implemented) { + printk(KERN_WARNING "PROM version < 4.50 -- implementing old " + "PROM flush WAR\n"); + war_implemented = 1; + } + + war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL); + if (!war_list) + BUG(); + + SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST, + nasid, widget, __pa(war_list), 0, 0, 0 ,0); + if (isrv.status) + panic("sn_device_fixup_war failed: %s\n", + ia64_sal_strerror(isrv.status)); + + dev_entry = war_list + device; + memcpy(common,dev_entry, sizeof(*common)); + kfree(war_list); + + return isrv.status; +} + +/* + * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for + * each node in the system. + */ +static void __init sn_fixup_ionodes(void) +{ + struct sn_flush_device_kernel *sn_flush_device_kernel; + struct sn_flush_device_kernel *dev_entry; + struct hubdev_info *hubdev; + u64 status; + u64 nasid; + int i, widget, device, size; + + /* + * Get SGI Specific HUB chipset information. + * Inform Prom that this kernel can support domain bus numbering. + */ + for (i = 0; i < num_cnodes; i++) { + hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); + nasid = cnodeid_to_nasid(i); + hubdev->max_segment_number = 0xffffffff; + hubdev->max_pcibus_number = 0xff; + status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev)); + if (status) + continue; + + /* Save the largest Domain and pcibus numbers found. */ + if (hubdev->max_segment_number) { + /* + * Dealing with a Prom that supports segments. 
+ */ + max_segment_number = hubdev->max_segment_number; + max_pcibus_number = hubdev->max_pcibus_number; + } + + /* Attach the error interrupt handlers */ + if (nasid & 1) + ice_error_init(hubdev); + else + hub_error_init(hubdev); + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) + hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev; + + if (!hubdev->hdi_flush_nasid_list.widget_p) + continue; + + size = (HUB_WIDGET_ID_MAX + 1) * + sizeof(struct sn_flush_device_kernel *); + hubdev->hdi_flush_nasid_list.widget_p = + kzalloc(size, GFP_KERNEL); + if (!hubdev->hdi_flush_nasid_list.widget_p) + BUG(); + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { + size = DEV_PER_WIDGET * + sizeof(struct sn_flush_device_kernel); + sn_flush_device_kernel = kzalloc(size, GFP_KERNEL); + if (!sn_flush_device_kernel) + BUG(); + + dev_entry = sn_flush_device_kernel; + for (device = 0; device < DEV_PER_WIDGET; + device++,dev_entry++) { + size = sizeof(struct sn_flush_device_common); + dev_entry->common = kzalloc(size, GFP_KERNEL); + if (!dev_entry->common) + BUG(); + + if (sn_prom_feature_available( + PRF_DEVICE_FLUSH_LIST)) + status = sal_get_device_dmaflush_list( + nasid, widget, device, + (u64)(dev_entry->common)); + else +#ifdef XEN + BUG(); +#else + status = sn_device_fixup_war(nasid, + widget, device, + dev_entry->common); +#endif + if (status != SALRET_OK) + panic("SAL call failed: %s\n", + ia64_sal_strerror(status)); + + spin_lock_init(&dev_entry->sfdl_flush_lock); + } + + if (sn_flush_device_kernel) + hubdev->hdi_flush_nasid_list.widget_p[widget] = + sn_flush_device_kernel; + } + } +} + +/* + * sn_pci_window_fixup() - Create a pci_window for each device resource. + * Until ACPI support is added, we need this code + * to setup pci_windows for use by + * pcibios_bus_to_resource(), + * pcibios_resource_to_bus(), etc. 
+ */ +static void +sn_pci_window_fixup(struct pci_dev *dev, unsigned int count, + s64 * pci_addrs) +{ + struct pci_controller *controller = PCI_CONTROLLER(dev->bus); + unsigned int i; + unsigned int idx; + unsigned int new_count; + struct pci_window *new_window; + + if (count == 0) + return; + idx = controller->windows; + new_count = controller->windows + count; + new_window = kcalloc(new_count, sizeof(struct pci_window), GFP_KERNEL); + if (new_window == NULL) + BUG(); + if (controller->window) { + memcpy(new_window, controller->window, + sizeof(struct pci_window) * controller->windows); + kfree(controller->window); + } + + /* Setup a pci_window for each device resource. */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + if (pci_addrs[i] == -1) + continue; + + new_window[idx].offset = dev->resource[i].start - pci_addrs[i]; + new_window[idx].resource = dev->resource[i]; + idx++; + } + + controller->windows = new_count; + controller->window = new_window; +} + +void sn_pci_unfixup_slot(struct pci_dev *dev) +{ + struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev; + + sn_irq_unfixup(dev); + pci_dev_put(host_pci_dev); + pci_dev_put(dev); +} + +/* + * sn_pci_fixup_slot() - This routine sets up a slot's resources + * consistent with the Linux PCI abstraction layer. Resources acquired + * from our PCI provider include PIO maps to BAR space and interrupt + * objects. 
+ */ +void sn_pci_fixup_slot(struct pci_dev *dev) +{ + unsigned int count = 0; + int idx; + int segment = pci_domain_nr(dev->bus); + int status = 0; + struct pcibus_bussoft *bs; + struct pci_bus *host_pci_bus; + struct pci_dev *host_pci_dev; + struct pcidev_info *pcidev_info; + s64 pci_addrs[PCI_ROM_RESOURCE + 1]; + struct sn_irq_info *sn_irq_info; + unsigned long size; + unsigned int bus_no, devfn; + + pci_dev_get(dev); /* for the sysdata pointer */ + pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL); + if (!pcidev_info) + BUG(); /* Cannot afford to run out of memory */ + + sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (!sn_irq_info) + BUG(); /* Cannot afford to run out of memory */ + + /* Call to retrieve pci device information needed by kernel. */ + status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number, + dev->devfn, + (u64) __pa(pcidev_info), + (u64) __pa(sn_irq_info)); + if (status) + BUG(); /* Cannot get platform pci device information */ + + /* Add pcidev_info to list in sn_pci_controller struct */ + list_add_tail(&pcidev_info->pdi_list, + &(SN_PCI_CONTROLLER(dev->bus)->pcidev_info)); + + /* Copy over PIO Mapped Addresses */ + for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) { + unsigned long start, end, addr; + + if (!pcidev_info->pdi_pio_mapped_addr[idx]) { + pci_addrs[idx] = -1; + continue; + } + + start = dev->resource[idx].start; + end = dev->resource[idx].end; + size = end - start; + if (size == 0) { + pci_addrs[idx] = -1; + continue; + } + pci_addrs[idx] = start; + count++; + addr = pcidev_info->pdi_pio_mapped_addr[idx]; + addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET; + dev->resource[idx].start = addr; + dev->resource[idx].end = addr + size; + if (dev->resource[idx].flags & IORESOURCE_IO) + dev->resource[idx].parent = &ioport_resource; + else + dev->resource[idx].parent = &iomem_resource; + } + /* Create a pci_window in the pci_controller struct for + * each device resource. 
+ */ + if (count > 0) + sn_pci_window_fixup(dev, count, pci_addrs); + + /* + * Using the PROMs values for the PCI host bus, get the Linux + * PCI host_pci_dev struct and set up host bus linkages + */ + + bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff; + devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff; + host_pci_bus = pci_find_bus(segment, bus_no); + host_pci_dev = pci_get_slot(host_pci_bus, devfn); + + pcidev_info->host_pci_dev = host_pci_dev; + pcidev_info->pdi_linux_pcidev = dev; + pcidev_info->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev); + bs = SN_PCIBUS_BUSSOFT(dev->bus); + pcidev_info->pdi_pcibus_info = bs; + + if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) { + SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type]; + } else { + SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider; + } + + /* Only set up IRQ stuff if this device has a host bus context */ + if (bs && sn_irq_info->irq_irq) { + pcidev_info->pdi_sn_irq_info = sn_irq_info; + dev->irq = pcidev_info->pdi_sn_irq_info->irq_irq; + sn_irq_fixup(dev, sn_irq_info); + } else { + pcidev_info->pdi_sn_irq_info = NULL; + kfree(sn_irq_info); + } +} + +/* + * sn_pci_controller_fixup() - This routine sets up a bus's resources + * consistent with the Linux PCI abstraction layer. + */ +void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) +{ + int status; + int nasid, cnode; + struct pci_controller *controller; + struct sn_pci_controller *sn_controller; + struct pcibus_bussoft *prom_bussoft_ptr; + struct hubdev_info *hubdev_info; + void *provider_soft; + struct sn_pcibus_provider *provider; + + status = sal_get_pcibus_info((u64) segment, (u64) busnum, + (u64) ia64_tpa(&prom_bussoft_ptr)); + if (status > 0) + return; /*bus # does not exist */ + prom_bussoft_ptr = __va(prom_bussoft_ptr); + + /* Allocate a sn_pci_controller, which has a pci_controller struct + * as the first member. 
+ */ + sn_controller = kzalloc(sizeof(struct sn_pci_controller), GFP_KERNEL); + if (!sn_controller) + BUG(); /* allocation failure is treated as fatal */ + INIT_LIST_HEAD(&sn_controller->pcidev_info); + controller = &sn_controller->pci_controller; + controller->segment = segment; + + if (bus == NULL) { /* caller supplied no bus: scan it here */ + bus = pci_scan_bus(busnum, &pci_root_ops, controller); + if (bus == NULL) + goto error_return; /* error, or bus already scanned */ + bus->sysdata = NULL; + } + + if (bus->sysdata) + goto error_return; /* sysdata already alloc'd */ + + /* + * Per-provider fixup. Copies the contents from prom to local + * area and links SN_PCIBUS_BUSSOFT(). + */ + + if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) + goto error_return; /* unsupported asic type */ + + if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) + goto error_return; /* no further fixup necessary */ + + provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type]; + if (provider == NULL) + goto error_return; /* no provider registered for this asic */ + + bus->sysdata = controller; /* set before the provider hook runs; reset to NULL below if the hook yields nothing */ + if (provider->bus_fixup) + provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller); + else + provider_soft = NULL; + + if (provider_soft == NULL) { + /* fixup failed or not applicable */ + bus->sysdata = NULL; + goto error_return; + } + + /* + * Setup pci_windows for legacy IO and MEM space. + * (Temporary until ACPI support is in place.) 
+ */ + controller->window = kcalloc(2, sizeof(struct pci_window), GFP_KERNEL); + if (controller->window == NULL) + BUG(); /* allocation failure is treated as fatal */ + controller->window[0].offset = prom_bussoft_ptr->bs_legacy_io; + controller->window[0].resource.name = "legacy_io"; + controller->window[0].resource.flags = IORESOURCE_IO; + controller->window[0].resource.start = prom_bussoft_ptr->bs_legacy_io; + controller->window[0].resource.end = + controller->window[0].resource.start + 0xffff; /* 64KB legacy I/O window */ + controller->window[0].resource.parent = &ioport_resource; + controller->window[1].offset = prom_bussoft_ptr->bs_legacy_mem; + controller->window[1].resource.name = "legacy_mem"; + controller->window[1].resource.flags = IORESOURCE_MEM; + controller->window[1].resource.start = prom_bussoft_ptr->bs_legacy_mem; + controller->window[1].resource.end = + controller->window[1].resource.start + (1024 * 1024) - 1; /* 1MB legacy memory window */ + controller->window[1].resource.parent = &iomem_resource; + controller->windows = 2; + + /* + * Generic bus fixup goes here. Don't reference prom_bussoft_ptr + * after this point. + */ + + PCI_CONTROLLER(bus)->platform_data = provider_soft; + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info = + &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]); + + /* + * If the node information we obtained during the fixup phase is invalid + * then set controller->node to -1 (undetermined) + */ + if (controller->node >= num_online_nodes()) { + struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus); + /* Adjacent string literals are concatenated as-is, so each piece carries its own separating space. */ + printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u " + "L_IO=%lx L_MEM=%lx BASE=%lx\n", + b->bs_asic_type, b->bs_xid, b->bs_persist_busnum, + b->bs_legacy_io, b->bs_legacy_mem, b->bs_base); + printk(KERN_WARNING "on node %d but only %d nodes online. " 
+ "Association set to undetermined.\n", + controller->node, num_online_nodes()); + controller->node = -1; + } + return; + +error_return: + + kfree(sn_controller); + return; +} + +/* sn_bus_store_sysdata() - remember the device's SN_PCIDEV_INFO() pointer on sn_sysdata_list; on allocation failure only a debug message is emitted. */ void sn_bus_store_sysdata(struct pci_dev *dev) +{ + struct sysdata_el *element; + + element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL); + if (!element) { + dev_dbg(&dev->dev, "%s: out of memory!\n", __FUNCTION__); /* dev_dbg() takes the embedded struct device, not the pci_dev */ + return; + } + element->sysdata = SN_PCIDEV_INFO(dev); + list_add(&element->entry, &sn_sysdata_list); +} + +/* sn_bus_free_sysdata() - walk sn_sysdata_list, unlink each entry and the pcidev_info it points to (from its pdi_list), and free both. */ void sn_bus_free_sysdata(void) +{ + struct sysdata_el *element; + struct list_head *list, *safe; + + list_for_each_safe(list, safe, &sn_sysdata_list) { + element = list_entry(list, struct sysdata_el, entry); + list_del(&element->entry); + list_del(&(((struct pcidev_info *) + (element->sysdata))->pdi_list)); + kfree(element->sysdata); + kfree(element); + } + return; +} +#endif + +/* + * Ugly hack to get PCI setup until we have a proper ACPI namespace. + */ + +#define PCI_BUSES_TO_SCAN 256 + +static int __init sn_pci_init(void) +{ +#ifndef XEN + int i, j; + struct pci_dev *pci_dev = NULL; +#endif + + if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) + return 0; + +#ifndef XEN + /* + * prime sn_pci_provider[]. Individual provider init routines will + * override their respective default entries. + */ + + for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++) + sn_pci_provider[i] = &sn_pci_default_provider; + + pcibr_init_provider(); + tioca_init_provider(); + tioce_init_provider(); +#endif + + /* + * This is needed to avoid bounce limit checks in the blk layer + */ + ia64_max_iommu_merge_mask = ~PAGE_MASK; +#ifndef XEN + sn_fixup_ionodes(); +#endif + sn_irq_lh_init(); + INIT_LIST_HEAD(&sn_sysdata_list); +#ifndef XEN + sn_init_cpei_timer(); + +#ifdef CONFIG_PROC_FS + register_sn_procfs(); +#endif + + /* busses are not known yet ... 
*/ + for (i = 0; i <= max_segment_number; i++) + for (j = 0; j <= max_pcibus_number; j++) + sn_pci_controller_fixup(i, j, NULL); + + /* + * Generic Linux PCI Layer has created the pci_bus and pci_dev + * structures - time for us to add our SN Platform specific + * information. + */ + + while ((pci_dev = + pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) + sn_pci_fixup_slot(pci_dev); +#endif + + sn_ioif_inited = 1; /* sn I/O infrastructure now initialized */ + + return 0; +} + +/* + * hubdev_init_node() - Creates the HUB data structure and links it to its + * own NODE specific data area. + */ +void hubdev_init_node(nodepda_t * npda, cnodeid_t node) +{ + struct hubdev_info *hubdev_info; + int size; +#ifndef XEN + pg_data_t *pg; +#else + struct pglist_data *pg; +#endif + + size = sizeof(struct hubdev_info); + + if (node >= num_online_nodes()) /* Headless/memless IO nodes */ + pg = NODE_DATA(0); /* these allocate from node 0 instead */ + else + pg = NODE_DATA(node); + + hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size); + + npda->pdinfo = (void *)hubdev_info; /* later read back through NODEPDA(cnode)->pdinfo */ +} + +/* cnodeid_get_geoid() - returns hdi_geoid from the hubdev_info stored in NODEPDA(cnode)->pdinfo. */ geoid_t +cnodeid_get_geoid(cnodeid_t cnode) +{ + struct hubdev_info *hubdev; + + hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + return hubdev->hdi_geoid; +} + +#ifndef XEN +/* sn_generate_path() - formats a module_... location string for pci_bus into the caller-supplied buffer address; no buffer length is passed or checked here. */ void sn_generate_path(struct pci_bus *pci_bus, char *address) +{ + nasid_t nasid; + cnodeid_t cnode; + geoid_t geoid; + moduleid_t moduleid; + u16 bricktype; + + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + geoid = cnodeid_get_geoid(cnode); + moduleid = geo_module(geoid); + + sprintf(address, "module_%c%c%c%c%.2d", + '0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)), + '0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)), + '0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)), + MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid)); + + /* Tollhouse requires slot id to be displayed */ + bricktype = MODULE_GET_BTYPE(moduleid); + if ((bricktype == L1_BRICKTYPE_191010) || + (bricktype == L1_BRICKTYPE_1932)) + 
sprintf(address, "%s^%d", address, geo_slot(geoid)); +} +#endif + +#ifdef XEN +__initcall(sn_pci_init); +#else +subsys_initcall(sn_pci_init); +#endif +#ifndef XEN +EXPORT_SYMBOL(sn_pci_fixup_slot); +EXPORT_SYMBOL(sn_pci_unfixup_slot); +EXPORT_SYMBOL(sn_pci_controller_fixup); +EXPORT_SYMBOL(sn_bus_store_sysdata); +EXPORT_SYMBOL(sn_bus_free_sysdata); +EXPORT_SYMBOL(sn_generate_path); +#endif diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/iomv.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux-xen/sn/kernel/iomv.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,82 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/module.h> +#include <asm/io.h> +#include <asm/delay.h> +#ifndef XEN +#include <asm/vga.h> +#endif +#include <asm/sn/nodepda.h> +#include <asm/sn/simulator.h> +#include <asm/sn/pda.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/shub_mmr.h> + +#define IS_LEGACY_VGA_IOPORT(p) \ + (((p) >= 0x3b0 && (p) <= 0x3bb) || ((p) >= 0x3c0 && (p) <= 0x3df)) + +#ifdef XEN +#define vga_console_iobase 0 +#endif + +/** + * sn_io_addr - convert an in/out port to an i/o address + * @port: port to convert + * + * Legacy in/out instructions are converted to ld/st instructions + * on IA64. This routine will convert a port number into a valid + * SN i/o address. Used by sn_in*() and sn_out*(). + */ +void *sn_io_addr(unsigned long port) +{ + if (!IS_RUNNING_ON_SIMULATOR()) { + if (IS_LEGACY_VGA_IOPORT(port)) + port += vga_console_iobase; + /* On sn2, legacy I/O ports don't point at anything */ + if (port < (64 * 1024)) + return NULL; + return ((void *)(port | __IA64_UNCACHED_OFFSET)); + } else { + /* but the simulator uses them... 
*/ + unsigned long addr; + + /* + * word align port, but need more than 10 bits + * for accessing registers in bedrock local block + * (so we don't do port&0xfff) + */ + addr = (is_shub2() ? 0xc00000028c000000UL : 0xc0000087cc000000UL) | ((port >> 2) << 12); + if ((port >= 0x1f0 && port <= 0x1f7) || port == 0x3f6 || port == 0x3f7) + addr |= port; + return (void *)addr; + } +} + +EXPORT_SYMBOL(sn_io_addr); + +/** + * __sn_mmiowb - I/O space memory barrier + * + * See include/asm-ia64/io.h and Documentation/DocBook/deviceiobook.tmpl + * for details. + * + * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear. + * See PV 871084 for details about the WAR about zero value. + * + */ +void __sn_mmiowb(void) +{ + volatile unsigned long *adr = pda->pio_write_status_addr; + unsigned long val = pda->pio_write_status_val; + + while ((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != val) + cpu_relax(); +} + +EXPORT_SYMBOL(__sn_mmiowb); diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/irq.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux-xen/sn/kernel/irq.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,542 @@ +/* + * Platform dependent support for SGI SN + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2006 Silicon Graphics, Inc. All Rights Reserved. 
+ */ + +#include <linux/irq.h> +#include <linux/spinlock.h> +#include <linux/init.h> +#ifdef XEN +#include <linux/pci.h> +#include <asm/hw_irq.h> +#endif +#include <asm/sn/addrs.h> +#include <asm/sn/arch.h> +#include <asm/sn/intr.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#ifndef XEN +#include <asm/sn/pcidev.h> +#endif +#include <asm/sn/shub_mmr.h> +#include <asm/sn/sn_sal.h> + +#ifdef XEN +#define move_native_irq(foo) do {} while(0) +#endif + +static void force_interrupt(int irq); +#ifndef XEN +static void register_intr_pda(struct sn_irq_info *sn_irq_info); +static void unregister_intr_pda(struct sn_irq_info *sn_irq_info); +#endif + +int sn_force_interrupt_flag = 1; +extern int sn_ioif_inited; +struct list_head **sn_irq_lh; +static DEFINE_SPINLOCK(sn_irq_info_lock); /* non-IRQ lock */ + +u64 sn_intr_alloc(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info, + int req_irq, nasid_t req_nasid, + int req_slice) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_ALLOC, (u64) local_nasid, + (u64) local_widget, __pa(sn_irq_info), (u64) req_irq, + (u64) req_nasid, (u64) req_slice); + + return ret_stuff.status; +} + +void sn_intr_free(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_FREE, (u64) local_nasid, + (u64) local_widget, (u64) sn_irq_info->irq_irq, + (u64) sn_irq_info->irq_cookie, 0, 0); +} + +static unsigned int sn_startup_irq(unsigned int irq) +{ + return 0; +} + +static void sn_shutdown_irq(unsigned int irq) +{ +} + +static void sn_disable_irq(unsigned int irq) +{ +} + +static void sn_enable_irq(unsigned int irq) +{ +} + +static void sn_ack_irq(unsigned int irq) +{ + u64 event_occurred, mask; + + irq = irq & 0xff; 
+ event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED)); + mask = event_occurred & SH_ALL_INT_MASK; + HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), mask); + __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs); + + move_native_irq(irq); +} + +static void sn_end_irq(unsigned int irq) +{ + int ivec; + u64 event_occurred; + + ivec = irq & 0xff; + if (ivec == SGI_UART_VECTOR) { + event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR (SH_EVENT_OCCURRED)); + /* If the UART bit is set here, we may have received an + * interrupt from the UART that the driver missed. To + * make sure, we IPI ourselves to force us to look again. + */ + if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) { + platform_send_ipi(smp_processor_id(), SGI_UART_VECTOR, + IA64_IPI_DM_INT, 0); + } + } + __clear_bit(ivec, (volatile void *)pda->sn_in_service_ivecs); + if (sn_force_interrupt_flag) + force_interrupt(irq); +} + +#ifndef XEN +static void sn_irq_info_free(struct rcu_head *head); + +struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, + nasid_t nasid, int slice) +{ + int vector; + int cpuphys; + int64_t bridge; + int local_widget, status; + nasid_t local_nasid; + struct sn_irq_info *new_irq_info; + struct sn_pcibus_provider *pci_provider; + + new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); + if (new_irq_info == NULL) + return NULL; + + memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); + + bridge = (u64) new_irq_info->irq_bridge; + if (!bridge) { + kfree(new_irq_info); + return NULL; /* irq is not a device interrupt */ + } + + local_nasid = NASID_GET(bridge); + + if (local_nasid & 1) + local_widget = TIO_SWIN_WIDGETNUM(bridge); + else + local_widget = SWIN_WIDGETNUM(bridge); + + vector = sn_irq_info->irq_irq; + /* Free the old PROM new_irq_info structure */ + sn_intr_free(local_nasid, local_widget, new_irq_info); + /* Update kernels new_irq_info with new target info */ + unregister_intr_pda(new_irq_info); + + /* allocate a new PROM 
new_irq_info struct */ + status = sn_intr_alloc(local_nasid, local_widget, + new_irq_info, vector, + nasid, slice); + + /* SAL call failed */ + if (status) { + kfree(new_irq_info); + return NULL; + } + + cpuphys = nasid_slice_to_cpuid(nasid, slice); + new_irq_info->irq_cpuid = cpuphys; + register_intr_pda(new_irq_info); + + pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; + + /* + * If this represents a line interrupt, target it. If it's + * an msi (irq_int_bit < 0), it's already targeted. + */ + if (new_irq_info->irq_int_bit >= 0 && + pci_provider && pci_provider->target_interrupt) + (pci_provider->target_interrupt)(new_irq_info); + + spin_lock(&sn_irq_info_lock); +#ifdef XEN + list_replace(&sn_irq_info->list, &new_irq_info->list); +#else + list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); +#endif + spin_unlock(&sn_irq_info_lock); +#ifndef XEN + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); +#endif + +#ifdef CONFIG_SMP + set_irq_affinity_info((vector & 0xff), cpuphys, 0); +#endif + + return new_irq_info; +} + +static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; + nasid_t nasid; + int slice; + + nasid = cpuid_to_nasid(first_cpu(mask)); + slice = cpuid_to_slice(first_cpu(mask)); + + list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, + sn_irq_lh[irq], list) + (void)sn_retarget_vector(sn_irq_info, nasid, slice); +} +#endif + +struct hw_interrupt_type irq_type_sn = { +#ifndef XEN + .name = "SN hub", +#endif + .startup = sn_startup_irq, + .shutdown = sn_shutdown_irq, + .enable = sn_enable_irq, + .disable = sn_disable_irq, + .ack = sn_ack_irq, + .end = sn_end_irq, +#ifndef XEN + .set_affinity = sn_set_affinity_irq +#endif +}; + +unsigned int sn_local_vector_to_irq(u8 vector) +{ + return (CPU_VECTOR_TO_IRQ(smp_processor_id(), vector)); +} + +void sn_irq_init(void) +{ +#ifndef XEN + int i; + irq_desc_t *base_desc = irq_desc; + + ia64_first_device_vector = 
IA64_SN2_FIRST_DEVICE_VECTOR; + ia64_last_device_vector = IA64_SN2_LAST_DEVICE_VECTOR; + + for (i = 0; i < NR_IRQS; i++) { + if (base_desc[i].chip == &no_irq_type) { + base_desc[i].chip = &irq_type_sn; + } + } +#endif +} + +#ifndef XEN +static void register_intr_pda(struct sn_irq_info *sn_irq_info) +{ + int irq = sn_irq_info->irq_irq; + int cpu = sn_irq_info->irq_cpuid; + + if (pdacpu(cpu)->sn_last_irq < irq) { + pdacpu(cpu)->sn_last_irq = irq; + } + + if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq) + pdacpu(cpu)->sn_first_irq = irq; +} + +static void unregister_intr_pda(struct sn_irq_info *sn_irq_info) +{ + int irq = sn_irq_info->irq_irq; + int cpu = sn_irq_info->irq_cpuid; + struct sn_irq_info *tmp_irq_info; + int i, foundmatch; + +#ifndef XEN + rcu_read_lock(); +#else + spin_lock(&sn_irq_info_lock); +#endif + if (pdacpu(cpu)->sn_last_irq == irq) { + foundmatch = 0; + for (i = pdacpu(cpu)->sn_last_irq - 1; + i && !foundmatch; i--) { +#ifdef XEN + list_for_each_entry(tmp_irq_info, + sn_irq_lh[i], + list) { +#else + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { +#endif + if (tmp_irq_info->irq_cpuid == cpu) { + foundmatch = 1; + break; + } + } + } + pdacpu(cpu)->sn_last_irq = i; + } + + if (pdacpu(cpu)->sn_first_irq == irq) { + foundmatch = 0; + for (i = pdacpu(cpu)->sn_first_irq + 1; + i < NR_IRQS && !foundmatch; i++) { +#ifdef XEN + list_for_each_entry(tmp_irq_info, + sn_irq_lh[i], + list) { +#else + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { +#endif + if (tmp_irq_info->irq_cpuid == cpu) { + foundmatch = 1; + break; + } + } + } + pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 
0 : i); + } +#ifndef XEN + rcu_read_unlock(); +#else + spin_unlock(&sn_irq_info_lock); +#endif +} +#endif /* XEN */ + +#ifndef XEN +static void sn_irq_info_free(struct rcu_head *head) +{ + struct sn_irq_info *sn_irq_info; + + sn_irq_info = container_of(head, struct sn_irq_info, rcu); + kfree(sn_irq_info); +} +#endif + +#ifndef XEN +void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) +{ + nasid_t nasid = sn_irq_info->irq_nasid; + int slice = sn_irq_info->irq_slice; + int cpu = nasid_slice_to_cpuid(nasid, slice); + + pci_dev_get(pci_dev); + sn_irq_info->irq_cpuid = cpu; + sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev); + + /* link it into the sn_irq[irq] list */ + spin_lock(&sn_irq_info_lock); +#ifdef XEN + list_add(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]); +#else + list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]); +#endif +#ifndef XEN + reserve_irq_vector(sn_irq_info->irq_irq); +#endif + spin_unlock(&sn_irq_info_lock); + + register_intr_pda(sn_irq_info); +} + +void sn_irq_unfixup(struct pci_dev *pci_dev) +{ + struct sn_irq_info *sn_irq_info; + + /* Only cleanup IRQ stuff if this device has a host bus context */ + if (!SN_PCIDEV_BUSSOFT(pci_dev)) + return; + + sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info; + if (!sn_irq_info) + return; + if (!sn_irq_info->irq_irq) { + kfree(sn_irq_info); + return; + } + + unregister_intr_pda(sn_irq_info); + spin_lock(&sn_irq_info_lock); +#ifdef XEN + list_del(&sn_irq_info->list); +#else + list_del_rcu(&sn_irq_info->list); +#endif + spin_unlock(&sn_irq_info_lock); + if (list_empty(sn_irq_lh[sn_irq_info->irq_irq])) + free_irq_vector(sn_irq_info->irq_irq); +#ifndef XEN + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); +#endif + pci_dev_put(pci_dev); + +} +#endif + +static inline void +sn_call_force_intr_provider(struct sn_irq_info *sn_irq_info) +{ + struct sn_pcibus_provider *pci_provider; + + pci_provider = sn_pci_provider[sn_irq_info->irq_bridge_type]; + if 
(pci_provider && pci_provider->force_interrupt) + (*pci_provider->force_interrupt)(sn_irq_info); +} + +static void force_interrupt(int irq) +{ + struct sn_irq_info *sn_irq_info; + +#ifndef XEN + if (!sn_ioif_inited) + return; +#endif + +#ifdef XEN + spin_lock(&sn_irq_info_lock); +#else + rcu_read_lock(); +#endif +#ifdef XEN + list_for_each_entry(sn_irq_info, sn_irq_lh[irq], list) +#else + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list) +#endif + sn_call_force_intr_provider(sn_irq_info); + +#ifdef XEN + spin_unlock(&sn_irq_info_lock); +#else + rcu_read_unlock(); +#endif +} + +#ifndef XEN +/* + * Check for lost interrupts. If the PIC int_status reg. says that + * an interrupt has been sent, but not handled, and the interrupt + * is not pending in either the cpu irr regs or in the soft irr regs, + * and the interrupt is not in service, then the interrupt may have + * been lost. Force an interrupt on that pin. It is possible that + * the interrupt is in flight, so we may generate a spurious interrupt, + * but we should never miss a real lost interrupt. + */ +static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) +{ + u64 regval; + struct pcidev_info *pcidev_info; + struct pcibus_info *pcibus_info; + + /* + * Bridge types attached to TIO (anything but PIC) do not need this WAR + * since they do not target Shub II interrupt registers. If that + * ever changes, this check needs to accomodate. 
+ */ + if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_PIC) + return; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + pcibus_info = + (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info-> + pdi_pcibus_info; + regval = pcireg_intr_status_get(pcibus_info); + + if (!ia64_get_irr(irq_to_vector(irq))) { + if (!test_bit(irq, pda->sn_in_service_ivecs)) { + regval &= 0xff; + if (sn_irq_info->irq_int_bit & regval & + sn_irq_info->irq_last_intr) { + regval &= ~(sn_irq_info->irq_int_bit & regval); + sn_call_force_intr_provider(sn_irq_info); + } + } + } + sn_irq_info->irq_last_intr = regval; +} +#endif + +void sn_lb_int_war_check(void) +{ +#ifndef XEN + struct sn_irq_info *sn_irq_info; + int i; + +#ifdef XEN + if (pda->sn_first_irq == 0) +#else + if (!sn_ioif_inited || pda->sn_first_irq == 0) +#endif + return; + +#ifdef XEN + spin_lock(&sn_irq_info_lock); +#else + rcu_read_lock(); +#endif + for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) { +#ifdef XEN + list_for_each_entry(sn_irq_info, sn_irq_lh[i], list) { +#else + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) { +#endif + sn_check_intr(i, sn_irq_info); + } + } +#ifdef XEN + spin_unlock(&sn_irq_info_lock); +#else + rcu_read_unlock(); +#endif +#endif +} + +void __init sn_irq_lh_init(void) +{ + int i; + + sn_irq_lh = kmalloc(sizeof(struct list_head *) * NR_IRQS, GFP_KERNEL); + if (!sn_irq_lh) + panic("SN PCI INIT: Failed to allocate memory for PCI init\n"); + + for (i = 0; i < NR_IRQS; i++) { + sn_irq_lh[i] = kmalloc(sizeof(struct list_head), GFP_KERNEL); + if (!sn_irq_lh[i]) + panic("SN PCI INIT: Failed IRQ memory allocation\n"); + + INIT_LIST_HEAD(sn_irq_lh[i]); + } +} diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/setup.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux-xen/sn/kernel/setup.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,808 @@ +/* + * This file is subject to the terms and conditions of 
the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#ifndef XEN +#include <linux/kdev_t.h> +#endif +#include <linux/string.h> +#ifndef XEN +#include <linux/screen_info.h> +#endif +#include <linux/console.h> +#include <linux/timex.h> +#include <linux/sched.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/serial.h> +#include <linux/irq.h> +#include <linux/bootmem.h> +#include <linux/mmzone.h> +#include <linux/interrupt.h> +#include <linux/acpi.h> +#include <linux/compiler.h> +#include <linux/sched.h> +#ifndef XEN +#include <linux/root_dev.h> +#endif +#include <linux/nodemask.h> +#include <linux/pm.h> +#include <linux/efi.h> + +#include <asm/io.h> +#include <asm/sal.h> +#include <asm/machvec.h> +#include <asm/system.h> +#include <asm/processor.h> +#ifndef XEN +#include <asm/vga.h> +#endif +#include <asm/sn/arch.h> +#include <asm/sn/addrs.h> +#include <asm/sn/pda.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/simulator.h> +#include <asm/sn/leds.h> +#ifndef XEN +#include <asm/sn/bte.h> +#endif +#include <asm/sn/shub_mmr.h> +#ifndef XEN +#include <asm/sn/clksupport.h> +#endif +#include <asm/sn/sn_sal.h> +#include <asm/sn/geo.h> +#include <asm/sn/sn_feature_sets.h> +#ifndef XEN +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#else +#include "asm/sn/xwidgetdev.h" +#include "asm/sn/hubdev.h" +#endif +#include <asm/sn/klconfig.h> +#ifdef XEN +#include <asm/sn/shubio.h> + +/* Xen has no clue about NUMA .... 
grrrr */ +#define pxm_to_node(foo) 0 +#define node_to_pxm(foo) 0 +#define numa_node_id() 0 +#endif + + +DEFINE_PER_CPU(struct pda_s, pda_percpu); + +#define MAX_PHYS_MEMORY (1UL << IA64_MAX_PHYS_BITS) /* Max physical address supported */ + +extern void bte_init_node(nodepda_t *, cnodeid_t); + +extern void sn_timer_init(void); +extern unsigned long last_time_offset; +extern void (*ia64_mark_idle) (int); +extern void snidle(int); +extern unsigned long long (*ia64_printk_clock)(void); + +unsigned long sn_rtc_cycles_per_second; +EXPORT_SYMBOL(sn_rtc_cycles_per_second); + +DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); +EXPORT_PER_CPU_SYMBOL(__sn_hub_info); + +DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); +EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); + +DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); +EXPORT_PER_CPU_SYMBOL(__sn_nodepda); + +char sn_system_serial_number_string[128]; +EXPORT_SYMBOL(sn_system_serial_number_string); +u64 sn_partition_serial_number; +EXPORT_SYMBOL(sn_partition_serial_number); +u8 sn_partition_id; +EXPORT_SYMBOL(sn_partition_id); +u8 sn_system_size; +EXPORT_SYMBOL(sn_system_size); +u8 sn_sharing_domain_size; +EXPORT_SYMBOL(sn_sharing_domain_size); +u8 sn_coherency_id; +EXPORT_SYMBOL(sn_coherency_id); +u8 sn_region_size; +EXPORT_SYMBOL(sn_region_size); +int sn_prom_type; /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */ + +short physical_node_map[MAX_NUMALINK_NODES]; +static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS]; + +EXPORT_SYMBOL(physical_node_map); + +int num_cnodes; + +static void sn_init_pdas(char **); +static void build_cnode_tables(void); + +static nodepda_t *nodepdaindr[MAX_COMPACT_NODES]; + +#ifndef XEN +/* + * The format of "screen_info" is strange, and due to early i386-setup + * code. This is just enough to make the console code think we're on a + * VGA color display. 
+ */ +struct screen_info sn_screen_info = { + .orig_x = 0, + .orig_y = 0, + .orig_video_mode = 3, + .orig_video_cols = 80, + .orig_video_ega_bx = 3, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 +}; +#endif + +/* + * This routine can only be used during init, since + * smp_boot_data is an init data structure. + * We have to use smp_boot_data.cpu_phys_id to find + * the physical id of the processor because the normal + * cpu_physical_id() relies on data structures that + * may not be initialized yet. NOTE(review): the function below never references smp_boot_data, so this comment looks stale - confirm. + */ + +/* pxm_to_nasid() - converts pxm to a node id with pxm_to_node(), then returns NASID_GET() of the start_paddr of the first node_memblk[] entry with that nid, or -1 when none matches. */ static int __init pxm_to_nasid(int pxm) +{ + int i; + int nid; + + nid = pxm_to_node(pxm); + for (i = 0; i < num_node_memblks; i++) { + if (node_memblk[i].nid == nid) { + return NASID_GET(node_memblk[i].start_paddr); + } + } + return -1; +} + +/** + * early_sn_setup - early setup routine for SN platforms + * + * Sets up an initial console to aid debugging. Intended primarily + * for bringup. See start_kernel() in init/main.c. + */ + +void __init early_sn_setup(void) +{ + efi_system_table_t *efi_systab; + efi_config_table_t *config_tables; + struct ia64_sal_systab *sal_systab; + struct ia64_sal_desc_entry_point *ep; + char *p; + int i, j; + + /* + * Parse enough of the SAL tables to locate the SAL entry point. Since console + * IO on SN2 is done via SAL calls, early_printk won't work without this. + * + * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c. + * Any changes to those files may have to be made here as well. 
+ */ + efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab); + config_tables = __va(efi_systab->tables); + for (i = 0; i < efi_systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == + 0) { + sal_systab = __va(config_tables[i].table); + p = (char *)(sal_systab + 1); + for (j = 0; j < sal_systab->entry_count; j++) { + if (*p == SAL_DESC_ENTRY_POINT) { + ep = (struct ia64_sal_desc_entry_point + *)p; + ia64_sal_handler_init(__va + (ep->sal_proc), + __va(ep->gp)); + return; + } + p += SAL_DESC_SIZE(*p); + } + } + } + /* Uh-oh, SAL not available?? */ + printk(KERN_ERR "failed to find SAL entry point\n"); +} + +extern int platform_intr_list[]; +static int __initdata shub_1_1_found; + +/* + * sn_check_for_wars + * + * Set flag for enabling shub specific wars + */ + +static inline int __init is_shub_1_1(int nasid) +{ + unsigned long id; + int rev; + + if (is_shub2()) + return 0; + id = REMOTE_HUB_L(nasid, SH1_SHUB_ID); + rev = (id & SH1_SHUB_ID_REVISION_MASK) >> SH1_SHUB_ID_REVISION_SHFT; + return rev <= 2; +} + +static void __init sn_check_for_wars(void) +{ + int cnode; + + if (is_shub2()) { + /* none yet */ + } else { + for_each_online_node(cnode) { + if (is_shub_1_1(cnodeid_to_nasid(cnode))) + shub_1_1_found = 1; + } + } +} + +#ifndef XEN +/* + * Scan the EFI PCDP table (if it exists) for an acceptable VGA console + * output device. If one exists, pick it and set sn_legacy_{io,mem} to + * reflect the bus offsets needed to address it. + * + * Since pcdp support in SN is not supported in the 2.4 kernel (or at least + * the one lbs is based on) just declare the needed structs here. + * + * Reference spec http://www.dig64.org/specifications/DIG64_PCDPv20.pdf + * + * Returns 0 if no acceptable vga is found, !0 otherwise. + * + * Note: This stuff is duped here because Altix requires the PCDP to + * locate a usable VGA device due to lack of proper ACPI support. 
Structures + * could be used from drivers/firmware/pcdp.h, but it was decided that moving + * this file to a more public location just for Altix use was undesireable. + */ + +struct hcdp_uart_desc { + u8 pad[45]; +}; + +struct pcdp { + u8 signature[4]; /* should be 'HCDP' */ + u32 length; + u8 rev; /* should be >=3 for pcdp, <3 for hcdp */ + u8 sum; + u8 oem_id[6]; + u64 oem_tableid; + u32 oem_rev; + u32 creator_id; + u32 creator_rev; + u32 num_type0; + struct hcdp_uart_desc uart[0]; /* num_type0 of these */ + /* pcdp descriptors follow */ +} __attribute__((packed)); + +struct pcdp_device_desc { + u8 type; + u8 primary; + u16 length; + u16 index; + /* interconnect specific structure follows */ + /* device specific structure follows that */ +} __attribute__((packed)); + +struct pcdp_interface_pci { + u8 type; /* 1 == pci */ + u8 reserved; + u16 length; + u8 segment; + u8 bus; + u8 dev; + u8 fun; + u16 devid; + u16 vendid; + u32 acpi_interrupt; + u64 mmio_tra; + u64 ioport_tra; + u8 flags; + u8 translation; +} __attribute__((packed)); + +struct pcdp_vga_device { + u8 num_eas_desc; + /* ACPI Extended Address Space Desc follows */ +} __attribute__((packed)); + +/* from pcdp_device_desc.primary */ +#define PCDP_PRIMARY_CONSOLE 0x01 + +/* from pcdp_device_desc.type */ +#define PCDP_CONSOLE_INOUT 0x0 +#define PCDP_CONSOLE_DEBUG 0x1 +#define PCDP_CONSOLE_OUT 0x2 +#define PCDP_CONSOLE_IN 0x3 +#define PCDP_CONSOLE_TYPE_VGA 0x8 + +#define PCDP_CONSOLE_VGA (PCDP_CONSOLE_TYPE_VGA | PCDP_CONSOLE_OUT) + +/* from pcdp_interface_pci.type */ +#define PCDP_IF_PCI 1 + +/* from pcdp_interface_pci.translation */ +#define PCDP_PCI_TRANS_IOPORT 0x02 +#define PCDP_PCI_TRANS_MMIO 0x01 + +#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) +static void +sn_scan_pcdp(void) +{ + u8 *bp; + struct pcdp *pcdp; + struct pcdp_device_desc device; + struct pcdp_interface_pci if_pci; + extern struct efi efi; + + if (efi.hcdp == EFI_INVALID_TABLE_ADDR) + return; /* no hcdp/pcdp table */ + + pcdp = 
__va(efi.hcdp); + + if (pcdp->rev < 3) + return; /* only support PCDP (rev >= 3) */ + + for (bp = (u8 *)&pcdp->uart[pcdp->num_type0]; + bp < (u8 *)pcdp + pcdp->length; + bp += device.length) { + memcpy(&device, bp, sizeof(device)); + if (! (device.primary & PCDP_PRIMARY_CONSOLE)) + continue; /* not primary console */ + + if (device.type != PCDP_CONSOLE_VGA) + continue; /* not VGA descriptor */ + + memcpy(&if_pci, bp+sizeof(device), sizeof(if_pci)); + if (if_pci.type != PCDP_IF_PCI) + continue; /* not PCI interconnect */ + + if (if_pci.translation & PCDP_PCI_TRANS_IOPORT) + vga_console_iobase = + if_pci.ioport_tra | __IA64_UNCACHED_OFFSET; + + if (if_pci.translation & PCDP_PCI_TRANS_MMIO) + vga_console_membase = + if_pci.mmio_tra | __IA64_UNCACHED_OFFSET; + + break; /* once we find the primary, we're done */ + } +} +#endif + +static unsigned long sn2_rtc_initial; + +static unsigned long long ia64_sn2_printk_clock(void) +{ + unsigned long rtc_now = rtc_time(); + + return (rtc_now - sn2_rtc_initial) * + (1000000000 / sn_rtc_cycles_per_second); +} +#endif + +/** + * sn_setup - SN platform setup routine + * @cmdline_p: kernel command line + * + * Handles platform setup for SN machines. This includes determining + * the RTC frequency (via a SAL call), initializing secondary CPUs, and + * setting up per-node data areas. The console is also initialized here. + */ +#ifdef XEN +void __cpuinit sn_cpu_init(void); +#endif + +void __init sn_setup(char **cmdline_p) +{ +#ifndef XEN + long status, ticks_per_sec, drift; +#else + unsigned long status, ticks_per_sec, drift; +#endif + u32 version = sn_sal_rev(); +#ifndef XEN + extern void sn_cpu_init(void); + + sn2_rtc_initial = rtc_time(); + ia64_sn_plat_set_error_handling_features(); // obsolete + ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV); + ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES); + + +#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) + /* + * Handle SN vga console. 
+ * + * SN systems do not have enough ACPI table information + * being passed from prom to identify VGA adapters and the legacy + * addresses to access them. Until that is done, SN systems rely + * on the PCDP table to identify the primary VGA console if one + * exists. + * + * However, kernel PCDP support is optional, and even if it is built + * into the kernel, it will not be used if the boot cmdline contains + * console= directives. + * + * So, to work around this mess, we duplicate some of the PCDP code + * here so that the primary VGA console (as defined by PCDP) will + * work on SN systems even if a different console (e.g. serial) is + * selected on the boot line (or CONFIG_EFI_PCDP is off). + */ + + if (! vga_console_membase) + sn_scan_pcdp(); + + if (vga_console_membase) { + /* usable vga ... make tty0 the preferred default console */ + if (!strstr(*cmdline_p, "console=")) + add_preferred_console("tty", 0, NULL); + } else { + printk(KERN_DEBUG "SGI: Disabling VGA console\n"); + if (!strstr(*cmdline_p, "console=")) + add_preferred_console("ttySG", 0, NULL); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#else + conswitchp = NULL; +#endif /* CONFIG_DUMMY_CONSOLE */ + } +#endif /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */ + + MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY; +#endif + + /* + * Build the tables for managing cnodes. + */ + build_cnode_tables(); + + status = + ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, + &drift); + if (status != 0 || ticks_per_sec < 100000) { + printk(KERN_WARNING + "unable to determine platform RTC clock frequency, guessing.\n"); + /* PROM gives wrong value for clock freq. 
so guess */ + sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; + } else + sn_rtc_cycles_per_second = ticks_per_sec; +#ifndef XEN + + platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR; + + ia64_printk_clock = ia64_sn2_printk_clock; +#endif + + printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF); + + /* + * we set the default root device to /dev/hda + * to make simulation easy + */ +#ifndef XEN + ROOT_DEV = Root_HDA1; +#endif + + /* + * Create the PDAs and NODEPDAs for all the cpus. + */ + sn_init_pdas(cmdline_p); + +#ifndef XEN + ia64_mark_idle = &snidle; +#endif + + /* + * For the bootcpu, we do this here. All other cpus will make the + * call as part of cpu_init in slave cpu initialization. + */ + sn_cpu_init(); + +#ifndef XEN +#ifdef CONFIG_SMP + init_smp_config(); +#endif + screen_info = sn_screen_info; + + sn_timer_init(); + + /* + * set pm_power_off to a SAL call to allow + * sn machines to power off. The SAL call can be replaced + * by an ACPI interface call when ACPI is fully implemented + * for sn. + */ + pm_power_off = ia64_sn_power_down; + current->thread.flags |= IA64_THREAD_MIGRATION; +#endif +} + +/** + * sn_init_pdas - setup node data areas + * + * One time setup for Node Data Area. Called by sn_setup(). + */ +static void __init sn_init_pdas(char **cmdline_p) +{ + cnodeid_t cnode; + + /* + * Allocate & initalize the nodepda for each node. + */ + for_each_online_node(cnode) { + nodepdaindr[cnode] = + alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t)); + memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); + memset(nodepdaindr[cnode]->phys_cpuid, -1, + sizeof(nodepdaindr[cnode]->phys_cpuid)); + spin_lock_init(&nodepdaindr[cnode]->ptc_lock); + } + + /* + * Allocate & initialize nodepda for TIOs. For now, put them on node 0. 
+ */ + for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) { + nodepdaindr[cnode] = + alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t)); + memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); + } + + /* + * Now copy the array of nodepda pointers to each nodepda. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) + memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr, + sizeof(nodepdaindr)); + +#ifndef XEN + /* + * Set up IO related platform-dependent nodepda fields. + * The following routine actually sets up the hubinfo struct + * in nodepda. + */ + for_each_online_node(cnode) { + bte_init_node(nodepdaindr[cnode], cnode); + } + + /* + * Initialize the per node hubdev. This includes IO Nodes and + * headless/memless nodes. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) { + hubdev_init_node(nodepdaindr[cnode], cnode); + } +#endif +} + +/** + * sn_cpu_init - initialize per-cpu data areas + * @cpuid: cpuid of the caller + * + * Called during cpu initialization on each cpu as it starts. + * Currently, initializes the per-cpu data area for SNIA. + * Also sets up a few fields in the nodepda. Also known as + * platform_cpu_init() by the ia64 machvec code. + */ +void __cpuinit sn_cpu_init(void) +{ + int cpuid; + int cpuphyid; + int nasid; + int subnode; + int slice; + int cnode; + int i; + static int wars_have_been_checked; + + cpuid = smp_processor_id(); +#ifndef XEN + if (cpuid == 0 && IS_MEDUSA()) { + if (ia64_sn_is_fake_prom()) + sn_prom_type = 2; + else + sn_prom_type = 1; + printk(KERN_INFO "Running on medusa with %s PROM\n", + (sn_prom_type == 1) ? "real" : "fake"); + } +#endif + + memset(pda, 0, sizeof(pda)); + if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, + &sn_hub_info->nasid_bitmask, + &sn_hub_info->nasid_shift, + &sn_system_size, &sn_sharing_domain_size, + &sn_partition_id, &sn_coherency_id, + &sn_region_size)) + BUG(); + sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2; + + /* + * Don't check status. 
The SAL call is not supported on all PROMs + * but a failure is harmless. + */ + (void) ia64_sn_set_cpu_number(cpuid); + + /* + * The boot cpu makes this call again after platform initialization is + * complete. + */ + if (nodepdaindr[0] == NULL) + return; + + for (i = 0; i < MAX_PROM_FEATURE_SETS; i++) + if (ia64_sn_get_prom_feature_set(i, &sn_prom_features[i]) != 0) + break; + + cpuphyid = get_sapicid(); + + if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice)) + BUG(); + + for (i=0; i < MAX_NUMNODES; i++) { + if (nodepdaindr[i]) { + nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid; + nodepdaindr[i]->phys_cpuid[cpuid].slice = slice; + nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode; + } + } + + cnode = nasid_to_cnodeid(nasid); + + sn_nodepda = nodepdaindr[cnode]; + + pda->led_address = + (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT)); + pda->led_state = LED_ALWAYS_SET; + pda->hb_count = HZ / 2; + pda->hb_state = 0; + pda->idle_flag = 0; + + if (cpuid != 0) { + /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */ + memcpy(sn_cnodeid_to_nasid, + (&per_cpu(__sn_cnodeid_to_nasid, 0)), + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); + } + + /* + * Check for WARs. + * Only needs to be done once, on BSP. + * Has to be done after loop above, because it uses this cpu's + * sn_cnodeid_to_nasid table which was just initialized if this + * isn't cpu 0. + * Has to be done before assignment below. + */ + if (!wars_have_been_checked) { + sn_check_for_wars(); + wars_have_been_checked = 1; + } + sn_hub_info->shub_1_1_found = shub_1_1_found; + + /* + * Set up addresses of PIO/MEM write status registers. + */ + { + u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0}; + u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_2, + SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; + u64 *pio; + pio = is_shub1() ? 
pio1 : pio2; + pda->pio_write_status_addr = + (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]); + pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; + } + +#ifndef XEN /* local_node_data is not allocated .... yet */ + /* + * WAR addresses for SHUB 1.x. + */ + if (local_node_data->active_cpu_count++ == 0 && is_shub1()) { + int buddy_nasid; + buddy_nasid = + cnodeid_to_nasid(numa_node_id() == + num_online_nodes() - 1 ? 0 : numa_node_id() + 1); + pda->pio_shub_war_cam_addr = + (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, + SH1_PI_CAM_CONTROL); + } +#endif +} + +/* + * Build tables for converting between NASIDs and cnodes. + */ +static inline int __init board_needs_cnode(int type) +{ + return (type == KLTYPE_SNIA || type == KLTYPE_TIO); +} + +void __init build_cnode_tables(void) +{ + int nasid; + int node; + lboard_t *brd; + + memset(physical_node_map, -1, sizeof(physical_node_map)); + memset(sn_cnodeid_to_nasid, -1, + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); + + /* + * First populate the tables with C/M bricks. This ensures that + * cnode == node for all C & M bricks. + */ + for_each_online_node(node) { + nasid = pxm_to_nasid(node_to_pxm(node)); + sn_cnodeid_to_nasid[node] = nasid; + physical_node_map[nasid] = node; + } + + /* + * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node + * limit on the number of nodes, we can't use the generic node numbers + * for this. Note that num_cnodes is incremented below as TIOs or + * headless/memoryless nodes are discovered. 
+ */ + num_cnodes = num_online_nodes(); + + /* fakeprom does not support klgraph */ + if (IS_RUNNING_ON_FAKE_PROM()) + return; + + /* Find TIOs & headless/memoryless nodes and add them to the tables */ + for_each_online_node(node) { + kl_config_hdr_t *klgraph_header; + nasid = cnodeid_to_nasid(node); + klgraph_header = ia64_sn_get_klconfig_addr(nasid); + if (klgraph_header == NULL) + BUG(); + brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info); + while (brd) { + if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) { + sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid; + physical_node_map[brd->brd_nasid] = num_cnodes++; + } + brd = find_lboard_next(brd); + } + } +} + +int +nasid_slice_to_cpuid(int nasid, int slice) +{ + long cpu; + + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (cpuid_to_nasid(cpu) == nasid && + cpuid_to_slice(cpu) == slice) + return cpu; + + return -1; +} + +int sn_prom_feature_available(int id) +{ + if (id >= BITS_PER_LONG * MAX_PROM_FEATURE_SETS) + return 0; + return test_bit(id, sn_prom_features); +} +EXPORT_SYMBOL(sn_prom_feature_available); + diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,548 @@ +/* + * SN2 Platform specific SMP Support + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved. 
+ */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/threads.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/bitops.h> +#include <linux/nodemask.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> + +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/tlb.h> +#include <asm/numa.h> +#include <asm/hw_irq.h> +#include <asm/current.h> +#ifdef XEN +#include <asm/sn/arch.h> +#endif +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/rw_mmr.h> + +DEFINE_PER_CPU(struct ptc_stats, ptcstats); +DECLARE_PER_CPU(struct ptc_stats, ptcstats); + +static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); + +extern unsigned long +sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); +void +sn2_ptc_deadlock_recovery(short *, short, short, int, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); + +/* + * Note: some is the following is captured here to make degugging easier + * (the macros make more sense if you see the debug patch - not posted) + */ +#define sn2_ptctest 0 +#define local_node_uses_ptc_ga(sh1) ((sh1) ? 1 : 0) +#define max_active_pio(sh1) ((sh1) ? 32 : 7) +#define reset_max_active_on_deadlock() 1 +#ifndef XEN +#define PTC_LOCK(sh1) ((sh1) ? 
&sn2_global_ptc_lock : &sn_nodepda->ptc_lock) +#else +#define PTC_LOCK(sh1) &sn2_global_ptc_lock +#endif + +struct ptc_stats { + unsigned long ptc_l; + unsigned long change_rid; + unsigned long shub_ptc_flushes; + unsigned long nodes_flushed; + unsigned long deadlocks; + unsigned long deadlocks2; + unsigned long lock_itc_clocks; + unsigned long shub_itc_clocks; + unsigned long shub_itc_clocks_max; + unsigned long shub_ptc_flushes_not_my_mm; +}; + +#define sn2_ptctest 0 + +static inline unsigned long wait_piowc(void) +{ + volatile unsigned long *piows; + unsigned long zeroval, ws; + + piows = pda->pio_write_status_addr; + zeroval = pda->pio_write_status_val; + do { + cpu_relax(); + } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval); + return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0; +} + +#ifndef XEN /* No idea if Xen will ever support this */ +/** + * sn_migrate - SN-specific task migration actions + * @task: Task being migrated to new CPU + * + * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order. + * Context switching user threads which have memory-mapped MMIO may cause + * PIOs to issue from seperate CPUs, thus the PIO writes must be drained + * from the previous CPU's Shub before execution resumes on the new CPU. 
+ */ +void sn_migrate(struct task_struct *task) +{ + pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu); + volatile unsigned long *adr = last_pda->pio_write_status_addr; + unsigned long val = last_pda->pio_write_status_val; + + /* Drain PIO writes from old CPU's Shub */ + while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) + != val)) + cpu_relax(); +} + +void sn_tlb_migrate_finish(struct mm_struct *mm) +{ + /* flush_tlb_mm is inefficient if more than 1 users of mm */ +#ifndef XEN + if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1) +#else + if (mm == &current->arch.mm && mm && atomic_read(&mm->mm_users) == 1) +#endif + flush_tlb_mm(mm); +} +#endif + +/** + * sn2_global_tlb_purge - globally purge translation cache of virtual address range + * @mm: mm_struct containing virtual address range + * @start: start of virtual address range + * @end: end of virtual address range + * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) + * + * Purges the translation caches of all processors of the given virtual address + * range. + * + * Note: + * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. + * - cpu_vm_mask is converted into a nodemask of the nodes containing the + * cpus in cpu_vm_mask. + * - if only one bit is set in cpu_vm_mask & it is the current cpu & the + * process is purging its own virtual address range, then only the + * local TLB needs to be flushed. This flushing can be done using + * ptc.l. This is the common case & avoids the global spinlock. + * - if multiple cpus have loaded the context, then flushing has to be + * done with ptc.g/MMRs under protection of the global ptc_lock. + */ + +#ifdef XEN /* Xen is soooooooo stupid! 
*/ +// static cpumask_t mask_all = CPU_MASK_ALL; +#endif + +void +#ifndef XEN +sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, +#else +sn2_global_tlb_purge(unsigned long start, +#endif + unsigned long end, unsigned long nbits) +{ + int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid; +#ifndef XEN + int mymm = (mm == current->active_mm && mm == current->mm); +#else + // struct mm_struct *mm; + int mymm = 0; +#endif + int use_cpu_ptcga; + volatile unsigned long *ptc0, *ptc1; + unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; + short nasids[MAX_NUMNODES], nix; + nodemask_t nodes_flushed; + int active, max_active, deadlock; + + nodes_clear(nodes_flushed); + i = 0; + +#ifndef XEN /* One day Xen will grow up! */ + for_each_cpu_mask(cpu, mm->cpu_vm_mask) { + cnode = cpu_to_node(cpu); + node_set(cnode, nodes_flushed); + lcpu = cpu; + i++; + } +#else + for_each_cpu(cpu) { + cnode = cpu_to_node(cpu); + node_set(cnode, nodes_flushed); + lcpu = cpu; + i++; + } +#endif + + if (i == 0) + return; + + preempt_disable(); + + if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) { + do { + ia64_ptcl(start, nbits << 2); + start += (1UL << nbits); + } while (start < end); + ia64_srlz_i(); + __get_cpu_var(ptcstats).ptc_l++; + preempt_enable(); + return; + } + +#ifndef XEN + if (atomic_read(&mm->mm_users) == 1 && mymm) { +#ifndef XEN /* I hate Xen! 
*/ + flush_tlb_mm(mm); +#else + flush_tlb_mask(mask_all); +#endif + __get_cpu_var(ptcstats).change_rid++; + preempt_enable(); + return; + } +#endif + + itc = ia64_get_itc(); + nix = 0; + for_each_node_mask(cnode, nodes_flushed) + nasids[nix++] = cnodeid_to_nasid(cnode); + +#ifndef XEN + rr_value = (mm->context << 3) | REGION_NUMBER(start); +#else + rr_value = REGION_NUMBER(start); +#endif + + shub1 = is_shub1(); + if (shub1) { + data0 = (1UL << SH1_PTC_0_A_SHFT) | + (nbits << SH1_PTC_0_PS_SHFT) | + (rr_value << SH1_PTC_0_RID_SHFT) | + (1UL << SH1_PTC_0_START_SHFT); +#ifndef XEN + ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); + ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); +#else + ptc0 = (unsigned long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); + ptc1 = (unsigned long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); +#endif + } else { + data0 = (1UL << SH2_PTC_A_SHFT) | + (nbits << SH2_PTC_PS_SHFT) | + (1UL << SH2_PTC_START_SHFT); +#ifndef XEN + ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + +#else + ptc0 = (unsigned long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + +#endif + (rr_value << SH2_PTC_RID_SHFT)); + ptc1 = NULL; + } + + + mynasid = get_nasid(); + use_cpu_ptcga = local_node_uses_ptc_ga(shub1); + max_active = max_active_pio(shub1); + + itc = ia64_get_itc(); + spin_lock_irqsave(PTC_LOCK(shub1), flags); + itc2 = ia64_get_itc(); + + __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; + __get_cpu_var(ptcstats).shub_ptc_flushes++; + __get_cpu_var(ptcstats).nodes_flushed += nix; + if (!mymm) + __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++; + + if (use_cpu_ptcga && !mymm) { + old_rr = ia64_get_rr(start); + ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8)); + ia64_srlz_d(); + } + + wait_piowc(); + do { + if (shub1) + data1 = start | (1UL << SH1_PTC_1_START_SHFT); + else + data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); + deadlock = 0; + active = 0; + for (ibegin = 0, i = 0; i < nix; i++) { + nasid = nasids[i]; + if (use_cpu_ptcga && 
unlikely(nasid == mynasid)) { + ia64_ptcga(start, nbits << 2); + ia64_srlz_i(); + } else { + ptc0 = CHANGE_NASID(nasid, ptc0); + if (ptc1) + ptc1 = CHANGE_NASID(nasid, ptc1); + pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1); + active++; + } + if (active >= max_active || i == (nix - 1)) { + if ((deadlock = wait_piowc())) { + sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); + if (reset_max_active_on_deadlock()) + max_active = 1; + } + active = 0; + ibegin = i + 1; + } + } + start += (1UL << nbits); + } while (start < end); + + itc2 = ia64_get_itc() - itc2; + __get_cpu_var(ptcstats).shub_itc_clocks += itc2; + if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) + __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2; + + if (old_rr) { + ia64_set_rr(start, old_rr); + ia64_srlz_d(); + } + + spin_unlock_irqrestore(PTC_LOCK(shub1), flags); + + preempt_enable(); +} + +/* + * sn2_ptc_deadlock_recovery + * + * Recover from PTC deadlocks conditions. Recovery requires stepping thru each + * TLB flush transaction. The recovery sequence is somewhat tricky & is + * coded in assembly language. 
+ */ + +void +sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, + volatile unsigned long *ptc0, unsigned long data0, + volatile unsigned long *ptc1, unsigned long data1) +{ + short nasid, i; + unsigned long *piows, zeroval, n; + + __get_cpu_var(ptcstats).deadlocks++; + + piows = (unsigned long *) pda->pio_write_status_addr; + zeroval = pda->pio_write_status_val; + + + for (i=ib; i <= ie; i++) { + nasid = nasids[i]; + if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid) + continue; + ptc0 = CHANGE_NASID(nasid, ptc0); + if (ptc1) + ptc1 = CHANGE_NASID(nasid, ptc1); + + n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); + __get_cpu_var(ptcstats).deadlocks2 += n; + } + +} + +/** + * sn_send_IPI_phys - send an IPI to a Nasid and slice + * @nasid: nasid to receive the interrupt (may be outside partition) + * @physid: physical cpuid to receive the interrupt. + * @vector: command to send + * @delivery_mode: delivery mechanism + * + * Sends an IPI (interprocessor interrupt) to the processor specified by + * @physid + * + * @delivery_mode can be one of the following + * + * %IA64_IPI_DM_INT - pend an interrupt + * %IA64_IPI_DM_PMI - pend a PMI + * %IA64_IPI_DM_NMI - pend an NMI + * %IA64_IPI_DM_INIT - pend an INIT interrupt + */ +void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode) +{ + long val; + unsigned long flags = 0; + volatile long *p; + + p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT); + val = (1UL << SH_IPI_INT_SEND_SHFT) | + (physid << SH_IPI_INT_PID_SHFT) | + ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) | + ((long)vector << SH_IPI_INT_IDX_SHFT) | + (0x000feeUL << SH_IPI_INT_BASE_SHFT); + + mb(); + if (enable_shub_wars_1_1()) { + spin_lock_irqsave(&sn2_global_ptc_lock, flags); + } + pio_phys_write_mmr(p, val); + if (enable_shub_wars_1_1()) { + wait_piowc(); + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + } + +} + +EXPORT_SYMBOL(sn_send_IPI_phys); + +/** + * sn2_send_IPI 
- send an IPI to a processor + * @cpuid: target of the IPI + * @vector: command to send + * @delivery_mode: delivery mechanism + * @redirect: redirect the IPI? + * + * Sends an IPI (InterProcessor Interrupt) to the processor specified by + * @cpuid. @vector specifies the command to send, while @delivery_mode can + * be one of the following + * + * %IA64_IPI_DM_INT - pend an interrupt + * %IA64_IPI_DM_PMI - pend a PMI + * %IA64_IPI_DM_NMI - pend an NMI + * %IA64_IPI_DM_INIT - pend an INIT interrupt + */ +void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect) +{ + long physid; + int nasid; + + physid = cpu_physical_id(cpuid); +#ifdef XEN + if (!sn_nodepda) { + ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL); + } else +#endif + nasid = cpuid_to_nasid(cpuid); + + /* the following is used only when starting cpus at boot time */ + if (unlikely(nasid == -1)) + ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL); + + sn_send_IPI_phys(nasid, physid, vector, delivery_mode); +} + +#ifdef CONFIG_PROC_FS + +#define PTC_BASENAME "sgi_sn/ptc_statistics" + +static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset) +{ + if (*offset < NR_CPUS) + return offset; + return NULL; +} + +static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset) +{ + (*offset)++; + if (*offset < NR_CPUS) + return offset; + return NULL; +} + +static void sn2_ptc_seq_stop(struct seq_file *file, void *data) +{ +} + +static int sn2_ptc_seq_show(struct seq_file *file, void *data) +{ + struct ptc_stats *stat; + int cpu; + + cpu = *(loff_t *) data; + + if (!cpu) { + seq_printf(file, + "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n"); + seq_printf(file, "# ptctest %d\n", sn2_ptctest); + } + + if (cpu < NR_CPUS && cpu_online(cpu)) { + stat = &per_cpu(ptcstats, cpu); + seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, + stat->change_rid, 
stat->shub_ptc_flushes, stat->nodes_flushed, + stat->deadlocks, + 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec, + stat->shub_ptc_flushes_not_my_mm, + stat->deadlocks2); + } + return 0; +} + +static struct seq_operations sn2_ptc_seq_ops = { + .start = sn2_ptc_seq_start, + .next = sn2_ptc_seq_next, + .stop = sn2_ptc_seq_stop, + .show = sn2_ptc_seq_show +}; + +static int sn2_ptc_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &sn2_ptc_seq_ops); +} + +static struct file_operations proc_sn2_ptc_operations = { + .open = sn2_ptc_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static struct proc_dir_entry *proc_sn2_ptc; + +static int __init sn2_ptc_init(void) +{ + if (!ia64_platform_is("sn2")) + return 0; + + if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) { + printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME); + return -EINVAL; + } + proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations; + spin_lock_init(&sn2_global_ptc_lock); + return 0; +} + +static void __exit sn2_ptc_exit(void) +{ + remove_proc_entry(PTC_BASENAME, NULL); +} + +module_init(sn2_ptc_init); +module_exit(sn2_ptc_exit); +#endif /* CONFIG_PROC_FS */ + diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/Makefile --- a/xen/arch/ia64/linux/Makefile Thu Jan 18 15:18:07 2007 +0000 +++ b/xen/arch/ia64/linux/Makefile Fri Jan 19 14:48:57 2007 +0000 @@ -1,6 +1,9 @@ obj-y += bitop.o +subdir-y += dig +subdir-y += hp +subdir-y += sn + obj-y += bitop.o obj-y += clear_page.o -obj-y += cmdline.o obj-y += copy_page_mck.o obj-y += efi_stub.o obj-y += extable.o @@ -23,6 +26,7 @@ obj-y += __moddi3.o obj-y += __moddi3.o obj-y += __umoddi3.o obj-y += carta_random.o +obj-y += io.o ## variants of divide/modulo ## see files in xen/arch/ia64/linux/lib 
(linux/arch/ia64/lib) diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/README.origin --- a/xen/arch/ia64/linux/README.origin Thu Jan 18 15:18:07 2007 +0000 +++ b/xen/arch/ia64/linux/README.origin Fri Jan 19 14:48:57 2007 +0000 @@ -4,7 +4,6 @@ needs to be changed, move it to ../linux needs to be changed, move it to ../linux-xen and follow the instructions in the README there. -cmdline.c -> linux/lib/cmdline.c efi_stub.S -> linux/arch/ia64/kernel/efi_stub.S extable.c -> linux/arch/ia64/mm/extable.c hpsim.S -> linux/arch/ia64/hp/sim/hpsim.S @@ -27,3 +26,6 @@ strlen.S -> linux/arch/ia64/lib/strlen. # The files below are from Linux-2.6.16.33 carta_random.S -> linux/arch/ia64/lib/carta_random.S + +# The files below are from Linux-2.6.19 +io.c -> linux/arch/ia64/lib/io.c diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/cmdline.c --- a/xen/arch/ia64/linux/cmdline.c Thu Jan 18 15:18:07 2007 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,121 +0,0 @@ -/* - * linux/lib/cmdline.c - * Helper functions generally used for parsing kernel command line - * and module options. - * - * Code and copyrights come from init/main.c and arch/i386/kernel/setup.c. - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - * - * GNU Indent formatting options for this file: -kr -i8 -npsl -pcs - * - */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <xen/lib.h> - - -/** - * get_option - Parse integer from an option string - * @str: option string - * @pint: (output) integer value parsed from @str - * - * Read an int from an option string; if available accept a subsequent - * comma as well. 
- * - * Return values: - * 0 : no int in string - * 1 : int found, no subsequent comma - * 2 : int found including a subsequent comma - */ - -int get_option (char **str, int *pint) -{ - char *cur = *str; - - if (!cur || !(*cur)) - return 0; - *pint = simple_strtol (cur, str, 0); - if (cur == *str) - return 0; - if (**str == ',') { - (*str)++; - return 2; - } - - return 1; -} - -/** - * get_options - Parse a string into a list of integers - * @str: String to be parsed - * @nints: size of integer array - * @ints: integer array - * - * This function parses a string containing a comma-separated - * list of integers. The parse halts when the array is - * full, or when no more numbers can be retrieved from the - * string. - * - * Return value is the character in the string which caused - * the parse to end (typically a null terminator, if @str is - * completely parseable). - */ - -char *get_options(const char *str, int nints, int *ints) -{ - int res, i = 1; - - while (i < nints) { - res = get_option ((char **)&str, ints + i); - if (res == 0) - break; - i++; - if (res == 1) - break; - } - ints[0] = i - 1; - return (char *)str; -} - -/** - * memparse - parse a string with mem suffixes into a number - * @ptr: Where parse begins - * @retptr: (output) Pointer to next char after parse completes - * - * Parses a string into a number. The number stored at @ptr is - * potentially suffixed with %K (for kilobytes, or 1024 bytes), - * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or - * 1073741824). If the number is suffixed with K, M, or G, then - * the return value is the number multiplied by one kilobyte, one - * megabyte, or one gigabyte, respectively. 
- */ - -unsigned long long memparse (char *ptr, char **retptr) -{ - unsigned long long ret = simple_strtoull (ptr, retptr, 0); - - switch (**retptr) { - case 'G': - case 'g': - ret <<= 10; - case 'M': - case 'm': - ret <<= 10; - case 'K': - case 'k': - ret <<= 10; - (*retptr)++; - default: - break; - } - return ret; -} - - -EXPORT_SYMBOL(memparse); -EXPORT_SYMBOL(get_option); -EXPORT_SYMBOL(get_options); diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/dig/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux/dig/Makefile Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,1 @@ +obj-y += machvec.o diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/dig/README.origin --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux/dig/README.origin Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,7 @@ +Source files in this directory are identical copies of linux-2.6.19 files: + +NOTE: DO NOT commit changes to these files! If a file +needs to be changed, move it to ../linux-xen and follow +the instructions in the README there. 
+ +machvec.c -> linux/arch/ia64/dig/machvec.c diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/dig/machvec.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux/dig/machvec.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME dig +#define MACHVEC_PLATFORM_HEADER <asm/machvec_dig.h> +#include <asm/machvec_init.h> diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux/hp/Makefile Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,1 @@ +subdir-y += zx1 diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/zx1/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux/hp/zx1/Makefile Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,1 @@ +obj-y += hpzx1_machvec.o diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/zx1/README.origin --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux/hp/zx1/README.origin Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,7 @@ +Source files in this directory are identical copies of linux-2.6.19 files: + +NOTE: DO NOT commit changes to these files! If a file +needs to be changed, move it to ../linux-xen and follow +the instructions in the README there. + +hpzx1_machvec.c -> linux/arch/ia64/hp/zx1/hpzx1_machvec.c diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/zx1/hpzx1_machvec.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux/hp/zx1/hpzx1_machvec.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME hpzx1 +#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpzx1.h> +#include <asm/machvec_init.h> diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/io.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/linux/io.c Fri Jan 19 14:48:57 2007 +0000 @@ -0,0 +1,164 @@ +#include <linux/module.h> +#include <linux/types.h> + +#include <asm/io.h> + +/* + * Copy data from IO memory space to "real" memory space. 
+ * This needs to be optimized. + */ +void memcpy_fromio(void *to, const volatile void __iomem *from, long count) +{ + char *dst = to; + + while (count) { + count--; + *dst++ = readb(from++); + } +} +EXPORT_SYMBOL(memcpy_fromio); + +/* + * Copy data from "real" memory space to IO memory space. + * This needs to be optimized. + */ +void memcpy_toio(volatile void __iomem *to, const void *from, long count) +{ + const char *src = from; + + while (count) { + count--; + writeb(*src++, to++); + } +} +EXPORT_SYMBOL(memcpy_toio); + +/* + * "memset" on IO memory space. + * This needs to be optimized. + */ +void memset_io(volatile void __iomem *dst, int c, long count) +{ + unsigned char ch = (char)(c & 0xff); + + while (count) { + count--; + writeb(ch, dst); + dst++; + } +} +EXPORT_SYMBOL(memset_io); + +#ifdef CONFIG_IA64_GENERIC + +#undef __ia64_inb +#undef __ia64_inw +#undef __ia64_inl +#undef __ia64_outb +#undef __ia64_outw +#undef __ia64_outl +#undef __ia64_readb _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |