[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User Isaku Yamahata <yamahata@xxxxxxxxxxxxx> # Date 1233115605 -32400 # Node ID 79f259a26a11cb57617982ce3bc829cdd76fff46 # Parent 4fd4dcf2f8916ab4656911a76e52fc6b1ad42c2f # Parent 31983c30c460fb405b4fc6ab8e2ae49ada2cfec5 merge with xen-unstable.hg --- tools/firmware/rombios/32bitgateway.h | 18 xen/arch/ia64/tools/p2m_foreign/Makefile | 51 xen/arch/ia64/tools/p2m_foreign/p2m_foreign.c | 233 ---- xen/include/public/elfstructs.h | 527 ---------- xen/include/public/libelf.h | 265 ----- Config.mk | 15 buildconfigs/mk.linux-2.6-common | 4 docs/check_pkgs | 12 docs/misc/dump-core-format.txt | 13 docs/misc/vtd.txt | 29 stubdom/Makefile | 1 stubdom/README | 5 tools/Rules.mk | 5 tools/blktap/drivers/Makefile | 6 tools/console/Makefile | 4 tools/examples/xmexample.hvm | 33 tools/firmware/Makefile | 4 tools/firmware/Rules.mk | 8 tools/firmware/hvmloader/32bitbios_support.c | 32 tools/firmware/hvmloader/Makefile | 4 tools/firmware/hvmloader/acpi/Makefile | 18 tools/firmware/hvmloader/acpi/build.c | 56 - tools/firmware/hvmloader/acpi/dsdt.asl | 2 tools/firmware/hvmloader/acpi/dsdt.c | 8 tools/firmware/hvmloader/cacheattr.c | 24 tools/firmware/hvmloader/config.h | 32 tools/firmware/hvmloader/hvmloader.c | 206 ++- tools/firmware/hvmloader/mp_tables.c | 4 tools/firmware/hvmloader/smbios.c | 29 tools/firmware/hvmloader/smp.c | 2 tools/firmware/hvmloader/util.c | 185 +-- tools/firmware/hvmloader/util.h | 17 tools/firmware/rombios/32bit/32bitbios.c | 41 tools/firmware/rombios/32bit/Makefile | 15 tools/firmware/rombios/32bit/pmm.c | 531 ++++++++++ tools/firmware/rombios/32bit/rombios_compat.h | 4 tools/firmware/rombios/32bit/tcgbios/Makefile | 14 tools/firmware/rombios/32bit/tcgbios/tcgbios.c | 18 tools/firmware/rombios/32bitgateway.c | 459 ++------ tools/firmware/rombios/32bitprotos.h | 63 - tools/firmware/rombios/Makefile | 2 tools/firmware/rombios/rombios.c | 88 + tools/firmware/rombios/tcgbios.c | 168 --- tools/firmware/vgabios/vbe.c | 18 
tools/firmware/vgabios/vbe.h | 64 - tools/firmware/vgabios/vbetables-gen.c | 41 tools/firmware/vgabios/vgabios.c | 6 tools/flask/libflask/Makefile | 1 tools/flask/loadpolicy/Makefile | 6 tools/fs-back/Makefile | 4 tools/include/Makefile | 3 tools/include/xen-foreign/reference.size | 2 tools/libaio/src/Makefile | 2 tools/libfsimage/Rules.mk | 4 tools/libfsimage/common/Makefile | 3 tools/libxc/Makefile | 5 tools/libxc/xc_core.c | 68 + tools/libxc/xc_core.h | 10 tools/libxc/xc_core_ia64.c | 17 tools/libxc/xc_core_x86.c | 81 + tools/libxc/xc_core_x86.h | 13 tools/libxc/xc_dom.h | 2 tools/libxc/xc_domain.c | 30 tools/libxc/xc_elf.h | 2 tools/libxc/xc_hvm_build.c | 189 +-- tools/libxc/xc_private.c | 10 tools/libxc/xc_ptrace_core.c | 4 tools/libxc/xenctrl.h | 6 tools/misc/Makefile | 4 tools/misc/xenpm.c | 894 ++++++++++------- tools/pygrub/Makefile | 4 tools/python/Makefile | 3 tools/python/xen/lowlevel/xc/xc.c | 10 tools/python/xen/util/oshelp.py | 2 tools/python/xen/xend/XendConfig.py | 12 tools/python/xen/xend/XendDPCI.py | 7 tools/python/xen/xend/XendDomain.py | 4 tools/python/xen/xend/XendDomainInfo.py | 23 tools/python/xen/xend/balloon.py | 4 tools/python/xen/xend/image.py | 13 tools/python/xen/xend/server/pciif.py | 20 tools/python/xen/xend/server/relocate.py | 2 tools/python/xen/xm/create.dtd | 7 tools/python/xen/xm/create.py | 54 - tools/python/xen/xm/main.py | 34 tools/python/xen/xm/xenapi_create.py | 12 tools/tests/blowfish.mk | 8 tools/vnet/libxutil/Makefile | 7 tools/vtpm/Makefile | 2 tools/vtpm/Rules.mk | 6 tools/vtpm_manager/Rules.mk | 6 tools/xcutils/Makefile | 7 tools/xcutils/readnotes.c | 2 tools/xenmon/Makefile | 4 tools/xenpmd/Makefile | 4 tools/xenstat/libxenstat/Makefile | 4 tools/xenstat/xentop/Makefile | 4 tools/xenstore/Makefile | 7 tools/xentrace/Makefile | 5 xen/Rules.mk | 39 xen/arch/ia64/Makefile | 6 xen/arch/ia64/Rules.mk | 17 xen/arch/ia64/xen/domain.c | 2 xen/arch/ia64/xen/irq.c | 2 xen/arch/ia64/xen/machine_kexec.c | 1 
xen/arch/ia64/xen/mm.c | 99 - xen/arch/ia64/xen/xensetup.c | 5 xen/arch/x86/Makefile | 7 xen/arch/x86/Rules.mk | 16 xen/arch/x86/acpi/cpu_idle.c | 9 xen/arch/x86/acpi/power.c | 1 xen/arch/x86/apic.c | 4 xen/arch/x86/boot/Makefile | 3 xen/arch/x86/boot/mkelf32.c | 2 xen/arch/x86/bzimage.c | 242 ++++ xen/arch/x86/cpu/common.c | 37 xen/arch/x86/cpu/mcheck/mce_intel.c | 43 xen/arch/x86/domain.c | 13 xen/arch/x86/domain_build.c | 157 ++- xen/arch/x86/hvm/hvm.c | 15 xen/arch/x86/hvm/mtrr.c | 20 xen/arch/x86/hvm/vmsi.c | 2 xen/arch/x86/hvm/vmx/vmcs.c | 34 xen/arch/x86/io_apic.c | 14 xen/arch/x86/irq.c | 22 xen/arch/x86/machine_kexec.c | 3 xen/arch/x86/microcode.c | 58 - xen/arch/x86/microcode_amd.c | 265 ++--- xen/arch/x86/microcode_intel.c | 18 xen/arch/x86/mm.c | 527 +++++----- xen/arch/x86/mm/Makefile | 2 xen/arch/x86/mm/hap/Makefile | 2 xen/arch/x86/mm/hap/hap.c | 2 xen/arch/x86/mm/hap/p2m-ept.c | 61 - xen/arch/x86/mm/p2m.c | 61 - xen/arch/x86/mm/shadow/Makefile | 2 xen/arch/x86/mm/shadow/common.c | 15 xen/arch/x86/mm/shadow/multi.c | 7 xen/arch/x86/mm/shadow/private.h | 15 xen/arch/x86/msi.c | 15 xen/arch/x86/nmi.c | 2 xen/arch/x86/oprofile/nmi_int.c | 42 xen/arch/x86/oprofile/op_model_p4.c | 4 xen/arch/x86/physdev.c | 20 xen/arch/x86/setup.c | 114 +- xen/arch/x86/smpboot.c | 12 xen/arch/x86/tboot.c | 12 xen/arch/x86/time.c | 7 xen/arch/x86/traps.c | 28 xen/arch/x86/x86_32/machine_kexec.c | 3 xen/arch/x86/x86_32/mm.c | 15 xen/arch/x86/x86_64/Makefile | 13 xen/arch/x86/x86_64/compat/mm.c | 14 xen/arch/x86/x86_64/machine_kexec.c | 9 xen/arch/x86/x86_64/mm.c | 126 +- xen/common/Makefile | 10 xen/common/compat/Makefile | 4 xen/common/compat/memory.c | 69 - xen/common/inflate.c | 1303 +++++++++++++++++++++++++ xen/common/kexec.c | 1 xen/common/keyhandler.c | 4 xen/common/libelf/libelf-dominfo.c | 15 xen/common/libelf/libelf-private.h | 4 xen/common/memory.c | 79 - xen/common/page_alloc.c | 133 +- xen/common/xenoprof.c | 63 - xen/drivers/acpi/pmstat.c | 10 
xen/drivers/acpi/reboot.c | 4 xen/drivers/cpufreq/utility.c | 30 xen/drivers/passthrough/amd/iommu_init.c | 36 xen/drivers/passthrough/amd/iommu_intr.c | 20 xen/drivers/passthrough/amd/iommu_map.c | 158 +-- xen/drivers/passthrough/amd/pci_amd_iommu.c | 143 +- xen/drivers/passthrough/io.c | 123 +- xen/drivers/passthrough/iommu.c | 1 xen/drivers/passthrough/vtd/dmar.c | 18 xen/drivers/passthrough/vtd/iommu.c | 17 xen/drivers/passthrough/vtd/iommu.h | 3 xen/drivers/video/vga.c | 2 xen/include/Makefile | 2 xen/include/asm-ia64/domain.h | 6 xen/include/asm-x86/config.h | 8 xen/include/asm-x86/domain.h | 7 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h | 38 xen/include/asm-x86/hvm/vmx/vmcs.h | 9 xen/include/asm-x86/hvm/vmx/vmx.h | 3 xen/include/asm-x86/irq.h | 9 xen/include/asm-x86/microcode.h | 49 xen/include/asm-x86/mm.h | 105 -- xen/include/asm-x86/mtrr.h | 8 xen/include/asm-x86/paging.h | 7 xen/include/asm-x86/processor.h | 3 xen/include/asm-x86/smp.h | 1 xen/include/asm-x86/time.h | 1 xen/include/public/domctl.h | 1 xen/include/public/elfnote.h | 13 xen/include/public/hvm/hvm_info_table.h | 28 xen/include/public/memory.h | 43 xen/include/public/xen.h | 4 xen/include/xen/elf.h | 2 xen/include/xen/elfstructs.h | 527 ++++++++++ xen/include/xen/hvm/iommu.h | 2 xen/include/xen/hvm/irq.h | 11 xen/include/xen/iommu.h | 1 xen/include/xen/libelf.h | 271 +++++ xen/include/xen/mm.h | 4 xen/include/xen/sched.h | 6 xen/include/xen/xenoprof.h | 7 xen/include/xlat.lst | 2 xen/include/xsm/xsm.h | 12 xen/xsm/dummy.c | 11 xen/xsm/flask/hooks.c | 21 212 files changed, 6397 insertions(+), 4382 deletions(-) diff -r 4fd4dcf2f891 -r 79f259a26a11 Config.mk --- a/Config.mk Wed Jan 28 12:22:58 2009 +0900 +++ b/Config.mk Wed Jan 28 13:06:45 2009 +0900 @@ -1,7 +1,7 @@ # -*- mode: Makefile; -*- # A debug build of Xen and tools? 
-debug ?= n +debug ?= y ## TEMPORARILY ENABLED XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \ -e s/i86pc/x86_32/ -e s/amd64/x86_64/) @@ -38,6 +38,15 @@ cc-option = $(shell if test -z "`$(1) $( cc-option = $(shell if test -z "`$(1) $(2) -S -o /dev/null -xc \ /dev/null 2>&1`"; then echo "$(2)"; else echo "$(3)"; fi ;) +# cc-option-add: Add an option to compilation flags, but only if supported. +# Usage: $(call cc-option-add CFLAGS,CC,-march=winchip-c6) +cc-option-add = $(eval $(call cc-option-add-closure,$(1),$(2),$(3))) +define cc-option-add-closure + ifneq ($$(call cc-option,$$($(2)),$(3),n),n) + $(1) += $(3) + endif +endef + # cc-ver: Check compiler is at least specified version. Return boolean 'y'/'n'. # Usage: ifeq ($(call cc-ver,$(CC),0x030400),y) cc-ver = $(shell if [ $$((`$(1) -dumpversion | awk -F. \ @@ -84,8 +93,8 @@ CFLAGS += -Wall -Wstrict-prototypes # result of any casted expression causes a warning. CFLAGS += -Wno-unused-value -HOSTCFLAGS += $(call cc-option,$(HOSTCC),-Wdeclaration-after-statement,) -CFLAGS += $(call cc-option,$(CC),-Wdeclaration-after-statement,) +$(call cc-option-add,HOSTCFLAGS,HOSTCC,-Wdeclaration-after-statement) +$(call cc-option-add,CFLAGS,CC,-Wdeclaration-after-statement) LDFLAGS += $(foreach i, $(EXTRA_LIB), -L$(i)) CFLAGS += $(foreach i, $(EXTRA_INCLUDES), -I$(i)) diff -r 4fd4dcf2f891 -r 79f259a26a11 buildconfigs/mk.linux-2.6-common --- a/buildconfigs/mk.linux-2.6-common Wed Jan 28 12:22:58 2009 +0900 +++ b/buildconfigs/mk.linux-2.6-common Wed Jan 28 13:06:45 2009 +0900 @@ -100,10 +100,10 @@ endif endif $(__NONINT_CONFIG) $(MAKE) -C $(LINUX_SRCDIR) ARCH=$(LINUX_ARCH) oldconfig O=$$(/bin/pwd)/$(LINUX_DIR) @set -e ; if [ ! 
-f $(LINUX_DIR)/Makefile ] ; then \ - echo "***********************************"; \ + echo "==================================="; \ echo "oldconfig did not create a Makefile"; \ echo "Generating $(LINUX_DIR)/Makefile "; \ - echo "***********************************"; \ + echo "==================================="; \ ( echo "# Automatically generated: don't edit"; \ echo ""; \ echo "VERSION = 2"; \ diff -r 4fd4dcf2f891 -r 79f259a26a11 docs/check_pkgs --- a/docs/check_pkgs Wed Jan 28 12:22:58 2009 +0900 +++ b/docs/check_pkgs Wed Jan 28 13:06:45 2009 +0900 @@ -2,12 +2,12 @@ silent_which () silent_which () { which $1 1>/dev/null 2>/dev/null || { - echo "*************************************************" - echo "*************************************************" - echo "* WARNING: Package '$1' is required" - echo "* to build Xen documentation" - echo "*************************************************" - echo "*************************************************" + echo "=================================================" + echo "=================================================" + echo "= WARNING: Package '$1' is required" + echo "= to build Xen documentation" + echo "=================================================" + echo "=================================================" } which $1 1>/dev/null 2>/dev/null } diff -r 4fd4dcf2f891 -r 79f259a26a11 docs/misc/dump-core-format.txt --- a/docs/misc/dump-core-format.txt Wed Jan 28 12:22:58 2009 +0900 +++ b/docs/misc/dump-core-format.txt Wed Jan 28 13:06:45 2009 +0900 @@ -30,8 +30,13 @@ The elf header members are set as follow e_ident[EI_OSABI] = ELFOSABI_SYSV = 0 e_type = ET_CORE = 4 ELFCLASS64 is always used independent of architecture. -e_ident[EI_DATA] and e_flags are set according to the dumping system's -architecture. Other members are set as usual. +e_ident[EI_DATA] is set as follows + For x86 PV domain case, it is set according to the guest configuration + (i.e. 
if guest is 32bit it is set to EM_386 even when the dom0 is 64 bit.) + For other domain case (x86 HVM domain case and ia64 domain case), + it is set according to the dumping system's architecture. +e_flags is set according to the dumping system's architecture. +Other members are set as usual. Sections -------- @@ -241,3 +246,7 @@ Currently only (major, minor) = (0, 1) i The format version isn't bumped because analysis tools can distinguish it. - .xen_ia64_mapped_regs section was made only for ia64 PV domain. In case of IA64 HVM domain, this section doesn't exist. +- elf header e_ident[EI_DATA] + On x86 PV domain case, it is set according to the guest configuration. + I.e. 32-on-64 case, the file will be set EM_386 instead of EM_X86_64. + This is the same as 32-on-32 case, so there is no impact on analysis tools. diff -r 4fd4dcf2f891 -r 79f259a26a11 docs/misc/vtd.txt --- a/docs/misc/vtd.txt Wed Jan 28 12:22:58 2009 +0900 +++ b/docs/misc/vtd.txt Wed Jan 28 13:06:45 2009 +0900 @@ -38,6 +38,30 @@ Add "msi=1" option in kernel line of hos Add "msi=1" option in kernel line of host grub. +MSI-INTx translation for passthrough devices in HVM +--------------------------------------------------- + +If the assigned device uses a physical IRQ that is shared by more than +one device among multiple domains, there may be significant impact on +device performance. Unfortunately, this is quite a common case if the +IO-APIC (INTx) IRQ is used. MSI can avoid this issue, but was only +available if the guest enables it. + +With MSI-INTx translation turned on, Xen enables device MSI if it's +available, regardless of whether the guest uses INTx or MSI. If the +guest uses INTx IRQ, Xen will inject a translated INTx IRQ to guest's +virtual ioapic whenever an MSI message is received. This reduces the +interrupt sharing of the system. If the guest OS enables MSI or MSI-X, +the translation is automatically turned off. 
+ +To enable or disable MSI-INTx translation globally, add "pci_msitranslate" +in the config file: + pci_msitranslate = 1 (default is 1) + +To override for a specific device: + pci = [ '01:00.0,msitranslate=0', '03:00.0' ] + + Caveat on Conventional PCI Device Passthrough --------------------------------------------- @@ -79,6 +103,11 @@ 2 virtual PCI slots (6~7) are reserved i 3. Attach a PCI device to the guest by the physical BDF and desired virtual slot(optional). Following command would insert the physical device into guest's virtual slot 7 [root@vt-vtd ~]# xm pci-attach HVMDomainVtd 0:2:0.0 7 + + To specify options for the device, use -o or --options=. Following command would disable MSI-INTx translation for the device + + [root@vt-vtd ~]# xm pci-attach -o msitranslate=0 0:2:0.0 7 + VTd hotplug usage model: ------------------------ diff -r 4fd4dcf2f891 -r 79f259a26a11 stubdom/Makefile --- a/stubdom/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/stubdom/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -194,6 +194,7 @@ mk-headers-$(XEN_TARGET_ARCH): ioemu/lin ln -sf $(addprefix ../../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) include/xen && \ ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) include/xen && \ ( [ -h include/xen/sys ] || ln -sf ../../$(XEN_ROOT)/tools/include/xen-sys/MiniOS include/xen/sys ) && \ + ( [ -h include/xen/libelf ] || ln -sf ../../$(XEN_ROOT)/tools/include/xen/libelf include/xen/libelf ) && \ mkdir -p include/xen-foreign && \ ln -sf $(addprefix ../../,$(wildcard $(XEN_ROOT)/tools/include/xen-foreign/*)) include/xen-foreign/ && \ $(MAKE) -C include/xen-foreign/ && \ diff -r 4fd4dcf2f891 -r 79f259a26a11 stubdom/README --- a/stubdom/README Wed Jan 28 12:22:58 2009 +0900 +++ b/stubdom/README Wed Jan 28 13:06:45 2009 +0900 @@ -55,6 +55,11 @@ sdl = 0 - In hvmconfig-dm, set an sdl vfb: vfb = [ 'type=sdl' ] + + by default qemu will use sdl together with opengl for rendering, if + you do not want qemu to use 
opengl then also pass opengl=0: + +vfb = [ 'type=sdl, opengl=0' ] * Using a VNC server in the stub domain diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/Rules.mk --- a/tools/Rules.mk Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/Rules.mk Wed Jan 28 13:06:45 2009 +0900 @@ -29,6 +29,10 @@ X11_LDPATH = -L/usr/X11R6/$(LIBLEAFDIR) CFLAGS += -D__XEN_TOOLS__ +# Get gcc to generate the dependencies for us. +CFLAGS += -MMD -MF .$(@F).d +DEPS = .*.d + # Enable implicit LFS support *and* explicit LFS names. CFLAGS += $(shell getconf LFS_CFLAGS) CFLAGS += -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE @@ -59,4 +63,3 @@ subdirs-all subdirs-clean subdirs-instal subdir-all-% subdir-clean-% subdir-install-%: .phony $(MAKE) -C $* $(patsubst subdir-%-$*,%,$@) - diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/blktap/drivers/Makefile --- a/tools/blktap/drivers/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/blktap/drivers/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -13,16 +13,12 @@ CFLAGS += -I $(LIBAIO_DIR) CFLAGS += -I $(LIBAIO_DIR) CFLAGS += -D_GNU_SOURCE -# Get gcc to generate the dependencies for us. -CFLAGS += -Wp,-MD,.$(@F).d -DEPS = .*.d - ifeq ($(shell . 
./check_gcrypt),"yes") CFLAGS += -DUSE_GCRYPT CRYPT_LIB := -lgcrypt else CRYPT_LIB := -lcrypto -$(warning *** libgcrypt not installed: falling back to libcrypto ***) +$(warning === libgcrypt not installed: falling back to libcrypto ===) endif LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) -L../lib -lblktap diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/console/Makefile --- a/tools/console/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/console/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -16,7 +16,7 @@ all: $(BIN) .PHONY: clean clean: - $(RM) *.a *.so *.o *.rpm $(BIN) + $(RM) *.a *.so *.o *.rpm $(BIN) $(DEPS) $(RM) client/*.o daemon/*.o xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c)) @@ -33,3 +33,5 @@ install: $(BIN) $(INSTALL_PROG) xenconsoled $(DESTDIR)/$(SBINDIR) $(INSTALL_DIR) $(DESTDIR)$(PRIVATE_BINDIR) $(INSTALL_PROG) xenconsole $(DESTDIR)$(PRIVATE_BINDIR) + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/examples/xmexample.hvm --- a/tools/examples/xmexample.hvm Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/examples/xmexample.hvm Wed Jan 28 13:06:45 2009 +0900 @@ -288,6 +288,39 @@ serial='pty' # 'x' -> we don't care (do not check) # 's' -> the bit must be the same as on the host that started this VM +#----------------------------------------------------------------------------- +# Configure passthrough PCI{,-X,e} devices: +# +# pci=[ '[SSSS:]BB:DD.F[,option1[,option2[...]]]', ... ] +# +# [SSSS]:BB:DD.F "bus segment:bus:device.function"(1) of the device to +# be assigned, bus segment is optional. All fields are +# in hexadecimal and no field should be longer than that +# as shown in the pattern. Successful assignment may need +# certain hardware support and additional configurations +# (e.g. VT-d, see docs/misc/vtd.txt for more details). +# +# (1) bus segment is sometimes also referred to as the PCI "domain", +# not to be confused with Xen domain. +# +# +# optionN per-device options in "key=val" format. 
Current +# available options are: +# - msitranslate=0|1 +# per-device overriden of pci_msitranslate, see below +# +#pci=[ '07:00.0', '07:00.1' ] + +# MSI-INTx translation for MSI capable devices: +# +# If it's set, Xen will enable MSI for the device that supports it even +# if the guest don't use MSI. In the case, an IO-APIC type interrupt will +# be injected to the guest every time a corresponding MSI message is +# received. +# If the guest enables MSI or MSI-X, the translation is automatically +# turned off. +# +#pci_msitranslate=1 #----------------------------------------------------------------------------- # Configure PVSCSI devices: diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/Makefile --- a/tools/firmware/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -15,10 +15,10 @@ SUBDIRS += hvmloader .PHONY: all all: @set -e; if [ $$((`( bcc -v 2>&1 | grep version || echo 0.0.0 ) | cut -d' ' -f 3 | awk -F. '{ printf "0x%02x%02x%02x", $$1, $$2, $$3}'`)) -lt $$((0x00100e)) ] ; then \ - echo "***********************************************************"; \ + echo "==========================================================="; \ echo "Require dev86 package version >= 0.16.14 to build firmware!"; \ echo "(visit http://www.cix.co.uk/~mayday for more information)"; \ - echo "***********************************************************"; \ + echo "==========================================================="; \ else \ $(MAKE) subdirs-$@; \ fi diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/Rules.mk --- a/tools/firmware/Rules.mk Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/Rules.mk Wed Jan 28 13:06:45 2009 +0900 @@ -2,7 +2,7 @@ override XEN_TARGET_ARCH = x86_32 override XEN_TARGET_ARCH = x86_32 # User-supplied CFLAGS are not useful here. -CFLAGS := +CFLAGS = include $(XEN_ROOT)/tools/Rules.mk @@ -13,9 +13,9 @@ CFLAGS += -Werror CFLAGS += -Werror # Disable PIE/SSP if GCC supports them. They can break us. 
-CFLAGS += $(call cc-option,$(CC),-nopie,) -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,) +$(call cc-option-add,CFLAGS,CC,-nopie) +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector) +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all) # Extra CFLAGS suitable for an embedded type of environment. CFLAGS += -fno-builtin -msoft-float diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/32bitbios_support.c --- a/tools/firmware/hvmloader/32bitbios_support.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/32bitbios_support.c Wed Jan 28 13:06:45 2009 +0900 @@ -32,15 +32,13 @@ #include "../rombios/32bit/32bitbios_flat.h" -static void relocate_32bitbios(char *elfarray, uint32_t elfarraysize) +static uint32_t relocate_32bitbios(char *elfarray, uint32_t elfarraysize) { Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfarray; Elf32_Shdr *shdr = (Elf32_Shdr *)&elfarray[ehdr->e_shoff]; - char *secstrings = &elfarray[shdr[ehdr->e_shstrndx].sh_offset]; - char *jump_table; uint32_t reloc_off, reloc_size; char *highbiosarea; - int i, jump_sec_idx = 0; + int i; /* * Step 1. General elf cleanup, and compute total relocation size. @@ -50,13 +48,6 @@ static void relocate_32bitbios(char *elf { /* By default all section data points into elf image data array. */ shdr[i].sh_addr = (Elf32_Addr)&elfarray[shdr[i].sh_offset]; - - if ( !strcmp(".biosjumptable", secstrings + shdr[i].sh_name) ) - { - /* We do not relocate the BIOS jump table to high memory. */ - shdr[i].sh_flags &= ~SHF_ALLOC; - jump_sec_idx = i; - } /* Fix up a corner case of address alignment. 
*/ if ( shdr[i].sh_addralign == 0 ) @@ -76,7 +67,7 @@ static void relocate_32bitbios(char *elf */ reloc_size = reloc_off; printf("%d bytes of ROMBIOS high-memory extensions:\n", reloc_size); - highbiosarea = (char *)(long)e820_malloc(reloc_size, 0); + highbiosarea = mem_alloc(reloc_size, 0); BUG_ON(highbiosarea == NULL); printf(" Relocating to 0x%x-0x%x ... ", (uint32_t)&highbiosarea[0], @@ -148,21 +139,12 @@ static void relocate_32bitbios(char *elf } } - /* Step 5. Find the ROMBIOS jump-table stub and copy in the real table. */ - for ( jump_table = (char *)ROMBIOS_BEGIN; - jump_table != (char *)ROMBIOS_END; - jump_table++ ) - if ( !strncmp(jump_table, "___JMPT", 7) ) - break; - BUG_ON(jump_table == NULL); - BUG_ON(jump_sec_idx == 0); - memcpy(jump_table, (char *)shdr[jump_sec_idx].sh_addr, - shdr[jump_sec_idx].sh_size); + printf("done\n"); - printf("done\n"); + return (uint32_t)highbiosarea; } -void highbios_setup(void) +uint32_t highbios_setup(void) { - relocate_32bitbios((char *)highbios_array, sizeof(highbios_array)); + return relocate_32bitbios((char *)highbios_array, sizeof(highbios_array)); } diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/Makefile --- a/tools/firmware/hvmloader/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -58,4 +58,6 @@ roms.h: ../rombios/BIOS-bochs-latest ../ .PHONY: clean clean: subdirs-clean rm -f roms.h acpi.h - rm -f hvmloader hvmloader.tmp *.o + rm -f hvmloader hvmloader.tmp *.o $(DEPS) + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/Makefile --- a/tools/firmware/hvmloader/acpi/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/acpi/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -22,10 +22,6 @@ H_SRC = $(wildcard *.h) H_SRC = $(wildcard *.h) OBJS = $(patsubst %.c,%.o,$(C_SRC)) -IASL_VER = acpica-unix-20080729 -#IASL_URL = http://acpica.org/download/$(IASL_VER).tar.gz -IASL_URL = 
$(XEN_EXTFILES_URL)/$(IASL_VER).tar.gz - CFLAGS += -I. -I.. $(CFLAGS_include) vpath iasl $(PATH) @@ -46,15 +42,11 @@ dsdt.c: dsdt.asl iasl: @echo - @echo "ACPI ASL compiler(iasl) is needed" - @echo "Download Intel ACPI CA" - @echo "If wget failed, please download and compile manually from" + @echo "ACPI ASL compiler (iasl) is needed" + @echo "Download and install Intel ACPI CA from" @echo "http://acpica.org/downloads/" @echo - wget $(IASL_URL) - tar xzf $(IASL_VER).tar.gz - make -C $(IASL_VER)/compiler - $(INSTALL_PROG) $(IASL_VER)/compiler/iasl $(DESTDIR)$(BINDIR)/iasl + @exit 1 acpi.a: $(OBJS) $(AR) rc $@ $(OBJS) @@ -63,6 +55,8 @@ acpi.a: $(OBJS) $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< clean: - rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz + rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz $(DEPS) install: all + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/build.c --- a/tools/firmware/hvmloader/acpi/build.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/acpi/build.c Wed Jan 28 13:06:45 2009 +0900 @@ -48,48 +48,9 @@ static void set_checksum( p[checksum_offset] = -sum; } -static int uart_exists(uint16_t uart_base) -{ - uint16_t ier = uart_base + 1; - uint8_t a, b, c; - - a = inb(ier); - outb(ier, 0); - b = inb(ier); - outb(ier, 0xf); - c = inb(ier); - outb(ier, a); - - return ((b == 0) && (c == 0xf)); -} - -static int hpet_exists(unsigned long hpet_base) -{ - uint32_t hpet_id = *(uint32_t *)hpet_base; - return ((hpet_id >> 16) == 0x8086); -} - static uint8_t battery_port_exists(void) { return (inb(0x88) == 0x1F); -} - -static int construct_bios_info_table(uint8_t *buf) -{ - struct bios_info *bios_info = (struct bios_info *)buf; - - memset(bios_info, 0, sizeof(*bios_info)); - - bios_info->com1_present = uart_exists(0x3f8); - bios_info->com2_present = uart_exists(0x2f8); - - bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS); - - bios_info->pci_min = PCI_MEMBASE; - bios_info->pci_len = PCI_MEMSIZE; - 
bios_info->xen_pfiob = 0xdead; - - return align16(sizeof(*bios_info)); } static int construct_madt(struct acpi_20_madt *madt) @@ -150,7 +111,7 @@ static int construct_madt(struct acpi_20 offset += sizeof(*io_apic); lapic = (struct acpi_20_madt_lapic *)(io_apic + 1); - for ( i = 0; i < get_vcpu_nr(); i++ ) + for ( i = 0; i < hvm_info->nr_vcpus; i++ ) { memset(lapic, 0, sizeof(*lapic)); lapic->type = ACPI_PROCESSOR_LOCAL_APIC; @@ -199,9 +160,10 @@ static int construct_secondary_tables(ui struct acpi_20_tcpa *tcpa; static const uint16_t tis_signature[] = {0x0001, 0x0001, 0x0001}; uint16_t *tis_hdr; + void *lasa; /* MADT. */ - if ( (get_vcpu_nr() > 1) || get_apic_mode() ) + if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode ) { madt = (struct acpi_20_madt *)&buf[offset]; offset += construct_madt(madt); @@ -246,11 +208,11 @@ static int construct_secondary_tables(ui tcpa->header.oem_revision = ACPI_OEM_REVISION; tcpa->header.creator_id = ACPI_CREATOR_ID; tcpa->header.creator_revision = ACPI_CREATOR_REVISION; - tcpa->lasa = e820_malloc(ACPI_2_0_TCPA_LAML_SIZE, 0); - if ( tcpa->lasa ) - { + if ( (lasa = mem_alloc(ACPI_2_0_TCPA_LAML_SIZE, 0)) != NULL ) + { + tcpa->lasa = virt_to_phys(lasa); tcpa->laml = ACPI_2_0_TCPA_LAML_SIZE; - memset((char *)(unsigned long)tcpa->lasa, 0, tcpa->laml); + memset(lasa, 0, tcpa->laml); set_checksum(tcpa, offsetof(struct acpi_header, checksum), tcpa->header.length); @@ -348,9 +310,7 @@ static void __acpi_build_tables(uint8_t buf = (uint8_t *)ACPI_PHYSICAL_ADDRESS; offset = 0; - offset += construct_bios_info_table(&buf[offset]); rsdp = (struct acpi_20_rsdp *)&buf[offset]; - memcpy(rsdp, &Rsdp, sizeof(struct acpi_20_rsdp)); offset += align16(sizeof(struct acpi_20_rsdp)); rsdp->rsdt_address = (unsigned long)rsdt; @@ -376,7 +336,7 @@ void acpi_build_tables(void) memset(buf, 0, high_sz); /* Allocate data area and set up ACPI tables there. 
*/ - buf = (uint8_t *)e820_malloc(high_sz, 0); + buf = mem_alloc(high_sz, 0); __acpi_build_tables(buf, &low_sz, &high_sz); printf(" - Lo data: %08lx-%08lx\n" diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/dsdt.asl --- a/tools/firmware/hvmloader/acpi/dsdt.asl Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/acpi/dsdt.asl Wed Jan 28 13:06:45 2009 +0900 @@ -86,7 +86,7 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, Scope (\_SB) { - /* ACPI_PHYSICAL_ADDRESS == 0xEA000 */ + /* BIOS_INFO_PHYSICAL_ADDRESS == 0xEA000 */ OperationRegion(BIOS, SystemMemory, 0xEA000, 16) Field(BIOS, ByteAcc, NoLock, Preserve) { UAR1, 1, diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/dsdt.c --- a/tools/firmware/hvmloader/acpi/dsdt.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/acpi/dsdt.c Wed Jan 28 13:06:45 2009 +0900 @@ -1,11 +1,11 @@ /* * * Intel ACPI Component Architecture - * ASL Optimizing Compiler version 20080729 [Dec 25 2008] + * ASL Optimizing Compiler version 20081204 [Jan 23 2009] * Copyright (C) 2000 - 2008 Intel Corporation * Supports ACPI Specification Revision 3.0a * - * Compilation of "dsdt.asl" - Thu Dec 25 17:00:32 2008 + * Compilation of "dsdt.asl" - Fri Jan 23 14:30:29 2009 * * C source code output * @@ -13,10 +13,10 @@ unsigned char AmlCode[] = unsigned char AmlCode[] = { 0x44,0x53,0x44,0x54,0x5E,0x11,0x00,0x00, /* 00000000 "DSDT^..." */ - 0x02,0xD1,0x58,0x65,0x6E,0x00,0x00,0x00, /* 00000008 "..Xen..." */ + 0x02,0xEB,0x58,0x65,0x6E,0x00,0x00,0x00, /* 00000008 "..Xen..." */ 0x48,0x56,0x4D,0x00,0x00,0x00,0x00,0x00, /* 00000010 "HVM....." */ 0x00,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ - 0x29,0x07,0x08,0x20,0x08,0x50,0x4D,0x42, /* 00000020 ").. .PMB" */ + 0x04,0x12,0x08,0x20,0x08,0x50,0x4D,0x42, /* 00000020 "... 
.PMB" */ 0x53,0x0B,0x00,0x0C,0x08,0x50,0x4D,0x4C, /* 00000028 "S....PML" */ 0x4E,0x0A,0x08,0x08,0x49,0x4F,0x42,0x31, /* 00000030 "N...IOB1" */ 0x00,0x08,0x49,0x4F,0x4C,0x31,0x00,0x08, /* 00000038 "..IOL1.." */ diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/cacheattr.c --- a/tools/firmware/hvmloader/cacheattr.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/cacheattr.c Wed Jan 28 13:06:45 2009 +0900 @@ -88,11 +88,25 @@ void cacheattr_init(void) nr_var_ranges = (uint8_t)mtrr_cap; if ( nr_var_ranges != 0 ) { - /* A single UC range covering PCI space. */ - wrmsr(MSR_MTRRphysBase(0), PCI_MEMBASE); - wrmsr(MSR_MTRRphysMask(0), - ((uint64_t)(int32_t)PCI_MEMBASE & addr_mask) | (1u << 11)); - printf("var MTRRs ... "); + unsigned long base = pci_mem_start, size; + int i; + + for ( i = 0; (base != pci_mem_end) && (i < nr_var_ranges); i++ ) + { + size = PAGE_SIZE; + while ( !(base & size) ) + size <<= 1; + while ( ((base + size) < base) || ((base + size) > pci_mem_end) ) + size >>= 1; + + wrmsr(MSR_MTRRphysBase(i), base); + wrmsr(MSR_MTRRphysMask(i), + (~(uint64_t)(size-1) & addr_mask) | (1u << 11)); + + base += size; + } + + printf("var MTRRs [%d/%d] ... ", i, nr_var_ranges); } wrmsr(MSR_MTRRdefType, mtrr_def); diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/config.h --- a/tools/firmware/hvmloader/config.h Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/config.h Wed Jan 28 13:06:45 2009 +0900 @@ -1,5 +1,8 @@ #ifndef __HVMLOADER_CONFIG_H__ #define __HVMLOADER_CONFIG_H__ + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1ul << PAGE_SHIFT) #define IOAPIC_BASE_ADDRESS 0xfec00000 #define IOAPIC_ID 0x01 @@ -11,8 +14,14 @@ #define PCI_ISA_DEVFN 0x08 /* dev 1, fn 0 */ #define PCI_ISA_IRQ_MASK 0x0c20U /* ISA IRQs 5,10,11 are PCI connected */ -#define PCI_MEMBASE 0xf0000000 -#define PCI_MEMSIZE 0x0c000000 +/* MMIO hole: Hardcoded defaults, which can be dynamically expanded. 
*/ +#define PCI_MEM_START 0xf0000000 +#define PCI_MEM_END 0xfc000000 +extern unsigned long pci_mem_start, pci_mem_end; + +/* We reserve 16MB for special BIOS mappings, etc. */ +#define RESERVED_MEMBASE 0xfc000000 +#define RESERVED_MEMSIZE 0x01000000 #define ROMBIOS_SEG 0xF000 #define ROMBIOS_BEGIN 0x000F0000 @@ -21,16 +30,17 @@ #define ROMBIOS_END (ROMBIOS_BEGIN + ROMBIOS_SIZE) /* Memory map. */ +#define SCRATCH_PHYSICAL_ADDRESS 0x00010000 #define HYPERCALL_PHYSICAL_ADDRESS 0x00080000 #define VGABIOS_PHYSICAL_ADDRESS 0x000C0000 #define OPTIONROM_PHYSICAL_ADDRESS 0x000C8000 #define OPTIONROM_PHYSICAL_END 0x000EA000 -#define ACPI_PHYSICAL_ADDRESS 0x000EA000 +#define BIOS_INFO_PHYSICAL_ADDRESS 0x000EA000 +#define ACPI_PHYSICAL_ADDRESS 0x000EA020 #define E820_PHYSICAL_ADDRESS 0x000EA100 #define SMBIOS_PHYSICAL_ADDRESS 0x000EB000 #define SMBIOS_MAXIMUM_SIZE 0x00005000 #define ROMBIOS_PHYSICAL_ADDRESS 0x000F0000 -#define SCRATCH_PHYSICAL_ADDRESS 0x00010000 /* Offsets from E820_PHYSICAL_ADDRESS. */ #define E820_NR_OFFSET 0x0 @@ -39,12 +49,16 @@ /* Xen Platform Device */ #define PFFLAG_ROM_LOCK 1 /* Sets whether ROM memory area is RW or RO */ +/* Located at BIOS_INFO_PHYSICAL_ADDRESS. */ struct bios_info { - uint8_t com1_present:1; - uint8_t com2_present:1; - uint8_t hpet_present:1; - uint32_t pci_min, pci_len; - uint16_t xen_pfiob; + uint8_t com1_present:1; /* 0[0] - System has COM1? */ + uint8_t com2_present:1; /* 0[1] - System has COM2? */ + uint8_t hpet_present:1; /* 0[2] - System has HPET? 
*/ + uint32_t pci_min, pci_len; /* 4, 8 - PCI I/O hole boundaries */ + uint32_t bios32_entry; /* 12 - Entry point for 32-bit BIOS */ + uint16_t xen_pfiob; /* 16 - Xen platform device I/O ports */ }; +#define BIOSINFO_OFF_bios32_entry 12 +#define BIOSINFO_OFF_xen_pfiob 16 #endif /* __HVMLOADER_CONFIG_H__ */ diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/hvmloader.c --- a/tools/firmware/hvmloader/hvmloader.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/hvmloader.c Wed Jan 28 13:06:45 2009 +0900 @@ -31,6 +31,7 @@ #include "option_rom.h" #include <xen/version.h> #include <xen/hvm/params.h> +#include <xen/memory.h> asm ( " .text \n" @@ -99,6 +100,9 @@ asm ( " .text \n" ); +unsigned long pci_mem_start = PCI_MEM_START; +unsigned long pci_mem_end = PCI_MEM_END; + static enum { VGA_none, VGA_std, VGA_cirrus } virtual_vga = VGA_none; static void init_hypercalls(void) @@ -148,16 +152,14 @@ static void apic_setup(void) static void pci_setup(void) { - uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd; + uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0; uint16_t class, vendor_id, device_id; unsigned int bar, pin, link, isa_irq; /* Resources assignable to PCI devices via BARs. */ struct resource { uint32_t base, max; - } *resource; - struct resource mem_resource = { PCI_MEMBASE, PCI_MEMBASE + PCI_MEMSIZE }; - struct resource io_resource = { 0xc000, 0x10000 }; + } *resource, mem_resource, io_resource; /* Create a list of device BARs in descending order of size. */ struct bars { @@ -248,6 +250,10 @@ static void pci_setup(void) bars[i].bar_reg = bar_reg; bars[i].bar_sz = bar_sz; + if ( (bar_data & PCI_BASE_ADDRESS_SPACE) == + PCI_BASE_ADDRESS_SPACE_MEMORY ) + mmio_total += bar_sz; + nr_bars++; /* Skip the upper-half of the address for a 64-bit BAR. 
*/ @@ -276,6 +282,28 @@ static void pci_setup(void) pci_writew(devfn, PCI_COMMAND, cmd); } + while ( (mmio_total > (pci_mem_end - pci_mem_start)) && + ((pci_mem_start << 1) != 0) ) + pci_mem_start <<= 1; + + while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend ) + { + struct xen_add_to_physmap xatp; + if ( hvm_info->high_mem_pgend == 0 ) + hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT); + xatp.domid = DOMID_SELF; + xatp.space = XENMAPSPACE_gmfn; + xatp.idx = --hvm_info->low_mem_pgend; + xatp.gpfn = hvm_info->high_mem_pgend++; + if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) + BUG(); + } + + mem_resource.base = pci_mem_start; + mem_resource.max = pci_mem_end; + io_resource.base = 0xc000; + io_resource.max = 0x10000; + /* Assign iomem and ioport resources in descending order of size. */ for ( i = 0; i < nr_bars; i++ ) { @@ -488,22 +516,13 @@ static int pci_load_option_roms(uint32_t /* Replace possibly erroneous memory-size CMOS fields with correct values. */ static void cmos_write_memory_size(void) { - struct e820entry *map = E820; - int i, nr = *E820_NR; - uint32_t base_mem = 640, ext_mem = 0, alt_mem = 0; - - for ( i = 0; i < nr; i++ ) - if ( (map[i].addr >= 0x100000) && (map[i].type == E820_RAM) ) - break; - - if ( i != nr ) - { - alt_mem = ext_mem = map[i].addr + map[i].size; - ext_mem = (ext_mem > 0x0100000) ? (ext_mem - 0x0100000) >> 10 : 0; - if ( ext_mem > 0xffff ) - ext_mem = 0xffff; - alt_mem = (alt_mem > 0x1000000) ? (alt_mem - 0x1000000) >> 16 : 0; - } + uint32_t base_mem = 640, ext_mem, alt_mem; + + alt_mem = ext_mem = hvm_info->low_mem_pgend << PAGE_SHIFT; + ext_mem = (ext_mem > 0x0100000) ? (ext_mem - 0x0100000) >> 10 : 0; + if ( ext_mem > 0xffff ) + ext_mem = 0xffff; + alt_mem = (alt_mem > 0x1000000) ? (alt_mem - 0x1000000) >> 16 : 0; /* All BIOSes: conventional memory (CMOS *always* reports 640kB). 
*/ cmos_outb(0x15, (uint8_t)(base_mem >> 0)); @@ -520,25 +539,23 @@ static void cmos_write_memory_size(void) cmos_outb(0x35, (uint8_t)( alt_mem >> 8)); } -static uint16_t init_xen_platform_io_base(void) -{ - struct bios_info *bios_info = (struct bios_info *)ACPI_PHYSICAL_ADDRESS; +static uint16_t xen_platform_io_base(void) +{ uint32_t devfn, bar_data; uint16_t vendor_id, device_id; - bios_info->xen_pfiob = 0; - for ( devfn = 0; devfn < 128; devfn++ ) { vendor_id = pci_readw(devfn, PCI_VENDOR_ID); device_id = pci_readw(devfn, PCI_DEVICE_ID); - if ( (vendor_id != 0x5853) || (device_id != 0x0001) ) - continue; - bar_data = pci_readl(devfn, PCI_BASE_ADDRESS_0); - bios_info->xen_pfiob = bar_data & PCI_BASE_ADDRESS_IO_MASK; - } - - return bios_info->xen_pfiob; + if ( (vendor_id == 0x5853) && (device_id == 0x0001) ) + { + bar_data = pci_readl(devfn, PCI_BASE_ADDRESS_0); + return bar_data & PCI_BASE_ADDRESS_IO_MASK; + } + } + + return 0; } /* @@ -548,27 +565,80 @@ static uint16_t init_xen_platform_io_bas */ static void init_vm86_tss(void) { - uint32_t tss; + void *tss; struct xen_hvm_param p; - tss = e820_malloc(128, 128); - memset((char *)tss, 0, 128); + tss = mem_alloc(128, 128); + memset(tss, 0, 128); p.domid = DOMID_SELF; p.index = HVM_PARAM_VM86_TSS; - p.value = tss; + p.value = virt_to_phys(tss); hypercall_hvm_op(HVMOP_set_param, &p); - printf("vm86 TSS at %08x\n", tss); -} - -/* - * Copy the E820 table provided by the HVM domain builder into the correct - * place in the memory map we share with the rombios. - */ -static void copy_e820_table(void) -{ - uint8_t nr = *(uint8_t *)(HVM_E820_PAGE + HVM_E820_NR_OFFSET); - BUG_ON(nr > 16); - memcpy(E820, (char *)HVM_E820_PAGE + HVM_E820_OFFSET, nr * sizeof(*E820)); + printf("vm86 TSS at %08lx\n", virt_to_phys(tss)); +} + +/* Create an E820 table based on memory parameters provided in hvm_info. */ +static void build_e820_table(void) +{ + struct e820entry *e820 = E820; + unsigned int nr = 0; + + /* 0x0-0x9FC00: Ordinary RAM. 
*/ + e820[nr].addr = 0x0; + e820[nr].size = 0x9FC00; + e820[nr].type = E820_RAM; + nr++; + + /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */ + e820[nr].addr = 0x9FC00; + e820[nr].size = 0x400; + e820[nr].type = E820_RESERVED; + nr++; + + /* + * Following regions are standard regions of the PC memory map. + * They are not covered by e820 regions. OSes will not use them as RAM. + * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820. + * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios). + * TODO: free pages which turn out to be unused. + */ + + /* + * 0xE0000-0x0F0000: PC-specific area. We place various tables here. + * 0xF0000-0x100000: System BIOS. + * TODO: free pages which turn out to be unused. + */ + e820[nr].addr = 0xE0000; + e820[nr].size = 0x20000; + e820[nr].type = E820_RESERVED; + nr++; + + /* Low RAM goes here. Reserve space for special pages. */ + BUG_ON((hvm_info->low_mem_pgend << PAGE_SHIFT) < (2u << 20)); + e820[nr].addr = 0x100000; + e820[nr].size = (hvm_info->low_mem_pgend << PAGE_SHIFT) - e820[nr].addr; + e820[nr].type = E820_RAM; + nr++; + + /* + * Explicitly reserve space for special pages. + * This space starts at RESERVED_MEMBASE and extends to cover various + * fixed hardware mappings (e.g., LAPIC, IOAPIC, default SVGA framebuffer). 
+ */ + e820[nr].addr = RESERVED_MEMBASE; + e820[nr].size = (uint32_t)-e820[nr].addr; + e820[nr].type = E820_RESERVED; + nr++; + + if ( hvm_info->high_mem_pgend ) + { + e820[nr].addr = ((uint64_t)1 << 32); + e820[nr].size = + ((uint64_t)hvm_info->high_mem_pgend << PAGE_SHIFT) - e820[nr].addr; + e820[nr].type = E820_RAM; + nr++; + } + *E820_NR = nr; } @@ -576,16 +646,17 @@ int main(void) { int option_rom_sz = 0, vgabios_sz = 0, etherboot_sz = 0; int rombios_sz, smbios_sz; - uint32_t etherboot_phys_addr, option_rom_phys_addr, vga_ram = 0; - uint16_t xen_pfiob; + uint32_t etherboot_phys_addr, option_rom_phys_addr, bios32_addr; + struct bios_info *bios_info; printf("HVM Loader\n"); - copy_e820_table(); - init_hypercalls(); printf("CPU speed is %u MHz\n", get_cpu_mhz()); + + apic_setup(); + pci_setup(); smp_initialise(); @@ -599,12 +670,9 @@ int main(void) if ( rombios_sz > 0x10000 ) rombios_sz = 0x10000; memcpy((void *)ROMBIOS_PHYSICAL_ADDRESS, rombios, rombios_sz); - highbios_setup(); - - apic_setup(); - pci_setup(); - - if ( (get_vcpu_nr() > 1) || get_apic_mode() ) + bios32_addr = highbios_setup(); + + if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode ) create_mp_tables(); switch ( virtual_vga ) @@ -626,12 +694,6 @@ int main(void) break; } - if ( virtual_vga != VGA_none ) - { - vga_ram = e820_malloc(8 << 20, 4096); - printf("VGA RAM at %08x\n", vga_ram); - } - etherboot_phys_addr = VGABIOS_PHYSICAL_ADDRESS + vgabios_sz; if ( etherboot_phys_addr < OPTIONROM_PHYSICAL_ADDRESS ) etherboot_phys_addr = OPTIONROM_PHYSICAL_ADDRESS; @@ -640,7 +702,7 @@ int main(void) option_rom_phys_addr = etherboot_phys_addr + etherboot_sz; option_rom_sz = pci_load_option_roms(option_rom_phys_addr); - if ( get_acpi_enabled() ) + if ( hvm_info->acpi_enabled ) { printf("Loading ACPI ...\n"); acpi_build_tables(); @@ -672,9 +734,17 @@ int main(void) ROMBIOS_PHYSICAL_ADDRESS, ROMBIOS_PHYSICAL_ADDRESS + rombios_sz - 1); - xen_pfiob = init_xen_platform_io_base(); - if ( xen_pfiob && vga_ram ) - 
outl(xen_pfiob + 4, vga_ram); + build_e820_table(); + + bios_info = (struct bios_info *)BIOS_INFO_PHYSICAL_ADDRESS; + memset(bios_info, 0, sizeof(*bios_info)); + bios_info->com1_present = uart_exists(0x3f8); + bios_info->com2_present = uart_exists(0x2f8); + bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS); + bios_info->pci_min = pci_mem_start; + bios_info->pci_len = pci_mem_end - pci_mem_start; + bios_info->bios32_entry = bios32_addr; + bios_info->xen_pfiob = xen_platform_io_base(); printf("Invoking ROMBIOS ...\n"); return 0; diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/mp_tables.c --- a/tools/firmware/hvmloader/mp_tables.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/mp_tables.c Wed Jan 28 13:06:45 2009 +0900 @@ -155,7 +155,7 @@ static void fill_mp_config_table(struct int vcpu_nr, i; uint8_t checksum; - vcpu_nr = get_vcpu_nr(); + vcpu_nr = hvm_info->nr_vcpus; /* fill in the MP configuration table signature, "PCMP" */ mpct->signature[0] = 'P'; @@ -317,7 +317,7 @@ void create_mp_tables(void) char *p; int vcpu_nr, i, length; - vcpu_nr = get_vcpu_nr(); + vcpu_nr = hvm_info->nr_vcpus; printf("Creating MP tables ...\n"); diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/smbios.c --- a/tools/firmware/hvmloader/smbios.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/smbios.c Wed Jan 28 13:06:45 2009 +0900 @@ -118,8 +118,9 @@ write_smbios_tables(void *start, do_struct(smbios_type_16_init(p, memsize, nr_mem_devs)); for ( i = 0; i < nr_mem_devs; i++ ) { - uint32_t dev_memsize = ((i == (nr_mem_devs - 1)) - ? 
(memsize & 0x3fff) : 0x4000); + uint32_t dev_memsize = 0x4000; /* all but last covers 16GB */ + if ( (i == (nr_mem_devs - 1)) && ((memsize & 0x3fff) != 0) ) + dev_memsize = memsize & 0x3fff; /* last dev is <16GB */ do_struct(smbios_type_17_init(p, dev_memsize, i)); do_struct(smbios_type_19_init(p, dev_memsize, i)); do_struct(smbios_type_20_init(p, dev_memsize, i)); @@ -143,28 +144,18 @@ static uint64_t static uint64_t get_memsize(void) { - struct e820entry *map = E820; - uint8_t num_entries = *E820_NR; - uint64_t memsize = 0; - int i; - - /* - * Walk through e820map, ignoring any entries that aren't marked - * as usable or reserved. - */ - for ( i = 0; i < num_entries; i++ ) - { - if ( (map->type == E820_RAM) || (map->type == E820_RESERVED) ) - memsize += map->size; - map++; - } + uint64_t sz; + + sz = (uint64_t)hvm_info->low_mem_pgend << PAGE_SHIFT; + if ( hvm_info->high_mem_pgend ) + sz += (hvm_info->high_mem_pgend << PAGE_SHIFT) - (1ull << 32); /* * Round up to the nearest MB. The user specifies domU pseudo-physical * memory in megabytes, so not doing this could easily lead to reporting * one less MB than the user specified. */ - return (memsize + (1 << 20) - 1) >> 20; + return (sz + (1ul << 20) - 1) >> 20; } int @@ -229,7 +220,7 @@ hvm_write_smbios_tables(void) /* SCRATCH_PHYSICAL_ADDRESS is a safe large memory area for scratch. 
*/ len = write_smbios_tables((void *)SCRATCH_PHYSICAL_ADDRESS, - get_vcpu_nr(), get_memsize(), + hvm_info->nr_vcpus, get_memsize(), uuid, xen_version_str, xen_major_version, xen_minor_version); if ( len > SMBIOS_MAXIMUM_SIZE ) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/smp.c --- a/tools/firmware/hvmloader/smp.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/smp.c Wed Jan 28 13:06:45 2009 +0900 @@ -121,7 +121,7 @@ static void boot_cpu(unsigned int cpu) void smp_initialise(void) { - unsigned int i, nr_cpus = get_vcpu_nr(); + unsigned int i, nr_cpus = hvm_info->nr_vcpus; memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end - ap_boot_start); diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/util.c --- a/tools/firmware/hvmloader/util.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/util.c Wed Jan 28 13:06:45 2009 +0900 @@ -25,7 +25,6 @@ #include <stdint.h> #include <xen/xen.h> #include <xen/memory.h> -#include <xen/hvm/hvm_info_table.h> void wrmsr(uint32_t idx, uint64_t v) { @@ -304,63 +303,63 @@ uuid_to_string(char *dest, uint8_t *uuid *p = '\0'; } -static void e820_collapse(void) -{ - int i = 0; - struct e820entry *ent = E820; - - while ( i < (*E820_NR-1) ) - { - if ( (ent[i].type == ent[i+1].type) && - ((ent[i].addr + ent[i].size) == ent[i+1].addr) ) - { - ent[i].size += ent[i+1].size; - memcpy(&ent[i+1], &ent[i+2], (*E820_NR-i-2) * sizeof(*ent)); - (*E820_NR)--; - } - else - { - i++; - } - } -} - -uint32_t e820_malloc(uint32_t size, uint32_t align) -{ - uint32_t addr; - int i; - struct e820entry *ent = E820; +void *mem_alloc(uint32_t size, uint32_t align) +{ + static uint32_t reserve = RESERVED_MEMBASE - 1; + static int over_allocated; + struct xen_add_to_physmap xatp; + struct xen_memory_reservation xmr; + xen_pfn_t mfn; + uint32_t s, e; /* Align to at least one kilobyte. 
*/ if ( align < 1024 ) align = 1024; - for ( i = *E820_NR - 1; i >= 0; i-- ) - { - addr = (ent[i].addr + ent[i].size - size) & ~(align-1); - if ( (ent[i].type != E820_RAM) || /* not ram? */ - (addr < ent[i].addr) || /* too small or starts above 4gb? */ - ((addr + size) < addr) ) /* ends above 4gb? */ - continue; - - if ( addr != ent[i].addr ) - { - memmove(&ent[i+1], &ent[i], (*E820_NR-i) * sizeof(*ent)); - (*E820_NR)++; - ent[i].size = addr - ent[i].addr; - ent[i+1].addr = addr; - ent[i+1].size -= ent[i].size; - i++; - } - - ent[i].type = E820_RESERVED; - - e820_collapse(); - - return addr; - } - - return 0; + s = (reserve + align) & ~(align - 1); + e = s + size - 1; + + BUG_ON((e < s) || (e >> PAGE_SHIFT) >= hvm_info->reserved_mem_pgstart); + + while ( (reserve >> PAGE_SHIFT) != (e >> PAGE_SHIFT) ) + { + reserve += PAGE_SIZE; + mfn = reserve >> PAGE_SHIFT; + + /* Try to allocate a brand new page in the reserved area. */ + if ( !over_allocated ) + { + xmr.domid = DOMID_SELF; + xmr.mem_flags = 0; + xmr.extent_order = 0; + xmr.nr_extents = 1; + set_xen_guest_handle(xmr.extent_start, &mfn); + if ( hypercall_memory_op(XENMEM_populate_physmap, &xmr) == 1 ) + continue; + over_allocated = 1; + } + + /* Otherwise, relocate a page from the ordinary RAM map. 
*/ + if ( hvm_info->high_mem_pgend ) + { + xatp.idx = --hvm_info->high_mem_pgend; + if ( xatp.idx == (1ull << (32 - PAGE_SHIFT)) ) + hvm_info->high_mem_pgend = 0; + } + else + { + xatp.idx = --hvm_info->low_mem_pgend; + } + xatp.domid = DOMID_SELF; + xatp.space = XENMAPSPACE_gmfn; + xatp.gpfn = mfn; + if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) + BUG(); + } + + reserve = e; + + return (void *)(unsigned long)s; } uint32_t ioapic_read(uint32_t reg) @@ -543,30 +542,35 @@ void __bug(char *file, int line) asm volatile ( "ud2" ); } -static int validate_hvm_info(struct hvm_info_table *t) -{ - char signature[] = "HVM INFO"; +static void validate_hvm_info(struct hvm_info_table *t) +{ uint8_t *ptr = (uint8_t *)t; uint8_t sum = 0; int i; - /* strncmp(t->signature, "HVM INFO", 8) */ - for ( i = 0; i < 8; i++ ) - { - if ( signature[i] != t->signature[i] ) - { - printf("Bad hvm info signature\n"); - return 0; - } + if ( strncmp(t->signature, "HVM INFO", 8) ) + { + printf("Bad hvm info signature\n"); + BUG(); + } + + if ( t->length < sizeof(struct hvm_info_table) ) + { + printf("Bad hvm info length\n"); + BUG(); } for ( i = 0; i < t->length; i++ ) sum += ptr[i]; - return (sum == 0); -} - -static struct hvm_info_table *get_hvm_info_table(void) + if ( sum != 0 ) + { + printf("Bad hvm info checksum\n"); + BUG(); + } +} + +struct hvm_info_table *get_hvm_info_table(void) { static struct hvm_info_table *table; struct hvm_info_table *t; @@ -576,33 +580,11 @@ static struct hvm_info_table *get_hvm_in t = (struct hvm_info_table *)HVM_INFO_PADDR; - if ( !validate_hvm_info(t) ) - { - printf("Bad hvm info table\n"); - return NULL; - } + validate_hvm_info(t); table = t; return table; -} - -int get_vcpu_nr(void) -{ - struct hvm_info_table *t = get_hvm_info_table(); - return (t ? t->nr_vcpus : 1); -} - -int get_acpi_enabled(void) -{ - struct hvm_info_table *t = get_hvm_info_table(); - return (t ? 
t->acpi_enabled : 1); -} - -int get_apic_mode(void) -{ - struct hvm_info_table *t = get_hvm_info_table(); - return (t ? t->apic_mode : 1); } uint16_t get_cpu_mhz(void) @@ -645,6 +627,27 @@ uint16_t get_cpu_mhz(void) cpu_mhz = (uint16_t)(((uint32_t)cpu_khz + 500) / 1000); return cpu_mhz; +} + +int uart_exists(uint16_t uart_base) +{ + uint16_t ier = uart_base + 1; + uint8_t a, b, c; + + a = inb(ier); + outb(ier, 0); + b = inb(ier); + outb(ier, 0xf); + c = inb(ier); + outb(ier, a); + + return ((b == 0) && (c == 0xf)); +} + +int hpet_exists(unsigned long hpet_base) +{ + uint32_t hpet_id = *(uint32_t *)hpet_base; + return ((hpet_id >> 16) == 0x8086); } /* diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/util.h --- a/tools/firmware/hvmloader/util.h Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/hvmloader/util.h Wed Jan 28 13:06:45 2009 +0900 @@ -3,6 +3,7 @@ #include <stdarg.h> #include <stdint.h> +#include <xen/hvm/hvm_info_table.h> #undef offsetof #define offsetof(t, m) ((unsigned long)&((t *)0)->m) @@ -56,6 +57,10 @@ void pci_write(uint32_t devfn, uint32_t /* Get CPU speed in MHz. */ uint16_t get_cpu_mhz(void); +/* Hardware detection. */ +int uart_exists(uint16_t uart_base); +int hpet_exists(unsigned long hpet_base); + /* Do cpuid instruction, with operation 'idx' */ void cpuid(uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); @@ -103,9 +108,8 @@ static inline void cpu_relax(void) }) /* HVM-builder info. */ -int get_vcpu_nr(void); -int get_acpi_enabled(void); -int get_apic_mode(void); +struct hvm_info_table *get_hvm_info_table(void); +#define hvm_info (get_hvm_info_table()) /* String and memory functions */ int strcmp(const char *cs, const char *ct); @@ -131,11 +135,12 @@ int printf(const char *fmt, ...) __attri int printf(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); int vprintf(const char *fmt, va_list ap); -/* Reserve a RAM region in the e820 table. 
*/ -uint32_t e820_malloc(uint32_t size, uint32_t align); +/* Allocate memory in a reserved region below 4GB. */ +void *mem_alloc(uint32_t size, uint32_t align); +#define virt_to_phys(v) ((unsigned long)(v)) /* Prepare the 32bit BIOS */ -void highbios_setup(void); +uint32_t highbios_setup(void); /* Miscellaneous. */ void cacheattr_init(void); diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/32bitbios.c --- a/tools/firmware/rombios/32bit/32bitbios.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/rombios/32bit/32bitbios.c Wed Jan 28 13:06:45 2009 +0900 @@ -19,35 +19,16 @@ * * Author: Stefan Berger <stefanb@xxxxxxxxxx> */ + #include "rombios_compat.h" + +asm ( + " .text \n" + " movzwl %bx,%eax \n" + " jmp *jumptable(,%eax,4) \n" + " .data \n" + "jumptable: \n" +#define X(idx, ret, fn, args...) " .long "#fn"\n" #include "32bitprotos.h" - -/* - the jumptable that will be copied into the rombios in the 0xf000 segment - for every function that is to be called from the lower BIOS, make an entry - here. 
- */ -#define TABLE_ENTRY(idx, func) [idx] = (uint32_t)func -uint32_t jumptable[IDX_LAST+1] __attribute__((section (".biosjumptable"))) = -{ - TABLE_ENTRY(IDX_TCPA_ACPI_INIT, tcpa_acpi_init), - TABLE_ENTRY(IDX_TCPA_EXTEND_ACPI_LOG, tcpa_extend_acpi_log), - - TABLE_ENTRY(IDX_TCGINTERRUPTHANDLER, TCGInterruptHandler), - - TABLE_ENTRY(IDX_TCPA_CALLING_INT19H, tcpa_calling_int19h), - TABLE_ENTRY(IDX_TCPA_RETURNED_INT19H, tcpa_returned_int19h), - TABLE_ENTRY(IDX_TCPA_ADD_EVENT_SEPARATORS, tcpa_add_event_separators), - TABLE_ENTRY(IDX_TCPA_WAKE_EVENT, tcpa_wake_event), - TABLE_ENTRY(IDX_TCPA_ADD_BOOTDEVICE, tcpa_add_bootdevice), - TABLE_ENTRY(IDX_TCPA_START_OPTION_ROM_SCAN, tcpa_start_option_rom_scan), - TABLE_ENTRY(IDX_TCPA_OPTION_ROM, tcpa_option_rom), - TABLE_ENTRY(IDX_TCPA_IPL, tcpa_ipl), - TABLE_ENTRY(IDX_TCPA_MEASURE_POST, tcpa_measure_post), - - TABLE_ENTRY(IDX_TCPA_INITIALIZE_TPM, tcpa_initialize_tpm), - - TABLE_ENTRY(IDX_GET_S3_WAKING_VECTOR, get_s3_waking_vector), - - TABLE_ENTRY(IDX_LAST , 0) /* keep last */ -}; +#undef X + ); diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/Makefile --- a/tools/firmware/rombios/32bit/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/rombios/32bit/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -1,14 +1,11 @@ XEN_ROOT = ../../../.. XEN_ROOT = ../../../.. include $(XEN_ROOT)/tools/firmware/Rules.mk -SOURCES = util.c TARGET = 32bitbios_flat.h -CFLAGS += $(CFLAGS_include) -I.. -DGCC_PROTOS +CFLAGS += $(CFLAGS_include) -I.. 
SUBDIRS = tcgbios - -MODULES = tcgbios/tcgbiosext.o .PHONY: all all: subdirs-all @@ -16,9 +13,12 @@ all: subdirs-all .PHONY: clean clean: subdirs-clean - rm -rf *.o $(TARGET) + rm -rf *.o $(TARGET) $(DEPS) -$(TARGET): 32bitbios.o $(MODULES) util.o +$(TARGET): 32bitbios_all.o + sh mkhex highbios_array 32bitbios_all.o > $@ + +32bitbios_all.o: 32bitbios.o tcgbios/tcgbiosext.o util.o pmm.o $(LD) $(LDFLAGS_DIRECT) -s -r $^ -o 32bitbios_all.o @nm 32bitbios_all.o | \ egrep '^ +U ' >/dev/null && { \ @@ -26,4 +26,5 @@ clean: subdirs-clean nm -u 32bitbios_all.o; \ exit 11; \ } || : - sh mkhex highbios_array 32bitbios_all.o > $@ + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/pmm.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/firmware/rombios/32bit/pmm.c Wed Jan 28 13:06:45 2009 +0900 @@ -0,0 +1,531 @@ +/* + * pmm.c - POST(Power On Self Test) Memory Manager + * according to the specification described in + * http://www.phoenix.com/NR/rdonlyres/873A00CF-33AC-4775-B77E-08E7B9754993/0/specspmm101.pdf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (C) 2009 FUJITSU LIMITED + * + * Author: Kouya Shimura <kouya@xxxxxxxxxxxxxx> + */ + +/* + * Algorithm: + * + * This is not a fast storage allocator but simple one. 
There is no + * segregated management by block size and it does nothing special for + * avoiding the fragmentation. + * + * The allocation algorithm is a first-fit. All memory blocks are + * managed by a linear singly linked list in order of the address. + * (i.e. There is no backward pointer) It searches the first available + * equal or larger block from the head (lowest address) of memory + * heap. The larger block is split into two blocks unless one side + * becomes too small. + * + * For de-allocation, the specified block is just marked as available + * and it does nothing else. Thus, the fragmentation will occur. The + * collection of contiguous available blocks is done on the search + * phase of another block allocation. + * + * The following is an abstract of this algorithm. The actual code + * looks complicated on account of alignment and checking the handle. + * + * static memblk_t * + * alloc(heap_t *heap, uint32_t size) + * { + * static memblk_t *mb; + * for_each_memblk(heap, mb) // search memory blocks + * if (memblk_is_avail(mb)) + * { + * collect_avail_memblks(heap, mb); + * if (size <= memblk_bufsize(mb)) + * { + * split_memblk(mb, size); + * set_inuse(mb); + * return mb; + * } + * } + * return NULL; + * } + */ + +#include <stdint.h> +#include <stddef.h> +#include <../hvmloader/config.h> +#include <../hvmloader/e820.h> +#include "util.h" + +#define DEBUG_PMM 0 + +#define ASSERT(_expr, _action) \ + if (!(_expr)) { \ + printf("ASSERTION FAIL: %s %s:%d %s()\n", \ + __STRING(_expr), __FILE__, __LINE__, __func__); \ + _action; \ + } else + +#if DEBUG_PMM +# define PMM_DEBUG(format, p...) printf("PMM " format, ##p) +#else +# define PMM_DEBUG(format, p...) 
+#endif + +struct pmmAllocArgs { + uint16_t function; + uint32_t length; + uint32_t handle; + uint16_t flags; +} __attribute__ ((packed)); + +struct pmmFindArgs { + uint16_t function; + uint32_t handle; +} __attribute__ ((packed)); + +struct pmmDeallocateArgs { + uint16_t function; + uint32_t buffer; +} __attribute__ ((packed)); + +#define PMM_FUNCTION_ALLOCATE 0 +#define PMM_FUNCTION_FIND 1 +#define PMM_FUNCTION_DEALLOC 2 + +#define PARAGRAPH_LENGTH 16 // unit of length + +#define PMM_HANDLE_ANONYMOUS 0xffffffff + +#define PMM_FLAGS_MEMORY_TYPE_MASK 0x0003 +#define PMM_FLAGS_MEMORY_INVALID 0 +#define PMM_FLAGS_MEMORY_CONVENTIONAL 1 // 0 to 1MB +#define PMM_FLAGS_MEMORY_EXTENDED 2 // 1MB to 4GB +#define PMM_FLAGS_MEMORY_ANY 3 // whichever is available +#define PMM_FLAGS_ALIGINMENT 0x0004 + +/* Error code */ +#define PMM_ENOMEM (0) // Out of memory, duplicate handle +#define PMM_EINVAL (-1) // Invalid argument + +#define ALIGN_UP(addr, size) (((addr)+((size)-1))&(~((size)-1))) +#define ALIGN_DOWN(addr, size) ((addr)&(~((size)-1))) + +typedef struct memblk { + uint32_t magic; // inuse or available + struct memblk *next; // points to the memblk immediately following this one + uint32_t handle; // identifier of this block + uint32_t __fill; // for 16byte alignment, not used + uint8_t buffer[0]; +} memblk_t; + +typedef struct heap { + memblk_t *head; // start address of heap + memblk_t *end; // end address of heap +} heap_t; + +#define HEAP_NOT_INITIALIZED (memblk_t *)-1 +#define HEAP_ALIGNMENT 16 + +/* + * PMM handles two memory heaps, the caller chooses either. + * + * - conventional memory (below 1MB) + * In HVM, the area is fixed. 0x00010000-0x0007FFFF + * (from SCRATCH_PHYSICAL_ADDRESS to HYPERCALL_PHYSICAL_ADDRESS) + * + * - extended memory (start at 1MB, below 4GB) + * In HVM, the area starts at memory address 0x00100000. + * The end address is variable. We read the low RAM address from the e820 table. 
+ * + * The following struct must be located in the data segment since the bss + * in 32bitbios is not relocated. + */ +static struct { + heap_t heap; // conventional memory + heap_t ext_heap; // extended memory +} pmm_data = { {HEAP_NOT_INITIALIZED, NULL}, {NULL, NULL} }; + +/* These values are for private use; they are not part of the PMM spec */ +#define MEMBLK_MAGIC_INUSE 0x2A4D4D50 // 'PMM*' +#define MEMBLK_MAGIC_AVAIL 0x5F4D4D50 // 'PMM_' + +#define memblk_is_inuse(_mb) ((_mb)->magic == MEMBLK_MAGIC_INUSE) +#define memblk_is_avail(_mb) ((_mb)->magic == MEMBLK_MAGIC_AVAIL) + +static void set_inuse(memblk_t *mb, uint32_t handle) +{ + mb->magic = MEMBLK_MAGIC_INUSE; + mb->handle = handle; +} + +static void set_avail(memblk_t *mb) +{ + mb->magic = MEMBLK_MAGIC_AVAIL; + mb->handle = PMM_HANDLE_ANONYMOUS; +} + +#define MEMBLK_HEADER_SIZE ((int)(&((memblk_t *)0)->buffer)) +#define MIN_MEMBLK_SIZE (MEMBLK_HEADER_SIZE + PARAGRAPH_LENGTH) + +#define memblk_size(_mb) ((void *)((_mb)->next) - (void *)(_mb)) +#define memblk_buffer(_mb) ((uint32_t)(&(_mb)->buffer)) +#define memblk_bufsize(_mb) (memblk_size(_mb) - MEMBLK_HEADER_SIZE) + +#define buffer_memblk(_buf) (memblk_t *)((_buf) - MEMBLK_HEADER_SIZE) + +#define memblk_loop_mbondition(_h, _mb) \ + (((_mb) < (_h)->end) && (/* avoid infinite loop */ (_mb) < (_mb)->next)) + +#define for_each_memblk(_h, _mb) \ + for ((_mb) = (_h)->head; \ + memblk_loop_mbondition(_h, _mb); \ + (_mb) = (_mb)->next) + +#define for_remain_memblk(_h, _mb) \ + for (; \ + memblk_loop_mbondition(_h, _mb); \ + (_mb) = (_mb)->next) + +/* + * <-size-> + * +==================+======+ +========+========+======+ + * | avail | | | avail | avail | | + * | memblk |memblk|... | memblk | memblk |memblk|... 
+ * +==================+======+ => +========+========+======+ + * ^ | ^ | ^ | ^ | ^ | ^ + * | |next | |next| |next | |next | |next| + * | \________________/ \____/ \______/ \______/ \____/ + * | ^ + * | | + * mb +- sb(return value) + */ +static memblk_t * +split_memblk(memblk_t *mb, uint32_t size) +{ + memblk_t *sb = (void *)memblk_buffer(mb) + size; + + /* Only split if the remaining fragment is big enough. */ + if ( (memblk_bufsize(mb) - size) < MIN_MEMBLK_SIZE) + return mb; + + sb->next = mb->next; + set_avail(sb); + + mb->next = sb; + return sb; +} + +/* + * +======+======+======+======+ +=================+======+ + * |avail |avail |avail |inuse | | avail |inuse | + * |memblk|memblk|memblk|memblk|... | memblk |memblk|... + * +======+======+======+======+ => +=================+======+ + * ^ | ^ | ^ | ^ | ^ | ^ | ^ + * | |next| |next| |next| |next| |next | |next| + * | \____/ \____/ \____/ \____/ \_______________/ \____/ + * | + * mb + */ +static void +collect_avail_memblks(heap_t *heap, memblk_t *mb) +{ + memblk_t *nb = mb->next; + + for_remain_memblk ( heap, nb ) + if ( memblk_is_inuse(nb) ) + break; + mb->next = nb; +} + +static void +pmm_init_heap(heap_t *heap, uint32_t from_addr, uint32_t to_addr) +{ + memblk_t *mb = (memblk_t *)ALIGN_UP(from_addr, HEAP_ALIGNMENT); + + mb->next = (memblk_t *)ALIGN_DOWN(to_addr, HEAP_ALIGNMENT); + set_avail(mb); + + heap->head = mb; + heap->end = mb->next; +} + +static void +pmm_initalize(void) +{ + int i, e820_nr = *E820_NR; + struct e820entry *e820 = E820; + + /* Extended memory: RAM below 4GB, 0x100000-0xXXXXXXXX */ + for ( i = 0; i < e820_nr; i++ ) + { + if ( (e820[i].type == E820_RAM) && (e820[i].addr >= 0x00100000) ) + { + pmm_init_heap(&pmm_data.ext_heap, e820[i].addr, + e820[i].addr + e820[i].size); + break; + } + } + + /* Conventional memory: RAM below 1MB, 0x10000-0x7FFFF */ + pmm_init_heap(&pmm_data.heap, SCRATCH_PHYSICAL_ADDRESS, + HYPERCALL_PHYSICAL_ADDRESS); +} + +static uint32_t +pmm_max_avail_length(heap_t 
*heap) +{ + memblk_t *mb; + uint32_t size, max = 0; + + for_each_memblk ( heap, mb ) + { + if ( !memblk_is_avail(mb) ) + continue; + collect_avail_memblks(heap, mb); + size = memblk_bufsize(mb); + if ( size > max ) + max = size; + } + + return (max / PARAGRAPH_LENGTH); +} + +static memblk_t * +first_fit(heap_t *heap, uint32_t size, uint32_t handle, uint32_t flags) +{ + memblk_t *mb; + int32_t align = 0; + + if ( flags & PMM_FLAGS_ALIGINMENT ) + align = ((size ^ (size - 1)) >> 1) + 1; + + for_each_memblk ( heap, mb ) + { + if ( memblk_is_avail(mb) ) + { + collect_avail_memblks(heap, mb); + + if ( align ) + { + uint32_t addr = memblk_buffer(mb); + uint32_t offset = ALIGN_UP(addr, align) - addr; + + if ( offset > 0 ) + { + ASSERT(offset >= MEMBLK_HEADER_SIZE, continue); + + if ( (offset + size) > memblk_bufsize(mb) ) + continue; + + mb = split_memblk(mb, offset - MEMBLK_HEADER_SIZE); + return mb; + } + } + + if ( size <= memblk_bufsize(mb) ) + return mb; + } + else + { + ASSERT(memblk_is_inuse(mb), return NULL); + + /* Duplication check for handle. */ + if ( (handle != PMM_HANDLE_ANONYMOUS) && (mb->handle == handle) ) + return NULL; + } + } + + return NULL; +} + +static memblk_t * +pmm_find_handle(heap_t *heap, uint32_t handle) +{ + memblk_t *mb; + + if ( handle == PMM_HANDLE_ANONYMOUS ) + return NULL; + + for_each_memblk ( heap, mb ) + if ( mb->handle == handle ) + return mb; + + return NULL; +} + +/* + * allocate a memory block of the specified type and size, and returns + * the address of the memory block. + * + * A client-specified identifier to be associated with the allocated + * memory block. A handle of 0xFFFFFFFF indicates that no identifier + * should be associated with the block. Such a memory block is known + * as an "anonymous" memory block and cannot be found using the + * pmmFind function. 
If a specified handle for a requested memory + * block is already used in a currently allocated memory block, the + * error value of 0x00000000 is returned + * + * If length is 0x00000000, no memory is allocated and the value + * returned is the size of the largest memory block available for the + * memory type specified in the flags parameter. The alignment bit in + * the flags register is ignored when calculating the largest memory + * block available. + * + * If a specified handle for a requested memory block is already used + * in a currently allocated memory block, the error value of + * 0x00000000 is returned. + * + * A return value of 0x00000000 indicates that an error occurred and + * no memory has been allocated. + */ +static uint32_t +pmmAllocate(uint32_t length, uint32_t handle, uint16_t flags) +{ + heap_t *heap; + memblk_t *mb; + uint32_t size; + + switch ( flags & PMM_FLAGS_MEMORY_TYPE_MASK ) + { + case PMM_FLAGS_MEMORY_CONVENTIONAL: + heap = &pmm_data.heap; + break; + + case PMM_FLAGS_MEMORY_EXTENDED: + case PMM_FLAGS_MEMORY_ANY: /* XXX: ignore conventional memory for now */ + heap = &pmm_data.ext_heap; + break; + + default: + return PMM_EINVAL; + } + + /* return the largest memory block available */ + if ( length == 0 ) + return pmm_max_avail_length(heap); + + size = length * PARAGRAPH_LENGTH; + mb = first_fit(heap, size, handle, flags); + + if ( mb == NULL ) + return PMM_ENOMEM; + + /* duplication check for handle */ + if ( handle != PMM_HANDLE_ANONYMOUS ) + { + memblk_t *nb = mb->next; + + for_remain_memblk(heap, nb) + if (nb->handle == handle) + return PMM_ENOMEM; + } + + split_memblk(mb, size); + set_inuse(mb, handle); + + return memblk_buffer(mb); +} + +/* + * returns the address of the memory block associated with the + * specified handle. + * + * A return value of 0x00000000 indicates that the handle does not + * correspond to a currently allocated memory block. 
+ */ +static uint32_t +pmmFind(uint32_t handle) +{ + memblk_t *mb; + + if ( handle == PMM_HANDLE_ANONYMOUS ) + return 0; + + mb = pmm_find_handle(&pmm_data.heap, handle); + if ( mb == NULL ) + mb = pmm_find_handle(&pmm_data.ext_heap, handle); + + return mb ? memblk_buffer(mb) : 0; +} + +/* + * frees the specified memory block that was previously allocated by + * pmmAllocate. + * + * If the memory block was deallocated correctly, the return value is + * 0x00000000. If there was an error, the return value is non-zero. + */ +static uint32_t +pmmDeallocate(uint32_t buffer) +{ + memblk_t *mb = buffer_memblk(buffer); + + if ( !memblk_is_inuse(mb) ) + return PMM_EINVAL; + + set_avail(mb); + return 0; +} + + +union pmm_args { + uint16_t function; + struct pmmAllocArgs alloc; + struct pmmFindArgs find; + struct pmmDeallocateArgs dealloc; +} __attribute__ ((packed)); + +/* + * entry function of all PMM services. + * + * Values returned to the caller are placed in the DX:AX register + * pair. The flags and all registers, other than DX and AX, are + * preserved across calls to PMM services. 
+ */ +uint32_t +pmm(void *argp) +{ + union pmm_args *ap = argp; + uint32_t ret = PMM_EINVAL; + /* The heaps are set up lazily, on the first call into PMM. */ + if ( pmm_data.heap.head == HEAP_NOT_INITIALIZED ) + pmm_initalize(); + /* ret keeps its PMM_EINVAL default when the function code is unknown. */ + switch ( ap->function ) + { + case PMM_FUNCTION_ALLOCATE: + ret = pmmAllocate(ap->alloc.length, ap->alloc.handle, ap->alloc.flags); + PMM_DEBUG("Alloc length=%x handle=%x flags=%x ret=%x\n", + ap->alloc.length, ap->alloc.handle, ap->alloc.flags, ret); + break; + + case PMM_FUNCTION_FIND: + ret = pmmFind(ap->find.handle); + PMM_DEBUG("Find handle=%x ret=%x\n", ap->find.handle, ret); + break; + + case PMM_FUNCTION_DEALLOC: + ret = pmmDeallocate(ap->dealloc.buffer); + PMM_DEBUG("Dealloc buffer=%x ret=%x\n", ap->dealloc.buffer, ret); + break; + + default: + PMM_DEBUG("Invalid function:%d\n", ap->function); + break; + } + + return ret; +} diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/rombios_compat.h --- a/tools/firmware/rombios/32bit/rombios_compat.h Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/rombios/32bit/rombios_compat.h Wed Jan 28 13:06:45 2009 +0900 @@ -89,4 +89,8 @@ static inline void write_byte(Bit16u seg *addr = val; } +#define X(idx, ret, fn, args...) ret fn (args); +#include "32bitprotos.h" +#undef X + #endif diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/tcgbios/Makefile --- a/tools/firmware/rombios/32bit/tcgbios/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/rombios/32bit/tcgbios/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -2,17 +2,17 @@ include $(XEN_ROOT)/tools/firmware/Rules include $(XEN_ROOT)/tools/firmware/Rules.mk TARGET = tcgbiosext.o -FILES = tcgbios tpm_drivers -OBJECTS = $(foreach f,$(FILES),$(f).o) -CFLAGS += $(CFLAGS_include) -I.. -I../.. -DGCC_PROTOS +CFLAGS += $(CFLAGS_include) -I.. -I../.. 
-.PHONY: all clean - +.PHONY: all all: $(TARGET) +.PHONY: clean clean: - rm -rf *.o $(TARGET) + rm -rf *.o $(TARGET) $(DEPS) -$(TARGET): $(OBJECTS) +$(TARGET): tcgbios.o tpm_drivers.o $(LD) $(LDFLAGS_DIRECT) -r $^ -o $@ + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/tcgbios/tcgbios.c --- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c Wed Jan 28 13:06:45 2009 +0900 @@ -26,7 +26,6 @@ #include "util.h" #include "tcgbios.h" -#include "32bitprotos.h" /* local structure and variables */ struct ptti_cust { @@ -259,6 +258,10 @@ uint8_t acpi_validate_entry(struct acpi_ } +/* + initialize the TCPA ACPI subsystem; find the ACPI tables and determine + where the TCPA table is. + */ void tcpa_acpi_init(void) { struct acpi_20_rsdt *rsdt; @@ -313,6 +316,16 @@ static void tcpa_reset_acpi_log(void) } +/* + * Extend the ACPI log with the given entry by copying the + * entry data into the log. + * Input + * Pointer to the structure to be copied into the log + * + * Output: + * lower 16 bits of return code contain entry number + * if entry number is '0', then upper 16 bits contain error code. + */ uint32_t tcpa_extend_acpi_log(uint32_t entry_ptr) { uint32_t res = 0; @@ -622,7 +635,8 @@ void tcpa_wake_event() } /* - * add the boot device to the measurement log + * Add a measurement regarding the boot device (CDRom, Floppy, HDD) to + * the list of measurements. */ void tcpa_add_bootdevice(uint32_t bootcd, uint32_t bootdrv) { diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bitgateway.c --- a/tools/firmware/rombios/32bitgateway.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/rombios/32bitgateway.c Wed Jan 28 13:06:45 2009 +0900 @@ -19,8 +19,10 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Copyright (C) IBM Corporation, 2006 + * Copyright (c) 2008, Citrix Systems, Inc. 
* * Author: Stefan Berger <stefanb@xxxxxxxxxx> + * Author: Keir Fraser <keir.fraser@xxxxxxxxxx> */ /* @@ -34,389 +36,142 @@ * (4 bytes) even for uint16_t, so casting to 32bit from bcc is a good idea. */ -#define SEGMENT_OFFSET 0xf0000 -#define REAL_MODE_CODE_SEGMENT 0xf000 +/* At most 32 bytes in argument list to a 32-bit function. */ +#define MAX_ARG_BYTES 32 -#define START_PM_CODE USE32 -#define END_PM_CODE USE16 +#define REAL_MODE_CODE_OFFSET 0xf0000 -/* definition of used code/data segment descriptors */ -#define PM_NORMAL_CS (gdt_entry_pm_cs - gdt_base) +/* Definitions of code/data segment descriptors. */ +#define PM_32BIT_CS (gdt_entry_pm_32bit_cs - gdt_base) #define PM_16BIT_CS (gdt_entry_pm_16bit_cs - gdt_base) #define PM_32BIT_DS (gdt_entry_pm_32bit_ds - gdt_base) +#define PM_16BIT_DS (gdt_entry_pm_16bit_ds - gdt_base) - ASM_START + .align 16 +gdt_base: + .word 0,0 + .byte 0,0,0,0 +gdt_entry_pm_32bit_cs: + .word 0xffff, 0x0000 + .byte 0x00, 0x9b, 0xcf, 0x00 +gdt_entry_pm_16bit_cs: + .word 0xffff, 0x0000 + .byte REAL_MODE_CODE_OFFSET >> 16, 0x9b, 0x0, 0x0 +gdt_entry_pm_32bit_ds: + .word 0xffff, 0x0000 + .byte 0x0, 0x93, 0xcf, 0x0 +gdt_entry_pm_16bit_ds: + .word 0xffff, 0x0000 + .byte 0x0, 0x93, 0x0, 0x0 +gdt_entry_end: - ; Switch into protected mode to allow access to 32 bit addresses. - ; This function allows switching into protected mode. 
- ; (the specs says big real mode, but that will not work) +protmode_gdtdesc: + .word (gdt_entry_end - gdt_base) - 1 + .long gdt_base | REAL_MODE_CODE_OFFSET + +realmode_gdtdesc: + .word 0xffff + .long 0x0 + +Upcall: + ; Do an upcall into 32 bit space ; - ; preserves all registers and prepares cs, ds, es, ss for usage - ; in protected mode; while in prot.mode interrupts remain disabled -switch_to_protmode: + ; Input: + ; bx: index of function to call + ; Ouput: + ; dx, ax: 32 bit result of call (even if 'void' is expected) + + ; Save caller state, stack frame offsets listed below +#define esp_off 0 +#define ss_off 4 +#define es_off 6 +#define ds_off 8 +#define flags_off 10 +#define retaddr_off 12 +#define args_off 14 + pushf cli + push ds + push es + push ss + push esp - ; have to fix the stack for proper return address in 32 bit mode - push WORD #(REAL_MODE_CODE_SEGMENT>>12) ;extended return address - push bp ;pop@A1 - mov bp, sp - push eax ;pop@A2 - mov eax, 2[bp] ; fix return address - rol eax, #16 - mov 2[bp], eax - - mov eax, esp - ror eax, #16 ; hi(esp) - - push bx ; preserve before function call - push cx - push dx - - push ax ; prepare stack for - push es ; call - push ds - push cs - push ss - call _store_segment_registers - add sp, #10 ; pop ax,es-ss - - pop dx ; restore after function call - pop cx - pop bx - - ; calculate protected-mode esp from ss:sp + ; Calculate protected-mode esp from ss:sp and esp, #0xffff xor eax, eax mov ax, ss - rol eax, #4 - add eax, esp - mov esp, eax + shl eax, #4 + add esp, eax + ; Switch to protected mode seg cs - lgdt my_gdtdesc ; switch to own table - + lgdt protmode_gdtdesc mov eax, cr0 - or al, #0x1 ; protected mode 'on' + or al, #0x1 ; protected mode on mov cr0, eax - - jmpf DWORD (SEGMENT_OFFSET | switch_to_protmode_goon_1), #PM_NORMAL_CS - - START_PM_CODE - -switch_to_protmode_goon_1: - mov ax, #PM_32BIT_DS ; 32 bit segment that allows - mov ds, ax ; to reach all 32 bit - mov es, ax ; addresses + jmpf DWORD 
(REAL_MODE_CODE_OFFSET|upcall1), #PM_32BIT_CS +upcall1: + USE32 + mov ax, #PM_32BIT_DS + mov ds, ax + mov es, ax mov ss, ax - pop eax ;@A2 - pop bp ;@A1 - ret + ; Marshal arguments and call 32-bit function + mov ecx, #MAX_ARG_BYTES/4 +upcall2: + push MAX_ARG_BYTES-4+args_off[esp] + loop upcall2 + mov eax, [BIOS_INFO_PHYSICAL_ADDRESS + BIOSINFO_OFF_bios32_entry] + call eax + add esp, #MAX_ARG_BYTES + mov ecx, eax ; Result in ecx - END_PM_CODE - - - - .align 16 -gdt_base: - ; see Intel SW Dev. Manuals section 3.4.5, Volume 3 for meaning of bits - .word 0,0 - .byte 0,0,0,0 - -gdt_entry_pm_cs: - ; 32 bit code segment for protected mode - .word 0xffff, 0x0000 - .byte 0x00, 0x9a, 0xcf, 0x00 - -gdt_entry_pm_16bit_cs: - ; temp. 16 bit code segment used while in protected mode - .word 0xffff, 0x0000 - .byte SEGMENT_OFFSET >> 16, 0x9a, 0x0, 0x0 - -gdt_entry_pm_32bit_ds: - ; (32 bit) data segment (r/w) reaching all possible areas in 32bit memory - ; 4kb granularity - .word 0xffff, 0x0000 - .byte 0x0, 0x92, 0xcf, 0x0 -gdt_entry_end: - -my_gdtdesc: - .word (gdt_entry_end - gdt_base) - 1 - .long gdt_base | SEGMENT_OFFSET - - -realmode_gdtdesc: ;to be used in real mode - .word 0xffff - .long 0x0 - - - -switch_to_realmode: - ; Implementation of switching from protected mode to real mode - ; prepares cs, es, ds, ss to be used in real mode - ; spills eax - START_PM_CODE - - ; need to fix up the stack to return in 16 bit mode - ; currently the 32 bit return address is on the stack - pop eax - push ax - - push bx ;pop@1 - push si ;pop@2 - - call _ebda_ss_offset32 ; get the offset of the ss - mov bx, ax ; entry within the ebda. - - jmpf switch_to_realmode_goon_1, #PM_16BIT_CS - - END_PM_CODE - -switch_to_realmode_goon_1: - mov eax, cr0 - and al, #0xfe ; protected mode 'off' - mov cr0, eax - - jmpf switch_to_realmode_goon_2, #REAL_MODE_CODE_SEGMENT - -switch_to_realmode_goon_2: - - ; get orig. 'ss' without using the stack (no 'call'!) 
- xor eax, eax ; clear upper 16 bits (and lower) - mov ax, #0x40 ; where is the ebda located? - mov ds, ax - mov si, #0xe - seg ds - mov ax, [si] ; ax = segment of ebda - - mov ds, ax ; segment of ebda - seg ds - mov ax, [bx] ; stack segment - bx has been set above - mov ss, ax - - ; from esp and ss calculate real-mode sp - rol eax, #4 + ; Restore real-mode stack pointer + xor eax, eax + mov ax, ss_off[esp] + mov bx, ax ; Real-mode ss in bx + shl eax, 4 sub esp, eax - push dx ;preserve before call(s) - push cx - push bx - - call _get_register_ds ; get orig. 'ds' + ; Return to real mode + jmpf upcall3, #PM_16BIT_CS +upcall3: + USE16 + mov ax, #PM_16BIT_DS mov ds, ax - call _get_register_es ; get orig. 'es' mov es, ax - call _get_register_esp_hi ; fix the upper 16 bits of esp - ror esp, #16 - mov sp, ax - rol esp, #16 - - pop bx - pop cx - pop dx - + mov ss, ax + mov eax, cr0 + and al, #0xfe ; protected mode off + mov cr0, eax + jmpf upcall4, #REAL_MODE_CODE_OFFSET>>4 +upcall4: seg cs lgdt realmode_gdtdesc - sti ; allow interrupts + ; Restore real-mode ss + mov ss, bx - pop si ;@2 - pop bx ;@1 + ; Convert result into dx:ax format + mov eax, ecx + ror eax, #16 + mov dx, ax + ror eax, #16 + ; Restore caller state and return + pop esp + pop bx ; skip ss + pop es + pop ds + popf ret - ASM_END - -/* - * Helper function to get the offset of the reg_ss within the ebda struct - * Only 'C' can tell the offset. - */ -Bit16u -ebda_ss_offset32() -{ - ASM_START - START_PM_CODE // need to have this - ASM_END // compiled for protected mode - return &EbdaData->upcall.reg_ss; // 'C' knows the offset! 
- ASM_START - END_PM_CODE - ASM_END -} - -/* - * Two often-used functions - */ -Bit16u -read_word_from_ebda(offset) - Bit16u offset; -{ - Bit16u ebda_seg = read_word(0x0040, 0x000E); - return read_word(ebda_seg, offset); -} - -Bit32u -read_dword_from_ebda(offset) - Bit16u offset; -{ - Bit16u ebda_seg = read_word(0x0040, 0x000E); - return read_dword(ebda_seg, offset); -} - -/* - * Store registers in the EBDA; used to keep the registers' - * content in a well-defined place during protected mode execution - */ - void -store_segment_registers(ss, cs, ds, es, esp_hi) - Bit16u ss, cs, ds, es, esp_hi; -{ - Bit16u ebda_seg = read_word(0x0040, 0x000E); - write_word(ebda_seg, &EbdaData->upcall.reg_ss, ss); - write_word(ebda_seg, &EbdaData->upcall.reg_cs, cs); - write_word(ebda_seg, &EbdaData->upcall.reg_ds, ds); - write_word(ebda_seg, &EbdaData->upcall.reg_es, es); - write_word(ebda_seg, &EbdaData->upcall.esp_hi, esp_hi); -} - - - void -store_returnaddress(retaddr) - Bit16u retaddr; -{ - Bit16u ebda_seg = read_word(0x0040, 0x000E); - write_word(ebda_seg, &EbdaData->upcall.retaddr, retaddr); -} - -Bit16u -get_returnaddress() -{ - return read_word_from_ebda(&EbdaData->upcall.retaddr); -} - -/* - * get the segment register 'cs' value from the EBDA - */ -Bit16u -get_register_cs() -{ - return read_word_from_ebda(&EbdaData->upcall.reg_cs); -} - -/* - * get the segment register 'ds' value from the EBDA - */ -Bit16u -get_register_ds() -{ - return read_word_from_ebda(&EbdaData->upcall.reg_ds); -} - -/* - * get the segment register 'es' value from the EBDA - */ -Bit16u -get_register_es() -{ - return read_word_from_ebda(&EbdaData->upcall.reg_es); -} - -/* - * get the upper 16 bits of the esp from the EBDA - */ -Bit16u -get_register_esp_hi() -{ - return read_word_from_ebda(&EbdaData->upcall.esp_hi); -} - - - -/********************************************************/ - - -ASM_START - -Upcall: - ; do the upcall into 32 bit space - ; clear the stack frame so that 32 bit space sees all the 
parameters - ; on the stack as if they were prepared for it - ; ---> take the 16 bit return address off the stack and remember it - ; - ; Input: - ; bx: index of function to call - ; Ouput: - ; dx, ax: 32 bit result of call (even if 'void' is expected) - - push bp ;pop @1 - mov bp, sp - push si ;pop @2 - - mov ax, 2[bp] ; 16 bit return address - push ax - call _store_returnaddress ; store away - pop ax - - ; XXX GDT munging requires ROM to be writable! - call _enable_rom_write_access - - rol bx, #2 - mov si, #jmptable - seg cs - mov eax, dword ptr [si+bx] ; address to call from table - - pop si ;@2 - pop bp ;@1 - - add sp, #2 ; remove 16bit return address from stack - - call switch_to_protmode - START_PM_CODE - - call eax ; call 32bit function - push eax ; preserve result - - call switch_to_realmode ; back to realmode - END_PM_CODE - - pop eax ; get result - - push word 0x0000 ; placeholder for 16 bit return address - push bp - mov bp,sp - push eax ; preserve work register - - call _disable_rom_write_access - - call _get_returnaddress - mov 2[bp], ax ; 16bit return address onto stack - - pop eax - pop bp - - ror eax, #16 ; result into dx/ax - mov dx, ax ; hi(res) -> dx - ror eax, #16 - - ret - - -/* macro for functions to declare their call into 32bit space */ MACRO DoUpcall - mov bx, #?1 - jmp Upcall + mov bx, #?1 + jmp Upcall MEND - -ASM_END - +#define X(idx, ret, fn, args...) 
_ ## fn: DoUpcall(idx) #include "32bitprotos.h" -#include "32bitgateway.h" - -#include "tcgbios.c" - -Bit32u get_s3_waking_vector() -{ - ASM_START - DoUpcall(IDX_GET_S3_WAKING_VECTOR) - ASM_END -} +#undef X diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bitgateway.h --- a/tools/firmware/rombios/32bitgateway.h Wed Jan 28 12:22:58 2009 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -#ifndef GATEWAY -#define GATEWAY - -#include "32bitprotos.h" - -void test_gateway(); - -/* extension for the EBDA */ -typedef struct { - Bit16u reg_ss; - Bit16u reg_cs; - Bit16u reg_ds; - Bit16u reg_es; - Bit16u esp_hi; - Bit16u retaddr; -} upcall_t; - -#endif diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bitprotos.h --- a/tools/firmware/rombios/32bitprotos.h Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/rombios/32bitprotos.h Wed Jan 28 13:06:45 2009 +0900 @@ -1,47 +1,16 @@ -#ifndef PROTOS_HIGHBIOS -#define PROTOS_HIGHBIOS - -/* shared include file for bcc and gcc */ - -/* bcc does not like 'enum' */ -#define IDX_TCGINTERRUPTHANDLER 0 -#define IDX_TCPA_ACPI_INIT 1 -#define IDX_TCPA_EXTEND_ACPI_LOG 2 -#define IDX_TCPA_CALLING_INT19H 3 -#define IDX_TCPA_RETURNED_INT19H 4 -#define IDX_TCPA_ADD_EVENT_SEPARATORS 5 -#define IDX_TCPA_WAKE_EVENT 6 -#define IDX_TCPA_ADD_BOOTDEVICE 7 -#define IDX_TCPA_START_OPTION_ROM_SCAN 8 -#define IDX_TCPA_OPTION_ROM 9 -#define IDX_TCPA_IPL 10 -#define IDX_TCPA_INITIALIZE_TPM 11 -#define IDX_TCPA_MEASURE_POST 12 -#define IDX_GET_S3_WAKING_VECTOR 13 -#define IDX_LAST 14 /* keep last! */ - -#ifdef GCC_PROTOS - #define PARMS(x...) x -#else - /* bcc doesn't want any parameter types in prototypes */ - #define PARMS(x...) 
-#endif - -Bit32u TCGInterruptHandler( PARMS(pushad_regs_t *regs, Bit32u esds, Bit32u flags_ptr)); - -void tcpa_acpi_init( PARMS(void) ); -Bit32u tcpa_extend_acpi_log( PARMS(Bit32u entry_ptr) ); -void tcpa_calling_int19h( PARMS(void) ); -void tcpa_returned_int19h( PARMS(void) ); -void tcpa_add_event_separators( PARMS(void) ); -void tcpa_wake_event( PARMS(void) ); -void tcpa_add_bootdevice( PARMS(Bit32u bootcd, Bit32u bootdrv) ); -void tcpa_start_option_rom_scan( PARMS(void) ); -void tcpa_option_rom( PARMS(Bit32u seg) ); -void tcpa_ipl( PARMS(Bit32u bootcd,Bit32u seg,Bit32u off,Bit32u count) ); -void tcpa_measure_post( PARMS(Bit32u from, Bit32u to) ); -Bit32u tcpa_initialize_tpm( PARMS(Bit32u physpres) ); - -Bit32u get_s3_waking_vector( PARMS(void) ); - -#endif +X(0, Bit32u, TCGInterruptHandler, + pushad_regs_t *regs, Bit32u esds, Bit32u flags_ptr) +X(1, void, tcpa_acpi_init, void) +X(2, Bit32u, tcpa_extend_acpi_log, Bit32u entry_ptr) +X(3, void, tcpa_calling_int19h,void) +X(4, void, tcpa_returned_int19h, void) +X(5, void, tcpa_add_event_separators, void) +X(6, void, tcpa_wake_event, void) +X(7, void, tcpa_add_bootdevice, Bit32u bootcd, Bit32u bootdrv) +X(8, void, tcpa_start_option_rom_scan, void) +X(9, void, tcpa_option_rom, Bit32u seg) +X(10, void, tcpa_ipl, Bit32u bootcd, Bit32u seg, Bit32u off, Bit32u count) +X(11, void, tcpa_measure_post, Bit32u from, Bit32u to) +X(12, Bit32u, tcpa_initialize_tpm, Bit32u physpres) +X(13, Bit32u, get_s3_waking_vector, void) +X(14, Bit32u, pmm, void *argp) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/Makefile --- a/tools/firmware/rombios/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/rombios/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -13,6 +13,7 @@ clean: subdirs-clean rm -f as86-sym.txt ld86-sym.txt rm -f rombios*.txt rombios*.sym usage biossums rm -f BIOS-bochs-* + rm -f $(DEPS) BIOS-bochs-latest: rombios.c biossums 32bitgateway.c tcgbios.c gcc -DBX_SMP_PROCESSORS=1 -E -P $< > _rombios_.c @@ -27,3 
+28,4 @@ biossums: biossums.c biossums: biossums.c gcc -o biossums biossums.c +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/rombios.c --- a/tools/firmware/rombios/rombios.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/rombios/rombios.c Wed Jan 28 13:06:45 2009 +0900 @@ -161,6 +161,8 @@ #define BX_TCGBIOS 0 /* main switch for TCG BIOS ext. */ +#define BX_PMM 1 /* POST Memory Manager */ + #define BX_MAX_ATA_INTERFACES 4 #define BX_MAX_ATA_DEVICES (BX_MAX_ATA_INTERFACES*2) @@ -726,7 +728,9 @@ typedef struct { } cdemu_t; #endif // BX_ELTORITO_BOOT -#include "32bitgateway.h" +#define X(idx, ret, fn, arg...) ret fn (); +#include "32bitprotos.h" +#undef X // for access to EBDA area // The EBDA structure should conform to @@ -752,8 +756,6 @@ typedef struct { // El Torito Emulation data cdemu_t cdemu; #endif // BX_ELTORITO_BOOT - - upcall_t upcall; } ebda_data_t; #define EBDA_CMOS_SHUTDOWN_STATUS_OFFSET 1 @@ -1416,31 +1418,24 @@ fixup_base_mem_in_k() write_word(0x40, 0x13, base_mem >> 10); } -void -set_rom_write_access(action) - Bit16u action; -{ - Bit16u off = (Bit16u)&((struct bios_info *)0)->xen_pfiob; ASM_START - mov si,.set_rom_write_access.off[bp] +_rom_write_access_control: push ds - mov ax,#(ACPI_PHYSICAL_ADDRESS >> 4) + mov ax,#(BIOS_INFO_PHYSICAL_ADDRESS >> 4) mov ds,ax - mov dx,[si] + mov ax,[BIOSINFO_OFF_xen_pfiob] pop ds - mov ax,.set_rom_write_access.action[bp] - out dx,al + ret ASM_END -} void enable_rom_write_access() { - set_rom_write_access(0); + outb(rom_write_access_control(), 0); } void disable_rom_write_access() { - set_rom_write_access(PFFLAG_ROM_LOCK); + outb(rom_write_access_control(), PFFLAG_ROM_LOCK); } #endif /* HVMASSIST */ @@ -2054,7 +2049,10 @@ print_bios_banner() "rombios32 " #endif #if BX_TCGBIOS - "TCG-enabled" + "TCG-enabled " +#endif +#if BX_PMM + "PMM " #endif "\n\n"); } @@ -9499,8 +9497,9 @@ use16 386 #endif +#include "32bitgateway.c" ASM_END -#include "32bitgateway.c" +#include "tcgbios.c" 
ASM_START ;-------------------- @@ -10355,6 +10354,48 @@ rombios32_gdt: dw 0xffff, 0, 0x9300, 0x0000 ; 16 bit data segment base=0x0 limit=0xffff #endif // BX_ROMBIOS32 +#if BX_PMM +; according to POST Memory Manager Specification Version 1.01 +.align 16 +pmm_structure: + db 0x24,0x50,0x4d,0x4d ;; "$PMM" signature + db 0x01 ;; revision + db 16 ;; length + db (-((pmm_entry_point>>8)+pmm_entry_point+0x20f))&0xff;; checksum + dw pmm_entry_point,0xf000 ;; far call entrypoint + db 0,0,0,0,0 ;; reserved + +pmm_entry_point: + pushf + pushad +; Calculate protected-mode address of PMM function args + xor eax, eax + mov ax, sp + xor ebx, ebx + mov bx, ss + shl ebx, 4 + lea ebx, [eax+ebx+38] ;; ebx=(ss<<4)+sp+4(far call)+2(pushf)+32(pushad) + push ebx +; +; Stack layout at this point: +; +; : +0x0 +0x2 +0x4 +0x6 +0x8 +0xa +0xc +0xe +; ----------------------------------------------------------------------- +; sp : [&arg1 ][edi ][esi ][ebp ] +; sp+0x10: [esp ][ebx ][edx ][ecx ] +; sp+0x20: [eax ][flags ][ip ][cs ][arg1 ][arg2, ... 
+; + call _pmm + mov bx, sp +SEG SS + mov [bx+0x20], ax +SEG SS + mov [bx+0x18], dx + pop ebx + popad + popf + retf +#endif // BX_PMM ; parallel port detection: base address in DX, index in BX, timeout in CL detect_parport: @@ -10447,7 +10488,9 @@ rom_scan: ;; 3 ROM initialization entry point (FAR CALL) #if BX_TCGBIOS + push ax call _tcpa_start_option_rom_scan /* specs: 3.2.3.3 + 10.4.3 */ + pop ax #endif rom_scan_loop: @@ -11790,15 +11833,6 @@ static Bit8u vgafont8[128*8]= #ifdef HVMASSIST ASM_START -// space for addresses in 32bit BIOS area; currently 256/4 entries -// are allocated -.org 0xcb00 -jmptable: -db 0x5F, 0x5F, 0x5F, 0x4A, 0x4D, 0x50, 0x54 ;; ___JMPT -dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 64 bytes -dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 128 bytes -dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 192 bytes - // // MP Tables // just carve out some blank space for HVMLOADER to write the MP tables to diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/tcgbios.c --- a/tools/firmware/rombios/tcgbios.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/rombios/tcgbios.c Wed Jan 28 13:06:45 2009 +0900 @@ -25,162 +25,6 @@ Support for TCPA ACPI logging ******************************************************************/ -/* - * Extend the ACPI log with the given entry by copying the - * entry data into the log. - * Input - * Pointer to the structure to be copied into the log - * - * Output: - * lower 16 bits of return code contain entry number - * if entry number is '0', then upper 16 bits contain error code. - */ -Bit32u tcpa_extend_acpi_log(entry_ptr) - Bit32u entry_ptr; -{ - ASM_START - DoUpcall(IDX_TCPA_EXTEND_ACPI_LOG) - ASM_END -} - - -/* - initialize the TCPA ACPI subsystem; find the ACPI tables and determine - where the TCPA table is. 
- */ - void -tcpa_acpi_init() -{ - ASM_START - DoUpcall(IDX_TCPA_ACPI_INIT) - ASM_END -} - - -/* - * Add measurement to log about call of int 19h - */ - void -tcpa_calling_int19h() -{ - ASM_START - DoUpcall(IDX_TCPA_CALLING_INT19H) - ASM_END -} - -/* - * Add measurement to log about retuning from int 19h - */ - void -tcpa_returned_int19h() -{ - ASM_START - DoUpcall(IDX_TCPA_RETURNED_INT19H) - ASM_END -} - -/* - * Add event separators for PCRs 0 to 7; specs 8.2.3 - */ - void -tcpa_add_event_separators() -{ - ASM_START - DoUpcall(IDX_TCPA_ADD_EVENT_SEPARATORS) - ASM_END -} - - -/* - * Add a wake event to the log - */ - void -tcpa_wake_event() -{ - ASM_START - DoUpcall(IDX_TCPA_WAKE_EVENT) - ASM_END -} - - -/* - * Add measurement to the log about option rom scan - * 10.4.3 : action 14 - */ - void -tcpa_start_option_rom_scan() -{ - ASM_START - DoUpcall(IDX_TCPA_START_OPTION_ROM_SCAN) - ASM_END -} - - -/* - * Add measurement to the log about an option rom - */ - void -tcpa_option_rom(seg) - Bit32u seg; -{ - ASM_START - DoUpcall(IDX_TCPA_OPTION_ROM) - ASM_END -} - -/* - * Add a measurement regarding the boot device (CDRom, Floppy, HDD) to - * the list of measurements. 
- */ -void - tcpa_add_bootdevice(bootcd, bootdrv) - Bit32u bootcd; - Bit32u bootdrv; -{ - ASM_START - DoUpcall(IDX_TCPA_ADD_BOOTDEVICE) - ASM_END -} - -/* - * Add a measurement to the log in support of 8.2.5.3 - * Creates two log entries - * - * Input parameter: - * seg : segment where the IPL data are located - */ - void - tcpa_ipl(bootcd,seg,off,count) - Bit32u bootcd; - Bit32u seg; - Bit32u off; - Bit32u count; -{ - ASM_START - DoUpcall(IDX_TCPA_IPL) - ASM_END -} - - -Bit32u -tcpa_initialize_tpm(physpres) - Bit32u physpres; -{ - ASM_START - DoUpcall(IDX_TCPA_INITIALIZE_TPM) - ASM_END -} - -void -tcpa_measure_post(from, to) - Bit32u from; - Bit32u to; -{ - ASM_START - DoUpcall(IDX_TCPA_MEASURE_POST) - ASM_END -} - ASM_START MACRO POST_MEASURE push word #0x000f @@ -205,18 +49,6 @@ tcpa_do_measure_POSTs() POST_MEASURE(timer_tick_post, int76_handler) ret - ASM_END -} - -Bit32u -TCGInterruptHandler(regs_ptr, es, ds, flags_ptr) - Bit32u regs_ptr; - Bit16u es; - Bit16u ds; - Bit32u flags_ptr; -{ - ASM_START - DoUpcall(IDX_TCGINTERRUPTHANDLER) ASM_END } diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vbe.c --- a/tools/firmware/vgabios/vbe.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/vgabios/vbe.c Wed Jan 28 13:06:45 2009 +0900 @@ -37,8 +37,6 @@ #include "vbe.h" #include "vbetables.h" - -#define VBE_TOTAL_VIDEO_MEMORY_DIV_64K (VBE_DISPI_TOTAL_VIDEO_MEMORY_MB*1024/64) // The current OEM Software Revision of this VBE Bios #define VBE_OEM_SOFTWARE_REV 0x0002; @@ -821,7 +819,8 @@ Bit16u *AX;Bit16u ES;Bit16u DI; vbe_info_block.VideoModePtr_Off= DI + 34; // VBE Total Memory (in 64b blocks) - vbe_info_block.TotalMemory = VBE_TOTAL_VIDEO_MEMORY_DIV_64K; + outw(VBE_DISPI_IOPORT_INDEX, VBE_DISPI_INDEX_VIDEO_MEMORY_64K); + vbe_info_block.TotalMemory = inw(VBE_DISPI_IOPORT_DATA); if (vbe2_info) { @@ -846,7 +845,8 @@ Bit16u *AX;Bit16u ES;Bit16u DI; do { if ((cur_info->info.XResolution <= dispi_get_max_xres()) && - (cur_info->info.BitsPerPixel <= 
dispi_get_max_bpp())) { + (cur_info->info.BitsPerPixel <= dispi_get_max_bpp()) && + (cur_info->info.XResolution * cur_info->info.XResolution * cur_info->info.BitsPerPixel <= vbe_info_block.TotalMemory << 19 )) { #ifdef DEBUG printf("VBE found mode %x => %x\n", cur_info->mode,cur_mode); #endif @@ -855,7 +855,7 @@ Bit16u *AX;Bit16u ES;Bit16u DI; cur_ptr+=2; } else { #ifdef DEBUG - printf("VBE mode %x (xres=%x / bpp=%02x) not supported by display\n", cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel); + printf("VBE mode %x (xres=%x / bpp=%02x) not supported \n", cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel); #endif } cur_info++; @@ -913,7 +913,13 @@ Bit16u *AX;Bit16u CX; Bit16u ES;Bit16u D info.WinFuncPtr = 0xC0000000UL; *(Bit16u *)&(info.WinFuncPtr) = (Bit16u)(dispi_set_bank_farcall); } - + outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_LFB_ADDRESS_H); + info.PhysBasePtr = inw(VBE_DISPI_IOPORT_DATA); + info.PhysBasePtr = info.PhysBasePtr << 16; +#if 0 + outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_LFB_ADDRESS_L); + info.PhysBasePtr |= inw(VBE_DISPI_IOPORT_DATA); +#endif result = 0x4f; } else diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vbe.h --- a/tools/firmware/vgabios/vbe.h Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/vgabios/vbe.h Wed Jan 28 13:06:45 2009 +0900 @@ -275,39 +275,41 @@ typedef struct ModeInfoListItem // like 0xE0000000 - #define VBE_DISPI_BANK_ADDRESS 0xA0000 - #define VBE_DISPI_BANK_SIZE_KB 64 + #define VBE_DISPI_BANK_ADDRESS 0xA0000 + #define VBE_DISPI_BANK_SIZE_KB 64 - #define VBE_DISPI_MAX_XRES 1024 - #define VBE_DISPI_MAX_YRES 768 + #define VBE_DISPI_MAX_XRES 2560 + #define VBE_DISPI_MAX_YRES 1600 - #define VBE_DISPI_IOPORT_INDEX 0x01CE - #define VBE_DISPI_IOPORT_DATA 0x01CF + #define VBE_DISPI_IOPORT_INDEX 0x01CE + #define VBE_DISPI_IOPORT_DATA 0x01CF - #define VBE_DISPI_INDEX_ID 0x0 - #define VBE_DISPI_INDEX_XRES 0x1 - #define VBE_DISPI_INDEX_YRES 0x2 - #define VBE_DISPI_INDEX_BPP 
0x3 - #define VBE_DISPI_INDEX_ENABLE 0x4 - #define VBE_DISPI_INDEX_BANK 0x5 - #define VBE_DISPI_INDEX_VIRT_WIDTH 0x6 - #define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7 - #define VBE_DISPI_INDEX_X_OFFSET 0x8 - #define VBE_DISPI_INDEX_Y_OFFSET 0x9 - - #define VBE_DISPI_ID0 0xB0C0 - #define VBE_DISPI_ID1 0xB0C1 - #define VBE_DISPI_ID2 0xB0C2 - #define VBE_DISPI_ID3 0xB0C3 - #define VBE_DISPI_ID4 0xB0C4 - - #define VBE_DISPI_DISABLED 0x00 - #define VBE_DISPI_ENABLED 0x01 - #define VBE_DISPI_GETCAPS 0x02 - #define VBE_DISPI_8BIT_DAC 0x20 - #define VBE_DISPI_LFB_ENABLED 0x40 - #define VBE_DISPI_NOCLEARMEM 0x80 - - #define VBE_DISPI_LFB_PHYSICAL_ADDRESS 0xE0000000 + #define VBE_DISPI_INDEX_ID 0x0 + #define VBE_DISPI_INDEX_XRES 0x1 + #define VBE_DISPI_INDEX_YRES 0x2 + #define VBE_DISPI_INDEX_BPP 0x3 + #define VBE_DISPI_INDEX_ENABLE 0x4 + #define VBE_DISPI_INDEX_BANK 0x5 + #define VBE_DISPI_INDEX_VIRT_WIDTH 0x6 + #define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7 + #define VBE_DISPI_INDEX_X_OFFSET 0x8 + #define VBE_DISPI_INDEX_Y_OFFSET 0x9 + #define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa + #define VBE_DISPI_INDEX_LFB_ADDRESS_H 0xb + #define VBE_DISPI_INDEX_LFB_ADDRESS_L 0xc + + #define VBE_DISPI_LFB_PHYSICAL_ADDRESS 0xE0000000 + #define VBE_DISPI_ID0 0xB0C0 + #define VBE_DISPI_ID1 0xB0C1 + #define VBE_DISPI_ID2 0xB0C2 + #define VBE_DISPI_ID3 0xB0C3 + #define VBE_DISPI_ID4 0xB0C4 + + #define VBE_DISPI_DISABLED 0x00 + #define VBE_DISPI_ENABLED 0x01 + #define VBE_DISPI_GETCAPS 0x02 + #define VBE_DISPI_8BIT_DAC 0x20 + #define VBE_DISPI_LFB_ENABLED 0x40 + #define VBE_DISPI_NOCLEARMEM 0x80 #endif diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vbetables-gen.c --- a/tools/firmware/vgabios/vbetables-gen.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/vgabios/vbetables-gen.c Wed Jan 28 13:06:45 2009 +0900 @@ -2,7 +2,7 @@ #include <stdlib.h> #include <stdio.h> -#define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 8 +#define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 16 typedef struct { int width; @@ -42,19 
+42,40 @@ ModeInfo modes[] = { { 1600, 1200, 24 , 0x11F}, /* BOCHS/PLE, 86 'own' mode numbers */ -{ 320, 200, 32 , 0x140}, -{ 640, 400, 32 , 0x141}, -{ 640, 480, 32 , 0x142}, -{ 800, 600, 32 , 0x143}, -{ 1024, 768, 32 , 0x144}, -{ 1280, 1024, 32 , 0x145}, -{ 320, 200, 8 , 0x146}, -{ 1600, 1200, 32 , 0x147}, -{ 1152, 864, 8 , 0x148}, +{ 320, 200, 32 , 0x140}, +{ 640, 400, 32 , 0x141}, +{ 640, 480, 32 , 0x142}, +{ 800, 600, 32 , 0x143}, +{ 1024, 768, 32 , 0x144}, +{ 1280, 1024, 32 , 0x145}, +{ 320, 200, 8 , 0x146}, +{ 1600, 1200, 32 , 0x147}, +{ 1152, 864, 8 , 0x148}, { 1152, 864, 15 , 0x149}, { 1152, 864, 16 , 0x14a}, { 1152, 864, 24 , 0x14b}, { 1152, 864, 32 , 0x14c}, +{ 1280, 800, 16 , 0x178}, +{ 1280, 800, 24 , 0x179}, +{ 1280, 800, 32 , 0x17a}, +{ 1280, 960, 16 , 0x17b}, +{ 1280, 960, 24 , 0x17c}, +{ 1280, 960, 32 , 0x17d}, +{ 1440, 900, 16 , 0x17e}, +{ 1440, 900, 24 , 0x17f}, +{ 1440, 900, 32 , 0x180}, +{ 1400, 1050, 16 , 0x181}, +{ 1400, 1050, 24 , 0x182}, +{ 1400, 1050, 32 , 0x183}, +{ 1680, 1050, 16 , 0x184}, +{ 1680, 1050, 24 , 0x185}, +{ 1680, 1050, 32 , 0x186}, +{ 1920, 1200, 16 , 0x187}, +{ 1920, 1200, 24 , 0x188}, +{ 1920, 1200, 32 , 0x189}, +{ 2560, 1600, 16 , 0x18a}, +{ 2560, 1600, 24 , 0x18b}, +{ 2560, 1600, 32 , 0x18c}, { 0, }, }; diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vgabios.c --- a/tools/firmware/vgabios/vgabios.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/firmware/vgabios/vgabios.c Wed Jan 28 13:06:45 2009 +0900 @@ -3811,9 +3811,9 @@ void printf(s) for (i=0; i<format_width; i++) { nibble = (arg >> (4 * digit)) & 0x000f; if (nibble <= 9) - outb(0x0500, nibble + '0'); + outb(0xe9, nibble + '0'); else - outb(0x0500, (nibble - 10) + 'A'); + outb(0xe9, (nibble - 10) + 'A'); digit--; } in_format = 0; @@ -3823,7 +3823,7 @@ void printf(s) // } } else { - outb(0x0500, c); + outb(0xe9, c); } s ++; } diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/flask/libflask/Makefile --- a/tools/flask/libflask/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ 
b/tools/flask/libflask/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -16,7 +16,6 @@ CFLAGS += $(INCLUDES) -I./include -I$( # Get gcc to generate the dependencies for us. CFLAGS += -Wp,-MD,.$(@F).d LDFLAGS += -L. -DEPS = .*.d LIB_OBJS := $(patsubst %.c,%.o,$(SRCS)) PIC_OBJS := $(patsubst %.c,%.opic,$(SRCS)) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/flask/loadpolicy/Makefile --- a/tools/flask/loadpolicy/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/flask/loadpolicy/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -7,9 +7,6 @@ LIBFLASK_ROOT = $(XEN_ROOT)/tools/flask/ PROFILE=#-pg BASECFLAGS=-Wall -g -Werror -# Make gcc generate dependencies. -BASECFLAGS += -Wp,-MD,.$(@F).d -PROG_DEP = .*.d BASECFLAGS+= $(PROFILE) #BASECFLAGS+= -I$(XEN_ROOT)/tools BASECFLAGS+= $(CFLAGS_libxenctrl) @@ -39,7 +36,7 @@ clean: clean: rm -f *.o *.opic *.so rm -f $(CLIENTS) - $(RM) $(PROG_DEP) + $(RM) $(DEPS) .PHONY: print-dir print-dir: @@ -54,7 +51,7 @@ install: all $(INSTALL_DIR) $(DESTDIR)$(SBINDIR) $(INSTALL_PROG) $(CLIENTS) $(DESTDIR)$(SBINDIR) --include $(PROG_DEP) +-include $(DEPS) # never delete any intermediate files. .SECONDARY: diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/fs-back/Makefile --- a/tools/fs-back/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/fs-back/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -12,10 +12,6 @@ CFLAGS += $(CFLAGS_libxenstore) CFLAGS += $(CFLAGS_libxenstore) CFLAGS += $(INCLUDES) -I. CFLAGS += -D_GNU_SOURCE - -# Get gcc to generate the dependencies for us. -CFLAGS += -Wp,-MD,.$(@F).d -DEPS = .*.d LIBS := -L. -L.. 
-L../lib LIBS += $(LDFLAGS_libxenctrl) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/include/Makefile --- a/tools/include/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/include/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -10,11 +10,12 @@ xen-foreign: xen/.dir: @rm -rf xen - mkdir xen + mkdir -p xen/libelf ln -sf ../$(XEN_ROOT)/xen/include/public/COPYING xen ln -sf $(addprefix ../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) xen ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) xen ln -sf ../xen-sys/$(XEN_OS) xen/sys + ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/xen/,libelf.h elfstructs.h) xen/libelf/ ln -s ../xen-foreign xen/foreign touch $@ diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/include/xen-foreign/reference.size --- a/tools/include/xen-foreign/reference.size Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/include/xen-foreign/reference.size Wed Jan 28 13:06:45 2009 +0900 @@ -1,7 +1,7 @@ structs | x86_32 x86_64 ia64 -start_info | 1104 1152 1152 +start_info | 1112 1168 1168 trap_info | 8 16 - pt_fpreg | - - 16 cpu_user_regs | 68 200 - diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libaio/src/Makefile --- a/tools/libaio/src/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libaio/src/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -6,7 +6,7 @@ libdir=$(prefix)/lib libdir=$(prefix)/lib ARCH := $(shell uname -m | sed -e s/i.86/i386/) -CFLAGS := -nostdlib -nostartfiles -Wall -I. -g -fomit-frame-pointer -O2 -fPIC +CFLAGS = -nostdlib -nostartfiles -Wall -I. 
-g -fomit-frame-pointer -O2 -fPIC SO_CFLAGS=-shared $(CFLAGS) L_CFLAGS=$(CFLAGS) LINK_FLAGS= diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libfsimage/Rules.mk --- a/tools/libfsimage/Rules.mk Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libfsimage/Rules.mk Wed Jan 28 13:06:45 2009 +0900 @@ -1,8 +1,6 @@ include $(XEN_ROOT)/tools/Rules.mk include $(XEN_ROOT)/tools/Rules.mk -DEPS = .*.d - -CFLAGS += -I$(XEN_ROOT)/tools/libfsimage/common/ -Werror -Wp,-MD,.$(@F).d +CFLAGS += -I$(XEN_ROOT)/tools/libfsimage/common/ -Werror LDFLAGS += -L../common/ PIC_OBJS := $(patsubst %.c,%.opic,$(LIB_SRCS-y)) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libfsimage/common/Makefile --- a/tools/libfsimage/common/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libfsimage/common/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -3,9 +3,6 @@ include $(XEN_ROOT)/tools/Rules.mk MAJOR = 1.0 MINOR = 0 - -CFLAGS += -Werror -Wp,-MD,.$(@F).d -DEPS = .*.d LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS LDFLAGS-$(CONFIG_Linux) = -Wl,mapfile-GNU diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/Makefile --- a/tools/libxc/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -1,7 +1,7 @@ XEN_ROOT = ../.. XEN_ROOT = ../.. include $(XEN_ROOT)/tools/Rules.mk -MAJOR = 3.2 +MAJOR = 3.4 MINOR = 0 CTRL_SRCS-y := @@ -62,10 +62,7 @@ CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE # libraries. #CFLAGS += -DVALGRIND -O0 -ggdb3 -# Get gcc to generate the dependencies for us. -CFLAGS += -Wp,-MD,.$(@F).d LDFLAGS += -L. 
-DEPS = .*.d CTRL_LIB_OBJS := $(patsubst %.c,%.o,$(CTRL_SRCS-y)) CTRL_PIC_OBJS := $(patsubst %.c,%.opic,$(CTRL_SRCS-y)) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core.c --- a/tools/libxc/xc_core.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xc_core.c Wed Jan 28 13:06:45 2009 +0900 @@ -57,9 +57,6 @@ /* number of pages to write at a time */ #define DUMP_INCREMENT (4 * 1024) - -/* Don't yet support cross-address-size core dump */ -#define guest_width (sizeof (unsigned long)) /* string table */ struct xc_core_strtab { @@ -240,7 +237,7 @@ xc_core_ehdr_init(Elf64_Ehdr *ehdr) ehdr->e_ident[EI_ABIVERSION] = EV_CURRENT; ehdr->e_type = ET_CORE; - ehdr->e_machine = ELF_ARCH_MACHINE; + /* e_machine will be filled in later */ ehdr->e_version = EV_CURRENT; ehdr->e_entry = 0; ehdr->e_phoff = 0; @@ -359,7 +356,8 @@ elfnote_dump_core_header( } static int -elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle) +elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle, + unsigned int guest_width) { int sts; struct elfnote elfnote; @@ -371,6 +369,12 @@ elfnote_dump_xen_version(void *args, dum elfnote.descsz = sizeof(xen_version); elfnote.type = XEN_ELFNOTE_DUMPCORE_XEN_VERSION; elfnote_fill_xen_version(xc_handle, &xen_version); + if (guest_width < sizeof(unsigned long)) + { + // 32 bit elf file format differs in pagesize's alignment + char *p = (char *)&xen_version.pagesize; + memmove(p - 4, p, sizeof(xen_version.pagesize)); + } sts = dump_rtn(args, (char*)&elfnote, sizeof(elfnote)); if ( sts != 0 ) return sts; @@ -396,6 +400,24 @@ elfnote_dump_format_version(void *args, return dump_rtn(args, (char*)&format_version, sizeof(format_version)); } +static int +get_guest_width(int xc_handle, + uint32_t domid, + unsigned int *guest_width) +{ + DECLARE_DOMCTL; + + memset(&domctl, 0, sizeof(domctl)); + domctl.domain = domid; + domctl.cmd = XEN_DOMCTL_get_address_size; + + if ( do_domctl(xc_handle, &domctl) != 0 ) + return 1; + + 
*guest_width = domctl.u.address_size.size / 8; + return 0; +} + int xc_domain_dumpcore_via_callback(int xc_handle, uint32_t domid, @@ -403,7 +425,8 @@ xc_domain_dumpcore_via_callback(int xc_h dumpcore_rtn_t dump_rtn) { xc_dominfo_t info; - shared_info_t *live_shinfo = NULL; + shared_info_any_t *live_shinfo = NULL; + unsigned int guest_width; int nr_vcpus = 0; char *dump_mem, *dump_mem_start = NULL; @@ -437,6 +460,12 @@ xc_domain_dumpcore_via_callback(int xc_h uint16_t strtab_idx; struct xc_core_section_headers *sheaders = NULL; Elf64_Shdr *shdr; + + if ( get_guest_width(xc_handle, domid, &guest_width) != 0 ) + { + PERROR("Could not get address size for domain"); + return sts; + } xc_core_arch_context_init(&arch_ctxt); if ( (dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL ) @@ -500,7 +529,7 @@ xc_domain_dumpcore_via_callback(int xc_h goto out; } - sts = xc_core_arch_map_p2m(xc_handle, &info, live_shinfo, + sts = xc_core_arch_map_p2m(xc_handle, guest_width, &info, live_shinfo, &p2m, &p2m_size); if ( sts != 0 ) goto out; @@ -676,6 +705,7 @@ xc_domain_dumpcore_via_callback(int xc_h /* write out elf header */ ehdr.e_shnum = sheaders->num; ehdr.e_shstrndx = strtab_idx; + ehdr.e_machine = ELF_ARCH_MACHINE; sts = dump_rtn(args, (char*)&ehdr, sizeof(ehdr)); if ( sts != 0 ) goto out; @@ -697,7 +727,7 @@ xc_domain_dumpcore_via_callback(int xc_h goto out; /* elf note section: xen version */ - sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle); + sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle, guest_width); if ( sts != 0 ) goto out; @@ -757,9 +787,21 @@ xc_domain_dumpcore_via_callback(int xc_h if ( !auto_translated_physmap ) { - gmfn = p2m[i]; - if ( gmfn == INVALID_P2M_ENTRY ) - continue; + if ( guest_width >= sizeof(unsigned long) ) + { + if ( guest_width == sizeof(unsigned long) ) + gmfn = p2m[i]; + else + gmfn = ((uint64_t *)p2m)[i]; + if ( gmfn == INVALID_P2M_ENTRY ) + continue; + } + else + { + gmfn = ((uint32_t *)p2m)[i]; + if ( gmfn == 
(uint32_t)INVALID_P2M_ENTRY ) + continue; + } p2m_array[j].pfn = i; p2m_array[j].gmfn = gmfn; @@ -802,7 +844,7 @@ copy_done: /* When live dump-mode (-L option) is specified, * guest domain may reduce memory. pad with zero pages. */ - IPRINTF("j (%ld) != nr_pages (%ld)", j , nr_pages); + IPRINTF("j (%ld) != nr_pages (%ld)", j, nr_pages); memset(dump_mem_start, 0, PAGE_SIZE); for (; j < nr_pages; j++) { sts = dump_rtn(args, dump_mem_start, PAGE_SIZE); @@ -891,7 +933,7 @@ xc_domain_dumpcore(int xc_handle, struct dump_args da; int sts; - if ( (da.fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0 ) + if ( (da.fd = open(corename, O_CREAT|O_RDWR|O_TRUNC, S_IWUSR|S_IRUSR)) < 0 ) { PERROR("Could not open corefile %s", corename); return -errno; diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core.h --- a/tools/libxc/xc_core.h Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xc_core.h Wed Jan 28 13:06:45 2009 +0900 @@ -23,7 +23,7 @@ #include "xen/version.h" #include "xg_private.h" -#include "xen/elfstructs.h" +#include "xen/libelf/elfstructs.h" /* section names */ #define XEN_DUMPCORE_SEC_NOTE ".note.Xen" @@ -136,12 +136,12 @@ struct xc_core_arch_context; struct xc_core_arch_context; int xc_core_arch_memory_map_get(int xc_handle, struct xc_core_arch_context *arch_ctxt, - xc_dominfo_t *info, shared_info_t *live_shinfo, + xc_dominfo_t *info, shared_info_any_t *live_shinfo, xc_core_memory_map_t **mapp, unsigned int *nr_entries); -int xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info, - shared_info_t *live_shinfo, xen_pfn_t **live_p2m, - unsigned long *pfnp); +int xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, + xc_dominfo_t *info, shared_info_any_t *live_shinfo, + xen_pfn_t **live_p2m, unsigned long *pfnp); #if defined (__i386__) || defined (__x86_64__) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core_ia64.c --- a/tools/libxc/xc_core_ia64.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xc_core_ia64.c Wed Jan 28 13:06:45 2009 +0900 @@ 
-68,7 +68,7 @@ xc_core_arch_auto_translated_physmap(con /* see setup_guest() @ xc_linux_build.c */ static int memory_map_get_old_domu(int xc_handle, xc_dominfo_t *info, - shared_info_t *live_shinfo, + shared_info_any_t *live_shinfo, xc_core_memory_map_t **mapp, unsigned int *nr_entries) { xc_core_memory_map_t *map = NULL; @@ -96,7 +96,7 @@ out: /* see setup_guest() @ xc_ia64_hvm_build.c */ static int memory_map_get_old_hvm(int xc_handle, xc_dominfo_t *info, - shared_info_t *live_shinfo, + shared_info_any_t *live_shinfo, xc_core_memory_map_t **mapp, unsigned int *nr_entries) { const xc_core_memory_map_t gfw_map[] = { @@ -155,7 +155,7 @@ out: static int memory_map_get_old(int xc_handle, xc_dominfo_t *info, - shared_info_t *live_shinfo, + shared_info_any_t *live_shinfo, xc_core_memory_map_t **mapp, unsigned int *nr_entries) { if ( info->hvm ) @@ -170,7 +170,8 @@ int int xc_core_arch_memory_map_get(int xc_handle, struct xc_core_arch_context *arch_ctxt, - xc_dominfo_t *info, shared_info_t *live_shinfo, + xc_dominfo_t *info, + shared_info_any_t *live_shinfo, xc_core_memory_map_t **mapp, unsigned int *nr_entries) { @@ -190,8 +191,8 @@ xc_core_arch_memory_map_get(int xc_handl } /* copy before use in case someone updating them */ - if (xc_ia64_copy_memmap(xc_handle, info->domid, live_shinfo, &memmap_info, - NULL)) { + if (xc_ia64_copy_memmap(xc_handle, info->domid, &live_shinfo->s, + &memmap_info, NULL)) { goto old; } @@ -235,8 +236,8 @@ old: } int -xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info, - shared_info_t *live_shinfo, xen_pfn_t **live_p2m, +xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info, + shared_info_any_t *live_shinfo, xen_pfn_t **live_p2m, unsigned long *pfnp) { /* diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core_x86.c --- a/tools/libxc/xc_core_x86.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xc_core_x86.c Wed Jan 28 13:06:45 2009 +0900 @@ -20,9 +20,25 @@ #include "xg_private.h" #include "xc_core.h" - -/* 
Don't yet support cross-address-size core dump */ -#define guest_width (sizeof (unsigned long)) +#include "xc_e820.h" + +#define GET_FIELD(_p, _f) ((guest_width==8) ? ((_p)->x64._f) : ((_p)->x32._f)) + +#ifndef MAX +#define MAX(_a, _b) ((_a) >= (_b) ? (_a) : (_b)) +#endif + +int +xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt, + unsigned long pfn) +{ + if ((pfn >= 0xa0 && pfn < 0xc0) /* VGA hole */ + || (pfn >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) + && pfn < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ + return 0; + return 1; +} + static int nr_gpfns(int xc_handle, domid_t domid) { @@ -37,7 +53,7 @@ xc_core_arch_auto_translated_physmap(con int xc_core_arch_memory_map_get(int xc_handle, struct xc_core_arch_context *unused, - xc_dominfo_t *info, shared_info_t *live_shinfo, + xc_dominfo_t *info, shared_info_any_t *live_shinfo, xc_core_memory_map_t **mapp, unsigned int *nr_entries) { @@ -60,17 +76,22 @@ xc_core_arch_memory_map_get(int xc_handl } int -xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info, - shared_info_t *live_shinfo, xen_pfn_t **live_p2m, +xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info, + shared_info_any_t *live_shinfo, xen_pfn_t **live_p2m, unsigned long *pfnp) { /* Double and single indirect references to the live P2M table */ xen_pfn_t *live_p2m_frame_list_list = NULL; xen_pfn_t *live_p2m_frame_list = NULL; + /* Copies of the above. 
*/ + xen_pfn_t *p2m_frame_list_list = NULL; + xen_pfn_t *p2m_frame_list = NULL; + uint32_t dom = info->domid; unsigned long p2m_size = nr_gpfns(xc_handle, info->domid); int ret = -1; int err; + int i; if ( p2m_size < info->nr_pages ) { @@ -80,17 +101,36 @@ xc_core_arch_map_p2m(int xc_handle, xc_d live_p2m_frame_list_list = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, - live_shinfo->arch.pfn_to_mfn_frame_list_list); + GET_FIELD(live_shinfo, arch.pfn_to_mfn_frame_list_list)); if ( !live_p2m_frame_list_list ) { PERROR("Couldn't map p2m_frame_list_list (errno %d)", errno); goto out; } + + /* Get a local copy of the live_P2M_frame_list_list */ + if ( !(p2m_frame_list_list = malloc(PAGE_SIZE)) ) + { + ERROR("Couldn't allocate p2m_frame_list_list array"); + goto out; + } + memcpy(p2m_frame_list_list, live_p2m_frame_list_list, PAGE_SIZE); + + /* Canonicalize guest's unsigned long vs ours */ + if ( guest_width > sizeof(unsigned long) ) + for ( i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++ ) + if ( i < PAGE_SIZE/guest_width ) + p2m_frame_list_list[i] = ((uint64_t *)p2m_frame_list_list)[i]; + else + p2m_frame_list_list[i] = 0; + else if ( guest_width < sizeof(unsigned long) ) + for ( i = PAGE_SIZE/sizeof(unsigned long) - 1; i >= 0; i-- ) + p2m_frame_list_list[i] = ((uint32_t *)p2m_frame_list_list)[i]; live_p2m_frame_list = xc_map_foreign_pages(xc_handle, dom, PROT_READ, - live_p2m_frame_list_list, + p2m_frame_list_list, P2M_FLL_ENTRIES); if ( !live_p2m_frame_list ) @@ -99,8 +139,25 @@ xc_core_arch_map_p2m(int xc_handle, xc_d goto out; } + /* Get a local copy of the live_P2M_frame_list */ + if ( !(p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) ) + { + ERROR("Couldn't allocate p2m_frame_list array"); + goto out; + } + memset(p2m_frame_list, 0, P2M_TOOLS_FL_SIZE); + memcpy(p2m_frame_list, live_p2m_frame_list, P2M_GUEST_FL_SIZE); + + /* Canonicalize guest's unsigned long vs ours */ + if ( guest_width > sizeof(unsigned long) ) + for ( i = 0; i < P2M_FL_ENTRIES; i++ ) 
+ p2m_frame_list[i] = ((uint64_t *)p2m_frame_list)[i]; + else if ( guest_width < sizeof(unsigned long) ) + for ( i = P2M_FL_ENTRIES - 1; i >= 0; i-- ) + p2m_frame_list[i] = ((uint32_t *)p2m_frame_list)[i]; + *live_p2m = xc_map_foreign_pages(xc_handle, dom, PROT_READ, - live_p2m_frame_list, + p2m_frame_list, P2M_FL_ENTRIES); if ( !*live_p2m ) @@ -121,6 +178,12 @@ out: if ( live_p2m_frame_list ) munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); + + if ( p2m_frame_list_list ) + free(p2m_frame_list_list); + + if ( p2m_frame_list ) + free(p2m_frame_list); errno = err; return ret; diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core_x86.h --- a/tools/libxc/xc_core_x86.h Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xc_core_x86.h Wed Jan 28 13:06:45 2009 +0900 @@ -21,15 +21,8 @@ #ifndef XC_CORE_X86_H #define XC_CORE_X86_H -#if defined(__i386__) || defined(__x86_64__) #define ELF_ARCH_DATA ELFDATA2LSB -#if defined (__i386__) -# define ELF_ARCH_MACHINE EM_386 -#else -# define ELF_ARCH_MACHINE EM_X86_64 -#endif -#endif /* __i386__ or __x86_64__ */ - +#define ELF_ARCH_MACHINE (guest_width == 8 ? 
EM_X86_64 : EM_386) struct xc_core_arch_context { /* nothing */ @@ -40,8 +33,10 @@ struct xc_core_arch_context { #define xc_core_arch_context_get(arch_ctxt, ctxt, xc_handle, domid) \ (0) #define xc_core_arch_context_dump(arch_ctxt, args, dump_rtn) (0) -#define xc_core_arch_gpfn_may_present(arch_ctxt, i) (1) +int +xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt, + unsigned long pfn); static inline int xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt, struct xc_core_section_headers *sheaders, diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_dom.h --- a/tools/libxc/xc_dom.h Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xc_dom.h Wed Jan 28 13:06:45 2009 +0900 @@ -1,4 +1,4 @@ -#include <xen/libelf.h> +#include <xen/libelf/libelf.h> #define INVALID_P2M_ENTRY ((xen_pfn_t)-1) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xc_domain.c Wed Jan 28 13:06:45 2009 +0900 @@ -531,33 +531,6 @@ int xc_domain_memory_populate_physmap(in DPRINTF("Failed allocation for dom %d: %ld extents of order %d\n", domid, nr_extents, extent_order); errno = EBUSY; - err = -1; - } - - return err; -} - -int xc_domain_memory_translate_gpfn_list(int xc_handle, - uint32_t domid, - unsigned long nr_gpfns, - xen_pfn_t *gpfn_list, - xen_pfn_t *mfn_list) -{ - int err; - struct xen_translate_gpfn_list translate_gpfn_list = { - .domid = domid, - .nr_gpfns = nr_gpfns, - }; - set_xen_guest_handle(translate_gpfn_list.gpfn_list, gpfn_list); - set_xen_guest_handle(translate_gpfn_list.mfn_list, mfn_list); - - err = xc_memory_op(xc_handle, XENMEM_translate_gpfn_list, &translate_gpfn_list); - - if ( err != 0 ) - { - DPRINTF("Failed translation for dom %d (%ld PFNs)\n", - domid, nr_gpfns); - errno = -err; err = -1; } @@ -958,7 +931,8 @@ int xc_domain_bind_pt_irq( bind->hvm_domid = domid; bind->irq_type = irq_type; bind->machine_irq = machine_irq; - if ( irq_type == PT_IRQ_TYPE_PCI 
) + if ( irq_type == PT_IRQ_TYPE_PCI || + irq_type == PT_IRQ_TYPE_MSI_TRANSLATE ) { bind->u.pci.bus = bus; bind->u.pci.device = device; diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_elf.h --- a/tools/libxc/xc_elf.h Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xc_elf.h Wed Jan 28 13:06:45 2009 +0900 @@ -1,1 +1,1 @@ -#include <xen/elfstructs.h> +#include <xen/libelf/elfstructs.h> diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xc_hvm_build.c Wed Jan 28 13:06:45 2009 +0900 @@ -15,100 +15,55 @@ #include <xen/foreign/x86_64.h> #include <xen/hvm/hvm_info_table.h> #include <xen/hvm/params.h> -#include "xc_e820.h" - -#include <xen/libelf.h> +#include <xen/hvm/e820.h> + +#include <xen/libelf/libelf.h> #define SUPERPAGE_PFN_SHIFT 9 #define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT) -#define SCRATCH_PFN 0xFFFFF - -#define SPECIALPAGE_GUARD 0 -#define SPECIALPAGE_BUFIOREQ 1 -#define SPECIALPAGE_XENSTORE 2 -#define SPECIALPAGE_IOREQ 3 -#define SPECIALPAGE_IDENT_PT 4 +#define SPECIALPAGE_BUFIOREQ 0 +#define SPECIALPAGE_XENSTORE 1 +#define SPECIALPAGE_IOREQ 2 +#define SPECIALPAGE_IDENT_PT 3 +#define SPECIALPAGE_SHINFO 4 #define NR_SPECIAL_PAGES 5 - -static void build_e820map(void *e820_page, unsigned long long mem_size) -{ - struct e820entry *e820entry = - (struct e820entry *)(((unsigned char *)e820_page) + HVM_E820_OFFSET); - unsigned long long extra_mem_size = 0; - unsigned char nr_map = 0; - - /* - * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved - * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END - * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above. - */ - if ( mem_size > HVM_BELOW_4G_RAM_END ) - { - extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END; - mem_size = HVM_BELOW_4G_RAM_END; - } - - /* 0x0-0x9FC00: Ordinary RAM. 
*/ - e820entry[nr_map].addr = 0x0; - e820entry[nr_map].size = 0x9FC00; - e820entry[nr_map].type = E820_RAM; - nr_map++; - - /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */ - e820entry[nr_map].addr = 0x9FC00; - e820entry[nr_map].size = 0x400; - e820entry[nr_map].type = E820_RESERVED; - nr_map++; - - /* - * Following regions are standard regions of the PC memory map. - * They are not covered by e820 regions. OSes will not use as RAM. - * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820. - * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios). - * TODO: hvmloader should free pages which turn out to be unused. - */ - - /* - * 0xE0000-0x0F0000: PC-specific area. We place ACPI tables here. - * We *cannot* mark as E820_ACPI, for two reasons: - * 1. ACPI spec. says that E820_ACPI regions below - * 16MB must clip INT15h 0x88 and 0xe801 queries. - * Our rombios doesn't do this. - * 2. The OS is allowed to reclaim ACPI memory after - * parsing the tables. But our FACS is in this - * region and it must not be reclaimed (it contains - * the ACPI global lock!). - * 0xF0000-0x100000: System BIOS. - * TODO: hvmloader should free pages which turn out to be unused. - */ - e820entry[nr_map].addr = 0xE0000; - e820entry[nr_map].size = 0x20000; - e820entry[nr_map].type = E820_RESERVED; - nr_map++; - - /* Low RAM goes here. Reserve space for special pages. */ - e820entry[nr_map].addr = 0x100000; - e820entry[nr_map].size = (mem_size - 0x100000 - - PAGE_SIZE * NR_SPECIAL_PAGES); - e820entry[nr_map].type = E820_RAM; - nr_map++; - - /* Explicitly reserve space for special pages (excluding guard page). 
*/ - e820entry[nr_map].addr = mem_size - PAGE_SIZE * (NR_SPECIAL_PAGES - 1); - e820entry[nr_map].size = PAGE_SIZE * (NR_SPECIAL_PAGES - 1); - e820entry[nr_map].type = E820_RESERVED; - nr_map++; - - if ( extra_mem_size ) - { - e820entry[nr_map].addr = (1ULL << 32); - e820entry[nr_map].size = extra_mem_size; - e820entry[nr_map].type = E820_RAM; - nr_map++; - } - - *(((unsigned char *)e820_page) + HVM_E820_NR_OFFSET) = nr_map; +#define special_pfn(x) (0xff000u - NR_SPECIAL_PAGES + (x)) + +static void build_hvm_info(void *hvm_info_page, uint64_t mem_size) +{ + struct hvm_info_table *hvm_info = (struct hvm_info_table *) + (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET); + uint64_t lowmem_end = mem_size, highmem_end = 0; + uint8_t sum; + int i; + + if ( lowmem_end > HVM_BELOW_4G_RAM_END ) + { + highmem_end = lowmem_end + (1ull<<32) - HVM_BELOW_4G_RAM_END; + lowmem_end = HVM_BELOW_4G_RAM_END; + } + + memset(hvm_info_page, 0, PAGE_SIZE); + + /* Fill in the header. */ + strncpy(hvm_info->signature, "HVM INFO", 8); + hvm_info->length = sizeof(struct hvm_info_table); + + /* Sensible defaults: these can be overridden by the caller. */ + hvm_info->acpi_enabled = 1; + hvm_info->apic_mode = 1; + hvm_info->nr_vcpus = 1; + + /* Memory parameters. */ + hvm_info->low_mem_pgend = lowmem_end >> PAGE_SHIFT; + hvm_info->high_mem_pgend = highmem_end >> PAGE_SHIFT; + hvm_info->reserved_mem_pgstart = special_pfn(0); + + /* Finish with the checksum. 
*/ + for ( i = 0, sum = 0; i < hvm_info->length; i++ ) + sum += ((uint8_t *)hvm_info)[i]; + hvm_info->checksum = -sum; } static int loadelfimage( @@ -153,10 +108,10 @@ static int setup_guest(int xc_handle, unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT); unsigned long target_pages = (unsigned long)target << (20 - PAGE_SHIFT); unsigned long pod_pages = 0; - unsigned long special_page_nr, entry_eip, cur_pages; + unsigned long entry_eip, cur_pages; struct xen_add_to_physmap xatp; struct shared_info *shared_info; - void *e820_page; + void *hvm_info_page; uint32_t *ident_pt; struct elf_binary elf; uint64_t v_start, v_end; @@ -289,23 +244,22 @@ static int setup_guest(int xc_handle, if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 ) goto error_out; - if ( (e820_page = xc_map_foreign_range( + if ( (hvm_info_page = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, - HVM_E820_PAGE >> PAGE_SHIFT)) == NULL ) - goto error_out; - memset(e820_page, 0, PAGE_SIZE); - build_e820map(e820_page, v_end); - munmap(e820_page, PAGE_SIZE); + HVM_INFO_PFN)) == NULL ) + goto error_out; + build_hvm_info(hvm_info_page, v_end); + munmap(hvm_info_page, PAGE_SIZE); /* Map and initialise shared_info page. */ xatp.domid = dom; xatp.space = XENMAPSPACE_shared_info; xatp.idx = 0; - xatp.gpfn = SCRATCH_PFN; + xatp.gpfn = special_pfn(SPECIALPAGE_SHINFO); if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) || ((shared_info = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, - SCRATCH_PFN)) == NULL) ) + special_pfn(SPECIALPAGE_SHINFO))) == NULL) ) goto error_out; memset(shared_info, 0, PAGE_SIZE); /* NB. evtchn_upcall_mask is unused: leave as zero. */ @@ -313,31 +267,28 @@ static int setup_guest(int xc_handle, sizeof(shared_info->evtchn_mask)); munmap(shared_info, PAGE_SIZE); - special_page_nr = (((v_end > HVM_BELOW_4G_RAM_END) - ? 
(HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - : (v_end >> PAGE_SHIFT)) - - NR_SPECIAL_PAGES); - - /* Paranoia: clean special pages. */ + /* Allocate and clear special pages. */ for ( i = 0; i < NR_SPECIAL_PAGES; i++ ) - if ( xc_clear_domain_page(xc_handle, dom, special_page_nr + i) ) + { + xen_pfn_t pfn = special_pfn(i); + if ( i == SPECIALPAGE_SHINFO ) + continue; + rc = xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0, 0, &pfn); + if ( rc != 0 ) + { + PERROR("Could not allocate %d'th special page.\n", i); goto error_out; - - /* Free the guard page that separates low RAM from special pages. */ - rc = xc_domain_memory_decrease_reservation( - xc_handle, dom, 1, 0, &page_array[special_page_nr]); - if ( rc != 0 ) - { - PERROR("Could not deallocate guard page for HVM guest.\n"); - goto error_out; + } + if ( xc_clear_domain_page(xc_handle, dom, special_pfn(i)) ) + goto error_out; } xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, - special_page_nr + SPECIALPAGE_XENSTORE); + special_pfn(SPECIALPAGE_XENSTORE)); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, - special_page_nr + SPECIALPAGE_BUFIOREQ); + special_pfn(SPECIALPAGE_BUFIOREQ)); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, - special_page_nr + SPECIALPAGE_IOREQ); + special_pfn(SPECIALPAGE_IOREQ)); /* * Identity-map page table is required for running with CR0.PG=0 when @@ -345,14 +296,14 @@ static int setup_guest(int xc_handle, */ if ( (ident_pt = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, - special_page_nr + SPECIALPAGE_IDENT_PT)) == NULL ) + special_pfn(SPECIALPAGE_IDENT_PT))) == NULL ) goto error_out; for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ ) ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); munmap(ident_pt, PAGE_SIZE); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT, - (special_page_nr + SPECIALPAGE_IDENT_PT) << PAGE_SHIFT); + special_pfn(SPECIALPAGE_IDENT_PT) << PAGE_SHIFT); /* Insert 
JMP <rel32> instruction at address 0x0 to reach entry point. */ entry_eip = elf_uval(&elf, elf.ehdr, e_entry); diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xc_private.c Wed Jan 28 13:06:45 2009 +0900 @@ -307,13 +307,6 @@ int xc_memory_op(int xc_handle, goto out1; } break; - case XENMEM_remove_from_physmap: - if ( lock_pages(arg, sizeof(struct xen_remove_from_physmap)) ) - { - PERROR("Could not lock"); - goto out1; - } - break; case XENMEM_current_reservation: case XENMEM_maximum_reservation: case XENMEM_maximum_gpfn: @@ -354,9 +347,6 @@ int xc_memory_op(int xc_handle, break; case XENMEM_add_to_physmap: unlock_pages(arg, sizeof(struct xen_add_to_physmap)); - break; - case XENMEM_remove_from_physmap: - unlock_pages(arg, sizeof(struct xen_remove_from_physmap)); break; case XENMEM_current_reservation: case XENMEM_maximum_reservation: diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_ptrace_core.c --- a/tools/libxc/xc_ptrace_core.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xc_ptrace_core.c Wed Jan 28 13:06:45 2009 +0900 @@ -540,7 +540,9 @@ xc_waitdomain_core_elf( XEN_ELFNOTE_DUMPCORE_XEN_VERSION, (void**)&xen_version) < 0) goto out; - if (xen_version->xen_version.pagesize != PAGE_SIZE) + /* shifted case covers 32 bit FV guest core file created on 64 bit Dom0 */ + if (xen_version->xen_version.pagesize != PAGE_SIZE && + (xen_version->xen_version.pagesize >> 32) != PAGE_SIZE) goto out; /* .note.Xen: format_version */ diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/libxc/xenctrl.h Wed Jan 28 13:06:45 2009 +0900 @@ -628,12 +628,6 @@ int xc_domain_memory_populate_physmap(in unsigned int mem_flags, xen_pfn_t *extent_start); -int xc_domain_memory_translate_gpfn_list(int xc_handle, - uint32_t domid, - unsigned long nr_gpfns, - xen_pfn_t *gpfn_list, - xen_pfn_t *mfn_list); - int 
xc_domain_memory_set_pod_target(int xc_handle, uint32_t domid, uint64_t target_pages, diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/misc/Makefile --- a/tools/misc/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/misc/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -47,7 +47,7 @@ install: build .PHONY: clean clean: - $(RM) *.o $(TARGETS) *~ + $(RM) *.o $(TARGETS) *~ $(DEPS) set -e; for d in $(SUBDIRS); do $(MAKE) -C $$d clean; done %.o: %.c $(HDRS) Makefile @@ -55,3 +55,5 @@ clean: xenperf xenpm: %: %.o Makefile $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDFLAGS_libxenctrl) + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/misc/xenpm.c --- a/tools/misc/xenpm.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/misc/xenpm.c Wed Jan 28 13:06:45 2009 +0900 @@ -21,83 +21,56 @@ #include <stdio.h> #include <stdlib.h> +#include <unistd.h> #include <string.h> #include <getopt.h> #include <errno.h> +#include <signal.h> #include <xenctrl.h> #include <inttypes.h> +#include <sys/time.h> #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0])) + +static int xc_fd; +static int max_cpu_nr; /* help message */ void show_help(void) { fprintf(stderr, - "Usage:\n" - " xenpm get-cpuidle-states [cpuid]: list cpu idle information on CPU cpuid or all CPUs.\n" - " xenpm get-cpufreq-states [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n" - " xenpm get-cpufreq-para [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n" - " xenpm set-scaling-maxfreq <cpuid> <HZ>: set max cpu frequency <HZ> on CPU <cpuid>.\n" - " xenpm set-scaling-minfreq <cpuid> <HZ>: set min cpu frequency <HZ> on CPU <cpuid>.\n" - " xenpm set-scaling-governor <cpuid> <name>: set scaling governor on CPU <cpuid>.\n" - " xenpm set-scaling-speed <cpuid> <num>: set scaling speed on CPU <cpuid>.\n" - " xenpm set-sampling-rate <cpuid> <num>: set sampling rate on CPU <cpuid>.\n" - " xenpm set-up-threshold <cpuid> <num>: set up threshold on CPU <cpuid>.\n"); -} - + "xen power management control tool\n\n" + 
"usage: xenpm <command> [args]\n\n" + "xenpm command list:\n\n" + " get-cpuidle-states [cpuid] list cpu idle info of CPU <cpuid> or all\n" + " get-cpufreq-states [cpuid] list cpu freq info of CPU <cpuid> or all\n" + " get-cpufreq-para [cpuid] list cpu freq parameter of CPU <cpuid> or all\n" + " set-scaling-maxfreq [cpuid] <HZ> set max cpu frequency <HZ> on CPU <cpuid>\n" + " or all CPUs\n" + " set-scaling-minfreq [cpuid] <HZ> set min cpu frequency <HZ> on CPU <cpuid>\n" + " or all CPUs\n" + " set-scaling-speed [cpuid] <num> set scaling speed on CPU <cpuid> or all\n" + " it is used in userspace governor.\n" + " set-scaling-governor [cpuid] <gov> set scaling governor on CPU <cpuid> or all\n" + " as userspace/performance/powersave/ondemand\n" + " set-sampling-rate [cpuid] <num> set sampling rate on CPU <cpuid> or all\n" + " it is used in ondemand governor.\n" + " set-up-threshold [cpuid] <num> set up threshold on CPU <cpuid> or all\n" + " it is used in ondemand governor.\n" + " start start collect Cx/Px statistics,\n" + " output after CTRL-C or SIGINT.\n" + ); +} /* wrapper function */ -int help_func(int xc_fd, int cpuid, uint32_t value) +void help_func(int argc, char *argv[]) { show_help(); - return 0; -} - -/* show cpu idle information on CPU cpuid */ -static int show_cx_cpuid(int xc_fd, int cpuid) -{ - int i, ret = 0; - int max_cx_num = 0; - struct xc_cx_stat cxstatinfo, *cxstat = &cxstatinfo; - - ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num); - if ( ret ) - { - if ( errno == ENODEV ) - { - fprintf(stderr, "Xen cpuidle is not enabled!\n"); - return -ENODEV; - } - else - { - fprintf(stderr, "[CPU%d] failed to get max C-state\n", cpuid); - return -EINVAL; - } - } - - cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t)); - if ( !cxstat->triggers ) - { - fprintf(stderr, "[CPU%d] failed to malloc for C-states triggers\n", cpuid); - return -ENOMEM; - } - cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t)); - if ( !cxstat->residencies ) - { - 
fprintf(stderr, "[CPU%d] failed to malloc for C-states residencies\n", cpuid); - free(cxstat->triggers); - return -ENOMEM; - } - - ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat); - if( ret ) - { - fprintf(stderr, "[CPU%d] failed to get C-states statistics " - "information\n", cpuid); - free(cxstat->triggers); - free(cxstat->residencies); - return -EINVAL; - } +} + +static void print_cxstat(int cpuid, struct xc_cx_stat *cxstat) +{ + int i; printf("cpu id : %d\n", cpuid); printf("total C-states : %d\n", cxstat->nr); @@ -110,88 +83,87 @@ static int show_cx_cpuid(int xc_fd, int printf(" residency [%020"PRIu64" ms]\n", cxstat->residencies[i]/1000000UL); } - - free(cxstat->triggers); - free(cxstat->residencies); - printf("\n"); +} + +/* show cpu idle information on CPU cpuid */ +static int get_cxstat_by_cpuid(int xc_fd, int cpuid, struct xc_cx_stat *cxstat) +{ + int ret = 0; + int max_cx_num = 0; + + ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num); + if ( ret ) + return errno; + + if ( !cxstat ) + return -EINVAL; + + cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t)); + if ( !cxstat->triggers ) + return -ENOMEM; + cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t)); + if ( !cxstat->residencies ) + { + free(cxstat->triggers); + return -ENOMEM; + } + + ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat); + if( ret ) + { + int temp = errno; + free(cxstat->triggers); + free(cxstat->residencies); + cxstat->triggers = NULL; + cxstat->residencies = NULL; + return temp; + } + return 0; } -int cxstates_func(int xc_fd, int cpuid, uint32_t value) +static int show_cxstat_by_cpuid(int xc_fd, int cpuid) { int ret = 0; - xc_physinfo_t physinfo = { 0 }; - - if ( cpuid < 0 ) - { - /* show cxstates on all cpu */ - ret = xc_physinfo(xc_fd, &physinfo); - if ( ret ) - { - fprintf(stderr, "failed to get the processor information\n"); - } - else - { - int i; - for ( i = 0; i < physinfo.nr_cpus; i++ ) - { - if ( (ret = show_cx_cpuid(xc_fd, i)) == -ENODEV ) - break; - } - } - } - else - 
ret = show_cx_cpuid(xc_fd, cpuid); - - return ret; -} - -/* show cpu frequency information on CPU cpuid */ -static int show_px_cpuid(int xc_fd, int cpuid) -{ - int i, ret = 0; - int max_px_num = 0; - struct xc_px_stat pxstatinfo, *pxstat = &pxstatinfo; - - ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num); + struct xc_cx_stat cxstatinfo; + + ret = get_cxstat_by_cpuid(xc_fd, cpuid, &cxstatinfo); if ( ret ) - { - if ( errno == ENODEV ) - { - printf("Xen cpufreq is not enabled!\n"); - return -ENODEV; - } - else - { - fprintf(stderr, "[CPU%d] failed to get max P-state\n", cpuid); - return -EINVAL; - } - } - - pxstat->trans_pt = malloc(max_px_num * max_px_num * - sizeof(uint64_t)); - if ( !pxstat->trans_pt ) - { - fprintf(stderr, "[CPU%d] failed to malloc for P-states transition table\n", cpuid); - return -ENOMEM; - } - pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val)); - if ( !pxstat->pt ) - { - fprintf(stderr, "[CPU%d] failed to malloc for P-states table\n", cpuid); - free(pxstat->trans_pt); - return -ENOMEM; - } - - ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat); - if( ret ) - { - fprintf(stderr, "[CPU%d] failed to get P-states statistics information\n", cpuid); - free(pxstat->trans_pt); - free(pxstat->pt); - return -ENOMEM; - } + return ret; + + print_cxstat(cpuid, &cxstatinfo); + + free(cxstatinfo.triggers); + free(cxstatinfo.residencies); + return 0; +} + +void cxstat_func(int argc, char *argv[]) +{ + int cpuid = -1; + + if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 ) + cpuid = -1; + + if ( cpuid >= max_cpu_nr ) + cpuid = -1; + + if ( cpuid < 0 ) + { + /* show cxstates on all cpus */ + int i; + for ( i = 0; i < max_cpu_nr; i++ ) + if ( show_cxstat_by_cpuid(xc_fd, i) == -ENODEV ) + break; + } + else + show_cxstat_by_cpuid(xc_fd, cpuid); +} + +static void print_pxstat(int cpuid, struct xc_px_stat *pxstat) +{ + int i; printf("cpu id : %d\n", cpuid); printf("total P-states : %d\n", pxstat->total); @@ -211,40 +183,233 @@ static int show_px_cpuid(int xc_fd, 
int printf(" residency [%020"PRIu64" ms]\n", pxstat->pt[i].residency/1000000UL); } - - free(pxstat->trans_pt); - free(pxstat->pt); - printf("\n"); +} + +/* show cpu frequency information on CPU cpuid */ +static int get_pxstat_by_cpuid(int xc_fd, int cpuid, struct xc_px_stat *pxstat) +{ + int ret = 0; + int max_px_num = 0; + + ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num); + if ( ret ) + return errno; + + if ( !pxstat) + return -EINVAL; + + pxstat->trans_pt = malloc(max_px_num * max_px_num * + sizeof(uint64_t)); + if ( !pxstat->trans_pt ) + return -ENOMEM; + pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val)); + if ( !pxstat->pt ) + { + free(pxstat->trans_pt); + return -ENOMEM; + } + + ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat); + if( ret ) + { + int temp = errno; + free(pxstat->trans_pt); + free(pxstat->pt); + pxstat->trans_pt = NULL; + pxstat->pt = NULL; + return temp; + } + return 0; } -int pxstates_func(int xc_fd, int cpuid, uint32_t value) +static int show_pxstat_by_cpuid(int xc_fd, int cpuid) { int ret = 0; - xc_physinfo_t physinfo = { 0 }; - - if ( cpuid < 0 ) - { - ret = xc_physinfo(xc_fd, &physinfo); - if ( ret ) + struct xc_px_stat pxstatinfo; + + ret = get_pxstat_by_cpuid(xc_fd, cpuid, &pxstatinfo); + if ( ret ) + return ret; + + print_pxstat(cpuid, &pxstatinfo); + + free(pxstatinfo.trans_pt); + free(pxstatinfo.pt); + return 0; +} + +void pxstat_func(int argc, char *argv[]) +{ + int cpuid = -1; + + if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 ) + cpuid = -1; + + if ( cpuid >= max_cpu_nr ) + cpuid = -1; + + if ( cpuid < 0 ) + { + /* show pxstates on all cpus */ + int i; + for ( i = 0; i < max_cpu_nr; i++ ) + if ( show_pxstat_by_cpuid(xc_fd, i) == -ENODEV ) + break; + } + else + show_pxstat_by_cpuid(xc_fd, cpuid); +} + +static uint64_t usec_start, usec_end; +static struct xc_cx_stat *cxstat, *cxstat_start, *cxstat_end; +static struct xc_px_stat *pxstat, *pxstat_start, *pxstat_end; +static uint64_t *sum, *sum_cx, *sum_px; + +static void 
signal_int_handler(int signo) +{ + int i, j; + struct timeval tv; + int cx_cap = 0, px_cap = 0; + + if ( gettimeofday(&tv, NULL) == -1 ) + { + fprintf(stderr, "failed to get timeofday\n"); + return ; + } + usec_end = tv.tv_sec * 1000000UL + tv.tv_usec; + + if ( get_cxstat_by_cpuid(xc_fd, 0, NULL) != -ENODEV ) + { + cx_cap = 1; + for ( i = 0; i < max_cpu_nr; i++ ) + if ( !get_cxstat_by_cpuid(xc_fd, i, &cxstat_end[i]) ) + for ( j = 0; j < cxstat_end[i].nr; j++ ) + sum_cx[i] += cxstat_end[i].residencies[j] - + cxstat_start[i].residencies[j]; + } + + if ( get_pxstat_by_cpuid(xc_fd, 0, NULL) != -ENODEV ) + { + px_cap = 1; + for ( i = 0; i < max_cpu_nr; i++ ) + if ( !get_pxstat_by_cpuid(xc_fd, i , &pxstat_end[i]) ) + for ( j = 0; j < pxstat_end[i].total; j++ ) + sum_px[i] += pxstat_end[i].pt[j].residency - + pxstat_start[i].pt[j].residency; + } + + printf("Elapsed time (ms): %"PRIu64"\n", (usec_end - usec_start) / 1000UL); + for ( i = 0; i < max_cpu_nr; i++ ) + { + uint64_t temp; + printf("CPU%d:\n\tresidency\tpercentage\n", i); + if ( cx_cap ) { - fprintf(stderr, "failed to get the processor information\n"); - } - else - { - int i; - for ( i = 0; i < physinfo.nr_cpus; i++ ) + for ( j = 0; j < cxstat_end[i].nr; j++ ) { - if ( (ret = show_px_cpuid(xc_fd, i)) == -ENODEV ) - break; + if ( sum_cx[i] > 0 ) + { + temp = cxstat_end[i].residencies[j] - + cxstat_start[i].residencies[j]; + printf(" C%d\t%"PRIu64" ms\t%.2f%%\n", j, + temp / 1000000UL, 100UL * temp / (double)sum_cx[i]); + } } } - } - else - ret = show_px_cpuid(xc_fd, cpuid); - - return ret; + if ( px_cap ) + { + for ( j = 0; j < pxstat_end[i].total; j++ ) + { + if ( sum_px[i] > 0 ) + { + temp = pxstat_end[i].pt[j].residency - + pxstat_start[i].pt[j].residency; + printf(" P%d\t%"PRIu64" ms\t%.2f%%\n", j, + temp / 1000000UL, 100UL * temp / (double)sum_px[i]); + } + } + } + printf("\n"); + } + + /* some clean up and then exits */ + for ( i = 0; i < 2 * max_cpu_nr; i++ ) + { + free(cxstat[i].triggers); + 
free(cxstat[i].residencies); + free(pxstat[i].trans_pt); + free(pxstat[i].pt); + } + free(cxstat); + free(pxstat); + free(sum); + xc_interface_close(xc_fd); + exit(0); +} + +void start_gather_func(int argc, char *argv[]) +{ + int i; + struct timeval tv; + + if ( gettimeofday(&tv, NULL) == -1 ) + { + fprintf(stderr, "failed to get timeofday\n"); + return ; + } + usec_start = tv.tv_sec * 1000000UL + tv.tv_usec; + + sum = malloc(sizeof(uint64_t) * 2 * max_cpu_nr); + if ( sum == NULL ) + return ; + cxstat = malloc(sizeof(struct xc_cx_stat) * 2 * max_cpu_nr); + if ( cxstat == NULL ) + { + free(sum); + return ; + } + pxstat = malloc(sizeof(struct xc_px_stat) * 2 * max_cpu_nr); + if ( pxstat == NULL ) + { + free(sum); + free(cxstat); + return ; + } + memset(sum, 0, sizeof(uint64_t) * 2 * max_cpu_nr); + memset(cxstat, 0, sizeof(struct xc_cx_stat) * 2 * max_cpu_nr); + memset(pxstat, 0, sizeof(struct xc_px_stat) * 2 * max_cpu_nr); + sum_cx = sum; + sum_px = sum + max_cpu_nr; + cxstat_start = cxstat; + cxstat_end = cxstat + max_cpu_nr; + pxstat_start = pxstat; + pxstat_end = pxstat + max_cpu_nr; + + if ( get_cxstat_by_cpuid(xc_fd, 0, NULL) == -ENODEV && + get_pxstat_by_cpuid(xc_fd, 0, NULL) == -ENODEV ) + { + fprintf(stderr, "Xen cpu idle and frequency is disabled!\n"); + return ; + } + + for ( i = 0; i < max_cpu_nr; i++ ) + { + get_cxstat_by_cpuid(xc_fd, i, &cxstat_start[i]); + get_pxstat_by_cpuid(xc_fd, i, &pxstat_start[i]); + } + + if (signal(SIGINT, signal_int_handler) == SIG_ERR) + { + fprintf(stderr, "failed to set signal int handler\n"); + free(sum); + free(pxstat); + free(cxstat); + return ; + } + + pause(); } /* print out parameters about cpu frequency */ @@ -294,7 +459,8 @@ static void print_cpufreq_para(int cpuid printf("scaling_avail_freq :"); for ( i = 0; i < p_cpufreq->freq_num; i++ ) - if ( p_cpufreq->scaling_available_frequencies[i] == p_cpufreq->scaling_cur_freq ) + if ( p_cpufreq->scaling_available_frequencies[i] == + p_cpufreq->scaling_cur_freq ) printf(" 
*%d", p_cpufreq->scaling_available_frequencies[i]); else printf(" %d", p_cpufreq->scaling_available_frequencies[i]); @@ -308,7 +474,7 @@ static void print_cpufreq_para(int cpuid } /* show cpu frequency parameters information on CPU cpuid */ -static int show_cpufreq_para_cpuid(int xc_fd, int cpuid) +static int show_cpufreq_para_by_cpuid(int xc_fd, int cpuid) { int ret = 0; struct xc_get_cpufreq_para cpufreq_para, *p_cpufreq = &cpufreq_para; @@ -381,159 +547,221 @@ out: return ret; } -int cpufreq_para_func(int xc_fd, int cpuid, uint32_t value) -{ - int ret = 0; - xc_physinfo_t physinfo = { 0 }; - - if ( cpuid < 0 ) - { - ret = xc_physinfo(xc_fd, &physinfo); - if ( ret ) +void cpufreq_para_func(int argc, char *argv[]) +{ + int cpuid = -1; + + if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 ) + cpuid = -1; + + if ( cpuid >= max_cpu_nr ) + cpuid = -1; + + if ( cpuid < 0 ) + { + /* show cpu freqency information on all cpus */ + int i; + for ( i = 0; i < max_cpu_nr; i++ ) + if ( show_cpufreq_para_by_cpuid(xc_fd, i) == -ENODEV ) + break; + } + else + show_cpufreq_para_by_cpuid(xc_fd, cpuid); +} + +void scaling_max_freq_func(int argc, char *argv[]) +{ + int cpuid = -1, freq = -1; + + if ( (argc >= 2 && (sscanf(argv[1], "%d", &freq) != 1 || + sscanf(argv[0], "%d", &cpuid) != 1)) || + (argc == 1 && sscanf(argv[0], "%d", &freq) != 1 ) || + argc == 0 ) + { + fprintf(stderr, "failed to set scaling max freq\n"); + return ; + } + + if ( cpuid < 0 ) + { + int i; + for ( i = 0; i < max_cpu_nr; i++ ) + if ( xc_set_cpufreq_para(xc_fd, i, SCALING_MAX_FREQ, freq) ) + fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", i); + } + else + { + if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, freq) ) + fprintf(stderr, "failed to set scaling max freq\n"); + } +} + +void scaling_min_freq_func(int argc, char *argv[]) +{ + int cpuid = -1, freq = -1; + + if ( (argc >= 2 && (sscanf(argv[1], "%d", &freq) != 1 || + sscanf(argv[0], "%d", &cpuid) != 1) ) || + (argc == 1 && 
sscanf(argv[0], "%d", &freq) != 1 ) || + argc == 0 ) + { + fprintf(stderr, "failed to set scaling min freq\n"); + return ; + } + + if ( cpuid < 0 ) + { + int i; + for ( i = 0; i < max_cpu_nr; i++ ) + if ( xc_set_cpufreq_para(xc_fd, i, SCALING_MIN_FREQ, freq) ) + fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", i); + } + else + { + if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, freq) ) + fprintf(stderr, "failed to set scaling min freq\n"); + } +} + +void scaling_speed_func(int argc, char *argv[]) +{ + int cpuid = -1, speed = -1; + + if ( (argc >= 2 && (sscanf(argv[1], "%d", &speed) != 1 || + sscanf(argv[0], "%d", &cpuid) != 1) ) || + (argc == 1 && sscanf(argv[0], "%d", &speed) != 1 ) || + argc == 0 ) + { + fprintf(stderr, "failed to set scaling speed\n"); + return ; + } + + if ( cpuid < 0 ) + { + int i; + for ( i = 0; i < max_cpu_nr; i++ ) + if ( xc_set_cpufreq_para(xc_fd, i, SCALING_SETSPEED, speed) ) + fprintf(stderr, "[CPU%d] failed to set scaling speed\n", i); + } + else + { + if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, speed) ) + fprintf(stderr, "failed to set scaling speed\n"); + } +} + +void scaling_sampling_rate_func(int argc, char *argv[]) +{ + int cpuid = -1, rate = -1; + + if ( (argc >= 2 && (sscanf(argv[1], "%d", &rate) != 1 || + sscanf(argv[0], "%d", &cpuid) != 1) ) || + (argc == 1 && sscanf(argv[0], "%d", &rate) != 1 ) || + argc == 0 ) + { + fprintf(stderr, "failed to set scaling sampling rate\n"); + return ; + } + + if ( cpuid < 0 ) + { + int i; + for ( i = 0; i < max_cpu_nr; i++ ) + if ( xc_set_cpufreq_para(xc_fd, i, SAMPLING_RATE, rate) ) + fprintf(stderr, + "[CPU%d] failed to set scaling sampling rate\n", i); + } + else + { + if ( xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, rate) ) + fprintf(stderr, "failed to set scaling sampling rate\n"); + } +} + +void scaling_up_threshold_func(int argc, char *argv[]) +{ + int cpuid = -1, threshold = -1; + + if ( (argc >= 2 && (sscanf(argv[1], "%d", &threshold) != 1 || + 
sscanf(argv[0], "%d", &cpuid) != 1) ) || + (argc == 1 && sscanf(argv[0], "%d", &threshold) != 1 ) || + argc == 0 ) + { + fprintf(stderr, "failed to set up scaling threshold\n"); + return ; + } + + if ( cpuid < 0 ) + { + int i; + for ( i = 0; i < max_cpu_nr; i++ ) + if ( xc_set_cpufreq_para(xc_fd, i, UP_THRESHOLD, threshold) ) + fprintf(stderr, + "[CPU%d] failed to set up scaling threshold\n", i); + } + else + { + if ( xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, threshold) ) + fprintf(stderr, "failed to set up scaling threshold\n"); + } +} + +void scaling_governor_func(int argc, char *argv[]) +{ + int cpuid = -1; + char *name = NULL; + + if ( argc >= 2 ) + { + name = strdup(argv[1]); + if ( name == NULL ) + goto out; + if ( sscanf(argv[0], "%d", &cpuid) != 1 ) { - fprintf(stderr, "failed to get the processor information\n"); + free(name); + goto out; } - else - { - int i; - for ( i = 0; i < physinfo.nr_cpus; i++ ) - { - if ( (ret = show_cpufreq_para_cpuid(xc_fd, i)) == -ENODEV ) - break; - } - } - } - else - ret = show_cpufreq_para_cpuid(xc_fd, cpuid); - - return ret; -} - -int scaling_max_freq_func(int xc_fd, int cpuid, uint32_t value) -{ - int ret = 0; - - if ( cpuid < 0 ) - { - show_help(); - return -EINVAL; - } - - ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, value); - if ( ret ) - { - fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", cpuid); - } - - return ret; -} - -int scaling_min_freq_func(int xc_fd, int cpuid, uint32_t value) -{ - int ret; - - if ( cpuid < 0 ) - { - show_help(); - return -EINVAL; - } - - ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, value); - if ( ret ) - { - fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", cpuid); - } - - return ret; -} - -int scaling_speed_func(int xc_fd, int cpuid, uint32_t value) -{ - int ret; - - if ( cpuid < 0 ) - { - show_help(); - return -EINVAL; - } - - ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, value); - if ( ret ) - { - fprintf(stderr, "[CPU%d] 
failed to set scaling speed\n", cpuid); - } - - return ret; -} - -int scaling_sampling_rate_func(int xc_fd, int cpuid, uint32_t value) -{ - int ret; - - if ( cpuid < 0 ) - { - show_help(); - return -EINVAL; - } - - ret = xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, value); - if ( ret ) - { - fprintf(stderr, "[CPU%d] failed to set scaling sampling rate\n", cpuid); - } - - return ret; -} - -int scaling_up_threshold_func(int xc_fd, int cpuid, uint32_t value) -{ - int ret; - - if ( cpuid < 0 ) - { - show_help(); - return -EINVAL; - } - - ret = xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, value); - if ( ret ) - { - fprintf(stderr, "[CPU%d] failed to set scaling threshold\n", cpuid); - } - - return ret; -} - -int scaling_governor_func(int xc_fd, int cpuid, char *name) -{ - int ret = 0; - - if ( cpuid < 0 ) - { - show_help(); - return -EINVAL; - } - - ret = xc_set_cpufreq_gov(xc_fd, cpuid, name); - if ( ret ) - { - fprintf(stderr, "failed to set cpufreq governor to %s\n", name); - } - - return ret; + } + else if ( argc > 0 ) + { + name = strdup(argv[0]); + if ( name == NULL ) + goto out; + } + else + goto out; + + if ( cpuid < 0 ) + { + int i; + for ( i = 0; i < max_cpu_nr; i++ ) + if ( xc_set_cpufreq_gov(xc_fd, i, name) ) + fprintf(stderr, "[CPU%d] failed to set governor name\n", i); + } + else + { + if ( xc_set_cpufreq_gov(xc_fd, cpuid, name) ) + fprintf(stderr, "failed to set governor name\n"); + } + + free(name); + return ; +out: + fprintf(stderr, "failed to set governor name\n"); } struct { const char *name; - int (*function)(int xc_fd, int cpuid, uint32_t value); + void (*function)(int argc, char *argv[]); } main_options[] = { { "help", help_func }, - { "get-cpuidle-states", cxstates_func }, - { "get-cpufreq-states", pxstates_func }, + { "get-cpuidle-states", cxstat_func }, + { "get-cpufreq-states", pxstat_func }, + { "start", start_gather_func }, { "get-cpufreq-para", cpufreq_para_func }, { "set-scaling-maxfreq", scaling_max_freq_func }, { 
"set-scaling-minfreq", scaling_min_freq_func }, - { "set-scaling-governor", NULL }, + { "set-scaling-governor", scaling_governor_func }, { "set-scaling-speed", scaling_speed_func }, { "set-sampling-rate", scaling_sampling_rate_func }, { "set-up-threshold", scaling_up_threshold_func }, @@ -541,38 +769,37 @@ struct { int main(int argc, char *argv[]) { - int i, ret = -EINVAL; - int xc_fd; - int cpuid = -1; - uint32_t value = 0; + int i, ret = 0; + xc_physinfo_t physinfo = { 0 }; int nr_matches = 0; int matches_main_options[ARRAY_SIZE(main_options)]; if ( argc < 2 ) { show_help(); - return ret; - } - - if ( argc > 2 ) - { - if ( sscanf(argv[2], "%d", &cpuid) != 1 ) - cpuid = -1; + return 0; } xc_fd = xc_interface_open(); if ( xc_fd < 0 ) { fprintf(stderr, "failed to get the handler\n"); - } - + return 0; + } + + ret = xc_physinfo(xc_fd, &physinfo); + if ( ret ) + { + fprintf(stderr, "failed to get the processor information\n"); + xc_interface_close(xc_fd); + return 0; + } + max_cpu_nr = physinfo.nr_cpus; + + /* calculate how many options match with user's input */ for ( i = 0; i < ARRAY_SIZE(main_options); i++ ) - { if ( !strncmp(main_options[i].name, argv[1], strlen(argv[1])) ) - { matches_main_options[nr_matches++] = i; - } - } if ( nr_matches > 1 ) { @@ -582,27 +809,12 @@ int main(int argc, char *argv[]) fprintf(stderr, "\n"); } else if ( nr_matches == 1 ) - { - if ( !strcmp("set-scaling-governor", main_options[matches_main_options[0]].name) ) - { - char *name = strdup(argv[3]); - ret = scaling_governor_func(xc_fd, cpuid, name); - free(name); - } - else - { - if ( argc > 3 ) - { - if ( sscanf(argv[3], "%d", &value) != 1 ) - value = 0; - } - ret = main_options[matches_main_options[0]].function(xc_fd, cpuid, value); - } - } + /* dispatch to the corresponding function handler */ + main_options[matches_main_options[0]].function(argc - 2, argv + 2); else show_help(); xc_interface_close(xc_fd); - return ret; -} - + return 0; +} + diff -r 4fd4dcf2f891 -r 79f259a26a11 
tools/pygrub/Makefile --- a/tools/pygrub/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/pygrub/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -22,4 +22,6 @@ endif .PHONY: clean clean: - rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out + rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out $(DEPS) + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/Makefile --- a/tools/python/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -85,3 +85,6 @@ test: .PHONY: clean clean: rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/auxbin.pyc + rm -f $(DEPS) + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/lowlevel/xc/xc.c Wed Jan 28 13:06:45 2009 +0900 @@ -903,26 +903,24 @@ static PyObject *pyxc_hvm_build(XcObject if ( target == -1 ) target = memsize; - if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, target, image) != 0 ) + if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, + target, image) != 0 ) return pyxc_error_to_exception(); #if !defined(__ia64__) - /* Set up the HVM info table. */ + /* Fix up the HVM info table. 
*/ va_map = xc_map_foreign_range(self->xc_handle, dom, XC_PAGE_SIZE, PROT_READ | PROT_WRITE, HVM_INFO_PFN); if ( va_map == NULL ) return PyErr_SetFromErrno(xc_error_obj); va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET); - memset(va_hvm, 0, sizeof(*va_hvm)); - strncpy(va_hvm->signature, "HVM INFO", 8); - va_hvm->length = sizeof(struct hvm_info_table); va_hvm->acpi_enabled = acpi; va_hvm->apic_mode = apic; va_hvm->nr_vcpus = vcpus; for ( i = 0, sum = 0; i < va_hvm->length; i++ ) sum += ((uint8_t *)va_hvm)[i]; - va_hvm->checksum = -sum; + va_hvm->checksum -= sum; munmap(va_map, XC_PAGE_SIZE); #endif diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/util/oshelp.py --- a/tools/python/xen/util/oshelp.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/util/oshelp.py Wed Jan 28 13:06:45 2009 +0900 @@ -5,7 +5,7 @@ def fcntl_setfd_cloexec(file, bool): f = fcntl.fcntl(file, fcntl.F_GETFD) if bool: f |= fcntl.FD_CLOEXEC else: f &= ~fcntl.FD_CLOEXEC - fcntl.fcntl(file, fcntl.F_SETFD) + fcntl.fcntl(file, fcntl.F_SETFD, f) def waitstatus_description(st): if os.WIFEXITED(st): diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xend/XendConfig.py Wed Jan 28 13:06:45 2009 +0900 @@ -149,6 +149,7 @@ XENAPI_PLATFORM_CFG_TYPES = { 'opengl': int, 'soundhw': str, 'stdvga': int, + 'videoram': int, 'usb': int, 'usbdevice': str, 'hpet': int, @@ -166,6 +167,7 @@ XENAPI_PLATFORM_CFG_TYPES = { 'guest_os_type': str, 'hap': int, 'xen_extended_power_mgmt': int, + 'pci_msitranslate': int, } # Xen API console 'other_config' keys. 
@@ -1247,6 +1249,11 @@ class XendConfig(dict): 'PPCI': ppci_uuid, 'hotplug_slot': pci_dev.get('vslot', 0) } + + dpci_opts = pci_dev.get('opts') + if dpci_opts and len(dpci_opts) > 0: + dpci_record['options'] = dpci_opts + XendDPCI(dpci_uuid, dpci_record) target['devices'][pci_devs_uuid] = (dev_type, @@ -1762,6 +1769,11 @@ class XendConfig(dict): 'PPCI': ppci_uuid, 'hotplug_slot': pci_dev.get('vslot', 0) } + + dpci_opts = pci_dev.get('opts') + if dpci_opts and len(dpci_opts) > 0: + dpci_record['options'] = dpci_opts + XendDPCI(dpci_uuid, dpci_record) self['devices'][dev_uuid] = (dev_type, diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendDPCI.py --- a/tools/python/xen/xend/XendDPCI.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xend/XendDPCI.py Wed Jan 28 13:06:45 2009 +0900 @@ -41,7 +41,8 @@ class XendDPCI(XendBase): 'virtual_name', 'VM', 'PPCI', - 'hotplug_slot'] + 'hotplug_slot', + 'options'] return XendBase.getAttrRO() + attrRO def getAttrRW(self): @@ -119,6 +120,8 @@ class XendDPCI(XendBase): self.VM = record['VM'] self.PPCI = record['PPCI'] self.hotplug_slot = record['hotplug_slot'] + if 'options' in record.keys(): + self.options = record['options'] def destroy(self): xendom = XendDomain.instance() @@ -152,3 +155,5 @@ class XendDPCI(XendBase): def get_hotplug_slot(self): return self.hotplug_slot + def get_options(self): + return self.options diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xend/XendDomain.py Wed Jan 28 13:06:45 2009 +0900 @@ -423,7 +423,7 @@ class XendDomain: log.exception("Unable to recreate domain") try: xc.domain_pause(domid) - do_FLR(domid) + XendDomainInfo.do_FLR(domid) xc.domain_destroy(domid) except: log.exception("Hard destruction of domain failed: %d" % @@ -1264,7 +1264,7 @@ class XendDomain: else: try: xc.domain_pause(int(domid)) - do_FLR(int(domid)) + XendDomainInfo.do_FLR(int(domid)) val = 
xc.domain_destroy(int(domid)) except ValueError: raise XendInvalidDomain(domid) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xend/XendDomainInfo.py Wed Jan 28 13:06:45 2009 +0900 @@ -696,10 +696,17 @@ class XendDomainInfo: " assigned to other domain.' \ )% (pci_device.name, self.domid, pci_str)) - bdf_str = "%s:%s:%s.%s@%s" % (new_dev['domain'], + opts = '' + if 'opts' in new_dev and len(new_dev['opts']) > 0: + config_opts = new_dev['opts'] + config_opts = map(lambda (x, y): x+'='+y, config_opts) + opts = ',' + reduce(lambda x, y: x+','+y, config_opts) + + bdf_str = "%s:%s:%s.%s%s@%s" % (new_dev['domain'], new_dev['bus'], new_dev['slot'], new_dev['func'], + opts, new_dev['vslt']) self.image.signalDeviceModel('pci-ins', 'pci-inserted', bdf_str) @@ -1192,7 +1199,7 @@ class XendDomainInfo: if self.domid >= 0: if target > memory_cur: - balloon.free( (target-memory_cur)*1024 ) + balloon.free((target - memory_cur) * 1024, self) self.storeVm("memory", target) self.storeDom("memory/target", target << 10) xc.domain_set_target_mem(self.domid, @@ -2234,7 +2241,11 @@ class XendDomainInfo: xc.domain_max_vcpus(self.domid, int(self.info['VCPUs_max'])) # Test whether the devices can be assigned with VT-d - pci_str = str(self.info["platform"].get("pci")) + pci = self.info["platform"].get("pci") + pci_str = '' + if pci and len(pci) > 0: + pci = map(lambda x: x[0:4], pci) # strip options + pci_str = str(pci) if hvm and pci_str: bdf = xc.test_assign_device(self.domid, pci_str) if bdf != 0: @@ -3527,6 +3538,11 @@ class XendDomainInfo: dpci_uuid = uuid.createString() + dpci_opts = [] + opts_dict = xenapi_pci.get('options') + for k in opts_dict.keys(): + dpci_opts.append([k, opts_dict[k]]) + # Convert xenapi to sxp ppci = XendAPIStore.get(xenapi_pci.get('PPCI'), 'PPCI') @@ -3538,6 +3554,7 @@ class XendDomainInfo: ['slot', '0x%02x' % ppci.get_slot()], 
['func', '0x%1x' % ppci.get_func()], ['vslt', '0x%02x' % xenapi_pci.get('hotplug_slot')], + ['opts', dpci_opts], ['uuid', dpci_uuid] ], ['state', 'Initialising'] diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/balloon.py --- a/tools/python/xen/xend/balloon.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xend/balloon.py Wed Jan 28 13:06:45 2009 +0900 @@ -67,7 +67,7 @@ def get_dom0_target_alloc(): raise VmError('Failed to query target memory allocation of dom0.') return kb -def free(need_mem ,self): +def free(need_mem, dominfo): """Balloon out memory from the privileged domain so that there is the specified required amount (in KiB) free. """ @@ -130,7 +130,7 @@ def free(need_mem ,self): if physinfo['nr_nodes'] > 1 and retries == 0: oldnode = -1 waitscrub = 1 - vcpus = self.info['cpus'][0] + vcpus = dominfo.info['cpus'][0] for vcpu in vcpus: nodenum = 0 for node in physinfo['node_to_cpu']: diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xend/image.py Wed Jan 28 13:06:45 2009 +0900 @@ -264,6 +264,10 @@ class ImageHandler: # skip vnc init if nographic is set ret.append('-nographic') return ret + + vram = str(vmConfig['platform'].get('videoram',4)) + ret.append('-videoram') + ret.append(vram) vnc_config = {} has_vnc = int(vmConfig['platform'].get('vnc', 0)) != 0 @@ -833,6 +837,7 @@ class IA64_HVM_ImageHandler(HVMImageHand def configure(self, vmConfig): HVMImageHandler.configure(self, vmConfig) self.vhpt = int(vmConfig['platform'].get('vhpt', 0)) + self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024 def buildDomain(self): xc.nvram_init(self.vm.getName(), self.vm.getDomid()) @@ -847,8 +852,8 @@ class IA64_HVM_ImageHandler(HVMImageHand # buffer io page, buffer pio page and memmap info page extra_pages = 1024 + 5 mem_kb += extra_pages * page_kb - # Add 8 MiB overhead for QEMU's video RAM. 
- return mem_kb + 8192 + mem_kb += self.vramsize + return mem_kb def getRequiredInitialReservation(self): return self.vm.getMemoryTarget() @@ -882,6 +887,7 @@ class X86_HVM_ImageHandler(HVMImageHandl def configure(self, vmConfig): HVMImageHandler.configure(self, vmConfig) self.pae = int(vmConfig['platform'].get('pae', 0)) + self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024 def buildDomain(self): xc.hvm_set_param(self.vm.getDomid(), HVM_PARAM_PAE_ENABLED, self.pae) @@ -890,8 +896,7 @@ class X86_HVM_ImageHandler(HVMImageHandl return rc def getRequiredAvailableMemory(self, mem_kb): - # Add 8 MiB overhead for QEMU's video RAM. - return mem_kb + 8192 + return mem_kb + self.vramsize def getRequiredInitialReservation(self): return self.vm.getMemoryTarget() diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xend/server/pciif.py Wed Jan 28 13:06:45 2009 +0900 @@ -75,6 +75,12 @@ class PciController(DevController): slot = parse_hex(pci_config.get('slot', 0)) func = parse_hex(pci_config.get('func', 0)) + opts = pci_config.get('opts', '') + if len(opts) > 0: + opts = map(lambda (x, y): x+'='+y, opts) + opts = reduce(lambda x, y: x+','+y, opts) + back['opts-%i' % pcidevid] = opts + vslt = pci_config.get('vslt') if vslt is not None: vslots = vslots + vslt + ";" @@ -89,6 +95,9 @@ class PciController(DevController): back['num_devs']=str(pcidevid) back['uuid'] = config.get('uuid','') + if 'pci_msitranslate' in self.vm.info['platform']: + back['msitranslate']=str(self.vm.info['platform']['pci_msitranslate']) + return (0, back, {}) @@ -108,6 +117,9 @@ class PciController(DevController): dev = back['dev-%i' % i] state = states[i] uuid = back['uuid-%i' %i] + opts = '' + if 'opts-%i' % i in back: + opts = back['opts-%i' % i] except: raise XendError('Error reading config') @@ -129,6 +141,8 @@ class PciController(DevController): self.writeBackend(devid, 
'state-%i' % (num_olddevs + i), str(xenbusState['Initialising'])) self.writeBackend(devid, 'uuid-%i' % (num_olddevs + i), uuid) + if len(opts) > 0: + self.writeBackend(devid, 'opts-%i' % (num_olddevs + i), opts) self.writeBackend(devid, 'num_devs', str(num_olddevs + i + 1)) # Update vslots @@ -540,6 +554,9 @@ class PciController(DevController): self.removeBackend(devid, 'vdev-%i' % i) self.removeBackend(devid, 'state-%i' % i) self.removeBackend(devid, 'uuid-%i' % i) + tmpopts = self.readBackend(devid, 'opts-%i' % i) + if tmpopts is not None: + self.removeBackend(devid, 'opts-%i' % i) else: if new_num_devs != i: tmpdev = self.readBackend(devid, 'dev-%i' % i) @@ -556,6 +573,9 @@ class PciController(DevController): tmpuuid = self.readBackend(devid, 'uuid-%i' % i) self.writeBackend(devid, 'uuid-%i' % new_num_devs, tmpuuid) self.removeBackend(devid, 'uuid-%i' % i) + tmpopts = self.readBackend(devid, 'opts-%i' % i) + if tmpopts is not None: + self.removeBackend(devid, 'opts-%i' % i) new_num_devs = new_num_devs + 1 self.writeBackend(devid, 'num_devs', str(new_num_devs)) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/server/relocate.py --- a/tools/python/xen/xend/server/relocate.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xend/server/relocate.py Wed Jan 28 13:06:45 2009 +0900 @@ -122,6 +122,8 @@ class RelocationProtocol(protocol.Protoc if self.transport: self.send_reply(["ready", name]) p2cread, p2cwrite = os.pipe() + from xen.util import oshelp + oshelp.fcntl_setfd_cloexec(p2cwrite, True) threading.Thread(target=connection.SSLSocketServerConnection.recv2fd, args=(self.transport.sock, p2cwrite)).start() try: diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/create.dtd --- a/tools/python/xen/xm/create.dtd Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xm/create.dtd Wed Jan 28 13:06:45 2009 +0900 @@ -82,11 +82,12 @@ <!ELEMENT vtpm (name*)> <!ATTLIST vtpm backend CDATA #REQUIRED> -<!ELEMENT pci EMPTY> +<!ELEMENT pci (pci_opt*)> <!ATTLIST 
pci domain CDATA #REQUIRED bus CDATA #REQUIRED slot CDATA #REQUIRED func CDATA #REQUIRED + opts_str CDATA #IMPLIED vslt CDATA #IMPLIED> <!ELEMENT vscsi EMPTY> @@ -138,6 +139,10 @@ <!ATTLIST vcpu_param key CDATA #REQUIRED value CDATA #REQUIRED> +<!ELEMENT pci_opt EMPTY> +<!ATTLIST pci_opt key CDATA #REQUIRED + value CDATA #REQUIRED> + <!ELEMENT other_config EMPTY> <!ATTLIST other_config key CDATA #REQUIRED value CDATA #REQUIRED> diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xm/create.py Wed Jan 28 13:06:45 2009 +0900 @@ -318,11 +318,14 @@ gopts.var('disk', val='phy:DEV,VDEV,MODE backend driver domain to use for the disk. The option may be repeated to add more than one disk.""") -gopts.var('pci', val='BUS:DEV.FUNC', +gopts.var('pci', val='BUS:DEV.FUNC[,msitranslate=0|1]', fn=append_value, default=[], use="""Add a PCI device to a domain, using given params (in hex). - For example 'pci=c0:02.1'. - The option may be repeated to add more than one pci device.""") + For example 'pci=c0:02.1'. + If msitranslate is set, MSI-INTx translation is enabled if possible. + Guest that doesn't support MSI will get IO-APIC type IRQs + translated from physical MSI, HVM only. Default is 1. + The option may be repeated to add more than one pci device.""") gopts.var('vscsi', val='PDEV,VDEV[,DOM]', fn=append_value, default=[], @@ -523,9 +526,9 @@ gopts.var('vncunused', val='', use="""Try to find an unused port for the VNC server. 
Only valid when vnc=1.""") -gopts.var('videoram', val='', - fn=set_value, default=None, - use="""Maximum amount of videoram PV guest can allocate +gopts.var('videoram', val='MEMORY', + fn=set_int, default=4, + use="""Maximum amount of videoram a guest can allocate for frame buffer.""") gopts.var('sdl', val='', @@ -587,6 +590,11 @@ gopts.var('suppress_spurious_page_faults gopts.var('suppress_spurious_page_faults', val='yes|no', fn=set_bool, default=None, use="""Do not inject spurious page faults into this guest""") + +gopts.var('pci_msitranslate', val='TRANSLATE', + fn=set_int, default=1, + use="""Global PCI MSI-INTx translation flag (0=disable; + 1=enable.""") def err(msg): """Print an error to stderr and exit. @@ -667,9 +675,23 @@ def configure_pci(config_devs, vals): """Create the config for pci devices. """ config_pci = [] - for (domain, bus, slot, func) in vals.pci: - config_pci.append(['dev', ['domain', domain], ['bus', bus], \ - ['slot', slot], ['func', func]]) + for (domain, bus, slot, func, opts) in vals.pci: + config_pci_opts = [] + d = comma_sep_kv_to_dict(opts) + + def f(k): + if k not in ['msitranslate']: + err('Invalid pci option: ' + k) + + config_pci_opts.append([k, d[k]]) + + config_pci_bdf = ['dev', ['domain', domain], ['bus', bus], \ + ['slot', slot], ['func', func]] + map(f, d.keys()) + if len(config_pci_opts)>0: + config_pci_bdf.append(['opts', config_pci_opts]) + + config_pci.append(config_pci_bdf) if len(config_pci)>0: config_pci.insert(0, 'pci') @@ -862,12 +884,12 @@ def configure_hvm(config_image, vals): """Create the config for HVM devices. 
""" args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode', - 'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw', + 'localtime', 'serial', 'stdvga', 'videoram', 'isa', 'nographic', 'soundhw', 'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten', 'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor', 'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci', 'hpet', 'guest_os_type', 'hap', 'opengl', 'cpuid', 'cpuid_check', - 'viridian', 'xen_extended_power_mgmt' ] + 'viridian', 'xen_extended_power_mgmt', 'pci_msitranslate' ] for a in args: if a in vals.__dict__ and vals.__dict__[a] is not None: @@ -991,14 +1013,18 @@ def preprocess_pci(vals): pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" + \ r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + \ r"(?P<slot>[0-9a-fA-F]{1,2})[.,]" + \ - r"(?P<func>[0-7])$", pci_dev_str) + r"(?P<func>[0-7])" + \ + r"(,(?P<opts>.*))?$", pci_dev_str) if pci_match!=None: - pci_dev_info = pci_match.groupdict('0') + pci_dev_info = pci_match.groupdict('') + if pci_dev_info['domain']=='': + pci_dev_info['domain']='0' try: pci.append( ('0x'+pci_dev_info['domain'], \ '0x'+pci_dev_info['bus'], \ '0x'+pci_dev_info['slot'], \ - '0x'+pci_dev_info['func'])) + '0x'+pci_dev_info['func'], \ + pci_dev_info['opts'])) except IndexError: err('Error in PCI slot syntax "%s"'%(pci_dev_str)) vals.pci = pci diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xm/main.py Wed Jan 28 13:06:45 2009 +0900 @@ -187,7 +187,7 @@ SUBCOMMAND_HELP = { 'vnet-delete' : ('<VnetId>', 'Delete a Vnet.'), 'vnet-list' : ('[-l|--long]', 'List Vnets.'), 'vtpm-list' : ('<Domain> [--long]', 'List virtual TPM devices.'), - 'pci-attach' : ('<Domain> <domain:bus:slot.func> [virtual slot]', + 'pci-attach' : ('[-o|--options=<opt>] <Domain> <domain:bus:slot.func> [virtual slot]', 'Insert a new pass-through pci device.'), 'pci-detach' : ('<Domain> 
<domain:bus:slot.func>', 'Remove a domain\'s pass-through pci device.'), @@ -2428,7 +2428,7 @@ def xm_network_attach(args): vif.append(vif_param) server.xend.domain.device_create(dom, vif) -def parse_pci_configuration(args, state): +def parse_pci_configuration(args, state, opts = ''): dom = args[0] pci_dev_str = args[1] if len(args) == 3: @@ -2443,12 +2443,17 @@ def parse_pci_configuration(args, state) if pci_match == None: raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt)) pci_dev_info = pci_match.groupdict('0') + try: - pci.append(['dev', ['domain', '0x'+ pci_dev_info['domain']], \ + pci_bdf =['dev', ['domain', '0x'+ pci_dev_info['domain']], \ ['bus', '0x'+ pci_dev_info['bus']], ['slot', '0x'+ pci_dev_info['slot']], ['func', '0x'+ pci_dev_info['func']], - ['vslt', '0x%x' % int(vslt, 16)]]) + ['vslt', '0x%x' % int(vslt, 16)]] + if len(opts) > 0: + pci_bdf.append(['opts', opts]) + pci.append(pci_bdf) + except: raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt)) pci.append(['state', state]) @@ -2456,8 +2461,22 @@ def parse_pci_configuration(args, state) return (dom, pci) def xm_pci_attach(args): - arg_check(args, 'pci-attach', 2, 3) - (dom, pci) = parse_pci_configuration(args, 'Initialising') + config_pci_opts = [] + (options, params) = getopt.gnu_getopt(args, 'o:', ['options=']) + for (k, v) in options: + if k in ('-o', '--options'): + if len(v.split('=')) != 2: + err("Invalid pci attach option: %s" % v) + usage('pci-attach') + config_pci_opts.append(v.split('=')) + + n = len([i for i in params if i != '--']) + if n < 2 or n > 3: + err("Invalid argument for 'xm pci-attach'") + usage('pci-attach') + + (dom, pci) = parse_pci_configuration(params, 'Initialising', + config_pci_opts) if serverType == SERVER_XEN_API: @@ -2480,7 +2499,8 @@ def xm_pci_attach(args): dpci_record = { "VM": get_single_vm(dom), "PPCI": target_ref, - "hotplug_slot": vslt + "hotplug_slot": vslt, + "options": dict(config_pci_opts) } 
server.xenapi.DPCI.create(dpci_record) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/xenapi_create.py --- a/tools/python/xen/xm/xenapi_create.py Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/python/xen/xm/xenapi_create.py Wed Jan 28 13:06:45 2009 +0900 @@ -533,7 +533,10 @@ class xenapi_create: "PPCI": target_ref, "hotplug_slot": - int(pci.attributes["func"].value, 16) + int(pci.attributes["func"].value, 16), + "options": + get_child_nodes_as_dict(pci, + "pci_opt", "key", "value") } return server.xenapi.DPCI.create(dpci_record) @@ -931,6 +934,12 @@ class sxp2xml: = get_child_by_name(dev_sxp, "func", "0") pci.attributes["vslt"] \ = get_child_by_name(dev_sxp, "vslt", "0") + for opt in get_child_by_name(dev_sxp, "opts", ""): + if len(opt) > 0: + pci_opt = document.createElement("pci_opt") + pci_opt.attributes["key"] = opt[0] + pci_opt.attributes["value"] = opt[1] + pci.appendChild(pci_opt) pcis.append(pci) @@ -1032,6 +1041,7 @@ class sxp2xml: 'vhpt', 'guest_os_type', 'hap', + 'pci_msitranslate', ] platform_configs = [] diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/tests/blowfish.mk --- a/tools/tests/blowfish.mk Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/tests/blowfish.mk Wed Jan 28 13:06:45 2009 +0900 @@ -1,13 +1,13 @@ override XEN_TARGET_ARCH = x86_32 XEN_ROOT = ../.. -CFLAGS := +CFLAGS = include $(XEN_ROOT)/tools/Rules.mk # Disable PIE/SSP if GCC supports them. They can break us. 
-CFLAGS += $(call cc-option,$(CC),-nopie,) -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,) +$(call cc-option-add,CFLAGS,CC,-nopie) +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector) +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all) CFLAGS += -fno-builtin -msoft-float diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vnet/libxutil/Makefile --- a/tools/vnet/libxutil/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/vnet/libxutil/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -24,13 +24,10 @@ LIB_OBJS := $(LIB_SRCS:.c=.o) LIB_OBJS := $(LIB_SRCS:.c=.o) PIC_OBJS := $(LIB_SRCS:.c=.opic) -CFLAGS += -Werror -fno-strict-aliasing $(call cc-option,$(CC),-fgnu89-inline,) +$(call cc-option-add,CFLAGS,CC,-fgnu89-inline) +CFLAGS += -Werror -fno-strict-aliasing CFLAGS += -O3 #CFLAGS += -g - -# Get gcc to generate the dependencies for us. -CFLAGS += -Wp,-MD,.$(@F).d -DEPS = .*.d MAJOR := 3.0 MINOR := 0 diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vtpm/Makefile --- a/tools/vtpm/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/vtpm/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -89,6 +89,6 @@ build_sub: $(MAKE) -C $(TPM_EMULATOR_DIR); \ fi \ else \ - echo "*** Unable to build VTPMs. libgmp could not be found."; \ + echo "=== Unable to build VTPMs. libgmp could not be found."; \ fi diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vtpm/Rules.mk --- a/tools/vtpm/Rules.mk Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/vtpm/Rules.mk Wed Jan 28 13:06:45 2009 +0900 @@ -11,11 +11,6 @@ TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin # General compiler flags CFLAGS = -Werror -g3 -I. 
-# For generating dependencies -CFLAGS += -Wp,-MD,.$(@F).d - -DEP_FILES = .*.d - # Generic project files HDRS = $(wildcard *.h) SRCS = $(wildcard *.c) @@ -26,7 +21,7 @@ OBJS = $(patsubst %.c,%.o,$(SRCS)) $(OBJS): $(SRCS) --include $(DEP_FILES) +-include $(DEPS) BUILD_EMULATOR = y diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vtpm_manager/Rules.mk --- a/tools/vtpm_manager/Rules.mk Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/vtpm_manager/Rules.mk Wed Jan 28 13:06:45 2009 +0900 @@ -11,11 +11,6 @@ TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin # General compiler flags CFLAGS = -Werror -g3 -I. -# For generating dependencies -CFLAGS += -Wp,-MD,.$(@F).d - -DEP_FILES = .*.d - # Generic project files HDRS = $(wildcard *.h) SRCS = $(wildcard *.c) @@ -26,7 +21,7 @@ OBJS = $(patsubst %.c,%.o,$(SRCS)) $(OBJS): $(SRCS) --include $(DEP_FILES) +-include $(FILES) # Make sure these are just rules .PHONY : all build install clean diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xcutils/Makefile --- a/tools/xcutils/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/xcutils/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -13,10 +13,6 @@ include $(XEN_ROOT)/tools/Rules.mk CFLAGS += -Werror CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) $(CFLAGS_libxenstore) - -# Make gcc generate dependencies. 
-CFLAGS += -Wp,-MD,.$(@F).d -PROG_DEP = .*.d PROGRAMS = xc_restore xc_save readnotes lsevtchn @@ -40,6 +36,6 @@ install: build .PHONY: clean clean: $(RM) *.o $(PROGRAMS) - $(RM) $(PROG_DEP) + $(RM) $(DEPS) --include $(PROG_DEP) +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xcutils/readnotes.c --- a/tools/xcutils/readnotes.c Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/xcutils/readnotes.c Wed Jan 28 13:06:45 2009 +0900 @@ -13,7 +13,7 @@ #include <xg_private.h> #include <xc_dom.h> /* gunzip bits */ -#include <xen/libelf.h> +#include <xen/libelf/libelf.h> static void print_string_note(const char *prefix, struct elf_binary *elf, const elf_note *note) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenmon/Makefile --- a/tools/xenmon/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/xenmon/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -38,10 +38,12 @@ install: build .PHONY: clean clean: - rm -f $(BIN) + rm -f $(BIN) $(DEPS) %: %.c Makefile $(CC) $(CFLAGS) $< $(LDFLAGS) -o $@ xentrace_%: %.c Makefile $(CC) $(CFLAGS) $< $(LDFLAGS) -o $@ + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenpmd/Makefile --- a/tools/xenpmd/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/xenpmd/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -17,4 +17,6 @@ install: all .PHONY: clean clean: - $(RM) -f $(BIN) + $(RM) -f $(BIN) $(DEPS) + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenstat/libxenstat/Makefile --- a/tools/xenstat/libxenstat/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/xenstat/libxenstat/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -155,4 +155,6 @@ endif .PHONY: clean clean: rm -f $(LIB) $(SHLIB) $(SHLIB_LINKS) $(OBJECTS-y) \ - $(BINDINGS) $(BINDINGSRC) + $(BINDINGS) $(BINDINGSRC) $(DEPS) + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenstat/xentop/Makefile --- a/tools/xenstat/xentop/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/xenstat/xentop/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -37,4 +37,6 @@ endif .PHONY: clean 
clean: - rm -f xentop xentop.o + rm -f xentop xentop.o $(DEPS) + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenstore/Makefile --- a/tools/xenstore/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/xenstore/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -7,10 +7,6 @@ CFLAGS += -Werror CFLAGS += -Werror CFLAGS += -I. CFLAGS += $(CFLAGS_libxenctrl) - -# Make gcc generate dependencies. -CFLAGS += -Wp,-MD,.$(@F).d -DEP = .*.d CLIENTS := xenstore-exists xenstore-list xenstore-read xenstore-rm xenstore-chmod CLIENTS += xenstore-write xenstore-ls @@ -82,7 +78,7 @@ clean: rm -f xenstored xs_random xs_stress xs_crashme rm -f xs_tdb_dump xenstore-control rm -f xenstore $(CLIENTS) - $(RM) $(DEP) + $(RM) $(DEPS) .PHONY: TAGS TAGS: @@ -113,7 +109,7 @@ install: all $(INSTALL_DATA) xs.h $(DESTDIR)$(INCLUDEDIR) $(INSTALL_DATA) xs_lib.h $(DESTDIR)$(INCLUDEDIR) --include $(DEP) +-include $(DEPS) # never delete any intermediate files. .SECONDARY: diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xentrace/Makefile --- a/tools/xentrace/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/tools/xentrace/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -46,9 +46,12 @@ install: build .PHONY: clean clean: - $(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN) + $(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN) $(DEPS) %: %.c $(HDRS) Makefile $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) xentrace_%: %.c $(HDRS) Makefile $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +-include $(DEPS) + diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/Rules.mk --- a/xen/Rules.mk Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/Rules.mk Wed Jan 28 13:06:45 2009 +0900 @@ -23,9 +23,6 @@ ifeq ($(perfc_arrays),y) ifeq ($(perfc_arrays),y) perfc := y endif -ifeq ($(frame_pointer),y) -CFLAGS := $(shell echo $(CFLAGS) | sed -e 's/-f[^ ]*omit-frame-pointer//g') -endif # Set ARCH/SUBARCH appropriately. 
override TARGET_SUBARCH := $(XEN_TARGET_ARCH) @@ -34,20 +31,7 @@ override TARGET_ARCH := $(shell echo TARGET := $(BASEDIR)/xen -HDRS := $(wildcard *.h) -HDRS += $(wildcard $(BASEDIR)/include/xen/*.h) -HDRS += $(wildcard $(BASEDIR)/include/xen/hvm/*.h) -HDRS += $(wildcard $(BASEDIR)/include/public/*.h) -HDRS += $(wildcard $(BASEDIR)/include/public/*/*.h) -HDRS += $(wildcard $(BASEDIR)/include/compat/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h) - include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk - -# Do not depend on auto-generated header files. -AHDRS := $(filter-out %/include/xen/compile.h,$(HDRS)) -HDRS := $(filter-out %/asm-offsets.h,$(AHDRS)) # Note that link order matters! ALL_OBJS-y += $(BASEDIR)/common/built_in.o @@ -77,15 +61,18 @@ AFLAGS-y += -D__ASSEMBLY_ ALL_OBJS := $(ALL_OBJS-y) -CFLAGS := $(strip $(CFLAGS) $(CFLAGS-y)) +# Get gcc to generate the dependencies for us. +CFLAGS-y += -MMD -MF .$(@F).d +DEPS = .*.d + +CFLAGS += $(CFLAGS-y) # Most CFLAGS are safe for assembly files: # -std=gnu{89,99} gets confused by #-prefixed end-of-line comments -AFLAGS := $(strip $(AFLAGS) $(AFLAGS-y)) -AFLAGS += $(patsubst -std=gnu%,,$(CFLAGS)) +AFLAGS += $(AFLAGS-y) $(filter-out -std=gnu%,$(CFLAGS)) # LDFLAGS are only passed directly to $(LD) -LDFLAGS := $(strip $(LDFLAGS) $(LDFLAGS_DIRECT)) +LDFLAGS += $(LDFLAGS_DIRECT) include Makefile @@ -115,19 +102,21 @@ FORCE: .PHONY: clean clean:: $(addprefix _clean_, $(subdir-all)) - rm -f *.o *~ core + rm -f *.o *~ core $(DEPS) _clean_%/: FORCE $(MAKE) -f $(BASEDIR)/Rules.mk -C $* clean -%.o: %.c $(HDRS) Makefile +%.o: %.c Makefile $(CC) $(CFLAGS) -c $< -o $@ -%.o: %.S $(AHDRS) Makefile +%.o: %.S Makefile $(CC) $(AFLAGS) -c $< -o $@ -%.i: %.c $(HDRS) Makefile +%.i: %.c Makefile $(CPP) $(CFLAGS) $< -o $@ # -std=gnu{89,99} gets confused by # as an end-of-line comment marker -%.s: %.S $(AHDRS) Makefile +%.s: %.S Makefile $(CPP) 
$(AFLAGS) $< -o $@ + +-include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/Makefile --- a/xen/arch/ia64/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/ia64/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -29,11 +29,11 @@ subdir-y += linux-xen # Headers do not depend on auto-generated header, but object files do. $(ALL_OBJS): $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h -asm-offsets.s: asm-offsets.c $(HDRS) \ +asm-offsets.s: asm-offsets.c \ $(BASEDIR)/include/asm-ia64/.offsets.h.stamp $(CC) $(CFLAGS) -DGENERATE_ASM_OFFSETS -DIA64_TASK_SIZE=0 -S -o $@ $< -asm-xsi-offsets.s: asm-xsi-offsets.c $(HDRS) +asm-xsi-offsets.s: asm-xsi-offsets.c $(CC) $(CFLAGS) -S -o $@ $< $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h: asm-xsi-offsets.s @@ -61,7 +61,7 @@ asm-xsi-offsets.s: asm-xsi-offsets.c $(H touch $@ # I'm sure a Makefile wizard would know a better way to do this -xen.lds.s: xen/xen.lds.S $(HDRS) +xen.lds.s: xen/xen.lds.S $(CC) -E $(CPPFLAGS) -P -DXEN $(AFLAGS) \ -o xen.lds.s xen/xen.lds.S diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/Rules.mk --- a/xen/arch/ia64/Rules.mk Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/ia64/Rules.mk Wed Jan 28 13:06:45 2009 +0900 @@ -72,19 +72,4 @@ CFLAGS += -DCONFIG_XEN_IA64_DISABLE_OPTV CFLAGS += -DCONFIG_XEN_IA64_DISABLE_OPTVFAULT endif -LDFLAGS := -g - -# Additionnal IA64 include dirs. 
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/sn/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/linux/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/sn/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/linux/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm-generic/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/byteorder/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/hvm/*.h) - -HDRS := $(filter-out %/include/asm-ia64/asm-xsi-offsets.h,$(HDRS)) +LDFLAGS = -g diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/tools/p2m_foreign/Makefile --- a/xen/arch/ia64/tools/p2m_foreign/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -# -# xen/arch/ia64/tools/p2m_foreign -# -# Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> -# VA Linux Systems Japan K.K. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -XEN_ROOT = ../../../../.. 
-include $(XEN_ROOT)/tools/Rules.mk - -CFLAGS += -Werror -ggdb3 -CFLAGS += -I$(XEN_LIBXC) -I$(XEN_XENSTORE) - -# Make gcc generate dependencies. -CFLAGS += -Wp,-MD,.$(@F).d -DEPS = .*.d - -PROGRAMS = p2m_foreign -LDLIBS = -L$(XEN_LIBXC) -L$(XEN_XENSTORE) -lxenguest -lxenctrl - -.PHONY: all -all: build - -.PHONY: build -build: $(PROGRAMS) - -$(PROGRAMS): %: %.o - $(CC) $(CFLAGS) $^ $(LDLIBS) -o $@ - - -.PHONY: install -install: - -.PHONY: clean -clean: - $(RM) *.o $(PROGRAMS) - $(RM) $(DEPS) - --include $(DEPS) diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/tools/p2m_foreign/p2m_foreign.c --- a/xen/arch/ia64/tools/p2m_foreign/p2m_foreign.c Wed Jan 28 12:22:58 2009 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,233 +0,0 @@ -/* - * Foreign p2m exposure test. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright (c) 2007 Isaku Yamahata <yamahata at valinux co jp> - * VA Linux Systems Japan K.K. - * - */ - -#include <sys/mman.h> -#include <err.h> -#include <errno.h> -#include <assert.h> - -#include <xc_private.h> -#include <xenctrl.h> -#include <xenguest.h> -#include <xc_efi.h> -#include <ia64/xc_ia64.h> - -#if 1 -# define printd(fmt, args...) printf(fmt, ##args) -#else -# define printd(fmt, args...) 
((void)0) -#endif - -/* xc_memory_op() in xc_private.c doesn't support translate_gpfn_list */ -static int -__xc_memory_op(int xc_handle, int cmd, void *arg) -{ - DECLARE_HYPERCALL; - struct xen_translate_gpfn_list* translate = arg; - - xen_ulong_t* gpfns; - xen_ulong_t* mfns; - size_t len; - - long ret = -EINVAL; - - hypercall.op = __HYPERVISOR_memory_op; - hypercall.arg[0] = (unsigned long)cmd; - hypercall.arg[1] = (unsigned long)arg; - - assert(cmd == XENMEM_translate_gpfn_list); - - get_xen_guest_handle(gpfns, translate->gpfn_list); - get_xen_guest_handle(mfns, translate->mfn_list); - len = sizeof(gpfns[0]) * translate->nr_gpfns; - if (lock_pages(translate, sizeof(*translate)) || - lock_pages(gpfns, len) || - lock_pages(mfns, len)) - goto out; - - ret = do_xen_hypercall(xc_handle, &hypercall); - -out: - unlock_pages(mfns, len); - unlock_pages(gpfns, len); - unlock_pages(translate, sizeof(*translate)); - - return ret; -} - -int -xc_translate_gpfn_list(int xc_handle, uint32_t domid, xen_ulong_t nr_gpfns, - xen_ulong_t* gpfns, xen_ulong_t* mfns) -{ - struct xen_translate_gpfn_list translate = { - .domid = domid, - .nr_gpfns = nr_gpfns, - }; - set_xen_guest_handle(translate.gpfn_list, gpfns); - set_xen_guest_handle(translate.mfn_list, mfns); - - return __xc_memory_op(xc_handle, - XENMEM_translate_gpfn_list, &translate); -} - -int -main(int argc, char** argv) -{ - uint32_t domid; - int xc_handle; - - xc_dominfo_t info; - shared_info_t* shinfo; - - unsigned long map_size; - xen_ia64_memmap_info_t* memmap_info; - struct xen_ia64_p2m_table p2m_table; - - char* p; - char* start; - char* end; - xen_ulong_t nr_gpfns; - - xen_ulong_t* gpfns; - xen_ulong_t* mfns; - - unsigned long i; - - if (argc != 2) - errx(EXIT_FAILURE, "usage: %s <domid>", argv[0]); - domid = atol(argv[1]); - - printd("xc_interface_open()\n"); - xc_handle = xc_interface_open(); - if (xc_handle < 0) - errx(EXIT_FAILURE, "can't open control interface"); - - printd("xc_domain_getinfo\n"); - if 
(xc_domain_getinfo(xc_handle, domid, 1, &info) != 1) - errx(EXIT_FAILURE, "Could not get info for domain"); - - - printd("shared info\n"); - shinfo = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, - PROT_READ, info.shared_info_frame); - if (shinfo == NULL) - errx(EXIT_FAILURE, "can't map shared info"); - - printd("memmap_info\n"); - map_size = PAGE_SIZE * shinfo->arch.memmap_info_num_pages; - memmap_info = xc_map_foreign_range(xc_handle, info.domid, - map_size, PROT_READ, - shinfo->arch.memmap_info_pfn); - if (memmap_info == NULL) - errx(EXIT_FAILURE, "can't map memmap_info"); - -#if 1 - start = (char*)&memmap_info->memdesc; - end = start + memmap_info->efi_memmap_size; - i = 0; - for (p = start; p < end; p += memmap_info->efi_memdesc_size) { - efi_memory_desc_t* md = (efi_memory_desc_t*)p; - printd("%ld [0x%lx, 0x%lx) 0x%lx pages\n", - i, md->phys_addr, - md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), - md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT)); - i++; - } -#endif - - - printd("p2m map\n"); - if (xc_ia64_p2m_map(&p2m_table, xc_handle, domid, memmap_info, 0) < 0) - errx(EXIT_FAILURE, "can't map foreign p2m table"); - printd("p2m map done\n"); - - start = (char*)&memmap_info->memdesc; - end = start + memmap_info->efi_memmap_size; - nr_gpfns = 0; - i = 0; - for (p = start; p < end; p += memmap_info->efi_memdesc_size) { - efi_memory_desc_t* md = (efi_memory_desc_t*)p; - if ( md->type != EFI_CONVENTIONAL_MEMORY || - md->attribute != EFI_MEMORY_WB || - md->num_pages == 0 ) - continue; - - printd("%ld [0x%lx, 0x%lx) 0x%lx pages\n", - i, md->phys_addr, - md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), - md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT)); - nr_gpfns += md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT); - i++; - } - - printd("total 0x%lx gpfns\n", nr_gpfns); - gpfns = malloc(sizeof(gpfns[0]) * nr_gpfns); - mfns = malloc(sizeof(mfns[0]) * nr_gpfns); - if (gpfns == NULL || mfns == NULL) - err(EXIT_FAILURE, "can't allocate memory for gpfns/mfns"); - 
- i = 0; - for (p = start; p < end; p += memmap_info->efi_memdesc_size) { - efi_memory_desc_t* md = (efi_memory_desc_t*)p; - unsigned long j; - if ( md->type != EFI_CONVENTIONAL_MEMORY || - md->attribute != EFI_MEMORY_WB || - md->num_pages == 0 ) - continue; - - for (j = 0; - j < md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT); - j++) { - gpfns[i] = (md->phys_addr >> PAGE_SHIFT) + j; - i++; - } - } - for (i = 0; i < nr_gpfns; i++) - mfns[i] = INVALID_MFN; - - printd("issue translate gpfn list hypercall. " - "this may take a while\n"); - if (xc_translate_gpfn_list(xc_handle, - domid, nr_gpfns, gpfns, mfns) < 0) - err(EXIT_FAILURE, "translate gpfn list hypercall failure"); - printd("translate gpfn list hypercall done\n"); - - printd("checking p2m table\n"); - for (i = 0; i < nr_gpfns; i++) { - unsigned long mfn_by_translated = mfns[i]; - unsigned long mfn_by_p2m = - xc_ia64_p2m_mfn(&p2m_table, gpfns[i]); - if (mfn_by_translated != mfn_by_p2m && - !(mfn_by_translated == 0 && mfn_by_p2m == INVALID_MFN)) { - printf("ERROR! 
i 0x%lx gpfn " - "0x%lx trnslated 0x%lx p2m 0x%lx\n", - i, gpfns[i], mfn_by_translated, mfn_by_p2m); - } - } - printd("checking p2m table done\n"); - - xc_ia64_p2m_unmap(&p2m_table); - munmap(memmap_info, map_size); - munmap(shinfo, PAGE_SIZE); - - return EXIT_SUCCESS; -} diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/ia64/xen/domain.c Wed Jan 28 13:06:45 2009 +0900 @@ -31,7 +31,7 @@ #include <xen/event.h> #include <xen/console.h> #include <xen/version.h> -#include <public/libelf.h> +#include <xen/libelf.h> #include <asm/pgalloc.h> #include <asm/offsets.h> /* for IA64_THREAD_INFO_SIZE */ #include <asm/vcpu.h> /* for function declarations */ diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/irq.c --- a/xen/arch/ia64/xen/irq.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/ia64/xen/irq.c Wed Jan 28 13:06:45 2009 +0900 @@ -402,7 +402,7 @@ void __do_IRQ_guest(int irq) } } -int pirq_acktype(int irq) +static int pirq_acktype(int irq) { irq_desc_t *desc = &irq_desc[irq]; diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/machine_kexec.c --- a/xen/arch/ia64/xen/machine_kexec.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/ia64/xen/machine_kexec.c Wed Jan 28 13:06:45 2009 +0900 @@ -195,6 +195,7 @@ int machine_kexec_get(xen_kexec_range_t void arch_crash_save_vmcoreinfo(void) { + VMCOREINFO_SYMBOL(xenheap_phys_end); VMCOREINFO_SYMBOL(dom_xen); VMCOREINFO_SYMBOL(dom_io); VMCOREINFO_SYMBOL(xen_pstart); diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/ia64/xen/mm.c Wed Jan 28 13:06:45 2009 +0900 @@ -3246,9 +3246,9 @@ int get_page_type(struct page_info *page return 1; } -int memory_is_conventional_ram(paddr_t p) -{ - return (efi_mem_type(p) == EFI_CONVENTIONAL_MEMORY); +int page_is_conventional_ram(unsigned long mfn) +{ + return (efi_mem_type(pfn_to_paddr(mfn)) == EFI_CONVENTIONAL_MEMORY); } 
@@ -3295,38 +3295,39 @@ arch_memory_op(int op, XEN_GUEST_HANDLE( spin_unlock(&d->grant_table->lock); break; - case XENMAPSPACE_mfn: - { - if ( get_page_from_pagenr(xatp.idx, d) ) { - struct xen_ia64_memmap_info memmap_info; - efi_memory_desc_t md; - int ret; - - mfn = xatp.idx; - page = mfn_to_page(mfn); - - memmap_info.efi_memmap_size = sizeof(md); - memmap_info.efi_memdesc_size = sizeof(md); - memmap_info.efi_memdesc_version = - EFI_MEMORY_DESCRIPTOR_VERSION; - - md.type = EFI_CONVENTIONAL_MEMORY; - md.pad = 0; - md.phys_addr = xatp.gpfn << PAGE_SHIFT; - md.virt_addr = 0; - md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT); - md.attribute = EFI_MEMORY_WB; - - ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md); - if (ret != 0) { - put_page(page); - rcu_unlock_domain(d); - gdprintk(XENLOG_DEBUG, - "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n", - __func__, __LINE__, - d->domain_id, xatp.gpfn, xatp.idx, ret); - return ret; - } + case XENMAPSPACE_gmfn: { + struct xen_ia64_memmap_info memmap_info; + efi_memory_desc_t md; + int ret; + + xatp.idx = gmfn_to_mfn(d, xatp.idx); + if ( !get_page_from_pagenr(xatp.idx, d) ) + break; + + mfn = xatp.idx; + page = mfn_to_page(mfn); + + memmap_info.efi_memmap_size = sizeof(md); + memmap_info.efi_memdesc_size = sizeof(md); + memmap_info.efi_memdesc_version = + EFI_MEMORY_DESCRIPTOR_VERSION; + + md.type = EFI_CONVENTIONAL_MEMORY; + md.pad = 0; + md.phys_addr = xatp.gpfn << PAGE_SHIFT; + md.virt_addr = 0; + md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT); + md.attribute = EFI_MEMORY_WB; + + ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md); + if (ret != 0) { + put_page(page); + rcu_unlock_domain(d); + gdprintk(XENLOG_DEBUG, + "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n", + __func__, __LINE__, + d->domain_id, xatp.gpfn, xatp.idx, ret); + return ret; } break; } @@ -3377,34 +3378,6 @@ arch_memory_op(int op, XEN_GUEST_HANDLE( break; } - - case XENMEM_remove_from_physmap: - { - struct xen_remove_from_physmap xrfp; - unsigned 
long mfn; - struct domain *d; - - if ( copy_from_guest(&xrfp, arg, 1) ) - return -EFAULT; - - rc = rcu_lock_target_domain_by_id(xrfp.domid, &d); - if ( rc != 0 ) - return rc; - - domain_lock(d); - - mfn = gmfn_to_mfn(d, xrfp.gpfn); - - if ( mfn_valid(mfn) ) - guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0); - - domain_unlock(d); - - rcu_unlock_domain(d); - - break; - } - case XENMEM_machine_memory_map: { diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/ia64/xen/xensetup.c Wed Jan 28 13:06:45 2009 +0900 @@ -747,8 +747,3 @@ int xen_in_range(paddr_t start, paddr_t return start < end; } - -int tboot_in_range(paddr_t start, paddr_t end) -{ - return 0; -} diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -53,6 +53,7 @@ obj-y += crash.o obj-y += crash.o obj-y += tboot.o obj-y += hpet.o +obj-y += bzimage.o obj-$(crash_debug) += gdbstub.o @@ -78,10 +79,10 @@ ALL_OBJS := $(BASEDIR)/arch/x86/boot/bui $(@D)/.$(@F).1.o -o $@ rm -f $(@D)/.$(@F).[0-9]* -asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS) +asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(CC) $(CFLAGS) -S -o $@ $< -xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(HDRS) +xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(CC) -P -E -Ui386 $(AFLAGS) -o $@ $< boot/mkelf32: boot/mkelf32.c @@ -90,4 +91,4 @@ boot/mkelf32: boot/mkelf32.c .PHONY: clean clean:: rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32 - rm -f $(BASEDIR)/.xen-syms.[0-9]* + rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/Rules.mk --- a/xen/arch/x86/Rules.mk Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/Rules.mk Wed Jan 28 13:06:45 2009 +0900 @@ -26,9 +26,9 @@ CFLAGS += -msoft-float CFLAGS += -msoft-float # Disable PIE/SSP if GCC supports them. They can break us. 
-CFLAGS += $(call cc-option,$(CC),-nopie,) -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,) +$(call cc-option-add,CFLAGS,CC,-nopie) +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector) +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all) ifeq ($(supervisor_mode_kernel),y) CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1 @@ -45,16 +45,12 @@ CFLAGS += -mno-red-zone -fpic -fno-reord CFLAGS += -mno-red-zone -fpic -fno-reorder-blocks CFLAGS += -fno-asynchronous-unwind-tables # -fvisibility=hidden reduces -fpic cost, if it's available -CFLAGS += $(call cc-option,$(CC),-fvisibility=hidden,) -CFLAGS := $(subst -fvisibility=hidden,-DGCC_HAS_VISIBILITY_ATTRIBUTE,$(CFLAGS)) +ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n) +CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE +endif x86_32 := n x86_64 := y endif -HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/svm/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/vmx/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-x86/mach-*/*.h) - # Require GCC v3.4+ (to avoid issues with alignment constraints in Xen headers) $(call cc-ver-check,CC,0x030400,"Xen requires at least gcc-3.4") diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/acpi/cpu_idle.c --- a/xen/arch/x86/acpi/cpu_idle.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/acpi/cpu_idle.c Wed Jan 28 13:06:45 2009 +0900 @@ -50,11 +50,6 @@ #define DEBUG_PM_CX -#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000) -#define PM_TIMER_TICKS_TO_US(t) ((t * 1000) / (PM_TIMER_FREQUENCY / 1000)) -#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */ -#define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */ - static void (*lapic_timer_off)(void); static void (*lapic_timer_on)(void); @@ -366,7 +361,7 @@ static void acpi_processor_idle(void) cx->usage++; if ( sleep_ticks > 0 ) { - power->last_residency = PM_TIMER_TICKS_TO_US(sleep_ticks); + 
power->last_residency = acpi_pm_tick_to_ns(sleep_ticks) / 1000UL; cx->time += sleep_ticks; } @@ -611,7 +606,7 @@ static void set_cx( cx->latency = xen_cx->latency; cx->power = xen_cx->power; - cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); + cx->latency_ticks = ns_to_acpi_pm_tick(cx->latency * 1000UL); cx->target_residency = cx->latency * latency_factor; if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 ) acpi_power->safe_state = cx; diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/acpi/power.c --- a/xen/arch/x86/acpi/power.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/acpi/power.c Wed Jan 28 13:06:45 2009 +0900 @@ -221,6 +221,7 @@ static int enter_state(u32 state) enable_cpu: cpufreq_add_cpu(0); + microcode_resume_cpu(0); enable_nonboot_cpus(); thaw_domains(); spin_unlock(&pm_lock); diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/apic.c --- a/xen/arch/x86/apic.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/apic.c Wed Jan 28 13:06:45 2009 +0900 @@ -40,7 +40,7 @@ /* * Knob to control our willingness to enable the local APIC. 
*/ -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ +static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ /* * Debug level @@ -742,7 +742,7 @@ static void __init lapic_disable(char *s static void __init lapic_disable(char *str) { enable_local_apic = -1; - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + setup_clear_cpu_cap(X86_FEATURE_APIC); } custom_param("nolapic", lapic_disable); diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/boot/Makefile --- a/xen/arch/x86/boot/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/boot/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -1,4 +1,1 @@ obj-y += head.o obj-y += head.o - -head.o: head.S $(TARGET_SUBARCH).S trampoline.S mem.S video.S \ - cmdline.S edd.S wakeup.S diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/boot/mkelf32.c --- a/xen/arch/x86/boot/mkelf32.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/boot/mkelf32.c Wed Jan 28 13:06:45 2009 +0900 @@ -25,7 +25,7 @@ #define s16 int16_t #define s32 int32_t #define s64 int64_t -#include "../../../include/public/elfstructs.h" +#include "../../../include/xen/elfstructs.h" #define DYNAMICALLY_FILLED 0 #define RAW_OFFSET 128 diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/bzimage.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/bzimage.c Wed Jan 28 13:06:45 2009 +0900 @@ -0,0 +1,242 @@ +#include <xen/cache.h> +#include <xen/errno.h> +#include <xen/lib.h> +#include <xen/mm.h> +#include <xen/string.h> +#include <xen/types.h> + +#define HEAPORDER 3 + +static unsigned char *window; +#define memptr long +static memptr free_mem_ptr; +static memptr free_mem_end_ptr; + +#define WSIZE 0x80000000 + +static unsigned char *inbuf; +static unsigned insize; + +/* Index of next byte to be processed in inbuf: */ +static unsigned inptr; + +/* Bytes in output buffer: */ +static unsigned outcnt; + +#define OF(args) args +#define STATIC static + +#define memzero(s, n) memset((s), 0, (n)) + 
+typedef unsigned char uch; +typedef unsigned short ush; +typedef unsigned long ulg; + +#define INIT __init + +#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) + +/* Diagnostic functions */ +#ifdef DEBUG +# define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0) +# define Trace(x) do { fprintf x; } while (0) +# define Tracev(x) do { if (verbose) fprintf x ; } while (0) +# define Tracevv(x) do { if (verbose > 1) fprintf x ; } while (0) +# define Tracec(c, x) do { if (verbose && (c)) fprintf x ; } while (0) +# define Tracecv(c, x) do { if (verbose > 1 && (c)) fprintf x ; } while (0) +#else +# define Assert(cond, msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c, x) +# define Tracecv(c, x) +#endif + +static long bytes_out; +static void flush_window(void); + +static __init void error(char *x) +{ + printk("%s\n", x); + BUG(); +} + +static __init int fill_inbuf(void) +{ + error("ran out of input data"); + return 0; +} + + +#include "../../common/inflate.c" + +static __init void flush_window(void) +{ + /* + * The window is equal to the output buffer therefore only need to + * compute the crc. 
+ */ + unsigned long c = crc; + unsigned n; + unsigned char *in, ch; + + in = window; + for ( n = 0; n < outcnt; n++ ) + { + ch = *in++; + c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); + } + crc = c; + + bytes_out += (unsigned long)outcnt; + outcnt = 0; +} + +static __init int gzip_length(char *image, unsigned long image_len) +{ + return *(uint32_t *)&image[image_len - 4]; +} + +static __init int perform_gunzip(char *output, char **_image_start, unsigned long *image_len) +{ + char *image = *_image_start; + int rc; + unsigned char magic0 = (unsigned char)image[0]; + unsigned char magic1 = (unsigned char)image[1]; + + if ( magic0 != 0x1f || ( (magic1 != 0x8b) && (magic1 != 0x9e) ) ) + return 0; + + window = (unsigned char *)output; + + free_mem_ptr = (unsigned long)alloc_xenheap_pages(HEAPORDER); + free_mem_end_ptr = free_mem_ptr + (PAGE_SIZE << HEAPORDER); + + inbuf = (unsigned char *)image; + insize = *image_len; + inptr = 0; + + makecrc(); + + if ( gunzip() < 0 ) + { + rc = -EINVAL; + } + else + { + *_image_start = (char *)window; + *image_len = gzip_length(image, *image_len); + rc = 0; + } + + free_xenheap_pages((void *)free_mem_ptr, HEAPORDER); + + return rc; +} + +struct setup_header { + uint8_t _pad0[0x1f1]; /* skip uninteresting stuff */ + uint8_t setup_sects; + uint16_t root_flags; + uint32_t syssize; + uint16_t ram_size; + uint16_t vid_mode; + uint16_t root_dev; + uint16_t boot_flag; + uint16_t jump; + uint32_t header; +#define HDR_MAGIC "HdrS" +#define HDR_MAGIC_SZ 4 + uint16_t version; +#define VERSION(h,l) (((h)<<8) | (l)) + uint32_t realmode_swtch; + uint16_t start_sys; + uint16_t kernel_version; + uint8_t type_of_loader; + uint8_t loadflags; + uint16_t setup_move_size; + uint32_t code32_start; + uint32_t ramdisk_image; + uint32_t ramdisk_size; + uint32_t bootsect_kludge; + uint16_t heap_end_ptr; + uint16_t _pad1; + uint32_t cmd_line_ptr; + uint32_t initrd_addr_max; + uint32_t kernel_alignment; + uint8_t relocatable_kernel; + uint8_t _pad2[3]; + 
uint32_t cmdline_size; + uint32_t hardware_subarch; + uint64_t hardware_subarch_data; + uint32_t payload_offset; + uint32_t payload_length; + } __attribute__((packed)); + +static __init int bzimage_check(struct setup_header *hdr, unsigned long len) +{ + if ( len < sizeof(struct setup_header) ) + return 0; + + if ( memcmp(&hdr->header, HDR_MAGIC, HDR_MAGIC_SZ) != 0 ) + return 0; + + if ( hdr->version < VERSION(2,8) ) { + printk("Cannot load bzImage v%d.%02d at least v2.08 is required\n", + hdr->version >> 8, hdr->version & 0xff); + return -EINVAL; + } + return 1; +} + +int __init bzimage_headroom(char *image_start, unsigned long image_length) +{ + struct setup_header *hdr = (struct setup_header *)image_start; + char *img; + int err, headroom; + + err = bzimage_check(hdr, image_length); + if (err < 1) + return err; + + img = image_start + (hdr->setup_sects+1) * 512; + img += hdr->payload_offset; + + headroom = gzip_length(img, hdr->payload_length); + headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */ + headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */ + headroom = (headroom + 4095) & ~4095; + + return headroom; +} + +int __init bzimage_parse(char *image_base, char **image_start, unsigned long *image_len) +{ + struct setup_header *hdr = (struct setup_header *)(*image_start); + int err = bzimage_check(hdr, *image_len); + + if (err < 1) + return err; + + BUG_ON(!(image_base < *image_start)); + + *image_start += (hdr->setup_sects+1) * 512; + *image_start += hdr->payload_offset; + *image_len = hdr->payload_length; + + if ( (err = perform_gunzip(image_base, image_start, image_len)) < 0 ) + return err; + + return 0; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/cpu/common.c --- a/xen/arch/x86/cpu/common.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/cpu/common.c Wed Jan 28 13:06:45 
2009 +0900 @@ -29,6 +29,14 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM */ u64 host_pat = 0x050100070406; +static unsigned int __cpuinitdata cleared_caps[NCAPINTS]; + +void __init setup_clear_cpu_cap(unsigned int cap) +{ + __clear_bit(cap, boot_cpu_data.x86_capability); + __set_bit(cap, cleared_caps); +} + static void default_init(struct cpuinfo_x86 * c) { /* Not much we can do here... */ @@ -235,6 +243,7 @@ static void __init early_cpu_detect(void if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; c->x86_mask = tfms & 15; + cap0 &= ~cleared_caps[0]; if (cap0 & (1<<19)) c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; c->x86_capability[0] = cap0; /* Added for Xen bootstrap */ @@ -329,6 +338,7 @@ void __cpuinit identify_cpu(struct cpuin c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; + c->x86_num_siblings = 1; c->x86_clflush_size = 0; memset(&c->x86_capability, 0, sizeof c->x86_capability); @@ -395,6 +405,9 @@ void __cpuinit identify_cpu(struct cpuin if (disable_pse) clear_bit(X86_FEATURE_PSE, c->x86_capability); + for (i = 0 ; i < NCAPINTS ; ++i) + c->x86_capability[i] &= ~cleared_caps[i]; + /* If the model name is still unset, do table lookup. 
*/ if ( !c->x86_model_id[0] ) { char *p; @@ -468,27 +481,27 @@ void __cpuinit detect_ht(struct cpuinfo_ if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { + c->x86_num_siblings = (ebx & 0xff0000) >> 16; + + if (c->x86_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1 ) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); - smp_num_siblings = 1; + } else if (c->x86_num_siblings > 1 ) { + + if (c->x86_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", c->x86_num_siblings); + c->x86_num_siblings = 1; return; } - index_msb = get_count_order(smp_num_siblings); + index_msb = get_count_order(c->x86_num_siblings); phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); printk(KERN_INFO "CPU: Physical Processor ID: %d\n", phys_proc_id[cpu]); - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings) ; + c->x86_num_siblings = c->x86_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(c->x86_num_siblings) ; core_bits = get_count_order(c->x86_max_cores); diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Wed Jan 28 13:06:45 2009 +0900 @@ -14,7 +14,6 @@ DEFINE_PER_CPU(cpu_banks_t, mce_banks_ow static int nr_intel_ext_msrs = 0; static int cmci_support = 0; -extern int firstbank; #ifdef CONFIG_X86_MCE_THERMAL static void unexpected_thermal_interrupt(struct cpu_user_regs *regs) @@ -121,7 +120,7 @@ static inline void intel_get_extended_ms if (nr_intel_ext_msrs == 0) return; - /*this function will called when CAP(9).MCG_EXT_P = 1*/ + /* this function will called when CAP(9).MCG_EXT_P = 1 */ 
memset(mc_ext, 0, sizeof(struct mcinfo_extended)); mc_ext->common.type = MC_TYPE_EXTENDED; mc_ext->common.size = sizeof(mc_ext); @@ -157,7 +156,7 @@ static inline void intel_get_extended_ms * 3. called in polling handler * It will generate a new mc_info item if found CE/UC errors. DOM0 is the * consumer. -*/ + */ static struct mc_info *machine_check_poll(int calltype) { struct mc_info *mi = NULL; @@ -174,9 +173,9 @@ static struct mc_info *machine_check_pol memset(&mcg, 0, sizeof(mcg)); mcg.common.type = MC_TYPE_GLOBAL; mcg.common.size = sizeof(mcg); - /*If called from cpu-reset check, don't need to fill them. - *If called from cmci context, we'll try to fill domid by memory addr - */ + /* If called from cpu-reset check, don't need to fill them. + * If called from cmci context, we'll try to fill domid by memory addr + */ mcg.mc_domid = -1; mcg.mc_vcpuid = -1; if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET) @@ -186,12 +185,13 @@ static struct mc_info *machine_check_pol mcg.mc_socketid = phys_proc_id[cpu]; mcg.mc_coreid = cpu_core_id[cpu]; mcg.mc_apicid = cpu_physical_id(cpu); - mcg.mc_core_threadid = mcg.mc_apicid & ( 1 << (smp_num_siblings - 1)); + mcg.mc_core_threadid = + mcg.mc_apicid & ( 1 << (cpu_data[cpu].x86_num_siblings - 1)); rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus); for ( i = 0; i < nr_mce_banks; i++ ) { struct mcinfo_bank mcb; - /*For CMCI, only owners checks the owned MSRs*/ + /* For CMCI, only owners checks the owned MSRs */ if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) && (calltype & MC_FLAG_CMCI) ) continue; @@ -240,7 +240,7 @@ static struct mc_info *machine_check_pol x86_mcinfo_add(mi, &mcb); nr_unit++; add_taint(TAINT_MACHINE_CHECK); - /*Clear state for this bank */ + /* Clear state for this bank */ wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0); printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%"PRIx64"]\n", i, cpu, status); @@ -249,12 +249,12 @@ static struct mc_info *machine_check_pol mcg.mc_coreid, mcg.mc_apicid, 
mcg.mc_core_threadid); } - /*if pcc = 1, uc must be 1*/ + /* if pcc = 1, uc must be 1 */ if (pcc) mcg.mc_flags |= MC_FLAG_UNCORRECTABLE; else if (uc) mcg.mc_flags |= MC_FLAG_RECOVERABLE; - else /*correctable*/ + else /* correctable */ mcg.mc_flags |= MC_FLAG_CORRECTABLE; if (nr_unit && nr_intel_ext_msrs && @@ -264,7 +264,7 @@ static struct mc_info *machine_check_pol } if (nr_unit) x86_mcinfo_add(mi, &mcg); - /*Clear global state*/ + /* Clear global state */ return mi; } @@ -541,8 +541,7 @@ static void mce_init(void) * This also clears all registers*/ mi = machine_check_poll(MC_FLAG_RESET); - /*in the boot up stage, not expect inject to DOM0, but go print out - */ + /* in the boot up stage, don't inject to DOM0, but print out */ if (mi) x86_mcinfo_dump(mi); @@ -553,22 +552,22 @@ static void mce_init(void) for (i = firstbank; i < nr_mce_banks; i++) { - /*Some banks are shared across cores, use MCi_CTRL to judge whether - * this bank has been initialized by other cores already.*/ + /* Some banks are shared across cores, use MCi_CTRL to judge whether + * this bank has been initialized by other cores already. 
*/ rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h); - if (!l & !h) + if (!(l | h)) { - /*if ctl is 0, this bank is never initialized*/ + /* if ctl is 0, this bank is never initialized */ printk(KERN_DEBUG "mce_init: init bank%d\n", i); wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff); wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0); - } - } - if (firstbank) /*if cmci enabled, firstbank = 0*/ + } + } + if (firstbank) /* if cmci enabled, firstbank = 0 */ wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); } -/*p4/p6 faimily has similar MCA initialization process*/ +/* p4/p6 family have similar MCA initialization process */ void intel_mcheck_init(struct cpuinfo_x86 *c) { mce_cap_init(c); diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/domain.c Wed Jan 28 13:06:45 2009 +0900 @@ -143,7 +143,7 @@ void dump_pageframe_info(struct domain * { list_for_each_entry ( page, &d->page_list, list ) { - printk(" DomPage %p: caf=%08x, taf=%" PRtype_info "\n", + printk(" DomPage %p: caf=%08lx, taf=%" PRtype_info "\n", _p(page_to_mfn(page)), page->count_info, page->u.inuse.type_info); } @@ -156,7 +156,7 @@ void dump_pageframe_info(struct domain * list_for_each_entry ( page, &d->xenpage_list, list ) { - printk(" XenPage %p: caf=%08x, taf=%" PRtype_info "\n", + printk(" XenPage %p: caf=%08lx, taf=%" PRtype_info "\n", _p(page_to_mfn(page)), page->count_info, page->u.inuse.type_info); } @@ -405,8 +405,17 @@ int arch_domain_create(struct domain *d, if ( d->arch.ioport_caps == NULL ) goto fail; +#ifdef __i386__ if ( (d->shared_info = alloc_xenheap_page()) == NULL ) goto fail; +#else + pg = alloc_domheap_page( + NULL, MEMF_node(domain_to_node(d)) | MEMF_bits(32)); + if ( pg == NULL ) + goto fail; + pg->count_info |= PGC_xen_heap; + d->shared_info = page_to_virt(pg); +#endif clear_page(d->shared_info); share_xen_page_with_guest( diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c 
Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/domain_build.c Wed Jan 28 13:06:45 2009 +0900 @@ -19,6 +19,7 @@ #include <xen/iocap.h> #include <xen/bitops.h> #include <xen/compat.h> +#include <xen/libelf.h> #include <asm/regs.h> #include <asm/system.h> #include <asm/io.h> @@ -30,7 +31,9 @@ #include <asm/e820.h> #include <public/version.h> -#include <public/libelf.h> + +int __init bzimage_parse( + char *output, char **image_start, unsigned long *image_len); extern unsigned long initial_images_nrpages(void); extern void discard_initial_images(void); @@ -196,7 +199,8 @@ static void __init process_dom0_ioports_ int __init construct_dom0( struct domain *d, - unsigned long _image_start, unsigned long image_len, + unsigned long _image_base, + unsigned long _image_start, unsigned long image_len, unsigned long _initrd_start, unsigned long initrd_len, char *cmdline) { @@ -213,9 +217,11 @@ int __init construct_dom0( struct vcpu *v = d->vcpu[0]; unsigned long long value; #if defined(__i386__) + char *image_base = (char *)_image_base; /* use lowmem mappings */ char *image_start = (char *)_image_start; /* use lowmem mappings */ char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */ #elif defined(__x86_64__) + char *image_base = __va(_image_base); char *image_start = __va(_image_start); char *initrd_start = __va(_initrd_start); #endif @@ -262,6 +268,9 @@ int __init construct_dom0( nr_pages = compute_dom0_nr_pages(); + if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 ) + return rc; + if ( (rc = elf_init(&elf, image_start, image_len)) != 0 ) return rc; #ifdef VERBOSE @@ -341,6 +350,12 @@ int __init construct_dom0( #endif } + if ( (parms.p2m_base != UNSET_ADDR) && elf_32bit(&elf) ) + { + printk(XENLOG_WARNING "P2M table base ignored\n"); + parms.p2m_base = UNSET_ADDR; + } + domain_set_alloc_bitsize(d); /* @@ -359,6 +374,8 @@ int __init construct_dom0( vphysmap_end = vphysmap_start + (nr_pages * (!is_pv_32on64_domain(d) ? 
sizeof(unsigned long) : sizeof(unsigned int))); + if ( parms.p2m_base != UNSET_ADDR ) + vphysmap_end = vphysmap_start; vstartinfo_start = round_pgup(vphysmap_end); vstartinfo_end = (vstartinfo_start + sizeof(struct start_info) + @@ -400,6 +417,11 @@ int __init construct_dom0( /* Ensure that our low-memory 1:1 mapping covers the allocation. */ page = alloc_domheap_pages(d, order, MEMF_bits(30)); #else + if ( parms.p2m_base != UNSET_ADDR ) + { + vphysmap_start = parms.p2m_base; + vphysmap_end = vphysmap_start + nr_pages * sizeof(unsigned long); + } page = alloc_domheap_pages(d, order, 0); #endif if ( page == NULL ) @@ -429,14 +451,6 @@ int __init construct_dom0( _p(vstack_start), _p(vstack_end), _p(v_start), _p(v_end)); printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry)); - - if ( ((v_end - v_start)>>PAGE_SHIFT) > nr_pages ) - { - printk("Initial guest OS requires too much space\n" - "(%luMB is greater than %luMB limit)\n", - (v_end-v_start)>>20, nr_pages>>(20-PAGE_SHIFT)); - return -ENOMEM; - } mpt_alloc = (vpt_start - v_start) + (unsigned long)pfn_to_paddr(alloc_spfn); @@ -748,8 +762,109 @@ int __init construct_dom0( snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%d%s", elf_64bit(&elf) ? 64 : 32, parms.pae ? "p" : ""); + count = d->tot_pages; +#ifdef __x86_64__ + /* Set up the phys->machine table if not part of the initial mapping. */ + if ( parms.p2m_base != UNSET_ADDR ) + { + unsigned long va = vphysmap_start; + + if ( v_start <= vphysmap_end && vphysmap_start <= v_end ) + panic("DOM0 P->M table overlaps initial mapping"); + + while ( va < vphysmap_end ) + { + if ( d->tot_pages + ((round_pgup(vphysmap_end) - va) + >> PAGE_SHIFT) + 3 > nr_pages ) + panic("Dom0 allocation too small for initial P->M table.\n"); + + l4tab = l4start + l4_table_offset(va); + if ( !l4e_get_intpte(*l4tab) ) + { + page = alloc_domheap_page(d, 0); + if ( !page ) + break; + /* No mapping, PGC_allocated + page-table page. 
*/ + page->count_info = PGC_allocated | 2; + page->u.inuse.type_info = + PGT_l3_page_table | PGT_validated | 1; + clear_page(page_to_virt(page)); + *l4tab = l4e_from_page(page, L4_PROT); + } + l3tab = page_to_virt(l4e_get_page(*l4tab)); + l3tab += l3_table_offset(va); + if ( !l3e_get_intpte(*l3tab) ) + { + if ( cpu_has_page1gb && + !(va & ((1UL << L3_PAGETABLE_SHIFT) - 1)) && + vphysmap_end >= va + (1UL << L3_PAGETABLE_SHIFT) && + (page = alloc_domheap_pages(d, + L3_PAGETABLE_SHIFT - + PAGE_SHIFT, + 0)) != NULL ) + { + *l3tab = l3e_from_page(page, + L1_PROT|_PAGE_DIRTY|_PAGE_PSE); + va += 1UL << L3_PAGETABLE_SHIFT; + continue; + } + if ( (page = alloc_domheap_page(d, 0)) == NULL ) + break; + else + { + /* No mapping, PGC_allocated + page-table page. */ + page->count_info = PGC_allocated | 2; + page->u.inuse.type_info = + PGT_l2_page_table | PGT_validated | 1; + clear_page(page_to_virt(page)); + *l3tab = l3e_from_page(page, L3_PROT); + } + } + l2tab = page_to_virt(l3e_get_page(*l3tab)); + l2tab += l2_table_offset(va); + if ( !l2e_get_intpte(*l2tab) ) + { + if ( !(va & ((1UL << L2_PAGETABLE_SHIFT) - 1)) && + vphysmap_end >= va + (1UL << L2_PAGETABLE_SHIFT) && + (page = alloc_domheap_pages(d, + L2_PAGETABLE_SHIFT - + PAGE_SHIFT, + 0)) != NULL ) + { + *l2tab = l2e_from_page(page, + L1_PROT|_PAGE_DIRTY|_PAGE_PSE); + va += 1UL << L2_PAGETABLE_SHIFT; + continue; + } + if ( (page = alloc_domheap_page(d, 0)) == NULL ) + break; + else + { + /* No mapping, PGC_allocated + page-table page. 
*/ + page->count_info = PGC_allocated | 2; + page->u.inuse.type_info = + PGT_l1_page_table | PGT_validated | 1; + clear_page(page_to_virt(page)); + *l2tab = l2e_from_page(page, L2_PROT); + } + } + l1tab = page_to_virt(l2e_get_page(*l2tab)); + l1tab += l1_table_offset(va); + BUG_ON(l1e_get_intpte(*l1tab)); + page = alloc_domheap_page(d, 0); + if ( !page ) + break; + *l1tab = l1e_from_page(page, L1_PROT|_PAGE_DIRTY); + va += PAGE_SIZE; + va &= PAGE_MASK; + } + if ( !page ) + panic("Not enough RAM for DOM0 P->M table.\n"); + } +#endif + /* Write the phys->machine and machine->phys table entries. */ - for ( pfn = 0; pfn < d->tot_pages; pfn++ ) + for ( pfn = 0; pfn < count; pfn++ ) { mfn = pfn + alloc_spfn; #ifndef NDEBUG @@ -763,6 +878,26 @@ int __init construct_dom0( ((unsigned int *)vphysmap_start)[pfn] = mfn; set_gpfn_from_mfn(mfn, pfn); } + si->first_p2m_pfn = pfn; + si->nr_p2m_frames = d->tot_pages - count; + list_for_each_entry ( page, &d->page_list, list ) + { + mfn = page_to_mfn(page); + if ( get_gpfn_from_mfn(mfn) >= count ) + { + BUG_ON(is_pv_32bit_domain(d)); + if ( !page->u.inuse.type_info && + !get_page_and_type(page, d, PGT_writable_page) ) + BUG(); + ((unsigned long *)vphysmap_start)[pfn] = mfn; + set_gpfn_from_mfn(mfn, pfn); + ++pfn; +#ifndef NDEBUG + ++alloc_epfn; +#endif + } + } + BUG_ON(pfn != d->tot_pages); while ( pfn < nr_pages ) { if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL ) diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/hvm/hvm.c Wed Jan 28 13:06:45 2009 +0900 @@ -20,6 +20,7 @@ */ #include <xen/config.h> +#include <xen/ctype.h> #include <xen/init.h> #include <xen/lib.h> #include <xen/trace.h> @@ -272,6 +273,10 @@ static int hvm_print_line( char c = *val; BUG_ON(bytes != 1); + + /* Accept only printable characters, newline, and horizontal tab. 
*/ + if ( !isprint(c) && (c != '\n') && (c != '\t') ) + return X86EMUL_OKAY; spin_lock(&hd->pbuf_lock); hd->pbuf[hd->pbuf_idx++] = c; @@ -1503,7 +1508,15 @@ static enum hvm_copy_result __hvm_copy( if ( flags & HVMCOPY_to_guest ) { - if ( p2mt != p2m_ram_ro ) + if ( p2mt == p2m_ram_ro ) + { + static unsigned long lastpage; + if ( xchg(&lastpage, gfn) != gfn ) + gdprintk(XENLOG_DEBUG, "guest attempted write to read-only" + " memory page. gfn=%#lx, mfn=%#lx\n", + gfn, mfn); + } + else { memcpy(p, buf, count); paging_mark_dirty(curr->domain, mfn); diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/mtrr.c --- a/xen/arch/x86/hvm/mtrr.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/hvm/mtrr.c Wed Jan 28 13:06:45 2009 +0900 @@ -702,12 +702,15 @@ HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save 1, HVMSR_PER_VCPU); uint8_t epte_get_entry_emt( - struct domain *d, unsigned long gfn, unsigned long mfn) + struct domain *d, unsigned long gfn, + unsigned long mfn, uint8_t *igmt, int direct_mmio) { uint8_t gmtrr_mtype, hmtrr_mtype; uint32_t type; struct vcpu *v = current; + *igmt = 0; + if ( (current->domain != d) && ((v = d->vcpu[0]) == NULL) ) return MTRR_TYPE_WRBACK; @@ -722,6 +725,21 @@ uint8_t epte_get_entry_emt( if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) ) return type; + + if ( !iommu_enabled ) + { + *igmt = 1; + return MTRR_TYPE_WRBACK; + } + + if ( direct_mmio ) + return MTRR_TYPE_UNCACHABLE; + + if ( iommu_snoop ) + { + *igmt = 1; + return MTRR_TYPE_WRBACK; + } gmtrr_mtype = get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)); hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn << PAGE_SHIFT)); diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/vmsi.c --- a/xen/arch/x86/hvm/vmsi.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/hvm/vmsi.c Wed Jan 28 13:06:45 2009 +0900 @@ -134,7 +134,7 @@ int vmsi_deliver(struct domain *d, int p "vector=%x trig_mode=%x\n", dest, dest_mode, delivery_mode, vector, trig_mode); - if ( !test_bit(_HVM_IRQ_DPCI_MSI, 
&hvm_irq_dpci->mirq[pirq].flags) ) + if ( !( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_GUEST_MSI ) ) { gdprintk(XENLOG_WARNING, "pirq %x not msi \n", pirq); return 0; diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Jan 28 13:06:45 2009 +0900 @@ -167,14 +167,15 @@ static void vmx_init_vmcs_config(void) #endif min = VM_EXIT_ACK_INTR_ON_EXIT; - opt = 0; + opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT; #ifdef __x86_64__ min |= VM_EXIT_IA32E_MODE; #endif _vmx_vmexit_control = adjust_vmx_controls( min, opt, MSR_IA32_VMX_EXIT_CTLS); - min = opt = 0; + min = 0; + opt = VM_ENTRY_LOAD_GUEST_PAT; _vmx_vmentry_control = adjust_vmx_controls( min, opt, MSR_IA32_VMX_ENTRY_CTLS); @@ -519,8 +520,6 @@ static int construct_vmcs(struct vcpu *v /* VMCS controls. */ __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control); - __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control); - __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control); v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control; v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control; @@ -534,12 +533,18 @@ static int construct_vmcs(struct vcpu *v else { v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; + vmx_vmexit_control &= ~(VM_EXIT_SAVE_GUEST_PAT | + VM_EXIT_LOAD_HOST_PAT); + vmx_vmentry_control &= ~VM_ENTRY_LOAD_GUEST_PAT; } /* Do not enable Monitor Trap Flag unless start single step debug */ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG; __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control); + __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control); + __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control); + if ( cpu_has_vmx_secondary_exec_control ) __vmwrite(SECONDARY_VM_EXEC_CONTROL, v->arch.hvm_vmx.secondary_exec_control); @@ -561,6 +566,8 @@ static int construct_vmcs(struct vcpu *v vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS); 
vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP); vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP); + if ( cpu_has_vmx_pat && paging_mode_hap(d) ) + vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT); } /* I/O access bitmap. */ @@ -690,6 +697,21 @@ static int construct_vmcs(struct vcpu *v v->arch.hvm_vmx.vpid = v->domain->arch.hvm_domain.vmx.vpid_base + v->vcpu_id; __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid); + } + + if ( cpu_has_vmx_pat && paging_mode_hap(d) ) + { + u64 host_pat, guest_pat; + + rdmsrl(MSR_IA32_CR_PAT, host_pat); + guest_pat = 0x7040600070406ULL; + + __vmwrite(HOST_PAT, host_pat); + __vmwrite(GUEST_PAT, guest_pat); +#ifdef __i386__ + __vmwrite(HOST_PAT_HIGH, host_pat >> 32); + __vmwrite(GUEST_PAT_HIGH, guest_pat >> 32); +#endif } vmx_vmcs_exit(v); @@ -989,6 +1011,8 @@ void vmcs_dump_vcpu(struct vcpu *v) vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR); vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT); vmx_dump_sel("TR", GUEST_TR_SELECTOR); + printk("Guest PAT = 0x%08x%08x\n", + (uint32_t)vmr(GUEST_PAT_HIGH), (uint32_t)vmr(GUEST_PAT)); x = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32; x |= (uint32_t)vmr(TSC_OFFSET); printk("TSC Offset = %016llx\n", x); @@ -1027,6 +1051,8 @@ void vmcs_dump_vcpu(struct vcpu *v) (unsigned long long)vmr(HOST_SYSENTER_ESP), (int)vmr(HOST_SYSENTER_CS), (unsigned long long)vmr(HOST_SYSENTER_EIP)); + printk("Host PAT = 0x%08x%08x\n", + (uint32_t)vmr(HOST_PAT_HIGH), (uint32_t)vmr(HOST_PAT)); printk("*** Control State ***\n"); printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/io_apic.c Wed Jan 28 13:06:45 2009 +0900 @@ -84,7 +84,9 @@ int disable_timer_pin_1 __initdata; static struct irq_pin_list { int apic, pin, next; -} irq_2_pin[PIN_MAP_SIZE]; +} irq_2_pin[PIN_MAP_SIZE] = { + [0 ... 
PIN_MAP_SIZE-1].pin = -1 +}; static int irq_2_pin_free_entry = NR_IRQS; int vector_irq[NR_VECTORS] __read_mostly = { @@ -1017,11 +1019,6 @@ static void __init enable_IO_APIC(void) int i8259_apic, i8259_pin; int i, apic; unsigned long flags; - - for (i = 0; i < PIN_MAP_SIZE; i++) { - irq_2_pin[i].pin = -1; - irq_2_pin[i].next = 0; - } /* Initialise dynamic irq_2_pin free list. */ for (i = NR_IRQS; i < PIN_MAP_SIZE; i++) @@ -1557,11 +1554,14 @@ static unsigned int startup_msi_vector(u static void ack_msi_vector(unsigned int vector) { - ack_APIC_irq(); + if ( msi_maskable_irq(irq_desc[vector].msi_desc) ) + ack_APIC_irq(); /* ACKTYPE_NONE */ } static void end_msi_vector(unsigned int vector) { + if ( !msi_maskable_irq(irq_desc[vector].msi_desc) ) + ack_APIC_irq(); /* ACKTYPE_EOI */ } static void shutdown_msi_vector(unsigned int vector) diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/irq.c Wed Jan 28 13:06:45 2009 +0900 @@ -491,7 +491,7 @@ int pirq_guest_unmask(struct domain *d) } extern int ioapic_ack_new; -int pirq_acktype(struct domain *d, int irq) +static int pirq_acktype(struct domain *d, int irq) { irq_desc_t *desc; unsigned int vector; @@ -705,6 +705,10 @@ static irq_guest_action_t *__pirq_guest_ spin_lock_irq(&desc->lock); } break; + case ACKTYPE_NONE: + stop_timer(&irq_guest_eoi_timer[vector]); + _irq_guest_eoi(desc); + break; } /* @@ -853,10 +857,6 @@ int map_domain_pirq( ASSERT(spin_is_locked(&pcidevs_lock)); ASSERT(spin_is_locked(&d->event_lock)); - /* XXX Until pcidev and msi locking is fixed. 
*/ - if ( type == MAP_PIRQ_TYPE_MSI ) - return -EINVAL; - if ( !IS_PRIV(current->domain) ) return -EPERM; @@ -867,8 +867,8 @@ int map_domain_pirq( return -EINVAL; } - old_vector = d->arch.pirq_vector[pirq]; - old_pirq = d->arch.vector_pirq[vector]; + old_vector = domain_irq_to_vector(d, pirq); + old_pirq = domain_vector_to_irq(d, vector); if ( (old_vector && (old_vector != vector) ) || (old_pirq && (old_pirq != pirq)) ) @@ -891,6 +891,10 @@ int map_domain_pirq( if ( type == MAP_PIRQ_TYPE_MSI ) { struct msi_info *msi = (struct msi_info *)data; + + ret = -ENODEV; + if ( !cpu_has_apic ) + goto done; pdev = pci_get_pdev(msi->bus, msi->devfn); ret = pci_enable_msi(msi, &msi_desc); @@ -937,7 +941,7 @@ int unmap_domain_pirq(struct domain *d, ASSERT(spin_is_locked(&pcidevs_lock)); ASSERT(spin_is_locked(&d->event_lock)); - vector = d->arch.pirq_vector[pirq]; + vector = domain_irq_to_vector(d, pirq); if ( vector <= 0 ) { dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n", @@ -958,7 +962,7 @@ int unmap_domain_pirq(struct domain *d, spin_lock_irqsave(&desc->lock, flags); - BUG_ON(vector != d->arch.pirq_vector[pirq]); + BUG_ON(vector != domain_irq_to_vector(d, pirq)); if ( msi_desc ) teardown_msi_vector(vector); diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/machine_kexec.c --- a/xen/arch/x86/machine_kexec.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/machine_kexec.c Wed Jan 28 13:06:45 2009 +0900 @@ -150,6 +150,9 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_SYMBOL(dom_xen); VMCOREINFO_SYMBOL(dom_io); +#ifdef CONFIG_X86_32 + VMCOREINFO_SYMBOL(xenheap_phys_end); +#endif #ifdef CONFIG_X86_PAE VMCOREINFO_SYMBOL_ALIAS(pgd_l3, idle_pg_table); #endif diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/microcode.c --- a/xen/arch/x86/microcode.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/microcode.c Wed Jan 28 13:06:45 2009 +0900 @@ -49,31 +49,22 @@ struct microcode_info { char buffer[1]; }; -static void microcode_fini_cpu(int cpu) +static void 
__microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + xfree(uci->mc.mc_valid); + memset(uci, 0, sizeof(*uci)); +} + +static void microcode_fini_cpu(int cpu) +{ spin_lock(&microcode_mutex); - xfree(uci->mc.valid_mc); - uci->mc.valid_mc = NULL; - uci->valid = 0; + __microcode_fini_cpu(cpu); spin_unlock(&microcode_mutex); } -static int collect_cpu_info(int cpu) -{ - int err = 0; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - memset(uci, 0, sizeof(*uci)); - err = microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig); - if ( !err ) - uci->valid = 1; - - return err; -} - -static int microcode_resume_cpu(int cpu) +int microcode_resume_cpu(int cpu) { int err = 0; struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -81,7 +72,7 @@ static int microcode_resume_cpu(int cpu) gdprintk(XENLOG_INFO, "microcode: CPU%d resumed\n", cpu); - if ( !uci->mc.valid_mc ) + if ( !uci->mc.mc_valid ) return -EIO; /* @@ -95,16 +86,15 @@ static int microcode_resume_cpu(int cpu) return err; } - if ( memcmp(&nsig, &uci->cpu_sig, sizeof(nsig)) ) + if ( microcode_ops->microcode_resume_match(cpu, &nsig) ) + { + return microcode_ops->apply_microcode(cpu); + } + else { microcode_fini_cpu(cpu); - /* Should we look for a new ucode here? 
*/ return -EIO; } - - err = microcode_ops->apply_microcode(cpu); - - return err; } static int microcode_update_cpu(const void *buf, size_t size) @@ -115,20 +105,11 @@ static int microcode_update_cpu(const vo spin_lock(&microcode_mutex); - /* - * Check if the system resume is in progress (uci->valid != NULL), - * otherwise just request a firmware: - */ - if ( uci->valid ) - { - err = microcode_resume_cpu(cpu); - } + err = microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig); + if ( likely(!err) ) + err = microcode_ops->cpu_request_microcode(cpu, buf, size); else - { - err = collect_cpu_info(cpu); - if ( !err && uci->valid ) - err = microcode_ops->cpu_request_microcode(cpu, buf, size); - } + __microcode_fini_cpu(cpu); spin_unlock(&microcode_mutex); @@ -153,7 +134,6 @@ static long do_microcode_update(void *_i error = info->error; xfree(info); return error; - } int microcode_update(XEN_GUEST_HANDLE(const_void) buf, unsigned long len) diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/microcode_amd.c --- a/xen/arch/x86/microcode_amd.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/microcode_amd.c Wed Jan 28 13:06:45 2009 +0900 @@ -38,21 +38,16 @@ #define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) #define DWSIZE (sizeof(uint32_t)) -/* For now we support a fixed ucode total size only */ -#define get_totalsize(mc) \ - ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ - + MC_HEADER_SIZE) /* serialize access to the physical write */ static DEFINE_SPINLOCK(microcode_update_lock); struct equiv_cpu_entry *equiv_cpu_table; -static long install_equiv_cpu_table(const void *, uint32_t, long); - static int collect_cpu_info(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data[cpu]; + uint32_t dummy; memset(csig, 0, sizeof(*csig)); @@ -60,13 +55,10 @@ static int collect_cpu_info(int cpu, str { printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", cpu); - return -1; - 
} - - asm volatile ( - "movl %1, %%ecx; rdmsr" - : "=a" (csig->rev) - : "i" (MSR_AMD_PATCHLEVEL) : "ecx" ); + return -EINVAL; + } + + rdmsr(MSR_AMD_PATCHLEVEL, csig->rev, dummy); printk(KERN_INFO "microcode: collect_cpu_info: patch_id=0x%x\n", csig->rev); @@ -74,29 +66,17 @@ static int collect_cpu_info(int cpu, str return 0; } -static int get_matching_microcode(void *mc, int cpu) +static int microcode_fits(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct microcode_header_amd *mc_header = mc; - unsigned long total_size = get_totalsize(mc_header); - void *new_mc; unsigned int current_cpu_id; - unsigned int equiv_cpu_id = 0x00; + unsigned int equiv_cpu_id = 0x0; unsigned int i; /* We should bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); - /* This is a tricky part. We might be called from a write operation - * to the device file instead of the usual process of firmware - * loading. This routine needs to be able to distinguish both - * cases. This is done by checking if there already is a equivalent - * CPU table installed. If not, we're written through - * /dev/cpu/microcode. - * Since we ignore all checks. The error case in which going through - * firmware loading and that table is not loaded has already been - * checked earlier. 
- */ if ( equiv_cpu_table == NULL ) { printk(KERN_INFO "microcode: CPU%d microcode update with " @@ -111,7 +91,7 @@ static int get_matching_microcode(void * { if ( current_cpu_id == equiv_cpu_table[i].installed_cpu ) { - equiv_cpu_id = equiv_cpu_table[i].equiv_cpu; + equiv_cpu_id = equiv_cpu_table[i].equiv_cpu & 0xffff; break; } } @@ -119,171 +99,136 @@ static int get_matching_microcode(void * if ( !equiv_cpu_id ) { printk(KERN_ERR "microcode: CPU%d cpu_id " - "not found in equivalent cpu table \n", cpu); - return 0; - } - - if ( (mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff) ) - { - printk(KERN_INFO - "microcode: CPU%d patch does not match " - "(patch is %x, cpu extended is %x) \n", - cpu, mc_header->processor_rev_id[0], - (equiv_cpu_id & 0xff)); - return 0; - } - - if ( (mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff) ) + "not found in equivalent cpu table\n", cpu); + return -EINVAL; + } + + if ( (mc_header->processor_rev_id) != equiv_cpu_id ) { printk(KERN_INFO "microcode: CPU%d patch does not match " "(patch is %x, cpu base id is %x) \n", - cpu, mc_header->processor_rev_id[1], - ((equiv_cpu_id >> 16) & 0xff)); - return 0; + cpu, mc_header->processor_rev_id, equiv_cpu_id); + return -EINVAL; } if ( mc_header->patch_id <= uci->cpu_sig.rev ) - return 0; + return -EINVAL; printk(KERN_INFO "microcode: CPU%d found a matching microcode " "update with version 0x%x (current=0x%x)\n", cpu, mc_header->patch_id, uci->cpu_sig.rev); - out: - new_mc = xmalloc_bytes(UCODE_MAX_SIZE); - if ( new_mc == NULL ) - { - printk(KERN_ERR "microcode: error, can't allocate memory\n"); - return -ENOMEM; - } - memset(new_mc, 0, UCODE_MAX_SIZE); - - /* free previous update file */ - xfree(uci->mc.mc_amd); - - memcpy(new_mc, mc, total_size); - - uci->mc.mc_amd = new_mc; - return 1; +out: + return 0; } static int apply_microcode(int cpu) { unsigned long flags; - uint32_t eax, edx, rev; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = 
ucode_cpu_info + cpu_num; - uint64_t addr; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + uint32_t rev, dummy; + struct microcode_amd *mc_amd = uci->mc.mc_amd; /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if ( uci->mc.mc_amd == NULL ) + BUG_ON(raw_smp_processor_id() != cpu); + + if ( mc_amd == NULL ) return -EINVAL; spin_lock_irqsave(&microcode_update_lock, flags); - addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code; - edx = (uint32_t)(addr >> 32); - eax = (uint32_t)addr; - - asm volatile ( - "movl %0, %%ecx; wrmsr" : - : "i" (MSR_AMD_PATCHLOADER), "a" (eax), "d" (edx) : "ecx" ); + wrmsrl(MSR_AMD_PATCHLOADER, (unsigned long)&mc_amd->hdr.data_code); /* get patch id after patching */ - asm volatile ( - "movl %1, %%ecx; rdmsr" - : "=a" (rev) - : "i" (MSR_AMD_PATCHLEVEL) : "ecx"); + rdmsr(MSR_AMD_PATCHLEVEL, rev, dummy); spin_unlock_irqrestore(&microcode_update_lock, flags); /* check current patch id and patch's id for match */ - if ( rev != uci->mc.mc_amd->hdr.patch_id ) + if ( rev != mc_amd->hdr.patch_id ) { printk(KERN_ERR "microcode: CPU%d update from revision " - "0x%x to 0x%x failed\n", cpu_num, - uci->mc.mc_amd->hdr.patch_id, rev); + "0x%x to 0x%x failed\n", cpu, + mc_amd->hdr.patch_id, rev); return -EIO; } printk("microcode: CPU%d updated from revision " "0x%x to 0x%x \n", - cpu_num, uci->cpu_sig.rev, uci->mc.mc_amd->hdr.patch_id); + cpu, uci->cpu_sig.rev, mc_amd->hdr.patch_id); uci->cpu_sig.rev = rev; return 0; } -static long get_next_ucode_from_buffer_amd(void **mc, const void *buf, - unsigned long size, long offset) +static int get_next_ucode_from_buffer_amd(void *mc, const void *buf, + size_t size, unsigned long *offset) { struct microcode_header_amd *mc_header; - unsigned long total_size; - const uint8_t *buf_pos = buf; + size_t total_size; + const uint8_t *bufp = buf; + unsigned long off; + + off = *offset; /* No more data */ - if ( offset >= size ) - return 0; - - if ( buf_pos[offset] != UCODE_UCODE_TYPE ) + if ( off >= size ) 
+ return 1; + + if ( bufp[off] != UCODE_UCODE_TYPE ) { printk(KERN_ERR "microcode: error! " "Wrong microcode payload type field\n"); return -EINVAL; } - mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]); - - total_size = (unsigned long) (buf_pos[offset+4] + - (buf_pos[offset+5] << 8)); + mc_header = (struct microcode_header_amd *)(&bufp[off+8]); + + total_size = (unsigned long) (bufp[off+4] + (bufp[off+5] << 8)); printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n", - size, total_size, offset); - - if ( (offset + total_size) > size ) + (unsigned long)size, total_size, off); + + if ( (off + total_size) > size ) { printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); return -EINVAL; } - *mc = xmalloc_bytes(UCODE_MAX_SIZE); - if ( *mc == NULL ) - { - printk(KERN_ERR "microcode: error! " - "Can not allocate memory for microcode patch\n"); - return -ENOMEM; - } - - memset(*mc, 0, UCODE_MAX_SIZE); - memcpy(*mc, (const void *)(buf + offset + 8), total_size); - - return offset + total_size + 8; -} - -static long install_equiv_cpu_table(const void *buf, - uint32_t size, long offset) + memset(mc, 0, UCODE_MAX_SIZE); + memcpy(mc, (const void *)(&bufp[off + 8]), total_size); + + *offset = off + total_size + 8; + + return 0; +} + +static int install_equiv_cpu_table(const void *buf, uint32_t size, + unsigned long *offset) { const uint32_t *buf_pos = buf; + unsigned long off; + + off = *offset; + *offset = 0; /* No more data */ - if ( offset >= size ) - return 0; + if ( off >= size ) + return -EINVAL; if ( buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE ) { printk(KERN_ERR "microcode: error! " - "Wrong microcode equivalnet cpu table type field\n"); - return 0; + "Wrong microcode equivalent cpu table type field\n"); + return -EINVAL; } if ( size == 0 ) { printk(KERN_ERR "microcode: error! 
" "Wrong microcode equivalnet cpu table length\n"); - return 0; + return -EINVAL; } equiv_cpu_table = xmalloc_bytes(size); @@ -291,20 +236,24 @@ static long install_equiv_cpu_table(cons { printk(KERN_ERR "microcode: error, can't allocate " "memory for equiv CPU table\n"); - return 0; + return -ENOMEM; } memset(equiv_cpu_table, 0, size); memcpy(equiv_cpu_table, (const void *)&buf_pos[3], size); - return size + 12; /* add header length */ + *offset = size + 12; /* add header length */ + + return 0; } static int cpu_request_microcode(int cpu, const void *buf, size_t size) { const uint32_t *buf_pos; - long offset = 0; + unsigned long offset = 0; int error = 0; + int ret; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; void *mc; /* We should bind the task to the CPU */ @@ -319,41 +268,63 @@ static int cpu_request_microcode(int cpu return -EINVAL; } - offset = install_equiv_cpu_table(buf, (uint32_t)(buf_pos[2]), offset); - if ( !offset ) + error = install_equiv_cpu_table(buf, (uint32_t)(buf_pos[2]), &offset); + if ( error ) { printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); return -EINVAL; } - while ( (offset = - get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0 ) - { - error = get_matching_microcode(mc, cpu); - if ( error < 0 ) + mc = xmalloc_bytes(UCODE_MAX_SIZE); + if ( mc == NULL ) + { + printk(KERN_ERR "microcode: error! 
" + "Can not allocate memory for microcode patch\n"); + error = -ENOMEM; + goto out; + } + + /* implicitely validates uci->mc.mc_valid */ + uci->mc.mc_amd = mc; + + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + while ( (ret = get_next_ucode_from_buffer_amd(mc, buf, size, &offset)) == 0) + { + error = microcode_fits(mc, cpu); + if (error != 0) + continue; + + error = apply_microcode(cpu); + if (error == 0) break; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - if ( error == 1 ) - error = apply_microcode(cpu); + } + + /* On success keep the microcode patch for + * re-apply on resume. + */ + if (error) { xfree(mc); - } - if ( offset > 0 ) - { - xfree(mc); - xfree(equiv_cpu_table); - equiv_cpu_table = NULL; - } - if ( offset < 0 ) - error = offset; + mc = NULL; + } + uci->mc.mc_amd = mc; + +out: + xfree(equiv_cpu_table); + equiv_cpu_table = NULL; return error; } +static int microcode_resume_match(int cpu, struct cpu_signature *nsig) +{ + return 0; +} + static struct microcode_ops microcode_amd_ops = { - .get_matching_microcode = get_matching_microcode, + .microcode_resume_match = microcode_resume_match, .cpu_request_microcode = cpu_request_microcode, .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode, diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/microcode_intel.c --- a/xen/arch/x86/microcode_intel.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/microcode_intel.c Wed Jan 28 13:06:45 2009 +0900 @@ -64,6 +64,8 @@ static int collect_cpu_info(int cpu_num, struct cpuinfo_x86 *c = &cpu_data[cpu_num]; unsigned int val[2]; + BUG_ON(cpu_num != smp_processor_id()); + memset(csig, 0, sizeof(*csig)); if ( (c->x86_vendor != X86_VENDOR_INTEL) || (c->x86 < 6) || @@ -323,6 +325,7 @@ static int cpu_request_microcode(int cpu long offset = 0; int error = 0; void *mc; + unsigned int matching_count = 0; /* We should 
bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); @@ -341,7 +344,7 @@ static int cpu_request_microcode(int cpu */ if ( error == 1 ) { - apply_microcode(cpu); + matching_count++; error = 0; } xfree(mc); @@ -351,11 +354,22 @@ static int cpu_request_microcode(int cpu if ( offset < 0 ) error = offset; + if ( !error && matching_count ) + apply_microcode(cpu); + return error; } +static int microcode_resume_match(int cpu, struct cpu_signature *nsig) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + return (sigmatch(nsig->sig, uci->cpu_sig.sig, nsig->pf, uci->cpu_sig.pf) && + (uci->cpu_sig.rev > nsig->rev)); +} + static struct microcode_ops microcode_intel_ops = { - .get_matching_microcode = get_matching_microcode, + .microcode_resume_match = microcode_resume_match, .cpu_request_microcode = cpu_request_microcode, .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode, diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/mm.c Wed Jan 28 13:06:45 2009 +0900 @@ -205,11 +205,6 @@ void __init init_frametable(void) } memset(frame_table, 0, nr_pages << PAGE_SHIFT); - -#if defined(__x86_64__) - for ( i = 0; i < max_page; i ++ ) - spin_lock_init(&frame_table[i].lock); -#endif } void __init arch_init_memory(void) @@ -290,15 +285,16 @@ void __init arch_init_memory(void) subarch_init_memory(); } -int memory_is_conventional_ram(paddr_t p) -{ +int page_is_conventional_ram(unsigned long mfn) +{ + uint64_t maddr = pfn_to_paddr(mfn); int i; for ( i = 0; i < e820.nr_map; i++ ) { if ( (e820.map[i].type == E820_RAM) && - (e820.map[i].addr <= p) && - (e820.map[i].size > p) ) + (e820.map[i].addr <= maddr) && + ((e820.map[i].addr + e820.map[i].size) >= (maddr + PAGE_SIZE)) ) return 1; } @@ -329,7 +325,7 @@ void share_xen_page_with_guest( page_set_owner(page, d); wmb(); /* install valid domain ptr before updating refcnt. 
*/ - ASSERT(page->count_info == 0); + ASSERT((page->count_info & ~PGC_xen_heap) == 0); /* Only add to the allocation list if the domain isn't dying. */ if ( !d->is_dying ) @@ -738,8 +734,8 @@ get_page_from_l1e( else if ( pte_flags_to_cacheattr(l1f) != ((page->count_info >> PGC_cacheattr_base) & 7) ) { - uint32_t x, nx, y = page->count_info; - uint32_t cacheattr = pte_flags_to_cacheattr(l1f); + unsigned long x, nx, y = page->count_info; + unsigned long cacheattr = pte_flags_to_cacheattr(l1f); if ( is_xen_heap_page(page) ) { @@ -1013,7 +1009,8 @@ static int put_page_from_l2e(l2_pgentry_ { unsigned long mfn = l2e_get_pfn(l2e), m = mfn; int writeable = l2e_get_flags(l2e) & _PAGE_RW; - ASSERT(opt_allow_hugepage && !(mfn & (L1_PAGETABLE_ENTRIES-1))); + + ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1))); do { put_data_page(mfn_to_page(m), writeable); } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); @@ -1031,14 +1028,28 @@ static int put_page_from_l3e(l3_pgentry_ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, int partial, int preemptible) { - if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && - (l3e_get_pfn(l3e) != pfn) ) - { - if ( unlikely(partial > 0) ) - return __put_page_type(l3e_get_page(l3e), preemptible); - return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); - } - return 1; + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) ) + return 1; + +#ifdef __x86_64__ + if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) ) + { + unsigned long mfn = l3e_get_pfn(l3e); + int writeable = l3e_get_flags(l3e) & _PAGE_RW; + + ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))); + do { + put_data_page(mfn_to_page(mfn), writeable); + } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) ); + + return 0; + } +#endif + + if ( unlikely(partial > 0) ) + return __put_page_type(l3e_get_page(l3e), preemptible); + + return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); } #if CONFIG_PAGING_LEVELS >= 4 @@ 
-1523,24 +1534,31 @@ static int free_l4_table(struct page_inf #define free_l4_table(page, preemptible) (-EINVAL) #endif -static void page_lock(struct page_info *page) -{ -#if defined(__i386__) - while ( unlikely(test_and_set_bit(_PGC_locked, &page->count_info)) ) - while ( test_bit(_PGC_locked, &page->count_info) ) +static int page_lock(struct page_info *page) +{ + unsigned long x, nx; + + do { + while ( (x = page->u.inuse.type_info) & PGT_locked ) cpu_relax(); -#else - spin_lock(&page->lock); -#endif + nx = x + (1 | PGT_locked); + if ( !(x & PGT_validated) || + !(x & PGT_count_mask) || + !(nx & PGT_count_mask) ) + return 0; + } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x ); + + return 1; } static void page_unlock(struct page_info *page) { -#if defined(__i386__) - clear_bit(_PGC_locked, &page->count_info); -#else - spin_unlock(&page->lock); -#endif + unsigned long x, nx, y = page->u.inuse.type_info; + + do { + x = y; + nx = x - (1 | PGT_locked); + } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); } /* How to write an entry to the guest pagetables. @@ -1603,19 +1621,15 @@ static int mod_l1_entry(l1_pgentry_t *pl struct vcpu *curr = current; struct domain *d = curr->domain; unsigned long mfn; - struct page_info *l1pg = mfn_to_page(gl1mfn); p2m_type_t p2mt; int rc = 1; - page_lock(l1pg); - if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ) - return page_unlock(l1pg), 0; + return 0; if ( unlikely(paging_mode_refcounts(d)) ) { rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad); - page_unlock(l1pg); return rc; } @@ -1624,13 +1638,12 @@ static int mod_l1_entry(l1_pgentry_t *pl /* Translate foreign guest addresses. 
*/ mfn = mfn_x(gfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e), &p2mt)); if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) ) - return page_unlock(l1pg), 0; + return 0; ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0); nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e)); if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) ) { - page_unlock(l1pg); MEM_LOG("Bad L1 flags %x", l1e_get_flags(nl1e) & l1_disallow_mask(d)); return 0; @@ -1642,12 +1655,11 @@ static int mod_l1_entry(l1_pgentry_t *pl adjust_guest_l1e(nl1e, d); rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad); - page_unlock(l1pg); return rc; } if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) ) - return page_unlock(l1pg), 0; + return 0; adjust_guest_l1e(nl1e, d); if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, @@ -1660,11 +1672,9 @@ static int mod_l1_entry(l1_pgentry_t *pl else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad)) ) { - page_unlock(l1pg); return 0; } - page_unlock(l1pg); put_page_from_l1e(ol1e, d); return rc; } @@ -1674,13 +1684,13 @@ static int mod_l2_entry(l2_pgentry_t *pl static int mod_l2_entry(l2_pgentry_t *pl2e, l2_pgentry_t nl2e, unsigned long pfn, - unsigned long type, int preserve_ad) { l2_pgentry_t ol2e; struct vcpu *curr = current; struct domain *d = curr->domain; struct page_info *l2pg = mfn_to_page(pfn); + unsigned long type = l2pg->u.inuse.type_info; int rc = 1; if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) ) @@ -1689,16 +1699,13 @@ static int mod_l2_entry(l2_pgentry_t *pl return 0; } - page_lock(l2pg); - if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) ) - return page_unlock(l2pg), 0; + return 0; if ( l2e_get_flags(nl2e) & _PAGE_PRESENT ) { if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) ) { - page_unlock(l2pg); MEM_LOG("Bad L2 flags %x", l2e_get_flags(nl2e) & L2_DISALLOW_MASK); return 0; @@ -1709,12 +1716,11 @@ static int mod_l2_entry(l2_pgentry_t *pl { adjust_guest_l2e(nl2e, d); 
rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad); - page_unlock(l2pg); return rc; } if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) ) - return page_unlock(l2pg), 0; + return 0; adjust_guest_l2e(nl2e, d); if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, @@ -1727,11 +1733,9 @@ static int mod_l2_entry(l2_pgentry_t *pl else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad)) ) { - page_unlock(l2pg); return 0; } - page_unlock(l2pg); put_page_from_l2e(ol2e, pfn); return rc; } @@ -1746,7 +1750,6 @@ static int mod_l3_entry(l3_pgentry_t *pl l3_pgentry_t ol3e; struct vcpu *curr = current; struct domain *d = curr->domain; - struct page_info *l3pg = mfn_to_page(pfn); int rc = 0; if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) @@ -1762,16 +1765,13 @@ static int mod_l3_entry(l3_pgentry_t *pl if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) ) return -EINVAL; - page_lock(l3pg); - if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) ) - return page_unlock(l3pg), -EFAULT; + return -EFAULT; if ( l3e_get_flags(nl3e) & _PAGE_PRESENT ) { if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) ) { - page_unlock(l3pg); MEM_LOG("Bad L3 flags %x", l3e_get_flags(nl3e) & l3_disallow_mask(d)); return -EINVAL; @@ -1782,13 +1782,12 @@ static int mod_l3_entry(l3_pgentry_t *pl { adjust_guest_l3e(nl3e, d); rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad); - page_unlock(l3pg); return rc ? 
0 : -EFAULT; } rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible); if ( unlikely(rc < 0) ) - return page_unlock(l3pg), rc; + return rc; rc = 0; adjust_guest_l3e(nl3e, d); @@ -1802,7 +1801,6 @@ static int mod_l3_entry(l3_pgentry_t *pl else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad)) ) { - page_unlock(l3pg); return -EFAULT; } @@ -1814,7 +1812,6 @@ static int mod_l3_entry(l3_pgentry_t *pl pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); } - page_unlock(l3pg); put_page_from_l3e(ol3e, pfn, 0, 0); return rc; } @@ -1831,7 +1828,6 @@ static int mod_l4_entry(l4_pgentry_t *pl struct vcpu *curr = current; struct domain *d = curr->domain; l4_pgentry_t ol4e; - struct page_info *l4pg = mfn_to_page(pfn); int rc = 0; if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) ) @@ -1840,16 +1836,13 @@ static int mod_l4_entry(l4_pgentry_t *pl return -EINVAL; } - page_lock(l4pg); - if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) ) - return page_unlock(l4pg), -EFAULT; + return -EFAULT; if ( l4e_get_flags(nl4e) & _PAGE_PRESENT ) { if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) ) { - page_unlock(l4pg); MEM_LOG("Bad L4 flags %x", l4e_get_flags(nl4e) & L4_DISALLOW_MASK); return -EINVAL; @@ -1860,13 +1853,12 @@ static int mod_l4_entry(l4_pgentry_t *pl { adjust_guest_l4e(nl4e, d); rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad); - page_unlock(l4pg); return rc ? 
0 : -EFAULT; } rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible); if ( unlikely(rc < 0) ) - return page_unlock(l4pg), rc; + return rc; rc = 0; adjust_guest_l4e(nl4e, d); @@ -1880,11 +1872,9 @@ static int mod_l4_entry(l4_pgentry_t *pl else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad)) ) { - page_unlock(l4pg); return -EFAULT; } - page_unlock(l4pg); put_page_from_l4e(ol4e, pfn, 0, 0); return rc; } @@ -1893,7 +1883,7 @@ static int mod_l4_entry(l4_pgentry_t *pl void put_page(struct page_info *page) { - u32 nx, x, y = page->count_info; + unsigned long nx, x, y = page->count_info; do { x = y; @@ -1911,36 +1901,30 @@ void put_page(struct page_info *page) int get_page(struct page_info *page, struct domain *domain) { - u32 x, nx, y = page->count_info; - u32 d, nd = page->u.inuse._domain; - u32 _domain = pickle_domptr(domain); + unsigned long x, y = page->count_info; do { - x = y; - nx = x + 1; - d = nd; + x = y; if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */ /* Keep one spare reference to be acquired by get_page_light(). */ - unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */ - unlikely(d != _domain) ) /* Wrong owner? */ - { - if ( !_shadow_mode_refcounts(domain) && !domain->is_dying ) - gdprintk(XENLOG_INFO, - "Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" - PRtype_info "\n", - page_to_mfn(page), domain, unpickle_domptr(d), - x, page->u.inuse.type_info); - return 0; - } - asm volatile ( - LOCK_PREFIX "cmpxchg8b %2" - : "=d" (nd), "=a" (y), - "=m" (*(volatile u64 *)(&page->count_info)) - : "0" (d), "1" (x), "c" (d), "b" (nx) ); - } - while ( unlikely(nd != d) || unlikely(y != x) ); - - return 1; + unlikely(((x + 2) & PGC_count_mask) <= 1) ) /* Overflow? 
*/ + goto fail; + } + while ( (y = cmpxchg(&page->count_info, x, x + 1)) != x ); + + if ( likely(page_get_owner(page) == domain) ) + return 1; + + put_page(page); + + fail: + if ( !_shadow_mode_refcounts(domain) && !domain->is_dying ) + gdprintk(XENLOG_INFO, + "Error pfn %lx: rd=%p, od=%p, caf=%08lx, taf=%" + PRtype_info "\n", + page_to_mfn(page), domain, page_get_owner(page), + y, page->u.inuse.type_info); + return 0; } /* @@ -1953,7 +1937,7 @@ int get_page(struct page_info *page, str */ static void get_page_light(struct page_info *page) { - u32 x, nx, y = page->count_info; + unsigned long x, nx, y = page->count_info; do { x = y; @@ -1994,7 +1978,7 @@ static int alloc_page_type(struct page_i rc = alloc_segdesc_page(page); break; default: - printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n", + printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%lx\n", type, page->u.inuse.type_info, page->count_info); rc = -EINVAL; @@ -2018,7 +2002,7 @@ static int alloc_page_type(struct page_i { ASSERT(rc < 0); MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %" - PRtype_info ": caf=%08x taf=%" PRtype_info, + PRtype_info ": caf=%08lx taf=%" PRtype_info, page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), type, page->count_info, page->u.inuse.type_info); page->u.inuse.type_info = 0; @@ -2949,7 +2933,6 @@ int do_mmu_update( unsigned int cmd, done = 0; struct vcpu *v = current; struct domain *d = v->domain; - unsigned long type_info; struct domain_mmap_cache mapcache; if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) @@ -3021,24 +3004,9 @@ int do_mmu_update( (unsigned long)(req.ptr & ~PAGE_MASK)); page = mfn_to_page(mfn); - switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask ) + if ( page_lock(page) ) { - case PGT_l1_page_table: - case PGT_l2_page_table: - case PGT_l3_page_table: - case PGT_l4_page_table: - { - if ( paging_mode_refcounts(d) ) - { - MEM_LOG("mmu update on auto-refcounted domain!"); - break; - } - - if ( 
unlikely(!get_page_type( - page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) ) - goto not_a_pt; - - switch ( type_info & PGT_type_mask ) + switch ( page->u.inuse.type_info & PGT_type_mask ) { case PGT_l1_page_table: { @@ -3050,7 +3018,7 @@ int do_mmu_update( case PGT_l2_page_table: { l2_pgentry_t l2e = l2e_from_intpte(req.val); - okay = mod_l2_entry(va, l2e, mfn, type_info, + okay = mod_l2_entry(va, l2e, mfn, cmd == MMU_PT_UPDATE_PRESERVE_AD); } break; @@ -3072,31 +3040,23 @@ int do_mmu_update( } break; #endif + case PGT_writable_page: + perfc_incr(writable_mmu_updates); + okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn)); + break; } - - put_page_type(page); + page_unlock(page); if ( rc == -EINTR ) rc = -EAGAIN; } - break; - - default: - not_a_pt: + else if ( get_page_type(page, PGT_writable_page) ) { - if ( unlikely(!get_page_type(page, PGT_writable_page)) ) - break; - perfc_incr(writable_mmu_updates); - okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn)); - put_page_type(page); } - break; - } unmap_domain_page_with_cache(va, &mapcache); - put_page(page); break; @@ -3175,7 +3135,6 @@ static int create_grant_pte_mapping( void *va; unsigned long gmfn, mfn; struct page_info *page; - u32 type; l1_pgentry_t ol1e; struct domain *d = v->domain; @@ -3196,21 +3155,23 @@ static int create_grant_pte_mapping( va = (void *)((unsigned long)va + ((unsigned long)pte_addr & ~PAGE_MASK)); page = mfn_to_page(mfn); - type = page->u.inuse.type_info & PGT_type_mask; - if ( (type != PGT_l1_page_table) || !get_page_type(page, type) ) - { - MEM_LOG("Grant map attempted to update a non-L1 page"); + if ( !page_lock(page) ) + { rc = GNTST_general_error; goto failed; } - page_lock(page); + if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table ) + { + page_unlock(page); + rc = GNTST_general_error; + goto failed; + } ol1e = *(l1_pgentry_t *)va; if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) ) { page_unlock(page); - put_page_type(page); rc = 
GNTST_general_error; goto failed; } @@ -3220,8 +3181,6 @@ static int create_grant_pte_mapping( if ( !paging_mode_refcounts(d) ) put_page_from_l1e(ol1e, d); - put_page_type(page); - failed: unmap_domain_page(va); put_page(page); @@ -3236,7 +3195,6 @@ static int destroy_grant_pte_mapping( void *va; unsigned long gmfn, mfn; struct page_info *page; - u32 type; l1_pgentry_t ol1e; gmfn = addr >> PAGE_SHIFT; @@ -3252,15 +3210,18 @@ static int destroy_grant_pte_mapping( va = (void *)((unsigned long)va + ((unsigned long)addr & ~PAGE_MASK)); page = mfn_to_page(mfn); - type = page->u.inuse.type_info & PGT_type_mask; - if ( (type != PGT_l1_page_table) || !get_page_type(page, type) ) - { - MEM_LOG("Grant map attempted to update a non-L1 page"); + if ( !page_lock(page) ) + { rc = GNTST_general_error; goto failed; } - page_lock(page); + if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table ) + { + page_unlock(page); + rc = GNTST_general_error; + goto failed; + } ol1e = *(l1_pgentry_t *)va; @@ -3270,7 +3231,6 @@ static int destroy_grant_pte_mapping( page_unlock(page); MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx", (unsigned long)l1e_get_intpte(ol1e), addr, frame); - put_page_type(page); rc = GNTST_general_error; goto failed; } @@ -3284,13 +3244,11 @@ static int destroy_grant_pte_mapping( { page_unlock(page); MEM_LOG("Cannot delete PTE entry at %p", va); - put_page_type(page); rc = GNTST_general_error; goto failed; } page_unlock(page); - put_page_type(page); failed: unmap_domain_page(va); @@ -3318,21 +3276,40 @@ static int create_grant_va_mapping( MEM_LOG("Could not find L1 PTE for address %lx", va); return GNTST_general_error; } + + if ( !get_page_from_pagenr(gl1mfn, current->domain) ) + { + guest_unmap_l1e(v, pl1e); + return GNTST_general_error; + } + l1pg = mfn_to_page(gl1mfn); - page_lock(l1pg); + if ( !page_lock(l1pg) ) + { + put_page(l1pg); + guest_unmap_l1e(v, pl1e); + return GNTST_general_error; + } + + if ( (l1pg->u.inuse.type_info & 
PGT_type_mask) != PGT_l1_page_table ) + { + page_unlock(l1pg); + put_page(l1pg); + guest_unmap_l1e(v, pl1e); + return GNTST_general_error; + } + ol1e = *pl1e; okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0); + page_unlock(l1pg); + put_page(l1pg); guest_unmap_l1e(v, pl1e); - pl1e = NULL; - - if ( !okay ) - return GNTST_general_error; - - if ( !paging_mode_refcounts(d) ) + + if ( okay && !paging_mode_refcounts(d) ) put_page_from_l1e(ol1e, d); - return GNTST_okay; + return okay ? GNTST_okay : GNTST_general_error; } static int replace_grant_va_mapping( @@ -3350,31 +3327,48 @@ static int replace_grant_va_mapping( return GNTST_general_error; } + if ( !get_page_from_pagenr(gl1mfn, current->domain) ) + { + rc = GNTST_general_error; + goto out; + } + l1pg = mfn_to_page(gl1mfn); - page_lock(l1pg); + if ( !page_lock(l1pg) ) + { + rc = GNTST_general_error; + put_page(l1pg); + goto out; + } + + if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table ) + { + rc = GNTST_general_error; + goto unlock_and_out; + } + ol1e = *pl1e; /* Check that the virtual address supplied is actually mapped to frame. */ if ( unlikely(l1e_get_pfn(ol1e) != frame) ) { - page_unlock(l1pg); MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx", l1e_get_pfn(ol1e), addr, frame); rc = GNTST_general_error; - goto out; + goto unlock_and_out; } /* Delete pagetable entry. 
*/ if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) ) { - page_unlock(l1pg); MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e); rc = GNTST_general_error; - goto out; - } - + goto unlock_and_out; + } + + unlock_and_out: page_unlock(l1pg); - + put_page(l1pg); out: guest_unmap_l1e(v, pl1e); return rc; @@ -3436,20 +3430,42 @@ int replace_grant_host_mapping( return GNTST_general_error; } + if ( !get_page_from_pagenr(gl1mfn, current->domain) ) + { + guest_unmap_l1e(curr, pl1e); + return GNTST_general_error; + } + l1pg = mfn_to_page(gl1mfn); - page_lock(l1pg); + if ( !page_lock(l1pg) ) + { + put_page(l1pg); + guest_unmap_l1e(curr, pl1e); + return GNTST_general_error; + } + + if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table ) + { + page_unlock(l1pg); + put_page(l1pg); + guest_unmap_l1e(curr, pl1e); + return GNTST_general_error; + } + ol1e = *pl1e; if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, curr, 0)) ) { page_unlock(l1pg); + put_page(l1pg); MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e); guest_unmap_l1e(curr, pl1e); return GNTST_general_error; } page_unlock(l1pg); + put_page(l1pg); guest_unmap_l1e(curr, pl1e); rc = replace_grant_va_mapping(addr, frame, ol1e, curr); @@ -3462,49 +3478,47 @@ int steal_page( int steal_page( struct domain *d, struct page_info *page, unsigned int memflags) { - u32 _d, _nd, x, y; + unsigned long x, y; spin_lock(&d->page_alloc_lock); + if ( is_xen_heap_page(page) || (page_get_owner(page) != d) ) + goto fail; + /* - * The tricky bit: atomically release ownership while there is just one - * benign reference to the page (PGC_allocated). If that reference - * disappears then the deallocation routine will safely spin. + * We require there is just one reference (PGC_allocated). We temporarily + * drop this reference now so that we can safely swizzle the owner. 
*/ - _d = pickle_domptr(d); - _nd = page->u.inuse._domain; - y = page->count_info; + y = page->count_info; do { x = y; - if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != - (1 | PGC_allocated)) || unlikely(_nd != _d) ) - { - MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p," - " caf=%08x, taf=%" PRtype_info "\n", - (void *) page_to_mfn(page), - d, d->domain_id, unpickle_domptr(_nd), x, - page->u.inuse.type_info); - spin_unlock(&d->page_alloc_lock); - return -1; - } - asm volatile ( - LOCK_PREFIX "cmpxchg8b %2" - : "=d" (_nd), "=a" (y), - "=m" (*(volatile u64 *)(&page->count_info)) - : "0" (_d), "1" (x), "c" (NULL), "b" (x) ); - } while (unlikely(_nd != _d) || unlikely(y != x)); - - /* - * Unlink from 'd'. At least one reference remains (now anonymous), so - * noone else is spinning to try to delete this page from 'd'. - */ + if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) ) + goto fail; + y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask); + } while ( y != x ); + + /* Swizzle the owner then reinstate the PGC_allocated reference. */ + page_set_owner(page, NULL); + y = page->count_info; + do { + x = y; + BUG_ON((x & (PGC_count_mask|PGC_allocated)) != PGC_allocated); + } while ( (y = cmpxchg(&page->count_info, x, x | 1)) != x ); + + /* Unlink from original owner. 
*/ if ( !(memflags & MEMF_no_refcount) ) d->tot_pages--; list_del(&page->list); spin_unlock(&d->page_alloc_lock); - return 0; + + fail: + spin_unlock(&d->page_alloc_lock); + MEM_LOG("Bad page %p: ed=%p(%u), sd=%p, caf=%08lx, taf=%" PRtype_info, + (void *)page_to_mfn(page), d, d->domain_id, + page_get_owner(page), page->count_info, page->u.inuse.type_info); + return -1; } int do_update_va_mapping(unsigned long va, u64 val64, @@ -3513,28 +3527,45 @@ int do_update_va_mapping(unsigned long v l1_pgentry_t val = l1e_from_intpte(val64); struct vcpu *v = current; struct domain *d = v->domain; + struct page_info *gl1pg; l1_pgentry_t *pl1e; unsigned long vmask, bmap_ptr, gl1mfn; cpumask_t pmask; - int rc = 0; + int rc; perfc_incr(calls_to_update_va); - - if ( unlikely(!access_ok(va, 1) && !paging_mode_external(d)) ) - return -EINVAL; rc = xsm_update_va_mapping(d, FOREIGNDOM, val); if ( rc ) return rc; + rc = -EINVAL; pl1e = guest_map_l1e(v, va, &gl1mfn); - - if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) ) - rc = -EINVAL; - + if ( unlikely(!pl1e || !get_page_from_pagenr(gl1mfn, d)) ) + goto out; + + gl1pg = mfn_to_page(gl1mfn); + if ( !page_lock(gl1pg) ) + { + put_page(gl1pg); + goto out; + } + + if ( (gl1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table ) + { + page_unlock(gl1pg); + put_page(gl1pg); + goto out; + } + + rc = mod_l1_entry(pl1e, val, gl1mfn, 0) ? 
0 : -EINVAL; + + page_unlock(gl1pg); + put_page(gl1pg); + + out: if ( pl1e ) guest_unmap_l1e(v, pl1e); - pl1e = NULL; process_deferred_ops(); @@ -3793,14 +3824,13 @@ long arch_memory_op(int op, XEN_GUEST_HA spin_unlock(&d->grant_table->lock); break; - case XENMAPSPACE_mfn: - { - if ( get_page_from_pagenr(xatp.idx, d) ) { - mfn = xatp.idx; - page = mfn_to_page(mfn); - } + case XENMAPSPACE_gmfn: + xatp.idx = gmfn_to_mfn(d, xatp.idx); + if ( !get_page_from_pagenr(xatp.idx, d) ) + break; + mfn = xatp.idx; + page = mfn_to_page(mfn); break; - } default: break; } @@ -3839,39 +3869,6 @@ long arch_memory_op(int op, XEN_GUEST_HA if ( page ) put_page(page); - - rcu_unlock_domain(d); - - break; - } - - case XENMEM_remove_from_physmap: - { - struct xen_remove_from_physmap xrfp; - unsigned long mfn; - struct domain *d; - - if ( copy_from_guest(&xrfp, arg, 1) ) - return -EFAULT; - - rc = rcu_lock_target_domain_by_id(xrfp.domid, &d); - if ( rc != 0 ) - return rc; - - if ( xsm_remove_from_physmap(current->domain, d) ) - { - rcu_unlock_domain(d); - return -EPERM; - } - - domain_lock(d); - - mfn = gmfn_to_mfn(d, xrfp.gpfn); - - if ( mfn_valid(mfn) ) - guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0); - - domain_unlock(d); rcu_unlock_domain(d); @@ -4245,15 +4242,25 @@ int ptwr_do_page_fault(struct vcpu *v, u /* Attempt to read the PTE that maps the VA being accessed. */ guest_get_eff_l1e(v, addr, &pte); - page = l1e_get_page(pte); /* We are looking only for read-only mappings of p.t. pages. 
*/ if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) || - !mfn_valid(l1e_get_pfn(pte)) || - ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) || - ((page->u.inuse.type_info & PGT_count_mask) == 0) || - (page_get_owner(page) != d) ) + !get_page_from_pagenr(l1e_get_pfn(pte), d) ) goto bail; + + page = l1e_get_page(pte); + if ( !page_lock(page) ) + { + put_page(page); + goto bail; + } + + if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table ) + { + page_unlock(page); + put_page(page); + goto bail; + } ptwr_ctxt.ctxt.regs = regs; ptwr_ctxt.ctxt.force_writeback = 0; @@ -4262,9 +4269,11 @@ int ptwr_do_page_fault(struct vcpu *v, u ptwr_ctxt.cr2 = addr; ptwr_ctxt.pte = pte; - page_lock(page); rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops); + page_unlock(page); + put_page(page); + if ( rc == X86EMUL_UNHANDLEABLE ) goto bail; @@ -4741,12 +4750,18 @@ void memguard_init(void) void memguard_init(void) { unsigned long start = max_t(unsigned long, xen_phys_start, 1UL << 20); +#ifdef __i386__ map_pages_to_xen( (unsigned long)__va(start), start >> PAGE_SHIFT, (xenheap_phys_end - start) >> PAGE_SHIFT, __PAGE_HYPERVISOR|MAP_SMALL_PAGES); -#ifdef __x86_64__ +#else + map_pages_to_xen( + (unsigned long)__va(start), + start >> PAGE_SHIFT, + (__pa(&_end) + PAGE_SIZE - 1 - start) >> PAGE_SHIFT, + __PAGE_HYPERVISOR|MAP_SMALL_PAGES); BUG_ON(start != xen_phys_start); map_pages_to_xen( XEN_VIRT_START, diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/Makefile --- a/xen/arch/x86/mm/Makefile Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/mm/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -7,5 +7,5 @@ obj-y += guest_walk_3.o obj-y += guest_walk_3.o obj-$(x86_64) += guest_walk_4.o -guest_walk_%.o: guest_walk.c $(HDRS) Makefile +guest_walk_%.o: guest_walk.c Makefile $(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@ diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/hap/Makefile --- a/xen/arch/x86/mm/hap/Makefile Wed Jan 28 12:22:58 
2009 +0900 +++ b/xen/arch/x86/mm/hap/Makefile Wed Jan 28 13:06:45 2009 +0900 @@ -7,5 +7,5 @@ guest_levels = $(subst level,,$(filter guest_levels = $(subst level,,$(filter %level,$(subst ., ,$(subst _, ,$(1))))) guest_walk_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1)) -guest_walk_%level.o: guest_walk.c $(HDRS) Makefile +guest_walk_%level.o: guest_walk.c Makefile $(CC) $(CFLAGS) $(call guest_walk_defns,$(@F)) -c $< -o $@ diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/hap/hap.c --- a/xen/arch/x86/mm/hap/hap.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/mm/hap/hap.c Wed Jan 28 13:06:45 2009 +0900 @@ -166,7 +166,7 @@ void hap_free_p2m_page(struct domain *d, ASSERT(page_get_owner(pg) == d); /* Should have just the one ref we gave it in alloc_p2m_page() */ if ( (pg->count_info & PGC_count_mask) != 1 ) - HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n", + HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n", pg->count_info, pg->u.inuse.type_info); pg->count_info = 0; /* Free should not decrement domain's total allocation, since diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/hap/p2m-ept.c --- a/xen/arch/x86/mm/hap/p2m-ept.c Wed Jan 28 12:22:58 2009 +0900 +++ b/xen/arch/x86/mm/hap/p2m-ept.c Wed Jan 28 13:06:45 2009 +0900 @@ -66,6 +66,7 @@ static int ept_set_middle_entry(struct d list_add_tail(&pg->list, &d->arch.p2m->pages); ept_entry->emt = 0; + ept_entry->igmt = 0; ept_entry->sp_avail = 0; ept_entry->avail1 = 0; ept_entry->mfn = page_to_mfn(pg); @@ -114,9 +115,13 @@ static int ept_next_level(struct domain } } +/* + * TODO: ept_set_entry() computes 'need_modify_vtd_table' for itself, + * by observing whether any gfn->mfn translations are modified. 
+ */ static int -ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, - unsigned int order, p2m_type_t p2mt) +_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, + unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table) { ept_entry_t *table = NULL; unsigned long gfn_remainder = gfn, offset = 0; @@ -124,6 +129,8 @@ ept_set_entry(struct domain *d, unsigned u32 index; int i, rv = 0, ret = 0; int walk_level = order / EPT_TABLE_ORDER; + int direct_mmio = (p2mt == p2m_mmio_direct); + uint8_t igmt = 0; /* we only support 4k and 2m pages now */ @@ -157,7 +164,9 @@ ept_set_entry(struct domain *d, unsigned { if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) ) { - ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn)); + ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn), + &igmt, direct_mmio); + ept_entry->igmt = igmt; ept_entry->sp_avail = walk_level ? 1 : 0; if ( ret == GUEST_TABLE_SUPER_PAGE ) @@ -208,7 +217,10 @@ ept_set_entry(struct domain *d, unsigned { split_ept_entry = split_table + i; split_ept_entry->emt = epte_get_entry_emt(d, - gfn-offset+i, split_mfn+i); + gfn-offset+i, split_mfn+i, + &igmt, direct_mmio); + split_ept_entry->igmt = igmt; + split_ept_entry->sp_avail = 0; split_ept_entry->mfn = split_mfn+i; @@ -223,7 +235,10 @@ ept_set_entry(struct domain *d, unsigned /* Set the destinated 4k page as normal */ split_ept_entry = split_table + offset; - split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn)); + split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn), + &igmt, direct_mmio); + split_ept_entry->igmt = igmt; + split_ept_entry->mfn = mfn_x(mfn); split_ept_entry->avail1 = p2mt; ept_p2m_type_to_flags(split_ept_entry, p2mt); @@ -246,7 +261,8 @@ out: /* Now the p2m table is not shared with vt-d page table */ - if ( iommu_enabled && is_hvm_domain(d) ) + if ( iommu_enabled && is_hvm_domain(d) + && need_modify_vtd_table ) { if ( p2mt == p2m_ram_rw ) { @@ -271,6 +287,17 @@ out: } return rv; +} + +static int 
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, + unsigned int order, p2m_type_t p2mt) +{ + /* ept_set_entry() are called from set_entry(), + * We should always create VT-d page table acording + * to the gfn to mfn translations changes. + */ + return _ept_set_entry(d, gfn, mfn, order, p2mt, 1); } /* Read ept p2m entries */ @@ -395,18 +422,30 @@ void ept_change_entry_emt_with_range(str * Set emt for super page. */ order = EPT_TABLE_ORDER; - ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); + /* vmx_set_uc_mode() dont' touch the gfn to mfn + * translations, only modify the emt field of the EPT entries. + * so we need not modify the current VT-d page tables. + */ + _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0); gfn += 0x1FF; } else { - /* change emt for partial entries of the 2m area */ - ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); + /* 1)change emt for partial entries of the 2m area. + * 2)vmx_set_uc_mode() dont' touch the gfn to mfn + * translations, only modify the emt field of the EPT entries. + * so we need not modify the current VT-d page tables. + */ + _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0); gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF; } } - else /* gfn assigned with 4k */ - ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); + else /* 1)gfn assigned with 4k + * 2)vmx_set_uc_mode() dont' touch the gfn to mfn + * translations, only modify the emt field of the EPT entries. + * so we need not modify the current VT-d page tables. + */ + _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0); } } _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |