[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User Isaku Yamahata <yamahata@xxxxxxxxxxxxx> # Date 1218077854 -32400 # Node ID a39913db6e51d060e312c27ef46d1442390bbf67 # Parent 7affdebb7a1ed4f80fb46c679f25c642b1a67034 # Parent eff5fcfa69bc601f1872a808d7a2134c460f2135 merge with xen-unstable.hg --- tools/examples/stubdom-ExampleHVMDomain | 14 .hgignore | 20 .hgtags | 2 Config.mk | 6 Makefile | 2 README | 22 docs/Docs.mk | 4 docs/Makefile | 16 docs/misc/vtd.txt | 9 docs/src/user.tex | 8 docs/xen-api/Makefile | 8 docs/xen-api/coversheet.tex | 4 docs/xen-api/revision-history.tex | 9 docs/xen-api/xenapi-coversheet.tex | 4 docs/xen-api/xenapi-datamodel-graph.dot | 7 docs/xen-api/xenapi-datamodel.tex | 1259 +++++++++++++ extras/mini-os/fs-front.c | 266 ++ extras/mini-os/gntmap.c | 252 ++ extras/mini-os/include/fs.h | 5 extras/mini-os/include/gntmap.h | 35 extras/mini-os/include/lib.h | 3 extras/mini-os/lib/sys.c | 12 extras/mini-os/minios.mk | 1 extras/mini-os/pcifront.c | 13 stubdom/Makefile | 38 stubdom/README | 18 stubdom/stubdom-dm | 6 tools/Makefile | 15 tools/blktap/Makefile | 3 tools/blktap/lib/Makefile | 2 tools/console/Makefile | 2 tools/console/daemon/io.c | 14 tools/examples/Makefile | 18 tools/examples/README | 4 tools/examples/xend-config.sxp | 4 tools/examples/xmexample.hvm | 5 tools/examples/xmexample.hvm-dm | 14 tools/examples/xmexample.hvm-stubdom | 4 tools/examples/xmexample.pv-grub | 212 ++ tools/examples/xmexample.vti | 5 tools/examples/xmexample3 | 24 tools/firmware/extboot/Makefile | 2 tools/firmware/hvmloader/Makefile | 8 tools/firmware/hvmloader/acpi/acpi2_0.h | 2 tools/firmware/hvmloader/acpi/build.c | 41 tools/firmware/hvmloader/hvmloader.c | 9 tools/firmware/hvmloader/util.h | 2 tools/firmware/rombios/32bit/32bitbios.c | 2 tools/firmware/rombios/32bit/Makefile | 9 tools/firmware/rombios/32bit/tcgbios/Makefile | 2 tools/firmware/rombios/32bit/tcgbios/tcgbios.c | 47 tools/firmware/rombios/32bit/tcgbios/tcgbios.h | 41 tools/firmware/rombios/32bit/util.c | 72 
tools/firmware/rombios/32bit/util.h | 3 tools/firmware/rombios/32bitgateway.c | 12 tools/firmware/rombios/32bitprotos.h | 6 tools/firmware/rombios/Makefile | 3 tools/firmware/rombios/rombios.c | 154 - tools/fs-back/fs-backend.c | 36 tools/fs-back/fs-backend.h | 17 tools/fs-back/fs-ops.c | 143 - tools/fs-back/fs-xenbus.c | 19 tools/include/xen-sys/MiniOS/privcmd.h | 2 tools/ioemu/block-vbd.c | 4 tools/ioemu/hw/pass-through.c | 329 +-- tools/ioemu/hw/pass-through.h | 8 tools/ioemu/hw/pc.c | 19 tools/ioemu/hw/pci.c | 7 tools/ioemu/hw/pt-msi.c | 6 tools/ioemu/hw/serial.c | 1 tools/ioemu/hw/vga.c | 4 tools/ioemu/vl.c | 2 tools/libaio/src/Makefile | 2 tools/libxc/xc_dom_boot.c | 30 tools/libxc/xc_domain_save.c | 54 tools/libxc/xc_hvm_build.c | 34 tools/libxc/xc_linux.c | 35 tools/libxc/xc_minios.c | 150 + tools/libxc/xc_netbsd.c | 53 tools/libxc/xc_physdev.c | 6 tools/libxc/xc_private.h | 5 tools/libxc/xc_solaris.c | 35 tools/libxc/xenctrl.h | 2 tools/pygrub/src/pygrub | 2 tools/python/xen/lowlevel/xc/xc.c | 3 tools/python/xen/util/pci.py | 116 - tools/python/xen/util/utils.py | 44 tools/python/xen/xend/XendDomainInfo.py | 30 tools/python/xen/xend/image.py | 10 tools/python/xen/xend/server/pciif.py | 43 tools/python/xen/xm/console.py | 63 tools/python/xen/xm/create.dtd | 8 tools/python/xen/xm/create.py | 65 tools/python/xen/xm/main.py | 130 + tools/python/xen/xm/shutdown.py | 5 tools/python/xen/xm/xenapi_create.py | 81 tools/xenmon/Makefile | 2 tools/xenstat/libxenstat/Makefile | 2 tools/xenstat/libxenstat/src/xenstat.c | 2 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c | 4 xen/Makefile | 6 xen/arch/ia64/xen/domain.c | 5 xen/arch/ia64/xen/irq.c | 3 xen/arch/x86/acpi/cpufreq/utility.c | 10 xen/arch/x86/acpi/pmstat.c | 6 xen/arch/x86/domain.c | 1 xen/arch/x86/domain_build.c | 1 xen/arch/x86/domctl.c | 15 xen/arch/x86/hvm/io.c | 2 xen/arch/x86/hvm/stdvga.c | 34 xen/arch/x86/hvm/svm/svm.c | 7 xen/arch/x86/hvm/vmx/vmx.c | 2 xen/arch/x86/io_apic.c | 8 
xen/arch/x86/irq.c | 3 xen/arch/x86/mm.c | 38 xen/arch/x86/mm/shadow/multi.c | 23 xen/arch/x86/msi.c | 17 xen/arch/x86/numa.c | 26 xen/arch/x86/physdev.c | 9 xen/arch/x86/platform_hypercall.c | 31 xen/arch/x86/setup.c | 12 xen/arch/x86/shutdown.c | 13 xen/arch/x86/time.c | 180 - xen/arch/x86/x86_64/physdev.c | 9 xen/arch/x86/x86_emulate/x86_emulate.c | 2 xen/common/compat/grant_table.c | 1 xen/common/domain.c | 4 xen/common/event_channel.c | 57 xen/common/keyhandler.c | 58 xen/common/page_alloc.c | 93 xen/common/shutdown.c | 5 xen/drivers/char/console.c | 3 xen/drivers/passthrough/amd/iommu_acpi.c | 233 +- xen/drivers/passthrough/amd/iommu_detect.c | 227 -- xen/drivers/passthrough/amd/iommu_init.c | 236 ++ xen/drivers/passthrough/amd/iommu_intr.c | 17 xen/drivers/passthrough/amd/iommu_map.c | 2 xen/drivers/passthrough/amd/pci_amd_iommu.c | 289 -- xen/drivers/passthrough/iommu.c | 72 xen/drivers/passthrough/pci.c | 118 - xen/drivers/passthrough/vtd/dmar.c | 57 xen/drivers/passthrough/vtd/extern.h | 1 xen/drivers/passthrough/vtd/iommu.c | 152 - xen/drivers/passthrough/vtd/utils.c | 136 + xen/include/acpi/cpufreq/processor_perf.h | 2 xen/include/asm-ia64/config.h | 2 xen/include/asm-x86/amd-iommu.h | 8 xen/include/asm-x86/config.h | 2 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h | 27 xen/include/asm-x86/io_apic.h | 2 xen/include/asm-x86/p2m.h | 2 xen/include/public/io/fsif.h | 24 xen/include/public/xen.h | 1 xen/include/xen/iommu.h | 2 xen/include/xen/irq.h | 2 xen/include/xen/sched.h | 4 xen/include/xen/shutdown.h | 2 xen/include/xen/spinlock.h | 4 158 files changed, 4775 insertions(+), 1930 deletions(-) diff -r 7affdebb7a1e -r a39913db6e51 .hgignore --- a/.hgignore Thu Aug 07 11:47:34 2008 +0900 +++ b/.hgignore Thu Aug 07 11:57:34 2008 +0900 @@ -21,8 +21,7 @@ ^[^/]*\.bz2$ ^\.config$ ^\.pc -^TAGS$ -^tags$ +(^|/)(tags|TAGS)$ ^build-.*$ ^dist/.*$ ^docs/.*\.aux$ @@ -60,10 +59,13 @@ ^docs/xen-api/vm_lifecycle.eps$ ^docs/xen-api/xenapi-datamodel-graph.eps$ 
^docs/xen-api/xenapi.out$ -^extras/mini-os/h/hypervisor-ifs$ -^extras/mini-os/h/xen-public$ +^extras/mini-os/arch/ia64/gen_off.s$ +^extras/mini-os/include/mini-os$ +^extras/mini-os/include/ia64/mini-os$ +^extras/mini-os/include/ia64/offsets.h$ +^extras/mini-os/include/x86/mini-os$ +^extras/mini-os/include/xen$ ^extras/mini-os/mini-os.*$ -^extras/mini-os/*-stubdom.*$ ^install/.*$ ^linux-[^/]*-paravirt/.*$ ^linux-2.6[^/]*/.*$ @@ -91,13 +93,17 @@ ^stubdom/libxc$ ^stubdom/lwip-.*$ ^stubdom/mini-os-.*$ +^stubdom/mk-headers$ ^stubdom/newlib-.*$ ^stubdom/pciutils-.*$ ^stubdom/zlib-.*$ ^stubdom/grub-cvs$ ^stubdom/grub/stage2$ ^stubdom/grub/netboot$ -^tools/.*/TAGS$ +^stubdom/grub/dirs$ +^stubdom/lwip/ +^stubdom/ioemu/ +^stubdom/grub-upstream/ ^tools/.*/build/lib.*/.*\.py$ ^tools/blktap/Makefile\.smh$ ^tools/blktap/drivers/blktapctrl$ @@ -253,7 +259,6 @@ ^xen/\.banner.*$ ^xen/BLOG$ ^xen/System.map$ -^xen/TAGS$ ^xen/arch/x86/asm-offsets\.s$ ^xen/arch/x86/boot/mkelf32$ ^xen/arch/x86/xen\.lds$ @@ -271,7 +276,6 @@ ^xen/include/xen/acm_policy\.h$ ^xen/include/xen/banner\.h$ ^xen/include/xen/compile\.h$ -^xen/tags$ ^xen/tools/figlet/figlet$ ^xen/tools/symbols$ ^xen/xen$ diff -r 7affdebb7a1e -r a39913db6e51 .hgtags --- a/.hgtags Thu Aug 07 11:47:34 2008 +0900 +++ b/.hgtags Thu Aug 07 11:57:34 2008 +0900 @@ -25,3 +25,5 @@ c5deb251b9dcece9e466a48a66d3528ca1797db4 c5deb251b9dcece9e466a48a66d3528ca1797db4 3.2.0-rc4 36bb2ab4722733d919d32e4555eb46cc6a06cb8f 3.2.0-rc5 9facc624a238f2b9437b07fa28ff65884aa867f2 3.2.0-rc6 +c3494402098e26507fc61a6579832c0149351d6a 3.3.0-rc1 +dde12ff94c96331668fe38a7b09506fa94d03c34 3.3.0-rc2 diff -r 7affdebb7a1e -r a39913db6e51 Config.mk --- a/Config.mk Thu Aug 07 11:47:34 2008 +0900 +++ b/Config.mk Thu Aug 07 11:57:34 2008 +0900 @@ -19,6 +19,8 @@ HOSTCFLAGS += -fno-strict-aliasing DISTDIR ?= $(XEN_ROOT)/dist DESTDIR ?= / +DOCDIR ?= /usr/share/doc/xen +MANDIR ?= /usr/share/man # Allow phony attribute to be listed as dependency rather than fake target .PHONY: 
.phony @@ -84,7 +86,11 @@ QEMU_REMOTE=http://xenbits.xensource.com # Mercurial in-tree version, or a local directory, or a git URL. # CONFIG_QEMU ?= ioemu # CONFIG_QEMU ?= ../qemu-xen.git +ifeq ($(XEN_TARGET_ARCH),ia64) +CONFIG_QEMU ?= ioemu +else CONFIG_QEMU ?= $(QEMU_REMOTE) +endif # Optional components XENSTAT_XENTOP ?= y diff -r 7affdebb7a1e -r a39913db6e51 Makefile --- a/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -149,7 +149,7 @@ help: @echo ' trees then make dist' @echo ' xen - build and install Xen hypervisor' @echo ' tools - build and install tools' - @echo ' stubdomain - build and install the stubdomain images' + @echo ' stubdom - build and install the stubdomain images' @echo ' kernels - build and install guest kernels' @echo ' kbuild - synonym for make kernels' @echo ' docs - build and install user documentation' diff -r 7affdebb7a1e -r a39913db6e51 README --- a/README Thu Aug 07 11:47:34 2008 +0900 +++ b/README Thu Aug 07 11:57:34 2008 +0900 @@ -1,10 +1,10 @@ ################################# - __ __ _____ _____ - \ \/ /___ _ __ |___ / |___ / - \ // _ \ '_ \ |_ \ |_ \ - / \ __/ | | | ___) | ___) | - /_/\_\___|_| |_| |____(_)____/ - + __ __ _ _ ___ + \ \/ /___ _ __ | || | / _ \ + \ // _ \ '_ \ | || |_| | | | + / \ __/ | | | |__ _| |_| | + /_/\_\___|_| |_| |_|(_)___/ + ################################# http://www.xen.org/ @@ -21,7 +21,7 @@ by the original Xen development team to by the original Xen development team to build enterprise products around Xen. -The 3.3 release offers excellent performance, hardware support and +The 4.0 release offers excellent performance, hardware support and enterprise-grade features such as x86_32-PAE, x86_64, SMP guests and live relocation of VMs. Ports to Linux 2.6, Linux 2.4, NetBSD, FreeBSD and Solaris are available from the community. @@ -54,8 +54,8 @@ 2. 
Configure your bootloader to boot Xen /boot/grub/menu.lst: edit this file to include an entry like the following: - title Xen 3.3 / XenLinux 2.6 - kernel /boot/xen-3.3.gz console=vga + title Xen 4.0 / XenLinux 2.6 + kernel /boot/xen-4.0.gz console=vga module /boot/vmlinuz-2.6-xen root=<root-dev> ro console=tty0 module /boot/initrd-2.6-xen.img @@ -74,7 +74,7 @@ 2. Configure your bootloader to boot Xen 32MB memory for internal use, which is not available for allocation to virtual machines. -3. Reboot your system and select the "Xen 3.3 / XenLinux 2.6" menu +3. Reboot your system and select the "Xen 4.0 / XenLinux 2.6" menu option. After booting Xen, Linux will start and your initialisation scripts should execute in the usual way. @@ -224,6 +224,6 @@ to modify grub.conf to use tboot to laun to modify grub.conf to use tboot to launch Xen. There are optional targets as part of Xen's top-level makefile that will -downlaod and build tboot: install-tboot, build-tboot, dist-tboot, clean-tboot. +download and build tboot: install-tboot, build-tboot, dist-tboot, clean-tboot. These will download the latest tar file from the SourceForge site using wget, then build/install/dist according to Xen's settings. 
diff -r 7affdebb7a1e -r a39913db6e51 docs/Docs.mk --- a/docs/Docs.mk Thu Aug 07 11:47:34 2008 +0900 +++ b/docs/Docs.mk Thu Aug 07 11:57:34 2008 +0900 @@ -7,7 +7,3 @@ POD2MAN := pod2man POD2MAN := pod2man DOT := dot NEATO := neato - -pkgdocdir := /usr/share/doc/xen -mandir := /usr/share/man - diff -r 7affdebb7a1e -r a39913db6e51 docs/Makefile --- a/docs/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/docs/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -80,17 +80,17 @@ distclean: clean .PHONY: install install: all - rm -rf $(DESTDIR)$(pkgdocdir) - $(INSTALL_DIR) $(DESTDIR)$(pkgdocdir) + rm -rf $(DESTDIR)$(DOCDIR) + $(INSTALL_DIR) $(DESTDIR)$(DOCDIR) $(MAKE) -C xen-api install - cp -dR ps $(DESTDIR)$(pkgdocdir) - cp -dR pdf $(DESTDIR)$(pkgdocdir) - $(INSTALL_DIR) $(DESTDIR)$(mandir) - cp -dR man1 $(DESTDIR)$(mandir) - cp -dR man5 $(DESTDIR)$(mandir) - [ ! -d html ] || cp -dR html $(DESTDIR)$(pkgdocdir) + cp -dR ps $(DESTDIR)$(DOCDIR) + cp -dR pdf $(DESTDIR)$(DOCDIR) + $(INSTALL_DIR) $(DESTDIR)$(MANDIR) + cp -dR man1 $(DESTDIR)$(MANDIR) + cp -dR man5 $(DESTDIR)$(MANDIR) + [ ! 
-d html ] || cp -dR html $(DESTDIR)$(DOCDIR) pdf/%.pdf: ps/%.ps $(INSTALL_DIR) $(@D) diff -r 7affdebb7a1e -r a39913db6e51 docs/misc/vtd.txt --- a/docs/misc/vtd.txt Thu Aug 07 11:47:34 2008 +0900 +++ b/docs/misc/vtd.txt Thu Aug 07 11:57:34 2008 +0900 @@ -2,7 +2,7 @@ Authors : Allen Kay <allen.m.kay@inte Authors : Allen Kay <allen.m.kay@xxxxxxxxx> Weidong Han <weidong.han@xxxxxxxxx> Created : October-24-2007 -Updated : May-07-2008 +Updated : August-06-2008 How to turn on VT-d in Xen -------------------------- @@ -21,7 +21,7 @@ 11) "hide" pci device from dom0 as follo title Xen-Fedora Core (2.6.18-xen) root (hd0,0) - kernel /boot/xen.gz com1=115200,8n1 console=com1 + kernel /boot/xen.gz com1=115200,8n1 console=com1 iommu=1 module /boot/vmlinuz-2.6.18.8-xen root=LABEL=/ ro xencons=ttyS console=tty0 console=ttyS0, pciback.hide=(01:00.0)(03:00.0) module /boot/initrd-2.6.18-xen.img @@ -30,6 +30,11 @@ 13) add "pci" line in /etc/xen/hvm.conf pci = [ '01:00.0', '03:00.0' ] 15) start hvm guest and use "lspci" to see the passthru device and "ifconfig" to see if IP address has been assigned to NIC devices. + + +Enable MSI/MSI-x for assigned devices +------------------------------------- +Add the "msi=1" option to the kernel line in the host's grub configuration. Caveat on Conventional PCI Device Passthrough diff -r 7affdebb7a1e -r a39913db6e51 docs/src/user.tex --- a/docs/src/user.tex Thu Aug 07 11:47:34 2008 +0900 +++ b/docs/src/user.tex Thu Aug 07 11:57:34 2008 +0900 @@ -4204,11 +4204,9 @@ writing to the VGA console after domain enabled by the BIOS. \item [ apic=bigsmp,default,es7000,summit ] Specify NUMA platform. This can usually be probed automatically. -\item [ dma\_bits=xxx ] Specify width of DMA - addresses in bits. Default is 30 bits (addresses up to 1GB are DMAable). -\item [ dma\_emergency\_pool=xxx ] Specify lower bound on size of DMA - pool below which ordinary allocations will fail rather than fall - back to allocating from the DMA pool. 
+\item [ dma\_bits=xxx ] Specify width of DMA addresses in bits. This + is used in NUMA systems to prevent this special DMA memory from + being exhausted in one node when remote nodes have available memory. \end{description} In addition, the following options may be specified on the Xen command diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/Makefile --- a/docs/xen-api/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/docs/xen-api/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -16,11 +16,11 @@ build: xenapi.pdf xenapi.ps build: xenapi.pdf xenapi.ps install: - $(INSTALL_DIR) $(DESTDIR)$(pkgdocdir)/ps - $(INSTALL_DIR) $(DESTDIR)$(pkgdocdir)/pdf + $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)/ps + $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)/pdf - [ -e xenapi.ps ] && cp xenapi.ps $(DESTDIR)$(pkgdocdir)/ps || true - [ -e xenapi.pdf ] && cp xenapi.pdf $(DESTDIR)$(pkgdocdir)/pdf || true + [ -e xenapi.ps ] && cp xenapi.ps $(DESTDIR)$(DOCDIR)/ps || true + [ -e xenapi.pdf ] && cp xenapi.pdf $(DESTDIR)$(DOCDIR)/pdf || true xenapi.dvi: $(TEX) $(EPS) $(EPSDOT) $(LATEX) xenapi.tex diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/coversheet.tex --- a/docs/xen-api/coversheet.tex Thu Aug 07 11:47:34 2008 +0900 +++ b/docs/xen-api/coversheet.tex Thu Aug 07 11:57:34 2008 +0900 @@ -50,7 +50,7 @@ Hollis Blanchard, IBM & Alastair Tse, Xe Hollis Blanchard, IBM & Alastair Tse, XenSource \\ Mike Day, IBM & Daniel Veillard, Red Hat \\ Jim Fehlig, Novell & Tom Wilkie, University of Cambridge \\ -Jon Harrop, XenSource & \\ +Jon Harrop, XenSource & Yosuke Iwamatsu, NEC \\ \end{tabular} \end{large} @@ -60,4 +60,4 @@ Jon Harrop, XenSource & \\ \legalnotice{} \newpage -\pagestyle{fancy} \ No newline at end of file +\pagestyle{fancy} diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/revision-history.tex --- a/docs/xen-api/revision-history.tex Thu Aug 07 11:47:34 2008 +0900 +++ b/docs/xen-api/revision-history.tex Thu Aug 07 11:57:34 2008 +0900 @@ -47,5 +47,14 @@ \end{flushleft} \end{minipage}\\ \hline + 1.0.6 & 24th Jul. 
08 & Y. Iwamatsu & + \begin{minipage}[t]{7cm} + \begin{flushleft} + Added definitions of new classes DPCI and PPCI. Updated the table + and the diagram representing relationships between classes. + Added host.PPCIs and VM.DPCIs fields. + \end{flushleft} + \end{minipage}\\ + \hline \end{tabular} \end{center} diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/xenapi-coversheet.tex --- a/docs/xen-api/xenapi-coversheet.tex Thu Aug 07 11:47:34 2008 +0900 +++ b/docs/xen-api/xenapi-coversheet.tex Thu Aug 07 11:57:34 2008 +0900 @@ -17,12 +17,12 @@ \newcommand{\coversheetlogo}{xen.eps} %% Document date -\newcommand{\datestring}{11th February 2008} +\newcommand{\datestring}{24th July 2008} \newcommand{\releasestatement}{Stable Release} %% Document revision -\newcommand{\revstring}{API Revision 1.0.5} +\newcommand{\revstring}{API Revision 1.0.6} %% Document authors \newcommand{\docauthors}{ diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/xenapi-datamodel-graph.dot --- a/docs/xen-api/xenapi-datamodel-graph.dot Thu Aug 07 11:47:34 2008 +0900 +++ b/docs/xen-api/xenapi-datamodel-graph.dot Thu Aug 07 11:57:34 2008 +0900 @@ -14,7 +14,7 @@ fontname="Verdana"; node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user XSPolicy ACMPolicy; node [shape=ellipse]; PIF_metrics VIF_metrics VM_metrics VBD_metrics PBD_metrics VM_guest_metrics host_metrics; -node [shape=box]; host_cpu console +node [shape=box]; DPCI PPCI host_cpu console session -> host [ arrowhead="none" ] session -> user [ arrowhead="none" ] VM -> VM_metrics [ arrowhead="none" ] @@ -22,7 +22,7 @@ VM -> console [ arrowhead="crow" ] VM -> console [ arrowhead="crow" ] host -> PBD [ arrowhead="crow", arrowtail="none" ] host -> host_metrics [ arrowhead="none" ] -host -> host_cpu [ arrowhead="none" ] +host -> host_cpu [ arrowhead="crow", arrowtail="none" ] VIF -> VM [ arrowhead="none", arrowtail="crow" ] VIF -> network [ arrowhead="none", arrowtail="crow" ] VIF -> VIF_metrics [ arrowhead="none" ] @@ -38,4 +38,7 @@ 
VBD -> VBD_metrics [ arrowhead="none" ] VBD -> VBD_metrics [ arrowhead="none" ] XSPolicy -> host [ arrowhead="none" ] XSPolicy -> ACMPolicy [ arrowhead="none" ] +DPCI -> VM [ arrowhead="none", arrowtail="crow" ] +DPCI -> PPCI [ arrowhead="none" ] +PPCI -> host [ arrowhead="none", arrowtail="crow" ] } diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/xenapi-datamodel.tex --- a/docs/xen-api/xenapi-datamodel.tex Thu Aug 07 11:47:34 2008 +0900 +++ b/docs/xen-api/xenapi-datamodel.tex Thu Aug 07 11:57:34 2008 +0900 @@ -44,6 +44,8 @@ Name & Description \\ {\tt crashdump} & A VM crashdump \\ {\tt VTPM} & A virtual TPM device \\ {\tt console} & A console \\ +{\tt DPCI} & A pass-through PCI device \\ +{\tt PPCI} & A physical PCI device \\ {\tt user} & A user of the system \\ {\tt debug} & A basic class for testing \\ {\tt XSPolicy} & A class for handling Xen Security Policies \\ @@ -70,6 +72,8 @@ SR.VDIs & VDI.SR & many-to-one\\ SR.VDIs & VDI.SR & many-to-one\\ VTPM.VM & VM.VTPMs & one-to-many\\ console.VM & VM.consoles & one-to-many\\ +DPCI.VM & VM.DPCIs & one-to-many\\ +PPCI.host & host.PPCIs & one-to-many\\ host.resident\_VMs & VM.resident\_on & many-to-one\\ host.host\_CPUs & host\_cpu.host & many-to-one\\ \hline @@ -1402,6 +1406,7 @@ Quals & Field & Type & Description \\ $\mathit{RO}_\mathit{run}$ & {\tt VBDs} & (VBD ref) Set & virtual block devices \\ $\mathit{RO}_\mathit{run}$ & {\tt crash\_dumps} & (crashdump ref) Set & crash dumps associated with this VM \\ $\mathit{RO}_\mathit{run}$ & {\tt VTPMs} & (VTPM ref) Set & virtual TPMs \\ +$\mathit{RO}_\mathit{run}$ & {\tt DPCIs} & (DPCI ref) Set & pass-through PCI devices \\ $\mathit{RW}$ & {\tt PV/bootloader} & string & name of or path to bootloader \\ $\mathit{RW}$ & {\tt PV/kernel} & string & path to the kernel \\ $\mathit{RW}$ & {\tt PV/ramdisk} & string & path to the initrd \\ @@ -3406,6 +3411,38 @@ Get the VTPMs field of the given VM. 
\noindent {\bf Return Type:} {\tt (VTPM ref) Set +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_DPCIs} + +{\bf Overview:} +Get the DPCIs field of the given VM. + + \noindent {\bf Signature:} +\begin{verbatim} ((DPCI ref) Set) get_DPCIs (session_id s, VM ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt VM ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +(DPCI ref) Set } @@ -5480,6 +5517,7 @@ Quals & Field & Type & Description \\ $\mathit{RW}$ & {\tt suspend\_image\_sr} & SR ref & The SR in which VDIs for suspend images are created \\ $\mathit{RW}$ & {\tt crash\_dump\_sr} & SR ref & The SR in which VDIs for crash dumps are created \\ $\mathit{RO}_\mathit{run}$ & {\tt PBDs} & (PBD ref) Set & physical blockdevices \\ +$\mathit{RO}_\mathit{run}$ & {\tt PPCIs} & (PPCI ref) Set & physical PCI devices \\ $\mathit{RO}_\mathit{run}$ & {\tt host\_CPUs} & (host\_cpu ref) Set & The physical CPUs on this host \\ $\mathit{RO}_\mathit{run}$ & {\tt metrics} & host\_metrics ref & metrics associated with this host \\ \hline @@ -6767,6 +6805,38 @@ Get the PBDs field of the given host. \noindent {\bf Return Type:} {\tt (PBD ref) Set +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_PPCIs} + +{\bf Overview:} +Get the PPCIs field of the given host. 
+ + \noindent {\bf Signature:} +\begin{verbatim} ((PPCI ref) Set) get_PPCIs (session_id s, host ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt host ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +(PPCI ref) Set } @@ -14464,6 +14534,1195 @@ all fields from the object \vspace{1cm} \newpage +\section{Class: DPCI} +\subsection{Fields for class: DPCI} +\begin{longtable}{|lllp{0.38\textwidth}|} +\hline +\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf DPCI} \\ +\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A +pass-through PCI device.}} \\ +\hline +Quals & Field & Type & Description \\ +\hline +$\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\ +$\mathit{RO}_\mathit{inst}$ & {\tt VM} & VM ref & the virtual machine \\ +$\mathit{RO}_\mathit{inst}$ & {\tt PPCI} & PPCI ref & the physical PCI device \\ +$\mathit{RO}_\mathit{inst}$ & {\tt hotplug\_slot} & int & the slot number to which this PCI device is inserted \\ +$\mathit{RO}_\mathit{run}$ & {\tt virtual\_domain} & int & the virtual domain number \\ +$\mathit{RO}_\mathit{run}$ & {\tt virtual\_bus} & int & the virtual bus number \\ +$\mathit{RO}_\mathit{run}$ & {\tt virtual\_slot} & int & the virtual slot number \\ +$\mathit{RO}_\mathit{run}$ & {\tt virtual\_func} & int & the virtual func number \\ +$\mathit{RO}_\mathit{run}$ & {\tt virtual\_name} & string & the virtual PCI name \\ +\hline +\end{longtable} +\subsection{RPCs associated with class: DPCI} +\subsubsection{RPC name:~get\_all} + +{\bf Overview:} +Return a list of all the DPCIs known to the system. 
+ + \noindent {\bf Signature:} +\begin{verbatim} ((DPCI ref) Set) get_all (session_id s)\end{verbatim} + + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +(DPCI ref) Set +} + + +references to all objects +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_uuid} + +{\bf Overview:} +Get the uuid field of the given DPCI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_uuid (session_id s, DPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_VM} + +{\bf Overview:} +Get the VM field of the given DPCI. + + \noindent {\bf Signature:} +\begin{verbatim} (VM ref) get_VM (session_id s, DPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +VM ref +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_PPCI} + +{\bf Overview:} +Get the PPCI field of the given DPCI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} (PPCI ref) get_PPCI (session_id s, DPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +PPCI ref +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_hotplug\_slot} + +{\bf Overview:} +Get the hotplug\_slot field of the given DPCI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_hotplug_slot (session_id s, DPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_virtual\_domain} + +{\bf Overview:} +Get the virtual\_domain field of the given DPCI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_virtual_domain (session_id s, DPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_virtual\_bus} + +{\bf Overview:} +Get the virtual\_bus field of the given DPCI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} int get_virtual_bus (session_id s, DPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_virtual\_slot} + +{\bf Overview:} +Get the virtual\_slot field of the given DPCI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_virtual_slot (session_id s, DPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_virtual\_func} + +{\bf Overview:} +Get the virtual\_func field of the given DPCI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_virtual_func (session_id s, DPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_virtual\_name} + +{\bf Overview:} +Get the virtual\_name field of the given DPCI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} string get_virtual_name (session_id s, DPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~create} + +{\bf Overview:} +Create a new DPCI instance, and return its handle. + + \noindent {\bf Signature:} +\begin{verbatim} (DPCI ref) create (session_id s, DPCI record args)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI record } & args & All constructor arguments \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +DPCI ref +} + + +reference to the newly created object +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~destroy} + +{\bf Overview:} +Destroy the specified DPCI instance. + + \noindent {\bf Signature:} +\begin{verbatim} void destroy (session_id s, DPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +void +} + + +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_by\_uuid} + +{\bf Overview:} +Get a reference to the DPCI instance with the specified UUID. 
+ + \noindent {\bf Signature:} +\begin{verbatim} (DPCI ref) get_by_uuid (session_id s, string uuid)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt string } & uuid & UUID of object to return \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +DPCI ref +} + + +reference to the object +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_record} + +{\bf Overview:} +Get a record containing the current state of the given DPCI. + + \noindent {\bf Signature:} +\begin{verbatim} (DPCI record) get_record (session_id s, DPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +DPCI record +} + + +all fields from the object +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} + +\vspace{1cm} +\newpage +\section{Class: PPCI} +\subsection{Fields for class: PPCI} +\begin{longtable}{|lllp{0.38\textwidth}|} +\hline +\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf PPCI} \\ +\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A +physical PCI device.}} \\ +\hline +Quals & Field & Type & Description \\ +\hline +$\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\ +$\mathit{RO}_\mathit{run}$ & {\tt host} & host ref & the physical machine to which this PPCI is connected \\ +$\mathit{RO}_\mathit{run}$ & {\tt domain} & int & the domain number \\ +$\mathit{RO}_\mathit{run}$ & {\tt bus} & int & the bus number \\ +$\mathit{RO}_\mathit{run}$ & {\tt slot} & int & the slot number \\ +$\mathit{RO}_\mathit{run}$ & {\tt func} & int & the func number \\ +$\mathit{RO}_\mathit{run}$ & {\tt name} & string & the PCI name \\ 
+$\mathit{RO}_\mathit{run}$ & {\tt vendor\_id} & int & the vendor ID \\ +$\mathit{RO}_\mathit{run}$ & {\tt vendor\_name} & string & the vendor name \\ +$\mathit{RO}_\mathit{run}$ & {\tt device\_id} & int & the device ID \\ +$\mathit{RO}_\mathit{run}$ & {\tt device\_name} & string & the device name \\ +$\mathit{RO}_\mathit{run}$ & {\tt revision\_id} & int & the revision ID \\ +$\mathit{RO}_\mathit{run}$ & {\tt class\_code} & int & the class code \\ +$\mathit{RO}_\mathit{run}$ & {\tt class\_name} & string & the class name \\ +$\mathit{RO}_\mathit{run}$ & {\tt subsystem\_vendor\_id} & int & the subsystem vendor ID \\ +$\mathit{RO}_\mathit{run}$ & {\tt subsystem\_vendor\_name} & string & the subsystem vendor name \\ +$\mathit{RO}_\mathit{run}$ & {\tt subsystem\_id} & int & the subsystem ID \\ +$\mathit{RO}_\mathit{run}$ & {\tt subsystem\_name} & string & the subsystem name \\ +$\mathit{RO}_\mathit{run}$ & {\tt driver} & string & the driver name \\ +\hline +\end{longtable} +\subsection{RPCs associated with class: PPCI} +\subsubsection{RPC name:~get\_all} + +{\bf Overview:} +Return a list of all the PPCIs known to the system. + + \noindent {\bf Signature:} +\begin{verbatim} ((PPCI ref) Set) get_all (session_id s)\end{verbatim} + + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +(PPCI ref) Set +} + + +references to all objects +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_uuid} + +{\bf Overview:} +Get the uuid field of the given PPCI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} string get_uuid (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_host} + +{\bf Overview:} +Get the host field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} (host ref) get_host (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +host ref +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_domain} + +{\bf Overview:} +Get the domain field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_domain (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_bus} + +{\bf Overview:} +Get the bus field of the given PPCI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} int get_bus (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_slot} + +{\bf Overview:} +Get the slot field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_slot (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_func} + +{\bf Overview:} +Get the func field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_func (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_name} + +{\bf Overview:} +Get the name field of the given PPCI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} string get_name (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_vendor\_id} + +{\bf Overview:} +Get the vendor\_id field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_vendor_id (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_vendor\_name} + +{\bf Overview:} +Get the vendor\_name field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_vendor_name (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_device\_id} + +{\bf Overview:} +Get the device\_id field of the given PPCI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} int get_device_id (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_device\_name} + +{\bf Overview:} +Get the device\_name field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_device_name (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_revision\_id} + +{\bf Overview:} +Get the revision\_id field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_revision_id (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_class\_code} + +{\bf Overview:} +Get the class\_code field of the given PPCI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} int get_class_code (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_class\_name} + +{\bf Overview:} +Get the class\_name field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_class_name (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_subsystem\_vendor\_id} + +{\bf Overview:} +Get the subsystem\_vendor\_id field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_subsystem_vendor_id (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_subsystem\_vendor\_name} + +{\bf Overview:} +Get the subsystem\_vendor\_name field of the given PPCI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} string get_subsystem_vendor_name (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_subsystem\_id} + +{\bf Overview:} +Get the subsystem\_id field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_subsystem_id (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_subsystem\_name} + +{\bf Overview:} +Get the subsystem\_name field of the given PPCI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_subsystem_name (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_driver} + +{\bf Overview:} +Get the driver field of the given PPCI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} string get_driver (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_by\_uuid} + +{\bf Overview:} +Get a reference to the PPCI instance with the specified UUID. + + \noindent {\bf Signature:} +\begin{verbatim} (PPCI ref) get_by_uuid (session_id s, string uuid)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt string } & uuid & UUID of object to return \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +PPCI ref +} + + +reference to the object +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_record} + +{\bf Overview:} +Get a record containing the current state of the given PPCI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} (PPCI record) get_record (session_id s, PPCI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PPCI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +PPCI record +} + + +all fields from the object +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} + +\vspace{1cm} +\newpage \section{Class: user} \subsection{Fields for class: user} \begin{longtable}{|lllp{0.38\textwidth}|} diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/fs-front.c --- a/extras/mini-os/fs-front.c Thu Aug 07 11:47:34 2008 +0900 +++ b/extras/mini-os/fs-front.c Thu Aug 07 11:57:34 2008 +0900 @@ -50,6 +50,8 @@ struct fs_request; struct fs_import *fs_import; +void *alloc_buffer_page(struct fs_request *req, domid_t domid, grant_ref_t *gref); +void free_buffer_page(struct fs_request *req); /******************************************************************************/ /* RING REQUEST/RESPONSES HANDLING */ @@ -57,11 +59,19 @@ struct fs_import *fs_import; struct fs_request { - void *page; - grant_ref_t gref; + void *private1; /* Specific to request type */ + void *private2; struct thread *thread; /* Thread blocked on this request */ struct fsif_response shadow_rsp; /* Response copy writen by the interrupt handler */ +}; + +struct fs_rw_gnts +{ + /* TODO 16 bit? 
*/ + int count; + grant_ref_t grefs[FSIF_NR_READ_GNTS]; + void *pages[FSIF_NR_READ_GNTS]; }; /* Ring operations: @@ -177,6 +187,8 @@ int fs_open(struct fs_import *import, ch { struct fs_request *fsr; unsigned short priv_req_id; + grant_ref_t gref; + void *buffer; RING_IDX back_req_id; struct fsif_request *req; int fd; @@ -189,14 +201,15 @@ int fs_open(struct fs_import *import, ch priv_req_id = get_id_from_freelist(import->freelist); DEBUG("Request id for fs_open call is: %d\n", priv_req_id); fsr = &import->requests[priv_req_id]; - DEBUG("gref id=%d\n", fsr->gref); - fsr->thread = current; - sprintf(fsr->page, "%s", file); + buffer = alloc_buffer_page(fsr, import->dom_id, &gref); + DEBUG("gref id=%d\n", gref); + fsr->thread = current; + sprintf(buffer, "%s", file); req = RING_GET_REQUEST(&import->ring, back_req_id); req->type = REQ_FILE_OPEN; req->id = priv_req_id; - req->u.fopen.gref = fsr->gref; + req->u.fopen.gref = gref; /* Set blocked flag before commiting the request, thus avoiding missed * response race */ @@ -207,6 +220,7 @@ int fs_open(struct fs_import *import, ch /* Read the response */ fd = (int)fsr->shadow_rsp.ret_val; DEBUG("The following FD returned: %d\n", fd); + free_buffer_page(fsr); add_id_to_freelist(priv_req_id, import->freelist); return fd; @@ -254,11 +268,13 @@ ssize_t fs_read(struct fs_import *import { struct fs_request *fsr; unsigned short priv_req_id; + struct fs_rw_gnts gnts; RING_IDX back_req_id; struct fsif_request *req; ssize_t ret; - - BUG_ON(len > PAGE_SIZE); + int i; + + BUG_ON(len > PAGE_SIZE * FSIF_NR_READ_GNTS); /* Prepare request for the backend */ back_req_id = reserve_fsif_request(import); @@ -268,17 +284,27 @@ ssize_t fs_read(struct fs_import *import priv_req_id = get_id_from_freelist(import->freelist); DEBUG("Request id for fs_read call is: %d\n", priv_req_id); fsr = &import->requests[priv_req_id]; - DEBUG("gref=%d\n", fsr->gref); - fsr->thread = current; - memset(fsr->page, 0, PAGE_SIZE); req = RING_GET_REQUEST(&import->ring, 
back_req_id); req->type = REQ_FILE_READ; req->id = priv_req_id; req->u.fread.fd = fd; - req->u.fread.gref = fsr->gref; req->u.fread.len = len; req->u.fread.offset = offset; + + + ASSERT(len > 0); + gnts.count = ((len - 1) / PAGE_SIZE) + 1; + for(i=0; i<gnts.count; i++) + { + gnts.pages[i] = (void *)alloc_page(); + gnts.grefs[i] = gnttab_grant_access(import->dom_id, + virt_to_mfn(gnts.pages[i]), + 0); + memset(gnts.pages[i], 0, PAGE_SIZE); + req->u.fread.grefs[i] = gnts.grefs[i]; + } + fsr->thread = current; /* Set blocked flag before commiting the request, thus avoiding missed * response race */ @@ -290,7 +316,19 @@ ssize_t fs_read(struct fs_import *import ret = (ssize_t)fsr->shadow_rsp.ret_val; DEBUG("The following ret value returned %d\n", ret); if(ret > 0) - memcpy(buf, fsr->page, ret); + { + ssize_t to_copy = ret, current_copy; + for(i=0; i<gnts.count; i++) + { + gnttab_end_access(gnts.grefs[i]); + current_copy = to_copy > PAGE_SIZE ? PAGE_SIZE : to_copy; + if(current_copy > 0) + memcpy(buf, gnts.pages[i], current_copy); + to_copy -= current_copy; + buf = (char*) buf + current_copy; + free_page(gnts.pages[i]); + } + } add_id_to_freelist(priv_req_id, import->freelist); return ret; @@ -301,11 +339,13 @@ ssize_t fs_write(struct fs_import *impor { struct fs_request *fsr; unsigned short priv_req_id; - RING_IDX back_req_id; - struct fsif_request *req; - ssize_t ret; - - BUG_ON(len > PAGE_SIZE); + struct fs_rw_gnts gnts; + RING_IDX back_req_id; + struct fsif_request *req; + ssize_t ret, to_copy; + int i; + + BUG_ON(len > PAGE_SIZE * FSIF_NR_WRITE_GNTS); /* Prepare request for the backend */ back_req_id = reserve_fsif_request(import); @@ -315,20 +355,35 @@ ssize_t fs_write(struct fs_import *impor priv_req_id = get_id_from_freelist(import->freelist); DEBUG("Request id for fs_read call is: %d\n", priv_req_id); fsr = &import->requests[priv_req_id]; - DEBUG("gref=%d\n", fsr->gref); - fsr->thread = current; - memcpy(fsr->page, buf, len); - BUG_ON(len > PAGE_SIZE); - 
memset((char *)fsr->page + len, 0, PAGE_SIZE - len); req = RING_GET_REQUEST(&import->ring, back_req_id); req->type = REQ_FILE_WRITE; req->id = priv_req_id; req->u.fwrite.fd = fd; - req->u.fwrite.gref = fsr->gref; req->u.fwrite.len = len; req->u.fwrite.offset = offset; + ASSERT(len > 0); + gnts.count = ((len - 1) / PAGE_SIZE) + 1; + to_copy = len; + for(i=0; i<gnts.count; i++) + { + int current_copy = (to_copy > PAGE_SIZE ? PAGE_SIZE : to_copy); + gnts.pages[i] = (void *)alloc_page(); + gnts.grefs[i] = gnttab_grant_access(import->dom_id, + virt_to_mfn(gnts.pages[i]), + 0); + memcpy(gnts.pages[i], buf, current_copy); + if(current_copy < PAGE_SIZE) + memset((char *)gnts.pages[i] + current_copy, + 0, + PAGE_SIZE - current_copy); + req->u.fwrite.grefs[i] = gnts.grefs[i]; + to_copy -= current_copy; + buf = (char*) buf + current_copy; + } + fsr->thread = current; + /* Set blocked flag before commiting the request, thus avoiding missed * response race */ block(current); @@ -338,6 +393,11 @@ ssize_t fs_write(struct fs_import *impor /* Read the response */ ret = (ssize_t)fsr->shadow_rsp.ret_val; DEBUG("The following ret value returned %d\n", ret); + for(i=0; i<gnts.count; i++) + { + gnttab_end_access(gnts.grefs[i]); + free_page(gnts.pages[i]); + } add_id_to_freelist(priv_req_id, import->freelist); return ret; @@ -361,15 +421,12 @@ int fs_stat(struct fs_import *import, priv_req_id = get_id_from_freelist(import->freelist); DEBUG("Request id for fs_stat call is: %d\n", priv_req_id); fsr = &import->requests[priv_req_id]; - DEBUG("gref=%d\n", fsr->gref); - fsr->thread = current; - memset(fsr->page, 0, PAGE_SIZE); + fsr->thread = current; req = RING_GET_REQUEST(&import->ring, back_req_id); req->type = REQ_STAT; req->id = priv_req_id; req->u.fstat.fd = fd; - req->u.fstat.gref = fsr->gref; /* Set blocked flag before commiting the request, thus avoiding missed * response race */ @@ -380,7 +437,9 @@ int fs_stat(struct fs_import *import, /* Read the response */ ret = 
(int)fsr->shadow_rsp.ret_val; DEBUG("Following ret from fstat: %d\n", ret); - memcpy(stat, fsr->page, sizeof(struct fsif_stat_response)); + memcpy(stat, + &fsr->shadow_rsp.fstat, + sizeof(struct fsif_stat_response)); add_id_to_freelist(priv_req_id, import->freelist); return ret; @@ -430,6 +489,8 @@ int fs_remove(struct fs_import *import, { struct fs_request *fsr; unsigned short priv_req_id; + grant_ref_t gref; + void *buffer; RING_IDX back_req_id; struct fsif_request *req; int ret; @@ -442,14 +503,15 @@ int fs_remove(struct fs_import *import, priv_req_id = get_id_from_freelist(import->freelist); DEBUG("Request id for fs_open call is: %d\n", priv_req_id); fsr = &import->requests[priv_req_id]; - DEBUG("gref=%d\n", fsr->gref); - fsr->thread = current; - sprintf(fsr->page, "%s", file); + buffer = alloc_buffer_page(fsr, import->dom_id, &gref); + DEBUG("gref=%d\n", gref); + fsr->thread = current; + sprintf(buffer, "%s", file); req = RING_GET_REQUEST(&import->ring, back_req_id); req->type = REQ_REMOVE; req->id = priv_req_id; - req->u.fremove.gref = fsr->gref; + req->u.fremove.gref = gref; /* Set blocked flag before commiting the request, thus avoiding missed * response race */ @@ -460,6 +522,7 @@ int fs_remove(struct fs_import *import, /* Read the response */ ret = (int)fsr->shadow_rsp.ret_val; DEBUG("The following ret: %d\n", ret); + free_buffer_page(fsr); add_id_to_freelist(priv_req_id, import->freelist); return ret; @@ -472,6 +535,8 @@ int fs_rename(struct fs_import *import, { struct fs_request *fsr; unsigned short priv_req_id; + grant_ref_t gref; + void *buffer; RING_IDX back_req_id; struct fsif_request *req; int ret; @@ -486,15 +551,16 @@ int fs_rename(struct fs_import *import, priv_req_id = get_id_from_freelist(import->freelist); DEBUG("Request id for fs_open call is: %d\n", priv_req_id); fsr = &import->requests[priv_req_id]; - DEBUG("gref=%d\n", fsr->gref); - fsr->thread = current; - sprintf(fsr->page, "%s%s%c%s%s", + buffer = alloc_buffer_page(fsr, import->dom_id, 
&gref); + DEBUG("gref=%d\n", gref); + fsr->thread = current; + sprintf(buffer, "%s%s%c%s%s", old_header, old_file_name, '\0', new_header, new_file_name); req = RING_GET_REQUEST(&import->ring, back_req_id); req->type = REQ_RENAME; req->id = priv_req_id; - req->u.frename.gref = fsr->gref; + req->u.frename.gref = gref; req->u.frename.old_name_offset = strlen(old_header); req->u.frename.new_name_offset = strlen(old_header) + strlen(old_file_name) + @@ -511,6 +577,7 @@ int fs_rename(struct fs_import *import, /* Read the response */ ret = (int)fsr->shadow_rsp.ret_val; DEBUG("The following ret: %d\n", ret); + free_buffer_page(fsr); add_id_to_freelist(priv_req_id, import->freelist); return ret; @@ -521,6 +588,8 @@ int fs_create(struct fs_import *import, { struct fs_request *fsr; unsigned short priv_req_id; + grant_ref_t gref; + void *buffer; RING_IDX back_req_id; struct fsif_request *req; int ret; @@ -533,14 +602,15 @@ int fs_create(struct fs_import *import, priv_req_id = get_id_from_freelist(import->freelist); DEBUG("Request id for fs_create call is: %d\n", priv_req_id); fsr = &import->requests[priv_req_id]; - DEBUG("gref=%d\n", fsr->gref); - fsr->thread = current; - sprintf(fsr->page, "%s", name); + buffer = alloc_buffer_page(fsr, import->dom_id, &gref); + DEBUG("gref=%d\n", gref); + fsr->thread = current; + sprintf(buffer, "%s", name); req = RING_GET_REQUEST(&import->ring, back_req_id); req->type = REQ_CREATE; req->id = priv_req_id; - req->u.fcreate.gref = fsr->gref; + req->u.fcreate.gref = gref; req->u.fcreate.directory = directory; req->u.fcreate.mode = mode; @@ -553,6 +623,7 @@ int fs_create(struct fs_import *import, /* Read the response */ ret = (int)fsr->shadow_rsp.ret_val; DEBUG("The following ret: %d\n", ret); + free_buffer_page(fsr); add_id_to_freelist(priv_req_id, import->freelist); return ret; @@ -563,6 +634,8 @@ char** fs_list(struct fs_import *import, { struct fs_request *fsr; unsigned short priv_req_id; + grant_ref_t gref; + void *buffer; RING_IDX 
back_req_id; struct fsif_request *req; char **files, *current_file; @@ -579,14 +652,15 @@ char** fs_list(struct fs_import *import, priv_req_id = get_id_from_freelist(import->freelist); DEBUG("Request id for fs_list call is: %d\n", priv_req_id); fsr = &import->requests[priv_req_id]; - DEBUG("gref=%d\n", fsr->gref); - fsr->thread = current; - sprintf(fsr->page, "%s", name); + buffer = alloc_buffer_page(fsr, import->dom_id, &gref); + DEBUG("gref=%d\n", gref); + fsr->thread = current; + sprintf(buffer, "%s", name); req = RING_GET_REQUEST(&import->ring, back_req_id); req->type = REQ_DIR_LIST; req->id = priv_req_id; - req->u.flist.gref = fsr->gref; + req->u.flist.gref = gref; req->u.flist.offset = offset; /* Set blocked flag before commiting the request, thus avoiding missed @@ -600,7 +674,7 @@ char** fs_list(struct fs_import *import, files = NULL; if(*nr_files <= 0) goto exit; files = malloc(sizeof(char*) * (*nr_files)); - current_file = fsr->page; + current_file = buffer; for(i=0; i<*nr_files; i++) { files[i] = strdup(current_file); @@ -608,6 +682,7 @@ char** fs_list(struct fs_import *import, } if(has_more != NULL) *has_more = fsr->shadow_rsp.ret_val & HAS_MORE_FLAG; + free_buffer_page(fsr); add_id_to_freelist(priv_req_id, import->freelist); exit: return files; @@ -655,6 +730,8 @@ int64_t fs_space(struct fs_import *impor { struct fs_request *fsr; unsigned short priv_req_id; + grant_ref_t gref; + void *buffer; RING_IDX back_req_id; struct fsif_request *req; int64_t ret; @@ -667,14 +744,15 @@ int64_t fs_space(struct fs_import *impor priv_req_id = get_id_from_freelist(import->freelist); DEBUG("Request id for fs_space is: %d\n", priv_req_id); fsr = &import->requests[priv_req_id]; - DEBUG("gref=%d\n", fsr->gref); - fsr->thread = current; - sprintf(fsr->page, "%s", location); + buffer = alloc_buffer_page(fsr, import->dom_id, &gref); + DEBUG("gref=%d\n", gref); + fsr->thread = current; + sprintf(buffer, "%s", location); req = RING_GET_REQUEST(&import->ring, back_req_id); 
req->type = REQ_FS_SPACE; req->id = priv_req_id; - req->u.fspace.gref = fsr->gref; + req->u.fspace.gref = gref; /* Set blocked flag before commiting the request, thus avoiding missed * response race */ @@ -685,6 +763,7 @@ int64_t fs_space(struct fs_import *impor /* Read the response */ ret = (int64_t)fsr->shadow_rsp.ret_val; DEBUG("The following returned: %lld\n", ret); + free_buffer_page(fsr); add_id_to_freelist(priv_req_id, import->freelist); return ret; @@ -732,6 +811,23 @@ int fs_sync(struct fs_import *import, in /* END OF INDIVIDUAL FILE OPERATIONS */ /******************************************************************************/ +void *alloc_buffer_page(struct fs_request *req, domid_t domid, grant_ref_t *gref) +{ + void *page; + + page = (void *)alloc_page(); + *gref = gnttab_grant_access(domid, virt_to_mfn(page), 0); + req->private1 = page; + req->private2 = (void *)(uintptr_t)(*gref); + + return page; +} + +void free_buffer_page(struct fs_request *req) +{ + gnttab_end_access((grant_ref_t)(uintptr_t)req->private2); + free_page(req->private1); +} static void fsfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) { @@ -797,15 +893,7 @@ static void alloc_request_table(struct f import->freelist = xmalloc_array(unsigned short, import->nr_entries + 1); memset(import->freelist, 0, sizeof(unsigned short) * (import->nr_entries + 1)); for(i=0; i<import->nr_entries; i++) - { - /* TODO: that's a lot of memory */ - requests[i].page = (void *)alloc_page(); - requests[i].gref = gnttab_grant_access(import->dom_id, - virt_to_mfn(requests[i].page), - 0); - //printk(" ===>> Page=%lx, gref=%d, mfn=%lx\n", requests[i].page, requests[i].gref, virt_to_mfn(requests[i].page)); add_id_to_freelist(i, import->freelist); - } import->requests = requests; } @@ -818,22 +906,27 @@ void test_fs_import(void *data) void test_fs_import(void *data) { struct fs_import *import = (struct fs_import *)data; - int ret, fd, i; + int ret, fd, i, repeat_count; int32_t nr_files; char 
buffer[1024]; ssize_t offset; char **files; long ret64; - + struct fsif_stat_response stat; + + repeat_count = 10; /* Sleep for 1s and then try to open a file */ msleep(1000); +again: ret = fs_create(import, "mini-os-created-directory", 1, 0777); printk("Directory create: %d\n", ret); - ret = fs_create(import, "mini-os-created-directory/mini-os-created-file", 0, 0666); + sprintf(buffer, "mini-os-created-directory/mini-os-created-file-%d", + repeat_count); + ret = fs_create(import, buffer, 0, 0666); printk("File create: %d\n", ret); - fd = fs_open(import, "mini-os-created-directory/mini-os-created-file"); + fd = fs_open(import, buffer); printk("File descriptor: %d\n", fd); if(fd < 0) return; @@ -847,7 +940,16 @@ void test_fs_import(void *data) return; offset += ret; } - + ret = fs_stat(import, fd, &stat); + printk("Ret after stat: %d\n", ret); + printk(" st_mode=%o\n", stat.stat_mode); + printk(" st_uid =%d\n", stat.stat_uid); + printk(" st_gid =%d\n", stat.stat_gid); + printk(" st_size=%ld\n", stat.stat_size); + printk(" st_atime=%ld\n", stat.stat_atime); + printk(" st_mtime=%ld\n", stat.stat_mtime); + printk(" st_ctime=%ld\n", stat.stat_ctime); + ret = fs_close(import, fd); printk("Closed fd: %d, ret=%d\n", fd, ret); @@ -858,6 +960,9 @@ void test_fs_import(void *data) ret64 = fs_space(import, "/"); printk("Free space: %lld (=%lld Mb)\n", ret64, (ret64 >> 20)); + repeat_count--; + if(repeat_count > 0) + goto again; } @@ -924,20 +1029,21 @@ static int init_fs_import(struct fs_impo xenbus_transaction_t xbt; char nodename[1024], r_nodename[1024], token[128], *message = NULL; struct fsif_sring *sring; - int retry = 0; + int i, retry = 0; domid_t self_id; xenbus_event_queue events = NULL; printk("Initialising FS fortend to backend dom %d\n", import->dom_id); /* Allocate page for the shared ring */ - sring = (struct fsif_sring*) alloc_page(); - memset(sring, 0, PAGE_SIZE); + sring = (struct fsif_sring*) alloc_pages(FSIF_RING_SIZE_ORDER); + memset(sring, 0, PAGE_SIZE * 
FSIF_RING_SIZE_PAGES); /* Init the shared ring */ SHARED_RING_INIT(sring); + ASSERT(FSIF_NR_READ_GNTS == FSIF_NR_WRITE_GNTS); /* Init private frontend ring */ - FRONT_RING_INIT(&import->ring, sring, PAGE_SIZE); + FRONT_RING_INIT(&import->ring, sring, PAGE_SIZE * FSIF_RING_SIZE_PAGES); import->nr_entries = import->ring.nr_ents; /* Allocate table of requests */ @@ -945,7 +1051,11 @@ static int init_fs_import(struct fs_impo init_SEMAPHORE(&import->reqs_sem, import->nr_entries); /* Grant access to the shared ring */ - import->gnt_ref = gnttab_grant_access(import->dom_id, virt_to_mfn(sring), 0); + for(i=0; i<FSIF_RING_SIZE_PAGES; i++) + import->gnt_refs[i] = + gnttab_grant_access(import->dom_id, + virt_to_mfn((char *)sring + i * PAGE_SIZE), + 0); /* Allocate event channel */ BUG_ON(evtchn_alloc_unbound(import->dom_id, @@ -969,12 +1079,26 @@ again: err = xenbus_printf(xbt, nodename, - "ring-ref", + "ring-size", "%u", - import->gnt_ref); + FSIF_RING_SIZE_PAGES); if (err) { - message = "writing ring-ref"; + message = "writing ring-size"; goto abort_transaction; + } + + for(i=0; i<FSIF_RING_SIZE_PAGES; i++) + { + sprintf(r_nodename, "ring-ref-%d", i); + err = xenbus_printf(xbt, + nodename, + r_nodename, + "%u", + import->gnt_refs[i]); + if (err) { + message = "writing ring-refs"; + goto abort_transaction; + } } err = xenbus_printf(xbt, diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/gntmap.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/gntmap.c Thu Aug 07 11:57:34 2008 +0900 @@ -0,0 +1,252 @@ +/* + * Manages grant mappings from other domains. + * + * Diego Ongaro <diego.ongaro@xxxxxxxxxx>, July 2008 + * + * Files of type FTYPE_GNTMAP contain a gntmap, which is an array of + * (host address, grant handle) pairs. Grant handles come from a hypervisor map + * operation and are needed for the corresponding unmap. + * + * This is a rather naive implementation in terms of performance. 
If we start + * using it frequently, there's definitely some low-hanging fruit here. + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include <os.h> +#include <xmalloc.h> +#include <errno.h> +#include <xen/grant_table.h> +#include <inttypes.h> +#include "gntmap.h" + +#define DEFAULT_MAX_GRANTS 128 + +struct gntmap_entry { + unsigned long host_addr; + grant_handle_t handle; +}; + +static inline int +gntmap_entry_used(struct gntmap_entry *entry) +{ + return entry->host_addr != 0; +} + +static struct gntmap_entry* +gntmap_find_free_entry(struct gntmap *map) +{ + int i; + + for (i = 0; i < map->nentries; i++) { + if (!gntmap_entry_used(&map->entries[i])) + return &map->entries[i]; + } + +#ifdef GNTMAP_DEBUG + printk("gntmap_find_free_entry(map=%p): all %d entries full\n", + map, map->nentries); +#endif + return NULL; +} + +static struct gntmap_entry* +gntmap_find_entry(struct gntmap *map, unsigned long addr) +{ + int i; + + for (i = 0; i < map->nentries; i++) { + if (map->entries[i].host_addr == addr) + return &map->entries[i]; + } + return NULL; +} + +int +gntmap_set_max_grants(struct gntmap *map, int count) +{ +#ifdef GNTMAP_DEBUG + printk("gntmap_set_max_grants(map=%p, count=%d)\n", map, count); +#endif + + if (map->nentries != 0) + return -EBUSY; + + map->entries = xmalloc_array(struct gntmap_entry, count); + if (map->entries == NULL) + return -ENOMEM; + + memset(map->entries, 0, sizeof(struct gntmap_entry) * count); + map->nentries = count; + return 0; +} + +static int +_gntmap_map_grant_ref(struct gntmap_entry *entry, + unsigned long host_addr, + uint32_t domid, + uint32_t ref, + int writable) +{ + struct gnttab_map_grant_ref op; + int rc; + + op.ref = (grant_ref_t) ref; + op.dom = (domid_t) domid; + op.host_addr = (uint64_t) host_addr; + op.flags = GNTMAP_host_map; + if (!writable) + op.flags |= GNTMAP_readonly; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); + if (rc != 0 || op.status != GNTST_okay) { + printk("GNTTABOP_map_grant_ref failed: " + "returned %d, status %" PRId16 "\n", + rc, op.status); + return rc != 0 ? 
rc : op.status; + } + + entry->host_addr = host_addr; + entry->handle = op.handle; + return 0; +} + +static int +_gntmap_unmap_grant_ref(struct gntmap_entry *entry) +{ + struct gnttab_unmap_grant_ref op; + int rc; + + op.host_addr = (uint64_t) entry->host_addr; + op.dev_bus_addr = 0; + op.handle = entry->handle; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); + if (rc != 0 || op.status != GNTST_okay) { + printk("GNTTABOP_unmap_grant_ref failed: " + "returned %d, status %" PRId16 "\n", + rc, op.status); + return rc != 0 ? rc : op.status; + } + + entry->host_addr = 0; + return 0; +} + +int +gntmap_munmap(struct gntmap *map, unsigned long start_address, int count) +{ + int i, rc; + struct gntmap_entry *ent; + +#ifdef GNTMAP_DEBUG + printk("gntmap_munmap(map=%p, start_address=%lx, count=%d)\n", + map, start_address, count); +#endif + + for (i = 0; i < count; i++) { + ent = gntmap_find_entry(map, start_address + PAGE_SIZE * i); + if (ent == NULL) { + printk("gntmap: tried to munmap unknown page\n"); + return -EINVAL; + } + + rc = _gntmap_unmap_grant_ref(ent); + if (rc != 0) + return rc; + } + + return 0; +} + +void* +gntmap_map_grant_refs(struct gntmap *map, + uint32_t count, + uint32_t *domids, + int domids_stride, + uint32_t *refs, + int writable) +{ + unsigned long addr; + struct gntmap_entry *ent; + int i; + +#ifdef GNTMAP_DEBUG + printk("gntmap_map_grant_refs(map=%p, count=%" PRIu32 ", " + "domids=%p [%" PRIu32 "...], domids_stride=%d, " + "refs=%p [%" PRIu32 "...], writable=%d)\n", + map, count, + domids, domids == NULL ? 0 : domids[0], domids_stride, + refs, refs == NULL ? 
0 : refs[0], writable); +#endif + + (void) gntmap_set_max_grants(map, DEFAULT_MAX_GRANTS); + + addr = allocate_ondemand((unsigned long) count, 1); + if (addr == 0) + return NULL; + + for (i = 0; i < count; i++) { + ent = gntmap_find_free_entry(map); + if (ent == NULL || + _gntmap_map_grant_ref(ent, + addr + PAGE_SIZE * i, + domids[i * domids_stride], + refs[i], + writable) != 0) { + + (void) gntmap_munmap(map, addr, i); + return NULL; + } + } + + return (void*) addr; +} + +void +gntmap_init(struct gntmap *map) +{ +#ifdef GNTMAP_DEBUG + printk("gntmap_init(map=%p)\n", map); +#endif + map->nentries = 0; + map->entries = NULL; +} + +void +gntmap_fini(struct gntmap *map) +{ + struct gntmap_entry *ent; + int i; + +#ifdef GNTMAP_DEBUG + printk("gntmap_fini(map=%p)\n", map); +#endif + + for (i = 0; i < map->nentries; i++) { + ent = &map->entries[i]; + if (gntmap_entry_used(ent)) + (void) _gntmap_unmap_grant_ref(ent); + } + + xfree(map->entries); + map->entries = NULL; + map->nentries = 0; +} diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/include/fs.h --- a/extras/mini-os/include/fs.h Thu Aug 07 11:47:34 2008 +0900 +++ b/extras/mini-os/include/fs.h Thu Aug 07 11:57:34 2008 +0900 @@ -4,6 +4,9 @@ #include <xen/io/fsif.h> #include <mini-os/semaphore.h> #include <mini-os/types.h> + +#define FSIF_RING_SIZE_ORDER 1 +#define FSIF_RING_SIZE_PAGES (1<<FSIF_RING_SIZE_ORDER) struct fs_import { @@ -14,7 +17,7 @@ struct fs_import unsigned int nr_entries; /* Number of entries in rings & request array */ struct fsif_front_ring ring; /* frontend ring (contains shared ring) */ - int gnt_ref; /* grant reference to the shared ring */ + u32 gnt_refs[FSIF_RING_SIZE_PAGES]; /* grant references to the shared ring */ evtchn_port_t local_port; /* local event channel port */ char *backend; /* XenBus location of the backend */ struct fs_request *requests; /* Table of requests */ diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/include/gntmap.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/extras/mini-os/include/gntmap.h Thu Aug 07 11:57:34 2008 +0900 @@ -0,0 +1,35 @@ +#ifndef __GNTMAP_H__ +#define __GNTMAP_H__ + +#include <os.h> + +/* + * Please consider struct gntmap opaque. If instead you choose to disregard + * this message, I insist that you keep an eye out for raptors. + */ +struct gntmap { + int nentries; + struct gntmap_entry *entries; +}; + +int +gntmap_set_max_grants(struct gntmap *map, int count); + +int +gntmap_munmap(struct gntmap *map, unsigned long start_address, int count); + +void* +gntmap_map_grant_refs(struct gntmap *map, + uint32_t count, + uint32_t *domids, + int domids_stride, + uint32_t *refs, + int writable); + +void +gntmap_init(struct gntmap *map); + +void +gntmap_fini(struct gntmap *map); + +#endif /* !__GNTMAP_H__ */ diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/include/lib.h --- a/extras/mini-os/include/lib.h Thu Aug 07 11:47:34 2008 +0900 +++ b/extras/mini-os/include/lib.h Thu Aug 07 11:57:34 2008 +0900 @@ -59,6 +59,7 @@ #include <stddef.h> #include <xen/xen.h> #include <xen/event_channel.h> +#include "gntmap.h" #ifdef HAVE_LIBC #include <stdio.h> @@ -138,6 +139,7 @@ enum fd_type { FTYPE_XENBUS, FTYPE_XC, FTYPE_EVTCHN, + FTYPE_GNTMAP, FTYPE_SOCKET, FTYPE_TAP, FTYPE_BLK, @@ -168,6 +170,7 @@ extern struct file { int bound; } ports[MAX_EVTCHN_PORTS]; } evtchn; + struct gntmap gntmap; struct { struct netfront_dev *dev; } tap; diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/lib/sys.c --- a/extras/mini-os/lib/sys.c Thu Aug 07 11:47:34 2008 +0900 +++ b/extras/mini-os/lib/sys.c Thu Aug 07 11:57:34 2008 +0900 @@ -84,6 +84,7 @@ #define NOFILE 32 extern int xc_evtchn_close(int fd); extern int xc_interface_close(int fd); +extern int xc_gnttab_close(int fd); pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER; struct file files[NOFILE] = { @@ -230,8 +231,8 @@ int read(int fd, void *buf, size_t nbyte } case FTYPE_FILE: { ssize_t ret; - if (nbytes > PAGE_SIZE) - nbytes = PAGE_SIZE; + if (nbytes > PAGE_SIZE * 
FSIF_NR_READ_GNTS) + nbytes = PAGE_SIZE * FSIF_NR_READ_GNTS; ret = fs_read(fs_import, files[fd].file.fd, buf, nbytes, files[fd].file.offset); if (ret > 0) { files[fd].file.offset += ret; @@ -291,8 +292,8 @@ int write(int fd, const void *buf, size_ return nbytes; case FTYPE_FILE: { ssize_t ret; - if (nbytes > PAGE_SIZE) - nbytes = PAGE_SIZE; + if (nbytes > PAGE_SIZE * FSIF_NR_WRITE_GNTS) + nbytes = PAGE_SIZE * FSIF_NR_WRITE_GNTS; ret = fs_write(fs_import, files[fd].file.fd, (void *) buf, nbytes, files[fd].file.offset); if (ret > 0) { files[fd].file.offset += ret; @@ -401,6 +402,9 @@ int close(int fd) case FTYPE_EVTCHN: xc_evtchn_close(fd); return 0; + case FTYPE_GNTMAP: + xc_gnttab_close(fd); + return 0; case FTYPE_TAP: shutdown_netfront(files[fd].tap.dev); files[fd].type = FTYPE_NONE; diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/minios.mk --- a/extras/mini-os/minios.mk Thu Aug 07 11:47:34 2008 +0900 +++ b/extras/mini-os/minios.mk Thu Aug 07 11:57:34 2008 +0900 @@ -21,6 +21,7 @@ DEF_CFLAGS += -g #DEF_CFLAGS += -DFS_DEBUG #DEF_CFLAGS += -DLIBC_DEBUG DEF_CFLAGS += -DGNT_DEBUG +DEF_CFLAGS += -DGNTMAP_DEBUG else DEF_CFLAGS += -O3 endif diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/pcifront.c --- a/extras/mini-os/pcifront.c Thu Aug 07 11:47:34 2008 +0900 +++ b/extras/mini-os/pcifront.c Thu Aug 07 11:57:34 2008 +0900 @@ -57,19 +57,26 @@ struct pcifront_dev *init_pcifront(char int retry=0; char* msg; char* nodename = _nodename ? 
_nodename : "device/pci/0"; + int dom; struct pcifront_dev *dev; char path[strlen(nodename) + 1 + 10 + 1]; printk("******************* PCIFRONT for %s **********\n\n\n", nodename); + + snprintf(path, sizeof(path), "%s/backend-id", nodename); + dom = xenbus_read_integer(path); + if (dom == -1) { + printk("no backend\n"); + return NULL; + } dev = malloc(sizeof(*dev)); memset(dev, 0, sizeof(*dev)); dev->nodename = strdup(nodename); - - snprintf(path, sizeof(path), "%s/backend-id", nodename); - dev->dom = xenbus_read_integer(path); + dev->dom = dom; + evtchn_alloc_unbound(dev->dom, pcifront_handler, dev, &dev->evtchn); dev->info = (struct xen_pci_sharedinfo*) alloc_page(); diff -r 7affdebb7a1e -r a39913db6e51 stubdom/Makefile --- a/stubdom/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/stubdom/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -2,18 +2,23 @@ MINI_OS = $(XEN_ROOT)/extras/mini-os MINI_OS = $(XEN_ROOT)/extras/mini-os export XEN_OS=MiniOS - -CONFIG_QEMU=ioemu export stubdom=y export debug=y include $(XEN_ROOT)/Config.mk +override CONFIG_QEMU=ioemu + IOEMU_OPTIONS=--disable-sdl --disable-opengl --disable-gfx-check --disable-vnc-tls --disable-brlapi --disable-kqemu +ZLIB_URL?=http://www.zlib.net ZLIB_VERSION=1.2.3 +LIBPCI_URL?=http://www.kernel.org/pub/software/utils/pciutils LIBPCI_VERSION=2.2.9 +NEWLIB_URL?=ftp://sources.redhat.com/pub/newlib NEWLIB_VERSION=1.16.0 +LWIP_URL?=http://download.savannah.gnu.org/releases/lwip LWIP_VERSION=1.3.0 +GRUB_URL?=http://alpha.gnu.org/gnu/grub GRUB_VERSION=0.97 WGET=wget -c @@ -75,7 +80,7 @@ endif ############## newlib-$(NEWLIB_VERSION).tar.gz: - $(WGET) ftp://sources.redhat.com/pub/newlib/$@ + $(WGET) $(NEWLIB_URL)/$@ newlib-$(NEWLIB_VERSION): newlib-$(NEWLIB_VERSION).tar.gz tar xzf $< @@ -97,7 +102,7 @@ cross-newlib: $(NEWLIB_STAMPFILE) ############ zlib-$(ZLIB_VERSION).tar.gz: - $(WGET) http://www.zlib.net/$@ + $(WGET) $(ZLIB_URL)/$@ ZLIB_STAMPFILE=$(CROSS_ROOT)/$(GNU_TARGET_ARCH)-xen-elf/lib/libz.a .PHONY: cross-zlib @@ 
-114,7 +119,7 @@ cross-zlib: $(ZLIB_STAMPFILE) ############## pciutils-$(LIBPCI_VERSION).tar.bz2: - $(WGET) http://www.kernel.org/pub/software/utils/pciutils/$@ + $(WGET) $(LIBPCI_URL)/$@ pciutils-$(LIBPCI_VERSION): pciutils-$(LIBPCI_VERSION).tar.bz2 tar xjf $< @@ -132,7 +137,7 @@ cross-libpci: $(LIBPCI_STAMPFILE) $(MAKE) CC="$(CC) $(TARGET_CPPFLAGS) $(TARGET_CFLAGS) -I$(realpath $(MINI_OS)/include)" lib/libpci.a && \ $(INSTALL_DATA) lib/libpci.a $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/lib/ && \ $(INSTALL_DIR) $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci && \ - $(INSTALL_DATA) lib/{config,header,pci,types}.h $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci/ \ + $(INSTALL_DATA) lib/config.h lib/header.h lib/pci.h lib/types.h $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci/ \ ) ###### @@ -140,7 +145,7 @@ cross-libpci: $(LIBPCI_STAMPFILE) ###### lwip-$(LWIP_VERSION).tar.gz: - $(WGET) http://download.savannah.gnu.org/releases/lwip/$@ + $(WGET) $(LWIP_URL)/$@ lwip: lwip-$(LWIP_VERSION).tar.gz tar xzf $< @@ -154,7 +159,6 @@ lwip: lwip-$(LWIP_VERSION).tar.gz .PHONY: $(CROSS_ROOT) $(CROSS_ROOT): cross-newlib cross-zlib cross-libpci -.PHONY: mk-headers mk-headers: mkdir -p include/xen && \ ln -sf $(addprefix ../../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) include/xen && \ @@ -191,6 +195,7 @@ endif [ ! -h ioemu/config-host.h ] || rm -f ioemu/config-host.h [ ! 
-h ioemu/config-host.mak ] || rm -f ioemu/config-host.mak $(MAKE) -C $(MINI_OS) links + touch mk-headers TARGETS_MINIOS=$(addprefix mini-os-,$(TARGETS)) $(TARGETS_MINIOS): mini-os-%: @@ -247,7 +252,7 @@ c: $(CROSS_ROOT) ###### grub-$(GRUB_VERSION).tar.gz: - $(WGET) ftp://alpha.gnu.org/gnu/grub/$@ + $(WGET) $(GRUB_URL)/$@ grub-upstream: grub-$(GRUB_VERSION).tar.gz tar xzf $< @@ -291,20 +296,24 @@ pv-grub: mini-os-grub libxc grub ######### ifeq ($(STUBDOM_SUPPORTED),1) -install: install-ioemu install-grub +install: install-readme install-ioemu install-grub else install: endif + +install-readme: + $(INSTALL_DIR) $(DESTDIR)$(DOCDIR) + $(INSTALL_DATA) README $(DESTDIR)$(DOCDIR)/README.stubdom install-ioemu: ioemu-stubdom $(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/bin" $(INSTALL_PROG) stubdom-dm "$(DESTDIR)/usr/lib/xen/bin" $(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot" - $(INSTALL_PROG) mini-os-ioemu/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/ioemu-stubdom.gz" + $(INSTALL_DATA) mini-os-ioemu/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/ioemu-stubdom.gz" install-grub: pv-grub $(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot" - $(INSTALL_PROG) mini-os-grub/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/pv-grub.gz" + $(INSTALL_DATA) mini-os-grub/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/pv-grub.gz" ####### # clean @@ -320,7 +329,8 @@ clean: $(MAKE) -C caml clean $(MAKE) -C c clean $(MAKE) -C grub clean - rm -fr libxc ioemu mini-os include + [ ! -d libxc ] || $(MAKE) -C libxc clean + [ ! 
-d ioemu ] || $(MAKE) -C ioemu clean # clean the cross-compilation result .PHONY: crossclean @@ -328,6 +338,8 @@ crossclean: clean rm -fr $(CROSS_ROOT) rm -fr newlib-build rm -fr zlib-$(ZLIB_VERSION) pciutils-$(LIBPCI_VERSION) + rm -fr libxc ioemu + rm -f mk-headers # clean patched sources .PHONY: patchclean diff -r 7affdebb7a1e -r a39913db6e51 stubdom/README --- a/stubdom/README Thu Aug 07 11:47:34 2008 +0900 +++ b/stubdom/README Thu Aug 07 11:57:34 2008 +0900 @@ -1,13 +1,3 @@ To compile -To compile -========== - -Just run make -j 4, that will download / patch / compile -Then make install to install the result. - -Also, run make and make install in $XEN_ROOT/tools/fs-back - - - IOEMU stubdom ============= @@ -15,6 +5,14 @@ Also, run make and make install in $XEN_ General Configuration ===================== + +Due to a race between the creation of the IOEMU stubdomain itself and allocation +of video memory for the HVM domain, you need to avoid the need for ballooning, +by using the hypervisor dom0_mem= option for instance. 
+ + +There is a sample configuration set in xmexample.hvm-stubdom and +xmexample.hvm-dm In your HVM config "hvmconfig", diff -r 7affdebb7a1e -r a39913db6e51 stubdom/stubdom-dm --- a/stubdom/stubdom-dm Thu Aug 07 11:47:34 2008 +0900 +++ b/stubdom/stubdom-dm Thu Aug 07 11:57:34 2008 +0900 @@ -55,7 +55,7 @@ term() { kill %1 ( [ -n "$vncpid" ] && kill -9 $vncpid - xm destroy stubdom-$domname + xm destroy $domname-dm #xm destroy $domname ) & # We need to exit immediately so as to let xend do the commands above @@ -67,12 +67,12 @@ trap term SIGHUP ############ # stubdomain # Wait for any previous stubdom to terminate -while xm list | grep stubdom-$domname +while xm list | grep $domname-dm do sleep 1 done -creation="xm create -c stubdom-$domname target=$domid memory=32 extra=\"$extra\"" +creation="xm create -c $domname-dm target=$domid memory=32 extra=\"$extra\"" (while true ; do sleep 60 ; done) | /bin/sh -c "$creation" & #xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" & diff -r 7affdebb7a1e -r a39913db6e51 tools/Makefile --- a/tools/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -55,13 +55,14 @@ clean distclean: subdirs-clean clean distclean: subdirs-clean ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH)) -IOEMU_CONFIGURE_CROSS ?= --cross-prefix=$(CROSS_COMPILE) \ +IOEMU_CONFIGURE_CROSS ?= --cpu=$(XEN_TARGET_ARCH) \ + --cross-prefix=$(CROSS_COMPILE) \ --interp-prefix=$(CROSS_SYS_ROOT) endif ioemu/config-host.mak: - cd ioemu && XEN_TARGET_ARCH=$(XEN_TARGET_ARCH) sh configure --prefix=/usr \ - $(IOEMU_CONFIGURE_CROSS) + cd ioemu && XEN_TARGET_ARCH=$(XEN_TARGET_ARCH) sh configure \ + --prefix=$(PREFIX) $(IOEMU_CONFIGURE_CROSS) subdir-all-ioemu subdir-install-ioemu: ioemu/config-host.mak @@ -78,6 +79,12 @@ ioemu-dir-find: rm -rf ioemu-remote ioemu-remote.tmp; \ mkdir ioemu-remote.tmp; rmdir ioemu-remote.tmp; \ $(GIT) clone $(CONFIG_QEMU) ioemu-remote.tmp; \ + if [ "$(QEMU_TAG)" ]; 
then \ + cd ioemu-remote.tmp; \ + $(GIT) branch -D dummy >/dev/null 2>&1 ||:; \ + $(GIT) checkout -b dummy $(QEMU_TAG); \ + cd ..; \ + fi; \ mv ioemu-remote.tmp ioemu-remote; \ fi; \ rm -f ioemu-dir; \ @@ -90,7 +97,7 @@ ioemu-dir-find: esac; \ export XEN_ROOT; \ cd ioemu-dir; \ - ./xen-setup + ./xen-setup $(IOEMU_CONFIGURE_CROSS) subdir-all-ioemu-dir subdir-install-ioemu-dir: ioemu-dir-find diff -r 7affdebb7a1e -r a39913db6e51 tools/blktap/Makefile --- a/tools/blktap/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/blktap/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -8,3 +8,6 @@ SUBDIRS-y += drivers .PHONY: all clean install all clean install: %: subdirs-% +install: + $(INSTALL_DIR) $(DESTDIR)$(DOCDIR) + $(INSTALL_DATA) README $(DESTDIR)$(DOCDIR)/README.blktap diff -r 7affdebb7a1e -r a39913db6e51 tools/blktap/lib/Makefile --- a/tools/blktap/lib/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/blktap/lib/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -43,7 +43,7 @@ install: all .PHONY: clean clean: - rm -rf *.a *.so* *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS + rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen TAGS libblktap.so.$(MAJOR).$(MINOR): $(OBJS_PIC) $(CC) $(CFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,$(SONAME) $(SHLIB_CFLAGS) \ diff -r 7affdebb7a1e -r a39913db6e51 tools/console/Makefile --- a/tools/console/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/console/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -21,7 +21,7 @@ clean: xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c)) $(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS) \ - $(UTIL_LIBS) $(SOCKET_LIBS) + $(UTIL_LIBS) $(SOCKET_LIBS) -lrt xenconsole: $(patsubst %.c,%.o,$(wildcard client/*.c)) $(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS) \ diff -r 7affdebb7a1e -r a39913db6e51 tools/console/daemon/io.c --- a/tools/console/daemon/io.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/console/daemon/io.c Thu Aug 07 11:57:34 2008 +0900 @@ -622,9 +622,9 @@ static struct domain *create_domain(int { struct domain *dom; char *s; - struct timeval 
tv; - - if (gettimeofday(&tv, NULL) < 0) { + struct timespec ts; + + if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) { dolog(LOG_ERR, "Cannot get time of day %s:%s:L%d", __FILE__, __FUNCTION__, __LINE__); return NULL; @@ -666,7 +666,7 @@ static struct domain *create_domain(int dom->buffer.capacity = 0; dom->buffer.max_capacity = 0; dom->event_count = 0; - dom->next_period = (tv.tv_sec * 1000) + (tv.tv_usec / 1000) + RATE_LIMIT_PERIOD; + dom->next_period = (ts.tv_sec * 1000) + (ts.tv_nsec / 1000000) + RATE_LIMIT_PERIOD; dom->next = NULL; dom->ring_ref = -1; @@ -971,7 +971,7 @@ void handle_io(void) struct domain *d, *n; int max_fd = -1; struct timeval timeout; - struct timeval tv; + struct timespec ts; long long now, next_timeout = 0; FD_ZERO(&readfds); @@ -985,9 +985,9 @@ void handle_io(void) max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd); } - if (gettimeofday(&tv, NULL) < 0) + if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) return; - now = (tv.tv_sec * 1000) + (tv.tv_usec / 1000); + now = (ts.tv_sec * 1000) + (ts.tv_nsec / 1000000); /* Re-calculate any event counter allowances & unblock domains with new allowance */ diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/Makefile --- a/tools/examples/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/examples/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -8,11 +8,18 @@ XENDOMAINS_SYSCONFIG = init.d/sysconfig. # Xen configuration dir and configs to go there. 
XEN_CONFIG_DIR = /etc/xen +XEN_READMES = README +XEN_READMES += README.incompatibilities XEN_CONFIGS = xend-config.sxp XEN_CONFIGS += xm-config.xml XEN_CONFIGS += xmexample1 XEN_CONFIGS += xmexample2 +XEN_CONFIGS += xmexample3 XEN_CONFIGS += xmexample.hvm +XEN_CONFIGS += xmexample.hvm-stubdom +XEN_CONFIGS += xmexample.hvm-dm +XEN_CONFIGS += xmexample.pv-grub +XEN_CONFIGS += xmexample.nbd XEN_CONFIGS += xmexample.vti XEN_CONFIGS += xend-pci-quirks.sxp XEN_CONFIGS += xend-pci-permissive.sxp @@ -59,7 +66,16 @@ build: build: .PHONY: install -install: all install-initd install-configs install-scripts $(HOTPLUGS) +install: all install-readmes install-initd install-configs install-scripts $(HOTPLUGS) + +.PHONY: install-readmes +install-readmes: + [ -d $(DESTDIR)$(XEN_CONFIG_DIR) ] || \ + $(INSTALL_DIR) $(DESTDIR)$(XEN_CONFIG_DIR) + set -e; for i in $(XEN_READMES); \ + do [ -e $(DESTDIR)$(XEN_CONFIG_DIR)/$$i ] || \ + $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_CONFIG_DIR); \ + done .PHONY: install-initd install-initd: diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/README --- a/tools/examples/README Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/examples/README Thu Aug 07 11:57:34 2008 +0900 @@ -44,4 +44,8 @@ xmexample.nbd - configuration scri xmexample.nbd - configuration script that uses NBD filesystems xmexample.hvm - a configuration script for creating a hvm domain with 'xm create' +xmexample.hvm-stubdom - a configuration script for creating a hvm domain with + 'xm create' that utilizes a stubdomain for device model +xmexample.pv-grub - a configuration script for creating a domain with 'xm create' + which boots PV-GRUB. 
xmexample.vti - a configuration script for creating a domain on vti diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/stubdom-ExampleHVMDomain --- a/tools/examples/stubdom-ExampleHVMDomain Thu Aug 07 11:47:34 2008 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -# Not to be started directly, -# See xmexample.hvm-stubdom and stubdom/README for more details - -kernel = "/usr/lib/xen/boot/ioemu-stubdom.gz" - -# Must be the same as in xmexample.hvm-stubdom, with a prepended vif for TCP/IP -# networking in the stubdomain itself, here just '' -vif = [ '', 'type=ioemu, bridge=xenbr0' ] - -# Set here instead of in xmexample.hvm-stubdom -disk = [ 'file:/var/images/min-el3-i386.img,hda,w', ',hdc:cdrom,r' ] - -# Actual output via PVFB -vfb = [ 'type=sdl' ] diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xend-config.sxp --- a/tools/examples/xend-config.sxp Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/examples/xend-config.sxp Thu Aug 07 11:57:34 2008 +0900 @@ -245,3 +245,7 @@ # Rotation count of qemu-dm log file. #(qemu-dm-logrotate-count 10) + +# Path where persistent domain configuration is stored. 
+# Default is /var/lib/xend/domains/ +#(xend-domains-path /var/lib/xend/domains) diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample.hvm --- a/tools/examples/xmexample.hvm Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/examples/xmexample.hvm Thu Aug 07 11:57:34 2008 +0900 @@ -156,11 +156,6 @@ vnc=1 #---------------------------------------------------------------------------- # try to find an unused port for the VNC server, default = 1 #vncunused=1 - -#---------------------------------------------------------------------------- -# enable spawning vncviewer for domain's console -# (only valid when vnc=1), default = 0 -#vncconsole=0 #---------------------------------------------------------------------------- # set password for domain's VNC console diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample.hvm-dm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/examples/xmexample.hvm-dm Thu Aug 07 11:57:34 2008 +0900 @@ -0,0 +1,14 @@ +# Not to be started directly, +# See xmexample.hvm-stubdom and stubdom/README for more details + +kernel = "/usr/lib/xen/boot/ioemu-stubdom.gz" + +# Must be the same as in xmexample.hvm-stubdom, with a prepended vif for TCP/IP +# networking in the stubdomain itself, here just '' +vif = [ '', 'type=ioemu, bridge=xenbr0' ] + +# Set here instead of in xmexample.hvm-stubdom +disk = [ 'file:/var/images/min-el3-i386.img,hda,w', ',hdc:cdrom,r' ] + +# Actual output via PVFB +vfb = [ 'type=sdl' ] diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample.hvm-stubdom --- a/tools/examples/xmexample.hvm-stubdom Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/examples/xmexample.hvm-stubdom Thu Aug 07 11:57:34 2008 +0900 @@ -7,7 +7,7 @@ #============================================================================ # # This is a version using a stubdomain for device model, see -# stubdom-ExampleHVMDomain and stubdom/README for more details +# xmexample.hvm-dm and README.stubdom for more details # The differences with xmexample.hvm are 
marked with "STUBDOM" #---------------------------------------------------------------------------- @@ -30,7 +30,7 @@ memory = 128 # shadow_memory = 8 # A name for your domain. All domains must have different names. -name = "ExampleHVMDomain" +name = "xmexample.hvm" # 128-bit UUID for the domain. The default behavior is to generate a new UUID # on each call to 'xm create'. diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample.pv-grub --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/examples/xmexample.pv-grub Thu Aug 07 11:57:34 2008 +0900 @@ -0,0 +1,212 @@ +# -*- mode: python; -*- +#============================================================================ +# Python configuration setup for 'xm create'. +# This script sets the parameters used when a domain is created using 'xm create'. +# You use a separate script for each domain you want to create, or +# you can set the parameters for the domain on the xm command line. +#============================================================================ + +#---------------------------------------------------------------------------- +# PV GRUB image file. +kernel = "/usr/lib/xen/boot/pv-grub.gz" + +# Optional provided menu.lst. +#ramdisk = "/boot/guests/menu.lst" + +# Sets path to menu.lst +extra = "(hd0,0)/boot/grub/menu.lst" +# can be a TFTP-served path (DHCP will automatically be run) +# extra = "(nd)/netboot/menu.lst" +# can be configured automatically by GRUB's DHCP option 150 (see grub manual) +# extra = "" + +# Initial memory allocation (in megabytes) for the new domain. +# +# WARNING: Creating a domain with insufficient memory may cause out of +# memory errors. The domain needs enough memory to boot kernel +# and modules. Allocating less than 32MBs is not recommended. +memory = 64 + +# A name for your domain. All domains must have different names. +name = "ExampleDomain" + +# 128-bit UUID for the domain. The default behavior is to generate a new UUID +# on each call to 'xm create'. 
+#uuid = "06ed00fe-1162-4fc4-b5d8-11993ee4a8b9" + +# List of which CPUS this domain is allowed to use, default Xen picks +#cpus = "" # leave to Xen to pick +#cpus = "0" # all vcpus run on CPU0 +#cpus = "0-3,5,^1" # all vcpus run on cpus 0,2,3,5 +#cpus = ["2", "3"] # VCPU0 runs on CPU2, VCPU1 runs on CPU3 + +# Number of Virtual CPUS to use, default is 1 +#vcpus = 1 + +#---------------------------------------------------------------------------- +# Define network interfaces. + +# By default, no network interfaces are configured. You may have one created +# with sensible defaults using an empty vif clause: +# +# vif = [ '' ] +# +# or optionally override backend, bridge, ip, mac, script, type, or vifname: +# +# vif = [ 'mac=00:16:3e:00:00:11, bridge=xenbr0' ] +# +# or more than one interface may be configured: +# +# vif = [ '', 'bridge=xenbr1' ] + +vif = [ '' ] + +#---------------------------------------------------------------------------- +# Define the disk devices you want the domain to have access to, and +# what you want them accessible as. +# Each disk entry is of the form phy:UNAME,DEV,MODE +# where UNAME is the device, DEV is the device name the domain will see, +# and MODE is r for read-only, w for read-write. + +disk = [ 'phy:hda1,hda1,w' ] + +#---------------------------------------------------------------------------- +# Define frame buffer device. +# +# By default, no frame buffer device is configured. +# +# To create one using the SDL backend and sensible defaults: +# +# vfb = [ 'type=sdl' ] +# +# This uses environment variables XAUTHORITY and DISPLAY. You +# can override that: +# +# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ] +# +# To create one using the VNC backend and sensible defaults: +# +# vfb = [ 'type=vnc' ] +# +# The backend listens on 127.0.0.1 port 5900+N by default, where N is +# the domain ID. 
You can override both address and N: +# +# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=1' ] +# +# Or you can bind the first unused port above 5900: +# +# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ] +# +# You can override the password: +# +# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ] +# +# Empty password disables authentication. Defaults to the vncpasswd +# configured in xend-config.sxp. + +#---------------------------------------------------------------------------- +# Define to which TPM instance the user domain should communicate. +# The vtpm entry is of the form 'instance=INSTANCE,backend=DOM' +# where INSTANCE indicates the instance number of the TPM the VM +# should be talking to and DOM provides the domain where the backend +# is located. +# Note that no two virtual machines should try to connect to the same +# TPM instance. The handling of all TPM instances does require +# some management effort in so far that VM configuration files (and thus +# a VM) should be associated with a TPM instance throughout the lifetime +# of the VM / VM configuration file. The instance number must be +# greater than or equal to 1. +#vtpm = [ 'instance=1,backend=0' ] + +#---------------------------------------------------------------------------- +# Set the kernel command line for the new domain. +# You only need to define the IP parameters and hostname if the domain's +# IP config doesn't, e.g. in ifcfg-eth0 or via DHCP. +# You can use 'extra' to set the runlevel and custom environment +# variables used by custom rc scripts (e.g. VMID=, usr= ). + +# Set if you want dhcp to allocate the IP address. +#dhcp="dhcp" +# Set netmask. +#netmask= +# Set default gateway. +#gateway= +# Set the hostname. +#hostname= "vm%d" % vmid + +# Set root device. +root = "/dev/hda1 ro" + +# Root device for nfs. +#root = "/dev/nfs" +# The nfs server. +#nfs_server = '192.0.2.1' +# Root directory on the nfs server.
+#nfs_root = '/full/path/to/root/directory' + +#---------------------------------------------------------------------------- +# Configure the behaviour when a domain exits. There are three 'reasons' +# for a domain to stop: poweroff, reboot, and crash. For each of these you +# may specify: +# +# "destroy", meaning that the domain is cleaned up as normal; +# "restart", meaning that a new domain is started in place of the old +# one; +# "preserve", meaning that no clean-up is done until the domain is +# manually destroyed (using xm destroy, for example); or +# "rename-restart", meaning that the old domain is not cleaned up, but is +# renamed and a new domain started in its place. +# +# In the event a domain stops due to a crash, you have the additional options: +# +# "coredump-destroy", meaning dump the crashed domain's core and then destroy; +# "coredump-restart", meaning dump the crashed domain's core and then restart. +# +# The default is +# +# on_poweroff = 'destroy' +# on_reboot = 'restart' +# on_crash = 'restart' +# +# For backwards compatibility we also support the deprecated option restart +# +# restart = 'onreboot' means on_poweroff = 'destroy' +# on_reboot = 'restart' +# on_crash = 'destroy' +# +# restart = 'always' means on_poweroff = 'restart' +# on_reboot = 'restart' +# on_crash = 'restart' +# +# restart = 'never' means on_poweroff = 'destroy' +# on_reboot = 'destroy' +# on_crash = 'destroy' + +#on_poweroff = 'destroy' +#on_reboot = 'restart' +#on_crash = 'restart' + +#----------------------------------------------------------------------------- +# Configure PVSCSI devices: +# +#vscsi=[ 'PDEV, VDEV' ] +# +# PDEV gives physical SCSI device to be attached to specified guest +# domain by one of the following identifier format. +# - XX:XX:XX:XX (4-tuples with decimal notation which shows +# "host:channel:target:lun") +# - /dev/sdxx or sdx +# - /dev/stxx or stx +# - /dev/sgxx or sgx +# - result of 'scsi_id -gu -s'. +# ex. 
# scsi_id -gu -s /block/sdb +# 36000b5d0006a0000006a0257004c0000 +# +# VDEV gives virtual SCSI device by 4-tuples (XX:XX:XX:XX) as +# which the specified guest domain recognize. +# + +#vscsi = [ '/dev/sdx, 0:0:0:0' ] + +#============================================================================ + diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample.vti --- a/tools/examples/xmexample.vti Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/examples/xmexample.vti Thu Aug 07 11:57:34 2008 +0900 @@ -95,11 +95,6 @@ vnc=0 #vncunused=1 #---------------------------------------------------------------------------- -# enable spawning vncviewer for domain's console -# (only valid when vnc=1), default = 0 -#vncconsole=0 - -#---------------------------------------------------------------------------- # set password for domain's VNC console # default is depents on vncpasswd in xend-config.sxp vncpasswd='' diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample3 --- a/tools/examples/xmexample3 Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/examples/xmexample3 Thu Aug 07 11:57:34 2008 +0900 @@ -207,4 +207,26 @@ extra = "4 VMID=%d" % vmid #on_reboot = 'restart' #on_crash = 'restart' -#============================================================================ +#----------------------------------------------------------------------------- +# Configure PVSCSI devices: +# +#vscsi=[ 'PDEV, VDEV' ] +# +# PDEV gives physical SCSI device to be attached to specified guest +# domain by one of the following identifier format. +# - XX:XX:XX:XX (4-tuples with decimal notation which shows +# "host:channel:target:lun") +# - /dev/sdxx or sdx +# - /dev/stxx or stx +# - /dev/sgxx or sgx +# - result of 'scsi_id -gu -s'. +# ex. # scsi_id -gu -s /block/sdb +# 36000b5d0006a0000006a0257004c0000 +# +# VDEV gives virtual SCSI device by 4-tuples (XX:XX:XX:XX) as +# which the specified guest domain recognize. 
+# + +#vscsi = [ '/dev/sdx, 0:0:0:0' ] + +#============================================================================ diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/extboot/Makefile --- a/tools/firmware/extboot/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/extboot/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -1,7 +1,5 @@ XEN_ROOT = ../../.. XEN_ROOT = ../../.. include $(XEN_ROOT)/tools/firmware/Rules.mk - -CFLAGS += -I$(XEN_ROOT)/tools/libxc -I. .PHONY: all all: extboot.bin diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/hvmloader/Makefile --- a/tools/firmware/hvmloader/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/hvmloader/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -33,14 +33,14 @@ OBJS = $(patsubst %.c,%.o,$(SRCS)) OBJS = $(patsubst %.c,%.o,$(SRCS)) .PHONY: all -all: hvmloader +all: subdirs-all + $(MAKE) hvmloader hvmloader.o: roms.h smbios.o: CFLAGS += -D__SMBIOS_DATE__="\"$(shell date +%m/%d/%Y)\"" -hvmloader: subdirs-all $(OBJS) - $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(LOADADDR) \ - -o hvmloader.tmp $(OBJS) acpi/acpi.a +hvmloader: $(OBJS) acpi/acpi.a + $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(LOADADDR) -o hvmloader.tmp $^ $(OBJCOPY) hvmloader.tmp hvmloader rm -f hvmloader.tmp diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/hvmloader/acpi/acpi2_0.h --- a/tools/firmware/hvmloader/acpi/acpi2_0.h Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/hvmloader/acpi/acpi2_0.h Thu Aug 07 11:57:34 2008 +0900 @@ -381,7 +381,7 @@ struct acpi_20_madt_intsrcovr { #pragma pack () -int acpi_build_tables(uint8_t *); +void acpi_build_tables(void); #endif /* _ACPI_2_0_H_ */ diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/hvmloader/acpi/build.c --- a/tools/firmware/hvmloader/acpi/build.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/hvmloader/acpi/build.c Thu Aug 07 11:57:34 2008 +0900 @@ -248,8 +248,7 @@ static int construct_secondary_tables(ui return align16(offset); } -/* Copy all the ACPI table to buffer. 
*/ -int acpi_build_tables(uint8_t *buf) +static void __acpi_build_tables(uint8_t *buf, int *low_sz, int *high_sz) { struct acpi_20_rsdp *rsdp; struct acpi_20_rsdt *rsdt; @@ -261,7 +260,9 @@ int acpi_build_tables(uint8_t *buf) unsigned long secondary_tables[16]; int offset = 0, i; - offset += construct_bios_info_table(&buf[offset]); + /* + * Fill in high-memory data structures, starting at @buf. + */ facs = (struct acpi_20_facs *)&buf[offset]; memcpy(facs, &Facs, sizeof(struct acpi_20_facs)); @@ -325,7 +326,18 @@ int acpi_build_tables(uint8_t *buf) offsetof(struct acpi_header, checksum), rsdt->header.length); + *high_sz = offset; + + /* + * Fill in low-memory data structures: bios_info_table and RSDP. + */ + + buf = (uint8_t *)ACPI_PHYSICAL_ADDRESS; + offset = 0; + + offset += construct_bios_info_table(&buf[offset]); rsdp = (struct acpi_20_rsdp *)&buf[offset]; + memcpy(rsdp, &Rsdp, sizeof(struct acpi_20_rsdp)); offset += align16(sizeof(struct acpi_20_rsdp)); rsdp->rsdt_address = (unsigned long)rsdt; @@ -337,7 +349,28 @@ int acpi_build_tables(uint8_t *buf) offsetof(struct acpi_20_rsdp, extended_checksum), sizeof(struct acpi_20_rsdp)); - return offset; + *low_sz = offset; +} + +void acpi_build_tables(void) +{ + int high_sz, low_sz; + uint8_t *buf; + + /* Find out size of high-memory ACPI data area. */ + buf = (uint8_t *)&_end; + __acpi_build_tables(buf, &low_sz, &high_sz); + memset(buf, 0, high_sz); + + /* Allocate data area and set up ACPI tables there. 
*/ + buf = (uint8_t *)e820_malloc(high_sz); + __acpi_build_tables(buf, &low_sz, &high_sz); + + printf(" - Lo data: %08lx-%08lx\n" + " - Hi data: %08lx-%08lx\n", + (unsigned long)ACPI_PHYSICAL_ADDRESS, + (unsigned long)ACPI_PHYSICAL_ADDRESS + low_sz - 1, + (unsigned long)buf, (unsigned long)buf + high_sz - 1); } /* diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/hvmloader/hvmloader.c --- a/tools/firmware/hvmloader/hvmloader.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/hvmloader/hvmloader.c Thu Aug 07 11:57:34 2008 +0900 @@ -449,7 +449,7 @@ static void init_xen_platform_io_base(vo int main(void) { - int acpi_sz = 0, vgabios_sz = 0, etherboot_sz = 0, rombios_sz, smbios_sz; + int vgabios_sz = 0, etherboot_sz = 0, rombios_sz, smbios_sz; int extboot_sz = 0; printf("HVM Loader\n"); @@ -508,8 +508,7 @@ int main(void) if ( get_acpi_enabled() ) { printf("Loading ACPI ...\n"); - acpi_sz = acpi_build_tables((uint8_t *)ACPI_PHYSICAL_ADDRESS); - ASSERT((ACPI_PHYSICAL_ADDRESS + acpi_sz) <= 0xF0000); + acpi_build_tables(); } cmos_write_memory_size(); @@ -531,10 +530,6 @@ int main(void) printf(" %05x-%05x: SMBIOS tables\n", SMBIOS_PHYSICAL_ADDRESS, SMBIOS_PHYSICAL_ADDRESS + smbios_sz - 1); - if ( acpi_sz ) - printf(" %05x-%05x: ACPI tables\n", - ACPI_PHYSICAL_ADDRESS, - ACPI_PHYSICAL_ADDRESS + acpi_sz - 1); if ( rombios_sz ) printf(" %05x-%05x: Main BIOS\n", ROMBIOS_PHYSICAL_ADDRESS, diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/hvmloader/util.h --- a/tools/firmware/hvmloader/util.h Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/hvmloader/util.h Thu Aug 07 11:57:34 2008 +0900 @@ -145,4 +145,6 @@ void smp_initialise(void); #define isdigit(c) ((c) >= '0' && (c) <= '9') +extern char _start[], _end[]; + #endif /* __HVMLOADER_UTIL_H__ */ diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/32bitbios.c --- a/tools/firmware/rombios/32bit/32bitbios.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/rombios/32bit/32bitbios.c Thu Aug 07 11:57:34 2008 
+0900 @@ -47,5 +47,7 @@ uint32_t jumptable[IDX_LAST+1] __attribu TABLE_ENTRY(IDX_TCPA_INITIALIZE_TPM, tcpa_initialize_tpm), + TABLE_ENTRY(IDX_GET_S3_WAKING_VECTOR, get_s3_waking_vector), + TABLE_ENTRY(IDX_LAST , 0) /* keep last */ }; diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/Makefile --- a/tools/firmware/rombios/32bit/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/rombios/32bit/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -4,21 +4,22 @@ SOURCES = util.c SOURCES = util.c TARGET = 32bitbios_flat.h -CFLAGS += -I../ -DGCC_PROTOS +CFLAGS += $(CFLAGS_include) -I.. -DGCC_PROTOS SUBDIRS = tcgbios MODULES = tcgbios/tcgbiosext.o .PHONY: all -all: $(TARGET) +all: subdirs-all + $(MAKE) $(TARGET) .PHONY: clean clean: subdirs-clean rm -rf *.o $(TARGET) -$(TARGET): subdirs-all 32bitbios.o util.o - $(LD) $(LDFLAGS_DIRECT) -s -r 32bitbios.o $(MODULES) util.o -o 32bitbios_all.o +$(TARGET): 32bitbios.o $(MODULES) util.o + $(LD) $(LDFLAGS_DIRECT) -s -r $^ -o 32bitbios_all.o @nm 32bitbios_all.o | \ egrep '^ +U ' >/dev/null && { \ echo "There are undefined symbols in the BIOS:"; \ diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/tcgbios/Makefile --- a/tools/firmware/rombios/32bit/tcgbios/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/rombios/32bit/tcgbios/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -5,7 +5,7 @@ FILES = tcgbios tpm_drivers FILES = tcgbios tpm_drivers OBJECTS = $(foreach f,$(FILES),$(f).o) -CFLAGS += -I../ -I../../ -DGCC_PROTOS +CFLAGS += $(CFLAGS_include) -I.. -I../.. 
-DGCC_PROTOS .PHONY: all clean diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/tcgbios/tcgbios.c --- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c Thu Aug 07 11:57:34 2008 +0900 @@ -24,10 +24,9 @@ #include "rombios_compat.h" #include "tpm_drivers.h" +#include "util.h" #include "tcgbios.h" #include "32bitprotos.h" -#include "util.h" - /* local structure and variables */ struct ptti_cust { @@ -135,7 +134,7 @@ static inline uint32_t bswap(uint32_t a) *******************************************************/ typedef struct { - struct acpi_20_tcpa *tcpa_ptr; + struct acpi_20_tcpa_clisrv *tcpa_ptr; unsigned char *lasa_last_ptr; uint16_t entry_count; uint16_t flags; @@ -260,45 +259,19 @@ uint8_t acpi_validate_entry(struct acpi_ } -/* - * Search for the RSDP ACPI table in the memory starting at addr and - * ending at addr + len - 1. - */ -static struct acpi_20_rsdp *find_rsdp(const void *start, unsigned int len) -{ - char *rsdp = (char *)start; - char *end = rsdp + len; - /* scan memory in steps of 16 bytes */ - while (rsdp < end) { - /* check for expected string */ - if (!strncmp( rsdp, "RSD PTR ", 8)) - return (struct acpi_20_rsdp *)rsdp; - rsdp += 0x10; - } - return 0; -} - void tcpa_acpi_init(void) { struct acpi_20_rsdt *rsdt; - struct acpi_20_tcpa *tcpa = (void *)0; + struct acpi_20_tcpa_clisrv *tcpa = (void *)0; struct acpi_20_rsdp *rsdp; uint32_t length; uint16_t off; int found = 0; - uint16_t ebda_seg; - - if (MA_IsTPMPresent() == 0) { + + if (MA_IsTPMPresent() == 0) return; - } - - /* RSDP in EBDA? 
*/ - ebda_seg = *(uint16_t *)ADDR_FROM_SEG_OFF(0x40, 0xe); - rsdp = find_rsdp((void *)(ebda_seg << 16), 1024); - - if (!rsdp) - rsdp = find_rsdp((void *)(ACPI_SEGMENT << 4), 0x20000); - + + rsdp = find_rsdp(); if (rsdp) { uint32_t ctr = 0; /* get RSDT from RSDP */ @@ -307,7 +280,7 @@ void tcpa_acpi_init(void) off = 36; while ((off + 3) < length) { /* try all pointers to structures */ - tcpa = (struct acpi_20_tcpa *)rsdt->entry[ctr]; + tcpa = (struct acpi_20_tcpa_clisrv *)rsdt->entry[ctr]; /* valid TCPA ACPI table ? */ if (ACPI_2_0_TCPA_SIGNATURE == tcpa->header.signature && acpi_validate_entry(&tcpa->header) == 0) { @@ -398,7 +371,7 @@ unsigned char *tcpa_get_lasa_base_ptr(vo unsigned char *tcpa_get_lasa_base_ptr(void) { unsigned char *lasa = 0; - struct acpi_20_tcpa *tcpa = tcpa_acpi.tcpa_ptr; + struct acpi_20_tcpa_clisrv *tcpa = tcpa_acpi.tcpa_ptr; if (tcpa != 0) { uint32_t class = tcpa->platform_class; if (class == TCPA_ACPI_CLASS_CLIENT) { @@ -416,7 +389,7 @@ uint32_t tcpa_get_laml(void) uint32_t tcpa_get_laml(void) { uint32_t laml = 0; - struct acpi_20_tcpa *tcpa = tcpa_acpi.tcpa_ptr; + struct acpi_20_tcpa_clisrv *tcpa = tcpa_acpi.tcpa_ptr; if (tcpa != 0) { uint32_t class = tcpa->platform_class; if (class == TCPA_ACPI_CLASS_CLIENT) { diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/tcgbios/tcgbios.h --- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.h Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.h Thu Aug 07 11:57:34 2008 +0900 @@ -1,6 +1,5 @@ #ifndef TCGBIOS_H #define TCGBIOS_H - /* TCPA ACPI definitions */ #define TCPA_ACPI_CLASS_CLIENT 0 @@ -117,14 +116,7 @@ /* address of locality 0 (TIS) */ #define TPM_TIS_BASE_ADDRESS 0xfed40000 -#define ASCII32(a,b,c,d) ((((Bit32u)a) << 0) | (((Bit32u)b) << 8) | \ - (((Bit32u)c) << 16) | (((Bit32u)d) << 24) ) -#define ACPI_2_0_TCPA_SIGNATURE ASCII32('T','C','P','A') /* "TCPA" */ - - #define STATUS_FLAG_SHUTDOWN (1 << 0) - -#define ACPI_SEGMENT 0xE000 /* Input and 
Output blocks for the TCG BIOS commands */ @@ -232,37 +224,6 @@ struct pcpes uint32_t event; } __attribute__((packed)); - -struct acpi_header -{ - uint32_t signature; - uint32_t length; - uint8_t revision; - uint8_t checksum; - uint8_t oem_id[6]; - uint64_t oem_table_id; - uint32_t oem_revision; - uint32_t creator_id; - uint32_t creator_revision; -} __attribute__((packed)); - -struct acpi_20_rsdt { - struct acpi_header header; - uint32_t entry[1]; -} __attribute__((packed)); - -struct acpi_20_rsdp { - uint64_t signature; - uint8_t checksum; - uint8_t oem_id[6]; - uint8_t revision; - uint32_t rsdt_address; - uint32_t length; - uint64_t xsdt_address; - uint8_t extended_checksum; - uint8_t reserved[3]; -} __attribute__((packed)); - struct acpi_20_tcpa_client { uint32_t laml; uint64_t lasa; @@ -275,7 +236,7 @@ struct acpi_20_tcpa_server { /* more here */ } __attribute__((packed)); -struct acpi_20_tcpa { +struct acpi_20_tcpa_clisrv { struct acpi_header header; uint16_t platform_class; union { diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/util.c --- a/tools/firmware/rombios/32bit/util.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/rombios/32bit/util.c Thu Aug 07 11:57:34 2008 +0900 @@ -19,6 +19,7 @@ */ #include <stdarg.h> #include <stdint.h> +#include "rombios_compat.h" #include "util.h" static void putchar(char c); @@ -92,11 +93,11 @@ int strcmp(const char *cs, const char *c int strncmp(const char *s1, const char *s2, uint32_t n) { - uint32_t ctr; - for (ctr = 0; ctr < n; ctr++) - if (s1[ctr] != s2[ctr]) - return (int)(s1[ctr] - s2[ctr]); - return 0; + uint32_t ctr; + for (ctr = 0; ctr < n; ctr++) + if (s1[ctr] != s2[ctr]) + return (int)(s1[ctr] - s2[ctr]); + return 0; } void *memcpy(void *dest, const void *src, unsigned n) @@ -402,3 +403,64 @@ void mssleep(uint32_t waittime) y = x; } } + +/* + * Search for the RSDP ACPI table in the memory starting at addr and + * ending at addr + len - 1. 
+ */ +static struct acpi_20_rsdp *__find_rsdp(const void *start, unsigned int len) +{ + char *rsdp = (char *)start; + char *end = rsdp + len; + /* scan memory in steps of 16 bytes */ + while (rsdp < end) { + /* check for expected string */ + if (!strncmp(rsdp, "RSD PTR ", 8)) + return (struct acpi_20_rsdp *)rsdp; + rsdp += 0x10; + } + return 0; +} + +struct acpi_20_rsdp *find_rsdp(void) +{ + struct acpi_20_rsdp *rsdp; + uint16_t ebda_seg; + + ebda_seg = *(uint16_t *)ADDR_FROM_SEG_OFF(0x40, 0xe); + rsdp = __find_rsdp((void *)(ebda_seg << 16), 1024); + if (!rsdp) + rsdp = __find_rsdp((void *)0xE0000, 0x20000); + + return rsdp; +} + +uint32_t get_s3_waking_vector(void) +{ + struct acpi_20_rsdp *rsdp = find_rsdp(); + struct acpi_20_xsdt *xsdt; + struct acpi_20_fadt *fadt; + struct acpi_20_facs *facs; + uint32_t vector; + + if (!rsdp) + return 0; + + xsdt = (struct acpi_20_xsdt *)(long)rsdp->xsdt_address; + if (!xsdt) + return 0; + + fadt = (struct acpi_20_fadt *)(long)xsdt->entry[0]; + if (!fadt || (fadt->header.signature != ACPI_2_0_FADT_SIGNATURE)) + return 0; + + facs = (struct acpi_20_facs *)(long)fadt->x_firmware_ctrl; + if (!facs) + return 0; + + vector = facs->x_firmware_waking_vector; + if (!vector) + vector = facs->firmware_waking_vector; + + return vector; +} diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/util.h --- a/tools/firmware/rombios/32bit/util.h Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/rombios/32bit/util.h Thu Aug 07 11:57:34 2008 +0900 @@ -1,5 +1,7 @@ #ifndef UTIL_H #define UTIL_H + +#include "../hvmloader/acpi/acpi2_0.h" void outb(uint16_t addr, uint8_t val); void outw(uint16_t addr, uint16_t val); @@ -39,5 +41,6 @@ static inline uint32_t mmio_readl(uint32 return *(volatile uint32_t *)addr; } +struct acpi_20_rsdp *find_rsdp(void); #endif diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bitgateway.c --- a/tools/firmware/rombios/32bitgateway.c Thu Aug 07 11:47:34 2008 +0900 +++ 
b/tools/firmware/rombios/32bitgateway.c Thu Aug 07 11:57:34 2008 +0900 @@ -356,6 +356,9 @@ Upcall: call _store_returnaddress ; store away pop ax + ; XXX GDT munging requires ROM to be writable! + call _enable_rom_write_access + rol bx, #2 mov si, #jmptable seg cs @@ -381,6 +384,8 @@ Upcall: push bp mov bp,sp push eax ; preserve work register + + call _disable_rom_write_access call _get_returnaddress mov 2[bp], ax ; 16bit return address onto stack @@ -408,3 +413,10 @@ ASM_END #include "32bitgateway.h" #include "tcgbios.c" + +Bit32u get_s3_waking_vector() +{ + ASM_START + DoUpcall(IDX_GET_S3_WAKING_VECTOR) + ASM_END +} diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bitprotos.h --- a/tools/firmware/rombios/32bitprotos.h Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/rombios/32bitprotos.h Thu Aug 07 11:57:34 2008 +0900 @@ -17,8 +17,8 @@ #define IDX_TCPA_IPL 10 #define IDX_TCPA_INITIALIZE_TPM 11 #define IDX_TCPA_MEASURE_POST 12 - -#define IDX_LAST 13 /* keep last! */ +#define IDX_GET_S3_WAKING_VECTOR 13 +#define IDX_LAST 14 /* keep last! */ #ifdef GCC_PROTOS #define PARMS(x...) 
x @@ -42,4 +42,6 @@ void tcpa_measure_post( PARMS(Bit32u fro void tcpa_measure_post( PARMS(Bit32u from, Bit32u to) ); Bit32u tcpa_initialize_tpm( PARMS(Bit32u physpres) ); +Bit32u get_s3_waking_vector( PARMS(void) ); + #endif diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/Makefile --- a/tools/firmware/rombios/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/rombios/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -4,7 +4,8 @@ SUBDIRS := 32bit SUBDIRS := 32bit .PHONY: all -all: subdirs-all BIOS-bochs-latest +all: subdirs-all + $(MAKE) BIOS-bochs-latest .PHONY: clean clean: subdirs-clean diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/rombios.c --- a/tools/firmware/rombios/rombios.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/firmware/rombios/rombios.c Thu Aug 07 11:57:34 2008 +0900 @@ -738,7 +738,9 @@ typedef struct { // EBDA must be at most 768 bytes; it lives at 0x9fc00, and the boot // device tables are at 0x9ff00 -- 0x9ffff typedef struct { - unsigned char filler1[0x3D]; + unsigned char ebda_size; + unsigned char cmos_shutdown_status; + unsigned char filler1[0x3B]; // FDPT - Can be splitted in data members if needed unsigned char fdpt0[0x10]; @@ -757,6 +759,7 @@ typedef struct { upcall_t upcall; } ebda_data_t; + #define EBDA_CMOS_SHUTDOWN_STATUS_OFFSET 1 #define EbdaData ((ebda_data_t *) 0) // for access to the int13ext structure @@ -1464,19 +1467,30 @@ copy_e820_table() } void -disable_rom_write_access() +set_rom_write_access(action) + Bit16u action; { Bit16u off = (Bit16u)&((struct bios_info *)0)->xen_pfiob; ASM_START - mov si,.disable_rom_write_access.off[bp] + mov si,.set_rom_write_access.off[bp] push ds mov ax,#(ACPI_PHYSICAL_ADDRESS >> 4) mov ds,ax mov dx,[si] pop ds - mov ax,#PFFLAG_ROM_LOCK + mov ax,.set_rom_write_access.action[bp] out dx,al ASM_END +} + +void enable_rom_write_access() +{ + set_rom_write_access(0); +} + +void disable_rom_write_access() +{ + set_rom_write_access(PFFLAG_ROM_LOCK); } #endif /* HVMASSIST */ @@ 
-2325,78 +2339,38 @@ debugger_off() outb(0xfedc, 0x00); } -/* according to memory layout defined in acpi_build_tables(), - acpi FACS table is located in ACPI_PHYSICAL_ADDRESS(0xEA000) */ -#define ACPI_FACS_ADDRESS 0xEA000 -#define ACPI_FACS_OFFSET 0x10 -/* S3 resume status in CMOS 0Fh shutdown status byte*/ - -Bit32u facs_get32(offs) -Bit16u offs; -{ -ASM_START - push bp - mov bp, sp - - push ds - mov ax, #(ACPI_FACS_ADDRESS >> 4) - mov ds, ax - - mov bx, 4[bp] - mov ax, [bx] - mov dx, 2[bx] - pop ds - - pop bp -ASM_END -} - - void s3_resume() { Bit32u s3_wakeup_vector; - extern Bit16u s3_wakeup_ip; - extern Bit16u s3_wakeup_cs; - extern Bit8u s3_resume_flag; + Bit16u s3_wakeup_ip, s3_wakeup_cs; + Bit8u cmos_shutdown_status; ASM_START push ds - mov ax, #0xF000 + push ax + mov ax, #EBDA_SEG mov ds, ax + mov al, [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET] + mov .s3_resume.cmos_shutdown_status[bp], al + pop ax + pop ds ASM_END - if (s3_resume_flag!=CMOS_SHUTDOWN_S3){ - goto s3_out; - } - s3_resume_flag = 0; - - /* get x_firmware_waking_vector */ - s3_wakeup_vector = facs_get32(ACPI_FACS_OFFSET+24); - if (!s3_wakeup_vector) { - /* get firmware_waking_vector */ - s3_wakeup_vector = facs_get32(ACPI_FACS_OFFSET+12); - if (!s3_wakeup_vector) { - goto s3_out; - } - } - - /* setup wakeup vector */ + if (cmos_shutdown_status != CMOS_SHUTDOWN_S3) + return; + + s3_wakeup_vector = get_s3_waking_vector(); + if (!s3_wakeup_vector) + return; + s3_wakeup_ip = s3_wakeup_vector & 0xF; s3_wakeup_cs = s3_wakeup_vector >> 4; ASM_START - jmpf [_s3_wakeup_ip] - -; S3 data -_s3_wakeup_ip: dw 0x0a -_s3_wakeup_cs: dw 0x0 -_s3_resume_flag: db 0 ; set at POST time by CMOS[0xF] shutdown status -ASM_END - -s3_out: -ASM_START - pop ds + push .s3_resume.s3_wakeup_cs[bp] + push .s3_resume.s3_wakeup_ip[bp] + retf ASM_END } @@ -9865,52 +9839,9 @@ post: ;; Examine CMOS shutdown status. 
mov al, bl - - ;; 0xFE S3 resume - cmp AL, #0xFE - jnz not_s3_resume - - ;; set S3 resume flag - mov dx, #0xF000 + mov dx, #EBDA_SEG mov ds, dx - mov [_s3_resume_flag], AL - jmp normal_post - -not_s3_resume: - - ;; 0x00, 0x09, 0x0D+ = normal startup - cmp AL, #0x00 - jz normal_post - cmp AL, #0x0d - jae normal_post - cmp AL, #0x09 - je normal_post - - ;; 0x05 = eoi + jmp via [0x40:0x67] jump - cmp al, #0x05 - je eoi_jmp_post - - ;; Examine CMOS shutdown status. - ;; 0x01,0x02,0x03,0x04,0x06,0x07,0x08, 0x0a, 0x0b, 0x0c = Unimplemented shutdown status. - push bx - call _shutdown_status_panic - -#if 0 - HALT(__LINE__) - ; - ;#if 0 - ; 0xb0, 0x20, /* mov al, #0x20 */ - ; 0xe6, 0x20, /* out 0x20, al ;send EOI to PIC */ - ;#endif - ; - pop es - pop ds - popa - iret -#endif - -normal_post: - ; case 0: normal startup + mov [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET], AL cli mov ax, #0xfffe @@ -9928,8 +9859,6 @@ normal_post: stosw call _log_bios_start - - call _clobber_entry_point ;; set all interrupts to default handler mov bx, #0x0000 ;; offset index @@ -10123,8 +10052,11 @@ post_default_ints: out 0xa1, AL ;slave pic: unmask IRQ 12, 13, 14 #ifdef HVMASSIST + call _enable_rom_write_access + call _clobber_entry_point call _copy_e820_table call smbios_init + call _disable_rom_write_access #endif call _init_boot_vectors @@ -10174,10 +10106,6 @@ post_default_ints: #if BX_TCGBIOS call tcpa_post_part2 #endif - -#ifdef HVMASSIST - call _disable_rom_write_access -#endif ;; Start the boot sequence. See the comments in int19_relocated ;; for why we use INT 18h instead of INT 19h here. 
diff -r 7affdebb7a1e -r a39913db6e51 tools/fs-back/fs-backend.c --- a/tools/fs-back/fs-backend.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/fs-back/fs-backend.c Thu Aug 07 11:57:34 2008 +0900 @@ -16,7 +16,7 @@ static int export_id = 0; static int export_id = 0; static int mount_id = 0; -void dispatch_response(struct mount *mount, int priv_req_id) +static void dispatch_response(struct fs_mount *mount, int priv_req_id) { int i; struct fs_op *op; @@ -41,7 +41,7 @@ void dispatch_response(struct mount *mou add_id_to_freelist(priv_req_id, mount->freelist); } -static void handle_aio_events(struct mount *mount) +static void handle_aio_events(struct fs_mount *mount) { int fd, ret, count, i, notify; evtchn_port_t port; @@ -103,7 +103,7 @@ read_event_channel: } -void allocate_request_array(struct mount *mount) +static void allocate_request_array(struct fs_mount *mount) { int i, nr_entries = mount->nr_entries; struct fs_request *requests; @@ -123,10 +123,10 @@ void allocate_request_array(struct mount } -void* handle_mount(void *data) +static void *handle_mount(void *data) { int more, notify; - struct mount *mount = (struct mount *)data; + struct fs_mount *mount = (struct fs_mount *)data; printf("Starting a thread for mount: %d\n", mount->mount_id); allocate_request_array(mount); @@ -147,7 +147,8 @@ moretodo: int i; struct fs_op *op; - printf("Got a request at %d\n", cons); + printf("Got a request at %d (of %d)\n", + cons, RING_SIZE(&mount->ring)); req = RING_GET_REQUEST(&mount->ring, cons); printf("Request type=%d\n", req->type); for(i=0;;i++) @@ -193,11 +194,12 @@ moretodo: static void handle_connection(int frontend_dom_id, int export_id, char *frontend) { - struct mount *mount; + struct fs_mount *mount; struct fs_export *export; int evt_port; pthread_t handling_thread; struct fsif_sring *sring; + uint32_t dom_ids[MAX_RING_SIZE]; int i; printf("Handling connection from dom=%d, for export=%d\n", @@ -216,13 +218,13 @@ static void handle_connection(int fronte return; } - mount = 
(struct mount*)malloc(sizeof(struct mount)); + mount = (struct fs_mount*)malloc(sizeof(struct fs_mount)); mount->dom_id = frontend_dom_id; mount->export = export; mount->mount_id = mount_id++; xenbus_read_mount_request(mount, frontend); printf("Frontend found at: %s (gref=%d, evtchn=%d)\n", - mount->frontend, mount->gref, mount->remote_evtchn); + mount->frontend, mount->grefs[0], mount->remote_evtchn); xenbus_write_backend_node(mount); mount->evth = -1; mount->evth = xc_evtchn_open(); @@ -235,11 +237,15 @@ static void handle_connection(int fronte mount->gnth = -1; mount->gnth = xc_gnttab_open(); assert(mount->gnth != -1); - sring = xc_gnttab_map_grant_ref(mount->gnth, - mount->dom_id, - mount->gref, - PROT_READ | PROT_WRITE); - BACK_RING_INIT(&mount->ring, sring, XC_PAGE_SIZE); + for(i=0; i<mount->shared_ring_size; i++) + dom_ids[i] = mount->dom_id; + sring = xc_gnttab_map_grant_refs(mount->gnth, + mount->shared_ring_size, + dom_ids, + mount->grefs, + PROT_READ | PROT_WRITE); + + BACK_RING_INIT(&mount->ring, sring, mount->shared_ring_size * XC_PAGE_SIZE); mount->nr_entries = mount->ring.nr_ents; for (i = 0; i < MAX_FDS; i++) mount->fds[i] = -1; @@ -287,7 +293,7 @@ next_select: } while (1); } -struct fs_export* create_export(char *name, char *export_path) +static struct fs_export* create_export(char *name, char *export_path) { struct fs_export *curr_export, **last_export; diff -r 7affdebb7a1e -r a39913db6e51 tools/fs-back/fs-backend.h --- a/tools/fs-back/fs-backend.h Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/fs-back/fs-backend.h Thu Aug 07 11:57:34 2008 +0900 @@ -13,6 +13,7 @@ #define EXPORTS_NODE ROOT_NODE"/"EXPORTS_SUBNODE #define WATCH_NODE EXPORTS_NODE"/requests" #define MAX_FDS 16 +#define MAX_RING_SIZE 16 struct fs_export { @@ -26,22 +27,24 @@ struct fs_request { int active; void *page; /* Pointer to mapped grant */ + int count; struct fsif_request req_shadow; struct aiocb aiocb; }; -struct mount +struct fs_mount { struct fs_export *export; int dom_id; char 
*frontend; int mount_id; /* = backend id */ - grant_ref_t gref; + grant_ref_t grefs[MAX_RING_SIZE]; evtchn_port_t remote_evtchn; int evth; /* Handle to the event channel */ evtchn_port_t local_evtchn; int gnth; + int shared_ring_size; /* in pages */ struct fsif_back_ring ring; int nr_entries; struct fs_request *requests; @@ -56,17 +59,17 @@ bool xenbus_create_request_node(void); bool xenbus_create_request_node(void); int xenbus_register_export(struct fs_export *export); int xenbus_get_watch_fd(void); -void xenbus_read_mount_request(struct mount *mount, char *frontend); -void xenbus_write_backend_node(struct mount *mount); -void xenbus_write_backend_ready(struct mount *mount); +void xenbus_read_mount_request(struct fs_mount *mount, char *frontend); +void xenbus_write_backend_node(struct fs_mount *mount); +void xenbus_write_backend_ready(struct fs_mount *mount); /* File operations, implemented in fs-ops.c */ struct fs_op { int type; /* Type of request (from fsif.h) this handlers are responsible for */ - void (*dispatch_handler)(struct mount *mount, struct fsif_request *req); - void (*response_handler)(struct mount *mount, struct fs_request *req); + void (*dispatch_handler)(struct fs_mount *mount, struct fsif_request *req); + void (*response_handler)(struct fs_mount *mount, struct fs_request *req); }; /* This NULL terminated array of all file requests handlers */ diff -r 7affdebb7a1e -r a39913db6e51 tools/fs-back/fs-ops.c --- a/tools/fs-back/fs-ops.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/fs-back/fs-ops.c Thu Aug 07 11:57:34 2008 +0900 @@ -10,7 +10,7 @@ #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> -#include <sys/vfs.h> +#include <sys/statvfs.h> #include <sys/mount.h> #include <unistd.h> #include "fs-backend.h" @@ -23,7 +23,7 @@ #define BUFFER_SIZE 1024 -unsigned short get_request(struct mount *mount, struct fsif_request *req) +static unsigned short get_request(struct fs_mount *mount, struct fsif_request *req) { unsigned short id = 
get_id_from_freelist(mount->freelist); @@ -34,7 +34,7 @@ unsigned short get_request(struct mount return id; } -int get_fd(struct mount *mount) +static int get_fd(struct fs_mount *mount) { int i; @@ -45,7 +45,7 @@ int get_fd(struct mount *mount) } -void dispatch_file_open(struct mount *mount, struct fsif_request *req) +static void dispatch_file_open(struct fs_mount *mount, struct fsif_request *req) { char *file_name, full_path[BUFFER_SIZE]; int fd; @@ -93,7 +93,7 @@ void dispatch_file_open(struct mount *mo rsp->ret_val = (uint64_t)fd; } -void dispatch_file_close(struct mount *mount, struct fsif_request *req) +static void dispatch_file_close(struct fs_mount *mount, struct fsif_request *req) { int ret; RING_IDX rsp_idx; @@ -122,19 +122,25 @@ void dispatch_file_close(struct mount *m rsp->id = req_id; rsp->ret_val = (uint64_t)ret; } -void dispatch_file_read(struct mount *mount, struct fsif_request *req) + +#define MAX_GNTS 16 +static void dispatch_file_read(struct fs_mount *mount, struct fsif_request *req) { void *buf; - int fd; + int fd, i, count; uint16_t req_id; unsigned short priv_id; struct fs_request *priv_req; /* Read the request */ - buf = xc_gnttab_map_grant_ref(mount->gnth, - mount->dom_id, - req->u.fread.gref, - PROT_WRITE); + assert(req->u.fread.len > 0); + count = (req->u.fread.len - 1) / XC_PAGE_SIZE + 1; + assert(count <= FSIF_NR_READ_GNTS); + buf = xc_gnttab_map_domain_grant_refs(mount->gnth, + count, + mount->dom_id, + req->u.fread.grefs, + PROT_WRITE); req_id = req->id; printf("File read issued for FD=%d (len=%"PRIu64", offest=%"PRIu64")\n", @@ -149,6 +155,7 @@ void dispatch_file_read(struct mount *mo printf("Private id is: %d\n", priv_id); priv_req = &mount->requests[priv_id]; priv_req->page = buf; + priv_req->count = count; /* Dispatch AIO read request */ bzero(&priv_req->aiocb, sizeof(struct aiocb)); @@ -164,14 +171,16 @@ out: mount->ring.req_cons++; } -void end_file_read(struct mount *mount, struct fs_request *priv_req) +static void 
end_file_read(struct fs_mount *mount, struct fs_request *priv_req) { RING_IDX rsp_idx; fsif_response_t *rsp; uint16_t req_id; /* Release the grant */ - assert(xc_gnttab_munmap(mount->gnth, priv_req->page, 1) == 0); + assert(xc_gnttab_munmap(mount->gnth, + priv_req->page, + priv_req->count) == 0); /* Get a response from the ring */ rsp_idx = mount->ring.rsp_prod_pvt++; @@ -182,19 +191,23 @@ void end_file_read(struct mount *mount, rsp->ret_val = (uint64_t)aio_return(&priv_req->aiocb); } -void dispatch_file_write(struct mount *mount, struct fsif_request *req) +static void dispatch_file_write(struct fs_mount *mount, struct fsif_request *req) { void *buf; - int fd; + int fd, count, i; uint16_t req_id; unsigned short priv_id; struct fs_request *priv_req; /* Read the request */ - buf = xc_gnttab_map_grant_ref(mount->gnth, - mount->dom_id, - req->u.fwrite.gref, - PROT_READ); + assert(req->u.fwrite.len > 0); + count = (req->u.fwrite.len - 1) / XC_PAGE_SIZE + 1; + assert(count <= FSIF_NR_WRITE_GNTS); + buf = xc_gnttab_map_domain_grant_refs(mount->gnth, + count, + mount->dom_id, + req->u.fwrite.grefs, + PROT_READ); req_id = req->id; printf("File write issued for FD=%d (len=%"PRIu64", offest=%"PRIu64")\n", @@ -209,6 +222,7 @@ void dispatch_file_write(struct mount *m printf("Private id is: %d\n", priv_id); priv_req = &mount->requests[priv_id]; priv_req->page = buf; + priv_req->count = count; /* Dispatch AIO write request */ bzero(&priv_req->aiocb, sizeof(struct aiocb)); @@ -224,14 +238,16 @@ void dispatch_file_write(struct mount *m mount->ring.req_cons++; } -void end_file_write(struct mount *mount, struct fs_request *priv_req) +static void end_file_write(struct fs_mount *mount, struct fs_request *priv_req) { RING_IDX rsp_idx; fsif_response_t *rsp; uint16_t req_id; /* Release the grant */ - assert(xc_gnttab_munmap(mount->gnth, priv_req->page, 1) == 0); + assert(xc_gnttab_munmap(mount->gnth, + priv_req->page, + priv_req->count) == 0); /* Get a response from the ring */ rsp_idx = 
mount->ring.rsp_prod_pvt++; @@ -242,7 +258,7 @@ void end_file_write(struct mount *mount, rsp->ret_val = (uint64_t)aio_return(&priv_req->aiocb); } -void dispatch_stat(struct mount *mount, struct fsif_request *req) +static void dispatch_stat(struct fs_mount *mount, struct fsif_request *req) { struct fsif_stat_response *buf; struct stat stat; @@ -251,12 +267,6 @@ void dispatch_stat(struct mount *mount, RING_IDX rsp_idx; fsif_response_t *rsp; - /* Read the request */ - buf = xc_gnttab_map_grant_ref(mount->gnth, - mount->dom_id, - req->u.fstat.gref, - PROT_WRITE); - req_id = req->id; if (req->u.fstat.fd < MAX_FDS) fd = mount->fds[req->u.fstat.fd]; @@ -272,38 +282,35 @@ void dispatch_stat(struct mount *mount, /* Stat, and create the response */ ret = fstat(fd, &stat); printf("Mode=%o, uid=%d, a_time=%ld\n", - stat.st_mode, stat.st_uid, stat.st_atime); - buf->stat_mode = stat.st_mode; - buf->stat_uid = stat.st_uid; - buf->stat_gid = stat.st_gid; + stat.st_mode, stat.st_uid, (long)stat.st_atime); + + /* Get a response from the ring */ + rsp_idx = mount->ring.rsp_prod_pvt++; + printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id); + rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx); + rsp->id = req_id; + rsp->fstat.stat_ret = (uint32_t)ret; + rsp->fstat.stat_mode = stat.st_mode; + rsp->fstat.stat_uid = stat.st_uid; + rsp->fstat.stat_gid = stat.st_gid; #ifdef BLKGETSIZE if (S_ISBLK(stat.st_mode)) { unsigned long sectors; if (ioctl(fd, BLKGETSIZE, &sectors)) { perror("getting device size\n"); - buf->stat_size = 0; + rsp->fstat.stat_size = 0; } else - buf->stat_size = sectors << 9; + rsp->fstat.stat_size = sectors << 9; } else #endif - buf->stat_size = stat.st_size; - buf->stat_atime = stat.st_atime; - buf->stat_mtime = stat.st_mtime; - buf->stat_ctime = stat.st_ctime; - - /* Release the grant */ - assert(xc_gnttab_munmap(mount->gnth, buf, 1) == 0); - - /* Get a response from the ring */ - rsp_idx = mount->ring.rsp_prod_pvt++; - printf("Writing response at: idx=%d, 
id=%d\n", rsp_idx, req_id); - rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx); - rsp->id = req_id; - rsp->ret_val = (uint64_t)ret; -} - - -void dispatch_truncate(struct mount *mount, struct fsif_request *req) + rsp->fstat.stat_size = stat.st_size; + rsp->fstat.stat_atime = stat.st_atime; + rsp->fstat.stat_mtime = stat.st_mtime; + rsp->fstat.stat_ctime = stat.st_ctime; +} + + +static void dispatch_truncate(struct fs_mount *mount, struct fsif_request *req) { int fd, ret; uint16_t req_id; @@ -335,7 +342,7 @@ void dispatch_truncate(struct mount *mou rsp->ret_val = (uint64_t)ret; } -void dispatch_remove(struct mount *mount, struct fsif_request *req) +static void dispatch_remove(struct fs_mount *mount, struct fsif_request *req) { char *file_name, full_path[BUFFER_SIZE]; int ret; @@ -374,7 +381,7 @@ void dispatch_remove(struct mount *mount } -void dispatch_rename(struct mount *mount, struct fsif_request *req) +static void dispatch_rename(struct fs_mount *mount, struct fsif_request *req) { char *buf, *old_file_name, *new_file_name; char old_full_path[BUFFER_SIZE], new_full_path[BUFFER_SIZE]; @@ -421,7 +428,7 @@ void dispatch_rename(struct mount *mount } -void dispatch_create(struct mount *mount, struct fsif_request *req) +static void dispatch_create(struct fs_mount *mount, struct fsif_request *req) { char *file_name, full_path[BUFFER_SIZE]; int ret; @@ -459,7 +466,17 @@ void dispatch_create(struct mount *mount else { printf("Issuing create for file: %s\n", full_path); - ret = creat(full_path, mode); + ret = get_fd(mount); + if (ret >= 0) { + int real_fd = creat(full_path, mode); + if (real_fd < 0) + ret = -1; + else + { + mount->fds[ret] = real_fd; + printf("Got FD: %d for real %d\n", ret, real_fd); + } + } } printf("Got ret %d (errno=%d)\n", ret, errno); @@ -471,7 +488,7 @@ void dispatch_create(struct mount *mount rsp->ret_val = (uint64_t)ret; } -void dispatch_list(struct mount *mount, struct fsif_request *req) +static void dispatch_list(struct fs_mount *mount, struct 
fsif_request *req) { char *file_name, *buf, full_path[BUFFER_SIZE]; uint32_t offset, nr_files, error_code; @@ -541,7 +558,7 @@ error_out: rsp->ret_val = ret_val; } -void dispatch_chmod(struct mount *mount, struct fsif_request *req) +static void dispatch_chmod(struct fs_mount *mount, struct fsif_request *req) { int fd, ret; RING_IDX rsp_idx; @@ -572,13 +589,13 @@ void dispatch_chmod(struct mount *mount, rsp->ret_val = (uint64_t)ret; } -void dispatch_fs_space(struct mount *mount, struct fsif_request *req) +static void dispatch_fs_space(struct fs_mount *mount, struct fsif_request *req) { char *file_name, full_path[BUFFER_SIZE]; RING_IDX rsp_idx; fsif_response_t *rsp; uint16_t req_id; - struct statfs stat; + struct statvfs stat; int64_t ret; printf("Dispatching fs space operation (gref=%d).\n", req->u.fspace.gref); @@ -596,7 +613,7 @@ void dispatch_fs_space(struct mount *mou mount->export->export_path, file_name); assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0); printf("Issuing fs space for %s\n", full_path); - ret = statfs(full_path, &stat); + ret = statvfs(full_path, &stat); if(ret >= 0) ret = stat.f_bsize * stat.f_bfree; @@ -613,7 +630,7 @@ void dispatch_fs_space(struct mount *mou rsp->ret_val = (uint64_t)ret; } -void dispatch_file_sync(struct mount *mount, struct fsif_request *req) +static void dispatch_file_sync(struct fs_mount *mount, struct fsif_request *req) { int fd; uint16_t req_id; @@ -643,7 +660,7 @@ void dispatch_file_sync(struct mount *mo mount->ring.req_cons++; } -void end_file_sync(struct mount *mount, struct fs_request *priv_req) +static void end_file_sync(struct fs_mount *mount, struct fs_request *priv_req) { RING_IDX rsp_idx; fsif_response_t *rsp; diff -r 7affdebb7a1e -r a39913db6e51 tools/fs-back/fs-xenbus.c --- a/tools/fs-back/fs-xenbus.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/fs-back/fs-xenbus.c Thu Aug 07 11:57:34 2008 +0900 @@ -109,10 +109,11 @@ int xenbus_get_watch_fd(void) return xs_fileno(xsh); } -void 
xenbus_read_mount_request(struct mount *mount, char *frontend) +void xenbus_read_mount_request(struct fs_mount *mount, char *frontend) { char node[1024]; char *s; + int i; assert(xsh != NULL); #if 0 @@ -125,10 +126,18 @@ void xenbus_read_mount_request(struct mo s = xs_read(xsh, XBT_NULL, node, NULL); assert(strcmp(s, STATE_READY) == 0); free(s); - snprintf(node, sizeof(node), "%s/ring-ref", frontend); + snprintf(node, sizeof(node), "%s/ring-size", frontend); s = xs_read(xsh, XBT_NULL, node, NULL); - mount->gref = atoi(s); + mount->shared_ring_size = atoi(s); + assert(mount->shared_ring_size <= MAX_RING_SIZE); free(s); + for(i=0; i<mount->shared_ring_size; i++) + { + snprintf(node, sizeof(node), "%s/ring-ref-%d", frontend, i); + s = xs_read(xsh, XBT_NULL, node, NULL); + mount->grefs[i] = atoi(s); + free(s); + } snprintf(node, sizeof(node), "%s/event-channel", frontend); s = xs_read(xsh, XBT_NULL, node, NULL); mount->remote_evtchn = atoi(s); @@ -150,7 +159,7 @@ static int get_self_id(void) } -void xenbus_write_backend_node(struct mount *mount) +void xenbus_write_backend_node(struct fs_mount *mount) { char node[1024], backend_node[1024]; int self_id; @@ -167,7 +176,7 @@ void xenbus_write_backend_node(struct mo xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, strlen(STATE_INITIALISED)); } -void xenbus_write_backend_ready(struct mount *mount) +void xenbus_write_backend_ready(struct fs_mount *mount) { char node[1024]; int self_id; diff -r 7affdebb7a1e -r a39913db6e51 tools/include/xen-sys/MiniOS/privcmd.h --- a/tools/include/xen-sys/MiniOS/privcmd.h Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/include/xen-sys/MiniOS/privcmd.h Thu Aug 07 11:57:34 2008 +0900 @@ -10,9 +10,7 @@ typedef struct privcmd_hypercall } privcmd_hypercall_t; typedef struct privcmd_mmap_entry { - u64 va; u64 mfn; - u64 npages; } privcmd_mmap_entry_t; #endif /* __MINIOS_PUBLIC_PRIVCMD_H__ */ diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/block-vbd.c --- a/tools/ioemu/block-vbd.c Thu Aug 07 11:47:34 
2008 +0900 +++ b/tools/ioemu/block-vbd.c Thu Aug 07 11:57:34 2008 +0900 @@ -273,6 +273,10 @@ static BlockDriverAIOCB *vbd_aio_flush(B BDRVVbdState *s = bs->opaque; VbdAIOCB *acb = NULL; + if (s->info.mode == O_RDONLY) { + cb(opaque, 0); + return NULL; + } if (s->info.barrier == 1) { acb = vbd_aio_setup(bs, 0, NULL, 0, s->info.flush == 1 ? vbd_nop_cb : cb, opaque); diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/pass-through.c --- a/tools/ioemu/hw/pass-through.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/ioemu/hw/pass-through.c Thu Aug 07 11:57:34 2008 +0900 @@ -138,6 +138,13 @@ static int pt_msixctrl_reg_write(struct struct pt_reg_tbl *cfg_entry, uint16_t *value, uint16_t dev_value, uint16_t valid_mask); +/* pt_reg_info_tbl declaration + * - only for emulated register (either a part or whole bit). + * - for passthrough register that need special behavior (like interacting with + * other component), set emu_mask to all 0 and specify r/w func properly. + * - do NOT use ALL F for init_val, otherwise the tbl will not be registered. + */ + /* Header Type0 reg static infomation table */ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { /* Command reg */ @@ -564,6 +571,13 @@ static struct pt_reg_info_tbl pt_emu_reg }, }; +/* pt_reg_grp_info_tbl declaration + * - only for emulated or zero-hardwired register group. + * - for register group with dynamic size, just set grp_size to 0xFF and + * specify size_init func properly. + * - no need to specify emu_reg_tbl for zero-hardwired type. 
+ */ + /* emul reg group static infomation table */ static const struct pt_reg_grp_info_tbl pt_emu_reg_grp_tbl[] = { /* Header Type0 reg group */ @@ -821,7 +835,7 @@ void pt_iomem_map(PCIDevice *d, int i, u assigned_device->bases[i].e_size= e_size; PT_LOG("e_phys=%08x maddr=%lx type=%d len=%d index=%d first_map=%d\n", - e_phys, assigned_device->bases[i].access.maddr, + e_phys, (unsigned long)assigned_device->bases[i].access.maddr, type, e_size, i, first_map); if ( e_size == 0 ) @@ -843,7 +857,7 @@ void pt_iomem_map(PCIDevice *d, int i, u } } - /* map only valid guest address (include 0) */ + /* map only valid guest address */ if (e_phys != -1) { /* Create new mapping */ @@ -860,7 +874,7 @@ void pt_iomem_map(PCIDevice *d, int i, u ret = remove_msix_mapping(assigned_device, i); if ( ret != 0 ) - PT_LOG("Error: remove MSX-X mmio mapping failed!\n"); + PT_LOG("Error: remove MSI-X mmio mapping failed!\n"); } } @@ -996,8 +1010,11 @@ static void pt_pci_write_config(PCIDevic int index = 0; int ret = 0; - PT_LOG("write(%x.%x): address=%04x val=0x%08x len=%d\n", - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); +#ifdef PT_DEBUG_PCI_CONFIG_ACCESS + PT_LOG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n", + pci_bus_num(d->bus), (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), + address, val, len); +#endif /* check offset range */ if (address >= 0xFF) @@ -1049,7 +1066,10 @@ static void pt_pci_write_config(PCIDevic if (reg_grp->grp_type == GRP_TYPE_HARDWIRED) { /* ignore silently */ - PT_LOG("Access to 0 Hardwired register.\n"); + PT_LOG("Access to 0 Hardwired register. " + "[%02x:%02x.%x][Offset:%02xh][Length:%d]\n", + pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), + (d->devfn & 0x7), address, len); goto exit; } } @@ -1067,22 +1087,22 @@ static void pt_pci_write_config(PCIDevic break; } - /* check libpci error */ + /* check libpci result */ valid_mask = (0xFFFFFFFF >> ((4 - len) << 3)); if ((read_val & valid_mask) == valid_mask) { - PT_LOG("libpci read error. 
No emulation. " + PT_LOG("Warning: Return ALL F from libpci read. " "[%02x:%02x.%x][Offset:%02xh][Length:%d]\n", pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), address, len); - goto exit; } /* pass directly to libpci for passthrough type register group */ if (reg_grp_entry == NULL) goto out; - /* adjust the write value to appropriate CFC-CFF window */ + /* adjust the read and write value to appropriate CFC-CFF window */ + read_val <<= ((address & 3) << 3); val <<= ((address & 3) << 3); emul_len = len; @@ -1131,7 +1151,8 @@ static void pt_pci_write_config(PCIDevic if (ret < 0) { /* exit I/O emulator */ - PT_LOG("I/O emulator exit()\n"); + PT_LOG("Internal error: Invalid write emulation " + "return value[%d]. I/O emulator exit.\n", ret); exit(1); } @@ -1186,9 +1207,6 @@ static uint32_t pt_pci_read_config(PCIDe int emul_len = 0; int ret = 0; - PT_LOG("read(%x.%x): address=%04x len=%d\n", - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, len); - /* check offset range */ if (address >= 0xFF) { @@ -1246,15 +1264,14 @@ static uint32_t pt_pci_read_config(PCIDe break; } - /* check libpci error */ + /* check libpci result */ valid_mask = (0xFFFFFFFF >> ((4 - len) << 3)); if ((val & valid_mask) == valid_mask) { - PT_LOG("libpci read error. No emulation. " + PT_LOG("Warning: Return ALL F from libpci read. " "[%02x:%02x.%x][Offset:%02xh][Length:%d]\n", pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), address, len); - goto exit; } /* just return the I/O device register value for @@ -1309,7 +1326,8 @@ static uint32_t pt_pci_read_config(PCIDe if (ret < 0) { /* exit I/O emulator */ - PT_LOG("I/O emulator exit()\n"); + PT_LOG("Internal error: Invalid read emulation " + "return value[%d]. 
I/O emulator exit.\n", ret); exit(1); } @@ -1332,6 +1350,13 @@ static uint32_t pt_pci_read_config(PCIDe val >>= ((address & 3) << 3); exit: + +#ifdef PT_DEBUG_PCI_CONFIG_ACCESS + PT_LOG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n", + pci_bus_num(d->bus), (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), + address, val, len); +#endif + return val; } @@ -1389,7 +1414,7 @@ static int pt_register_regions(struct pt return 0; } -static int pt_unregister_regions(struct pt_dev *assigned_device) +static void pt_unregister_regions(struct pt_dev *assigned_device) { int i, type, ret; uint32_t e_size; @@ -1488,7 +1513,9 @@ static int pt_bar_reg_parse( /* check 64bit BAR */ index = pt_bar_offset_to_index(reg->offset); if ((index > 0) && (index < PCI_ROM_SLOT) && - (d->config[bar_64] & PCI_BASE_ADDRESS_MEM_TYPE_64)) + ((d->config[bar_64] & (PCI_BASE_ADDRESS_SPACE | + PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == + (PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64))) { region = &ptdev->bases[index-1]; if (region->bar_flag != PT_BAR_FLAG_UPPER) @@ -1502,6 +1529,13 @@ static int pt_bar_reg_parse( r = &d->io_regions[index]; if (!r->size) goto out; + + /* for ExpROM BAR */ + if (index == PCI_ROM_SLOT) + { + bar_flag = PT_BAR_FLAG_MEM; + goto out; + } /* check BAR I/O indicator */ if (d->config[reg->offset] & PCI_BASE_ADDRESS_SPACE_IO) @@ -1540,7 +1574,7 @@ static void pt_bar_mapping(struct pt_dev /* copy region address to temporary */ r_addr = r->addr; - /* clear region address in case I/O Space or Memory Space disable */ + /* need unmapping in case I/O Space or Memory Space disable */ if (((base->bar_flag == PT_BAR_FLAG_IO) && !io_enable ) || ((base->bar_flag == PT_BAR_FLAG_MEM) && !mem_enable )) r_addr = -1; @@ -1556,8 +1590,10 @@ static void pt_bar_mapping(struct pt_dev /* check overlapped address */ ret = pt_chk_bar_overlap(dev->bus, dev->devfn, r_addr, r_size); if (ret > 0) - PT_LOG("Base Address[%d] is overlapped. 
" - "[Address:%08xh][Size:%04xh]\n", i, r_addr, r_size); + PT_LOG("ptdev[%02x:%02x.%x][Region:%d][Address:%08xh][Size:%08xh] " + "is overlapped.\n", pci_bus_num(dev->bus), + (dev->devfn >> 3) & 0x1F, (dev->devfn & 0x7), + i, r_addr, r_size); /* check whether we need to update the mapping or not */ if (r_addr != ptdev->bases[i].e_physbase) @@ -1776,14 +1812,16 @@ static uint32_t pt_status_reg_init(struc else { /* exit I/O emulator */ - PT_LOG("I/O emulator exit()\n"); + PT_LOG("Internal error: Couldn't find pt_reg_tbl for " + "Capabilities Pointer register. I/O emulator exit.\n"); exit(1); } } else { /* exit I/O emulator */ - PT_LOG("I/O emulator exit()\n"); + PT_LOG("Internal error: Couldn't find pt_reg_grp_tbl for Header. " + "I/O emulator exit.\n"); exit(1); } @@ -1815,7 +1853,8 @@ static uint32_t pt_bar_reg_init(struct p if (index < 0) { /* exit I/O emulator */ - PT_LOG("I/O emulator exit()\n"); + PT_LOG("Internal error: Invalid BAR index[%d]. " + "I/O emulator exit.\n", index); exit(1); } @@ -1962,9 +2001,8 @@ static uint8_t pt_msi_size_init(struct p ptdev->msi = malloc(sizeof(struct pt_msi_info)); if ( !ptdev->msi ) { - PT_LOG("error allocation pt_msi_info\n"); /* exit I/O emulator */ - PT_LOG("I/O emulator exit()\n"); + PT_LOG("error allocation pt_msi_info. I/O emulator exit.\n"); exit(1); } memset(ptdev->msi, 0, sizeof(struct pt_msi_info)); @@ -1983,7 +2021,8 @@ static uint8_t pt_msix_size_init(struct if (ret == -1) { /* exit I/O emulator */ - PT_LOG("I/O emulator exit()\n"); + PT_LOG("Internal error: Invalid pt_msix_init return value[%d]. " + "I/O emulator exit.\n", ret); exit(1); } @@ -2060,7 +2099,8 @@ static int pt_bar_reg_read(struct pt_dev if (index < 0) { /* exit I/O emulator */ - PT_LOG("I/O emulator exit()\n"); + PT_LOG("Internal error: Invalid BAR index[%d]. 
" + "I/O emulator exit.\n", index); exit(1); } @@ -2074,8 +2114,8 @@ static int pt_bar_reg_read(struct pt_dev bar_emu_mask = PT_BAR_IO_EMU_MASK; break; case PT_BAR_FLAG_UPPER: - *value = 0; - goto out; + bar_emu_mask = PT_BAR_ALLF; + break; default: break; } @@ -2085,7 +2125,6 @@ static int pt_bar_reg_read(struct pt_dev *value = ((*value & ~valid_emu_mask) | (cfg_entry->data & valid_emu_mask)); -out: return 0; } @@ -2201,12 +2240,13 @@ static int pt_bar_reg_write(struct pt_de uint32_t r_size = 0; int index = 0; - /* get BAR index */ + /* get BAR index */ index = pt_bar_offset_to_index(reg->offset); if (index < 0) { /* exit I/O emulator */ - PT_LOG("I/O emulator exit()\n"); + PT_LOG("Internal error: Invalid BAR index[%d]. " + "I/O emulator exit.\n", index); exit(1); } @@ -2216,89 +2256,113 @@ static int pt_bar_reg_write(struct pt_de /* align resource size (memory type only) */ PT_GET_EMUL_SIZE(base->bar_flag, r_size); - /* check guest write value */ - if (*value == PT_BAR_ALLF) - { - /* set register with resource size alligned to page size */ - cfg_entry->data = ~(r_size - 1); - /* avoid writing ALL F to I/O device register */ - *value = dev_value; - } - else - { - /* set emulate mask and read-only mask depend on BAR flag */ - switch (ptdev->bases[index].bar_flag) - { - case PT_BAR_FLAG_MEM: - bar_emu_mask = PT_BAR_MEM_EMU_MASK; - bar_ro_mask = PT_BAR_MEM_RO_MASK; - break; - case PT_BAR_FLAG_IO: - new_addr = *value; - last_addr = new_addr + r_size - 1; + /* set emulate mask and read-only mask depend on BAR flag */ + switch (ptdev->bases[index].bar_flag) + { + case PT_BAR_FLAG_MEM: + bar_emu_mask = PT_BAR_MEM_EMU_MASK; + bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1); + break; + case PT_BAR_FLAG_IO: + bar_emu_mask = PT_BAR_IO_EMU_MASK; + bar_ro_mask = PT_BAR_IO_RO_MASK | (r_size - 1); + break; + case PT_BAR_FLAG_UPPER: + bar_emu_mask = PT_BAR_ALLF; + bar_ro_mask = 0; /* all upper 32bit are R/W */ + break; + default: + break; + } + + /* modify emulate register */ + 
writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask; + cfg_entry->data = ((*value & writable_mask) | + (cfg_entry->data & ~writable_mask)); + + /* check whether we need to update the virtual region address or not */ + switch (ptdev->bases[index].bar_flag) + { + case PT_BAR_FLAG_MEM: + /* nothing to do */ + break; + case PT_BAR_FLAG_IO: + new_addr = cfg_entry->data; + last_addr = new_addr + r_size - 1; + /* check invalid address */ + if (last_addr <= new_addr || !new_addr || last_addr >= 0x10000) + { /* check 64K range */ - if (last_addr <= new_addr || !new_addr || last_addr >= 0x10000) + if ((last_addr >= 0x10000) && + (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask))) { PT_LOG("Guest attempt to set Base Address over the 64KB. " - "[%02x:%02x.%x][Offset:%02xh][Range:%08xh-%08xh]\n", + "[%02x:%02x.%x][Offset:%02xh][Address:%08xh][Size:%08xh]\n", pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), - reg->offset, new_addr, last_addr); - /* just remove mapping */ - r->addr = -1; - goto exit; + reg->offset, new_addr, r_size); } - bar_emu_mask = PT_BAR_IO_EMU_MASK; - bar_ro_mask = PT_BAR_IO_RO_MASK; - break; - case PT_BAR_FLAG_UPPER: - if (*value) + /* just remove mapping */ + r->addr = -1; + goto exit; + } + break; + case PT_BAR_FLAG_UPPER: + if (cfg_entry->data) + { + if (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask)) { PT_LOG("Guest attempt to set high MMIO Base Address. " - "Ignore mapping. " - "[%02x:%02x.%x][Offset:%02xh][High Address:%08xh]\n", + "Ignore mapping. 
" + "[%02x:%02x.%x][Offset:%02xh][High Address:%08xh]\n", pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), - reg->offset, *value); - /* clear lower address */ - d->io_regions[index-1].addr = -1; + reg->offset, cfg_entry->data); } - else + /* clear lower address */ + d->io_regions[index-1].addr = -1; + } + else + { + /* find lower 32bit BAR */ + prev_offset = (reg->offset - 4); + reg_grp_entry = pt_find_reg_grp(ptdev, prev_offset); + if (reg_grp_entry) { - /* find lower 32bit BAR */ - prev_offset = (reg->offset - 4); - reg_grp_entry = pt_find_reg_grp(ptdev, prev_offset); - if (reg_grp_entry) - { - reg_entry = pt_find_reg(reg_grp_entry, prev_offset); - if (reg_entry) - /* restore lower address */ - d->io_regions[index-1].addr = reg_entry->data; - else - return -1; - } + reg_entry = pt_find_reg(reg_grp_entry, prev_offset); + if (reg_entry) + /* restore lower address */ + d->io_regions[index-1].addr = reg_entry->data; else return -1; } - cfg_entry->data = 0; - r->addr = -1; - goto exit; - } - - /* modify emulate register */ - writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask; - cfg_entry->data = ((*value & writable_mask) | - (cfg_entry->data & ~writable_mask)); - /* update the corresponding virtual region address */ - r->addr = cfg_entry->data; - - /* create value for writing to I/O device register */ - throughable_mask = ~bar_emu_mask & valid_mask; - *value = ((*value & throughable_mask) | - (dev_value & ~throughable_mask)); - } + else + return -1; + } + + /* always keep the emulate register value to 0, + * because hvmloader does not support high MMIO for now. + */ + cfg_entry->data = 0; + + /* never mapping the 'empty' upper region, + * because we'll do it enough for the lower region. 
+ */ + r->addr = -1; + goto exit; + default: + break; + } + + /* update the corresponding virtual region address */ + r->addr = cfg_entry->data; exit: + /* create value for writing to I/O device register */ + throughable_mask = ~bar_emu_mask & valid_mask; + *value = ((*value & throughable_mask) | + (dev_value & ~throughable_mask)); + return 0; } @@ -2314,6 +2378,8 @@ static int pt_exp_rom_bar_reg_write(stru uint32_t writable_mask = 0; uint32_t throughable_mask = 0; uint32_t r_size = 0; + uint32_t bar_emu_mask = 0; + uint32_t bar_ro_mask = 0; r = &d->io_regions[PCI_ROM_SLOT]; r_size = r->size; @@ -2321,28 +2387,22 @@ static int pt_exp_rom_bar_reg_write(stru /* align memory type resource size */ PT_GET_EMUL_SIZE(base->bar_flag, r_size); - /* check guest write value */ - if (*value == PT_BAR_ALLF) - { - /* set register with resource size alligned to page size */ - cfg_entry->data = ~(r_size - 1); - /* avoid writing ALL F to I/O device register */ - *value = dev_value; - } - else - { - /* modify emulate register */ - writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - cfg_entry->data = ((*value & writable_mask) | - (cfg_entry->data & ~writable_mask)); - /* update the corresponding virtual region address */ - r->addr = cfg_entry->data; - - /* create value for writing to I/O device register */ - throughable_mask = ~reg->emu_mask & valid_mask; - *value = ((*value & throughable_mask) | - (dev_value & ~throughable_mask)); - } + /* set emulate mask and read-only mask */ + bar_emu_mask = reg->emu_mask; + bar_ro_mask = reg->ro_mask | (r_size - 1); + + /* modify emulate register */ + writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask; + cfg_entry->data = ((*value & writable_mask) | + (cfg_entry->data & ~writable_mask)); + + /* update the corresponding virtual region address */ + r->addr = cfg_entry->data; + + /* create value for writing to I/O device register */ + throughable_mask = ~bar_emu_mask & valid_mask; + *value = ((*value & throughable_mask) | + 
(dev_value & ~throughable_mask)); return 0; } @@ -2483,8 +2543,6 @@ static int pt_msgctrl_reg_write(struct p uint16_t throughable_mask = 0; uint16_t old_ctrl = cfg_entry->data; PCIDevice *pd = (PCIDevice *)ptdev; - - PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value); /* Currently no support for multi-vector */ if ((*value & PCI_MSI_FLAGS_QSIZE) != 0x0) @@ -2527,8 +2585,6 @@ static int pt_msgctrl_reg_write(struct p else ptdev->msi->flags &= ~PCI_MSI_FLAGS_ENABLE; - PT_LOG("[after] wr_val:%xh\n", *value); - return 0; } @@ -2541,8 +2597,6 @@ static int pt_msgaddr32_reg_write(struct uint32_t writable_mask = 0; uint32_t throughable_mask = 0; uint32_t old_addr = cfg_entry->data; - - PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value); /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; @@ -2564,8 +2618,6 @@ static int pt_msgaddr32_reg_write(struct pt_msi_update(ptdev); } - PT_LOG("[after] wr_val:%xh\n", *value); - return 0; } @@ -2578,8 +2630,6 @@ static int pt_msgaddr64_reg_write(struct uint32_t writable_mask = 0; uint32_t throughable_mask = 0; uint32_t old_addr = cfg_entry->data; - - PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value); /* check whether the type is 64 bit or not */ if (!(ptdev->msi->flags & PCI_MSI_FLAGS_64BIT)) @@ -2609,8 +2659,6 @@ static int pt_msgaddr64_reg_write(struct pt_msi_update(ptdev); } - PT_LOG("[after] wr_val:%xh\n", *value); - return 0; } @@ -2627,8 +2675,6 @@ static int pt_msgdata_reg_write(struct p uint32_t flags = ptdev->msi->flags; uint32_t offset = reg->offset; - PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value); - /* check the offset whether matches the type or not */ if (!((offset == PCI_MSI_DATA_64) && (flags & PCI_MSI_FLAGS_64BIT)) && !((offset == PCI_MSI_DATA_32) && !(flags & PCI_MSI_FLAGS_64BIT))) @@ -2658,8 +2704,6 @@ static int pt_msgdata_reg_write(struct p pt_msi_update(ptdev); } - PT_LOG("[after] wr_val:%xh\n", *value); - return 0; } @@ 
-2672,8 +2716,6 @@ static int pt_msixctrl_reg_write(struct uint16_t writable_mask = 0; uint16_t throughable_mask = 0; uint16_t old_ctrl = cfg_entry->data; - - PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value); /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; @@ -2691,8 +2733,6 @@ static int pt_msixctrl_reg_write(struct pt_msix_update(ptdev); ptdev->msix->enabled = !!(*value & PCI_MSIX_ENABLE); - - PT_LOG("[after] wr_val:%xh\n", *value); return 0; } @@ -2785,8 +2825,7 @@ struct pt_dev * register_real_device(PCI int pirq = pci_dev->irq; machine_irq = pci_dev->irq; - rc = xc_physdev_map_pirq(xc_handle, domid, MAP_PIRQ_TYPE_GSI, - machine_irq, &pirq); + rc = xc_physdev_map_pirq(xc_handle, domid, machine_irq, &pirq); if ( rc ) { diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/pass-through.h --- a/tools/ioemu/hw/pass-through.h Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/ioemu/hw/pass-through.h Thu Aug 07 11:57:34 2008 +0900 @@ -47,12 +47,20 @@ /* because the current version of libpci (2.2.0) doesn't define these ID, * so we define Capability ID here. 
*/ +#ifndef PCI_CAP_ID_HOTPLUG /* SHPC Capability List Item reg group */ #define PCI_CAP_ID_HOTPLUG 0x0C +#endif + +#ifndef PCI_CAP_ID_SSVID /* Subsystem ID and Subsystem Vendor ID Capability List Item reg group */ #define PCI_CAP_ID_SSVID 0x0D +#endif + +#ifndef PCI_MSI_FLAGS_MASK_BIT /* interrupt masking & reporting supported */ #define PCI_MSI_FLAGS_MASK_BIT 0x0100 +#endif #define PT_INVALID_REG 0xFFFFFFFF /* invalid register value */ #define PT_BAR_ALLF 0xFFFFFFFF /* BAR ALLF value */ diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/pc.c --- a/tools/ioemu/hw/pc.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/ioemu/hw/pc.c Thu Aug 07 11:57:34 2008 +0900 @@ -30,9 +30,6 @@ #define VGABIOS_FILENAME "vgabios.bin" #define VGABIOS_CIRRUS_FILENAME "vgabios-cirrus.bin" #define LINUX_BOOT_FILENAME "linux_boot.bin" - -/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables. */ -#define ACPI_DATA_SIZE 0x10000 static fdctrl_t *floppy_controller; static RTCState *rtc_state; @@ -542,6 +539,7 @@ static void load_linux(const char *kerne uint16_t seg[6]; uint16_t real_seg; int setup_size, kernel_size, initrd_size, cmdline_size; + unsigned long end_low_ram; uint32_t initrd_max; uint8_t header[1024]; target_phys_addr_t real_addr, reloc_prot_addr, prot_addr, cmdline_addr, initrd_addr; @@ -595,15 +593,14 @@ static void load_linux(const char *kerne (size_t)cmdline_addr, (size_t)prot_addr); + /* Special pages are placed at end of low RAM: pick an arbitrary one and + * subtract a suitably large amount of padding (64kB) to skip BIOS data. */ + xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &end_low_ram); + end_low_ram = (end_low_ram << 12) - (64*1024); + /* highest address for loading the initrd */ - if (protocol >= 0x203) - initrd_max = ldl_p(header+0x22c); - else - initrd_max = 0x37ffffff; - - if (initrd_max >= ram_size-ACPI_DATA_SIZE) - initrd_max = ram_size-ACPI_DATA_SIZE-1; - + initrd_max = (protocol >= 0x203) ? 
ldl_p(header+0x22c) : 0x37ffffff; + initrd_max = MIN(initrd_max, (uint32_t)end_low_ram); /* kernel command line */ ncmdline = strlen(kernel_cmdline); diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/pci.c --- a/tools/ioemu/hw/pci.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/ioemu/hw/pci.c Thu Aug 07 11:57:34 2008 +0900 @@ -664,9 +664,10 @@ int pt_chk_bar_overlap(PCIBus *bus, int r = &devices->io_regions[j]; if ((addr < (r->addr + r->size)) && ((addr + size) > r->addr)) { - printf("Overlapped to device[%02x:%02x.%x] region:%d addr:%08x" - " size:%08x\n", bus->bus_num, (devices->devfn >> 3) & 0x1F, - (devices->devfn & 0x7), j, r->addr, r->size); + printf("Overlapped to device[%02x:%02x.%x][Region:%d]" + "[Address:%08xh][Size:%08xh]\n", bus->bus_num, + (devices->devfn >> 3) & 0x1F, (devices->devfn & 0x7), + j, r->addr, r->size); ret = 1; goto out; } diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/pt-msi.c --- a/tools/ioemu/hw/pt-msi.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/ioemu/hw/pt-msi.c Thu Aug 07 11:57:34 2008 +0900 @@ -37,8 +37,7 @@ int pt_msi_setup(struct pt_dev *dev) return -1; } - if ( xc_physdev_map_pirq_msi(xc_handle, domid, MAP_PIRQ_TYPE_MSI, - AUTO_ASSIGN, &pirq, + if ( xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq, dev->pci_dev->dev << 3 | dev->pci_dev->func, dev->pci_dev->bus, 0, 1) ) { @@ -120,8 +119,7 @@ static int pt_msix_update_one(struct pt_ /* Check if this entry is already mapped */ if ( entry->pirq == -1 ) { - ret = xc_physdev_map_pirq_msi(xc_handle, domid, MAP_PIRQ_TYPE_MSI, - AUTO_ASSIGN, &pirq, + ret = xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq, dev->pci_dev->dev << 3 | dev->pci_dev->func, dev->pci_dev->bus, entry_nr, 0); if ( ret ) diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/serial.c --- a/tools/ioemu/hw/serial.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/ioemu/hw/serial.c Thu Aug 07 11:57:34 2008 +0900 @@ -728,7 +728,6 @@ static int serial_load(QEMUFile *f, void qemu_get_8s(f,&s->lsr); 
qemu_get_8s(f,&s->msr); qemu_get_8s(f,&s->scr); - qemu_get_8s(f,&s->fcr); if (version_id >= 2) qemu_get_8s(f,&fcr); diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/vga.c --- a/tools/ioemu/hw/vga.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/ioemu/hw/vga.c Thu Aug 07 11:57:34 2008 +0900 @@ -1548,8 +1548,8 @@ static void vga_draw_graphic(VGAState *s } else { /* ENODATA just means we have changed mode and will succeed * next time */ - if (err != -ENODATA) - fprintf(stderr, "track_dirty_vram(%lx, %lx) failed (%d)\n", s->lfb_addr + y, npages, err); + if (errno != ENODATA) + fprintf(stderr, "track_dirty_vram(%lx, %lx) failed (%d, %d)\n", s->lfb_addr + y, npages, err, errno); } } diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/ioemu/vl.c Thu Aug 07 11:57:34 2008 +0900 @@ -7136,8 +7136,10 @@ int main(int argc, char **argv) sigaddset(&set, aio_sig_num); sigprocmask(SIG_BLOCK, &set, NULL); } +#endif QEMU_LIST_INIT (&vm_change_state_head); +#ifndef CONFIG_STUBDOM #ifndef _WIN32 { struct sigaction act; diff -r 7affdebb7a1e -r a39913db6e51 tools/libaio/src/Makefile --- a/tools/libaio/src/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/libaio/src/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -1,7 +1,7 @@ XEN_ROOT = ../../.. XEN_ROOT = ../../.. include $(XEN_ROOT)/tools/Rules.mk -prefix=/usr +prefix=$(PREFIX) includedir=$(prefix)/include libdir=$(prefix)/lib diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_dom_boot.c --- a/tools/libxc/xc_dom_boot.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/libxc/xc_dom_boot.c Thu Aug 07 11:57:34 2008 +0900 @@ -4,7 +4,7 @@ * This is the code which actually boots a fresh * prepared domain image as xen guest domain. * - * ==> this is the only domain bilder code piece + * ==> this is the only domain builder code piece * where xen hypercalls are allowed <== * * This code is licenced under the GPL. 
@@ -153,7 +153,7 @@ void *xc_dom_boot_domU_map(struct xc_dom int page_shift = XC_DOM_PAGE_SHIFT(dom); privcmd_mmap_entry_t *entries; void *ptr; - int i, rc; + int i; int err; entries = xc_dom_malloc(dom, count * sizeof(privcmd_mmap_entry_t)); @@ -165,9 +165,13 @@ void *xc_dom_boot_domU_map(struct xc_dom return NULL; } - ptr = mmap(NULL, count << page_shift, PROT_READ | PROT_WRITE, - MAP_SHARED, dom->guest_xc, 0); - if ( ptr == MAP_FAILED ) + for ( i = 0; i < count; i++ ) + entries[i].mfn = xc_dom_p2m_host(dom, pfn + i); + + ptr = xc_map_foreign_ranges(dom->guest_xc, dom->guest_domid, + count << page_shift, PROT_READ | PROT_WRITE, 1 << page_shift, + entries, count); + if ( ptr == NULL ) { err = errno; xc_dom_panic(XC_INTERNAL_ERROR, @@ -177,22 +181,6 @@ void *xc_dom_boot_domU_map(struct xc_dom return NULL; } - for ( i = 0; i < count; i++ ) - { - entries[i].va = (uintptr_t) ptr + (i << page_shift); - entries[i].mfn = xc_dom_p2m_host(dom, pfn + i); - entries[i].npages = 1; - } - - rc = xc_map_foreign_ranges(dom->guest_xc, dom->guest_domid, - entries, count); - if ( rc < 0 ) - { - xc_dom_panic(XC_INTERNAL_ERROR, - "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn - " [xenctl, rc=%d]\n", __FUNCTION__, pfn, count, rc); - return NULL; - } return ptr; } diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_domain_save.c --- a/tools/libxc/xc_domain_save.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/libxc/xc_domain_save.c Thu Aug 07 11:57:34 2008 +0900 @@ -568,16 +568,19 @@ static xen_pfn_t *xc_map_m2p(int xc_hand unsigned long m2p_chunks, m2p_size; xen_pfn_t *m2p; xen_pfn_t *extent_start; - int i, rc; - + int i; + + m2p = NULL; m2p_size = M2P_SIZE(max_mfn); m2p_chunks = M2P_CHUNKS(max_mfn); xmml.max_extents = m2p_chunks; - if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) ) + + extent_start = calloc(m2p_chunks, sizeof(xen_pfn_t)); + if ( !extent_start ) { ERROR("failed to allocate space for m2p mfns"); - return NULL; + goto err0; } 
set_xen_guest_handle(xmml.extent_start, extent_start); @@ -585,41 +588,36 @@ static xen_pfn_t *xc_map_m2p(int xc_hand (xmml.nr_extents != m2p_chunks) ) { ERROR("xc_get_m2p_mfns"); - return NULL; - } - - if ( (m2p = mmap(NULL, m2p_size, prot, - MAP_SHARED, xc_handle, 0)) == MAP_FAILED ) - { - ERROR("failed to mmap m2p"); - return NULL; - } - - if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) ) + goto err1; + } + + entries = calloc(m2p_chunks, sizeof(privcmd_mmap_entry_t)); + if (entries == NULL) { ERROR("failed to allocate space for mmap entries"); - return NULL; + goto err1; } for ( i = 0; i < m2p_chunks; i++ ) - { - entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); entries[i].mfn = extent_start[i]; - entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT; - } - - if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN, - entries, m2p_chunks)) < 0 ) - { - ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc); - return NULL; + + m2p = xc_map_foreign_ranges(xc_handle, DOMID_XEN, + m2p_size, prot, M2P_CHUNK_SIZE, + entries, m2p_chunks); + if (m2p == NULL) + { + ERROR("xc_mmap_foreign_ranges failed"); + goto err2; } m2p_mfn0 = entries[0].mfn; +err2: + free(entries); +err1: free(extent_start); - free(entries); - + +err0: return m2p; } diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/libxc/xc_hvm_build.c Thu Aug 07 11:57:34 2008 +0900 @@ -115,42 +115,32 @@ static int loadelfimage( struct elf_binary *elf, int xch, uint32_t dom, unsigned long *parray) { privcmd_mmap_entry_t *entries = NULL; - int pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT; + size_t pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT; int i, rc = -1; /* Map address space for initial elf image. 
*/ - entries = malloc(pages * sizeof(privcmd_mmap_entry_t)); + entries = calloc(pages, sizeof(privcmd_mmap_entry_t)); if ( entries == NULL ) goto err; - elf->dest = mmap(NULL, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE, - MAP_SHARED, xch, 0); - if ( elf->dest == MAP_FAILED ) - goto err; for ( i = 0; i < pages; i++ ) - { - entries[i].va = (uintptr_t)elf->dest + (i << PAGE_SHIFT); entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i]; - entries[i].npages = 1; - } - - rc = xc_map_foreign_ranges(xch, dom, entries, pages); - if ( rc < 0 ) + + elf->dest = xc_map_foreign_ranges( + xch, dom, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE, 1 << PAGE_SHIFT, + entries, pages); + if ( elf->dest == NULL ) goto err; /* Load the initial elf image. */ elf_load_binary(elf); rc = 0; + munmap(elf->dest, pages << PAGE_SHIFT); + elf->dest = NULL; + err: - if ( elf->dest ) - { - munmap(elf->dest, pages << PAGE_SHIFT); - elf->dest = NULL; - } - - if ( entries ) - free(entries); + free(entries); return rc; } @@ -239,7 +229,7 @@ static int setup_guest(int xc_handle, if ( ((count | cur_pages) & (SUPERPAGE_NR_PFNS - 1)) == 0 ) { long done; - xen_pfn_t sp_extents[2048 >> SUPERPAGE_PFN_SHIFT]; + xen_pfn_t sp_extents[count >> SUPERPAGE_PFN_SHIFT]; struct xen_memory_reservation sp_req = { .nr_extents = count >> SUPERPAGE_PFN_SHIFT, .extent_order = SUPERPAGE_PFN_SHIFT, diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_linux.c --- a/tools/libxc/xc_linux.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/libxc/xc_linux.c Thu Aug 07 11:57:34 2008 +0900 @@ -118,16 +118,41 @@ void *xc_map_foreign_range(int xc_handle return addr; } -int xc_map_foreign_ranges(int xc_handle, uint32_t dom, - privcmd_mmap_entry_t *entries, int nr) +void *xc_map_foreign_ranges(int xc_handle, uint32_t dom, + size_t size, int prot, size_t chunksize, + privcmd_mmap_entry_t entries[], int nentries) { privcmd_mmap_t ioctlx; - - ioctlx.num = nr; + int i, rc; + void *addr; + + addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0); 
+ if ( addr == MAP_FAILED ) + goto mmap_failed; + + for ( i = 0; i < nentries; i++ ) + { + entries[i].va = (unsigned long)addr + (i * chunksize); + entries[i].npages = chunksize >> PAGE_SHIFT; + } + + ioctlx.num = nentries; ioctlx.dom = dom; ioctlx.entry = entries; - return ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx); + rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx); + if ( rc ) + goto ioctl_failed; + + return addr; + +ioctl_failed: + rc = munmap(addr, size); + if ( rc == -1 ) + ERROR("%s: error in error path\n", __FUNCTION__); + +mmap_failed: + return NULL; } static int do_privcmd(int xc_handle, unsigned int cmd, unsigned long data) diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_minios.c --- a/tools/libxc/xc_minios.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/libxc/xc_minios.c Thu Aug 07 11:57:34 2008 +0900 @@ -15,6 +15,7 @@ #include <os.h> #include <mm.h> #include <lib.h> +#include <gntmap.h> #include <events.h> #include <wait.h> #include <sys/mman.h> @@ -76,16 +77,30 @@ void *xc_map_foreign_range(int xc_handle return map_frames_ex(&mfn, size / getpagesize(), 0, 1, 1, dom, 0, pt_prot); } -int xc_map_foreign_ranges(int xc_handle, uint32_t dom, - privcmd_mmap_entry_t *entries, int nr) -{ - int i; - for (i = 0; i < nr; i++) { - unsigned long mfn = entries[i].mfn; - do_map_frames(entries[i].va, &mfn, entries[i].npages, 0, 1, dom, 0, L1_PROT); - } - return 0; -} +void *xc_map_foreign_ranges(int xc_handle, uint32_t dom, + size_t size, int prot, size_t chunksize, + privcmd_mmap_entry_t entries[], int nentries) +{ + unsigned long mfns[size / PAGE_SIZE]; + int i, j, n; + unsigned long pt_prot = 0; +#ifdef __ia64__ + /* TODO */ +#else + if (prot & PROT_READ) + pt_prot = L1_PROT_RO; + if (prot & PROT_WRITE) + pt_prot = L1_PROT; +#endif + + n = 0; + for (i = 0; i < nentries; i++) + for (j = 0; j < chunksize / PAGE_SIZE; j++) + mfns[n++] = entries[i].mfn + j; + + return map_frames_ex(mfns, n, 1, 0, 1, dom, 0, pt_prot); +} + int do_xen_hypercall(int xc_handle, 
privcmd_hypercall_t *hypercall) { @@ -102,8 +117,8 @@ int do_xen_hypercall(int xc_handle, priv errno = -ret; return -1; } - if (call.result < 0) { - errno = -call.result; + if ((long) call.result < 0) { + errno = - (long) call.result; return -1; } return call.result; @@ -244,8 +259,11 @@ int xc_evtchn_unbind(int xce_handle, evt files[xce_handle].evtchn.ports[i].port = -1; break; } - if (i == MAX_EVTCHN_PORTS) + if (i == MAX_EVTCHN_PORTS) { printf("Warning: couldn't find port %"PRId32" for xc handle %x\n", port, xce_handle); + errno = -EINVAL; + return -1; + } files[xce_handle].evtchn.ports[i].bound = 0; unbind_evtchn(port); return 0; @@ -278,18 +296,24 @@ evtchn_port_or_error_t xc_evtchn_pending { int i; unsigned long flags; + evtchn_port_t ret = -1; + local_irq_save(flags); + files[xce_handle].read = 0; for (i = 0; i < MAX_EVTCHN_PORTS; i++) { - evtchn_port_t port = files[xce_handle].evtchn.ports[i].port; - if (port != -1 && files[xce_handle].evtchn.ports[i].pending) { - files[xce_handle].evtchn.ports[i].pending = 0; - local_irq_restore(flags); - return port; - } - } - files[xce_handle].read = 0; + evtchn_port_t port = files[xce_handle].evtchn.ports[i].port; + if (port != -1 && files[xce_handle].evtchn.ports[i].pending) { + if (ret == -1) { + ret = port; + files[xce_handle].evtchn.ports[i].pending = 0; + } else { + files[xce_handle].read = 1; + break; + } + } + } local_irq_restore(flags); - return -1; + return ret; } int xc_evtchn_unmask(int xce_handle, evtchn_port_t port) @@ -304,6 +328,88 @@ void discard_file_cache(int fd, int flus if (flush) fsync(fd); } + +int xc_gnttab_open(void) +{ + int xcg_handle; + xcg_handle = alloc_fd(FTYPE_GNTMAP); + gntmap_init(&files[xcg_handle].gntmap); + return xcg_handle; +} + +int xc_gnttab_close(int xcg_handle) +{ + gntmap_fini(&files[xcg_handle].gntmap); + files[xcg_handle].type = FTYPE_NONE; + return 0; +} + +void *xc_gnttab_map_grant_ref(int xcg_handle, + uint32_t domid, + uint32_t ref, + int prot) +{ + return 
gntmap_map_grant_refs(&files[xcg_handle].gntmap, + 1, + &domid, 0, + &ref, + prot & PROT_WRITE); +} + +void *xc_gnttab_map_grant_refs(int xcg_handle, + uint32_t count, + uint32_t *domids, + uint32_t *refs, + int prot) +{ + return gntmap_map_grant_refs(&files[xcg_handle].gntmap, + count, + domids, 1, + refs, + prot & PROT_WRITE); +} + +void *xc_gnttab_map_domain_grant_refs(int xcg_handle, + uint32_t count, + uint32_t domid, + uint32_t *refs, + int prot) +{ + return gntmap_map_grant_refs(&files[xcg_handle].gntmap, + count, + &domid, 0, + refs, + prot & PROT_WRITE); +} + +int xc_gnttab_munmap(int xcg_handle, + void *start_address, + uint32_t count) +{ + int ret; + ret = gntmap_munmap(&files[xcg_handle].gntmap, + (unsigned long) start_address, + count); + if (ret < 0) { + errno = -ret; + return -1; + } + return ret; +} + +int xc_gnttab_set_max_grants(int xcg_handle, + uint32_t count) +{ + int ret; + ret = gntmap_set_max_grants(&files[xcg_handle].gntmap, + count); + if (ret < 0) { + errno = -ret; + return -1; + } + return ret; +} + /* * Local variables: * mode: C diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_netbsd.c --- a/tools/libxc/xc_netbsd.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/libxc/xc_netbsd.c Thu Aug 07 11:57:34 2008 +0900 @@ -11,7 +11,6 @@ #include "xc_private.h" -#include <xen/memory.h> #include <xen/sys/evtchn.h> #include <unistd.h> #include <fcntl.h> @@ -114,22 +113,42 @@ void *xc_map_foreign_range(int xc_handle return addr; } -int xc_map_foreign_ranges(int xc_handle, uint32_t dom, - privcmd_mmap_entry_t *entries, int nr) -{ - privcmd_mmap_t ioctlx; - int err; - - ioctlx.num = nr; - ioctlx.dom = dom; - ioctlx.entry = entries; - - err = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx); - if (err == 0) - return 0; - else - return -errno; -} +void *xc_map_foreign_ranges(int xc_handle, uint32_t dom, + size_t size, int prot, size_t chunksize, + privcmd_mmap_entry_t entries[], int nentries) +{ + privcmd_mmap_t ioctlx; + int i, rc; + void *addr; + + addr = 
mmap(NULL, size, prot, MAP_ANON | MAP_SHARED, -1, 0); + if (addr == MAP_FAILED) + goto mmap_failed; + + for (i = 0; i < nentries; i++) { + entries[i].va = (uintptr_t)addr + (i * chunksize); + entries[i].npages = chunksize >> PAGE_SHIFT; + } + + ioctlx.num = nentries; + ioctlx.dom = dom; + ioctlx.entry = entries; + + rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx); + if (rc) + goto ioctl_failed; + + return addr; + +ioctl_failed: + rc = munmap(addr, size); + if (rc == -1) + ERROR("%s: error in error path\n", __FUNCTION__); + +mmap_failed: + return NULL; +} + static int do_privcmd(int xc_handle, unsigned int cmd, unsigned long data) { diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_physdev.c --- a/tools/libxc/xc_physdev.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/libxc/xc_physdev.c Thu Aug 07 11:57:34 2008 +0900 @@ -22,7 +22,6 @@ int xc_physdev_pci_access_modify(int xc_ int xc_physdev_map_pirq(int xc_handle, int domid, - int type, int index, int *pirq) { @@ -33,7 +32,7 @@ int xc_physdev_map_pirq(int xc_handle, return -EINVAL; map.domid = domid; - map.type = type; + map.type = MAP_PIRQ_TYPE_GSI; map.index = index; map.pirq = *pirq; @@ -47,7 +46,6 @@ int xc_physdev_map_pirq(int xc_handle, int xc_physdev_map_pirq_msi(int xc_handle, int domid, - int type, int index, int *pirq, int devfn, @@ -62,7 +60,7 @@ int xc_physdev_map_pirq_msi(int xc_handl return -EINVAL; map.domid = domid; - map.type = type; + map.type = MAP_PIRQ_TYPE_MSI; map.index = index; map.pirq = *pirq; map.msi_info.devfn = devfn; diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_private.h --- a/tools/libxc/xc_private.h Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/libxc/xc_private.h Thu Aug 07 11:57:34 2008 +0900 @@ -184,8 +184,9 @@ static inline int do_sysctl(int xc_handl return ret; } -int xc_map_foreign_ranges(int xc_handle, uint32_t dom, - privcmd_mmap_entry_t *entries, int nr); +void *xc_map_foreign_ranges(int xc_handle, uint32_t dom, + size_t size, int prot, size_t chunksize, + 
privcmd_mmap_entry_t entries[], int nentries); void *map_domain_va_core(unsigned long domfd, int cpu, void *guest_va, vcpu_guest_context_any_t *ctxt); diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_solaris.c --- a/tools/libxc/xc_solaris.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/libxc/xc_solaris.c Thu Aug 07 11:57:34 2008 +0900 @@ -109,17 +109,40 @@ void *xc_map_foreign_range(int xc_handle return addr; } -int xc_map_foreign_ranges(int xc_handle, uint32_t dom, - privcmd_mmap_entry_t *entries, int nr) +void *xc_map_foreign_ranges(int xc_handle, uint32_t dom, + size_t size, int prot, size_t chunksize, + privcmd_mmap_entry_t entries[], int nentries) { privcmd_mmap_t ioctlx; - - ioctlx.num = nr; + int i, rc; + void *addr; + + addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0); + if (addr == MAP_FAILED) + goto mmap_failed; + + for (i = 0; i < nentries; i++) { + entries[i].va = (uintptr_t)addr + (i * chunksize); + entries[i].npages = chunksize >> PAGE_SHIFT; + } + + ioctlx.num = nentries; ioctlx.dom = dom; ioctlx.entry = entries; - return ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx); -} + rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx); + if (rc) + goto ioctl_failed; + +ioctl_failed: + rc = munmap(addr, size); + if (rc == -1) + ERROR("%s: error in error path\n", __FUNCTION__); + +mmap_failed: + return NULL; +} + static int do_privcmd(int xc_handle, unsigned int cmd, unsigned long data) { diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/libxc/xenctrl.h Thu Aug 07 11:57:34 2008 +0900 @@ -907,13 +907,11 @@ int xc_gnttab_set_max_grants(int xcg_han int xc_physdev_map_pirq(int xc_handle, int domid, - int type, int index, int *pirq); int xc_physdev_map_pirq_msi(int xc_handle, int domid, - int type, int index, int *pirq, int devfn, diff -r 7affdebb7a1e -r a39913db6e51 tools/pygrub/src/pygrub --- a/tools/pygrub/src/pygrub Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/pygrub/src/pygrub Thu 
Aug 07 11:57:34 2008 +0900 @@ -21,7 +21,7 @@ import curses, _curses, curses.wrapper, import curses, _curses, curses.wrapper, curses.textpad, curses.ascii import getopt -sys.path = [ '/usr/lib/python' ] + sys.path +sys.path = [ '/usr/lib/python', '/usr/lib64/python' ] + sys.path import fsimage import grub.GrubConf diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/lowlevel/xc/xc.c Thu Aug 07 11:57:34 2008 +0900 @@ -958,8 +958,7 @@ static PyObject *pyxc_physdev_map_pirq(P if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iii", kwd_list, &dom, &index, &pirq) ) return NULL; - ret = xc_physdev_map_pirq(xc->xc_handle, dom, MAP_PIRQ_TYPE_GSI, - index, &pirq); + ret = xc_physdev_map_pirq(xc->xc_handle, dom, index, &pirq); if ( ret != 0 ) return pyxc_error_to_exception(); return PyLong_FromUnsignedLong(pirq); diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/util/pci.py --- a/tools/python/xen/util/pci.py Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/util/pci.py Thu Aug 07 11:57:34 2008 +0900 @@ -44,6 +44,12 @@ PCI_CLASS_DEVICE = 0x0a PCI_CLASS_DEVICE = 0x0a PCI_CLASS_BRIDGE_PCI = 0x0604 +PCI_HEADER_TYPE = 0x0e +PCI_HEADER_TYPE_MASK = 0x7f +PCI_HEADER_TYPE_NORMAL = 0 +PCI_HEADER_TYPE_BRIDGE = 1 +PCI_HEADER_TYPE_CARDBUS = 2 + PCI_CAPABILITY_LIST = 0x34 PCI_CB_BRIDGE_CONTROL = 0x3e PCI_BRIDGE_CTL_BUS_RESET= 0x40 @@ -56,6 +62,12 @@ PCI_EXP_DEVCAP_FLR = (0x1 << 28) PCI_EXP_DEVCAP_FLR = (0x1 << 28) PCI_EXP_DEVCTL = 0x8 PCI_EXP_DEVCTL_FLR = (0x1 << 15) + +PCI_CAP_ID_PM = 0x01 +PCI_PM_CTRL = 4 +PCI_PM_CTRL_NO_SOFT_RESET = 0x0004 +PCI_PM_CTRL_STATE_MASK = 0x0003 +PCI_D3hot = 3 PCI_CAP_ID_AF = 0x13 PCI_AF_CAPs = 0x3 @@ -105,15 +117,22 @@ def parse_hex(val): return None def parse_pci_name(pci_name_string): - # Format: xxxx:xx:xx:x - s = pci_name_string - s = s.split(':') - dom = parse_hex(s[0]) - bus = parse_hex(s[1]) - s = s[2].split('.') - dev = parse_hex(s[0]) 
- func = parse_hex(s[1]) - return (dom, bus, dev, func) + pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" + \ + r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + \ + r"(?P<slot>[0-9a-fA-F]{1,2})[.,]" + \ + r"(?P<func>[0-7])$", pci_name_string) + if pci_match is None: + raise PciDeviceParseError(('Failed to parse pci device name: %s' % + pci_name_string)) + pci_dev_info = pci_match.groupdict('0') + + domain = parse_hex(pci_dev_info['domain']) + bus = parse_hex(pci_dev_info['bus']) + slot = parse_hex(pci_dev_info['slot']) + func = parse_hex(pci_dev_info['func']) + + return (domain, bus, slot, func) + def find_sysfs_mnt(): global sysfs_mnt_point @@ -169,14 +188,14 @@ def create_lspci_info(): # Execute 'lspci' command and parse the result. # If the command does not exist, lspci_info will be kept blank ({}). - for paragraph in os.popen(LSPCI_CMD + ' -vmmD').read().split('\n\n'): + for paragraph in os.popen(LSPCI_CMD + ' -vmm').read().split('\n\n'): device_name = None device_info = {} for line in paragraph.split('\n'): try: (opt, value) = line.split(':\t') if opt == 'Slot': - device_name = value + device_name = PCI_DEV_FORMAT_STR % parse_pci_name(value) else: device_info[opt] = value except: @@ -246,18 +265,8 @@ def transform_list(target, src): return result def check_FLR_capability(dev_list): - i = len(dev_list) - if i == 0: + if len(dev_list) == 0: return [] - i = i - 1; - while i >= 0: - dev = dev_list[i] - if dev.bus == 0: - if dev.dev_type == DEV_TYPE_PCIe_ENDPOINT and not dev.pcie_flr: - del dev_list[i] - elif dev.dev_type == DEV_TYPE_PCI and not dev.pci_af_flr: - del dev_list[i] - i = i - 1 pci_list = [] pci_dev_dict = {} @@ -270,6 +279,8 @@ def check_FLR_capability(dev_list): for pci in pci_list: if isinstance(pci, types.StringTypes): dev = pci_dev_dict[pci] + if dev.bus == 0: + continue if dev.dev_type == DEV_TYPE_PCIe_ENDPOINT and not dev.pcie_flr: coassigned_pci_list = dev.find_all_the_multi_functions() need_transform = True @@ -336,13 +347,6 @@ class 
PciDeviceAssignmentError(Exception self.message = msg def __str__(self): return 'pci: impproper device assignment spcified: ' + \ - self.message - -class PciDeviceFlrError(PciDeviceAssignmentError): - def __init__(self,msg): - self.message = msg - def __str__(self): - return 'Can not find a suitable FLR method for the device(s): ' + \ self.message class PciDevice: @@ -480,6 +484,27 @@ class PciDevice: # Restore the config spaces restore_pci_conf_space((pci_list, cfg_list)) + def do_Dstate_transition(self): + pos = self.find_cap_offset(PCI_CAP_ID_PM) + if pos == 0: + return + + (pci_list, cfg_list) = save_pci_conf_space([self.name]) + + # Enter D3hot without soft reset + pm_ctl = self.pci_conf_read32(pos + PCI_PM_CTRL) + pm_ctl |= PCI_PM_CTRL_NO_SOFT_RESET + pm_ctl &= ~PCI_PM_CTRL_STATE_MASK + pm_ctl |= PCI_D3hot + self.pci_conf_write32(pos + PCI_PM_CTRL, pm_ctl) + time.sleep(0.010) + + # From D3hot to D0 + self.pci_conf_write32(pos + PCI_PM_CTRL, 0) + time.sleep(0.010) + + restore_pci_conf_space((pci_list, cfg_list)) + def find_all_the_multi_functions(self): sysfs_mnt = find_sysfs_mnt() pci_names = os.popen('ls ' + sysfs_mnt + SYSFS_PCI_DEVS_PATH).read() @@ -650,13 +675,16 @@ class PciDevice: time.sleep(0.200) restore_pci_conf_space((pci_list, cfg_list)) else: - funcs = self.find_all_the_multi_functions() - self.devs_check_driver(funcs) - - parent = '%04x:%02x:%02x.%01x' % self.find_parent() - - # Do Secondary Bus Reset. - self.do_secondary_bus_reset(parent, funcs) + if self.bus == 0: + self.do_Dstate_transition() + else: + funcs = self.find_all_the_multi_functions() + self.devs_check_driver(funcs) + + parent = '%04x:%02x:%02x.%01x' % self.find_parent() + + # Do Secondary Bus Reset. + self.do_secondary_bus_reset(parent, funcs) # PCI devices else: # For PCI device on host bus, we test "PCI Advanced Capabilities". 
@@ -669,9 +697,7 @@ class PciDevice: restore_pci_conf_space((pci_list, cfg_list)) else: if self.bus == 0: - err_msg = 'pci: %s is not assignable: it is on bus 0, '+ \ - 'but it has no PCI Advanced Capabilities.' - raise PciDeviceFlrError(err_msg % self.name) + self.do_Dstate_transition() else: devs = self.find_coassigned_devices(False) # Remove the element 0 which is a bridge @@ -690,12 +716,24 @@ class PciDevice: self.name+SYSFS_PCI_DEV_CONFIG_PATH try: conf_file = open(path, 'rb') + conf_file.seek(PCI_HEADER_TYPE) + header_type = ord(conf_file.read(1)) & PCI_HEADER_TYPE_MASK + if header_type == PCI_HEADER_TYPE_CARDBUS: + return conf_file.seek(PCI_STATUS_OFFSET) status = ord(conf_file.read(1)) if status&PCI_STATUS_CAP_MASK: conf_file.seek(PCI_CAP_OFFSET) capa_pointer = ord(conf_file.read(1)) + capa_count = 0 while capa_pointer: + if capa_pointer < 0x40: + raise PciDeviceParseError( + ('Broken capability chain: %s' % self.name)) + capa_count += 1 + if capa_count > 96: + raise PciDeviceParseError( + ('Looped capability chain: %s' % self.name)) conf_file.seek(capa_pointer) capa_id = ord(conf_file.read(1)) capa_pointer = ord(conf_file.read(1)) diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/util/utils.py --- a/tools/python/xen/util/utils.py Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/util/utils.py Thu Aug 07 11:57:34 2008 +0900 @@ -1,6 +1,50 @@ import traceback import traceback import sys +import os def exception_string(e): (ty,v,tb) = sys.exc_info() return traceback.format_exception_only(ty,v) + +def daemonize(prog, args, stdin_tmpfile=None): + """Runs a program as a daemon with the list of arguments. Returns the PID + of the daemonized program, or returns 0 on error. 
+ """ + r, w = os.pipe() + pid = os.fork() + + if pid == 0: + os.close(r) + w = os.fdopen(w, 'w') + os.setsid() + try: + pid2 = os.fork() + except: + pid2 = None + if pid2 == 0: + os.chdir("/") + null_fd = os.open("/dev/null", os.O_RDWR) + if stdin_tmpfile is not None: + os.dup2(stdin_tmpfile.fileno(), 0) + else: + os.dup2(null_fd, 0) + os.dup2(null_fd, 1) + os.dup2(null_fd, 2) + for fd in range(3, 256): + try: + os.close(fd) + except: + pass + os.execvp(prog, args) + os._exit(1) + else: + w.write(str(pid2 or 0)) + w.close() + os._exit(0) + os.close(w) + r = os.fdopen(r) + daemon_pid = int(r.read()) + r.close() + os.waitpid(pid, 0) + return daemon_pid + diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Aug 07 11:57:34 2008 +0900 @@ -599,14 +599,17 @@ class XendDomainInfo: new_dev['func']) bdf = xc.test_assign_device(self.domid, pci_str) if bdf != 0: + if bdf == -1: + raise VmError("failed to assign device: maybe the platform" + " doesn't support VT-d, or VT-d isn't enabled" + " properly?") bus = (bdf >> 16) & 0xff devfn = (bdf >> 8) & 0xff dev = (devfn >> 3) & 0x1f func = devfn & 0x7 - raise VmError("Fail to hot insert device(%x:%x.%x): maybe VT-d is " - "not enabled, or the device is not exist, or it " - "has already been assigned to other domain" - % (bus, dev, func)) + raise VmError("fail to assign device(%x:%x.%x): maybe it has" + " already been assigned to other domain, or maybe" + " it doesn't exist." 
% (bus, dev, func)) bdf_str = "%s:%s:%s.%s@%s" % (new_dev['domain'], new_dev['bus'], @@ -635,7 +638,10 @@ class XendDomainInfo: self._waitForDevice(dev_type, devid) except VmError, ex: del self.info['devices'][dev_uuid] - if dev_type == 'tap': + if dev_type == 'pci': + for dev in dev_config_dict['devs']: + XendAPIStore.deregister(dev['uuid'], 'DPCI') + elif dev_type == 'tap': self.info['vbd_refs'].remove(dev_uuid) else: self.info['%s_refs' % dev_type].remove(dev_uuid) @@ -2086,14 +2092,17 @@ class XendDomainInfo: if hvm and pci_str: bdf = xc.test_assign_device(self.domid, pci_str) if bdf != 0: + if bdf == -1: + raise VmError("failed to assign device: maybe the platform" + " doesn't support VT-d, or VT-d isn't enabled" + " properly?") bus = (bdf >> 16) & 0xff devfn = (bdf >> 8) & 0xff dev = (devfn >> 3) & 0x1f func = devfn & 0x7 - raise VmError("Fail to assign device(%x:%x.%x): maybe VT-d is " - "not enabled, or the device is not exist, or it " - "has already been assigned to other domain" - % (bus, dev, func)) + raise VmError("fail to assign device(%x:%x.%x): maybe it has" + " already been assigned to other domain, or maybe" + " it doesn't exist." % (bus, dev, func)) # register the domain in the list from xen.xend import XendDomain @@ -2374,6 +2383,9 @@ class XendDomainInfo: def destroy(self): """Cleanup VM and destroy domain. 
Nothrow guarantee.""" + if self.domid is None: + return + log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid)) paths = self._prepare_phantom_paths() diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/xend/image.py Thu Aug 07 11:57:34 2008 +0900 @@ -114,7 +114,7 @@ class ImageHandler: self.display = vmConfig['platform'].get('display') self.xauthority = vmConfig['platform'].get('xauthority') - self.vncconsole = vmConfig['platform'].get('vncconsole') + self.vncconsole = int(vmConfig['platform'].get('vncconsole', 0)) self.dmargs = self.parseDeviceModelArgs(vmConfig) self.pid = None rtc_timeoffset = vmConfig['platform'].get('rtc_timeoffset') @@ -249,10 +249,6 @@ class ImageHandler: # xm config file def parseDeviceModelArgs(self, vmConfig): ret = ["-domain-name", str(self.vm.info['name_label'])] - - # Tell QEMU how large the guest's memory allocation is - # to help it when loading the initrd (if neccessary) - ret += ["-m", str(self.getRequiredInitialReservation() / 1024)] # Find RFB console device, and if it exists, make QEMU enable # the VNC console. 
@@ -777,6 +773,10 @@ class HVMImageHandler(ImageHandler): ret.append("tap,vlan=%d,ifname=tap%d.%d,bridge=%s" % (nics, self.vm.getDomid(), nics-1, bridge)) + if nics == 0: + ret.append("-net") + ret.append("none") + return ret def getDeviceModelArgs(self, restore = False): diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/xend/server/pciif.py Thu Aug 07 11:57:34 2008 +0900 @@ -375,17 +375,34 @@ class PciController(DevController): raise VmError("pci: failed to locate device and "+ "parse it's resources - "+str(e)) if (dev.dev_type == DEV_TYPE_PCIe_ENDPOINT) and not dev.pcie_flr: - funcs = dev.find_all_the_multi_functions() - for f in funcs: - if not f in pci_str_list: - err_msg = 'pci: % must be co-assigned to guest with %s' - raise VmError(err_msg % (f, dev.name)) + if dev.bus == 0: + # We cope with this case by using the Dstate transition + # method for now. + err_msg = 'pci: %s: it is on bus 0, but has no PCIe' +\ + ' FLR Capability. Will try the Dstate transition'+\ + ' method if available.' + log.warn(err_msg % dev.name) + else: + funcs = dev.find_all_the_multi_functions() + for f in funcs: + if not f in pci_str_list: + (f_dom, f_bus, f_slot, f_func) = parse_pci_name(f) + f_pci_str = '0x%x,0x%x,0x%x,0x%x' % \ + (f_dom, f_bus, f_slot, f_func) + # f has been assigned to other guest? + if xc.test_assign_device(0, f_pci_str) != 0: + err_msg = 'pci: %s must be co-assigned to' + \ + ' the same guest with %s' + raise VmError(err_msg % (f, dev.name)) elif dev.dev_type == DEV_TYPE_PCI: if dev.bus == 0: if not dev.pci_af_flr: - err_msg = 'pci: %s is not assignable: it is on ' + \ - 'bus 0, but lacks of FLR capability' - raise VmError(err_msg % dev.name) + # We cope with this case by using the Dstate transition + # method for now. + err_msg = 'pci: %s: it is on bus 0, but has no PCI' +\ + ' Advanced Capabilities for FLR. 
Will try the'+\ + ' Dstate transition method if available.' + log.warn(err_msg % dev.name) else: # All devices behind the uppermost PCI/PCI-X bridge must be\ # co-assigned to the same guest. @@ -395,8 +412,14 @@ class PciController(DevController): for s in devs_str: if not s in pci_str_list: - err_msg = 'pci: %s must be co-assigned to guest with %s' - raise VmError(err_msg % (s, dev.name)) + (s_dom, s_bus, s_slot, s_func) = parse_pci_name(s) + s_pci_str = '0x%x,0x%x,0x%x,0x%x' % \ + (s_dom, s_bus, s_slot, s_func) + # s has been assigned to other guest? + if xc.test_assign_device(0, s_pci_str) != 0: + err_msg = 'pci: %s must be co-assigned to the'+\ + ' same guest with %s' + raise VmError(err_msg % (s, dev.name)) for (domain, bus, slot, func) in pci_dev_list: self.setupOneDevice(domain, bus, slot, func) diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/console.py --- a/tools/python/xen/xm/console.py Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/xm/console.py Thu Aug 07 11:57:34 2008 +0900 @@ -15,10 +15,69 @@ # Copyright (C) 2005 XenSource Ltd #============================================================================ +import xen.util.auxbin +import xen.lowlevel.xs +import os +import sys +import signal +from xen.util import utils XENCONSOLE = "xenconsole" -import xen.util.auxbin - def execConsole(domid): xen.util.auxbin.execute(XENCONSOLE, [str(domid)]) + + +class OurXenstoreConnection: + def __init__(self): + self.handle = xen.lowlevel.xs.xs() + def read_eventually(self, path): + watch = None + trans = None + try: + signal.alarm(10) + watch = self.handle.watch(path, None) + while True: + result = self.handle.read('0', path) + if result is not None: + signal.alarm(0) + return result + self.handle.read_watch() + finally: + signal.alarm(0) + if watch is not None: self.handle.unwatch(path, watch) + def read_maybe(self, path): + return self.handle.read('0', path) + +def runVncViewer(domid, do_autopass, do_daemonize=False): + xs = OurXenstoreConnection() 
+ d = '/local/domain/%d/' % domid + vnc_port = xs.read_eventually(d + 'console/vnc-port') + vfb_backend = xs.read_maybe(d + 'device/vfb/0/backend') + vnc_listen = None + vnc_password = None + vnc_password_tmpfile = None + cmdl = ['vncviewer'] + if vfb_backend is not None: + vnc_listen = xs.read_maybe(vfb_backend + '/vnclisten') + if do_autopass: + vnc_password = xs.read_maybe(vfb_backend + '/vncpasswd') + if vnc_password is not None: + cmdl.append('-autopass') + vnc_password_tmpfile = os.tmpfile() + print >>vnc_password_tmpfile, vnc_password + vnc_password_tmpfile.seek(0) + vnc_password_tmpfile.flush() + if vnc_listen is None: + vnc_listen = 'localhost' + cmdl.append('%s:%d' % (vnc_listen, int(vnc_port) - 5900)) + if do_daemonize: + pid = utils.daemonize('vncviewer', cmdl, vnc_password_tmpfile) + if pid == 0: + puts >>sys.stderr, 'failed to invoke vncviewer' + os._exit(-1) + else: + print 'invoking ', ' '.join(cmdl) + if vnc_password_tmpfile is not None: + os.dup2(vnc_password_tmpfile.fileno(), 0) + os.execvp('vncviewer', cmdl) diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/create.dtd --- a/tools/python/xen/xm/create.dtd Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/xm/create.dtd Thu Aug 07 11:57:34 2008 +0900 @@ -39,6 +39,7 @@ vbd*, vif*, vtpm*, + pci*, console*, platform*, vcpu_param*, @@ -79,6 +80,13 @@ <!ELEMENT vtpm (name*)> <!ATTLIST vtpm backend CDATA #REQUIRED> + +<!ELEMENT pci EMPTY> +<!ATTLIST pci domain CDATA #REQUIRED + bus CDATA #REQUIRED + slot CDATA #REQUIRED + func CDATA #REQUIRED + vslt CDATA #IMPLIED> <!ELEMENT console (other_config*)> <!ATTLIST console protocol (vt100|rfb|rdp) #REQUIRED> diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/xm/create.py Thu Aug 07 11:57:34 2008 +0900 @@ -36,10 +36,12 @@ from xen.util import vscsi_util from xen.util import vscsi_util import xen.util.xsm.xsm as security from xen.xm.main import 
serverType, SERVER_XEN_API, get_single_vm +from xen.util import utils from xen.xm.opts import * from main import server +from main import domain_name_to_domid import console @@ -118,6 +120,14 @@ gopts.opt('console_autoconnect', short=' fn=set_true, default=0, use="Connect to the console after the domain is created.") +gopts.opt('vncviewer', + fn=set_true, default=0, + use="Connect to the VNC display after the domain is created.") + +gopts.opt('vncviewer-autopass', + fn=set_true, default=0, + use="Pass VNC password to viewer via stdin and -autopass.") + gopts.var('vncpasswd', val='NAME', fn=set_value, default=None, use="Password for VNC console on HVM domain.") @@ -128,7 +138,7 @@ gopts.var('vncviewer', val='no|yes', "The address of the vncviewer is passed to the domain on the " "kernel command line using 'VNC_SERVER=<host>:<port>'. The port " "used by vnc is 5500 + DISPLAY. A display value with a free port " - "is chosen if possible.\nOnly valid when vnc=1.") + "is chosen if possible.\nOnly valid when vnc=1.\nDEPRECATED") gopts.var('vncconsole', val='no|yes', fn=set_bool, default=None, @@ -1108,44 +1118,6 @@ def choose_vnc_display(): return None vncpid = None -def daemonize(prog, args): - """Runs a program as a daemon with the list of arguments. Returns the PID - of the daemonized program, or returns 0 on error. - """ - r, w = os.pipe() - pid = os.fork() - - if pid == 0: - os.close(r) - w = os.fdopen(w, 'w') - os.setsid() - try: - pid2 = os.fork() - except: - pid2 = None - if pid2 == 0: - os.chdir("/") - for fd in range(0, 256): - try: - os.close(fd) - except: - pass - os.open("/dev/null", os.O_RDWR) - os.dup2(0, 1) - os.dup2(0, 2) - os.execvp(prog, args) - os._exit(1) - else: - w.write(str(pid2 or 0)) - w.close() - os._exit(0) - os.close(w) - r = os.fdopen(r) - daemon_pid = int(r.read()) - r.close() - os.waitpid(pid, 0) - return daemon_pid - def spawn_vnc(display): """Spawns a vncviewer that listens on the specified display. 
On success, returns the port that the vncviewer is listening on and sets the global @@ -1154,7 +1126,7 @@ def spawn_vnc(display): vncargs = (["vncviewer", "-log", "*:stdout:0", "-listen", "%d" % (VNC_BASE_PORT + display) ]) global vncpid - vncpid = daemonize("vncviewer", vncargs) + vncpid = utils.daemonize("vncviewer", vncargs) if vncpid == 0: return 0 @@ -1362,6 +1334,11 @@ def main(argv): elif not opts.is_xml: dom = make_domain(opts, config) + if opts.vals.vncviewer: + domid = domain_name_to_domid(sxp.child_value(config, 'name', -1)) + vncviewer_autopass = getattr(opts.vals,'vncviewer-autopass', False) + console.runVncViewer(domid, vncviewer_autopass, True) + def do_console(domain_name): cpid = os.fork() if cpid != 0: @@ -1373,13 +1350,7 @@ def do_console(domain_name): if os.WEXITSTATUS(rv) != 0: sys.exit(os.WEXITSTATUS(rv)) try: - # Acquire the console of the created dom - if serverType == SERVER_XEN_API: - domid = server.xenapi.VM.get_domid( - get_single_vm(domain_name)) - else: - dom = server.xend.domain(domain_name) - domid = int(sxp.child_value(dom, 'domid', '-1')) + domid = domain_name_to_domid(domain_name) console.execConsole(domid) except: pass diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/xm/main.py Thu Aug 07 11:57:34 2008 +0900 @@ -64,6 +64,9 @@ from xen.xend import XendOptions from xen.xend import XendOptions xoptions = XendOptions.instance() +import signal +signal.signal(signal.SIGINT, signal.SIG_DFL) + # getopt.gnu_getopt is better, but only exists in Python 2.3+. Use # getopt.getopt if gnu_getopt is not available. This will mean that options # may only be specified before positional arguments. 
@@ -97,6 +100,8 @@ SUBCOMMAND_HELP = { 'console' : ('[-q|--quiet] <Domain>', 'Attach to <Domain>\'s console.'), + 'vncviewer' : ('[--[vncviewer-]autopass] <Domain>', + 'Attach to <Domain>\'s VNC server.'), 'create' : ('<ConfigFile> [options] [vars]', 'Create a domain based on <ConfigFile>.'), 'destroy' : ('<Domain>', @@ -243,6 +248,10 @@ SUBCOMMAND_OPTIONS = { 'console': ( ('-q', '--quiet', 'Do not print an error message if the domain does not exist'), ), + 'vncviewer': ( + ('', '--autopass', 'Pass VNC password to viewer via stdin and -autopass'), + ('', '--vncviewer-autopass', '(consistency alias for --autopass)'), + ), 'dmesg': ( ('-c', '--clear', 'Clear dmesg buffer as well as printing it'), ), @@ -260,6 +269,8 @@ SUBCOMMAND_OPTIONS = { 'start': ( ('-p', '--paused', 'Do not unpause domain after starting it'), ('-c', '--console_autoconnect', 'Connect to the console after the domain is created'), + ('', '--vncviewer', 'Connect to display via VNC after the domain is created'), + ('', '--vncviewer-autopass', 'Pass VNC password to viewer via stdin and -autopass'), ), 'resume': ( ('-p', '--paused', 'Do not unpause domain after resuming it'), @@ -277,6 +288,7 @@ SUBCOMMAND_OPTIONS = { common_commands = [ "console", + "vncviewer", "create", "new", "delete", @@ -304,6 +316,7 @@ common_commands = [ domain_commands = [ "console", + "vncviewer", "create", "new", "delete", @@ -1185,14 +1198,20 @@ def xm_start(args): paused = False console_autoconnect = False + vncviewer = False + vncviewer_autopass = False try: - (options, params) = getopt.gnu_getopt(args, 'cp', ['console_autoconnect','paused']) + (options, params) = getopt.gnu_getopt(args, 'cp', ['console_autoconnect','paused','vncviewer','vncviewer-autopass']) for (k, v) in options: if k in ('-p', '--paused'): paused = True if k in ('-c', '--console_autoconnect'): console_autoconnect = True + if k in ('--vncviewer'): + vncviewer = True + if k in ('--vncviewer-autopass'): + vncviewer_autopass = True if len(params) != 1: 
raise OptionError("Expects 1 argument") @@ -1204,6 +1223,9 @@ def xm_start(args): if console_autoconnect: start_do_console(dom) + + if console_autoconnect: + console.runVncViewer(domid, vncviewer_autopass, True) try: if serverType == SERVER_XEN_API: @@ -1783,6 +1805,40 @@ def xm_console(args): console.execConsole(domid) +def domain_name_to_domid(domain_name): + if serverType == SERVER_XEN_API: + domid = server.xenapi.VM.get_domid( + get_single_vm(domain_name)) + else: + dom = server.xend.domain(domain_name) + domid = int(sxp.child_value(dom, 'domid', '-1')) + return domid + +def xm_vncviewer(args): + autopass = False; + + try: + (options, params) = getopt.gnu_getopt(args, '', ['autopass','vncviewer-autopass']) + except getopt.GetoptError, opterr: + err(opterr) + usage('vncviewer') + + for (k, v) in options: + if k in ['--autopass','--vncviewer-autopass']: + autopass = True + else: + assert False + + if len(params) != 1: + err('No domain given (or several parameters specified)') + usage('vncviewer') + + dom = params[0] + domid = domain_name_to_domid(dom) + + console.runVncViewer(domid, autopass) + + def xm_uptime(args): short_mode = 0 @@ -2102,7 +2158,23 @@ def xm_pci_list(args): dom = params[0] - devs = server.xend.domain.getDeviceSxprs(dom, 'pci') + devs = [] + if serverType == SERVER_XEN_API: + for dpci_ref in server.xenapi.VM.get_DPCIs(get_single_vm(dom)): + ppci_ref = server.xenapi.DPCI.get_PPCI(dpci_ref) + ppci_record = server.xenapi.PPCI.get_record(ppci_ref) + dev = { + "domain": "0x%04x" % int(ppci_record["domain"]), + "bus": "0x%02x" % int(ppci_record["bus"]), + "slot": "0x%02x" % int(ppci_record["slot"]), + "func": "0x%01x" % int(ppci_record["func"]), + "vslt": "0x%02x" % \ + int(server.xenapi.DPCI.get_hotplug_slot(dpci_ref)) + } + devs.append(dev) + + else: + devs = server.xend.domain.getDeviceSxprs(dom, 'pci') if len(devs) == 0: return @@ -2362,7 +2434,34 @@ def xm_pci_attach(args): def xm_pci_attach(args): arg_check(args, 'pci-attach', 2, 3) (dom, pci) 
= parse_pci_configuration(args, 'Initialising') - server.xend.domain.device_configure(dom, pci) + + if serverType == SERVER_XEN_API: + + pci_dev = sxp.children(pci, 'dev')[0] + domain = int(sxp.child_value(pci_dev, 'domain'), 16) + bus = int(sxp.child_value(pci_dev, 'bus'), 16) + slot = int(sxp.child_value(pci_dev, 'slot'), 16) + func = int(sxp.child_value(pci_dev, 'func'), 16) + vslt = int(sxp.child_value(pci_dev, 'vslt'), 16) + name = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func) + + target_ref = None + for ppci_ref in server.xenapi.PPCI.get_all(): + if name == server.xenapi.PPCI.get_name(ppci_ref): + target_ref = ppci_ref + break + if target_ref is None: + raise OptionError("Device %s not found" % name) + + dpci_record = { + "VM": get_single_vm(dom), + "PPCI": target_ref, + "hotplug_slot": vslt + } + server.xenapi.DPCI.create(dpci_record) + + else: + server.xend.domain.device_configure(dom, pci) def xm_scsi_attach(args): xenapi_unsupported() @@ -2462,7 +2561,29 @@ def xm_pci_detach(args): def xm_pci_detach(args): arg_check(args, 'pci-detach', 2) (dom, pci) = parse_pci_configuration(args, 'Closing') - server.xend.domain.device_configure(dom, pci) + + if serverType == SERVER_XEN_API: + + pci_dev = sxp.children(pci, 'dev')[0] + domain = int(sxp.child_value(pci_dev, 'domain'), 16) + bus = int(sxp.child_value(pci_dev, 'bus'), 16) + slot = int(sxp.child_value(pci_dev, 'slot'), 16) + func = int(sxp.child_value(pci_dev, 'func'), 16) + vslt = int(sxp.child_value(pci_dev, 'vslt'), 16) + name = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func) + + target_ref = None + for dpci_ref in server.xenapi.VM.get_DPCIs(get_single_vm(dom)): + ppci_ref = server.xenapi.DPCI.get_PPCI(dpci_ref) + if name == server.xenapi.PPCI.get_name(ppci_ref): + target_ref = ppci_ref + server.xenapi.DPCI.destroy(dpci_ref) + break + if target_ref is None: + raise OptionError("Device %s not assigned" % name) + + else: + server.xend.domain.device_configure(dom, pci) def xm_scsi_detach(args): 
xenapi_unsupported() @@ -2617,6 +2738,7 @@ commands = { "event-monitor": xm_event_monitor, # console commands "console": xm_console, + "vncviewer": xm_vncviewer, # xenstat commands "top": xm_top, # domain commands diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/shutdown.py --- a/tools/python/xen/xm/shutdown.py Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/xm/shutdown.py Thu Aug 07 11:57:34 2008 +0900 @@ -144,9 +144,10 @@ def main_dom(opts, args): def main_dom(opts, args): if len(args) == 0: opts.err('No domain parameter given') if len(args) > 1: opts.err('No multiple domain parameters allowed') - dom = sxp.child_value(server.xend.domain(args[0]), 'name') if serverType == SERVER_XEN_API: - dom = get_single_vm(dom) + dom = get_single_vm(args[0]) + else: + dom = sxp.child_value(server.xend.domain(args[0]), 'name') mode = shutdown_mode(opts) shutdown(opts, [ dom ], mode, opts.vals.wait) diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/xenapi_create.py --- a/tools/python/xen/xm/xenapi_create.py Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/python/xen/xm/xenapi_create.py Thu Aug 07 11:57:34 2008 +0900 @@ -369,6 +369,12 @@ class xenapi_create: self.create_consoles(vm_ref, consoles) + # Now create pcis + + pcis = vm.getElementsByTagName("pci") + + self.create_pcis(vm_ref, pcis) + return vm_ref except: server.xenapi.VM.destroy(vm_ref) @@ -389,7 +395,7 @@ class xenapi_create: "device": vbd.attributes["device"].value, "bootable": - vbd.attributes["bootable"].value == "True", + vbd.attributes["bootable"].value == "1", "mode": vbd.attributes["mode"].value, "type": @@ -493,6 +499,39 @@ class xenapi_create: return server.xenapi.console.create(console_record) + def create_pcis(self, vm_ref, pcis): + log(DEBUG, "create_pcis") + return map(lambda pci: self.create_pci(vm_ref, pci), pcis) + + def create_pci(self, vm_ref, pci): + log(DEBUG, "create_pci") + + domain = int(pci.attributes["domain"].value, 16) + bus = int(pci.attributes["bus"].value, 16) + slot = 
int(pci.attributes["slot"].value, 16) + func = int(pci.attributes["func"].value, 16) + name = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func) + + target_ref = None + for ppci_ref in server.xenapi.PPCI.get_all(): + if name == server.xenapi.PPCI.get_name(ppci_ref): + target_ref = ppci_ref + break + if target_ref is None: + log(DEBUG, "create_pci: pci device not found") + return None + + dpci_record = { + "VM": + vm_ref, + "PPCI": + target_ref, + "hotplug_slot": + int(pci.attributes["func"].value, 16) + } + + return server.xenapi.DPCI.create(dpci_record) + def get_child_by_name(exp, childname, default = None): try: return [child for child in sxp.children(exp) @@ -520,6 +559,9 @@ class sxp2xml: vfbs_sxp = map(lambda x: x[1], [device for device in devices if device[1][0] == "vfb"]) + + pcis_sxp = map(lambda x: x[1], [device for device in devices + if device[1][0] == "pci"]) # Create XML Document @@ -597,13 +639,15 @@ class sxp2xml: pv = document.createElement("pv") pv.attributes["kernel"] \ = get_child_by_name(image, "kernel", "") - pv.attributes["bootloader"] = "" + pv.attributes["bootloader"] \ + = get_child_by_name(config, "bootloader", "") pv.attributes["ramdisk"] \ = get_child_by_name(image, "ramdisk", "") pv.attributes["args"] \ = "root=" + get_child_by_name(image, "root", "") \ + " " + get_child_by_name(image, "args", "") - pv.attributes["bootloader_args"] = "" + pv.attributes["bootloader_args"] \ + = get_child_by_name(config, "bootloader_args","") vm.appendChild(pv) elif image[0] == "hvm": @@ -653,6 +697,12 @@ class sxp2xml: vtpms = map(lambda vtpm: self.extract_vtpm(vtpm, document), vtpms_sxp) map(vm.appendChild, vtpms) + + # And now the pcis + + pcis = self.extract_pcis(pcis_sxp, document) + + map(vm.appendChild, pcis) # Last but not least the consoles... 
@@ -821,7 +871,28 @@ class sxp2xml: return vfb - _eths = -1 + def extract_pcis(self, pcis_sxp, document): + + pcis = [] + + for pci_sxp in pcis_sxp: + for dev_sxp in sxp.children(pci_sxp, "dev"): + pci = document.createElement("pci") + + pci.attributes["domain"] \ + = get_child_by_name(dev_sxp, "domain", "0") + pci.attributes["bus"] \ + = get_child_by_name(dev_sxp, "bus", "0") + pci.attributes["slot"] \ + = get_child_by_name(dev_sxp, "slot", "0") + pci.attributes["func"] \ + = get_child_by_name(dev_sxp, "func", "0") + pci.attributes["vslt"] \ + = get_child_by_name(dev_sxp, "vslt", "0") + + pcis.append(pci) + + return pcis def mk_other_config(self, key, value, document): other_config = document.createElement("other_config") @@ -914,6 +985,8 @@ class sxp2xml: return platform_configs + _eths = -1 + def getFreshEthDevice(self): self._eths += 1 return "eth%i" % self._eths diff -r 7affdebb7a1e -r a39913db6e51 tools/xenmon/Makefile --- a/tools/xenmon/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/xenmon/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -33,6 +33,8 @@ install: build $(INSTALL_PROG) xenbaked $(DESTDIR)$(SBINDIR)/xenbaked $(INSTALL_PROG) xentrace_setmask $(DESTDIR)$(SBINDIR)/xentrace_setmask $(INSTALL_PROG) xenmon.py $(DESTDIR)$(SBINDIR)/xenmon.py + $(INSTALL_DIR) $(DESTDIR)$(DOCDIR) + $(INSTALL_DATA) README $(DESTDIR)$(DOCDIR)/README.xenmon .PHONY: clean clean: diff -r 7affdebb7a1e -r a39913db6e51 tools/xenstat/libxenstat/Makefile --- a/tools/xenstat/libxenstat/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/xenstat/libxenstat/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -15,7 +15,7 @@ XEN_ROOT=../../.. XEN_ROOT=../../.. 
include $(XEN_ROOT)/tools/Rules.mk -prefix=/usr +prefix=$(PREFIX) includedir=$(prefix)/include libdir=$(prefix)/lib diff -r 7affdebb7a1e -r a39913db6e51 tools/xenstat/libxenstat/src/xenstat.c --- a/tools/xenstat/libxenstat/src/xenstat.c Thu Aug 07 11:47:34 2008 +0900 +++ b/tools/xenstat/libxenstat/src/xenstat.c Thu Aug 07 11:57:34 2008 +0900 @@ -109,7 +109,7 @@ xenstat_handle *xenstat_init(void) handle->xshandle = xs_daemon_open_readonly(); /* open handle to xenstore*/ if (handle->xshandle == NULL) { - perror("unable to open xenstore\n"); + perror("unable to open xenstore"); xc_interface_close(handle->xc_handle); free(handle); return NULL; diff -r 7affdebb7a1e -r a39913db6e51 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c --- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Thu Aug 07 11:47:34 2008 +0900 +++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Thu Aug 07 11:57:34 2008 +0900 @@ -71,7 +71,7 @@ static int bp_suspend(void) return suspend_cancelled; } -int __xen_suspend(int fast_suspend, void (*resume_notifier)(void)) +int __xen_suspend(int fast_suspend, void (*resume_notifier)(int)) { int err, suspend_cancelled, nr_cpus; struct ap_suspend_info info; @@ -101,7 +101,7 @@ int __xen_suspend(int fast_suspend, void local_irq_disable(); suspend_cancelled = bp_suspend(); - resume_notifier(); + resume_notifier(suspend_cancelled); local_irq_enable(); smp_mb(); diff -r 7affdebb7a1e -r a39913db6e51 xen/Makefile --- a/xen/Makefile Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/Makefile Thu Aug 07 11:57:34 2008 +0900 @@ -1,8 +1,8 @@ # This is the correct place to edit the build version. # All other places this is stored (eg. compile.h) should be autogenerated. 
-export XEN_VERSION = 3 -export XEN_SUBVERSION = 3 -export XEN_EXTRAVERSION ?= -unstable$(XEN_VENDORVERSION) +export XEN_VERSION = 4 +export XEN_SUBVERSION = 0 +export XEN_EXTRAVERSION ?= .0-rc3-pre$(XEN_VENDORVERSION) export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) -include xen-version diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/ia64/xen/domain.c Thu Aug 07 11:57:34 2008 +0900 @@ -2212,8 +2212,9 @@ int __init construct_dom0(struct domain return 0; } -void machine_restart(void) -{ +void machine_restart(unsigned int delay_millisecs) +{ + mdelay(delay_millisecs); console_start_sync(); if (running_on_sim) printk ("machine_restart called. spinning...\n"); diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/ia64/xen/irq.c --- a/xen/arch/ia64/xen/irq.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/ia64/xen/irq.c Thu Aug 07 11:57:34 2008 +0900 @@ -459,7 +459,7 @@ int pirq_guest_bind(struct vcpu *v, int return rc; } -int pirq_guest_unbind(struct domain *d, int irq) +void pirq_guest_unbind(struct domain *d, int irq) { irq_desc_t *desc = &irq_desc[irq]; irq_guest_action_t *action; @@ -493,7 +493,6 @@ int pirq_guest_unbind(struct domain *d, } spin_unlock_irqrestore(&desc->lock, flags); - return 0; } void diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/acpi/cpufreq/utility.c --- a/xen/arch/x86/acpi/cpufreq/utility.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/acpi/cpufreq/utility.c Thu Aug 07 11:57:34 2008 +0900 @@ -296,12 +296,11 @@ void cpufreq_suspend(void) { int cpu; - /* to protect the case when Px was controlled by dom0-kernel */ - /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */ + /* to protect the case when Px was not controlled by xen */ for_each_online_cpu(cpu) { struct processor_performance *perf = &processor_pminfo[cpu].perf; - if (!perf->init) + if (!(perf->init & XEN_PX_INIT)) return; } @@ -316,14 +315,13 @@ int 
cpufreq_resume(void) { int cpu, ret = 0; - /* 1. to protect the case when Px was controlled by dom0-kernel */ - /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */ + /* 1. to protect the case when Px was not controlled by xen */ /* 2. set state and resume flag to sync cpu to right state and freq */ for_each_online_cpu(cpu) { struct processor_performance *perf = &processor_pminfo[cpu].perf; struct cpufreq_policy *policy = &xen_px_policy[cpu]; - if (!perf->init) + if (!(perf->init & XEN_PX_INIT)) goto err; perf->state = 0; policy->resume = 1; diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/acpi/pmstat.c --- a/xen/arch/x86/acpi/pmstat.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/acpi/pmstat.c Thu Aug 07 11:57:34 2008 +0900 @@ -52,9 +52,9 @@ int do_get_pm_info(struct xen_sysctl_get struct pm_px *pxpt = &px_statistic_data[op->cpuid]; struct processor_pminfo *pmpt = &processor_pminfo[op->cpuid]; - /* to protect the case when Px was controlled by dom0-kernel */ - /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */ - if ( !pmpt->perf.init && (op->type & PMSTAT_CATEGORY_MASK) == PMSTAT_PX ) + /* to protect the case when Px was not controlled by xen */ + if ( (!(pmpt->perf.init & XEN_PX_INIT)) && + (op->type & PMSTAT_CATEGORY_MASK) == PMSTAT_PX ) return -EINVAL; if ( !cpu_online(op->cpuid) ) diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/domain.c Thu Aug 07 11:57:34 2008 +0900 @@ -1811,7 +1811,6 @@ int domain_relinquish_resources(struct d if ( ret ) return ret; #endif - WARN_ON(d->xenheap_pages); break; default: diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/domain_build.c Thu Aug 07 11:57:34 2008 +0900 @@ -757,6 +757,7 @@ int __init construct_dom0( si->shared_info = virt_to_maddr(d->shared_info); si->flags = SIF_PRIVILEGED | 
SIF_INITDOMAIN; + si->flags |= (xen_processor_pmbits << 8) & SIF_PM_MASK; si->pt_base = vpt_start + 2 * PAGE_SIZE * !!is_pv_32on64_domain(d); si->nr_pt_frames = nr_pt_pages; si->mfn_list = vphysmap_start; diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/domctl.c Thu Aug 07 11:57:34 2008 +0900 @@ -661,6 +661,7 @@ long arch_do_domctl( if ( !iommu_pv_enabled && !is_hvm_domain(d) ) { ret = -ENOSYS; + put_domain(d); break; } @@ -669,12 +670,16 @@ long arch_do_domctl( gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: " "%x:%x:%x already assigned, or non-existent\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + put_domain(d); break; } ret = assign_device(d, bus, devfn); - gdprintk(XENLOG_INFO, "XEN_DOMCTL_assign_device: bdf = %x:%x:%x\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + if ( ret ) + gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: " + "assign device (%x:%x:%x) failed\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + put_domain(d); } break; @@ -701,11 +706,15 @@ long arch_do_domctl( if ( !iommu_pv_enabled && !is_hvm_domain(d) ) { ret = -ENOSYS; + put_domain(d); break; } if ( !device_assigned(bus, devfn) ) - break; + { + put_domain(d); + break; + } ret = 0; deassign_device(d, bus, devfn); diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/hvm/io.c Thu Aug 07 11:57:34 2008 +0900 @@ -284,7 +284,7 @@ void dpci_ioport_write(uint32_t mport, i data = p->data; if ( p->data_is_ptr ) (void)hvm_copy_from_guest_phys( - &data, p->data + (sign * i & p->size), p->size); + &data, p->data + (sign * i * p->size), p->size); switch ( p->size ) { diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/hvm/stdvga.c --- a/xen/arch/x86/hvm/stdvga.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/hvm/stdvga.c Thu Aug 07 11:57:34 2008 +0900 @@ -33,6 +33,10 @@ #include <xen/domain_page.h> #include <asm/hvm/support.h> #include 
<xen/numa.h> +#include <xen/paging.h> + +#define VGA_MEM_BASE 0xa0000 +#define VGA_MEM_SIZE 0x20000 #define PAT(x) (x) static const uint32_t mask16[16] = { @@ -464,6 +468,7 @@ static int mmio_move(struct hvm_hw_stdvg { int i; int sign = p->df ? -1 : 1; + p2m_type_t p2mt; if ( p->data_is_ptr ) { @@ -473,7 +478,19 @@ static int mmio_move(struct hvm_hw_stdvg for ( i = 0; i < p->count; i++ ) { tmp = stdvga_mem_read(addr, p->size); - hvm_copy_to_guest_phys(data, &tmp, p->size); + if ( hvm_copy_to_guest_phys(data, &tmp, p->size) == + HVMCOPY_bad_gfn_to_mfn ) + { + (void)gfn_to_mfn_current(data >> PAGE_SHIFT, &p2mt); + /* + * The only case we handle is vga_mem <-> vga_mem. + * Anything else disables caching and leaves it to qemu-dm. + */ + if ( (p2mt != p2m_mmio_dm) || (data < VGA_MEM_BASE) || + ((data + p->size) > (VGA_MEM_BASE + VGA_MEM_SIZE)) ) + return 0; + stdvga_mem_write(data, tmp, p->size); + } data += sign * p->size; addr += sign * p->size; } @@ -483,7 +500,15 @@ static int mmio_move(struct hvm_hw_stdvg uint32_t addr = p->addr, data = p->data, tmp; for ( i = 0; i < p->count; i++ ) { - hvm_copy_from_guest_phys(&tmp, data, p->size); + if ( hvm_copy_from_guest_phys(&tmp, data, p->size) == + HVMCOPY_bad_gfn_to_mfn ) + { + (void)gfn_to_mfn_current(data >> PAGE_SHIFT, &p2mt); + if ( (p2mt != p2m_mmio_dm) || (data < VGA_MEM_BASE) || + ((data + p->size) > (VGA_MEM_BASE + VGA_MEM_SIZE)) ) + return 0; + tmp = stdvga_mem_read(data, p->size); + } stdvga_mem_write(addr, tmp, p->size); data += sign * p->size; addr += sign * p->size; @@ -536,7 +561,8 @@ static int stdvga_intercept_mmio(ioreq_t { case IOREQ_TYPE_COPY: buf = mmio_move(s, p); - break; + if ( buf ) + break; default: gdprintk(XENLOG_WARNING, "unsupported mmio request type:%d " "addr:0x%04x data:0x%04x size:%d count:%d state:%d " @@ -588,7 +614,7 @@ void stdvga_init(struct domain *d) register_portio_handler(d, 0x3ce, 2, stdvga_intercept_pio); /* MMIO. 
*/ register_buffered_io_handler( - d, 0xa0000, 0x20000, stdvga_intercept_mmio); + d, VGA_MEM_BASE, VGA_MEM_SIZE, stdvga_intercept_mmio); } } diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/hvm/svm/svm.c Thu Aug 07 11:57:34 2008 +0900 @@ -874,9 +874,12 @@ static void svm_do_nested_pgfault(paddr_ mfn_t mfn; unsigned long gfn = gpa >> PAGE_SHIFT; - /* If this GFN is emulated MMIO, pass the fault to the mmio handler */ + /* + * If this GFN is emulated MMIO or marked as read-only, pass the fault + * to the mmio handler. + */ mfn = gfn_to_mfn_current(gfn, &p2mt); - if ( p2mt == p2m_mmio_dm ) + if ( (p2mt == p2m_mmio_dm) || (p2mt == p2m_ram_ro) ) { if ( !handle_mmio() ) hvm_inject_exception(TRAP_gp_fault, 0, 0); diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Aug 07 11:57:34 2008 +0900 @@ -1971,7 +1971,7 @@ static void ept_handle_violation(unsigne } mfn = gfn_to_mfn(d, gfn, &t); - if ( p2m_is_ram(t) && paging_mode_log_dirty(d) ) + if ( (t != p2m_ram_ro) && p2m_is_ram(t) && paging_mode_log_dirty(d) ) { paging_mark_dirty(d, mfn_x(mfn)); p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw); diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/io_apic.c Thu Aug 07 11:57:34 2008 +0900 @@ -45,12 +45,12 @@ int (*ioapic_renumber_irq)(int ioapic, i int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; -int msi_irq_enable = 0; -boolean_param("msi_irq_enable", msi_irq_enable); +int msi_enable = 0; +boolean_param("msi", msi_enable); int domain_irq_to_vector(struct domain *d, int irq) { - if ( !msi_irq_enable ) + if ( !msi_enable ) return irq_to_vector(irq); else return d->arch.pirq_vector[irq]; @@ -58,7 +58,7 @@ int domain_irq_to_vector(struct domain * int 
domain_vector_to_irq(struct domain *d, int vector) { - if ( !msi_irq_enable ) + if ( !msi_enable ) return vector_to_irq(vector); else return d->arch.vector_pirq[vector]; diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/irq.c Thu Aug 07 11:57:34 2008 +0900 @@ -573,7 +573,7 @@ int pirq_guest_bind(struct vcpu *v, int return rc; } -int pirq_guest_unbind(struct domain *d, int irq) +void pirq_guest_unbind(struct domain *d, int irq) { unsigned int vector; irq_desc_t *desc; @@ -660,7 +660,6 @@ int pirq_guest_unbind(struct domain *d, out: spin_unlock_irqrestore(&desc->lock, flags); - return 0; } extern void dump_ioapic_irq_info(void); diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/mm.c Thu Aug 07 11:57:34 2008 +0900 @@ -1138,8 +1138,10 @@ static int alloc_l2_table(struct page_in for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { - if ( is_guest_l2_slot(d, type, i) && - unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) ) + if ( !is_guest_l2_slot(d, type, i) ) + continue; + + if ( unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) ) goto fail; adjust_guest_l2e(pl2e[i], d); @@ -1206,8 +1208,9 @@ static int alloc_l3_table(struct page_in d) ) goto fail; } - else if ( is_guest_l3_slot(i) && - unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) ) + else if ( !is_guest_l3_slot(i) ) + continue; + else if ( unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) ) goto fail; adjust_guest_l3e(pl3e[i], d); @@ -1222,8 +1225,12 @@ static int alloc_l3_table(struct page_in fail: MEM_LOG("Failure in alloc_l3_table: entry %d", i); while ( i-- > 0 ) - if ( is_guest_l3_slot(i) ) - put_page_from_l3e(pl3e[i], pfn); + { + if ( !is_guest_l3_slot(i) ) + continue; + unadjust_guest_l3e(pl3e[i], d); + put_page_from_l3e(pl3e[i], pfn); + } unmap_domain_page(pl3e); return 0; @@ -1242,8 +1249,10 @@ static int alloc_l4_table(struct page_in for ( i = 0; i < 
L4_PAGETABLE_ENTRIES; i++ ) { - if ( is_guest_l4_slot(d, i) && - unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) ) + if ( !is_guest_l4_slot(d, i) ) + continue; + + if ( unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) ) goto fail; adjust_guest_l4e(pl4e[i], d); @@ -1585,7 +1594,7 @@ static int mod_l3_entry(l3_pgentry_t *pl struct vcpu *curr = current; struct domain *d = curr->domain; struct page_info *l3pg = mfn_to_page(pfn); - int okay, rc = 1; + int rc = 1; if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) { @@ -1642,10 +1651,13 @@ static int mod_l3_entry(l3_pgentry_t *pl return 0; } - okay = create_pae_xen_mappings(d, pl3e); - BUG_ON(!okay); - - pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); + if ( likely(rc) ) + { + if ( !create_pae_xen_mappings(d, pl3e) ) + BUG(); + + pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); + } page_unlock(l3pg); put_page_from_l3e(ol3e, pfn); diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/mm/shadow/multi.c Thu Aug 07 11:57:34 2008 +0900 @@ -3359,7 +3359,7 @@ static int sh_page_fault(struct vcpu *v, gdprintk(XENLOG_DEBUG, "guest attempted write to read-only memory" " page. va page=%#lx, mfn=%#lx\n", va & PAGE_MASK, mfn_x(gmfn)); - goto emulate; /* skip over the instruction */ + goto emulate_readonly; /* skip over the instruction */ } /* In HVM guests, we force CR0.WP always to be set, so that the @@ -3404,6 +3404,11 @@ static int sh_page_fault(struct vcpu *v, goto done; } + /* + * Write from userspace to ro-mem needs to jump here to avoid getting + * caught by user-mode page-table check above. + */ + emulate_readonly: /* * We don't need to hold the lock for the whole emulation; we will * take it again when we write to the pagetables. 
@@ -4640,14 +4645,8 @@ static void *emulate_map_dest(struct vcp u32 bytes, struct sh_emulate_ctxt *sh_ctxt) { - struct segment_register *sreg; unsigned long offset; void *map = NULL; - - /* We don't emulate user-mode writes to page tables */ - sreg = hvm_get_seg_reg(x86_seg_ss, sh_ctxt); - if ( sreg->attr.fields.dpl == 3 ) - return MAPPING_UNHANDLEABLE; sh_ctxt->mfn1 = emulate_gva_to_mfn(v, vaddr, sh_ctxt); if ( !mfn_valid(sh_ctxt->mfn1) ) @@ -4656,6 +4655,16 @@ static void *emulate_map_dest(struct vcp (mfn_x(sh_ctxt->mfn1) == READONLY_GFN) ? MAPPING_SILENT_FAIL : MAPPING_UNHANDLEABLE); +#ifndef NDEBUG + /* We don't emulate user-mode writes to page tables */ + if ( hvm_get_seg_reg(x86_seg_ss, sh_ctxt)->attr.fields.dpl == 3 ) + { + gdprintk(XENLOG_DEBUG, "User-mode write to pagetable reached " + "emulate_map_dest(). This should never happen!\n"); + return MAPPING_UNHANDLEABLE; + } +#endif + /* Unaligned writes mean probably this isn't a pagetable */ if ( vaddr & (bytes - 1) ) sh_remove_shadows(v, sh_ctxt->mfn1, 0, 0 /* Slow, can fail */ ); diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/msi.c --- a/xen/arch/x86/msi.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/msi.c Thu Aug 07 11:57:34 2008 +0900 @@ -27,8 +27,6 @@ #include <public/physdev.h> #include <xen/iommu.h> -extern int msi_irq_enable; - /* bitmap indicate which fixed map is free */ DEFINE_SPINLOCK(msix_fixmap_lock); DECLARE_BITMAP(msix_fixmap_pages, MAX_MSIX_PAGES); @@ -763,14 +761,13 @@ retry: { desc = &irq_desc[entry->vector]; - local_irq_save(flags); - if ( !spin_trylock(&desc->lock) ) - { - local_irq_restore(flags); - goto retry; - } - - spin_lock_irqsave(&desc->lock, flags); + local_irq_save(flags); + if ( !spin_trylock(&desc->lock) ) + { + local_irq_restore(flags); + goto retry; + } + if ( desc->handler == &pci_msi_type ) { /* MSI is not shared, so should be released already */ diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/numa.c --- a/xen/arch/x86/numa.c Thu Aug 07 11:47:34 2008 +0900 +++ 
b/xen/arch/x86/numa.c Thu Aug 07 11:57:34 2008 +0900 @@ -14,6 +14,7 @@ #include <xen/time.h> #include <xen/smp.h> #include <asm/acpi.h> +#include <xen/sched.h> static int numa_setup(char *s); custom_param("numa", numa_setup); @@ -281,6 +282,9 @@ static void dump_numa(unsigned char key) { s_time_t now = NOW(); int i; + struct domain *d; + struct page_info *page; + unsigned int page_num_node[MAX_NUMNODES]; printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key, (u32)(now>>32), (u32)now); @@ -297,6 +301,28 @@ static void dump_numa(unsigned char key) } for_each_online_cpu(i) printk("CPU%d -> NODE%d\n", i, cpu_to_node[i]); + + rcu_read_lock(&domlist_read_lock); + + printk("Memory location of each domain:\n"); + for_each_domain(d) + { + printk("Domain %u (total: %u):\n", d->domain_id, d->tot_pages); + + for_each_online_node(i) + page_num_node[i] = 0; + + list_for_each_entry(page, &d->page_list, list) + { + i = phys_to_nid(page_to_mfn(page) << PAGE_SHIFT); + page_num_node[i]++; + } + + for_each_online_node(i) + printk(" Node %u: %u\n", i, page_num_node[i]); + } + + rcu_read_unlock(&domlist_read_lock); } static __init int register_numa_trigger(void) diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/physdev.c Thu Aug 07 11:57:34 2008 +0900 @@ -184,15 +184,14 @@ static int unmap_domain_pirq(struct doma return ret; } -extern int msi_irq_enable; static int physdev_map_pirq(struct physdev_map_pirq *map) { struct domain *d; int vector, pirq, ret = 0; unsigned long flags; - /* if msi_irq_enable is not enabled,map always success */ - if ( !msi_irq_enable ) + /* if msi_enable is not enabled, map always succeeds */ + if ( !msi_enable ) return 0; if ( !IS_PRIV(current->domain) ) @@ -304,7 +303,7 @@ static int physdev_unmap_pirq(struct phy unsigned long flags; int ret; - if ( !msi_irq_enable ) + if ( !msi_enable ) return 0; if ( !IS_PRIV(current->domain) ) @@ -455,7 +454,7 @@ ret_t 
do_physdev_op(int cmd, XEN_GUEST_H ret = 0; - if ( msi_irq_enable ) + if ( msi_enable ) { spin_lock_irqsave(&dom0->arch.irq_lock, flags); if ( irq != AUTO_ASSIGN ) diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/platform_hypercall.c --- a/xen/arch/x86/platform_hypercall.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/platform_hypercall.c Thu Aug 07 11:57:34 2008 +0900 @@ -355,6 +355,11 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe struct processor_pminfo *pmpt; struct processor_performance *pxpt; + if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) ) + { + ret = -ENOSYS; + break; + } if ( cpuid < 0 ) { ret = -EINVAL; @@ -373,6 +378,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe memcpy ((void *)&pxpt->status_register, (void *)&xenpxpt->status_register, sizeof(struct xen_pct_register)); + pxpt->init |= XEN_PX_PCT; } if ( xenpxpt->flags & XEN_PX_PSS ) { @@ -390,6 +396,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe break; } pxpt->state_count = xenpxpt->state_count; + pxpt->init |= XEN_PX_PSS; } if ( xenpxpt->flags & XEN_PX_PSD ) { @@ -397,14 +404,18 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe memcpy ((void *)&pxpt->domain_info, (void *)&xenpxpt->domain_info, sizeof(struct xen_psd_package)); + pxpt->init |= XEN_PX_PSD; } if ( xenpxpt->flags & XEN_PX_PPC ) + { pxpt->ppc = xenpxpt->ppc; - - if ( xenpxpt->flags == ( XEN_PX_PCT | XEN_PX_PSS | - XEN_PX_PSD | XEN_PX_PPC ) ) - { - pxpt->init =1; + pxpt->init |= XEN_PX_PPC; + } + + if ( pxpt->init == ( XEN_PX_PCT | XEN_PX_PSS | + XEN_PX_PSD | XEN_PX_PPC ) ) + { + pxpt->init |= XEN_PX_INIT; cpu_count++; } if ( cpu_count == num_online_cpus() ) @@ -418,10 +429,20 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe } case XEN_PM_CX: + if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) ) + { + ret = -ENOSYS; + break; + } ret = set_cx_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.power); break; case XEN_PM_TX: + if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_TX) ) + { + ret = -ENOSYS; + break; + } ret = -EINVAL; break; diff -r 7affdebb7a1e 
-r a39913db6e51 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/setup.c Thu Aug 07 11:57:34 2008 +0900 @@ -997,7 +997,6 @@ void __init __start_xen(unsigned long mb if ( (cmdline != NULL) || (kextra != NULL) ) { static char dom0_cmdline[MAX_GUEST_CMDLINE]; - char xen_pm_param[32]; cmdline = cmdline_cook(cmdline); safe_strcpy(dom0_cmdline, cmdline); @@ -1022,14 +1021,6 @@ void __init __start_xen(unsigned long mb safe_strcat(dom0_cmdline, " acpi="); safe_strcat(dom0_cmdline, acpi_param); } - if ( xen_cpuidle ) - xen_processor_pmbits |= XEN_PROCESSOR_PM_CX; - - snprintf(xen_pm_param, sizeof(xen_pm_param), - " xen_processor_pmbits=%d", xen_processor_pmbits); - - if ( !strstr(dom0_cmdline, "xen_processor_pmbits=") ) - safe_strcat(dom0_cmdline, xen_pm_param); cmdline = dom0_cmdline; } @@ -1040,6 +1031,9 @@ void __init __start_xen(unsigned long mb (mod[initrdidx].mod_start - mod[0].mod_start); _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start; } + + if ( xen_cpuidle ) + xen_processor_pmbits |= XEN_PROCESSOR_PM_CX; /* * We're going to setup domain0 using the module(s) that we stashed safely diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/shutdown.c --- a/xen/arch/x86/shutdown.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/shutdown.c Thu Aug 07 11:57:34 2008 +0900 @@ -291,7 +291,12 @@ __initcall(reboot_init); #endif -void machine_restart(void) +static void __machine_restart(void *pdelay) +{ + machine_restart(*(unsigned int *)pdelay); +} + +void machine_restart(unsigned int delay_millisecs) { int i; @@ -304,13 +309,15 @@ void machine_restart(void) if ( get_apic_id() != boot_cpu_physical_apicid ) { /* Send IPI to the boot CPU (logical cpu 0). 
*/ - on_selected_cpus(cpumask_of_cpu(0), (void *)machine_restart, - NULL, 1, 0); + on_selected_cpus(cpumask_of_cpu(0), __machine_restart, + &delay_millisecs, 1, 0); for ( ; ; ) halt(); } smp_send_stop(); + + mdelay(delay_millisecs); if ( tboot_in_measured_env() ) tboot_shutdown(TB_SHUTDOWN_REBOOT); diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/time.c Thu Aug 07 11:57:34 2008 +0900 @@ -35,8 +35,6 @@ static char opt_clocksource[10]; static char opt_clocksource[10]; string_param("clocksource", opt_clocksource); -#define EPOCH MILLISECS(1000) - unsigned long cpu_khz; /* CPU clock frequency in kHz. */ DEFINE_SPINLOCK(rtc_lock); unsigned long pit0_ticks; @@ -55,7 +53,6 @@ struct cpu_time { s_time_t stime_master_stamp; struct time_scale tsc_scale; u64 cstate_plt_count_stamp; - struct timer calibration_timer; }; struct platform_timesource { @@ -66,6 +63,10 @@ struct platform_timesource { }; static DEFINE_PER_CPU(struct cpu_time, cpu_time); + +/* Calibrate all CPUs to platform timer every EPOCH. */ +#define EPOCH MILLISECS(1000) +static struct timer calibration_timer; /* TSC is invariant on C state entry? 
*/ static bool_t tsc_invariant; @@ -481,35 +482,6 @@ static int init_pmtimer(struct platform_ } /************************************************************ - * PLATFORM TIMER 5: TSC - */ - -static const char plt_tsc_name[] = "TSC"; -#define platform_timer_is_tsc() (plt_src.name == plt_tsc_name) - -static int init_tsctimer(struct platform_timesource *pts) -{ - if ( !tsc_invariant ) - return 0; - - pts->name = (char *)plt_tsc_name; - return 1; -} - -static void make_tsctimer_record(void) -{ - struct cpu_time *t = &this_cpu(cpu_time); - s_time_t now; - u64 tsc; - - rdtscll(tsc); - now = scale_delta(tsc, &t->tsc_scale); - - t->local_tsc_stamp = tsc; - t->stime_local_stamp = t->stime_master_stamp = now; -} - -/************************************************************ * GENERIC PLATFORM TIMER INFRASTRUCTURE */ @@ -530,11 +502,11 @@ static void plt_overflow(void *unused) { u64 count; - spin_lock(&platform_timer_lock); + spin_lock_irq(&platform_timer_lock); count = plt_src.read_counter(); plt_stamp64 += (count - plt_stamp) & plt_mask; plt_stamp = count; - spin_unlock(&platform_timer_lock); + spin_unlock_irq(&platform_timer_lock); set_timer(&plt_overflow_timer, NOW() + plt_overflow_period); } @@ -550,6 +522,8 @@ static s_time_t read_platform_stime(void { u64 count; s_time_t stime; + + ASSERT(!local_irq_is_enabled()); spin_lock(&platform_timer_lock); count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask); @@ -564,22 +538,16 @@ static void platform_time_calibration(vo u64 count; s_time_t stamp; - spin_lock(&platform_timer_lock); + spin_lock_irq(&platform_timer_lock); count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask); stamp = __read_platform_stime(count); stime_platform_stamp = stamp; platform_timer_stamp = count; - spin_unlock(&platform_timer_lock); + spin_unlock_irq(&platform_timer_lock); } static void resume_platform_timer(void) { - if ( platform_timer_is_tsc() ) - { - /* TODO: Save/restore TSC values. 
*/ - return; - } - /* No change in platform_stime across suspend/resume. */ platform_timer_stamp = plt_stamp64; plt_stamp = plt_src.read_counter(); @@ -600,8 +568,6 @@ static void init_platform_timer(void) rc = init_cyclone(pts); else if ( !strcmp(opt_clocksource, "acpi") ) rc = init_pmtimer(pts); - else if ( !strcmp(opt_clocksource, "tsc") ) - rc = init_tsctimer(pts); if ( rc <= 0 ) printk("WARNING: %s clocksource '%s'.\n", @@ -614,12 +580,6 @@ static void init_platform_timer(void) !init_hpet(pts) && !init_pmtimer(pts) ) init_pit(pts); - - if ( platform_timer_is_tsc() ) - { - printk("Platform timer is TSC\n"); - return; - } plt_mask = (u64)~0ull >> (64 - pts->counter_bits); @@ -823,10 +783,6 @@ int cpu_frequency_change(u64 freq) struct cpu_time *t = &this_cpu(cpu_time); u64 curr_tsc; - /* Nothing to do if TSC is platform timer. Assume it is constant-rate. */ - if ( platform_timer_is_tsc() ) - return 0; - /* Sanity check: CPU frequency allegedly dropping below 1MHz? */ if ( freq < 1000000u ) { @@ -847,9 +803,11 @@ int cpu_frequency_change(u64 freq) local_irq_enable(); /* A full epoch should pass before we check for deviation. */ - set_timer(&t->calibration_timer, NOW() + EPOCH); if ( smp_processor_id() == 0 ) + { + set_timer(&calibration_timer, NOW() + EPOCH); platform_time_calibration(); + } return 0; } @@ -875,9 +833,20 @@ void do_settime(unsigned long secs, unsi rcu_read_unlock(&domlist_read_lock); } +/* Per-CPU communication between rendezvous IRQ and softirq handler. */ +struct cpu_calibration { + u64 local_tsc_stamp; + s_time_t stime_local_stamp; + s_time_t stime_master_stamp; + struct timer softirq_callback; +}; +static DEFINE_PER_CPU(struct cpu_calibration, cpu_calibration); + +/* Softirq handler for per-CPU time calibration. 
*/ static void local_time_calibration(void *unused) { struct cpu_time *t = &this_cpu(cpu_time); + struct cpu_calibration *c = &this_cpu(cpu_calibration); /* * System timestamps, extrapolated from local and master oscillators, @@ -908,26 +877,15 @@ static void local_time_calibration(void /* The overall calibration scale multiplier. */ u32 calibration_mul_frac; - if ( platform_timer_is_tsc() ) - { - make_tsctimer_record(); - update_vcpu_system_time(current); - set_timer(&t->calibration_timer, NOW() + MILLISECS(10*1000)); - return; - } - prev_tsc = t->local_tsc_stamp; prev_local_stime = t->stime_local_stamp; prev_master_stime = t->stime_master_stamp; - /* - * Disable IRQs to get 'instantaneous' current timestamps. We read platform - * time first, as we may be delayed when acquiring platform_timer_lock. - */ + /* Disabling IRQs ensures we atomically read cpu_calibration struct. */ local_irq_disable(); - curr_master_stime = read_platform_stime(); - curr_local_stime = get_s_time(); - rdtscll(curr_tsc); + curr_tsc = c->local_tsc_stamp; + curr_local_stime = c->stime_local_stamp; + curr_master_stime = c->stime_master_stamp; local_irq_enable(); #if 0 @@ -1021,10 +979,62 @@ static void local_time_calibration(void update_vcpu_system_time(current); out: - set_timer(&t->calibration_timer, NOW() + EPOCH); - if ( smp_processor_id() == 0 ) + { + set_timer(&calibration_timer, NOW() + EPOCH); platform_time_calibration(); + } +} + +/* + * Rendezvous for all CPUs in IRQ context. + * Master CPU snapshots the platform timer. + * All CPUS snapshot their local TSC and extrapolation of system time. 
+ */ +struct calibration_rendezvous { + atomic_t nr_cpus; + s_time_t master_stime; +}; + +static void time_calibration_rendezvous(void *_r) +{ + unsigned int total_cpus = num_online_cpus(); + struct cpu_calibration *c = &this_cpu(cpu_calibration); + struct calibration_rendezvous *r = _r; + + local_irq_disable(); + + if ( smp_processor_id() == 0 ) + { + while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) ) + cpu_relax(); + r->master_stime = read_platform_stime(); + atomic_inc(&r->nr_cpus); + } + else + { + atomic_inc(&r->nr_cpus); + while ( atomic_read(&r->nr_cpus) != total_cpus ) + cpu_relax(); + } + + rdtscll(c->local_tsc_stamp); + c->stime_local_stamp = get_s_time(); + c->stime_master_stamp = r->master_stime; + + local_irq_enable(); + + /* Callback in softirq context as soon as possible. */ + set_timer(&c->softirq_callback, c->stime_local_stamp); +} + +static void time_calibration(void *unused) +{ + struct calibration_rendezvous r = { + .nr_cpus = ATOMIC_INIT(0) + }; + + on_each_cpu(time_calibration_rendezvous, &r, 0, 1); } void init_percpu_time(void) @@ -1032,12 +1042,6 @@ void init_percpu_time(void) struct cpu_time *t = &this_cpu(cpu_time); unsigned long flags; s_time_t now; - - if ( platform_timer_is_tsc() ) - { - make_tsctimer_record(); - goto out; - } local_irq_save(flags); rdtscll(t->local_tsc_stamp); @@ -1047,10 +1051,14 @@ void init_percpu_time(void) t->stime_master_stamp = now; t->stime_local_stamp = now; - out: - init_timer(&t->calibration_timer, local_time_calibration, - NULL, smp_processor_id()); - set_timer(&t->calibration_timer, NOW() + EPOCH); + init_timer(&this_cpu(cpu_calibration).softirq_callback, + local_time_calibration, NULL, smp_processor_id()); + + if ( smp_processor_id() == 0 ) + { + init_timer(&calibration_timer, time_calibration, NULL, 0); + set_timer(&calibration_timer, NOW() + EPOCH); + } } /* Late init function (after all CPUs are booted). 
*/ @@ -1134,7 +1142,10 @@ void pit_broadcast_enter(void) void pit_broadcast_exit(void) { - cpu_clear(smp_processor_id(), pit_broadcast_mask); + int cpu = smp_processor_id(); + + if ( cpu_test_and_clear(cpu, pit_broadcast_mask) ) + reprogram_timer(per_cpu(timer_deadline, cpu)); } int pit_broadcast_is_available(void) @@ -1163,10 +1174,11 @@ int time_suspend(void) { cmos_utc_offset = -get_cmos_time(); cmos_utc_offset += (wc_sec + (wc_nsec + NOW()) / 1000000000ULL); + kill_timer(&calibration_timer); } /* Better to cancel calibration timer for accuracy. */ - kill_timer(&this_cpu(cpu_time).calibration_timer); + kill_timer(&this_cpu(cpu_calibration).softirq_callback); return 0; } diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/x86_64/physdev.c --- a/xen/arch/x86/x86_64/physdev.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/x86_64/physdev.c Thu Aug 07 11:57:34 2008 +0900 @@ -30,6 +30,15 @@ #define physdev_irq_status_query compat_physdev_irq_status_query #define physdev_irq_status_query_t physdev_irq_status_query_compat_t +#define physdev_map_pirq compat_physdev_map_pirq +#define physdev_map_pirq_t physdev_map_pirq_compat_t + +#define physdev_unmap_pirq compat_physdev_unmap_pirq +#define physdev_unmap_pirq_t physdev_unmap_pirq_compat_t + +#define physdev_manage_pci compat_physdev_manage_pci +#define physdev_manage_pci_t physdev_manage_pci_compat_t + #define COMPAT #undef guest_handle_okay #define guest_handle_okay compat_handle_okay diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/x86_emulate/x86_emulate.c --- a/xen/arch/x86/x86_emulate/x86_emulate.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/arch/x86/x86_emulate/x86_emulate.c Thu Aug 07 11:57:34 2008 +0900 @@ -1606,6 +1606,7 @@ x86_emulate( dst.val = _regs.eax; case 0x38 ... 0x3b: cmp: /* cmp */ emulate_2op_SrcV("cmp", src, dst, _regs.eflags); + dst.type = OP_NONE; break; case 0x62: /* bound */ { @@ -1707,6 +1708,7 @@ x86_emulate( dst.val = _regs.eax; case 0x84 ... 
0x85: test: /* test */ emulate_2op_SrcV("test", src, dst, _regs.eflags); + dst.type = OP_NONE; break; case 0x86 ... 0x87: xchg: /* xchg */ diff -r 7affdebb7a1e -r a39913db6e51 xen/common/compat/grant_table.c --- a/xen/common/compat/grant_table.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/common/compat/grant_table.c Thu Aug 07 11:57:34 2008 +0900 @@ -138,7 +138,6 @@ int compat_grant_table_op(unsigned int c for ( i = 0; i < (_s_)->nr_frames; ++i ) \ { \ unsigned int frame = (_s_)->frame_list.p[i]; \ - BUG_ON(frame != (_s_)->frame_list.p[i]); \ (void)__copy_to_compat_offset((_d_)->frame_list, i, &frame, 1); \ } \ } \ diff -r 7affdebb7a1e -r a39913db6e51 xen/common/domain.c --- a/xen/common/domain.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/common/domain.c Thu Aug 07 11:57:34 2008 +0900 @@ -50,7 +50,7 @@ static void __init setup_cpufreq_option( else if ( !strcmp(str, "xen") ) { xen_processor_pmbits |= XEN_PROCESSOR_PM_PX; - cpufreq_controller = FREQCTL_none; + cpufreq_controller = FREQCTL_xen; } } custom_param("cpufreq", setup_cpufreq_option); @@ -136,6 +136,8 @@ struct vcpu *alloc_vcpu( v->runstate.state = is_idle_vcpu(v) ? 
RUNSTATE_running : RUNSTATE_offline; v->runstate.state_entry_time = NOW(); + + spin_lock_init(&v->virq_lock); if ( !is_idle_domain(d) ) { diff -r 7affdebb7a1e -r a39913db6e51 xen/common/event_channel.c --- a/xen/common/event_channel.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/common/event_channel.c Thu Aug 07 11:57:34 2008 +0900 @@ -386,14 +386,18 @@ static long __evtchn_close(struct domain break; case ECS_PIRQ: - if ( (rc = pirq_guest_unbind(d1, chn1->u.pirq)) == 0 ) - d1->pirq_to_evtchn[chn1->u.pirq] = 0; + pirq_guest_unbind(d1, chn1->u.pirq); + d1->pirq_to_evtchn[chn1->u.pirq] = 0; break; case ECS_VIRQ: for_each_vcpu ( d1, v ) - if ( v->virq_to_evtchn[chn1->u.virq] == port1 ) - v->virq_to_evtchn[chn1->u.virq] = 0; + { + if ( v->virq_to_evtchn[chn1->u.virq] != port1 ) + continue; + v->virq_to_evtchn[chn1->u.virq] = 0; + spin_barrier(&v->virq_lock); + } break; case ECS_IPI: @@ -447,6 +451,9 @@ static long __evtchn_close(struct domain BUG(); } + /* Clear pending event to avoid unexpected behavior on re-bind. */ + clear_bit(port1, &shared_info(d1, evtchn_pending)); + /* Reset binding to vcpu0 when the channel is freed. 
*/ chn1->state = ECS_FREE; chn1->notify_vcpu_id = 0; @@ -573,37 +580,33 @@ static int evtchn_set_pending(struct vcp return 0; } +int guest_enabled_event(struct vcpu *v, int virq) +{ + return ((v != NULL) && (v->virq_to_evtchn[virq] != 0)); +} void send_guest_vcpu_virq(struct vcpu *v, int virq) { + unsigned long flags; int port; ASSERT(!virq_is_global(virq)); + + spin_lock_irqsave(&v->virq_lock, flags); port = v->virq_to_evtchn[virq]; if ( unlikely(port == 0) ) - return; + goto out; evtchn_set_pending(v, port); -} - -int guest_enabled_event(struct vcpu *v, int virq) -{ - int port; - - if ( unlikely(v == NULL) ) - return 0; - - port = v->virq_to_evtchn[virq]; - if ( port == 0 ) - return 0; - - /* virq is in use */ - return 1; + + out: + spin_unlock_irqrestore(&v->virq_lock, flags); } void send_guest_global_virq(struct domain *d, int virq) { + unsigned long flags; int port; struct vcpu *v; struct evtchn *chn; @@ -617,20 +620,28 @@ void send_guest_global_virq(struct domai if ( unlikely(v == NULL) ) return; + spin_lock_irqsave(&v->virq_lock, flags); + port = v->virq_to_evtchn[virq]; if ( unlikely(port == 0) ) - return; + goto out; chn = evtchn_from_port(d, port); evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port); -} - + + out: + spin_unlock_irqrestore(&v->virq_lock, flags); +} int send_guest_pirq(struct domain *d, int pirq) { int port = d->pirq_to_evtchn[pirq]; struct evtchn *chn; + /* + * It should not be possible to race with __evtchn_close(): + * The caller of this function must synchronise with pirq_guest_unbind(). 
+ */ ASSERT(port != 0); chn = evtchn_from_port(d, port); diff -r 7affdebb7a1e -r a39913db6e51 xen/common/keyhandler.c --- a/xen/common/keyhandler.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/common/keyhandler.c Thu Aug 07 11:57:34 2008 +0900 @@ -143,7 +143,7 @@ static void halt_machine(unsigned char k static void halt_machine(unsigned char key, struct cpu_user_regs *regs) { printk("'%c' pressed -> rebooting machine\n", key); - machine_restart(); + machine_restart(0); } static void cpuset_print(char *set, int size, cpumask_t mask) @@ -236,6 +236,7 @@ static void dump_domains(unsigned char k static cpumask_t read_clocks_cpumask = CPU_MASK_NONE; static s_time_t read_clocks_time[NR_CPUS]; +static u64 read_cycles_time[NR_CPUS]; static void read_clocks_slave(void *unused) { @@ -244,14 +245,20 @@ static void read_clocks_slave(void *unus while ( !cpu_isset(cpu, read_clocks_cpumask) ) cpu_relax(); read_clocks_time[cpu] = NOW(); + read_cycles_time[cpu] = get_cycles(); cpu_clear(cpu, read_clocks_cpumask); local_irq_enable(); } static void read_clocks(unsigned char key) { - unsigned int cpu = smp_processor_id(), min_cpu, max_cpu; - u64 min, max, dif, difus; + unsigned int cpu = smp_processor_id(), min_stime_cpu, max_stime_cpu; + unsigned int min_cycles_cpu, max_cycles_cpu; + u64 min_stime, max_stime, dif_stime; + u64 min_cycles, max_cycles, dif_cycles; + static u64 sumdif_stime = 0, maxdif_stime = 0; + static u64 sumdif_cycles = 0, maxdif_cycles = 0; + static u32 count = 0; static DEFINE_SPINLOCK(lock); spin_lock(&lock); @@ -261,31 +268,48 @@ static void read_clocks(unsigned char ke local_irq_disable(); read_clocks_cpumask = cpu_online_map; read_clocks_time[cpu] = NOW(); + read_cycles_time[cpu] = get_cycles(); cpu_clear(cpu, read_clocks_cpumask); local_irq_enable(); while ( !cpus_empty(read_clocks_cpumask) ) cpu_relax(); - min_cpu = max_cpu = cpu; + min_stime_cpu = max_stime_cpu = min_cycles_cpu = max_cycles_cpu = cpu; for_each_online_cpu ( cpu ) { - if ( read_clocks_time[cpu] < 
read_clocks_time[min_cpu] ) - min_cpu = cpu; - if ( read_clocks_time[cpu] > read_clocks_time[max_cpu] ) - max_cpu = cpu; - } - - min = read_clocks_time[min_cpu]; - max = read_clocks_time[max_cpu]; + if ( read_clocks_time[cpu] < read_clocks_time[min_stime_cpu] ) + min_stime_cpu = cpu; + if ( read_clocks_time[cpu] > read_clocks_time[max_stime_cpu] ) + max_stime_cpu = cpu; + if ( read_cycles_time[cpu] < read_cycles_time[min_cycles_cpu] ) + min_cycles_cpu = cpu; + if ( read_cycles_time[cpu] > read_cycles_time[max_cycles_cpu] ) + max_cycles_cpu = cpu; + } + + min_stime = read_clocks_time[min_stime_cpu]; + max_stime = read_clocks_time[max_stime_cpu]; + min_cycles = read_cycles_time[min_cycles_cpu]; + max_cycles = read_cycles_time[max_cycles_cpu]; spin_unlock(&lock); - dif = difus = max - min; - do_div(difus, 1000); - printk("Min = %"PRIu64" ; Max = %"PRIu64" ; Diff = %"PRIu64 - " (%"PRIu64" microseconds)\n", - min, max, dif, difus); + dif_stime = max_stime - min_stime; + if ( dif_stime > maxdif_stime ) + maxdif_stime = dif_stime; + sumdif_stime += dif_stime; + dif_cycles = max_cycles - min_cycles; + if ( dif_cycles > maxdif_cycles ) + maxdif_cycles = dif_cycles; + sumdif_cycles += dif_cycles; + count++; + printk("Synced stime skew: max=%"PRIu64"ns avg=%"PRIu64"ns " + "samples=%"PRIu32" current=%"PRIu64"ns\n", + maxdif_stime, sumdif_stime/count, count, dif_stime); + printk("Synced cycles skew: max=%"PRIu64" avg=%"PRIu64" " + "samples=%"PRIu32" current=%"PRIu64"\n", + maxdif_cycles, sumdif_cycles/count, count, dif_cycles); } extern void dump_runq(unsigned char key); diff -r 7affdebb7a1e -r a39913db6e51 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/common/page_alloc.c Thu Aug 07 11:57:34 2008 +0900 @@ -53,34 +53,11 @@ boolean_param("bootscrub", opt_bootscrub boolean_param("bootscrub", opt_bootscrub); /* - * Bit width of the DMA heap. 
- */ -static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE; -static void __init parse_dma_bits(char *s) -{ - unsigned int v = simple_strtol(s, NULL, 0); - if ( v >= (BITS_PER_LONG + PAGE_SHIFT) ) - dma_bitsize = BITS_PER_LONG + PAGE_SHIFT; - else if ( v > PAGE_SHIFT + 1 ) - dma_bitsize = v; - else - printk("Invalid dma_bits value of %u ignored.\n", v); -} -custom_param("dma_bits", parse_dma_bits); - -/* - * Amount of memory to reserve in a low-memory (<4GB) pool for specific - * allocation requests. Ordinary requests will not fall back to the - * lowmem emergency pool. - */ -static unsigned long dma_emergency_pool_pages; -static void __init parse_dma_emergency_pool(char *s) -{ - unsigned long long bytes; - bytes = parse_size_and_unit(s, NULL); - dma_emergency_pool_pages = bytes >> PAGE_SHIFT; -} -custom_param("dma_emergency_pool", parse_dma_emergency_pool); + * Bit width of the DMA heap -- used to override NUMA-node-first. + * allocation strategy, which can otherwise exhaust low memory. 
+ */ +static unsigned int dma_bitsize; +integer_param("dma_bits", dma_bitsize); #define round_pgdown(_p) ((_p)&PAGE_MASK) #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) @@ -281,11 +258,7 @@ unsigned long __init alloc_boot_pages( */ #define MEMZONE_XEN 0 -#ifdef PADDR_BITS #define NR_ZONES (PADDR_BITS - PAGE_SHIFT) -#else -#define NR_ZONES (BITS_PER_LONG - PAGE_SHIFT) -#endif #define pfn_dom_zone_type(_pfn) (fls(_pfn) - 1) @@ -583,7 +556,22 @@ void __init end_boot_allocator(void) init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1); } - printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize); + if ( !dma_bitsize && (num_online_nodes() > 1) ) + { +#ifdef CONFIG_X86 + dma_bitsize = min_t(unsigned int, + fls(NODE_DATA(0)->node_spanned_pages) - 1 + + PAGE_SHIFT - 2, + 32); +#else + dma_bitsize = 32; +#endif + } + + printk("Domain heap initialised"); + if ( dma_bitsize ) + printk(" DMA width %u bits", dma_bitsize); + printk("\n"); } #undef avail_for_domheap @@ -803,19 +791,9 @@ struct page_info *alloc_domheap_pages( if ( bits < zone_hi ) zone_hi = bits; - if ( (zone_hi + PAGE_SHIFT) >= dma_bitsize ) - { + if ( (dma_bitsize > PAGE_SHIFT) && + ((zone_hi + PAGE_SHIFT) >= dma_bitsize) ) pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, node, order); - - /* Failure? Then check if we can fall back to the DMA pool. 
*/ - if ( unlikely(pg == NULL) && - ((order > MAX_ORDER) || - (avail_heap_pages(MEMZONE_XEN + 1, - dma_bitsize - PAGE_SHIFT - 1, - -1) < - (dma_emergency_pool_pages + (1UL << order)))) ) - return NULL; - } if ( (pg == NULL) && ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi, @@ -917,28 +895,15 @@ unsigned long avail_domheap_pages_region unsigned long avail_domheap_pages(void) { - unsigned long avail_nrm, avail_dma; - - avail_nrm = avail_heap_pages(dma_bitsize - PAGE_SHIFT, - NR_ZONES - 1, - -1); - - avail_dma = avail_heap_pages(MEMZONE_XEN + 1, - dma_bitsize - PAGE_SHIFT - 1, - -1); - - if ( avail_dma > dma_emergency_pool_pages ) - avail_dma -= dma_emergency_pool_pages; - else - avail_dma = 0; - - return avail_nrm + avail_dma; + return avail_heap_pages(MEMZONE_XEN + 1, + NR_ZONES - 1, + -1); } static void pagealloc_keyhandler(unsigned char key) { unsigned int zone = MEMZONE_XEN; - unsigned long total = 0; + unsigned long n, total = 0; printk("Physical memory information:\n"); printk(" Xen heap: %lukB free\n", @@ -946,9 +911,7 @@ static void pagealloc_keyhandler(unsigne while ( ++zone < NR_ZONES ) { - unsigned long n; - - if ( zone == dma_bitsize - PAGE_SHIFT ) + if ( (zone + PAGE_SHIFT) == dma_bitsize ) { printk(" DMA heap: %lukB free\n", total << (PAGE_SHIFT-10)); total = 0; diff -r 7affdebb7a1e -r a39913db6e51 xen/common/shutdown.c --- a/xen/common/shutdown.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/common/shutdown.c Thu Aug 07 11:57:34 2008 +0900 @@ -23,8 +23,7 @@ static void maybe_reboot(void) { printk("rebooting machine in 5 seconds.\n"); watchdog_disable(); - mdelay(5000); - machine_restart(); + machine_restart(5000); } } @@ -50,7 +49,7 @@ void dom0_shutdown(u8 reason) case SHUTDOWN_reboot: { printk("Domain 0 shutdown: rebooting machine.\n"); - machine_restart(); + machine_restart(0); break; /* not reached */ } diff -r 7affdebb7a1e -r a39913db6e51 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Thu Aug 07 11:47:34 2008 +0900 +++ 
b/xen/drivers/char/console.c Thu Aug 07 11:57:34 2008 +0900 @@ -939,8 +939,7 @@ void panic(const char *fmt, ...) else { watchdog_disable(); - mdelay(5000); - machine_restart(); + machine_restart(5000); } } diff -r 7affdebb7a1e -r a39913db6e51 xen/drivers/passthrough/amd/iommu_acpi.c --- a/xen/drivers/passthrough/amd/iommu_acpi.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/drivers/passthrough/amd/iommu_acpi.c Thu Aug 07 11:57:34 2008 +0900 @@ -27,6 +27,7 @@ extern unsigned long amd_iommu_page_entr extern unsigned long amd_iommu_page_entries; extern unsigned short ivrs_bdf_entries; extern struct ivrs_mappings *ivrs_mappings; +extern unsigned short last_bdf; static struct amd_iommu * __init find_iommu_from_bdf_cap( u16 bdf, u8 cap_offset) @@ -85,10 +86,8 @@ static void __init reserve_unity_map_for } /* extend r/w permissioms and keep aggregate */ - if ( iw ) - ivrs_mappings[bdf].write_permission = IOMMU_CONTROL_ENABLED; - if ( ir ) - ivrs_mappings[bdf].read_permission = IOMMU_CONTROL_ENABLED; + ivrs_mappings[bdf].write_permission = iw; + ivrs_mappings[bdf].read_permission = ir; ivrs_mappings[bdf].unity_map_enable = IOMMU_CONTROL_ENABLED; ivrs_mappings[bdf].addr_range_start = base; ivrs_mappings[bdf].addr_range_length = length; @@ -112,7 +111,7 @@ static int __init register_exclusion_ran length = range_top - base; /* reserve r/w unity-mapped page entries for devices */ /* note: these entries are part of the exclusion range */ - for (bdf = 0; bdf < ivrs_bdf_entries; ++bdf) + for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) reserve_unity_map_for_device(bdf, base, length, iw, ir); /* push 'base' just outside of virtual address space */ base = iommu_top; @@ -190,7 +189,7 @@ static int __init register_exclusion_ran length = range_top - base; /* reserve r/w unity-mapped page entries for devices */ /* note: these entries are part of the exclusion range */ - for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf ) + for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) { bus = bdf >> 8; devfn = bdf 
& 0xFF; @@ -357,7 +356,7 @@ static u16 __init parse_ivhd_device_padd } static u16 __init parse_ivhd_device_select( - union acpi_ivhd_device *ivhd_device) + union acpi_ivhd_device *ivhd_device, struct amd_iommu *iommu) { u16 bdf; @@ -373,13 +372,14 @@ static u16 __init parse_ivhd_device_sele get_field_from_byte(ivhd_device->header.flags, AMD_IOMMU_ACPI_SYS_MGT_MASK, AMD_IOMMU_ACPI_SYS_MGT_SHIFT); + ivrs_mappings[bdf].iommu = iommu; return sizeof(struct acpi_ivhd_device_header); } static u16 __init parse_ivhd_device_range( union acpi_ivhd_device *ivhd_device, - u16 header_length, u16 block_length) + u16 header_length, u16 block_length, struct amd_iommu *iommu) { u16 dev_length, first_bdf, last_bdf, bdf; u8 sys_mgt; @@ -423,14 +423,17 @@ static u16 __init parse_ivhd_device_rang AMD_IOMMU_ACPI_SYS_MGT_MASK, AMD_IOMMU_ACPI_SYS_MGT_SHIFT); for ( bdf = first_bdf; bdf <= last_bdf; bdf++ ) + { ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt; + ivrs_mappings[bdf].iommu = iommu; + } return dev_length; } static u16 __init parse_ivhd_device_alias( union acpi_ivhd_device *ivhd_device, - u16 header_length, u16 block_length) + u16 header_length, u16 block_length, struct amd_iommu *iommu) { u16 dev_length, alias_id, bdf; @@ -463,15 +466,18 @@ static u16 __init parse_ivhd_device_alia get_field_from_byte(ivhd_device->header.flags, AMD_IOMMU_ACPI_SYS_MGT_MASK, AMD_IOMMU_ACPI_SYS_MGT_SHIFT); + ivrs_mappings[bdf].iommu = iommu; + ivrs_mappings[alias_id].dte_sys_mgt_enable = ivrs_mappings[bdf].dte_sys_mgt_enable; + ivrs_mappings[alias_id].iommu = iommu; return dev_length; } static u16 __init parse_ivhd_device_alias_range( union acpi_ivhd_device *ivhd_device, - u16 header_length, u16 block_length) + u16 header_length, u16 block_length, struct amd_iommu *iommu) { u16 dev_length, first_bdf, last_bdf, alias_id, bdf; @@ -527,15 +533,17 @@ static u16 __init parse_ivhd_device_alia { ivrs_mappings[bdf].dte_requestor_id = alias_id; ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt; + 
ivrs_mappings[bdf].iommu = iommu; } ivrs_mappings[alias_id].dte_sys_mgt_enable = sys_mgt; + ivrs_mappings[alias_id].iommu = iommu; return dev_length; } static u16 __init parse_ivhd_device_extended( union acpi_ivhd_device *ivhd_device, - u16 header_length, u16 block_length) + u16 header_length, u16 block_length, struct amd_iommu *iommu) { u16 dev_length, bdf; @@ -558,13 +566,14 @@ static u16 __init parse_ivhd_device_exte get_field_from_byte(ivhd_device->header.flags, AMD_IOMMU_ACPI_SYS_MGT_MASK, AMD_IOMMU_ACPI_SYS_MGT_SHIFT); + ivrs_mappings[bdf].iommu = iommu; return dev_length; } static u16 __init parse_ivhd_device_extended_range( union acpi_ivhd_device *ivhd_device, - u16 header_length, u16 block_length) + u16 header_length, u16 block_length, struct amd_iommu *iommu) { u16 dev_length, first_bdf, last_bdf, bdf; u8 sys_mgt; @@ -609,7 +618,10 @@ static u16 __init parse_ivhd_device_exte AMD_IOMMU_ACPI_SYS_MGT_MASK, AMD_IOMMU_ACPI_SYS_MGT_SHIFT); for ( bdf = first_bdf; bdf <= last_bdf; bdf++ ) + { ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt; + ivrs_mappings[bdf].iommu = iommu; + } return dev_length; } @@ -635,33 +647,6 @@ static int __init parse_ivhd_block(struc ivhd_block->header.dev_id, ivhd_block->cap_offset); return -ENODEV; } - - amd_iov_info("IVHD Block:\n"); - amd_iov_info(" Cap_Offset 0x%x\n", ivhd_block->cap_offset); - amd_iov_info(" MMIO_BAR_Phys 0x%"PRIx64"\n",ivhd_block->mmio_base); - amd_iov_info( " PCI_Segment 0x%x\n", ivhd_block->pci_segment); - amd_iov_info( " IOMMU_Info 0x%x\n", ivhd_block->iommu_info); - - /* override IOMMU support flags */ - iommu->coherent = get_field_from_byte(ivhd_block->header.flags, - AMD_IOMMU_ACPI_COHERENT_MASK, - AMD_IOMMU_ACPI_COHERENT_SHIFT); - iommu->iotlb_support = get_field_from_byte(ivhd_block->header.flags, - AMD_IOMMU_ACPI_IOTLB_SUP_MASK, - AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT); - iommu->isochronous = get_field_from_byte(ivhd_block->header.flags, - AMD_IOMMU_ACPI_ISOC_MASK, - AMD_IOMMU_ACPI_ISOC_SHIFT); - 
iommu->res_pass_pw = get_field_from_byte(ivhd_block->header.flags, - AMD_IOMMU_ACPI_RES_PASS_PW_MASK, - AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT); - iommu->pass_pw = get_field_from_byte(ivhd_block->header.flags, - AMD_IOMMU_ACPI_PASS_PW_MASK, - AMD_IOMMU_ACPI_PASS_PW_SHIFT); - iommu->ht_tunnel_enable = get_field_from_byte( - ivhd_block->header.flags, - AMD_IOMMU_ACPI_HT_TUN_ENB_MASK, - AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT); /* parse Device Entries */ block_length = sizeof(struct acpi_ivhd_block_header); @@ -689,32 +674,32 @@ static int __init parse_ivhd_block(struc ivhd_block->header.length, block_length); break; case AMD_IOMMU_ACPI_IVHD_DEV_SELECT: - dev_length = parse_ivhd_device_select(ivhd_device); + dev_length = parse_ivhd_device_select(ivhd_device, iommu); break; case AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START: dev_length = parse_ivhd_device_range( ivhd_device, - ivhd_block->header.length, block_length); + ivhd_block->header.length, block_length, iommu); break; case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT: dev_length = parse_ivhd_device_alias( ivhd_device, - ivhd_block->header.length, block_length); + ivhd_block->header.length, block_length, iommu); break; case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE: dev_length = parse_ivhd_device_alias_range( ivhd_device, - ivhd_block->header.length, block_length); + ivhd_block->header.length, block_length, iommu); break; case AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT: dev_length = parse_ivhd_device_extended( ivhd_device, - ivhd_block->header.length, block_length); + ivhd_block->header.length, block_length, iommu); break; case AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE: dev_length = parse_ivhd_device_extended_range( ivhd_device, - ivhd_block->header.length, block_length); + ivhd_block->header.length, block_length, iommu); break; default: amd_iov_error("IVHD Error: Invalid Device Type!\n"); @@ -794,29 +779,16 @@ static void __init dump_acpi_table_heade } -int __init parse_ivrs_table(struct acpi_table_header *_table) +static int __init parse_ivrs_table(struct 
acpi_table_header *_table) { struct acpi_ivrs_block_header *ivrs_block; - unsigned long length, i; - u8 checksum, *raw_table; + unsigned long length; int error = 0; struct acpi_table_header *table = (struct acpi_table_header *)_table; BUG_ON(!table); dump_acpi_table_header(table); - - /* validate checksum: sum of entire table == 0 */ - checksum = 0; - raw_table = (u8 *)table; - for ( i = 0; i < table->length; i++ ) - checksum += raw_table[i]; - if ( checksum ) - { - amd_iov_error("IVRS Error: " - "Invalid Checksum 0x%x\n", checksum); - return -ENODEV; - } /* parse IVRS blocks */ length = sizeof(struct acpi_ivrs_table_header); @@ -846,3 +818,144 @@ int __init parse_ivrs_table(struct acpi_ return error; } + +static int __init detect_iommu_acpi(struct acpi_table_header *_table) +{ + struct acpi_ivrs_block_header *ivrs_block; + struct acpi_table_header *table = (struct acpi_table_header *)_table; + unsigned long i; + unsigned long length = sizeof(struct acpi_ivrs_table_header); + u8 checksum, *raw_table; + + /* validate checksum: sum of entire table == 0 */ + checksum = 0; + raw_table = (u8 *)table; + for ( i = 0; i < table->length; i++ ) + checksum += raw_table[i]; + if ( checksum ) + { + amd_iov_error("IVRS Error: " + "Invalid Checksum 0x%x\n", checksum); + return -ENODEV; + } + + while ( table->length > (length + sizeof(*ivrs_block)) ) + { + ivrs_block = (struct acpi_ivrs_block_header *) ((u8 *)table + length); + if ( table->length < (length + ivrs_block->length) ) + return -ENODEV; + if ( ivrs_block->type == AMD_IOMMU_ACPI_IVHD_TYPE ) + if ( amd_iommu_detect_one_acpi((void*)ivrs_block) != 0 ) + return -ENODEV; + length += ivrs_block->length; + } + return 0; +} + +#define UPDATE_LAST_BDF(x) do {\ + if ((x) > last_bdf) \ + last_bdf = (x); \ + } while(0); + +static int __init get_last_bdf_ivhd(void *ivhd) +{ + union acpi_ivhd_device *ivhd_device; + u16 block_length, dev_length; + struct acpi_ivhd_block_header *ivhd_block; + + ivhd_block = (struct 
acpi_ivhd_block_header *)ivhd; + + if ( ivhd_block->header.length < + sizeof(struct acpi_ivhd_block_header) ) + { + amd_iov_error("IVHD Error: Invalid Block Length!\n"); + return -ENODEV; + } + + block_length = sizeof(struct acpi_ivhd_block_header); + while ( ivhd_block->header.length >= + (block_length + sizeof(struct acpi_ivhd_device_header)) ) + { + ivhd_device = (union acpi_ivhd_device *) + ((u8 *)ivhd_block + block_length); + + switch ( ivhd_device->header.type ) + { + case AMD_IOMMU_ACPI_IVHD_DEV_U32_PAD: + dev_length = sizeof(u32); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_U64_PAD: + dev_length = sizeof(u64); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_SELECT: + UPDATE_LAST_BDF(ivhd_device->header.dev_id); + dev_length = sizeof(struct acpi_ivhd_device_header); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT: + UPDATE_LAST_BDF(ivhd_device->header.dev_id); + dev_length = sizeof(struct acpi_ivhd_device_alias); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT: + UPDATE_LAST_BDF(ivhd_device->header.dev_id); + dev_length = sizeof(struct acpi_ivhd_device_extended); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START: + UPDATE_LAST_BDF(ivhd_device->range.trailer.dev_id); + dev_length = sizeof(struct acpi_ivhd_device_range); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE: + UPDATE_LAST_BDF(ivhd_device->alias_range.trailer.dev_id) + dev_length = sizeof(struct acpi_ivhd_device_alias_range); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE: + UPDATE_LAST_BDF(ivhd_device->extended_range.trailer.dev_id) + dev_length = sizeof(struct acpi_ivhd_device_extended_range); + break; + default: + amd_iov_error("IVHD Error: Invalid Device Type!\n"); + dev_length = 0; + break; + } + + block_length += dev_length; + if ( !dev_length ) + return -ENODEV; + } + + return 0; +} + +static int __init get_last_bdf_acpi(struct acpi_table_header *_table) +{ + struct acpi_ivrs_block_header *ivrs_block; + struct acpi_table_header *table = (struct acpi_table_header *)_table; + unsigned long 
length = sizeof(struct acpi_ivrs_table_header); + + while ( table->length > (length + sizeof(*ivrs_block)) ) + { + ivrs_block = (struct acpi_ivrs_block_header *) ((u8 *)table + length); + if ( table->length < (length + ivrs_block->length) ) + return -ENODEV; + if ( ivrs_block->type == AMD_IOMMU_ACPI_IVHD_TYPE ) + if ( get_last_bdf_ivhd((void*)ivrs_block) != 0 ) + return -ENODEV; + length += ivrs_block->length; + } + return 0; +} + +int __init amd_iommu_detect_acpi(void) +{ + return acpi_table_parse(AMD_IOMMU_ACPI_IVRS_SIG, detect_iommu_acpi); +} + +int __init amd_iommu_get_ivrs_dev_entries(void) +{ + acpi_table_parse(AMD_IOMMU_ACPI_IVRS_SIG, get_last_bdf_acpi); + return last_bdf + 1; +} + +int __init amd_iommu_update_ivrs_mapping_acpi(void) +{ + return acpi_table_parse(AMD_IOMMU_ACPI_IVRS_SIG, parse_ivrs_table); +} diff -r 7affdebb7a1e -r a39913db6e51 xen/drivers/passthrough/amd/iommu_detect.c --- a/xen/drivers/passthrough/amd/iommu_detect.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/drivers/passthrough/amd/iommu_detect.c Thu Aug 07 11:57:34 2008 +0900 @@ -25,65 +25,10 @@ #include <xen/pci_regs.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> +#include <asm/hvm/svm/amd-iommu-acpi.h> -static int __init valid_bridge_bus_config( - int bus, int dev, int func, int *sec_bus, int *sub_bus) -{ - int pri_bus; - - pri_bus = pci_conf_read8(bus, dev, func, PCI_PRIMARY_BUS); - *sec_bus = pci_conf_read8(bus, dev, func, PCI_SECONDARY_BUS); - *sub_bus = pci_conf_read8(bus, dev, func, PCI_SUBORDINATE_BUS); - - return ((pri_bus == bus) && (*sec_bus > bus) && (*sub_bus >= *sec_bus)); -} - -int __init get_iommu_last_downstream_bus(struct amd_iommu *iommu) -{ - int bus, dev, func; - int devfn, hdr_type; - int sec_bus, sub_bus; - int multi_func; - - bus = iommu->last_downstream_bus = iommu->root_bus; - iommu->downstream_bus_present[bus] = 1; - dev = PCI_SLOT(iommu->first_devfn); - multi_func = PCI_FUNC(iommu->first_devfn) > 0; - for ( devfn = iommu->first_devfn; 
devfn <= iommu->last_devfn; devfn++ ) - { - /* skipping to next device#? */ - if ( dev != PCI_SLOT(devfn) ) - { - dev = PCI_SLOT(devfn); - multi_func = 0; - } - func = PCI_FUNC(devfn); - - if ( !VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func, - PCI_VENDOR_ID)) ) - continue; - - hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE); - if ( func == 0 ) - multi_func = IS_PCI_MULTI_FUNCTION(hdr_type); - - if ( (func == 0 || multi_func) && - IS_PCI_TYPE1_HEADER(hdr_type) ) - { - if ( !valid_bridge_bus_config(bus, dev, func, - &sec_bus, &sub_bus) ) - return -ENODEV; - - if ( sub_bus > iommu->last_downstream_bus ) - iommu->last_downstream_bus = sub_bus; - do { - iommu->downstream_bus_present[sec_bus] = 1; - } while ( sec_bus++ < sub_bus ); - } - } - - return 0; -} +extern struct list_head amd_iommu_head; +unsigned short last_bdf = 0; static int __init get_iommu_msi_capabilities(u8 bus, u8 dev, u8 func, struct amd_iommu *iommu) @@ -128,30 +73,10 @@ int __init get_iommu_capabilities(u8 bus struct amd_iommu *iommu) { u32 cap_header, cap_range, misc_info; - u64 mmio_bar; - - mmio_bar = (u64)pci_conf_read32( - bus, dev, func, cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32; - mmio_bar |= pci_conf_read32(bus, dev, func, - cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET); - iommu->mmio_base_phys = mmio_bar & (u64)~0x3FFF; - - if ( ((mmio_bar & 0x1) == 0) || (iommu->mmio_base_phys == 0) ) - { - amd_iov_error("Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar); - return -ENODEV; - } - - iommu->bdf = (bus << 8) | PCI_DEVFN(dev, func); - iommu->cap_offset = cap_ptr; cap_header = pci_conf_read32(bus, dev, func, cap_ptr); iommu->revision = get_field_from_reg_u32( cap_header, PCI_CAP_REV_MASK, PCI_CAP_REV_SHIFT); - iommu->iotlb_support = get_field_from_reg_u32( - cap_header, PCI_CAP_IOTLB_MASK, PCI_CAP_IOTLB_SHIFT); - iommu->ht_tunnel_support = get_field_from_reg_u32( - cap_header, PCI_CAP_HT_TUNNEL_MASK, PCI_CAP_HT_TUNNEL_SHIFT); iommu->pte_not_present_cached = get_field_from_reg_u32( 
cap_header, PCI_CAP_NP_CACHE_MASK, PCI_CAP_NP_CACHE_SHIFT); @@ -159,96 +84,76 @@ int __init get_iommu_capabilities(u8 bus cap_ptr + PCI_CAP_RANGE_OFFSET); iommu->unit_id = get_field_from_reg_u32( cap_range, PCI_CAP_UNIT_ID_MASK, PCI_CAP_UNIT_ID_SHIFT); - iommu->root_bus = get_field_from_reg_u32( - cap_range, PCI_CAP_BUS_NUMBER_MASK, PCI_CAP_BUS_NUMBER_SHIFT); - iommu->first_devfn = get_field_from_reg_u32( - cap_range, PCI_CAP_FIRST_DEVICE_MASK, PCI_CAP_FIRST_DEVICE_SHIFT); - iommu->last_devfn = get_field_from_reg_u32( - cap_range, PCI_CAP_LAST_DEVICE_MASK, PCI_CAP_LAST_DEVICE_SHIFT); misc_info = pci_conf_read32(bus, dev, func, cap_ptr + PCI_MISC_INFO_OFFSET); iommu->msi_number = get_field_from_reg_u32( misc_info, PCI_CAP_MSI_NUMBER_MASK, PCI_CAP_MSI_NUMBER_SHIFT); + return 0; +} + +int __init amd_iommu_detect_one_acpi(void *ivhd) +{ + struct amd_iommu *iommu; + u8 bus, dev, func; + struct acpi_ivhd_block_header *ivhd_block; + + ivhd_block = (struct acpi_ivhd_block_header *)ivhd; + + if ( ivhd_block->header.length < sizeof(struct acpi_ivhd_block_header) ) + { + amd_iov_error("Invalid IVHD Block Length!\n"); + return -ENODEV; + } + + if ( !ivhd_block->header.dev_id || + !ivhd_block->cap_offset || !ivhd_block->mmio_base) + { + amd_iov_error("Invalid IVHD Block!\n"); + return -ENODEV; + } + + iommu = (struct amd_iommu *) xmalloc(struct amd_iommu); + if ( !iommu ) + { + amd_iov_error("Error allocating amd_iommu\n"); + return -ENOMEM; + } + memset(iommu, 0, sizeof(struct amd_iommu)); + + spin_lock_init(&iommu->lock); + + iommu->bdf = ivhd_block->header.dev_id; + iommu->cap_offset = ivhd_block->cap_offset; + iommu->mmio_base_phys = ivhd_block->mmio_base; + + /* override IOMMU support flags */ + iommu->coherent = get_field_from_byte(ivhd_block->header.flags, + AMD_IOMMU_ACPI_COHERENT_MASK, + AMD_IOMMU_ACPI_COHERENT_SHIFT); + iommu->iotlb_support = get_field_from_byte(ivhd_block->header.flags, + AMD_IOMMU_ACPI_IOTLB_SUP_MASK, + AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT); + 
iommu->isochronous = get_field_from_byte(ivhd_block->header.flags, + AMD_IOMMU_ACPI_ISOC_MASK, + AMD_IOMMU_ACPI_ISOC_SHIFT); + iommu->res_pass_pw = get_field_from_byte(ivhd_block->header.flags, + AMD_IOMMU_ACPI_RES_PASS_PW_MASK, + AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT); + iommu->pass_pw = get_field_from_byte(ivhd_block->header.flags, + AMD_IOMMU_ACPI_PASS_PW_MASK, + AMD_IOMMU_ACPI_PASS_PW_SHIFT); + iommu->ht_tunnel_enable = get_field_from_byte(ivhd_block->header.flags, + AMD_IOMMU_ACPI_HT_TUN_ENB_MASK, + AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT); + bus = iommu->bdf >> 8; + dev = PCI_SLOT(iommu->bdf & 0xFF); + func = PCI_FUNC(iommu->bdf & 0xFF); + get_iommu_capabilities(bus, dev, func, iommu->cap_offset, iommu); get_iommu_msi_capabilities(bus, dev, func, iommu); + + list_add_tail(&iommu->list, &amd_iommu_head); return 0; } - -static int __init scan_caps_for_iommu( - int bus, int dev, int func, - iommu_detect_callback_ptr_t iommu_detect_callback) -{ - int cap_ptr, cap_id, cap_type; - u32 cap_header; - int count, error = 0; - - count = 0; - cap_ptr = pci_conf_read8(bus, dev, func, PCI_CAPABILITY_LIST); - while ( (cap_ptr >= PCI_MIN_CAP_OFFSET) && - (count < PCI_MAX_CAP_BLOCKS) && - !error ) - { - cap_ptr &= PCI_CAP_PTR_MASK; - cap_header = pci_conf_read32(bus, dev, func, cap_ptr); - cap_id = get_field_from_reg_u32( - cap_header, PCI_CAP_ID_MASK, PCI_CAP_ID_SHIFT); - - if ( cap_id == PCI_CAP_ID_SECURE_DEVICE ) - { - cap_type = get_field_from_reg_u32( - cap_header, PCI_CAP_TYPE_MASK, PCI_CAP_TYPE_SHIFT); - if ( cap_type == PCI_CAP_TYPE_IOMMU ) - error = iommu_detect_callback( - bus, dev, func, cap_ptr); - } - - cap_ptr = get_field_from_reg_u32( - cap_header, PCI_CAP_NEXT_PTR_MASK, PCI_CAP_NEXT_PTR_SHIFT); - count++; - } - - return error; -} - -static int __init scan_functions_for_iommu( - int bus, int dev, iommu_detect_callback_ptr_t iommu_detect_callback) -{ - int func, hdr_type; - int count = 1, error = 0; - - for ( func = 0; - (func < count) && !error && - 
VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func, - PCI_VENDOR_ID)); - func++ ) - - { - hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE); - - if ( (func == 0) && IS_PCI_MULTI_FUNCTION(hdr_type) ) - count = PCI_MAX_FUNC_COUNT; - - if ( IS_PCI_TYPE0_HEADER(hdr_type) || - IS_PCI_TYPE1_HEADER(hdr_type) ) - error = scan_caps_for_iommu(bus, dev, func, - iommu_detect_callback); - } - - return error; -} - - -int __init scan_for_iommu(iommu_detect_callback_ptr_t iommu_detect_callback) -{ - int bus, dev, error = 0; - - for ( bus = 0; bus < PCI_MAX_BUS_COUNT && !error; ++bus ) - for ( dev = 0; dev < PCI_MAX_DEV_COUNT && !error; ++dev ) - error = scan_functions_for_iommu(bus, dev, - iommu_detect_callback); - - return error; -} - diff -r 7affdebb7a1e -r a39913db6e51 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Thu Aug 07 11:47:34 2008 +0900 +++ b/xen/drivers/passthrough/amd/iommu_init.c Thu Aug 07 11:57:34 2008 +0900 @@ -27,10 +27,20 @@ #include <asm/hvm/svm/amd-iommu-proto.h> #include <asm-x86/fixmap.h> -extern int nr_amd_iommus; static struct amd_iommu *vector_to_iommu[NR_VECTORS]; - -int __init map_iommu_mmio_region(struct amd_iommu *iommu) +static int nr_amd_iommus; +static long amd_iommu_cmd_buffer_entries = IOMMU_CMD_BUFFER_DEFAULT_ENTRIES; +static long amd_iommu_event_log_entries = IOMMU_EVENT_LOG_DEFAULT_ENTRIES; + +unsigned short ivrs_bdf_entries; +struct ivrs_mappings *ivrs_mappings; +struct list_head amd_iommu_head; +struct table_struct device_table; + +extern void *int_remap_table; +extern spinlock_t int_remap_table_lock; + +static int __init map_iommu_mmio_region(struct amd_iommu *iommu) { unsigned long mfn; @@ -51,7 +61,7 @@ int __init map_iommu_mmio_region(struct return 0; } -void __init unmap_iommu_mmio_region(struct amd_iommu *iommu) +static void __init unmap_iommu_mmio_region(struct amd_iommu *iommu) { if ( iommu->mmio_base ) { @@ -60,7 +70,7 @@ void __init unmap_iommu_mmio_region(stru } } -void __init 
register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu) +static void __init register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu) { u64 addr_64, addr_lo, addr_hi; u32 entry; @@ -83,7 +93,7 @@ void __init register_iommu_dev_table_in_ writel(entry, iommu->mmio_base + IOMMU_DEV_TABLE_BASE_HIGH_OFFSET); } -void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu) +static void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu) { u64 addr_64, addr_lo, addr_hi; u32 power_of2_entries; @@ -110,7 +120,7 @@ void __init register_iommu_cmd_buffer_in writel(entry, iommu->mmio_base+IOMMU_CMD_BUFFER_BASE_HIGH_OFFSET); } -void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu) +static void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu) { u64 addr_64, addr_lo, addr_hi; u32 power_of2_entries; @@ -266,12 +276,13 @@ static int amd_iommu_read_event_log(stru return -EFAULT; } -static void amd_iommu_msi_data_init(struct amd_iommu *iommu, int vector) +static void amd_iommu_msi_data_init(struct amd_iommu *iommu) { u32 msi_data; u8 bus = (iommu->bdf >> 8) & 0xff; u8 dev = PCI_SLOT(iommu->bdf & 0xff); u8 func = PCI_FUNC(iommu->bdf & 0xff); + int vector = iommu->vector; msi_data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | @@ -434,7 +445,6 @@ static int set_iommu_interrupt_handler(s static int set_iommu_interrupt_handler(struct amd_iommu *iommu) { int vector, ret; - unsigned long flags; vector = assign_irq_vector(AUTO_ASSIGN); vector_to_iommu[vector] = iommu; @@ -450,38 +460,210 @@ static int set_iommu_interrupt_handler(s } irq_desc[vector].handler = &iommu_msi_type; - ret = request_irq(vector, amd_iommu_page_fault, 0, "dmar", iommu); + ret = request_irq(vector, amd_iommu_page_fault, 0, "amd_iommu", iommu); if ( ret ) { amd_iov_error("can't request irq\n"); return 0; } + return vector; +} + +void __init enable_iommu(struct amd_iommu *iommu) +{ + unsigned long flags; + 
spin_lock_irqsave(&iommu->lock, flags); - amd_iommu_msi_data_init (iommu, vector); + if ( iommu->enabled ) + { + spin_unlock_irqrestore(&iommu->lock, flags); + return; + } + + iommu->dev_table.alloc_size = device_table.alloc_size; + iommu->dev_table.entries = device_table.entries; + iommu->dev_table.buffer = device_table.buffer; + + register_iommu_dev_table_in_mmio_space(iommu); + register_iommu_cmd_buffer_in_mmio_space(iommu); + register_iommu_event_log_in_mmio_space(iommu); + register_iommu_exclusion_range(iommu); + + amd_iommu_msi_data_init (iommu); amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map))); amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED); - spin_unlock_irqrestore(&iommu->lock, flags); - - return vector; -} - -void __init enable_iommu(struct amd_iommu *iommu) -{ - unsigned long flags; - - set_iommu_interrupt_handler(iommu); - - spin_lock_irqsave(&iommu->lock, flags); - - register_iommu_exclusion_range(iommu); set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED); set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED); set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED); + printk("AMD_IOV: IOMMU %d Enabled.\n", nr_amd_iommus ); + nr_amd_iommus++; + + iommu->enabled = 1; spin_unlock_irqrestore(&iommu->lock, flags); - printk("AMD_IOV: IOMMU %d Enabled.\n", nr_amd_iommus); -} +} + +static void __init deallocate_iommu_table_struct( + struct table_struct *table) +{ + if ( table->buffer ) + { + free_xenheap_pages(table->buffer, + get_order_from_bytes(table->alloc_size)); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |