[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User Alex Williamson <alex.williamson@xxxxxx> # Date 1209067709 21600 # Node ID dc510776dd598f3f479af749865bec225e32634d # Parent 239b44eeb2d6d235ddee581b6e89398c80278a2f # Parent 97da69831384f0819caeeb8b8bdff0f942b2d690 merge with xen-unstable.hg --- xen/arch/x86/hvm/svm/x86_32/Makefile | 1 xen/arch/x86/hvm/svm/x86_32/exits.S | 131 ------- xen/arch/x86/hvm/svm/x86_64/Makefile | 1 xen/arch/x86/hvm/svm/x86_64/exits.S | 148 -------- xen/arch/x86/hvm/vmx/x86_32/Makefile | 1 xen/arch/x86/hvm/vmx/x86_32/exits.S | 148 -------- xen/arch/x86/hvm/vmx/x86_64/Makefile | 1 xen/arch/x86/hvm/vmx/x86_64/exits.S | 165 --------- .hgignore | 1 Makefile | 7 docs/misc/vtd.txt | 2 docs/src/user.tex | 7 docs/xen-api/revision-history.tex | 10 docs/xen-api/xenapi-coversheet.tex | 2 docs/xen-api/xenapi-datamodel.tex | 180 ++++++++-- extras/mini-os/Makefile | 29 + extras/mini-os/app.lds | 11 extras/mini-os/arch/ia64/minios-ia64.lds | 5 extras/mini-os/arch/x86/minios-x86_32.lds | 1 extras/mini-os/arch/x86/minios-x86_64.lds | 1 extras/mini-os/arch/x86/mm.c | 7 extras/mini-os/fbfront.c | 98 +++-- extras/mini-os/hypervisor.c | 15 extras/mini-os/include/fbfront.h | 3 extras/mini-os/include/hypervisor.h | 4 extras/mini-os/include/ia64/arch_mm.h | 2 extras/mini-os/include/ia64/os.h | 11 extras/mini-os/include/lib.h | 1 extras/mini-os/include/mm.h | 2 extras/mini-os/include/x86/os.h | 1 extras/mini-os/kernel.c | 15 extras/mini-os/lib/sys.c | 35 ++ extras/mini-os/main.c | 2 extras/mini-os/mm.c | 12 extras/mini-os/sched.c | 136 ++------ tools/blktap/drivers/blktapctrl.c | 83 ++++ tools/blktap/drivers/tapdisk.h | 2 tools/console/daemon/io.c | 18 - tools/console/daemon/main.c | 13 tools/examples/blktap | 22 - tools/firmware/hvmloader/Makefile | 5 tools/firmware/hvmloader/acpi/build.c | 4 tools/firmware/hvmloader/cacheattr.c | 99 +++++ tools/firmware/hvmloader/config.h | 3 tools/firmware/hvmloader/hvmloader.c | 3 tools/firmware/hvmloader/smp.c | 9 tools/ioemu/Makefile | 2 
tools/ioemu/hw/cirrus_vga.c | 4 tools/ioemu/hw/pci.c | 16 tools/ioemu/hw/vga.c | 6 tools/ioemu/hw/xen_blktap.c | 45 -- tools/ioemu/hw/xenfb.c | 153 ++++++--- tools/ioemu/tapdisk-ioemu.c | 14 tools/ioemu/target-i386-dm/helper2.c | 2 tools/ioemu/vl.c | 17 - tools/ioemu/vl.h | 1 tools/libfsimage/ext2fs/fsys_ext2fs.c | 61 +++ tools/libxc/xc_hvm_build.c | 2 tools/python/xen/util/acmpolicy.py | 27 + tools/python/xen/xend/XendDomain.py | 26 + tools/python/xen/xend/XendDomainInfo.py | 31 + tools/python/xen/xend/XendXSPolicyAdmin.py | 1 tools/python/xen/xend/image.py | 46 ++ tools/python/xen/xm/main.py | 11 tools/tests/test_x86_emulator.c | 48 -- tools/tests/x86_emulate.c | 6 tools/xenmon/xenbaked.c | 32 + xen/Makefile | 12 xen/arch/x86/Makefile | 2 xen/arch/x86/bitops.c | 32 + xen/arch/x86/cpu/mtrr/main.c | 7 xen/arch/x86/domain.c | 11 xen/arch/x86/domain_build.c | 4 xen/arch/x86/hvm/emulate.c | 133 +++++++- xen/arch/x86/hvm/hvm.c | 152 ++++++++- xen/arch/x86/hvm/mtrr.c | 312 ++++-------------- xen/arch/x86/hvm/svm/Makefile | 4 xen/arch/x86/hvm/svm/entry.S | 178 ++++++++++ xen/arch/x86/hvm/svm/intr.c | 6 xen/arch/x86/hvm/svm/svm.c | 40 -- xen/arch/x86/hvm/vmx/Makefile | 4 xen/arch/x86/hvm/vmx/entry.S | 198 +++++++++++ xen/arch/x86/hvm/vmx/intr.c | 6 xen/arch/x86/hvm/vmx/vmx.c | 101 ------ xen/arch/x86/mm.c | 35 -- xen/arch/x86/mm/shadow/common.c | 94 ++--- xen/arch/x86/mm/shadow/multi.c | 13 xen/arch/x86/setup.c | 4 xen/arch/x86/smp.c | 27 - xen/arch/x86/traps.c | 41 +- xen/arch/x86/x86_emulate.c | 18 - xen/arch/x86/x86_emulate/x86_emulate.c | 282 +++++++++++----- xen/arch/x86/x86_emulate/x86_emulate.h | 54 +-- xen/common/trace.c | 11 xen/common/xencomm.c | 2 xen/drivers/char/console.c | 2 xen/drivers/char/serial.c | 53 ++- xen/drivers/passthrough/amd/iommu_acpi.c | 214 +++++------- xen/drivers/passthrough/amd/iommu_detect.c | 62 +++ xen/drivers/passthrough/amd/iommu_init.c | 306 ++++++++++++++++++ xen/drivers/passthrough/amd/iommu_map.c | 21 - 
xen/drivers/passthrough/amd/pci_amd_iommu.c | 210 +++++------- xen/drivers/passthrough/iommu.c | 30 + xen/drivers/passthrough/vtd/dmar.c | 58 --- xen/drivers/passthrough/vtd/iommu.c | 432 ++++++++------------------ xen/drivers/passthrough/vtd/utils.c | 2 xen/drivers/passthrough/vtd/x86/vtd.c | 16 xen/include/asm-x86/amd-iommu.h | 6 xen/include/asm-x86/bitops.h | 52 +-- xen/include/asm-x86/hvm/hvm.h | 2 xen/include/asm-x86/hvm/support.h | 2 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h | 37 ++ xen/include/asm-x86/hvm/svm/amd-iommu-proto.h | 19 - xen/include/asm-x86/hvm/vcpu.h | 11 xen/include/asm-x86/mtrr.h | 8 xen/include/asm-x86/paging.h | 2 xen/include/public/xsm/acm.h | 7 xen/include/xen/iommu.h | 4 xen/include/xen/serial.h | 8 xen/include/xsm/acm/acm_core.h | 1 xen/tools/Makefile | 4 xen/tools/figlet/figlet.c | 24 - xen/xsm/acm/acm_policy.c | 8 123 files changed, 2967 insertions(+), 2425 deletions(-) diff -r 239b44eeb2d6 -r dc510776dd59 .hgignore --- a/.hgignore Thu Apr 24 14:02:16 2008 -0600 +++ b/.hgignore Thu Apr 24 14:08:29 2008 -0600 @@ -243,6 +243,7 @@ ^tools/xm-test/lib/XmTestLib/config.py$ ^tools/xm-test/lib/XmTestReport/xmtest.py$ ^tools/xm-test/tests/.*\.test$ +^xen/\.banner.*$ ^xen/BLOG$ ^xen/System.map$ ^xen/TAGS$ diff -r 239b44eeb2d6 -r dc510776dd59 Makefile --- a/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ b/Makefile Thu Apr 24 14:08:29 2008 -0600 @@ -121,6 +121,13 @@ distclean: # Linux name for GNU distclean .PHONY: mrproper mrproper: distclean + +# Prepare for source tarball +.PHONY: src-tarball +src-tarball: distclean + $(MAKE) -C xen .banner + rm -rf xen/tools/figlet .[a-z]* + $(MAKE) -C xen distclean .PHONY: help help: diff -r 239b44eeb2d6 -r dc510776dd59 docs/misc/vtd.txt --- a/docs/misc/vtd.txt Thu Apr 24 14:02:16 2008 -0600 +++ b/docs/misc/vtd.txt Thu Apr 24 14:08:29 2008 -0600 @@ -21,7 +21,7 @@ 11) "hide" pci device from dom0 as follo title Xen-Fedora Core (2.6.18-xen) root (hd0,0) - kernel /boot/xen.gz com1=115200,8n1 console=com1 vtd=1 + 
kernel /boot/xen.gz com1=115200,8n1 console=com1 module /boot/vmlinuz-2.6.18.8-xen root=LABEL=/ ro console=tty0 console=ttyS0,115200,8n1 pciback.hide=(01:00.0)(03:00.0) pciback.verbose_request=1 apic=debug module /boot/initrd-2.6.18-xen.img diff -r 239b44eeb2d6 -r dc510776dd59 docs/src/user.tex --- a/docs/src/user.tex Thu Apr 24 14:02:16 2008 -0600 +++ b/docs/src/user.tex Thu Apr 24 14:08:29 2008 -0600 @@ -4088,6 +4088,8 @@ editing \path{grub.conf}. a list of pages not to be allocated for use because they contain bad bytes. For example, if your memory tester says that byte 0x12345678 is bad, you would place `badpage=0x12345' on Xen's command line. +\item [ serial\_tx\_buffer=$<$size$>$ ] Size of serial transmit + buffers. Default is 16kB. \item [ com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\ Xen supports up to two 16550-compatible serial ports. For example: @@ -4239,10 +4241,11 @@ In addition to the standard Linux kernel \begin{tabular}{l} `xencons=off': disable virtual console \\ `xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\ - `xencons=ttyS': attach console to /dev/ttyS0 + `xencons=ttyS': attach console to /dev/ttyS0 \\ + `xencons=xvc': attach console to /dev/xvc0 \end{tabular} \end{center} -The default is ttyS for dom0 and tty for all other domains. +The default is ttyS for dom0 and xvc for all other domains. \end{description} diff -r 239b44eeb2d6 -r dc510776dd59 docs/xen-api/revision-history.tex --- a/docs/xen-api/revision-history.tex Thu Apr 24 14:02:16 2008 -0600 +++ b/docs/xen-api/revision-history.tex Thu Apr 24 14:08:29 2008 -0600 @@ -37,5 +37,15 @@ \end{flushleft} \end{minipage}\\ \hline + 1.0.5 & 17th Apr. 08 & S. Berger & + \begin{minipage}[t]{7cm} + \begin{flushleft} + Added undocumented fields and methods for default\_netmask and + default\_gateway to the Network class. 
Removed an unimplemented + method from the XSPolicy class and removed the 'optional' from + 'oldlabel' parameters. + \end{flushleft} + \end{minipage}\\ + \hline \end{tabular} \end{center} diff -r 239b44eeb2d6 -r dc510776dd59 docs/xen-api/xenapi-coversheet.tex --- a/docs/xen-api/xenapi-coversheet.tex Thu Apr 24 14:02:16 2008 -0600 +++ b/docs/xen-api/xenapi-coversheet.tex Thu Apr 24 14:08:29 2008 -0600 @@ -22,7 +22,7 @@ \newcommand{\releasestatement}{Stable Release} %% Document revision -\newcommand{\revstring}{API Revision 1.0.4} +\newcommand{\revstring}{API Revision 1.0.5} %% Document authors \newcommand{\docauthors}{ diff -r 239b44eeb2d6 -r dc510776dd59 docs/xen-api/xenapi-datamodel.tex --- a/docs/xen-api/xenapi-datamodel.tex Thu Apr 24 14:02:16 2008 -0600 +++ b/docs/xen-api/xenapi-datamodel.tex Thu Apr 24 14:08:29 2008 -0600 @@ -4467,7 +4467,7 @@ security_label, string old_label)\end{ve {\bf type} & {\bf name} & {\bf description} \\ \hline {\tt VM ref } & self & reference to the object \\ \hline {\tt string } & security\_label & security label for the VM \\ \hline -{\tt string } & old\_label & Optional label value that the security label \\ +{\tt string } & old\_label & Label value that the security label \\ & & must currently have for the change to succeed.\\ \hline \end{tabular} @@ -7619,6 +7619,8 @@ Quals & Field & Type & Description \\ $\mathit{RW}$ & {\tt name/description} & string & a notes field containg human-readable description \\ $\mathit{RO}_\mathit{run}$ & {\tt VIFs} & (VIF ref) Set & list of connected vifs \\ $\mathit{RO}_\mathit{run}$ & {\tt PIFs} & (PIF ref) Set & list of connected pifs \\ +$\mathit{RW}$ & {\tt default\_gateway} & string & default gateway \\ +$\mathit{RW}$ & {\tt default\_netmask} & string & default netmask \\ $\mathit{RW}$ & {\tt other\_config} & (string $\rightarrow$ string) Map & additional configuration \\ \hline \end{longtable} @@ -7869,6 +7871,138 @@ Get the PIFs field of the given network. 
value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_default\_gateway} + +{\bf Overview:} +Get the default\_gateway field of the given network. + + \noindent {\bf Signature:} +\begin{verbatim} string get_default_gateway (session_id s, network ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt network ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~set\_default\_gateway} + +{\bf Overview:} +Set the default\_gateway field of the given network. + + \noindent {\bf Signature:} +\begin{verbatim} void set_default_gateway (session_id s, network ref self, string value)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt network ref } & self & reference to the object \\ \hline + +{\tt string } & value & New value to set \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +void +} + + + +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_default\_netmask} + +{\bf Overview:} +Get the default\_netmask field of the given network. 
+ + \noindent {\bf Signature:} +\begin{verbatim} string get_default_netmask (session_id s, network ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt network ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~set\_default\_netmask} + +{\bf Overview:} +Set the default\_netmask field of the given network. + + \noindent {\bf Signature:} +\begin{verbatim} void set_default_netmask (session_id s, network ref self, string value)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt network ref } & self & reference to the object \\ \hline + +{\tt string } & value & New value to set \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +void +} + + + \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} @@ -8999,7 +9133,7 @@ security_label, string old_label)\end{ve {\tt VIF ref } & self & reference to the object \\ \hline {\tt string } & security\_label & New value of the security label \\ \hline -{\tt string } & old\_label & Optional label value that the security label \\ +{\tt string } & old\_label & Label value that the security label \\ & & must currently have for the change to succeed.\\ \hline \end{tabular} @@ -11504,7 +11638,7 @@ security_label, string old_label)\end{ve {\tt VDI ref } & self & reference to the object \\ \hline {\tt string } & security\_label & New value of the security label \\ \hline -{\tt string } & old\_label & Optional label value that the security label \\ +{\tt string } & old\_label & Label value that the security label \\ & & must currently have for the change to succeed.\\ \hline \end{tabular} @@ -14898,46 
+15032,6 @@ The label of the given resource. \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~activate\_xspolicy} - -{\bf Overview:} -Load the referenced policy into the hypervisor. - - \noindent {\bf Signature:} -\begin{verbatim} xs_instantiationflags activate_xspolicy (session_id s, xs_ref xspolicy, -xs_instantiationflags flags)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt xs ref } & self & reference to the object \\ \hline -{\tt xs\_instantiationflags } & flags & flags to activate on a policy; flags - can only be set \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - - \noindent {\bf Return Type:} -{\tt -xs\_instantiationflags -} - - -Currently active instantiation flags. -\vspace{0.3cm} - -\noindent{\bf Possible Error Codes:} {\tt SECURITY\_ERROR} - -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} \subsubsection{RPC name:~can\_run} {\bf Overview:} diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/Makefile --- a/extras/mini-os/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/Makefile Thu Apr 24 14:08:29 2008 -0600 @@ -19,6 +19,7 @@ include minios.mk # Define some default flags for linking. LDLIBS := +APP_LDLIBS := LDARCHLIB := -L$(TARGET_ARCH_DIR) -l$(ARCH_LIB_NAME) LDFLAGS_FINAL := -T $(TARGET_ARCH_DIR)/minios-$(XEN_TARGET_ARCH).lds @@ -33,6 +34,7 @@ SUBDIRS := lib xenbus console SUBDIRS := lib xenbus console # The common mini-os objects to build. 
+APP_OBJS := OBJS := $(patsubst %.c,%.o,$(wildcard *.c)) OBJS += $(patsubst %.c,%.o,$(wildcard lib/*.c)) OBJS += $(patsubst %.c,%.o,$(wildcard xenbus/*.c)) @@ -75,28 +77,28 @@ OBJS := $(filter-out lwip%.o $(LWO), $(O ifeq ($(caml),y) CAMLLIB = $(shell ocamlc -where) -OBJS += $(CAMLDIR)/caml.o -OBJS += $(CAMLLIB)/libasmrun.a +APP_OBJS += main-caml.o +APP_OBJS += $(CAMLDIR)/caml.o +APP_OBJS += $(CAMLLIB)/libasmrun.a CFLAGS += -I$(CAMLLIB) -LDLIBS += -lm -else +APP_LDLIBS += -lm +endif OBJS := $(filter-out main-caml.o, $(OBJS)) -endif ifeq ($(qemu),y) -OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a $(QEMUDIR)/i386-dm-stubdom/libqemu.a +APP_OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a $(QEMUDIR)/i386-dm-stubdom/libqemu.a CFLAGS += -DCONFIG_QEMU endif ifneq ($(CDIR),) -OBJS += $(CDIR)/main.a -LDLIBS += +APP_OBJS += $(CDIR)/main.a +APP_LDLIBS += endif ifeq ($(libc),y) LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -lxenctrl -lxenguest -LDLIBS += -lpci -LDLIBS += -lz +APP_LDLIBS += -lpci +APP_LDLIBS += -lz LDLIBS += -lc endif @@ -104,8 +106,11 @@ OBJS := $(filter-out daytime.o, $(OBJS)) OBJS := $(filter-out daytime.o, $(OBJS)) endif -$(TARGET): links $(OBJS) arch_lib - $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o +app.o: $(APP_OBJS) app.lds + $(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined main -o $@ + +$(TARGET): links $(OBJS) app.o arch_lib + $(LD) -r $(LDFLAGS) $(HEAD_OBJ) app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@ gzip -f -9 -c $@ >$@.gz diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/app.lds --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/app.lds Thu Apr 24 14:08:29 2008 -0600 @@ -0,0 +1,11 @@ +SECTIONS +{ + .app.bss : { + __app_bss_start = . ; + *(.bss .bss.*) + *(COMMON) + *(.lbss .lbss.*) + *(LARGE_COMMON) + __app_bss_end = . 
; + } +} diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/ia64/minios-ia64.lds --- a/extras/mini-os/arch/ia64/minios-ia64.lds Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/arch/ia64/minios-ia64.lds Thu Apr 24 14:08:29 2008 -0600 @@ -59,7 +59,10 @@ SECTIONS { *(.IA_64.unwind) } .bss : AT(ADDR(.bss) - (((5<<(61))+0x100000000) - (1 << 20))) - { *(.bss) } + { + *(.bss) + *(.app.bss) + } _end = .; diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/x86/minios-x86_32.lds --- a/extras/mini-os/arch/x86/minios-x86_32.lds Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/arch/x86/minios-x86_32.lds Thu Apr 24 14:08:29 2008 -0600 @@ -38,6 +38,7 @@ SECTIONS __bss_start = .; /* BSS */ .bss : { *(.bss) + *(.app.bss) } _end = . ; diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/x86/minios-x86_64.lds --- a/extras/mini-os/arch/x86/minios-x86_64.lds Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/arch/x86/minios-x86_64.lds Thu Apr 24 14:08:29 2008 -0600 @@ -38,6 +38,7 @@ SECTIONS __bss_start = .; /* BSS */ .bss : { *(.bss) + *(.app.bss) } _end = . 
; diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/x86/mm.c --- a/extras/mini-os/arch/x86/mm.c Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/arch/x86/mm.c Thu Apr 24 14:08:29 2008 -0600 @@ -556,7 +556,6 @@ void *map_frames_ex(unsigned long *f, un static void clear_bootstrap(void) { - struct xen_memory_reservation reservation; xen_pfn_t mfns[] = { virt_to_mfn(&shared_info) }; int n = sizeof(mfns)/sizeof(*mfns); pte_t nullpte = { }; @@ -567,11 +566,7 @@ static void clear_bootstrap(void) if (HYPERVISOR_update_va_mapping((unsigned long) &_text, nullpte, UVMF_INVLPG)) printk("Unable to unmap first page\n"); - set_xen_guest_handle(reservation.extent_start, mfns); - reservation.nr_extents = n; - reservation.extent_order = 0; - reservation.domid = DOMID_SELF; - if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != n) + if (free_physical_pages(mfns, n) != n) printk("Unable to free bootstrap pages\n"); } diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/fbfront.c --- a/extras/mini-os/fbfront.c Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/fbfront.c Thu Apr 24 14:08:29 2008 -0600 @@ -243,12 +243,12 @@ struct fbfront_dev { char *backend; int request_update; - char *data; int width; int height; int depth; - int line_length; + int stride; int mem_length; + int offset; }; void fbfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) @@ -256,7 +256,7 @@ void fbfront_handler(evtchn_port_t port, wake_up(&fbfront_queue); } -struct fbfront_dev *init_fbfront(char *nodename, void *data, int width, int height, int depth, int line_length, int mem_length) +struct fbfront_dev *init_fbfront(char *nodename, unsigned long *mfns, int width, int height, int depth, int stride, int n) { xenbus_transaction_t xbt; char* err; @@ -289,24 +289,17 @@ struct fbfront_dev *init_fbfront(char *n dev->width = s->width = width; dev->height = s->height = height; dev->depth = s->depth = depth; - dev->line_length = s->line_length = line_length; - dev->mem_length 
= s->mem_length = mem_length; - - ASSERT(!((unsigned long)data & ~PAGE_MASK)); - dev->data = data; + dev->stride = s->line_length = stride; + dev->mem_length = s->mem_length = n * PAGE_SIZE; + dev->offset = 0; const int max_pd = sizeof(s->pd) / sizeof(s->pd[0]); unsigned long mapped = 0; - for (i = 0; mapped < mem_length && i < max_pd; i++) { + for (i = 0; mapped < n && i < max_pd; i++) { unsigned long *pd = (unsigned long *) alloc_page(); - for (j = 0; mapped < mem_length && j < PAGE_SIZE / sizeof(unsigned long); j++) { - /* Trigger CoW */ - * ((char *)data + mapped) = 0; - barrier(); - pd[j] = virtual_to_mfn((unsigned long) data + mapped); - mapped += PAGE_SIZE; - } + for (j = 0; mapped < n && j < PAGE_SIZE / sizeof(unsigned long); j++) + pd[j] = mfns[mapped++]; for ( ; j < PAGE_SIZE / sizeof(unsigned long); j++) pd[j] = 0; s->pd[i] = virt_to_mfn(pd); @@ -395,31 +388,11 @@ done: return dev; } -void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int height) +static void fbfront_out_event(struct fbfront_dev *dev, union xenfb_out_event *event) { struct xenfb_page *page = dev->page; uint32_t prod; DEFINE_WAIT(w); - - if (dev->request_update <= 0) - return; - - if (x < 0) { - width += x; - x = 0; - } - if (x + width > dev->width) - width = dev->width - x; - - if (y < 0) { - height += y; - y = 0; - } - if (y + height > dev->height) - height = dev->height - y; - - if (width <= 0 || height <= 0) - return; add_waiter(w, fbfront_queue); while (page->out_prod - page->out_cons == XENFB_OUT_RING_LEN) @@ -428,14 +401,55 @@ void fbfront_update(struct fbfront_dev * prod = page->out_prod; mb(); /* ensure ring space available */ - XENFB_OUT_RING_REF(page, prod).type = XENFB_TYPE_UPDATE; - XENFB_OUT_RING_REF(page, prod).update.x = x; - XENFB_OUT_RING_REF(page, prod).update.y = y; - XENFB_OUT_RING_REF(page, prod).update.width = width; - XENFB_OUT_RING_REF(page, prod).update.height = height; + XENFB_OUT_RING_REF(page, prod) = *event; wmb(); /* ensure ring contents 
visible */ page->out_prod = prod + 1; notify_remote_via_evtchn(dev->evtchn); +} + +void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int height) +{ + struct xenfb_update update; + + if (dev->request_update <= 0) + return; + + if (x < 0) { + width += x; + x = 0; + } + if (x + width > dev->width) + width = dev->width - x; + + if (y < 0) { + height += y; + y = 0; + } + if (y + height > dev->height) + height = dev->height - y; + + if (width <= 0 || height <= 0) + return; + + update.type = XENFB_TYPE_UPDATE; + update.x = x; + update.y = y; + update.width = width; + update.height = height; + fbfront_out_event(dev, (union xenfb_out_event *) &update); +} + +void fbfront_resize(struct fbfront_dev *dev, int width, int height, int stride, int depth, int offset) +{ + struct xenfb_resize resize; + + resize.type = XENFB_TYPE_RESIZE; + dev->width = resize.width = width; + dev->height = resize.height = height; + dev->stride = resize.stride = stride; + dev->depth = resize.depth = depth; + dev->offset = resize.offset = offset; + fbfront_out_event(dev, (union xenfb_out_event *) &resize); } void shutdown_fbfront(struct fbfront_dev *dev) diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/hypervisor.c --- a/extras/mini-os/hypervisor.c Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/hypervisor.c Thu Apr 24 14:08:29 2008 -0600 @@ -66,6 +66,21 @@ void do_hypervisor_callback(struct pt_re in_callback = 0; } +void force_evtchn_callback(void) +{ + vcpu_info_t *vcpu; + vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; + int save = vcpu->evtchn_upcall_mask; + + while (vcpu->evtchn_upcall_pending) { + vcpu->evtchn_upcall_mask = 1; + barrier(); + do_hypervisor_callback(NULL); + barrier(); + vcpu->evtchn_upcall_mask = save; + barrier(); + }; +} inline void mask_evtchn(u32 port) { diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/fbfront.h --- a/extras/mini-os/include/fbfront.h Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/include/fbfront.h Thu 
Apr 24 14:08:29 2008 -0600 @@ -31,11 +31,12 @@ void shutdown_kbdfront(struct kbdfront_d void shutdown_kbdfront(struct kbdfront_dev *dev); -struct fbfront_dev *init_fbfront(char *nodename, void *data, int width, int height, int depth, int line_length, int mem_length); +struct fbfront_dev *init_fbfront(char *nodename, unsigned long *mfns, int width, int height, int depth, int stride, int n); #ifdef HAVE_LIBC int fbfront_open(struct fbfront_dev *dev); #endif void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int height); +void fbfront_resize(struct fbfront_dev *dev, int width, int height, int stride, int depth, int offset); void shutdown_fbfront(struct fbfront_dev *dev); diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/hypervisor.h --- a/extras/mini-os/include/hypervisor.h Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/include/hypervisor.h Thu Apr 24 14:08:29 2008 -0600 @@ -24,6 +24,7 @@ #else #error "Unsupported architecture" #endif +#include <traps.h> /* * a placeholder for the start of day information passed up from the hypervisor @@ -37,7 +38,8 @@ extern union start_info_union start_info #define start_info (start_info_union.start_info) /* hypervisor.c */ -//void do_hypervisor_callback(struct pt_regs *regs); +void force_evtchn_callback(void); +void do_hypervisor_callback(struct pt_regs *regs); void mask_evtchn(u32 port); void unmask_evtchn(u32 port); void clear_evtchn(u32 port); diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/ia64/arch_mm.h --- a/extras/mini-os/include/ia64/arch_mm.h Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/include/ia64/arch_mm.h Thu Apr 24 14:08:29 2008 -0600 @@ -38,6 +38,6 @@ #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, 0) /* TODO */ #define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, 0, 0) -#define do_map_zero(start, n) ((void)0) +#define do_map_zero(start, n) ASSERT(n == 0) #endif /* __ARCH_MM_H__ */ diff -r 239b44eeb2d6 -r dc510776dd59 
extras/mini-os/include/ia64/os.h --- a/extras/mini-os/include/ia64/os.h Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/include/ia64/os.h Thu Apr 24 14:08:29 2008 -0600 @@ -189,17 +189,6 @@ __synch_cmpxchg(volatile void *ptr, uint return ia64_cmpxchg_acq_64(ptr, old, new); } -/* - * Force a proper event-channel callback from Xen after clearing the - * callback mask. We do this in a very simple manner, by making a call - * down into Xen. The pending flag will be checked by Xen on return. - */ -static inline void -force_evtchn_callback(void) -{ - (void)HYPERVISOR_xen_version(0, NULL); -} - extern shared_info_t *HYPERVISOR_shared_info; static inline int diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/lib.h --- a/extras/mini-os/include/lib.h Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/include/lib.h Thu Apr 24 14:08:29 2008 -0600 @@ -187,6 +187,7 @@ int alloc_fd(enum fd_type type); int alloc_fd(enum fd_type type); void close_all_files(void); extern struct thread *main_thread; +void sparse(unsigned long data, size_t size); #endif #endif /* _LIB_H_ */ diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/mm.h --- a/extras/mini-os/include/mm.h Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/include/mm.h Thu Apr 24 14:08:29 2008 -0600 @@ -70,4 +70,6 @@ extern unsigned long heap, brk, heap_map extern unsigned long heap, brk, heap_mapped, heap_end; #endif +int free_physical_pages(xen_pfn_t *mfns, int n); + #endif /* _MM_H_ */ diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/x86/os.h --- a/extras/mini-os/include/x86/os.h Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/include/x86/os.h Thu Apr 24 14:08:29 2008 -0600 @@ -28,7 +28,6 @@ extern void do_exit(void) __attribute__( #include <xen/xen.h> -#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0, 0)) #define __KERNEL_CS FLAT_KERNEL_CS #define __KERNEL_DS FLAT_KERNEL_DS diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Thu Apr 
24 14:02:16 2008 -0600 +++ b/extras/mini-os/kernel.c Thu Apr 24 14:08:29 2008 -0600 @@ -297,9 +297,20 @@ static void fbfront_thread(void *p) { size_t line_length = WIDTH * (DEPTH / 8); size_t memsize = HEIGHT * line_length; - + unsigned long *mfns; + int i, n = (memsize + PAGE_SIZE-1) / PAGE_SIZE; + + memsize = n * PAGE_SIZE; fb = _xmalloc(memsize, PAGE_SIZE); - fb_dev = init_fbfront(NULL, fb, WIDTH, HEIGHT, DEPTH, line_length, memsize); + mfns = xmalloc_array(unsigned long, n); + for (i = 0; i < n; i++) { + /* trigger CoW */ + ((char *) fb) [i * PAGE_SIZE] = 0; + barrier(); + mfns[i] = virtual_to_mfn((char *) fb + i * PAGE_SIZE); + } + fb_dev = init_fbfront(NULL, mfns, WIDTH, HEIGHT, DEPTH, line_length, n); + xfree(mfns); if (!fb_dev) { xfree(fb); return; diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/lib/sys.c --- a/extras/mini-os/lib/sys.c Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/lib/sys.c Thu Apr 24 14:08:29 2008 -0600 @@ -1108,6 +1108,41 @@ int munmap(void *start, size_t length) return 0; } +void sparse(unsigned long data, size_t size) +{ + unsigned long newdata; + xen_pfn_t *mfns; + int i, n; + + newdata = (data + PAGE_SIZE - 1) & PAGE_MASK; + if (newdata - data > size) + return; + size -= newdata - data; + data = newdata; + n = size / PAGE_SIZE; + size = n * PAGE_SIZE; + + mfns = malloc(n * sizeof(*mfns)); + for (i = 0; i < n; i++) { +#ifdef LIBC_DEBUG + int j; + for (j=0; j<PAGE_SIZE; j++) + if (((char*)data + i * PAGE_SIZE)[j]) { + printk("%lx is not zero!\n", data + i * PAGE_SIZE + j); + exit(1); + } +#endif + mfns[i] = virtual_to_mfn(data + i * PAGE_SIZE); + } + + printk("sparsing %ldMB at %lx\n", size >> 20, data); + + munmap((void *) data, size); + free_physical_pages(mfns, n); + do_map_zero(data, n); +} + + /* Not supported by FS yet. 
*/ unsupported_function_crash(link); unsupported_function(int, readlink, -1); diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/main.c --- a/extras/mini-os/main.c Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/main.c Thu Apr 24 14:08:29 2008 -0600 @@ -39,6 +39,7 @@ void _fini(void) { } +extern char __app_bss_start, __app_bss_end; static void call_main(void *p) { char *args, /**path,*/ *msg, *c; @@ -56,6 +57,7 @@ static void call_main(void *p) * crashing. */ //sleep(1); + sparse((unsigned long) &__app_bss_start, &__app_bss_end - &__app_bss_start); start_networking(); init_fs_frontend(); diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/mm.c --- a/extras/mini-os/mm.c Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/mm.c Thu Apr 24 14:08:29 2008 -0600 @@ -36,6 +36,7 @@ #include <os.h> #include <hypervisor.h> +#include <xen/memory.h> #include <mm.h> #include <types.h> #include <lib.h> @@ -360,6 +361,17 @@ void free_pages(void *pointer, int order } +int free_physical_pages(xen_pfn_t *mfns, int n) +{ + struct xen_memory_reservation reservation; + + set_xen_guest_handle(reservation.extent_start, mfns); + reservation.nr_extents = n; + reservation.extent_order = 0; + reservation.domid = DOMID_SELF; + return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); +} + #ifdef HAVE_LIBC void *sbrk(ptrdiff_t increment) { diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/sched.c --- a/extras/mini-os/sched.c Thu Apr 24 14:02:16 2008 -0600 +++ b/extras/mini-os/sched.c Thu Apr 24 14:08:29 2008 -0600 @@ -70,62 +70,15 @@ void inline print_runqueue(void) printk("\n"); } -/* Find the time when the next timeout expires. If this is more than - 10 seconds from now, return 10 seconds from now. 
*/ -static s_time_t blocking_time(void) -{ - struct thread *thread; - struct list_head *iterator; - s_time_t min_wakeup_time; - unsigned long flags; - local_irq_save(flags); - /* default-block the domain for 10 seconds: */ - min_wakeup_time = NOW() + SECONDS(10); - - /* Thread list needs to be protected */ - list_for_each(iterator, &idle_thread->thread_list) - { - thread = list_entry(iterator, struct thread, thread_list); - if(!is_runnable(thread) && thread->wakeup_time != 0LL) - { - if(thread->wakeup_time < min_wakeup_time) - { - min_wakeup_time = thread->wakeup_time; - } - } - } - local_irq_restore(flags); - return(min_wakeup_time); -} - -/* Wake up all threads with expired timeouts. */ -static void wake_expired(void) -{ - struct thread *thread; - struct list_head *iterator; - s_time_t now = NOW(); - unsigned long flags; - local_irq_save(flags); - /* Thread list needs to be protected */ - list_for_each(iterator, &idle_thread->thread_list) - { - thread = list_entry(iterator, struct thread, thread_list); - if(!is_runnable(thread) && thread->wakeup_time != 0LL) - { - if(thread->wakeup_time <= now) - wake(thread); - } - } - local_irq_restore(flags); -} - void schedule(void) { struct thread *prev, *next, *thread; struct list_head *iterator; unsigned long flags; + prev = current; local_irq_save(flags); + if (in_callback) { printk("Must not call schedule() from a callback\n"); BUG(); @@ -134,6 +87,45 @@ void schedule(void) printk("Must not call schedule() with IRQs disabled\n"); BUG(); } + + do { + /* Examine all threads. + Find a runnable thread, but also wake up expired ones and find the + time when the next timeout expires, else use 10 seconds. 
*/ + s_time_t now = NOW(); + s_time_t min_wakeup_time = now + SECONDS(10); + next = NULL; + list_for_each(iterator, &idle_thread->thread_list) + { + thread = list_entry(iterator, struct thread, thread_list); + if (!is_runnable(thread) && thread->wakeup_time != 0LL) + { + if (thread->wakeup_time <= now) + wake(thread); + else if (thread->wakeup_time < min_wakeup_time) + min_wakeup_time = thread->wakeup_time; + } + if(is_runnable(thread)) + { + next = thread; + /* Put this thread on the end of the list */ + list_del(&thread->thread_list); + list_add_tail(&thread->thread_list, &idle_thread->thread_list); + break; + } + } + if (next) + break; + /* block until the next timeout expires, or for 10 secs, whichever comes first */ + block_domain(min_wakeup_time); + /* handle pending events if any */ + force_evtchn_callback(); + } while(1); + local_irq_restore(flags); + /* Interrupting the switch is equivalent to having the next thread + inturrupted at the return instruction. And therefore at safe point. */ + if(prev != next) switch_threads(prev, next); + list_for_each(iterator, &exited_threads) { thread = list_entry(iterator, struct thread, thread_list); @@ -144,24 +136,6 @@ void schedule(void) xfree(thread); } } - next = idle_thread; - /* Thread list needs to be protected */ - list_for_each(iterator, &idle_thread->thread_list) - { - thread = list_entry(iterator, struct thread, thread_list); - if(is_runnable(thread)) - { - next = thread; - /* Put this thread on the end of the list */ - list_del(&thread->thread_list); - list_add_tail(&thread->thread_list, &idle_thread->thread_list); - break; - } - } - local_irq_restore(flags); - /* Interrupting the switch is equivalent to having the next thread - inturrupted at the return instruction. And therefore at safe point. 
*/ - if(prev != next) switch_threads(prev, next); } struct thread* create_thread(char *name, void (*function)(void *), void *data) @@ -267,32 +241,10 @@ void wake(struct thread *thread) void idle_thread_fn(void *unused) { - s_time_t until; threads_started = 1; - unsigned long flags; - struct list_head *iterator; - struct thread *next, *thread; - for(;;) - { - schedule(); - next = NULL; - local_irq_save(flags); - list_for_each(iterator, &idle_thread->thread_list) - { - thread = list_entry(iterator, struct thread, thread_list); - if(is_runnable(thread)) - { - next = thread; - break; - } - } - if (!next) { - /* block until the next timeout expires, or for 10 secs, whichever comes first */ - until = blocking_time(); - block_domain(until); - } - local_irq_restore(flags); - wake_expired(); + while (1) { + block(current); + schedule(); } } diff -r 239b44eeb2d6 -r dc510776dd59 tools/blktap/drivers/blktapctrl.c --- a/tools/blktap/drivers/blktapctrl.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/blktap/drivers/blktapctrl.c Thu Apr 24 14:08:29 2008 -0600 @@ -474,9 +474,8 @@ static int read_msg(int fd, int msgtype, } -int launch_tapdisk(char *wrctldev, char *rdctldev) -{ - char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL }; +static int launch_tapdisk_provider(char **argv) +{ pid_t child; if ((child = fork()) < 0) @@ -490,7 +489,9 @@ int launch_tapdisk(char *wrctldev, char i != STDERR_FILENO) close(i); - execvp("tapdisk", argv); + execvp(argv[0], argv); + DPRINTF("execvp failed: %d (%s)\n", errno, strerror(errno)); + DPRINTF("PATH = %s\n", getenv("PATH")); _exit(1); } else { pid_t got; @@ -498,28 +499,78 @@ int launch_tapdisk(char *wrctldev, char got = waitpid(child, NULL, 0); } while (got != child); } + return child; +} + +static int launch_tapdisk(char *wrctldev, char *rdctldev) +{ + char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL }; + + if (launch_tapdisk_provider(argv) < 0) + return -1; + return 0; } -/* Connect to qemu-dm */ -static int connect_qemu(blkif_t *blkif) 
+static int launch_tapdisk_ioemu(void) +{ + char *argv[] = { "tapdisk-ioemu", NULL }; + return launch_tapdisk_provider(argv); +} + +/* + * Connect to an ioemu based disk provider (qemu-dm or tapdisk-ioemu) + * + * If the domain has a device model, connect to qemu-dm through the + * domain specific pipe. Otherwise use a single tapdisk-ioemu instance + * which is represented by domid 0 and provides access for Dom0 and + * all DomUs without device model. + */ +static int connect_qemu(blkif_t *blkif, int domid) { char *rdctldev, *wrctldev; - - if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", - blkif->domid) < 0) - return -1; - - if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", - blkif->domid) < 0) { + + static int tapdisk_ioemu_pid = 0; + static int dom0_readfd = 0; + static int dom0_writefd = 0; + + if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) < 0) + return -1; + + if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) < 0) { free(rdctldev); return -1; } DPRINTF("Using qemu blktap pipe: %s\n", rdctldev); - blkif->fds[READ] = open_ctrl_socket(wrctldev); - blkif->fds[WRITE] = open_ctrl_socket(rdctldev); + if (domid == 0) { + /* + * tapdisk-ioemu exits as soon as the last image is + * disconnected. Check if it is still running. 
+ */ + if (tapdisk_ioemu_pid == 0 || kill(tapdisk_ioemu_pid, 0)) { + /* No device model and tapdisk-ioemu doesn't run yet */ + DPRINTF("Launching tapdisk-ioemu\n"); + tapdisk_ioemu_pid = launch_tapdisk_ioemu(); + + dom0_readfd = open_ctrl_socket(wrctldev); + dom0_writefd = open_ctrl_socket(rdctldev); + } + + DPRINTF("Using tapdisk-ioemu connection\n"); + blkif->fds[READ] = dom0_readfd; + blkif->fds[WRITE] = dom0_writefd; + } else if (access(rdctldev, R_OK | W_OK) == 0) { + /* Use existing pipe to the device model */ + DPRINTF("Using qemu-dm connection\n"); + blkif->fds[READ] = open_ctrl_socket(wrctldev); + blkif->fds[WRITE] = open_ctrl_socket(rdctldev); + } else { + /* No device model => try with tapdisk-ioemu */ + DPRINTF("No device model\n"); + connect_qemu(blkif, 0); + } free(rdctldev); free(wrctldev); @@ -599,7 +650,7 @@ int blktapctrl_new_blkif(blkif_t *blkif) if (!exist) { if (type == DISK_TYPE_IOEMU) { - if (connect_qemu(blkif)) + if (connect_qemu(blkif, blkif->domid)) goto fail; } else { if (connect_tapdisk(blkif, minor)) diff -r 239b44eeb2d6 -r dc510776dd59 tools/blktap/drivers/tapdisk.h --- a/tools/blktap/drivers/tapdisk.h Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/blktap/drivers/tapdisk.h Thu Apr 24 14:08:29 2008 -0600 @@ -235,7 +235,7 @@ static disk_info_t ioemu_disk = { DISK_TYPE_IOEMU, "ioemu disk", "ioemu", - 0, + 1, #ifdef TAPDISK NULL #endif diff -r 239b44eeb2d6 -r dc510776dd59 tools/console/daemon/io.c --- a/tools/console/daemon/io.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/console/daemon/io.c Thu Apr 24 14:08:29 2008 -0600 @@ -63,6 +63,7 @@ extern int log_time_hv; extern int log_time_hv; extern int log_time_guest; extern char *log_dir; +extern int discard_overflowed_data; static int log_time_hv_needts = 1; static int log_time_guest_needts = 1; @@ -201,7 +202,7 @@ static void buffer_append(struct domain dom->domid, errno, strerror(errno)); } - if (buffer->max_capacity && + if (discard_overflowed_data && buffer->max_capacity && buffer->size > 
buffer->max_capacity) { /* Discard the middle of the data. */ @@ -228,6 +229,11 @@ static void buffer_advance(struct buffer if (buffer->consumed == buffer->size) { buffer->consumed = 0; buffer->size = 0; + if (buffer->max_capacity && + buffer->capacity > buffer->max_capacity) { + buffer->data = realloc(buffer->data, buffer->max_capacity); + buffer->capacity = buffer->max_capacity; + } } } @@ -1005,9 +1011,13 @@ void handle_io(void) d->next_period < next_timeout) next_timeout = d->next_period; } else if (d->xce_handle != -1) { - int evtchn_fd = xc_evtchn_fd(d->xce_handle); - FD_SET(evtchn_fd, &readfds); - max_fd = MAX(evtchn_fd, max_fd); + if (discard_overflowed_data || + !d->buffer.max_capacity || + d->buffer.size < d->buffer.max_capacity) { + int evtchn_fd = xc_evtchn_fd(d->xce_handle); + FD_SET(evtchn_fd, &readfds); + max_fd = MAX(evtchn_fd, max_fd); + } } if (d->master_fd != -1) { diff -r 239b44eeb2d6 -r dc510776dd59 tools/console/daemon/main.c --- a/tools/console/daemon/main.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/console/daemon/main.c Thu Apr 24 14:08:29 2008 -0600 @@ -38,6 +38,7 @@ int log_time_hv = 0; int log_time_hv = 0; int log_time_guest = 0; char *log_dir = NULL; +int discard_overflowed_data = 1; static void handle_hup(int sig) { @@ -46,7 +47,7 @@ static void handle_hup(int sig) static void usage(char *name) { - printf("Usage: %s [-h] [-V] [-v] [-i] [--log=none|guest|hv|all] [--log-dir=DIR] [--pid-file=PATH] [-t, --timestamp=none|guest|hv|all]\n", name); + printf("Usage: %s [-h] [-V] [-v] [-i] [--log=none|guest|hv|all] [--log-dir=DIR] [--pid-file=PATH] [-t, --timestamp=none|guest|hv|all] [-o, --overflow-data=discard|keep]\n", name); } static void version(char *name) @@ -56,7 +57,7 @@ static void version(char *name) int main(int argc, char **argv) { - const char *sopts = "hVvit:"; + const char *sopts = "hVvit:o:"; struct option lopts[] = { { "help", 0, 0, 'h' }, { "version", 0, 0, 'V' }, @@ -66,6 +67,7 @@ int main(int argc, char **argv) { "log-dir", 
1, 0, 'r' }, { "pid-file", 1, 0, 'p' }, { "timestamp", 1, 0, 't' }, + { "overflow-data", 1, 0, 'o'}, { 0 }, }; bool is_interactive = false; @@ -119,6 +121,13 @@ int main(int argc, char **argv) log_time_hv = 0; } break; + case 'o': + if (!strcmp(optarg, "keep")) { + discard_overflowed_data = 0; + } else if (!strcmp(optarg, "discard")) { + discard_overflowed_data = 1; + } + break; case '?': fprintf(stderr, "Try `%s --help' for more information\n", diff -r 239b44eeb2d6 -r dc510776dd59 tools/examples/blktap --- a/tools/examples/blktap Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/examples/blktap Thu Apr 24 14:08:29 2008 -0600 @@ -54,10 +54,6 @@ check_blktap_sharing() echo 'ok' } -FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id") -FRONTEND_UUID=$(xenstore_read "/local/domain/$FRONTEND_ID/vm") -mode=$(xenstore_read "$XENBUS_PATH/mode") -mode=$(canonicalise_mode "$mode") t=$(xenstore_read_default "$XENBUS_PATH/type" 'MISSING') if [ -n "$t" ] @@ -77,15 +73,21 @@ else file="$p" fi -if [ "$mode" != '!' ] -then - result=$(check_blktap_sharing "$file" "$mode") - [ "$result" = 'ok' ] || ebusy "$file already in use by other domain" -fi - if [ "$command" = 'add' ] then [ -e "$file" ] || { fatal $file does not exist; } + + FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id") + FRONTEND_UUID=$(xenstore_read "/local/domain/$FRONTEND_ID/vm") + mode=$(xenstore_read "$XENBUS_PATH/mode") + mode=$(canonicalise_mode "$mode") + + if [ "$mode" != '!' ] + then + result=$(check_blktap_sharing "$file" "$mode") + [ "$result" = 'ok' ] || ebusy "$file already in use by other domain" + fi + success fi diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/Makefile --- a/tools/firmware/hvmloader/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/firmware/hvmloader/Makefile Thu Apr 24 14:08:29 2008 -0600 @@ -28,8 +28,9 @@ LOADADDR = 0x100000 CFLAGS += $(CFLAGS_include) -I. 
-SRCS = hvmloader.c mp_tables.c util.c smbios.c 32bitbios_support.c smp.c -OBJS = $(patsubst %.c,%.o,$(SRCS)) +SRCS = hvmloader.c mp_tables.c util.c smbios.c +SRCS += 32bitbios_support.c smp.c cacheattr.c +OBJS = $(patsubst %.c,%.o,$(SRCS)) .PHONY: all all: hvmloader diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/acpi/build.c --- a/tools/firmware/hvmloader/acpi/build.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/firmware/hvmloader/acpi/build.c Thu Apr 24 14:08:29 2008 -0600 @@ -84,8 +84,8 @@ static int construct_bios_info_table(uin bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS); - bios_info->pci_min = 0xf0000000; - bios_info->pci_len = 0x0c000000; + bios_info->pci_min = PCI_MEMBASE; + bios_info->pci_len = PCI_MEMSIZE; return align16(sizeof(*bios_info)); } diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/cacheattr.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/firmware/hvmloader/cacheattr.c Thu Apr 24 14:08:29 2008 -0600 @@ -0,0 +1,99 @@ +/* + * cacheattr.c: MTRR and PAT initialisation. + * + * Copyright (c) 2008, Citrix Systems, Inc. + * + * Authors: + * Keir Fraser <keir.fraser@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ */ + +#include "util.h" +#include "config.h" + +#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg)) +#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1) +#define MSR_MTRRcap 0x00fe +#define MSR_MTRRfix64K_00000 0x0250 +#define MSR_MTRRfix16K_80000 0x0258 +#define MSR_MTRRfix16K_A0000 0x0259 +#define MSR_MTRRfix4K_C0000 0x0268 +#define MSR_MTRRfix4K_C8000 0x0269 +#define MSR_MTRRfix4K_D0000 0x026a +#define MSR_MTRRfix4K_D8000 0x026b +#define MSR_MTRRfix4K_E0000 0x026c +#define MSR_MTRRfix4K_E8000 0x026d +#define MSR_MTRRfix4K_F0000 0x026e +#define MSR_MTRRfix4K_F8000 0x026f +#define MSR_PAT 0x0277 +#define MSR_MTRRdefType 0x02ff + +void cacheattr_init(void) +{ + uint32_t eax, ebx, ecx, edx; + uint64_t mtrr_cap, mtrr_def, content, addr_mask; + unsigned int i, nr_var_ranges, phys_bits = 36; + + /* Does the CPU support architectural MTRRs? */ + cpuid(0x00000001, &eax, &ebx, &ecx, &edx); + if ( !(edx & (1u << 12)) ) + return; + + /* Find the physical address size for this CPU. */ + cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + if ( eax >= 0x80000008 ) + { + cpuid(0x80000008, &eax, &ebx, &ecx, &edx); + phys_bits = (uint8_t)eax; + } + + printf("%u-bit phys ... ", phys_bits); + + addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1); + mtrr_cap = rdmsr(MSR_MTRRcap); + mtrr_def = (1u << 11) | 6; /* E, default type WB */ + + /* Fixed-range MTRRs supported? */ + if ( mtrr_cap & (1u << 8) ) + { + /* 0x00000-0x9ffff: Write Back (WB) */ + content = 0x0606060606060606ull; + wrmsr(MSR_MTRRfix64K_00000, content); + wrmsr(MSR_MTRRfix16K_80000, content); + /* 0xa0000-0xbffff: Write Combining (WC) */ + if ( mtrr_cap & (1u << 10) ) /* WC supported? */ + content = 0x0101010101010101ull; + wrmsr(MSR_MTRRfix16K_A0000, content); + /* 0xc0000-0xfffff: Write Back (WB) */ + content = 0x0606060606060606ull; + for ( i = 0; i < 8; i++ ) + wrmsr(MSR_MTRRfix4K_C0000 + i, content); + mtrr_def |= 1u << 10; /* FE */ + printf("fixed MTRRs ... "); + } + + /* Variable-range MTRRs supported? 
*/ + nr_var_ranges = (uint8_t)mtrr_cap; + if ( nr_var_ranges != 0 ) + { + /* A single UC range covering PCI space. */ + wrmsr(MSR_MTRRphysBase(0), PCI_MEMBASE); + wrmsr(MSR_MTRRphysMask(0), + ((uint64_t)(int32_t)PCI_MEMBASE & addr_mask) | (1u << 11)); + printf("var MTRRs ... "); + } + + wrmsr(MSR_MTRRdefType, mtrr_def); +} diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/config.h --- a/tools/firmware/hvmloader/config.h Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/firmware/hvmloader/config.h Thu Apr 24 14:08:29 2008 -0600 @@ -10,6 +10,9 @@ #define PCI_ISA_DEVFN 0x08 /* dev 1, fn 0 */ #define PCI_ISA_IRQ_MASK 0x0c20U /* ISA IRQs 5,10,11 are PCI connected */ + +#define PCI_MEMBASE 0xf0000000 +#define PCI_MEMSIZE 0x0c000000 #define ROMBIOS_SEG 0xF000 #define ROMBIOS_BEGIN 0x000F0000 diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/hvmloader.c --- a/tools/firmware/hvmloader/hvmloader.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/firmware/hvmloader/hvmloader.c Thu Apr 24 14:08:29 2008 -0600 @@ -96,6 +96,7 @@ asm ( "stack: \n" " .skip 0x4000 \n" "stack_top: \n" + " .text \n" ); void smp_initialise(void); @@ -158,7 +159,7 @@ static void pci_setup(void) struct resource { uint32_t base, max; } *resource; - struct resource mem_resource = { 0xf0000000, 0xfc000000 }; + struct resource mem_resource = { PCI_MEMBASE, PCI_MEMBASE + PCI_MEMSIZE }; struct resource io_resource = { 0xc000, 0x10000 }; /* Create a list of device BARs in descending order of size. */ diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/smp.c --- a/tools/firmware/hvmloader/smp.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/firmware/hvmloader/smp.c Thu Apr 24 14:08:29 2008 -0600 @@ -66,12 +66,15 @@ asm ( "stack: \n" " .skip 0x4000 \n" "stack_top: \n" + " .text \n" ); + +extern void cacheattr_init(void); /*static*/ void ap_start(void) { printf(" - CPU%d ... 
", ap_cpuid); - + cacheattr_init(); printf("done.\n"); wmb(); ap_callin = 1; @@ -121,12 +124,10 @@ void smp_initialise(void) { unsigned int i, nr_cpus = get_vcpu_nr(); - if ( nr_cpus <= 1 ) - return; - memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end - ap_boot_start); printf("Multiprocessor initialisation:\n"); + ap_start(); for ( i = 1; i < nr_cpus; i++ ) boot_cpu(i); } diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/Makefile --- a/tools/ioemu/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/ioemu/Makefile Thu Apr 24 14:08:29 2008 -0600 @@ -87,7 +87,7 @@ endif install: all $(if $(BUILD_DOCS),install-doc) mkdir -p "$(DESTDIR)$(bindir)" - $(INSTALL) -m 755 -s $(TOOLS) "$(DESTDIR)$(prefix)/sbin" + $(INSTALL) -m 755 $(TOOLS) "$(DESTDIR)$(SBINDIR)" # mkdir -p "$(DESTDIR)$(datadir)" # for x in bios.bin vgabios.bin vgabios-cirrus.bin ppc_rom.bin \ # video.x openbios-sparc32 linux_boot.bin pxe-ne2k_pci.bin \ diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/cirrus_vga.c --- a/tools/ioemu/hw/cirrus_vga.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/ioemu/hw/cirrus_vga.c Thu Apr 24 14:08:29 2008 -0600 @@ -2595,6 +2595,10 @@ static void *set_vram_mapping(unsigned l memset(vram_pointer, 0, nr_extents * TARGET_PAGE_SIZE); +#ifdef CONFIG_STUBDOM + xenfb_pv_display_start(vram_pointer); +#endif + free(extent_start); return vram_pointer; diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/pci.c --- a/tools/ioemu/hw/pci.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/ioemu/hw/pci.c Thu Apr 24 14:08:29 2008 -0600 @@ -79,18 +79,30 @@ int pci_bus_num(PCIBus *s) void pci_device_save(PCIDevice *s, QEMUFile *f) { - qemu_put_be32(f, 1); /* PCI device version */ + uint8_t irq_state = 0; + int i; + qemu_put_be32(f, 2); /* PCI device version */ qemu_put_buffer(f, s->config, 256); + for (i = 0; i < 4; i++) + irq_state |= !!s->irq_state[i] << i; + qemu_put_buffer(f, &irq_state, 1); } int pci_device_load(PCIDevice *s, QEMUFile *f) { uint32_t version_id; version_id = qemu_get_be32(f); - 
if (version_id != 1) + if (version_id != 1 && version_id != 2) return -EINVAL; qemu_get_buffer(f, s->config, 256); pci_update_mappings(s); + if (version_id == 2) { + uint8_t irq_state; + int i; + qemu_get_buffer(f, &irq_state, 1); + for (i = 0; i < 4; i++) + pci_set_irq(s, i, !!(irq_state >> i)); + } return 0; } diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/vga.c --- a/tools/ioemu/hw/vga.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/ioemu/hw/vga.c Thu Apr 24 14:08:29 2008 -0600 @@ -2067,8 +2067,8 @@ void vga_common_init(VGAState *s, Displa & ~(TARGET_PAGE_SIZE - 1)); /* Video RAM must be 128-bit aligned for SSE optimizations later */ - s->vram_alloc = qemu_malloc(vga_ram_size + 15); - s->vram_ptr = (uint8_t *)((long)(s->vram_alloc + 15) & ~15L); + /* and page-aligned for PVFB memory sharing */ + s->vram_ptr = s->vram_alloc = qemu_memalign(TARGET_PAGE_SIZE, vga_ram_size); s->vram_offset = vga_ram_offset; s->vram_size = vga_ram_size; @@ -2210,7 +2210,7 @@ void *vga_update_vram(VGAState *s, void } if (!vga_ram_base) { - vga_ram_base = qemu_malloc(vga_ram_size + TARGET_PAGE_SIZE + 1); + vga_ram_base = qemu_memalign(TARGET_PAGE_SIZE, vga_ram_size + TARGET_PAGE_SIZE + 1); if (!vga_ram_base) { fprintf(stderr, "reallocate error\n"); return NULL; diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/xen_blktap.c --- a/tools/ioemu/hw/xen_blktap.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/ioemu/hw/xen_blktap.c Thu Apr 24 14:08:29 2008 -0600 @@ -581,17 +581,13 @@ static void handle_blktap_ctrlmsg(void* */ static int open_ctrl_socket(char *devname) { - int ret; int ipc_fd; if (mkdir(BLKTAP_CTRL_DIR, 0755) == 0) DPRINTF("Created %s directory\n", BLKTAP_CTRL_DIR); - ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO); - if ( (ret != 0) && (errno != EEXIST) ) { - DPRINTF("ERROR: pipe failed (%d)\n", errno); + if (access(devname, R_OK | W_OK)) return -1; - } ipc_fd = open(devname,O_RDWR|O_NONBLOCK); @@ -601,42 +597,6 @@ static int open_ctrl_socket(char *devnam } return ipc_fd; -} - 
-/** - * Unmaps all disks and closes their pipes - */ -void shutdown_blktap(void) -{ - fd_list_entry_t *ptr; - struct td_state *s; - char *devname; - - DPRINTF("Shutdown blktap\n"); - - /* Unmap all disks */ - ptr = fd_start; - while (ptr != NULL) { - s = ptr->s; - unmap_disk(s); - close(ptr->tap_fd); - ptr = ptr->next; - } - - /* Delete control pipes */ - if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) { - DPRINTF("Delete %s\n", devname); - if (unlink(devname)) - DPRINTF("Could not delete: %s\n", strerror(errno)); - free(devname); - } - - if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) { - DPRINTF("Delete %s\n", devname); - if (unlink(devname)) - DPRINTF("Could not delete: %s\n", strerror(errno)); - free(devname); - } } /** @@ -679,8 +639,5 @@ int init_blktap(void) /* Attach a handler to the read pipe (called from qemu main loop) */ qemu_set_fd_handler2(read_fd, NULL, &handle_blktap_ctrlmsg, NULL, NULL); - /* Register handler to clean up when the domain is destroyed */ - atexit(&shutdown_blktap); - return 0; } diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/xenfb.c --- a/tools/ioemu/hw/xenfb.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/ioemu/hw/xenfb.c Thu Apr 24 14:08:29 2008 -0600 @@ -1235,14 +1235,10 @@ static struct semaphore kbd_sem = __SEMA static struct semaphore kbd_sem = __SEMAPHORE_INITIALIZER(kbd_sem, 0); static struct kbdfront_dev *kbd_dev; static char *kbd_path, *fb_path; +static void *vga_vram, *nonshared_vram; +static DisplayState *xenfb_ds; static unsigned char linux2scancode[KEY_MAX + 1]; - -#define WIDTH 1024 -#define HEIGHT 768 -#define DEPTH 32 -#define LINESIZE (1280 * (DEPTH / 8)) -#define MEMSIZE (LINESIZE * HEIGHT) int xenfb_connect_vkbd(const char *path) { @@ -1256,33 +1252,73 @@ int xenfb_connect_vfb(const char *path) return 0; } -static void xenfb_pv_update(DisplayState *s, int x, int y, int w, int h) -{ - struct fbfront_dev *fb_dev = s->opaque; +static void xenfb_pv_update(DisplayState *ds, 
int x, int y, int w, int h) +{ + struct fbfront_dev *fb_dev = ds->opaque; + if (!fb_dev) + return; fbfront_update(fb_dev, x, y, w, h); } -static void xenfb_pv_resize(DisplayState *s, int w, int h, int linesize) -{ - struct fbfront_dev *fb_dev = s->opaque; - fprintf(stderr,"resize to %dx%d required\n", w, h); - s->width = w; - s->height = h; - /* TODO: send resize event if supported */ - memset(s->data, 0, MEMSIZE); - fbfront_update(fb_dev, 0, 0, WIDTH, HEIGHT); +static void xenfb_pv_resize(DisplayState *ds, int w, int h, int linesize) +{ + struct fbfront_dev *fb_dev = ds->opaque; + fprintf(stderr,"resize to %dx%d, %d required\n", w, h, linesize); + ds->width = w; + ds->height = h; + if (!linesize) + ds->shared_buf = 0; + if (!ds->shared_buf) + linesize = w * 4; + ds->linesize = linesize; + if (!fb_dev) + return; + if (ds->shared_buf) { + ds->data = NULL; + } else { + ds->data = nonshared_vram; + fbfront_resize(fb_dev, w, h, linesize, ds->depth, VGA_RAM_SIZE); + } } static void xenfb_pv_colourdepth(DisplayState *ds, int depth) { - /* TODO: send redepth event if supported */ + struct fbfront_dev *fb_dev = ds->opaque; static int lastdepth = -1; + if (!depth) { + ds->shared_buf = 0; + ds->depth = 32; + } else { + ds->shared_buf = 1; + ds->depth = depth; + } if (depth != lastdepth) { fprintf(stderr,"redepth to %d required\n", depth); lastdepth = depth; + } else return; + if (!fb_dev) + return; + if (ds->shared_buf) { + ds->data = NULL; + } else { + ds->data = nonshared_vram; + fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, VGA_RAM_SIZE); } - /* We can't redepth for now */ - ds->depth = DEPTH; +} + +static void xenfb_pv_setdata(DisplayState *ds, void *pixels) +{ + struct fbfront_dev *fb_dev = ds->opaque; + int offset = pixels - vga_vram; + ds->data = pixels; + if (!fb_dev) + return; + fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, offset); +} + +static void xenfb_pv_refresh(DisplayState *ds) +{ + vga_hw_update(); } static 
void xenfb_kbd_handler(void *opaque) @@ -1373,13 +1409,6 @@ static void xenfb_kbd_handler(void *opaq } } -static void xenfb_pv_refresh(DisplayState *ds) -{ - /* always request negociation */ - ds->depth = -1; - vga_hw_update(); -} - static void kbdfront_thread(void *p) { int scancode, keycode; @@ -1399,40 +1428,72 @@ static void kbdfront_thread(void *p) int xenfb_pv_display_init(DisplayState *ds) { - void *data; + if (!fb_path || !kbd_path) + return -1; + + create_thread("kbdfront", kbdfront_thread, (void*) kbd_path); + + xenfb_ds = ds; + + ds->data = nonshared_vram = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE); + memset(ds->data, 0, VGA_RAM_SIZE); + ds->depth = 32; + ds->bgr = 0; + ds->width = 640; + ds->height = 400; + ds->linesize = 640 * 4; + ds->dpy_update = xenfb_pv_update; + ds->dpy_resize = xenfb_pv_resize; + ds->dpy_colourdepth = xenfb_pv_colourdepth; + ds->dpy_setdata = xenfb_pv_setdata; + ds->dpy_refresh = xenfb_pv_refresh; + return 0; +} + +int xenfb_pv_display_start(void *data) +{ + DisplayState *ds = xenfb_ds; struct fbfront_dev *fb_dev; int kbd_fd; + int offset = 0; + unsigned long *mfns; + int n = VGA_RAM_SIZE / PAGE_SIZE; + int i; if (!fb_path || !kbd_path) - return -1; - - create_thread("kbdfront", kbdfront_thread, (void*) kbd_path); - - data = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE); - fb_dev = init_fbfront(fb_path, data, WIDTH, HEIGHT, DEPTH, LINESIZE, MEMSIZE); + return 0; + + vga_vram = data; + mfns = malloc(2 * n * sizeof(*mfns)); + for (i = 0; i < n; i++) + mfns[i] = virtual_to_mfn(vga_vram + i * PAGE_SIZE); + for (i = 0; i < n; i++) + mfns[n + i] = virtual_to_mfn(nonshared_vram + i * PAGE_SIZE); + + fb_dev = init_fbfront(fb_path, mfns, ds->width, ds->height, ds->depth, ds->linesize, 2 * n); + free(mfns); if (!fb_dev) { fprintf(stderr,"can't open frame buffer\n"); exit(1); } free(fb_path); + if (ds->shared_buf) { + offset = (void*) ds->data - vga_vram; + } else { + offset = VGA_RAM_SIZE; + ds->data = nonshared_vram; + } + if (offset) + 
fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, offset); + down(&kbd_sem); free(kbd_path); kbd_fd = kbdfront_open(kbd_dev); qemu_set_fd_handler(kbd_fd, xenfb_kbd_handler, NULL, ds); - ds->data = data; - ds->linesize = LINESIZE; - ds->depth = DEPTH; - ds->bgr = 0; - ds->width = WIDTH; - ds->height = HEIGHT; - ds->dpy_update = xenfb_pv_update; - ds->dpy_resize = xenfb_pv_resize; - ds->dpy_colourdepth = xenfb_pv_colourdepth; - ds->dpy_refresh = xenfb_pv_refresh; - ds->opaque = fb_dev; + xenfb_ds->opaque = fb_dev; return 0; } #endif diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/tapdisk-ioemu.c --- a/tools/ioemu/tapdisk-ioemu.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/ioemu/tapdisk-ioemu.c Thu Apr 24 14:08:29 2008 -0600 @@ -4,6 +4,7 @@ #include <string.h> #include <stdint.h> #include <signal.h> +#include <unistd.h> #include <sys/time.h> #include <assert.h> @@ -15,6 +16,8 @@ extern void bdrv_init(void); extern void *qemu_mallocz(size_t size); extern void qemu_free(void *ptr); + +extern void *fd_start; int domid = 0; FILE* logfile; @@ -95,12 +98,17 @@ int main(void) int max_fd; fd_set rfds; struct timeval tv; + void *old_fd_start = NULL; logfile = stderr; bdrv_init(); qemu_aio_init(); init_blktap(); + + /* Daemonize */ + if (fork() != 0) + exit(0); /* * Main loop: Pass events to the corrsponding handlers and check for @@ -137,6 +145,12 @@ int main(void) } else pioh = &ioh->next; } + + /* Exit when the last image has been closed */ + if (old_fd_start != NULL && fd_start == NULL) + exit(0); + + old_fd_start = fd_start; } return 0; } diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/ioemu/target-i386-dm/helper2.c Thu Apr 24 14:08:29 2008 -0600 @@ -482,7 +482,7 @@ void cpu_handle_ioreq(void *opaque) CPUState *env = opaque; ioreq_t *req = cpu_get_ioreq(); - handle_buffered_io(env); + __handle_buffered_iopage(env); if (req) { 
__handle_ioreq(env, req); diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/ioemu/vl.c Thu Apr 24 14:08:29 2008 -0600 @@ -140,9 +140,9 @@ #define MAX_IOPORTS 65536 const char *bios_dir = CONFIG_QEMU_SHAREDIR; -void **ioport_opaque; -IOPortReadFunc *(*ioport_read_table)[MAX_IOPORTS]; -IOPortWriteFunc *(*ioport_write_table)[MAX_IOPORTS]; +void *ioport_opaque[MAX_IOPORTS]; +IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS]; +IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS]; /* Note: bs_table[MAX_DISKS] is a dummy block driver if none available to store the VM snapshots */ BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS + 1], *fd_table[MAX_FD]; @@ -281,9 +281,6 @@ void default_ioport_writel(void *opaque, void init_ioports(void) { - ioport_opaque = calloc(MAX_IOPORTS, sizeof(*ioport_opaque)); - ioport_read_table = calloc(3 * MAX_IOPORTS, sizeof(**ioport_read_table)); - ioport_write_table = calloc(3 * MAX_IOPORTS, sizeof(**ioport_write_table)); } /* size is the word size in byte */ @@ -6276,12 +6273,6 @@ void qemu_system_powerdown_request(void) powerdown_requested = 1; if (cpu_single_env) cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT); -} - -static void qemu_sighup_handler(int signal) -{ - fprintf(stderr, "Received SIGHUP, terminating.\n"); - exit(0); } void main_loop_wait(int timeout) @@ -7979,7 +7970,7 @@ int main(int argc, char **argv) #ifndef CONFIG_STUBDOM /* Unblock SIGTERM and SIGHUP, which may have been blocked by the caller */ - signal(SIGHUP, qemu_sighup_handler); + signal(SIGHUP, SIG_DFL); sigemptyset(&set); sigaddset(&set, SIGTERM); sigaddset(&set, SIGHUP); diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/ioemu/vl.h Thu Apr 24 14:08:29 2008 -0600 @@ -1545,6 +1545,7 @@ char *xenstore_vm_read(int domid, char * /* xenfb.c */ int xenfb_pv_display_init(DisplayState *ds); +int xenfb_pv_display_start(void 
*vram_start); int xenfb_connect_vkbd(const char *path); int xenfb_connect_vfb(const char *path); diff -r 239b44eeb2d6 -r dc510776dd59 tools/libfsimage/ext2fs/fsys_ext2fs.c --- a/tools/libfsimage/ext2fs/fsys_ext2fs.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/libfsimage/ext2fs/fsys_ext2fs.c Thu Apr 24 14:08:29 2008 -0600 @@ -77,7 +77,52 @@ struct ext2_super_block __u32 s_rev_level; /* Revision level */ __u16 s_def_resuid; /* Default uid for reserved blocks */ __u16 s_def_resgid; /* Default gid for reserved blocks */ - __u32 s_reserved[235]; /* Padding to the end of the block */ + /* + * These fields are for EXT2_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __u32 s_first_ino; /* First non-reserved inode */ + __u16 s_inode_size; /* size of inode structure */ + __u16 s_block_group_nr; /* block group # of this superblock */ + __u32 s_feature_compat; /* compatible feature set */ + __u32 s_feature_incompat; /* incompatible feature set */ + __u32 s_feature_ro_compat; /* readonly-compatible feature set */ + __u8 s_uuid[16]; /* 128-bit uuid for volume */ + char s_volume_name[16]; /* volume name */ + char s_last_mounted[64]; /* directory where last mounted */ + __u32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT2_FEATURE_COMPAT_DIR_PREALLOC flag is on. 
+ */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __u16 s_reserved_gdt_blocks;/* Per group table for online growth */ + /* + * Journaling support valid if EXT2_FEATURE_COMPAT_HAS_JOURNAL set. + */ + __u8 s_journal_uuid[16]; /* uuid of journal superblock */ + __u32 s_journal_inum; /* inode number of journal file */ + __u32 s_journal_dev; /* device number of journal file */ + __u32 s_last_orphan; /* start of list of inodes to delete */ + __u32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_jnl_backup_type; /* Default type of journal backup */ + __u16 s_reserved_word_pad; + __u32 s_default_mount_opts; + __u32 s_first_meta_bg; /* First metablock group */ + __u32 s_mkfs_time; /* When the filesystem was created */ + __u32 s_jnl_blocks[17]; /* Backup of the journal inode */ + __u32 s_reserved[172]; /* Padding to the end of the block */ }; struct ext2_group_desc @@ -216,6 +261,9 @@ struct ext2_dir_entry #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) #define EXT2_ADDR_PER_BLOCK_BITS(s) (log2(EXT2_ADDR_PER_BLOCK(s))) +#define EXT2_INODE_SIZE(s) (SUPERBLOCK->s_inode_size) +#define EXT2_INODES_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s)/EXT2_INODE_SIZE(s)) + /* linux/ext2_fs.h */ #define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) /* kind of from ext2/super.c */ @@ -537,7 +585,7 @@ ext2fs_dir (fsi_file_t *ffi, char *dirna gdp = GROUP_DESC; ino_blk = gdp[desc].bg_inode_table + (((current_ino - 1) % (SUPERBLOCK->s_inodes_per_group)) - >> log2 (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode))); + >> log2 (EXT2_INODES_PER_BLOCK (SUPERBLOCK))); #ifdef E2DEBUG printf ("inode table fsblock=%d\n", ino_blk); #endif /* E2DEBUG */ @@ -549,13 +597,12 @@ ext2fs_dir (fsi_file_t *ffi, char *dirna /* reset indirect blocks! 
*/ mapblock2 = mapblock1 = -1; - raw_inode = INODE + - ((current_ino - 1) - & (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode) - 1)); + raw_inode = (struct ext2_inode *)((char *)INODE + + ((current_ino - 1) & (EXT2_INODES_PER_BLOCK (SUPERBLOCK) - 1)) * + EXT2_INODE_SIZE (SUPERBLOCK)); #ifdef E2DEBUG printf ("ipb=%d, sizeof(inode)=%d\n", - (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode)), - sizeof (struct ext2_inode)); + EXT2_INODES_PER_BLOCK (SUPERBLOCK), EXT2_INODE_SIZE (SUPERBLOCK)); printf ("inode=%x, raw_inode=%x\n", INODE, raw_inode); printf ("offset into inode table block=%d\n", (int) raw_inode - (int) INODE); for (i = (unsigned char *) INODE; i <= (unsigned char *) raw_inode; diff -r 239b44eeb2d6 -r dc510776dd59 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/libxc/xc_hvm_build.c Thu Apr 24 14:08:29 2008 -0600 @@ -298,7 +298,7 @@ static int setup_guest(int xc_handle, _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); munmap(ident_pt, PAGE_SIZE); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT, - special_page_nr + SPECIALPAGE_IDENT_PT); + (special_page_nr + SPECIALPAGE_IDENT_PT) << PAGE_SHIFT); /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. 
*/ entry_eip = elf_uval(&elf, elf.ehdr, e_entry); diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/util/acmpolicy.py --- a/tools/python/xen/util/acmpolicy.py Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/python/xen/util/acmpolicy.py Thu Apr 24 14:08:29 2008 -0600 @@ -17,6 +17,7 @@ #============================================================================ import os +import sha import stat import array import struct @@ -35,7 +36,7 @@ ACM_POLICIES_DIR = security.policy_dir_p # Constants needed for generating a binary policy from its XML # representation -ACM_POLICY_VERSION = 3 # Latest one +ACM_POLICY_VERSION = 4 # Latest one ACM_CHWALL_VERSION = 1 ACM_STE_VERSION = 1 @@ -965,6 +966,10 @@ class ACMPolicy(XSPolicy): return dom.toxml() return None + def hash(self): + """ Calculate a SHA1 hash of the XML policy """ + return sha.sha(self.toxml()) + def save(self): ### Save the XML policy into a file ### rc = -xsconstants.XSERR_FILE_ERROR @@ -1403,7 +1408,7 @@ class ACMPolicy(XSPolicy): ste_bin += "\x00" #Write binary header: - headerformat="!iiiiiiiiii" + headerformat="!iiiiiiiiii20s" totallen_bin = struct.calcsize(headerformat) + \ len(pr_bin) + len(chw_bin) + len(ste_bin) polref_offset = struct.calcsize(headerformat) @@ -1425,7 +1430,8 @@ class ACMPolicy(XSPolicy): primpoloffset, secpolcode, secpoloffset, - major, minor) + major, minor, + self.hash().digest()) all_bin = array.array('B') for s in [ hdr_bin, pr_bin, chw_bin, ste_bin ]: @@ -1443,6 +1449,21 @@ class ACMPolicy(XSPolicy): rc = -xsconstants.XSERR_BAD_LABEL return rc, mapfile, all_bin.tostring() + def validate_enforced_policy_hash(self): + """ verify that the policy hash embedded in the binary policy + that is currently enforced matches the one of the XML policy.
+ """ + if self.hash().digest() != self.get_enforced_policy_hash(): + raise Exception('Policy hashes do not match') + + def get_enforced_policy_hash(self): + binpol = self.get_enforced_binary() + headerformat="!iiiiiiiiii20s" + res = struct.unpack(headerformat, binpol[:60]) + if len(res) >= 11: + return res[10] + return None + def get_enforced_binary(self): rc, binpol = security.hv_get_policy() if rc != 0: diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/python/xen/xend/XendDomain.py Thu Apr 24 14:08:29 2008 -0600 @@ -1622,7 +1622,31 @@ class XendDomain: vcpu) except Exception, ex: raise XendError(str(ex)) - + + def domain_reset(self, domid): + """Terminate domain immediately, and then create domain. + + @param domid: Domain ID or Name + @type domid: int or string. + @rtype: None + @raise XendError: Failed to destroy or create + @raise XendInvalidDomain: Domain is not valid + """ + + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + if dominfo and dominfo.getDomid() == DOM0_ID: + raise XendError("Cannot reset privileged domain %s" % domid) + if dominfo._stateGet() not in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): + raise VMBadState("Domain '%s' is not started" % domid, + POWER_STATE_NAMES[DOM_STATE_RUNNING], + POWER_STATE_NAMES[dominfo._stateGet()]) + try: + dominfo.resetDomain() + except Exception, ex: + raise XendError(str(ex)) + def instance(): """Singleton constructor. Use this instead of the class constructor. 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Apr 24 14:08:29 2008 -0600 @@ -1837,6 +1837,9 @@ class XendDomainInfo: @raise: VmError for invalid devices """ + if self.image: + self.image.prepareEnvironment() + ordered_refs = self.info.ordered_device_refs() for dev_uuid in ordered_refs: devclass, config = self.info['devices'][dev_uuid] @@ -2323,6 +2326,34 @@ class XendDomainInfo: self._cleanup_phantom_devs(paths) + def resetDomain(self): + log.debug("XendDomainInfo.resetDomain(%s)", str(self.domid)) + + old_domid = self.domid + prev_vm_xend = self._listRecursiveVm('xend') + new_dom_info = self.info + try: + self._unwatchVm() + self.destroy() + + new_dom = None + try: + from xen.xend import XendDomain + new_dom_info['domid'] = None + new_dom = XendDomain.instance().domain_create_from_dict( + new_dom_info) + for x in prev_vm_xend[0][1]: + new_dom._writeVm('xend/%s' % x[0], x[1]) + new_dom.waitForDevices() + new_dom.unpause() + except: + if new_dom: + new_dom.destroy() + raise + except: + log.exception('Failed to reset domain %s.', str(old_domid)) + + def resumeDomain(self): log.debug("XendDomainInfo.resumeDomain(%s)", str(self.domid)) diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/XendXSPolicyAdmin.py --- a/tools/python/xen/xend/XendXSPolicyAdmin.py Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/python/xen/xend/XendXSPolicyAdmin.py Thu Apr 24 14:08:29 2008 -0600 @@ -54,6 +54,7 @@ class XSPolicyAdmin: try: self.xsobjs[ref] = ACMPolicy(name=act_pol_name, ref=ref) self.policies[ref] = (act_pol_name, xsconstants.ACM_POLICY_ID) + self.xsobjs[ref].validate_enforced_policy_hash() except Exception, e: log.error("Could not find XML representation of policy '%s': " "%s" % (act_pol_name,e)) diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Thu Apr 24 14:02:16 
2008 -0600 +++ b/tools/python/xen/xend/image.py Thu Apr 24 14:08:29 2008 -0600 @@ -184,6 +184,42 @@ class ImageHandler: def buildDomain(self): """Build the domain. Define in subclass.""" raise NotImplementedError() + + def prepareEnvironment(self): + """Prepare the environment for the execution of the domain. This + method is called before any devices are set up.""" + + domid = self.vm.getDomid() + + # Delete left-over pipes + try: + os.unlink('/var/run/tap/qemu-read-%d' % domid) + os.unlink('/var/run/tap/qemu-write-%d' % domid) + except: + pass + + # No device model, don't create pipes + if self.device_model is None: + return + + # If we use a device model, the pipes for communication between + # blktapctrl and ioemu must be present before the devices are + # created (blktapctrl must access them for new block devices) + + # mkdir throws an exception if the path already exists + try: + os.mkdir('/var/run/tap', 0755) + except: + pass + + try: + os.mkfifo('/var/run/tap/qemu-read-%d' % domid, 0600) + os.mkfifo('/var/run/tap/qemu-write-%d' % domid, 0600) + except OSError, e: + log.warn('Could not create blktap pipes for domain %d' % domid) + log.exception(e) + pass + # Return a list of cmd line args to the device models based on the # xm config file @@ -411,6 +447,12 @@ class ImageHandler: self.pid = None state = xstransact.Remove("/local/domain/0/device-model/%i" % self.vm.getDomid()) + + try: + os.unlink('/var/run/tap/qemu-read-%d' % self.vm.getDomid()) + os.unlink('/var/run/tap/qemu-write-%d' % self.vm.getDomid()) + except: + pass class LinuxImageHandler(ImageHandler): @@ -643,7 +685,9 @@ class IA64_HVM_ImageHandler(HVMImageHand # ROM size for guest firmware, io page, xenstore page # buffer io page, buffer pio page and memmap info page extra_pages = 1024 + 5 - return mem_kb + extra_pages * page_kb + mem_kb += extra_pages * page_kb + # Add 8 MiB overhead for QEMU's video RAM. 
+ return mem_kb + 8192 def getRequiredInitialReservation(self): return self.vm.getMemoryTarget() diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/python/xen/xm/main.py Thu Apr 24 14:08:29 2008 -0600 @@ -107,6 +107,7 @@ SUBCOMMAND_HELP = { 'Migrate a domain to another machine.'), 'pause' : ('<Domain>', 'Pause execution of a domain.'), 'reboot' : ('<Domain> [-wa]', 'Reboot a domain.'), + 'reset' : ('<Domain>', 'Reset a domain.'), 'restore' : ('<CheckpointFile> [-p]', 'Restore a domain from a saved state.'), 'save' : ('[-c] <Domain> <CheckpointFile>', @@ -274,6 +275,7 @@ common_commands = [ "migrate", "pause", "reboot", + "reset", "restore", "resume", "save", @@ -303,6 +305,7 @@ domain_commands = [ "pause", "reboot", "rename", + "reset", "restore", "resume", "save", @@ -1247,6 +1250,13 @@ def xm_shutdown(args): arg_check(args, "shutdown", 1, 4) from xen.xm import shutdown shutdown.main(["shutdown"] + args) + +def xm_reset(args): + arg_check(args, "reset", 1) + dom = args[0] + + # TODO: XenAPI + server.xend.domain.reset(dom) def xm_pause(args): arg_check(args, "pause", 1) @@ -2474,6 +2484,7 @@ commands = { "dump-core": xm_dump_core, "reboot": xm_reboot, "rename": xm_rename, + "reset": xm_reset, "restore": xm_restore, "resume": xm_resume, "save": xm_save, diff -r 239b44eeb2d6 -r dc510776dd59 tools/tests/test_x86_emulator.c --- a/tools/tests/test_x86_emulator.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/tests/test_x86_emulator.c Thu Apr 24 14:08:29 2008 -0600 @@ -26,14 +26,8 @@ static int read( unsigned int bytes, struct x86_emulate_ctxt *ctxt) { - unsigned long addr = offset; - switch ( bytes ) - { - case 1: *val = *(uint8_t *)addr; break; - case 2: *val = *(uint16_t *)addr; break; - case 4: *val = *(uint32_t *)addr; break; - case 8: *val = *(unsigned long *)addr; break; - } + *val = 0; + memcpy(val, (void *)offset, bytes); return X86EMUL_OKAY; } @@ -44,48 +38,19 @@ static int 
write( unsigned int bytes, struct x86_emulate_ctxt *ctxt) { - unsigned long addr = offset; - switch ( bytes ) - { - case 1: *(uint8_t *)addr = (uint8_t)val; break; - case 2: *(uint16_t *)addr = (uint16_t)val; break; - case 4: *(uint32_t *)addr = (uint32_t)val; break; - case 8: *(unsigned long *)addr = val; break; - } + memcpy((void *)offset, &val, bytes); return X86EMUL_OKAY; } static int cmpxchg( unsigned int seg, unsigned long offset, - unsigned long old, - unsigned long new, + void *old, + void *new, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { - unsigned long addr = offset; - switch ( bytes ) - { - case 1: *(uint8_t *)addr = (uint8_t)new; break; - case 2: *(uint16_t *)addr = (uint16_t)new; break; - case 4: *(uint32_t *)addr = (uint32_t)new; break; - case 8: *(unsigned long *)addr = new; break; - } - return X86EMUL_OKAY; -} - -static int cmpxchg8b( - unsigned int seg, - unsigned long offset, - unsigned long old_lo, - unsigned long old_hi, - unsigned long new_lo, - unsigned long new_hi, - struct x86_emulate_ctxt *ctxt) -{ - unsigned long addr = offset; - ((unsigned long *)addr)[0] = new_lo; - ((unsigned long *)addr)[1] = new_hi; + memcpy((void *)offset, new, bytes); return X86EMUL_OKAY; } @@ -94,7 +59,6 @@ static struct x86_emulate_ops emulops = .insn_fetch = read, .write = write, .cmpxchg = cmpxchg, - .cmpxchg8b = cmpxchg8b }; int main(int argc, char **argv) diff -r 239b44eeb2d6 -r dc510776dd59 tools/tests/x86_emulate.c --- a/tools/tests/x86_emulate.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/tests/x86_emulate.c Thu Apr 24 14:08:29 2008 -0600 @@ -4,10 +4,4 @@ #include <public/xen.h> #include "x86_emulate/x86_emulate.h" - -#define __emulate_fpu_insn(_op) \ -do{ rc = X86EMUL_UNHANDLEABLE; \ - goto done; \ -} while (0) - #include "x86_emulate/x86_emulate.c" diff -r 239b44eeb2d6 -r dc510776dd59 tools/xenmon/xenbaked.c --- a/tools/xenmon/xenbaked.c Thu Apr 24 14:02:16 2008 -0600 +++ b/tools/xenmon/xenbaked.c Thu Apr 24 14:08:29 2008 -0600 @@ -509,14 +509,36 
@@ int monitor_tbufs(void) { for ( i = 0; (i < num) && !interrupted; i++ ) { - while ( meta[i]->cons != meta[i]->prod ) + unsigned long start_offset, end_offset, cons, prod; + + cons = meta[i]->cons; + prod = meta[i]->prod; + xen_rmb(); /* read prod, then read item. */ + + if ( cons == prod ) + continue; + + start_offset = cons % data_size; + end_offset = prod % data_size; + + if ( start_offset >= end_offset ) { - xen_rmb(); /* read prod, then read item. */ + while ( start_offset != data_size ) + { + rec_size = process_record( + i, (struct t_rec *)(data[i] + start_offset)); + start_offset += rec_size; + } + start_offset = 0; + } + while ( start_offset != end_offset ) + { rec_size = process_record( - i, (struct t_rec *)(data[i] + meta[i]->cons % data_size)); - xen_mb(); /* read item, then update cons. */ - meta[i]->cons += rec_size; + i, (struct t_rec *)(data[i] + start_offset)); + start_offset += rec_size; } + xen_mb(); /* read item, then update cons. */ + meta[i]->cons = prod; } wait_for_event(); diff -r 239b44eeb2d6 -r dc510776dd59 xen/Makefile --- a/xen/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/Makefile Thu Apr 24 14:08:29 2008 -0600 @@ -44,6 +44,7 @@ _clean: delete-unfresh-files $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) clean rm -f include/asm *.o $(TARGET)* *~ core rm -f include/asm-*/asm-offsets.h + [ -d tools/figlet ] && rm -f .banner* .PHONY: _distclean _distclean: clean @@ -70,8 +71,14 @@ delete-unfresh-files: rm -f include/xen/compile.h; \ fi +.banner: Makefile + $(MAKE) -C tools + @tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) 2>$@2 >$@1 + @cat $@1 $@2 >$@ + @rm -f $@1 $@2 + # compile.h contains dynamic build info. Rebuilt on every 'make' invocation. 
-include/xen/compile.h: include/xen/compile.h.in +include/xen/compile.h: include/xen/compile.h.in .banner @sed -e 's/@@date@@/$(shell LC_ALL=C date)/g' \ -e 's/@@time@@/$(shell LC_ALL=C date +%T)/g' \ -e 's/@@whoami@@/$(USER)/g' \ @@ -83,7 +90,8 @@ include/xen/compile.h: include/xen/compi -e 's/@@extraversion@@/$(XEN_EXTRAVERSION)/g' \ -e 's!@@changeset@@!$(shell ((hg parents --template "{date|date} {rev}:{node|short}" >/dev/null && hg parents --template "{date|date} {rev}:{node|short}") || echo "unavailable") 2>/dev/null)!g' \ < include/xen/compile.h.in > $@.new - tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) >> $@.new + @grep \" .banner >> $@.new + @grep -v \" .banner @mv -f $@.new $@ include/asm-$(TARGET_ARCH)/asm-offsets.h: arch/$(TARGET_ARCH)/asm-offsets.s diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/Makefile Thu Apr 24 14:08:29 2008 -0600 @@ -52,6 +52,8 @@ obj-y += tboot.o obj-$(crash_debug) += gdbstub.o +x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h + $(TARGET): $(TARGET)-syms boot/mkelf32 ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \ `$(NM) -nr $(TARGET)-syms | head -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'` diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/bitops.c --- a/xen/arch/x86/bitops.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/bitops.c Thu Apr 24 14:08:29 2008 -0600 @@ -8,17 +8,18 @@ unsigned int __find_first_bit( unsigned long d0, d1, res; asm volatile ( - " xor %%eax,%%eax\n\t" /* also ensures ZF==1 if size==0 */ + "1: xor %%eax,%%eax\n\t" /* also ensures ZF==1 if size==0 */ " repe; scas"__OS"\n\t" - " je 1f\n\t" + " je 2f\n\t" + " bsf -"STR(BITS_PER_LONG/8)"(%2),%0\n\t" + " jz 1b\n\t" " lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t" - " bsf (%2),%0\n" - "1: sub %%ebx,%%edi\n\t" + "2: sub %%ebx,%%edi\n\t" " shl $3,%%edi\n\t" " add %%edi,%%eax" : "=&a" (res), "=&c" (d0), "=&D" (d1) - : "1" ((size + BITS_PER_LONG - 1) / 
BITS_PER_LONG), - "2" (addr), "b" ((int)(long)addr) : "memory" ); + : "1" (BITS_TO_LONGS(size)), "2" (addr), "b" ((int)(long)addr) + : "memory" ); return res; } @@ -34,8 +35,7 @@ unsigned int __find_next_bit( if ( bit != 0 ) { /* Look for a bit in the first word. */ - asm ( "bsf %1,%%"__OP"ax" - : "=a" (set) : "r" (*p >> bit), "0" (BITS_PER_LONG) ); + set = __scanbit(*p >> bit, BITS_PER_LONG - bit); if ( set < (BITS_PER_LONG - bit) ) return (offset + set); offset += BITS_PER_LONG - bit; @@ -56,18 +56,20 @@ unsigned int __find_first_zero_bit( unsigned long d0, d1, d2, res; asm volatile ( + "1: xor %%eax,%%eax ; not %3\n\t" /* rAX == ~0ul */ " xor %%edx,%%edx\n\t" /* also ensures ZF==1 if size==0 */ " repe; scas"__OS"\n\t" - " je 1f\n\t" + " je 2f\n\t" + " xor -"STR(BITS_PER_LONG/8)"(%2),%3\n\t" + " jz 1b\n\t" + " bsf %3,%0\n\t" " lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t" - " xor (%2),%3\n\t" - " bsf %3,%0\n" - "1: sub %%ebx,%%edi\n\t" + "2: sub %%ebx,%%edi\n\t" " shl $3,%%edi\n\t" " add %%edi,%%edx" : "=&d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) - : "1" ((size + BITS_PER_LONG - 1) / BITS_PER_LONG), - "2" (addr), "b" ((int)(long)addr), "3" (-1L) : "memory" ); + : "1" (BITS_TO_LONGS(size)), "2" (addr), "b" ((int)(long)addr) + : "memory" ); return res; } @@ -83,7 +85,7 @@ unsigned int __find_next_zero_bit( if ( bit != 0 ) { /* Look for zero in the first word. 
*/ - asm ( "bsf %1,%%"__OP"ax" : "=a" (set) : "r" (~(*p >> bit)) ); + set = __scanbit(~(*p >> bit), BITS_PER_LONG - bit); if ( set < (BITS_PER_LONG - bit) ) return (offset + set); offset += BITS_PER_LONG - bit; diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/cpu/mtrr/main.c --- a/xen/arch/x86/cpu/mtrr/main.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/cpu/mtrr/main.c Thu Apr 24 14:08:29 2008 -0600 @@ -586,8 +586,6 @@ struct mtrr_value { unsigned long lsize; }; -extern void global_init_mtrr_pat(void); - /** * mtrr_bp_init - initialize mtrrs on the boot CPU * @@ -654,11 +652,8 @@ void __init mtrr_bp_init(void) if (mtrr_if) { set_num_var_ranges(); init_table(); - if (use_intel()) { + if (use_intel()) get_mtrr_state(); - /* initialize some global data for MTRR/PAT virutalization */ - global_init_mtrr_pat(); - } } } diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/domain.c Thu Apr 24 14:08:29 2008 -0600 @@ -521,10 +521,10 @@ int arch_domain_create(struct domain *d, clear_page(d->shared_info); share_xen_page_with_guest( virt_to_page(d->shared_info), d, XENSHARE_writable); - } - - if ( (rc = iommu_domain_init(d)) != 0 ) - goto fail; + + if ( (rc = iommu_domain_init(d)) != 0 ) + goto fail; + } if ( is_hvm_domain(d) ) { @@ -562,7 +562,8 @@ void arch_domain_destroy(struct domain * if ( is_hvm_domain(d) ) hvm_domain_destroy(d); - iommu_domain_destroy(d); + if ( !is_idle_domain(d) ) + iommu_domain_destroy(d); paging_final_teardown(d); diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/domain_build.c Thu Apr 24 14:08:29 2008 -0600 @@ -957,8 +957,8 @@ int __init construct_dom0( rc |= ioports_deny_access(dom0, 0x40, 0x43); /* PIT Channel 2 / PC Speaker Control. */ rc |= ioports_deny_access(dom0, 0x61, 0x61); - /* PCI configuration spaces. 
*/ - rc |= ioports_deny_access(dom0, 0xcf8, 0xcff); + /* PCI configuration space (NB. 0xcf8 has special treatment). */ + rc |= ioports_deny_access(dom0, 0xcfc, 0xcff); /* Command-line I/O ranges. */ process_dom0_ioports_disable(); diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/emulate.c --- a/xen/arch/x86/hvm/emulate.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/hvm/emulate.c Thu Apr 24 14:08:29 2008 -0600 @@ -28,6 +28,33 @@ static int hvmemul_do_io( ioreq_t *p = &vio->vp_ioreq; int rc; + /* Only retrieve the value from singleton (non-REP) reads. */ + ASSERT((val == NULL) || ((dir == IOREQ_READ) && !value_is_ptr)); + + if ( is_mmio && !value_is_ptr ) + { + /* Part of a multi-cycle read or write? */ + if ( dir == IOREQ_WRITE ) + { + paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa; + unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes; + if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) ) + return X86EMUL_OKAY; + } + else + { + paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa; + unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes; + if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) ) + { + *val = 0; + memcpy(val, &curr->arch.hvm_vcpu.mmio_large_read[addr - pa], + size); + return X86EMUL_OKAY; + } + } + } + switch ( curr->arch.hvm_vcpu.io_state ) { case HVMIO_none: @@ -36,8 +63,13 @@ static int hvmemul_do_io( curr->arch.hvm_vcpu.io_state = HVMIO_none; if ( val == NULL ) return X86EMUL_UNHANDLEABLE; - *val = curr->arch.hvm_vcpu.io_data; - return X86EMUL_OKAY; + goto finish_access; + case HVMIO_dispatched: + /* May have to wait for previous cycle of a multi-write to complete. 
*/ + if ( is_mmio && !value_is_ptr && (dir == IOREQ_WRITE) && + (addr == (curr->arch.hvm_vcpu.mmio_large_write_pa + + curr->arch.hvm_vcpu.mmio_large_write_bytes)) ) + return X86EMUL_RETRY; default: return X86EMUL_UNHANDLEABLE; } @@ -80,8 +112,6 @@ static int hvmemul_do_io( *reps = p->count; p->state = STATE_IORESP_READY; hvm_io_assist(); - if ( val != NULL ) - *val = curr->arch.hvm_vcpu.io_data; curr->arch.hvm_vcpu.io_state = HVMIO_none; break; case X86EMUL_UNHANDLEABLE: @@ -92,7 +122,43 @@ static int hvmemul_do_io( BUG(); } - return rc; + if ( rc != X86EMUL_OKAY ) + return rc; + + finish_access: + if ( val != NULL ) + *val = curr->arch.hvm_vcpu.io_data; + + if ( is_mmio && !value_is_ptr ) + { + /* Part of a multi-cycle read or write? */ + if ( dir == IOREQ_WRITE ) + { + paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa; + unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes; + if ( bytes == 0 ) + pa = curr->arch.hvm_vcpu.mmio_large_write_pa = addr; + if ( addr == (pa + bytes) ) + curr->arch.hvm_vcpu.mmio_large_write_bytes += size; + } + else + { + paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa; + unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes; + if ( bytes == 0 ) + pa = curr->arch.hvm_vcpu.mmio_large_read_pa = addr; + if ( (addr == (pa + bytes)) && + ((bytes + size) < + sizeof(curr->arch.hvm_vcpu.mmio_large_read)) ) + { + memcpy(&curr->arch.hvm_vcpu.mmio_large_read[addr - pa], + val, size); + curr->arch.hvm_vcpu.mmio_large_read_bytes += size; + } + } + } + + return X86EMUL_OKAY; } static int hvmemul_do_pio( @@ -371,11 +437,15 @@ static int hvmemul_cmpxchg( static int hvmemul_cmpxchg( enum x86_segment seg, unsigned long offset, - unsigned long old, - unsigned long new, + void *p_old, + void *p_new, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { + unsigned long new = 0; + if ( bytes > sizeof(new) ) + return X86EMUL_UNHANDLEABLE; + memcpy(&new, p_new, bytes); /* Fix this in case the guest is really relying on r-m-w atomicity. 
*/ return hvmemul_write(seg, offset, new, bytes, ctxt); } @@ -603,7 +673,7 @@ static int hvmemul_read_msr( _regs.ecx = (uint32_t)reg; - if ( (rc = hvm_funcs.msr_read_intercept(&_regs)) != 0 ) + if ( (rc = hvm_msr_read_intercept(&_regs)) != 0 ) return rc; *val = ((uint64_t)(uint32_t)_regs.edx << 32) || (uint32_t)_regs.eax; @@ -621,7 +691,7 @@ static int hvmemul_write_msr( _regs.eax = (uint32_t)val; _regs.ecx = (uint32_t)reg; - return hvm_funcs.msr_write_intercept(&_regs); + return hvm_msr_write_intercept(&_regs); } static int hvmemul_wbinvd( @@ -674,11 +744,40 @@ static int hvmemul_inject_sw_interrupt( return X86EMUL_OKAY; } -static void hvmemul_load_fpu_ctxt( - struct x86_emulate_ctxt *ctxt) -{ - if ( !current->fpu_dirtied ) +static int hvmemul_get_fpu( + void (*exception_callback)(void *, struct cpu_user_regs *), + void *exception_callback_arg, + enum x86_emulate_fpu_type type, + struct x86_emulate_ctxt *ctxt) +{ + struct vcpu *curr = current; + + switch ( type ) + { + case X86EMUL_FPU_fpu: + break; + case X86EMUL_FPU_mmx: + if ( !cpu_has_mmx ) + return X86EMUL_UNHANDLEABLE; + break; + default: + return X86EMUL_UNHANDLEABLE; + } + + if ( !curr->fpu_dirtied ) hvm_funcs.fpu_dirty_intercept(); + + curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback; + curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg; + + return X86EMUL_OKAY; +} + +static void hvmemul_put_fpu( + struct x86_emulate_ctxt *ctxt) +{ + struct vcpu *curr = current; + curr->arch.hvm_vcpu.fpu_exception_callback = NULL; } static int hvmemul_invlpg( @@ -720,7 +819,8 @@ static struct x86_emulate_ops hvm_emulat .cpuid = hvmemul_cpuid, .inject_hw_exception = hvmemul_inject_hw_exception, .inject_sw_interrupt = hvmemul_inject_sw_interrupt, - .load_fpu_ctxt = hvmemul_load_fpu_ctxt, + .get_fpu = hvmemul_get_fpu, + .put_fpu = hvmemul_put_fpu, .invlpg = hvmemul_invlpg }; @@ -763,6 +863,11 @@ int hvm_emulate_one( hvmemul_ctxt->exn_pending = 0; rc = x86_emulate(&hvmemul_ctxt->ctxt, 
&hvm_emulate_ops); + + if ( rc != X86EMUL_RETRY ) + curr->arch.hvm_vcpu.mmio_large_read_bytes = + curr->arch.hvm_vcpu.mmio_large_write_bytes = 0; + if ( rc != X86EMUL_OKAY ) return rc; diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/hvm/hvm.c Thu Apr 24 14:08:29 2008 -0600 @@ -494,14 +494,14 @@ static int hvm_load_cpu_ctxt(struct doma ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) ) { gdprintk(XENLOG_ERR, "HVM restore: bad CR0 0x%"PRIx64"\n", - ctxt.msr_efer); + ctxt.cr0); return -EINVAL; } if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS ) { gdprintk(XENLOG_ERR, "HVM restore: bad CR4 0x%"PRIx64"\n", - ctxt.msr_efer); + ctxt.cr4); return -EINVAL; } @@ -620,8 +620,6 @@ HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_ HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt, 1, HVMSR_PER_VCPU); -extern int reset_vmsr(struct mtrr_state *m, u64 *p); - int hvm_vcpu_initialise(struct vcpu *v) { int rc; @@ -647,7 +645,7 @@ int hvm_vcpu_initialise(struct vcpu *v) spin_lock_init(&v->arch.hvm_vcpu.tm_lock); INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list); - rc = reset_vmsr(&v->arch.hvm_vcpu.mtrr, &v->arch.hvm_vcpu.pat_cr); + rc = hvm_vcpu_cacheattr_init(v); if ( rc != 0 ) goto fail3; @@ -681,6 +679,7 @@ int hvm_vcpu_initialise(struct vcpu *v) void hvm_vcpu_destroy(struct vcpu *v) { + hvm_vcpu_cacheattr_destroy(v); vlapic_destroy(v); hvm_funcs.vcpu_destroy(v); @@ -1604,6 +1603,9 @@ void hvm_cpuid(unsigned int input, unsig *ebx &= 0x0000FFFFu; *ebx |= (current->vcpu_id * 2) << 24; + /* We always support MTRR MSRs. 
*/ + *edx |= bitmaskof(X86_FEATURE_MTRR); + *ecx &= (bitmaskof(X86_FEATURE_XMM3) | bitmaskof(X86_FEATURE_SSSE3) | bitmaskof(X86_FEATURE_CX16) | @@ -1653,6 +1655,146 @@ void hvm_cpuid(unsigned int input, unsig #endif break; } +} + +int hvm_msr_read_intercept(struct cpu_user_regs *regs) +{ + uint32_t ecx = regs->ecx; + uint64_t msr_content = 0; + struct vcpu *v = current; + uint64_t *var_range_base, *fixed_range_base; + int index; + + var_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.var_ranges; + fixed_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.fixed_ranges; + + switch ( ecx ) + { + case MSR_IA32_TSC: + msr_content = hvm_get_guest_time(v); + break; + + case MSR_IA32_APICBASE: + msr_content = vcpu_vlapic(v)->hw.apic_base_msr; + break; + + case MSR_IA32_MCG_CAP: + case MSR_IA32_MCG_STATUS: + case MSR_IA32_MC0_STATUS: + case MSR_IA32_MC1_STATUS: + case MSR_IA32_MC2_STATUS: + case MSR_IA32_MC3_STATUS: + case MSR_IA32_MC4_STATUS: + case MSR_IA32_MC5_STATUS: + /* No point in letting the guest see real MCEs */ + msr_content = 0; + break; + + case MSR_IA32_CR_PAT: + msr_content = v->arch.hvm_vcpu.pat_cr; + break; + + case MSR_MTRRcap: + msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap; + break; + case MSR_MTRRdefType: + msr_content = v->arch.hvm_vcpu.mtrr.def_type + | (v->arch.hvm_vcpu.mtrr.enabled << 10); + break; + case MSR_MTRRfix64K_00000: + msr_content = fixed_range_base[0]; + break; + case MSR_MTRRfix16K_80000: + case MSR_MTRRfix16K_A0000: + index = regs->ecx - MSR_MTRRfix16K_80000; + msr_content = fixed_range_base[index + 1]; + break; + case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000: + index = regs->ecx - MSR_MTRRfix4K_C0000; + msr_content = fixed_range_base[index + 3]; + break; + case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7: + index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0; + msr_content = var_range_base[index]; + break; + + default: + return hvm_funcs.msr_read_intercept(regs); + } + + regs->eax = (uint32_t)msr_content; + regs->edx = (uint32_t)(msr_content 
>> 32); + return X86EMUL_OKAY; +} + +int hvm_msr_write_intercept(struct cpu_user_regs *regs) +{ + extern bool_t mtrr_var_range_msr_set( + struct mtrr_state *v, u32 msr, u64 msr_content); + extern bool_t mtrr_fix_range_msr_set( + struct mtrr_state *v, int row, u64 msr_content); + extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content); + extern bool_t pat_msr_set(u64 *pat, u64 msr); + + uint32_t ecx = regs->ecx; + uint64_t msr_content = (uint32_t)regs->eax | ((uint64_t)regs->edx << 32); + struct vcpu *v = current; + int index; + + switch ( ecx ) + { + case MSR_IA32_TSC: + hvm_set_guest_time(v, msr_content); + pt_reset(v); + break; + + case MSR_IA32_APICBASE: + vlapic_msr_set(vcpu_vlapic(v), msr_content); + break; + + case MSR_IA32_CR_PAT: + if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) ) + goto gp_fault; + break; + + case MSR_MTRRcap: + goto gp_fault; + case MSR_MTRRdefType: + if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) ) + goto gp_fault; + break; + case MSR_MTRRfix64K_00000: + if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) ) + goto gp_fault; + break; + case MSR_MTRRfix16K_80000: + case MSR_MTRRfix16K_A0000: + index = regs->ecx - MSR_MTRRfix16K_80000 + 1; + if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, + index, msr_content) ) + goto gp_fault; + break; + case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000: + index = regs->ecx - MSR_MTRRfix4K_C0000 + 3; + if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, + index, msr_content) ) + goto gp_fault; + break; + case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7: + if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr, + regs->ecx, msr_content) ) + goto gp_fault; + break; + + default: + return hvm_funcs.msr_write_intercept(regs); + } + + return X86EMUL_OKAY; + +gp_fault: + hvm_inject_exception(TRAP_gp_fault, 0, 0); + return X86EMUL_EXCEPTION; } enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack) diff -r 239b44eeb2d6 -r 
dc510776dd59 xen/arch/x86/hvm/mtrr.c --- a/xen/arch/x86/hvm/mtrr.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/hvm/mtrr.c Thu Apr 24 14:08:29 2008 -0600 @@ -27,7 +27,6 @@ #include <asm/hvm/support.h> #include <asm/hvm/cacheattr.h> -/* Xen holds the native MTRR MSRs */ extern struct mtrr_state mtrr_state; static uint64_t phys_base_msr_mask; @@ -35,19 +34,17 @@ static uint32_t size_or_mask; static uint32_t size_or_mask; static uint32_t size_and_mask; -static void init_pat_entry_tbl(uint64_t pat); -static void init_mtrr_epat_tbl(void); -static uint8_t get_mtrr_type(struct mtrr_state *m, paddr_t pa); -/* get page attribute fields (PAn) from PAT MSR */ +/* Get page attribute fields (PAn) from PAT MSR. */ #define pat_cr_2_paf(pat_cr,n) ((((uint64_t)pat_cr) >> ((n)<<3)) & 0xff) -/* pat entry to PTE flags (PAT, PCD, PWT bits) */ + +/* PAT entry to PTE flags (PAT, PCD, PWT bits). */ static uint8_t pat_entry_2_pte_flags[8] = { 0, _PAGE_PWT, _PAGE_PCD, _PAGE_PCD | _PAGE_PWT, _PAGE_PAT, _PAGE_PAT | _PAGE_PWT, _PAGE_PAT | _PAGE_PCD, _PAGE_PAT | _PAGE_PCD | _PAGE_PWT }; -/* effective mm type lookup table, according to MTRR and PAT */ +/* Effective mm type lookup table, according to MTRR and PAT. */ static uint8_t mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = { /********PAT(UC,WC,RS,RS,WT,WP,WB,UC-)*/ /* RS means reserved type(2,3), and type is hardcoded here */ @@ -67,12 +64,13 @@ static uint8_t mm_type_tbl[MTRR_NUM_TYPE {0, 1, 2, 2, 4, 5, 6, 0} }; -/* reverse lookup table, to find a pat type according to MTRR and effective - * memory type. This table is dynamically generated +/* + * Reverse lookup table, to find a pat type according to MTRR and effective + * memory type. This table is dynamically generated. */ static uint8_t mtrr_epat_tbl[MTRR_NUM_TYPES][MEMORY_NUM_TYPES]; -/* lookup table for PAT entry of a given PAT value in host pat */ +/* Lookup table for PAT entry of a given PAT value in host PAT. 
*/ static uint8_t pat_entry_tbl[PAT_TYPE_NUMS]; static void get_mtrr_range(uint64_t base_msr, uint64_t mask_msr, @@ -139,220 +137,63 @@ bool_t is_var_mtrr_overlapped(struct mtr return 0; } -/* reserved mtrr for guest OS */ -#define RESERVED_MTRR 2 +#define MTRR_PHYSMASK_VALID_BIT 11 +#define MTRR_PHYSMASK_SHIFT 12 + +#define MTRR_PHYSBASE_TYPE_MASK 0xff /* lowest 8 bits */ +#define MTRR_PHYSBASE_SHIFT 12 +#define MTRR_VCNT 8 + #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) bool_t mtrr_var_range_msr_set(struct mtrr_state *m, uint32_t msr, uint64_t msr_content); -bool_t mtrr_def_type_msr_set(struct mtrr_state *m, uint64_t msr_content); bool_t mtrr_fix_range_msr_set(struct mtrr_state *m, uint32_t row, uint64_t msr_content); -static void set_var_mtrr(uint32_t reg, struct mtrr_state *m, - uint32_t base, uint32_t size, - uint32_t type) -{ - struct mtrr_var_range *vr; - - vr = &m->var_ranges[reg]; - - if ( size == 0 ) - { - /* The invalid bit is kept in the mask, so we simply clear the - * relevant mask register to disable a range. - */ - mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg), 0); - } - else - { - vr->base_lo = base << PAGE_SHIFT | type; - vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); - vr->mask_lo = -size << PAGE_SHIFT | 0x800; - vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); - - mtrr_var_range_msr_set(m, MTRRphysBase_MSR(reg), *(uint64_t *)vr); - mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg), - *((uint64_t *)vr + 1)); - } -} -/* From Intel Vol. III Section 10.11.4, the Range Size and Base Alignment has - * some kind of requirement: - * 1. The range size must be 2^N byte for N >= 12 (i.e 4KB minimum). - * 2. The base address must be 2^N aligned, where the N here is equal to - * the N in previous requirement. So a 8K range must be 8K aligned not 4K aligned. 
- */ -static uint32_t range_to_mtrr(uint32_t reg, struct mtrr_state *m, - uint32_t range_startk, uint32_t range_sizek, - uint8_t type) -{ - if ( !range_sizek || (reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR)) ) - { - gdprintk(XENLOG_WARNING, - "Failed to init var mtrr msr[%d]" - "range_size:%x, total available MSR:%d\n", - reg, range_sizek, - (uint32_t)((m->mtrr_cap & 0xff) - RESERVED_MTRR)); - return reg; - } - - while ( range_sizek ) - { - uint32_t max_align, align, sizek; - - max_align = (range_startk == 0) ? 32 : ffs(range_startk); - align = min_t(uint32_t, fls(range_sizek), max_align); - sizek = 1 << (align - 1); - - set_var_mtrr(reg++, m, range_startk, sizek, type); - - range_startk += sizek; - range_sizek -= sizek; - - if ( reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR) ) - { - gdprintk(XENLOG_WARNING, - "Failed to init var mtrr msr[%d]," - "total available MSR:%d\n", - reg, (uint32_t)((m->mtrr_cap & 0xff) - RESERVED_MTRR)); - break; - } - } - - return reg; -} - -static void setup_fixed_mtrrs(struct vcpu *v) -{ - uint64_t content; - int32_t i; - struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr; - - /* 1. Map (0~A0000) as WB */ - content = 0x0606060606060606ull; - mtrr_fix_range_msr_set(m, 0, content); - mtrr_fix_range_msr_set(m, 1, content); - /* 2. Map VRAM(A0000~C0000) as WC */ - content = 0x0101010101010101; - mtrr_fix_range_msr_set(m, 2, content); - /* 3. 
Map (C0000~100000) as UC */ - for ( i = 3; i < 11; i++) - mtrr_fix_range_msr_set(m, i, 0); -} - -static void setup_var_mtrrs(struct vcpu *v) -{ - p2m_type_t p2m; - uint64_t e820_mfn; - int8_t *p = NULL; - uint8_t nr = 0; - int32_t i; - uint32_t reg = 0; - uint64_t size = 0; - uint64_t addr = 0; - struct e820entry *e820_table; - - e820_mfn = mfn_x(gfn_to_mfn(v->domain, - HVM_E820_PAGE >> PAGE_SHIFT, &p2m)); - - p = (int8_t *)map_domain_page(e820_mfn); - - nr = *(uint8_t*)(p + HVM_E820_NR_OFFSET); - e820_table = (struct e820entry*)(p + HVM_E820_OFFSET); - /* search E820 table, set MTRR for RAM */ - for ( i = 0; i < nr; i++) - { - if ( (e820_table[i].addr >= 0x100000) && - (e820_table[i].type == E820_RAM) ) - { - if ( e820_table[i].addr == 0x100000 ) - { - size = e820_table[i].size + 0x100000 + PAGE_SIZE * 5; - addr = 0; - } - else - { - /* Larger than 4G */ - size = e820_table[i].size; - addr = e820_table[i].addr; - } - - reg = range_to_mtrr(reg, &v->arch.hvm_vcpu.mtrr, - addr >> PAGE_SHIFT, size >> PAGE_SHIFT, - MTRR_TYPE_WRBACK); - } - } -} - -void init_mtrr_in_hyper(struct vcpu *v) -{ - /* TODO:MTRR should be initialized in BIOS or other places. 
- * workaround to do it in here - */ - if ( v->arch.hvm_vcpu.mtrr.is_initialized ) - return; - - setup_fixed_mtrrs(v); - setup_var_mtrrs(v); - /* enable mtrr */ - mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, 0xc00); - - v->arch.hvm_vcpu.mtrr.is_initialized = 1; -} - -static int32_t reset_mtrr(struct mtrr_state *m) -{ - m->var_ranges = xmalloc_array(struct mtrr_var_range, MTRR_VCNT); - if ( m->var_ranges == NULL ) - return -ENOMEM; - memset(m->var_ranges, 0, MTRR_VCNT * sizeof(struct mtrr_var_range)); - memset(m->fixed_ranges, 0, sizeof(m->fixed_ranges)); - m->enabled = 0; - m->def_type = 0;/*mtrr is disabled*/ - m->mtrr_cap = (0x5<<8)|MTRR_VCNT;/*wc,fix enabled, and vcnt=8*/ - m->overlapped = 0; - return 0; -} - -/* init global variables for MTRR and PAT */ -void global_init_mtrr_pat(void) + +static int hvm_mtrr_pat_init(void) { extern uint64_t host_pat; - uint32_t phys_addr; - - init_mtrr_epat_tbl(); - init_pat_entry_tbl(host_pat); - /* Get max physical address, set some global variable */ - if ( cpuid_eax(0x80000000) < 0x80000008 ) - phys_addr = 36; - else - phys_addr = cpuid_eax(0x80000008); - - phys_base_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0xf00UL; - phys_mask_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0x7ffUL; - - size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); - size_and_mask = ~size_or_mask & 0xfff00000; -} - -static void init_pat_entry_tbl(uint64_t pat) -{ - int32_t i, j; + unsigned int i, j, phys_addr; + + memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl)); + for ( i = 0; i < MTRR_NUM_TYPES; i++ ) + { + for ( j = 0; j < PAT_TYPE_NUMS; j++ ) + { + int32_t tmp = mm_type_tbl[i][j]; + if ( (tmp >= 0) && (tmp < MEMORY_NUM_TYPES) ) + mtrr_epat_tbl[i][tmp] = j; + } + } memset(&pat_entry_tbl, INVALID_MEM_TYPE, PAT_TYPE_NUMS * sizeof(pat_entry_tbl[0])); - for ( i = 0; i < PAT_TYPE_NUMS; i++ ) { for ( j = 0; j < PAT_TYPE_NUMS; j++ ) { - if ( pat_cr_2_paf(pat, j) == i ) + if ( pat_cr_2_paf(host_pat, j) == i ) { pat_entry_tbl[i] 
= j; break; } } } -} + + phys_addr = 36; + if ( cpuid_eax(0x80000000) >= 0x80000008 ) + phys_addr = (uint8_t)cpuid_eax(0x80000008); + + phys_base_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0xf00UL; + phys_mask_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0x7ffUL; + + size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); + size_and_mask = ~size_or_mask & 0xfff00000; + + return 0; +} +__initcall(hvm_mtrr_pat_init); uint8_t pat_type_2_pte_flags(uint8_t pat_type) { @@ -368,24 +209,35 @@ uint8_t pat_type_2_pte_flags(uint8_t pat return pat_entry_2_pte_flags[pat_entry_tbl[PAT_TYPE_UNCACHABLE]]; } -int32_t reset_vmsr(struct mtrr_state *m, uint64_t *pat_ptr) -{ - int32_t rc; - - rc = reset_mtrr(m); - if ( rc != 0 ) - return rc; - - *pat_ptr = ((uint64_t)PAT_TYPE_WRBACK) | /* PAT0: WB */ - ((uint64_t)PAT_TYPE_WRTHROUGH << 8) | /* PAT1: WT */ - ((uint64_t)PAT_TYPE_UC_MINUS << 16) | /* PAT2: UC- */ - ((uint64_t)PAT_TYPE_UNCACHABLE << 24) | /* PAT3: UC */ - ((uint64_t)PAT_TYPE_WRBACK << 32) | /* PAT4: WB */ - ((uint64_t)PAT_TYPE_WRTHROUGH << 40) | /* PAT5: WT */ - ((uint64_t)PAT_TYPE_UC_MINUS << 48) | /* PAT6: UC- */ - ((uint64_t)PAT_TYPE_UNCACHABLE << 56); /* PAT7: UC */ - - return 0; +int hvm_vcpu_cacheattr_init(struct vcpu *v) +{ + struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr; + + memset(m, 0, sizeof(*m)); + + m->var_ranges = xmalloc_array(struct mtrr_var_range, MTRR_VCNT); + if ( m->var_ranges == NULL ) + return -ENOMEM; + memset(m->var_ranges, 0, MTRR_VCNT * sizeof(struct mtrr_var_range)); + + m->mtrr_cap = (1u << 10) | (1u << 8) | MTRR_VCNT; + + v->arch.hvm_vcpu.pat_cr = + ((uint64_t)PAT_TYPE_WRBACK) | /* PAT0: WB */ + ((uint64_t)PAT_TYPE_WRTHROUGH << 8) | /* PAT1: WT */ + ((uint64_t)PAT_TYPE_UC_MINUS << 16) | /* PAT2: UC- */ + ((uint64_t)PAT_TYPE_UNCACHABLE << 24) | /* PAT3: UC */ + ((uint64_t)PAT_TYPE_WRBACK << 32) | /* PAT4: WB */ + ((uint64_t)PAT_TYPE_WRTHROUGH << 40) | /* PAT5: WT */ + ((uint64_t)PAT_TYPE_UC_MINUS << 48) | /* PAT6: UC- */ + 
((uint64_t)PAT_TYPE_UNCACHABLE << 56); /* PAT7: UC */ + + return 0; +} + +void hvm_vcpu_cacheattr_destroy(struct vcpu *v) +{ + xfree(v->arch.hvm_vcpu.mtrr.var_ranges); } /* @@ -512,23 +364,6 @@ static uint8_t effective_mm_type(struct return effective; } -static void init_mtrr_epat_tbl(void) -{ - int32_t i, j; - /* set default value to an invalid type, just for checking conflict */ - memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl)); - - for ( i = 0; i < MTRR_NUM_TYPES; i++ ) - { - for ( j = 0; j < PAT_TYPE_NUMS; j++ ) - { - int32_t tmp = mm_type_tbl[i][j]; - if ( (tmp >= 0) && (tmp < MEMORY_NUM_TYPES) ) - mtrr_epat_tbl[i][tmp] = j; - } - } -} - uint32_t get_pat_flags(struct vcpu *v, uint32_t gl1e_flags, paddr_t gpaddr, @@ -856,7 +691,6 @@ static int hvm_load_mtrr_msr(struct doma mtrr_def_type_msr_set(mtrr_state, hw_mtrr.msr_mtrr_def_type); - v->arch.hvm_vcpu.mtrr.is_initialized = 1; return 0; } diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/Makefile --- a/xen/arch/x86/hvm/svm/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/hvm/svm/Makefile Thu Apr 24 14:08:29 2008 -0600 @@ -1,8 +1,6 @@ subdir-$(x86_32) += x86_32 -subdir-$(x86_32) += x86_32 -subdir-$(x86_64) += x86_64 - obj-y += asid.o obj-y += emulate.o +obj-y += entry.o obj-y += intr.o obj-y += svm.o obj-y += vmcb.o diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/entry.S --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/svm/entry.S Thu Apr 24 14:08:29 2008 -0600 @@ -0,0 +1,178 @@ +/* + * entry.S: SVM architecture-specific entry/exit handling. + * Copyright (c) 2005-2007, Advanced Micro Devices, Inc. + * Copyright (c) 2004, Intel Corporation. + * Copyright (c) 2008, Citrix Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <xen/config.h> +#include <xen/errno.h> +#include <xen/softirq.h> +#include <asm/types.h> +#include <asm/asm_defns.h> +#include <asm/apicdef.h> +#include <asm/page.h> +#include <public/xen.h> + +#define VMRUN .byte 0x0F,0x01,0xD8 +#define STGI .byte 0x0F,0x01,0xDC +#define CLGI .byte 0x0F,0x01,0xDD + +#define get_current(reg) \ + mov $STACK_SIZE-BYTES_PER_LONG, r(reg); \ + or r(sp), r(reg); \ + and $~(BYTES_PER_LONG-1),r(reg); \ + mov (r(reg)),r(reg); + +#if defined(__x86_64__) +#define r(reg) %r##reg +#define addr_of(lbl) lbl(%rip) +#define call_with_regs(fn) \ + mov %rsp,%rdi; \ + call fn; +#else /* defined(__i386__) */ +#define r(reg) %e##reg +#define addr_of(lbl) lbl +#define UREGS_rax UREGS_eax +#define UREGS_rip UREGS_eip +#define UREGS_rsp UREGS_esp +#define call_with_regs(fn) \ + mov %esp,%eax; \ + push %eax; \ + call fn; \ + add $4,%esp; +#endif + +ENTRY(svm_asm_do_resume) + get_current(bx) + CLGI + + mov VCPU_processor(r(bx)),%eax + shl $IRQSTAT_shift,r(ax) + lea addr_of(irq_stat),r(dx) + testl $~0,(r(dx),r(ax),1) + jnz .Lsvm_process_softirqs + + call svm_asid_handle_vmrun + call svm_intr_assist + + cmpb $0,addr_of(tb_init_done) + jnz .Lsvm_trace +.Lsvm_trace_done: + + mov VCPU_svm_vmcb(r(bx)),r(cx) + mov UREGS_rax(r(sp)),r(ax) + mov r(ax),VMCB_rax(r(cx)) + mov UREGS_rip(r(sp)),r(ax) + mov r(ax),VMCB_rip(r(cx)) + mov UREGS_rsp(r(sp)),r(ax) + mov r(ax),VMCB_rsp(r(cx)) + mov UREGS_eflags(r(sp)),r(ax) + mov r(ax),VMCB_rflags(r(cx)) + + mov VCPU_svm_vmcb_pa(r(bx)),r(ax) + +#if 
defined(__x86_64__) + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %r11 + pop %r10 + pop %r9 + pop %r8 + add $8,%rsp /* Skip %rax: restored by VMRUN. */ + pop %rcx + pop %rdx + pop %rsi + pop %rdi +#else /* defined(__i386__) */ + pop %ebx + pop %ecx + pop %edx + pop %esi + pop %edi + pop %ebp +#endif + + VMRUN + +#if defined(__x86_64__) + push %rdi + push %rsi + push %rdx + push %rcx + push %rax + push %r8 + push %r9 + push %r10 + push %r11 + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +#else /* defined(__i386__) */ + push %ebp + push %edi + push %esi + push %edx + push %ecx + push %ebx +#endif + + get_current(bx) + movb $0,VCPU_svm_vmcb_in_sync(r(bx)) + mov VCPU_svm_vmcb(r(bx)),r(cx) + mov VMCB_rax(r(cx)),r(ax) + mov r(ax),UREGS_rax(r(sp)) + mov VMCB_rip(r(cx)),r(ax) + mov r(ax),UREGS_rip(r(sp)) + mov VMCB_rsp(r(cx)),r(ax) + mov r(ax),UREGS_rsp(r(sp)) + mov VMCB_rflags(r(cx)),r(ax) + mov r(ax),UREGS_eflags(r(sp)) + +#ifndef NDEBUG + mov $0xbeef,%ax + mov %ax,UREGS_error_code(r(sp)) + mov %ax,UREGS_entry_vector(r(sp)) + mov %ax,UREGS_saved_upcall_mask(r(sp)) + mov %ax,UREGS_cs(r(sp)) + mov %ax,UREGS_ds(r(sp)) + mov %ax,UREGS_es(r(sp)) + mov %ax,UREGS_fs(r(sp)) + mov %ax,UREGS_gs(r(sp)) + mov %ax,UREGS_ss(r(sp)) +#endif + + STGI +.globl svm_stgi_label +svm_stgi_label: + call_with_regs(svm_vmexit_handler) + jmp svm_asm_do_resume + +.Lsvm_process_softirqs: + STGI + call do_softirq + jmp svm_asm_do_resume + +.Lsvm_trace: + call svm_trace_vmentry + jmp .Lsvm_trace_done diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/hvm/svm/intr.c Thu Apr 24 14:08:29 2008 -0600 @@ -102,15 +102,17 @@ static void svm_dirq_assist(struct vcpu struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; struct dev_intx_gsi_link *digl; - if ( !amd_iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) ) + if ( !iommu_enabled || 
(v->vcpu_id != 0) || (hvm_irq_dpci == NULL) ) return; for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS); irq < NR_IRQS; irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) ) { + if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) ) + continue; + stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)]); - clear_bit(irq, &hvm_irq_dpci->dirq_mask); list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list ) { diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Thu Apr 24 14:08:29 2008 -0600 @@ -911,6 +911,9 @@ static void svm_cpuid_intercept( __clear_bit(X86_FEATURE_PAE & 31, edx); __clear_bit(X86_FEATURE_PSE36 & 31, edx); + /* We always support MTRR MSRs. */ + *edx |= bitmaskof(X86_FEATURE_MTRR); + /* Filter all other features according to a whitelist. */ *ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) | bitmaskof(X86_FEATURE_ALTMOVCR) | @@ -924,7 +927,9 @@ static void svm_cpuid_intercept( bitmaskof(X86_FEATURE_SYSCALL) | bitmaskof(X86_FEATURE_MP) | bitmaskof(X86_FEATURE_MMXEXT) | - bitmaskof(X86_FEATURE_FFXSR)); + bitmaskof(X86_FEATURE_FFXSR) | + bitmaskof(X86_FEATURE_3DNOW) | + bitmaskof(X86_FEATURE_3DNOWEXT)); break; case 0x80000007: @@ -981,14 +986,6 @@ static int svm_msr_read_intercept(struct switch ( ecx ) { - case MSR_IA32_TSC: - msr_content = hvm_get_guest_time(v); - break; - - case MSR_IA32_APICBASE: - msr_content = vcpu_vlapic(v)->hw.apic_base_msr; - break; - case MSR_EFER: msr_content = v->arch.hvm_vcpu.guest_efer; break; @@ -1013,18 +1010,6 @@ static int svm_msr_read_intercept(struct case MSR_K8_VM_HSAVE_PA: goto gpf; - - case MSR_IA32_MCG_CAP: - case MSR_IA32_MCG_STATUS: - case MSR_IA32_MC0_STATUS: - case MSR_IA32_MC1_STATUS: - case MSR_IA32_MC2_STATUS: - case MSR_IA32_MC3_STATUS: - case MSR_IA32_MC4_STATUS: - case MSR_IA32_MC5_STATUS: - /* No point in letting the guest see real MCEs */ - msr_content = 0; - break; case 
MSR_IA32_DEBUGCTLMSR: msr_content = vmcb->debugctlmsr; @@ -1083,15 +1068,6 @@ static int svm_msr_write_intercept(struc switch ( ecx ) { - case MSR_IA32_TSC: - hvm_set_guest_time(v, msr_content); - pt_reset(v); - break; - - case MSR_IA32_APICBASE: - vlapic_msr_set(vcpu_vlapic(v), msr_content); - break; - case MSR_K8_VM_HSAVE_PA: goto gpf; @@ -1152,12 +1128,12 @@ static void svm_do_msr_access(struct cpu if ( vmcb->exitinfo1 == 0 ) { - rc = svm_msr_read_intercept(regs); + rc = hvm_msr_read_intercept(regs); inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL); } else { - rc = svm_msr_write_intercept(regs); + rc = hvm_msr_write_intercept(regs); inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL); } diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_32/Makefile --- a/xen/arch/x86/hvm/svm/x86_32/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -obj-y += exits.o diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_32/exits.S --- a/xen/arch/x86/hvm/svm/x86_32/exits.S Thu Apr 24 14:02:16 2008 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,131 +0,0 @@ -/* - * exits.S: SVM architecture-specific exit handling. - * Copyright (c) 2005-2007, Advanced Micro Devices, Inc. - * Copyright (c) 2004, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- */ - -#include <xen/config.h> -#include <xen/errno.h> -#include <xen/softirq.h> -#include <asm/asm_defns.h> -#include <asm/apicdef.h> -#include <asm/page.h> -#include <public/xen.h> - -#define GET_CURRENT(reg) \ - movl $STACK_SIZE-4,reg; \ - orl %esp,reg; \ - andl $~3,reg; \ - movl (reg),reg; - -#define VMRUN .byte 0x0F,0x01,0xD8 -#define STGI .byte 0x0F,0x01,0xDC -#define CLGI .byte 0x0F,0x01,0xDD - -ENTRY(svm_asm_do_resume) - GET_CURRENT(%ebx) - CLGI - - movl VCPU_processor(%ebx),%eax - shl $IRQSTAT_shift,%eax - testl $~0,irq_stat(%eax,1) - jnz svm_process_softirqs - - call svm_asid_handle_vmrun - call svm_intr_assist - - /* Check if the trace buffer is initialized. - * Because the below condition is unlikely, we jump out of line - * instead of having a mostly taken branch over the unlikely code. - */ - cmpb $0,tb_init_done - jnz svm_trace -svm_trace_done: - - movl VCPU_svm_vmcb(%ebx),%ecx - movl UREGS_eax(%esp),%eax - movl %eax,VMCB_rax(%ecx) - movl UREGS_eip(%esp),%eax - movl %eax,VMCB_rip(%ecx) - movl UREGS_esp(%esp),%eax - movl %eax,VMCB_rsp(%ecx) - movl UREGS_eflags(%esp),%eax - movl %eax,VMCB_rflags(%ecx) - - movl VCPU_svm_vmcb_pa(%ebx),%eax - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - - VMRUN - - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - pushl %ecx - pushl %ebx - - GET_CURRENT(%ebx) - movb $0,VCPU_svm_vmcb_in_sync(%ebx) - movl VCPU_svm_vmcb(%ebx),%ecx - movl VMCB_rax(%ecx),%eax - movl %eax,UREGS_eax(%esp) - movl VMCB_rip(%ecx),%eax - movl %eax,UREGS_eip(%esp) - movl VMCB_rsp(%ecx),%eax - movl %eax,UREGS_esp(%esp) - movl VMCB_rflags(%ecx),%eax - movl %eax,UREGS_eflags(%esp) - -#ifndef NDEBUG - movw $0xbeef,%ax - movw %ax,UREGS_error_code(%esp) - movw %ax,UREGS_entry_vector(%esp) - movw %ax,UREGS_saved_upcall_mask(%esp) - movw %ax,UREGS_cs(%esp) - movw %ax,UREGS_ds(%esp) - movw %ax,UREGS_es(%esp) - movw %ax,UREGS_fs(%esp) - movw %ax,UREGS_gs(%esp) - movw %ax,UREGS_ss(%esp) -#endif - - STGI -.globl svm_stgi_label; 
-svm_stgi_label: - movl %esp,%eax - push %eax - call svm_vmexit_handler - addl $4,%esp - jmp svm_asm_do_resume - - ALIGN -svm_process_softirqs: - STGI - call do_softirq - jmp svm_asm_do_resume - -svm_trace: - /* Call out to C, as this is not speed critical path - * Note: svm_trace_vmentry will recheck the tb_init_done, - * but this is on the slow path, so who cares - */ - call svm_trace_vmentry - jmp svm_trace_done diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_64/Makefile --- a/xen/arch/x86/hvm/svm/x86_64/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -obj-y += exits.o diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_64/exits.S --- a/xen/arch/x86/hvm/svm/x86_64/exits.S Thu Apr 24 14:02:16 2008 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,148 +0,0 @@ -/* - * exits.S: AMD-V architecture-specific exit handling. - * Copyright (c) 2005-2007, Advanced Micro Devices, Inc. - * Copyright (c) 2004, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- */ - -#include <xen/config.h> -#include <xen/errno.h> -#include <xen/softirq.h> -#include <asm/asm_defns.h> -#include <asm/apicdef.h> -#include <asm/page.h> -#include <public/xen.h> - -#define GET_CURRENT(reg) \ - movq $STACK_SIZE-8,reg; \ - orq %rsp,reg; \ - andq $~7,reg; \ - movq (reg),reg; - -#define VMRUN .byte 0x0F,0x01,0xD8 -#define STGI .byte 0x0F,0x01,0xDC -#define CLGI .byte 0x0F,0x01,0xDD - -ENTRY(svm_asm_do_resume) - GET_CURRENT(%rbx) - CLGI - - movl VCPU_processor(%rbx),%eax - shl $IRQSTAT_shift,%rax - leaq irq_stat(%rip),%rdx - testl $~0,(%rdx,%rax,1) - jnz svm_process_softirqs - - call svm_asid_handle_vmrun - call svm_intr_assist - - /* Check if the trace buffer is initialized. - * Because the below condition is unlikely, we jump out of line - * instead of having a mostly taken branch over the unlikely code. - */ - cmpb $0,tb_init_done(%rip) - jnz svm_trace -svm_trace_done: - - movq VCPU_svm_vmcb(%rbx),%rcx - movq UREGS_rax(%rsp),%rax - movq %rax,VMCB_rax(%rcx) - movq UREGS_rip(%rsp),%rax - movq %rax,VMCB_rip(%rcx) - movq UREGS_rsp(%rsp),%rax - movq %rax,VMCB_rsp(%rcx) - movq UREGS_eflags(%rsp),%rax - movq %rax,VMCB_rflags(%rcx) - - movq VCPU_svm_vmcb_pa(%rbx),%rax - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbp - popq %rbx - popq %r11 - popq %r10 - popq %r9 - popq %r8 - addq $8,%rsp /* Skip %rax: restored by VMRUN. 
*/ - popq %rcx - popq %rdx - popq %rsi - popq %rdi - - VMRUN - - pushq %rdi - pushq %rsi - pushq %rdx - pushq %rcx - pushq %rax - pushq %r8 - pushq %r9 - pushq %r10 - pushq %r11 - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - - GET_CURRENT(%rbx) - movb $0,VCPU_svm_vmcb_in_sync(%rbx) - movq VCPU_svm_vmcb(%rbx),%rcx - movq VMCB_rax(%rcx),%rax - movq %rax,UREGS_rax(%rsp) - movq VMCB_rip(%rcx),%rax - movq %rax,UREGS_rip(%rsp) - movq VMCB_rsp(%rcx),%rax - movq %rax,UREGS_rsp(%rsp) - movq VMCB_rflags(%rcx),%rax - movq %rax,UREGS_eflags(%rsp) - -#ifndef NDEBUG - movw $0xbeef,%ax - movw %ax,UREGS_error_code(%rsp) - movw %ax,UREGS_entry_vector(%rsp) - movw %ax,UREGS_saved_upcall_mask(%rsp) - movw %ax,UREGS_cs(%rsp) - movw %ax,UREGS_ds(%rsp) - movw %ax,UREGS_es(%rsp) - movw %ax,UREGS_fs(%rsp) - movw %ax,UREGS_gs(%rsp) - movw %ax,UREGS_ss(%rsp) -#endif - - STGI -.globl svm_stgi_label; -svm_stgi_label: - movq %rsp,%rdi - call svm_vmexit_handler - jmp svm_asm_do_resume - - ALIGN -svm_process_softirqs: - STGI - call do_softirq - jmp svm_asm_do_resume - -svm_trace: - /* Call out to C, as this is not speed critical path - * Note: svm_trace_vmentry will recheck the tb_init_done, - * but this is on the slow path, so who cares - */ - call svm_trace_vmentry - jmp svm_trace_done diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/Makefile --- a/xen/arch/x86/hvm/vmx/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/hvm/vmx/Makefile Thu Apr 24 14:08:29 2008 -0600 @@ -1,6 +1,4 @@ subdir-$(x86_32) += x86_32 -subdir-$(x86_32) += x86_32 -subdir-$(x86_64) += x86_64 - +obj-y += entry.o obj-y += intr.o obj-y += realmode.o obj-y += vmcs.o diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/entry.S --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/vmx/entry.S Thu Apr 24 14:08:29 2008 -0600 @@ -0,0 +1,198 @@ +/* + * entry.S: VMX architecture-specific entry/exit handling. + * Copyright (c) 2004, Intel Corporation. 
+ * Copyright (c) 2008, Citrix Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <xen/config.h> +#include <xen/errno.h> +#include <xen/softirq.h> +#include <asm/types.h> +#include <asm/asm_defns.h> +#include <asm/apicdef.h> +#include <asm/page.h> +#include <public/xen.h> + +#define VMRESUME .byte 0x0f,0x01,0xc3 +#define VMLAUNCH .byte 0x0f,0x01,0xc2 +#define VMREAD(off) .byte 0x0f,0x78,0x47,((off)-UREGS_rip) +#define VMWRITE(off) .byte 0x0f,0x79,0x47,((off)-UREGS_rip) + +/* VMCS field encodings */ +#define GUEST_RSP 0x681c +#define GUEST_RIP 0x681e +#define GUEST_RFLAGS 0x6820 + +#define get_current(reg) \ + mov $STACK_SIZE-BYTES_PER_LONG, r(reg); \ + or r(sp), r(reg); \ + and $~(BYTES_PER_LONG-1),r(reg); \ + mov (r(reg)),r(reg); + +#if defined(__x86_64__) +#define r(reg) %r##reg +#define addr_of(lbl) lbl(%rip) +#define call_with_regs(fn) \ + mov %rsp,%rdi; \ + call fn; +#else /* defined(__i386__) */ +#define r(reg) %e##reg +#define addr_of(lbl) lbl +#define UREGS_rip UREGS_eip +#define UREGS_rsp UREGS_esp +#define call_with_regs(fn) \ + mov %esp,%eax; \ + push %eax; \ + call fn; \ + add $4,%esp; +#endif + + ALIGN +.globl vmx_asm_vmexit_handler +vmx_asm_vmexit_handler: +#if defined(__x86_64__) + push %rdi + push %rsi + push %rdx + push %rcx + push %rax + push %r8 + push %r9 + push %r10 + push %r11 + push %rbx + push %rbp 
+ push %r12 + push %r13 + push %r14 + push %r15 +#else /* defined(__i386__) */ + push %eax + push %ebp + push %edi + push %esi + push %edx + push %ecx + push %ebx +#endif + + get_current(bx) + + movb $1,VCPU_vmx_launched(r(bx)) + + lea UREGS_rip(r(sp)),r(di) + mov $GUEST_RIP,%eax + /*VMREAD(UREGS_rip)*/ + .byte 0x0f,0x78,0x07 /* vmread r(ax),(r(di)) */ + mov $GUEST_RSP,%eax + VMREAD(UREGS_rsp) + mov $GUEST_RFLAGS,%eax + VMREAD(UREGS_eflags) + + mov %cr2,r(ax) + mov r(ax),VCPU_hvm_guest_cr2(r(bx)) + +#ifndef NDEBUG + mov $0xbeef,%ax + mov %ax,UREGS_error_code(r(sp)) + mov %ax,UREGS_entry_vector(r(sp)) + mov %ax,UREGS_saved_upcall_mask(r(sp)) + mov %ax,UREGS_cs(r(sp)) + mov %ax,UREGS_ds(r(sp)) + mov %ax,UREGS_es(r(sp)) + mov %ax,UREGS_fs(r(sp)) + mov %ax,UREGS_gs(r(sp)) + mov %ax,UREGS_ss(r(sp)) +#endif + + call_with_regs(vmx_vmexit_handler) + +.globl vmx_asm_do_vmentry +vmx_asm_do_vmentry: + get_current(bx) + cli + + mov VCPU_processor(r(bx)),%eax + shl $IRQSTAT_shift,r(ax) + lea addr_of(irq_stat),r(dx) + cmpl $0,(r(dx),r(ax),1) + jnz .Lvmx_process_softirqs + + call vmx_intr_assist + + testb $0xff,VCPU_vmx_emul(r(bx)) + jnz .Lvmx_goto_realmode + + mov VCPU_hvm_guest_cr2(r(bx)),r(ax) + mov r(ax),%cr2 + call vmx_trace_vmentry + + lea UREGS_rip(r(sp)),r(di) + mov $GUEST_RIP,%eax + /*VMWRITE(UREGS_rip)*/ + .byte 0x0f,0x79,0x07 /* vmwrite (r(di)),r(ax) */ + mov $GUEST_RSP,%eax + VMWRITE(UREGS_rsp) + mov $GUEST_RFLAGS,%eax + VMWRITE(UREGS_eflags) + + cmpb $0,VCPU_vmx_launched(r(bx)) +#if defined(__x86_64__) + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop %rax + pop %rcx + pop %rdx + pop %rsi + pop %rdi +#else /* defined(__i386__) */ + pop %ebx + pop %ecx + pop %edx + pop %esi + pop %edi + pop %ebp + pop %eax +#endif + je .Lvmx_launch + +/*.Lvmx_resume:*/ + VMRESUME + call vm_resume_fail + ud2 + +.Lvmx_launch: + VMLAUNCH + call vm_launch_fail + ud2 + +.Lvmx_goto_realmode: + sti + call_with_regs(vmx_realmode) 
+ jmp vmx_asm_do_vmentry + +.Lvmx_process_softirqs: + sti + call do_softirq + jmp vmx_asm_do_vmentry diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/intr.c --- a/xen/arch/x86/hvm/vmx/intr.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/hvm/vmx/intr.c Thu Apr 24 14:08:29 2008 -0600 @@ -111,15 +111,17 @@ static void vmx_dirq_assist(struct vcpu struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; struct dev_intx_gsi_link *digl; - if ( !vtd_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) ) + if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) ) return; for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS); irq < NR_IRQS; irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) ) { + if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) ) + continue; + stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)]); - clear_bit(irq, &hvm_irq_dpci->dirq_mask); list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list ) { diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Apr 24 14:08:29 2008 -0600 @@ -1622,17 +1622,11 @@ static int vmx_msr_read_intercept(struct u64 msr_content = 0; u32 ecx = regs->ecx, eax, edx; struct vcpu *v = current; - int index; - u64 *var_range_base = (u64*)v->arch.hvm_vcpu.mtrr.var_ranges; - u64 *fixed_range_base = (u64*)v->arch.hvm_vcpu.mtrr.fixed_ranges; HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx); switch ( ecx ) { - case MSR_IA32_TSC: - msr_content = hvm_get_guest_time(v); - break; case MSR_IA32_SYSENTER_CS: msr_content = (u32)__vmread(GUEST_SYSENTER_CS); break; @@ -1641,35 +1635,6 @@ static int vmx_msr_read_intercept(struct break; case MSR_IA32_SYSENTER_EIP: msr_content = __vmread(GUEST_SYSENTER_EIP); - break; - case MSR_IA32_APICBASE: - msr_content = vcpu_vlapic(v)->hw.apic_base_msr; - break; - case MSR_IA32_CR_PAT: - msr_content = v->arch.hvm_vcpu.pat_cr; - break; - 
case MSR_MTRRcap: - msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap; - break; - case MSR_MTRRdefType: - msr_content = v->arch.hvm_vcpu.mtrr.def_type - | (v->arch.hvm_vcpu.mtrr.enabled << 10); - break; - case MSR_MTRRfix64K_00000: - msr_content = fixed_range_base[0]; - break; - case MSR_MTRRfix16K_80000: - case MSR_MTRRfix16K_A0000: - index = regs->ecx - MSR_MTRRfix16K_80000; - msr_content = fixed_range_base[index + 1]; - break; - case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000: - index = regs->ecx - MSR_MTRRfix4K_C0000; - msr_content = fixed_range_base[index + 3]; - break; - case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7: - index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0; - msr_content = var_range_base[index]; break; case MSR_IA32_DEBUGCTLMSR: msr_content = __vmread(GUEST_IA32_DEBUGCTL); @@ -1679,17 +1644,6 @@ static int vmx_msr_read_intercept(struct break; case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2: goto gp_fault; - case MSR_IA32_MCG_CAP: - case MSR_IA32_MCG_STATUS: - case MSR_IA32_MC0_STATUS: - case MSR_IA32_MC1_STATUS: - case MSR_IA32_MC2_STATUS: - case MSR_IA32_MC3_STATUS: - case MSR_IA32_MC4_STATUS: - case MSR_IA32_MC5_STATUS: - /* No point in letting the guest see real MCEs */ - msr_content = 0; - break; case MSR_IA32_MISC_ENABLE: rdmsrl(MSR_IA32_MISC_ENABLE, msr_content); /* Debug Trace Store is not supported. 
*/ @@ -1729,8 +1683,8 @@ static int vmx_msr_read_intercept(struct goto gp_fault; } - regs->eax = msr_content & 0xFFFFFFFF; - regs->edx = msr_content >> 32; + regs->eax = (uint32_t)msr_content; + regs->edx = (uint32_t)(msr_content >> 32); done: hvmtrace_msr_read(v, ecx, msr_content); @@ -1833,19 +1787,11 @@ void vmx_vlapic_msr_changed(struct vcpu vmx_vmcs_exit(v); } -extern bool_t mtrr_var_range_msr_set(struct mtrr_state *v, - u32 msr, u64 msr_content); -extern bool_t mtrr_fix_range_msr_set(struct mtrr_state *v, - int row, u64 msr_content); -extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content); -extern bool_t pat_msr_set(u64 *pat, u64 msr); - static int vmx_msr_write_intercept(struct cpu_user_regs *regs) { u32 ecx = regs->ecx; u64 msr_content; struct vcpu *v = current; - int index; HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x", ecx, (u32)regs->eax, (u32)regs->edx); @@ -1856,10 +1802,6 @@ static int vmx_msr_write_intercept(struc switch ( ecx ) { - case MSR_IA32_TSC: - hvm_set_guest_time(v, msr_content); - pt_reset(v); - break; case MSR_IA32_SYSENTER_CS: __vmwrite(GUEST_SYSENTER_CS, msr_content); break; @@ -1869,41 +1811,6 @@ static int vmx_msr_write_intercept(struc case MSR_IA32_SYSENTER_EIP: __vmwrite(GUEST_SYSENTER_EIP, msr_content); break; - case MSR_IA32_APICBASE: - vlapic_msr_set(vcpu_vlapic(v), msr_content); - break; - case MSR_IA32_CR_PAT: - if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) ) - goto gp_fault; - break; - case MSR_MTRRdefType: - if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) ) - goto gp_fault; - break; - case MSR_MTRRfix64K_00000: - if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) ) - goto gp_fault; - break; - case MSR_MTRRfix16K_80000: - case MSR_MTRRfix16K_A0000: - index = regs->ecx - MSR_MTRRfix16K_80000 + 1; - if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, - index, msr_content) ) - goto gp_fault; - break; - case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000: - index = 
regs->ecx - MSR_MTRRfix4K_C0000 + 3; - if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, - index, msr_content) ) - goto gp_fault; - break; - case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7: - if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr, - regs->ecx, msr_content) ) - goto gp_fault; - break; - case MSR_MTRRcap: - goto gp_fault; case MSR_IA32_DEBUGCTLMSR: { int i, rc = 0; @@ -2330,12 +2237,12 @@ asmlinkage void vmx_vmexit_handler(struc break; case EXIT_REASON_MSR_READ: inst_len = __get_instruction_length(); /* Safe: RDMSR */ - if ( vmx_msr_read_intercept(regs) == X86EMUL_OKAY ) + if ( hvm_msr_read_intercept(regs) == X86EMUL_OKAY ) __update_guest_eip(inst_len); break; case EXIT_REASON_MSR_WRITE: inst_len = __get_instruction_length(); /* Safe: WRMSR */ - if ( vmx_msr_write_intercept(regs) == X86EMUL_OKAY ) + if ( hvm_msr_write_intercept(regs) == X86EMUL_OKAY ) __update_guest_eip(inst_len); break; diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_32/Makefile --- a/xen/arch/x86/hvm/vmx/x86_32/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -obj-y += exits.o diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_32/exits.S --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Thu Apr 24 14:02:16 2008 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,148 +0,0 @@ -/* - * exits.S: VMX architecture-specific exit handling. - * Copyright (c) 2004, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - */ -#include <xen/config.h> -#include <xen/errno.h> -#include <xen/softirq.h> -#include <asm/asm_defns.h> -#include <asm/apicdef.h> -#include <asm/page.h> -#include <public/xen.h> - -#define VMRESUME .byte 0x0f,0x01,0xc3 -#define VMLAUNCH .byte 0x0f,0x01,0xc2 -#define VMREAD(off) .byte 0x0f,0x78,0x44,0x24,off -#define VMWRITE(off) .byte 0x0f,0x79,0x44,0x24,off - -/* VMCS field encodings */ -#define GUEST_RSP 0x681c -#define GUEST_RIP 0x681e -#define GUEST_RFLAGS 0x6820 - -#define GET_CURRENT(reg) \ - movl $STACK_SIZE-4, reg; \ - orl %esp, reg; \ - andl $~3,reg; \ - movl (reg),reg; - -#define HVM_SAVE_ALL_NOSEGREGS \ - pushl %eax; \ - pushl %ebp; \ - pushl %edi; \ - pushl %esi; \ - pushl %edx; \ - pushl %ecx; \ - pushl %ebx; - -#define HVM_RESTORE_ALL_NOSEGREGS \ - popl %ebx; \ - popl %ecx; \ - popl %edx; \ - popl %esi; \ - popl %edi; \ - popl %ebp; \ - popl %eax - - ALIGN -ENTRY(vmx_asm_vmexit_handler) - HVM_SAVE_ALL_NOSEGREGS - GET_CURRENT(%ebx) - - movl $GUEST_RIP,%eax - VMREAD(UREGS_eip) - movl $GUEST_RSP,%eax - VMREAD(UREGS_esp) - movl $GUEST_RFLAGS,%eax - VMREAD(UREGS_eflags) - - movl %cr2,%eax - movl %eax,VCPU_hvm_guest_cr2(%ebx) - -#ifndef NDEBUG - movw $0xbeef,%ax - movw %ax,UREGS_error_code(%esp) - movw %ax,UREGS_entry_vector(%esp) - movw %ax,UREGS_saved_upcall_mask(%esp) - movw %ax,UREGS_cs(%esp) - movw %ax,UREGS_ds(%esp) - movw %ax,UREGS_es(%esp) - movw %ax,UREGS_fs(%esp) - movw %ax,UREGS_gs(%esp) - movw %ax,UREGS_ss(%esp) -#endif - - movl %esp,%eax - push %eax - call vmx_vmexit_handler - addl $4,%esp - jmp vmx_asm_do_vmentry - - ALIGN -vmx_process_softirqs: - sti - call do_softirq - jmp vmx_asm_do_vmentry - - ALIGN -ENTRY(vmx_asm_do_vmentry) - GET_CURRENT(%ebx) - cli # tests must not race interrupts - - movl VCPU_processor(%ebx),%eax - shl 
$IRQSTAT_shift,%eax - cmpl $0,irq_stat(%eax,1) - jnz vmx_process_softirqs - - call vmx_intr_assist - - testb $0xff,VCPU_vmx_emul(%ebx) - jnz vmx_goto_realmode - - movl VCPU_hvm_guest_cr2(%ebx),%eax - movl %eax,%cr2 - call vmx_trace_vmentry - - movl $GUEST_RIP,%eax - VMWRITE(UREGS_eip) - movl $GUEST_RSP,%eax - VMWRITE(UREGS_esp) - movl $GUEST_RFLAGS,%eax - VMWRITE(UREGS_eflags) - - cmpb $0,VCPU_vmx_launched(%ebx) - je vmx_launch - -/*vmx_resume:*/ - HVM_RESTORE_ALL_NOSEGREGS - VMRESUME - call vm_resume_fail - ud2 - -vmx_launch: - movb $1,VCPU_vmx_launched(%ebx) - HVM_RESTORE_ALL_NOSEGREGS - VMLAUNCH - call vm_launch_fail - ud2 - -vmx_goto_realmode: - sti - movl %esp,%eax - push %eax - call vmx_realmode - addl $4,%esp - jmp vmx_asm_do_vmentry diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_64/Makefile --- a/xen/arch/x86/hvm/vmx/x86_64/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -obj-y += exits.o diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Thu Apr 24 14:02:16 2008 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,165 +0,0 @@ -/* - * exits.S: VMX architecture-specific exit handling. - * Copyright (c) 2004, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- */ -#include <xen/config.h> -#include <xen/errno.h> -#include <xen/softirq.h> -#include <asm/asm_defns.h> -#include <asm/apicdef.h> -#include <asm/page.h> -#include <public/xen.h> - -#define VMRESUME .byte 0x0f,0x01,0xc3 -#define VMLAUNCH .byte 0x0f,0x01,0xc2 -#define VMREAD(off) .byte 0x0f,0x78,0x47,((off)-UREGS_rip) -#define VMWRITE(off) .byte 0x0f,0x79,0x47,((off)-UREGS_rip) - -/* VMCS field encodings */ -#define GUEST_RSP 0x681c -#define GUEST_RIP 0x681e -#define GUEST_RFLAGS 0x6820 - -#define GET_CURRENT(reg) \ - movq $STACK_SIZE-8, reg; \ - orq %rsp, reg; \ - andq $~7,reg; \ - movq (reg),reg; - -#define HVM_SAVE_ALL_NOSEGREGS \ - pushq %rdi; \ - pushq %rsi; \ - pushq %rdx; \ - pushq %rcx; \ - pushq %rax; \ - pushq %r8; \ - pushq %r9; \ - pushq %r10; \ - pushq %r11; \ - pushq %rbx; \ - pushq %rbp; \ - pushq %r12; \ - pushq %r13; \ - pushq %r14; \ - pushq %r15; - -#define HVM_RESTORE_ALL_NOSEGREGS \ - popq %r15; \ - popq %r14; \ - popq %r13; \ - popq %r12; \ - popq %rbp; \ - popq %rbx; \ - popq %r11; \ - popq %r10; \ - popq %r9; \ - popq %r8; \ - popq %rax; \ - popq %rcx; \ - popq %rdx; \ - popq %rsi; \ - popq %rdi - - ALIGN -ENTRY(vmx_asm_vmexit_handler) - HVM_SAVE_ALL_NOSEGREGS - GET_CURRENT(%rbx) - - leaq UREGS_rip(%rsp),%rdi - movl $GUEST_RIP,%eax - /*VMREAD(UREGS_rip)*/ - .byte 0x0f,0x78,0x07 /* vmread %rax,(%rdi) */ - movl $GUEST_RSP,%eax - VMREAD(UREGS_rsp) - movl $GUEST_RFLAGS,%eax - VMREAD(UREGS_eflags) - - movq %cr2,%rax - movq %rax,VCPU_hvm_guest_cr2(%rbx) - -#ifndef NDEBUG - movw $0xbeef,%ax - movw %ax,UREGS_error_code(%rsp) - movw %ax,UREGS_entry_vector(%rsp) - movw %ax,UREGS_saved_upcall_mask(%rsp) - movw %ax,UREGS_cs(%rsp) - movw %ax,UREGS_ds(%rsp) - movw %ax,UREGS_es(%rsp) - movw %ax,UREGS_fs(%rsp) - movw %ax,UREGS_gs(%rsp) - movw %ax,UREGS_ss(%rsp) -#endif - - movq %rsp,%rdi - call vmx_vmexit_handler - jmp vmx_asm_do_vmentry - - ALIGN -vmx_process_softirqs: - sti - call do_softirq - jmp vmx_asm_do_vmentry - - ALIGN -ENTRY(vmx_asm_do_vmentry) 
- GET_CURRENT(%rbx) - cli # tests must not race interrupts - - movl VCPU_processor(%rbx),%eax - shl $IRQSTAT_shift,%rax - leaq irq_stat(%rip),%rdx - cmpl $0,(%rdx,%rax,1) - jnz vmx_process_softirqs - - call vmx_intr_assist - - testb $0xff,VCPU_vmx_emul(%rbx) - jnz vmx_goto_realmode - - movq VCPU_hvm_guest_cr2(%rbx),%rax - movq %rax,%cr2 - call vmx_trace_vmentry - - leaq UREGS_rip(%rsp),%rdi - movl $GUEST_RIP,%eax - /*VMWRITE(UREGS_rip)*/ - .byte 0x0f,0x79,0x07 /* vmwrite (%rdi),%rax */ - movl $GUEST_RSP,%eax - VMWRITE(UREGS_rsp) - movl $GUEST_RFLAGS,%eax - VMWRITE(UREGS_eflags) - - cmpb $0,VCPU_vmx_launched(%rbx) - je vmx_launch - -/*vmx_resume:*/ - HVM_RESTORE_ALL_NOSEGREGS - VMRESUME - call vm_resume_fail - ud2 - -vmx_launch: - movb $1,VCPU_vmx_launched(%rbx) - HVM_RESTORE_ALL_NOSEGREGS - VMLAUNCH - call vm_launch_fail - ud2 - -vmx_goto_realmode: - sti - movq %rsp,%rdi - call vmx_realmode - jmp vmx_asm_do_vmentry diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/mm.c Thu Apr 24 14:08:29 2008 -0600 @@ -3279,15 +3279,6 @@ long arch_memory_op(int op, XEN_GUEST_HA case XENMAPSPACE_shared_info: if ( xatp.idx == 0 ) mfn = virt_to_mfn(d->shared_info); - /* XXX: assumption here, this is called after E820 table is build - * need the E820 to initialize MTRR. 
- */ - if ( is_hvm_domain(d) ) { - extern void init_mtrr_in_hyper(struct vcpu *); - struct vcpu *vs; - for_each_vcpu(d, vs) - init_mtrr_in_hyper(vs); - } break; case XENMAPSPACE_grant_table: spin_lock(&d->grant_table->lock); @@ -3625,29 +3616,18 @@ static int ptwr_emulated_cmpxchg( static int ptwr_emulated_cmpxchg( enum x86_segment seg, unsigned long offset, - unsigned long old, - unsigned long new, + void *p_old, + void *p_new, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { + paddr_t old = 0, new = 0; + if ( bytes > sizeof(paddr_t) ) + return X86EMUL_UNHANDLEABLE; + memcpy(&old, p_old, bytes); + memcpy(&new, p_new, bytes); return ptwr_emulated_update( offset, old, new, bytes, 1, - container_of(ctxt, struct ptwr_emulate_ctxt, ctxt)); -} - -static int ptwr_emulated_cmpxchg8b( - enum x86_segment seg, - unsigned long offset, - unsigned long old, - unsigned long old_hi, - unsigned long new, - unsigned long new_hi, - struct x86_emulate_ctxt *ctxt) -{ - if ( CONFIG_PAGING_LEVELS == 2 ) - return X86EMUL_UNHANDLEABLE; - return ptwr_emulated_update( - offset, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1, container_of(ctxt, struct ptwr_emulate_ctxt, ctxt)); } @@ -3656,7 +3636,6 @@ static struct x86_emulate_ops ptwr_emula .insn_fetch = ptwr_emulated_read, .write = ptwr_emulated_write, .cmpxchg = ptwr_emulated_cmpxchg, - .cmpxchg8b = ptwr_emulated_cmpxchg8b }; /* Write page fault handler: check if guest is trying to modify a PTE. 
*/ diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/mm/shadow/common.c Thu Apr 24 14:08:29 2008 -0600 @@ -239,15 +239,15 @@ static int static int hvm_emulate_cmpxchg(enum x86_segment seg, unsigned long offset, - unsigned long old, - unsigned long new, + void *p_old, + void *p_new, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { struct sh_emulate_ctxt *sh_ctxt = container_of(ctxt, struct sh_emulate_ctxt, ctxt); struct vcpu *v = current; - unsigned long addr; + unsigned long addr, old[2], new[2]; int rc; if ( !is_x86_user_segment(seg) ) @@ -258,35 +258,21 @@ hvm_emulate_cmpxchg(enum x86_segment seg if ( rc ) return rc; - return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( - v, addr, old, new, bytes, sh_ctxt); -} - -static int -hvm_emulate_cmpxchg8b(enum x86_segment seg, - unsigned long offset, - unsigned long old_lo, - unsigned long old_hi, - unsigned long new_lo, - unsigned long new_hi, - struct x86_emulate_ctxt *ctxt) -{ - struct sh_emulate_ctxt *sh_ctxt = - container_of(ctxt, struct sh_emulate_ctxt, ctxt); - struct vcpu *v = current; - unsigned long addr; - int rc; - - if ( !is_x86_user_segment(seg) ) - return X86EMUL_UNHANDLEABLE; - - rc = hvm_translate_linear_addr( - seg, offset, 8, hvm_access_write, sh_ctxt, &addr); - if ( rc ) - return rc; - - return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( - v, addr, old_lo, old_hi, new_lo, new_hi, sh_ctxt); + old[0] = new[0] = 0; + memcpy(old, p_old, bytes); + memcpy(new, p_new, bytes); + + if ( bytes <= sizeof(long) ) + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( + v, addr, old[0], new[0], bytes, sh_ctxt); + +#ifdef __i386__ + if ( bytes == 8 ) + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( + v, addr, old[0], old[1], new[0], new[1], sh_ctxt); +#endif + + return X86EMUL_UNHANDLEABLE; } static struct x86_emulate_ops hvm_shadow_emulator_ops = { @@ -294,7 +280,6 @@ static struct 
x86_emulate_ops hvm_shadow .insn_fetch = hvm_emulate_insn_fetch, .write = hvm_emulate_write, .cmpxchg = hvm_emulate_cmpxchg, - .cmpxchg8b = hvm_emulate_cmpxchg8b, }; static int @@ -338,36 +323,34 @@ static int static int pv_emulate_cmpxchg(enum x86_segment seg, unsigned long offset, - unsigned long old, - unsigned long new, + void *p_old, + void *p_new, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { struct sh_emulate_ctxt *sh_ctxt = container_of(ctxt, struct sh_emulate_ctxt, ctxt); + unsigned long old[2], new[2]; struct vcpu *v = current; + if ( !is_x86_user_segment(seg) ) return X86EMUL_UNHANDLEABLE; - return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( - v, offset, old, new, bytes, sh_ctxt); -} - -static int -pv_emulate_cmpxchg8b(enum x86_segment seg, - unsigned long offset, - unsigned long old_lo, - unsigned long old_hi, - unsigned long new_lo, - unsigned long new_hi, - struct x86_emulate_ctxt *ctxt) -{ - struct sh_emulate_ctxt *sh_ctxt = - container_of(ctxt, struct sh_emulate_ctxt, ctxt); - struct vcpu *v = current; - if ( !is_x86_user_segment(seg) ) - return X86EMUL_UNHANDLEABLE; - return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( - v, offset, old_lo, old_hi, new_lo, new_hi, sh_ctxt); + + old[0] = new[0] = 0; + memcpy(old, p_old, bytes); + memcpy(new, p_new, bytes); + + if ( bytes <= sizeof(long) ) + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( + v, offset, old[0], new[0], bytes, sh_ctxt); + +#ifdef __i386__ + if ( bytes == 8 ) + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( + v, offset, old[0], old[1], new[0], new[1], sh_ctxt); +#endif + + return X86EMUL_UNHANDLEABLE; } static struct x86_emulate_ops pv_shadow_emulator_ops = { @@ -375,7 +358,6 @@ static struct x86_emulate_ops pv_shadow_ .insn_fetch = pv_emulate_read, .write = pv_emulate_write, .cmpxchg = pv_emulate_cmpxchg, - .cmpxchg8b = pv_emulate_cmpxchg8b, }; struct x86_emulate_ops *shadow_init_emulation( diff -r 239b44eeb2d6 -r dc510776dd59 
xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/mm/shadow/multi.c Thu Apr 24 14:08:29 2008 -0600 @@ -2089,7 +2089,7 @@ static shadow_l1e_t * shadow_get_and_cre else { /* Shadowing an actual guest l1 table */ - if ( !mfn_valid(gw->l2mfn) ) return NULL; /* No guest page. */ + if ( !mfn_valid(gw->l1mfn) ) return NULL; /* No guest page. */ *sl1mfn = get_shadow_status(v, gw->l1mfn, SH_type_l1_shadow); if ( !mfn_valid(*sl1mfn) ) { @@ -4365,7 +4365,7 @@ static void emulate_unmap_dest(struct vc atomic_inc(&v->domain->arch.paging.shadow.gtable_dirty_version); } -int +static int sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src, u32 bytes, struct sh_emulate_ctxt *sh_ctxt) { @@ -4389,7 +4389,7 @@ sh_x86_emulate_write(struct vcpu *v, uns return X86EMUL_OKAY; } -int +static int sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr, unsigned long old, unsigned long new, unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt) @@ -4432,7 +4432,8 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u return rv; } -int +#ifdef __i386__ +static int sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr, unsigned long old_lo, unsigned long old_hi, unsigned long new_lo, unsigned long new_hi, @@ -4465,7 +4466,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, shadow_unlock(v->domain); return rv; } - +#endif /**************************************************************************/ /* Audit tools */ @@ -4738,7 +4739,9 @@ struct paging_mode sh_paging_mode = { .shadow.detach_old_tables = sh_detach_old_tables, .shadow.x86_emulate_write = sh_x86_emulate_write, .shadow.x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg, +#ifdef __i386__ .shadow.x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b, +#endif .shadow.make_monitor_table = sh_make_monitor_table, .shadow.destroy_monitor_table = sh_destroy_monitor_table, #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/setup.c 
--- a/xen/arch/x86/setup.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/setup.c Thu Apr 24 14:08:29 2008 -0600 @@ -1019,10 +1019,6 @@ void __init __start_xen(unsigned long mb _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start; } - iommu_setup(); - - amd_iommu_detect(); - /* * We're going to setup domain0 using the module(s) that we stashed safely * above our heap. The second module, if present, is an initrd ramdisk. diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/smp.c --- a/xen/arch/x86/smp.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/smp.c Thu Apr 24 14:08:29 2008 -0600 @@ -75,20 +75,10 @@ static inline int __prepare_ICR2 (unsign return SET_APIC_DEST_FIELD(mask); } -static inline void check_IPI_mask(cpumask_t cpumask) -{ - /* - * Sanity, and necessary. An IPI with no target generates a send accept - * error with Pentium and P6 APICs. - */ - ASSERT(cpus_subset(cpumask, cpu_online_map)); - ASSERT(!cpus_empty(cpumask)); -} - void apic_wait_icr_idle(void) { - while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ) - cpu_relax(); + while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ) + cpu_relax(); } void send_IPI_mask_flat(cpumask_t cpumask, int vector) @@ -97,7 +87,8 @@ void send_IPI_mask_flat(cpumask_t cpumas unsigned long cfg; unsigned long flags; - check_IPI_mask(cpumask); + /* An IPI with no target generates a send accept error from P5/P6 APICs. */ + WARN_ON(mask == 0); local_irq_save(flags); @@ -130,17 +121,9 @@ void send_IPI_mask_phys(cpumask_t mask, unsigned long cfg, flags; unsigned int query_cpu; - check_IPI_mask(mask); - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicasts to each CPU instead. This - * should be modified to do 1 message per cluster ID - mbligh - */ - local_irq_save(flags); - for_each_cpu_mask( query_cpu, mask ) + for_each_cpu_mask ( query_cpu, mask ) { /* * Wait for idle. 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/traps.c Thu Apr 24 14:08:29 2008 -0600 @@ -479,6 +479,7 @@ static inline void do_trap( static inline void do_trap( int trapnr, struct cpu_user_regs *regs, int use_error_code) { + struct vcpu *curr = current; unsigned long fixup; DEBUGGER_trap_entry(trapnr, regs); @@ -494,6 +495,14 @@ static inline void do_trap( dprintk(XENLOG_ERR, "Trap %d: %p -> %p\n", trapnr, _p(regs->eip), _p(fixup)); regs->eip = fixup; + return; + } + + if ( ((trapnr == TRAP_copro_error) || (trapnr == TRAP_simd_error)) && + is_hvm_vcpu(curr) && curr->arch.hvm_vcpu.fpu_exception_callback ) + { + curr->arch.hvm_vcpu.fpu_exception_callback( + curr->arch.hvm_vcpu.fpu_exception_callback_arg, regs); return; } @@ -1399,6 +1408,13 @@ static int admin_io_okay( unsigned int port, unsigned int bytes, struct vcpu *v, struct cpu_user_regs *regs) { + /* + * Port 0xcf8 (CONFIG_ADDRESS) is only visible for DWORD accesses. + * We never permit direct access to that register. 
+ */ + if ( (port == 0xcf8) && (bytes == 4) ) + return 0; + return ioports_access_permitted(v->domain, port, port + bytes - 1); } @@ -1431,10 +1447,10 @@ static uint32_t guest_io_read( { sub_data = pv_pit_handler(port, 0, 0); } - else if ( (port & 0xfffc) == 0xcf8 ) - { - size = min(bytes, 4 - (port & 3)); - sub_data = v->domain->arch.pci_cf8 >> ((port & 3) * 8); + else if ( (port == 0xcf8) && (bytes == 4) ) + { + size = 4; + sub_data = v->domain->arch.pci_cf8; } else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) ) { @@ -1489,19 +1505,10 @@ static void guest_io_write( { pv_pit_handler(port, (uint8_t)data, 1); } - else if ( (port & 0xfffc) == 0xcf8 ) - { - size = min(bytes, 4 - (port & 3)); - if ( size == 4 ) - { - v->domain->arch.pci_cf8 = data; - } - else - { - uint32_t mask = ((1u << (size * 8)) - 1) << ((port & 3) * 8); - v->domain->arch.pci_cf8 &= ~mask; - v->domain->arch.pci_cf8 |= (data << ((port & 3) * 8)) & mask; - } + else if ( (port == 0xcf8) && (bytes == 4) ) + { + size = 4; + v->domain->arch.pci_cf8 = data; } else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) ) { diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/x86_emulate.c Thu Apr 24 14:08:29 2008 -0600 @@ -11,23 +11,7 @@ #include <asm/x86_emulate.h> +/* Avoid namespace pollution. 
*/ #undef cmpxchg -#define __emulate_fpu_insn(_op) \ -do{ int _exn; \ - asm volatile ( \ - "1: " _op "\n" \ - "2: \n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov $1,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " "__FIXUP_ALIGN"\n" \ - " "__FIXUP_WORD" 1b,3b\n" \ - ".previous" \ - : "=r" (_exn) : "0" (0) ); \ - generate_exception_if(_exn, EXC_MF, -1); \ -} while (0) - #include "x86_emulate/x86_emulate.c" diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/x86_emulate/x86_emulate.c --- a/xen/arch/x86/x86_emulate/x86_emulate.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/x86_emulate/x86_emulate.c Thu Apr 24 14:08:29 2008 -0600 @@ -195,9 +195,9 @@ static uint8_t twobyte_table[256] = { /* 0x50 - 0x5F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 - 0x6F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, /* 0x70 - 0x7F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, /* 0x80 - 0x87 */ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, @@ -546,6 +546,62 @@ do { ? 
(uint16_t)_regs.eip : (uint32_t)_regs.eip); \ } while (0) +struct fpu_insn_ctxt { + uint8_t insn_bytes; + uint8_t exn_raised; +}; + +static void fpu_handle_exception(void *_fic, struct cpu_user_regs *regs) +{ + struct fpu_insn_ctxt *fic = _fic; + fic->exn_raised = 1; + regs->eip += fic->insn_bytes; +} + +#define get_fpu(_type, _fic) \ +do{ (_fic)->exn_raised = 0; \ + fail_if(ops->get_fpu == NULL); \ + rc = ops->get_fpu(fpu_handle_exception, _fic, _type, ctxt); \ + if ( rc ) goto done; \ +} while (0) +#define put_fpu(_fic) \ +do{ \ + if ( ops->put_fpu != NULL ) \ + ops->put_fpu(ctxt); \ + generate_exception_if((_fic)->exn_raised, EXC_MF, -1); \ +} while (0) + +#define emulate_fpu_insn(_op) \ +do{ struct fpu_insn_ctxt fic; \ + get_fpu(X86EMUL_FPU_fpu, &fic); \ + asm volatile ( \ + "movb $2f-1f,%0 \n" \ + "1: " _op " \n" \ + "2: \n" \ + : "=m" (fic.insn_bytes) : : "memory" ); \ + put_fpu(&fic); \ +} while (0) + +#define emulate_fpu_insn_memdst(_op, _arg) \ +do{ struct fpu_insn_ctxt fic; \ + get_fpu(X86EMUL_FPU_fpu, &fic); \ + asm volatile ( \ + "movb $2f-1f,%0 \n" \ + "1: " _op " %1 \n" \ + "2: \n" \ + : "=m" (fic.insn_bytes), "=m" (_arg) \ + : : "memory" ); \ + put_fpu(&fic); \ +} while (0) + +#define emulate_fpu_insn_stub(_bytes...) \ +do{ uint8_t stub[] = { _bytes, 0xc3 }; \ + struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 }; \ + get_fpu(X86EMUL_FPU_fpu, &fic); \ + (*(void(*)(void))stub)(); \ + put_fpu(&fic); \ +} while (0) + static unsigned long __get_rep_prefix( struct cpu_user_regs *int_regs, struct cpu_user_regs *ext_regs, @@ -851,6 +907,7 @@ protmode_load_seg( struct { uint32_t a, b; } desc; unsigned long val; uint8_t dpl, rpl, cpl; + uint32_t new_desc_b; int rc, fault_type = EXC_TS; /* NULL selector? */ @@ -933,10 +990,11 @@ protmode_load_seg( } /* Ensure Accessed flag is set. */ + new_desc_b = desc.b | 0x100; rc = ((desc.b & 0x100) ? 
X86EMUL_OKAY : ops->cmpxchg( - x86_seg_none, desctab.base + (sel & 0xfff8) + 4, desc.b, - desc.b | 0x100, 4, ctxt)); + x86_seg_none, desctab.base + (sel & 0xfff8) + 4, + &desc.b, &new_desc_b, 4, ctxt)); } while ( rc == X86EMUL_CMPXCHG_FAILED ); if ( rc ) @@ -2036,8 +2094,8 @@ x86_emulate( /* nothing to do */; else if ( lock_prefix ) rc = ops->cmpxchg( - dst.mem.seg, dst.mem.off, dst.orig_val, - dst.val, dst.bytes, ctxt); + dst.mem.seg, dst.mem.off, &dst.orig_val, + &dst.val, dst.bytes, ctxt); else rc = ops->write( dst.mem.seg, dst.mem.off, dst.val, dst.bytes, ctxt); @@ -2399,9 +2457,7 @@ x86_emulate( } case 0x9b: /* wait/fwait */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); - __emulate_fpu_insn("fwait"); + emulate_fpu_insn("fwait"); break; case 0x9c: /* pushf */ @@ -2721,77 +2777,89 @@ x86_emulate( } case 0xd9: /* FPU 0xd9 */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); switch ( modrm ) { - case 0xc0: __emulate_fpu_insn(".byte 0xd9,0xc0"); break; - case 0xc1: __emulate_fpu_insn(".byte 0xd9,0xc1"); break; - case 0xc2: __emulate_fpu_insn(".byte 0xd9,0xc2"); break; - case 0xc3: __emulate_fpu_insn(".byte 0xd9,0xc3"); break; - case 0xc4: __emulate_fpu_insn(".byte 0xd9,0xc4"); break; - case 0xc5: __emulate_fpu_insn(".byte 0xd9,0xc5"); break; - case 0xc6: __emulate_fpu_insn(".byte 0xd9,0xc6"); break; - case 0xc7: __emulate_fpu_insn(".byte 0xd9,0xc7"); break; - case 0xe0: __emulate_fpu_insn(".byte 0xd9,0xe0"); break; - case 0xe8: __emulate_fpu_insn(".byte 0xd9,0xe8"); break; - case 0xee: __emulate_fpu_insn(".byte 0xd9,0xee"); break; + case 0xc0 ... 0xc7: /* fld %stN */ + case 0xc8 ... 
0xcf: /* fxch %stN */ + case 0xd0: /* fnop */ + case 0xe0: /* fchs */ + case 0xe1: /* fabs */ + case 0xe4: /* ftst */ + case 0xe5: /* fxam */ + case 0xe8: /* fld1 */ + case 0xe9: /* fldl2t */ + case 0xea: /* fldl2e */ + case 0xeb: /* fldpi */ + case 0xec: /* fldlg2 */ + case 0xed: /* fldln2 */ + case 0xee: /* fldz */ + case 0xf0: /* f2xm1 */ + case 0xf1: /* fyl2x */ + case 0xf2: /* fptan */ + case 0xf3: /* fpatan */ + case 0xf4: /* fxtract */ + case 0xf5: /* fprem1 */ + case 0xf6: /* fdecstp */ + case 0xf7: /* fincstp */ + case 0xf8: /* fprem */ + case 0xf9: /* fyl2xp1 */ + case 0xfa: /* fsqrt */ + case 0xfb: /* fsincos */ + case 0xfc: /* frndint */ + case 0xfd: /* fscale */ + case 0xfe: /* fsin */ + case 0xff: /* fcos */ + emulate_fpu_insn_stub(0xd9, modrm); + break; default: fail_if((modrm_reg & 7) != 7); fail_if(modrm >= 0xc0); /* fnstcw m2byte */ ea.bytes = 2; dst = ea; - asm volatile ( "fnstcw %0" : "=m" (dst.val) ); + emulate_fpu_insn_memdst("fnstcw", dst.val); } break; case 0xdb: /* FPU 0xdb */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); fail_if(modrm != 0xe3); /* fninit */ - asm volatile ( "fninit" ); + emulate_fpu_insn("fninit"); break; case 0xdd: /* FPU 0xdd */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); fail_if((modrm_reg & 7) != 7); fail_if(modrm >= 0xc0); /* fnstsw m2byte */ ea.bytes = 2; dst = ea; - asm volatile ( "fnstsw %0" : "=m" (dst.val) ); + emulate_fpu_insn_memdst("fnstsw", dst.val); break; case 0xde: /* FPU 0xde */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); switch ( modrm ) { - case 0xd9: __emulate_fpu_insn(".byte 0xde,0xd9"); break; - case 0xf8: __emulate_fpu_insn(".byte 0xde,0xf8"); break; - case 0xf9: __emulate_fpu_insn(".byte 0xde,0xf9"); break; - case 0xfa: __emulate_fpu_insn(".byte 0xde,0xfa"); break; - case 0xfb: __emulate_fpu_insn(".byte 0xde,0xfb"); break; - case 0xfc: __emulate_fpu_insn(".byte 0xde,0xfc"); break; - case 0xfd: __emulate_fpu_insn(".byte 0xde,0xfd"); 
break; - case 0xfe: __emulate_fpu_insn(".byte 0xde,0xfe"); break; - case 0xff: __emulate_fpu_insn(".byte 0xde,0xff"); break; - default: goto cannot_emulate; + case 0xc0 ... 0xc7: /* faddp %stN */ + case 0xc8 ... 0xcf: /* fmulp %stN */ + case 0xd9: /* fcompp */ + case 0xe0 ... 0xe7: /* fsubrp %stN */ + case 0xe8 ... 0xef: /* fsubp %stN */ + case 0xf0 ... 0xf7: /* fdivrp %stN */ + case 0xf8 ... 0xff: /* fdivp %stN */ + emulate_fpu_insn_stub(0xde, modrm); + break; + default: + goto cannot_emulate; } break; case 0xdf: /* FPU 0xdf */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); fail_if(modrm != 0xe0); /* fnstsw %ax */ dst.bytes = 2; dst.type = OP_REG; dst.reg = (unsigned long *)&_regs.eax; - asm volatile ( "fnstsw %0" : "=m" (dst.val) ); + emulate_fpu_insn_memdst("fnstsw", dst.val); break; case 0xe0 ... 0xe2: /* loop{,z,nz} */ { @@ -2975,6 +3043,7 @@ x86_emulate( case 0xa3: bt: /* bt */ emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags); + dst.type = OP_NONE; break; case 0xa4: /* shld imm8,r,r/m */ @@ -3067,7 +3136,11 @@ x86_emulate( : "=r" (dst.val), "=q" (zf) : "r" (src.val), "1" (0) ); _regs.eflags &= ~EFLG_ZF; - _regs.eflags |= zf ? EFLG_ZF : 0; + if ( zf ) + { + _regs.eflags |= EFLG_ZF; + dst.type = OP_NONE; + } break; } @@ -3077,7 +3150,11 @@ x86_emulate( : "=r" (dst.val), "=q" (zf) : "r" (src.val), "1" (0) ); _regs.eflags &= ~EFLG_ZF; - _regs.eflags |= zf ? 
EFLG_ZF : 0; + if ( zf ) + { + _regs.eflags |= EFLG_ZF; + dst.type = OP_NONE; + } break; } @@ -3310,6 +3387,44 @@ x86_emulate( break; } + case 0x6f: /* movq mm/m64,mm */ { + uint8_t stub[] = { 0x0f, 0x6f, modrm, 0xc3 }; + struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 }; + uint64_t val; + if ( ea.type == OP_MEM ) + { + unsigned long lval, hval; + if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &lval, 4, ctxt)) || + (rc = ops->read(ea.mem.seg, ea.mem.off+4, &hval, 4, ctxt)) ) + goto done; + val = ((uint64_t)hval << 32) | (uint32_t)lval; + stub[2] = modrm & 0x38; /* movq (%eax),%mmN */ + } + get_fpu(X86EMUL_FPU_mmx, &fic); + asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" ); + put_fpu(&fic); + break; + } + + case 0x7f: /* movq mm,mm/m64 */ { + uint8_t stub[] = { 0x0f, 0x7f, modrm, 0xc3 }; + struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 }; + uint64_t val; + if ( ea.type == OP_MEM ) + stub[2] = modrm & 0x38; /* movq %mmN,(%eax) */ + get_fpu(X86EMUL_FPU_mmx, &fic); + asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" ); + put_fpu(&fic); + if ( ea.type == OP_MEM ) + { + unsigned long lval = (uint32_t)val, hval = (uint32_t)(val >> 32); + if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0, lval, 4, ctxt)) || + (rc = ops->write(ea.mem.seg, ea.mem.off+4, hval, 4, ctxt)) ) + goto done; + } + break; + } + case 0x80 ... 0x8f: /* jcc (near) */ { int rel = (((op_bytes == 2) && !mode_64bit()) ? 
(int32_t)insn_fetch_type(int16_t) @@ -3346,60 +3461,49 @@ x86_emulate( src.val = x86_seg_gs; goto pop_seg; - case 0xc7: /* Grp9 (cmpxchg8b) */ -#if defined(__i386__) - { - unsigned long old_lo, old_hi; + case 0xc7: /* Grp9 (cmpxchg8b/cmpxchg16b) */ { + unsigned long old[2], exp[2], new[2]; + unsigned int i; + generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1); generate_exception_if(ea.type != OP_MEM, EXC_UD, -1); - if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &old_lo, 4, ctxt)) || - (rc = ops->read(ea.mem.seg, ea.mem.off+4, &old_hi, 4, ctxt)) ) - goto done; - if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) ) - { - _regs.eax = old_lo; - _regs.edx = old_hi; + op_bytes *= 2; + + /* Get actual old value. */ + for ( i = 0; i < (op_bytes/sizeof(long)); i++ ) + if ( (rc = ops->read(ea.mem.seg, ea.mem.off + i*sizeof(long), + &old[i], sizeof(long), ctxt)) != 0 ) + goto done; + + /* Get expected and proposed values. */ + if ( op_bytes == 8 ) + { + ((uint32_t *)exp)[0] = _regs.eax; ((uint32_t *)exp)[1] = _regs.edx; + ((uint32_t *)new)[0] = _regs.ebx; ((uint32_t *)new)[1] = _regs.ecx; + } + else + { + exp[0] = _regs.eax; exp[1] = _regs.edx; + new[0] = _regs.ebx; new[1] = _regs.ecx; + } + + if ( memcmp(old, exp, op_bytes) ) + { + /* Expected != actual: store actual to rDX:rAX and clear ZF. */ + _regs.eax = (op_bytes == 8) ? ((uint32_t *)old)[0] : old[0]; + _regs.edx = (op_bytes == 8) ? ((uint32_t *)old)[1] : old[1]; _regs.eflags &= ~EFLG_ZF; } - else if ( ops->cmpxchg8b == NULL ) - { - rc = X86EMUL_UNHANDLEABLE; - goto done; - } else { - if ( (rc = ops->cmpxchg8b(ea.mem.seg, ea.mem.off, old_lo, old_hi, - _regs.ebx, _regs.ecx, ctxt)) != 0 ) + /* Expected == actual: attempt atomic cmpxchg and set ZF. 
*/ + if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, + new, op_bytes, ctxt)) != 0 ) goto done; _regs.eflags |= EFLG_ZF; } break; } -#elif defined(__x86_64__) - { - unsigned long old, new; - generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1); - generate_exception_if(ea.type != OP_MEM, EXC_UD, -1); - if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &old, 8, ctxt)) != 0 ) - goto done; - if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) || - ((uint32_t)(old>>32) != (uint32_t)_regs.edx) ) - { - _regs.eax = (uint32_t)(old>>0); - _regs.edx = (uint32_t)(old>>32); - _regs.eflags &= ~EFLG_ZF; - } - else - { - new = (_regs.ecx<<32)|(uint32_t)_regs.ebx; - if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, - new, 8, ctxt)) != 0 ) - goto done; - _regs.eflags |= EFLG_ZF; - } - break; - } -#endif case 0xc8 ... 0xcf: /* bswap */ dst.type = OP_REG; diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/x86_emulate/x86_emulate.h --- a/xen/arch/x86/x86_emulate/x86_emulate.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/arch/x86/x86_emulate/x86_emulate.h Thu Apr 24 14:08:29 2008 -0600 @@ -95,6 +95,12 @@ struct segment_register { /* (cmpxchg accessor): CMPXCHG failed. Maps to X86EMUL_RETRY in caller. */ #define X86EMUL_CMPXCHG_FAILED 3 +/* FPU sub-types which may be requested via ->get_fpu(). */ +enum x86_emulate_fpu_type { + X86EMUL_FPU_fpu, /* Standard FPU coprocessor instruction set */ + X86EMUL_FPU_mmx /* MMX instruction set (%mm0-%mm7) */ +}; + /* * These operations represent the instruction emulator's interface to memory. * @@ -104,8 +110,7 @@ struct segment_register { * some out-of-band mechanism, unknown to the emulator. The memop signals * failure by returning X86EMUL_EXCEPTION to the emulator, which will * then immediately bail. - * 2. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only - * cmpxchg8b_emulated need support 8-byte accesses. + * 2. Valid access sizes are 1, 2, 4 and 8 (x86/64 only) bytes. * 3. 
The emulator cannot handle 64-bit mode emulation on an x86/32 system. */ struct x86_emulate_ops @@ -153,34 +158,16 @@ struct x86_emulate_ops /* * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation. - * @old: [IN ] Value expected to be current at @addr. - * @new: [IN ] Value to write to @addr. + * @p_old: [IN ] Pointer to value expected to be current at @addr. + * @p_new: [IN ] Pointer to value to write to @addr. + * @bytes: [IN ] Operation size (up to 8 (x86/32) or 16 (x86/64) bytes). */ int (*cmpxchg)( enum x86_segment seg, unsigned long offset, - unsigned long old, - unsigned long new, - unsigned int bytes, - struct x86_emulate_ctxt *ctxt); - - /* - * cmpxchg8b: Emulate an atomic (LOCKed) CMPXCHG8B operation. - * @old: [IN ] Value expected to be current at @addr. - * @new: [IN ] Value to write to @addr. - * NOTES: - * 1. This function is only ever called when emulating a real CMPXCHG8B. - * 2. This function is *never* called on x86/64 systems. - * 2. Not defining this function (i.e., specifying NULL) is equivalent - * to defining a function that always returns X86EMUL_UNHANDLEABLE. - */ - int (*cmpxchg8b)( - enum x86_segment seg, - unsigned long offset, - unsigned long old_lo, - unsigned long old_hi, - unsigned long new_lo, - unsigned long new_hi, + void *p_old, + void *p_new, + unsigned int bytes, struct x86_emulate_ctxt *ctxt); /* @@ -342,8 +329,19 @@ struct x86_emulate_ops uint8_t insn_len, struct x86_emulate_ctxt *ctxt); - /* load_fpu_ctxt: Load emulated environment's FPU state onto processor. */ - void (*load_fpu_ctxt)( + /* + * get_fpu: Load emulated environment's FPU state onto processor. + * @exn_callback: On any FPU or SIMD exception, pass control to + * (*exception_callback)(exception_callback_arg, regs). + */ + int (*get_fpu)( + void (*exception_callback)(void *, struct cpu_user_regs *), + void *exception_callback_arg, + enum x86_emulate_fpu_type type, + struct x86_emulate_ctxt *ctxt); + + /* put_fpu: Relinquish the FPU. 
Unhook from FPU/SIMD exception handlers. */ + void (*put_fpu)( struct x86_emulate_ctxt *ctxt); /* invlpg: Invalidate paging structures which map addressed byte. */ diff -r 239b44eeb2d6 -r dc510776dd59 xen/common/trace.c --- a/xen/common/trace.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/common/trace.c Thu Apr 24 14:08:29 2008 -0600 @@ -374,6 +374,15 @@ static inline int insert_lost_records(st (unsigned char *)&ed); } +/* + * Notification is performed in qtasklet to avoid deadlocks with contexts + * which __trace_var() may be called from (e.g., scheduler critical regions). + */ +static void trace_notify_dom0(unsigned long unused) +{ + send_guest_global_virq(dom0, VIRQ_TBUF); +} +static DECLARE_TASKLET(trace_notify_dom0_tasklet, trace_notify_dom0, 0); /** * trace - Enters a trace tuple into the trace buffer for the current CPU. @@ -506,7 +515,7 @@ void __trace_var(u32 event, int cycles, /* Notify trace buffer consumer that we've crossed the high water mark. */ if ( started_below_highwater && (calc_unconsumed_bytes(buf) >= t_buf_highwater) ) - send_guest_global_virq(dom0, VIRQ_TBUF); + tasklet_schedule(&trace_notify_dom0_tasklet); } /* diff -r 239b44eeb2d6 -r dc510776dd59 xen/common/xencomm.c --- a/xen/common/xencomm.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/common/xencomm.c Thu Apr 24 14:08:29 2008 -0600 @@ -323,7 +323,7 @@ xencomm_copy_chunk_to( (unsigned long)xencomm_vaddr(paddr, page)); memcpy(xencomm_vaddr(paddr, page), (void *)from, len); - xencomm_mark_dirty(xencomm_vaddr(paddr, page), len); + xencomm_mark_dirty((unsigned long)xencomm_vaddr(paddr, page), len); put_page(page); return 0; diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/char/console.c Thu Apr 24 14:08:29 2008 -0600 @@ -322,7 +322,7 @@ static long guest_console_write(XEN_GUES while ( count > 0 ) { - while ( serial_tx_space(sercon_handle) < (SERIAL_TXBUFSZ / 2) ) + while ( serial_tx_space(sercon_handle) < 
(serial_txbufsz / 2) ) { if ( hypercall_preempt_check() ) break; diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/char/serial.c --- a/xen/drivers/char/serial.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/char/serial.c Thu Apr 24 14:08:29 2008 -0600 @@ -15,6 +15,19 @@ #include <xen/mm.h> #include <xen/serial.h> +/* Never drop characters, even if the async transmit buffer fills. */ +/* #define SERIAL_NEVER_DROP_CHARS 1 */ + +unsigned int serial_txbufsz = 16384; +static void __init parse_serial_tx_buffer(const char *s) +{ + serial_txbufsz = max((unsigned int)parse_size_and_unit(s, NULL), 512u); +} +custom_param("serial_tx_buffer", parse_serial_tx_buffer); + +#define mask_serial_rxbuf_idx(_i) ((_i)&(serial_rxbufsz-1)) +#define mask_serial_txbuf_idx(_i) ((_i)&(serial_txbufsz-1)) + static struct serial_port com[2] = { { .rx_lock = SPIN_LOCK_UNLOCKED, .tx_lock = SPIN_LOCK_UNLOCKED }, { .rx_lock = SPIN_LOCK_UNLOCKED, .tx_lock = SPIN_LOCK_UNLOCKED } @@ -36,8 +49,8 @@ void serial_rx_interrupt(struct serial_p fn = port->rx_hi; else if ( !(c & 0x80) && (port->rx_lo != NULL) ) fn = port->rx_lo; - else if ( (port->rxbufp - port->rxbufc) != SERIAL_RXBUFSZ ) - port->rxbuf[MASK_SERIAL_RXBUF_IDX(port->rxbufp++)] = c; + else if ( (port->rxbufp - port->rxbufc) != serial_rxbufsz ) + port->rxbuf[mask_serial_rxbuf_idx(port->rxbufp++)] = c; } spin_unlock_irqrestore(&port->rx_lock, flags); @@ -72,7 +85,7 @@ void serial_tx_interrupt(struct serial_p if ( port->txbufc == port->txbufp ) break; port->driver->putc( - port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]); + port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]); } } @@ -81,22 +94,24 @@ void serial_tx_interrupt(struct serial_p static void __serial_putc(struct serial_port *port, char c) { - int i; - if ( (port->txbuf != NULL) && !port->sync ) { /* Interrupt-driven (asynchronous) transmitter. */ - if ( (port->txbufp - port->txbufc) == SERIAL_TXBUFSZ ) - { - /* Buffer is full: we spin, but could alternatively drop chars. 
*/ +#ifdef SERIAL_NEVER_DROP_CHARS + if ( (port->txbufp - port->txbufc) == serial_txbufsz ) + { + /* Buffer is full: we spin waiting for space to appear. */ + int i; while ( !port->driver->tx_empty(port) ) cpu_relax(); for ( i = 0; i < port->tx_fifo_size; i++ ) port->driver->putc( - port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]); - port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufp++)] = c; - } - else if ( ((port->txbufp - port->txbufc) == 0) && + port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]); + port->txbuf[mask_serial_txbuf_idx(port->txbufp++)] = c; + return; + } +#endif + if ( ((port->txbufp - port->txbufc) == 0) && port->driver->tx_empty(port) ) { /* Buffer and UART FIFO are both empty. */ @@ -105,7 +120,7 @@ static void __serial_putc(struct serial_ else { /* Normal case: buffer the character. */ - port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufp++)] = c; + port->txbuf[mask_serial_txbuf_idx(port->txbufp++)] = c; } } else if ( port->driver->tx_empty ) @@ -200,7 +215,7 @@ char serial_getc(int handle) if ( port->rxbufp != port->rxbufc ) { - c = port->rxbuf[MASK_SERIAL_RXBUF_IDX(port->rxbufc++)]; + c = port->rxbuf[mask_serial_rxbuf_idx(port->rxbufc++)]; spin_unlock_irqrestore(&port->rx_lock, flags); break; } @@ -336,7 +351,7 @@ void serial_start_sync(int handle) while ( !port->driver->tx_empty(port) ) cpu_relax(); port->driver->putc( - port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]); + port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]); } } @@ -364,9 +379,9 @@ int serial_tx_space(int handle) { struct serial_port *port; if ( handle == -1 ) - return SERIAL_TXBUFSZ; - port = &com[handle & SERHND_IDX]; - return SERIAL_TXBUFSZ - (port->txbufp - port->txbufc); + return serial_txbufsz; + port = &com[handle & SERHND_IDX]; + return serial_txbufsz - (port->txbufp - port->txbufc); } void __devinit serial_init_preirq(void) @@ -431,7 +446,7 @@ void serial_async_transmit(struct serial BUG_ON(!port->driver->tx_empty); if ( port->txbuf == NULL ) 
port->txbuf = alloc_xenheap_pages( - get_order_from_bytes(SERIAL_TXBUFSZ)); + get_order_from_bytes(serial_txbufsz)); } /* diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_acpi.c --- a/xen/drivers/passthrough/amd/iommu_acpi.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/passthrough/amd/iommu_acpi.c Thu Apr 24 14:08:29 2008 -0600 @@ -139,7 +139,7 @@ static int __init register_exclusion_ran iommu = find_iommu_for_device(bus, devfn); if ( !iommu ) { - dprintk(XENLOG_ERR, "IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf); + amd_iov_error("IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf); return -ENODEV; } req = ivrs_mappings[bdf].dte_requestor_id; @@ -221,7 +221,7 @@ static int __init parse_ivmd_device_sele bdf = ivmd_block->header.dev_id; if ( bdf >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVMD Error: Invalid Dev_Id 0x%x\n", bdf); + amd_iov_error("IVMD Error: Invalid Dev_Id 0x%x\n", bdf); return -ENODEV; } @@ -238,21 +238,18 @@ static int __init parse_ivmd_device_rang first_bdf = ivmd_block->header.dev_id; if ( first_bdf >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVMD Error: " - "Invalid Range_First Dev_Id 0x%x\n", first_bdf); + amd_iov_error( + "IVMD Error: Invalid Range_First Dev_Id 0x%x\n", first_bdf); return -ENODEV; } last_bdf = ivmd_block->last_dev_id; if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) ) { - dprintk(XENLOG_ERR, "IVMD Error: " - "Invalid Range_Last Dev_Id 0x%x\n", last_bdf); - return -ENODEV; - } - - dprintk(XENLOG_ERR, " Dev_Id Range: 0x%x -> 0x%x\n", - first_bdf, last_bdf); + amd_iov_error( + "IVMD Error: Invalid Range_Last Dev_Id 0x%x\n", last_bdf); + return -ENODEV; + } for ( bdf = first_bdf, error = 0; (bdf <= last_bdf) && !error; bdf++ ) error = register_exclusion_range_for_device( @@ -272,8 +269,7 @@ static int __init parse_ivmd_device_iomm ivmd_block->cap_offset); if ( !iommu ) { - dprintk(XENLOG_ERR, - "IVMD Error: No IOMMU for Dev_Id 0x%x Cap 0x%x\n", + amd_iov_error("IVMD Error: No IOMMU for 
Dev_Id 0x%x Cap 0x%x\n", ivmd_block->header.dev_id, ivmd_block->cap_offset); return -ENODEV; } @@ -290,7 +286,7 @@ static int __init parse_ivmd_block(struc if ( ivmd_block->header.length < sizeof(struct acpi_ivmd_block_header) ) { - dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Length!\n"); + amd_iov_error("IVMD Error: Invalid Block Length!\n"); return -ENODEV; } @@ -299,10 +295,9 @@ static int __init parse_ivmd_block(struc base = start_addr & PAGE_MASK; limit = (start_addr + mem_length - 1) & PAGE_MASK; - dprintk(XENLOG_INFO, "IVMD Block: Type 0x%x\n", - ivmd_block->header.type); - dprintk(XENLOG_INFO, " Start_Addr_Phys 0x%lx\n", start_addr); - dprintk(XENLOG_INFO, " Mem_Length 0x%lx\n", mem_length); + amd_iov_info("IVMD Block: Type 0x%x\n",ivmd_block->header.type); + amd_iov_info(" Start_Addr_Phys 0x%lx\n", start_addr); + amd_iov_info(" Mem_Length 0x%lx\n", mem_length); if ( get_field_from_byte(ivmd_block->header.flags, AMD_IOMMU_ACPI_EXCLUSION_RANGE_MASK, @@ -321,7 +316,7 @@ static int __init parse_ivmd_block(struc } else { - dprintk(KERN_ERR, "IVMD Error: Invalid Flag Field!\n"); + amd_iov_error("IVMD Error: Invalid Flag Field!\n"); return -ENODEV; } @@ -344,7 +339,7 @@ static int __init parse_ivmd_block(struc base, limit, iw, ir); default: - dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Type!\n"); + amd_iov_error("IVMD Error: Invalid Block Type!\n"); return -ENODEV; } } @@ -354,7 +349,7 @@ static u16 __init parse_ivhd_device_padd { if ( header_length < (block_length + pad_length) ) { - dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n"); + amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n"); return 0; } @@ -369,8 +364,7 @@ static u16 __init parse_ivhd_device_sele bdf = ivhd_device->header.dev_id; if ( bdf >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Device_Entry Dev_Id 0x%x\n", bdf); + amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf); return 0; } @@ -393,14 +387,14 @@ static u16 __init 
parse_ivhd_device_rang dev_length = sizeof(struct acpi_ivhd_device_range); if ( header_length < (block_length + dev_length) ) { - dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n"); + amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n"); return 0; } if ( ivhd_device->range.trailer.type != AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END ) { - dprintk(XENLOG_ERR, "IVHD Error: " + amd_iov_error("IVHD Error: " "Invalid Range: End_Type 0x%x\n", ivhd_device->range.trailer.type); return 0; @@ -409,21 +403,20 @@ static u16 __init parse_ivhd_device_rang first_bdf = ivhd_device->header.dev_id; if ( first_bdf >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Range: First Dev_Id 0x%x\n", first_bdf); + amd_iov_error( + "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf); return 0; } last_bdf = ivhd_device->range.trailer.dev_id; if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Range: Last Dev_Id 0x%x\n", last_bdf); - return 0; - } - - dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n", - first_bdf, last_bdf); + amd_iov_error( + "IVHD Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf); + return 0; + } + + amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf); /* override flags for range of devices */ sys_mgt = get_field_from_byte(ivhd_device->header.flags, @@ -444,28 +437,25 @@ static u16 __init parse_ivhd_device_alia dev_length = sizeof(struct acpi_ivhd_device_alias); if ( header_length < (block_length + dev_length) ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Device_Entry Length!\n"); + amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n"); return 0; } bdf = ivhd_device->header.dev_id; if ( bdf >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Device_Entry Dev_Id 0x%x\n", bdf); + amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf); return 0; } alias_id = ivhd_device->alias.dev_id; if ( alias_id >= 
ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Alias Dev_Id 0x%x\n", alias_id); - return 0; - } - - dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id); + amd_iov_error("IVHD Error: Invalid Alias Dev_Id 0x%x\n", alias_id); + return 0; + } + + amd_iov_info(" Dev_Id Alias: 0x%x\n", alias_id); /* override requestor_id and flags for device */ ivrs_mappings[bdf].dte_requestor_id = alias_id; @@ -490,15 +480,14 @@ static u16 __init parse_ivhd_device_alia dev_length = sizeof(struct acpi_ivhd_device_alias_range); if ( header_length < (block_length + dev_length) ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Device_Entry Length!\n"); + amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n"); return 0; } if ( ivhd_device->alias_range.trailer.type != AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END ) { - dprintk(XENLOG_ERR, "IVHD Error: " + amd_iov_error("IVHD Error: " "Invalid Range: End_Type 0x%x\n", ivhd_device->alias_range.trailer.type); return 0; @@ -507,30 +496,28 @@ static u16 __init parse_ivhd_device_alia first_bdf = ivhd_device->header.dev_id; if ( first_bdf >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR,"IVHD Error: " - "Invalid Range: First Dev_Id 0x%x\n", first_bdf); + amd_iov_error( + "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf); return 0; } last_bdf = ivhd_device->alias_range.trailer.dev_id; if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Range: Last Dev_Id 0x%x\n", last_bdf); + amd_iov_error( + "IVHD Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf); return 0; } alias_id = ivhd_device->alias_range.alias.dev_id; if ( alias_id >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Alias Dev_Id 0x%x\n", alias_id); - return 0; - } - - dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n", - first_bdf, last_bdf); - dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id); + amd_iov_error("IVHD Error: Invalid Alias Dev_Id 0x%x\n", alias_id); + return 0; + 
} + + amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf); + amd_iov_info(" Dev_Id Alias: 0x%x\n", alias_id); /* override requestor_id and flags for range of devices */ sys_mgt = get_field_from_byte(ivhd_device->header.flags, @@ -555,16 +542,14 @@ static u16 __init parse_ivhd_device_exte dev_length = sizeof(struct acpi_ivhd_device_extended); if ( header_length < (block_length + dev_length) ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Device_Entry Length!\n"); + amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n"); return 0; } bdf = ivhd_device->header.dev_id; if ( bdf >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Device_Entry Dev_Id 0x%x\n", bdf); + amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf); return 0; } @@ -587,15 +572,14 @@ static u16 __init parse_ivhd_device_exte dev_length = sizeof(struct acpi_ivhd_device_extended_range); if ( header_length < (block_length + dev_length) ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Device_Entry Length!\n"); + amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n"); return 0; } if ( ivhd_device->extended_range.trailer.type != AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END ) { - dprintk(XENLOG_ERR, "IVHD Error: " + amd_iov_error("IVHD Error: " "Invalid Range: End_Type 0x%x\n", ivhd_device->extended_range.trailer.type); return 0; @@ -604,20 +588,20 @@ static u16 __init parse_ivhd_device_exte first_bdf = ivhd_device->header.dev_id; if ( first_bdf >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Range: First Dev_Id 0x%x\n", first_bdf); + amd_iov_error( + "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf); return 0; } last_bdf = ivhd_device->extended_range.trailer.dev_id; if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Range: Last Dev_Id 0x%x\n", last_bdf); - return 0; - } - - dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n", + amd_iov_error( + "IVHD 
Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf); + return 0; + } + + amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf); /* override flags for range of devices */ @@ -639,7 +623,7 @@ static int __init parse_ivhd_block(struc if ( ivhd_block->header.length < sizeof(struct acpi_ivhd_block_header) ) { - dprintk(XENLOG_ERR, "IVHD Error: Invalid Block Length!\n"); + amd_iov_error("IVHD Error: Invalid Block Length!\n"); return -ENODEV; } @@ -647,21 +631,16 @@ static int __init parse_ivhd_block(struc ivhd_block->cap_offset); if ( !iommu ) { - dprintk(XENLOG_ERR, - "IVHD Error: No IOMMU for Dev_Id 0x%x Cap 0x%x\n", + amd_iov_error("IVHD Error: No IOMMU for Dev_Id 0x%x Cap 0x%x\n", ivhd_block->header.dev_id, ivhd_block->cap_offset); return -ENODEV; } - dprintk(XENLOG_INFO, "IVHD Block:\n"); - dprintk(XENLOG_INFO, " Cap_Offset 0x%x\n", - ivhd_block->cap_offset); - dprintk(XENLOG_INFO, " MMIO_BAR_Phys 0x%lx\n", - (unsigned long)ivhd_block->mmio_base); - dprintk(XENLOG_INFO, " PCI_Segment 0x%x\n", - ivhd_block->pci_segment); - dprintk(XENLOG_INFO, " IOMMU_Info 0x%x\n", - ivhd_block->iommu_info); + amd_iov_info("IVHD Block:\n"); + amd_iov_info(" Cap_Offset 0x%x\n", ivhd_block->cap_offset); + amd_iov_info(" MMIO_BAR_Phys 0x%"PRIx64"\n",ivhd_block->mmio_base); + amd_iov_info( " PCI_Segment 0x%x\n", ivhd_block->pci_segment); + amd_iov_info( " IOMMU_Info 0x%x\n", ivhd_block->iommu_info); /* override IOMMU support flags */ iommu->coherent = get_field_from_byte(ivhd_block->header.flags, @@ -692,13 +671,10 @@ static int __init parse_ivhd_block(struc ivhd_device = (union acpi_ivhd_device *) ((u8 *)ivhd_block + block_length); - dprintk(XENLOG_INFO, "IVHD Device Entry:\n"); - dprintk(XENLOG_INFO, " Type 0x%x\n", - ivhd_device->header.type); - dprintk(XENLOG_INFO, " Dev_Id 0x%x\n", - ivhd_device->header.dev_id); - dprintk(XENLOG_INFO, " Flags 0x%x\n", - ivhd_device->header.flags); + amd_iov_info( "IVHD Device Entry:\n"); + amd_iov_info( " Type 0x%x\n", 
ivhd_device->header.type); + amd_iov_info( " Dev_Id 0x%x\n", ivhd_device->header.dev_id); + amd_iov_info( " Flags 0x%x\n", ivhd_device->header.flags); switch ( ivhd_device->header.type ) { @@ -741,8 +717,7 @@ static int __init parse_ivhd_block(struc ivhd_block->header.length, block_length); break; default: - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Device Type!\n"); + amd_iov_error("IVHD Error: Invalid Device Type!\n"); dev_length = 0; break; } @@ -774,46 +749,49 @@ static int __init parse_ivrs_block(struc return parse_ivmd_block(ivmd_block); default: - dprintk(XENLOG_ERR, "IVRS Error: Invalid Block Type!\n"); + amd_iov_error("IVRS Error: Invalid Block Type!\n"); return -ENODEV; } return 0; } -void __init dump_acpi_table_header(struct acpi_table_header *table) -{ +static void __init dump_acpi_table_header(struct acpi_table_header *table) +{ +#ifdef AMD_IOV_DEBUG int i; - printk(XENLOG_INFO "AMD IOMMU: ACPI Table:\n"); - printk(XENLOG_INFO " Signature "); + amd_iov_info("ACPI Table:\n"); + amd_iov_info(" Signature "); for ( i = 0; i < ACPI_NAME_SIZE; i++ ) printk("%c", table->signature[i]); printk("\n"); - printk(" Length 0x%x\n", table->length); - printk(" Revision 0x%x\n", table->revision); - printk(" CheckSum 0x%x\n", table->checksum); - - printk(" OEM_Id "); + amd_iov_info(" Length 0x%x\n", table->length); + amd_iov_info(" Revision 0x%x\n", table->revision); + amd_iov_info(" CheckSum 0x%x\n", table->checksum); + + amd_iov_info(" OEM_Id "); for ( i = 0; i < ACPI_OEM_ID_SIZE; i++ ) printk("%c", table->oem_id[i]); printk("\n"); - printk(" OEM_Table_Id "); + amd_iov_info(" OEM_Table_Id "); for ( i = 0; i < ACPI_OEM_TABLE_ID_SIZE; i++ ) printk("%c", table->oem_table_id[i]); printk("\n"); - printk(" OEM_Revision 0x%x\n", table->oem_revision); - - printk(" Creator_Id "); + amd_iov_info(" OEM_Revision 0x%x\n", table->oem_revision); + + amd_iov_info(" Creator_Id "); for ( i = 0; i < ACPI_NAME_SIZE; i++ ) printk("%c", table->asl_compiler_id[i]); printk("\n"); - 
printk(" Creator_Revision 0x%x\n", + amd_iov_info(" Creator_Revision 0x%x\n", table->asl_compiler_revision); +#endif + } int __init parse_ivrs_table(unsigned long phys_addr, unsigned long size) @@ -827,9 +805,7 @@ int __init parse_ivrs_table(unsigned lon BUG_ON(!table); -#if 0 dump_acpi_table_header(table); -#endif /* validate checksum: sum of entire table == 0 */ checksum = 0; @@ -838,7 +814,7 @@ int __init parse_ivrs_table(unsigned lon checksum += raw_table[i]; if ( checksum ) { - dprintk(XENLOG_ERR, "IVRS Error: " + amd_iov_error("IVRS Error: " "Invalid Checksum 0x%x\n", checksum); return -ENODEV; } @@ -850,15 +826,15 @@ int __init parse_ivrs_table(unsigned lon ivrs_block = (struct acpi_ivrs_block_header *) ((u8 *)table + length); - dprintk(XENLOG_INFO, "IVRS Block:\n"); - dprintk(XENLOG_INFO, " Type 0x%x\n", ivrs_block->type); - dprintk(XENLOG_INFO, " Flags 0x%x\n", ivrs_block->flags); - dprintk(XENLOG_INFO, " Length 0x%x\n", ivrs_block->length); - dprintk(XENLOG_INFO, " Dev_Id 0x%x\n", ivrs_block->dev_id); + amd_iov_info("IVRS Block:\n"); + amd_iov_info(" Type 0x%x\n", ivrs_block->type); + amd_iov_info(" Flags 0x%x\n", ivrs_block->flags); + amd_iov_info(" Length 0x%x\n", ivrs_block->length); + amd_iov_info(" Dev_Id 0x%x\n", ivrs_block->dev_id); if ( table->length < (length + ivrs_block->length) ) { - dprintk(XENLOG_ERR, "IVRS Error: " + amd_iov_error("IVRS Error: " "Table Length Exceeded: 0x%x -> 0x%lx\n", table->length, (length + ivrs_block->length)); diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_detect.c --- a/xen/drivers/passthrough/amd/iommu_detect.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/passthrough/amd/iommu_detect.c Thu Apr 24 14:08:29 2008 -0600 @@ -85,6 +85,45 @@ int __init get_iommu_last_downstream_bus return 0; } +static int __init get_iommu_msi_capabilities(u8 bus, u8 dev, u8 func, + struct amd_iommu *iommu) +{ + int cap_ptr, cap_id; + u32 cap_header; + u16 control; + int count = 0; + + cap_ptr = 
pci_conf_read8(bus, dev, func, + PCI_CAPABILITY_LIST); + + while ( cap_ptr >= PCI_MIN_CAP_OFFSET && + count < PCI_MAX_CAP_BLOCKS ) + { + cap_ptr &= PCI_CAP_PTR_MASK; + cap_header = pci_conf_read32(bus, dev, func, cap_ptr); + cap_id = get_field_from_reg_u32(cap_header, + PCI_CAP_ID_MASK, PCI_CAP_ID_SHIFT); + + if ( cap_id == PCI_CAP_ID_MSI ) + { + iommu->msi_cap = cap_ptr; + break; + } + cap_ptr = get_field_from_reg_u32(cap_header, + PCI_CAP_NEXT_PTR_MASK, PCI_CAP_NEXT_PTR_SHIFT); + count++; + } + + if ( !iommu->msi_cap ) + return -ENODEV; + + amd_iov_info("Found MSI capability block \n"); + control = pci_conf_read16(bus, dev, func, + iommu->msi_cap + PCI_MSI_FLAGS); + iommu->maskbit = control & PCI_MSI_FLAGS_MASKBIT; + return 0; +} + int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr, struct amd_iommu *iommu) { @@ -99,8 +138,7 @@ int __init get_iommu_capabilities(u8 bus if ( ((mmio_bar & 0x1) == 0) || (iommu->mmio_base_phys == 0) ) { - dprintk(XENLOG_ERR , - "AMD IOMMU: Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar); + amd_iov_error("Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar); return -ENODEV; } @@ -133,6 +171,8 @@ int __init get_iommu_capabilities(u8 bus iommu->msi_number = get_field_from_reg_u32( misc_info, PCI_CAP_MSI_NUMBER_MASK, PCI_CAP_MSI_NUMBER_SHIFT); + get_iommu_msi_capabilities(bus, dev, func, iommu); + return 0; } @@ -176,24 +216,24 @@ static int __init scan_functions_for_iom int bus, int dev, iommu_detect_callback_ptr_t iommu_detect_callback) { int func, hdr_type; - int count, error = 0; - - func = 0; - count = 1; - while ( VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func, - PCI_VENDOR_ID)) && - !error && (func < count) ) + int count = 1, error = 0; + + for ( func = 0; + (func < count) && !error && + VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func, + PCI_VENDOR_ID)); + func++ ) + { hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE); - if ( func == 0 && IS_PCI_MULTI_FUNCTION(hdr_type) ) + if ( (func == 0) && 
IS_PCI_MULTI_FUNCTION(hdr_type) ) count = PCI_MAX_FUNC_COUNT; if ( IS_PCI_TYPE0_HEADER(hdr_type) || IS_PCI_TYPE1_HEADER(hdr_type) ) error = scan_caps_for_iommu(bus, dev, func, iommu_detect_callback); - func++; } return error; diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/passthrough/amd/iommu_init.c Thu Apr 24 14:08:29 2008 -0600 @@ -27,6 +27,7 @@ #include "../pci_regs.h" extern int nr_amd_iommus; +static struct amd_iommu *vector_to_iommu[NR_VECTORS]; int __init map_iommu_mmio_region(struct amd_iommu *iommu) { @@ -34,8 +35,7 @@ int __init map_iommu_mmio_region(struct if ( nr_amd_iommus > MAX_AMD_IOMMUS ) { - gdprintk(XENLOG_ERR, - "IOMMU: nr_amd_iommus %d > MAX_IOMMUS\n", nr_amd_iommus); + amd_iov_error("nr_amd_iommus %d > MAX_IOMMUS\n", nr_amd_iommus); return -ENOMEM; } @@ -109,6 +109,33 @@ void __init register_iommu_cmd_buffer_in writel(entry, iommu->mmio_base+IOMMU_CMD_BUFFER_BASE_HIGH_OFFSET); } +void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu) +{ + u64 addr_64, addr_lo, addr_hi; + u32 power_of2_entries; + u32 entry; + + addr_64 = (u64)virt_to_maddr(iommu->event_log.buffer); + addr_lo = addr_64 & DMA_32BIT_MASK; + addr_hi = addr_64 >> 32; + + set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, + IOMMU_EVENT_LOG_BASE_LOW_MASK, + IOMMU_EVENT_LOG_BASE_LOW_SHIFT, &entry); + writel(entry, iommu->mmio_base + IOMMU_EVENT_LOG_BASE_LOW_OFFSET); + + power_of2_entries = get_order_from_bytes(iommu->event_log.alloc_size) + + IOMMU_EVENT_LOG_POWER_OF2_ENTRIES_PER_PAGE; + + set_field_in_reg_u32((u32)addr_hi, 0, + IOMMU_EVENT_LOG_BASE_HIGH_MASK, + IOMMU_EVENT_LOG_BASE_HIGH_SHIFT, &entry); + set_field_in_reg_u32(power_of2_entries, entry, + IOMMU_EVENT_LOG_LENGTH_MASK, + IOMMU_EVENT_LOG_LENGTH_SHIFT, &entry); + writel(entry, iommu->mmio_base+IOMMU_EVENT_LOG_BASE_HIGH_OFFSET); +} + static void __init 
set_iommu_translation_control(struct amd_iommu *iommu, int enable) { @@ -179,10 +206,281 @@ static void __init register_iommu_exclus writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_LOW_OFFSET); } +static void __init set_iommu_event_log_control(struct amd_iommu *iommu, + int enable) +{ + u32 entry; + + entry = readl(iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); + set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED : + IOMMU_CONTROL_DISABLED, entry, + IOMMU_CONTROL_EVENT_LOG_ENABLE_MASK, + IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT, &entry); + writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); + + set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED : + IOMMU_CONTROL_DISABLED, entry, + IOMMU_CONTROL_EVENT_LOG_INT_MASK, + IOMMU_CONTROL_EVENT_LOG_INT_SHIFT, &entry); + writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); + + set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED : + IOMMU_CONTROL_DISABLED, entry, + IOMMU_CONTROL_COMP_WAIT_INT_MASK, + IOMMU_CONTROL_COMP_WAIT_INT_SHIFT, &entry); + writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); +} + +static int amd_iommu_read_event_log(struct amd_iommu *iommu, u32 event[]) +{ + u32 tail, head, *event_log; + int i; + + BUG_ON( !iommu || !event ); + + /* make sure there's an entry in the log */ + tail = get_field_from_reg_u32( + readl(iommu->mmio_base + IOMMU_EVENT_LOG_TAIL_OFFSET), + IOMMU_EVENT_LOG_TAIL_MASK, + IOMMU_EVENT_LOG_TAIL_SHIFT); + if ( tail != iommu->event_log_head ) + { + /* read event log entry */ + event_log = (u32 *)(iommu->event_log.buffer + + (iommu->event_log_head * + IOMMU_EVENT_LOG_ENTRY_SIZE)); + for ( i = 0; i < IOMMU_EVENT_LOG_U32_PER_ENTRY; i++ ) + event[i] = event_log[i]; + if ( ++iommu->event_log_head == iommu->event_log.entries ) + iommu->event_log_head = 0; + + /* update head pointer */ + set_field_in_reg_u32(iommu->event_log_head, 0, + IOMMU_EVENT_LOG_HEAD_MASK, + IOMMU_EVENT_LOG_HEAD_SHIFT, &head); + writel(head, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET); + return 0; + 
} + + return -EFAULT; +} + +static void amd_iommu_msi_data_init(struct amd_iommu *iommu, int vector) +{ + u32 msi_data; + u8 bus = (iommu->bdf >> 8) & 0xff; + u8 dev = PCI_SLOT(iommu->bdf & 0xff); + u8 func = PCI_FUNC(iommu->bdf & 0xff); + + msi_data = MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + MSI_DATA_DELIVERY_FIXED | + MSI_DATA_VECTOR(vector); + + pci_conf_write32(bus, dev, func, + iommu->msi_cap + PCI_MSI_DATA_64, msi_data); +} + +static void amd_iommu_msi_addr_init(struct amd_iommu *iommu, int phy_cpu) +{ + + int bus = (iommu->bdf >> 8) & 0xff; + int dev = PCI_SLOT(iommu->bdf & 0xff); + int func = PCI_FUNC(iommu->bdf & 0xff); + + u32 address_hi = 0; + u32 address_lo = MSI_ADDR_HEADER | + MSI_ADDR_DESTMODE_PHYS | + MSI_ADDR_REDIRECTION_CPU | + MSI_ADDR_DESTID_CPU(phy_cpu); + + pci_conf_write32(bus, dev, func, + iommu->msi_cap + PCI_MSI_ADDRESS_LO, address_lo); + pci_conf_write32(bus, dev, func, + iommu->msi_cap + PCI_MSI_ADDRESS_HI, address_hi); +} + +static void amd_iommu_msi_enable(struct amd_iommu *iommu, int flag) +{ + u16 control; + int bus = (iommu->bdf >> 8) & 0xff; + int dev = PCI_SLOT(iommu->bdf & 0xff); + int func = PCI_FUNC(iommu->bdf & 0xff); + + control = pci_conf_read16(bus, dev, func, + iommu->msi_cap + PCI_MSI_FLAGS); + control &= ~(1); + if ( flag ) + control |= flag; + pci_conf_write16(bus, dev, func, + iommu->msi_cap + PCI_MSI_FLAGS, control); +} + +static void iommu_msi_unmask(unsigned int vector) +{ + unsigned long flags; + struct amd_iommu *iommu = vector_to_iommu[vector]; + + /* FIXME: do not support mask bits at the moment */ + if ( iommu->maskbit ) + return; + + spin_lock_irqsave(&iommu->lock, flags); + amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED); + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void iommu_msi_mask(unsigned int vector) +{ + unsigned long flags; + struct amd_iommu *iommu = vector_to_iommu[vector]; + + /* FIXME: do not support mask bits at the moment */ + if ( iommu->maskbit ) + return; + + 
spin_lock_irqsave(&iommu->lock, flags); + amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED); + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static unsigned int iommu_msi_startup(unsigned int vector) +{ + iommu_msi_unmask(vector); + return 0; +} + +static void iommu_msi_end(unsigned int vector) +{ + iommu_msi_unmask(vector); + ack_APIC_irq(); +} + +static void iommu_msi_set_affinity(unsigned int vector, cpumask_t dest) +{ + struct amd_iommu *iommu = vector_to_iommu[vector]; + amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest))); +} + +static struct hw_interrupt_type iommu_msi_type = { + .typename = "AMD_IOV_MSI", + .startup = iommu_msi_startup, + .shutdown = iommu_msi_mask, + .enable = iommu_msi_unmask, + .disable = iommu_msi_mask, + .ack = iommu_msi_mask, + .end = iommu_msi_end, + .set_affinity = iommu_msi_set_affinity, +}; + +static void parse_event_log_entry(u32 entry[]) +{ + u16 domain_id, device_id; + u32 code; + u64 *addr; + char * event_str[] = {"ILLEGAL_DEV_TABLE_ENTRY", + "IO_PAGE_FALT", + "DEV_TABLE_HW_ERROR", + "PAGE_TABLE_HW_ERROR", + "ILLEGAL_COMMAND_ERROR", + "COMMAND_HW_ERROR", + "IOTLB_INV_TIMEOUT", + "INVALID_DEV_REQUEST"}; + + code = get_field_from_reg_u32(entry[1], + IOMMU_EVENT_CODE_MASK, + IOMMU_EVENT_CODE_SHIFT); + + if ( (code > IOMMU_EVENT_INVALID_DEV_REQUEST) + || (code < IOMMU_EVENT_ILLEGAL_DEV_TABLE_ENTRY) ) + { + amd_iov_error("Invalid event log entry!\n"); + return; + } + + if ( code == IOMMU_EVENT_IO_PAGE_FALT ) + { + device_id = get_field_from_reg_u32(entry[0], + IOMMU_EVENT_DEVICE_ID_MASK, + IOMMU_EVENT_DEVICE_ID_SHIFT); + domain_id = get_field_from_reg_u32(entry[1], + IOMMU_EVENT_DOMAIN_ID_MASK, + IOMMU_EVENT_DOMAIN_ID_SHIFT); + addr= (u64*) (entry + 2); + printk(XENLOG_ERR "AMD_IOV: " + "%s: domain:%d, device id:0x%x, fault address:0x%"PRIx64"\n", + event_str[code-1], domain_id, device_id, *addr); + } +} + +static void amd_iommu_page_fault(int vector, void *dev_id, + struct cpu_user_regs *regs) +{ + u32 event[4]; 
+ unsigned long flags; + int ret = 0; + struct amd_iommu *iommu = dev_id; + + spin_lock_irqsave(&iommu->lock, flags); + ret = amd_iommu_read_event_log(iommu, event); + spin_unlock_irqrestore(&iommu->lock, flags); + + if ( ret != 0 ) + return; + parse_event_log_entry(event); +} + +static int set_iommu_interrupt_handler(struct amd_iommu *iommu) +{ + int vector, ret; + unsigned long flags; + + vector = assign_irq_vector(AUTO_ASSIGN); + vector_to_iommu[vector] = iommu; + + /* make irq == vector */ + irq_vector[vector] = vector; + vector_irq[vector] = vector; + + if ( !vector ) + { + amd_iov_error("no vectors\n"); + return 0; + } + + irq_desc[vector].handler = &iommu_msi_type; + ret = request_irq(vector, amd_iommu_page_fault, 0, "dmar", iommu); + if ( ret ) + { + amd_iov_error("can't request irq\n"); + return 0; + } + + spin_lock_irqsave(&iommu->lock, flags); + + amd_iommu_msi_data_init (iommu, vector); + amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map))); + amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED); + + spin_unlock_irqrestore(&iommu->lock, flags); + + return vector; +} + void __init enable_iommu(struct amd_iommu *iommu) { + unsigned long flags; + + set_iommu_interrupt_handler(iommu); + + spin_lock_irqsave(&iommu->lock, flags); + register_iommu_exclusion_range(iommu); set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED); + set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED); set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED); - printk("AMD IOMMU %d: Enabled\n", nr_amd_iommus); -} + + spin_unlock_irqrestore(&iommu->lock, flags); + + printk("AMD_IOV: IOMMU %d Enabled.\n", nr_amd_iommus); +} diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_map.c --- a/xen/drivers/passthrough/amd/iommu_map.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/passthrough/amd/iommu_map.c Thu Apr 24 14:08:29 2008 -0600 @@ -154,8 +154,7 @@ void flush_command_buffer(struct amd_iom } else { - dprintk(XENLOG_WARNING, 
"AMD IOMMU: Warning:" - " ComWaitInt bit did not assert!\n"); + amd_iov_warning("Warning: ComWaitInt bit did not assert!\n"); } } } @@ -402,10 +401,9 @@ int amd_iommu_map_page(struct domain *d, pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn); if ( pte == NULL ) { - dprintk(XENLOG_ERR, - "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn); + amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); spin_unlock_irqrestore(&hd->mapping_lock, flags); - return -EIO; + return -EFAULT; } set_page_table_entry_present((u32 *)pte, maddr, iw, ir); @@ -439,10 +437,9 @@ int amd_iommu_unmap_page(struct domain * pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn); if ( pte == NULL ) { - dprintk(XENLOG_ERR, - "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn); + amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); spin_unlock_irqrestore(&hd->mapping_lock, flags); - return -EIO; + return -EFAULT; } /* mark PTE as 'page not present' */ @@ -479,9 +476,8 @@ int amd_iommu_reserve_domain_unity_map( hd->root_table, hd->paging_mode, phys_addr >> PAGE_SHIFT); if ( pte == NULL ) { - dprintk(XENLOG_ERR, - "AMD IOMMU: Invalid IO pagetable entry " - "phys_addr = %lx\n", phys_addr); + amd_iov_error( + "Invalid IO pagetable entry phys_addr = %lx\n", phys_addr); spin_unlock_irqrestore(&hd->mapping_lock, flags); return -EFAULT; } @@ -528,8 +524,7 @@ int amd_iommu_sync_p2m(struct domain *d) pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn); if ( pte == NULL ) { - dprintk(XENLOG_ERR, - "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn); + amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); spin_unlock_irqrestore(&hd->mapping_lock, flags); return -EFAULT; } diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Apr 24 14:08:29 2008 -0600 @@ 
-29,16 +29,11 @@ struct list_head amd_iommu_head; struct list_head amd_iommu_head; long amd_iommu_poll_comp_wait = COMPLETION_WAIT_DEFAULT_POLLING_COUNT; static long amd_iommu_cmd_buffer_entries = IOMMU_CMD_BUFFER_DEFAULT_ENTRIES; -int nr_amd_iommus = 0; - -unsigned short ivrs_bdf_entries = 0; -struct ivrs_mappings *ivrs_mappings = NULL; - -/* will set if amd-iommu HW is found */ -int amd_iommu_enabled = 0; - -static int enable_amd_iommu = 0; -boolean_param("enable_amd_iommu", enable_amd_iommu); +static long amd_iommu_event_log_entries = IOMMU_EVENT_LOG_DEFAULT_ENTRIES; +int nr_amd_iommus; + +unsigned short ivrs_bdf_entries; +struct ivrs_mappings *ivrs_mappings; static void deallocate_domain_page_tables(struct hvm_iommu *hd) { @@ -73,25 +68,8 @@ static void __init deallocate_iommu_reso static void __init deallocate_iommu_resources(struct amd_iommu *iommu) { deallocate_iommu_table_struct(&iommu->dev_table); - deallocate_iommu_table_struct(&iommu->cmd_buffer);; -} - -static void __init detect_cleanup(void) -{ - struct amd_iommu *iommu, *next; - - list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list ) - { - list_del(&iommu->list); - deallocate_iommu_resources(iommu); - xfree(iommu); - } - - if ( ivrs_mappings ) - { - xfree(ivrs_mappings); - ivrs_mappings = NULL; - } + deallocate_iommu_table_struct(&iommu->cmd_buffer); + deallocate_iommu_table_struct(&iommu->event_log); } static int __init allocate_iommu_table_struct(struct table_struct *table, @@ -102,7 +80,7 @@ static int __init allocate_iommu_table_s if ( !table->buffer ) { - dprintk(XENLOG_ERR, "AMD IOMMU: Error allocating %s\n", name); + amd_iov_error("Error allocating %s\n", name); return -ENOMEM; } @@ -139,6 +117,20 @@ static int __init allocate_iommu_resourc "Command Buffer") != 0 ) goto error_out; + /* allocate 'event log' in power of 2 increments of 4K */ + iommu->event_log_head = 0; + iommu->event_log.alloc_size = + PAGE_SIZE << get_order_from_bytes( + PAGE_ALIGN(amd_iommu_event_log_entries * + 
IOMMU_EVENT_LOG_ENTRY_SIZE)); + + iommu->event_log.entries = + iommu->event_log.alloc_size / IOMMU_EVENT_LOG_ENTRY_SIZE; + + if ( allocate_iommu_table_struct(&iommu->event_log, + "Event Log") != 0 ) + goto error_out; + return 0; error_out: @@ -153,7 +145,7 @@ int iommu_detect_callback(u8 bus, u8 dev iommu = (struct amd_iommu *) xmalloc(struct amd_iommu); if ( !iommu ) { - dprintk(XENLOG_ERR, "AMD IOMMU: Error allocating amd_iommu\n"); + amd_iov_error("Error allocating amd_iommu\n"); return -ENOMEM; } memset(iommu, 0, sizeof(struct amd_iommu)); @@ -203,6 +195,7 @@ static int __init amd_iommu_init(void) goto error_out; register_iommu_dev_table_in_mmio_space(iommu); register_iommu_cmd_buffer_in_mmio_space(iommu); + register_iommu_event_log_in_mmio_space(iommu); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -220,18 +213,14 @@ static int __init amd_iommu_init(void) } if ( acpi_table_parse(ACPI_IVRS, parse_ivrs_table) != 0 ) - dprintk(XENLOG_INFO, "AMD IOMMU: Did not find IVRS table!\n"); + amd_iov_error("Did not find IVRS table!\n"); for_each_amd_iommu ( iommu ) { - spin_lock_irqsave(&iommu->lock, flags); /* enable IOMMU translation services */ enable_iommu(iommu); nr_amd_iommus++; - spin_unlock_irqrestore(&iommu->lock, flags); - } - - amd_iommu_enabled = 1; + } return 0; @@ -262,7 +251,7 @@ struct amd_iommu *find_iommu_for_device( return NULL; } -void amd_iommu_setup_domain_device( +static void amd_iommu_setup_domain_device( struct domain *domain, struct amd_iommu *iommu, int bdf) { void *dte; @@ -288,12 +277,12 @@ void amd_iommu_setup_domain_device( sys_mgt = ivrs_mappings[req_id].dte_sys_mgt_enable; dev_ex = ivrs_mappings[req_id].dte_allow_exclusion; amd_iommu_set_dev_table_entry((u32 *)dte, root_ptr, - req_id, sys_mgt, dev_ex, + hd->domain_id, sys_mgt, dev_ex, hd->paging_mode); invalidate_dev_table_entry(iommu, req_id); flush_command_buffer(iommu); - dprintk(XENLOG_INFO, "AMD IOMMU: Set DTE req_id:%x, " + amd_iov_info("Enable DTE:0x%x, " "root_ptr:%"PRIx64", 
domain_id:%d, paging_mode:%d\n", req_id, root_ptr, hd->domain_id, hd->paging_mode); @@ -301,9 +290,9 @@ void amd_iommu_setup_domain_device( } } -void __init amd_iommu_setup_dom0_devices(void) -{ - struct hvm_iommu *hd = domain_hvm_iommu(dom0); +static void amd_iommu_setup_dom0_devices(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); struct amd_iommu *iommu; struct pci_dev *pdev; int bus, dev, func; @@ -333,80 +322,72 @@ void __init amd_iommu_setup_dom0_devices find_iommu_for_device(bus, pdev->devfn) : NULL; if ( iommu ) - amd_iommu_setup_domain_device(dom0, iommu, bdf); + amd_iommu_setup_domain_device(d, iommu, bdf); } } } } -int amd_iommu_detect(void) -{ - unsigned long i; +int amd_iov_detect(void) +{ int last_bus; - struct amd_iommu *iommu; - - if ( !enable_amd_iommu ) - { - printk("AMD IOMMU: Disabled\n"); - return 0; - } + struct amd_iommu *iommu, *next; INIT_LIST_HEAD(&amd_iommu_head); if ( scan_for_iommu(iommu_detect_callback) != 0 ) { - dprintk(XENLOG_ERR, "AMD IOMMU: Error detection\n"); + amd_iov_error("Error detection\n"); goto error_out; } if ( !iommu_found() ) { - printk("AMD IOMMU: Not found!\n"); - return 0; - } - else - { - /* allocate 'ivrs mappings' table */ - /* note: the table has entries to accomodate all IOMMUs */ - last_bus = 0; - for_each_amd_iommu ( iommu ) - if ( iommu->last_downstream_bus > last_bus ) - last_bus = iommu->last_downstream_bus; - - ivrs_bdf_entries = (last_bus + 1) * - IOMMU_DEV_TABLE_ENTRIES_PER_BUS; - ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries); - - if ( !ivrs_mappings ) - { - dprintk(XENLOG_ERR, "AMD IOMMU:" - " Error allocating IVRS DevMappings table\n"); - goto error_out; - } - memset(ivrs_mappings, 0, - ivrs_bdf_entries * sizeof(struct ivrs_mappings)); - } + printk("AMD_IOV: IOMMU not found!\n"); + goto error_out; + } + + /* allocate 'ivrs mappings' table */ + /* note: the table has entries to accomodate all IOMMUs */ + last_bus = 0; + for_each_amd_iommu ( iommu ) + if ( 
iommu->last_downstream_bus > last_bus ) + last_bus = iommu->last_downstream_bus; + + ivrs_bdf_entries = (last_bus + 1) * + IOMMU_DEV_TABLE_ENTRIES_PER_BUS; + ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries); + if ( ivrs_mappings == NULL ) + { + amd_iov_error("Error allocating IVRS DevMappings table\n"); + goto error_out; + } + memset(ivrs_mappings, 0, + ivrs_bdf_entries * sizeof(struct ivrs_mappings)); if ( amd_iommu_init() != 0 ) { - dprintk(XENLOG_ERR, "AMD IOMMU: Error initialization\n"); - goto error_out; - } - - if ( iommu_domain_init(dom0) != 0 ) - goto error_out; - - /* setup 1:1 page table for dom0 */ - for ( i = 0; i < max_page; i++ ) - amd_iommu_map_page(dom0, i, i); - - amd_iommu_setup_dom0_devices(); + amd_iov_error("Error initialization\n"); + goto error_out; + } + return 0; error_out: - detect_cleanup(); + list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list ) + { + list_del(&iommu->list); + deallocate_iommu_resources(iommu); + xfree(iommu); + } + + if ( ivrs_mappings ) + { + xfree(ivrs_mappings); + ivrs_mappings = NULL; + } + return -ENODEV; - } static int allocate_domain_resources(struct hvm_iommu *hd) @@ -447,12 +428,10 @@ static int get_paging_mode(unsigned long return -ENOMEM; } - dprintk(XENLOG_INFO, "AMD IOMMU: paging mode = %d\n", level); - return level; } -int amd_iommu_domain_init(struct domain *domain) +static int amd_iommu_domain_init(struct domain *domain) { struct hvm_iommu *hd = domain_hvm_iommu(domain); @@ -463,10 +442,18 @@ int amd_iommu_domain_init(struct domain return -ENOMEM; } - if ( is_hvm_domain(domain) ) - hd->paging_mode = IOMMU_PAGE_TABLE_LEVEL_4; - else - hd->paging_mode = get_paging_mode(max_page); + hd->paging_mode = is_hvm_domain(domain)? 
+ IOMMU_PAGE_TABLE_LEVEL_4 : get_paging_mode(max_page); + + if ( domain->domain_id == 0 ) + { + unsigned long i; + /* setup 1:1 page table for dom0 */ + for ( i = 0; i < max_page; i++ ) + amd_iommu_map_page(domain, i, i); + + amd_iommu_setup_dom0_devices(domain); + } hd->domain_id = domain->domain_id; @@ -490,7 +477,7 @@ static void amd_iommu_disable_domain_dev memset (dte, 0, IOMMU_DEV_TABLE_ENTRY_SIZE); invalidate_dev_table_entry(iommu, req_id); flush_command_buffer(iommu); - dprintk(XENLOG_INFO , "AMD IOMMU: disable DTE 0x%x," + amd_iov_info("Disable DTE:0x%x," " domain_id:%d, paging_mode:%d\n", req_id, domain_hvm_iommu(domain)->domain_id, domain_hvm_iommu(domain)->paging_mode); @@ -525,7 +512,7 @@ static int reassign_device( struct domai if ( !iommu ) { - gdprintk(XENLOG_ERR , "AMD IOMMU: fail to find iommu." + amd_iov_error("Fail to find iommu." " %x:%x.%x cannot be assigned to domain %d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id); return -ENODEV; @@ -540,8 +527,7 @@ static int reassign_device( struct domai spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags); amd_iommu_setup_domain_device(target, iommu, bdf); - gdprintk(XENLOG_INFO , - "AMD IOMMU: reassign %x:%x.%x domain %d -> domain %d\n", + amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn), source->domain_id, target->domain_id); @@ -550,7 +536,7 @@ static int reassign_device( struct domai return 0; } -int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn) +static int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn) { int bdf = (bus << 8) | devfn; int req_id = ivrs_mappings[bdf].dte_requestor_id; @@ -580,8 +566,7 @@ static void release_domain_devices(struc { pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list); pdev_flr(pdev->bus, pdev->devfn); - gdprintk(XENLOG_INFO , - "AMD IOMMU: release devices %x:%x.%x\n", + amd_iov_info("release domain %d devices %x:%x.%x\n", d->domain_id, pdev->bus, PCI_SLOT(pdev->devfn), 
PCI_FUNC(pdev->devfn)); reassign_device(d, dom0, pdev->bus, pdev->devfn); } @@ -637,16 +622,13 @@ static void deallocate_iommu_page_tables hd ->root_table = NULL; } -void amd_iommu_domain_destroy(struct domain *d) -{ - if ( !amd_iommu_enabled ) - return; - +static void amd_iommu_domain_destroy(struct domain *d) +{ deallocate_iommu_page_tables(d); release_domain_devices(d); } -void amd_iommu_return_device( +static void amd_iommu_return_device( struct domain *s, struct domain *t, u8 bus, u8 devfn) { pdev_flr(bus, devfn); diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/passthrough/iommu.c Thu Apr 24 14:08:29 2008 -0600 @@ -18,6 +18,11 @@ extern struct iommu_ops intel_iommu_ops; extern struct iommu_ops amd_iommu_ops; +int intel_vtd_setup(void); +int amd_iov_detect(void); + +int iommu_enabled = 1; +boolean_param("iommu", iommu_enabled); int iommu_domain_init(struct domain *domain) { @@ -134,3 +139,28 @@ void deassign_device(struct domain *d, u return hd->platform_ops->reassign_device(d, dom0, bus, devfn); } + +static int iommu_setup(void) +{ + int rc = -ENODEV; + + if ( !iommu_enabled ) + goto out; + + switch ( boot_cpu_data.x86_vendor ) + { + case X86_VENDOR_INTEL: + rc = intel_vtd_setup(); + break; + case X86_VENDOR_AMD: + rc = amd_iov_detect(); + break; + } + + iommu_enabled = (rc == 0); + + out: + printk("I/O virtualisation %sabled\n", iommu_enabled ? 
"en" : "dis"); + return rc; +} +__initcall(iommu_setup); diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/dmar.c --- a/xen/drivers/passthrough/vtd/dmar.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/passthrough/vtd/dmar.c Thu Apr 24 14:08:29 2008 -0600 @@ -30,8 +30,7 @@ #include "dmar.h" #include "../pci_regs.h" -int vtd_enabled; -boolean_param("vtd", vtd_enabled); +int vtd_enabled = 1; #undef PREFIX #define PREFIX VTDPREFIX "ACPI DMAR:" @@ -79,14 +78,9 @@ struct acpi_drhd_unit * ioapic_to_drhd(u struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id) { struct acpi_drhd_unit *drhd; - list_for_each_entry( drhd, &acpi_drhd_units, list ) { - if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) ) { - dprintk(XENLOG_INFO VTDPREFIX, - "ioapic_to_drhd: drhd->address = %lx\n", - drhd->address); + list_for_each_entry( drhd, &acpi_drhd_units, list ) + if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) ) return drhd; - } - } return NULL; } @@ -94,15 +88,9 @@ struct iommu * ioapic_to_iommu(unsigned { struct acpi_drhd_unit *drhd; - list_for_each_entry( drhd, &acpi_drhd_units, list ) { - if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) ) { - dprintk(XENLOG_INFO VTDPREFIX, - "ioapic_to_iommu: drhd->address = %lx\n", - drhd->address); + list_for_each_entry( drhd, &acpi_drhd_units, list ) + if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) ) return drhd->iommu; - } - } - dprintk(XENLOG_INFO VTDPREFIX, "returning NULL\n"); return NULL; } @@ -150,21 +138,11 @@ struct acpi_drhd_unit * acpi_find_matche if ( acpi_pci_device_match(drhd->devices, drhd->devices_cnt, dev) ) - { - dprintk(XENLOG_INFO VTDPREFIX, - "acpi_find_matched_drhd_unit: drhd->address = %lx\n", - drhd->address); return drhd; - } } if ( include_all_drhd ) - { - dprintk(XENLOG_INFO VTDPREFIX, - "acpi_find_matched_drhd_unit:include_all_drhd->addr = %lx\n", - include_all_drhd->address); return include_all_drhd; - } return NULL; } @@ -174,11 +152,9 @@ struct 
acpi_rmrr_unit * acpi_find_matche struct acpi_rmrr_unit *rmrr; list_for_each_entry ( rmrr, &acpi_rmrr_units, list ) - { if ( acpi_pci_device_match(rmrr->devices, rmrr->devices_cnt, dev) ) return rmrr; - } return NULL; } @@ -199,11 +175,7 @@ struct acpi_atsr_unit * acpi_find_matche } if ( all_ports_atsru ) - { - dprintk(XENLOG_INFO VTDPREFIX, - "acpi_find_matched_atsr_unit: all_ports_atsru\n"); return all_ports_atsru;; - } return NULL; } @@ -604,22 +576,24 @@ int acpi_dmar_init(void) { int rc; - if ( !vtd_enabled ) - return -ENODEV; + rc = -ENODEV; + if ( !iommu_enabled ) + goto fail; if ( (rc = vtd_hw_check()) != 0 ) - return rc; + goto fail; acpi_table_parse(ACPI_DMAR, acpi_parse_dmar); + rc = -ENODEV; if ( list_empty(&acpi_drhd_units) ) - { - dprintk(XENLOG_ERR VTDPREFIX, "No DMAR devices found\n"); - vtd_enabled = 0; - return -ENODEV; - } + goto fail; printk("Intel VT-d has been enabled\n"); return 0; -} + + fail: + vtd_enabled = 0; + return -ENODEV; +} diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/passthrough/vtd/iommu.c Thu Apr 24 14:08:29 2008 -0600 @@ -41,6 +41,9 @@ static int domid_bitmap_size; static int domid_bitmap_size; /* domain id bitmap size in bits */ static unsigned long *domid_bitmap; /* iommu domain id bitmap */ +static void setup_dom0_devices(struct domain *d); +static void setup_dom0_rmrr(struct domain *d); + #define DID_FIELD_WIDTH 16 #define DID_HIGH_OFFSET 8 static void context_set_domain_id(struct context_entry *context, @@ -78,17 +81,12 @@ static struct intel_iommu *alloc_intel_i struct intel_iommu *intel; intel = xmalloc(struct intel_iommu); - if ( !intel ) - { - gdprintk(XENLOG_ERR VTDPREFIX, - "Allocate intel_iommu failed.\n"); + if ( intel == NULL ) return NULL; - } memset(intel, 0, sizeof(struct intel_iommu)); spin_lock_init(&intel->qi_ctrl.qinval_lock); spin_lock_init(&intel->qi_ctrl.qinval_poll_lock); - 
spin_lock_init(&intel->ir_ctrl.iremap_lock); return intel; @@ -96,68 +94,22 @@ static struct intel_iommu *alloc_intel_i static void free_intel_iommu(struct intel_iommu *intel) { - if ( intel ) - { - xfree(intel); - intel = NULL; - } + xfree(intel); } struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu) { - if ( !iommu ) - return NULL; - - if ( !iommu->intel ) - { - iommu->intel = alloc_intel_iommu(); - if ( !iommu->intel ) - { - dprintk(XENLOG_ERR VTDPREFIX, - "iommu_qi_ctrl: Allocate iommu->intel failed.\n"); - return NULL; - } - } - - return &(iommu->intel->qi_ctrl); + return iommu ? &iommu->intel->qi_ctrl : NULL; } struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu) { - if ( !iommu ) - return NULL; - - if ( !iommu->intel ) - { - iommu->intel = alloc_intel_iommu(); - if ( !iommu->intel ) - { - dprintk(XENLOG_ERR VTDPREFIX, - "iommu_ir_ctrl: Allocate iommu->intel failed.\n"); - return NULL; - } - } - - return &(iommu->intel->ir_ctrl); + return iommu ? &iommu->intel->ir_ctrl : NULL; } struct iommu_flush *iommu_get_flush(struct iommu *iommu) { - if ( !iommu ) - return NULL; - - if ( !iommu->intel ) - { - iommu->intel = alloc_intel_iommu(); - if ( !iommu->intel ) - { - dprintk(XENLOG_ERR VTDPREFIX, - "iommu_get_flush: Allocate iommu->intel failed.\n"); - return NULL; - } - } - - return &(iommu->intel->flush); + return iommu ? 
&iommu->intel->flush : NULL; } unsigned int clflush_size; @@ -276,11 +228,7 @@ static u64 addr_to_dma_page_maddr(struct dma_set_pte_addr(*pte, maddr); vaddr = map_vtd_domain_page(maddr); if ( !vaddr ) - { - unmap_vtd_domain_page(parent); - spin_unlock_irqrestore(&hd->mapping_lock, flags); - return 0; - } + break; /* * high level table always sets r/w, last level @@ -294,14 +242,9 @@ static u64 addr_to_dma_page_maddr(struct { vaddr = map_vtd_domain_page(pte->val); if ( !vaddr ) - { - unmap_vtd_domain_page(parent); - spin_unlock_irqrestore(&hd->mapping_lock, flags); - return 0; - } + break; } - unmap_vtd_domain_page(parent); if ( level == 2 ) { pte_maddr = pte->val & PAGE_MASK_4K; @@ -309,11 +252,13 @@ static u64 addr_to_dma_page_maddr(struct break; } + unmap_vtd_domain_page(parent); parent = (struct dma_pte *)vaddr; vaddr = NULL; level--; } + unmap_vtd_domain_page(parent); spin_unlock_irqrestore(&hd->mapping_lock, flags); return pte_maddr; } @@ -688,7 +633,7 @@ void dma_pte_free_pagetable(struct domai struct dma_pte *page, *pte; int total = agaw_to_level(hd->agaw); int level; - u32 tmp; + u64 tmp; u64 pg_maddr; drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); @@ -709,7 +654,10 @@ void dma_pte_free_pagetable(struct domai { pg_maddr = dma_addr_level_page_maddr(domain, tmp, level); if ( pg_maddr == 0 ) - return; + { + tmp += level_size(level); + continue; + } page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); pte = page + address_level_offset(tmp, level); dma_clear_pte(*pte); @@ -730,18 +678,11 @@ void dma_pte_free_pagetable(struct domai } } -/* iommu handling */ static int iommu_set_root_entry(struct iommu *iommu) { u32 cmd, sts; unsigned long flags; - - if ( iommu == NULL ) - { - gdprintk(XENLOG_ERR VTDPREFIX, - "iommu_set_root_entry: iommu == NULL\n"); - return -EINVAL; - } + s_time_t start_time; if ( iommu->root_maddr != 0 ) { @@ -760,11 +701,14 @@ static int iommu_set_root_entry(struct i dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd); /* Make sure 
hardware complete it */ + start_time = NOW(); for ( ; ; ) { sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); if ( sts & DMA_GSTS_RTPS ) break; + if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT ) + panic("DMAR hardware is malfunctional, please disable IOMMU\n"); cpu_relax(); } @@ -777,6 +721,7 @@ static int iommu_enable_translation(stru { u32 sts; unsigned long flags; + s_time_t start_time; dprintk(XENLOG_INFO VTDPREFIX, "iommu_enable_translation: iommu->reg = %p\n", iommu->reg); @@ -784,11 +729,14 @@ static int iommu_enable_translation(stru iommu->gcmd |= DMA_GCMD_TE; dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd); /* Make sure hardware complete it */ + start_time = NOW(); for ( ; ; ) { sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); if ( sts & DMA_GSTS_TES ) break; + if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT ) + panic("DMAR hardware is malfunctional, please disable IOMMU\n"); cpu_relax(); } @@ -802,17 +750,21 @@ int iommu_disable_translation(struct iom { u32 sts; unsigned long flags; + s_time_t start_time; spin_lock_irqsave(&iommu->register_lock, flags); iommu->gcmd &= ~ DMA_GCMD_TE; dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd); /* Make sure hardware complete it */ + start_time = NOW(); for ( ; ; ) { sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); if ( !(sts & DMA_GSTS_TES) ) break; + if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT ) + panic("DMAR hardware is malfunctional, please disable IOMMU\n"); cpu_relax(); } spin_unlock_irqrestore(&iommu->register_lock, flags); @@ -1039,69 +991,64 @@ int iommu_set_interrupt(struct iommu *io return vector; } -struct iommu *iommu_alloc(void *hw_data) -{ - struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data; +static int iommu_alloc(struct acpi_drhd_unit *drhd) +{ struct iommu *iommu; if ( nr_iommus > MAX_IOMMUS ) { gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus); - return NULL; + return -ENOMEM; } iommu = xmalloc(struct iommu); - if ( !iommu ) - return NULL; + if ( iommu == 
NULL ) + return -ENOMEM; memset(iommu, 0, sizeof(struct iommu)); + iommu->intel = alloc_intel_iommu(); + if ( iommu->intel == NULL ) + { + xfree(iommu); + return -ENOMEM; + } + set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address); - iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus); - - printk("iommu_alloc: iommu->reg = %p drhd->address = %lx\n", - iommu->reg, drhd->address); - + iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus); nr_iommus++; - - if ( !iommu->reg ) - { - printk(KERN_ERR VTDPREFIX "IOMMU: can't mapping the region\n"); - goto error; - } iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG); iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG); - printk("iommu_alloc: cap = %"PRIx64"\n",iommu->cap); - printk("iommu_alloc: ecap = %"PRIx64"\n", iommu->ecap); - spin_lock_init(&iommu->lock); spin_lock_init(&iommu->register_lock); - iommu->intel = alloc_intel_iommu(); - drhd->iommu = iommu; - return iommu; - error: - xfree(iommu); - return NULL; -} - -static void free_iommu(struct iommu *iommu) -{ - if ( !iommu ) + return 0; +} + +static void iommu_free(struct acpi_drhd_unit *drhd) +{ + struct iommu *iommu = drhd->iommu; + + if ( iommu == NULL ) return; + if ( iommu->root_maddr != 0 ) { free_pgtable_maddr(iommu->root_maddr); iommu->root_maddr = 0; } + if ( iommu->reg ) iounmap(iommu->reg); + free_intel_iommu(iommu->intel); free_irq(iommu->vector); xfree(iommu); + + drhd->iommu = NULL; } #define guestwidth_to_adjustwidth(gaw) ({ \ @@ -1111,22 +1058,21 @@ static void free_iommu(struct iommu *iom agaw = 64; \ agaw; }) -int intel_iommu_domain_init(struct domain *domain) -{ - struct hvm_iommu *hd = domain_hvm_iommu(domain); +static int intel_iommu_domain_init(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); struct iommu *iommu = NULL; int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH; - int adjust_width, agaw; + int i, adjust_width, agaw; unsigned long sagaw; struct acpi_drhd_unit *drhd; - if ( 
!vtd_enabled || list_empty(&acpi_drhd_units) ) - return 0; - - for_each_drhd_unit ( drhd ) - iommu = drhd->iommu ? : iommu_alloc(drhd); - - /* calculate AGAW */ + INIT_LIST_HEAD(&hd->pdev_list); + + drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); + iommu = drhd->iommu; + + /* Calculate AGAW. */ if ( guest_width > cap_mgaw(iommu->cap) ) guest_width = cap_mgaw(iommu->cap); adjust_width = guestwidth_to_adjustwidth(guest_width); @@ -1142,6 +1088,26 @@ int intel_iommu_domain_init(struct domai return -ENODEV; } hd->agaw = agaw; + + if ( d->domain_id == 0 ) + { + /* Set up 1:1 page table for dom0. */ + for ( i = 0; i < max_page; i++ ) + iommu_map_page(d, i, i); + + setup_dom0_devices(d); + setup_dom0_rmrr(d); + + iommu_flush_all(); + + for_each_drhd_unit ( drhd ) + { + iommu = drhd->iommu; + if ( iommu_enable_translation(iommu) ) + return -EIO; + } + } + return 0; } @@ -1153,28 +1119,15 @@ static int domain_context_mapping_one( struct hvm_iommu *hd = domain_hvm_iommu(domain); struct context_entry *context, *context_entries; unsigned long flags; - int ret = 0; u64 maddr; maddr = bus_to_context_maddr(iommu, bus); context_entries = (struct context_entry *)map_vtd_domain_page(maddr); context = &context_entries[devfn]; - if ( !context ) + + if ( context_present(*context) ) { unmap_vtd_domain_page(context_entries); - gdprintk(XENLOG_ERR VTDPREFIX, - "domain_context_mapping_one:context == NULL:" - "bdf = %x:%x:%x\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - return -ENOMEM; - } - - if ( context_present(*context) ) - { - unmap_vtd_domain_page(context_entries); - gdprintk(XENLOG_WARNING VTDPREFIX, - "domain_context_mapping_one:context present:bdf=%x:%x:%x\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); return 0; } @@ -1203,12 +1156,6 @@ static int domain_context_mapping_one( context_set_present(*context); iommu_flush_cache_entry(iommu, context); - gdprintk(XENLOG_INFO VTDPREFIX, - "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64 - " 
hd->pgd_maddr=%"PRIx64"\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn), - context->hi, context->lo, hd->pgd_maddr); - unmap_vtd_domain_page(context_entries); if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain), @@ -1218,7 +1165,8 @@ static int domain_context_mapping_one( else iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0); spin_unlock_irqrestore(&iommu->lock, flags); - return ret; + + return 0; } static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap) @@ -1377,28 +1325,12 @@ static int domain_context_unmap_one( maddr = bus_to_context_maddr(iommu, bus); context_entries = (struct context_entry *)map_vtd_domain_page(maddr); context = &context_entries[devfn]; - if ( !context ) + + if ( !context_present(*context) ) { unmap_vtd_domain_page(context_entries); - gdprintk(XENLOG_ERR VTDPREFIX, - "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - return -ENOMEM; - } - - if ( !context_present(*context) ) - { - unmap_vtd_domain_page(context_entries); - gdprintk(XENLOG_WARNING VTDPREFIX, - "domain_context_unmap_one-%x:%x:%x- " - "context NOT present:return\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); return 0; } - - gdprintk(XENLOG_INFO VTDPREFIX, - "domain_context_unmap_one: bdf = %x:%x:%x\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); spin_lock_irqsave(&iommu->lock, flags); context_clear_present(*context); @@ -1431,24 +1363,12 @@ static int domain_context_unmap( sub_bus = pci_conf_read8( pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS); - - gdprintk(XENLOG_INFO VTDPREFIX, - "domain_context_unmap:BRIDGE:%x:%x:%x " - "sec_bus=%x sub_bus=%x\n", - pdev->bus, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), sec_bus, sub_bus); break; case DEV_TYPE_PCIe_ENDPOINT: - gdprintk(XENLOG_INFO VTDPREFIX, - "domain_context_unmap:PCIe : bdf = %x:%x:%x\n", - pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); ret = domain_context_unmap_one(domain, iommu, 
(u8)(pdev->bus), (u8)(pdev->devfn)); break; case DEV_TYPE_PCI: - gdprintk(XENLOG_INFO VTDPREFIX, - "domain_context_unmap:PCI: bdf = %x:%x:%x\n", - pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); if ( pdev->bus == 0 ) ret = domain_context_unmap_one( domain, iommu, @@ -1502,35 +1422,29 @@ void reassign_device_ownership( int status; unsigned long flags; - gdprintk(XENLOG_INFO VTDPREFIX, - "reassign_device-%x:%x:%x- source = %d target = %d\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn), - source->domain_id, target->domain_id); - pdev_flr(bus, devfn); for_each_pdev( source, pdev ) - { - if ( (pdev->bus != bus) || (pdev->devfn != devfn) ) - continue; - - drhd = acpi_find_matched_drhd_unit(pdev); - iommu = drhd->iommu; - domain_context_unmap(source, iommu, pdev); - - /* Move pci device from the source domain to target domain. */ - spin_lock_irqsave(&source_hd->iommu_list_lock, flags); - spin_lock_irqsave(&target_hd->iommu_list_lock, flags); - list_move(&pdev->list, &target_hd->pdev_list); - spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags); - spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags); - - status = domain_context_mapping(target, iommu, pdev); - if ( status != 0 ) - gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n"); - - break; - } + if ( (pdev->bus == bus) && (pdev->devfn == devfn) ) + goto found; + + return; + + found: + drhd = acpi_find_matched_drhd_unit(pdev); + iommu = drhd->iommu; + domain_context_unmap(source, iommu, pdev); + + /* Move pci device from the source domain to target domain. 
*/ + spin_lock_irqsave(&source_hd->iommu_list_lock, flags); + spin_lock_irqsave(&target_hd->iommu_list_lock, flags); + list_move(&pdev->list, &target_hd->pdev_list); + spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags); + spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags); + + status = domain_context_mapping(target, iommu, pdev); + if ( status != 0 ) + gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n"); } void return_devices_to_dom0(struct domain *d) @@ -1541,9 +1455,6 @@ void return_devices_to_dom0(struct domai while ( !list_empty(&hd->pdev_list) ) { pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list); - dprintk(XENLOG_INFO VTDPREFIX, - "return_devices_to_dom0: bdf = %x:%x:%x\n", - pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn); } @@ -1600,7 +1511,7 @@ int intel_iommu_map_page( return 0; #endif - pg_maddr = addr_to_dma_page_maddr(d, gfn << PAGE_SHIFT_4K); + pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K); if ( pg_maddr == 0 ) return -ENOMEM; page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); @@ -1643,11 +1554,11 @@ int intel_iommu_unmap_page(struct domain } int iommu_page_mapping(struct domain *domain, paddr_t iova, - void *hpa, size_t size, int prot) + paddr_t hpa, size_t size, int prot) { struct acpi_drhd_unit *drhd; struct iommu *iommu; - unsigned long start_pfn, end_pfn; + u64 start_pfn, end_pfn; struct dma_pte *page = NULL, *pte = NULL; int index; u64 pg_maddr; @@ -1657,9 +1568,8 @@ int iommu_page_mapping(struct domain *do if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 ) return -EINVAL; iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K; - start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K); - end_pfn = (unsigned long) - ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K); + start_pfn = hpa >> PAGE_SHIFT_4K; + end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K; index = 0; while ( start_pfn < end_pfn 
) { @@ -1668,7 +1578,7 @@ int iommu_page_mapping(struct domain *do return -ENOMEM; page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); pte = page + (start_pfn & LEVEL_MASK); - dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K); + dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K); dma_set_pte_prot(*pte, prot); iommu_flush_cache_entry(iommu, pte); unmap_vtd_domain_page(page); @@ -1727,7 +1637,7 @@ static int iommu_prepare_rmrr_dev( /* page table init */ size = rmrr->end_address - rmrr->base_address + 1; ret = iommu_page_mapping(d, rmrr->base_address, - (void *)rmrr->base_address, size, + rmrr->base_address, size, DMA_PTE_READ|DMA_PTE_WRITE); if ( ret ) return ret; @@ -1743,37 +1653,15 @@ static int iommu_prepare_rmrr_dev( return ret; } -void __init setup_dom0_devices(void) -{ - struct hvm_iommu *hd = domain_hvm_iommu(dom0); +static void setup_dom0_devices(struct domain *d) +{ + struct hvm_iommu *hd; struct acpi_drhd_unit *drhd; struct pci_dev *pdev; int bus, dev, func, ret; u32 l; -#ifdef DEBUG_VTD_CONTEXT_ENTRY - for ( bus = 0; bus < 256; bus++ ) - { - for ( dev = 0; dev < 32; dev++ ) - { - for ( func = 0; func < 8; func++ ) - { - struct context_entry *context; - struct pci_dev device; - - device.bus = bus; - device.devfn = PCI_DEVFN(dev, func); - drhd = acpi_find_matched_drhd_unit(&device); - context = device_to_context_entry(drhd->iommu, - bus, PCI_DEVFN(dev, func)); - if ( (context->lo != 0) || (context->hi != 0) ) - dprintk(XENLOG_INFO VTDPREFIX, - "setup_dom0_devices-%x:%x:%x- context not 0\n", - bus, dev, func); - } - } - } -#endif + hd = domain_hvm_iommu(d); for ( bus = 0; bus < 256; bus++ ) { @@ -1792,18 +1680,13 @@ void __init setup_dom0_devices(void) list_add_tail(&pdev->list, &hd->pdev_list); drhd = acpi_find_matched_drhd_unit(pdev); - ret = domain_context_mapping(dom0, drhd->iommu, pdev); + ret = domain_context_mapping(d, drhd->iommu, pdev); if ( ret != 0 ) gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n"); } } } - - 
for_each_pdev ( dom0, pdev ) - dprintk(XENLOG_INFO VTDPREFIX, - "setup_dom0_devices: bdf = %x:%x:%x\n", - pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); } void clear_fault_bits(struct iommu *iommu) @@ -1850,13 +1733,6 @@ static int init_vtd_hw(void) flush->context = flush_context_reg; flush->iotlb = flush_iotlb_reg; } - return 0; -} - -static int init_vtd2_hw(void) -{ - struct acpi_drhd_unit *drhd; - struct iommu *iommu; for_each_drhd_unit ( drhd ) { @@ -1873,52 +1749,38 @@ static int init_vtd2_hw(void) dprintk(XENLOG_ERR VTDPREFIX, "Interrupt Remapping hardware not found\n"); } - return 0; -} - -static int enable_vtd_translation(void) -{ - struct acpi_drhd_unit *drhd; - struct iommu *iommu; - - for_each_drhd_unit ( drhd ) - { - iommu = drhd->iommu; - if ( iommu_enable_translation(iommu) ) - return -EIO; - } - return 0; -} - -static void setup_dom0_rmrr(void) + + return 0; +} + +static void setup_dom0_rmrr(struct domain *d) { struct acpi_rmrr_unit *rmrr; struct pci_dev *pdev; int ret; for_each_rmrr_device ( rmrr, pdev ) - ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev); + ret = iommu_prepare_rmrr_dev(d, rmrr, pdev); if ( ret ) gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: mapping reserved region failed\n"); end_for_each_rmrr_device ( rmrr, pdev ) } -int iommu_setup(void) -{ - struct hvm_iommu *hd = domain_hvm_iommu(dom0); +int intel_vtd_setup(void) +{ struct acpi_drhd_unit *drhd; struct iommu *iommu; - unsigned long i; if ( !vtd_enabled ) - return 0; + return -ENODEV; spin_lock_init(&domid_bitmap_lock); - INIT_LIST_HEAD(&hd->pdev_list); - - /* setup clflush size */ clflush_size = get_clflush_size(); + + for_each_drhd_unit ( drhd ) + if ( iommu_alloc(drhd) != 0 ) + goto error; /* Allocate IO page directory page for the domain. 
*/ drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); @@ -1933,27 +1795,15 @@ int iommu_setup(void) memset(domid_bitmap, 0, domid_bitmap_size / 8); set_bit(0, domid_bitmap); - /* setup 1:1 page table for dom0 */ - for ( i = 0; i < max_page; i++ ) - iommu_map_page(dom0, i, i); - init_vtd_hw(); - setup_dom0_devices(); - setup_dom0_rmrr(); - iommu_flush_all(); - enable_vtd_translation(); - init_vtd2_hw(); return 0; error: - printk("iommu_setup() failed\n"); for_each_drhd_unit ( drhd ) - { - iommu = drhd->iommu; - free_iommu(iommu); - } - return -EIO; + iommu_free(drhd); + vtd_enabled = 0; + return -ENOMEM; } /* @@ -1979,10 +1829,6 @@ int intel_iommu_assign_device(struct dom if ( list_empty(&acpi_drhd_units) ) return ret; - - gdprintk(XENLOG_INFO VTDPREFIX, - "assign_device: bus = %x dev = %x func = %x\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); reassign_device_ownership(dom0, d, bus, devfn); diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/utils.c --- a/xen/drivers/passthrough/vtd/utils.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/passthrough/vtd/utils.c Thu Apr 24 14:08:29 2008 -0600 @@ -60,10 +60,10 @@ int vtd_hw_check(void) dprintk(XENLOG_WARNING VTDPREFIX, "*** vendor = %x device = %x revision = %x\n", vendor, device, revision); - vtd_enabled = 0; return -ENODEV; } } + return 0; } diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/x86/vtd.c --- a/xen/drivers/passthrough/vtd/x86/vtd.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/drivers/passthrough/vtd/x86/vtd.c Thu Apr 24 14:08:29 2008 -0600 @@ -114,8 +114,6 @@ void hvm_dpci_isairq_eoi(struct domain * if ( --dpci->mirq[i].pending == 0 ) { spin_unlock(&dpci->dirq_lock); - gdprintk(XENLOG_INFO VTDPREFIX, - "hvm_dpci_isairq_eoi:: mirq = %x\n", i); stop_timer(&dpci->hvm_timer[irq_to_vector(i)]); pirq_guest_eoi(d, i); } @@ -130,8 +128,6 @@ void iommu_set_pgd(struct domain *d) { struct hvm_iommu *hd = domain_hvm_iommu(d); unsigned long p2m_table; - int level = 
agaw_to_level(hd->agaw); - l3_pgentry_t *l3e; p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table)); @@ -153,12 +149,12 @@ void iommu_set_pgd(struct domain *d) return; } pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K); - hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K; + hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K; unmap_domain_page(dpte); break; case VTD_PAGE_TABLE_LEVEL_4: pgd_mfn = _mfn(p2m_table); - hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K; + hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K; break; default: gdprintk(XENLOG_ERR VTDPREFIX, @@ -173,6 +169,8 @@ void iommu_set_pgd(struct domain *d) int i; u64 pmd_maddr; unsigned long flags; + l3_pgentry_t *l3e; + int level = agaw_to_level(hd->agaw); spin_lock_irqsave(&hd->mapping_lock, flags); hd->pgd_maddr = alloc_pgtable_maddr(); @@ -236,6 +234,8 @@ void iommu_set_pgd(struct domain *d) #elif CONFIG_PAGING_LEVELS == 4 mfn_t pgd_mfn; + l3_pgentry_t *l3e; + int level = agaw_to_level(hd->agaw); switch ( level ) { @@ -250,12 +250,12 @@ void iommu_set_pgd(struct domain *d) } pgd_mfn = _mfn(l3e_get_pfn(*l3e)); - hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K; + hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K; unmap_domain_page(l3e); break; case VTD_PAGE_TABLE_LEVEL_4: pgd_mfn = _mfn(p2m_table); - hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K; + hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K; break; default: gdprintk(XENLOG_ERR VTDPREFIX, diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/amd-iommu.h --- a/xen/include/asm-x86/amd-iommu.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/asm-x86/amd-iommu.h Thu Apr 24 14:08:29 2008 -0600 @@ -28,10 +28,9 @@ #define iommu_found() (!list_empty(&amd_iommu_head)) -extern int amd_iommu_enabled; extern struct list_head amd_iommu_head; -extern int __init amd_iommu_detect(void); +extern int __init amd_iov_detect(void); struct table_struct { void *buffer; @@ -79,6 +78,9 @@ struct amd_iommu { int 
exclusion_allow_all; uint64_t exclusion_base; uint64_t exclusion_limit; + + int msi_cap; + int maskbit; }; struct ivrs_mappings { diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/bitops.h --- a/xen/include/asm-x86/bitops.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/asm-x86/bitops.h Thu Apr 24 14:08:29 2008 -0600 @@ -331,10 +331,9 @@ extern unsigned int __find_next_zero_bit extern unsigned int __find_next_zero_bit( const unsigned long *addr, unsigned int size, unsigned int offset); -/* return index of first bit set in val or BITS_PER_LONG when no bit is set */ -static inline unsigned int __scanbit(unsigned long val) -{ - asm ( "bsf %1,%0" : "=r" (val) : "r" (val), "0" (BITS_PER_LONG) ); +static inline unsigned int __scanbit(unsigned long val, unsigned long max) +{ + asm ( "bsf %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max) ); return (unsigned int)val; } @@ -346,9 +345,9 @@ static inline unsigned int __scanbit(uns * Returns the bit-number of the first set bit, not the number of the byte * containing a bit. */ -#define find_first_bit(addr,size) \ -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ - (__scanbit(*(const unsigned long *)addr)) : \ +#define find_first_bit(addr,size) \ +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ + (__scanbit(*(const unsigned long *)addr, size)) : \ __find_first_bit(addr,size))) /** @@ -357,9 +356,9 @@ static inline unsigned int __scanbit(uns * @offset: The bitnumber to start searching at * @size: The maximum size to search */ -#define find_next_bit(addr,size,off) \ -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ - ((off) + (__scanbit((*(const unsigned long *)addr) >> (off)))) : \ +#define find_next_bit(addr,size,off) \ +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? 
\ + ((off) + (__scanbit((*(const unsigned long *)addr) >> (off), size))) : \ __find_next_bit(addr,size,off))) /** @@ -370,9 +369,9 @@ static inline unsigned int __scanbit(uns * Returns the bit-number of the first zero bit, not the number of the byte * containing a bit. */ -#define find_first_zero_bit(addr,size) \ -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ - (__scanbit(~*(const unsigned long *)addr)) : \ +#define find_first_zero_bit(addr,size) \ +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ + (__scanbit(~*(const unsigned long *)addr, size)) : \ __find_first_zero_bit(addr,size))) /** @@ -381,9 +380,9 @@ static inline unsigned int __scanbit(uns * @offset: The bitnumber to start searching at * @size: The maximum size to search */ -#define find_next_zero_bit(addr,size,off) \ -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ - ((off)+(__scanbit(~(((*(const unsigned long *)addr)) >> (off))))) : \ +#define find_next_zero_bit(addr,size,off) \ +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ + ((off)+(__scanbit(~(((*(const unsigned long *)addr)) >> (off)), size))) : \ __find_next_zero_bit(addr,size,off))) @@ -391,8 +390,7 @@ static inline unsigned int __scanbit(uns * find_first_set_bit - find the first set bit in @word * @word: the word to search * - * Returns the bit-number of the first set bit. If no bits are set then the - * result is undefined. + * Returns the bit-number of the first set bit. The input must *not* be zero. */ static inline unsigned int find_first_set_bit(unsigned long word) { @@ -401,26 +399,10 @@ static inline unsigned int find_first_se } /** - * ffz - find first zero in word. - * @word: The word to search - * - * Undefined if no zero exists, so code should check against ~0UL first. 
- */ -static inline unsigned long ffz(unsigned long word) -{ - asm ( "bsf %1,%0" - :"=r" (word) - :"r" (~word)); - return word; -} - -/** * ffs - find first bit set * @x: the word to search * - * This is defined the same way as - * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). + * This is defined the same way as the libc and compiler builtin ffs routines. */ static inline int ffs(unsigned long x) { diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/asm-x86/hvm/hvm.h Thu Apr 24 14:08:29 2008 -0600 @@ -139,6 +139,8 @@ int hvm_vcpu_initialise(struct vcpu *v); int hvm_vcpu_initialise(struct vcpu *v); void hvm_vcpu_destroy(struct vcpu *v); void hvm_vcpu_down(struct vcpu *v); +int hvm_vcpu_cacheattr_init(struct vcpu *v); +void hvm_vcpu_cacheattr_destroy(struct vcpu *v); void hvm_send_assist_req(struct vcpu *v); diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/asm-x86/hvm/support.h Thu Apr 24 14:08:29 2008 -0600 @@ -130,5 +130,7 @@ int hvm_set_cr0(unsigned long value); int hvm_set_cr0(unsigned long value); int hvm_set_cr3(unsigned long value); int hvm_set_cr4(unsigned long value); +int hvm_msr_read_intercept(struct cpu_user_regs *regs); +int hvm_msr_write_intercept(struct cpu_user_regs *regs); #endif /* __ASM_X86_HVM_SUPPORT_H__ */ diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Thu Apr 24 14:08:29 2008 -0600 @@ -35,6 +35,9 @@ /* IOMMU Command Buffer entries: in power of 2 increments, minimum of 256 */ #define IOMMU_CMD_BUFFER_DEFAULT_ENTRIES 512 +/* IOMMU Event Log entries: in power of 2 increments, minimum of 256 */ +#define 
IOMMU_EVENT_LOG_DEFAULT_ENTRIES 512 + #define BITMAP_ENTRIES_PER_BYTE 8 #define PTE_PER_TABLE_SHIFT 9 @@ -303,6 +306,11 @@ #define IOMMU_EVENT_COMMAND_HW_ERROR 0x6 #define IOMMU_EVENT_IOTLB_INV_TIMEOUT 0x7 #define IOMMU_EVENT_INVALID_DEV_REQUEST 0x8 + +#define IOMMU_EVENT_DOMAIN_ID_MASK 0x0000FFFF +#define IOMMU_EVENT_DOMAIN_ID_SHIFT 0 +#define IOMMU_EVENT_DEVICE_ID_MASK 0x0000FFFF +#define IOMMU_EVENT_DEVICE_ID_SHIFT 0 /* Control Register */ #define IOMMU_CONTROL_MMIO_OFFSET 0x18 @@ -427,4 +435,33 @@ #define IOMMU_IO_READ_ENABLED 1 #define HACK_BIOS_SETTINGS 0 +/* MSI interrupt */ +#define MSI_DATA_VECTOR_SHIFT 0 +#define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT) + +#define MSI_DATA_DELIVERY_SHIFT 8 +#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT) +#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_SHIFT) + +#define MSI_DATA_LEVEL_SHIFT 14 +#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) +#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) + +#define MSI_DATA_TRIGGER_SHIFT 15 +#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) +#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) + +#define MSI_TARGET_CPU_SHIFT 12 +#define MSI_ADDR_HEADER 0xfee00000 +#define MSI_ADDR_DESTID_MASK 0xfff0000f +#define MSI_ADDR_DESTID_CPU(cpu) ((cpu) << MSI_TARGET_CPU_SHIFT) + +#define MSI_ADDR_DESTMODE_SHIFT 2 +#define MSI_ADDR_DESTMODE_PHYS (0 << MSI_ADDR_DESTMODE_SHIFT) +#define MSI_ADDR_DESTMODE_LOGIC (1 << MSI_ADDR_DESTMODE_SHIFT) + +#define MSI_ADDR_REDIRECTION_SHIFT 3 +#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) +#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) + #endif /* _ASM_X86_64_AMD_IOMMU_DEFS_H */ diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Thu Apr 24 14:08:29 2008 -0600 @@ -35,6 +35,19 @@ 
#define DMA_32BIT_MASK 0x00000000ffffffffULL #define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) +#ifdef AMD_IOV_DEBUG +#define amd_iov_info(fmt, args...) \ + printk(XENLOG_INFO "AMD_IOV: " fmt, ## args) +#define amd_iov_warning(fmt, args...) \ + printk(XENLOG_WARNING "AMD_IOV: " fmt, ## args) +#define amd_iov_error(fmt, args...) \ + printk(XENLOG_ERR "AMD_IOV: %s:%d: " fmt, __FILE__ , __LINE__ , ## args) +#else +#define amd_iov_info(fmt, args...) +#define amd_iov_warning(fmt, args...) +#define amd_iov_error(fmt, args...) +#endif + typedef int (*iommu_detect_callback_ptr_t)( u8 bus, u8 dev, u8 func, u8 cap_ptr); @@ -49,6 +62,7 @@ void __init unmap_iommu_mmio_region(stru void __init unmap_iommu_mmio_region(struct amd_iommu *iommu); void __init register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu); void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu); +void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu); void __init enable_iommu(struct amd_iommu *iommu); /* mapping functions */ @@ -69,11 +83,6 @@ void invalidate_dev_table_entry(struct a /* send cmd to iommu */ int send_iommu_command(struct amd_iommu *iommu, u32 cmd[]); void flush_command_buffer(struct amd_iommu *iommu); - -/* iommu domain funtions */ -int amd_iommu_domain_init(struct domain *domain); -void amd_iommu_setup_domain_device(struct domain *domain, - struct amd_iommu *iommu, int bdf); /* find iommu for bdf */ struct amd_iommu *find_iommu_for_device(int bus, int devfn); diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/hvm/vcpu.h --- a/xen/include/asm-x86/hvm/vcpu.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/asm-x86/hvm/vcpu.h Thu Apr 24 14:08:29 2008 -0600 @@ -83,7 +83,16 @@ struct hvm_vcpu { */ unsigned long mmio_gva; unsigned long mmio_gpfn; + /* Callback into x86_emulate when emulating FPU/MMX/XMM instructions. 
*/ + void (*fpu_exception_callback)(void *, struct cpu_user_regs *); + void *fpu_exception_callback_arg; + /* We may read up to m128 as a number of device-model transactions. */ + paddr_t mmio_large_read_pa; + uint8_t mmio_large_read[16]; + unsigned int mmio_large_read_bytes; + /* We may write up to m128 as a number of device-model transactions. */ + paddr_t mmio_large_write_pa; + unsigned int mmio_large_write_bytes; }; #endif /* __ASM_X86_HVM_VCPU_H__ */ - diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/mtrr.h --- a/xen/include/asm-x86/mtrr.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/asm-x86/mtrr.h Thu Apr 24 14:08:29 2008 -0600 @@ -11,13 +11,6 @@ #define MTRR_TYPE_WRBACK 6 #define MTRR_NUM_TYPES 7 #define MEMORY_NUM_TYPES MTRR_NUM_TYPES - -#define MTRR_PHYSMASK_VALID_BIT 11 -#define MTRR_PHYSMASK_SHIFT 12 - -#define MTRR_PHYSBASE_TYPE_MASK 0xff /* lowest 8 bits */ -#define MTRR_PHYSBASE_SHIFT 12 -#define MTRR_VCNT 8 #define NORMAL_CACHE_MODE 0 #define NO_FILL_CACHE_MODE 2 @@ -58,7 +51,6 @@ struct mtrr_state { u64 mtrr_cap; /* ranges in var MSRs are overlapped or not:0(no overlapped) */ bool_t overlapped; - bool_t is_initialized; }; extern void mtrr_save_fixed_ranges(void *); diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/paging.h --- a/xen/include/asm-x86/paging.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/asm-x86/paging.h Thu Apr 24 14:08:29 2008 -0600 @@ -83,12 +83,14 @@ struct shadow_paging_mode { unsigned long new, unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt); +#ifdef __i386__ int (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va, unsigned long old_lo, unsigned long old_hi, unsigned long new_lo, unsigned long new_hi, struct sh_emulate_ctxt *sh_ctxt); +#endif mfn_t (*make_monitor_table )(struct vcpu *v); void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn); int (*guess_wrmap )(struct vcpu *v, diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/public/xsm/acm.h --- a/xen/include/public/xsm/acm.h Thu Apr 24 
14:02:16 2008 -0600 +++ b/xen/include/public/xsm/acm.h Thu Apr 24 14:08:29 2008 -0600 @@ -91,7 +91,7 @@ * whenever the interpretation of the related * policy's data structure changes */ -#define ACM_POLICY_VERSION 3 +#define ACM_POLICY_VERSION 4 #define ACM_CHWALL_VERSION 1 #define ACM_STE_VERSION 1 @@ -131,6 +131,10 @@ typedef uint16_t domaintype_t; /* high-16 = version, low-16 = check magic */ #define ACM_MAGIC 0x0001debc +/* size of the SHA1 hash identifying the XML policy from which the + binary policy was created */ +#define ACM_SHA1_HASH_SIZE 20 + /* each offset in bytes from start of the struct they * are part of */ @@ -160,6 +164,7 @@ struct acm_policy_buffer { uint32_t secondary_policy_code; uint32_t secondary_buffer_offset; struct acm_policy_version xml_pol_version; /* add in V3 */ + uint8_t xml_policy_hash[ACM_SHA1_HASH_SIZE]; /* added in V4 */ }; diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/xen/iommu.h Thu Apr 24 14:08:29 2008 -0600 @@ -27,9 +27,8 @@ #include <public/domctl.h> extern int vtd_enabled; -extern int amd_iommu_enabled; +extern int iommu_enabled; -#define iommu_enabled ( amd_iommu_enabled || vtd_enabled ) #define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu) #define domain_vmx_iommu(d) (&d->arch.hvm_domain.hvm_iommu.vmx_iommu) @@ -72,7 +71,6 @@ struct iommu { struct intel_iommu *intel; }; -int iommu_setup(void); int iommu_domain_init(struct domain *d); void iommu_domain_destroy(struct domain *d); int device_assigned(u8 bus, u8 devfn); diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/xen/serial.h --- a/xen/include/xen/serial.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/xen/serial.h Thu Apr 24 14:08:29 2008 -0600 @@ -16,12 +16,10 @@ void serial_set_rx_handler(int handle, s void serial_set_rx_handler(int handle, serial_rx_fn fn); /* Number of characters we buffer for a polling receiver. 
*/ -#define SERIAL_RXBUFSZ 32 -#define MASK_SERIAL_RXBUF_IDX(_i) ((_i)&(SERIAL_RXBUFSZ-1)) +#define serial_rxbufsz 32 /* Number of characters we buffer for an interrupt-driven transmitter. */ -#define SERIAL_TXBUFSZ 16384 -#define MASK_SERIAL_TXBUF_IDX(_i) ((_i)&(SERIAL_TXBUFSZ-1)) +extern unsigned int serial_txbufsz; struct uart_driver; @@ -39,7 +37,7 @@ struct serial_port { /* Receiver callback functions (asynchronous receivers). */ serial_rx_fn rx_lo, rx_hi, rx; /* Receive data buffer (polling receivers). */ - char rxbuf[SERIAL_RXBUFSZ]; + char rxbuf[serial_rxbufsz]; unsigned int rxbufp, rxbufc; /* Serial I/O is concurrency-safe. */ spinlock_t rx_lock, tx_lock; diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/xsm/acm/acm_core.h --- a/xen/include/xsm/acm/acm_core.h Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/include/xsm/acm/acm_core.h Thu Apr 24 14:08:29 2008 -0600 @@ -34,6 +34,7 @@ struct acm_binary_policy { u16 primary_policy_code; u16 secondary_policy_code; struct acm_policy_version xml_pol_version; + u8 xml_policy_hash[ACM_SHA1_HASH_SIZE]; }; struct chwall_binary_policy { diff -r 239b44eeb2d6 -r dc510776dd59 xen/tools/Makefile --- a/xen/tools/Makefile Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/tools/Makefile Thu Apr 24 14:08:29 2008 -0600 @@ -4,12 +4,12 @@ include $(XEN_ROOT)/Config.mk .PHONY: default default: - $(MAKE) -C figlet + [ -d figlet ] && $(MAKE) -C figlet $(MAKE) symbols .PHONY: clean clean: - $(MAKE) -C figlet clean + [ -d figlet ] && $(MAKE) -C figlet clean rm -f *.o symbols symbols: symbols.c diff -r 239b44eeb2d6 -r dc510776dd59 xen/tools/figlet/figlet.c --- a/xen/tools/figlet/figlet.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/tools/figlet/figlet.c Thu Apr 24 14:08:29 2008 -0600 @@ -1488,18 +1488,7 @@ static void myputchar(unsigned char c) putc(c, stderr); - if ( nr_chars == 0 ) - putchar('"'); - - putchar('\\'); - putchar('0' + ((c>>6)&7)); - putchar('0' + ((c>>3)&7)); - putchar('0' + ((c>>0)&7)); - - if ( c == '\n' ) - startline = 1; - - if ( 
++nr_chars == 18 ) + if ( nr_chars == 18 ) { nr_chars = 0; putchar('"'); @@ -1507,6 +1496,17 @@ static void myputchar(unsigned char c) putchar('\\'); putchar('\n'); } + + if ( nr_chars++ == 0 ) + putchar('"'); + + putchar('\\'); + putchar('0' + ((c>>6)&7)); + putchar('0' + ((c>>3)&7)); + putchar('0' + ((c>>0)&7)); + + if ( c == '\n' ) + startline = 1; } void putstring(string) diff -r 239b44eeb2d6 -r dc510776dd59 xen/xsm/acm/acm_policy.c --- a/xen/xsm/acm/acm_policy.c Thu Apr 24 14:02:16 2008 -0600 +++ b/xen/xsm/acm/acm_policy.c Thu Apr 24 14:08:29 2008 -0600 @@ -156,6 +156,10 @@ _acm_update_policy(void *buf, u32 buf_si &pol->xml_pol_version, sizeof(acm_bin_pol.xml_pol_version)); + memcpy(&acm_bin_pol.xml_policy_hash, + pol->xml_policy_hash, + sizeof(acm_bin_pol.xml_policy_hash)); + if ( acm_primary_ops->is_default_policy() && acm_secondary_ops->is_default_policy() ) require_update = 0; @@ -257,6 +261,10 @@ acm_get_policy(XEN_GUEST_HANDLE_64(void) memcpy(&bin_pol->xml_pol_version, &acm_bin_pol.xml_pol_version, sizeof(struct acm_policy_version)); + + memcpy(&bin_pol->xml_policy_hash, + &acm_bin_pol.xml_policy_hash, + sizeof(acm_bin_pol.xml_policy_hash)); ret = acm_dump_policy_reference( policy_buffer + be32_to_cpu(bin_pol->policy_reference_offset), _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |