[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User Alex Williamson <alex.williamson@xxxxxx> # Date 1207070943 21600 # Node ID feee6422144f1a7b9caa91e178244214e8c2e14e # Parent daf16171a05f5502d95d4809bb51c14929237bde # Parent 59d2638a7243a469b0d6fbd815b06bd8c5795b1c merge with xen-unstable.hg --- .hgignore | 1 docs/ChangeLog | 10 docs/src/user.tex | 185 - docs/xen-api/revision-history.tex | 11 docs/xen-api/xenapi-coversheet.tex | 2 docs/xen-api/xenapi-datamodel.tex | 35 extras/mini-os/blkfront.c | 25 extras/mini-os/gnttab.c | 20 extras/mini-os/kernel.c | 2 extras/mini-os/minios.mk | 4 extras/mini-os/netfront.c | 4 tools/blktap/drivers/block-qcow.c | 68 tools/firmware/hvmloader/Makefile | 15 tools/firmware/hvmloader/hvmloader.c | 2 tools/firmware/hvmloader/smbios.c | 57 tools/firmware/hvmloader/util.c | 53 tools/firmware/hvmloader/util.h | 48 tools/include/xen-foreign/mkheader.py | 4 tools/ioemu/block-qcow.c | 74 tools/ioemu/block-qcow2.c | 22 tools/ioemu/block-raw.c | 27 tools/ioemu/block-vmdk.c | 8 tools/ioemu/block.c | 38 tools/ioemu/block_int.h | 4 tools/ioemu/hw/ide.c | 62 tools/ioemu/hw/ne2000.c | 2 tools/ioemu/hw/scsi-disk.c | 8 tools/ioemu/hw/vga.c | 20 tools/ioemu/hw/vga_int.h | 1 tools/ioemu/hw/xenfb.c | 37 tools/ioemu/sdl.c | 74 tools/ioemu/vl.h | 5 tools/ioemu/vnc.c | 1 tools/libfsimage/Rules.mk | 1 tools/libfsimage/check-libext2fs | 4 tools/libfsimage/common/fsimage_grub.c | 54 tools/libfsimage/common/fsimage_grub.h | 2 tools/libfsimage/common/fsimage_plugin.c | 5 tools/libxc/Makefile | 9 tools/libxc/xc_dom_bzimageloader.c | 159 + tools/libxc/xc_dom_elfloader.c | 2 tools/libxen/include/xen/api/xen_acmpolicy.h | 6 tools/libxen/include/xen/api/xen_xspolicy.h | 27 tools/libxen/src/xen_xspolicy.c | 18 tools/pygrub/src/pygrub | 15 tools/python/xen/lowlevel/xc/xc.c | 28 tools/python/xen/util/xsconstants.py | 8 tools/python/xen/util/xsm/acm/acm.py | 42 tools/python/xen/xend/XendBootloader.py | 13 tools/python/xen/xend/XendDomainInfo.py | 9 tools/python/xen/xend/XendNode.py | 3 
tools/python/xen/xend/XendXSPolicy.py | 10 tools/python/xen/xend/server/vfbif.py | 2 tools/python/xen/xm/XenAPI.py | 1 tools/python/xen/xm/create.py | 8 tools/python/xen/xm/messages/xen-xm.pot | 7 tools/tests/Makefile | 10 tools/tests/test_x86_emulator.c | 29 tools/tests/x86_emulate.c | 13 xen/arch/ia64/xen/dom0_ops.c | 2 xen/arch/ia64/xen/dom_fw_common.c | 2 xen/arch/ia64/xen/dom_fw_domu.c | 2 xen/arch/powerpc/sysctl.c | 6 xen/arch/x86/boot/trampoline.S | 9 xen/arch/x86/hvm/emulate.c | 306 +- xen/arch/x86/hvm/hvm.c | 242 + xen/arch/x86/hvm/io.c | 117 xen/arch/x86/hvm/svm/emulate.c | 4 xen/arch/x86/hvm/svm/svm.c | 10 xen/arch/x86/hvm/vmx/realmode.c | 4 xen/arch/x86/hvm/vmx/vmx.c | 100 xen/arch/x86/hvm/vmx/x86_32/exits.S | 4 xen/arch/x86/hvm/vmx/x86_64/exits.S | 4 xen/arch/x86/mm.c | 28 xen/arch/x86/mm/shadow/common.c | 8 xen/arch/x86/mm/shadow/multi.c | 6 xen/arch/x86/sysctl.c | 14 xen/arch/x86/x86_emulate.c | 3410 ----------------------- xen/arch/x86/x86_emulate/x86_emulate.c | 3429 ++++++++++++++++++++++++ xen/arch/x86/x86_emulate/x86_emulate.h | 401 ++ xen/common/domain.c | 8 xen/common/domctl.c | 156 - xen/common/event_channel.c | 49 xen/common/grant_table.c | 43 xen/common/memory.c | 51 xen/include/asm-x86/hvm/hvm.h | 29 xen/include/asm-x86/hvm/io.h | 9 xen/include/asm-x86/hvm/support.h | 37 xen/include/asm-x86/hvm/vcpu.h | 21 xen/include/asm-x86/hvm/vmx/vmx.h | 182 - xen/include/asm-x86/x86_emulate.h | 403 -- xen/include/public/arch-ia64.h | 18 xen/include/public/arch-powerpc.h | 2 xen/include/public/arch-x86/xen-x86_64.h | 4 xen/include/public/arch-x86/xen.h | 2 xen/include/public/hvm/save.h | 4 xen/include/public/io/fbif.h | 29 xen/include/public/sysctl.h | 11 xen/include/public/xsm/acm.h | 1 xen/include/xen/hvm/save.h | 1 xen/include/xsm/acm/acm_hooks.h | 14 xen/xsm/acm/acm_chinesewall_hooks.c | 36 xen/xsm/acm/acm_policy.c | 3 xen/xsm/acm/acm_simple_type_enforcement_hooks.c | 2 104 files changed, 5803 insertions(+), 4834 deletions(-) diff -r daf16171a05f 
-r feee6422144f .hgignore --- a/.hgignore Tue Apr 01 10:30:57 2008 -0600 +++ b/.hgignore Tue Apr 01 11:29:03 2008 -0600 @@ -184,6 +184,7 @@ ^tools/tests/blowfish\.bin$ ^tools/tests/blowfish\.h$ ^tools/tests/test_x86_emulator$ +^tools/tests/x86_emulate$ ^tools/vnet/Make.local$ ^tools/vnet/build/.*$ ^tools/vnet/gc$ diff -r daf16171a05f -r feee6422144f docs/ChangeLog --- a/docs/ChangeLog Tue Apr 01 10:30:57 2008 -0600 +++ b/docs/ChangeLog Tue Apr 01 11:29:03 2008 -0600 @@ -15,6 +15,16 @@ http://lists.xensource.com/archives/html Xen 3.3 release --------------- + +17336: Add platform capabilities field to XEN_SYSCTL_physinfo +http://xenbits.xensource.com/xen-unstable.hg?rev/250606290439 + +17289: PV framebuffer dynamic resolution facility +http://xenbits.xensource.com/xen-unstable.hg?rev/d97e61001d81 + +Guest may send XENFB_TYPE_RESIZE if feature-resize=1 in +xenstore of the backend VNC server. VNC server code sets +feature-resize if it can handle the resize request. 16857: XS_SET_TARGET http://xenbits.xensource.com/xen-unstable.hg?rev/26fc953a89bb diff -r daf16171a05f -r feee6422144f docs/src/user.tex --- a/docs/src/user.tex Tue Apr 01 10:30:57 2008 -0600 +++ b/docs/src/user.tex Tue Apr 01 11:29:03 2008 -0600 @@ -1618,9 +1618,9 @@ really takes up half of the size allocat really takes up half of the size allocated. 
For example, to create a 2GB sparse file-backed virtual block device -(actually only consumes 1KB of disk): +(actually only consumes no disk space at all): \begin{quote} - \verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=1_ + \verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=0_ \end{quote} Make a file system in the disk file: @@ -4306,14 +4306,22 @@ mailing lists and subscription informati \appendix -\chapter{Unmodified (VMX) guest domains in Xen with Intel\textregistered Virtualization Technology (VT)} - -Xen supports guest domains running unmodified Guest operating systems using Virtualization Technology (VT) available on recent Intel Processors. More information about the Intel Virtualization Technology implementing Virtual Machine Extensions (VMX) in the processor is available on the Intel website at \\ +\chapter{Unmodified (HVM) guest domains in Xen with Hardware support for Virtualization} + +Xen supports guest domains running unmodified guest operating systems using +virtualization extensions available on recent processors. Currently processors +featuring the Intel Virtualization Extension (Intel-VT) or the AMD extension +(AMD-V) are supported. The technology covering both implementations is +called HVM (for Hardware Virtual Machine) in Xen. More information about the +virtualization extensions are available on the respective websites: {\small {\tt http://www.intel.com/technology/computing/vptech}} -\section{Building Xen with VT support} - -The following packages need to be installed in order to build Xen with VT support. Some Linux distributions do not provide these packages by default. + + {\small {\tt http://www.amd.com/us-en/assets/content\_type/white\_papers\_and\_tech\_docs/24593.pdf}} + +\section{Building Xen with HVM support} + +The following packages need to be installed in order to build Xen with HVM support. Some Linux distributions do not provide these packages by default. 
\begin{tabular}{lp{11.0cm}} {\bfseries Package} & {\bfseries Description} \\ @@ -4322,70 +4330,75 @@ dev86 & The dev86 package provides an as If the dev86 package is not available on the x86\_64 distribution, you can install the i386 version of it. The dev86 rpm package for various distributions can be found at {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=dev86\&submit=Search}} \\ -LibVNCServer & The unmodified guest's VGA display, keyboard, and mouse can be virtualized by the vncserver library. You can get the sources of libvncserver from {\small {\tt http://sourceforge.net/projects/libvncserver}}. Build and install the sources on the build system to get the libvncserver library. There is a significant performance degradation in 0.8 version. The current sources in the CVS tree have fixed this degradation. So it is highly recommended to download the latest CVS sources and install them.\\ - SDL-devel, SDL & Simple DirectMedia Layer (SDL) is another way of virtualizing the unmodified guest console. It provides an X window for the guest console. If the SDL and SDL-devel packages are not installed by default on the build system, they can be obtained from {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=SDL\&submit=Search}} -, {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=SDL-devel\&submit=Search}} \\ + + +{\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=SDL-devel\&submit=Search}} \\ \end{tabular} -\section{Configuration file for unmodified VMX guests} - -The Xen installation includes a sample configuration file, {\small {\tt /etc/xen/xmexample.vmx}}. There are comments describing all the options. 
In addition to the common options that are the same as those for paravirtualized guest configurations, VMX guest configurations have the following settings: +\section{Configuration file for unmodified HVM guests} + +The Xen installation includes a sample configuration file, {\small {\tt /etc/xen/xmexample.hvm}}. There are comments describing all the options. In addition to the common options that are the same as those for paravirtualized guest configurations, HVM guest configurations have the following settings: \begin{tabular}{lp{11.0cm}} {\bfseries Parameter} & {\bfseries Description} \\ -kernel & The VMX firmware loader, {\small {\tt /usr/lib/xen/boot/vmxloader}}\\ - -builder & The domain build function. The VMX domain uses the vmx builder.\\ - -acpi & Enable VMX guest ACPI, default=0 (disabled)\\ - -apic & Enable VMX guest APIC, default=0 (disabled)\\ - -pae & Enable VMX guest PAE, default=0 (disabled)\\ - -vif & Optionally defines MAC address and/or bridge for the network interfaces. Random MACs are assigned if not given. {\small {\tt type=ioemu}} means ioemu is used to virtualize the VMX NIC. If no type is specified, vbd is used, as with paravirtualized guests.\\ - -disk & Defines the disk devices you want the domain to have access to, and what you want them accessible as. If using a physical device as the VMX guest's disk, each disk entry is of the form +kernel & The HVM firmware loader, {\small {\tt /usr/lib/xen/boot/hvmloader}}\\ + +builder & The domain build function. The HVM domain uses the 'hvm' builder.\\ + +acpi & Enable HVM guest ACPI, default=1 (enabled)\\ + +apic & Enable HVM guest APIC, default=1 (enabled)\\ + +pae & Enable HVM guest PAE, default=1 (enabled)\\ + +hap & Enable hardware-assisted paging support, such as AMD-V's nested paging +or Intel\textregistered VT's extended paging. 
If available, Xen will +use hardware-assisted paging instead of shadow paging for this guest's memory +management.\\ + +vif & Optionally defines MAC address and/or bridge for the network interfaces. Random MACs are assigned if not given. {\small {\tt type=ioemu}} means ioemu is used to virtualize the HVM NIC. If no type is specified, vbd is used, as with paravirtualized guests.\\ + +disk & Defines the disk devices you want the domain to have access to, and what you want them accessible as. If using a physical device as the HVM guest's disk, each disk entry is of the form {\small {\tt phy:UNAME,ioemu:DEV,MODE,}} -where UNAME is the device, DEV is the device name the domain will see, and MODE is r for read-only, w for read-write. ioemu means the disk will use ioemu to virtualize the VMX disk. If not adding ioemu, it uses vbd like paravirtualized guests. +where UNAME is the host device file, DEV is the device name the domain will see, and MODE is r for read-only, w for read-write. ioemu means the disk will use ioemu to virtualize the HVM disk. If not adding ioemu, it uses vbd like paravirtualized guests. If using disk image file, its form should be like {\small {\tt file:FILEPATH,ioemu:DEV,MODE}} +Optical devices can be emulated by appending cdrom to the device type + +{\small {\tt ',hdc:cdrom,r'}} + If using more than one disk, there should be a comma between each disk entry. For example: -{\scriptsize {\tt disk = ['file:/var/images/image1.img,ioemu:hda,w', 'file:/var/images/image2.img,ioemu:hdb,w']}}\\ - -cdrom & Disk image for CD-ROM. The default is {\small {\tt /dev/cdrom}} for Domain0. Inside the VMX domain, the CD-ROM will available as device {\small {\tt /dev/hdc}}. The entry can also point to an ISO file.\\ - -boot & Boot from floppy (a), hard disk (c) or CD-ROM (d). For example, to boot from CD-ROM, the entry should be: - -boot='d'\\ - -device\_model & The device emulation tool for VMX guests. 
This parameter should not be changed.\\ +{\scriptsize {\tt disk = ['file:/var/images/image1.img,ioemu:hda,w', 'phy:hda1,hdb1,w', 'file:/var/images/install1.iso,hdc:cdrom,r']}}\\ + +boot & Boot from floppy (a), hard disk (c) or CD-ROM (d). For example, to boot from CD-ROM and fallback to HD, the entry should be: + +boot='dc'\\ + +device\_model & The device emulation tool for HVM guests. This parameter should not be changed.\\ sdl & Enable SDL library for graphics, default = 0 (disabled)\\ vnc & Enable VNC library for graphics, default = 1 (enabled)\\ -vncviewer & Enable spawning of the vncviewer (only valid when vnc=1), default = 1 (enabled) - -If vnc=1 and vncviewer=0, user can use vncviewer to manually connect VMX from remote. For example: - -{\small {\tt vncviewer domain0\_IP\_address:VMX\_domain\_id}} \\ - -ne2000 & Enable ne2000, default = 0 (disabled; use pcnet)\\ - -serial & Enable redirection of VMX serial output to pty device\\ +vncconsole & Enable spawning of the vncviewer (only valid when vnc=1), default = 0 (disabled) + +If vnc=1 and vncconsole=0, user can use vncviewer to manually connect HVM from remote. For example: + +{\small {\tt vncviewer domain0\_IP\_address:HVM\_domain\_id}} \\ + +serial & Enable redirection of HVM serial output to pty device\\ \end{tabular} @@ -4416,9 +4429,9 @@ Details about mouse emulation are provid localtime & Set the real time clock to local time [default=0, that is, set to UTC].\\ -enable-audio & Enable audio support. This is under development.\\ - -full-screen & Start in full screen. This is under development.\\ +soundhw & Enable sound card support and specify the hardware to emulate. Values can be sb16, es1370 or all. Default is none.\\ + +full-screen & Start in full screen.\\ nographic & Another way to redirect serial output. If enabled, no 'sdl' or 'vnc' can work. 
Not recommended.\\ @@ -4430,18 +4443,18 @@ If you are using a physical disk or phys If you are using a physical disk or physical disk partition, you need to install a Linux OS on the disk first. Then the boot loader should be installed in the correct place. For example {\small {\tt dev/sda}} for booting from the whole disk, or {\small {\tt /dev/sda1}} for booting from partition 1. \subsection{Using disk image files} -You need to create a large empty disk image file first; then, you need to install a Linux OS onto it. There are two methods you can choose. One is directly installing it using a VMX guest while booting from the OS installation CD-ROM. The other is copying an installed OS into it. The boot loader will also need to be installed. +You need to create a large empty disk image file first; then, you need to install a Linux OS onto it. There are two methods you can choose. One is directly installing it using a HVM guest while booting from the OS installation CD-ROM. The other is copying an installed OS into it. The boot loader will also need to be installed. \subsubsection*{To create the image file:} The image size should be big enough to accommodate the entire OS. This example assumes the size is 1G (which is probably too small for most OSes). -{\small {\tt \# dd if=/dev/zero of=hd.img bs=1M count=1 seek=1023}} - -\subsubsection*{To directly install Linux OS into an image file using a VMX guest:} - -Install Xen and create VMX with the original image file with booting from CD-ROM. Then it is just like a normal Linux OS installation. The VMX configuration file should have these two entries before creating: - -{\small {\tt cdrom='/dev/cdrom' +{\small {\tt \# dd if=/dev/zero of=hd.img bs=1M count=0 seek=1024}} + +\subsubsection*{To directly install Linux OS into an image file using a HVM guest:} + +Install Xen and create HVM with the original image file with booting from CD-ROM. Then it is just like a normal Linux OS installation. 
The HVM configuration file should have a stanza for the CD-ROM as well as a boot device specification: + +{\small {\tt disk=['file:/var/images/your-hd.img,hda,w', ',hdc:cdrom,r' ] boot='d'}} If this method does not succeed, you can choose the following method of copying an installed Linux OS into an image file. @@ -4509,31 +4522,28 @@ none /sys sysfs Now, the guest OS image {\small {\tt hd.img}} is ready. You can also reference {\small {\tt http://free.oszoo.org}} for quickstart images. But make sure to install the boot loader. -\subsection{Install Windows into an Image File using a VMX guest} -In order to install a Windows OS, you should keep {\small {\tt acpi=0}} in your VMX configuration file. - -\section{VMX Guests} -\subsection{Editing the Xen VMX config file} -Make a copy of the example VMX configuration file {\small {\tt /etc/xen/xmeaxmple.vmx}} and edit the line that reads - -{\small {\tt disk = [ 'file:/var/images/\emph{guest.img},ioemu:hda,w' ]}} - -replacing \emph{guest.img} with the name of the guest OS image file you just made. - -\subsection{Creating VMX guests} -Simply follow the usual method of creating the guest, using the -f parameter and providing the filename of your VMX configuration file:\\ +\section{HVM Guests} +\subsection{Editing the Xen HVM config file} +Make a copy of the example HVM configuration file {\small {\tt /etc/xen/xmexample.hvm}} and edit the line that reads + +{\small {\tt disk = [ 'file:/var/images/\emph{min-el3-i386.img},hda,w' ]}} + +replacing \emph{min-el3-i386.img} with the name of the guest OS image file you just made. + +\subsection{Creating HVM guests} +Simply follow the usual method of creating the guest, providing the filename of your HVM configuration file:\\ {\small {\tt \# xend start\\ -\# xm create /etc/xen/vmxguest.vmx}} - -In the default configuration, VNC is on and SDL is off. Therefore VNC windows will open when VMX guests are created. 
If you want to use SDL to create VMX guests, set {\small {\tt sdl=1}} in your VMX configuration file. You can also turn off VNC by setting {\small {\tt vnc=0}}. +\# xm create /etc/xen/hvmguest.hvm}} + +In the default configuration, VNC is on and SDL is off. Therefore VNC windows will open when HVM guests are created. If you want to use SDL to create HVM guests, set {\small {\tt sdl=1}} in your HVM configuration file. You can also turn off VNC by setting {\small {\tt vnc=0}}. \subsection{Mouse issues, especially under VNC} Mouse handling when using VNC is a little problematic. The problem is that the VNC viewer provides a virtual pointer which is located at an absolute location in the VNC window and only absolute coordinates are provided. -The VMX device model converts these absolute mouse coordinates +The HVM device model converts these absolute mouse coordinates into the relative motion deltas that are expected by the PS/2 mouse driver running in the guest. Unfortunately, @@ -4550,7 +4560,7 @@ can be provided by the device model emul can be provided by the device model emulation code.) To deal with these mouse issues there are 4 different -mouse emulations available from the VMX device model: +mouse emulations available from the HVM device model: \begin{description} \item[PS/2 mouse over the PS/2 port.] @@ -4845,7 +4855,7 @@ and product id and product id \textbf{310b}. This device could be made available -to the VMX guest by including the +to the HVM guest by including the config file entry {\small \begin{verbatim} @@ -4959,7 +4969,7 @@ will remove the USB mouse will remove the USB mouse driver from the Dom0 kernel and the mouse will now be -accessible by the VMX guest. +accessible by the HVM guest. Be aware the the Linux USB hotplug system will reload @@ -4981,26 +4991,25 @@ reloaded. reloaded. \end{description} -\subsection{Destroy VMX guests} -VMX guests can be destroyed in the same way as can paravirtualized guests. 
We recommend that you type the command +\subsection{Destroy HVM guests} +HVM guests can be destroyed in the same way as can paravirtualized guests. We recommend that you shut-down the guest using the guest OS' provided method, for Linux, type the command {\small {\tt poweroff}} -in the VMX guest's console first to prevent data loss. Then execute the command +in the HVM guest's console, for Windows use Start -> Shutdown first to prevent +data loss. Depending on the configuration the guest will be automatically +destroyed, otherwise execute the command {\small {\tt xm destroy \emph{vmx\_guest\_id} }} at the Domain0 console. -\subsection{VMX window (X or VNC) Hot Key} -If you are running in the X environment after creating a VMX guest, an X window is created. There are several hot keys for control of the VMX guest that can be used in the window. +\subsection{HVM window (X or VNC) Hot Key} +If you are running in the X environment after creating a HVM guest, an X window is created. There are several hot keys for control of the HVM guest that can be used in the window. -{\bfseries Ctrl+Alt+2} switches from guest VGA window to the control window. Typing {\small {\tt help }} shows the control commands help. For example, 'q' is the command to destroy the VMX guest.\\ -{\bfseries Ctrl+Alt+1} switches back to VMX guest's VGA.\\ -{\bfseries Ctrl+Alt+3} switches to serial port output. It captures serial output from the VMX guest. It works only if the VMX guest was configured to use the serial port. \\ - -\subsection{Save/Restore and Migration} -VMX guests currently cannot be saved and restored, nor migrated. These features are currently under active development. +{\bfseries Ctrl+Alt+2} switches from guest VGA window to the control window. Typing {\small {\tt help }} shows the control commands help. For example, 'q' is the command to destroy the HVM guest.\\ +{\bfseries Ctrl+Alt+1} switches back to HVM guest's VGA.\\ +{\bfseries Ctrl+Alt+3} switches to serial port output. 
It captures serial output from the HVM guest. It works only if the HVM guest was configured to use the serial port. \\ \chapter{Vnets - Domain Virtual Networking} diff -r daf16171a05f -r feee6422144f docs/xen-api/revision-history.tex --- a/docs/xen-api/revision-history.tex Tue Apr 01 10:30:57 2008 -0600 +++ b/docs/xen-api/revision-history.tex Tue Apr 01 11:29:03 2008 -0600 @@ -23,12 +23,19 @@ \end{flushleft} \end{minipage}\\ \hline - 1.0.2 & 11th Feb. 08 & S. Berger & + 1.0.3 & 11th Feb. 08 & S. Berger & \begin{minipage}[t]{7cm} \begin{flushleft} Added table of contents and hyperlink cross reference. \end{flushleft} \end{minipage}\\ \hline + 1.0.4 & 23rd March 08 & S. Berger & + \begin{minipage}[t]{7cm} + \begin{flushleft} + Added XSPolicy.can\_run + \end{flushleft} + \end{minipage}\\ + \hline \end{tabular} -\end{center} \ No newline at end of file +\end{center} diff -r daf16171a05f -r feee6422144f docs/xen-api/xenapi-coversheet.tex --- a/docs/xen-api/xenapi-coversheet.tex Tue Apr 01 10:30:57 2008 -0600 +++ b/docs/xen-api/xenapi-coversheet.tex Tue Apr 01 11:29:03 2008 -0600 @@ -22,7 +22,7 @@ \newcommand{\releasestatement}{Stable Release} %% Document revision -\newcommand{\revstring}{API Revision 1.0.2} +\newcommand{\revstring}{API Revision 1.0.4} %% Document authors \newcommand{\docauthors}{ diff -r daf16171a05f -r feee6422144f docs/xen-api/xenapi-datamodel.tex --- a/docs/xen-api/xenapi-datamodel.tex Tue Apr 01 10:30:57 2008 -0600 +++ b/docs/xen-api/xenapi-datamodel.tex Tue Apr 01 11:29:03 2008 -0600 @@ -14938,6 +14938,41 @@ Currently active instantiation flags. \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} +\subsubsection{RPC name:~can\_run} + +{\bf Overview:} +Check whether a VM with the given security label could run on the system. 
+ + \noindent {\bf Signature:} +\begin{verbatim} int can_run (session_id s, string security_label)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt string } & security_label & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +Error code indicating whether a VM with the given security label could run. +If zero, it can run. + +\vspace{0.3cm} + +\noindent{\bf Possible Error Codes:} {\tt SECURITY\_ERROR} + \subsubsection{RPC name:~get\_all} {\bf Overview:} diff -r daf16171a05f -r feee6422144f extras/mini-os/blkfront.c --- a/extras/mini-os/blkfront.c Tue Apr 01 10:30:57 2008 -0600 +++ b/extras/mini-os/blkfront.c Tue Apr 01 11:29:03 2008 -0600 @@ -319,6 +319,7 @@ int blkfront_aio_poll(struct blkfront_de { RING_IDX rp, cons; struct blkif_response *rsp; + int more; moretodo: #ifdef HAVE_LIBC @@ -334,6 +335,7 @@ moretodo: while ((cons != rp)) { rsp = RING_GET_RESPONSE(&dev->ring, cons); + nr_consumed++; if (rsp->status != BLKIF_RSP_OKAY) printk("block error %d for op %d\n", rsp->status, rsp->operation); @@ -343,29 +345,30 @@ moretodo: case BLKIF_OP_WRITE: { struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id; + int status = rsp->status; int j; for (j = 0; j < aiocbp->n; j++) gnttab_end_access(aiocbp->gref[j]); + dev->ring.rsp_cons = ++cons; /* Nota: callback frees aiocbp itself */ - aiocbp->aio_cb(aiocbp, rsp->status ? -EIO : 0); + aiocbp->aio_cb(aiocbp, status ? 
-EIO : 0); + if (dev->ring.rsp_cons != cons) + /* We reentered, we must not continue here */ + goto out; break; } + default: + printk("unrecognized block operation %d response\n", rsp->operation); case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_FLUSH_DISKCACHE: - break; - default: - printk("unrecognized block operation %d response\n", rsp->operation); + dev->ring.rsp_cons = ++cons; break; } - - nr_consumed++; - ++cons; - } - dev->ring.rsp_cons = cons; - - int more; + } + +out: RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more); if (more) goto moretodo; diff -r daf16171a05f -r feee6422144f extras/mini-os/gnttab.c --- a/extras/mini-os/gnttab.c Tue Apr 01 10:30:57 2008 -0600 +++ b/extras/mini-os/gnttab.c Tue Apr 01 11:29:03 2008 -0600 @@ -32,6 +32,9 @@ static grant_entry_t *gnttab_table; static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; +#ifdef GNT_DEBUG +static char inuse[NR_GRANT_ENTRIES]; +#endif static __DECLARE_SEMAPHORE_GENERIC(gnttab_sem, NR_GRANT_ENTRIES); static void @@ -39,6 +42,10 @@ put_free_entry(grant_ref_t ref) { unsigned long flags; local_irq_save(flags); +#ifdef GNT_DEBUG + BUG_ON(!inuse[ref]); + inuse[ref] = 0; +#endif gnttab_list[ref] = gnttab_list[0]; gnttab_list[0] = ref; local_irq_restore(flags); @@ -54,6 +61,10 @@ get_free_entry(void) local_irq_save(flags); ref = gnttab_list[0]; gnttab_list[0] = gnttab_list[ref]; +#ifdef GNT_DEBUG + BUG_ON(inuse[ref]); + inuse[ref] = 1; +#endif local_irq_restore(flags); return ref; } @@ -92,10 +103,12 @@ gnttab_end_access(grant_ref_t ref) { u16 flags, nflags; + BUG_ON(ref >= NR_GRANT_ENTRIES || ref < NR_RESERVED_ENTRIES); + nflags = gnttab_table[ref].flags; do { if ((flags = nflags) & (GTF_reading|GTF_writing)) { - printk("WARNING: g.e. still in use!\n"); + printk("WARNING: g.e. still in use! 
(%x)\n", flags); return 0; } } while ((nflags = synch_cmpxchg(&gnttab_table[ref].flags, flags, 0)) != @@ -110,6 +123,8 @@ gnttab_end_transfer(grant_ref_t ref) { unsigned long frame; u16 flags; + + BUG_ON(ref >= NR_GRANT_ENTRIES || ref < NR_RESERVED_ENTRIES); while (!((flags = gnttab_table[ref].flags) & GTF_transfer_committed)) { if (synch_cmpxchg(&gnttab_table[ref].flags, flags, 0) == flags) { @@ -164,6 +179,9 @@ init_gnttab(void) unsigned long frames[NR_GRANT_FRAMES]; int i; +#ifdef GNT_DEBUG + memset(inuse, 1, sizeof(inuse)); +#endif for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) put_free_entry(i); diff -r daf16171a05f -r feee6422144f extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Tue Apr 01 10:30:57 2008 -0600 +++ b/extras/mini-os/kernel.c Tue Apr 01 11:29:03 2008 -0600 @@ -341,7 +341,7 @@ static void kbdfront_thread(void *p) { struct kbdfront_dev *kbd_dev; DEFINE_WAIT(w); - int x = WIDTH / 2, y = HEIGHT / 2, z; + int x = WIDTH / 2, y = HEIGHT / 2, z = 0; kbd_dev = init_kbdfront(NULL, 1); if (!kbd_dev) diff -r daf16171a05f -r feee6422144f extras/mini-os/minios.mk --- a/extras/mini-os/minios.mk Tue Apr 01 10:30:57 2008 -0600 +++ b/extras/mini-os/minios.mk Tue Apr 01 11:29:03 2008 -0600 @@ -16,6 +16,10 @@ DEF_LDFLAGS = ifeq ($(debug),y) DEF_CFLAGS += -g +#DEF_CFLAGS += -DMM_DEBUG +#DEF_CFLAGS += -DFS_DEBUG +#DEF_CFLAGS += -DLIBC_DEBUG +DEF_CFLAGS += -DGNT_DEBUG else DEF_CFLAGS += -O3 endif diff -r daf16171a05f -r feee6422144f extras/mini-os/netfront.c --- a/extras/mini-os/netfront.c Tue Apr 01 10:30:57 2008 -0600 +++ b/extras/mini-os/netfront.c Tue Apr 01 11:29:03 2008 -0600 @@ -120,6 +120,7 @@ moretodo: if (rx->status == NETIF_RSP_NULL) continue; int id = rx->id; + BUG_ON(id >= NET_TX_RING_SIZE); buf = &dev->rx_buffers[id]; page = (unsigned char*)buf->page; @@ -204,6 +205,7 @@ void network_tx_buf_gc(struct netfront_d printk("packet error\n"); id = txrsp->id; + BUG_ON(id >= NET_TX_RING_SIZE); struct net_buffer* buf = &dev->tx_buffers[id]; 
gnttab_end_access(buf->gref); buf->gref=GRANT_INVALID_REF; @@ -510,6 +512,8 @@ void netfront_xmit(struct netfront_dev * struct net_buffer* buf; void* page; + BUG_ON(len > PAGE_SIZE); + down(&dev->tx_sem); local_irq_save(flags); diff -r daf16171a05f -r feee6422144f tools/blktap/drivers/block-qcow.c --- a/tools/blktap/drivers/block-qcow.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/blktap/drivers/block-qcow.c Tue Apr 01 11:29:03 2008 -0600 @@ -76,6 +76,7 @@ #define QCOW_OFLAG_COMPRESSED (1LL << 63) #define SPARSE_FILE 0x01 +#define EXTHDR_L1_BIG_ENDIAN 0x02 #ifndef O_BINARY #define O_BINARY 0 @@ -147,19 +148,30 @@ static int decompress_cluster(struct tdq static uint32_t gen_cksum(char *ptr, int len) { + int i; unsigned char *md; uint32_t ret; md = malloc(MD5_DIGEST_LENGTH); if(!md) return 0; - - if (MD5((unsigned char *)ptr, len, md) != md) { - free(md); - return 0; - } - - memcpy(&ret, md, sizeof(uint32_t)); + + /* Convert L1 table to big endian */ + for(i = 0; i < len / sizeof(uint64_t); i++) { + cpu_to_be64s(&((uint64_t*) ptr)[i]); + } + + /* Generate checksum */ + if (MD5((unsigned char *)ptr, len, md) != md) + ret = 0; + else + memcpy(&ret, md, sizeof(uint32_t)); + + /* Convert L1 table back to native endianess */ + for(i = 0; i < len / sizeof(uint64_t); i++) { + be64_to_cpus(&((uint64_t*) ptr)[i]); + } + free(md); return ret; } @@ -354,7 +366,8 @@ static uint64_t get_cluster_offset(struc int n_start, int n_end) { int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector; - char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr; + char *tmp_ptr2, *l2_ptr, *l1_ptr; + uint64_t *tmp_ptr; uint64_t l2_offset, *l2_table, cluster_offset, tmp; uint32_t min_count; int new_l2_table; @@ -400,6 +413,11 @@ static uint64_t get_cluster_offset(struc DPRINTF("ERROR allocating memory for L1 table\n"); } memcpy(tmp_ptr, l1_ptr, 4096); + + /* Convert block to write to big endian */ + for(i = 0; i < 4096 / sizeof(uint64_t); i++) { + cpu_to_be64s(&tmp_ptr[i]); + } /* * Issue non-asynchronous 
L1 write. @@ -777,7 +795,7 @@ int tdqcow_open (struct disk_driver *dd, goto fail; for(i = 0; i < s->l1_size; i++) { - //be64_to_cpus(&s->l1_table[i]); + be64_to_cpus(&s->l1_table[i]); //DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]); if (s->l1_table[i] > final_cluster) final_cluster = s->l1_table[i]; @@ -810,6 +828,38 @@ int tdqcow_open (struct disk_driver *dd, be32_to_cpus(&exthdr->xmagic); if(exthdr->xmagic != XEN_MAGIC) goto end_xenhdr; + + /* Try to detect old tapdisk images. They have to be fixed because + * they don't use big endian but native endianess for the L1 table */ + if ((exthdr->flags & EXTHDR_L1_BIG_ENDIAN) == 0) { + + /* + The image is broken. Fix it. The L1 table has already been + byte-swapped, so we can write it to the image file as it is + currently in memory. Then swap it back to native endianess + for operation. + */ + + DPRINTF("qcow: Converting image to big endian L1 table\n"); + + lseek(fd, s->l1_table_offset, SEEK_SET); + if (write(fd, s->l1_table, l1_table_size) != l1_table_size) { + DPRINTF("qcow: Failed to write new L1 table\n"); + goto fail; + } + + for(i = 0;i < s->l1_size; i++) { + cpu_to_be64s(&s->l1_table[i]); + } + + /* Write the big endian flag to the extended header */ + exthdr->flags |= EXTHDR_L1_BIG_ENDIAN; + + if (write(fd, buf, 512) != 512) { + DPRINTF("qcow: Failed to write extended header\n"); + goto fail; + } + } /*Finally check the L1 table cksum*/ be32_to_cpus(&exthdr->cksum); diff -r daf16171a05f -r feee6422144f tools/firmware/hvmloader/Makefile --- a/tools/firmware/hvmloader/Makefile Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/firmware/hvmloader/Makefile Tue Apr 01 11:29:03 2008 -0600 @@ -42,16 +42,21 @@ OBJS = $(patsubst %.c,%.o,$(SRCS)) .PHONY: all all: hvmloader -hvmloader: roms.h subdirs-all $(SRCS) - $(CC) $(CFLAGS) -c $(SRCS) - $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(LOADADDR) -o hvmloader.tmp $(OBJS) acpi/acpi.a +smbios.o: CFLAGS += -D__SMBIOS_DATE__="\"$(shell date +%m/%d/%Y)\"" + +hvmloader: roms.h subdirs-all 
$(OBJS) + $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(LOADADDR) \ + -o hvmloader.tmp $(OBJS) acpi/acpi.a $(OBJCOPY) hvmloader.tmp hvmloader rm -f hvmloader.tmp -roms.h: ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../etherboot/eb-roms.h ../extboot/extboot.bin +roms.h: ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin \ + ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../etherboot/eb-roms.h \ + ../extboot/extboot.bin sh ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h sh ./mkhex vgabios_stdvga ../vgabios/VGABIOS-lgpl-latest.bin >> roms.h - sh ./mkhex vgabios_cirrusvga ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h + sh ./mkhex vgabios_cirrusvga \ + ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h cat ../etherboot/eb-roms.h >> roms.h sh ./mkhex extboot ../extboot/extboot.bin >> roms.h diff -r daf16171a05f -r feee6422144f tools/firmware/hvmloader/hvmloader.c --- a/tools/firmware/hvmloader/hvmloader.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/firmware/hvmloader/hvmloader.c Tue Apr 01 11:29:03 2008 -0600 @@ -420,6 +420,8 @@ int main(void) init_hypercalls(); + printf("CPU speed is %u MHz\n", get_cpu_mhz()); + printf("Writing SMBIOS tables ...\n"); smbios_sz = hvm_write_smbios_tables(); diff -r daf16171a05f -r feee6422144f tools/firmware/hvmloader/smbios.c --- a/tools/firmware/hvmloader/smbios.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/firmware/hvmloader/smbios.c Tue Apr 01 11:29:03 2008 -0600 @@ -21,6 +21,7 @@ */ #include <stdint.h> +#include <xen/xen.h> #include <xen/version.h> #include "smbios_types.h" #include "util.h" @@ -246,13 +247,14 @@ smbios_entry_point_init(void *start, int i; struct smbios_entry_point *ep = (struct smbios_entry_point *)start; + memset(ep, 0, sizeof(*ep)); + strncpy(ep->anchor_string, "_SM_", 4); ep->length = 0x1f; ep->smbios_major_version = 2; ep->smbios_minor_version = 4; ep->max_structure_size = max_structure_size; ep->entry_point_revision = 0; - 
memset(ep->formatted_area, 0, 5); strncpy(ep->intermediate_anchor_string, "_DMI_", 5); ep->structure_table_length = structure_table_length; @@ -260,9 +262,6 @@ smbios_entry_point_init(void *start, ep->number_of_structures = number_of_structures; ep->smbios_bcd_revision = 0x24; - ep->checksum = 0; - ep->intermediate_checksum = 0; - sum = 0; for ( i = 0; i < 0x10; i++ ) sum += ((int8_t *)start)[i]; @@ -280,22 +279,27 @@ smbios_type_0_init(void *start, const ch uint32_t xen_major_version, uint32_t xen_minor_version) { struct smbios_type_0 *p = (struct smbios_type_0 *)start; - + static const char *smbios_release_date = __SMBIOS_DATE__; + + memset(p, 0, sizeof(*p)); + p->header.type = 0; p->header.length = sizeof(struct smbios_type_0); p->header.handle = 0; - + p->vendor_str = 1; p->version_str = 2; p->starting_address_segment = 0xe800; - p->release_date_str = 0; + p->release_date_str = 3; p->rom_size = 0; - - memset(p->characteristics, 0, 8); - p->characteristics[7] = 0x08; /* BIOS characteristics not supported */ - p->characteristics_extension_bytes[0] = 0; - p->characteristics_extension_bytes[1] = 0; - + + /* BIOS Characteristics. */ + p->characteristics[0] = 0x80; /* PCI is supported */ + p->characteristics[2] = 0x08; /* EDD is supported */ + + /* Extended Characteristics: Enable Targeted Content Distribution. 
*/ + p->characteristics_extension_bytes[1] = 0x04; + p->major_release = (uint8_t) xen_major_version; p->minor_release = (uint8_t) xen_minor_version; p->embedded_controller_major = 0xff; @@ -306,6 +310,8 @@ smbios_type_0_init(void *start, const ch start += strlen("Xen") + 1; strcpy((char *)start, xen_version); start += strlen(xen_version) + 1; + strcpy((char *)start, smbios_release_date); + start += strlen(smbios_release_date) + 1; *((uint8_t *)start) = 0; return start + 1; @@ -318,6 +324,9 @@ smbios_type_1_init(void *start, const ch { char uuid_str[37]; struct smbios_type_1 *p = (struct smbios_type_1 *)start; + + memset(p, 0, sizeof(*p)); + p->header.type = 1; p->header.length = sizeof(struct smbios_type_1); p->header.handle = 0x100; @@ -355,6 +364,8 @@ smbios_type_3_init(void *start) { struct smbios_type_3 *p = (struct smbios_type_3 *)start; + memset(p, 0, sizeof(*p)); + p->header.type = 3; p->header.length = sizeof(struct smbios_type_3); p->header.handle = 0x300; @@ -379,11 +390,14 @@ smbios_type_3_init(void *start) /* Type 4 -- Processor Information */ static void * -smbios_type_4_init(void *start, unsigned int cpu_number, char *cpu_manufacturer) +smbios_type_4_init( + void *start, unsigned int cpu_number, char *cpu_manufacturer) { char buf[80]; struct smbios_type_4 *p = (struct smbios_type_4 *)start; uint32_t eax, ebx, ecx, edx; + + memset(p, 0, sizeof(*p)); p->header.type = 4; p->header.length = sizeof(struct smbios_type_4); @@ -403,8 +417,7 @@ smbios_type_4_init(void *start, unsigned p->voltage = 0; p->external_clock = 0; - p->max_speed = 0; /* unknown */ - p->current_speed = 0; /* unknown */ + p->max_speed = p->current_speed = get_cpu_mhz(); p->status = 0x41; /* socket populated, CPU enabled */ p->upgrade = 0x01; /* other */ @@ -430,6 +443,8 @@ smbios_type_16_init(void *start, uint32_ smbios_type_16_init(void *start, uint32_t memsize) { struct smbios_type_16 *p = (struct smbios_type_16*)start; + + memset(p, 0, sizeof(*p)); p->header.type = 16; 
p->header.handle = 0x1000; @@ -453,6 +468,8 @@ smbios_type_17_init(void *start, uint32_ { struct smbios_type_17 *p = (struct smbios_type_17 *)start; + memset(p, 0, sizeof(*p)); + p->header.type = 17; p->header.length = sizeof(struct smbios_type_17); p->header.handle = 0x1100; @@ -484,6 +501,8 @@ smbios_type_19_init(void *start, uint32_ { struct smbios_type_19 *p = (struct smbios_type_19 *)start; + memset(p, 0, sizeof(*p)); + p->header.type = 19; p->header.length = sizeof(struct smbios_type_19); p->header.handle = 0x1300; @@ -503,6 +522,8 @@ smbios_type_20_init(void *start, uint32_ smbios_type_20_init(void *start, uint32_t memory_size_mb) { struct smbios_type_20 *p = (struct smbios_type_20 *)start; + + memset(p, 0, sizeof(*p)); p->header.type = 20; p->header.length = sizeof(struct smbios_type_20); @@ -528,6 +549,8 @@ smbios_type_32_init(void *start) { struct smbios_type_32 *p = (struct smbios_type_32 *)start; + memset(p, 0, sizeof(*p)); + p->header.type = 32; p->header.length = sizeof(struct smbios_type_32); p->header.handle = 0x2000; @@ -544,6 +567,8 @@ smbios_type_127_init(void *start) smbios_type_127_init(void *start) { struct smbios_type_127 *p = (struct smbios_type_127 *)start; + + memset(p, 0, sizeof(*p)); p->header.type = 127; p->header.length = sizeof(struct smbios_type_127); diff -r daf16171a05f -r feee6422144f tools/firmware/hvmloader/util.c --- a/tools/firmware/hvmloader/util.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/firmware/hvmloader/util.c Tue Apr 01 11:29:03 2008 -0600 @@ -21,7 +21,10 @@ #include "util.h" #include "config.h" #include "e820.h" +#include "hypercall.h" #include <stdint.h> +#include <xen/xen.h> +#include <xen/memory.h> #include <xen/hvm/hvm_info_table.h> void outb(uint16_t addr, uint8_t val) @@ -585,6 +588,56 @@ int get_apic_mode(void) return (t ? 
t->apic_mode : 1); } +uint16_t get_cpu_mhz(void) +{ + struct xen_add_to_physmap xatp; + struct shared_info *shared_info = (struct shared_info *)0xa0000; + struct vcpu_time_info *info = &shared_info->vcpu_info[0].time; + uint64_t cpu_khz; + uint32_t tsc_to_nsec_mul, version; + int8_t tsc_shift; + + static uint16_t cpu_mhz; + if ( cpu_mhz != 0 ) + return cpu_mhz; + + /* Map shared-info page to 0xa0000 (i.e., overlap VGA hole). */ + xatp.domid = DOMID_SELF; + xatp.space = XENMAPSPACE_shared_info; + xatp.idx = 0; + xatp.gpfn = (unsigned long)shared_info >> 12; + if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) + BUG(); + + /* Get a consistent snapshot of scale factor (multiplier and shift). */ + do { + version = info->version; + rmb(); + tsc_to_nsec_mul = info->tsc_to_system_mul; + tsc_shift = info->tsc_shift; + rmb(); + } while ((version & 1) | (version ^ info->version)); + + /* Compute CPU speed in kHz. */ + cpu_khz = 1000000ull << 32; + do_div(cpu_khz, tsc_to_nsec_mul); + if ( tsc_shift < 0 ) + cpu_khz = cpu_khz << -tsc_shift; + else + cpu_khz = cpu_khz >> tsc_shift; + + /* Get the VGA MMIO hole back by remapping shared info to scratch. 
*/ + xatp.domid = DOMID_SELF; + xatp.space = XENMAPSPACE_shared_info; + xatp.idx = 0; + xatp.gpfn = 0xfffff; /* scratch pfn */ + if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) + BUG(); + + cpu_mhz = (uint16_t)(((uint32_t)cpu_khz + 500) / 1000); + return cpu_mhz; +} + /* * Local variables: * mode: C diff -r daf16171a05f -r feee6422144f tools/firmware/hvmloader/util.h --- a/tools/firmware/hvmloader/util.h Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/firmware/hvmloader/util.h Tue Apr 01 11:29:03 2008 -0600 @@ -10,11 +10,11 @@ #undef NULL #define NULL ((void*)0) -extern void __assert_failed(char *assertion, char *file, int line) +void __assert_failed(char *assertion, char *file, int line) __attribute__((noreturn)); #define ASSERT(p) \ do { if (!(p)) __assert_failed(#p, __FILE__, __LINE__); } while (0) -extern void __bug(char *file, int line) __attribute__((noreturn)); +void __bug(char *file, int line) __attribute__((noreturn)); #define BUG() __bug(__FILE__, __LINE__) #define BUG_ON(p) do { if (p) BUG(); } while (0) #define BUILD_BUG_ON(p) ((void)sizeof(char[1 - 2 * !!(p)])) @@ -49,9 +49,53 @@ void pci_write(uint32_t devfn, uint32_t #define pci_writew(devfn, reg, val) (pci_write(devfn, reg, 2, (uint16_t)val)) #define pci_writel(devfn, reg, val) (pci_write(devfn, reg, 4, (uint32_t)val)) +/* Get CPU speed in MHz. */ +uint16_t get_cpu_mhz(void); + /* Do cpuid instruction, with operation 'idx' */ void cpuid(uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); + +/* Read the TSC register. */ +static inline uint64_t rdtsc(void) +{ + uint64_t tsc; + asm volatile ( "rdtsc" : "=A" (tsc) ); + return tsc; +} + +/* Relax the CPU and let the compiler know that time passes. */ +static inline void cpu_relax(void) +{ + asm volatile ( "rep ; nop" : : : "memory" ); +} + +/* Memory barriers. */ +#define barrier() asm volatile ( "" : : : "memory" ) +#define rmb() barrier() +#define wmb() barrier() + +/* + * Divide a 64-bit dividend by a 32-bit divisor. 
+ * (1) Overwrites the 64-bit dividend _in_place_ with the quotient + * (2) Returns the 32-bit remainder + */ +#define do_div(n, base) ({ \ + unsigned long __upper, __low, __high, __mod, __base; \ + __base = (base); \ + asm ( "" : "=a" (__low), "=d" (__high) : "A" (n) ); \ + __upper = __high; \ + if ( __high ) \ + { \ + __upper = __high % (__base); \ + __high = __high / (__base); \ + } \ + asm ( "divl %2" \ + : "=a" (__low), "=d" (__mod) \ + : "rm" (__base), "0" (__low), "1" (__upper) ); \ + asm ( "" : "=A" (n) : "a" (__low), "d" (__high) ); \ + __mod; \ +}) /* HVM-builder info. */ int get_vcpu_nr(void); diff -r daf16171a05f -r feee6422144f tools/include/xen-foreign/mkheader.py --- a/tools/include/xen-foreign/mkheader.py Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/include/xen-foreign/mkheader.py Tue Apr 01 11:29:03 2008 -0600 @@ -37,8 +37,8 @@ inttypes["x86_64"] = { "xen_pfn_t" : "__align8__ uint64_t", }; header["x86_64"] = """ -#ifdef __GNUC__ -# define __DECL_REG(name) __extension__ union { uint64_t r ## name, e ## name; } +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +# define __DECL_REG(name) union { uint64_t r ## name, e ## name; } # define __align8__ __attribute__((aligned (8))) #else # define __DECL_REG(name) uint64_t r ## name diff -r daf16171a05f -r feee6422144f tools/ioemu/block-qcow.c --- a/tools/ioemu/block-qcow.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/block-qcow.c Tue Apr 01 11:29:03 2008 -0600 @@ -37,6 +37,11 @@ #define QCOW_OFLAG_COMPRESSED (1LL << 63) +#define XEN_MAGIC (('X' << 24) | ('E' << 16) | ('N' << 8) | 0xfb) + +#define EXTHDR_SPARSE_FILE 0x01 +#define EXTHDR_L1_BIG_ENDIAN 0x02 + typedef struct QCowHeader { uint32_t magic; uint32_t version; @@ -49,6 +54,14 @@ typedef struct QCowHeader { uint32_t crypt_method; uint64_t l1_table_offset; } QCowHeader; + +/*Extended header for Xen enhancements*/ +typedef struct QCowHeader_ext { + uint32_t xmagic; + uint32_t cksum; + uint32_t min_cluster_alloc; + uint32_t flags; +} QCowHeader_ext; 
#define L2_CACHE_SIZE 16 @@ -137,6 +150,51 @@ static int qcow_open(BlockDriverState *b if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) != s->l1_size * sizeof(uint64_t)) goto fail; + + /* Try to detect old tapdisk images. They have to be fixed because they + * don't use big endian but native endianess for the L1 table */ + if (header.backing_file_offset == 0 && s->l1_table_offset % 4096 == 0) { + + QCowHeader_ext exthdr; + uint64_t l1_bytes = s->l1_size * sizeof(uint64_t); + + if (bdrv_pread(s->hd, sizeof(header), &exthdr, sizeof(exthdr)) + != sizeof(exthdr)) + goto end_xenhdr; + + be32_to_cpus(&exthdr.xmagic); + if (exthdr.xmagic != XEN_MAGIC) + goto end_xenhdr; + + be32_to_cpus(&exthdr.flags); + if (exthdr.flags & EXTHDR_L1_BIG_ENDIAN) + goto end_xenhdr; + + /* The image is broken. Fix it. */ + fprintf(stderr, "qcow: Converting image to big endian L1 table\n"); + + for(i = 0;i < s->l1_size; i++) { + cpu_to_be64s(&s->l1_table[i]); + } + + if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, + l1_bytes) != l1_bytes) { + fprintf(stderr, "qcow: Failed to write new L1 table\n"); + goto fail; + } + + exthdr.flags |= EXTHDR_L1_BIG_ENDIAN; + cpu_to_be32s(&exthdr.flags); + + if (bdrv_pwrite(s->hd, sizeof(header), &exthdr, sizeof(exthdr)) + != sizeof(exthdr)) { + fprintf(stderr, "qcow: Failed to write extended header\n"); + goto fail; + } + } +end_xenhdr: + + /* L1 table is big endian now */ for(i = 0;i < s->l1_size; i++) { be64_to_cpus(&s->l1_table[i]); } @@ -725,6 +783,13 @@ static void qcow_aio_cancel(BlockDriverA qemu_aio_release(acb); } +static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVQcowState *s = bs->opaque; + return bdrv_aio_flush(s->hd, cb, opaque); +} + static void qcow_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; @@ -869,10 +934,10 @@ static int qcow_write_compressed(BlockDr return 0; } -static void qcow_flush(BlockDriverState *bs) -{ - 
BDRVQcowState *s = bs->opaque; - bdrv_flush(s->hd); +static int qcow_flush(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + return bdrv_flush(s->hd); } static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) @@ -899,6 +964,7 @@ BlockDriver bdrv_qcow = { .bdrv_aio_read = qcow_aio_read, .bdrv_aio_write = qcow_aio_write, .bdrv_aio_cancel = qcow_aio_cancel, + .bdrv_aio_flush = qcow_aio_flush, .aiocb_size = sizeof(QCowAIOCB), .bdrv_write_compressed = qcow_write_compressed, .bdrv_get_info = qcow_get_info, diff -r daf16171a05f -r feee6422144f tools/ioemu/block-qcow2.c --- a/tools/ioemu/block-qcow2.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/block-qcow2.c Tue Apr 01 11:29:03 2008 -0600 @@ -1007,6 +1007,13 @@ static void qcow_aio_cancel(BlockDriverA qemu_aio_release(acb); } +static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVQcowState *s = bs->opaque; + return bdrv_aio_flush(s->hd, cb, opaque); +} + static void qcow_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; @@ -1228,10 +1235,10 @@ static int qcow_write_compressed(BlockDr return 0; } -static void qcow_flush(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - bdrv_flush(s->hd); +static int qcow_flush(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + return bdrv_flush(s->hd); } static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) @@ -1886,6 +1893,8 @@ static int grow_refcount_table(BlockDriv int64_t table_offset; uint64_t data64; uint32_t data32; + int old_table_size; + int64_t old_table_offset; if (min_size <= s->refcount_table_size) return 0; @@ -1931,10 +1940,14 @@ static int grow_refcount_table(BlockDriv &data32, sizeof(data32)) != sizeof(data32)) goto fail; qemu_free(s->refcount_table); + old_table_offset = s->refcount_table_offset; + old_table_size = s->refcount_table_size; s->refcount_table = new_table; s->refcount_table_size = new_table_size; + s->refcount_table_offset = 
table_offset; update_refcount(bs, table_offset, new_table_size2, 1); + free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t)); return 0; fail: free_clusters(bs, table_offset, new_table_size2); @@ -2235,6 +2248,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_aio_read = qcow_aio_read, .bdrv_aio_write = qcow_aio_write, .bdrv_aio_cancel = qcow_aio_cancel, + .bdrv_aio_flush = qcow_aio_flush, .aiocb_size = sizeof(QCowAIOCB), .bdrv_write_compressed = qcow_write_compressed, diff -r daf16171a05f -r feee6422144f tools/ioemu/block-raw.c --- a/tools/ioemu/block-raw.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/block-raw.c Tue Apr 01 11:29:03 2008 -0600 @@ -496,6 +496,21 @@ static void raw_aio_cancel(BlockDriverAI pacb = &acb->next; } } + +static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + RawAIOCB *acb; + + acb = raw_aio_setup(bs, 0, NULL, 0, cb, opaque); + if (!acb) + return NULL; + if (aio_fsync(O_SYNC, &acb->aiocb) < 0) { + qemu_aio_release(acb); + return NULL; + } + return &acb->common; +} #endif static void raw_close(BlockDriverState *bs) @@ -600,10 +615,12 @@ static int raw_create(const char *filena return 0; } -static void raw_flush(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - fsync(s->fd); +static int raw_flush(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + if (fsync(s->fd)) + return errno; + return 0; } BlockDriver bdrv_raw = { @@ -621,6 +638,7 @@ BlockDriver bdrv_raw = { .bdrv_aio_read = raw_aio_read, .bdrv_aio_write = raw_aio_write, .bdrv_aio_cancel = raw_aio_cancel, + .bdrv_aio_flush = raw_aio_flush, .aiocb_size = sizeof(RawAIOCB), #endif .protocol_name = "file", @@ -959,6 +977,7 @@ BlockDriver bdrv_host_device = { .bdrv_aio_read = raw_aio_read, .bdrv_aio_write = raw_aio_write, .bdrv_aio_cancel = raw_aio_cancel, + .bdrv_aio_flush = raw_aio_flush, .aiocb_size = sizeof(RawAIOCB), #endif .bdrv_pread = raw_pread, diff -r daf16171a05f -r feee6422144f 
tools/ioemu/block-vmdk.c --- a/tools/ioemu/block-vmdk.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/block-vmdk.c Tue Apr 01 11:29:03 2008 -0600 @@ -734,10 +734,10 @@ static void vmdk_close(BlockDriverState vmdk_parent_close(s->hd); } -static void vmdk_flush(BlockDriverState *bs) -{ - BDRVVmdkState *s = bs->opaque; - bdrv_flush(s->hd); +static int vmdk_flush(BlockDriverState *bs) +{ + BDRVVmdkState *s = bs->opaque; + return bdrv_flush(s->hd); } BlockDriver bdrv_vmdk = { diff -r daf16171a05f -r feee6422144f tools/ioemu/block.c --- a/tools/ioemu/block.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/block.c Tue Apr 01 11:29:03 2008 -0600 @@ -48,6 +48,8 @@ static BlockDriverAIOCB *bdrv_aio_write_ int64_t sector_num, const uint8_t *buf, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb); +static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors); static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, @@ -155,6 +157,8 @@ void bdrv_register(BlockDriver *bdrv) bdrv->bdrv_read = bdrv_read_em; bdrv->bdrv_write = bdrv_write_em; } + if (!bdrv->bdrv_aio_flush) + bdrv->bdrv_aio_flush = bdrv_aio_flush_em; bdrv->next = first_drv; first_drv = bdrv; } @@ -885,12 +889,14 @@ const char *bdrv_get_device_name(BlockDr return bs->device_name; } -void bdrv_flush(BlockDriverState *bs) -{ - if (bs->drv->bdrv_flush) - bs->drv->bdrv_flush(bs); - if (bs->backing_hd) - bdrv_flush(bs->backing_hd); +int bdrv_flush(BlockDriverState *bs) +{ + int ret = 0; + if (bs->drv->bdrv_flush) + ret = bs->drv->bdrv_flush(bs); + if (!ret && bs->backing_hd) + ret = bdrv_flush(bs->backing_hd); + return ret; } void bdrv_info(void) @@ -1138,6 +1144,17 @@ void bdrv_aio_cancel(BlockDriverAIOCB *a drv->bdrv_aio_cancel(acb); } +BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, + 
BlockDriverCompletionFunc *cb, void *opaque) +{ + BlockDriver *drv = bs->drv; + + if (!drv) + return NULL; + + return drv->bdrv_aio_flush(bs, cb, opaque); +} + /**************************************************************/ /* async block device emulation */ @@ -1213,6 +1230,15 @@ static void bdrv_aio_cancel_em(BlockDriv qemu_aio_release(acb); } #endif /* !QEMU_TOOL */ + +static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + int ret; + ret = bdrv_flush(bs); + cb(opaque, ret); + return NULL; +} /**************************************************************/ /* sync block device emulation */ diff -r daf16171a05f -r feee6422144f tools/ioemu/block_int.h --- a/tools/ioemu/block_int.h Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/block_int.h Tue Apr 01 11:29:03 2008 -0600 @@ -36,7 +36,7 @@ struct BlockDriver { void (*bdrv_close)(BlockDriverState *bs); int (*bdrv_create)(const char *filename, int64_t total_sectors, const char *backing_file, int flags); - void (*bdrv_flush)(BlockDriverState *bs); + int (*bdrv_flush)(BlockDriverState *bs); int (*bdrv_is_allocated)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); int (*bdrv_set_key)(BlockDriverState *bs, const char *key); @@ -49,6 +49,8 @@ struct BlockDriver { int64_t sector_num, const uint8_t *buf, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb); + BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); int aiocb_size; const char *protocol_name; diff -r daf16171a05f -r feee6422144f tools/ioemu/hw/ide.c --- a/tools/ioemu/hw/ide.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/hw/ide.c Tue Apr 01 11:29:03 2008 -0600 @@ -751,6 +751,7 @@ static inline void ide_set_irq(IDEState static inline void ide_set_irq(IDEState *s) { BMDMAState *bm = s->bmdma; + if (!s->bs) return; /* yikes */ if (!(s->cmd & IDE_CMD_DISABLE_IRQ)) { if (bm) { 
bm->status |= BM_STATUS_INT; @@ -916,6 +917,8 @@ static void ide_read_dma_cb(void *opaque int n; int64_t sector_num; + if (!s->bs) return; /* yikes */ + n = s->io_buffer_size >> 9; sector_num = ide_get_sector(s); if (n > 0) { @@ -1024,6 +1027,8 @@ static void ide_write_dma_cb(void *opaqu int n; int64_t sector_num; + if (!s->bs) return; /* yikes */ + n = s->io_buffer_size >> 9; sector_num = ide_get_sector(s); if (n > 0) { @@ -1070,6 +1075,39 @@ static void ide_sector_write_dma(IDEStat s->io_buffer_index = 0; s->io_buffer_size = 0; ide_dma_start(s, ide_write_dma_cb); +} + +static void ide_device_utterly_broken(IDEState *s) { + s->status |= BUSY_STAT; + s->bs = NULL; + /* This prevents all future commands from working. All of the + * asynchronous callbacks (and ide_set_irq, as a safety measure) + * check to see whether this has happened and bail if so. + */ +} + +static void ide_flush_cb(void *opaque, int ret) +{ + IDEState *s = opaque; + + if (!s->bs) return; /* yikes */ + + if (ret) { + /* We are completely doomed. The IDE spec does not permit us + * to return an error from a flush except via a protocol which + * requires us to say where the error is and which + * contemplates the guest repeating the flush attempt to + * attempt flush the remaining data. We can't support that + * because f(data)sync (which is what the block drivers use + * eventually) doesn't report the necessary information or + * give us the necessary control. So we make the disk vanish. 
+ */ + ide_device_utterly_broken(s); + return; + } + else + s->status = READY_STAT; + ide_set_irq(s); } static void ide_atapi_cmd_ok(IDEState *s) @@ -1297,6 +1335,8 @@ static void ide_atapi_cmd_read_dma_cb(vo BMDMAState *bm = opaque; IDEState *s = bm->ide_if; int data_offset, n; + + if (!s->bs) return; /* yikes */ if (ret < 0) { ide_atapi_io_error(s, ret); @@ -1703,6 +1743,8 @@ static void cdrom_change_cb(void *opaque IDEState *s = opaque; int64_t nb_sectors; + if (!s->bs) return; /* yikes */ + /* XXX: send interrupt too */ bdrv_get_geometry(s->bs, &nb_sectors); s->nb_sectors = nb_sectors; @@ -1744,6 +1786,7 @@ static void ide_ioport_write(void *opaqu IDEState *s; int unit, n; int lba48 = 0; + int ret; #ifdef DEBUG_IDE printf("IDE: write addr=0x%x val=0x%02x\n", addr, val); @@ -1806,8 +1849,8 @@ static void ide_ioport_write(void *opaqu printf("ide: CMD=%02x\n", val); #endif s = ide_if->cur_drive; - /* ignore commands to non existant slave */ - if (s != ide_if && !s->bs) + /* ignore commands to non existant device */ + if (!s->bs) break; switch(val) { @@ -1976,10 +2019,8 @@ static void ide_ioport_write(void *opaqu break; case WIN_FLUSH_CACHE: case WIN_FLUSH_CACHE_EXT: - if (s->bs) - bdrv_flush(s->bs); - s->status = READY_STAT; - ide_set_irq(s); + s->status = BUSY_STAT; + bdrv_aio_flush(s->bs, ide_flush_cb, s); break; case WIN_IDLEIMMEDIATE: case WIN_STANDBY: @@ -2723,6 +2764,7 @@ static void pci_ide_save(QEMUFile* f, vo if (s->identify_set) { qemu_put_buffer(f, (const uint8_t *)s->identify_data, 512); } + qemu_put_8s(f, &s->write_cache); qemu_put_8s(f, &s->feature); qemu_put_8s(f, &s->error); qemu_put_be32s(f, &s->nsector); @@ -2749,7 +2791,7 @@ static int pci_ide_load(QEMUFile* f, voi PCIIDEState *d = opaque; int ret, i; - if (version_id != 1) + if (version_id != 1 && version_id != 2) return -EINVAL; ret = pci_device_load(&d->dev, f); if (ret < 0) @@ -2780,6 +2822,8 @@ static int pci_ide_load(QEMUFile* f, voi if (s->identify_set) { qemu_get_buffer(f, (uint8_t 
*)s->identify_data, 512); } + if (version_id >= 2) + qemu_get_8s(f, &s->write_cache); qemu_get_8s(f, &s->feature); qemu_get_8s(f, &s->error); qemu_get_be32s(f, &s->nsector); @@ -2854,7 +2898,7 @@ void pci_piix_ide_init(PCIBus *bus, Bloc buffered_pio_init(); - register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d); + register_savevm("ide", 0, 2, pci_ide_save, pci_ide_load, d); } /* hd_table must contain 4 block drivers */ @@ -2895,7 +2939,7 @@ void pci_piix3_ide_init(PCIBus *bus, Blo buffered_pio_init(); - register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d); + register_savevm("ide", 0, 2, pci_ide_save, pci_ide_load, d); } /***********************************************************/ diff -r daf16171a05f -r feee6422144f tools/ioemu/hw/ne2000.c --- a/tools/ioemu/hw/ne2000.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/hw/ne2000.c Tue Apr 01 11:29:03 2008 -0600 @@ -207,7 +207,7 @@ static int ne2000_buffer_full(NE2000Stat index = s->curpag << 8; boundary = s->boundary << 8; - if (index <= boundary) + if (index < boundary) avail = boundary - index; else avail = (s->stop - s->start) - (index - boundary); diff -r daf16171a05f -r feee6422144f tools/ioemu/hw/scsi-disk.c --- a/tools/ioemu/hw/scsi-disk.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/hw/scsi-disk.c Tue Apr 01 11:29:03 2008 -0600 @@ -291,6 +291,7 @@ int32_t scsi_send_command(SCSIDevice *s, uint8_t command; uint8_t *outbuf; SCSIRequest *r; + int ret; command = buf[0]; r = scsi_find_request(s, tag); @@ -496,7 +497,12 @@ int32_t scsi_send_command(SCSIDevice *s, break; case 0x35: DPRINTF("Syncronise cache (sector %d, count %d)\n", lba, len); - bdrv_flush(s->bdrv); + ret = bdrv_flush(s->bdrv); + if (ret) { + DPRINTF("IO error on bdrv_flush\n"); + scsi_command_complete(r, SENSE_HARDWARE_ERROR); + return 0; + } break; case 0x43: { diff -r daf16171a05f -r feee6422144f tools/ioemu/hw/vga.c --- a/tools/ioemu/hw/vga.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/hw/vga.c Tue Apr 01 11:29:03 2008 
-0600 @@ -1486,7 +1486,7 @@ static void vga_draw_graphic(VGAState *s static void vga_draw_graphic(VGAState *s, int full_update) { int y1, y, update, linesize, y_start, double_scan, mask, depth; - int width, height, shift_control, line_offset, bwidth, changed_flag; + int width, height, shift_control, line_offset, bwidth, ds_depth; ram_addr_t page0, page1; int disp_width, multi_scan, multi_run; uint8_t *d; @@ -1499,13 +1499,13 @@ static void vga_draw_graphic(VGAState *s s->get_resolution(s, &width, &height); disp_width = width; - changed_flag = 0; + ds_depth = s->ds->depth; depth = s->get_bpp(s); if (s->ds->dpy_colourdepth != NULL && - (s->ds->depth != depth || !s->ds->shared_buf)) { + (ds_depth != depth || !s->ds->shared_buf)) s->ds->dpy_colourdepth(s->ds, depth); - changed_flag = 1; - } + if (ds_depth != s->ds->depth) full_update = 1; + s->rgb_to_pixel = rgb_to_pixel_dup_table[get_depth_index(s->ds)]; @@ -1569,17 +1569,18 @@ static void vga_draw_graphic(VGAState *s } vga_draw_line = vga_draw_line_table[v * NB_DEPTHS + get_depth_index(s->ds)]; - if (disp_width != s->last_width || + if (s->line_offset != s->last_line_offset || + disp_width != s->last_width || height != s->last_height) { dpy_resize(s->ds, disp_width, height, s->line_offset); s->last_scr_width = disp_width; s->last_scr_height = height; s->last_width = disp_width; s->last_height = height; + s->last_line_offset = s->line_offset; full_update = 1; - changed_flag = 1; - } - if (s->ds->shared_buf && (changed_flag || s->ds->data != s->vram_ptr + (s->start_addr * 4))) + } + if (s->ds->shared_buf && (full_update || s->ds->data != s->vram_ptr + (s->start_addr * 4))) s->ds->dpy_setdata(s->ds, s->vram_ptr + (s->start_addr * 4)); if (!s->ds->shared_buf && s->cursor_invalidate) s->cursor_invalidate(s); @@ -2072,6 +2073,7 @@ void vga_common_init(VGAState *s, Displa s->vram_offset = vga_ram_offset; s->vram_size = vga_ram_size; s->ds = ds; + ds->palette = s->last_palette; s->get_bpp = vga_get_bpp; s->get_offsets = 
vga_get_offsets; s->get_resolution = vga_get_resolution; diff -r daf16171a05f -r feee6422144f tools/ioemu/hw/vga_int.h --- a/tools/ioemu/hw/vga_int.h Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/hw/vga_int.h Tue Apr 01 11:29:03 2008 -0600 @@ -129,6 +129,7 @@ uint32_t line_compare; \ uint32_t start_addr; \ uint32_t plane_updated; \ + uint32_t last_line_offset; \ uint8_t last_cw, last_ch; \ uint32_t last_width, last_height; /* in chars or pixels */ \ uint32_t last_scr_width, last_scr_height; /* in pixels */ \ diff -r daf16171a05f -r feee6422144f tools/ioemu/hw/xenfb.c --- a/tools/ioemu/hw/xenfb.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/hw/xenfb.c Tue Apr 01 11:29:03 2008 -0600 @@ -56,6 +56,7 @@ struct xenfb { int depth; /* colour depth of guest framebuffer */ int width; /* pixel width of guest framebuffer */ int height; /* pixel height of guest framebuffer */ + int offset; /* offset of the framebuffer */ int abs_pointer_wanted; /* Whether guest supports absolute pointer */ int button_state; /* Last seen pointer button state */ char protocol[64]; /* frontend protocol */ @@ -516,6 +517,18 @@ static void xenfb_on_fb_event(struct xen } xenfb_guest_copy(xenfb, x, y, w, h); break; + case XENFB_TYPE_RESIZE: + xenfb->width = event->resize.width; + xenfb->height = event->resize.height; + xenfb->depth = event->resize.depth; + xenfb->row_stride = event->resize.stride; + xenfb->offset = event->resize.offset; + dpy_colourdepth(xenfb->ds, xenfb->depth); + dpy_resize(xenfb->ds, xenfb->width, xenfb->height, xenfb->row_stride); + if (xenfb->ds->shared_buf) + dpy_setdata(xenfb->ds, xenfb->pixels + xenfb->offset); + xenfb_invalidate(xenfb); + break; } } xen_mb(); /* ensure we're done with ring contents */ @@ -680,6 +693,7 @@ static int xenfb_read_frontend_fb_config static int xenfb_read_frontend_fb_config(struct xenfb *xenfb) { struct xenfb_page *fb_page; int val; + int videoram; if (xenfb_xs_scanf1(xenfb->xsh, xenfb->fb.otherend, "feature-update", "%d", &val) < 0) @@ 
-702,10 +716,30 @@ static int xenfb_read_frontend_fb_config /* TODO check for consistency with the above */ xenfb->fb_len = fb_page->mem_length; xenfb->row_stride = fb_page->line_length; + + /* Protect against hostile frontend, limit fb_len to max allowed */ + if (xenfb_xs_scanf1(xenfb->xsh, xenfb->fb.nodename, "videoram", "%d", + &videoram) < 0) + videoram = 0; + videoram = videoram * 1024 * 1024; + if (videoram && xenfb->fb_len > videoram) { + fprintf(stderr, "Framebuffer requested length of %zd exceeded allowed %d\n", + xenfb->fb_len, videoram); + xenfb->fb_len = videoram; + if (xenfb->row_stride * xenfb->height > xenfb->fb_len) + xenfb->height = xenfb->fb_len / xenfb->row_stride; + } fprintf(stderr, "Framebuffer depth %d width %d height %d line %d\n", fb_page->depth, fb_page->width, fb_page->height, fb_page->line_length); if (xenfb_map_fb(xenfb, xenfb->fb.otherend_id) < 0) return -1; + + /* Indicate we have the frame buffer resize feature */ + xenfb_xs_printf(xenfb->xsh, xenfb->fb.nodename, "feature-resize", "1"); + + /* Tell kbd pointer the screen geometry */ + xenfb_xs_printf(xenfb->xsh, xenfb->kbd.nodename, "width", "%d", xenfb->width); + xenfb_xs_printf(xenfb->xsh, xenfb->kbd.nodename, "height", "%d", xenfb->height); if (xenfb_switch_state(&xenfb->fb, XenbusStateConnected)) return -1; @@ -1074,6 +1108,7 @@ static void xenfb_mouse_event(void *opaq #define BLT(SRC_T,DST_T,RSB,GSB,BSB,RDB,GDB,BDB) \ for (line = y ; line < (y+h) ; line++) { \ SRC_T *src = (SRC_T *)(xenfb->pixels \ + + xenfb->offset \ + (line * xenfb->row_stride) \ + (x * xenfb->depth / 8)); \ DST_T *dst = (DST_T *)(xenfb->ds->data \ @@ -1116,7 +1151,7 @@ static void xenfb_guest_copy(struct xenf if (xenfb->depth == xenfb->ds->depth) { /* Perfect match can use fast path */ for (line = y ; line < (y+h) ; line++) { memcpy(xenfb->ds->data + (line * xenfb->ds->linesize) + (x * xenfb->ds->depth / 8), - xenfb->pixels + (line * xenfb->row_stride) + (x * xenfb->depth / 8), + xenfb->pixels + xenfb->offset 
+ (line * xenfb->row_stride) + (x * xenfb->depth / 8), w * xenfb->depth / 8); } } else { /* Mismatch requires slow pixel munging */ diff -r daf16171a05f -r feee6422144f tools/ioemu/sdl.c --- a/tools/ioemu/sdl.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/sdl.c Tue Apr 01 11:29:03 2008 -0600 @@ -85,19 +85,33 @@ static void opengl_setdata(DisplayState glPixelStorei(GL_UNPACK_LSB_FIRST, 1); switch (ds->depth) { case 8: - tex_format = GL_RGB; - tex_type = GL_UNSIGNED_BYTE_3_3_2; - glPixelStorei (GL_UNPACK_ALIGNMENT, 1); + if (ds->palette == NULL) { + tex_format = GL_RGB; + tex_type = GL_UNSIGNED_BYTE_3_3_2; + } else { + int i; + GLushort paletter[256], paletteg[256], paletteb[256]; + for (i = 0; i < 256; i++) { + uint8_t rgb = ds->palette[i] >> 16; + paletter[i] = ((rgb & 0xe0) >> 5) * 65535 / 7; + paletteg[i] = ((rgb & 0x1c) >> 2) * 65535 / 7; + paletteb[i] = (rgb & 0x3) * 65535 / 3; + } + glPixelMapusv(GL_PIXEL_MAP_I_TO_R, 256, paletter); + glPixelMapusv(GL_PIXEL_MAP_I_TO_G, 256, paletteg); + glPixelMapusv(GL_PIXEL_MAP_I_TO_B, 256, paletteb); + + tex_format = GL_COLOR_INDEX; + tex_type = GL_UNSIGNED_BYTE; + } break; case 16: tex_format = GL_RGB; tex_type = GL_UNSIGNED_SHORT_5_6_5; - glPixelStorei (GL_UNPACK_ALIGNMENT, 2); break; case 24: tex_format = GL_BGR; tex_type = GL_UNSIGNED_BYTE; - glPixelStorei (GL_UNPACK_ALIGNMENT, 1); break; case 32: if (!ds->bgr) { @@ -107,7 +121,6 @@ static void opengl_setdata(DisplayState tex_format = GL_RGBA; tex_type = GL_UNSIGNED_BYTE; } - glPixelStorei (GL_UNPACK_ALIGNMENT, 4); break; } glPixelStorei(GL_UNPACK_ROW_LENGTH, (ds->linesize * 8) / ds->depth); @@ -184,6 +197,17 @@ static void sdl_setdata(DisplayState *ds return; } shared = SDL_CreateRGBSurfaceFrom(pixels, width, height, ds->depth, ds->linesize, rmask , gmask, bmask, amask); + if (ds->depth == 8 && ds->palette != NULL) { + SDL_Color palette[256]; + int i; + for (i = 0; i < 256; i++) { + uint8_t rgb = ds->palette[i] >> 16; + palette[i].r = ((rgb & 0xe0) >> 5) * 255 / 7; 
+ palette[i].g = ((rgb & 0x1c) >> 2) * 255 / 7; + palette[i].b = (rgb & 0x3) * 255 / 3; + } + SDL_SetColors(shared, palette, 0, 256); + } ds->data = pixels; } @@ -210,21 +234,32 @@ static void sdl_resize(DisplayState *ds, again: screen = SDL_SetVideoMode(w, h, 0, flags); -#ifndef CONFIG_OPENGL + if (!screen) { fprintf(stderr, "Could not open SDL display: %s\n", SDL_GetError()); + if (opengl_enabled) { + /* Fallback to SDL */ + opengl_enabled = 0; + ds->dpy_update = sdl_update; + ds->dpy_setdata = sdl_setdata; + sdl_resize(ds, w, h, linesize); + return; + } exit(1); } - if (!screen->pixels && (flags & SDL_HWSURFACE) && (flags & SDL_FULLSCREEN)) { - flags &= ~SDL_HWSURFACE; - goto again; - } - - if (!screen->pixels) { - fprintf(stderr, "Could not open SDL display: %s\n", SDL_GetError()); - exit(1); - } -#endif + + if (!opengl_enabled) { + if (!screen->pixels && (flags & SDL_HWSURFACE) && (flags & SDL_FULLSCREEN)) { + flags &= ~SDL_HWSURFACE; + goto again; + } + + if (!screen->pixels) { + fprintf(stderr, "Could not open SDL display: %s\n", SDL_GetError()); + exit(1); + } + } + ds->width = w; ds->height = h; if (!ds->shared_buf) { @@ -262,7 +297,10 @@ static void sdl_resize(DisplayState *ds, static void sdl_colourdepth(DisplayState *ds, int depth) { - if (!depth || !ds->depth) return; + if (!depth || !ds->depth) { + ds->shared_buf = 0; + return; + } ds->shared_buf = 1; ds->depth = depth; ds->linesize = width * depth / 8; diff -r daf16171a05f -r feee6422144f tools/ioemu/vl.h --- a/tools/ioemu/vl.h Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/vl.h Tue Apr 01 11:29:03 2008 -0600 @@ -653,6 +653,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDr const uint8_t *buf, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); void bdrv_aio_cancel(BlockDriverAIOCB *acb); +BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); void qemu_aio_init(void); void qemu_aio_poll(void); @@ -662,7 +664,7 @@ void qemu_aio_wait_end(void); 
void qemu_aio_wait_end(void); /* Ensure contents are flushed to disk. */ -void bdrv_flush(BlockDriverState *bs); +int bdrv_flush(BlockDriverState *bs); #define BDRV_TYPE_HD 0 #define BDRV_TYPE_CDROM 1 @@ -935,6 +937,7 @@ struct DisplayState { int width; int height; void *opaque; + uint32_t *palette; uint64_t gui_timer_interval; int switchbpp; diff -r daf16171a05f -r feee6422144f tools/ioemu/vnc.c --- a/tools/ioemu/vnc.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/ioemu/vnc.c Tue Apr 01 11:29:03 2008 -0600 @@ -1640,6 +1640,7 @@ static void vnc_dpy_colourdepth(DisplayS if (ds->depth == 32) return; depth = 32; break; + case 8: case 0: ds->shared_buf = 0; return; diff -r daf16171a05f -r feee6422144f tools/libfsimage/Rules.mk --- a/tools/libfsimage/Rules.mk Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/libfsimage/Rules.mk Tue Apr 01 11:29:03 2008 -0600 @@ -11,6 +11,7 @@ FSDIR-$(CONFIG_SunOS)-x86_64 = $(PREFIX) FSDIR-$(CONFIG_SunOS)-x86_64 = $(PREFIX)/lib/fs/$(FS)/64 FSDIR-$(CONFIG_SunOS)-x86_32 = $(PREFIX)/lib/fs/$(FS)/ FSDIR-$(CONFIG_SunOS) = $(FSDIR-$(CONFIG_SunOS)-$(XEN_TARGET_ARCH)) +FSDIR-$(CONFIG_NetBSD) = $(LIBDIR)/fs/$(FS) FSDIR = $(FSDIR-y) FSLIB = fsimage.so diff -r daf16171a05f -r feee6422144f tools/libfsimage/check-libext2fs --- a/tools/libfsimage/check-libext2fs Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/libfsimage/check-libext2fs Tue Apr 01 11:29:03 2008 -0600 @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh cat >ext2-test.c <<EOF #include <ext2fs/ext2fs.h> @@ -9,7 +9,7 @@ int main() } EOF -${CC:-gcc} -o ext2-test ext2-test.c -lext2fs >/dev/null 2>&1 +${CC-gcc} -o ext2-test ext2-test.c -lext2fs >/dev/null 2>&1 if [ $? 
= 0 ]; then echo ext2fs-lib else diff -r daf16171a05f -r feee6422144f tools/libfsimage/common/fsimage_grub.c --- a/tools/libfsimage/common/fsimage_grub.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/libfsimage/common/fsimage_grub.c Tue Apr 01 11:29:03 2008 -0600 @@ -204,19 +204,47 @@ fsig_devread(fsi_file_t *ffi, unsigned i fsig_devread(fsi_file_t *ffi, unsigned int sector, unsigned int offset, unsigned int bufsize, char *buf) { - uint64_t off = ffi->ff_fsi->f_off + ((uint64_t)sector * 512) + offset; - ssize_t bytes_read = 0; - - while (bufsize) { - ssize_t ret = pread(ffi->ff_fsi->f_fd, buf + bytes_read, - bufsize, (off_t)off); - if (ret == -1) - return (0); - if (ret == 0) - return (0); - - bytes_read += ret; - bufsize -= ret; + off_t off; + ssize_t ret; + int n, r; + char tmp[SECTOR_SIZE]; + + off = ffi->ff_fsi->f_off + ((off_t)sector * SECTOR_SIZE) + offset; + + /* + * Make reads from a raw disk sector-aligned. This is a requirement + * for NetBSD. Split the read up into three parts to meet this + * requirement. 
+ */ + + n = (off & (SECTOR_SIZE - 1)); + if (n > 0) { + r = SECTOR_SIZE - n; + if (r > bufsize) + r = bufsize; + ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off - n); + if (ret < n + r) + return (0); + memcpy(buf, tmp + n, r); + buf += r; + bufsize -= r; + off += r; + } + + n = (bufsize & ~(SECTOR_SIZE - 1)); + if (n > 0) { + ret = pread(ffi->ff_fsi->f_fd, buf, n, off); + if (ret < n) + return (0); + buf += n; + bufsize -= n; + off += n; + } + if (bufsize > 0) { + ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off); + if (ret < bufsize) + return (0); + memcpy(buf, tmp, bufsize); } return (1); diff -r daf16171a05f -r feee6422144f tools/libfsimage/common/fsimage_grub.h --- a/tools/libfsimage/common/fsimage_grub.h Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/libfsimage/common/fsimage_grub.h Tue Apr 01 11:29:03 2008 -0600 @@ -44,7 +44,7 @@ typedef struct fsig_plugin_ops { } fsig_plugin_ops_t; #define STAGE1_5 -#define FSYS_BUFLEN 0x8000 +#define FSYS_BUFLEN 0x40000 #define SECTOR_BITS 9 #define SECTOR_SIZE 0x200 diff -r daf16171a05f -r feee6422144f tools/libfsimage/common/fsimage_plugin.c --- a/tools/libfsimage/common/fsimage_plugin.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/libfsimage/common/fsimage_plugin.c Tue Apr 01 11:29:03 2008 -0600 @@ -131,7 +131,10 @@ static int load_plugins(void) int err; int ret = -1; -#ifdef __sun__ +#if defined(FSIMAGE_FSDIR) + if (fsdir == NULL) + fsdir = FSIMAGE_FSDIR; +#elif defined(__sun__) if (fsdir == NULL) fsdir = "/usr/lib/fs"; diff -r daf16171a05f -r feee6422144f tools/libxc/Makefile --- a/tools/libxc/Makefile Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/libxc/Makefile Tue Apr 01 11:29:03 2008 -0600 @@ -46,10 +46,11 @@ GUEST_SRCS-y += libelf-dominfo.c libelf- GUEST_SRCS-y += libelf-dominfo.c libelf-relocate.c # new domain builder -GUEST_SRCS-y += xc_dom_core.c xc_dom_boot.c -GUEST_SRCS-y += xc_dom_elfloader.c -GUEST_SRCS-y += xc_dom_binloader.c -GUEST_SRCS-y += xc_dom_compat_linux.c +GUEST_SRCS-y += xc_dom_core.c 
xc_dom_boot.c +GUEST_SRCS-y += xc_dom_elfloader.c +GUEST_SRCS-$(CONFIG_X86) += xc_dom_bzimageloader.c +GUEST_SRCS-y += xc_dom_binloader.c +GUEST_SRCS-y += xc_dom_compat_linux.c GUEST_SRCS-$(CONFIG_X86) += xc_dom_x86.c GUEST_SRCS-$(CONFIG_IA64) += xc_dom_ia64.c diff -r daf16171a05f -r feee6422144f tools/libxc/xc_dom_bzimageloader.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_dom_bzimageloader.c Tue Apr 01 11:29:03 2008 -0600 @@ -0,0 +1,159 @@ +/* + * Xen domain builder -- bzImage bits + * + * Parse and load bzImage kernel images. + * + * This relies on version 2.08 of the boot protocol, which contains an + * ELF file embedded in the bzImage. The loader extracts this ELF + * image and passes it off to the standard ELF loader. + * + * This code is licenced under the GPL. + * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>. + * written 2007 by Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx> + * written 2008 by Ian Campbell <ijc@xxxxxxxxxxxxxx> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> + +#include "xg_private.h" +#include "xc_dom.h" + +struct setup_header { + uint8_t _pad0[0x1f1]; /* skip uninteresting stuff */ + uint8_t setup_sects; + uint16_t root_flags; + uint32_t syssize; + uint16_t ram_size; + uint16_t vid_mode; + uint16_t root_dev; + uint16_t boot_flag; + uint16_t jump; + uint32_t header; +#define HDR_MAGIC "HdrS" +#define HDR_MAGIC_SZ 4 + uint16_t version; +#define VERSION(h,l) (((h)<<8) | (l)) + uint32_t realmode_swtch; + uint16_t start_sys; + uint16_t kernel_version; + uint8_t type_of_loader; + uint8_t loadflags; + uint16_t setup_move_size; + uint32_t code32_start; + uint32_t ramdisk_image; + uint32_t ramdisk_size; + uint32_t bootsect_kludge; + uint16_t heap_end_ptr; + uint16_t _pad1; + uint32_t cmd_line_ptr; + uint32_t initrd_addr_max; + uint32_t kernel_alignment; + uint8_t relocatable_kernel; + uint8_t _pad2[3]; + uint32_t cmdline_size; + uint32_t hardware_subarch; + uint64_t hardware_subarch_data; + uint32_t 
payload_offset; + uint32_t payload_length; +} __attribute__((packed)); + +extern struct xc_dom_loader elf_loader; + +static unsigned int payload_offset(struct setup_header *hdr) +{ + unsigned int off; + + off = (hdr->setup_sects + 1) * 512; + off += hdr->payload_offset; + return off; +} + +static int check_bzimage_kernel(struct xc_dom_image *dom, int verbose) +{ + struct setup_header *hdr; + + if ( dom->kernel_blob == NULL ) + { + if ( verbose ) + xc_dom_panic(XC_INTERNAL_ERROR, "%s: no kernel image loaded\n", + __FUNCTION__); + return -EINVAL; + } + if ( dom->kernel_size < sizeof(struct setup_header) ) + { + if ( verbose ) + xc_dom_panic(XC_INTERNAL_ERROR, "%s: kernel image too small\n", + __FUNCTION__); + return -EINVAL; + } + + hdr = dom->kernel_blob; + + if ( memcmp(&hdr->header, HDR_MAGIC, HDR_MAGIC_SZ) != 0 ) + { + if ( verbose ) + xc_dom_panic(XC_INVALID_KERNEL, "%s: kernel is not a bzImage\n", + __FUNCTION__); + return -EINVAL; + } + + if ( hdr->version < VERSION(2,8) ) + { + if ( verbose ) + xc_dom_panic(XC_INVALID_KERNEL, "%s: boot protocol too old (%04x)\n", + __FUNCTION__, hdr->version); + return -EINVAL; + } + + dom->kernel_blob = dom->kernel_blob + payload_offset(hdr); + dom->kernel_size = hdr->payload_length; + + if ( xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size) == -1 ) + { + if ( verbose ) + xc_dom_panic(XC_INVALID_KERNEL, "%s: unable to decompress kernel\n", + __FUNCTION__); + return -EINVAL; + } + + return elf_loader.probe(dom); +} + +static int xc_dom_probe_bzimage_kernel(struct xc_dom_image *dom) +{ + return check_bzimage_kernel(dom, 0); +} + +static int xc_dom_parse_bzimage_kernel(struct xc_dom_image *dom) +{ + return elf_loader.parser(dom); +} + +static int xc_dom_load_bzimage_kernel(struct xc_dom_image *dom) +{ + return elf_loader.loader(dom); +} + +static struct xc_dom_loader bzimage_loader = { + .name = "Linux bzImage", + .probe = xc_dom_probe_bzimage_kernel, + .parser = xc_dom_parse_bzimage_kernel, + .loader = 
xc_dom_load_bzimage_kernel, +}; + +static void __init register_loader(void) +{ + xc_dom_register_loader(&bzimage_loader); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r daf16171a05f -r feee6422144f tools/libxc/xc_dom_elfloader.c --- a/tools/libxc/xc_dom_elfloader.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/libxc/xc_dom_elfloader.c Tue Apr 01 11:29:03 2008 -0600 @@ -281,7 +281,7 @@ static int xc_dom_load_elf_kernel(struct /* ------------------------------------------------------------------------ */ -static struct xc_dom_loader elf_loader = { +struct xc_dom_loader elf_loader = { .name = "ELF-generic", .probe = xc_dom_probe_elf_kernel, .parser = xc_dom_parse_elf_kernel, diff -r daf16171a05f -r feee6422144f tools/libxen/include/xen/api/xen_acmpolicy.h --- a/tools/libxen/include/xen/api/xen_acmpolicy.h Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/libxen/include/xen/api/xen_acmpolicy.h Tue Apr 01 11:29:03 2008 -0600 @@ -74,7 +74,7 @@ xen_acm_header_free(xen_acm_header *hdr) /** * Get the referenced policy's record. */ -bool +extern bool xen_acmpolicy_get_record(xen_session *session, xen_acmpolicy_record **result, xen_xspolicy xspolicy); @@ -118,14 +118,14 @@ xen_acmpolicy_get_enforced_binary(xen_se /** * Get the ACM ssidref of the given VM. */ -bool +extern bool xen_acmpolicy_get_VM_ssidref(xen_session *session, int64_t *result, xen_vm vm); /** * Get the UUID field of the given policy. 
*/ -bool +extern bool xen_acmpolicy_get_uuid(xen_session *session, char **result, xen_xspolicy xspolicy); diff -r daf16171a05f -r feee6422144f tools/libxen/include/xen/api/xen_xspolicy.h --- a/tools/libxen/include/xen/api/xen_xspolicy.h Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/libxen/include/xen/api/xen_xspolicy.h Tue Apr 01 11:29:03 2008 -0600 @@ -68,6 +68,8 @@ enum xs_instantiationflags { #define XSERR_RESOURCE_ACCESS 23 + XSERR_BASE #define XSERR_HV_OP_FAILED 24 + XSERR_BASE #define XSERR_BOOTPOLICY_INSTALL_ERROR 25 + XSERR_BASE +#define XSERR_VM_NOT_AUTHORIZED 26 + XSERR_BASE +#define XSERR_VM_IN_CONFLICT 27 + XSERR_BASE /** @@ -179,28 +181,28 @@ typedef struct xen_xs_policystate char *errors; } xen_xs_policystate; -void +extern void xen_xs_policystate_free(xen_xs_policystate *state); /** * Get the referenced policy's record. */ -bool +extern bool xen_xspolicy_get_record(xen_session *session, xen_xspolicy_record **result, xen_xspolicy xspolicy); /** * Get the UUID field of the given policy. */ -bool +extern bool xen_xspolicy_get_uuid(xen_session *session, char **result, xen_xspolicy xspolicy); /** * Get a policy given it's UUID */ -bool +extern bool xen_xspolicy_get_by_uuid(xen_session *session, xen_xspolicy *result, char *uuid); @@ -208,7 +210,7 @@ xen_xspolicy_get_by_uuid(xen_session *se /** * Get the types of policies supported by the system. */ -bool +extern bool xen_xspolicy_get_xstype(xen_session *session, xs_type *result); @@ -216,13 +218,13 @@ xen_xspolicy_get_xstype(xen_session *ses * Get information about the currently managed policy. * (The API allows only one policy to be on the system.) */ -bool +extern bool xen_xspolicy_get_xspolicy(xen_session *session, xen_xs_policystate **result); /** * Activate the referenced policy by loading it into the hypervisor. 
*/ -bool +extern bool xen_xspolicy_activate_xspolicy(xen_session *session, int64_t *result, xen_xspolicy xspolicy, xs_instantiationflags flags); @@ -234,7 +236,7 @@ xen_xspolicy_activate_xspolicy(xen_sessi * on whether to load the policy immediately and whether to overwrite * an existing policy on the system. */ -bool +extern bool xen_xspolicy_set_xspolicy(xen_session *session, xen_xs_policystate **result, xs_type type, char *repr, int64_t flags, bool overwrite); @@ -248,7 +250,7 @@ xen_xspolicy_set_xspolicy(xen_session *s * for example fail if other domains than Domain-0 are running and have * different labels than Domain-0. */ -bool +extern bool xen_xspolicy_reset_xspolicy(xen_session *session, xen_xs_policystate **result, xs_type type); @@ -281,4 +283,11 @@ xen_xspolicy_get_resource_label(xen_sess xen_xspolicy_get_resource_label(xen_session *session, char **label, char *resource); +/** + * Check whether a VM with the given VM-label could run. + */ +extern bool +xen_xspolicy_can_run(xen_session *session, int64_t *result, + char *security_label); + #endif diff -r daf16171a05f -r feee6422144f tools/libxen/src/xen_xspolicy.c --- a/tools/libxen/src/xen_xspolicy.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/libxen/src/xen_xspolicy.c Tue Apr 01 11:29:03 2008 -0600 @@ -343,3 +343,21 @@ xen_xspolicy_activate_xspolicy(xen_sessi XEN_CALL_("XSPolicy.activate_xspolicy"); return session->ok; } + + +bool +xen_xspolicy_can_run(xen_session *session, int64_t *result, + char *security_label) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = security_label } + }; + + abstract_type result_type = abstract_type_int; + + *result = 0; + XEN_CALL_("XSPolicy.can_run"); + return session->ok; +} diff -r daf16171a05f -r feee6422144f tools/pygrub/src/pygrub --- a/tools/pygrub/src/pygrub Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/pygrub/src/pygrub Tue Apr 01 11:29:03 2008 -0600 @@ -240,10 +240,10 @@ class Grub: if y > self.start_image + maxy: break if 
y == self.selected_image: - attr = curses.A_REVERSE - else: - attr = 0 - self.entry_win.addstr(y + 1 - self.start_image, 2, i.title.ljust(70), attr) + self.entry_win.attron(curses.A_REVERSE) + self.entry_win.addstr(y + 1 - self.start_image, 2, i.title.ljust(70)) + if y == self.selected_image: + self.entry_win.attroff(curses.A_REVERSE) self.entry_win.refresh() def edit_entry(self, origimg): @@ -269,16 +269,17 @@ class Grub: self.entry_win.box() for idx in range(1, len(img.lines)): # current line should be highlighted - attr = 0 if idx == curline: - attr = curses.A_REVERSE + self.entry_win.attron(curses.A_REVERSE) # trim the line l = img.lines[idx].ljust(70) if len(l) > 70: l = l[:69] + ">" - self.entry_win.addstr(idx, 2, l, attr) + self.entry_win.addstr(idx, 2, l) + if idx == curline: + self.entry_win.attroff(curses.A_REVERSE) self.entry_win.refresh() c = self.screen.getch() diff -r daf16171a05f -r feee6422144f tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/python/xen/lowlevel/xc/xc.c Tue Apr 01 11:29:03 2008 -0600 @@ -762,11 +762,12 @@ static PyObject *pyxc_physinfo(XcObject { #define MAX_CPU_ID 255 xc_physinfo_t info; - char cpu_cap[128], *p=cpu_cap, *q=cpu_cap; + char cpu_cap[128], virt_caps[128], *p; int i, j, max_cpu_id; uint64_t free_heap; PyObject *ret_obj, *node_to_cpu_obj, *node_to_memory_obj; xc_cpu_to_node_t map[MAX_CPU_ID + 1]; + const char *virtcap_names[] = { "hvm", "hvm_directio" }; set_xen_guest_handle(info.cpu_to_node, map); info.max_cpu_id = MAX_CPU_ID; @@ -774,17 +775,21 @@ static PyObject *pyxc_physinfo(XcObject if ( xc_physinfo(self->xc_handle, &info) != 0 ) return pyxc_error_to_exception(); - *q = 0; + p = cpu_cap; + *p = '\0'; for ( i = 0; i < sizeof(info.hw_cap)/4; i++ ) - { p += sprintf(p, "%08x:", info.hw_cap[i]); - if ( info.hw_cap[i] ) - q = p; - } - if ( q > cpu_cap ) - *(q-1) = 0; - - ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}", + *(p-1) = 0; + 
+ p = virt_caps; + *p = '\0'; + for ( i = 0; i < 2; i++ ) + if ( (info.capabilities >> i) & 1 ) + p += sprintf(p, "%s ", virtcap_names[i]); + if ( p != virt_caps ) + *(p-1) = '\0'; + + ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s:s:s}", "nr_nodes", info.nr_nodes, "max_cpu_id", info.max_cpu_id, "threads_per_core", info.threads_per_core, @@ -794,7 +799,8 @@ static PyObject *pyxc_physinfo(XcObject "free_memory", pages_to_kib(info.free_pages), "scrub_memory", pages_to_kib(info.scrub_pages), "cpu_khz", info.cpu_khz, - "hw_caps", cpu_cap); + "hw_caps", cpu_cap, + "virt_caps", virt_caps); max_cpu_id = info.max_cpu_id; if ( max_cpu_id > MAX_CPU_ID ) diff -r daf16171a05f -r feee6422144f tools/python/xen/util/xsconstants.py --- a/tools/python/xen/util/xsconstants.py Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/python/xen/util/xsconstants.py Tue Apr 01 11:29:03 2008 -0600 @@ -57,7 +57,9 @@ XSERR_RESOURCE_ACCESS = 23 + XS XSERR_RESOURCE_ACCESS = 23 + XSERR_BASE XSERR_HV_OP_FAILED = 24 + XSERR_BASE XSERR_BOOTPOLICY_INSTALL_ERROR = 25 + XSERR_BASE -XSERR_LAST = 25 + XSERR_BASE ## KEEP LAST +XSERR_VM_NOT_AUTHORIZED = 26 + XSERR_BASE +XSERR_VM_IN_CONFLICT = 27 + XSERR_BASE +XSERR_LAST = 27 + XSERR_BASE ## KEEP LAST XSERR_MESSAGES = [ '', @@ -85,7 +87,9 @@ XSERR_MESSAGES = [ 'The policy is not loaded', 'Error accessing resource', 'Operation failed in hypervisor', - 'Boot policy installation error' + 'Boot policy installation error', + 'VM is not authorized to run', + 'VM label conflicts with another VM' ] def xserr2string(err): diff -r daf16171a05f -r feee6422144f tools/python/xen/util/xsm/acm/acm.py --- a/tools/python/xen/util/xsm/acm/acm.py Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/python/xen/util/xsm/acm/acm.py Tue Apr 01 11:29:03 2008 -0600 @@ -68,6 +68,7 @@ policy_name_re = re.compile(".*[chwall|s #decision hooks known to the hypervisor ACMHOOK_sharing = 1 ACMHOOK_authorization = 2 +ACMHOOK_conflictset = 3 #other global variables NULL_SSIDREF = 0 @@ -373,7 
+374,7 @@ def label2ssidref(labelname, policyname, else: return (sec_ssid[0] << 16) | pri_ssid[0] finally: - mapfile_unlock() + mapfile_unlock() def refresh_ssidref(config): @@ -550,6 +551,18 @@ def hv_get_policy(): if len(bin_pol) == 0: bin_pol = None return rc, bin_pol + + +def is_in_conflict(ssidref): + """ Check whether the given ssidref is in conflict with any running + domain. + """ + decision = acm.getdecision('ssidref', str(ssidref), + 'ssidref', str(ssidref), + ACMHOOK_conflictset) + if decision == "DENIED": + return True + return False def set_policy(xs_type, xml, flags, overwrite): @@ -1550,6 +1563,33 @@ def get_security_label(self, xspol=None) return label +def check_can_run(sec_label): + """ Check whether a VM could run, given its vm label. A VM can run if + - it is authorized + - is not in conflict with any running domain + """ + try: + mapfile_lock() + + if sec_label == None or sec_label == "": + vm_label = ACM_LABEL_UNLABELED + else: + poltype, policy, vm_label = sec_label.split(':') + if policy != get_active_policy_name(): + return -xsconstants.XSERR_BAD_POLICY_NAME + ssidref = label2ssidref(vm_label, policy, 'dom') + if ssidref != xsconstants.INVALID_SSIDREF: + if not has_authorization(ssidref): + return -xsconstants.XSERR_VM_NOT_AUTHORIZED + if is_in_conflict(ssidref): + return -xsconstants.XSERR_VM_IN_CONFLICT + return -xsconstants.XSERR_SUCCESS + else: + return -xsconstants.XSERR_BAD_LABEL + finally: + mapfile_unlock() + + __cond = threading.Condition() __script_runner = None __orders = [] diff -r daf16171a05f -r feee6422144f tools/python/xen/xend/XendBootloader.py --- a/tools/python/xen/xend/XendBootloader.py Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/python/xen/xend/XendBootloader.py Tue Apr 01 11:29:03 2008 -0600 @@ -12,7 +12,7 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# -import os, select, errno, stat, signal +import os, select, errno, stat, signal, tty import random import shlex from xen.xend import sxp @@ -43,6 +43,9 @@ def bootloader(blexec, disk, dom, quiet log.error(msg) raise VmError(msg) + if os.uname()[0] == "NetBSD" and disk.startswith('/dev/'): + disk = disk.replace("/dev/", "/dev/r") + mkdir.parents("/var/run/xend/boot/", stat.S_IRWXU) while True: @@ -63,12 +66,8 @@ def bootloader(blexec, disk, dom, quiet # where we copy characters between the two master fds, as well as # listening on the bootloader's fifo for the results. - # Termios runes for very raw access to the pty master fds. - attr = [ 0, 0, termios.CS8 | termios.CREAD | termios.CLOCAL, - 0, 0, 0, [0] * 32 ] - (m1, s1) = pty.openpty() - termios.tcsetattr(m1, termios.TCSANOW, attr) + tty.setraw(m1); fcntl.fcntl(m1, fcntl.F_SETFL, os.O_NDELAY); os.close(s1) slavename = ptsname.ptsname(m1) @@ -109,7 +108,7 @@ def bootloader(blexec, disk, dom, quiet # record that this domain is bootloading dom.bootloader_pid = child - termios.tcsetattr(m2, termios.TCSANOW, attr) + tty.setraw(m2); fcntl.fcntl(m2, fcntl.F_SETFL, os.O_NDELAY); while True: try: diff -r daf16171a05f -r feee6422144f tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/python/xen/xend/XendDomainInfo.py Tue Apr 01 11:29:03 2008 -0600 @@ -2047,11 +2047,10 @@ class XendDomainInfo: try: if self.info['platform'].get('localtime', 0): - t = time.time() - loc = time.localtime(t) - utc = time.gmtime(t) - timeoffset = int(time.mktime(loc) - time.mktime(utc)) - self.info['platform']['rtc_timeoffset'] = timeoffset + if time.localtime(time.time())[8]: + self.info['platform']['rtc_timeoffset'] = -time.altzone + else: + self.info['platform']['rtc_timeoffset'] = -time.timezone self.image = image.create(self, self.info) diff -r daf16171a05f -r feee6422144f tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Tue Apr 01 
10:30:57 2008 -0600 +++ b/tools/python/xen/xend/XendNode.py Tue Apr 01 11:29:03 2008 -0600 @@ -92,6 +92,7 @@ class XendNode: physinfo = self.physinfo_dict() cpu_count = physinfo['nr_cpus'] cpu_features = physinfo['hw_caps'] + virt_caps = physinfo['virt_caps'] # If the number of CPUs don't match, we should just reinitialise # the CPU UUIDs. @@ -112,6 +113,7 @@ class XendNode: self.cpus[u].update( { 'host' : self.uuid, 'features' : cpu_features, + 'virt_caps': virt_caps, 'speed' : int(float(cpuinfo[number]['cpu MHz'])), 'vendor' : cpuinfo[number]['vendor_id'], 'modelname': cpuinfo[number]['model name'], @@ -605,6 +607,7 @@ class XendNode: 'threads_per_core', 'cpu_mhz', 'hw_caps', + 'virt_caps', 'total_memory', 'free_memory', 'node_to_cpu', diff -r daf16171a05f -r feee6422144f tools/python/xen/xend/XendXSPolicy.py --- a/tools/python/xen/xend/XendXSPolicy.py Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/python/xen/xend/XendXSPolicy.py Tue Apr 01 11:29:03 2008 -0600 @@ -48,7 +48,8 @@ class XendXSPolicy(XendBase): 'rm_xsbootpolicy', 'get_resource_label', 'set_resource_label', - 'get_labeled_resources' ] + 'get_labeled_resources', + 'can_run' ] return XendBase.getFuncs() + funcs getClass = classmethod(getClass) @@ -190,6 +191,12 @@ class XendXSPolicy(XendBase): res = security.get_resource_label_xapi(resource) return res + def can_run(self, sec_label): + irc = security.validate_label_xapi(sec_label, 'dom') + if irc != xsconstants.XSERR_SUCCESS: + raise SecurityError(irc) + return security.check_can_run(sec_label) + get_xstype = classmethod(get_xstype) get_xspolicy = classmethod(get_xspolicy) set_xspolicy = classmethod(set_xspolicy) @@ -198,6 +205,7 @@ class XendXSPolicy(XendBase): set_resource_label = classmethod(set_resource_label) get_resource_label = classmethod(get_resource_label) get_labeled_resources = classmethod(get_labeled_resources) + can_run = classmethod(can_run) class XendACMPolicy(XendXSPolicy): diff -r daf16171a05f -r feee6422144f 
tools/python/xen/xend/server/vfbif.py --- a/tools/python/xen/xend/server/vfbif.py Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/python/xen/xend/server/vfbif.py Tue Apr 01 11:29:03 2008 -0600 @@ -6,7 +6,7 @@ import os import os CONFIG_ENTRIES = ['type', 'vncdisplay', 'vnclisten', 'vncpasswd', 'vncunused', - 'display', 'xauthority', 'keymap', + 'videoram', 'display', 'xauthority', 'keymap', 'uuid', 'location', 'protocol', 'opengl'] class VfbifController(DevController): diff -r daf16171a05f -r feee6422144f tools/python/xen/xm/XenAPI.py --- a/tools/python/xen/xm/XenAPI.py Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/python/xen/xm/XenAPI.py Tue Apr 01 11:29:03 2008 -0600 @@ -64,6 +64,7 @@ errormap = { "HANDLE_INVALID": N_("The %(1)s handle %(2)s is invalid."), "OPERATION_NOT_ALLOWED": N_("You attempted an operation that was not allowed."), "NETWORK_ALREADY_CONNECTED": N_("The network you specified already has a PIF attached to it, and so another one may not be attached."), + "SECURITY_ERROR": N_("%(2)s"), } translation = gettext.translation('xen-xm', fallback = True) diff -r daf16171a05f -r feee6422144f tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/python/xen/xm/create.py Tue Apr 01 11:29:03 2008 -0600 @@ -500,6 +500,11 @@ gopts.var('vncunused', val='', use="""Try to find an unused port for the VNC server. 
Only valid when vnc=1.""") +gopts.var('videoram', val='', + fn=set_value, default=None, + use="""Maximum amount of videoram PV guest can allocate + for frame buffer.""") + gopts.var('sdl', val='', fn=set_value, default=None, use="""Should the device model use SDL?""") @@ -645,7 +650,8 @@ def configure_vfbs(config_devs, vals): d['type'] = 'sdl' for (k,v) in d.iteritems(): if not k in [ 'vnclisten', 'vncunused', 'vncdisplay', 'display', - 'xauthority', 'type', 'vncpasswd', 'opengl' ]: + 'videoram', 'xauthority', 'type', 'vncpasswd', + 'opengl' ]: err("configuration option %s unknown to vfbs" % k) config.append([k,v]) if not d.has_key("keymap"): diff -r daf16171a05f -r feee6422144f tools/python/xen/xm/messages/xen-xm.pot --- a/tools/python/xen/xm/messages/xen-xm.pot Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/python/xen/xm/messages/xen-xm.pot Tue Apr 01 11:29:03 2008 -0600 @@ -8,7 +8,7 @@ msgstr "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2007-04-05 14:17-0400\n" +"POT-Creation-Date: 2008-03-31 17:40+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" "Language-Team: LANGUAGE <LL@xxxxxx>\n" @@ -61,3 +61,8 @@ msgid "" "The network you specified already has a PIF attached to it, and so another " "one may not be attached." msgstr "" + +#: xen/xm/XenAPI.py:67 +#, python-format +msgid "%(2)s" +msgstr "" diff -r daf16171a05f -r feee6422144f tools/tests/Makefile --- a/tools/tests/Makefile Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/tests/Makefile Tue Apr 01 11:29:03 2008 -0600 @@ -21,13 +21,17 @@ blowfish.h: blowfish.bin .PHONY: clean clean: - rm -rf $(TARGET) *.o *~ core blowfish.h blowfish.bin + rm -rf $(TARGET) *.o *~ core blowfish.h blowfish.bin x86_emulate .PHONY: install install: -x86_emulate.o: $(XEN_ROOT)/xen/arch/x86/x86_emulate.c +.PHONY: x86_emulate +x86_emulate: + [ -L x86_emulate ] || ln -sf $(XEN_ROOT)/xen/arch/x86/x86_emulate . 
+ +x86_emulate.o: x86_emulate.c x86_emulate $(HOSTCC) $(HOSTCFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $< -test_x86_emulator.o: test_x86_emulator.c blowfish.h +test_x86_emulator.o: test_x86_emulator.c blowfish.h x86_emulate $(HOSTCC) $(HOSTCFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $< diff -r daf16171a05f -r feee6422144f tools/tests/test_x86_emulator.c --- a/tools/tests/test_x86_emulator.c Tue Apr 01 10:30:57 2008 -0600 +++ b/tools/tests/test_x86_emulator.c Tue Apr 01 11:29:03 2008 -0600 @@ -1,20 +1,11 @@ - #include <stdio.h> #include <stdlib.h> #include <string.h> #include <stdint.h> -typedef uint8_t u8; -typedef uint16_t u16; -typedef uint32_t u32; -typedef uint64_t u64; -typedef int8_t s8; -typedef int16_t s16; -typedef int32_t s32; -typedef int64_t s64; #include <public/xen.h> -#include <asm-x86/x86_emulate.h> #include <sys/mman.h> +#include "x86_emulate/x86_emulate.h" #include "blowfish.h" #define MMAP_SZ 16384 @@ -38,9 +29,9 @@ static int read( unsigned long addr = offset; switch ( bytes ) { - case 1: *val = *(u8 *)addr; break; - case 2: *val = *(u16 *)addr; break; - case 4: *val = *(u32 *)addr; break; + case 1: *val = *(uint8_t *)addr; break; + case 2: *val = *(uint16_t *)addr; break; + case 4: *val = *(uint32_t *)addr; break; case 8: *val = *(unsigned long *)addr; break; } return X86EMUL_OKAY; @@ -56,9 +47,9 @@ static int write( unsigned long addr = offset; switch ( bytes ) { - case 1: *(u8 *)addr = (u8)val; break; - case 2: *(u16 *)addr = (u16)val; break; - case 4: *(u32 *)addr = (u32)val; break; + case 1: *(uint8_t *)addr = (uint8_t)val; break; + case 2: *(uint16_t *)addr = (uint16_t)val; break; + case 4: *(uint32_t *)addr = (uint32_t)val; break; case 8: *(unsigned long *)addr = val; break; } return X86EMUL_OKAY; @@ -75,9 +66,9 @@ static int cmpxchg( unsigned long addr = offset; switch ( bytes ) { - case 1: *(u8 *)addr = (u8)new; break; - case 2: *(u16 *)addr = (u16)new; break; - case 4: *(u32 *)addr = (u32)new; break; + case 1: *(uint8_t *)addr = 
(uint8_t)new; break; + case 2: *(uint16_t *)addr = (uint16_t)new; break; + case 4: *(uint32_t *)addr = (uint32_t)new; break; case 8: *(unsigned long *)addr = new; break; } return X86EMUL_OKAY; diff -r daf16171a05f -r feee6422144f tools/tests/x86_emulate.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/tests/x86_emulate.c Tue Apr 01 11:29:03 2008 -0600 @@ -0,0 +1,13 @@ +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <public/xen.h> + +#include "x86_emulate/x86_emulate.h" + +#define __emulate_fpu_insn(_op) \ +do{ rc = X86EMUL_UNHANDLEABLE; \ + goto done; \ +} while (0) + +#include "x86_emulate/x86_emulate.c" diff -r daf16171a05f -r feee6422144f xen/arch/ia64/xen/dom0_ops.c --- a/xen/arch/ia64/xen/dom0_ops.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/ia64/xen/dom0_ops.c Tue Apr 01 11:29:03 2008 -0600 @@ -410,6 +410,7 @@ long arch_do_sysctl(xen_sysctl_t *op, XE xen_sysctl_physinfo_t *pi = &op->u.physinfo; + memset(pi, 0, sizeof(*pi)); pi->threads_per_core = cpus_weight(cpu_sibling_map[0]); pi->cores_per_socket = cpus_weight(cpu_core_map[0]) / pi->threads_per_core; @@ -419,7 +420,6 @@ long arch_do_sysctl(xen_sysctl_t *op, XE pi->free_pages = avail_domheap_pages(); pi->scrub_pages = avail_scrub_pages(); pi->cpu_khz = local_cpu_data->proc_freq / 1000; - memset(pi->hw_cap, 0, sizeof(pi->hw_cap)); max_array_ent = pi->max_cpu_id; pi->max_cpu_id = last_cpu(cpu_online_map); diff -r daf16171a05f -r feee6422144f xen/arch/ia64/xen/dom_fw_common.c --- a/xen/arch/ia64/xen/dom_fw_common.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/ia64/xen/dom_fw_common.c Tue Apr 01 11:29:03 2008 -0600 @@ -20,7 +20,7 @@ #include <assert.h> #include <inttypes.h> -#include <xen/arch-ia64.h> +#include <xen/xen.h> #include <asm/bundle.h> #include "xg_private.h" diff -r daf16171a05f -r feee6422144f xen/arch/ia64/xen/dom_fw_domu.c --- a/xen/arch/ia64/xen/dom_fw_domu.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/ia64/xen/dom_fw_domu.c Tue Apr 01 11:29:03 2008 -0600 
@@ -37,7 +37,7 @@ #include <errno.h> #include <inttypes.h> -#include <xen/arch-ia64.h> +#include <xen/xen.h> #include "xg_private.h" #include "xc_dom.h" diff -r daf16171a05f -r feee6422144f xen/arch/powerpc/sysctl.c --- a/xen/arch/powerpc/sysctl.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/powerpc/sysctl.c Tue Apr 01 11:29:03 2008 -0600 @@ -41,6 +41,7 @@ long arch_do_sysctl(struct xen_sysctl *s { xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo; + memset(pi, 0, sizeof(*pi)); pi->threads_per_core = cpus_weight(cpu_sibling_map[0]); pi->cores_per_socket = @@ -50,10 +51,7 @@ long arch_do_sysctl(struct xen_sysctl *s pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->cpu_khz = cpu_khz; - memset(pi->hw_cap, 0, sizeof(pi->hw_cap)); - ret = 0; - if ( copy_to_guest(u_sysctl, sysctl, 1) ) - ret = -EFAULT; + ret = copy_to_guest(u_sysctl, sysctl, 1) ? -EFAULT : 0; } break; diff -r daf16171a05f -r feee6422144f xen/arch/x86/boot/trampoline.S --- a/xen/arch/x86/boot/trampoline.S Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/boot/trampoline.S Tue Apr 01 11:29:03 2008 -0600 @@ -156,9 +156,12 @@ 1: mov $(BOOT_TRAMPOLINE>>4),%a sti #if defined(__x86_64__) - /* Declare that our target operating mode is long mode. */ - movw $0xec00,%ax # declare target operating mode - movw $0x0002,%bx # long mode + /* + * Declare that our target operating mode is long mode. + * Initialise 32-bit registers since some buggy BIOSes depend on it. 
+ */ + movl $0xec00,%eax # declare target operating mode + movl $0x0002,%ebx # long mode int $0x15 #endif diff -r daf16171a05f -r feee6422144f xen/arch/x86/hvm/emulate.c --- a/xen/arch/x86/hvm/emulate.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/hvm/emulate.c Tue Apr 01 11:29:03 2008 -0600 @@ -19,23 +19,93 @@ #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> +static int hvmemul_do_io( + int is_mmio, paddr_t addr, unsigned long count, int size, + paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val) +{ + struct vcpu *curr = current; + vcpu_iodata_t *vio = get_ioreq(curr); + ioreq_t *p = &vio->vp_ioreq; + + switch ( curr->arch.hvm_vcpu.io_state ) + { + case HVMIO_none: + break; + case HVMIO_completed: + curr->arch.hvm_vcpu.io_state = HVMIO_none; + if ( val == NULL ) + return X86EMUL_UNHANDLEABLE; + *val = curr->arch.hvm_vcpu.io_data; + return X86EMUL_OKAY; + default: + return X86EMUL_UNHANDLEABLE; + } + + curr->arch.hvm_vcpu.io_state = + (val == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion; + + if ( p->state != STATE_IOREQ_NONE ) + gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n", + p->state); + + p->dir = dir; + p->data_is_ptr = value_is_ptr; + p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO; + p->size = size; + p->addr = addr; + p->count = count; + p->df = df; + p->data = value; + p->io_count++; + + if ( is_mmio + ? (hvm_mmio_intercept(p) || hvm_buffered_io_intercept(p)) + : hvm_portio_intercept(p) ) + { + p->state = STATE_IORESP_READY; + hvm_io_assist(); + if ( val != NULL ) + *val = curr->arch.hvm_vcpu.io_data; + curr->arch.hvm_vcpu.io_state = HVMIO_none; + return X86EMUL_OKAY; + } + + hvm_send_assist_req(curr); + return (val != NULL) ? 
X86EMUL_RETRY : X86EMUL_OKAY; +} + +static int hvmemul_do_pio( + unsigned long port, unsigned long count, int size, + paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val) +{ + return hvmemul_do_io(0, port, count, size, value, + dir, df, value_is_ptr, val); +} + +static int hvmemul_do_mmio( + paddr_t gpa, unsigned long count, int size, + paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val) +{ + return hvmemul_do_io(1, gpa, count, size, value, + dir, df, value_is_ptr, val); +} + /* * Convert addr from linear to physical form, valid over the range * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to * the valid computed range. It is always >0 when X86EMUL_OKAY is returned. + * @pfec indicates the access checks to be performed during page-table walks. */ static int hvmemul_linear_to_phys( unsigned long addr, paddr_t *paddr, unsigned int bytes_per_rep, unsigned long *reps, - enum hvm_access_type access_type, + uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt) { struct vcpu *curr = current; unsigned long pfn, npfn, done, todo, i; - struct segment_register *sreg; - uint32_t pfec; /* Clip repetitions to a sensible maximum. */ *reps = min_t(unsigned long, *reps, 4096); @@ -48,14 +118,6 @@ static int hvmemul_linear_to_phys( } *paddr = addr & ~PAGE_MASK; - - /* Gather access-type information for the page walks. */ - sreg = hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt); - pfec = PFEC_page_present; - if ( sreg->attr.fields.dpl == 3 ) - pfec |= PFEC_user_mode; - if ( access_type == hvm_access_write ) - pfec |= PFEC_write_access; /* Get the first PFN in the range. */ if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN ) @@ -74,18 +136,19 @@ static int hvmemul_linear_to_phys( for ( i = 1; done < todo; i++ ) { /* Get the next PFN in the range. 
*/ - if ( (npfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN ) - { - hvm_inject_exception(TRAP_page_fault, pfec, addr); - return X86EMUL_EXCEPTION; - } + npfn = paging_gva_to_gfn(curr, addr, &pfec); /* Is it contiguous with the preceding PFNs? If not then we're done. */ - if ( npfn != (pfn + i) ) + if ( (npfn == INVALID_GFN) || (npfn != (pfn + i)) ) { done /= bytes_per_rep; if ( done == 0 ) - return X86EMUL_UNHANDLEABLE; + { + if ( npfn != INVALID_GFN ) + return X86EMUL_UNHANDLEABLE; + hvm_inject_exception(TRAP_page_fault, pfec, addr); + return X86EMUL_EXCEPTION; + } *reps = done; break; } @@ -142,7 +205,10 @@ static int __hvmemul_read( enum hvm_access_type access_type, struct hvm_emulate_ctxt *hvmemul_ctxt) { + struct vcpu *curr = current; unsigned long addr; + uint32_t pfec = PFEC_page_present; + paddr_t gpa; int rc; rc = hvmemul_virtual_to_linear( @@ -152,41 +218,40 @@ static int __hvmemul_read( *val = 0; + if ( unlikely(curr->arch.hvm_vcpu.mmio_gva == (addr & PAGE_MASK)) && + curr->arch.hvm_vcpu.mmio_gva ) + { + unsigned int off = addr & (PAGE_SIZE - 1); + if ( access_type == hvm_access_insn_fetch ) + return X86EMUL_UNHANDLEABLE; + gpa = (((paddr_t)curr->arch.hvm_vcpu.mmio_gpfn << PAGE_SHIFT) | off); + if ( (off + bytes) <= PAGE_SIZE ) + return hvmemul_do_mmio(gpa, 1, bytes, 0, IOREQ_READ, 0, 0, val); + } + + if ( (seg != x86_seg_none) && + (hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3) ) + pfec |= PFEC_user_mode; + rc = ((access_type == hvm_access_insn_fetch) ? 
- hvm_fetch_from_guest_virt(val, addr, bytes) : - hvm_copy_from_guest_virt(val, addr, bytes)); + hvm_fetch_from_guest_virt(val, addr, bytes, pfec) : + hvm_copy_from_guest_virt(val, addr, bytes, pfec)); if ( rc == HVMCOPY_bad_gva_to_gfn ) return X86EMUL_EXCEPTION; if ( rc == HVMCOPY_bad_gfn_to_mfn ) { - struct vcpu *curr = current; unsigned long reps = 1; - paddr_t gpa; if ( access_type == hvm_access_insn_fetch ) return X86EMUL_UNHANDLEABLE; rc = hvmemul_linear_to_phys( - addr, &gpa, bytes, &reps, access_type, hvmemul_ctxt); + addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt); if ( rc != X86EMUL_OKAY ) return rc; - if ( curr->arch.hvm_vcpu.io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - if ( !curr->arch.hvm_vcpu.io_completed ) - { - curr->arch.hvm_vcpu.io_in_progress = 1; - send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, bytes, - 0, IOREQ_READ, 0, 0); - } - - if ( !curr->arch.hvm_vcpu.io_completed ) - return X86EMUL_RETRY; - - *val = curr->arch.hvm_vcpu.io_data; - curr->arch.hvm_vcpu.io_completed = 0; + return hvmemul_do_mmio(gpa, 1, bytes, 0, IOREQ_READ, 0, 0, val); } return X86EMUL_OKAY; @@ -236,7 +301,10 @@ static int hvmemul_write( { struct hvm_emulate_ctxt *hvmemul_ctxt = container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + struct vcpu *curr = current; unsigned long addr; + uint32_t pfec = PFEC_page_present | PFEC_write_access; + paddr_t gpa; int rc; rc = hvmemul_virtual_to_linear( @@ -244,27 +312,34 @@ static int hvmemul_write( if ( rc != X86EMUL_OKAY ) return rc; - rc = hvm_copy_to_guest_virt(addr, &val, bytes); + if ( unlikely(curr->arch.hvm_vcpu.mmio_gva == (addr & PAGE_MASK)) && + curr->arch.hvm_vcpu.mmio_gva ) + { + unsigned int off = addr & (PAGE_SIZE - 1); + gpa = (((paddr_t)curr->arch.hvm_vcpu.mmio_gpfn << PAGE_SHIFT) | off); + if ( (off + bytes) <= PAGE_SIZE ) + return hvmemul_do_mmio(gpa, 1, bytes, val, IOREQ_WRITE, + 0, 0, NULL); + } + + if ( (seg != x86_seg_none) && + (hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3) ) + pfec |= PFEC_user_mode; + + 
rc = hvm_copy_to_guest_virt(addr, &val, bytes, pfec); if ( rc == HVMCOPY_bad_gva_to_gfn ) return X86EMUL_EXCEPTION; if ( rc == HVMCOPY_bad_gfn_to_mfn ) { - struct vcpu *curr = current; unsigned long reps = 1; - paddr_t gpa; rc = hvmemul_linear_to_phys( - addr, &gpa, bytes, &reps, hvm_access_write, hvmemul_ctxt); + addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt); if ( rc != X86EMUL_OKAY ) return rc; - if ( curr->arch.hvm_vcpu.io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - curr->arch.hvm_vcpu.io_in_progress = 1; - send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, bytes, - val, IOREQ_WRITE, 0, 0); + return hvmemul_do_mmio(gpa, 1, bytes, val, IOREQ_WRITE, 0, 0, NULL); } return X86EMUL_OKAY; @@ -292,8 +367,8 @@ static int hvmemul_rep_ins( { struct hvm_emulate_ctxt *hvmemul_ctxt = container_of(ctxt, struct hvm_emulate_ctxt, ctxt); - struct vcpu *curr = current; unsigned long addr; + uint32_t pfec = PFEC_page_present | PFEC_write_access; paddr_t gpa; int rc; @@ -303,19 +378,16 @@ static int hvmemul_rep_ins( if ( rc != X86EMUL_OKAY ) return rc; + if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 ) + pfec |= PFEC_user_mode; + rc = hvmemul_linear_to_phys( - addr, &gpa, bytes_per_rep, reps, hvm_access_write, hvmemul_ctxt); - if ( rc != X86EMUL_OKAY ) - return rc; - - if ( curr->arch.hvm_vcpu.io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - curr->arch.hvm_vcpu.io_in_progress = 1; - send_pio_req(src_port, *reps, bytes_per_rep, gpa, IOREQ_READ, - !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); - - return X86EMUL_OKAY; + addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt); + if ( rc != X86EMUL_OKAY ) + return rc; + + return hvmemul_do_pio(src_port, *reps, bytes_per_rep, gpa, IOREQ_READ, + !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL); } static int hvmemul_rep_outs( @@ -328,8 +400,8 @@ static int hvmemul_rep_outs( { struct hvm_emulate_ctxt *hvmemul_ctxt = container_of(ctxt, struct hvm_emulate_ctxt, ctxt); - struct vcpu *curr = current; unsigned long addr; + uint32_t pfec = 
PFEC_page_present; paddr_t gpa; int rc; @@ -339,20 +411,16 @@ static int hvmemul_rep_outs( if ( rc != X86EMUL_OKAY ) return rc; + if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 ) + pfec |= PFEC_user_mode; + rc = hvmemul_linear_to_phys( - addr, &gpa, bytes_per_rep, reps, hvm_access_read, hvmemul_ctxt); - if ( rc != X86EMUL_OKAY ) - return rc; - - if ( curr->arch.hvm_vcpu.io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - curr->arch.hvm_vcpu.io_in_progress = 1; - send_pio_req(dst_port, *reps, bytes_per_rep, - gpa, IOREQ_WRITE, - !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); - - return X86EMUL_OKAY; + addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt); + if ( rc != X86EMUL_OKAY ) + return rc; + + return hvmemul_do_pio(dst_port, *reps, bytes_per_rep, gpa, IOREQ_WRITE, + !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL); } static int hvmemul_rep_movs( @@ -366,9 +434,9 @@ static int hvmemul_rep_movs( { struct hvm_emulate_ctxt *hvmemul_ctxt = container_of(ctxt, struct hvm_emulate_ctxt, ctxt); - struct vcpu *curr = current; unsigned long saddr, daddr; paddr_t sgpa, dgpa; + uint32_t pfec = PFEC_page_present; p2m_type_t p2mt; int rc; @@ -384,39 +452,32 @@ static int hvmemul_rep_movs( if ( rc != X86EMUL_OKAY ) return rc; + if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 ) + pfec |= PFEC_user_mode; + rc = hvmemul_linear_to_phys( - saddr, &sgpa, bytes_per_rep, reps, hvm_access_read, hvmemul_ctxt); + saddr, &sgpa, bytes_per_rep, reps, pfec, hvmemul_ctxt); if ( rc != X86EMUL_OKAY ) return rc; rc = hvmemul_linear_to_phys( - daddr, &dgpa, bytes_per_rep, reps, hvm_access_write, hvmemul_ctxt); - if ( rc != X86EMUL_OKAY ) - return rc; - - if ( curr->arch.hvm_vcpu.io_in_progress ) - return X86EMUL_UNHANDLEABLE; + daddr, &dgpa, bytes_per_rep, reps, + pfec | PFEC_write_access, hvmemul_ctxt); + if ( rc != X86EMUL_OKAY ) + return rc; (void)gfn_to_mfn_current(sgpa >> PAGE_SHIFT, &p2mt); if ( !p2m_is_ram(p2mt) ) - { - curr->arch.hvm_vcpu.io_in_progress = 1; - 
send_mmio_req(IOREQ_TYPE_COPY, sgpa, *reps, bytes_per_rep, - dgpa, IOREQ_READ, - !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); - } - else - { - (void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt); - if ( p2m_is_ram(p2mt) ) - return X86EMUL_UNHANDLEABLE; - curr->arch.hvm_vcpu.io_in_progress = 1; - send_mmio_req(IOREQ_TYPE_COPY, dgpa, *reps, bytes_per_rep, - sgpa, IOREQ_WRITE, - !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1); - } - - return X86EMUL_OKAY; + return hvmemul_do_mmio( + sgpa, *reps, bytes_per_rep, dgpa, IOREQ_READ, + !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL); + + (void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt); + if ( p2m_is_ram(p2mt) ) + return X86EMUL_UNHANDLEABLE; + return hvmemul_do_mmio( + dgpa, *reps, bytes_per_rep, sgpa, IOREQ_WRITE, + !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL); } static int hvmemul_read_segment( @@ -452,24 +513,7 @@ static int hvmemul_read_io( unsigned long *val, struct x86_emulate_ctxt *ctxt) { - struct vcpu *curr = current; - - if ( curr->arch.hvm_vcpu.io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - if ( !curr->arch.hvm_vcpu.io_completed ) - { - curr->arch.hvm_vcpu.io_in_progress = 1; - send_pio_req(port, 1, bytes, 0, IOREQ_READ, 0, 0); - } - - if ( !curr->arch.hvm_vcpu.io_completed ) - return X86EMUL_RETRY; - - *val = curr->arch.hvm_vcpu.io_data; - curr->arch.hvm_vcpu.io_completed = 0; - - return X86EMUL_OKAY; + return hvmemul_do_pio(port, 1, bytes, 0, IOREQ_READ, 0, 0, val); } static int hvmemul_write_io( @@ -478,21 +522,7 @@ static int hvmemul_write_io( unsigned long val, struct x86_emulate_ctxt *ctxt) { - struct vcpu *curr = current; - - if ( port == 0xe9 ) - { - hvm_print_line(curr, val); - return X86EMUL_OKAY; - } - - if ( curr->arch.hvm_vcpu.io_in_progress ) - return X86EMUL_UNHANDLEABLE; - - curr->arch.hvm_vcpu.io_in_progress = 1; - send_pio_req(port, 1, bytes, val, IOREQ_WRITE, 0, 0); - - return X86EMUL_OKAY; + return hvmemul_do_pio(port, 1, bytes, val, IOREQ_WRITE, 0, 0, NULL); } static int 
hvmemul_read_cr( @@ -674,7 +704,7 @@ int hvm_emulate_one( { struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs; struct vcpu *curr = current; - uint32_t new_intr_shadow; + uint32_t new_intr_shadow, pfec = PFEC_page_present; unsigned long addr; int rc; @@ -690,6 +720,9 @@ int hvm_emulate_one( hvmemul_ctxt->ctxt.sp_size = hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.db ? 32 : 16; } + + if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 ) + pfec |= PFEC_user_mode; hvmemul_ctxt->insn_buf_eip = regs->eip; hvmemul_ctxt->insn_buf_bytes = @@ -698,7 +731,8 @@ int hvm_emulate_one( regs->eip, sizeof(hvmemul_ctxt->insn_buf), hvm_access_insn_fetch, hvmemul_ctxt->ctxt.addr_size, &addr) && !hvm_fetch_from_guest_virt_nofault( - hvmemul_ctxt->insn_buf, addr, sizeof(hvmemul_ctxt->insn_buf))) + hvmemul_ctxt->insn_buf, addr, + sizeof(hvmemul_ctxt->insn_buf), pfec)) ? sizeof(hvmemul_ctxt->insn_buf) : 0; hvmemul_ctxt->exn_pending = 0; diff -r daf16171a05f -r feee6422144f xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/hvm/hvm.c Tue Apr 01 11:29:03 2008 -0600 @@ -81,6 +81,58 @@ void hvm_enable(struct hvm_function_tabl printk("HVM: Hardware Assisted Paging detected.\n"); } +/* + * Need to re-inject a given event? We avoid re-injecting software exceptions + * and interrupts because the faulting/trapping instruction can simply be + * re-executed (neither VMX nor SVM update RIP when they VMEXIT during + * INT3/INTO/INTn). + */ +int hvm_event_needs_reinjection(uint8_t type, uint8_t vector) +{ + switch ( type ) + { + case X86_EVENTTYPE_EXT_INTR: + case X86_EVENTTYPE_NMI: + return 1; + case X86_EVENTTYPE_HW_EXCEPTION: + /* + * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly + * check for these vectors, as they are really SW Exceptions. SVM has + * not updated RIP to point after the trapping instruction (INT3/INTO). 
+ */ + return (vector != 3) && (vector != 4); + default: + /* Software exceptions/interrupts can be re-executed (e.g., INT n). */ + break; + } + return 0; +} + +/* + * Combine two hardware exceptions: @vec2 was raised during delivery of @vec1. + * This means we can assume that @vec2 is contributory or a page fault. + */ +uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2) +{ + /* Exception during double-fault delivery always causes a triple fault. */ + if ( vec1 == TRAP_double_fault ) + { + hvm_triple_fault(); + return TRAP_double_fault; /* dummy return */ + } + + /* Exception during page-fault delivery always causes a double fault. */ + if ( vec1 == TRAP_page_fault ) + return TRAP_double_fault; + + /* Discard the first exception if it's benign or if we now have a #PF. */ + if ( !((1u << vec1) & 0x7c01u) || (vec2 == TRAP_page_fault) ) + return vec2; + + /* Cannot combine the exceptions: double fault. */ + return TRAP_double_fault; +} + void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc) { u64 host_tsc; @@ -203,6 +255,30 @@ static int hvm_set_ioreq_page( return 0; } +static int hvm_print_line( + int dir, uint32_t port, uint32_t bytes, uint32_t *val) +{ + struct vcpu *curr = current; + struct hvm_domain *hd = &curr->domain->arch.hvm_domain; + char c = *val; + + BUG_ON(bytes != 1); + + spin_lock(&hd->pbuf_lock); + hd->pbuf[hd->pbuf_idx++] = c; + if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') ) + { + if ( c != '\n' ) + hd->pbuf[hd->pbuf_idx++] = '\n'; + hd->pbuf[hd->pbuf_idx] = '\0'; + printk(XENLOG_G_DEBUG "HVM%u: %s", curr->domain->domain_id, hd->pbuf); + hd->pbuf_idx = 0; + } + spin_unlock(&hd->pbuf_lock); + + return 1; +} + int hvm_domain_initialise(struct domain *d) { int rc; @@ -236,6 +312,8 @@ int hvm_domain_initialise(struct domain hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq); hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq); + + register_portio_handler(d, 0xe9, 1, hvm_print_line); rc = hvm_funcs.domain_initialise(d); if ( rc 
!= 0 ) @@ -1250,7 +1328,7 @@ void hvm_task_switch( goto out; } - if ( !tr.attr.fields.g && (tr.limit < (sizeof(tss)-1)) ) + if ( tr.limit < (sizeof(tss)-1) ) { hvm_inject_exception(TRAP_invalid_tss, tss_sel & 0xfff8, 0); goto out; @@ -1358,7 +1436,7 @@ void hvm_task_switch( if ( hvm_virtual_to_linear_addr(x86_seg_ss, ®, regs->esp, 4, hvm_access_write, 32, &linear_addr) ) - hvm_copy_to_guest_virt_nofault(linear_addr, &errcode, 4); + hvm_copy_to_guest_virt_nofault(linear_addr, &errcode, 4, 0); } out: @@ -1366,60 +1444,31 @@ void hvm_task_switch( hvm_unmap(nptss_desc); } -/* - * __hvm_copy(): - * @buf = hypervisor buffer - * @addr = guest address to copy to/from - * @size = number of bytes to copy - * @dir = copy *to* guest (TRUE) or *from* guest (FALSE)? - * @virt = addr is *virtual* (TRUE) or *guest physical* (FALSE)? - * @fetch = copy is an instruction fetch? - * Returns number of bytes failed to copy (0 == complete success). - */ +#define HVMCOPY_from_guest (0u<<0) +#define HVMCOPY_to_guest (1u<<0) +#define HVMCOPY_no_fault (0u<<1) +#define HVMCOPY_fault (1u<<1) +#define HVMCOPY_phys (0u<<2) +#define HVMCOPY_virt (1u<<2) static enum hvm_copy_result __hvm_copy( - void *buf, paddr_t addr, int size, int dir, int virt, int fetch) + void *buf, paddr_t addr, int size, unsigned int flags, uint32_t pfec) { struct vcpu *curr = current; unsigned long gfn, mfn; p2m_type_t p2mt; char *p; - int count, todo; - uint32_t pfec = PFEC_page_present; - - /* - * We cannot use hvm_get_segment_register() while executing in - * vmx_realmode() as segment register state is cached. Furthermore, - * VMREADs on every data access hurts emulation performance. - * Hence we do not gather extra PFEC flags if CR0.PG == 0. 
- */ - if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) ) - virt = 0; - - if ( virt ) - { - struct segment_register sreg; - hvm_get_segment_register(curr, x86_seg_ss, &sreg); - if ( sreg.attr.fields.dpl == 3 ) - pfec |= PFEC_user_mode; - - if ( dir ) - pfec |= PFEC_write_access; - - if ( fetch ) - pfec |= PFEC_insn_fetch; - } - - todo = size; + int count, todo = size; + while ( todo > 0 ) { count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo); - if ( virt ) + if ( flags & HVMCOPY_virt ) { gfn = paging_gva_to_gfn(curr, addr, &pfec); if ( gfn == INVALID_GFN ) { - if ( virt == 2 ) /* 2 means generate a fault */ + if ( flags & HVMCOPY_fault ) hvm_inject_exception(TRAP_page_fault, pfec, addr); return HVMCOPY_bad_gva_to_gfn; } @@ -1437,16 +1486,18 @@ static enum hvm_copy_result __hvm_copy( p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK); - if ( dir ) + if ( flags & HVMCOPY_to_guest ) { - memcpy(p, buf, count); /* dir == TRUE: *to* guest */ + memcpy(p, buf, count); paging_mark_dirty(curr->domain, mfn); } else - memcpy(buf, p, count); /* dir == FALSE: *from guest */ + { + memcpy(buf, p, count); + } unmap_domain_page(p); - + addr += count; buf += count; todo -= count; @@ -1458,56 +1509,73 @@ enum hvm_copy_result hvm_copy_to_guest_p enum hvm_copy_result hvm_copy_to_guest_phys( paddr_t paddr, void *buf, int size) { - return __hvm_copy(buf, paddr, size, 1, 0, 0); + return __hvm_copy(buf, paddr, size, + HVMCOPY_to_guest | HVMCOPY_fault | HVMCOPY_phys, + 0); } enum hvm_copy_result hvm_copy_from_guest_phys( void *buf, paddr_t paddr, int size) { - return __hvm_copy(buf, paddr, size, 0, 0, 0); + return __hvm_copy(buf, paddr, size, + HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_phys, + 0); } enum hvm_copy_result hvm_copy_to_guest_virt( - unsigned long vaddr, void *buf, int size) -{ - return __hvm_copy(buf, vaddr, size, 1, 2, 0); + unsigned long vaddr, void *buf, int size, uint32_t pfec) +{ + return __hvm_copy(buf, vaddr, size, + HVMCOPY_to_guest | HVMCOPY_fault | 
HVMCOPY_virt, + PFEC_page_present | PFEC_write_access | pfec); } enum hvm_copy_result hvm_copy_from_guest_virt( - void *buf, unsigned long vaddr, int size) -{ - return __hvm_copy(buf, vaddr, size, 0, 2, 0); + void *buf, unsigned long vaddr, int size, uint32_t pfec) +{ + return __hvm_copy(buf, vaddr, size, + HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt, + PFEC_page_present | pfec); } enum hvm_copy_result hvm_fetch_from_guest_virt( - void *buf, unsigned long vaddr, int size) -{ - return __hvm_copy(buf, vaddr, size, 0, 2, hvm_nx_enabled(current)); + void *buf, unsigned long vaddr, int size, uint32_t pfec) +{ + if ( hvm_nx_enabled(current) ) + pfec |= PFEC_insn_fetch; + return __hvm_copy(buf, vaddr, size, + HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt, + PFEC_page_present | pfec); } enum hvm_copy_result hvm_copy_to_guest_virt_nofault( - unsigned long vaddr, void *buf, int size) -{ - return __hvm_copy(buf, vaddr, size, 1, 1, 0); + unsigned long vaddr, void *buf, int size, uint32_t pfec) +{ + return __hvm_copy(buf, vaddr, size, + HVMCOPY_to_guest | HVMCOPY_no_fault | HVMCOPY_virt, + PFEC_page_present | PFEC_write_access | pfec); } enum hvm_copy_result hvm_copy_from_guest_virt_nofault( - void *buf, unsigned long vaddr, int size) -{ - return __hvm_copy(buf, vaddr, size, 0, 1, 0); + void *buf, unsigned long vaddr, int size, uint32_t pfec) +{ + return __hvm_copy(buf, vaddr, size, + HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt, + PFEC_page_present | pfec); } enum hvm_copy_result hvm_fetch_from_guest_virt_nofault( - void *buf, unsigned long vaddr, int size) -{ - return __hvm_copy(buf, vaddr, size, 0, 1, hvm_nx_enabled(current)); + void *buf, unsigned long vaddr, int size, uint32_t pfec) +{ + if ( hvm_nx_enabled(current) ) + pfec |= PFEC_insn_fetch; + return __hvm_copy(buf, vaddr, size, + HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt, + PFEC_page_present | pfec); } DEFINE_PER_CPU(int, guest_handles_in_xen_space); -/* Note that copy_{to,from}_user_hvm 
require the PTE to be writable even - when they're only trying to read from it. The guest is expected to - deal with this. */ unsigned long copy_to_user_hvm(void *to, const void *from, unsigned len) { int rc; @@ -1518,7 +1586,8 @@ unsigned long copy_to_user_hvm(void *to, return 0; } - rc = hvm_copy_to_guest_virt_nofault((unsigned long)to, (void *)from, len); + rc = hvm_copy_to_guest_virt_nofault((unsigned long)to, (void *)from, + len, 0); return rc ? len : 0; /* fake a copy_to_user() return code */ } @@ -1532,26 +1601,8 @@ unsigned long copy_from_user_hvm(void *t return 0; } - rc = hvm_copy_from_guest_virt_nofault(to, (unsigned long)from, len); + rc = hvm_copy_from_guest_virt_nofault(to, (unsigned long)from, len, 0); return rc ? len : 0; /* fake a copy_from_user() return code */ -} - -/* HVM specific printbuf. Mostly used for hvmloader chit-chat. */ -void hvm_print_line(struct vcpu *v, const char c) -{ - struct hvm_domain *hd = &v->domain->arch.hvm_domain; - - spin_lock(&hd->pbuf_lock); - hd->pbuf[hd->pbuf_idx++] = c; - if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') ) - { - if ( c != '\n' ) - hd->pbuf[hd->pbuf_idx++] = '\n'; - hd->pbuf[hd->pbuf_idx] = '\0'; - printk(XENLOG_G_DEBUG "HVM%u: %s", v->domain->domain_id, hd->pbuf); - hd->pbuf_idx = 0; - } - spin_unlock(&hd->pbuf_lock); } #define bitmaskof(idx) (1U << ((idx) & 31)) @@ -1655,7 +1706,7 @@ static long hvm_grant_table_op( static long hvm_grant_table_op( unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count) { - if ( cmd != GNTTABOP_query_size ) + if ( (cmd != GNTTABOP_query_size) && (cmd != GNTTABOP_setup_table) ) return -ENOSYS; /* all other commands need auditing */ return do_grant_table_op(cmd, uop, count); } @@ -2109,12 +2160,15 @@ long do_hvm_op(unsigned long op, XEN_GUE return -EINVAL; if ( a.domid == DOMID_SELF ) + { d = rcu_lock_current_domain(); - else { - d = rcu_lock_domain_by_id(a.domid); - if ( d == NULL ) + } + else + { + if ( (d = rcu_lock_domain_by_id(a.domid)) == NULL 
) return -ESRCH; - if ( !IS_PRIV_FOR(current->domain, d) ) { + if ( !IS_PRIV_FOR(current->domain, d) ) + { rc = -EPERM; goto param_fail; } diff -r daf16171a05f -r feee6422144f xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/hvm/io.c Tue Apr 01 11:29:03 2008 -0600 @@ -123,73 +123,6 @@ int hvm_buffered_io_send(ioreq_t *p) return 1; } -void send_pio_req(unsigned long port, unsigned long count, int size, - paddr_t value, int dir, int df, int value_is_ptr) -{ - struct vcpu *v = current; - vcpu_iodata_t *vio = get_ioreq(v); - ioreq_t *p = &vio->vp_ioreq; - - if ( p->state != STATE_IOREQ_NONE ) - gdprintk(XENLOG_WARNING, - "WARNING: send pio with something already pending (%d)?\n", - p->state); - - p->dir = dir; - p->data_is_ptr = value_is_ptr; - p->type = IOREQ_TYPE_PIO; - p->size = size; - p->addr = port; - p->count = count; - p->df = df; - p->data = value; - p->io_count++; - - if ( hvm_portio_intercept(p) ) - { - p->state = STATE_IORESP_READY; - hvm_io_assist(); - } - else - { - hvm_send_assist_req(v); - } -} - -void send_mmio_req(unsigned char type, paddr_t gpa, - unsigned long count, int size, paddr_t value, - int dir, int df, int value_is_ptr) -{ - struct vcpu *v = current; - vcpu_iodata_t *vio = get_ioreq(v); - ioreq_t *p = &vio->vp_ioreq; - - if ( p->state != STATE_IOREQ_NONE ) - gdprintk(XENLOG_WARNING, - "WARNING: send mmio with something already pending (%d)?\n", - p->state); - - p->dir = dir; - p->data_is_ptr = value_is_ptr; - p->type = type; - p->size = size; - p->addr = gpa; - p->count = count; - p->df = df; - p->data = value; - p->io_count++; - - if ( hvm_mmio_intercept(p) || hvm_buffered_io_intercept(p) ) - { - p->state = STATE_IORESP_READY; - hvm_io_assist(); - } - else - { - hvm_send_assist_req(v); - } -} - void send_timeoffset_req(unsigned long timeoff) { ioreq_t p[1]; @@ -248,6 +181,11 @@ int handle_mmio(void) hvm_emulate_prepare(&ctxt, guest_cpu_user_regs()); rc = hvm_emulate_one(&ctxt); + + if ( 
curr->arch.hvm_vcpu.io_state == HVMIO_awaiting_completion ) + curr->arch.hvm_vcpu.io_state = HVMIO_handle_mmio_awaiting_completion; + else + curr->arch.hvm_vcpu.mmio_gva = 0; switch ( rc ) { @@ -271,41 +209,46 @@ int handle_mmio(void) hvm_emulate_writeback(&ctxt); - curr->arch.hvm_vcpu.mmio_in_progress = curr->arch.hvm_vcpu.io_in_progress; - return 1; } +int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn) +{ + current->arch.hvm_vcpu.mmio_gva = gva & PAGE_MASK; + current->arch.hvm_vcpu.mmio_gpfn = gpfn; + return handle_mmio(); +} + void hvm_io_assist(void) { - struct vcpu *v = current; - ioreq_t *p = &get_ioreq(v)->vp_ioreq; + struct vcpu *curr = current; + ioreq_t *p = &get_ioreq(curr)->vp_ioreq; + enum hvm_io_state io_state; if ( p->state != STATE_IORESP_READY ) { gdprintk(XENLOG_ERR, "Unexpected HVM iorequest state %d.\n", p->state); - domain_crash(v->domain); - goto out; + domain_crash_synchronous(); } rmb(); /* see IORESP_READY /then/ read contents of ioreq */ p->state = STATE_IOREQ_NONE; - if ( v->arch.hvm_vcpu.io_in_progress ) - { - v->arch.hvm_vcpu.io_in_progress = 0; - if ( (p->dir == IOREQ_READ) && !p->data_is_ptr ) - { - v->arch.hvm_vcpu.io_completed = 1; - v->arch.hvm_vcpu.io_data = p->data; - if ( v->arch.hvm_vcpu.mmio_in_progress ) - (void)handle_mmio(); - } - } - - out: - vcpu_end_shutdown_deferral(v); + io_state = curr->arch.hvm_vcpu.io_state; + curr->arch.hvm_vcpu.io_state = HVMIO_none; + + if ( (io_state == HVMIO_awaiting_completion) || + (io_state == HVMIO_handle_mmio_awaiting_completion) ) + { + curr->arch.hvm_vcpu.io_state = HVMIO_completed; + curr->arch.hvm_vcpu.io_data = p->data; + if ( io_state == HVMIO_handle_mmio_awaiting_completion ) + (void)handle_mmio(); + } + + if ( p->state == STATE_IOREQ_NONE ) + vcpu_end_shutdown_deferral(curr); } void dpci_ioport_read(uint32_t mport, ioreq_t *p) diff -r daf16171a05f -r feee6422144f xen/arch/x86/hvm/svm/emulate.c --- a/xen/arch/x86/hvm/svm/emulate.c Tue Apr 01 10:30:57 2008 -0600 
+++ b/xen/arch/x86/hvm/svm/emulate.c Tue Apr 01 11:29:03 2008 -0600 @@ -32,9 +32,11 @@ static int inst_copy_from_guest( static int inst_copy_from_guest( unsigned char *buf, unsigned long guest_eip, int inst_len) { + struct vmcb_struct *vmcb = current->arch.hvm_svm.vmcb; + uint32_t pfec = (vmcb->cpl == 3) ? PFEC_user_mode : 0; if ( (inst_len > MAX_INST_LEN) || (inst_len <= 0) ) return 0; - if ( hvm_fetch_from_guest_virt_nofault(buf, guest_eip, inst_len) ) + if ( hvm_fetch_from_guest_virt_nofault(buf, guest_eip, inst_len, pfec) ) return 0; return inst_len; } diff -r daf16171a05f -r feee6422144f xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Tue Apr 01 11:29:03 2008 -0600 @@ -725,7 +725,15 @@ static void svm_inject_exception( { struct vcpu *curr = current; struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb; - eventinj_t event; + eventinj_t event = vmcb->eventinj; + + if ( unlikely(event.fields.v) && + (event.fields.type == X86_EVENTTYPE_HW_EXCEPTION) ) + { + trapnr = hvm_combine_hw_exceptions(event.fields.vector, trapnr); + if ( trapnr == TRAP_double_fault ) + errcode = 0; + } event.bytes = 0; event.fields.v = 1; diff -r daf16171a05f -r feee6422144f xen/arch/x86/hvm/vmx/realmode.c --- a/xen/arch/x86/hvm/vmx/realmode.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/hvm/vmx/realmode.c Tue Apr 01 11:29:03 2008 -0600 @@ -190,7 +190,7 @@ void vmx_realmode(struct cpu_user_regs * hvm_emulate_prepare(&hvmemul_ctxt, regs); - if ( curr->arch.hvm_vcpu.io_completed ) + if ( curr->arch.hvm_vcpu.io_state == HVMIO_completed ) realmode_emulate_one(&hvmemul_ctxt); /* Only deliver interrupts into emulated real mode. 
*/ @@ -203,7 +203,7 @@ void vmx_realmode(struct cpu_user_regs * while ( curr->arch.hvm_vmx.vmxemul && !softirq_pending(smp_processor_id()) && - !curr->arch.hvm_vcpu.io_in_progress ) + (curr->arch.hvm_vcpu.io_state == HVMIO_none) ) { /* * Check for pending interrupts only every 16 instructions, because diff -r daf16171a05f -r feee6422144f xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Apr 01 11:29:03 2008 -0600 @@ -983,6 +983,62 @@ static void vmx_flush_guest_tlbs(void) * because VMRESUME will flush it for us. */ } + + +static void __vmx_inject_exception( + struct vcpu *v, int trap, int type, int error_code) +{ + unsigned long intr_fields; + + /* + * NB. Callers do not need to worry about clearing STI/MOV-SS blocking: + * "If the VM entry is injecting, there is no blocking by STI or by + * MOV SS following the VM entry, regardless of the contents of the + * interruptibility-state field [in the guest-state area before the + * VM entry]", PRM Vol. 3, 22.6.1 (Interruptibility State). 
+ */ + + intr_fields = (INTR_INFO_VALID_MASK | (type<<8) | trap); + if ( error_code != HVM_DELIVER_NO_ERROR_CODE ) { + __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); + intr_fields |= INTR_INFO_DELIVER_CODE_MASK; + } + + __vmwrite(VM_ENTRY_INTR_INFO, intr_fields); + + if ( trap == TRAP_page_fault ) + HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vcpu.guest_cr[2], error_code); + else + HVMTRACE_2D(INJ_EXC, v, trap, error_code); +} + +void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code) +{ + unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO); + + if ( unlikely(intr_info & INTR_INFO_VALID_MASK) && + (((intr_info >> 8) & 7) == X86_EVENTTYPE_HW_EXCEPTION) ) + { + trap = hvm_combine_hw_exceptions((uint8_t)intr_info, trap); + if ( trap == TRAP_double_fault ) + error_code = 0; + } + + __vmx_inject_exception(v, trap, X86_EVENTTYPE_HW_EXCEPTION, error_code); +} + +void vmx_inject_extint(struct vcpu *v, int trap) +{ + __vmx_inject_exception(v, trap, X86_EVENTTYPE_EXT_INTR, + HVM_DELIVER_NO_ERROR_CODE); +} + +void vmx_inject_nmi(struct vcpu *v) +{ + __vmx_inject_exception(v, 2, X86_EVENTTYPE_NMI, + HVM_DELIVER_NO_ERROR_CODE); +} + static void vmx_inject_exception( unsigned int trapnr, int errcode, unsigned long cr2) { @@ -1184,23 +1240,6 @@ static void vmx_do_cpuid(struct cpu_user regs->edx = edx; } -#define CASE_GET_REG_P(REG, reg) \ - case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break - -#ifdef __i386__ -#define CASE_EXTEND_GET_REG_P -#else -#define CASE_EXTEND_GET_REG_P \ - CASE_GET_REG_P(R8, r8); \ - CASE_GET_REG_P(R9, r9); \ - CASE_GET_REG_P(R10, r10); \ - CASE_GET_REG_P(R11, r11); \ - CASE_GET_REG_P(R12, r12); \ - CASE_GET_REG_P(R13, r13); \ - CASE_GET_REG_P(R14, r14); \ - CASE_GET_REG_P(R15, r15) -#endif - static void vmx_dr_access(unsigned long exit_qualification, struct cpu_user_regs *regs) { @@ -1224,9 +1263,9 @@ static void vmx_invlpg_intercept(unsigne } #define CASE_SET_REG(REG, reg) \ - case REG_ ## REG: regs->reg = value; break + 
case VMX_CONTROL_REG_ACCESS_GPR_ ## REG: regs->reg = value; break #define CASE_GET_REG(REG, reg) \ - case REG_ ## REG: value = regs->reg; break + case VMX_CONTROL_REG_ACCESS_GPR_ ## REG: value = regs->reg; break #define CASE_EXTEND_SET_REG \ CASE_EXTEND_REG(S) @@ -1352,26 +1391,25 @@ static int vmx_cr_access(unsigned long e unsigned long value; struct vcpu *v = current; - switch ( exit_qualification & CONTROL_REG_ACCESS_TYPE ) - { - case TYPE_MOV_TO_CR: - gp = exit_qualification & CONTROL_REG_ACCESS_REG; - cr = exit_qualification & CONTROL_REG_ACCESS_NUM; + switch ( exit_qualification & VMX_CONTROL_REG_ACCESS_TYPE ) + { + case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR: + gp = exit_qualification & VMX_CONTROL_REG_ACCESS_GPR; + cr = exit_qualification & VMX_CONTROL_REG_ACCESS_NUM; return mov_to_cr(gp, cr, regs); - case TYPE_MOV_FROM_CR: - gp = exit_qualification & CONTROL_REG_ACCESS_REG; - cr = exit_qualification & CONTROL_REG_ACCESS_NUM; + case VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR: + gp = exit_qualification & VMX_CONTROL_REG_ACCESS_GPR; + cr = exit_qualification & VMX_CONTROL_REG_ACCESS_NUM; mov_from_cr(cr, gp, regs); break; - case TYPE_CLTS: + case VMX_CONTROL_REG_ACCESS_TYPE_CLTS: v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; vmx_update_guest_cr(v, 0); HVMTRACE_0D(CLTS, current); break; - case TYPE_LMSW: + case VMX_CONTROL_REG_ACCESS_TYPE_LMSW: value = v->arch.hvm_vcpu.guest_cr[0]; - value = (value & ~0xF) | - (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF); + value = (value & ~0xFFFF) | ((exit_qualification >> 16) & 0xFFFF); HVMTRACE_1D(LMSW, current, value); return !hvm_set_cr0(value); default: diff -r daf16171a05f -r feee6422144f xen/arch/x86/hvm/vmx/x86_32/exits.S --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S Tue Apr 01 11:29:03 2008 -0600 @@ -60,6 +60,7 @@ ALIGN ENTRY(vmx_asm_vmexit_handler) HVM_SAVE_ALL_NOSEGREGS + GET_CURRENT(%ebx) movl $GUEST_RIP,%eax VMREAD(UREGS_eip) @@ -67,6 
+68,9 @@ ENTRY(vmx_asm_vmexit_handler) VMREAD(UREGS_esp) movl $GUEST_RFLAGS,%eax VMREAD(UREGS_eflags) + + movl %cr2,%eax + movl %eax,VCPU_hvm_guest_cr2(%ebx) #ifndef NDEBUG movw $0xbeef,%ax diff -r daf16171a05f -r feee6422144f xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Tue Apr 01 11:29:03 2008 -0600 @@ -76,6 +76,7 @@ ALIGN ENTRY(vmx_asm_vmexit_handler) HVM_SAVE_ALL_NOSEGREGS + GET_CURRENT(%rbx) leaq UREGS_rip(%rsp),%rdi movl $GUEST_RIP,%eax @@ -85,6 +86,9 @@ ENTRY(vmx_asm_vmexit_handler) VMREAD(UREGS_rsp) movl $GUEST_RFLAGS,%eax VMREAD(UREGS_eflags) + + movq %cr2,%rax + movq %rax,VCPU_hvm_guest_cr2(%rbx) #ifndef NDEBUG movw $0xbeef,%ax diff -r daf16171a05f -r feee6422144f xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/mm.c Tue Apr 01 11:29:03 2008 -0600 @@ -2114,14 +2114,14 @@ static int set_foreigndom(domid_t domid) info->foreign = rcu_lock_domain(dom_xen); break; default: - e = rcu_lock_domain_by_id(domid); - if ( e == NULL ) + if ( (e = rcu_lock_domain_by_id(domid)) == NULL ) { MEM_LOG("Unknown domain '%u'", domid); okay = 0; break; } - if (!IS_PRIV_FOR(d, e)) { + if ( !IS_PRIV_FOR(d, e) ) + { MEM_LOG("Cannot set foreign dom"); okay = 0; rcu_unlock_domain(e); @@ -3259,12 +3259,15 @@ long arch_memory_op(int op, XEN_GUEST_HA return -EFAULT; if ( xatp.domid == DOMID_SELF ) + { d = rcu_lock_current_domain(); - else { - d = rcu_lock_domain_by_id(xatp.domid); - if ( d == NULL ) + } + else + { + if ( (d = rcu_lock_domain_by_id(xatp.domid)) == NULL ) return -ESRCH; - if ( !IS_PRIV_FOR(current->domain, d) ) { + if ( !IS_PRIV_FOR(current->domain, d) ) + { rcu_unlock_domain(d); return -EPERM; } @@ -3355,12 +3358,15 @@ long arch_memory_op(int op, XEN_GUEST_HA return -EINVAL; if ( fmap.domid == DOMID_SELF ) + { d = rcu_lock_current_domain(); - else { - d = rcu_lock_domain_by_id(fmap.domid); - if ( d == NULL ) + } + else + { 
+ if ( (d = rcu_lock_domain_by_id(fmap.domid)) == NULL ) return -ESRCH; - if ( !IS_PRIV_FOR(current->domain, d) ) { + if ( !IS_PRIV_FOR(current->domain, d) ) + { rcu_unlock_domain(d); return -EPERM; } diff -r daf16171a05f -r feee6422144f xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/mm/shadow/common.c Tue Apr 01 11:29:03 2008 -0600 @@ -152,9 +152,9 @@ hvm_read(enum x86_segment seg, *val = 0; if ( access_type == hvm_access_insn_fetch ) - rc = hvm_fetch_from_guest_virt(val, addr, bytes); + rc = hvm_fetch_from_guest_virt(val, addr, bytes, 0); else - rc = hvm_copy_from_guest_virt(val, addr, bytes); + rc = hvm_copy_from_guest_virt(val, addr, bytes, 0); switch ( rc ) { @@ -416,7 +416,7 @@ struct x86_emulate_ops *shadow_init_emul x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf), hvm_access_insn_fetch, sh_ctxt, &addr) && !hvm_fetch_from_guest_virt_nofault( - sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf))) + sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf), 0)) ? sizeof(sh_ctxt->insn_buf) : 0; return &hvm_shadow_emulator_ops; @@ -444,7 +444,7 @@ void shadow_continue_emulation(struct sh x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf), hvm_access_insn_fetch, sh_ctxt, &addr) && !hvm_fetch_from_guest_virt_nofault( - sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf))) + sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf), 0)) ? sizeof(sh_ctxt->insn_buf) : 0; sh_ctxt->insn_buf_eip = regs->eip; } diff -r daf16171a05f -r feee6422144f xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/mm/shadow/multi.c Tue Apr 01 11:29:03 2008 -0600 @@ -2881,7 +2881,8 @@ static int sh_page_fault(struct vcpu *v, perfc_incr(shadow_fault_fast_mmio); SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa); reset_early_unshadow(v); - return handle_mmio() ? EXCRET_fault_fixed : 0; + return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT) + ? 
EXCRET_fault_fixed : 0); } else { @@ -3199,7 +3200,8 @@ static int sh_page_fault(struct vcpu *v, shadow_audit_tables(v); reset_early_unshadow(v); shadow_unlock(d); - return handle_mmio() ? EXCRET_fault_fixed : 0; + return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT) + ? EXCRET_fault_fixed : 0); not_a_shadow_fault: sh_audit_gw(v, &gw); diff -r daf16171a05f -r feee6422144f xen/arch/x86/sysctl.c --- a/xen/arch/x86/sysctl.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/sysctl.c Tue Apr 01 11:29:03 2008 -0600 @@ -47,18 +47,22 @@ long arch_do_sysctl( if ( ret ) break; + memset(pi, 0, sizeof(*pi)); pi->threads_per_core = cpus_weight(cpu_sibling_map[0]); pi->cores_per_socket = cpus_weight(cpu_core_map[0]) / pi->threads_per_core; pi->nr_cpus = (u32)num_online_cpus(); pi->nr_nodes = num_online_nodes(); - pi->total_pages = total_pages; - pi->free_pages = avail_domheap_pages(); - pi->scrub_pages = avail_scrub_pages(); - pi->cpu_khz = cpu_khz; - memset(pi->hw_cap, 0, sizeof(pi->hw_cap)); + pi->total_pages = total_pages; + pi->free_pages = avail_domheap_pages(); + pi->scrub_pages = avail_scrub_pages(); + pi->cpu_khz = cpu_khz; memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4); + if ( hvm_enabled ) + pi->capabilities |= XEN_SYSCTL_PHYSCAP_hvm; + if ( iommu_enabled ) + pi->capabilities |= XEN_SYSCTL_PHYSCAP_hvm_directio; max_array_ent = pi->max_cpu_id; pi->max_cpu_id = last_cpu(cpu_online_map); diff -r daf16171a05f -r feee6422144f xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Tue Apr 01 10:30:57 2008 -0600 +++ b/xen/arch/x86/x86_emulate.c Tue Apr 01 11:29:03 2008 -0600 @@ -1,484 +1,18 @@ /****************************************************************************** * x86_emulate.c * - * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. + * Wrapper for generic x86 instruction decoder and emulator. * - * Copyright (c) 2005-2007 Keir Fraser - * Copyright (c) 2005-2007 XenSource Inc. + * Copyright (c) 2008, Citrix Systems, Inc. 
* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Authors: + * Keir Fraser <keir.fraser@xxxxxxxxxx> */ -#ifndef __XEN__ -#include <stddef.h> -#include <stdint.h> -#include <string.h> -#include <public/xen.h> -#else -#include <xen/config.h> -#include <xen/types.h> -#include <xen/lib.h> -#include <asm/regs.h> +#include <asm/x86_emulate.h> + #undef cmpxchg -#endif -#include <asm-x86/x86_emulate.h> -/* Operand sizes: 8-bit operands or specified/overridden size. */ -#define ByteOp (1<<0) /* 8-bit operands. */ -/* Destination operand type. */ -#define DstBitBase (0<<1) /* Memory operand, bit string. */ -#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ -#define DstReg (2<<1) /* Register operand. */ -#define DstMem (3<<1) /* Memory operand. */ -#define DstMask (3<<1) -/* Source operand type. */ -#define SrcNone (0<<3) /* No source operand. */ -#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */ -#define SrcReg (1<<3) /* Register operand. */ -#define SrcMem (2<<3) /* Memory operand. */ -#define SrcMem16 (3<<3) /* Memory operand (16-bit). */ -#define SrcImm (4<<3) /* Immediate operand. */ -#define SrcImmByte (5<<3) /* 8-bit sign-extended immediate operand. */ -#define SrcMask (7<<3) -/* Generic ModRM decode. 
*/ -#define ModRM (1<<6) -/* Destination is only written; never read. */ -#define Mov (1<<7) - -static uint8_t opcode_table[256] = { - /* 0x00 - 0x07 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps, - /* 0x08 - 0x0F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, 0, - /* 0x10 - 0x17 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps, - /* 0x18 - 0x1F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps, - /* 0x20 - 0x27 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, - /* 0x28 - 0x2F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, - /* 0x30 - 0x37 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, - /* 0x38 - 0x3F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, - /* 0x40 - 0x4F */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0x50 - 0x5F */ - ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, - ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, - ImplicitOps|Mov, 
ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, - ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, - /* 0x60 - 0x67 */ - ImplicitOps, ImplicitOps, DstReg|SrcMem|ModRM, DstReg|SrcMem16|ModRM|Mov, - 0, 0, 0, 0, - /* 0x68 - 0x6F */ - ImplicitOps|Mov, DstReg|SrcImm|ModRM|Mov, - ImplicitOps|Mov, DstReg|SrcImmByte|ModRM|Mov, - ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, - /* 0x70 - 0x77 */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0x78 - 0x7F */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0x80 - 0x87 */ - ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, - ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - /* 0x88 - 0x8F */ - ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov, - ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstMem|SrcReg|ModRM|Mov, DstReg|SrcNone|ModRM, - DstReg|SrcMem|ModRM|Mov, DstMem|SrcNone|ModRM|Mov, - /* 0x90 - 0x97 */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0x98 - 0x9F */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0xA0 - 0xA7 */ - ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, - ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, - ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, - ByteOp|ImplicitOps, ImplicitOps, - /* 0xA8 - 0xAF */ - ByteOp|DstReg|SrcImm, DstReg|SrcImm, - ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, - ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, - ByteOp|ImplicitOps, ImplicitOps, - /* 0xB0 - 0xB7 */ - ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, - ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, - ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, - ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, - /* 
0xB8 - 0xBF */ - DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, - DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, - /* 0xC0 - 0xC7 */ - ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, - ImplicitOps, ImplicitOps, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - ByteOp|DstMem|SrcImm|ModRM|Mov, DstMem|SrcImm|ModRM|Mov, - /* 0xC8 - 0xCF */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0xD0 - 0xD7 */ - ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, - ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0xD8 - 0xDF */ - 0, ImplicitOps|ModRM|Mov, 0, ImplicitOps|ModRM|Mov, - 0, ImplicitOps|ModRM|Mov, ImplicitOps|ModRM|Mov, ImplicitOps|ModRM|Mov, - /* 0xE0 - 0xE7 */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0xE8 - 0xEF */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0xF0 - 0xF7 */ - 0, ImplicitOps, 0, 0, - ImplicitOps, ImplicitOps, - ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM, - /* 0xF8 - 0xFF */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM -}; - -static uint8_t twobyte_table[256] = { - /* 0x00 - 0x07 */ - 0, ImplicitOps|ModRM, 0, 0, 0, 0, ImplicitOps, 0, - /* 0x08 - 0x0F */ - ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps|ModRM, 0, 0, - /* 0x10 - 0x17 */ - 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x18 - 0x1F */ - ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, - ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, - /* 0x20 - 0x27 */ - ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, - 0, 0, 0, 0, - /* 0x28 - 0x2F */ - 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x30 - 0x37 */ - 
ImplicitOps, ImplicitOps, ImplicitOps, 0, 0, 0, 0, 0, - /* 0x38 - 0x3F */ - 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x40 - 0x47 */ - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - /* 0x48 - 0x4F */ - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - /* 0x50 - 0x5F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x60 - 0x6F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x70 - 0x7F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x80 - 0x87 */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0x88 - 0x8F */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0x90 - 0x97 */ - ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, - ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, - ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, - ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, - /* 0x98 - 0x9F */ - ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, - ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, - ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, - ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, - /* 0xA0 - 0xA7 */ - ImplicitOps, ImplicitOps, ImplicitOps, DstBitBase|SrcReg|ModRM, - DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0, - /* 0xA8 - 0xAF */ - ImplicitOps, ImplicitOps, 0, DstBitBase|SrcReg|ModRM, - DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstReg|SrcMem|ModRM, - /* 0xB0 - 0xB7 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - DstReg|SrcMem|ModRM|Mov, 
DstBitBase|SrcReg|ModRM, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, - /* 0xB8 - 0xBF */ - 0, 0, DstBitBase|SrcImmByte|ModRM, DstBitBase|SrcReg|ModRM, - DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, - /* 0xC0 - 0xC7 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0, - 0, 0, 0, ImplicitOps|ModRM, - /* 0xC8 - 0xCF */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0xD0 - 0xDF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xE0 - 0xEF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xF0 - 0xFF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* Type, address-of, and value of an instruction's operand. */ -struct operand { - enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; - unsigned int bytes; - unsigned long val, orig_val; - union { - /* OP_REG: Pointer to register field. */ - unsigned long *reg; - /* OP_MEM: Segment and offset. */ - struct { - enum x86_segment seg; - unsigned long off; - } mem; - }; -}; - -/* MSRs. */ -#define MSR_TSC 0x10 - -/* Control register flags. */ -#define CR0_PE (1<<0) -#define CR4_TSD (1<<2) - -/* EFLAGS bit definitions. */ -#define EFLG_VIP (1<<20) -#define EFLG_VIF (1<<19) -#define EFLG_AC (1<<18) -#define EFLG_VM (1<<17) -#define EFLG_RF (1<<16) -#define EFLG_NT (1<<14) -#define EFLG_IOPL (3<<12) -#define EFLG_OF (1<<11) -#define EFLG_DF (1<<10) -#define EFLG_IF (1<<9) -#define EFLG_TF (1<<8) -#define EFLG_SF (1<<7) -#define EFLG_ZF (1<<6) -#define EFLG_AF (1<<4) -#define EFLG_PF (1<<2) -#define EFLG_CF (1<<0) - -/* Exception definitions. 
*/ -#define EXC_DE 0 -#define EXC_DB 1 -#define EXC_BP 3 -#define EXC_OF 4 -#define EXC_BR 5 -#define EXC_UD 6 -#define EXC_TS 10 -#define EXC_NP 11 -#define EXC_SS 12 -#define EXC_GP 13 -#define EXC_PF 14 -#define EXC_MF 16 - -/* - * Instruction emulation: - * Most instructions are emulated directly via a fragment of inline assembly - * code. This allows us to save/restore EFLAGS and thus very easily pick up - * any modified flags. - */ - -#if defined(__x86_64__) -#define _LO32 "k" /* force 32-bit operand */ -#define _STK "%%rsp" /* stack pointer */ -#define _BYTES_PER_LONG "8" -#elif defined(__i386__) -#define _LO32 "" /* force 32-bit operand */ -#define _STK "%%esp" /* stack pointer */ -#define _BYTES_PER_LONG "4" -#endif - -/* - * These EFLAGS bits are restored from saved value during emulation, and - * any changes are written back to the saved value after emulation. - */ -#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) - -/* Before executing instruction: restore necessary bits in EFLAGS. */ -#define _PRE_EFLAGS(_sav, _msk, _tmp) \ -/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \ -"movl %"_sav",%"_LO32 _tmp"; " \ -"push %"_tmp"; " \ -"push %"_tmp"; " \ -"movl %"_msk",%"_LO32 _tmp"; " \ -"andl %"_LO32 _tmp",("_STK"); " \ -"pushf; " \ -"notl %"_LO32 _tmp"; " \ -"andl %"_LO32 _tmp",("_STK"); " \ -"andl %"_LO32 _tmp",2*"_BYTES_PER_LONG"("_STK"); " \ -"pop %"_tmp"; " \ -"orl %"_LO32 _tmp",("_STK"); " \ -"popf; " \ -"pop %"_sav"; " - -/* After executing instruction: write-back necessary bits in EFLAGS. */ -#define _POST_EFLAGS(_sav, _msk, _tmp) \ -/* _sav |= EFLAGS & _msk; */ \ -"pushf; " \ -"pop %"_tmp"; " \ -"andl %"_msk",%"_LO32 _tmp"; " \ -"orl %"_LO32 _tmp",%"_sav"; " - -/* Raw emulation: instruction has two explicit operands. 
*/ -#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy)\ -do{ unsigned long _tmp; \ - switch ( (_dst).bytes ) \ - { \ - case 2: \ - asm volatile ( \ - _PRE_EFLAGS("0","4","2") \ - _op"w %"_wx"3,%1; " \ - _POST_EFLAGS("0","4","2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : _wy ((_src).val), "i" (EFLAGS_MASK), \ - "m" (_eflags), "m" ((_dst).val) ); \ - break; \ - case 4: \ - asm volatile ( \ - _PRE_EFLAGS("0","4","2") \ - _op"l %"_lx"3,%1; " \ - _POST_EFLAGS("0","4","2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : _ly ((_src).val), "i" (EFLAGS_MASK), \ - "m" (_eflags), "m" ((_dst).val) ); \ - break; \ - case 8: \ - __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy); \ - break; \ - } \ -} while (0) -#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)\ -do{ unsigned long _tmp; \ - switch ( (_dst).bytes ) \ - { \ - case 1: \ - asm volatile ( \ - _PRE_EFLAGS("0","4","2") \ - _op"b %"_bx"3,%1; " \ - _POST_EFLAGS("0","4","2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : _by ((_src).val), "i" (EFLAGS_MASK), \ - "m" (_eflags), "m" ((_dst).val) ); \ - break; \ - default: \ - __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy);\ - break; \ - } \ -} while (0) -/* Source operand is byte-sized and may be restricted to just %cl. */ -#define emulate_2op_SrcB(_op, _src, _dst, _eflags) \ - __emulate_2op(_op, _src, _dst, _eflags, \ - "b", "c", "b", "c", "b", "c", "b", "c") -/* Source operand is byte, word, long or quad sized. */ -#define emulate_2op_SrcV(_op, _src, _dst, _eflags) \ - __emulate_2op(_op, _src, _dst, _eflags, \ - "b", "q", "w", "r", _LO32, "r", "", "r") -/* Source operand is word, long or quad sized. */ -#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \ - __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ - "w", "r", _LO32, "r", "", "r") - -/* Instruction has only one explicit operand (no source operand). 
*/ -#define emulate_1op(_op,_dst,_eflags) \ -do{ unsigned long _tmp; \ - switch ( (_dst).bytes ) \ - { \ - case 1: \ - asm volatile ( \ - _PRE_EFLAGS("0","3","2") \ - _op"b %1; " \ - _POST_EFLAGS("0","3","2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ - break; \ - case 2: \ - asm volatile ( \ - _PRE_EFLAGS("0","3","2") \ - _op"w %1; " \ - _POST_EFLAGS("0","3","2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ - break; \ - case 4: \ - asm volatile ( \ - _PRE_EFLAGS("0","3","2") \ - _op"l %1; " \ - _POST_EFLAGS("0","3","2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ - break; \ - case 8: \ - __emulate_1op_8byte(_op, _dst, _eflags); \ - break; \ - } \ -} while (0) - -/* Emulate an instruction with quadword operands (x86/64 only). */ -#if defined(__x86_64__) -#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ -do{ asm volatile ( \ - _PRE_EFLAGS("0","4","2") \ - _op"q %"_qx"3,%1; " \ - _POST_EFLAGS("0","4","2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : _qy ((_src).val), "i" (EFLAGS_MASK), \ - "m" (_eflags), "m" ((_dst).val) ); \ -} while (0) -#define __emulate_1op_8byte(_op, _dst, _eflags) \ -do{ asm volatile ( \ - _PRE_EFLAGS("0","3","2") \ - _op"q %1; " \ - _POST_EFLAGS("0","3","2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ -} while (0) -#elif defined(__i386__) -#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) -#define __emulate_1op_8byte(_op, _dst, _eflags) -#endif /* __i386__ */ - -#ifdef __XEN__ #define __emulate_fpu_insn(_op) \ do{ int _exn; \ asm volatile ( \ @@ -495,2935 +29,5 @@ do{ int _exn; : "=r" (_exn) : "0" (0) ); \ generate_exception_if(_exn, EXC_MF, -1); \ } while (0) -#else -#define __emulate_fpu_insn(_op) \ -do{ rc = 
X86EMUL_UNHANDLEABLE; \ - goto done; \ -} while (0) -#endif - -/* Fetch next part of the instruction being emulated. */ -#define insn_fetch_bytes(_size) \ -({ unsigned long _x, _eip = _regs.eip; \ - if ( !mode_64bit() ) _eip = (uint32_t)_eip; /* ignore upper dword */ \ - _regs.eip += (_size); /* real hardware doesn't truncate */ \ - generate_exception_if((uint8_t)(_regs.eip - ctxt->regs->eip) > 15, \ - EXC_GP, 0); \ - rc = ops->insn_fetch(x86_seg_cs, _eip, &_x, (_size), ctxt); \ - if ( rc ) goto done; \ - _x; \ -}) -#define insn_fetch_type(_type) ((_type)insn_fetch_bytes(sizeof(_type))) - -#define truncate_word(ea, byte_width) \ -({ unsigned long __ea = (ea); \ - unsigned int _width = (byte_width); \ - ((_width == sizeof(unsigned long)) ? __ea : \ - (__ea & ((1UL << (_width << 3)) - 1))); \ -}) -#define truncate_ea(ea) truncate_word((ea), ad_bytes) - -#define mode_64bit() (def_ad_bytes == 8) - -#define fail_if(p) \ -do { \ - rc = (p) ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY; \ - if ( rc ) goto done; \ -} while (0) - -#define generate_exception_if(p, e, ec) \ -({ if ( (p) ) { \ - fail_if(ops->inject_hw_exception == NULL); \ - rc = ops->inject_hw_exception(e, ec, ctxt) ? : X86EMUL_EXCEPTION; \ - goto done; \ - } \ -}) - -/* - * Given byte has even parity (even number of 1s)? SDM Vol. 1 Sec. 3.4.3.1, - * "Status Flags": EFLAGS.PF reflects parity of least-sig. byte of result only. - */ -static int even_parity(uint8_t v) -{ - asm ( "test %b0,%b0; setp %b0" : "=a" (v) : "0" (v) ); - return v; -} - -/* Update address held in a register, based on addressing mode. 
*/ -#define _register_address_increment(reg, inc, byte_width) \ -do { \ - int _inc = (inc); /* signed type ensures sign extension to long */ \ - unsigned int _width = (byte_width); \ - if ( _width == sizeof(unsigned long) ) \ - (reg) += _inc; \ - else if ( mode_64bit() ) \ - (reg) = ((reg) + _inc) & ((1UL << (_width << 3)) - 1); \ - else \ - (reg) = ((reg) & ~((1UL << (_width << 3)) - 1)) | \ - (((reg) + _inc) & ((1UL << (_width << 3)) - 1)); \ -} while (0) -#define register_address_increment(reg, inc) \ - _register_address_increment((reg), (inc), ad_bytes) - -#define sp_pre_dec(dec) ({ \ - _register_address_increment(_regs.esp, -(dec), ctxt->sp_size/8); \ - truncate_word(_regs.esp, ctxt->sp_size/8); \ -}) -#define sp_post_inc(inc) ({ \ - unsigned long __esp = truncate_word(_regs.esp, ctxt->sp_size/8); \ - _register_address_increment(_regs.esp, (inc), ctxt->sp_size/8); \ - __esp; \ -}) - -#define jmp_rel(rel) \ -do { \ - int _rel = (int)(rel); \ - _regs.eip += _rel; \ - if ( !mode_64bit() ) \ - _regs.eip = ((op_bytes == 2) \ - ? (uint16_t)_regs.eip : (uint32_t)_regs.eip); \ -} while (0) - -static unsigned long __get_rep_prefix( - struct cpu_user_regs *int_regs, - struct cpu_user_regs *ext_regs, - int ad_bytes) -{ - unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx : - (ad_bytes == 4) ? (uint32_t)int_regs->ecx : - int_regs->ecx); - - /* Skip the instruction if no repetitions are required. */ - if ( ecx == 0 ) - ext_regs->eip = int_regs->eip; - - return ecx; -} - -#define get_rep_prefix() ({ \ - unsigned long max_reps = 1; \ - if ( rep_prefix ) \ - max_reps = __get_rep_prefix(&_regs, ctxt->regs, ad_bytes); \ - if ( max_reps == 0 ) \ - goto done; \ - max_reps; \ -}) - -static void __put_rep_prefix( - struct cpu_user_regs *int_regs, - struct cpu_user_regs *ext_regs, - int ad_bytes, - unsigned long reps_completed) -{ - unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx : - (ad_bytes == 4) ? 
(uint32_t)int_regs->ecx : - int_regs->ecx); - - /* Reduce counter appropriately, and repeat instruction if non-zero. */ - ecx -= reps_completed; - if ( ecx != 0 ) - int_regs->eip = ext_regs->eip; - - if ( ad_bytes == 2 ) - *(uint16_t *)&int_regs->ecx = ecx; - else if ( ad_bytes == 4 ) - int_regs->ecx = (uint32_t)ecx; - else - int_regs->ecx = ecx; -} - -#define put_rep_prefix(reps_completed) ({ \ - if ( rep_prefix ) \ - __put_rep_prefix(&_regs, ctxt->regs, ad_bytes, reps_completed); \ -}) - -/* - * Unsigned multiplication with double-word result. - * IN: Multiplicand=m[0], Multiplier=m[1] - * OUT: Return CF/OF (overflow status); Result=m[1]:m[0] - */ -static int mul_dbl(unsigned long m[2]) -{ - int rc; - asm ( "mul %4; seto %b2" - : "=a" (m[0]), "=d" (m[1]), "=q" (rc) - : "0" (m[0]), "1" (m[1]), "2" (0) ); - return rc; -} - -/* - * Signed multiplication with double-word result. - * IN: Multiplicand=m[0], Multiplier=m[1] - * OUT: Return CF/OF (overflow status); Result=m[1]:m[0] - */ -static int imul_dbl(unsigned long m[2]) -{ - int rc; - asm ( "imul %4; seto %b2" - : "=a" (m[0]), "=d" (m[1]), "=q" (rc) - : "0" (m[0]), "1" (m[1]), "2" (0) ); - return rc; -} - -/* - * Unsigned division of double-word dividend. - * IN: Dividend=u[1]:u[0], Divisor=v - * OUT: Return 1: #DE - * Return 0: Quotient=u[0], Remainder=u[1] - */ -static int div_dbl(unsigned long u[2], unsigned long v) -{ - if ( (v == 0) || (u[1] >= v) ) - return 1; - asm ( "div %4" - : "=a" (u[0]), "=d" (u[1]) - : "0" (u[0]), "1" (u[1]), "r" (v) ); - return 0; -} - -/* - * Signed division of double-word dividend. - * IN: Dividend=u[1]:u[0], Divisor=v - * OUT: Return 1: #DE - * Return 0: Quotient=u[0], Remainder=u[1] - * NB. We don't use idiv directly as it's moderately hard to work out - * ahead of time whether it will #DE, which we cannot allow to happen. 
- */ -static int idiv_dbl(unsigned long u[2], unsigned long v) -{ - int negu = (long)u[1] < 0, negv = (long)v < 0; - - /* u = abs(u) */ - if ( negu ) - { - u[1] = ~u[1]; - if ( (u[0] = -u[0]) == 0 ) - u[1]++; - } - - /* abs(u) / abs(v) */ - if ( div_dbl(u, negv ? -v : v) ) - return 1; - - /* Remainder has same sign as dividend. It cannot overflow. */ - if ( negu ) - u[1] = -u[1]; - - /* Quotient is overflowed if sign bit is set. */ - if ( negu ^ negv ) - { - if ( (long)u[0] >= 0 ) - u[0] = -u[0]; - else if ( (u[0] << 1) != 0 ) /* == 0x80...0 is okay */ - return 1; - } - else if ( (long)u[0] < 0 ) - return 1; - - return 0; -} - -static int -test_cc( - unsigned int condition, unsigned int flags) -{ - int rc = 0; - - switch ( (condition & 15) >> 1 ) - { - case 0: /* o */ - rc |= (flags & EFLG_OF); - break; - case 1: /* b/c/nae */ - rc |= (flags & EFLG_CF); - break; - case 2: /* z/e */ - rc |= (flags & EFLG_ZF); - break; - case 3: /* be/na */ - rc |= (flags & (EFLG_CF|EFLG_ZF)); - break; - case 4: /* s */ - rc |= (flags & EFLG_SF); - break; - case 5: /* p/pe */ - rc |= (flags & EFLG_PF); - break; - case 7: /* le/ng */ - rc |= (flags & EFLG_ZF); - /* fall through */ - case 6: /* l/nge */ - rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); - break; - } - - /* Odd condition identifiers (lsb == 1) have inverted sense. 
*/ - return (!!rc ^ (condition & 1)); -} - -static int -get_cpl( - struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) -{ - struct segment_register reg; - - if ( ctxt->regs->eflags & EFLG_VM ) - return 3; - - if ( (ops->read_segment == NULL) || - ops->read_segment(x86_seg_ss, ®, ctxt) ) - return -1; - - return reg.attr.fields.dpl; -} - -static int -_mode_iopl( - struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) -{ - int cpl = get_cpl(ctxt, ops); - if ( cpl == -1 ) - return -1; - return ((cpl >= 0) && (cpl <= ((ctxt->regs->eflags >> 12) & 3))); -} - -#define mode_ring0() ({ \ - int _cpl = get_cpl(ctxt, ops); \ - fail_if(_cpl < 0); \ - (_cpl == 0); \ -}) -#define mode_iopl() ({ \ - int _iopl = _mode_iopl(ctxt, ops); \ - fail_if(_iopl < 0); \ - _iopl; \ -}) - -static int -in_realmode( - struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) -{ - unsigned long cr0; - int rc; - - if ( ops->read_cr == NULL ) - return 0; - - rc = ops->read_cr(0, &cr0, ctxt); - return (!rc && !(cr0 & CR0_PE)); -} - -static int -realmode_load_seg( - enum x86_segment seg, - uint16_t sel, - struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) -{ - struct segment_register reg; - int rc; - - if ( (rc = ops->read_segment(seg, ®, ctxt)) != 0 ) - return rc; - - reg.sel = sel; - reg.base = (uint32_t)sel << 4; - - return ops->write_segment(seg, ®, ctxt); -} - -static int -protmode_load_seg( - enum x86_segment seg, - uint16_t sel, - struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) -{ - struct segment_register desctab, cs, segr; - struct { uint32_t a, b; } desc; - unsigned long val; - uint8_t dpl, rpl, cpl; - int rc, fault_type = EXC_TS; - - /* NULL selector? */ - if ( (sel & 0xfffc) == 0 ) - { - if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) ) - goto raise_exn; - memset(&segr, 0, sizeof(segr)); - return ops->write_segment(seg, &segr, ctxt); - } - - /* LDT descriptor must be in the GDT. 
*/ - if ( (seg == x86_seg_ldtr) && (sel & 4) ) - goto raise_exn; - - if ( (rc = ops->read_segment(x86_seg_cs, &cs, ctxt)) || - (rc = ops->read_segment((sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, - &desctab, ctxt)) ) - return rc; - - /* Check against descriptor table limit. */ - if ( ((sel & 0xfff8) + 7) > desctab.limit ) - goto raise_exn; - - do { - if ( (rc = ops->read(x86_seg_none, desctab.base + (sel & 0xfff8), - &val, 4, ctxt)) ) - return rc; - desc.a = val; - if ( (rc = ops->read(x86_seg_none, desctab.base + (sel & 0xfff8) + 4, - &val, 4, ctxt)) ) - return rc; - desc.b = val; - - /* Segment present in memory? */ - if ( !(desc.b & (1u<<15)) ) - { - fault_type = EXC_NP; - goto raise_exn; - } - - /* LDT descriptor is a system segment. All others are code/data. */ - if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) ) - goto raise_exn; - - dpl = (desc.b >> 13) & 3; - rpl = sel & 3; - cpl = cs.sel & 3; - - switch ( seg ) - { - case x86_seg_cs: - /* Code segment? */ - if ( !(desc.b & (1u<<11)) ) - goto raise_exn; - /* Non-conforming segment: check DPL against RPL. */ - if ( ((desc.b & (6u<<9)) != 6) && (dpl != rpl) ) - goto raise_exn; - break; - case x86_seg_ss: - /* Writable data segment? */ - if ( (desc.b & (5u<<9)) != (1u<<9) ) - goto raise_exn; - if ( (dpl != cpl) || (dpl != rpl) ) - goto raise_exn; - break; - case x86_seg_ldtr: - /* LDT system segment? */ - if ( (desc.b & (15u<<8)) != (2u<<8) ) - goto raise_exn; - goto skip_accessed_flag; - default: - /* Readable code or data segment? */ - if ( (desc.b & (5u<<9)) == (4u<<9) ) - goto raise_exn; - /* Non-conforming segment: check DPL against RPL and CPL. */ - if ( ((desc.b & (6u<<9)) != 6) && ((dpl < cpl) || (dpl < rpl)) ) - goto raise_exn; - break; - } - - /* Ensure Accessed flag is set. */ - rc = ((desc.b & 0x100) ? 
X86EMUL_OKAY : - ops->cmpxchg( - x86_seg_none, desctab.base + (sel & 0xfff8) + 4, desc.b, - desc.b | 0x100, 4, ctxt)); - } while ( rc == X86EMUL_CMPXCHG_FAILED ); - - if ( rc ) - return rc; - - /* Force the Accessed flag in our local copy. */ - desc.b |= 0x100; - - skip_accessed_flag: - segr.base = (((desc.b << 0) & 0xff000000u) | - ((desc.b << 16) & 0x00ff0000u) | - ((desc.a >> 16) & 0x0000ffffu)); - segr.attr.bytes = (((desc.b >> 8) & 0x00ffu) | - ((desc.b >> 12) & 0x0f00u)); - segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu); - if ( segr.attr.fields.g ) - segr.limit = (segr.limit << 12) | 0xfffu; - segr.sel = sel; - return ops->write_segment(seg, &segr, ctxt); - - raise_exn: - if ( ops->inject_hw_exception == NULL ) - return X86EMUL_UNHANDLEABLE; - if ( (rc = ops->inject_hw_exception(fault_type, sel & 0xfffc, ctxt)) ) - return rc; - return X86EMUL_EXCEPTION; -} - -static int -load_seg( - enum x86_segment seg, - uint16_t sel, - struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) -{ - if ( (ops->read_segment == NULL) || - (ops->write_segment == NULL) ) - return X86EMUL_UNHANDLEABLE; - - if ( in_realmode(ctxt, ops) ) - return realmode_load_seg(seg, sel, ctxt, ops); - - return protmode_load_seg(seg, sel, ctxt, ops); -} - -void * -decode_register( - uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs) -{ - void *p; - - switch ( modrm_reg ) - { - case 0: p = ®s->eax; break; - case 1: p = ®s->ecx; break; - case 2: p = ®s->edx; break; - case 3: p = ®s->ebx; break; - case 4: p = (highbyte_regs ? - ((unsigned char *)®s->eax + 1) : - (unsigned char *)®s->esp); break; - case 5: p = (highbyte_regs ? - ((unsigned char *)®s->ecx + 1) : - (unsigned char *)®s->ebp); break; - case 6: p = (highbyte_regs ? - ((unsigned char *)®s->edx + 1) : - (unsigned char *)®s->esi); break; - case 7: p = (highbyte_regs ? 
- ((unsigned char *)®s->ebx + 1) : - (unsigned char *)®s->edi); break; -#if defined(__x86_64__) - case 8: p = ®s->r8; break; - case 9: p = ®s->r9; break; - case 10: p = ®s->r10; break; - case 11: p = ®s->r11; break; - case 12: p = ®s->r12; break; - case 13: p = ®s->r13; break; - case 14: p = ®s->r14; break; - case 15: p = ®s->r15; break; -#endif - default: p = NULL; break; - } - - return p; -} - -#define decode_segment_failed x86_seg_tr -enum x86_segment -decode_segment( - uint8_t modrm_reg) -{ - switch ( modrm_reg ) - { - case 0: return x86_seg_es; - case 1: return x86_seg_cs; - case 2: return x86_seg_ss; - case 3: return x86_seg_ds; - case 4: return x86_seg_fs; - case 5: return x86_seg_gs; - default: break; - } - return decode_segment_failed; -} - -int -x86_emulate( - struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) -{ - /* Shadow copy of register state. Committed on successful emulation. */ - struct cpu_user_regs _regs = *ctxt->regs; - - uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0; - uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; - unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes; -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 - unsigned int lock_prefix = 0, rep_prefix = 0; - int override_seg = -1, rc = X86EMUL_OKAY; - struct operand src, dst; - - /* Data operand effective address (usually computed from ModRM). */ - struct operand ea; - - /* Default is a memory operand relative to segment DS. */ - ea.type = OP_MEM; - ea.mem.seg = x86_seg_ds; - ea.mem.off = 0; - - ctxt->retire.byte = 0; - - op_bytes = def_op_bytes = ad_bytes = def_ad_bytes = ctxt->addr_size/8; - if ( op_bytes == 8 ) - { - op_bytes = def_op_bytes = 4; -#ifndef __x86_64__ - return X86EMUL_UNHANDLEABLE; -#endif - } - - /* Prefix bytes. 
*/ - for ( ; ; ) - { - switch ( b = insn_fetch_type(uint8_t) ) - { - case 0x66: /* operand-size override */ - op_bytes = def_op_bytes ^ 6; - break; - case 0x67: /* address-size override */ - ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6); - break; - case 0x2e: /* CS override */ - override_seg = x86_seg_cs; - break; - case 0x3e: /* DS override */ - override_seg = x86_seg_ds; - break; - case 0x26: /* ES override */ - override_seg = x86_seg_es; - break; - case 0x64: /* FS override */ - override_seg = x86_seg_fs; - break; - case 0x65: /* GS override */ - override_seg = x86_seg_gs; - break; - case 0x36: /* SS override */ - override_seg = x86_seg_ss; - break; - case 0xf0: /* LOCK */ - lock_prefix = 1; - break; - case 0xf2: /* REPNE/REPNZ */ - rep_prefix = REPNE_PREFIX; - break; - case 0xf3: /* REP/REPE/REPZ */ - rep_prefix = REPE_PREFIX; - break; - case 0x40 ... 0x4f: /* REX */ - if ( !mode_64bit() ) - goto done_prefixes; - rex_prefix = b; - continue; - default: - goto done_prefixes; - } - - /* Any legacy prefix after a REX prefix nullifies its effect. */ - rex_prefix = 0; - } - done_prefixes: - - if ( rex_prefix & 8 ) /* REX.W */ - op_bytes = 8; - - /* Opcode byte(s). */ - d = opcode_table[b]; - if ( d == 0 ) - { - /* Two-byte opcode? */ - if ( b == 0x0f ) - { - twobyte = 1; - b = insn_fetch_type(uint8_t); - d = twobyte_table[b]; - } - - /* Unrecognised? */ - if ( d == 0 ) - goto cannot_emulate; - } - - /* Lock prefix is allowed only on RMW instructions. */ - generate_exception_if((d & Mov) && lock_prefix, EXC_GP, 0); - - /* ModRM and SIB bytes. */ - if ( d & ModRM ) - { - modrm = insn_fetch_type(uint8_t); - modrm_mod = (modrm & 0xc0) >> 6; - modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3); - modrm_rm = modrm & 0x07; - - if ( modrm_mod == 3 ) - { - modrm_rm |= (rex_prefix & 1) << 3; - ea.type = OP_REG; - ea.reg = decode_register( - modrm_rm, &_regs, (d & ByteOp) && (rex_prefix == 0)); - } - else if ( ad_bytes == 2 ) - { - /* 16-bit ModR/M decode. 
*/ - switch ( modrm_rm ) - { - case 0: - ea.mem.off = _regs.ebx + _regs.esi; - break; - case 1: - ea.mem.off = _regs.ebx + _regs.edi; - break; - case 2: - ea.mem.seg = x86_seg_ss; - ea.mem.off = _regs.ebp + _regs.esi; - break; - case 3: - ea.mem.seg = x86_seg_ss; - ea.mem.off = _regs.ebp + _regs.edi; - break; - case 4: - ea.mem.off = _regs.esi; - break; - case 5: - ea.mem.off = _regs.edi; - break; - case 6: - if ( modrm_mod == 0 ) - break; - ea.mem.seg = x86_seg_ss; - ea.mem.off = _regs.ebp; - break; - case 7: - ea.mem.off = _regs.ebx; - break; - } - switch ( modrm_mod ) - { - case 0: - if ( modrm_rm == 6 ) - ea.mem.off = insn_fetch_type(int16_t); - break; - case 1: - ea.mem.off += insn_fetch_type(int8_t); - break; - case 2: - ea.mem.off += insn_fetch_type(int16_t); - break; - } - ea.mem.off = truncate_ea(ea.mem.off); - } - else - { - /* 32/64-bit ModR/M decode. */ - if ( modrm_rm == 4 ) - { - sib = insn_fetch_type(uint8_t); - sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8); - sib_base = (sib & 7) | ((rex_prefix << 3) & 8); - if ( sib_index != 4 ) - ea.mem.off = *(long*)decode_register(sib_index, &_regs, 0); - ea.mem.off <<= (sib >> 6) & 3; - if ( (modrm_mod == 0) && ((sib_base & 7) == 5) ) - ea.mem.off += insn_fetch_type(int32_t); - else if ( sib_base == 4 ) - { - ea.mem.seg = x86_seg_ss; - ea.mem.off += _regs.esp; - if ( !twobyte && (b == 0x8f) ) - /* POP <rm> computes its EA post increment. */ - ea.mem.off += ((mode_64bit() && (op_bytes == 4)) - ? 
8 : op_bytes); - } - else if ( sib_base == 5 ) - { - ea.mem.seg = x86_seg_ss; - ea.mem.off += _regs.ebp; - } - else - ea.mem.off += *(long*)decode_register(sib_base, &_regs, 0); - } - else - { - modrm_rm |= (rex_prefix & 1) << 3; - ea.mem.off = *(long *)decode_register(modrm_rm, &_regs, 0); - if ( (modrm_rm == 5) && (modrm_mod != 0) ) - ea.mem.seg = x86_seg_ss; - } - switch ( modrm_mod ) - { - case 0: - if ( (modrm_rm & 7) != 5 ) - break; - ea.mem.off = insn_fetch_type(int32_t); - if ( !mode_64bit() ) - break; - /* Relative to RIP of next instruction. Argh! */ - ea.mem.off += _regs.eip; - if ( (d & SrcMask) == SrcImm ) - ea.mem.off += (d & ByteOp) ? 1 : - ((op_bytes == 8) ? 4 : op_bytes); - else if ( (d & SrcMask) == SrcImmByte ) - ea.mem.off += 1; - else if ( !twobyte && ((b & 0xfe) == 0xf6) && - ((modrm_reg & 7) <= 1) ) - /* Special case in Grp3: test has immediate operand. */ - ea.mem.off += (d & ByteOp) ? 1 - : ((op_bytes == 8) ? 4 : op_bytes); - else if ( twobyte && ((b & 0xf7) == 0xa4) ) - /* SHLD/SHRD with immediate byte third operand. */ - ea.mem.off++; - break; - case 1: - ea.mem.off += insn_fetch_type(int8_t); - break; - case 2: - ea.mem.off += insn_fetch_type(int32_t); - break; - } - ea.mem.off = truncate_ea(ea.mem.off); - } - } - - if ( override_seg != -1 ) - ea.mem.seg = override_seg; - - /* Special instructions do their own operand decoding. */ - if ( (d & DstMask) == ImplicitOps ) - goto special_insn; - - /* Decode and fetch the source operand: register, memory or immediate. 
*/ - switch ( d & SrcMask ) - { - case SrcNone: - break; - case SrcReg: - src.type = OP_REG; - if ( d & ByteOp ) - { - src.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0)); - src.val = *(uint8_t *)src.reg; - src.bytes = 1; - } - else - { - src.reg = decode_register(modrm_reg, &_regs, 0); - switch ( (src.bytes = op_bytes) ) - { - case 2: src.val = *(uint16_t *)src.reg; break; - case 4: src.val = *(uint32_t *)src.reg; break; - case 8: src.val = *(uint64_t *)src.reg; break; - } - } - break; - case SrcMem16: - ea.bytes = 2; - goto srcmem_common; - case SrcMem: - ea.bytes = (d & ByteOp) ? 1 : op_bytes; - srcmem_common: - src = ea; - if ( src.type == OP_REG ) - { - switch ( src.bytes ) - { - case 1: src.val = *(uint8_t *)src.reg; break; - case 2: src.val = *(uint16_t *)src.reg; break; - case 4: src.val = *(uint32_t *)src.reg; break; - case 8: src.val = *(uint64_t *)src.reg; break; - } - } - else if ( (rc = ops->read(src.mem.seg, src.mem.off, - &src.val, src.bytes, ctxt)) ) - goto done; - break; - case SrcImm: - src.type = OP_IMM; - src.bytes = (d & ByteOp) ? 1 : op_bytes; - if ( src.bytes == 8 ) src.bytes = 4; - /* NB. Immediates are sign-extended as necessary. */ - switch ( src.bytes ) - { - case 1: src.val = insn_fetch_type(int8_t); break; - case 2: src.val = insn_fetch_type(int16_t); break; - case 4: src.val = insn_fetch_type(int32_t); break; - } - break; - case SrcImmByte: - src.type = OP_IMM; - src.bytes = 1; - src.val = insn_fetch_type(int8_t); - break; - } - - /* Decode and fetch the destination operand: register or memory. 
*/ - switch ( d & DstMask ) - { - case DstReg: - dst.type = OP_REG; - if ( d & ByteOp ) - { - dst.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0)); - dst.val = *(uint8_t *)dst.reg; - dst.bytes = 1; - } - else - { - dst.reg = decode_register(modrm_reg, &_regs, 0); - switch ( (dst.bytes = op_bytes) ) - { - case 2: dst.val = *(uint16_t *)dst.reg; break; - case 4: dst.val = *(uint32_t *)dst.reg; break; - case 8: dst.val = *(uint64_t *)dst.reg; break; - } - } - break; - case DstBitBase: - if ( ((d & SrcMask) == SrcImmByte) || (ea.type == OP_REG) ) - { - src.val &= (op_bytes << 3) - 1; - } - else - { - /* - * EA += BitOffset DIV op_bytes*8 - * BitOffset = BitOffset MOD op_bytes*8 - * DIV truncates towards negative infinity. - * MOD always produces a positive result. - */ - if ( op_bytes == 2 ) - src.val = (int16_t)src.val; - else if ( op_bytes == 4 ) - src.val = (int32_t)src.val; - if ( (long)src.val < 0 ) - { - unsigned long byte_offset; - byte_offset = op_bytes + (((-src.val-1) >> 3) & ~(op_bytes-1)); - ea.mem.off -= byte_offset; - src.val = (byte_offset << 3) + src.val; - } - else - { - ea.mem.off += (src.val >> 3) & ~(op_bytes - 1); - src.val &= (op_bytes << 3) - 1; - } - } - /* Becomes a normal DstMem operation from here on. */ - d = (d & ~DstMask) | DstMem; - case DstMem: - ea.bytes = (d & ByteOp) ? 1 : op_bytes; - dst = ea; - if ( dst.type == OP_REG ) - { - switch ( dst.bytes ) - { - case 1: dst.val = *(uint8_t *)dst.reg; break; - case 2: dst.val = *(uint16_t *)dst.reg; break; - case 4: dst.val = *(uint32_t *)dst.reg; break; - case 8: dst.val = *(uint64_t *)dst.reg; break; - } - } - else if ( !(d & Mov) ) /* optimisation - avoid slow emulated read */ - { - if ( (rc = ops->read(dst.mem.seg, dst.mem.off, - &dst.val, dst.bytes, ctxt)) ) - goto done; - dst.orig_val = dst.val; - } - break; - } - - /* LOCK prefix allowed only on instructions with memory destination. 
*/ - generate_exception_if(lock_prefix && (dst.type != OP_MEM), EXC_GP, 0); - - if ( twobyte ) - goto twobyte_insn; - - switch ( b ) - { - case 0x04 ... 0x05: /* add imm,%%eax */ - dst.reg = (unsigned long *)&_regs.eax; - dst.val = _regs.eax; - case 0x00 ... 0x03: add: /* add */ - emulate_2op_SrcV("add", src, dst, _regs.eflags); - break; - - case 0x0c ... 0x0d: /* or imm,%%eax */ - dst.reg = (unsigned long *)&_regs.eax; - dst.val = _regs.eax; - case 0x08 ... 0x0b: or: /* or */ - emulate_2op_SrcV("or", src, dst, _regs.eflags); - break; - - case 0x14 ... 0x15: /* adc imm,%%eax */ - dst.reg = (unsigned long *)&_regs.eax; - dst.val = _regs.eax; - case 0x10 ... 0x13: adc: /* adc */ - emulate_2op_SrcV("adc", src, dst, _regs.eflags); - break; - - case 0x1c ... 0x1d: /* sbb imm,%%eax */ - dst.reg = (unsigned long *)&_regs.eax; - dst.val = _regs.eax; - case 0x18 ... 0x1b: sbb: /* sbb */ - emulate_2op_SrcV("sbb", src, dst, _regs.eflags); - break; - - case 0x24 ... 0x25: /* and imm,%%eax */ - dst.reg = (unsigned long *)&_regs.eax; - dst.val = _regs.eax; - case 0x20 ... 0x23: and: /* and */ - emulate_2op_SrcV("and", src, dst, _regs.eflags); - break; - - case 0x2c ... 0x2d: /* sub imm,%%eax */ - dst.reg = (unsigned long *)&_regs.eax; - dst.val = _regs.eax; - case 0x28 ... 0x2b: sub: /* sub */ - emulate_2op_SrcV("sub", src, dst, _regs.eflags); - break; - - case 0x34 ... 0x35: /* xor imm,%%eax */ - dst.reg = (unsigned long *)&_regs.eax; - dst.val = _regs.eax; - case 0x30 ... 0x33: xor: /* xor */ - emulate_2op_SrcV("xor", src, dst, _regs.eflags); - break; - - case 0x3c ... 0x3d: /* cmp imm,%%eax */ - dst.reg = (unsigned long *)&_regs.eax; - dst.val = _regs.eax; - case 0x38 ... 
0x3b: cmp: /* cmp */ - emulate_2op_SrcV("cmp", src, dst, _regs.eflags); - break; - - case 0x62: /* bound */ { - unsigned long src_val2; - int lb, ub, idx; - generate_exception_if(mode_64bit() || (src.type != OP_MEM), - EXC_UD, -1); - if ( (rc = ops->read(src.mem.seg, src.mem.off + op_bytes, - &src_val2, op_bytes, ctxt)) ) - goto done; - ub = (op_bytes == 2) ? (int16_t)src_val2 : (int32_t)src_val2; - lb = (op_bytes == 2) ? (int16_t)src.val : (int32_t)src.val; - idx = (op_bytes == 2) ? (int16_t)dst.val : (int32_t)dst.val; - generate_exception_if((idx < lb) || (idx > ub), EXC_BR, -1); - dst.type = OP_NONE; - break; - } - - case 0x63: /* movsxd (x86/64) / arpl (x86/32) */ - if ( mode_64bit() ) - { - /* movsxd */ - if ( src.type == OP_REG ) - src.val = *(int32_t *)src.reg; - else if ( (rc = ops->read(src.mem.seg, src.mem.off, - &src.val, 4, ctxt)) ) - goto done; - dst.val = (int32_t)src.val; - } - else - { - /* arpl */ - uint16_t src_val = dst.val; - dst = src; - _regs.eflags &= ~EFLG_ZF; - _regs.eflags |= ((src_val & 3) > (dst.val & 3)) ? 
EFLG_ZF : 0; - if ( _regs.eflags & EFLG_ZF ) - dst.val = (dst.val & ~3) | (src_val & 3); - else - dst.type = OP_NONE; - generate_exception_if(in_realmode(ctxt, ops), EXC_UD, -1); - } - break; - - case 0x69: /* imul imm16/32 */ - case 0x6b: /* imul imm8 */ { - unsigned long src1; /* ModR/M source operand */ - if ( ea.type == OP_REG ) - src1 = *ea.reg; - else if ( (rc = ops->read(ea.mem.seg, ea.mem.off, - &src1, op_bytes, ctxt)) ) - goto done; - _regs.eflags &= ~(EFLG_OF|EFLG_CF); - switch ( dst.bytes ) - { - case 2: - dst.val = ((uint32_t)(int16_t)src.val * - (uint32_t)(int16_t)src1); - if ( (int16_t)dst.val != (uint32_t)dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - break; -#ifdef __x86_64__ - case 4: - dst.val = ((uint64_t)(int32_t)src.val * - (uint64_t)(int32_t)src1); - if ( (int32_t)dst.val != dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - break; -#endif - default: { - unsigned long m[2] = { src.val, src1 }; - if ( imul_dbl(m) ) - _regs.eflags |= EFLG_OF|EFLG_CF; - dst.val = m[0]; - break; - } - } - break; - } - - case 0x82: /* Grp1 (x86/32 only) */ - generate_exception_if(mode_64bit(), EXC_UD, -1); - case 0x80: case 0x81: case 0x83: /* Grp1 */ - switch ( modrm_reg & 7 ) - { - case 0: goto add; - case 1: goto or; - case 2: goto adc; - case 3: goto sbb; - case 4: goto and; - case 5: goto sub; - case 6: goto xor; - case 7: goto cmp; - } - break; - - case 0xa8 ... 0xa9: /* test imm,%%eax */ - dst.reg = (unsigned long *)&_regs.eax; - dst.val = _regs.eax; - case 0x84 ... 0x85: test: /* test */ - emulate_2op_SrcV("test", src, dst, _regs.eflags); - break; - - case 0x86 ... 0x87: xchg: /* xchg */ - /* Write back the register source. */ - switch ( dst.bytes ) - { - case 1: *(uint8_t *)src.reg = (uint8_t)dst.val; break; - case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break; - case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */ - case 8: *src.reg = dst.val; break; - } - /* Write back the memory destination with implicit LOCK prefix. 
*/ - dst.val = src.val; - lock_prefix = 1; - break; - - case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ - generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1); - case 0x88 ... 0x8b: /* mov */ - dst.val = src.val; - break; - - case 0x8c: /* mov Sreg,r/m */ { - struct segment_register reg; - enum x86_segment seg = decode_segment(modrm_reg); - generate_exception_if(seg == decode_segment_failed, EXC_UD, -1); - fail_if(ops->read_segment == NULL); - if ( (rc = ops->read_segment(seg, ®, ctxt)) != 0 ) - goto done; - dst.val = reg.sel; - if ( dst.type == OP_MEM ) - dst.bytes = 2; - break; - } - - case 0x8e: /* mov r/m,Sreg */ { - enum x86_segment seg = decode_segment(modrm_reg); - generate_exception_if(seg == decode_segment_failed, EXC_UD, -1); - if ( (rc = load_seg(seg, (uint16_t)src.val, ctxt, ops)) != 0 ) - goto done; - if ( seg == x86_seg_ss ) - ctxt->retire.flags.mov_ss = 1; - dst.type = OP_NONE; - break; - } - - case 0x8d: /* lea */ - dst.val = ea.mem.off; - break; - - case 0x8f: /* pop (sole member of Grp1a) */ - generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1); - /* 64-bit mode: POP defaults to a 64-bit operand. */ - if ( mode_64bit() && (dst.bytes == 4) ) - dst.bytes = 8; - if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes), - &dst.val, dst.bytes, ctxt)) != 0 ) - goto done; - break; - - case 0xb0 ... 0xb7: /* mov imm8,r8 */ - dst.reg = decode_register( - (b & 7) | ((rex_prefix & 1) << 3), &_regs, (rex_prefix == 0)); - dst.val = src.val; - break; - - case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */ - if ( dst.bytes == 8 ) /* Fetch more bytes to obtain imm64 */ - src.val = ((uint32_t)src.val | - ((uint64_t)insn_fetch_type(uint32_t) << 32)); - dst.reg = decode_register( - (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); - dst.val = src.val; - break; - - case 0xc0 ... 
0xc1: grp2: /* Grp2 */ - switch ( modrm_reg & 7 ) - { - case 0: /* rol */ - emulate_2op_SrcB("rol", src, dst, _regs.eflags); - break; - case 1: /* ror */ - emulate_2op_SrcB("ror", src, dst, _regs.eflags); - break; - case 2: /* rcl */ - emulate_2op_SrcB("rcl", src, dst, _regs.eflags); - break; - case 3: /* rcr */ - emulate_2op_SrcB("rcr", src, dst, _regs.eflags); - break; - case 4: /* sal/shl */ - case 6: /* sal/shl */ - emulate_2op_SrcB("sal", src, dst, _regs.eflags); - break; - case 5: /* shr */ - emulate_2op_SrcB("shr", src, dst, _regs.eflags); - break; - case 7: /* sar */ - emulate_2op_SrcB("sar", src, dst, _regs.eflags); - break; - } - break; - - case 0xc4: /* les */ { - unsigned long sel; - dst.val = x86_seg_es; - les: /* dst.val identifies the segment */ - generate_exception_if(src.type != OP_MEM, EXC_UD, -1); - if ( (rc = ops->read(src.mem.seg, src.mem.off + src.bytes, - &sel, 2, ctxt)) != 0 ) - goto done; - if ( (rc = load_seg(dst.val, (uint16_t)sel, ctxt, ops)) != 0 ) - goto done; - dst.val = src.val; - break; - } - - case 0xc5: /* lds */ - dst.val = x86_seg_ds; - goto les; - - case 0xd0 ... 0xd1: /* Grp2 */ - src.val = 1; - goto grp2; - - case 0xd2 ... 0xd3: /* Grp2 */ - src.val = _regs.ecx; - goto grp2; - - case 0xf6 ... 0xf7: /* Grp3 */ - switch ( modrm_reg & 7 ) - { - case 0 ... 1: /* test */ - /* Special case in Grp3: test has an immediate source operand. */ - src.type = OP_IMM; - src.bytes = (d & ByteOp) ? 
1 : op_bytes; - if ( src.bytes == 8 ) src.bytes = 4; - switch ( src.bytes ) - { - case 1: src.val = insn_fetch_type(int8_t); break; - case 2: src.val = insn_fetch_type(int16_t); break; - case 4: src.val = insn_fetch_type(int32_t); break; - } - goto test; - case 2: /* not */ - dst.val = ~dst.val; - break; - case 3: /* neg */ - emulate_1op("neg", dst, _regs.eflags); - break; - case 4: /* mul */ - src = dst; - dst.type = OP_REG; - dst.reg = (unsigned long *)&_regs.eax; - dst.val = *dst.reg; - _regs.eflags &= ~(EFLG_OF|EFLG_CF); - switch ( src.bytes ) - { - case 1: - dst.val = (uint8_t)dst.val; - dst.val *= src.val; - if ( (uint8_t)dst.val != (uint16_t)dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - dst.bytes = 2; - break; - case 2: - dst.val = (uint16_t)dst.val; - dst.val *= src.val; - if ( (uint16_t)dst.val != (uint32_t)dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - *(uint16_t *)&_regs.edx = dst.val >> 16; - break; -#ifdef __x86_64__ - case 4: - dst.val = (uint32_t)dst.val; - dst.val *= src.val; - if ( (uint32_t)dst.val != dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - _regs.edx = (uint32_t)(dst.val >> 32); - break; -#endif - default: { - unsigned long m[2] = { src.val, dst.val }; - if ( mul_dbl(m) ) - _regs.eflags |= EFLG_OF|EFLG_CF; - _regs.edx = m[1]; - dst.val = m[0]; - break; - } - } - break; - case 5: /* imul */ - src = dst; - dst.type = OP_REG; - dst.reg = (unsigned long *)&_regs.eax; - dst.val = *dst.reg; - _regs.eflags &= ~(EFLG_OF|EFLG_CF); - switch ( src.bytes ) - { - case 1: - dst.val = ((uint16_t)(int8_t)src.val * - (uint16_t)(int8_t)dst.val); - if ( (int8_t)dst.val != (uint16_t)dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - dst.bytes = 2; - break; - case 2: - dst.val = ((uint32_t)(int16_t)src.val * - (uint32_t)(int16_t)dst.val); - if ( (int16_t)dst.val != (uint32_t)dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - *(uint16_t *)&_regs.edx = dst.val >> 16; - break; -#ifdef __x86_64__ - case 4: - dst.val = ((uint64_t)(int32_t)src.val * - 
(uint64_t)(int32_t)dst.val); - if ( (int32_t)dst.val != dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - _regs.edx = (uint32_t)(dst.val >> 32); - break; -#endif - default: { - unsigned long m[2] = { src.val, dst.val }; - if ( imul_dbl(m) ) - _regs.eflags |= EFLG_OF|EFLG_CF; - _regs.edx = m[1]; - dst.val = m[0]; - break; - } - } - break; - case 6: /* div */ { - unsigned long u[2], v; - src = dst; - dst.type = OP_REG; - dst.reg = (unsigned long *)&_regs.eax; - switch ( src.bytes ) - { - case 1: - u[0] = (uint16_t)_regs.eax; - u[1] = 0; - v = (uint8_t)src.val; - generate_exception_if( - div_dbl(u, v) || ((uint8_t)u[0] != (uint16_t)u[0]), - EXC_DE, -1); - dst.val = (uint8_t)u[0]; - ((uint8_t *)&_regs.eax)[1] = u[1]; - break; - case 2: - u[0] = ((uint32_t)_regs.edx << 16) | (uint16_t)_regs.eax; - u[1] = 0; - v = (uint16_t)src.val; - generate_exception_if( - div_dbl(u, v) || ((uint16_t)u[0] != (uint32_t)u[0]), - EXC_DE, -1); - dst.val = (uint16_t)u[0]; - *(uint16_t *)&_regs.edx = u[1]; - break; -#ifdef __x86_64__ - case 4: - u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax; - u[1] = 0; - v = (uint32_t)src.val; - generate_exception_if( - div_dbl(u, v) || ((uint32_t)u[0] != u[0]), - EXC_DE, -1); - dst.val = (uint32_t)u[0]; - _regs.edx = (uint32_t)u[1]; - break; -#endif - default: - u[0] = _regs.eax; - u[1] = _regs.edx; - v = src.val; - generate_exception_if(div_dbl(u, v), EXC_DE, -1); - dst.val = u[0]; - _regs.edx = u[1]; - break; - } - break; - } - case 7: /* idiv */ { - unsigned long u[2], v; - src = dst; - dst.type = OP_REG; - dst.reg = (unsigned long *)&_regs.eax; - switch ( src.bytes ) - { - case 1: - u[0] = (int16_t)_regs.eax; - u[1] = ((long)u[0] < 0) ? ~0UL : 0UL; - v = (int8_t)src.val; - generate_exception_if( - idiv_dbl(u, v) || ((int8_t)u[0] != (int16_t)u[0]), - EXC_DE, -1); - dst.val = (int8_t)u[0]; - ((int8_t *)&_regs.eax)[1] = u[1]; - break; - case 2: - u[0] = (int32_t)((_regs.edx << 16) | (uint16_t)_regs.eax); - u[1] = ((long)u[0] < 0) ? 
~0UL : 0UL; - v = (int16_t)src.val; - generate_exception_if( - idiv_dbl(u, v) || ((int16_t)u[0] != (int32_t)u[0]), - EXC_DE, -1); - dst.val = (int16_t)u[0]; - *(int16_t *)&_regs.edx = u[1]; - break; -#ifdef __x86_64__ - case 4: - u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax; - u[1] = ((long)u[0] < 0) ? ~0UL : 0UL; - v = (int32_t)src.val; - generate_exception_if( - idiv_dbl(u, v) || ((int32_t)u[0] != u[0]), - EXC_DE, -1); - dst.val = (int32_t)u[0]; - _regs.edx = (uint32_t)u[1]; - break; -#endif - default: - u[0] = _regs.eax; - u[1] = _regs.edx; - v = src.val; - generate_exception_if(idiv_dbl(u, v), EXC_DE, -1); - dst.val = u[0]; - _regs.edx = u[1]; - break; - } - break; - } - default: - goto cannot_emulate; - } - break; - - case 0xfe: /* Grp4 */ - generate_exception_if((modrm_reg & 7) >= 2, EXC_UD, -1); - case 0xff: /* Grp5 */ - switch ( modrm_reg & 7 ) - { - case 0: /* inc */ - emulate_1op("inc", dst, _regs.eflags); - break; - case 1: /* dec */ - emulate_1op("dec", dst, _regs.eflags); - break; - case 2: /* call (near) */ - case 4: /* jmp (near) */ - if ( (dst.bytes != 8) && mode_64bit() ) - { - dst.bytes = op_bytes = 8; - if ( dst.type == OP_REG ) - dst.val = *dst.reg; - else if ( (rc = ops->read(dst.mem.seg, dst.mem.off, - &dst.val, 8, ctxt)) != 0 ) - goto done; - } - src.val = _regs.eip; - _regs.eip = dst.val; - if ( (modrm_reg & 7) == 2 ) - goto push; /* call */ - dst.type = OP_NONE; - break; - case 3: /* call (far, absolute indirect) */ - case 5: /* jmp (far, absolute indirect) */ { - unsigned long sel; - - generate_exception_if(dst.type != OP_MEM, EXC_UD, -1); - - if ( (rc = ops->read(dst.mem.seg, dst.mem.off+dst.bytes, - &sel, 2, ctxt)) ) - goto done; - - if ( (modrm_reg & 7) == 3 ) /* call */ - { - struct segment_register reg; - fail_if(ops->read_segment == NULL); - if ( (rc = ops->read_segment(x86_seg_cs, ®, ctxt)) || - (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), - reg.sel, op_bytes, ctxt)) || - (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), - 
_regs.eip, op_bytes, ctxt)) ) - goto done; - } - - if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 ) - goto done; - _regs.eip = dst.val; - - dst.type = OP_NONE; - break; - } - case 6: /* push */ - /* 64-bit mode: PUSH defaults to a 64-bit operand. */ - if ( mode_64bit() && (dst.bytes == 4) ) - { - dst.bytes = 8; - if ( dst.type == OP_REG ) - dst.val = *dst.reg; - else if ( (rc = ops->read(dst.mem.seg, dst.mem.off, - &dst.val, 8, ctxt)) != 0 ) - goto done; - } - if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes), - dst.val, dst.bytes, ctxt)) != 0 ) - goto done; - dst.type = OP_NONE; - break; - case 7: - generate_exception_if(1, EXC_UD, -1); - default: - goto cannot_emulate; - } - break; - } - - writeback: - switch ( dst.type ) - { - case OP_REG: - /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ - switch ( dst.bytes ) - { - case 1: *(uint8_t *)dst.reg = (uint8_t)dst.val; break; - case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break; - case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */ - case 8: *dst.reg = dst.val; break; - } - break; - case OP_MEM: - if ( !(d & Mov) && (dst.orig_val == dst.val) && - !ctxt->force_writeback ) - /* nothing to do */; - else if ( lock_prefix ) - rc = ops->cmpxchg( - dst.mem.seg, dst.mem.off, dst.orig_val, - dst.val, dst.bytes, ctxt); - else - rc = ops->write( - dst.mem.seg, dst.mem.off, dst.val, dst.bytes, ctxt); - if ( rc != 0 ) - goto done; - default: - break; - } - - /* Commit shadow register state. */ - _regs.eflags &= ~EFLG_RF; - *ctxt->regs = _regs; - if ( (_regs.eflags & EFLG_TF) && (rc == X86EMUL_OKAY) && - (ops->inject_hw_exception != NULL) ) - rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION; - - done: - return rc; - - special_insn: - dst.type = OP_NONE; - - /* - * The only implicit-operands instructions allowed a LOCK prefix are - * CMPXCHG{8,16}B, MOV CRn, MOV DRn. 
- */ - generate_exception_if(lock_prefix && - ((b < 0x20) || (b > 0x23)) && /* MOV CRn/DRn */ - (b != 0xc7), /* CMPXCHG{8,16}B */ - EXC_GP, 0); - - if ( twobyte ) - goto twobyte_special_insn; - - switch ( b ) - { - case 0x06: /* push %%es */ { - struct segment_register reg; - src.val = x86_seg_es; - push_seg: - fail_if(ops->read_segment == NULL); - if ( (rc = ops->read_segment(src.val, ®, ctxt)) != 0 ) - return rc; - /* 64-bit mode: PUSH defaults to a 64-bit operand. */ - if ( mode_64bit() && (op_bytes == 4) ) - op_bytes = 8; - if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), - reg.sel, op_bytes, ctxt)) != 0 ) - goto done; - break; - } - - case 0x07: /* pop %%es */ - src.val = x86_seg_es; - pop_seg: - fail_if(ops->write_segment == NULL); - /* 64-bit mode: POP defaults to a 64-bit operand. */ - if ( mode_64bit() && (op_bytes == 4) ) - op_bytes = 8; - if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes), - &dst.val, op_bytes, ctxt)) != 0 ) - goto done; - if ( (rc = load_seg(src.val, (uint16_t)dst.val, ctxt, ops)) != 0 ) - return rc; - break; - - case 0x0e: /* push %%cs */ - src.val = x86_seg_cs; - goto push_seg; - - case 0x16: /* push %%ss */ - src.val = x86_seg_ss; - goto push_seg; - - case 0x17: /* pop %%ss */ - src.val = x86_seg_ss; - ctxt->retire.flags.mov_ss = 1; - goto pop_seg; - - case 0x1e: /* push %%ds */ - src.val = x86_seg_ds; - goto push_seg; - - case 0x1f: /* pop %%ds */ - src.val = x86_seg_ds; - goto pop_seg; - - case 0x27: /* daa */ { - uint8_t al = _regs.eax; - unsigned long eflags = _regs.eflags; - generate_exception_if(mode_64bit(), EXC_UD, -1); - _regs.eflags &= ~(EFLG_CF|EFLG_AF); - if ( ((al & 0x0f) > 9) || (eflags & EFLG_AF) ) - { - *(uint8_t *)&_regs.eax += 6; - _regs.eflags |= EFLG_AF; - } - if ( (al > 0x99) || (eflags & EFLG_CF) ) - { - *(uint8_t *)&_regs.eax += 0x60; - _regs.eflags |= EFLG_CF; - } - _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); - _regs.eflags |= ((uint8_t)_regs.eax == 0) ? 
EFLG_ZF : 0; - _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; - _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0; - break; - } - - case 0x2f: /* das */ { - uint8_t al = _regs.eax; - unsigned long eflags = _regs.eflags; - generate_exception_if(mode_64bit(), EXC_UD, -1); - _regs.eflags &= ~(EFLG_CF|EFLG_AF); - if ( ((al & 0x0f) > 9) || (eflags & EFLG_AF) ) - { - _regs.eflags |= EFLG_AF; - if ( (al < 6) || (eflags & EFLG_CF) ) - _regs.eflags |= EFLG_CF; - *(uint8_t *)&_regs.eax -= 6; - } - if ( (al > 0x99) || (eflags & EFLG_CF) ) - { - *(uint8_t *)&_regs.eax -= 0x60; - _regs.eflags |= EFLG_CF; - } - _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); - _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0; - _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; - _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0; - break; - } - - case 0x37: /* aaa */ - case 0x3f: /* aas */ - generate_exception_if(mode_64bit(), EXC_UD, -1); - _regs.eflags &= ~EFLG_CF; - if ( ((uint8_t)_regs.eax > 9) || (_regs.eflags & EFLG_AF) ) - { - ((uint8_t *)&_regs.eax)[0] += (b == 0x37) ? 6 : -6; - ((uint8_t *)&_regs.eax)[1] += (b == 0x37) ? 1 : -1; - _regs.eflags |= EFLG_CF | EFLG_AF; - } - ((uint8_t *)&_regs.eax)[0] &= 0x0f; - break; - - case 0x40 ... 0x4f: /* inc/dec reg */ - dst.type = OP_REG; - dst.reg = decode_register(b & 7, &_regs, 0); - dst.bytes = op_bytes; - dst.val = *dst.reg; - if ( b & 8 ) - emulate_1op("dec", dst, _regs.eflags); - else - emulate_1op("inc", dst, _regs.eflags); - break; - - case 0x50 ... 0x57: /* push reg */ - src.val = *(unsigned long *)decode_register( - (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); - goto push; - - case 0x58 ... 
0x5f: /* pop reg */ - dst.type = OP_REG; - dst.reg = decode_register( - (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); - dst.bytes = op_bytes; - if ( mode_64bit() && (dst.bytes == 4) ) - dst.bytes = 8; - if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes), - &dst.val, dst.bytes, ctxt)) != 0 ) - goto done; - break; - - case 0x60: /* pusha */ { - int i; - unsigned long regs[] = { - _regs.eax, _regs.ecx, _regs.edx, _regs.ebx, - _regs.esp, _regs.ebp, _regs.esi, _regs.edi }; - generate_exception_if(mode_64bit(), EXC_UD, -1); - for ( i = 0; i < 8; i++ ) - if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), - regs[i], op_bytes, ctxt)) != 0 ) - goto done; - break; - } - - case 0x61: /* popa */ { - int i; - unsigned long dummy_esp, *regs[] = { - (unsigned long *)&_regs.edi, (unsigned long *)&_regs.esi, - (unsigned long *)&_regs.ebp, (unsigned long *)&dummy_esp, - (unsigned long *)&_regs.ebx, (unsigned long *)&_regs.edx, - (unsigned long *)&_regs.ecx, (unsigned long *)&_regs.eax }; - generate_exception_if(mode_64bit(), EXC_UD, -1); - for ( i = 0; i < 8; i++ ) - { - if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes), - &dst.val, op_bytes, ctxt)) != 0 ) - goto done; - switch ( op_bytes ) - { - case 1: *(uint8_t *)regs[i] = (uint8_t)dst.val; break; - case 2: *(uint16_t *)regs[i] = (uint16_t)dst.val; break; - case 4: *regs[i] = (uint32_t)dst.val; break; /* 64b: zero-ext */ - case 8: *regs[i] = dst.val; break; - } - } - break; - } - - case 0x68: /* push imm{16,32,64} */ - src.val = ((op_bytes == 2) - ? (int32_t)insn_fetch_type(int16_t) - : insn_fetch_type(int32_t)); - goto push; - - case 0x6a: /* push imm8 */ - src.val = insn_fetch_type(int8_t); - push: - d |= Mov; /* force writeback */ - dst.type = OP_MEM; - dst.bytes = op_bytes; - if ( mode_64bit() && (dst.bytes == 4) ) - dst.bytes = 8; - dst.val = src.val; - dst.mem.seg = x86_seg_ss; - dst.mem.off = sp_pre_dec(dst.bytes); - break; - - case 0x6c ... 
0x6d: /* ins %dx,%es:%edi */ { - unsigned long nr_reps = get_rep_prefix(); - dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes; - dst.mem.seg = x86_seg_es; - dst.mem.off = truncate_ea(_regs.edi); - if ( (nr_reps > 1) && (ops->rep_ins != NULL) && - ((rc = ops->rep_ins((uint16_t)_regs.edx, dst.mem.seg, - dst.mem.off, dst.bytes, - &nr_reps, ctxt)) != X86EMUL_UNHANDLEABLE) ) - { - if ( rc != 0 ) - goto done; - } - else - { - fail_if(ops->read_io == NULL); - if ( (rc = ops->read_io((uint16_t)_regs.edx, dst.bytes, - &dst.val, ctxt)) != 0 ) - goto done; - dst.type = OP_MEM; - nr_reps = 1; - } - register_address_increment( - _regs.edi, - nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes)); - put_rep_prefix(nr_reps); - break; - } - - case 0x6e ... 0x6f: /* outs %esi,%dx */ { - unsigned long nr_reps = get_rep_prefix(); - dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes; - if ( (nr_reps > 1) && (ops->rep_outs != NULL) && - ((rc = ops->rep_outs(ea.mem.seg, truncate_ea(_regs.esi), - (uint16_t)_regs.edx, dst.bytes, - &nr_reps, ctxt)) != X86EMUL_UNHANDLEABLE) ) - { - if ( rc != 0 ) - goto done; - } - else - { - if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi), - &dst.val, dst.bytes, ctxt)) != 0 ) - goto done; - fail_if(ops->write_io == NULL); - if ( (rc = ops->write_io((uint16_t)_regs.edx, dst.bytes, - dst.val, ctxt)) != 0 ) - goto done; - nr_reps = 1; - } - register_address_increment( - _regs.esi, - nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes)); - put_rep_prefix(nr_reps); - break; - } - - case 0x70 ... 0x7f: /* jcc (short) */ { - int rel = insn_fetch_type(int8_t); - if ( test_cc(b, _regs.eflags) ) - jmp_rel(rel); - break; - } - - case 0x90: /* nop / xchg %%r8,%%rax */ - if ( !(rex_prefix & 1) ) - break; /* nop */ - - case 0x91 ... 
0x97: /* xchg reg,%%rax */ - src.type = dst.type = OP_REG; - src.bytes = dst.bytes = op_bytes; - src.reg = (unsigned long *)&_regs.eax; - src.val = *src.reg; - dst.reg = decode_register( - (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); - dst.val = *dst.reg; - goto xchg; - - case 0x98: /* cbw/cwde/cdqe */ - switch ( op_bytes ) - { - case 2: *(int16_t *)&_regs.eax = (int8_t)_regs.eax; break; /* cbw */ - case 4: _regs.eax = (uint32_t)(int16_t)_regs.eax; break; /* cwde */ - case 8: _regs.eax = (int32_t)_regs.eax; break; /* cdqe */ - } - break; - - case 0x99: /* cwd/cdq/cqo */ - switch ( op_bytes ) - { - case 2: - *(int16_t *)&_regs.edx = ((int16_t)_regs.eax < 0) ? -1 : 0; - break; - case 4: - _regs.edx = (uint32_t)(((int32_t)_regs.eax < 0) ? -1 : 0); - break; - case 8: - _regs.edx = (_regs.eax < 0) ? -1 : 0; - break; - } - break; - - case 0x9a: /* call (far, absolute) */ { - struct segment_register reg; - uint16_t sel; - uint32_t eip; - - fail_if(ops->read_segment == NULL); - generate_exception_if(mode_64bit(), EXC_UD, -1); - - eip = insn_fetch_bytes(op_bytes); - sel = insn_fetch_type(uint16_t); - - if ( (rc = ops->read_segment(x86_seg_cs, ®, ctxt)) || - (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), - reg.sel, op_bytes, ctxt)) || - (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), - _regs.eip, op_bytes, ctxt)) ) - goto done; - - if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 ) - goto done; - _regs.eip = eip; - break; - } - - case 0x9b: /* wait/fwait */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); - __emulate_fpu_insn("fwait"); - break; - - case 0x9c: /* pushf */ - src.val = _regs.eflags; - goto push; - - case 0x9d: /* popf */ { - uint32_t mask = EFLG_VIP | EFLG_VIF | EFLG_VM; - if ( !mode_ring0() ) - mask |= EFLG_IOPL; - if ( !mode_iopl() ) - mask |= EFLG_IF; - /* 64-bit mode: POP defaults to a 64-bit operand. 
*/ - if ( mode_64bit() && (op_bytes == 4) ) - op_bytes = 8; - if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes), - &dst.val, op_bytes, ctxt)) != 0 ) - goto done; - if ( op_bytes == 2 ) - dst.val = (uint16_t)dst.val | (_regs.eflags & 0xffff0000u); - dst.val &= 0x257fd5; - _regs.eflags &= mask; - _regs.eflags |= (uint32_t)(dst.val & ~mask) | 0x02; - break; - } - - case 0x9e: /* sahf */ - *(uint8_t *)&_regs.eflags = (((uint8_t *)&_regs.eax)[1] & 0xd7) | 0x02; - break; - - case 0x9f: /* lahf */ - ((uint8_t *)&_regs.eax)[1] = (_regs.eflags & 0xd7) | 0x02; - break; - - case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */ - /* Source EA is not encoded via ModRM. */ - dst.type = OP_REG; - dst.reg = (unsigned long *)&_regs.eax; - dst.bytes = (d & ByteOp) ? 1 : op_bytes; - if ( (rc = ops->read(ea.mem.seg, insn_fetch_bytes(ad_bytes), - &dst.val, dst.bytes, ctxt)) != 0 ) - goto done; - break; - - case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */ - /* Destination EA is not encoded via ModRM. */ - dst.type = OP_MEM; - dst.mem.seg = ea.mem.seg; - dst.mem.off = insn_fetch_bytes(ad_bytes); - dst.bytes = (d & ByteOp) ? 1 : op_bytes; - dst.val = (unsigned long)_regs.eax; - break; - - case 0xa4 ... 0xa5: /* movs */ { - unsigned long nr_reps = get_rep_prefix(); - dst.bytes = (d & ByteOp) ? 1 : op_bytes; - dst.mem.seg = x86_seg_es; - dst.mem.off = truncate_ea(_regs.edi); - if ( (nr_reps > 1) && (ops->rep_movs != NULL) && - ((rc = ops->rep_movs(ea.mem.seg, truncate_ea(_regs.esi), - dst.mem.seg, dst.mem.off, dst.bytes, - &nr_reps, ctxt)) != X86EMUL_UNHANDLEABLE) ) - { - if ( rc != 0 ) - goto done; - } - else - { - if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi), - &dst.val, dst.bytes, ctxt)) != 0 ) - goto done; - dst.type = OP_MEM; - nr_reps = 1; - } - register_address_increment( - _regs.esi, - nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes)); - register_address_increment( - _regs.edi, - nr_reps * ((_regs.eflags & EFLG_DF) ? 
-dst.bytes : dst.bytes)); - put_rep_prefix(nr_reps); - break; - } - - case 0xa6 ... 0xa7: /* cmps */ { - unsigned long next_eip = _regs.eip; - get_rep_prefix(); - src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes; - if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi), - &dst.val, dst.bytes, ctxt)) || - (rc = ops->read(x86_seg_es, truncate_ea(_regs.edi), - &src.val, src.bytes, ctxt)) ) - goto done; - register_address_increment( - _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); - register_address_increment( - _regs.edi, (_regs.eflags & EFLG_DF) ? -src.bytes : src.bytes); - put_rep_prefix(1); - /* cmp: dst - src ==> src=*%%edi,dst=*%%esi ==> *%%esi - *%%edi */ - emulate_2op_SrcV("cmp", src, dst, _regs.eflags); - if ( ((rep_prefix == REPE_PREFIX) && !(_regs.eflags & EFLG_ZF)) || - ((rep_prefix == REPNE_PREFIX) && (_regs.eflags & EFLG_ZF)) ) - _regs.eip = next_eip; - break; - } - - case 0xaa ... 0xab: /* stos */ { - /* unsigned long max_reps = */get_rep_prefix(); - dst.type = OP_MEM; - dst.bytes = (d & ByteOp) ? 1 : op_bytes; - dst.mem.seg = x86_seg_es; - dst.mem.off = truncate_ea(_regs.edi); - dst.val = _regs.eax; - register_address_increment( - _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); - put_rep_prefix(1); - break; - } - - case 0xac ... 0xad: /* lods */ { - /* unsigned long max_reps = */get_rep_prefix(); - dst.type = OP_REG; - dst.bytes = (d & ByteOp) ? 1 : op_bytes; - dst.reg = (unsigned long *)&_regs.eax; - if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi), - &dst.val, dst.bytes, ctxt)) != 0 ) - goto done; - register_address_increment( - _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); - put_rep_prefix(1); - break; - } - - case 0xae ... 0xaf: /* scas */ { - unsigned long next_eip = _regs.eip; - get_rep_prefix(); - src.bytes = dst.bytes = (d & ByteOp) ? 
1 : op_bytes; - dst.val = _regs.eax; - if ( (rc = ops->read(x86_seg_es, truncate_ea(_regs.edi), - &src.val, src.bytes, ctxt)) != 0 ) - goto done; - register_address_increment( - _regs.edi, (_regs.eflags & EFLG_DF) ? -src.bytes : src.bytes); - put_rep_prefix(1); - /* cmp: dst - src ==> src=*%%edi,dst=%%eax ==> %%eax - *%%edi */ - emulate_2op_SrcV("cmp", src, dst, _regs.eflags); - if ( ((rep_prefix == REPE_PREFIX) && !(_regs.eflags & EFLG_ZF)) || - ((rep_prefix == REPNE_PREFIX) && (_regs.eflags & EFLG_ZF)) ) - _regs.eip = next_eip; - break; - } - - case 0xc2: /* ret imm16 (near) */ - case 0xc3: /* ret (near) */ { - int offset = (b == 0xc2) ? insn_fetch_type(uint16_t) : 0; - op_bytes = mode_64bit() ? 8 : op_bytes; - if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes + offset), - &dst.val, op_bytes, ctxt)) != 0 ) - goto done; - _regs.eip = dst.val; - break; - } - - case 0xc8: /* enter imm16,imm8 */ { - uint16_t size = insn_fetch_type(uint16_t); - uint8_t depth = insn_fetch_type(uint8_t) & 31; - int i; - - dst.type = OP_REG; - dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes; - dst.reg = (unsigned long *)&_regs.ebp; - if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes), - _regs.ebp, dst.bytes, ctxt)) ) - goto done; - dst.val = _regs.esp; - - if ( depth > 0 ) - { - for ( i = 1; i < depth; i++ ) - { - unsigned long ebp, temp_data; - ebp = truncate_word(_regs.ebp - i*dst.bytes, ctxt->sp_size/8); - if ( (rc = ops->read(x86_seg_ss, ebp, - &temp_data, dst.bytes, ctxt)) || - (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes), - temp_data, dst.bytes, ctxt)) ) - goto done; - } - if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes), - dst.val, dst.bytes, ctxt)) ) - goto done; - } - - sp_pre_dec(size); - break; - } - - case 0xc9: /* leave */ - /* First writeback, to %%esp. */ - dst.type = OP_REG; - dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 
8 : op_bytes; - dst.reg = (unsigned long *)&_regs.esp; - dst.val = _regs.ebp; - - /* Flush first writeback, since there is a second. */ - switch ( dst.bytes ) - { - case 1: *(uint8_t *)dst.reg = (uint8_t)dst.val; break; - case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break; - case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */ - case 8: *dst.reg = dst.val; break; - } - - /* Second writeback, to %%ebp. */ - dst.reg = (unsigned long *)&_regs.ebp; - if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes), - &dst.val, dst.bytes, ctxt)) ) - goto done; - break; - - case 0xca: /* ret imm16 (far) */ - case 0xcb: /* ret (far) */ { - int offset = (b == 0xca) ? insn_fetch_type(uint16_t) : 0; - op_bytes = mode_64bit() ? 8 : op_bytes; - if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes), - &dst.val, op_bytes, ctxt)) || - (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes + offset), - &src.val, op_bytes, ctxt)) || - (rc = load_seg(x86_seg_cs, (uint16_t)src.val, ctxt, ops)) ) - goto done; - _regs.eip = dst.val; - break; - } - - case 0xcc: /* int3 */ - src.val = EXC_BP; - goto swint; - - case 0xcd: /* int imm8 */ - src.val = insn_fetch_type(uint8_t); - swint: - fail_if(ops->inject_sw_interrupt == NULL); - rc = ops->inject_sw_interrupt(src.val, _regs.eip - ctxt->regs->eip, - ctxt) ? 
: X86EMUL_EXCEPTION; - goto done; - - case 0xce: /* into */ - generate_exception_if(mode_64bit(), EXC_UD, -1); - if ( !(_regs.eflags & EFLG_OF) ) - break; - src.val = EXC_OF; - goto swint; - - case 0xcf: /* iret */ { - unsigned long cs, eip, eflags; - uint32_t mask = EFLG_VIP | EFLG_VIF | EFLG_VM; - if ( !mode_ring0() ) - mask |= EFLG_IOPL; - if ( !mode_iopl() ) - mask |= EFLG_IF; - fail_if(!in_realmode(ctxt, ops)); - if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes), - &eip, op_bytes, ctxt)) || - (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes), - &cs, op_bytes, ctxt)) || - (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes), - &eflags, op_bytes, ctxt)) ) - goto done; - if ( op_bytes == 2 ) - eflags = (uint16_t)eflags | (_regs.eflags & 0xffff0000u); - eflags &= 0x257fd5; - _regs.eflags &= mask; - _regs.eflags |= (uint32_t)(eflags & ~mask) | 0x02; - _regs.eip = eip; - if ( (rc = load_seg(x86_seg_cs, (uint16_t)cs, ctxt, ops)) != 0 ) - goto done; - break; - } - - case 0xd4: /* aam */ { - unsigned int base = insn_fetch_type(uint8_t); - uint8_t al = _regs.eax; - generate_exception_if(mode_64bit(), EXC_UD, -1); - generate_exception_if(base == 0, EXC_DE, -1); - *(uint16_t *)&_regs.eax = ((al / base) << 8) | (al % base); - _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); - _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0; - _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; - _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0; - break; - } - - case 0xd5: /* aad */ { - unsigned int base = insn_fetch_type(uint8_t); - uint16_t ax = _regs.eax; - generate_exception_if(mode_64bit(), EXC_UD, -1); - *(uint16_t *)&_regs.eax = (uint8_t)(ax + ((ax >> 8) * base)); - _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); - _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0; - _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; - _regs.eflags |= even_parity(_regs.eax) ? 
EFLG_PF : 0; - break; - } - - case 0xd6: /* salc */ - generate_exception_if(mode_64bit(), EXC_UD, -1); - *(uint8_t *)&_regs.eax = (_regs.eflags & EFLG_CF) ? 0xff : 0x00; - break; - - case 0xd7: /* xlat */ { - unsigned long al = (uint8_t)_regs.eax; - if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.ebx + al), - &al, 1, ctxt)) != 0 ) - goto done; - *(uint8_t *)&_regs.eax = al; - break; - } - - case 0xd9: /* FPU 0xd9 */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); - switch ( modrm ) - { - case 0xc0: __emulate_fpu_insn(".byte 0xd9,0xc0"); break; - case 0xc1: __emulate_fpu_insn(".byte 0xd9,0xc1"); break; - case 0xc2: __emulate_fpu_insn(".byte 0xd9,0xc2"); break; - case 0xc3: __emulate_fpu_insn(".byte 0xd9,0xc3"); break; - case 0xc4: __emulate_fpu_insn(".byte 0xd9,0xc4"); break; - case 0xc5: __emulate_fpu_insn(".byte 0xd9,0xc5"); break; - case 0xc6: __emulate_fpu_insn(".byte 0xd9,0xc6"); break; - case 0xc7: __emulate_fpu_insn(".byte 0xd9,0xc7"); break; - case 0xe0: __emulate_fpu_insn(".byte 0xd9,0xe0"); break; - case 0xe8: __emulate_fpu_insn(".byte 0xd9,0xe8"); break; - case 0xee: __emulate_fpu_insn(".byte 0xd9,0xee"); break; - default: - fail_if((modrm_reg & 7) != 7); - fail_if(modrm >= 0xc0); - /* fnstcw m2byte */ - ea.bytes = 2; - dst = ea; - asm volatile ( "fnstcw %0" : "=m" (dst.val) ); - } - break; - - case 0xdb: /* FPU 0xdb */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); - fail_if(modrm != 0xe3); - /* fninit */ - asm volatile ( "fninit" ); - break; - - case 0xdd: /* FPU 0xdd */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); - fail_if((modrm_reg & 7) != 7); - fail_if(modrm >= 0xc0); - /* fnstsw m2byte */ - ea.bytes = 2; - dst = ea; - asm volatile ( "fnstsw %0" : "=m" (dst.val) ); - break; - - case 0xde: /* FPU 0xde */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); - switch ( modrm ) - { - case 0xd9: __emulate_fpu_insn(".byte 0xde,0xd9"); break; - case 0xf8: 
__emulate_fpu_insn(".byte 0xde,0xf8"); break; - case 0xf9: __emulate_fpu_insn(".byte 0xde,0xf9"); break; - case 0xfa: __emulate_fpu_insn(".byte 0xde,0xfa"); break; - case 0xfb: __emulate_fpu_insn(".byte 0xde,0xfb"); break; - case 0xfc: __emulate_fpu_insn(".byte 0xde,0xfc"); break; - case 0xfd: __emulate_fpu_insn(".byte 0xde,0xfd"); break; - case 0xfe: __emulate_fpu_insn(".byte 0xde,0xfe"); break; - case 0xff: __emulate_fpu_insn(".byte 0xde,0xff"); break; - default: goto cannot_emulate; - } - break; - - case 0xdf: /* FPU 0xdf */ - fail_if(ops->load_fpu_ctxt == NULL); - ops->load_fpu_ctxt(ctxt); - fail_if(modrm != 0xe0); - /* fnstsw %ax */ - dst.bytes = 2; - dst.type = OP_REG; - dst.reg = (unsigned long *)&_regs.eax; - asm volatile ( "fnstsw %0" : "=m" (dst.val) ); - break; - - case 0xe0 ... 0xe2: /* loop{,z,nz} */ { - int rel = insn_fetch_type(int8_t); - int do_jmp = !(_regs.eflags & EFLG_ZF); /* loopnz */ - if ( b == 0xe1 ) - do_jmp = !do_jmp; /* loopz */ - else if ( b == 0xe2 ) - do_jmp = 1; /* loop */ - switch ( ad_bytes ) - { - case 2: - do_jmp &= --(*(uint16_t *)&_regs.ecx) != 0; - break; - case 4: - do_jmp &= --(*(uint32_t *)&_regs.ecx) != 0; - _regs.ecx = (uint32_t)_regs.ecx; /* zero extend in x86/64 mode */ - break; - default: /* case 8: */ - do_jmp &= --_regs.ecx != 0; - break; - } - if ( do_jmp ) - jmp_rel(rel); - break; - } - - case 0xe3: /* jcxz/jecxz (short) */ { - int rel = insn_fetch_type(int8_t); - if ( (ad_bytes == 2) ? !(uint16_t)_regs.ecx : - (ad_bytes == 4) ? !(uint32_t)_regs.ecx : !_regs.ecx ) - jmp_rel(rel); - break; - } - - case 0xe4: /* in imm8,%al */ - case 0xe5: /* in imm8,%eax */ - case 0xe6: /* out %al,imm8 */ - case 0xe7: /* out %eax,imm8 */ - case 0xec: /* in %dx,%al */ - case 0xed: /* in %dx,%eax */ - case 0xee: /* out %al,%dx */ - case 0xef: /* out %eax,%dx */ { - unsigned int port = ((b < 0xe8) - ? insn_fetch_type(uint8_t) - : (uint16_t)_regs.edx); - op_bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 
4 : op_bytes; - if ( b & 2 ) - { - /* out */ - fail_if(ops->write_io == NULL); - rc = ops->write_io(port, op_bytes, _regs.eax, ctxt); - - } - else - { - /* in */ - dst.type = OP_REG; - dst.bytes = op_bytes; - dst.reg = (unsigned long *)&_regs.eax; - fail_if(ops->read_io == NULL); - rc = ops->read_io(port, dst.bytes, &dst.val, ctxt); - } - if ( rc != 0 ) - goto done; - break; - } - - case 0xe8: /* call (near) */ { - int rel = (((op_bytes == 2) && !mode_64bit()) - ? (int32_t)insn_fetch_type(int16_t) - : insn_fetch_type(int32_t)); - op_bytes = mode_64bit() ? 8 : op_bytes; - src.val = _regs.eip; - jmp_rel(rel); - goto push; - } - - case 0xe9: /* jmp (near) */ { - int rel = (((op_bytes == 2) && !mode_64bit()) - ? (int32_t)insn_fetch_type(int16_t) - : insn_fetch_type(int32_t)); - jmp_rel(rel); - break; - } - - case 0xea: /* jmp (far, absolute) */ { - uint16_t sel; - uint32_t eip; - generate_exception_if(mode_64bit(), EXC_UD, -1); - eip = insn_fetch_bytes(op_bytes); - sel = insn_fetch_type(uint16_t); - if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 ) - goto done; - _regs.eip = eip; - break; - } - - case 0xeb: /* jmp (short) */ { - int rel = insn_fetch_type(int8_t); - jmp_rel(rel); - break; - } - - case 0xf1: /* int1 (icebp) */ - src.val = EXC_DB; - goto swint; - - case 0xf4: /* hlt */ - ctxt->retire.flags.hlt = 1; - break; - - case 0xf5: /* cmc */ - _regs.eflags ^= EFLG_CF; - break; - - case 0xf8: /* clc */ - _regs.eflags &= ~EFLG_CF; - break; - - case 0xf9: /* stc */ - _regs.eflags |= EFLG_CF; - break; - - case 0xfa: /* cli */ - generate_exception_if(!mode_iopl(), EXC_GP, 0); - _regs.eflags &= ~EFLG_IF; - break; - - case 0xfb: /* sti */ - generate_exception_if(!mode_iopl(), EXC_GP, 0); - if ( !(_regs.eflags & EFLG_IF) ) - { - _regs.eflags |= EFLG_IF; - ctxt->retire.flags.sti = 1; - } - break; - - case 0xfc: /* cld */ - _regs.eflags &= ~EFLG_DF; - break; - - case 0xfd: /* std */ - _regs.eflags |= EFLG_DF; - break; - } - goto writeback; - - twobyte_insn: - switch ( 
b ) - { - case 0x40 ... 0x4f: /* cmovcc */ - dst.val = src.val; - if ( !test_cc(b, _regs.eflags) ) - dst.type = OP_NONE; - break; - - case 0x90 ... 0x9f: /* setcc */ - dst.val = test_cc(b, _regs.eflags); - break; - - case 0xb0 ... 0xb1: /* cmpxchg */ - /* Save real source value, then compare EAX against destination. */ - src.orig_val = src.val; - src.val = _regs.eax; - emulate_2op_SrcV("cmp", src, dst, _regs.eflags); - if ( _regs.eflags & EFLG_ZF ) - { - /* Success: write back to memory. */ - dst.val = src.orig_val; - } - else - { - /* Failure: write the value we saw to EAX. */ - dst.type = OP_REG; - dst.reg = (unsigned long *)&_regs.eax; - } - break; - - case 0xa3: bt: /* bt */ - emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags); - break; - - case 0xa4: /* shld imm8,r,r/m */ - case 0xa5: /* shld %%cl,r,r/m */ - case 0xac: /* shrd imm8,r,r/m */ - case 0xad: /* shrd %%cl,r,r/m */ { - uint8_t shift, width = dst.bytes << 3; - shift = (b & 1) ? (uint8_t)_regs.ecx : insn_fetch_type(uint8_t); - if ( (shift &= width - 1) == 0 ) - break; - dst.orig_val = truncate_word(dst.val, dst.bytes); - dst.val = ((shift == width) ? src.val : - (b & 8) ? - /* shrd */ - ((dst.orig_val >> shift) | - truncate_word(src.val << (width - shift), dst.bytes)) : - /* shld */ - ((dst.orig_val << shift) | - ((src.val >> (width - shift)) & ((1ull << shift) - 1)))); - dst.val = truncate_word(dst.val, dst.bytes); - _regs.eflags &= ~(EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_PF|EFLG_CF); - if ( (dst.val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 ) - _regs.eflags |= EFLG_CF; - if ( ((dst.val ^ dst.orig_val) >> (width - 1)) & 1 ) - _regs.eflags |= EFLG_OF; - _regs.eflags |= ((dst.val >> (width - 1)) & 1) ? EFLG_SF : 0; - _regs.eflags |= (dst.val == 0) ? EFLG_ZF : 0; - _regs.eflags |= even_parity(dst.val) ? 
EFLG_PF : 0; - break; - } - - case 0xb3: btr: /* btr */ - emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags); - break; - - case 0xab: bts: /* bts */ - emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags); - break; - - case 0xaf: /* imul */ - _regs.eflags &= ~(EFLG_OF|EFLG_CF); - switch ( dst.bytes ) - { - case 2: - dst.val = ((uint32_t)(int16_t)src.val * - (uint32_t)(int16_t)dst.val); - if ( (int16_t)dst.val != (uint32_t)dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - break; -#ifdef __x86_64__ - case 4: - dst.val = ((uint64_t)(int32_t)src.val * - (uint64_t)(int32_t)dst.val); - if ( (int32_t)dst.val != dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - break; -#endif - default: { - unsigned long m[2] = { src.val, dst.val }; - if ( imul_dbl(m) ) - _regs.eflags |= EFLG_OF|EFLG_CF; - dst.val = m[0]; - break; - } - } - break; - - case 0xb2: /* lss */ - dst.val = x86_seg_ss; - goto les; - - case 0xb4: /* lfs */ - dst.val = x86_seg_fs; - goto les; - - case 0xb5: /* lgs */ - dst.val = x86_seg_gs; - goto les; - - case 0xb6: /* movzx rm8,r{16,32,64} */ - /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */ - dst.reg = decode_register(modrm_reg, &_regs, 0); - dst.bytes = op_bytes; - dst.val = (uint8_t)src.val; - break; - - case 0xbc: /* bsf */ { - int zf; - asm ( "bsf %2,%0; setz %b1" - : "=r" (dst.val), "=q" (zf) - : "r" (src.val), "1" (0) ); - _regs.eflags &= ~EFLG_ZF; - _regs.eflags |= zf ? EFLG_ZF : 0; - break; - } - - case 0xbd: /* bsr */ { - int zf; - asm ( "bsr %2,%0; setz %b1" - : "=r" (dst.val), "=q" (zf) - : "r" (src.val), "1" (0) ); - _regs.eflags &= ~EFLG_ZF; - _regs.eflags |= zf ? 
EFLG_ZF : 0; - break; - } - - case 0xb7: /* movzx rm16,r{16,32,64} */ - dst.val = (uint16_t)src.val; - break; - - case 0xbb: btc: /* btc */ - emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags); - break; - - case 0xba: /* Grp8 */ - switch ( modrm_reg & 7 ) - { - case 4: goto bt; - case 5: goto bts; - case 6: goto btr; - case 7: goto btc; - default: generate_exception_if(1, EXC_UD, -1); - } - break; - - case 0xbe: /* movsx rm8,r{16,32,64} */ - /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */ - dst.reg = decode_register(modrm_reg, &_regs, 0); - dst.bytes = op_bytes; - dst.val = (int8_t)src.val; - break; - - case 0xbf: /* movsx rm16,r{16,32,64} */ - dst.val = (int16_t)src.val; - break; - - case 0xc0 ... 0xc1: /* xadd */ - /* Write back the register source. */ - switch ( dst.bytes ) - { - case 1: *(uint8_t *)src.reg = (uint8_t)dst.val; break; - case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break; - case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */ - case 8: *src.reg = dst.val; break; - } - goto add; - } - goto writeback; - - twobyte_special_insn: - switch ( b ) - { - case 0x01: /* Grp7 */ { - struct segment_register reg; - unsigned long base, limit, cr0, cr0w; - - if ( modrm == 0xdf ) /* invlpga */ - { - generate_exception_if(in_realmode(ctxt, ops), EXC_UD, -1); - generate_exception_if(!mode_ring0(), EXC_GP, 0); - fail_if(ops->invlpg == NULL); - if ( (rc = ops->invlpg(x86_seg_none, truncate_ea(_regs.eax), - ctxt)) ) - goto done; - break; - } - - switch ( modrm_reg & 7 ) - { - case 0: /* sgdt */ - case 1: /* sidt */ - generate_exception_if(ea.type != OP_MEM, EXC_UD, -1); - fail_if(ops->read_segment == NULL); - if ( (rc = ops->read_segment((modrm_reg & 1) ? - x86_seg_idtr : x86_seg_gdtr, - ®, ctxt)) ) - goto done; - if ( op_bytes == 2 ) - reg.base &= 0xffffff; - if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0, - reg.limit, 2, ctxt)) || - (rc = ops->write(ea.mem.seg, ea.mem.off+2, - reg.base, mode_64bit() ? 
8 : 4, ctxt)) ) - goto done; - break; - case 2: /* lgdt */ - case 3: /* lidt */ - generate_exception_if(ea.type != OP_MEM, EXC_UD, -1); - fail_if(ops->write_segment == NULL); - memset(®, 0, sizeof(reg)); - if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, - &limit, 2, ctxt)) || - (rc = ops->read(ea.mem.seg, ea.mem.off+2, - &base, mode_64bit() ? 8 : 4, ctxt)) ) - goto done; - reg.base = base; - reg.limit = limit; - if ( op_bytes == 2 ) - reg.base &= 0xffffff; - if ( (rc = ops->write_segment((modrm_reg & 1) ? - x86_seg_idtr : x86_seg_gdtr, - ®, ctxt)) ) - goto done; - break; - case 4: /* smsw */ - ea.bytes = 2; - dst = ea; - fail_if(ops->read_cr == NULL); - if ( (rc = ops->read_cr(0, &dst.val, ctxt)) ) - goto done; - d |= Mov; /* force writeback */ - break; - case 6: /* lmsw */ - fail_if(ops->read_cr == NULL); - fail_if(ops->write_cr == NULL); - if ( (rc = ops->read_cr(0, &cr0, ctxt)) ) - goto done; - if ( ea.type == OP_REG ) - cr0w = *ea.reg; - else if ( (rc = ops->read(ea.mem.seg, ea.mem.off, - &cr0w, 2, ctxt)) ) - goto done; - cr0 &= 0xffff0000; - cr0 |= (uint16_t)cr0w; - if ( (rc = ops->write_cr(0, cr0, ctxt)) ) - goto done; - break; - case 7: /* invlpg */ - generate_exception_if(!mode_ring0(), EXC_GP, 0); - generate_exception_if(ea.type != OP_MEM, EXC_UD, -1); - fail_if(ops->invlpg == NULL); - if ( (rc = ops->invlpg(ea.mem.seg, ea.mem.off, ctxt)) ) - goto done; - break; - default: - goto cannot_emulate; - } - break; - } - - case 0x06: /* clts */ - generate_exception_if(!mode_ring0(), EXC_GP, 0); - fail_if((ops->read_cr == NULL) || (ops->write_cr == NULL)); - if ( (rc = ops->read_cr(0, &dst.val, ctxt)) || - (rc = ops->write_cr(0, dst.val&~8, ctxt)) ) - goto done; - break; - - case 0x08: /* invd */ - case 0x09: /* wbinvd */ - generate_exception_if(!mode_ring0(), EXC_GP, 0); - fail_if(ops->wbinvd == NULL); - if ( (rc = ops->wbinvd(ctxt)) != 0 ) - goto done; - break; - - case 0x0d: /* GrpP (prefetch) */ - case 0x18: /* Grp16 (prefetch/nop) */ - case 0x19 ... 
0x1f: /* nop (amd-defined) */ - break; - - case 0x20: /* mov cr,reg */ - case 0x21: /* mov dr,reg */ - case 0x22: /* mov reg,cr */ - case 0x23: /* mov reg,dr */ - generate_exception_if(ea.type != OP_REG, EXC_UD, -1); - generate_exception_if(!mode_ring0(), EXC_GP, 0); - modrm_reg |= lock_prefix << 3; - if ( b & 2 ) - { - /* Write to CR/DR. */ - src.val = *(unsigned long *)decode_register(modrm_rm, &_regs, 0); - if ( !mode_64bit() ) - src.val = (uint32_t)src.val; - rc = ((b & 1) - ? (ops->write_dr - ? ops->write_dr(modrm_reg, src.val, ctxt) - : X86EMUL_UNHANDLEABLE) - : (ops->write_cr - ? ops->write_cr(modrm_reg, src.val, ctxt) - : X86EMUL_UNHANDLEABLE)); - } - else - { - /* Read from CR/DR. */ - dst.type = OP_REG; - dst.bytes = mode_64bit() ? 8 : 4; - dst.reg = decode_register(modrm_rm, &_regs, 0); - rc = ((b & 1) - ? (ops->read_dr - ? ops->read_dr(modrm_reg, &dst.val, ctxt) - : X86EMUL_UNHANDLEABLE) - : (ops->read_cr - ? ops->read_cr(modrm_reg, &dst.val, ctxt) - : X86EMUL_UNHANDLEABLE)); - } - if ( rc != 0 ) - goto done; - break; - - case 0x30: /* wrmsr */ { - uint64_t val = ((uint64_t)_regs.edx << 32) | (uint32_t)_regs.eax; - generate_exception_if(!mode_ring0(), EXC_GP, 0); - fail_if(ops->write_msr == NULL); - if ( (rc = ops->write_msr((uint32_t)_regs.ecx, val, ctxt)) != 0 ) - goto done; - break; - } - - case 0x31: /* rdtsc */ { - unsigned long cr4; - uint64_t val; - fail_if(ops->read_cr == NULL); - if ( (rc = ops->read_cr(4, &cr4, ctxt)) ) - goto done; - generate_exception_if((cr4 & CR4_TSD) && !mode_ring0(), EXC_GP, 0); - fail_if(ops->read_msr == NULL); - if ( (rc = ops->read_msr(MSR_TSC, &val, ctxt)) != 0 ) - goto done; - _regs.edx = (uint32_t)(val >> 32); - _regs.eax = (uint32_t)(val >> 0); - break; - } - - case 0x32: /* rdmsr */ { - uint64_t val; - generate_exception_if(!mode_ring0(), EXC_GP, 0); - fail_if(ops->read_msr == NULL); - if ( (rc = ops->read_msr((uint32_t)_regs.ecx, &val, ctxt)) != 0 ) - goto done; - _regs.edx = (uint32_t)(val >> 32); - _regs.eax = 
(uint32_t)(val >> 0); - break; - } - - case 0x80 ... 0x8f: /* jcc (near) */ { - int rel = (((op_bytes == 2) && !mode_64bit()) - ? (int32_t)insn_fetch_type(int16_t) - : insn_fetch_type(int32_t)); - if ( test_cc(b, _regs.eflags) ) - jmp_rel(rel); - break; - } - - case 0xa0: /* push %%fs */ - src.val = x86_seg_fs; - goto push_seg; - - case 0xa1: /* pop %%fs */ - src.val = x86_seg_fs; - goto pop_seg; - - case 0xa2: /* cpuid */ { - unsigned int eax = _regs.eax, ebx = _regs.ebx; - unsigned int ecx = _regs.ecx, edx = _regs.edx; - fail_if(ops->cpuid == NULL); - if ( (rc = ops->cpuid(&eax, &ebx, &ecx, &edx, ctxt)) != 0 ) - goto done; - _regs.eax = eax; _regs.ebx = ebx; - _regs.ecx = ecx; _regs.edx = edx; - break; - } - - case 0xa8: /* push %%gs */ - src.val = x86_seg_gs; - goto push_seg; - - case 0xa9: /* pop %%gs */ - src.val = x86_seg_gs; - goto pop_seg; - - case 0xc7: /* Grp9 (cmpxchg8b) */ -#if defined(__i386__) - { - unsigned long old_lo, old_hi; - generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1); - generate_exception_if(ea.type != OP_MEM, EXC_UD, -1); - if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &old_lo, 4, ctxt)) || - (rc = ops->read(ea.mem.seg, ea.mem.off+4, &old_hi, 4, ctxt)) ) - goto done; - if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) ) - { - _regs.eax = old_lo; - _regs.edx = old_hi; - _regs.eflags &= ~EFLG_ZF; - } - else if ( ops->cmpxchg8b == NULL ) - { - rc = X86EMUL_UNHANDLEABLE; - goto done; - } - else - { - if ( (rc = ops->cmpxchg8b(ea.mem.seg, ea.mem.off, old_lo, old_hi, - _regs.ebx, _regs.ecx, ctxt)) != 0 ) - goto done; - _regs.eflags |= EFLG_ZF; - } - break; - } -#elif defined(__x86_64__) - { - unsigned long old, new; - generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1); - generate_exception_if(ea.type != OP_MEM, EXC_UD, -1); - if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &old, 8, ctxt)) != 0 ) - goto done; - if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) || - ((uint32_t)(old>>32) != (uint32_t)_regs.edx) ) - { - _regs.eax = 
(uint32_t)(old>>0); - _regs.edx = (uint32_t)(old>>32); - _regs.eflags &= ~EFLG_ZF; - } - else - { - new = (_regs.ecx<<32)|(uint32_t)_regs.ebx; - if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, - new, 8, ctxt)) != 0 ) - goto done; - _regs.eflags |= EFLG_ZF; - } - break; - } -#endif - - case 0xc8 ... 0xcf: /* bswap */ - dst.type = OP_REG; - dst.reg = decode_register( - (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); - switch ( dst.bytes = op_bytes ) - { - default: /* case 2: */ - /* Undefined behaviour. Writes zero on all tested CPUs. */ - dst.val = 0; - break; - case 4: -#ifdef __x86_64__ - asm ( "bswap %k0" : "=r" (dst.val) : "0" (*dst.reg) ); - break; - case 8: -#endif - asm ( "bswap %0" : "=r" (dst.val) : "0" (*dst.reg) ); - break; - } - break; - } - goto writeback; - - cannot_emulate: -#if 0 - gdprintk(XENLOG_DEBUG, "Instr:"); - for ( ea.mem.off = ctxt->regs->eip; ea.mem.off < _regs.eip; ea.mem.off++ ) - { - unsigned long x; - ops->insn_fetch(x86_seg_cs, ea.mem.off, &x, 1, ctxt); - printk(" %02x", (uint8_t)x); - } - printk("\n"); -#endif - return X86EMUL_UNHANDLEABLE; -} +#include "x86_emulate/x86_emulate.c" diff -r daf16171a05f -r feee6422144f xen/arch/x86/x86_emulate/x86_emulate.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/x86_emulate/x86_emulate.c Tue Apr 01 11:29:03 2008 -0600 @@ -0,0 +1,3429 @@ +/****************************************************************************** + * x86_emulate.c + * + * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. + * + * Copyright (c) 2005-2007 Keir Fraser + * Copyright (c) 2005-2007 XenSource Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Operand sizes: 8-bit operands or specified/overridden size. */ +#define ByteOp (1<<0) /* 8-bit operands. */ +/* Destination operand type. */ +#define DstBitBase (0<<1) /* Memory operand, bit string. */ +#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ +#define DstReg (2<<1) /* Register operand. */ +#define DstMem (3<<1) /* Memory operand. */ +#define DstMask (3<<1) +/* Source operand type. */ +#define SrcNone (0<<3) /* No source operand. */ +#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */ +#define SrcReg (1<<3) /* Register operand. */ +#define SrcMem (2<<3) /* Memory operand. */ +#define SrcMem16 (3<<3) /* Memory operand (16-bit). */ +#define SrcImm (4<<3) /* Immediate operand. */ +#define SrcImmByte (5<<3) /* 8-bit sign-extended immediate operand. */ +#define SrcMask (7<<3) +/* Generic ModRM decode. */ +#define ModRM (1<<6) +/* Destination is only written; never read. 
*/ +#define Mov (1<<7) + +static uint8_t opcode_table[256] = { + /* 0x00 - 0x07 */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps, + /* 0x08 - 0x0F */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, 0, + /* 0x10 - 0x17 */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps, + /* 0x18 - 0x1F */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps, + /* 0x20 - 0x27 */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, + /* 0x28 - 0x2F */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, + /* 0x30 - 0x37 */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, + /* 0x38 - 0x3F */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, + /* 0x40 - 0x4F */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0x50 - 0x5F */ + ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, + ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, + ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, + ImplicitOps|Mov, 
ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, + /* 0x60 - 0x67 */ + ImplicitOps, ImplicitOps, DstReg|SrcMem|ModRM, DstReg|SrcMem16|ModRM|Mov, + 0, 0, 0, 0, + /* 0x68 - 0x6F */ + ImplicitOps|Mov, DstReg|SrcImm|ModRM|Mov, + ImplicitOps|Mov, DstReg|SrcImmByte|ModRM|Mov, + ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, + /* 0x70 - 0x77 */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0x78 - 0x7F */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0x80 - 0x87 */ + ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, + ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + /* 0x88 - 0x8F */ + ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov, + ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstMem|SrcReg|ModRM|Mov, DstReg|SrcNone|ModRM, + DstReg|SrcMem|ModRM|Mov, DstMem|SrcNone|ModRM|Mov, + /* 0x90 - 0x97 */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0x98 - 0x9F */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0xA0 - 0xA7 */ + ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|ImplicitOps, ImplicitOps, + /* 0xA8 - 0xAF */ + ByteOp|DstReg|SrcImm, DstReg|SrcImm, + ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|ImplicitOps, ImplicitOps, + /* 0xB0 - 0xB7 */ + ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, + ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, + ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, + ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, + /* 0xB8 - 0xBF */ + DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, 
DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, + DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, + /* 0xC0 - 0xC7 */ + ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, + ImplicitOps, ImplicitOps, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + ByteOp|DstMem|SrcImm|ModRM|Mov, DstMem|SrcImm|ModRM|Mov, + /* 0xC8 - 0xCF */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0xD0 - 0xD7 */ + ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, + ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0xD8 - 0xDF */ + 0, ImplicitOps|ModRM|Mov, 0, ImplicitOps|ModRM|Mov, + 0, ImplicitOps|ModRM|Mov, ImplicitOps|ModRM|Mov, ImplicitOps|ModRM|Mov, + /* 0xE0 - 0xE7 */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0xE8 - 0xEF */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0xF0 - 0xF7 */ + 0, ImplicitOps, 0, 0, + ImplicitOps, ImplicitOps, + ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM, + /* 0xF8 - 0xFF */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM +}; + +static uint8_t twobyte_table[256] = { + /* 0x00 - 0x07 */ + 0, ImplicitOps|ModRM, 0, 0, 0, 0, ImplicitOps, 0, + /* 0x08 - 0x0F */ + ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps|ModRM, 0, 0, + /* 0x10 - 0x17 */ + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x18 - 0x1F */ + ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, + ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, + /* 0x20 - 0x27 */ + ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, + 0, 0, 0, 0, + /* 0x28 - 0x2F */ + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x30 - 0x37 */ + ImplicitOps, ImplicitOps, ImplicitOps, 0, 0, 0, 0, 0, + /* 0x38 
- 0x3F */ + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x40 - 0x47 */ + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + /* 0x48 - 0x4F */ + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + /* 0x50 - 0x5F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x60 - 0x6F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x70 - 0x7F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x80 - 0x87 */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0x88 - 0x8F */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0x90 - 0x97 */ + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + /* 0x98 - 0x9F */ + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + /* 0xA0 - 0xA7 */ + ImplicitOps, ImplicitOps, ImplicitOps, DstBitBase|SrcReg|ModRM, + DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0, + /* 0xA8 - 0xAF */ + ImplicitOps, ImplicitOps, 0, DstBitBase|SrcReg|ModRM, + DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstReg|SrcMem|ModRM, + /* 0xB0 - 0xB7 */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + DstReg|SrcMem|ModRM|Mov, DstBitBase|SrcReg|ModRM, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + 
ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, + /* 0xB8 - 0xBF */ + 0, 0, DstBitBase|SrcImmByte|ModRM, DstBitBase|SrcReg|ModRM, + DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, + /* 0xC0 - 0xC7 */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0, + 0, 0, 0, ImplicitOps|ModRM, + /* 0xC8 - 0xCF */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0xD0 - 0xDF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xE0 - 0xEF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xF0 - 0xFF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Type, address-of, and value of an instruction's operand. */ +struct operand { + enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; + unsigned int bytes; + unsigned long val, orig_val; + union { + /* OP_REG: Pointer to register field. */ + unsigned long *reg; + /* OP_MEM: Segment and offset. */ + struct { + enum x86_segment seg; + unsigned long off; + } mem; + }; +}; + +/* MSRs. */ +#define MSR_TSC 0x10 + +/* Control register flags. */ +#define CR0_PE (1<<0) +#define CR4_TSD (1<<2) + +/* EFLAGS bit definitions. */ +#define EFLG_VIP (1<<20) +#define EFLG_VIF (1<<19) +#define EFLG_AC (1<<18) +#define EFLG_VM (1<<17) +#define EFLG_RF (1<<16) +#define EFLG_NT (1<<14) +#define EFLG_IOPL (3<<12) +#define EFLG_OF (1<<11) +#define EFLG_DF (1<<10) +#define EFLG_IF (1<<9) +#define EFLG_TF (1<<8) +#define EFLG_SF (1<<7) +#define EFLG_ZF (1<<6) +#define EFLG_AF (1<<4) +#define EFLG_PF (1<<2) +#define EFLG_CF (1<<0) + +/* Exception definitions. */ +#define EXC_DE 0 +#define EXC_DB 1 +#define EXC_BP 3 +#define EXC_OF 4 +#define EXC_BR 5 +#define EXC_UD 6 +#define EXC_TS 10 +#define EXC_NP 11 +#define EXC_SS 12 +#define EXC_GP 13 +#define EXC_PF 14 +#define EXC_MF 16 + +/* + * Instruction emulation: + * Most instructions are emulated directly via a fragment of inline assembly + * code. 
This allows us to save/restore EFLAGS and thus very easily pick up + * any modified flags. + */ + +#if defined(__x86_64__) +#define _LO32 "k" /* force 32-bit operand */ +#define _STK "%%rsp" /* stack pointer */ +#define _BYTES_PER_LONG "8" +#elif defined(__i386__) +#define _LO32 "" /* force 32-bit operand */ +#define _STK "%%esp" /* stack pointer */ +#define _BYTES_PER_LONG "4" +#endif + +/* + * These EFLAGS bits are restored from saved value during emulation, and + * any changes are written back to the saved value after emulation. + */ +#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) + +/* Before executing instruction: restore necessary bits in EFLAGS. */ +#define _PRE_EFLAGS(_sav, _msk, _tmp) \ +/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \ +"movl %"_sav",%"_LO32 _tmp"; " \ +"push %"_tmp"; " \ +"push %"_tmp"; " \ +"movl %"_msk",%"_LO32 _tmp"; " \ +"andl %"_LO32 _tmp",("_STK"); " \ +"pushf; " \ +"notl %"_LO32 _tmp"; " \ +"andl %"_LO32 _tmp",("_STK"); " \ +"andl %"_LO32 _tmp",2*"_BYTES_PER_LONG"("_STK"); " \ +"pop %"_tmp"; " \ +"orl %"_LO32 _tmp",("_STK"); " \ +"popf; " \ +"pop %"_sav"; " + +/* After executing instruction: write-back necessary bits in EFLAGS. */ +#define _POST_EFLAGS(_sav, _msk, _tmp) \ +/* _sav |= EFLAGS & _msk; */ \ +"pushf; " \ +"pop %"_tmp"; " \ +"andl %"_msk",%"_LO32 _tmp"; " \ +"orl %"_LO32 _tmp",%"_sav"; " + +/* Raw emulation: instruction has two explicit operands. 
*/ +#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy)\ +do{ unsigned long _tmp; \ + switch ( (_dst).bytes ) \ + { \ + case 2: \ + asm volatile ( \ + _PRE_EFLAGS("0","4","2") \ + _op"w %"_wx"3,%1; " \ + _POST_EFLAGS("0","4","2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : _wy ((_src).val), "i" (EFLAGS_MASK), \ + "m" (_eflags), "m" ((_dst).val) ); \ + break; \ + case 4: \ + asm volatile ( \ + _PRE_EFLAGS("0","4","2") \ + _op"l %"_lx"3,%1; " \ + _POST_EFLAGS("0","4","2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : _ly ((_src).val), "i" (EFLAGS_MASK), \ + "m" (_eflags), "m" ((_dst).val) ); \ + break; \ + case 8: \ + __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy); \ + break; \ + } \ +} while (0) +#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)\ +do{ unsigned long _tmp; \ + switch ( (_dst).bytes ) \ + { \ + case 1: \ + asm volatile ( \ + _PRE_EFLAGS("0","4","2") \ + _op"b %"_bx"3,%1; " \ + _POST_EFLAGS("0","4","2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : _by ((_src).val), "i" (EFLAGS_MASK), \ + "m" (_eflags), "m" ((_dst).val) ); \ + break; \ + default: \ + __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy);\ + break; \ + } \ +} while (0) +/* Source operand is byte-sized and may be restricted to just %cl. */ +#define emulate_2op_SrcB(_op, _src, _dst, _eflags) \ + __emulate_2op(_op, _src, _dst, _eflags, \ + "b", "c", "b", "c", "b", "c", "b", "c") +/* Source operand is byte, word, long or quad sized. */ +#define emulate_2op_SrcV(_op, _src, _dst, _eflags) \ + __emulate_2op(_op, _src, _dst, _eflags, \ + "b", "q", "w", "r", _LO32, "r", "", "r") +/* Source operand is word, long or quad sized. */ +#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \ + __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ + "w", "r", _LO32, "r", "", "r") + +/* Instruction has only one explicit operand (no source operand). 
*/ +#define emulate_1op(_op,_dst,_eflags) \ +do{ unsigned long _tmp; \ + switch ( (_dst).bytes ) \ + { \ + case 1: \ + asm volatile ( \ + _PRE_EFLAGS("0","3","2") \ + _op"b %1; " \ + _POST_EFLAGS("0","3","2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ + break; \ + case 2: \ + asm volatile ( \ + _PRE_EFLAGS("0","3","2") \ + _op"w %1; " \ + _POST_EFLAGS("0","3","2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ + break; \ + case 4: \ + asm volatile ( \ + _PRE_EFLAGS("0","3","2") \ + _op"l %1; " \ + _POST_EFLAGS("0","3","2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ + break; \ + case 8: \ + __emulate_1op_8byte(_op, _dst, _eflags); \ + break; \ + } \ +} while (0) + +/* Emulate an instruction with quadword operands (x86/64 only). */ +#if defined(__x86_64__) +#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ +do{ asm volatile ( \ + _PRE_EFLAGS("0","4","2") \ + _op"q %"_qx"3,%1; " \ + _POST_EFLAGS("0","4","2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : _qy ((_src).val), "i" (EFLAGS_MASK), \ + "m" (_eflags), "m" ((_dst).val) ); \ +} while (0) +#define __emulate_1op_8byte(_op, _dst, _eflags) \ +do{ asm volatile ( \ + _PRE_EFLAGS("0","3","2") \ + _op"q %1; " \ + _POST_EFLAGS("0","3","2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ +} while (0) +#elif defined(__i386__) +#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) +#define __emulate_1op_8byte(_op, _dst, _eflags) +#endif /* __i386__ */ + +/* Fetch next part of the instruction being emulated. 
*/ +#define insn_fetch_bytes(_size) \ +({ unsigned long _x, _eip = _regs.eip; \ + if ( !mode_64bit() ) _eip = (uint32_t)_eip; /* ignore upper dword */ \ + _regs.eip += (_size); /* real hardware doesn't truncate */ \ + generate_exception_if((uint8_t)(_regs.eip - ctxt->regs->eip) > 15, \ + EXC_GP, 0); \ + rc = ops->insn_fetch(x86_seg_cs, _eip, &_x, (_size), ctxt); \ + if ( rc ) goto done; \ + _x; \ +}) +#define insn_fetch_type(_type) ((_type)insn_fetch_bytes(sizeof(_type))) + +#define truncate_word(ea, byte_width) \ +({ unsigned long __ea = (ea); \ + unsigned int _width = (byte_width); \ + ((_width == sizeof(unsigned long)) ? __ea : \ + (__ea & ((1UL << (_width << 3)) - 1))); \ +}) +#define truncate_ea(ea) truncate_word((ea), ad_bytes) + +#define mode_64bit() (def_ad_bytes == 8) + +#define fail_if(p) \ +do { \ + rc = (p) ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY; \ + if ( rc ) goto done; \ +} while (0) + +#define generate_exception_if(p, e, ec) \ +({ if ( (p) ) { \ + fail_if(ops->inject_hw_exception == NULL); \ + rc = ops->inject_hw_exception(e, ec, ctxt) ? : X86EMUL_EXCEPTION; \ + goto done; \ + } \ +}) + +/* + * Given byte has even parity (even number of 1s)? SDM Vol. 1 Sec. 3.4.3.1, + * "Status Flags": EFLAGS.PF reflects parity of least-sig. byte of result only. + */ +static int even_parity(uint8_t v) +{ + asm ( "test %b0,%b0; setp %b0" : "=a" (v) : "0" (v) ); + return v; +} + +/* Update address held in a register, based on addressing mode. 
*/ +#define _register_address_increment(reg, inc, byte_width) \ +do { \ + int _inc = (inc); /* signed type ensures sign extension to long */ \ + unsigned int _width = (byte_width); \ + if ( _width == sizeof(unsigned long) ) \ + (reg) += _inc; \ + else if ( mode_64bit() ) \ + (reg) = ((reg) + _inc) & ((1UL << (_width << 3)) - 1); \ + else \ + (reg) = ((reg) & ~((1UL << (_width << 3)) - 1)) | \ + (((reg) + _inc) & ((1UL << (_width << 3)) - 1)); \ +} while (0) +#define register_address_increment(reg, inc) \ + _register_address_increment((reg), (inc), ad_bytes) + +#define sp_pre_dec(dec) ({ \ + _register_address_increment(_regs.esp, -(dec), ctxt->sp_size/8); \ + truncate_word(_regs.esp, ctxt->sp_size/8); \ +}) +#define sp_post_inc(inc) ({ \ + unsigned long __esp = truncate_word(_regs.esp, ctxt->sp_size/8); \ + _register_address_increment(_regs.esp, (inc), ctxt->sp_size/8); \ + __esp; \ +}) + +#define jmp_rel(rel) \ +do { \ + int _rel = (int)(rel); \ + _regs.eip += _rel; \ + if ( !mode_64bit() ) \ + _regs.eip = ((op_bytes == 2) \ + ? (uint16_t)_regs.eip : (uint32_t)_regs.eip); \ +} while (0) + +static unsigned long __get_rep_prefix( + struct cpu_user_regs *int_regs, + struct cpu_user_regs *ext_regs, + int ad_bytes) +{ + unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx : + (ad_bytes == 4) ? (uint32_t)int_regs->ecx : + int_regs->ecx); + + /* Skip the instruction if no repetitions are required. */ + if ( ecx == 0 ) + ext_regs->eip = int_regs->eip; + + return ecx; +} + +#define get_rep_prefix() ({ \ + unsigned long max_reps = 1; \ + if ( rep_prefix ) \ + max_reps = __get_rep_prefix(&_regs, ctxt->regs, ad_bytes); \ + if ( max_reps == 0 ) \ + goto done; \ + max_reps; \ +}) + +static void __put_rep_prefix( + struct cpu_user_regs *int_regs, + struct cpu_user_regs *ext_regs, + int ad_bytes, + unsigned long reps_completed) +{ + unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx : + (ad_bytes == 4) ? 
(uint32_t)int_regs->ecx : + int_regs->ecx); + + /* Reduce counter appropriately, and repeat instruction if non-zero. */ + ecx -= reps_completed; + if ( ecx != 0 ) + int_regs->eip = ext_regs->eip; + + if ( ad_bytes == 2 ) + *(uint16_t *)&int_regs->ecx = ecx; + else if ( ad_bytes == 4 ) + int_regs->ecx = (uint32_t)ecx; + else + int_regs->ecx = ecx; +} + +#define put_rep_prefix(reps_completed) ({ \ + if ( rep_prefix ) \ + __put_rep_prefix(&_regs, ctxt->regs, ad_bytes, reps_completed); \ +}) + +/* + * Unsigned multiplication with double-word result. + * IN: Multiplicand=m[0], Multiplier=m[1] + * OUT: Return CF/OF (overflow status); Result=m[1]:m[0] + */ +static int mul_dbl(unsigned long m[2]) +{ + int rc; + asm ( "mul %4; seto %b2" + : "=a" (m[0]), "=d" (m[1]), "=q" (rc) + : "0" (m[0]), "1" (m[1]), "2" (0) ); + return rc; +} + +/* + * Signed multiplication with double-word result. + * IN: Multiplicand=m[0], Multiplier=m[1] + * OUT: Return CF/OF (overflow status); Result=m[1]:m[0] + */ +static int imul_dbl(unsigned long m[2]) +{ + int rc; + asm ( "imul %4; seto %b2" + : "=a" (m[0]), "=d" (m[1]), "=q" (rc) + : "0" (m[0]), "1" (m[1]), "2" (0) ); + return rc; +} + +/* + * Unsigned division of double-word dividend. + * IN: Dividend=u[1]:u[0], Divisor=v + * OUT: Return 1: #DE + * Return 0: Quotient=u[0], Remainder=u[1] + */ +static int div_dbl(unsigned long u[2], unsigned long v) +{ + if ( (v == 0) || (u[1] >= v) ) + return 1; + asm ( "div %4" + : "=a" (u[0]), "=d" (u[1]) + : "0" (u[0]), "1" (u[1]), "r" (v) ); + return 0; +} + +/* + * Signed division of double-word dividend. + * IN: Dividend=u[1]:u[0], Divisor=v + * OUT: Return 1: #DE + * Return 0: Quotient=u[0], Remainder=u[1] + * NB. We don't use idiv directly as it's moderately hard to work out + * ahead of time whether it will #DE, which we cannot allow to happen. 
+ */ +static int idiv_dbl(unsigned long u[2], unsigned long v) +{ + int negu = (long)u[1] < 0, negv = (long)v < 0; + + /* u = abs(u) */ + if ( negu ) + { + u[1] = ~u[1]; + if ( (u[0] = -u[0]) == 0 ) + u[1]++; + } + + /* abs(u) / abs(v) */ + if ( div_dbl(u, negv ? -v : v) ) + return 1; + + /* Remainder has same sign as dividend. It cannot overflow. */ + if ( negu ) + u[1] = -u[1]; + + /* Quotient is overflowed if sign bit is set. */ + if ( negu ^ negv ) + { + if ( (long)u[0] >= 0 ) + u[0] = -u[0]; + else if ( (u[0] << 1) != 0 ) /* == 0x80...0 is okay */ + return 1; + } + else if ( (long)u[0] < 0 ) + return 1; + + return 0; +} + +static int +test_cc( + unsigned int condition, unsigned int flags) +{ + int rc = 0; + + switch ( (condition & 15) >> 1 ) + { + case 0: /* o */ + rc |= (flags & EFLG_OF); + break; + case 1: /* b/c/nae */ + rc |= (flags & EFLG_CF); + break; + case 2: /* z/e */ + rc |= (flags & EFLG_ZF); + break; + case 3: /* be/na */ + rc |= (flags & (EFLG_CF|EFLG_ZF)); + break; + case 4: /* s */ + rc |= (flags & EFLG_SF); + break; + case 5: /* p/pe */ + rc |= (flags & EFLG_PF); + break; + case 7: /* le/ng */ + rc |= (flags & EFLG_ZF); + /* fall through */ + case 6: /* l/nge */ + rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); + break; + } + + /* Odd condition identifiers (lsb == 1) have inverted sense. 
*/ + return (!!rc ^ (condition & 1)); +} + +static int +get_cpl( + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + struct segment_register reg; + + if ( ctxt->regs->eflags & EFLG_VM ) + return 3; + + if ( (ops->read_segment == NULL) || + ops->read_segment(x86_seg_ss, ®, ctxt) ) + return -1; + + return reg.attr.fields.dpl; +} + +static int +_mode_iopl( + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + int cpl = get_cpl(ctxt, ops); + if ( cpl == -1 ) + return -1; + return (cpl <= ((ctxt->regs->eflags >> 12) & 3)); +} + +#define mode_ring0() ({ \ + int _cpl = get_cpl(ctxt, ops); \ + fail_if(_cpl < 0); \ + (_cpl == 0); \ +}) +#define mode_iopl() ({ \ + int _iopl = _mode_iopl(ctxt, ops); \ + fail_if(_iopl < 0); \ + _iopl; \ +}) + +static int ioport_access_check( + unsigned int first_port, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + unsigned long iobmp; + struct segment_register tr; + int rc = X86EMUL_OKAY; + + if ( !(ctxt->regs->eflags & EFLG_VM) && mode_iopl() ) + return X86EMUL_OKAY; + + fail_if(ops->read_segment == NULL); + if ( (rc = ops->read_segment(x86_seg_tr, &tr, ctxt)) != 0 ) + return rc; + + /* Ensure that the TSS is valid and has an io-bitmap-offset field. */ + if ( !tr.attr.fields.p || + ((tr.attr.fields.type & 0xd) != 0x9) || + (tr.limit < 0x67) ) + goto raise_exception; + + if ( (rc = ops->read(x86_seg_none, tr.base + 0x66, &iobmp, 2, ctxt)) ) + return rc; + + /* Ensure TSS includes two bytes including byte containing first port. */ + iobmp += first_port / 8; + if ( tr.limit <= iobmp ) + goto raise_exception; + + if ( (rc = ops->read(x86_seg_none, tr.base + iobmp, &iobmp, 2, ctxt)) ) + return rc; + if ( (iobmp & (((1<<bytes)-1) << (first_port&7))) != 0 ) + goto raise_exception; + + done: + return rc; + + raise_exception: + fail_if(ops->inject_hw_exception == NULL); + return ops->inject_hw_exception(EXC_GP, 0, ctxt) ? 
: X86EMUL_EXCEPTION; +} + +static int +in_realmode( + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + unsigned long cr0; + int rc; + + if ( ops->read_cr == NULL ) + return 0; + + rc = ops->read_cr(0, &cr0, ctxt); + return (!rc && !(cr0 & CR0_PE)); +} + +static int +realmode_load_seg( + enum x86_segment seg, + uint16_t sel, + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + struct segment_register reg; + int rc; + + if ( (rc = ops->read_segment(seg, ®, ctxt)) != 0 ) + return rc; + + reg.sel = sel; + reg.base = (uint32_t)sel << 4; + + return ops->write_segment(seg, ®, ctxt); +} + +static int +protmode_load_seg( + enum x86_segment seg, + uint16_t sel, + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + struct segment_register desctab, cs, segr; + struct { uint32_t a, b; } desc; + unsigned long val; + uint8_t dpl, rpl, cpl; + int rc, fault_type = EXC_TS; + + /* NULL selector? */ + if ( (sel & 0xfffc) == 0 ) + { + if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) ) + goto raise_exn; + memset(&segr, 0, sizeof(segr)); + return ops->write_segment(seg, &segr, ctxt); + } + + /* LDT descriptor must be in the GDT. */ + if ( (seg == x86_seg_ldtr) && (sel & 4) ) + goto raise_exn; + + if ( (rc = ops->read_segment(x86_seg_cs, &cs, ctxt)) || + (rc = ops->read_segment((sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, + &desctab, ctxt)) ) + return rc; + + /* Check against descriptor table limit. */ + if ( ((sel & 0xfff8) + 7) > desctab.limit ) + goto raise_exn; + + do { + if ( (rc = ops->read(x86_seg_none, desctab.base + (sel & 0xfff8), + &val, 4, ctxt)) ) + return rc; + desc.a = val; + if ( (rc = ops->read(x86_seg_none, desctab.base + (sel & 0xfff8) + 4, + &val, 4, ctxt)) ) + return rc; + desc.b = val; + + /* Segment present in memory? */ + if ( !(desc.b & (1u<<15)) ) + { + fault_type = EXC_NP; + goto raise_exn; + } + + /* LDT descriptor is a system segment. All others are code/data. 
*/ + if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) ) + goto raise_exn; + + dpl = (desc.b >> 13) & 3; + rpl = sel & 3; + cpl = cs.sel & 3; + + switch ( seg ) + { + case x86_seg_cs: + /* Code segment? */ + if ( !(desc.b & (1u<<11)) ) + goto raise_exn; + /* Non-conforming segment: check DPL against RPL. */ + if ( ((desc.b & (6u<<9)) != 6) && (dpl != rpl) ) + goto raise_exn; + break; + case x86_seg_ss: + /* Writable data segment? */ + if ( (desc.b & (5u<<9)) != (1u<<9) ) + goto raise_exn; + if ( (dpl != cpl) || (dpl != rpl) ) + goto raise_exn; + break; + case x86_seg_ldtr: + /* LDT system segment? */ + if ( (desc.b & (15u<<8)) != (2u<<8) ) + goto raise_exn; + goto skip_accessed_flag; + default: + /* Readable code or data segment? */ + if ( (desc.b & (5u<<9)) == (4u<<9) ) + goto raise_exn; + /* Non-conforming segment: check DPL against RPL and CPL. */ + if ( ((desc.b & (6u<<9)) != 6) && ((dpl < cpl) || (dpl < rpl)) ) + goto raise_exn; + break; + } + + /* Ensure Accessed flag is set. */ + rc = ((desc.b & 0x100) ? X86EMUL_OKAY : + ops->cmpxchg( + x86_seg_none, desctab.base + (sel & 0xfff8) + 4, desc.b, + desc.b | 0x100, 4, ctxt)); + } while ( rc == X86EMUL_CMPXCHG_FAILED ); + + if ( rc ) + return rc; + + /* Force the Accessed flag in our local copy. 
*/ + desc.b |= 0x100; + + skip_accessed_flag: + segr.base = (((desc.b << 0) & 0xff000000u) | + ((desc.b << 16) & 0x00ff0000u) | + ((desc.a >> 16) & 0x0000ffffu)); + segr.attr.bytes = (((desc.b >> 8) & 0x00ffu) | + ((desc.b >> 12) & 0x0f00u)); + segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu); + if ( segr.attr.fields.g ) + segr.limit = (segr.limit << 12) | 0xfffu; + segr.sel = sel; + return ops->write_segment(seg, &segr, ctxt); + + raise_exn: + if ( ops->inject_hw_exception == NULL ) + return X86EMUL_UNHANDLEABLE; + if ( (rc = ops->inject_hw_exception(fault_type, sel & 0xfffc, ctxt)) ) + return rc; + return X86EMUL_EXCEPTION; +} + +static int +load_seg( + enum x86_segment seg, + uint16_t sel, + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + if ( (ops->read_segment == NULL) || + (ops->write_segment == NULL) ) + return X86EMUL_UNHANDLEABLE; + + if ( in_realmode(ctxt, ops) ) + return realmode_load_seg(seg, sel, ctxt, ops); + + return protmode_load_seg(seg, sel, ctxt, ops); +} + +void * +decode_register( + uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs) +{ + void *p; + + switch ( modrm_reg ) + { + case 0: p = ®s->eax; break; + case 1: p = ®s->ecx; break; + case 2: p = ®s->edx; break; + case 3: p = ®s->ebx; break; + case 4: p = (highbyte_regs ? + ((unsigned char *)®s->eax + 1) : + (unsigned char *)®s->esp); break; + case 5: p = (highbyte_regs ? + ((unsigned char *)®s->ecx + 1) : + (unsigned char *)®s->ebp); break; + case 6: p = (highbyte_regs ? + ((unsigned char *)®s->edx + 1) : + (unsigned char *)®s->esi); break; + case 7: p = (highbyte_regs ? 
+ ((unsigned char *)®s->ebx + 1) : + (unsigned char *)®s->edi); break; +#if defined(__x86_64__) + case 8: p = ®s->r8; break; + case 9: p = ®s->r9; break; + case 10: p = ®s->r10; break; + case 11: p = ®s->r11; break; + case 12: p = ®s->r12; break; + case 13: p = ®s->r13; break; + case 14: p = ®s->r14; break; + case 15: p = ®s->r15; break; +#endif + default: p = NULL; break; + } + + return p; +} + +#define decode_segment_failed x86_seg_tr +enum x86_segment +decode_segment( + uint8_t modrm_reg) +{ + switch ( modrm_reg ) + { + case 0: return x86_seg_es; + case 1: return x86_seg_cs; + case 2: return x86_seg_ss; + case 3: return x86_seg_ds; + case 4: return x86_seg_fs; + case 5: return x86_seg_gs; + default: break; + } + return decode_segment_failed; +} + +int +x86_emulate( + struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + /* Shadow copy of register state. Committed on successful emulation. */ + struct cpu_user_regs _regs = *ctxt->regs; + + uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0; + uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; + unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes; +#define REPE_PREFIX 1 +#define REPNE_PREFIX 2 + unsigned int lock_prefix = 0, rep_prefix = 0; + int override_seg = -1, rc = X86EMUL_OKAY; + struct operand src, dst; + + /* Data operand effective address (usually computed from ModRM). */ + struct operand ea; + + /* Default is a memory operand relative to segment DS. */ + ea.type = OP_MEM; + ea.mem.seg = x86_seg_ds; + ea.mem.off = 0; + + ctxt->retire.byte = 0; + + op_bytes = def_op_bytes = ad_bytes = def_ad_bytes = ctxt->addr_size/8; + if ( op_bytes == 8 ) + { + op_bytes = def_op_bytes = 4; +#ifndef __x86_64__ + return X86EMUL_UNHANDLEABLE; +#endif + } + + /* Prefix bytes. 
*/ + for ( ; ; ) + { + switch ( b = insn_fetch_type(uint8_t) ) + { + case 0x66: /* operand-size override */ + op_bytes = def_op_bytes ^ 6; + break; + case 0x67: /* address-size override */ + ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6); + break; + case 0x2e: /* CS override */ + override_seg = x86_seg_cs; + break; + case 0x3e: /* DS override */ + override_seg = x86_seg_ds; + break; + case 0x26: /* ES override */ + override_seg = x86_seg_es; + break; + case 0x64: /* FS override */ + override_seg = x86_seg_fs; + break; + case 0x65: /* GS override */ + override_seg = x86_seg_gs; + break; + case 0x36: /* SS override */ + override_seg = x86_seg_ss; + break; + case 0xf0: /* LOCK */ + lock_prefix = 1; + break; + case 0xf2: /* REPNE/REPNZ */ + rep_prefix = REPNE_PREFIX; + break; + case 0xf3: /* REP/REPE/REPZ */ + rep_prefix = REPE_PREFIX; + break; + case 0x40 ... 0x4f: /* REX */ + if ( !mode_64bit() ) + goto done_prefixes; + rex_prefix = b; + continue; + default: + goto done_prefixes; + } + + /* Any legacy prefix after a REX prefix nullifies its effect. */ + rex_prefix = 0; + } + done_prefixes: + + if ( rex_prefix & 8 ) /* REX.W */ + op_bytes = 8; + + /* Opcode byte(s). */ + d = opcode_table[b]; + if ( d == 0 ) + { + /* Two-byte opcode? */ + if ( b == 0x0f ) + { + twobyte = 1; + b = insn_fetch_type(uint8_t); + d = twobyte_table[b]; + } + + /* Unrecognised? */ + if ( d == 0 ) + goto cannot_emulate; + } + + /* Lock prefix is allowed only on RMW instructions. */ + generate_exception_if((d & Mov) && lock_prefix, EXC_GP, 0); + + /* ModRM and SIB bytes. */ + if ( d & ModRM ) + { + modrm = insn_fetch_type(uint8_t); + modrm_mod = (modrm & 0xc0) >> 6; + modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3); + modrm_rm = modrm & 0x07; + + if ( modrm_mod == 3 ) + { + modrm_rm |= (rex_prefix & 1) << 3; + ea.type = OP_REG; + ea.reg = decode_register( + modrm_rm, &_regs, (d & ByteOp) && (rex_prefix == 0)); + } + else if ( ad_bytes == 2 ) + { + /* 16-bit ModR/M decode. 
*/ + switch ( modrm_rm ) + { + case 0: + ea.mem.off = _regs.ebx + _regs.esi; + break; + case 1: + ea.mem.off = _regs.ebx + _regs.edi; + break; + case 2: + ea.mem.seg = x86_seg_ss; + ea.mem.off = _regs.ebp + _regs.esi; + break; + case 3: + ea.mem.seg = x86_seg_ss; + ea.mem.off = _regs.ebp + _regs.edi; + break; + case 4: + ea.mem.off = _regs.esi; + break; + case 5: + ea.mem.off = _regs.edi; + break; + case 6: + if ( modrm_mod == 0 ) + break; + ea.mem.seg = x86_seg_ss; + ea.mem.off = _regs.ebp; + break; + case 7: + ea.mem.off = _regs.ebx; + break; + } + switch ( modrm_mod ) + { + case 0: + if ( modrm_rm == 6 ) + ea.mem.off = insn_fetch_type(int16_t); + break; + case 1: + ea.mem.off += insn_fetch_type(int8_t); + break; + case 2: + ea.mem.off += insn_fetch_type(int16_t); + break; + } + ea.mem.off = truncate_ea(ea.mem.off); + } + else + { + /* 32/64-bit ModR/M decode. */ + if ( modrm_rm == 4 ) + { + sib = insn_fetch_type(uint8_t); + sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8); + sib_base = (sib & 7) | ((rex_prefix << 3) & 8); + if ( sib_index != 4 ) + ea.mem.off = *(long*)decode_register(sib_index, &_regs, 0); + ea.mem.off <<= (sib >> 6) & 3; + if ( (modrm_mod == 0) && ((sib_base & 7) == 5) ) + ea.mem.off += insn_fetch_type(int32_t); + else if ( sib_base == 4 ) + { + ea.mem.seg = x86_seg_ss; + ea.mem.off += _regs.esp; + if ( !twobyte && (b == 0x8f) ) + /* POP <rm> computes its EA post increment. */ + ea.mem.off += ((mode_64bit() && (op_bytes == 4)) + ? 
8 : op_bytes); + } + else if ( sib_base == 5 ) + { + ea.mem.seg = x86_seg_ss; + ea.mem.off += _regs.ebp; + } + else + ea.mem.off += *(long*)decode_register(sib_base, &_regs, 0); + } + else + { + modrm_rm |= (rex_prefix & 1) << 3; + ea.mem.off = *(long *)decode_register(modrm_rm, &_regs, 0); + if ( (modrm_rm == 5) && (modrm_mod != 0) ) + ea.mem.seg = x86_seg_ss; + } + switch ( modrm_mod ) + { + case 0: + if ( (modrm_rm & 7) != 5 ) + break; + ea.mem.off = insn_fetch_type(int32_t); + if ( !mode_64bit() ) + break; + /* Relative to RIP of next instruction. Argh! */ + ea.mem.off += _regs.eip; + if ( (d & SrcMask) == SrcImm ) + ea.mem.off += (d & ByteOp) ? 1 : + ((op_bytes == 8) ? 4 : op_bytes); + else if ( (d & SrcMask) == SrcImmByte ) + ea.mem.off += 1; + else if ( !twobyte && ((b & 0xfe) == 0xf6) && + ((modrm_reg & 7) <= 1) ) + /* Special case in Grp3: test has immediate operand. */ + ea.mem.off += (d & ByteOp) ? 1 + : ((op_bytes == 8) ? 4 : op_bytes); + else if ( twobyte && ((b & 0xf7) == 0xa4) ) + /* SHLD/SHRD with immediate byte third operand. */ + ea.mem.off++; + break; + case 1: + ea.mem.off += insn_fetch_type(int8_t); + break; + case 2: + ea.mem.off += insn_fetch_type(int32_t); + break; + } + ea.mem.off = truncate_ea(ea.mem.off); + } + } + + if ( override_seg != -1 ) + ea.mem.seg = override_seg; + + /* Special instructions do their own operand decoding. */ + if ( (d & DstMask) == ImplicitOps ) + goto special_insn; + + /* Decode and fetch the source operand: register, memory or immediate. 
*/ + switch ( d & SrcMask ) + { + case SrcNone: + break; + case SrcReg: + src.type = OP_REG; + if ( d & ByteOp ) + { + src.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0)); + src.val = *(uint8_t *)src.reg; + src.bytes = 1; + } + else + { + src.reg = decode_register(modrm_reg, &_regs, 0); + switch ( (src.bytes = op_bytes) ) + { + case 2: src.val = *(uint16_t *)src.reg; break; + case 4: src.val = *(uint32_t *)src.reg; break; + case 8: src.val = *(uint64_t *)src.reg; break; + } + } + break; + case SrcMem16: + ea.bytes = 2; + goto srcmem_common; + case SrcMem: + ea.bytes = (d & ByteOp) ? 1 : op_bytes; + srcmem_common: + src = ea; + if ( src.type == OP_REG ) + { + switch ( src.bytes ) + { + case 1: src.val = *(uint8_t *)src.reg; break; + case 2: src.val = *(uint16_t *)src.reg; break; + case 4: src.val = *(uint32_t *)src.reg; break; + case 8: src.val = *(uint64_t *)src.reg; break; + } + } + else if ( (rc = ops->read(src.mem.seg, src.mem.off, + &src.val, src.bytes, ctxt)) ) + goto done; + break; + case SrcImm: + src.type = OP_IMM; + src.bytes = (d & ByteOp) ? 1 : op_bytes; + if ( src.bytes == 8 ) src.bytes = 4; + /* NB. Immediates are sign-extended as necessary. */ + switch ( src.bytes ) + { + case 1: src.val = insn_fetch_type(int8_t); break; + case 2: src.val = insn_fetch_type(int16_t); break; + case 4: src.val = insn_fetch_type(int32_t); break; + } + break; + case SrcImmByte: + src.type = OP_IMM; + src.bytes = 1; + src.val = insn_fetch_type(int8_t); + break; + } + + /* Decode and fetch the destination operand: register or memory. 
*/ + switch ( d & DstMask ) + { + case DstReg: + dst.type = OP_REG; + if ( d & ByteOp ) + { + dst.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0)); + dst.val = *(uint8_t *)dst.reg; + dst.bytes = 1; + } + else + { + dst.reg = decode_register(modrm_reg, &_regs, 0); + switch ( (dst.bytes = op_bytes) ) + { + case 2: dst.val = *(uint16_t *)dst.reg; break; + case 4: dst.val = *(uint32_t *)dst.reg; break; + case 8: dst.val = *(uint64_t *)dst.reg; break; + } + } + break; + case DstBitBase: + if ( ((d & SrcMask) == SrcImmByte) || (ea.type == OP_REG) ) + { + src.val &= (op_bytes << 3) - 1; + } + else + { + /* + * EA += BitOffset DIV op_bytes*8 + * BitOffset = BitOffset MOD op_bytes*8 + * DIV truncates towards negative infinity. + * MOD always produces a positive result. + */ + if ( op_bytes == 2 ) + src.val = (int16_t)src.val; + else if ( op_bytes == 4 ) + src.val = (int32_t)src.val; + if ( (long)src.val < 0 ) + { + unsigned long byte_offset; + byte_offset = op_bytes + (((-src.val-1) >> 3) & ~(op_bytes-1)); + ea.mem.off -= byte_offset; + src.val = (byte_offset << 3) + src.val; + } + else + { + ea.mem.off += (src.val >> 3) & ~(op_bytes - 1); + src.val &= (op_bytes << 3) - 1; + } + } + /* Becomes a normal DstMem operation from here on. */ + d = (d & ~DstMask) | DstMem; + case DstMem: + ea.bytes = (d & ByteOp) ? 1 : op_bytes; + dst = ea; + if ( dst.type == OP_REG ) + { + switch ( dst.bytes ) + { + case 1: dst.val = *(uint8_t *)dst.reg; break; + case 2: dst.val = *(uint16_t *)dst.reg; break; + case 4: dst.val = *(uint32_t *)dst.reg; break; + case 8: dst.val = *(uint64_t *)dst.reg; break; + } + } + else if ( !(d & Mov) ) /* optimisation - avoid slow emulated read */ + { + if ( (rc = ops->read(dst.mem.seg, dst.mem.off, + &dst.val, dst.bytes, ctxt)) ) + goto done; + dst.orig_val = dst.val; + } + break; + } + + /* LOCK prefix allowed only on instructions with memory destination. 
*/ + generate_exception_if(lock_prefix && (dst.type != OP_MEM), EXC_GP, 0); + + if ( twobyte ) + goto twobyte_insn; + + switch ( b ) + { + case 0x04 ... 0x05: /* add imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = _regs.eax; + case 0x00 ... 0x03: add: /* add */ + emulate_2op_SrcV("add", src, dst, _regs.eflags); + break; + + case 0x0c ... 0x0d: /* or imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = _regs.eax; + case 0x08 ... 0x0b: or: /* or */ + emulate_2op_SrcV("or", src, dst, _regs.eflags); + break; + + case 0x14 ... 0x15: /* adc imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = _regs.eax; + case 0x10 ... 0x13: adc: /* adc */ + emulate_2op_SrcV("adc", src, dst, _regs.eflags); + break; + + case 0x1c ... 0x1d: /* sbb imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = _regs.eax; + case 0x18 ... 0x1b: sbb: /* sbb */ + emulate_2op_SrcV("sbb", src, dst, _regs.eflags); + break; + + case 0x24 ... 0x25: /* and imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = _regs.eax; + case 0x20 ... 0x23: and: /* and */ + emulate_2op_SrcV("and", src, dst, _regs.eflags); + break; + + case 0x2c ... 0x2d: /* sub imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = _regs.eax; + case 0x28 ... 0x2b: sub: /* sub */ + emulate_2op_SrcV("sub", src, dst, _regs.eflags); + break; + + case 0x34 ... 0x35: /* xor imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = _regs.eax; + case 0x30 ... 0x33: xor: /* xor */ + emulate_2op_SrcV("xor", src, dst, _regs.eflags); + break; + + case 0x3c ... 0x3d: /* cmp imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = _regs.eax; + case 0x38 ... 
0x3b: cmp: /* cmp */ + emulate_2op_SrcV("cmp", src, dst, _regs.eflags); + break; + + case 0x62: /* bound */ { + unsigned long src_val2; + int lb, ub, idx; + generate_exception_if(mode_64bit() || (src.type != OP_MEM), + EXC_UD, -1); + if ( (rc = ops->read(src.mem.seg, src.mem.off + op_bytes, + &src_val2, op_bytes, ctxt)) ) + goto done; + ub = (op_bytes == 2) ? (int16_t)src_val2 : (int32_t)src_val2; + lb = (op_bytes == 2) ? (int16_t)src.val : (int32_t)src.val; + idx = (op_bytes == 2) ? (int16_t)dst.val : (int32_t)dst.val; + generate_exception_if((idx < lb) || (idx > ub), EXC_BR, -1); + dst.type = OP_NONE; + break; + } + + case 0x63: /* movsxd (x86/64) / arpl (x86/32) */ + if ( mode_64bit() ) + { + /* movsxd */ + if ( src.type == OP_REG ) + src.val = *(int32_t *)src.reg; + else if ( (rc = ops->read(src.mem.seg, src.mem.off, + &src.val, 4, ctxt)) ) + goto done; + dst.val = (int32_t)src.val; + } + else + { + /* arpl */ + uint16_t src_val = dst.val; + dst = src; + _regs.eflags &= ~EFLG_ZF; + _regs.eflags |= ((src_val & 3) > (dst.val & 3)) ? 
EFLG_ZF : 0; + if ( _regs.eflags & EFLG_ZF ) + dst.val = (dst.val & ~3) | (src_val & 3); + else + dst.type = OP_NONE; + generate_exception_if(in_realmode(ctxt, ops), EXC_UD, -1); + } + break; + + case 0x69: /* imul imm16/32 */ + case 0x6b: /* imul imm8 */ { + unsigned long src1; /* ModR/M source operand */ + if ( ea.type == OP_REG ) + src1 = *ea.reg; + else if ( (rc = ops->read(ea.mem.seg, ea.mem.off, + &src1, op_bytes, ctxt)) ) + goto done; + _regs.eflags &= ~(EFLG_OF|EFLG_CF); + switch ( dst.bytes ) + { + case 2: + dst.val = ((uint32_t)(int16_t)src.val * + (uint32_t)(int16_t)src1); + if ( (int16_t)dst.val != (uint32_t)dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + break; +#ifdef __x86_64__ + case 4: + dst.val = ((uint64_t)(int32_t)src.val * + (uint64_t)(int32_t)src1); + if ( (int32_t)dst.val != dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + break; +#endif + default: { + unsigned long m[2] = { src.val, src1 }; + if ( imul_dbl(m) ) + _regs.eflags |= EFLG_OF|EFLG_CF; + dst.val = m[0]; + break; + } + } + break; + } + + case 0x82: /* Grp1 (x86/32 only) */ + generate_exception_if(mode_64bit(), EXC_UD, -1); + case 0x80: case 0x81: case 0x83: /* Grp1 */ + switch ( modrm_reg & 7 ) + { + case 0: goto add; + case 1: goto or; + case 2: goto adc; + case 3: goto sbb; + case 4: goto and; + case 5: goto sub; + case 6: goto xor; + case 7: goto cmp; + } + break; + + case 0xa8 ... 0xa9: /* test imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = _regs.eax; + case 0x84 ... 0x85: test: /* test */ + emulate_2op_SrcV("test", src, dst, _regs.eflags); + break; + + case 0x86 ... 0x87: xchg: /* xchg */ + /* Write back the register source. */ + switch ( dst.bytes ) + { + case 1: *(uint8_t *)src.reg = (uint8_t)dst.val; break; + case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break; + case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */ + case 8: *src.reg = dst.val; break; + } + /* Write back the memory destination with implicit LOCK prefix. 
*/ + dst.val = src.val; + lock_prefix = 1; + break; + + case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ + generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1); + case 0x88 ... 0x8b: /* mov */ + dst.val = src.val; + break; + + case 0x8c: /* mov Sreg,r/m */ { + struct segment_register reg; + enum x86_segment seg = decode_segment(modrm_reg); + generate_exception_if(seg == decode_segment_failed, EXC_UD, -1); + fail_if(ops->read_segment == NULL); + if ( (rc = ops->read_segment(seg, ®, ctxt)) != 0 ) + goto done; + dst.val = reg.sel; + if ( dst.type == OP_MEM ) + dst.bytes = 2; + break; + } + + case 0x8e: /* mov r/m,Sreg */ { + enum x86_segment seg = decode_segment(modrm_reg); + generate_exception_if(seg == decode_segment_failed, EXC_UD, -1); + if ( (rc = load_seg(seg, (uint16_t)src.val, ctxt, ops)) != 0 ) + goto done; + if ( seg == x86_seg_ss ) + ctxt->retire.flags.mov_ss = 1; + dst.type = OP_NONE; + break; + } + + case 0x8d: /* lea */ + dst.val = ea.mem.off; + break; + + case 0x8f: /* pop (sole member of Grp1a) */ + generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1); + /* 64-bit mode: POP defaults to a 64-bit operand. */ + if ( mode_64bit() && (dst.bytes == 4) ) + dst.bytes = 8; + if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes), + &dst.val, dst.bytes, ctxt)) != 0 ) + goto done; + break; + + case 0xb0 ... 0xb7: /* mov imm8,r8 */ + dst.reg = decode_register( + (b & 7) | ((rex_prefix & 1) << 3), &_regs, (rex_prefix == 0)); + dst.val = src.val; + break; + + case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */ + if ( dst.bytes == 8 ) /* Fetch more bytes to obtain imm64 */ + src.val = ((uint32_t)src.val | + ((uint64_t)insn_fetch_type(uint32_t) << 32)); + dst.reg = decode_register( + (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); + dst.val = src.val; + break; + + case 0xc0 ... 
0xc1: grp2: /* Grp2 */ + switch ( modrm_reg & 7 ) + { + case 0: /* rol */ + emulate_2op_SrcB("rol", src, dst, _regs.eflags); + break; + case 1: /* ror */ + emulate_2op_SrcB("ror", src, dst, _regs.eflags); + break; + case 2: /* rcl */ + emulate_2op_SrcB("rcl", src, dst, _regs.eflags); + break; + case 3: /* rcr */ + emulate_2op_SrcB("rcr", src, dst, _regs.eflags); + break; + case 4: /* sal/shl */ + case 6: /* sal/shl */ + emulate_2op_SrcB("sal", src, dst, _regs.eflags); + break; + case 5: /* shr */ + emulate_2op_SrcB("shr", src, dst, _regs.eflags); + break; + case 7: /* sar */ + emulate_2op_SrcB("sar", src, dst, _regs.eflags); + break; + } + break; + + case 0xc4: /* les */ { + unsigned long sel; + dst.val = x86_seg_es; + les: /* dst.val identifies the segment */ + generate_exception_if(src.type != OP_MEM, EXC_UD, -1); + if ( (rc = ops->read(src.mem.seg, src.mem.off + src.bytes, + &sel, 2, ctxt)) != 0 ) + goto done; + if ( (rc = load_seg(dst.val, (uint16_t)sel, ctxt, ops)) != 0 ) + goto done; + dst.val = src.val; + break; + } + + case 0xc5: /* lds */ + dst.val = x86_seg_ds; + goto les; + + case 0xd0 ... 0xd1: /* Grp2 */ + src.val = 1; + goto grp2; + + case 0xd2 ... 0xd3: /* Grp2 */ + src.val = _regs.ecx; + goto grp2; + + case 0xf6 ... 0xf7: /* Grp3 */ + switch ( modrm_reg & 7 ) + { + case 0 ... 1: /* test */ + /* Special case in Grp3: test has an immediate source operand. */ + src.type = OP_IMM; + src.bytes = (d & ByteOp) ? 
1 : op_bytes; + if ( src.bytes == 8 ) src.bytes = 4; + switch ( src.bytes ) + { + case 1: src.val = insn_fetch_type(int8_t); break; + case 2: src.val = insn_fetch_type(int16_t); break; + case 4: src.val = insn_fetch_type(int32_t); break; + } + goto test; + case 2: /* not */ + dst.val = ~dst.val; + break; + case 3: /* neg */ + emulate_1op("neg", dst, _regs.eflags); + break; + case 4: /* mul */ + src = dst; + dst.type = OP_REG; + dst.reg = (unsigned long *)&_regs.eax; + dst.val = *dst.reg; + _regs.eflags &= ~(EFLG_OF|EFLG_CF); + switch ( src.bytes ) + { + case 1: + dst.val = (uint8_t)dst.val; + dst.val *= src.val; + if ( (uint8_t)dst.val != (uint16_t)dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + dst.bytes = 2; + break; + case 2: + dst.val = (uint16_t)dst.val; + dst.val *= src.val; + if ( (uint16_t)dst.val != (uint32_t)dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + *(uint16_t *)&_regs.edx = dst.val >> 16; + break; +#ifdef __x86_64__ + case 4: + dst.val = (uint32_t)dst.val; + dst.val *= src.val; + if ( (uint32_t)dst.val != dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + _regs.edx = (uint32_t)(dst.val >> 32); + break; +#endif + default: { + unsigned long m[2] = { src.val, dst.val }; + if ( mul_dbl(m) ) + _regs.eflags |= EFLG_OF|EFLG_CF; + _regs.edx = m[1]; + dst.val = m[0]; + break; + } + } + break; + case 5: /* imul */ + src = dst; + dst.type = OP_REG; + dst.reg = (unsigned long *)&_regs.eax; + dst.val = *dst.reg; + _regs.eflags &= ~(EFLG_OF|EFLG_CF); + switch ( src.bytes ) + { + case 1: + dst.val = ((uint16_t)(int8_t)src.val * + (uint16_t)(int8_t)dst.val); + if ( (int8_t)dst.val != (uint16_t)dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + dst.bytes = 2; + break; + case 2: + dst.val = ((uint32_t)(int16_t)src.val * + (uint32_t)(int16_t)dst.val); + if ( (int16_t)dst.val != (uint32_t)dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + *(uint16_t *)&_regs.edx = dst.val >> 16; + break; +#ifdef __x86_64__ + case 4: + dst.val = ((uint64_t)(int32_t)src.val * + 
(uint64_t)(int32_t)dst.val); + if ( (int32_t)dst.val != dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + _regs.edx = (uint32_t)(dst.val >> 32); + break; +#endif + default: { + unsigned long m[2] = { src.val, dst.val }; + if ( imul_dbl(m) ) + _regs.eflags |= EFLG_OF|EFLG_CF; + _regs.edx = m[1]; + dst.val = m[0]; + break; + } + } + break; + case 6: /* div */ { + unsigned long u[2], v; + src = dst; + dst.type = OP_REG; + dst.reg = (unsigned long *)&_regs.eax; + switch ( src.bytes ) + { + case 1: + u[0] = (uint16_t)_regs.eax; + u[1] = 0; + v = (uint8_t)src.val; + generate_exception_if( + div_dbl(u, v) || ((uint8_t)u[0] != (uint16_t)u[0]), + EXC_DE, -1); + dst.val = (uint8_t)u[0]; + ((uint8_t *)&_regs.eax)[1] = u[1]; + break; + case 2: + u[0] = ((uint32_t)_regs.edx << 16) | (uint16_t)_regs.eax; + u[1] = 0; + v = (uint16_t)src.val; + generate_exception_if( + div_dbl(u, v) || ((uint16_t)u[0] != (uint32_t)u[0]), + EXC_DE, -1); + dst.val = (uint16_t)u[0]; + *(uint16_t *)&_regs.edx = u[1]; + break; +#ifdef __x86_64__ + case 4: + u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax; + u[1] = 0; + v = (uint32_t)src.val; + generate_exception_if( + div_dbl(u, v) || ((uint32_t)u[0] != u[0]), + EXC_DE, -1); + dst.val = (uint32_t)u[0]; + _regs.edx = (uint32_t)u[1]; + break; +#endif + default: + u[0] = _regs.eax; + u[1] = _regs.edx; + v = src.val; + generate_exception_if(div_dbl(u, v), EXC_DE, -1); + dst.val = u[0]; + _regs.edx = u[1]; + break; + } + break; + } + case 7: /* idiv */ { + unsigned long u[2], v; + src = dst; + dst.type = OP_REG; + dst.reg = (unsigned long *)&_regs.eax; + switch ( src.bytes ) + { + case 1: + u[0] = (int16_t)_regs.eax; + u[1] = ((long)u[0] < 0) ? ~0UL : 0UL; + v = (int8_t)src.val; + generate_exception_if( + idiv_dbl(u, v) || ((int8_t)u[0] != (int16_t)u[0]), + EXC_DE, -1); + dst.val = (int8_t)u[0]; + ((int8_t *)&_regs.eax)[1] = u[1]; + break; + case 2: + u[0] = (int32_t)((_regs.edx << 16) | (uint16_t)_regs.eax); + u[1] = ((long)u[0] < 0) ? 
~0UL : 0UL; + v = (int16_t)src.val; + generate_exception_if( + idiv_dbl(u, v) || ((int16_t)u[0] != (int32_t)u[0]), + EXC_DE, -1); + dst.val = (int16_t)u[0]; + *(int16_t *)&_regs.edx = u[1]; + break; +#ifdef __x86_64__ + case 4: + u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax; + u[1] = ((long)u[0] < 0) ? ~0UL : 0UL; + v = (int32_t)src.val; + generate_exception_if( + idiv_dbl(u, v) || ((int32_t)u[0] != u[0]), + EXC_DE, -1); + dst.val = (int32_t)u[0]; + _regs.edx = (uint32_t)u[1]; + break; +#endif + default: + u[0] = _regs.eax; + u[1] = _regs.edx; + v = src.val; + generate_exception_if(idiv_dbl(u, v), EXC_DE, -1); + dst.val = u[0]; + _regs.edx = u[1]; + break; + } + break; + } + default: + goto cannot_emulate; + } + break; + + case 0xfe: /* Grp4 */ + generate_exception_if((modrm_reg & 7) >= 2, EXC_UD, -1); + case 0xff: /* Grp5 */ + switch ( modrm_reg & 7 ) + { + case 0: /* inc */ + emulate_1op("inc", dst, _regs.eflags); + break; + case 1: /* dec */ + emulate_1op("dec", dst, _regs.eflags); + break; + case 2: /* call (near) */ + case 4: /* jmp (near) */ + if ( (dst.bytes != 8) && mode_64bit() ) + { + dst.bytes = op_bytes = 8; + if ( dst.type == OP_REG ) + dst.val = *dst.reg; + else if ( (rc = ops->read(dst.mem.seg, dst.mem.off, + &dst.val, 8, ctxt)) != 0 ) + goto done; + } + src.val = _regs.eip; + _regs.eip = dst.val; + if ( (modrm_reg & 7) == 2 ) + goto push; /* call */ + dst.type = OP_NONE; + break; + case 3: /* call (far, absolute indirect) */ + case 5: /* jmp (far, absolute indirect) */ { + unsigned long sel; + + generate_exception_if(dst.type != OP_MEM, EXC_UD, -1); + + if ( (rc = ops->read(dst.mem.seg, dst.mem.off+dst.bytes, + &sel, 2, ctxt)) ) + goto done; + + if ( (modrm_reg & 7) == 3 ) /* call */ + { + struct segment_register reg; + fail_if(ops->read_segment == NULL); + if ( (rc = ops->read_segment(x86_seg_cs, ®, ctxt)) || + (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), + reg.sel, op_bytes, ctxt)) || + (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), + 
_regs.eip, op_bytes, ctxt)) ) + goto done; + } + + if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 ) + goto done; + _regs.eip = dst.val; + + dst.type = OP_NONE; + break; + } + case 6: /* push */ + /* 64-bit mode: PUSH defaults to a 64-bit operand. */ + if ( mode_64bit() && (dst.bytes == 4) ) _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |