[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] merge with xen-unstable.hg
# HG changeset patch # User awilliam@xxxxxxxxxxx # Node ID 41823e46d6accdd6656b2c4bd73f1ad60af594f5 # Parent 4ed269e73e95cc7618950ab6cea58ca1e1c243db # Parent c4eead8a925bd047834bf784f02edae33426b913 merge with xen-unstable.hg diff -r 4ed269e73e95 -r 41823e46d6ac .hgignore --- a/.hgignore Mon Apr 17 08:47:36 2006 -0600 +++ b/.hgignore Tue Apr 18 09:35:40 2006 -0600 @@ -184,6 +184,7 @@ ^tools/xm-test/ramdisk/buildroot ^xen/BLOG$ ^xen/TAGS$ +^xen/cscope\.*$ ^xen/arch/x86/asm-offsets\.s$ ^xen/arch/x86/boot/mkelf32$ ^xen/arch/x86/xen\.lds$ diff -r 4ed269e73e95 -r 41823e46d6ac buildconfigs/Rules.mk --- a/buildconfigs/Rules.mk Mon Apr 17 08:47:36 2006 -0600 +++ b/buildconfigs/Rules.mk Tue Apr 18 09:35:40 2006 -0600 @@ -99,14 +99,14 @@ linux-2.6-xen.patch: ref-linux-$(LINUX_V linux-2.6-xen.patch: ref-linux-$(LINUX_VER)/.valid-ref rm -rf tmp-$@ cp -al $(<D) tmp-$@ - ( cd linux-2.6-xen-sparse && ./mkbuildtree ../tmp-$@ ) + ( cd linux-2.6-xen-sparse && bash ./mkbuildtree ../tmp-$@ ) diff -Nurp $(patsubst ref%,pristine%,$(<D)) tmp-$@ > $@ || true rm -rf tmp-$@ %-xen.patch: ref-%/.valid-ref rm -rf tmp-$@ cp -al $(<D) tmp-$@ - ( cd $*-xen-sparse && ./mkbuildtree ../tmp-$@ ) + ( cd $*-xen-sparse && bash ./mkbuildtree ../tmp-$@ ) diff -Nurp $(patsubst ref%,pristine%,$(<D)) tmp-$@ > $@ || true rm -rf tmp-$@ diff -r 4ed269e73e95 -r 41823e46d6ac buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Mon Apr 17 08:47:36 2006 -0600 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Tue Apr 18 09:35:40 2006 -0600 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-rc3-xen0 -# Mon Feb 20 11:37:43 2006 +# Linux kernel version: 2.6.16-xen0 +# Thu Apr 13 14:58:29 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -99,6 +99,8 @@ CONFIG_X86_PC=y # CONFIG_MPSC is not set CONFIG_GENERIC_CPU=y CONFIG_X86_64_XEN=y +CONFIG_X86_NO_TSS=y +CONFIG_X86_NO_IDT=y CONFIG_X86_L1_CACHE_BYTES=128 CONFIG_X86_L1_CACHE_SHIFT=7 
CONFIG_X86_GOOD_APIC=y @@ -176,6 +178,19 @@ CONFIG_XEN_PCIDEV_FRONTEND=y CONFIG_XEN_PCIDEV_FRONTEND=y # CONFIG_XEN_PCIDEV_FE_DEBUG is not set # CONFIG_UNORDERED_IO is not set +# CONFIG_PCIEPORTBUS is not set +CONFIG_PCI_LEGACY_PROC=y +# CONFIG_PCI_DEBUG is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PCI Hotplug Support +# +# CONFIG_HOTPLUG_PCI is not set # # Executable file formats / Emulations @@ -1001,11 +1016,7 @@ CONFIG_INFINIBAND_SRP=y CONFIG_INFINIBAND_SRP=y # -# SN Devices -# - -# -# EDAC - error detection and reporting (RAS) +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) # # CONFIG_EDAC is not set @@ -1239,7 +1250,7 @@ CONFIG_CRYPTO_CRC32C=m # Hardware crypto devices # CONFIG_XEN=y -CONFIG_NO_IDLE_HZ=y +CONFIG_XEN_INTERFACE_VERSION=0x00030101 # # XEN @@ -1266,6 +1277,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y CONFIG_HAVE_ARCH_ALLOC_SKB=y CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y +CONFIG_NO_IDLE_HZ=y # # Library routines diff -r 4ed269e73e95 -r 41823e46d6ac buildconfigs/linux-defconfig_xenU_x86_64 --- a/buildconfigs/linux-defconfig_xenU_x86_64 Mon Apr 17 08:47:36 2006 -0600 +++ b/buildconfigs/linux-defconfig_xenU_x86_64 Tue Apr 18 09:35:40 2006 -0600 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-rc3-xen0 -# Thu Feb 16 22:56:02 2006 +# Linux kernel version: 2.6.16-xenU +# Thu Apr 13 14:59:16 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -103,6 +103,8 @@ CONFIG_MPSC=y CONFIG_MPSC=y # CONFIG_GENERIC_CPU is not set CONFIG_X86_64_XEN=y +CONFIG_X86_NO_TSS=y +CONFIG_X86_NO_IDT=y CONFIG_X86_L1_CACHE_BYTES=128 CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_X86_GOOD_APIC=y @@ -145,6 +147,15 @@ CONFIG_GENERIC_PENDING_IRQ=y # # CONFIG_PCI is not set # CONFIG_UNORDERED_IO is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PCI Hotplug Support +# # # Executable file formats / Emulations @@ -844,11 +855,7 @@ CONFIG_DUMMY_CONSOLE=y # # -# SN Devices -# 
- -# -# EDAC - error detection and reporting (RAS) +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) # # CONFIG_EDAC is not set @@ -1128,7 +1135,7 @@ CONFIG_CRYPTO_CRC32C=m # Hardware crypto devices # CONFIG_XEN=y -CONFIG_NO_IDLE_HZ=y +CONFIG_XEN_INTERFACE_VERSION=0x00030101 # # XEN @@ -1144,6 +1151,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y CONFIG_HAVE_ARCH_ALLOC_SKB=y CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y +CONFIG_NO_IDLE_HZ=y # # Library routines diff -r 4ed269e73e95 -r 41823e46d6ac buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Mon Apr 17 08:47:36 2006 -0600 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Tue Apr 18 09:35:40 2006 -0600 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.16-xen -# Mon Mar 27 09:43:44 2006 +# Thu Apr 13 15:01:04 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -186,6 +186,41 @@ CONFIG_XEN_PCIDEV_FRONTEND=y CONFIG_XEN_PCIDEV_FRONTEND=y # CONFIG_XEN_PCIDEV_FE_DEBUG is not set # CONFIG_UNORDERED_IO is not set +# CONFIG_PCIEPORTBUS is not set +# CONFIG_PCI_LEGACY_PROC is not set +# CONFIG_PCI_DEBUG is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +CONFIG_PCCARD=m +# CONFIG_PCMCIA_DEBUG is not set +CONFIG_PCMCIA=m +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y +CONFIG_CARDBUS=y + +# +# PC-card bridges +# +CONFIG_YENTA=m +CONFIG_YENTA_O2=y +CONFIG_YENTA_RICOH=y +CONFIG_YENTA_TI=y +CONFIG_YENTA_ENE_TUNE=y +CONFIG_YENTA_TOSHIBA=y +CONFIG_PD6729=m +CONFIG_I82092=m +CONFIG_PCCARD_NONSTATIC=m + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=m +# CONFIG_HOTPLUG_PCI_FAKE is not set +# CONFIG_HOTPLUG_PCI_ACPI is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set # # Executable file formats / Emulations @@ -625,6 +660,10 @@ CONFIG_BT_HCIBCM203X=m CONFIG_BT_HCIBCM203X=m CONFIG_BT_HCIBPA10X=m CONFIG_BT_HCIBFUSB=m +# CONFIG_BT_HCIDTL1 is not set +# CONFIG_BT_HCIBT3C is not set +# CONFIG_BT_HCIBLUECARD is not set +# 
CONFIG_BT_HCIBTUART is not set CONFIG_BT_HCIVHCI=m CONFIG_IEEE80211=m # CONFIG_IEEE80211_DEBUG is not set @@ -769,6 +808,7 @@ CONFIG_PARPORT_PC=m CONFIG_PARPORT_PC=m # CONFIG_PARPORT_PC_FIFO is not set # CONFIG_PARPORT_PC_SUPERIO is not set +# CONFIG_PARPORT_PC_PCMCIA is not set CONFIG_PARPORT_NOT_PC=y # CONFIG_PARPORT_GSC is not set CONFIG_PARPORT_1284=y @@ -851,6 +891,7 @@ CONFIG_BLK_DEV_IDE=y # CONFIG_BLK_DEV_HD_IDE is not set CONFIG_BLK_DEV_IDEDISK=y CONFIG_IDEDISK_MULTI_MODE=y +# CONFIG_BLK_DEV_IDECS is not set CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_IDETAPE is not set CONFIG_BLK_DEV_IDEFLOPPY=y @@ -1012,6 +1053,13 @@ CONFIG_SCSI_DC390T=m # CONFIG_SCSI_DEBUG is not set # +# PCMCIA SCSI adapter support +# +# CONFIG_PCMCIA_FDOMAIN is not set +# CONFIG_PCMCIA_QLOGIC is not set +# CONFIG_PCMCIA_SYM53C500 is not set + +# # Multi-device support (RAID and LVM) # CONFIG_MD=y @@ -1141,6 +1189,7 @@ CONFIG_WINBOND_840=m CONFIG_WINBOND_840=m CONFIG_DM9102=m CONFIG_ULI526X=m +# CONFIG_PCMCIA_XIRCOM is not set # CONFIG_HP100 is not set CONFIG_NET_PCI=y CONFIG_PCNET32=m @@ -1224,6 +1273,13 @@ CONFIG_NET_RADIO=y # Obsolete Wireless cards support (pre-802.11) # # CONFIG_STRIP is not set +# CONFIG_PCMCIA_WAVELAN is not set +# CONFIG_PCMCIA_NETWAVE is not set + +# +# Wireless 802.11 Frequency Hopping cards support +# +# CONFIG_PCMCIA_RAYCS is not set # # Wireless 802.11b ISA/PCI cards support @@ -1243,6 +1299,15 @@ CONFIG_PCI_ATMEL=m CONFIG_PCI_ATMEL=m # +# Wireless 802.11b Pcmcia/Cardbus cards support +# +# CONFIG_PCMCIA_HERMES is not set +# CONFIG_PCMCIA_SPECTRUM is not set +# CONFIG_AIRO_CS is not set +# CONFIG_PCMCIA_ATMEL is not set +# CONFIG_PCMCIA_WL3501 is not set + +# # Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support # CONFIG_PRISM54=m @@ -1250,7 +1315,13 @@ CONFIG_HOSTAP=m # CONFIG_HOSTAP_FIRMWARE is not set CONFIG_HOSTAP_PLX=m CONFIG_HOSTAP_PCI=m +# CONFIG_HOSTAP_CS is not set CONFIG_NET_WIRELESS=y + +# +# PCMCIA network device support +# +# CONFIG_NET_PCMCIA is not 
set # # Wan interfaces @@ -1376,6 +1447,10 @@ CONFIG_HISAX_ENTERNOW_PCI=y # # HiSax PCMCIA card service modules # +# CONFIG_HISAX_SEDLBAUER_CS is not set +# CONFIG_HISAX_ELSA_CS is not set +# CONFIG_HISAX_AVM_A1_CS is not set +# CONFIG_HISAX_TELES_CS is not set # # HiSax sub driver modules @@ -1412,6 +1487,7 @@ CONFIG_ISDN_DRV_AVMB1_B1PCI=m CONFIG_ISDN_DRV_AVMB1_B1PCI=m CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m +# CONFIG_ISDN_DRV_AVMB1_AVM_CS is not set CONFIG_ISDN_DRV_AVMB1_T1PCI=m CONFIG_ISDN_DRV_AVMB1_C4=m @@ -1600,6 +1676,13 @@ CONFIG_DRM_MGA=m CONFIG_DRM_MGA=m CONFIG_DRM_VIA=m CONFIG_DRM_SAVAGE=m + +# +# PCMCIA character devices +# +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set # CONFIG_MWAVE is not set # CONFIG_RAW_DRIVER is not set # CONFIG_HPET is not set @@ -2101,6 +2184,10 @@ CONFIG_SND_USB_USX2Y=m CONFIG_SND_USB_USX2Y=m # +# PCMCIA devices +# + +# # Open Sound System # # CONFIG_SOUND_PRIME is not set @@ -2134,6 +2221,7 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=m CONFIG_USB_SL811_HCD=m +# CONFIG_USB_SL811_CS is not set # # USB Device Class drivers @@ -2284,6 +2372,7 @@ CONFIG_USB_SERIAL_TI=m CONFIG_USB_SERIAL_TI=m CONFIG_USB_SERIAL_CYBERJACK=m CONFIG_USB_SERIAL_XIRCOM=m +# CONFIG_USB_SERIAL_OPTION is not set CONFIG_USB_SERIAL_OMNINET=m CONFIG_USB_EZUSB=y @@ -2649,7 +2738,7 @@ CONFIG_CRYPTO_CRC32C=m # Hardware crypto devices # CONFIG_XEN=y -CONFIG_NO_IDLE_HZ=y +CONFIG_XEN_INTERFACE_VERSION=0x00030101 # # XEN @@ -2676,6 +2765,7 @@ CONFIG_XEN_SYSFS=m CONFIG_XEN_SYSFS=m CONFIG_HAVE_ARCH_ALLOC_SKB=y CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y +CONFIG_NO_IDLE_HZ=y # # Library routines diff -r 4ed269e73e95 -r 41823e46d6ac buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Mon Apr 17 08:47:36 2006 -0600 +++ b/buildconfigs/mk.linux-2.6-xen Tue Apr 18 09:35:40 2006 -0600 @@ -22,8 +22,8 @@ build: $(LINUX_DIR)/include/linux/autoco rm -rf 
$(LINUX_DIR) cp -al $(<D) $(LINUX_DIR) # Apply arch-xen patches - ( cd linux-$(LINUX_SERIES)-xen-sparse ; \ - LINUX_ARCH=$(LINUX_ARCH) ./mkbuildtree ../$(LINUX_DIR) ) + ( cd linux-$(LINUX_SERIES)-xen-sparse && \ + LINUX_ARCH=$(LINUX_ARCH) bash ./mkbuildtree ../$(LINUX_DIR) ) # Re-use config from install dir if one exits else use default config CONFIG_VERSION=$$(sed -ne 's/^EXTRAVERSION = //p' $(LINUX_DIR)/Makefile); \ [ -r $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) ] && \ diff -r 4ed269e73e95 -r 41823e46d6ac docs/src/user.tex --- a/docs/src/user.tex Mon Apr 17 08:47:36 2006 -0600 +++ b/docs/src/user.tex Tue Apr 18 09:35:40 2006 -0600 @@ -1232,8 +1232,15 @@ customized variants for your site's pref \subsection{PCI} \label{ss:pcidd} -Individual PCI devices can be assigned to a given domain to allow that -domain direct access to the PCI hardware. To use this functionality, ensure +Individual PCI devices can be assigned to a given domain (a PCI driver domain) +to allow that domain direct access to the PCI hardware. + +While PCI Driver Domains can increase the stability and security of a system +by addressing a number of security concerns, there are some security issues +that remain that you can read about in Section~\ref{s:ddsecurity}. + +\subsubsection{Compile-Time Setup} +To use this functionality, ensure that the PCI Backend is compiled in to a privileged domain (e.g. domain 0) and that the domains which will be assigned PCI devices have the PCI Frontend compiled in. In XenLinux, the PCI Backend is available under the Xen @@ -1241,21 +1248,73 @@ architecture-specific "Bus Options" sect architecture-specific "Bus Options" section. You may compile both the backend and the frontend into the same kernel; they will not affect each other. 
+\subsubsection{PCI Backend Configuration - Binding at Boot} The PCI devices you wish to assign to unprivileged domains must be "hidden" from your backend domain (usually domain 0) so that it does not load a driver for them. Use the \path{pciback.hide} kernel parameter which is specified on the kernel command-line and is configurable through GRUB (see Section~\ref{s:configure}). Note that devices are not really hidden from the -backend domain. The PCI Backend ensures that no other device driver loads -for those devices. PCI devices are identified by hexadecimal -slot/funciton numbers (on Linux, use \path{lspci} to determine slot/funciton -numbers of your devices) and can be specified with or without the PCI domain: \\ +backend domain. The PCI Backend appears to the Linux kernel as a regular PCI +device driver. The PCI Backend ensures that no other device driver loads +for the devices by binding itself as the device driver for those devices. +PCI devices are identified by hexadecimal slot/funciton numbers (on Linux, +use \path{lspci} to determine slot/funciton numbers of your devices) and +can be specified with or without the PCI domain: \\ \centerline{ {\tt ({\em bus}:{\em slot}.{\em func})} example {\tt (02:1d.3)}} \\ \centerline{ {\tt ({\em domain}:{\em bus}:{\em slot}.{\em func})} example {\tt (0000:02:1d.3)}} \\ An example kernel command-line which hides two PCI devices might be: \\ \centerline{ {\tt root=/dev/sda4 ro console=tty0 pciback.hide=(02:01.f)(0000:04:1d.0) } } \\ +\subsubsection{PCI Backend Configuration - Late Binding} +PCI devices can also be bound to the PCI Backend after boot through the manual +binding/unbinding facilities provided by the Linux kernel in sysfs (allowing +for a Xen user to give PCI devices to driver domains that were not specified +on the kernel command-line). 
There are several attributes with the PCI +Backend's sysfs directory (\path{/sys/bus/pci/drivers/pciback}) that can be +used to bind/unbind devices: + +\begin{description} +\item[slots] lists all of the PCI slots that the PCI Backend will try to seize + (or "hide" from Domain 0). A PCI slot must appear in this list before it can + be bound to the PCI Backend through the \path{bind} attribute. +\item[new\_slot] write the name of a slot here (in 0000:00:00.0 format) to + have the PCI Backend seize the device in this slot. +\item[remove\_slot] write the name of a slot here (same format as + \path{new\_slot}) to have the PCI Backend no longer try to seize devices in + this slot. Note that this does not unbind the driver from a device it has + already seized. +\item[bind] write the name of a slot here (in 0000:00:00.0 format) to have + the Linux kernel attempt to bind the device in that slot to the PCI Backend + driver. +\item[unbind] write the name of a skit here (same format as \path{bind}) to have + the Linux kernel unbind the device from the PCI Backend. DO NOT unbind a + device while it is currently given to a PCI driver domain! +\end{description} + +Some examples: + +Bind a device to the PCI Backend which is not bound to any other driver. +\begin{verbatim} +# # Add a new slot to the PCI Backend's list +# echo -n 0000:01:04.d > /sys/bus/pci/drivers/pciback/new_slot +# # Now that the backend is watching for the slot, bind to it +# echo -n 0000:01:04.d > /sys/bus/pci/drivers/pciback/bind +\end{verbatim} + +Unbind a device from its driver and bind to the PCI Backend. 
+\begin{verbatim} +# # Unbind a PCI network card from its network driver +# echo -n 0000:05:02.0 > /sys/bus/pci/drivers/3c905/unbind +# # And now bind it to the PCI Backend +# echo -n 0000:05:02.0 > /sys/bus/pci/drivers/pciback/new_slot +# echo -n 0000:05:02.0 > /sys/bus/pci/drivers/pciback/bind +\end{verbatim} + +Note that the "-n" option in the example is important as it causes echo to not +output a new-line. + +\subsubsection{PCI Frontend Configuration} To configure a domU to receive a PCI device: \begin{description} @@ -1281,9 +1340,6 @@ To configure a domU to receive a PCI dev \end{verbatim} } \end{description} - -There are a number of security concerns associated with PCI Driver Domains -that you can read about in Section~\ref{s:ddsecurity}. %% There are two possible types of privileges: IO privileges and %% administration privileges. diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/arch/i386/kernel/fixup.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c Tue Apr 18 09:35:40 2006 -0600 @@ -68,6 +68,7 @@ fastcall void do_fixup_4gb_segment(struc DP(""); for (i = 5; i > 0; i--) { + touch_softlockup_watchdog(); printk("Pausing... %d", i); mdelay(1000); printk("\b\b\b\b\b\b\b\b\b\b\b\b"); diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Tue Apr 18 09:35:40 2006 -0600 @@ -206,8 +206,8 @@ swiotlb_init(void) } /* - * We use __copy_to_user to transfer to the host buffer because the buffer - * may be mapped read-only (e.g, in blkback driver) but lower-level + * We use __copy_to_user_inatomic to transfer to the host buffer because the + * buffer may be mapped read-only (e.g, in blkback driver) but lower-level * drivers map the buffer for DMA_BIDIRECTIONAL access. 
This causes an * unnecessary copy from the aperture to the host buffer, and a page fault. */ @@ -225,7 +225,7 @@ __sync_single(struct phys_addr buffer, c dev = dma_addr + size - len; host = kmp + buffer.offset; if (dir == DMA_FROM_DEVICE) { - if (__copy_to_user(host, dev, bytes)) + if (__copy_to_user_inatomic(host, dev, bytes)) /* inaccessible */; } else memcpy(dev, host, bytes); @@ -238,7 +238,7 @@ __sync_single(struct phys_addr buffer, c char *host = (char *)phys_to_virt( page_to_pseudophys(buffer.page)) + buffer.offset; if (dir == DMA_FROM_DEVICE) { - if (__copy_to_user(host, dma_addr, size)) + if (__copy_to_user_inatomic(host, dma_addr, size)) /* inaccessible */; } else if (dir == DMA_TO_DEVICE) memcpy(dma_addr, host, size); diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/arch/x86_64/Kconfig --- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig Tue Apr 18 09:35:40 2006 -0600 @@ -568,7 +568,6 @@ config UNORDERED_IO from i386. Requires that the driver writer used memory barriers properly. 
-if !X86_64_XEN source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" @@ -576,7 +575,6 @@ source "drivers/pcmcia/Kconfig" source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" -endif endmenu diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue Apr 18 09:35:40 2006 -0600 @@ -186,9 +186,8 @@ static void fast_flush_area(pending_req_ handle = pending_handle(req, i); if (handle == BLKBACK_INVALID_HANDLE) continue; - unmap[invcount].host_addr = vaddr(req, i); - unmap[invcount].dev_bus_addr = 0; - unmap[invcount].handle = handle; + gnttab_set_unmap_op(&unmap[i], vaddr(req, i), GNTMAP_host_map, + handle); pending_handle(req, i) = BLKBACK_INVALID_HANDLE; invcount++; } @@ -384,6 +383,8 @@ static void dispatch_rw_block_io(blkif_t pending_req->nr_pages = nseg; for (i = 0; i < nseg; i++) { + uint32_t flags; + seg[i].nsec = req->seg[i].last_sect - req->seg[i].first_sect + 1; @@ -392,12 +393,11 @@ static void dispatch_rw_block_io(blkif_t goto fail_response; preq.nr_sects += seg[i].nsec; - map[i].host_addr = vaddr(pending_req, i); - map[i].dom = blkif->domid; - map[i].ref = req->seg[i].gref; - map[i].flags = GNTMAP_host_map; + flags = GNTMAP_host_map; if ( operation == WRITE ) - map[i].flags |= GNTMAP_readonly; + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + req->seg[i].gref, blkif->domid); } ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/blkback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Tue Apr 18 09:35:40 2006 -0600 @@ -58,10 +58,8 @@ static int map_frontend_page(blkif_t *bl struct gnttab_map_grant_ref op; int ret; - 
op.host_addr = (unsigned long)blkif->blk_ring_area->addr; - op.flags = GNTMAP_host_map; - op.ref = shared_page; - op.dom = blkif->domid; + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); lock_vm_area(blkif->blk_ring_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); @@ -90,9 +88,8 @@ static void unmap_frontend_page(blkif_t struct gnttab_unmap_grant_ref op; int ret; - op.host_addr = (unsigned long)blkif->blk_ring_area->addr; - op.handle = blkif->shmem_handle; - op.dev_bus_addr = 0; + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); lock_vm_area(blkif->blk_ring_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Apr 18 09:35:40 2006 -0600 @@ -418,9 +418,9 @@ static void fast_flush_area(int idx, int if (BLKTAP_INVALID_HANDLE(handle)) continue; - unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i); - unmap[op].dev_bus_addr = 0; - unmap[op].handle = handle->kernel; + gnttab_set_unmap_op(&unmap[op], + MMAP_VADDR(mmap_vstart, idx, i), + GNTMAP_host_map, handle->kernel); op++; if (create_lookup_pte_addr( @@ -430,9 +430,10 @@ static void fast_flush_area(int idx, int DPRINTK("Couldn't get a pte addr!\n"); return; } - unmap[op].host_addr = ptep; - unmap[op].dev_bus_addr = 0; - unmap[op].handle = handle->user; + gnttab_set_unmap_grnat_ref(&unmap[op], ptep, + GNTMAP_host_map | + GNTMAP_application_map | + GNTMAP_contains_pte, handle->user); op++; BLKTAP_INVALIDATE_HANDLE(handle); @@ -703,21 +704,21 @@ static void dispatch_rw_block_io(blkif_t unsigned long uvaddr; unsigned long kvaddr; uint64_t ptep; + uint32_t flags; uvaddr = MMAP_VADDR(user_vstart, pending_idx, i); kvaddr = 
MMAP_VADDR(mmap_vstart, pending_idx, i); - /* Map the remote page to kernel. */ - map[op].host_addr = kvaddr; - map[op].dom = blkif->domid; - map[op].ref = req->seg[i].gref; - map[op].flags = GNTMAP_host_map; + flags = GNTMAP_host_map; /* This needs a bit more thought in terms of interposition: * If we want to be able to modify pages during write using * grant table mappings, the guest will either need to allow * it, or we'll need to incur a copy. Bit of an fbufs moment. ;) */ if (req->operation == BLKIF_OP_WRITE) - map[op].flags |= GNTMAP_readonly; + flags |= GNTMAP_readonly; + /* Map the remote page to kernel. */ + gnttab_set_map_op(&map[op], kvaddr, flags, req->seg[i].gref, + blkif->domid); op++; /* Now map it to user. */ @@ -728,14 +729,13 @@ static void dispatch_rw_block_io(blkif_t goto bad_descriptor; } - map[op].host_addr = ptep; - map[op].dom = blkif->domid; - map[op].ref = req->seg[i].gref; - map[op].flags = GNTMAP_host_map | GNTMAP_application_map + flags = GNTMAP_host_map | GNTMAP_application_map | GNTMAP_contains_pte; /* Above interposition comment applies here as well. 
*/ if (req->operation == BLKIF_OP_WRITE) - map[op].flags |= GNTMAP_readonly; + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[op], ptep, flags, req->seg[i].gref, + blkif->domid); op++; } diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/blktap/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Tue Apr 18 09:35:40 2006 -0600 @@ -33,10 +33,8 @@ static int map_frontend_page(blkif_t *bl struct gnttab_map_grant_ref op; int ret; - op.host_addr = (unsigned long)blkif->blk_ring_area->addr; - op.flags = GNTMAP_host_map; - op.ref = shared_page; - op.dom = blkif->domid; + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); lock_vm_area(blkif->blk_ring_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); @@ -59,9 +57,8 @@ static void unmap_frontend_page(blkif_t struct gnttab_unmap_grant_ref op; int ret; - op.host_addr = (unsigned long)blkif->blk_ring_area->addr; - op.handle = blkif->shmem_handle; - op.dev_bus_addr = 0; + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); lock_vm_area(blkif->blk_ring_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/core/evtchn.c --- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Tue Apr 18 09:35:40 2006 -0600 @@ -513,6 +513,8 @@ static void ack_dynirq(unsigned int irq) { int evtchn = evtchn_from_irq(irq); + move_native_irq(irq); + if (VALID_EVTCHN(evtchn)) { mask_evtchn(evtchn); clear_evtchn(evtchn); @@ -635,6 +637,8 @@ static void ack_pirq(unsigned int irq) static void ack_pirq(unsigned int irq) { int evtchn = evtchn_from_irq(irq); + + move_native_irq(irq); if (VALID_EVTCHN(evtchn)) { 
mask_evtchn(evtchn); diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/core/gnttab.c --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Tue Apr 18 09:35:40 2006 -0600 @@ -65,6 +65,7 @@ EXPORT_SYMBOL_GPL(gnttab_alloc_grant_ref EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); EXPORT_SYMBOL_GPL(gnttab_free_grant_references); EXPORT_SYMBOL_GPL(gnttab_free_grant_reference); +EXPORT_SYMBOL_GPL(gnttab_empty_grant_references); EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); EXPORT_SYMBOL_GPL(gnttab_release_grant_reference); EXPORT_SYMBOL_GPL(gnttab_request_free_callback); @@ -322,6 +323,12 @@ gnttab_alloc_grant_references(u16 count, *head = h; return 0; +} + +int +gnttab_empty_grant_references(const grant_ref_t *private_head) +{ + return (*private_head == GNTTAB_LIST_END); } int diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/netback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Tue Apr 18 09:35:40 2006 -0600 @@ -150,10 +150,8 @@ static int map_frontend_pages( struct gnttab_map_grant_ref op; int ret; - op.host_addr = (unsigned long)netif->tx_comms_area->addr; - op.flags = GNTMAP_host_map; - op.ref = tx_ring_ref; - op.dom = netif->domid; + gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr, + GNTMAP_host_map, tx_ring_ref, netif->domid); lock_vm_area(netif->tx_comms_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); @@ -168,10 +166,8 @@ static int map_frontend_pages( netif->tx_shmem_ref = tx_ring_ref; netif->tx_shmem_handle = op.handle; - op.host_addr = (unsigned long)netif->rx_comms_area->addr; - op.flags = GNTMAP_host_map; - op.ref = rx_ring_ref; - op.dom = netif->domid; + gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr, + GNTMAP_host_map, rx_ring_ref, netif->domid); 
lock_vm_area(netif->rx_comms_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); @@ -194,18 +190,16 @@ static void unmap_frontend_pages(netif_t struct gnttab_unmap_grant_ref op; int ret; - op.host_addr = (unsigned long)netif->tx_comms_area->addr; - op.handle = netif->tx_shmem_handle; - op.dev_bus_addr = 0; + gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr, + GNTMAP_host_map, netif->tx_shmem_handle); lock_vm_area(netif->tx_comms_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(netif->tx_comms_area); BUG_ON(ret); - op.host_addr = (unsigned long)netif->rx_comms_area->addr; - op.handle = netif->rx_shmem_handle; - op.dev_bus_addr = 0; + gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr, + GNTMAP_host_map, netif->rx_shmem_handle); lock_vm_area(netif->rx_comms_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Tue Apr 18 09:35:40 2006 -0600 @@ -453,9 +453,9 @@ inline static void net_tx_action_dealloc gop = tx_unmap_ops; while (dc != dp) { pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; - gop->host_addr = MMAP_VADDR(pending_idx); - gop->dev_bus_addr = 0; - gop->handle = grant_tx_handle[pending_idx]; + gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx), + GNTMAP_host_map, + grant_tx_handle[pending_idx]); gop++; } ret = HYPERVISOR_grant_table_op( @@ -579,10 +579,9 @@ static void net_tx_action(unsigned long /* Packets passed to netif_rx() must have some headroom. 
*/ skb_reserve(skb, 16); - mop->host_addr = MMAP_VADDR(pending_idx); - mop->dom = netif->domid; - mop->ref = txreq.gref; - mop->flags = GNTMAP_host_map | GNTMAP_readonly; + gnttab_set_map_op(mop, MMAP_VADDR(pending_idx), + GNTMAP_host_map | GNTMAP_readonly, + txreq.gref, netif->domid); mop++; memcpy(&pending_tx_info[pending_idx].req, diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Apr 18 09:35:40 2006 -0600 @@ -106,7 +106,7 @@ struct netfront_info /* Receive-ring batched refills. */ #define RX_MIN_TARGET 8 #define RX_DFL_MIN_TARGET 64 -#define RX_MAX_TARGET NET_RX_RING_SIZE +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) int rx_min_target, rx_max_target, rx_target; struct sk_buff_head rx_batch; @@ -119,6 +119,7 @@ struct netfront_info struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1]; struct sk_buff *rx_skbs[NET_RX_RING_SIZE+1]; +#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; grant_ref_t gref_rx_head; @@ -505,8 +506,9 @@ static void network_tx_buf_gc(struct net } while (prod != np->tx.sring->rsp_prod); out: - if (np->tx_full && - ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) { + if ((np->tx_full) && + ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE) && + !gnttab_empty_grant_references(&np->gref_tx_head)) { np->tx_full = 0; if (np->user_state == UST_OPEN) netif_wake_queue(dev); @@ -705,7 +707,8 @@ static int network_start_xmit(struct sk_ network_tx_buf_gc(dev); - if (RING_FULL(&np->tx)) { + if (RING_FULL(&np->tx) || + gnttab_empty_grant_references(&np->gref_tx_head)) { np->tx_full = 1; netif_stop_queue(dev); } @@ -1140,14 +1143,14 @@ static int create_netdev(int handle, str } /* A grant for every tx ring slot */ - if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, + if 
(gnttab_alloc_grant_references(TX_MAX_TARGET, &np->gref_tx_head) < 0) { printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); err = -ENOMEM; goto exit; } /* A grant for every rx ring slot */ - if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, + if (gnttab_alloc_grant_references(RX_MAX_TARGET, &np->gref_rx_head) < 0) { printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); gnttab_free_grant_references(np->gref_tx_head); diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Tue Apr 18 09:35:40 2006 -0600 @@ -13,6 +13,7 @@ #include "common.h" #include <xen/balloon.h> +#include <xen/gnttab.h> static kmem_cache_t *tpmif_cachep; int num_frontends = 0; @@ -72,12 +73,10 @@ static int map_frontend_page(tpmif_t *tp static int map_frontend_page(tpmif_t *tpmif, unsigned long shared_page) { int ret; - struct gnttab_map_grant_ref op = { - .host_addr = (unsigned long)tpmif->tx_area->addr, - .flags = GNTMAP_host_map, - .ref = shared_page, - .dom = tpmif->domid, - }; + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)tpmif->tx_area->addr, + GNTMAP_host_map, shared_page, tpmif->domid); lock_vm_area(tpmif->tx_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); @@ -100,9 +99,8 @@ static void unmap_frontend_page(tpmif_t struct gnttab_unmap_grant_ref op; int ret; - op.host_addr = (unsigned long)tpmif->tx_area->addr; - op.handle = tpmif->shmem_handle; - op.dev_bus_addr = 0; + gnttab_set_unmap_op(&op, (unsigned long)tpmif->tx_area->addr, + GNTMAP_host_map, tpmif->shmem_handle); lock_vm_area(tpmif->tx_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Mon Apr 17 08:47:36 2006 
-0600 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Tue Apr 18 09:35:40 2006 -0600 @@ -21,6 +21,7 @@ #include <asm/uaccess.h> #include <xen/xenbus.h> #include <xen/interface/grant_table.h> +#include <xen/gnttab.h> /* local data structures */ struct data_exchange { @@ -278,10 +279,8 @@ int _packet_write(struct packet *pak, return 0; } - map_op.host_addr = MMAP_VADDR(tpmif, i); - map_op.flags = GNTMAP_host_map; - map_op.ref = tx->ref; - map_op.dom = tpmif->domid; + gnttab_set_map_op(&map_op, MMAP_VADDR(tpmif, i), + GNTMAP_host_map, tx->ref, tpmif->domid); if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map_op, 1))) { @@ -308,9 +307,8 @@ int _packet_write(struct packet *pak, } tx->size = tocopy; - unmap_op.host_addr = MMAP_VADDR(tpmif, i); - unmap_op.handle = handle; - unmap_op.dev_bus_addr = 0; + gnttab_set_unmap_op(&unmap_op, MMAP_VADDR(tpmif, i), + GNTMAP_host_map, handle); if (unlikely (HYPERVISOR_grant_table_op @@ -422,10 +420,8 @@ static int packet_read_shmem(struct pack tx = &tpmif->tx->ring[i].req; - map_op.host_addr = MMAP_VADDR(tpmif, i); - map_op.flags = GNTMAP_host_map; - map_op.ref = tx->ref; - map_op.dom = tpmif->domid; + gnttab_set_map_op(&map_op, MMAP_VADDR(tpmif, i), + GNTMAP_host_map, tx->ref, tpmif->domid); if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map_op, 1))) { @@ -461,9 +457,8 @@ static int packet_read_shmem(struct pack tpmif->domid, buffer[offset], buffer[offset + 1], buffer[offset + 2], buffer[offset + 3]); - unmap_op.host_addr = MMAP_VADDR(tpmif, i); - unmap_op.handle = handle; - unmap_op.dev_bus_addr = 0; + gnttab_set_unmap_op(&unmap_op, MMAP_VADDR(tpmif, i), + GNTMAP_host_map, handle); if (unlikely (HYPERVISOR_grant_table_op diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Tue Apr 18 09:35:40 2006 -0600 @@ -164,10 
+164,10 @@ static void frontend_changed(struct xenb switch (frontend_state) { case XenbusStateInitialising: + case XenbusStateInitialised: + break; + case XenbusStateConnected: - break; - - case XenbusStateInitialised: err = connect_ring(be); if (err) { return; diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Tue Apr 18 09:35:40 2006 -0600 @@ -334,12 +334,6 @@ again: goto abort_transaction; } - err = xenbus_printf(xbt, dev->nodename, - "state", "%d", XenbusStateInitialised); - if (err) { - goto abort_transaction; - } - err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) goto again; @@ -347,6 +341,9 @@ again: xenbus_dev_fatal(dev, err, "completing transaction"); goto destroy_tpmring; } + + xenbus_switch_state(dev, XenbusStateConnected); + return 0; abort_transaction: @@ -387,6 +384,7 @@ static void backend_changed(struct xenbu if (tp->is_suspended == 0) { device_unregister(&dev->dev); } + xenbus_switch_state(dev, XenbusStateClosed); break; } } @@ -439,6 +437,7 @@ static int tpmfront_suspend(struct xenbu /* lock, so no app can send */ mutex_lock(&suspend_lock); + xenbus_switch_state(dev, XenbusStateClosed); tp->is_suspended = 1; for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 25; ctr++) { diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c Tue Apr 18 09:35:40 2006 -0600 @@ -37,11 +37,7 @@ /* Based on Rusty Russell's skeleton driver's map_page */ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) { - struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; + struct 
gnttab_map_grant_ref op; struct vm_struct *area; *vaddr = NULL; @@ -50,8 +46,9 @@ int xenbus_map_ring_valloc(struct xenbus if (!area) return -ENOMEM; - op.host_addr = (unsigned long)area->addr; - + gnttab_set_map_op(&op, (unsigned long)area->addr, GNTMAP_host_map, + gnt_ref, dev->otherend_id); + lock_vm_area(area); BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); unlock_vm_area(area); @@ -76,13 +73,10 @@ int xenbus_map_ring(struct xenbus_device int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, grant_handle_t *handle, void *vaddr) { - struct gnttab_map_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; - + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, + gnt_ref, dev->otherend_id); BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); if (op.status != GNTST_okay) { @@ -101,9 +95,7 @@ int xenbus_unmap_ring_vfree(struct xenbu int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) { struct vm_struct *area; - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - }; + struct gnttab_unmap_grant_ref op; /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) * method so that we don't have to muck with vmalloc internals here. 
@@ -124,7 +116,8 @@ int xenbus_unmap_ring_vfree(struct xenbu return GNTST_bad_virt_addr; } - op.handle = (grant_handle_t)area->phys_addr; + gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, + (grant_handle_t)area->phys_addr); lock_vm_area(area); BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); @@ -145,11 +138,10 @@ int xenbus_unmap_ring(struct xenbus_devi int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t handle, void *vaddr) { - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .handle = handle, - }; + struct gnttab_unmap_grant_ref op; + gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, + handle); BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); if (op.status != GNTST_okay) diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Tue Apr 18 09:35:40 2006 -0600 @@ -114,6 +114,7 @@ static ssize_t xenbus_dev_write(struct f { struct xenbus_dev_data *u = filp->private_data; struct xenbus_dev_transaction *trans = NULL; + uint32_t msg_type; void *reply; if ((len + u->len) > sizeof(u->u.buffer)) @@ -126,7 +127,9 @@ static ssize_t xenbus_dev_write(struct f if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) return len; - switch (u->u.msg.type) { + msg_type = u->u.msg.type; + + switch (msg_type) { case XS_TRANSACTION_START: case XS_TRANSACTION_END: case XS_DIRECTORY: @@ -138,7 +141,7 @@ static ssize_t xenbus_dev_write(struct f case XS_MKDIR: case XS_RM: case XS_SET_PERMS: - if (u->u.msg.type == XS_TRANSACTION_START) { + if (msg_type == XS_TRANSACTION_START) { trans = kmalloc(sizeof(*trans), GFP_KERNEL); if (!trans) return -ENOMEM; @@ -150,10 +153,10 @@ static ssize_t xenbus_dev_write(struct f return PTR_ERR(reply); } - if (u->u.msg.type == XS_TRANSACTION_START) { + if (msg_type == 
XS_TRANSACTION_START) { trans->handle = simple_strtoul(reply, NULL, 0); list_add(&trans->list, &u->transactions); - } else if (u->u.msg.type == XS_TRANSACTION_END) { + } else if (msg_type == XS_TRANSACTION_END) { list_for_each_entry(trans, &u->transactions, list) if (trans->handle == u->u.msg.tx_id) break; diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue Apr 18 09:35:40 2006 -0600 @@ -825,6 +825,8 @@ static int resume_dev(struct device *dev return err; } + xdev->state = XenbusStateInitialising; + if (drv->resume) err = drv->resume(xdev); if (err) diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/include/xen/gnttab.h --- a/linux-2.6-xen-sparse/include/xen/gnttab.h Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/include/xen/gnttab.h Tue Apr 18 09:35:40 2006 -0600 @@ -40,6 +40,7 @@ #include <linux/config.h> #include <asm/hypervisor.h> #include <xen/interface/grant_table.h> +#include <xen/features.h> /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ #ifdef __ia64__ @@ -90,6 +91,8 @@ void gnttab_free_grant_reference(grant_r void gnttab_free_grant_references(grant_ref_t head); +int gnttab_empty_grant_references(const grant_ref_t *pprivate_head); + int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); void gnttab_release_grant_reference(grant_ref_t *private_head, @@ -113,6 +116,37 @@ int gnttab_suspend(void); int gnttab_suspend(void); int gnttab_resume(void); +static inline void +gnttab_set_map_op(struct gnttab_map_grant_ref *map, unsigned long addr, + uint32_t flags, grant_ref_t ref, domid_t domid) +{ + if (flags & GNTMAP_contains_pte) + map->host_addr = addr; + else if (xen_feature(XENFEAT_auto_translated_physmap)) + map->host_addr = __pa(addr); + else + map->host_addr = addr; + + map->flags = flags; + map->ref = ref; + 
map->dom = domid; +} + +static inline void +gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, unsigned long addr, + uint32_t flags, grant_handle_t handle) +{ + if (flags & GNTMAP_contains_pte) + unmap->host_addr = addr; + else if (xen_feature(XENFEAT_auto_translated_physmap)) + unmap->host_addr = __pa(addr); + else + unmap->host_addr = addr; + + unmap->handle = handle; + unmap->dev_bus_addr = 0; +} + #endif /* __ASM_GNTTAB_H__ */ /* diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/mkbuildtree --- a/linux-2.6-xen-sparse/mkbuildtree Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/mkbuildtree Tue Apr 18 09:35:40 2006 -0600 @@ -90,8 +90,8 @@ RS=$DESTPATH RS=$DESTPATH # Arch-specific pre-processing -if [ -x arch/${LINUX_ARCH}/xen-mkbuildtree-pre ]; then - arch/${LINUX_ARCH}/xen-mkbuildtree-pre +if [ -e arch/${LINUX_ARCH}/xen-mkbuildtree-pre ]; then + bash arch/${LINUX_ARCH}/xen-mkbuildtree-pre fi # Remove old copies of files and directories at the destination @@ -115,6 +115,6 @@ relative_lndir ../../../${RS}/../xen/inc # Arch-specific post-processing cd ${AD} -if [ -x arch/${LINUX_ARCH}/xen-mkbuildtree-post ]; then - arch/${LINUX_ARCH}/xen-mkbuildtree-post +if [ -e arch/${LINUX_ARCH}/xen-mkbuildtree-post ]; then + bash arch/${LINUX_ARCH}/xen-mkbuildtree-post fi diff -r 4ed269e73e95 -r 41823e46d6ac linux-2.6-xen-sparse/net/core/dev.c --- a/linux-2.6-xen-sparse/net/core/dev.c Mon Apr 17 08:47:36 2006 -0600 +++ b/linux-2.6-xen-sparse/net/core/dev.c Tue Apr 18 09:35:40 2006 -0600 @@ -1294,6 +1294,7 @@ int dev_queue_xmit(struct sk_buff *skb) if ((skb->h.raw + skb->csum + 2) > skb->tail) goto out_kfree_skb; skb->ip_summed = CHECKSUM_HW; + skb->proto_csum_blank = 0; } #endif diff -r 4ed269e73e95 -r 41823e46d6ac tools/debugger/gdb/gdbbuild --- a/tools/debugger/gdb/gdbbuild Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/debugger/gdb/gdbbuild Tue Apr 18 09:35:40 2006 -0600 @@ -7,7 +7,7 @@ tar xjf gdb-6.2.1.tar.bz2 tar xjf gdb-6.2.1.tar.bz2 cd 
gdb-6.2.1-xen-sparse -./mkbuildtree ../gdb-6.2.1 +bash ./mkbuildtree ../gdb-6.2.1 cd .. mkdir gdb-6.2.1-linux-i386-xen diff -r 4ed269e73e95 -r 41823e46d6ac tools/examples/xend-config.sxp --- a/tools/examples/xend-config.sxp Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/examples/xend-config.sxp Tue Apr 18 09:35:40 2006 -0600 @@ -127,3 +127,6 @@ # Whether to enable core-dumps when domains crash. #(enable-dump no) + +# The tool used for initiating virtual TPM migration +#(external-migration-tool '') diff -r 4ed269e73e95 -r 41823e46d6ac tools/firmware/hvmloader/Makefile --- a/tools/firmware/hvmloader/Makefile Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/firmware/hvmloader/Makefile Tue Apr 18 09:35:40 2006 -0600 @@ -21,7 +21,7 @@ # External CFLAGS can do more harm than good. CFLAGS := -XEN_TARGET_ARCH = x86_32 +override XEN_TARGET_ARCH = x86_32 XEN_ROOT = ../../.. include $(XEN_ROOT)/Config.mk diff -r 4ed269e73e95 -r 41823e46d6ac tools/firmware/vmxassist/Makefile --- a/tools/firmware/vmxassist/Makefile Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/firmware/vmxassist/Makefile Tue Apr 18 09:35:40 2006 -0600 @@ -21,7 +21,7 @@ # External CFLAGS can do more harm than good. CFLAGS := -XEN_TARGET_ARCH = x86_32 +override XEN_TARGET_ARCH = x86_32 XEN_ROOT = ../../.. 
include $(XEN_ROOT)/Config.mk diff -r 4ed269e73e95 -r 41823e46d6ac tools/ioemu/vl.c --- a/tools/ioemu/vl.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/ioemu/vl.c Tue Apr 18 09:35:40 2006 -0600 @@ -138,7 +138,7 @@ int gus_enabled = 1; int gus_enabled = 1; int pci_enabled = 1; int prep_enabled = 0; -int rtc_utc = 0; +int rtc_utc = 1; int cirrus_vga_enabled = 1; int vga_accelerate = 1; int graphic_width = 800; diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_bvtsched.c --- a/tools/libxc/xc_bvtsched.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_bvtsched.c Tue Apr 18 09:35:40 2006 -0600 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_bvtsched.c - * + * * API for manipulating parameters of the Borrowed Virtual Time scheduler. - * + * * Copyright (c) 2003, K A Fraser. */ @@ -26,7 +26,7 @@ int xc_bvtsched_global_get(int xc_handle { DECLARE_DOM0_OP; int ret; - + op.cmd = DOM0_SCHEDCTL; op.u.schedctl.sched_id = SCHED_BVT; op.u.schedctl.direction = SCHED_INFO_GET; @@ -71,7 +71,7 @@ int xc_bvtsched_domain_get(int xc_handle long long *warpl, long long *warpu) { - + DECLARE_DOM0_OP; int ret; struct bvt_adjdom *adjptr = &op.u.adjustdom.u.bvt; diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_core.c --- a/tools/libxc/xc_core.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_core.c Tue Apr 18 09:35:40 2006 -0600 @@ -23,7 +23,7 @@ copy_from_domain_page(int xc_handle, return 0; } -int +int xc_domain_dumpcore_via_callback(int xc_handle, uint32_t domid, void *args, @@ -45,13 +45,13 @@ xc_domain_dumpcore_via_callback(int xc_h PERROR("Could not allocate dump_mem"); goto error_out; } - + if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 ) { PERROR("Could not get info for domain"); goto error_out; } - + if ( domid != info.domid ) { PERROR("Domain %d does not exist", domid); @@ -61,10 +61,10 @@ xc_domain_dumpcore_via_callback(int xc_h for ( i = 0; i <= info.max_vcpu_id; i++ ) if ( xc_vcpu_getcontext(xc_handle, domid, 
i, &ctxt[nr_vcpus]) == 0) nr_vcpus++; - + nr_pages = info.nr_pages; - header.xch_magic = XC_CORE_MAGIC; + header.xch_magic = XC_CORE_MAGIC; header.xch_nr_vcpus = nr_vcpus; header.xch_nr_pages = nr_pages; header.xch_ctxt_offset = sizeof(struct xc_core_header); @@ -74,7 +74,7 @@ xc_domain_dumpcore_via_callback(int xc_h (sizeof(vcpu_guest_context_t) * nr_vcpus) + (nr_pages * sizeof(unsigned long))); header.xch_pages_offset = round_pgup(dummy_len); - + sts = dump_rtn(args, (char *)&header, sizeof(struct xc_core_header)); if ( sts != 0 ) goto error_out; @@ -150,7 +150,7 @@ static int local_file_dump(void *args, c return 0; } -int +int xc_domain_dumpcore(int xc_handle, uint32_t domid, const char *corename) @@ -163,7 +163,7 @@ xc_domain_dumpcore(int xc_handle, PERROR("Could not open corefile %s: %s", corename, strerror(errno)); return -errno; } - + sts = xc_domain_dumpcore_via_callback( xc_handle, domid, &da, &local_file_dump); diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_domain.c Tue Apr 18 09:35:40 2006 -0600 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_domain.c - * + * * API for manipulating and obtaining information on domains. - * + * * Copyright (c) 2003, K A Fraser. 
*/ @@ -26,17 +26,17 @@ int xc_domain_create(int xc_handle, *pdomid = (uint16_t)op.u.createdomain.domain; return 0; -} - - -int xc_domain_pause(int xc_handle, +} + + +int xc_domain_pause(int xc_handle, uint32_t domid) { DECLARE_DOM0_OP; op.cmd = DOM0_PAUSEDOMAIN; op.u.pausedomain.domain = (domid_t)domid; return do_dom0_op(xc_handle, &op); -} +} int xc_domain_unpause(int xc_handle, @@ -46,7 +46,7 @@ int xc_domain_unpause(int xc_handle, op.cmd = DOM0_UNPAUSEDOMAIN; op.u.unpausedomain.domain = (domid_t)domid; return do_dom0_op(xc_handle, &op); -} +} int xc_domain_destroy(int xc_handle, @@ -88,7 +88,7 @@ int xc_domain_shutdown(int xc_handle, int xc_vcpu_setaffinity(int xc_handle, - uint32_t domid, + uint32_t domid, int vcpu, cpumap_t cpumap) { @@ -109,7 +109,7 @@ int xc_domain_getinfo(int xc_handle, unsigned int nr_doms; uint32_t next_domid = first_domid; DECLARE_DOM0_OP; - int rc = 0; + int rc = 0; memset(info, 0, max_doms*sizeof(xc_dominfo_t)); @@ -127,8 +127,8 @@ int xc_domain_getinfo(int xc_handle, info->blocked = !!(op.u.getdomaininfo.flags & DOMFLAGS_BLOCKED); info->running = !!(op.u.getdomaininfo.flags & DOMFLAGS_RUNNING); - info->shutdown_reason = - (op.u.getdomaininfo.flags>>DOMFLAGS_SHUTDOWNSHIFT) & + info->shutdown_reason = + (op.u.getdomaininfo.flags>>DOMFLAGS_SHUTDOWNSHIFT) & DOMFLAGS_SHUTDOWNMASK; if ( info->shutdown && (info->shutdown_reason == SHUTDOWN_crash) ) @@ -152,7 +152,7 @@ int xc_domain_getinfo(int xc_handle, info++; } - if( !nr_doms ) return rc; + if( !nr_doms ) return rc; return nr_doms; } @@ -167,7 +167,7 @@ int xc_domain_getinfolist(int xc_handle, if ( mlock(info, max_domains*sizeof(xc_domaininfo_t)) != 0 ) return -1; - + op.cmd = DOM0_GETDOMAININFOLIST; op.u.getdomaininfolist.first_domain = first_domain; op.u.getdomaininfolist.max_domains = max_domains; @@ -177,10 +177,10 @@ int xc_domain_getinfolist(int xc_handle, ret = -1; else ret = op.u.getdomaininfolist.num_domains; - + if ( munlock(info, max_domains*sizeof(xc_domaininfo_t)) != 0 ) ret 
= -1; - + return ret; } @@ -209,7 +209,7 @@ int xc_vcpu_getcontext(int xc_handle, int xc_shadow_control(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned int sop, unsigned long *dirty_bitmap, unsigned long pages, @@ -238,11 +238,11 @@ int xc_domain_setcpuweight(int xc_handle { int sched_id; int ret; - + /* Figure out which scheduler is currently used: */ if ( (ret = xc_sched_id(xc_handle, &sched_id)) != 0 ) return ret; - + switch ( sched_id ) { case SCHED_BVT: @@ -253,20 +253,20 @@ int xc_domain_setcpuweight(int xc_handle long long warpl; long long warpu; - /* Preserve all the scheduling parameters apart + /* Preserve all the scheduling parameters apart of MCU advance. */ if ( (ret = xc_bvtsched_domain_get( - xc_handle, domid, &mcuadv, + xc_handle, domid, &mcuadv, &warpback, &warpvalue, &warpl, &warpu)) != 0 ) return ret; - + /* The MCU advance is inverse of the weight. Default value of the weight is 1, default mcuadv 10. The scaling factor is therefore 10. */ if ( weight > 0 ) mcuadv = 10 / weight; - - ret = xc_bvtsched_domain_set(xc_handle, domid, mcuadv, + + ret = xc_bvtsched_domain_set(xc_handle, domid, mcuadv, warpback, warpvalue, warpl, warpu); break; } @@ -276,7 +276,7 @@ int xc_domain_setcpuweight(int xc_handle } int xc_domain_setmaxmem(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned int max_memkb) { DECLARE_DOM0_OP; @@ -287,7 +287,7 @@ int xc_domain_setmaxmem(int xc_handle, } int xc_domain_memory_increase_reservation(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, @@ -297,7 +297,7 @@ int xc_domain_memory_increase_reservatio struct xen_memory_reservation reservation = { .extent_start = extent_start, /* may be NULL */ .nr_extents = nr_extents, - .extent_order = extent_order, + .extent_order = extent_order, .address_bits = address_bits, .domid = domid }; @@ -319,16 +319,16 @@ int xc_domain_memory_increase_reservatio } int 
xc_domain_memory_decrease_reservation(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned long nr_extents, unsigned int extent_order, unsigned long *extent_start) { int err; struct xen_memory_reservation reservation = { - .extent_start = extent_start, + .extent_start = extent_start, .nr_extents = nr_extents, - .extent_order = extent_order, + .extent_order = extent_order, .address_bits = 0, .domid = domid }; @@ -411,7 +411,7 @@ int xc_domain_max_vcpus(int xc_handle, u return do_dom0_op(xc_handle, &op); } -int xc_domain_sethandle(int xc_handle, uint32_t domid, +int xc_domain_sethandle(int xc_handle, uint32_t domid, xen_domain_handle_t handle) { DECLARE_DOM0_OP; @@ -506,7 +506,7 @@ int xc_domain_iomem_permission(int xc_ha op.cmd = DOM0_IOMEM_PERMISSION; op.u.iomem_permission.domain = domid; op.u.iomem_permission.first_mfn = first_mfn; - op.u.iomem_permission.nr_mfns = nr_mfns; + op.u.iomem_permission.nr_mfns = nr_mfns; op.u.iomem_permission.allow_access = allow_access; return do_dom0_op(xc_handle, &op); diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_elf.h --- a/tools/libxc/xc_elf.h Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_elf.h Tue Apr 18 09:35:40 2006 -0600 @@ -46,7 +46,7 @@ typedef uint16_t Elf64_Quarter; typedef uint16_t Elf64_Quarter; /* - * e_ident[] identification indexes + * e_ident[] identification indexes * See http://www.caldera.com/developers/gabi/2000-07-17/ch4.eheader.html */ #define EI_MAG0 0 /* file ID */ @@ -57,7 +57,7 @@ typedef uint16_t Elf64_Quarter; #define EI_DATA 5 /* data encoding */ #define EI_VERSION 6 /* ELF header version */ #define EI_OSABI 7 /* OS/ABI ID */ -#define EI_ABIVERSION 8 /* ABI version */ +#define EI_ABIVERSION 8 /* ABI version */ #define EI_PAD 9 /* start of pad bytes */ #define EI_NIDENT 16 /* Size of e_ident[] */ @@ -119,7 +119,7 @@ typedef struct elfhdr { Elf32_Half e_phnum; /* number of program header entries */ Elf32_Half e_shentsize; /* section header entry size */ Elf32_Half e_shnum; /* number of 
section header entries */ - Elf32_Half e_shstrndx; /* section header table's "section + Elf32_Half e_shstrndx; /* section header table's "section header string table" entry offset */ } Elf32_Ehdr; @@ -160,7 +160,7 @@ typedef struct { #define EM_486 6 /* Intel 80486 - unused? */ #define EM_860 7 /* Intel 80860 */ #define EM_MIPS 8 /* MIPS R3000 Big-Endian only */ -/* +/* * Don't know if EM_MIPS_RS4_BE, * EM_SPARC64, EM_PARISC, * or EM_PPC are ABI compliant @@ -441,7 +441,7 @@ typedef struct { #define DT_NUM 25 /* Number used. */ #define DT_LOPROC 0x70000000 /* reserved range for processor */ #define DT_HIPROC 0x7fffffff /* specific dynamic array tags */ - + /* Standard ELF hashing function */ unsigned int elf_hash(const unsigned char *name); diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_evtchn.c --- a/tools/libxc/xc_evtchn.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_evtchn.c Tue Apr 18 09:35:40 2006 -0600 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_evtchn.c - * + * * API for manipulating and accessing inter-domain event channels. - * + * * Copyright (c) 2004, K A Fraser. 
*/ @@ -44,7 +44,7 @@ int xc_evtchn_alloc_unbound(int xc_handl if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 ) rc = op.u.alloc_unbound.port; - + return rc; } @@ -62,6 +62,6 @@ int xc_evtchn_status(int xc_handle, if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 ) memcpy(status, &op.u.status, sizeof(*status)); - + return rc; } diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_ia64_stubs.c --- a/tools/libxc/xc_ia64_stubs.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_ia64_stubs.c Tue Apr 18 09:35:40 2006 -0600 @@ -22,7 +22,7 @@ unsigned long xc_ia64_fpsr_default(void) return FPSR_DEFAULT; } -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, +int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, int (*suspend)(int domid)) { @@ -53,11 +53,11 @@ xc_plan9_build(int xc_handle, VMM don't handle this now. This method will touch guest buffer to make sure the buffer's mapping is tracked by VMM, - */ + */ int xc_ia64_get_pfn_list(int xc_handle, - uint32_t domid, - unsigned long *pfn_buf, + uint32_t domid, + unsigned long *pfn_buf, unsigned int start_page, unsigned int nr_pages) { @@ -78,16 +78,16 @@ int xc_ia64_get_pfn_list(int xc_handle, op.u.getmemlist.buffer = __pfn_buf; if ( (max_pfns != -1UL) - && mlock(__pfn_buf, __nr_pages * sizeof(unsigned long)) != 0 ) + && mlock(__pfn_buf, __nr_pages * sizeof(unsigned long)) != 0 ) { PERROR("Could not lock pfn list buffer"); return -1; - } + } ret = do_dom0_op(xc_handle, &op); if (max_pfns != -1UL) - (void)munlock(__pfn_buf, __nr_pages * sizeof(unsigned long)); + (void)munlock(__pfn_buf, __nr_pages * sizeof(unsigned long)); if (max_pfns == -1UL) return 0; @@ -97,12 +97,12 @@ int xc_ia64_get_pfn_list(int xc_handle, __nr_pages -= num_pfns; __pfn_buf += num_pfns; - if (ret < 0) - // dummy write to make sure this tlb mapping is tracked by VMM + if (ret < 0) + // dummy write to make sure this tlb mapping is tracked by VMM 
*__pfn_buf = 0; - else - return nr_pages; - } + else + return nr_pages; + } return nr_pages; } @@ -111,7 +111,7 @@ long xc_get_max_pages(int xc_handle, uin dom0_op_t op; op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)domid; - return (do_dom0_op(xc_handle, &op) < 0) ? + return (do_dom0_op(xc_handle, &op) < 0) ? -1 : op.u.getdomaininfo.max_pages; } @@ -119,7 +119,7 @@ int xc_ia64_copy_to_domain_pages(int xc_ void* src_page, unsigned long dst_pfn, int nr_pages) { // N.B. gva should be page aligned - + unsigned long *page_array = NULL; int i; @@ -134,22 +134,22 @@ int xc_ia64_copy_to_domain_pages(int xc_ } for ( i=0; i< nr_pages; i++ ){ - if (xc_copy_to_domain_page(xc_handle, domid, page_array[i], - src_page + (i << PAGE_SHIFT))) - goto error_out; + if (xc_copy_to_domain_page(xc_handle, domid, page_array[i], + src_page + (i << PAGE_SHIFT))) + goto error_out; } free(page_array); return 0; - + error_out: free(page_array); return -1; } -#define HOB_SIGNATURE 0x3436474953424f48 // "HOBSIG64" -#define GFW_HOB_START ((4UL<<30)-(14UL<<20)) //4G -14M -#define GFW_HOB_SIZE (1UL<<20) //1M +#define HOB_SIGNATURE 0x3436474953424f48 // "HOBSIG64" +#define GFW_HOB_START ((4UL<<30)-(14UL<<20)) // 4G - 14M +#define GFW_HOB_SIZE (1UL<<20) // 1M #define RAW_GFW_START_NR(s) ((s) >> PAGE_SHIFT) #define RAW_GFW_HOB_START_NR(s) \ (RAW_GFW_START_NR(s) + ((GFW_HOB_START - GFW_START) >> PAGE_SHIFT)) @@ -170,7 +170,7 @@ typedef struct { * INFO HOB is the first data data in one HOB list * it contains the control information of the HOB list */ -typedef struct { +typedef struct { HOB_GENERIC_HEADER header; unsigned long length; // current length of hob unsigned long cur_pos; // current poisiton of hob @@ -251,7 +251,7 @@ hob_init( void *buffer ,unsigned long b // buffer too small return -1; } - + phit = (HOB_INFO*)buffer; phit->header.signature = HOB_SIGNATURE; phit->header.type = HOB_TYPE_INFO; @@ -259,7 +259,7 @@ hob_init( void *buffer ,unsigned long b phit->length = 
sizeof(HOB_INFO) + sizeof(HOB_GENERIC_HEADER); phit->cur_pos = 0; phit->buf_size = buf_size; - + terminal = (HOB_GENERIC_HEADER*) (buffer + sizeof(HOB_INFO)); terminal->signature= HOB_SIGNATURE; terminal->type = HOB_TYPE_TERMINAL; @@ -270,7 +270,7 @@ hob_init( void *buffer ,unsigned long b /* * Add a new HOB to the HOB List. - * + * * hob_start - start address of hob buffer * type - type of the hob to be added * data - data of the hob to be added @@ -285,8 +285,8 @@ hob_add( ) { HOB_INFO *phit; - HOB_GENERIC_HEADER *newhob,*tail; - + HOB_GENERIC_HEADER *newhob,*tail; + phit = (HOB_INFO*)hob_start; if (phit->length + data_size > phit->buf_size){ @@ -294,7 +294,7 @@ hob_add( return -1; } - //append new HOB + //append new HOB newhob = (HOB_GENERIC_HEADER*) (hob_start + phit->length - sizeof(HOB_GENERIC_HEADER)); newhob->signature = HOB_SIGNATURE; @@ -302,7 +302,7 @@ hob_add( newhob->length = data_size + sizeof(HOB_GENERIC_HEADER); memcpy((void*)newhob + sizeof(HOB_GENERIC_HEADER), data, data_size); - // append terminal HOB + // append terminal HOB tail = (HOB_GENERIC_HEADER*) ( hob_start + phit->length + data_size); tail->signature = HOB_SIGNATURE; tail->type = HOB_TYPE_TERMINAL; @@ -316,9 +316,9 @@ hob_add( } int get_hob_size(void* hob_buf){ - + HOB_INFO *phit = (HOB_INFO*)hob_buf; - + if (phit->header.signature != HOB_SIGNATURE){ PERROR("xc_get_hob_size:Incorrect signature"); return -1; @@ -328,32 +328,32 @@ int get_hob_size(void* hob_buf){ int build_hob (void* hob_buf, unsigned long hob_buf_size, unsigned long dom_mem_size) -{ - //Init HOB List +{ + //Init HOB List if (hob_init (hob_buf, hob_buf_size)<0){ PERROR("buffer too small"); goto err_out; } - + if ( add_mem_hob( hob_buf,dom_mem_size) < 0){ PERROR("Add memory hob failed, buffer too small"); goto err_out; } - + if ( add_pal_hob( hob_buf ) < 0 ){ PERROR("Add PAL hob failed, buffer too small"); goto err_out; } - + return 0; err_out: - return -1; -} - -static int + return -1; +} + +static int load_hob(int 
xc_handle, uint32_t dom, void *hob_buf, - unsigned long dom_mem_size) + unsigned long dom_mem_size) { // hob_buf should be page aligned int hob_size; @@ -370,22 +370,22 @@ load_hob(int xc_handle, uint32_t dom, vo } nr_pages = (hob_size + PAGE_SIZE -1) >> PAGE_SHIFT; - + return xc_ia64_copy_to_domain_pages(xc_handle, dom, hob_buf, RAW_GFW_HOB_START_NR(dom_mem_size), nr_pages ); } #define MIN(x, y) ((x) < (y)) ? (x) : (y) -static int +static int add_mem_hob(void* hob_buf, unsigned long dom_mem_size){ hob_mem_t memhob; // less than 3G memhob.start = 0; memhob.size = MIN(dom_mem_size, 0xC0000000); - + if (hob_add(hob_buf, HOB_TYPE_MEM, &memhob, sizeof(memhob)) < 0){ - return -1; + return -1; } if (dom_mem_size > 0xC0000000) { @@ -409,29 +409,29 @@ unsigned char config_pal_mem_attrib[8] = }; unsigned char config_pal_cache_info[152] = { 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 4, 6, 7, 255, 1, 0, 1, 0, 64, 0, 0, 12, 12, + 6, 4, 6, 7, 255, 1, 0, 1, 0, 64, 0, 0, 12, 12, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 6, 7, 0, 1, - 0, 1, 0, 64, 0, 0, 12, 12, 49, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 6, 8, 7, 7, 255, 7, 0, 11, 0, 0, 16, 0, - 12, 17, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 8, 7, + 0, 1, 0, 64, 0, 0, 12, 12, 49, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 6, 8, 7, 7, 255, 7, 0, 11, 0, 0, 16, 0, + 12, 17, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 8, 7, 7, 7, 5, 9, 11, 0, 0, 4, 0, 12, 15, 49, 0, 254, 255, - 255, 255, 255, 255, 255, 255, 2, 8, 7, 7, 7, 5, 9, - 11, 0, 0, 4, 0, 12, 15, 49, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 2, 8, 7, 7, 7, 5, 9, + 11, 0, 0, 4, 0, 12, 15, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 12, 7, 7, 7, 14, 1, 3, 0, 0, 192, 0, 12, 20, 49, 0 }; unsigned char config_pal_cache_prot_info[200] = { - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 16, 8, 0, 76, 12, 64, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 8, 0, 16, 4, 0, 76, 44, 68, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 32, - 0, 16, 8, 0, 81, 44, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 8, 0, 16, 4, 0, 76, 44, 68, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, + 0, 16, 8, 0, 81, 44, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, - 112, 12, 0, 79, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255, + 112, 12, 0, 79, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255, 32, 0, 112, 12, 0, 79, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 160, - 12, 0, 84, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 160, + 12, 0, 84, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned char config_pal_debug_info[16] = { @@ -444,37 +444,37 @@ unsigned char config_pal_freq_base[8] = 109, 219, 182, 13, 0, 0, 0, 0 }; unsigned char config_pal_freq_ratios[24] = { - 11, 1, 0, 0, 77, 7, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 4, + 11, 1, 0, 0, 77, 7, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0 }; unsigned char config_pal_halt_info[64] = { - 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned char config_pal_perf_mon_info[136] = { - 12, 47, 18, 8, 0, 0, 0, 0, 241, 255, 0, 0, 255, 7, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 241, 255, 0, 0, 223, 0, 255, 255, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 240, 255, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 240, 255, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 12, 47, 18, 8, 0, 0, 0, 0, 241, 255, 0, 0, 255, 7, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 241, 255, 0, 0, 223, 0, 255, 255, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 240, 255, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 240, 255, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned char config_pal_proc_get_features[104] = { - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 64, 6, 64, 49, 0, 0, 0, 0, 64, 6, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, - 231, 0, 0, 0, 0, 0, 0, 0, 228, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 64, 6, 64, 49, 0, 0, 0, 0, 64, 6, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, + 231, 0, 0, 0, 0, 0, 0, 0, 228, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned char config_pal_ptce_info[24] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned char config_pal_register_info[64] = { @@ -509,7 +509,7 @@ typedef struct{ typedef struct{ hob_type_t type; void* data; - unsigned long size; + unsigned long size; }hob_batch_t; hob_batch_t hob_batch[]={ @@ -588,13 +588,13 @@ hob_batch_t hob_batch[]={ { HOB_TYPE_PAL_VM_PAGE_SIZE, &config_pal_vm_page_size, sizeof(config_pal_vm_page_size) - }, + }, }; static int add_pal_hob(void* hob_buf){ int i; for (i=0; i<sizeof(hob_batch)/sizeof(hob_batch_t); i++){ - if (hob_add(hob_buf, hob_batch[i].type, + if (hob_add(hob_buf, hob_batch[i].type, hob_batch[i].data, hob_batch[i].size)<0) return -1; @@ -620,7 +620,7 @@ static int setup_guest( int xc_handle, } /* Load guest firmware */ - if( 
xc_ia64_copy_to_domain_pages( xc_handle, dom, + if( xc_ia64_copy_to_domain_pages( xc_handle, dom, image, RAW_GFW_IMAGE_START_NR(dom_memsize, image_size), image_size>>PAGE_SHIFT)) { PERROR("Could not load guest firmware into domain"); @@ -635,7 +635,7 @@ static int setup_guest( int xc_handle, /* Retrieve special pages like io, xenstore, etc. */ if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, - RAW_IO_PAGE_START_NR(dom_memsize), 2) != 2 ) + RAW_IO_PAGE_START_NR(dom_memsize), 2) != 2 ) { PERROR("Could not get the page frame list"); goto error_out; @@ -643,9 +643,9 @@ static int setup_guest( int xc_handle, *store_mfn = page_array[1]; if ((sp = (shared_iopage_t *) xc_map_foreign_range( - xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, - page_array[0])) == 0) - goto error_out; + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + page_array[0])) == 0) + goto error_out; memset(sp, 0, PAGE_SIZE); for (i = 0; i < vcpus; i++) { @@ -698,14 +698,14 @@ int xc_hvm_build(int xc_handle, image_size = (image_size + PAGE_SIZE - 1) & PAGE_MASK; - if ( mlock(&st_ctxt, sizeof(st_ctxt) ) ){ + if ( mlock(&st_ctxt, sizeof(st_ctxt) ) ){ PERROR("Unable to mlock ctxt"); return 1; } op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)domid; - if ( (do_dom0_op(xc_handle, &op) < 0) || + if ( (do_dom0_op(xc_handle, &op) < 0) || ((uint16_t)op.u.getdomaininfo.domain != domid) ) { PERROR("Could not get info on domain"); goto error_out; diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_linux_build.c Tue Apr 18 09:35:40 2006 -0600 @@ -237,7 +237,7 @@ static int setup_pg_tables(int xc_handle else { *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT; - if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) && + if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) && (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) ) *vl1e &= ~_PAGE_RW; } @@ -314,7 +314,7 @@ static int 
setup_pg_tables_pae(int xc_ha else *vl2e++ = l1tab | L2_PROT; } - + if ( shadow_mode_enabled ) { *vl1e = (count << PAGE_SHIFT) | L1_PROT; @@ -323,12 +323,12 @@ static int setup_pg_tables_pae(int xc_ha { *vl1e = ((uint64_t)page_array[count] << PAGE_SHIFT) | L1_PROT; if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) && - (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) ) + (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) ) *vl1e &= ~_PAGE_RW; } vl1e++; } - + munmap(vl1tab, PAGE_SIZE); munmap(vl2tab, PAGE_SIZE); munmap(vl3tab, PAGE_SIZE); @@ -376,13 +376,13 @@ static int setup_pg_tables_64(int xc_han ctxt->ctrlreg[3] = pl4tab; else ctxt->ctrlreg[3] = l4tab; - + for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++) { if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) ) { alloc_pt(l1tab, vl1tab, pl1tab); - + if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) ) { alloc_pt(l2tab, vl2tab, pl2tab); @@ -410,7 +410,7 @@ static int setup_pg_tables_64(int xc_han *vl2e = l1tab | L2_PROT; vl2e++; } - + if ( shadow_mode_enabled ) { *vl1e = (count << PAGE_SHIFT) | L1_PROT; @@ -419,14 +419,14 @@ static int setup_pg_tables_64(int xc_han { *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT; if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) && - (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) ) + (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) ) { *vl1e &= ~_PAGE_RW; } } vl1e++; } - + munmap(vl1tab, PAGE_SIZE); munmap(vl2tab, PAGE_SIZE); munmap(vl3tab, PAGE_SIZE); @@ -509,7 +509,7 @@ static int setup_guest(int xc_handle, " Loaded kernel: %p->%p\n" " Init. ramdisk: %p->%p\n" " TOTAL: %p->%p\n", - _p(dsi.v_kernstart), _p(dsi.v_kernend), + _p(dsi.v_kernstart), _p(dsi.v_kernend), _p(vinitrd_start), _p(vinitrd_end), _p(dsi.v_start), _p(v_end)); printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry)); @@ -696,10 +696,10 @@ static int setup_guest(int xc_handle, required_features); /* - * Why do we need this? 
The number of page-table frames depends on the - * size of the bootstrap address space. But the size of the address space - * depends on the number of page-table frames (since each one is mapped - * read-only). We have a pair of simultaneous equations in two unknowns, + * Why do we need this? The number of page-table frames depends on the + * size of the bootstrap address space. But the size of the address space + * depends on the number of page-table frames (since each one is mapped + * read-only). We have a pair of simultaneous equations in two unknowns, * which we solve by exhaustive search. */ v_end = round_pgup(dsi.v_end); @@ -731,13 +731,13 @@ static int setup_guest(int xc_handle, if ( dsi.pae_kernel ) { /* FIXME: assumes one L2 pgtable @ 0xc0000000 */ - if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >> + if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >> L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages ) break; } else { - if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >> + if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >> L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages ) break; } @@ -873,7 +873,7 @@ static int setup_guest(int xc_handle, count) ) { fprintf(stderr,"m2p update failure p=%lx m=%lx\n", - count, page_array[count]); + count, page_array[count]); munmap(physmap, PAGE_SIZE); goto error_out; } @@ -982,7 +982,7 @@ static int setup_guest(int xc_handle, start_info->mod_len = initrd->len; } if ( cmdline != NULL ) - { + { strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE); start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0'; } @@ -1073,14 +1073,14 @@ static int xc_linux_build_internal(int x #endif if ( mlock(&st_ctxt, sizeof(st_ctxt) ) ) - { + { PERROR("%s: ctxt mlock failed", __func__); return 1; } op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)domid; - if ( (xc_dom0_op(xc_handle, &op) < 0) || + if ( (xc_dom0_op(xc_handle, &op) < 0) || ((uint16_t)op.u.getdomaininfo.domain != domid) ) 
{ PERROR("Could not get info on domain"); @@ -1089,9 +1089,9 @@ static int xc_linux_build_internal(int x memset(ctxt, 0, sizeof(*ctxt)); - if ( setup_guest(xc_handle, domid, image, image_size, + if ( setup_guest(xc_handle, domid, image, image_size, initrd, - nr_pages, + nr_pages, &vstartinfo_start, &vkern_entry, &vstack_start, ctxt, cmdline, op.u.getdomaininfo.shared_info_frame, @@ -1152,7 +1152,7 @@ static int xc_linux_build_internal(int x /* No LDT. */ ctxt->ldt_ents = 0; - + /* Use the default Xen-provided GDT. */ ctxt->gdt_ents = 0; @@ -1184,7 +1184,7 @@ static int xc_linux_build_internal(int x launch_op.cmd = DOM0_SETVCPUCONTEXT; rc = xc_dom0_op(xc_handle, &launch_op); - + return rc; error_out: diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_linux_restore.c Tue Apr 18 09:35:40 2006 -0600 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_linux_restore.c - * + * * Restore the state of a Linux session. - * + * * Copyright (c) 2003, K A Fraser. */ @@ -13,13 +13,13 @@ #include "xg_save_restore.h" /* max mfn of the whole machine */ -static unsigned long max_mfn; +static unsigned long max_mfn; /* virtual starting address of the hypervisor */ -static unsigned long hvirt_start; +static unsigned long hvirt_start; /* #levels of page tables used by the currrent guest */ -static unsigned int pt_levels; +static unsigned int pt_levels; /* total number of pages used by the current guest */ static unsigned long max_pfn; @@ -41,84 +41,84 @@ read_exact(int fd, void *buf, size_t cou s = read(fd, &b[r], count - r); if ((s == -1) && (errno == EINTR)) continue; - if (s <= 0) { + if (s <= 0) { break; - } + } r += s; } - return (r == count) ? 1 : 0; + return (r == count) ? 
1 : 0; } /* -** In the state file (or during transfer), all page-table pages are -** converted into a 'canonical' form where references to actual mfns -** are replaced with references to the corresponding pfns. -** This function inverts that operation, replacing the pfn values with -** the (now known) appropriate mfn values. +** In the state file (or during transfer), all page-table pages are +** converted into a 'canonical' form where references to actual mfns +** are replaced with references to the corresponding pfns. +** This function inverts that operation, replacing the pfn values with +** the (now known) appropriate mfn values. */ -int uncanonicalize_pagetable(unsigned long type, void *page) -{ - int i, pte_last; - unsigned long pfn; - uint64_t pte; - - pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); +int uncanonicalize_pagetable(unsigned long type, void *page) +{ + int i, pte_last; + unsigned long pfn; + uint64_t pte; + + pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); /* Now iterate through the page table, uncanonicalizing each PTE */ - for(i = 0; i < pte_last; i++) { - - if(pt_levels == 2) - pte = ((uint32_t *)page)[i]; - else - pte = ((uint64_t *)page)[i]; - - if(pte & _PAGE_PRESENT) { + for(i = 0; i < pte_last; i++) { + + if(pt_levels == 2) + pte = ((uint32_t *)page)[i]; + else + pte = ((uint64_t *)page)[i]; + + if(pte & _PAGE_PRESENT) { pfn = (pte >> PAGE_SHIFT) & 0xffffffff; - - if(pfn >= max_pfn) { + + if(pfn >= max_pfn) { /* This "page table page" is probably not one; bail. 
*/ ERR("Frame number in type %lu page table is out of range: " - "i=%d pfn=0x%lx max_pfn=%lu", + "i=%d pfn=0x%lx max_pfn=%lu", type >> 28, i, pfn, max_pfn); - return 0; - } - - + return 0; + } + + pte &= 0xffffff0000000fffULL; pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT; - if(pt_levels == 2) - ((uint32_t *)page)[i] = (uint32_t)pte; - else - ((uint64_t *)page)[i] = (uint64_t)pte; - - - - } - } - - return 1; + if(pt_levels == 2) + ((uint32_t *)page)[i] = (uint32_t)pte; + else + ((uint64_t *)page)[i] = (uint64_t)pte; + + + + } + } + + return 1; } -int xc_linux_restore(int xc_handle, int io_fd, - uint32_t dom, unsigned long nr_pfns, +int xc_linux_restore(int xc_handle, int io_fd, + uint32_t dom, unsigned long nr_pfns, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn) { DECLARE_DOM0_OP; int rc = 1, i, n; - unsigned long mfn, pfn; + unsigned long mfn, pfn; unsigned int prev_pc, this_pc; int verify = 0; - int nraces = 0; + int nraces = 0; /* The new domain's shared-info frame number. */ unsigned long shared_info_frame; unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ shared_info_t *shared_info = (shared_info_t *)shared_info_page; - + /* A copy of the CPU context of the guest. */ vcpu_guest_context_t ctxt; @@ -135,7 +135,7 @@ int xc_linux_restore(int xc_handle, int unsigned long *page = NULL; /* A copy of the pfn-to-mfn table frame list. */ - unsigned long *p2m_frame_list = NULL; + unsigned long *p2m_frame_list = NULL; /* A temporary mapping of the guest's start_info page. 
*/ start_info_t *start_info; @@ -148,17 +148,17 @@ int xc_linux_restore(int xc_handle, int unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; struct mmuext_op pin[MAX_PIN_BATCH]; - unsigned int nr_pins; - - - max_pfn = nr_pfns; + unsigned int nr_pins; + + + max_pfn = nr_pfns; DPRINTF("xc_linux_restore start: max_pfn = %lx\n", max_pfn); - if(!get_platform_info(xc_handle, dom, + if(!get_platform_info(xc_handle, dom, &max_mfn, &hvirt_start, &pt_levels)) { - ERR("Unable to get platform info."); + ERR("Unable to get platform info."); return 1; } @@ -171,20 +171,20 @@ int xc_linux_restore(int xc_handle, int /* Read the saved P2M frame list */ - if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { + if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { ERR("Couldn't allocate p2m_frame_list array"); goto out; } - - if (!read_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { + + if (!read_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { ERR("read p2m_frame_list failed"); goto out; } - + /* We want zeroed memory so use calloc rather than malloc. 
*/ - p2m = calloc(sizeof(unsigned long), max_pfn); - pfn_type = calloc(sizeof(unsigned long), max_pfn); + p2m = calloc(sizeof(unsigned long), max_pfn); + pfn_type = calloc(sizeof(unsigned long), max_pfn); region_mfn = calloc(sizeof(unsigned long), MAX_BATCH_SIZE); if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) { @@ -192,7 +192,7 @@ int xc_linux_restore(int xc_handle, int errno = ENOMEM; goto out; } - + if (mlock(region_mfn, sizeof(unsigned long) * MAX_BATCH_SIZE)) { ERR("Could not mlock region_mfn"); goto out; @@ -207,27 +207,27 @@ int xc_linux_restore(int xc_handle, int } shared_info_frame = op.u.getdomaininfo.shared_info_frame; - if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) { + if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) { errno = ENOMEM; goto out; } - + if(xc_domain_memory_increase_reservation( - xc_handle, dom, max_pfn, 0, 0, NULL) != 0) { + xc_handle, dom, max_pfn, 0, 0, NULL) != 0) { ERR("Failed to increase reservation by %lx KB", PFN_TO_KB(max_pfn)); errno = ENOMEM; goto out; } - DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn)); + DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn)); /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. 
*/ if (xc_get_pfn_list(xc_handle, dom, p2m, max_pfn) != max_pfn) { ERR("Did not read correct number of frame numbers for new dom"); goto out; } - - if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { + + if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { ERR("Could not initialise for MMU updates"); goto out; } @@ -242,7 +242,7 @@ int xc_linux_restore(int xc_handle, int prev_pc = 0; n = 0; - while (1) { + while (1) { int j; @@ -253,13 +253,13 @@ int xc_linux_restore(int xc_handle, int prev_pc = this_pc; } - if (!read_exact(io_fd, &j, sizeof(int))) { + if (!read_exact(io_fd, &j, sizeof(int))) { ERR("Error when reading batch size"); goto out; } PPRINTF("batch %d\n",j); - + if (j == -1) { verify = 1; fprintf(stderr, "Entering page verify mode\n"); @@ -269,27 +269,27 @@ int xc_linux_restore(int xc_handle, int if (j == 0) break; /* our work here is done */ - if (j > MAX_BATCH_SIZE) { + if (j > MAX_BATCH_SIZE) { ERR("Max batch size exceeded. Giving up."); goto out; } - - if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { + + if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { ERR("Error when reading region pfn types"); goto out; } - for (i = 0; i < j; i++) { + for (i = 0; i < j; i++) { if ((region_pfn_type[i] & LTAB_MASK) == XTAB) region_mfn[i] = 0; /* we know map will fail, but don't care */ - else - region_mfn[i] = p2m[region_pfn_type[i] & ~LTAB_MASK]; - - } - + else + region_mfn[i] = p2m[region_pfn_type[i] & ~LTAB_MASK]; + + } + if (!(region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_WRITE, region_mfn, j))) { + xc_handle, dom, PROT_WRITE, region_mfn, j))) { ERR("map batch failed"); goto out; } @@ -297,12 +297,12 @@ int xc_linux_restore(int xc_handle, int for ( i = 0; i < j; i++ ) { void *page; - unsigned long pagetype; + unsigned long pagetype; pfn = region_pfn_type[i] & ~LTAB_MASK; - pagetype = region_pfn_type[i] & LTAB_MASK; - - if (pagetype == XTAB) + pagetype = region_pfn_type[i] & LTAB_MASK; + + if (pagetype == XTAB) /* a 
bogus/unmapped page: skip it */ continue; @@ -311,72 +311,72 @@ int xc_linux_restore(int xc_handle, int goto out; } - pfn_type[pfn] = pagetype; + pfn_type[pfn] = pagetype; mfn = p2m[pfn]; /* In verify mode, we use a copy; otherwise we work in place */ - page = verify ? (void *)buf : (region_base + i*PAGE_SIZE); - - if (!read_exact(io_fd, page, PAGE_SIZE)) { + page = verify ? (void *)buf : (region_base + i*PAGE_SIZE); + + if (!read_exact(io_fd, page, PAGE_SIZE)) { ERR("Error when reading page (type was %lx)", pagetype); goto out; } - pagetype &= LTABTYPE_MASK; - - if(pagetype >= L1TAB && pagetype <= L4TAB) { - - /* - ** A page table page - need to 'uncanonicalize' it, i.e. - ** replace all the references to pfns with the corresponding - ** mfns for the new domain. - ** - ** On PAE we need to ensure that PGDs are in MFNs < 4G, and - ** so we may need to update the p2m after the main loop. - ** Hence we defer canonicalization of L1s until then. + pagetype &= LTABTYPE_MASK; + + if(pagetype >= L1TAB && pagetype <= L4TAB) { + + /* + ** A page table page - need to 'uncanonicalize' it, i.e. + ** replace all the references to pfns with the corresponding + ** mfns for the new domain. + ** + ** On PAE we need to ensure that PGDs are in MFNs < 4G, and + ** so we may need to update the p2m after the main loop. + ** Hence we defer canonicalization of L1s until then. */ - if(pt_levels != 3 || pagetype != L1TAB) { + if(pt_levels != 3 || pagetype != L1TAB) { if(!uncanonicalize_pagetable(pagetype, page)) { - /* + /* ** Failing to uncanonicalize a page table can be ok ** under live migration since the pages type may have - ** changed by now (and we'll get an update later). + ** changed by now (and we'll get an update later). 
*/ - DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", - pagetype >> 28, pfn, mfn); - nraces++; - continue; + DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", + pagetype >> 28, pfn, mfn); + nraces++; + continue; } - } - - } else if(pagetype != NOTAB) { + } + + } else if(pagetype != NOTAB) { ERR("Bogus page type %lx page table is out of range: " "i=%d max_pfn=%lu", pagetype, i, max_pfn); goto out; - } + } if (verify) { int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); - if (res) { + if (res) { int v; DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx " - "actualcs=%08lx\n", pfn, pfn_type[pfn], - csum_page(region_base + i*PAGE_SIZE), + "actualcs=%08lx\n", pfn, pfn_type[pfn], + csum_page(region_base + i*PAGE_SIZE), csum_page(buf)); for (v = 0; v < 4; v++) { - - unsigned long *p = (unsigned long *) + + unsigned long *p = (unsigned long *) (region_base + i*PAGE_SIZE); if (buf[v] != p[v]) DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); @@ -384,8 +384,8 @@ int xc_linux_restore(int xc_handle, int } } - if (xc_add_mmu_update(xc_handle, mmu, - (((unsigned long long)mfn) << PAGE_SHIFT) + if (xc_add_mmu_update(xc_handle, mmu, + (((unsigned long long)mfn) << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn)) { ERR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn); goto out; @@ -398,149 +398,149 @@ int xc_linux_restore(int xc_handle, int DPRINTF("Received all pages (%d races)\n", nraces); - if(pt_levels == 3) { - - /* - ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This + if(pt_levels == 3) { + + /* + ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This ** is a little awkward and involves (a) finding all such PGDs and - ** replacing them with 'lowmem' versions; (b) upating the p2m[] + ** replacing them with 'lowmem' versions; (b) upating the p2m[] ** with the new info; and (c) canonicalizing all the L1s using the - ** (potentially updated) p2m[]. - ** + ** (potentially updated) p2m[]. 
+ ** ** This is relatively slow (and currently involves two passes through ** the pfn_type[] array), but at least seems to be correct. May wish - ** to consider more complex approaches to optimize this later. + ** to consider more complex approaches to optimize this later. */ - int j, k; + int j, k; /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ for (i = 0; i < max_pfn; i++) { - + if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) { - unsigned long new_mfn; - uint64_t l3ptes[4]; - uint64_t *l3tab; + unsigned long new_mfn; + uint64_t l3ptes[4]; + uint64_t *l3tab; l3tab = (uint64_t *) - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ, p2m[i]); - - for(j = 0; j < 4; j++) - l3ptes[j] = l3tab[j]; - - munmap(l3tab, PAGE_SIZE); + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, p2m[i]); + + for(j = 0; j < 4; j++) + l3ptes[j] = l3tab[j]; + + munmap(l3tab, PAGE_SIZE); if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) { ERR("Couldn't get a page below 4GB :-("); goto out; } - + p2m[i] = new_mfn; - if (xc_add_mmu_update(xc_handle, mmu, - (((unsigned long long)new_mfn) - << PAGE_SHIFT) | + if (xc_add_mmu_update(xc_handle, mmu, + (((unsigned long long)new_mfn) + << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i)) { ERR("Couldn't m2p on PAE root pgdir"); goto out; } - + l3tab = (uint64_t *) - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ | PROT_WRITE, p2m[i]); - - for(j = 0; j < 4; j++) - l3tab[j] = l3ptes[j]; - - munmap(l3tab, PAGE_SIZE); - + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, p2m[i]); + + for(j = 0; j < 4; j++) + l3tab[j] = l3ptes[j]; + + munmap(l3tab, PAGE_SIZE); + } } /* Second pass: find all L1TABs and uncanonicalize them */ - j = 0; - - for(i = 0; i < max_pfn; i++) { - - if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) { - region_mfn[j] = p2m[i]; - j++; - } - - if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) { + j = 0; + + for(i = 0; i < max_pfn; i++) { + + if 
(((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) { + region_mfn[j] = p2m[i]; + j++; + } + + if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) { if (!(region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ | PROT_WRITE, - region_mfn, j))) { + xc_handle, dom, PROT_READ | PROT_WRITE, + region_mfn, j))) { ERR("map batch failed"); goto out; } for(k = 0; k < j; k++) { - if(!uncanonicalize_pagetable(L1TAB, + if(!uncanonicalize_pagetable(L1TAB, region_base + k*PAGE_SIZE)) { - ERR("failed uncanonicalize pt!"); - goto out; - } + ERR("failed uncanonicalize pt!"); + goto out; + } } - - munmap(region_base, j*PAGE_SIZE); - j = 0; - } - } - - } - - - if (xc_finish_mmu_updates(xc_handle, mmu)) { - ERR("Error doing finish_mmu_updates()"); - goto out; - } + + munmap(region_base, j*PAGE_SIZE); + j = 0; + } + } + + } + + + if (xc_finish_mmu_updates(xc_handle, mmu)) { + ERR("Error doing finish_mmu_updates()"); + goto out; + } /* * Pin page tables. Do this after writing to them as otherwise Xen * will barf when doing the type-checking. 
*/ - nr_pins = 0; + nr_pins = 0; for (i = 0; i < max_pfn; i++) { if (i == (max_pfn-1) || nr_pins == MAX_PIN_BATCH) { - if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { - ERR("Failed to pin batch of %d page tables", nr_pins); + if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { + ERR("Failed to pin batch of %d page tables", nr_pins); goto out; - } + } nr_pins = 0; } if ( (pfn_type[i] & LPINTAB) == 0 ) continue; - switch(pfn_type[i]) { - - case (L1TAB|LPINTAB): + switch(pfn_type[i]) { + + case (L1TAB|LPINTAB): pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; - break; - - case (L2TAB|LPINTAB): + break; + + case (L2TAB|LPINTAB): pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; - break; - - case (L3TAB|LPINTAB): + break; + + case (L3TAB|LPINTAB): pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; - break; + break; case (L4TAB|LPINTAB): pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; - break; - - default: - continue; + break; + + default: + continue; } pin[nr_pins].arg1.mfn = p2m[i]; - nr_pins++; + nr_pins++; } @@ -553,17 +553,17 @@ int xc_linux_restore(int xc_handle, int unsigned long *pfntab; int rc; - if (!read_exact(io_fd, &count, sizeof(count))) { + if (!read_exact(io_fd, &count, sizeof(count))) { ERR("Error when reading pfn count"); goto out; } - if(!(pfntab = malloc(sizeof(unsigned long) * count))) { + if(!(pfntab = malloc(sizeof(unsigned long) * count))) { ERR("Out of memory"); goto out; } - - if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { + + if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { ERR("Error when reading pfntab"); goto out; } @@ -572,14 +572,14 @@ int xc_linux_restore(int xc_handle, int unsigned long pfn = pfntab[i]; - if(pfn > max_pfn) + if(pfn > max_pfn) /* shouldn't happen - continue optimistically */ - continue; - - pfntab[i] = p2m[pfn]; - p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map - } - + continue; + + pfntab[i] = p2m[pfn]; + p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map + } + if (count > 0) { struct 
xen_memory_reservation reservation = { @@ -590,16 +590,16 @@ int xc_linux_restore(int xc_handle, int }; if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, - &reservation)) != count) { + &reservation)) != count) { ERR("Could not decrease reservation : %d", rc); goto out; } else DPRINTF("Decreased reservation by %d pages\n", count); - } - } - - if (!read_exact(io_fd, &ctxt, sizeof(ctxt)) || - !read_exact(io_fd, shared_info_page, PAGE_SIZE)) { + } + } + + if (!read_exact(io_fd, &ctxt, sizeof(ctxt)) || + !read_exact(io_fd, shared_info_page, PAGE_SIZE)) { ERR("Error when reading ctxt or shared info page"); goto out; } @@ -642,15 +642,15 @@ int xc_linux_restore(int xc_handle, int if (pfn >= max_pfn) { ERR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx", - pfn, max_pfn, pfn_type[pfn]); - goto out; - } - - if ( (pfn_type[pfn] & LTABTYPE_MASK) != + pfn, max_pfn, pfn_type[pfn]); + goto out; + } + + if ( (pfn_type[pfn] & LTABTYPE_MASK) != ((unsigned long)pt_levels<<LTAB_SHIFT) ) { ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", - pfn, max_pfn, pfn_type[pfn], - (unsigned long)pt_levels<<LTAB_SHIFT); + pfn, max_pfn, pfn_type[pfn], + (unsigned long)pt_levels<<LTAB_SHIFT); goto out; } @@ -667,7 +667,7 @@ int xc_linux_restore(int xc_handle, int xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame); memcpy(page, shared_info, sizeof(shared_info_t)); munmap(page, PAGE_SIZE); - + /* Uncanonicalise the pfn-to-mfn table frame-number list. 
*/ for (i = 0; i < P2M_FL_ENTRIES; i++) { pfn = p2m_frame_list[i]; @@ -678,16 +678,16 @@ int xc_linux_restore(int xc_handle, int p2m_frame_list[i] = p2m[pfn]; } - + /* Copy the P2M we've constructed to the 'live' P2M */ - if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE, + if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE, p2m_frame_list, P2M_FL_ENTRIES))) { ERR("Couldn't map p2m table"); goto out; } - memcpy(live_p2m, p2m, P2M_SIZE); - munmap(live_p2m, P2M_SIZE); + memcpy(live_p2m, p2m, P2M_SIZE); + munmap(live_p2m, P2M_SIZE); /* * Safety checking of saved context: diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_linux_save.c Tue Apr 18 09:35:40 2006 -0600 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_linux_save.c - * + * * Save the state of a running Linux session. - * + * * Copyright (c) 2003, K A Fraser. */ @@ -17,23 +17,23 @@ /* ** Default values for important tuning parameters. Can override by passing -** non-zero replacement values to xc_linux_save(). +** non-zero replacement values to xc_linux_save(). ** -** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. -** +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. 
+** */ -#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ +#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ #define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ /* max mfn of the whole machine */ -static unsigned long max_mfn; +static unsigned long max_mfn; /* virtual starting address of the hypervisor */ -static unsigned long hvirt_start; +static unsigned long hvirt_start; /* #levels of page tables used by the currrent guest */ -static unsigned int pt_levels; +static unsigned int pt_levels; /* total number of pages used by the current guest */ static unsigned long max_pfn; @@ -56,8 +56,8 @@ static unsigned long *live_m2p = NULL; (((_mfn) < (max_mfn)) && \ ((mfn_to_pfn(_mfn) < (max_pfn)) && \ (live_p2m[mfn_to_pfn(_mfn)] == (_mfn)))) - - + + /* Returns TRUE if MFN is successfully converted to a PFN. */ #define translate_mfn_to_pfn(_pmfn) \ ({ \ @@ -70,12 +70,12 @@ static unsigned long *live_m2p = NULL; _res; \ }) -/* -** During (live) save/migrate, we maintain a number of bitmaps to track -** which pages we have to send, to fixup, and to skip. +/* +** During (live) save/migrate, we maintain a number of bitmaps to track +** which pages we have to send, to fixup, and to skip. 
*/ -#define BITS_PER_LONG (sizeof(unsigned long) * 8) +#define BITS_PER_LONG (sizeof(unsigned long) * 8) #define BITMAP_SIZE ((max_pfn + BITS_PER_LONG - 1) / 8) #define BITMAP_ENTRY(_nr,_bmap) \ @@ -85,17 +85,17 @@ static unsigned long *live_m2p = NULL; static inline int test_bit (int nr, volatile void * addr) { - return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; + return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; } static inline void clear_bit (int nr, volatile void * addr) { - BITMAP_ENTRY(nr, addr) &= ~(1 << BITMAP_SHIFT(nr)); + BITMAP_ENTRY(nr, addr) &= ~(1 << BITMAP_SHIFT(nr)); } static inline void set_bit ( int nr, volatile void * addr) { - BITMAP_ENTRY(nr, addr) |= (1 << BITMAP_SHIFT(nr)); + BITMAP_ENTRY(nr, addr) |= (1 << BITMAP_SHIFT(nr)); } /* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */ @@ -122,7 +122,7 @@ static inline int permute( int i, int nr { /* Need a simple permutation function so that we scan pages in a pseudo random order, enabling us to get a better estimate of - the domain's page dirtying rate as we go (there are often + the domain's page dirtying rate as we go (there are often contiguous ranges of pfns that have similar behaviour, and we want to mix them up. 
*/ @@ -130,21 +130,21 @@ static inline int permute( int i, int nr /* 512MB domain, 128k pages, order 17 */ /* - QPONMLKJIHGFEDCBA - QPONMLKJIH - GFEDCBA + QPONMLKJIHGFEDCBA + QPONMLKJIH + GFEDCBA */ - + /* - QPONMLKJIHGFEDCBA - EDCBA + QPONMLKJIHGFEDCBA + EDCBA QPONM LKJIHGF */ do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ - + return i; } @@ -165,7 +165,7 @@ static uint64_t llgettimeofday(void) static uint64_t tv_delta(struct timeval *new, struct timeval *old) { - return ((new->tv_sec - old->tv_sec)*1000000 ) + + return ((new->tv_sec - old->tv_sec)*1000000 ) + (new->tv_usec - old->tv_usec); } @@ -175,7 +175,7 @@ static uint64_t tv_delta(struct timeval /* ** We control the rate at which we transmit (or save) to minimize impact -** on running domains (including the target if we're doing live migrate). +** on running domains (including the target if we're doing live migrate). */ #define MAX_MBIT_RATE 500 /* maximum transmit rate for migrate */ @@ -193,10 +193,10 @@ static int mbit_rate, ombit_rate = 0; static int mbit_rate, ombit_rate = 0; /* Have we reached the maximum transmission rate? */ -#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE) - - -static inline void initialize_mbit_rate() +#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE) + + +static inline void initialize_mbit_rate() { mbit_rate = START_MBIT_RATE; } @@ -213,7 +213,7 @@ static int ratewrite(int io_fd, void *bu if (START_MBIT_RATE == 0) return write(io_fd, buf, n); - + budget -= n; if (budget < 0) { if (mbit_rate != ombit_rate) { @@ -253,46 +253,46 @@ static int ratewrite(int io_fd, void *bu #else /* ! 
ADAPTIVE SAVE */ -#define RATE_IS_MAX() (0) -#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n)) -#define initialize_mbit_rate() +#define RATE_IS_MAX() (0) +#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n)) +#define initialize_mbit_rate() #endif static inline ssize_t write_exact(int fd, void *buf, size_t count) { - if(write(fd, buf, count) != count) - return 0; - return 1; -} - - - -static int print_stats(int xc_handle, uint32_t domid, int pages_sent, + if(write(fd, buf, count) != count) + return 0; + return 1; +} + + + +static int print_stats(int xc_handle, uint32_t domid, int pages_sent, xc_shadow_control_stats_t *stats, int print) { static struct timeval wall_last; static long long d0_cpu_last; static long long d1_cpu_last; - + struct timeval wall_now; long long wall_delta; long long d0_cpu_now, d0_cpu_delta; long long d1_cpu_now, d1_cpu_delta; - + gettimeofday(&wall_now, NULL); - + d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000; d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000; - if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) + if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) fprintf(stderr, "ARRHHH!!\n"); - + wall_delta = tv_delta(&wall_now,&wall_last)/1000; - + if (wall_delta == 0) wall_delta = 1; - + d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; @@ -300,14 +300,14 @@ static int print_stats(int xc_handle, ui fprintf(stderr, "delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " "dirtied %dMb/s %" PRId32 " pages\n", - wall_delta, + wall_delta, (int)((d0_cpu_delta*100)/wall_delta), (int)((d1_cpu_delta*100)/wall_delta), (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))), (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))), stats->dirty_count); -#ifdef ADAPTIVE_SAVE +#ifdef ADAPTIVE_SAVE if (((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate) { mbit_rate = 
(int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) + 50; @@ -315,16 +315,16 @@ static int print_stats(int xc_handle, ui mbit_rate = MAX_MBIT_RATE; } #endif - + d0_cpu_last = d0_cpu_now; d1_cpu_last = d1_cpu_now; - wall_last = wall_now; + wall_last = wall_now; return 0; } -static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn, +static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn, unsigned long *arr, int runs) { long long start, now; @@ -335,24 +335,24 @@ static int analysis_phase(int xc_handle, for (j = 0; j < runs; j++) { int i; - + xc_shadow_control(xc_handle, domid, DOM0_SHADOW_CONTROL_OP_CLEAN, arr, max_pfn, NULL); fprintf(stderr, "#Flush\n"); - for ( i = 0; i < 40; i++ ) { - usleep(50000); + for ( i = 0; i < 40; i++ ) { + usleep(50000); now = llgettimeofday(); xc_shadow_control(xc_handle, domid, DOM0_SHADOW_CONTROL_OP_PEEK, NULL, 0, &stats); - + fprintf(stderr, "now= %lld faults= %" PRId32 " dirty= %" PRId32 - " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n", - ((now-start)+500)/1000, + " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n", + ((now-start)+500)/1000, stats.fault_count, stats.dirty_count, stats.dirty_net_count, stats.dirty_block_count); } } - + return -1; } @@ -375,7 +375,7 @@ static int suspend_and_state(int (*suspe return -1; } - if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt)) + if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt)) ERR("Could not get vcpu context"); @@ -383,22 +383,22 @@ static int suspend_and_state(int (*suspe return 0; // success if (info->paused) { - // try unpausing domain, wait, and retest + // try unpausing domain, wait, and retest xc_domain_unpause( xc_handle, dom ); - + ERR("Domain was paused. 
Wait and re-test."); usleep(10000); // 10ms - + goto retry; } if( ++i < 100 ) { ERR("Retry suspend domain."); - usleep(10000); // 10ms + usleep(10000); // 10ms goto retry; } - + ERR("Unable to suspend domain."); return -1; @@ -406,173 +406,173 @@ static int suspend_and_state(int (*suspe /* -** During transfer (or in the state file), all page-table pages must be -** converted into a 'canonical' form where references to actual mfns -** are replaced with references to the corresponding pfns. +** During transfer (or in the state file), all page-table pages must be +** converted into a 'canonical' form where references to actual mfns +** are replaced with references to the corresponding pfns. ** -** This function performs the appropriate conversion, taking into account -** which entries do not require canonicalization (in particular, those -** entries which map the virtual address reserved for the hypervisor). +** This function performs the appropriate conversion, taking into account +** which entries do not require canonicalization (in particular, those +** entries which map the virtual address reserved for the hypervisor). */ -void canonicalize_pagetable(unsigned long type, unsigned long pfn, - const void *spage, void *dpage) -{ - +void canonicalize_pagetable(unsigned long type, unsigned long pfn, + const void *spage, void *dpage) +{ + int i, pte_last, xen_start, xen_end; uint64_t pte; - /* + /* ** We need to determine which entries in this page table hold ** reserved hypervisor mappings. This depends on the current - ** page table type as well as the number of paging levels. + ** page table type as well as the number of paging levels. */ - xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); - + xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 
4 : 8); + if (pt_levels == 2 && type == L2TAB) - xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); - - if (pt_levels == 3 && type == L3TAB) - xen_start = L3_PAGETABLE_ENTRIES_PAE; - - /* - ** in PAE only the L2 mapping the top 1GB contains Xen mappings. + xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); + + if (pt_levels == 3 && type == L3TAB) + xen_start = L3_PAGETABLE_ENTRIES_PAE; + + /* + ** in PAE only the L2 mapping the top 1GB contains Xen mappings. ** We can spot this by looking for the guest linear mapping which - ** Xen always ensures is present in that L2. Guests must ensure - ** that this check will fail for other L2s. + ** Xen always ensures is present in that L2. Guests must ensure + ** that this check will fail for other L2s. */ if (pt_levels == 3 && type == L2TAB) { /* XXX index of the L2 entry in PAE mode which holds the guest LPT */ -#define PAE_GLPT_L2ENTRY (495) - pte = ((uint64_t*)spage)[PAE_GLPT_L2ENTRY]; +#define PAE_GLPT_L2ENTRY (495) + pte = ((uint64_t*)spage)[PAE_GLPT_L2ENTRY]; if(((pte >> PAGE_SHIFT) & 0x0fffffff) == live_p2m[pfn]) - xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; - } - - if (pt_levels == 4 && type == L4TAB) { + xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + } + + if (pt_levels == 4 && type == L4TAB) { /* - ** XXX SMH: should compute these from hvirt_start (which we have) - ** and hvirt_end (which we don't) + ** XXX SMH: should compute these from hvirt_start (which we have) + ** and hvirt_end (which we don't) */ - xen_start = 256; - xen_end = 272; + xen_start = 256; + xen_end = 272; } /* Now iterate through the page table, canonicalizing each PTE */ for (i = 0; i < pte_last; i++ ) { - unsigned long pfn, mfn; - + unsigned long pfn, mfn; + if (pt_levels == 2) pte = ((uint32_t*)spage)[i]; else pte = ((uint64_t*)spage)[i]; - + if (i >= xen_start && i < xen_end) pte = 0; - + if (pte & _PAGE_PRESENT) { - - mfn = (pte >> PAGE_SHIFT) & 0xfffffff; + + mfn = (pte >> PAGE_SHIFT) & 0xfffffff; if 
(!MFN_IS_IN_PSEUDOPHYS_MAP(mfn)) { - /* This will happen if the type info is stale which + /* This will happen if the type info is stale which is quite feasible under live migration */ DPRINTF("PT Race: [%08lx,%d] pte=%llx, mfn=%08lx\n", - type, i, (unsigned long long)pte, mfn); + type, i, (unsigned long long)pte, mfn); pfn = 0; /* zap it - we'll retransmit this page later */ - } else + } else pfn = mfn_to_pfn(mfn); - + pte &= 0xffffff0000000fffULL; pte |= (uint64_t)pfn << PAGE_SHIFT; } - + if (pt_levels == 2) ((uint32_t*)dpage)[i] = pte; else - ((uint64_t*)dpage)[i] = pte; - - } - - return; -} - - - -static unsigned long *xc_map_m2p(int xc_handle, - unsigned long max_mfn, - int prot) -{ + ((uint64_t*)dpage)[i] = pte; + + } + + return; +} + + + +static unsigned long *xc_map_m2p(int xc_handle, + unsigned long max_mfn, + int prot) +{ struct xen_machphys_mfn_list xmml; - privcmd_mmap_t ioctlx; - privcmd_mmap_entry_t *entries; - unsigned long m2p_chunks, m2p_size; - unsigned long *m2p; - int i, rc; - - m2p_size = M2P_SIZE(max_mfn); - m2p_chunks = M2P_CHUNKS(max_mfn); + privcmd_mmap_t ioctlx; + privcmd_mmap_entry_t *entries; + unsigned long m2p_chunks, m2p_size; + unsigned long *m2p; + int i, rc; + + m2p_size = M2P_SIZE(max_mfn); + m2p_chunks = M2P_CHUNKS(max_mfn); xmml.max_extents = m2p_chunks; - if (!(xmml.extent_start = malloc(m2p_chunks * sizeof(unsigned long)))) { - ERR("failed to allocate space for m2p mfns"); - return NULL; - } + if (!(xmml.extent_start = malloc(m2p_chunks * sizeof(unsigned long)))) { + ERR("failed to allocate space for m2p mfns"); + return NULL; + } if (xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) || (xmml.nr_extents != m2p_chunks)) { - ERR("xc_get_m2p_mfns"); + ERR("xc_get_m2p_mfns"); return NULL; } - if ((m2p = mmap(NULL, m2p_size, prot, + if ((m2p = mmap(NULL, m2p_size, prot, MAP_SHARED, xc_handle, 0)) == MAP_FAILED) { - ERR("failed to mmap m2p"); - return NULL; - } - - if (!(entries = malloc(m2p_chunks * 
sizeof(privcmd_mmap_entry_t)))) { - ERR("failed to allocate space for mmap entries"); - return NULL; - } + ERR("failed to mmap m2p"); + return NULL; + } + + if (!(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t)))) { + ERR("failed to allocate space for mmap entries"); + return NULL; + } ioctlx.num = m2p_chunks; - ioctlx.dom = DOMID_XEN; - ioctlx.entry = entries; - - for (i=0; i < m2p_chunks; i++) { - entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); + ioctlx.dom = DOMID_XEN; + ioctlx.entry = entries; + + for (i=0; i < m2p_chunks; i++) { + entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); entries[i].mfn = xmml.extent_start[i]; entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT; } if ((rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx)) < 0) { - ERR("ioctl_mmap failed (rc = %d)", rc); - return NULL; + ERR("ioctl_mmap failed (rc = %d)", rc); + return NULL; } free(xmml.extent_start); - free(entries); - - return m2p; -} - - - -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + free(entries); + + return m2p; +} + + + +int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags, int (*suspend)(int)) { xc_dominfo_t info; int rc = 1, i, j, last_iter, iter = 0; - int live = (flags & XCFLAGS_LIVE); - int debug = (flags & XCFLAGS_DEBUG); + int live = (flags & XCFLAGS_LIVE); + int debug = (flags & XCFLAGS_DEBUG); int sent_last_iter, skip_this_iter; /* The new domain's shared-info frame number. */ unsigned long shared_info_frame; - + /* A copy of the CPU context of the guest. */ vcpu_guest_context_t ctxt; @@ -581,7 +581,7 @@ int xc_linux_save(int xc_handle, int io_ unsigned long *pfn_batch = NULL; /* A temporary mapping, and a copy, of one frame of guest memory. 
*/ - char page[PAGE_SIZE]; + char page[PAGE_SIZE]; /* Double and single indirect references to the live P2M table */ unsigned long *live_p2m_frame_list_list = NULL; @@ -597,14 +597,14 @@ int xc_linux_save(int xc_handle, int io_ unsigned char *region_base = NULL; /* power of 2 order of max_pfn */ - int order_nr; + int order_nr; /* bitmap of pages: - - that should be sent this iteration (unless later marked as skip); + - that should be sent this iteration (unless later marked as skip); - to skip this iteration because already dirty; - to fixup by sending at the end if not already resent; */ unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL; - + xc_shadow_control_stats_t stats; unsigned long needed_to_fix = 0; @@ -612,29 +612,29 @@ int xc_linux_save(int xc_handle, int io_ /* If no explicit control parameters given, use defaults */ - if(!max_iters) - max_iters = DEF_MAX_ITERS; - if(!max_factor) - max_factor = DEF_MAX_FACTOR; - - initialize_mbit_rate(); - - if(!get_platform_info(xc_handle, dom, + if(!max_iters) + max_iters = DEF_MAX_ITERS; + if(!max_factor) + max_factor = DEF_MAX_FACTOR; + + initialize_mbit_rate(); + + if(!get_platform_info(xc_handle, dom, &max_mfn, &hvirt_start, &pt_levels)) { - ERR("Unable to get platform info."); + ERR("Unable to get platform info."); return 1; } if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { ERR("Could not get domain info"); - return 1; + return 1; } if (mlock(&ctxt, sizeof(ctxt))) { ERR("Unable to mlock ctxt"); return 1; } - + /* Only have to worry about vcpu 0 even for SMP */ if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { ERR("Could not get vcpu context"); @@ -648,16 +648,16 @@ int xc_linux_save(int xc_handle, int io_ ERR("Domain is not in a valid Linux guest OS state"); goto out; } - + /* cheesy sanity check */ if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) { - ERR("Invalid state record -- pfn count out of range: %lu", - (info.max_memkb >> (PAGE_SHIFT - 10))); + ERR("Invalid state record -- pfn 
count out of range: %lu", + (info.max_memkb >> (PAGE_SHIFT - 10))); goto out; } - + /* Map the shared info frame */ - if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, shared_info_frame))) { ERR("Couldn't map live_shinfo"); goto out; @@ -665,8 +665,8 @@ int xc_linux_save(int xc_handle, int io_ max_pfn = live_shinfo->arch.max_pfn; - live_p2m_frame_list_list = - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, + live_p2m_frame_list_list = + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, live_shinfo->arch.pfn_to_mfn_frame_list_list); if (!live_p2m_frame_list_list) { @@ -674,24 +674,24 @@ int xc_linux_save(int xc_handle, int io_ goto out; } - live_p2m_frame_list = + live_p2m_frame_list = xc_map_foreign_batch(xc_handle, dom, PROT_READ, live_p2m_frame_list_list, - P2M_FLL_ENTRIES); - + P2M_FLL_ENTRIES); + if (!live_p2m_frame_list) { ERR("Couldn't map p2m_frame_list"); goto out; } - /* Map all the frames of the pfn->mfn table. For migrate to succeed, - the guest must not change which frames are used for this purpose. + /* Map all the frames of the pfn->mfn table. For migrate to succeed, + the guest must not change which frames are used for this purpose. (its not clear why it would want to change them, and we'll be OK from a safety POV anyhow. 
*/ live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ, live_p2m_frame_list, - P2M_FL_ENTRIES); + P2M_FL_ENTRIES); if (!live_p2m) { ERR("Couldn't map p2m table"); @@ -699,25 +699,25 @@ int xc_linux_save(int xc_handle, int io_ } /* Setup the mfn_to_pfn table mapping */ - if(!(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ))) { - ERR("Failed to map live M2P table"); - goto out; - } - - + if(!(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ))) { + ERR("Failed to map live M2P table"); + goto out; + } + + /* Get a local copy of the live_P2M_frame_list */ - if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { + if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { ERR("Couldn't allocate p2m_frame_list array"); goto out; } - memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); + memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); /* Canonicalise the pfn-to-mfn table frame-number list. */ for (i = 0; i < max_pfn; i += ulpp) { - if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) { + if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) { ERR("Frame# in pfn-to-mfn frame list is not in pseudophys"); - ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp, - p2m_frame_list[i/ulpp]); + ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp, + p2m_frame_list[i/ulpp]); goto out; } } @@ -725,31 +725,31 @@ int xc_linux_save(int xc_handle, int io_ /* Domain is still running at this point */ if (live) { - if (xc_shadow_control(xc_handle, dom, + if (xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY, - NULL, 0, NULL ) < 0) { + NULL, 0, NULL ) < 0) { ERR("Couldn't enable shadow mode"); goto out; } - + last_iter = 0; - + } else { - + /* This is a non-live suspend. 
Issue the call back to get the domain suspended */ - + last_iter = 1; - + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) { ERR("Domain appears not to have suspended"); goto out; } - + } /* pretend we sent all the pages last iteration */ - sent_last_iter = max_pfn; + sent_last_iter = max_pfn; /* calculate the power of 2 order of max_pfn, e.g. @@ -758,15 +758,15 @@ int xc_linux_save(int xc_handle, int io_ continue; /* Setup to_send / to_fix and to_skip bitmaps */ - to_send = malloc(BITMAP_SIZE); - to_fix = calloc(1, BITMAP_SIZE); - to_skip = malloc(BITMAP_SIZE); - + to_send = malloc(BITMAP_SIZE); + to_fix = calloc(1, BITMAP_SIZE); + to_skip = malloc(BITMAP_SIZE); + if (!to_send || !to_fix || !to_skip) { ERR("Couldn't allocate to_send array"); goto out; } - + memset(to_send, 0xff, BITMAP_SIZE); if (mlock(to_send, BITMAP_SIZE)) { @@ -779,7 +779,7 @@ int xc_linux_save(int xc_handle, int io_ ERR("Unable to mlock to_skip"); return 1; } - + analysis_phase(xc_handle, dom, max_pfn, to_skip, 0); /* We want zeroed memory so use calloc rather than malloc. */ @@ -787,7 +787,7 @@ int xc_linux_save(int xc_handle, int io_ pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(unsigned long)); if ((pfn_type == NULL) || (pfn_batch == NULL)) { - ERR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); + ERR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); errno = ENOMEM; goto out; } @@ -803,12 +803,12 @@ int xc_linux_save(int xc_handle, int io_ */ { int err=0; - unsigned long mfn; + unsigned long mfn; for (i = 0; i < max_pfn; i++) { mfn = live_p2m[i]; - if((mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i)) { - DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i, + if((mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i)) { + DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i, mfn, mfn_to_pfn(mfn)); err++; } @@ -819,16 +819,16 @@ int xc_linux_save(int xc_handle, int io_ /* Start writing out the saved-domain record. 
*/ - if(!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { + if(!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { ERR("write: max_pfn"); goto out; } - if(!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { + if(!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { ERR("write: p2m_frame_list"); goto out; } - + print_stats(xc_handle, dom, 0, &stats, 0); /* Now write out each data page, canonicalising page tables as we go... */ @@ -853,8 +853,8 @@ int xc_linux_save(int xc_handle, int io_ DPRINTF("\b\b\b\b%3d%%", this_pc); prev_pc = this_pc; } - - /* slightly wasteful to peek the whole array evey time, + + /* slightly wasteful to peek the whole array evey time, but this is fast enough for the moment. */ if (!last_iter && xc_shadow_control( xc_handle, dom, DOM0_SHADOW_CONTROL_OP_PEEK, @@ -862,7 +862,7 @@ int xc_linux_save(int xc_handle, int io_ ERR("Error peeking shadow bitmap"); goto out; } - + /* load pfn_type[] with the mfn of all the pages we're doing in this batch. */ @@ -873,11 +873,11 @@ int xc_linux_save(int xc_handle, int io_ if (debug) { DPRINTF("%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n", iter, (unsigned long)n, live_p2m[n], - test_bit(n, to_send), + test_bit(n, to_send), mfn_to_pfn(live_p2m[n]&0xFFFFF)); } - - if (!last_iter && test_bit(n, to_send)&& test_bit(n, to_skip)) + + if (!last_iter && test_bit(n, to_send)&& test_bit(n, to_skip)) skip_this_iter++; /* stats keeping */ if (!((test_bit(n, to_send) && !test_bit(n, to_skip)) || @@ -885,13 +885,13 @@ int xc_linux_save(int xc_handle, int io_ (test_bit(n, to_fix) && last_iter))) continue; - /* + /* ** we get here if: ** 1. page is marked to_send & hasn't already been re-dirtied ** 2. (ignore to_skip in last iteration) ** 3. 
add in pages that still need fixup (net bufs) */ - + pfn_batch[batch] = n; pfn_type[batch] = live_p2m[n]; @@ -914,80 +914,80 @@ int xc_linux_save(int xc_handle, int io_ iter,n,pfn_type[batch]); } - clear_bit(n, to_fix); - + clear_bit(n, to_fix); + batch++; } - + if (batch == 0) goto skip; /* vanishingly unlikely... */ - + if ((region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ, pfn_type, batch)) == 0) { + xc_handle, dom, PROT_READ, pfn_type, batch)) == 0) { ERR("map batch failed"); goto out; } - + if (xc_get_pfn_type_batch(xc_handle, dom, batch, pfn_type)) { ERR("get_pfn_type_batch failed"); goto out; } - + for (j = 0; j < batch; j++) { if ((pfn_type[j] & LTAB_MASK) == XTAB) { DPRINTF("type fail: page %i mfn %08lx\n", j, pfn_type[j]); continue; } - - if (debug) + + if (debug) fprintf(stderr, "%d pfn= %08lx mfn= %08lx [mfn]= %08lx" " sum= %08lx\n", - iter, + iter, (pfn_type[j] & LTAB_MASK) | pfn_batch[j], pfn_type[j], mfn_to_pfn(pfn_type[j]&(~LTAB_MASK)), csum_page(region_base + (PAGE_SIZE*j))); - + /* canonicalise mfn->pfn */ pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j]; } - if(!write_exact(io_fd, &batch, sizeof(unsigned int))) { + if(!write_exact(io_fd, &batch, sizeof(unsigned int))) { ERR("Error when writing to state file (2)"); goto out; } - if(!write_exact(io_fd, pfn_type, sizeof(unsigned long)*j)) { + if(!write_exact(io_fd, pfn_type, sizeof(unsigned long)*j)) { ERR("Error when writing to state file (3)"); goto out; } - + /* entering this loop, pfn_type is now in pfns (Not mfns) */ for (j = 0; j < batch; j++) { - - unsigned long pfn = pfn_type[j] & ~LTAB_MASK; - unsigned long pagetype = pfn_type[j] & LTAB_MASK; - void *spage = (void *) region_base + (PAGE_SIZE*j); + + unsigned long pfn = pfn_type[j] & ~LTAB_MASK; + unsigned long pagetype = pfn_type[j] & LTAB_MASK; + void *spage = (void *) region_base + (PAGE_SIZE*j); /* write out pages in batch */ if (pagetype == XTAB) continue; - pagetype &= LTABTYPE_MASK; - + pagetype &= LTABTYPE_MASK; 
+ if (pagetype >= L1TAB && pagetype <= L4TAB) { - + /* We have a pagetable page: need to rewrite it. */ - canonicalize_pagetable(pagetype, pfn, spage, page); - + canonicalize_pagetable(pagetype, pfn, spage, page); + if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) { ERR("Error when writing to state file (4)"); goto out; } - - } else { + + } else { /* We have a normal page: just write it directly. */ if (ratewrite(io_fd, spage, PAGE_SIZE) != PAGE_SIZE) { @@ -996,36 +996,36 @@ int xc_linux_save(int xc_handle, int io_ } } } /* end of the write out for this batch */ - + sent_this_iter += batch; munmap(region_base, batch*PAGE_SIZE); - + } /* end of this while loop for this iteration */ - - skip: - + + skip: + total_sent += sent_this_iter; - DPRINTF("\r %d: sent %d, skipped %d, ", + DPRINTF("\r %d: sent %d, skipped %d, ", iter, sent_this_iter, skip_this_iter ); if (last_iter) { print_stats( xc_handle, dom, sent_this_iter, &stats, 1); - DPRINTF("Total pages sent= %ld (%.2fx)\n", + DPRINTF("Total pages sent= %ld (%.2fx)\n", total_sent, ((float)total_sent)/max_pfn ); DPRINTF("(of which %ld were fixups)\n", needed_to_fix ); - } + } if (last_iter && debug){ int minusone = -1; - memset(to_send, 0xff, BITMAP_SIZE); + memset(to_send, 0xff, BITMAP_SIZE); debug = 0; fprintf(stderr, "Entering debug resend-all mode\n"); - + /* send "-1" to put receiver into debug mode */ - if(!write_exact(io_fd, &minusone, sizeof(int))) { + if(!write_exact(io_fd, &minusone, sizeof(int))) { ERR("Error when writing to state file (6)"); goto out; } @@ -1033,34 +1033,34 @@ int xc_linux_save(int xc_handle, int io_ continue; } - if (last_iter) break; + if (last_iter) break; if (live) { - if( + if( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || (iter >= max_iters) || (sent_this_iter+skip_this_iter < 50) || - (total_sent > max_pfn*max_factor) ) { + (total_sent > max_pfn*max_factor) ) { DPRINTF("Start last iteration\n"); last_iter = 1; - + if (suspend_and_state(suspend, xc_handle, io_fd, dom, 
&info, &ctxt)) { ERR("Domain appears not to have suspended"); goto out; } - - DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n", - info.shared_info_frame, - (unsigned long)ctxt.user_regs.eip, + + DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n", + info.shared_info_frame, + (unsigned long)ctxt.user_regs.eip, (unsigned long)ctxt.user_regs.edx); - } - + } + if (xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_CLEAN, - to_send, max_pfn, &stats ) != max_pfn) { + to_send, max_pfn, &stats ) != max_pfn) { ERR("Error flushing shadow PT"); goto out; } @@ -1068,7 +1068,7 @@ int xc_linux_save(int xc_handle, int io_ sent_last_iter = sent_this_iter; print_stats(xc_handle, dom, sent_this_iter, &stats, 1); - + } @@ -1077,8 +1077,8 @@ int xc_linux_save(int xc_handle, int io_ DPRINTF("All memory is saved\n"); /* Zero terminate */ - i = 0; - if (!write_exact(io_fd, &i, sizeof(int))) { + i = 0; + if (!write_exact(io_fd, &i, sizeof(int))) { ERR("Error when writing to state file (6)"); goto out; } @@ -1086,18 +1086,18 @@ int xc_linux_save(int xc_handle, int io_ /* Send through a list of all the PFNs that were not in map at the close */ { unsigned int i,j; - unsigned long pfntab[1024]; + unsigned long pfntab[1024]; for (i = 0, j = 0; i < max_pfn; i++) { if (!is_mapped(live_p2m[i])) j++; } - - if(!write_exact(io_fd, &j, sizeof(unsigned int))) { + + if(!write_exact(io_fd, &j, sizeof(unsigned int))) { ERR("Error when writing to state file (6a)"); goto out; - } - + } + for (i = 0, j = 0; i < max_pfn; ) { if (!is_mapped(live_p2m[i])) @@ -1105,16 +1105,16 @@ int xc_linux_save(int xc_handle, int io_ i++; if (j == 1024 || i == max_pfn) { - if(!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) { + if(!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) { ERR("Error when writing to state file (6b)"); goto out; - } + } j = 0; } } } - + /* Canonicalise the suspend-record frame number. 
*/ if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) ){ ERR("Suspend record is not in range of pseudophys map"); @@ -1138,7 +1138,7 @@ int xc_linux_save(int xc_handle, int io_ PAGE_SHIFT; if (!write_exact(io_fd, &ctxt, sizeof(ctxt)) || - !write_exact(io_fd, live_shinfo, PAGE_SIZE)) { + !write_exact(io_fd, live_shinfo, PAGE_SIZE)) { ERR("Error when writing to state file (1)"); goto out; } @@ -1149,26 +1149,26 @@ int xc_linux_save(int xc_handle, int io_ out: if (live) { - if(xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF, - NULL, 0, NULL ) < 0) { + if(xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF, + NULL, 0, NULL ) < 0) { DPRINTF("Warning - couldn't disable shadow mode"); } } - + if (live_shinfo) munmap(live_shinfo, PAGE_SIZE); - - if (live_p2m_frame_list_list) - munmap(live_p2m_frame_list_list, PAGE_SIZE); - - if (live_p2m_frame_list) - munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); - - if(live_p2m) - munmap(live_p2m, P2M_SIZE); - - if(live_m2p) - munmap(live_m2p, M2P_SIZE(max_mfn)); + + if (live_p2m_frame_list_list) + munmap(live_p2m_frame_list_list, PAGE_SIZE); + + if (live_p2m_frame_list) + munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); + + if(live_p2m) + munmap(live_p2m, P2M_SIZE); + + if(live_m2p) + munmap(live_m2p, M2P_SIZE(max_mfn)); free(pfn_type); free(pfn_batch); diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_load_aout9.c --- a/tools/libxc/xc_load_aout9.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_load_aout9.c Tue Apr 18 09:35:40 2006 -0600 @@ -22,7 +22,7 @@ struct Exec *get_header(const char *, un struct Exec *get_header(const char *, unsigned long, struct Exec *); -int +int probe_aout9( const char *image, unsigned long image_size, @@ -40,7 +40,7 @@ probe_aout9( return 0; } -static int +static int parseaout9image( const char *image, unsigned long image_size, @@ -74,7 +74,7 @@ parseaout9image( return 0; } -static int +static int loadaout9image( const char *image, unsigned long image_size, @@ 
-123,7 +123,7 @@ copyout( if(chunksz > PAGE_SIZE - pgoff) chunksz = PAGE_SIZE - pgoff; - pg = xc_map_foreign_range(xch, dom, PAGE_SIZE, PROT_WRITE, + pg = xc_map_foreign_range(xch, dom, PAGE_SIZE, PROT_WRITE, parray[off>>PAGE_SHIFT]); memcpy(pg + pgoff, buf, chunksz); munmap(pg, PAGE_SIZE); diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_load_bin.c --- a/tools/libxc/xc_load_bin.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_load_bin.c Tue Apr 18 09:35:40 2006 -0600 @@ -161,7 +161,7 @@ findtable(const char *image, unsigned lo return NULL; } -static int parsebinimage(const char *image, +static int parsebinimage(const char *image, unsigned long image_size, struct domain_setup_info *dsi) { diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_load_elf.c --- a/tools/libxc/xc_load_elf.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_load_elf.c Tue Apr 18 09:35:40 2006 -0600 @@ -51,7 +51,7 @@ static inline int is_loadable_phdr(Elf_P ((phdr->p_flags & (PF_W|PF_X)) != 0)); } -static int parseelfimage(const char *image, +static int parseelfimage(const char *image, unsigned long elfsize, struct domain_setup_info *dsi) { @@ -102,10 +102,10 @@ static int parseelfimage(const char *ima ERROR("ELF image has no section-header strings table (shstrtab)."); return -EINVAL; } - shdr = (Elf_Shdr *)(image + ehdr->e_shoff + + shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (ehdr->e_shstrndx*ehdr->e_shentsize)); shstrtab = image + shdr->sh_offset; - + /* Find the special '__xen_guest' section and check its contents. 
*/ for ( h = 0; h < ehdr->e_shnum; h++ ) { @@ -148,7 +148,7 @@ static int parseelfimage(const char *ima dsi->xen_guest_string = guestinfo; - for ( h = 0; h < ehdr->e_phnum; h++ ) + for ( h = 0; h < ehdr->e_phnum; h++ ) { phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize)); if ( !is_loadable_phdr(phdr) ) @@ -159,8 +159,8 @@ static int parseelfimage(const char *ima kernend = phdr->p_paddr + phdr->p_memsz; } - if ( (kernstart > kernend) || - (ehdr->e_entry < kernstart) || + if ( (kernstart > kernend) || + (ehdr->e_entry < kernstart) || (ehdr->e_entry > kernend) ) { ERROR("Malformed ELF image."); @@ -196,12 +196,12 @@ loadelfimage( char *va; unsigned long pa, done, chunksz; - for ( h = 0; h < ehdr->e_phnum; h++ ) + for ( h = 0; h < ehdr->e_phnum; h++ ) { phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize)); if ( !is_loadable_phdr(phdr) ) continue; - + for ( done = 0; done < phdr->p_filesz; done += chunksz ) { pa = (phdr->p_paddr + done) - dsi->v_start; @@ -265,7 +265,7 @@ loadelfsymtab( shdr = (Elf_Shdr *)(p + sizeof(int) + sizeof(Elf_Ehdr)); memcpy(shdr, image + ehdr->e_shoff, ehdr->e_shnum * sizeof(Elf_Shdr)); - for ( h = 0; h < ehdr->e_shnum; h++ ) + for ( h = 0; h < ehdr->e_shnum; h++ ) { if ( shdr[h].sh_type == SHT_STRTAB ) { diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_misc.c --- a/tools/libxc/xc_misc.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_misc.c Tue Apr 18 09:35:40 2006 -0600 @@ -1,6 +1,6 @@ /****************************************************************************** * xc_misc.c - * + * * Miscellaneous control interface functions. 
*/ @@ -21,7 +21,7 @@ int xc_interface_close(int xc_handle) int xc_readconsolering(int xc_handle, char **pbuffer, - unsigned int *pnr_chars, + unsigned int *pnr_chars, int clear) { int ret; @@ -46,14 +46,14 @@ int xc_readconsolering(int xc_handle, safe_munlock(buffer, nr_chars); return ret; -} +} int xc_physinfo(int xc_handle, xc_physinfo_t *put_info) { int ret; DECLARE_DOM0_OP; - + op.cmd = DOM0_PHYSINFO; op.interface_version = DOM0_INTERFACE_VERSION; @@ -70,15 +70,15 @@ int xc_sched_id(int xc_handle, { int ret; DECLARE_DOM0_OP; - + op.cmd = DOM0_SCHED_ID; op.interface_version = DOM0_INTERFACE_VERSION; - + if ( (ret = do_dom0_op(xc_handle, &op)) != 0 ) return ret; - + *sched_id = op.u.sched_id.sched_id; - + return 0; } @@ -100,9 +100,9 @@ int xc_perfc_control(int xc_handle, long long xc_msr_read(int xc_handle, int cpu_mask, int msr) { - int rc; + int rc; DECLARE_DOM0_OP; - + op.cmd = DOM0_MSR; op.u.msr.write = 0; op.u.msr.msr = msr; @@ -116,9 +116,9 @@ int xc_msr_write(int xc_handle, int cpu_ int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low, unsigned int high) { - int rc; + int rc; DECLARE_DOM0_OP; - + op.cmd = DOM0_MSR; op.u.msr.write = 1; op.u.msr.msr = msr; @@ -127,7 +127,7 @@ int xc_msr_write(int xc_handle, int cpu_ op.u.msr.in2 = high; rc = do_dom0_op(xc_handle, &op); - + return rc; } diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_physdev.c --- a/tools/libxc/xc_physdev.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_physdev.c Tue Apr 18 09:35:40 2006 -0600 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_physdev.c - * + * * API for manipulating physical-device access permissions. 
- * + * * Copyright (c) 2004, Rolf Neugebauer (Intel Research Cambridge) * Copyright (c) 2004, K A Fraser (University of Cambridge) */ diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_private.c Tue Apr 18 09:35:40 2006 -0600 @@ -1,6 +1,6 @@ /****************************************************************************** * xc_private.c - * + * * Helper functions for the rest of the library. */ @@ -10,7 +10,7 @@ void *xc_map_foreign_batch(int xc_handle void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot, unsigned long *arr, int num ) { - privcmd_mmapbatch_t ioctlx; + privcmd_mmapbatch_t ioctlx; void *addr; addr = mmap(NULL, num*PAGE_SIZE, prot, MAP_SHARED, xc_handle, 0); if ( addr == MAP_FAILED ) @@ -38,8 +38,8 @@ void *xc_map_foreign_range(int xc_handle int size, int prot, unsigned long mfn ) { - privcmd_mmap_t ioctlx; - privcmd_mmap_entry_t entry; + privcmd_mmap_t ioctlx; + privcmd_mmap_entry_t entry; void *addr; addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0); if ( addr == MAP_FAILED ) @@ -64,7 +64,7 @@ void *xc_map_foreign_range(int xc_handle /*******************/ /* NB: arr must be mlock'ed */ -int xc_get_pfn_type_batch(int xc_handle, +int xc_get_pfn_type_batch(int xc_handle, uint32_t dom, int num, unsigned long *arr) { DECLARE_DOM0_OP; @@ -76,8 +76,8 @@ int xc_get_pfn_type_batch(int xc_handle, } #define GETPFN_ERR (~0U) -unsigned int get_pfn_type(int xc_handle, - unsigned long mfn, +unsigned int get_pfn_type(int xc_handle, + unsigned long mfn, uint32_t dom) { DECLARE_DOM0_OP; @@ -119,7 +119,7 @@ int xc_mmuext_op( out1: return ret; -} +} static int flush_mmu_updates(int xc_handle, xc_mmu_t *mmu) { @@ -166,7 +166,7 @@ xc_mmu_t *xc_init_mmu_updates(int xc_han return mmu; } -int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, +int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, unsigned long long ptr, unsigned long long val) { 
mmu->updates[mmu->idx].ptr = ptr; @@ -288,7 +288,7 @@ int xc_memory_op(int xc_handle, out1: return ret; -} +} long long xc_domain_get_cpu_usage( int xc_handle, domid_t domid, int vcpu ) @@ -308,8 +308,8 @@ long long xc_domain_get_cpu_usage( int x int xc_get_pfn_list(int xc_handle, - uint32_t domid, - unsigned long *pfn_buf, + uint32_t domid, + unsigned long *pfn_buf, unsigned long max_pfns) { DECLARE_DOM0_OP; @@ -327,7 +327,7 @@ int xc_get_pfn_list(int xc_handle, { PERROR("xc_get_pfn_list: pfn_buf mlock failed"); return -1; - } + } ret = do_dom0_op(xc_handle, &op); @@ -356,13 +356,13 @@ long xc_get_tot_pages(int xc_handle, uin DECLARE_DOM0_OP; op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)domid; - return (do_dom0_op(xc_handle, &op) < 0) ? + return (do_dom0_op(xc_handle, &op) < 0) ? -1 : op.u.getdomaininfo.tot_pages; } int xc_copy_to_domain_page(int xc_handle, uint32_t domid, - unsigned long dst_pfn, + unsigned long dst_pfn, const char *src_page) { void *vaddr = xc_map_foreign_range( @@ -481,7 +481,7 @@ unsigned long xc_make_page_below_4G( { unsigned long new_mfn; - if ( xc_domain_memory_decrease_reservation( + if ( xc_domain_memory_decrease_reservation( xc_handle, domid, 1, 0, &mfn) != 0 ) { fprintf(stderr,"xc_make_page_below_4G decrease failed. 
mfn=%lx\n",mfn); diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_private.h --- a/tools/libxc/xc_private.h Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_private.h Tue Apr 18 09:35:40 2006 -0600 @@ -57,7 +57,7 @@ static inline void safe_munlock(const vo } static inline int do_privcmd(int xc_handle, - unsigned int cmd, + unsigned int cmd, unsigned long data) { return ioctl(xc_handle, cmd, data); @@ -67,7 +67,7 @@ static inline int do_xen_hypercall(int x privcmd_hypercall_t *hypercall) { return do_privcmd(xc_handle, - IOCTL_PRIVCMD_HYPERCALL, + IOCTL_PRIVCMD_HYPERCALL, (unsigned long)hypercall); } @@ -78,7 +78,7 @@ static inline int do_xen_version(int xc_ hypercall.op = __HYPERVISOR_xen_version; hypercall.arg[0] = (unsigned long) cmd; hypercall.arg[1] = (unsigned long) dest; - + return do_xen_hypercall(xc_handle, &hypercall); } @@ -121,13 +121,13 @@ typedef struct privcmd_mmap_entry { unsigned long va; unsigned long mfn; unsigned long npages; -} privcmd_mmap_entry_t; +} privcmd_mmap_entry_t; typedef struct privcmd_mmap { int num; domid_t dom; privcmd_mmap_entry_t *entry; -} privcmd_mmap_t; +} privcmd_mmap_t; */ #endif /* __XC_PRIVATE_H__ */ diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_ptrace.c --- a/tools/libxc/xc_ptrace.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_ptrace.c Tue Apr 18 09:35:40 2006 -0600 @@ -46,7 +46,7 @@ static vcpu_guest_context_t ctxt[MAX static vcpu_guest_context_t ctxt[MAX_VIRT_CPUS]; extern int ffsll(long long int); -#define FOREACH_CPU(cpumap, i) for ( cpumap = online_cpumap; (i = ffsll(cpumap)); cpumap &= ~(1 << (index - 1)) ) +#define FOREACH_CPU(cpumap, i) for ( cpumap = online_cpumap; (i = ffsll(cpumap)); cpumap &= ~(1 << (index - 1)) ) static int @@ -58,22 +58,22 @@ fetch_regs(int xc_handle, int cpu, int * if (online) *online = 0; if ( !(regs_valid & (1 << cpu)) ) - { - retval = xc_vcpu_getcontext(xc_handle, current_domid, - cpu, &ctxt[cpu]); - if ( retval ) + { + retval = xc_vcpu_getcontext(xc_handle, 
current_domid, + cpu, &ctxt[cpu]); + if ( retval ) goto done; - regs_valid |= (1 << cpu); - - } - if ( online == NULL ) - goto done; - - retval = xc_vcpu_getinfo(xc_handle, current_domid, cpu, &info); - *online = info.online; - + regs_valid |= (1 << cpu); + + } + if ( online == NULL ) + goto done; + + retval = xc_vcpu_getinfo(xc_handle, current_domid, cpu, &info); + *online = info.online; + done: - return retval; + return retval; } static struct thr_ev_handlers { @@ -81,8 +81,8 @@ static struct thr_ev_handlers { thr_ev_handler_t td_death; } handlers; -void -xc_register_event_handler(thr_ev_handler_t h, +void +xc_register_event_handler(thr_ev_handler_t h, td_event_e e) { switch (e) { @@ -97,7 +97,7 @@ xc_register_event_handler(thr_ev_handler } } -static inline int +static inline int paging_enabled(vcpu_guest_context_t *v) { unsigned long cr0 = v->ctrlreg[0]; @@ -114,19 +114,19 @@ get_online_cpumap(int xc_handle, dom0_ge get_online_cpumap(int xc_handle, dom0_getdomaininfo_t *d, cpumap_t *cpumap) { int i, online, retval; - + *cpumap = 0; for (i = 0; i <= d->max_vcpu_id; i++) { if ((retval = fetch_regs(xc_handle, i, &online))) return retval; if (online) - *cpumap |= (1 << i); - } - + *cpumap |= (1 << i); + } + return 0; } -/* +/* * Notify GDB of any vcpus that have come online or gone offline * update online_cpumap * @@ -137,7 +137,7 @@ online_vcpus_changed(cpumap_t cpumap) { cpumap_t changed_cpumap = cpumap ^ online_cpumap; int index; - + while ( (index = ffsll(changed_cpumap)) ) { if ( cpumap & (1 << (index - 1)) ) { @@ -149,7 +149,7 @@ online_vcpus_changed(cpumap_t cpumap) changed_cpumap &= ~(1 << (index - 1)); } online_cpumap = cpumap; - + } /* --------------------- */ @@ -172,7 +172,7 @@ map_domain_va_32( static unsigned long pde_phys[MAX_VIRT_CPUS]; static uint32_t *pde_virt[MAX_VIRT_CPUS]; static unsigned long page_phys[MAX_VIRT_CPUS]; - static uint32_t *page_virt[MAX_VIRT_CPUS]; + static uint32_t *page_virt[MAX_VIRT_CPUS]; static int prev_perm[MAX_VIRT_CPUS]; 
if (ctxt[cpu].ctrlreg[3] == 0) @@ -221,7 +221,7 @@ map_domain_va_32( return NULL; } prev_perm[cpu] = perm; - } + } return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK)); } @@ -284,7 +284,7 @@ map_domain_va_64( if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */ return map_domain_va_32(xc_handle, cpu, guest_va, perm); - l4 = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, + l4 = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT); if ( l4 == NULL ) return NULL; @@ -349,7 +349,7 @@ map_domain_va( mode = MODE_64; else if ( strstr(caps, "-x86_32p") ) mode = MODE_PAE; - else if ( strstr(caps, "-x86_32") ) + else if ( strstr(caps, "-x86_32") ) mode = MODE_32; } @@ -374,7 +374,7 @@ map_domain_va( if (fetch_regs(xc_handle, cpu, NULL)) return NULL; - if (!paging_enabled(&ctxt[cpu])) { + if (!paging_enabled(&ctxt[cpu])) { static void * v; unsigned long page; @@ -383,9 +383,9 @@ map_domain_va( page = page_array[va >> PAGE_SHIFT] << PAGE_SHIFT; - v = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, + v = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, perm, page >> PAGE_SHIFT); - + if ( v == NULL ) return NULL; @@ -403,7 +403,7 @@ map_domain_va( int control_c_pressed_flag = 0; -static int +static int __xc_waitdomain( int xc_handle, int domain, @@ -420,7 +420,7 @@ __xc_waitdomain( op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = domain; - + retry: retval = do_dom0_op(xc_handle, &op); if ( retval || (op.u.getdomaininfo.domain != domain) ) @@ -429,7 +429,7 @@ __xc_waitdomain( goto done; } *status = op.u.getdomaininfo.flags; - + if ( options & WNOHANG ) goto done; @@ -472,16 +472,16 @@ xc_ptrace( void *data = (char *)edata; cpu = (request != PTRACE_ATTACH) ? 
domid_tid : 0; - + switch ( request ) - { + { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: if (current_isfile) - guest_va = (unsigned long *)map_domain_va_core(current_domid, + guest_va = (unsigned long *)map_domain_va_core(current_domid, cpu, addr, ctxt); else - guest_va = (unsigned long *)map_domain_va(xc_handle, + guest_va = (unsigned long *)map_domain_va(xc_handle, cpu, addr, PROT_READ); if ( guest_va == NULL ) goto out_error; @@ -492,26 +492,26 @@ xc_ptrace( case PTRACE_POKEDATA: /* XXX assume that all CPUs have the same address space */ if (current_isfile) - guest_va = (unsigned long *)map_domain_va_core(current_domid, + guest_va = (unsigned long *)map_domain_va_core(current_domid, cpu, addr, ctxt); else - guest_va = (unsigned long *)map_domain_va(xc_handle, + guest_va = (unsigned long *)map_domain_va(xc_handle, cpu, addr, PROT_READ|PROT_WRITE); - if ( guest_va == NULL ) + if ( guest_va == NULL ) goto out_error; *guest_va = (unsigned long)data; break; case PTRACE_GETREGS: - if (!current_isfile && fetch_regs(xc_handle, cpu, NULL)) + if (!current_isfile && fetch_regs(xc_handle, cpu, NULL)) goto out_error; - SET_PT_REGS(pt, ctxt[cpu].user_regs); + SET_PT_REGS(pt, ctxt[cpu].user_regs); memcpy(data, &pt, sizeof(struct gdb_regs)); break; case PTRACE_GETFPREGS: case PTRACE_GETFPXREGS: - if (!current_isfile && fetch_regs(xc_handle, cpu, NULL)) + if (!current_isfile && fetch_regs(xc_handle, cpu, NULL)) goto out_error; memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt)); break; @@ -520,7 +520,7 @@ xc_ptrace( if (current_isfile) goto out_unspported; /* XXX not yet supported */ SET_XC_REGS(((struct gdb_regs *)data), ctxt[cpu].user_regs); - if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, + if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]))) goto out_error_dom0; break; @@ -531,8 +531,8 @@ xc_ptrace( /* XXX we can still have problems if the user switches threads * during single-stepping - but that just seems retarded */ - 
ctxt[cpu].user_regs.eflags |= PSL_T; - if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, + ctxt[cpu].user_regs.eflags |= PSL_T; + if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]))) goto out_error_dom0; /* FALLTHROUGH */ @@ -545,13 +545,13 @@ xc_ptrace( { FOREACH_CPU(cpumap, index) { cpu = index - 1; - if (fetch_regs(xc_handle, cpu, NULL)) + if (fetch_regs(xc_handle, cpu, NULL)) goto out_error; /* Clear trace flag */ - if ( ctxt[cpu].user_regs.eflags & PSL_T ) + if ( ctxt[cpu].user_regs.eflags & PSL_T ) { ctxt[cpu].user_regs.eflags &= ~PSL_T; - if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, + if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]))) goto out_error_dom0; } @@ -566,7 +566,7 @@ xc_ptrace( goto out_error_dom0; } regs_valid = 0; - if ((retval = xc_domain_unpause(xc_handle, current_domid > 0 ? + if ((retval = xc_domain_unpause(xc_handle, current_domid > 0 ? current_domid : -current_domid))) goto out_error_dom0; break; @@ -627,7 +627,7 @@ xc_ptrace( } -int +int xc_waitdomain( int xc_handle, int domain, diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_ptrace.h --- a/tools/libxc/xc_ptrace.h Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_ptrace.h Tue Apr 18 09:35:40 2006 -0600 @@ -107,7 +107,7 @@ struct gdb_regs { long esi; /* 12 */ long edi; /* 16 */ long ebp; /* 20 */ - long eax; /* 24 */ + long eax; /* 24 */ int xds; /* 28 */ int xes; /* 32 */ int xfs; /* 36 */ @@ -116,7 +116,7 @@ struct gdb_regs { long eip; /* 48 */ int xcs; /* 52 */ long eflags; /* 56 */ - long esp; /* 60 */ + long esp; /* 60 */ int xss; /* 64 */ }; @@ -169,20 +169,20 @@ typedef void (*thr_ev_handler_t)(long); typedef void (*thr_ev_handler_t)(long); void xc_register_event_handler( - thr_ev_handler_t h, + thr_ev_handler_t h, td_event_e e); long xc_ptrace( int xc_handle, - enum __ptrace_request request, + enum __ptrace_request request, uint32_t domid, - long addr, + long addr, long data); int xc_waitdomain( 
int xc_handle, - int domain, - int *status, + int domain, + int *status, int options); #endif /* XC_PTRACE */ diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_ptrace_core.c --- a/tools/libxc/xc_ptrace_core.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_ptrace_core.c Tue Apr 18 09:35:40 2006 -0600 @@ -39,7 +39,7 @@ map_domain_va_core(unsigned long domfd, static unsigned long page_phys[MAX_VIRT_CPUS]; static unsigned long *page_virt[MAX_VIRT_CPUS]; - if (cr3[cpu] != cr3_phys[cpu]) + if (cr3[cpu] != cr3_phys[cpu]) { cr3_phys[cpu] = cr3[cpu]; if (cr3_virt[cpu]) @@ -53,12 +53,12 @@ map_domain_va_core(unsigned long domfd, return NULL; } cr3_virt[cpu] = v; - } + } if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */ return NULL; if (ctxt[cpu].flags & VGCF_HVM_GUEST) pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT; - if (pde != pde_phys[cpu]) + if (pde != pde_phys[cpu]) { pde_phys[cpu] = pde; if (pde_virt[cpu]) @@ -74,7 +74,7 @@ map_domain_va_core(unsigned long domfd, return NULL; if (ctxt[cpu].flags & VGCF_HVM_GUEST) page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT; - if (page != page_phys[cpu]) + if (page != page_phys[cpu]) { page_phys[cpu] = page; if (page_virt[cpu]) @@ -89,11 +89,11 @@ map_domain_va_core(unsigned long domfd, return NULL; } page_virt[cpu] = v; - } + } return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK)); } -int +int xc_waitdomain_core( int xc_handle, int domfd, @@ -122,7 +122,7 @@ xc_waitdomain_core( nr_vcpus = header.xch_nr_vcpus; pages_offset = header.xch_pages_offset; - if (read(domfd, ctxt, sizeof(vcpu_guest_context_t)*nr_vcpus) != + if (read(domfd, ctxt, sizeof(vcpu_guest_context_t)*nr_vcpus) != sizeof(vcpu_guest_context_t)*nr_vcpus) return -1; @@ -134,7 +134,7 @@ xc_waitdomain_core( printf("Could not allocate p2m_array\n"); return -1; } - if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) != + if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) != sizeof(unsigned long)*nr_pages) return 
-1; diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_sedf.c --- a/tools/libxc/xc_sedf.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_sedf.c Tue Apr 18 09:35:40 2006 -0600 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_sedf.c - * + * * API for manipulating parameters of the Simple EDF scheduler. - * + * * changes by Stephan Diestelhorst * based on code * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge. @@ -35,7 +35,7 @@ int xc_sedf_domain_get(int xc_handle, ui int ret; struct sedf_adjdom *p = &op.u.adjustdom.u.sedf; - op.cmd = DOM0_ADJUSTDOM; + op.cmd = DOM0_ADJUSTDOM; op.u.adjustdom.domain = (domid_t)domid; op.u.adjustdom.sched_id = SCHED_SEDF; op.u.adjustdom.direction = SCHED_INFO_GET; diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xc_tbuf.c --- a/tools/libxc/xc_tbuf.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xc_tbuf.c Tue Apr 18 09:35:40 2006 -0600 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_tbuf.c - * + * * API for manipulating and accessing trace buffer parameters - * + * * Copyright (c) 2005, Rob Gardner */ @@ -18,7 +18,7 @@ int xc_tbuf_enable(int xc_handle, int en op.u.tbufcontrol.op = DOM0_TBUF_ENABLE; else op.u.tbufcontrol.op = DOM0_TBUF_DISABLE; - + return xc_dom0_op(xc_handle, &op); } @@ -30,10 +30,10 @@ int xc_tbuf_set_size(int xc_handle, uint op.interface_version = DOM0_INTERFACE_VERSION; op.u.tbufcontrol.op = DOM0_TBUF_SET_SIZE; op.u.tbufcontrol.size = size; - + return xc_dom0_op(xc_handle, &op); } - + int xc_tbuf_get_size(int xc_handle, uint32_t *size) { int rc; diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xenctrl.h Tue Apr 18 09:35:40 2006 -0600 @@ -1,8 +1,8 @@ /****************************************************************************** * xenctrl.h - * + * * A library for low-level access to the Xen control 
interfaces. - * + * * Copyright (c) 2003-2004, K A Fraser. */ @@ -30,7 +30,7 @@ /* * DEFINITIONS FOR CPU BARRIERS - */ + */ #if defined(__i386__) #define mb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" ) @@ -51,7 +51,7 @@ /* * INITIALIZATION FUNCTIONS - */ + */ /** * This function opens a handle to the hypervisor interface. This function can @@ -96,20 +96,20 @@ typedef struct xc_core_header { long xc_ptrace_core( int xc_handle, - enum __ptrace_request request, - uint32_t domid, - long addr, + enum __ptrace_request request, + uint32_t domid, + long addr, long data, vcpu_guest_context_t *ctxt); void * map_domain_va_core( - unsigned long domfd, - int cpu, + unsigned long domfd, + int cpu, void *guest_va, vcpu_guest_context_t *ctxt); int xc_waitdomain_core( int xc_handle, - int domain, - int *status, + int domain, + int *status, int options, vcpu_guest_context_t *ctxt); @@ -120,7 +120,7 @@ typedef struct { typedef struct { uint32_t domid; uint32_t ssidref; - unsigned int dying:1, crashed:1, shutdown:1, + unsigned int dying:1, crashed:1, shutdown:1, paused:1, blocked:1, running:1; unsigned int shutdown_reason; /* only meaningful if shutdown==1 */ unsigned long nr_pages; @@ -133,7 +133,7 @@ typedef struct { } xc_dominfo_t; typedef dom0_getdomaininfo_t xc_domaininfo_t; -int xc_domain_create(int xc_handle, +int xc_domain_create(int xc_handle, uint32_t ssidref, xen_domain_handle_t handle, uint32_t *pdomid); @@ -144,7 +144,7 @@ int xc_domain_create(int xc_handle, * xc_domain_dumpcore_via_callback - produces a dump, using a specified * callback function */ -int xc_domain_dumpcore(int xc_handle, +int xc_domain_dumpcore(int xc_handle, uint32_t domid, const char *corename); @@ -156,7 +156,7 @@ int xc_domain_dumpcore(int xc_handle, */ typedef int (dumpcore_rtn_t)(void *arg, char *buffer, unsigned int length); -int xc_domain_dumpcore_via_callback(int xc_handle, +int xc_domain_dumpcore_via_callback(int xc_handle, uint32_t domid, void *arg, dumpcore_rtn_t 
dump_rtn); @@ -170,7 +170,7 @@ int xc_domain_dumpcore_via_callback(int * @return 0 on success, -1 on failure. */ int xc_domain_max_vcpus(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned int max); /** @@ -181,7 +181,7 @@ int xc_domain_max_vcpus(int xc_handle, * @parm domid the domain id to pause * @return 0 on success, -1 on failure. */ -int xc_domain_pause(int xc_handle, +int xc_domain_pause(int xc_handle, uint32_t domid); /** * This function unpauses a domain. The domain should have been previously @@ -191,7 +191,7 @@ int xc_domain_pause(int xc_handle, * @parm domid the domain id to unpause * return 0 on success, -1 on failure */ -int xc_domain_unpause(int xc_handle, +int xc_domain_unpause(int xc_handle, uint32_t domid); /** @@ -203,7 +203,7 @@ int xc_domain_unpause(int xc_handle, * @parm domid the domain id to destroy * @return 0 on success, -1 on failure */ -int xc_domain_destroy(int xc_handle, +int xc_domain_destroy(int xc_handle, uint32_t domid); /** @@ -217,7 +217,7 @@ int xc_domain_destroy(int xc_handle, * @parm reason is the reason (SHUTDOWN_xxx) for the shutdown * @return 0 on success, -1 on failure */ -int xc_domain_shutdown(int xc_handle, +int xc_domain_shutdown(int xc_handle, uint32_t domid, int reason); @@ -242,7 +242,7 @@ int xc_vcpu_setaffinity(int xc_handle, * @return the number of domains enumerated or -1 on error */ int xc_domain_getinfo(int xc_handle, - uint32_t first_domid, + uint32_t first_domid, unsigned int max_doms, xc_dominfo_t *info); @@ -307,12 +307,12 @@ long long xc_domain_get_cpu_usage(int xc domid_t domid, int vcpu); -int xc_domain_sethandle(int xc_handle, uint32_t domid, +int xc_domain_sethandle(int xc_handle, uint32_t domid, xen_domain_handle_t handle); typedef dom0_shadow_control_stats_t xc_shadow_control_stats_t; int xc_shadow_control(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned int sop, unsigned long *dirty_bitmap, unsigned long pages, @@ -386,7 +386,7 @@ int xc_physdev_pci_access_modify(int xc_ int 
xc_readconsolering(int xc_handle, char **pbuffer, - unsigned int *pnr_chars, + unsigned int *pnr_chars, int clear); typedef dom0_physinfo_t xc_physinfo_t; @@ -397,18 +397,18 @@ int xc_sched_id(int xc_handle, int *sched_id); int xc_domain_setmaxmem(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned int max_memkb); int xc_domain_memory_increase_reservation(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, unsigned long *extent_start); int xc_domain_memory_decrease_reservation(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned long nr_extents, unsigned int extent_order, unsigned long *extent_start); @@ -443,7 +443,7 @@ int xc_domain_iomem_permission(int xc_ha unsigned long nr_mfns, uint8_t allow_access); -unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid, +unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid, unsigned long mfn); typedef dom0_perfc_desc_t xc_perfc_desc_t; @@ -492,11 +492,11 @@ unsigned long xc_translate_foreign_addre unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom, int vcpu, unsigned long long virt); -int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf, +int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf, unsigned long max_pfns); int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid, - unsigned long *pfn_buf, + unsigned long *pfn_buf, unsigned int start_page, unsigned int nr_pages); int xc_copy_to_domain_page(int xc_handle, uint32_t domid, @@ -551,7 +551,7 @@ int xc_tbuf_set_size(int xc_handle, uint int xc_tbuf_set_size(int xc_handle, uint32_t size); /** - * This function retrieves the current size of the trace buffers. + * This function retrieves the current size of the trace buffers. * Note that the size returned is in terms of bytes, not pages. 
* @parm xc_handle a handle to an open hypervisor interface @@ -577,7 +577,7 @@ struct xc_mmu { }; typedef struct xc_mmu xc_mmu_t; xc_mmu_t *xc_init_mmu_updates(int xc_handle, domid_t dom); -int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, +int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, unsigned long long ptr, unsigned long long val); int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu); diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xenguest.h Tue Apr 18 09:35:40 2006 -0600 @@ -1,8 +1,8 @@ /****************************************************************************** * xenguest.h - * + * * A library for guest domain management in Xen. - * + * * Copyright (c) 2003-2004, K A Fraser. */ @@ -21,7 +21,7 @@ * @parm dom the id of the domain * @return 0 on success, -1 on failure */ -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, +int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, int (*suspend)(int domid)); @@ -37,8 +37,8 @@ int xc_linux_save(int xc_handle, int io_ * @parm store_mfn returned with the mfn of the store page * @return 0 on success, -1 on failure */ -int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long nr_pfns, unsigned int store_evtchn, +int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, + unsigned long nr_pfns, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn); diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xg_private.c Tue Apr 18 09:35:40 2006 -0600 @@ -1,6 +1,6 @@ /****************************************************************************** * xg_private.c - * + * * Helper functions for the rest of the library. 
*/ diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xg_private.h --- a/tools/libxc/xg_private.h Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xg_private.h Tue Apr 18 09:35:40 2006 -0600 @@ -11,7 +11,7 @@ #include <sys/stat.h> #include "xenctrl.h" -#include "xenguest.h" +#include "xenguest.h" #include <xen/linux/privcmd.h> #include <xen/memory.h> @@ -62,7 +62,7 @@ unsigned long csum_page (void * page); #define L2_PAGETABLE_ENTRIES_PAE 512 #define L3_PAGETABLE_ENTRIES_PAE 4 -#if defined(__i386__) +#if defined(__i386__) #define L1_PAGETABLE_ENTRIES 1024 #define L2_PAGETABLE_ENTRIES 1024 #elif defined(__x86_64__) @@ -71,7 +71,7 @@ unsigned long csum_page (void * page); #define L3_PAGETABLE_ENTRIES 512 #define L4_PAGETABLE_ENTRIES 512 #endif - + #define PAGE_SHIFT XC_PAGE_SHIFT #define PAGE_SIZE (1UL << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) @@ -167,8 +167,8 @@ typedef struct mfn_mapper { int error; int max_queue_size; void * addr; - privcmd_mmap_t ioctl; - + privcmd_mmap_t ioctl; + } mfn_mapper_t; int xc_copy_to_domain_page(int xc_handle, uint32_t domid, diff -r 4ed269e73e95 -r 41823e46d6ac tools/libxc/xg_save_restore.h --- a/tools/libxc/xg_save_restore.h Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/libxc/xg_save_restore.h Tue Apr 18 09:35:40 2006 -0600 @@ -1,7 +1,7 @@ /* ** xg_save_restore.h -** -** Defintions and utilities for save / restore. +** +** Defintions and utilities for save / restore. */ #include "xc_private.h" @@ -29,8 +29,8 @@ while (0) /* -** We process save/restore/migrate in batches of pages; the below -** determines how many pages we (at maximum) deal with in each batch. +** We process save/restore/migrate in batches of pages; the below +** determines how many pages we (at maximum) deal with in each batch. 
*/ #define MAX_BATCH_SIZE 1024 /* up to 1024 pages (4MB) at a time */ @@ -40,56 +40,56 @@ while (0) /* -** Determine various platform information required for save/restore, in -** particular: +** Determine various platform information required for save/restore, in +** particular: ** -** - the maximum MFN on this machine, used to compute the size of -** the M2P table; -** -** - the starting virtual address of the the hypervisor; we use this -** to determine which parts of guest address space(s) do and don't -** require canonicalization during save/restore; and -** -** - the number of page-table levels for save/ restore. This should -** be a property of the domain, but for the moment we just read it +** - the maximum MFN on this machine, used to compute the size of +** the M2P table; +** +** - the starting virtual address of the the hypervisor; we use this +** to determine which parts of guest address space(s) do and don't +** require canonicalization during save/restore; and +** +** - the number of page-table levels for save/ restore. This should +** be a property of the domain, but for the moment we just read it ** from the hypervisor. ** -** Returns 1 on success, 0 on failure. +** Returns 1 on success, 0 on failure. 
*/ -static int get_platform_info(int xc_handle, uint32_t dom, - /* OUT */ unsigned long *max_mfn, - /* OUT */ unsigned long *hvirt_start, +static int get_platform_info(int xc_handle, uint32_t dom, + /* OUT */ unsigned long *max_mfn, + /* OUT */ unsigned long *hvirt_start, /* OUT */ unsigned int *pt_levels) - -{ + +{ xen_capabilities_info_t xen_caps = ""; xen_platform_parameters_t xen_params; if (xc_version(xc_handle, XENVER_platform_parameters, &xen_params) != 0) return 0; - + if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) return 0; *max_mfn = xc_memory_op(xc_handle, XENMEM_maximum_ram_page, NULL); - + *hvirt_start = xen_params.virt_start; if (strstr(xen_caps, "xen-3.0-x86_64")) *pt_levels = 4; else if (strstr(xen_caps, "xen-3.0-x86_32p")) - *pt_levels = 3; + *pt_levels = 3; else if (strstr(xen_caps, "xen-3.0-x86_32")) - *pt_levels = 2; - else - return 0; - + *pt_levels = 2; + else + return 0; + return 1; -} +} -/* -** Save/restore deal with the mfn_to_pfn (M2P) and pfn_to_mfn (P2M) tables. +/* +** Save/restore deal with the mfn_to_pfn (M2P) and pfn_to_mfn (P2M) tables. ** The M2P simply holds the corresponding PFN, while the top bit of a P2M ** entry tell us whether or not the the PFN is currently mapped. */ @@ -98,18 +98,18 @@ static int get_platform_info(int xc_hand #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1)) -/* -** The M2P is made up of some number of 'chunks' of at least 2MB in size. -** The below definitions and utility function(s) deal with mapping the M2P -** regarldess of the underlying machine memory size or architecture. +/* +** The M2P is made up of some number of 'chunks' of at least 2MB in size. +** The below definitions and utility function(s) deal with mapping the M2P +** regarldess of the underlying machine memory size or architecture. 
*/ -#define M2P_SHIFT L2_PAGETABLE_SHIFT_PAE -#define M2P_CHUNK_SIZE (1 << M2P_SHIFT) -#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT) +#define M2P_SHIFT L2_PAGETABLE_SHIFT_PAE +#define M2P_CHUNK_SIZE (1 << M2P_SHIFT) +#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT) #define M2P_CHUNKS(_m) (M2P_SIZE((_m)) >> M2P_SHIFT) /* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */ -#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) +#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) /* Number of unsigned longs in a page */ #define ulpp (PAGE_SIZE/sizeof(unsigned long)) @@ -127,12 +127,12 @@ static int get_platform_info(int xc_hand #define NR_SLACK_ENTRIES ((8 * 1024 * 1024) / PAGE_SIZE) /* Is the given PFN within the 'slack' region at the top of the P2M? */ -#define IS_REAL_PFN(_pfn) ((max_pfn - (_pfn)) > NR_SLACK_ENTRIES) +#define IS_REAL_PFN(_pfn) ((max_pfn - (_pfn)) > NR_SLACK_ENTRIES) /* Returns TRUE if the PFN is currently mapped */ #define is_mapped(pfn_type) (!((pfn_type) & 0x80000000UL)) -#define INVALID_P2M_ENTRY (~0UL) +#define INVALID_P2M_ENTRY (~0UL) diff -r 4ed269e73e95 -r 41823e46d6ac tools/misc/xen-clone --- a/tools/misc/xen-clone Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/misc/xen-clone Tue Apr 18 09:35:40 2006 -0600 @@ -113,7 +113,7 @@ else # Turn linux into xenolinux then build it cd xenolinux-${LINUX_VER}-sparse - ./mkbuildtree ../../linux-${LINUX_VER} + bash ./mkbuildtree ../../linux-${LINUX_VER} cd ../.. 
mv linux-${LINUX_VER} xenolinux-${LINUX_VER} cd xenolinux-${LINUX_VER} diff -r 4ed269e73e95 -r 41823e46d6ac tools/python/xen/lowlevel/xs/xs.c --- a/tools/python/xen/lowlevel/xs/xs.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/python/xen/lowlevel/xs/xs.c Tue Apr 18 09:35:40 2006 -0600 @@ -589,7 +589,7 @@ static PyObject *xspy_transaction_end(Xs static PyObject *xspy_introduce_domain(XsHandle *self, PyObject *args) { - domid_t dom; + uint32_t dom; unsigned long page; unsigned int port; @@ -620,7 +620,7 @@ static PyObject *xspy_introduce_domain(X static PyObject *xspy_release_domain(XsHandle *self, PyObject *args) { - domid_t dom; + uint32_t dom; struct xs_handle *xh = xshandle(self); bool result = 0; @@ -677,7 +677,7 @@ static PyObject *xspy_get_domain_path(Xs static PyObject *xspy_get_domain_path(XsHandle *self, PyObject *args) { struct xs_handle *xh = xshandle(self); - int domid; + uint32_t domid; char *xsval; if (!xh) diff -r 4ed269e73e95 -r 41823e46d6ac tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/python/xen/xend/XendCheckpoint.py Tue Apr 18 09:35:40 2006 -0600 @@ -53,7 +53,7 @@ def read_exact(fd, size, errmsg): -def save(fd, dominfo, live): +def save(fd, dominfo, live, dst): write_exact(fd, SIGNATURE, "could not write guest state file: signature") config = sxp.to_string(dominfo.sxpr()) @@ -65,6 +65,8 @@ def save(fd, dominfo, live): dominfo.setName('migrating-' + domain_name) try: + dominfo.migrateDevices(live, dst, 1, domain_name) + write_exact(fd, pack("!i", len(config)), "could not write guest state file: config len") write_exact(fd, config, "could not write guest state file: config") @@ -85,7 +87,9 @@ def save(fd, dominfo, live): log.debug("Suspending %d ...", dominfo.getDomid()) dominfo.shutdown('suspend') dominfo.waitForShutdown() + dominfo.migrateDevices(live, dst, 2, domain_name) log.info("Domain %d suspended.", dominfo.getDomid()) + dominfo.migrateDevices(live, dst, 3, 
domain_name) tochild.write("done\n") tochild.flush() log.debug('Written done') diff -r 4ed269e73e95 -r 41823e46d6ac tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/python/xen/xend/XendDomain.py Tue Apr 18 09:35:40 2006 -0600 @@ -405,6 +405,9 @@ class XendDomain: if dominfo.getDomid() == PRIV_DOMAIN: raise XendError("Cannot migrate privileged domain %i" % domid) + """ The following call may raise a XendError exception """ + dominfo.testMigrateDevices(live, dst) + if port == 0: port = xroot.get_xend_relocation_port() try: @@ -414,8 +417,8 @@ class XendDomain: raise XendError("can't connect: %s" % err[1]) sock.send("receive\n") - sock.recv(80) - XendCheckpoint.save(sock.fileno(), dominfo, live) + sock.recv(80) + XendCheckpoint.save(sock.fileno(), dominfo, live, dst) def domain_save(self, domid, dst): @@ -435,7 +438,7 @@ class XendDomain: fd = os.open(dst, os.O_WRONLY | os.O_CREAT | os.O_TRUNC) try: # For now we don't support 'live checkpoint' - return XendCheckpoint.save(fd, dominfo, False) + return XendCheckpoint.save(fd, dominfo, False, dst) finally: os.close(fd) except OSError, ex: diff -r 4ed269e73e95 -r 41823e46d6ac tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/python/xen/xend/XendDomainInfo.py Tue Apr 18 09:35:40 2006 -0600 @@ -1395,6 +1395,38 @@ class XendDomainInfo: if self.image: self.image.createDeviceModel() + ## public: + + def testMigrateDevices(self, live, dst): + """ Notify all device about intention of migration + @raise: XendError for a device that cannot be migrated + """ + for (n, c) in self.info['device']: + rc = self.migrateDevice(n, c, live, dst, 0) + if rc != 0: + raise XendError("Device of type '%s' refuses migration." 
% n) + + def migrateDevices(self, live, dst, step, domName=''): + """Notify the devices about migration + """ + ctr = 0 + try: + for (n, c) in self.info['device']: + self.migrateDevice(n, c, live, dst, step, domName) + ctr = ctr + 1 + except: + for (n, c) in self.info['device']: + if ctr == 0: + step = step - 1 + ctr = ctr - 1 + self.recoverMigrateDevice(n, c, live, dst, step, domName) + raise + + def migrateDevice(self, deviceClass, deviceConfig, live, dst, step, domName=''): + return self.getDeviceController(deviceClass).migrate(deviceConfig, live, dst, step, domName) + + def recoverMigrateDevice(self, deviceClass, deviceConfig, live, dst, step, domName=''): + return self.getDeviceController(deviceClass).recover_migrate(deviceConfig, live, dst, step, domName) def waitForDevices(self): """Wait for this domain's configured devices to connect. diff -r 4ed269e73e95 -r 41823e46d6ac tools/python/xen/xend/XendRoot.py --- a/tools/python/xen/xend/XendRoot.py Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/python/xen/xend/XendRoot.py Tue Apr 18 09:35:40 2006 -0600 @@ -85,6 +85,9 @@ class XendRoot: """Default for the flag indicating whether xend should run a unix-domain server (deprecated).""" xend_unix_server_default = 'no' + + """Default external migration tool """ + external_migration_tool_default = '' """Default path the unix-domain server listens at.""" xend_unix_path_default = '/var/lib/xend/xend-socket' @@ -250,6 +253,9 @@ class XendRoot: else: return None + def get_external_migration_tool(self): + """@return the name of the tool to handle virtual TPM migration.""" + return self.get_config_value('external-migration-tool', self.external_migration_tool_default) def get_enable_dump(self): return self.get_config_bool('enable-dump', 'no') diff -r 4ed269e73e95 -r 41823e46d6ac tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/python/xen/xend/server/DevController.py Tue Apr 18 09:35:40 2006 
-0600 @@ -267,6 +267,41 @@ class DevController: raise NotImplementedError() + def migrate(self, deviceConfig, live, dst, step, domName): + """ Migration of a device. The 'live' parameter indicates + whether the device is live-migrated (live=1). 'dst' then gives + the hostname of the machine to migrate to. + This function is called for 4 steps: + If step == 0: Check whether the device is ready to be migrated + or can at all be migrated; return a '-1' if + the device is NOT ready, a '0' otherwise. If it is + not ready ( = not possible to migrate this device), + migration will not take place. + step == 1: Called immediately after step 0; migration + of the kernel has started; + step == 2: Called after the suspend has been issued + to the domain and the domain is not scheduled anymore. + Synchronize with what was started in step 1, if necessary. + Now the device should initiate its transfer to the + given target. Since there might be more than just + one device initiating a migration, this step should + put the process performing the transfer into the + background and return immediately to achieve as much + concurrency as possible. + step == 3: Synchronize with the migration of the device that + was initiated in step 2. + Make sure that the migration has finished and only + then return from the call. + """ + return 0 + + + def recover_migrate(self, deviceConfig, list, dst, step, domName): + """ Recover from device migration. The given step was the + last one that was successfully executed. 
+ """ + return 0 + def getDomid(self): """Stub to {@link XendDomainInfo.getDomid}, for use by our diff -r 4ed269e73e95 -r 41823e46d6ac tools/python/xen/xend/server/tpmif.py --- a/tools/python/xen/xend/server/tpmif.py Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/python/xen/xend/server/tpmif.py Tue Apr 18 09:35:40 2006 -0600 @@ -23,8 +23,16 @@ from xen.xend import sxp from xen.xend.XendLogging import log +from xen.xend.XendError import XendError +from xen.xend import XendRoot from xen.xend.server.DevController import DevController + +import os +import re + + +xroot = XendRoot.instance() class TPMifController(DevController): @@ -61,3 +69,43 @@ class TPMifController(DevController): result.append(['instance', instance]) return result + + def migrate(self, deviceConfig, live, dst, step, domName): + """@see DevContoller.migrate""" + if live: + tool = xroot.get_external_migration_tool() + if tool != '': + log.info("Request to live-migrate device to %s. step=%d.", + dst, step) + + if step == 0: + """Assuming for now that everything is ok and migration + with the given tool can proceed. + """ + return 0 + else: + fd = os.popen("%s -type vtpm -step %d -host %s -domname %s" % + (tool, step, dst, domName), + 'r') + for line in fd.readlines(): + mo = re.search('Error', line) + if mo: + raise XendError("vtpm: Fatal error in migration step %d." % + step) + return 0 + else: + log.debug("External migration tool not in configuration.") + return -1 + return 0 + + def recover_migrate(self, deviceConfig, live, dst, step, domName): + """@see DevContoller.recover_migrate""" + if live: + tool = xroot.get_external_migration_tool() + if tool != '': + log.info("Request to recover live-migrated device. 
last good step=%d.", + step) + fd = os.popen("%s -type vtpm -step %d -host %s -domname %s -recover" % + (tool, step, dst, domName), + 'r') + return 0 diff -r 4ed269e73e95 -r 41823e46d6ac tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/python/xen/xm/create.py Tue Apr 18 09:35:40 2006 -0600 @@ -158,7 +158,7 @@ gopts.var('cpu', val='CPU', use="CPU to run the VCPU0 on.") gopts.var('cpus', val='CPUS', - fn=set_int, default=None, + fn=set_value, default=None, use="CPUS to run the domain on.") gopts.var('pae', val='PAE', diff -r 4ed269e73e95 -r 41823e46d6ac tools/xenmon/README --- a/tools/xenmon/README Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/xenmon/README Tue Apr 18 09:35:40 2006 -0600 @@ -84,6 +84,16 @@ Usage Notes and issues events cause a trace record to be emitted. - To exit xenmon, type 'q' - To cycle the display to other physical cpu's, type 'c' + - The first time xenmon is run, it attempts to allocate xen trace buffers + using a default size. If you wish to use a non-default value for the + trace buffer size, run the 'setsize' program (located in tools/xentrace) + and specify the number of memory pages as a parameter. The default is 20. + - Not well tested with domains using more than 1 virtual cpu + - If you create a lot of domains, or repeatedly kill a domain and restart it, + and the domain id's get to be bigger than NDOMAINS, then xenmon behaves badly. + This is a bug that is due to xenbaked's treatment of domain id's vs. domain + indices in a data array. Will be fixed in a future release; Workaround: + Increase NDOMAINS in xenbaked and rebuild. 
Future Work ----------- diff -r 4ed269e73e95 -r 41823e46d6ac tools/xenmon/xenbaked.c --- a/tools/xenmon/xenbaked.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/xenmon/xenbaked.c Tue Apr 18 09:35:40 2006 -0600 @@ -7,6 +7,7 @@ * * Copyright (C) 2004 by Intel Research Cambridge * Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins + * Copyright (C) 2006 by Hewlett Packard Fort Collins * * Authors: Diwaker Gupta, diwaker.gupta@xxxxxx * Rob Gardner, rob.gardner@xxxxxx @@ -42,6 +43,8 @@ #include <xenctrl.h> #include <xen/xen.h> #include <string.h> +#include <sys/select.h> +#include <xen/linux/evtchn.h> #include "xc_private.h" typedef struct { int counter; } atomic_t; @@ -81,14 +84,13 @@ settings_t opts; int interrupted = 0; /* gets set if we get a SIGHUP */ int rec_count = 0; +int wakeups = 0; time_t start_time; int dom0_flips = 0; _new_qos_data *new_qos; _new_qos_data **cpu_qos_data; - -#define ID(X) ((X>NDOMAINS-1)?(NDOMAINS-1):X) // array of currently running domains, indexed by cpu int *running = NULL; @@ -223,6 +225,9 @@ void dump_stats(void) printf("processed %d total records in %d seconds (%ld per second)\n", rec_count, (int)run_time, rec_count/run_time); + printf("woke up %d times in %d seconds (%ld per second)\n", wakeups, + (int) run_time, wakeups/run_time); + check_gotten_sum(); } @@ -243,6 +248,112 @@ void log_event(int event_id) stat_map[0].event_count++; // other } +#define EVTCHN_DEV_NAME "/dev/xen/evtchn" +#define EVTCHN_DEV_MAJOR 10 +#define EVTCHN_DEV_MINOR 201 + +int virq_port; +int eventchn_fd = -1; + +/* Returns the event channel handle. */ +/* Stolen from xenstore code */ +int eventchn_init(void) +{ + struct stat st; + struct ioctl_evtchn_bind_virq bind; + int rc; + + // to revert to old way: + if (0) + return -1; + + /* Make sure any existing device file links to correct device. 
*/ + if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) || + (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR))) + (void)unlink(EVTCHN_DEV_NAME); + + reopen: + eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR); + if (eventchn_fd == -1) { + if ((errno == ENOENT) && + ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) && + (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600, + makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0)) + goto reopen; + return -errno; + } + + if (eventchn_fd < 0) + perror("Failed to open evtchn device"); + + bind.virq = VIRQ_TBUF; + rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind); + if (rc == -1) + perror("Failed to bind to domain exception virq port"); + virq_port = rc; + + return eventchn_fd; +} + +void wait_for_event(void) +{ + int ret; + fd_set inset; + evtchn_port_t port; + struct timeval tv; + + if (eventchn_fd < 0) { + nanosleep(&opts.poll_sleep, NULL); + return; + } + + FD_ZERO(&inset); + FD_SET(eventchn_fd, &inset); + tv.tv_sec = 1; + tv.tv_usec = 0; + // tv = millis_to_timespec(&opts.poll_sleep); + ret = select(eventchn_fd+1, &inset, NULL, NULL, &tv); + + if ( (ret == 1) && FD_ISSET(eventchn_fd, &inset)) { + if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port)) + perror("Failed to read from event fd"); + + // if (port == virq_port) + // printf("got the event I was looking for\r\n"); + + if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port)) + perror("Failed to write to event fd"); + } +} + +void enable_tracing_or_die(int xc_handle) +{ + int enable = 1; + int tbsize = DEFAULT_TBUF_SIZE; + + if (xc_tbuf_enable(xc_handle, enable) != 0) { + if (xc_tbuf_set_size(xc_handle, tbsize) != 0) { + perror("set_size Hypercall failure"); + exit(1); + } + printf("Set default trace buffer allocation (%d pages)\n", tbsize); + if (xc_tbuf_enable(xc_handle, enable) != 0) { + perror("Could not enable trace buffers\n"); + exit(1); + } + } + else + printf("Tracing enabled\n"); +} + +void disable_tracing(void) +{ + int 
enable = 0; + int xc_handle = xc_interface_open(); + + xc_tbuf_enable(xc_handle, enable); + xc_interface_close(xc_handle); +} /** @@ -258,6 +369,17 @@ void get_tbufs(unsigned long *mfn, unsig int ret; dom0_op_t op; /* dom0 op we'll build */ int xc_handle = xc_interface_open(); /* for accessing control interface */ + unsigned int tbsize; + + enable_tracing_or_die(xc_handle); + + if (xc_tbuf_get_size(xc_handle, &tbsize) != 0) { + perror("Failure to get tbuf info from Xen. Guess size is 0?"); + exit(1); + } + else + printf("Current tbuf size: 0x%x\n", tbsize); + op.cmd = DOM0_TBUFCONTROL; op.interface_version = DOM0_INTERFACE_VERSION; @@ -448,6 +570,11 @@ int monitor_tbufs(void) meta = init_bufs_ptrs (tbufs_mapped, num, size); data = init_rec_ptrs(meta, num); + // Set up event channel for select() + if (eventchn_init() < 0) { + fprintf(stderr, "Failed to initialize event channel; Using POLL method\r\n"); + } + /* now, scan buffers for events */ while ( !interrupted ) { @@ -460,7 +587,8 @@ int monitor_tbufs(void) meta[i]->cons++; } - nanosleep(&opts.poll_sleep, NULL); + wait_for_event(); + wakeups++; } /* cleanup */ @@ -640,6 +768,7 @@ int main(int argc, char **argv) dump_stats(); msync(new_qos, sizeof(_new_qos_data), MS_SYNC); + disable_tracing(); return ret; } @@ -737,7 +866,9 @@ void qos_update_thread(int cpu, int domi start = new_qos->domain_info[id].start_time; if (start > now) { // wrapped around run_time = now + (~0ULL - start); - printf("warning: start > now\n"); + // this could happen if there is nothing going on within a cpu; + // in this case the idle domain would run forever + // printf("warning: start > now\n"); } else run_time = now - start; @@ -746,11 +877,11 @@ void qos_update_thread(int cpu, int domi new_qos->domain_info[id].ns_oncpu_since_boot += run_time; new_qos->domain_info[id].start_time = now; new_qos->domain_info[id].ns_since_boot += time_since_update; -#if 1 + new_qos->qdata[n].ns_gotten[id] += run_time; - if (domid == 0 && cpu == 1) - 
printf("adding run time for dom0 on cpu1\r\n"); -#endif + // if (domid == 0 && cpu == 1) + // printf("adding run time for dom0 on cpu1\r\n"); + } new_qos->domain_info[id].runnable_at_last_update = domain_runnable(domid); @@ -916,13 +1047,13 @@ void qos_state_runnable(int cpu, int dom { int id = ID(domid); + qos_update_thread_stats(cpu, domid, now); + if (domain_runnable(id)) // double call? return; new_qos->domain_info[id].runnable = 1; update_blocked_time(domid, now); - qos_update_thread_stats(cpu, domid, now); - new_qos->domain_info[id].blocked_start_time = 0; /* invalidate */ new_qos->domain_info[id].runnable_start_time = now; // runnable_start_time[id] = now; @@ -951,7 +1082,7 @@ int domain_ok(int cpu, int domid, uint64 if (domid == IDLE_DOMAIN_ID) domid = NDOMAINS-1; if (domid < 0 || domid >= NDOMAINS) { - printf("bad domain id: %d\n", domid); + printf("bad domain id: %d\r\n", domid); return 0; } if (new_qos->domain_info[domid].in_use == 0) diff -r 4ed269e73e95 -r 41823e46d6ac tools/xenmon/xenbaked.h --- a/tools/xenmon/xenbaked.h Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/xenmon/xenbaked.h Tue Apr 18 09:35:40 2006 -0600 @@ -1,5 +1,5 @@ /****************************************************************************** - * tools/xenbaked.h + * TOOLS/xenbaked.h * * Header file for xenbaked * @@ -30,6 +30,7 @@ #define million 1000000LL #define billion 1000000000LL +// caution: don't use QOS_ADD with negative numbers! #define QOS_ADD(N,A) ((N+A)<(NSAMPLES-1) ? (N+A) : A) #define QOS_INCR(N) ((N<(NSAMPLES-2)) ? (N+1) : 0) #define QOS_DECR(N) ((N==0) ? 
(NSAMPLES-1) : (N-1)) @@ -43,6 +44,8 @@ /* Number of data points to keep */ #define NSAMPLES 100 +#define ID(X) ((X>NDOMAINS-1)?(NDOMAINS-1):X) +#define DEFAULT_TBUF_SIZE 20 // per domain stuff typedef struct diff -r 4ed269e73e95 -r 41823e46d6ac tools/xenmon/xenmon.py --- a/tools/xenmon/xenmon.py Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/xenmon/xenmon.py Tue Apr 18 09:35:40 2006 -0600 @@ -5,7 +5,7 @@ # There is a curses interface for live monitoring. XenMon also allows # logging to a file. For options, run python xenmon.py -h # -# Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins +# Copyright (C) 2005,2006 by Hewlett Packard, Palo Alto and Fort Collins # Authors: Lucy Cherkasova, lucy.cherkasova@xxxxxx # Rob Gardner, rob.gardner@xxxxxx # Diwaker Gupta, diwaker.gupta@xxxxxx @@ -85,6 +85,33 @@ def setup_cmdline_parser(): parser.add_option("--ms_per_sample", dest="mspersample", action="store", type="int", default=100, help = "determines how many ms worth of data goes in a sample") + parser.add_option("--cpu", dest="cpu", action="store", type="int", default=0, + help = "specifies which cpu to display data for") + + parser.add_option("--allocated", dest="allocated", action="store_true", + default=False, help="Display allocated time for each domain") + parser.add_option("--noallocated", dest="allocated", action="store_false", + default=False, help="Don't display allocated time for each domain") + + parser.add_option("--blocked", dest="blocked", action="store_true", + default=True, help="Display blocked time for each domain") + parser.add_option("--noblocked", dest="blocked", action="store_false", + default=True, help="Don't display blocked time for each domain") + + parser.add_option("--waited", dest="waited", action="store_true", + default=True, help="Display waiting time for each domain") + parser.add_option("--nowaited", dest="waited", action="store_false", + default=True, help="Don't display waiting time for each domain") + + 
parser.add_option("--excount", dest="excount", action="store_true", + default=False, help="Display execution count for each domain") + parser.add_option("--noexcount", dest="excount", action="store_false", + default=False, help="Don't display execution count for each domain") + parser.add_option("--iocount", dest="iocount", action="store_true", + default=False, help="Display I/O count for each domain") + parser.add_option("--noiocount", dest="iocount", action="store_false", + default=False, help="Don't display I/O count for each domain") + return parser # encapsulate information about a domain @@ -227,19 +254,17 @@ def display(scr, row, col, str, attr=0): # the live monitoring code -def show_livestats(): - cpu = 0 # cpu of interest to display data for +def show_livestats(cpu): ncpu = 1 # number of cpu's on this platform slen = 0 # size of shared data structure, incuding padding - global dom_in_use + cpu_1sec_usage = 0.0 + cpu_10sec_usage = 0.0 + heartbeat = 1 + global dom_in_use, options # mmap the (the first chunk of the) file shmf = open(SHM_FILE, "r+") shm = mmap.mmap(shmf.fileno(), QOS_DATA_SIZE) - - samples = [] - doms = [] - dom_in_use = [] # initialize curses stdscr = _c.initscr() @@ -253,7 +278,8 @@ def show_livestats(): # display in a loop while True: - for cpuidx in range(0, ncpu): + cpuidx = 0 + while cpuidx < ncpu: # calculate offset in mmap file to start from idx = cpuidx * slen @@ -261,6 +287,7 @@ def show_livestats(): samples = [] doms = [] + dom_in_use = [] # read in data for i in range(0, NSAMPLES): @@ -279,6 +306,8 @@ def show_livestats(): # dom_in_use.append(in_use) dom_in_use.append(dom[8]) idx += len +# print "dom_in_use(cpu=%d): " % cpuidx, dom_in_use + len = struct.calcsize("4i") oldncpu = ncpu @@ -294,6 +323,8 @@ def show_livestats(): # stop examining mmap data and start displaying stuff if cpuidx == cpu: break + + cpuidx = cpuidx + 1 # calculate starting and ending datapoints; never look at "next" since # it represents live data that may be 
in transition. @@ -312,12 +343,15 @@ def show_livestats(): row = 0 display(stdscr, row, 1, "CPU = %d" % cpu, _c.A_STANDOUT) - display(stdscr, row, 10, "%sLast 10 seconds%sLast 1 second" % (6*' ', 30*' '), _c.A_BOLD) + display(stdscr, row, 10, "%sLast 10 seconds (%3.2f%%)%sLast 1 second (%3.2f%%)" % (6*' ', cpu_10sec_usage, 30*' ', cpu_1sec_usage), _c.A_BOLD) row +=1 display(stdscr, row, 1, "%s" % ((maxx-2)*'=')) total_h1_cpu = 0 total_h2_cpu = 0 + + cpu_1sec_usage = 0.0 + cpu_10sec_usage = 0.0 for dom in range(0, NDOMAINS): if not dom_in_use[dom]: @@ -332,92 +366,102 @@ def show_livestats(): display(stdscr, row, col, "%s" % time_scale(h2[dom][0][0])) col += 12 display(stdscr, row, col, "%3.2f%%" % h2[dom][0][1]) + if dom != NDOMAINS - 1: + cpu_10sec_usage += h2[dom][0][1] col += 12 display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][0][2])) col += 18 display(stdscr, row, col, "%s" % time_scale(h1[dom][0][0])) col += 12 - display(stdscr, row, col, "%3.2f%%" % h1[dom][0][1]) + display(stdscr, row, col, "%3.2f%%" % h1[dom][0][1], _c.A_STANDOUT) col += 12 display(stdscr, row, col, "%s/ex" % time_scale(h1[dom][0][2])) col += 18 display(stdscr, row, col, "Gotten") + + if dom != NDOMAINS - 1: + cpu_1sec_usage = cpu_1sec_usage + h1[dom][0][1] # display allocated - row += 1 - col = 2 - display(stdscr, row, col, "%d" % dom) - col += 28 - display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][1])) - col += 42 - display(stdscr, row, col, "%s/ex" % time_scale(h1[dom][1])) - col += 18 - display(stdscr, row, col, "Allocated") + if options.allocated: + row += 1 + col = 2 + display(stdscr, row, col, "%d" % dom) + col += 28 + display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][1])) + col += 42 + display(stdscr, row, col, "%s/ex" % time_scale(h1[dom][1])) + col += 18 + display(stdscr, row, col, "Allocated") # display blocked - row += 1 - col = 2 - display(stdscr, row, col, "%d" % dom) - col += 4 - display(stdscr, row, col, "%s" % time_scale(h2[dom][2][0])) - col += 12 - 
display(stdscr, row, col, "%3.2f%%" % h2[dom][2][1]) - col += 12 - display(stdscr, row, col, "%s/io" % time_scale(h2[dom][2][2])) - col += 18 - display(stdscr, row, col, "%s" % time_scale(h1[dom][2][0])) - col += 12 - display(stdscr, row, col, "%3.2f%%" % h1[dom][2][1]) - col += 12 - display(stdscr, row, col, "%s/io" % time_scale(h1[dom][2][2])) - col += 18 - display(stdscr, row, col, "Blocked") + if options.blocked: + row += 1 + col = 2 + display(stdscr, row, col, "%d" % dom) + col += 4 + display(stdscr, row, col, "%s" % time_scale(h2[dom][2][0])) + col += 12 + display(stdscr, row, col, "%3.2f%%" % h2[dom][2][1]) + col += 12 + display(stdscr, row, col, "%s/io" % time_scale(h2[dom][2][2])) + col += 18 + display(stdscr, row, col, "%s" % time_scale(h1[dom][2][0])) + col += 12 + display(stdscr, row, col, "%3.2f%%" % h1[dom][2][1]) + col += 12 + display(stdscr, row, col, "%s/io" % time_scale(h1[dom][2][2])) + col += 18 + display(stdscr, row, col, "Blocked") # display waited - row += 1 - col = 2 - display(stdscr, row, col, "%d" % dom) - col += 4 - display(stdscr, row, col, "%s" % time_scale(h2[dom][3][0])) - col += 12 - display(stdscr, row, col, "%3.2f%%" % h2[dom][3][1]) - col += 12 - display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][3][2])) - col += 18 - display(stdscr, row, col, "%s" % time_scale(h1[dom][3][0])) - col += 12 - display(stdscr, row, col, "%3.2f%%" % h1[dom][3][1]) - col += 12 - display(stdscr, row, col, "%s/ex" % time_scale(h1[dom][3][2])) - col += 18 - display(stdscr, row, col, "Waited") + if options.waited: + row += 1 + col = 2 + display(stdscr, row, col, "%d" % dom) + col += 4 + display(stdscr, row, col, "%s" % time_scale(h2[dom][3][0])) + col += 12 + display(stdscr, row, col, "%3.2f%%" % h2[dom][3][1]) + col += 12 + display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][3][2])) + col += 18 + display(stdscr, row, col, "%s" % time_scale(h1[dom][3][0])) + col += 12 + display(stdscr, row, col, "%3.2f%%" % h1[dom][3][1]) + col += 12 + 
display(stdscr, row, col, "%s/ex" % time_scale(h1[dom][3][2])) + col += 18 + display(stdscr, row, col, "Waited") # display ex count - row += 1 - col = 2 - display(stdscr, row, col, "%d" % dom) - - col += 28 - display(stdscr, row, col, "%d/s" % h2[dom][4]) - col += 42 - display(stdscr, row, col, "%d" % h1[dom][4]) - col += 18 - display(stdscr, row, col, "Execution count") + if options.excount: + row += 1 + col = 2 + display(stdscr, row, col, "%d" % dom) + + col += 28 + display(stdscr, row, col, "%d/s" % h2[dom][4]) + col += 42 + display(stdscr, row, col, "%d" % h1[dom][4]) + col += 18 + display(stdscr, row, col, "Execution count") # display io count - row += 1 - col = 2 - display(stdscr, row, col, "%d" % dom) - col += 4 - display(stdscr, row, col, "%d/s" % h2[dom][5][0]) - col += 24 - display(stdscr, row, col, "%d/ex" % h2[dom][5][1]) - col += 18 - display(stdscr, row, col, "%d" % h1[dom][5][0]) - col += 24 - display(stdscr, row, col, "%3.2f/ex" % h1[dom][5][1]) - col += 18 - display(stdscr, row, col, "I/O Count") + if options.iocount: + row += 1 + col = 2 + display(stdscr, row, col, "%d" % dom) + col += 4 + display(stdscr, row, col, "%d/s" % h2[dom][5][0]) + col += 24 + display(stdscr, row, col, "%d/ex" % h2[dom][5][1]) + col += 18 + display(stdscr, row, col, "%d" % h1[dom][5][0]) + col += 24 + display(stdscr, row, col, "%3.2f/ex" % h1[dom][5][1]) + col += 18 + display(stdscr, row, col, "I/O Count") #row += 1 #stdscr.hline(row, 1, '-', maxx - 2) @@ -426,6 +470,9 @@ def show_livestats(): row += 1 + star = heartbeat * '*' + heartbeat = 1 - heartbeat + display(stdscr, row, 1, star) display(stdscr, row, 2, TOTALS % (total_h2_cpu, total_h1_cpu)) row += 1 # display(stdscr, row, 2, @@ -515,10 +562,10 @@ def writelog(): outfiles[dom].delayed_write("# passed cpu dom cpu(tot) cpu(%) cpu/ex allocated/ex blocked(tot) blocked(%) blocked/io waited(tot) waited(%) waited/ex ex/s io(tot) io/ex\n") while options.duration == 0 or interval < (options.duration * 1000): - for cpuidx in 
range(0, ncpu): + cpuidx = 0 + while cpuidx < ncpu: idx = cpuidx * slen # offset needed in mmap file - samples = [] doms = [] @@ -571,6 +618,7 @@ def writelog(): curr = time.time() interval += (curr - last) * 1000 last = curr + cpuidx = cpuidx + 1 time.sleep(options.interval / 1000.0) for dom in range(0, NDOMAINS): @@ -601,7 +649,7 @@ def main(): start_xenbaked() if options.live: - show_livestats() + show_livestats(options.cpu) else: try: writelog() diff -r 4ed269e73e95 -r 41823e46d6ac tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/xenstore/xenstored_core.c Tue Apr 18 09:35:40 2006 -0600 @@ -77,6 +77,10 @@ static void check_store(void); } while (0) +int quota_nb_entry_per_domain = 1000; +int quota_nb_watch_per_domain = 128; +int quota_max_entry_size = 2048; /* 2K */ + #ifdef TESTING static bool failtest = false; @@ -455,6 +459,10 @@ static bool write_node(struct connection data.dsize = 3*sizeof(uint32_t) + node->num_perms*sizeof(node->perms[0]) + node->datalen + node->childlen; + + if (data.dsize >= quota_max_entry_size) + goto error; + data.dptr = talloc_size(node, data.dsize); ((uint32_t *)data.dptr)[0] = node->num_perms; ((uint32_t *)data.dptr)[1] = node->datalen; @@ -470,10 +478,12 @@ static bool write_node(struct connection /* TDB should set errno, but doesn't even set ecode AFAICT. 
*/ if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) { corrupt(conn, "Write of %s = %s failed", key, data); - errno = ENOSPC; - return false; + goto error; } return true; + error: + errno = ENOSPC; + return false; } static enum xs_perm_type perm_for_conn(struct connection *conn, @@ -765,8 +775,11 @@ static void delete_node_single(struct co key.dptr = (void *)node->name; key.dsize = strlen(node->name); - if (tdb_delete(tdb_context(conn), key) != 0) + if (tdb_delete(tdb_context(conn), key) != 0) { corrupt(conn, "Could not delete '%s'", node->name); + return; + } + domain_entry_dec(conn); } /* Must not be / */ @@ -788,7 +801,10 @@ static struct node *construct_node(struc parent = construct_node(conn, parentname); if (!parent) return NULL; - + + if (domain_entry(conn) >= quota_nb_entry_per_domain) + return NULL; + /* Add child to parent. */ base = basename(name); baselen = strlen(base) + 1; @@ -814,6 +830,7 @@ static struct node *construct_node(struc node->children = node->data = NULL; node->childlen = node->datalen = 0; node->parent = parent; + domain_entry_inc(conn); return node; } @@ -848,8 +865,10 @@ static struct node *create_node(struct c /* We write out the nodes down, setting destructor in case * something goes wrong. 
*/ for (i = node; i; i = i->parent) { - if (!write_node(conn, i)) + if (!write_node(conn, i)) { + domain_entry_dec(conn); return NULL; + } talloc_set_destructor(i, destroy_node); } @@ -1706,6 +1725,9 @@ static void usage(void) " --no-fork to request that the daemon does not fork,\n" " --output-pid to request that the pid of the daemon is output,\n" " --trace-file <file> giving the file for logging, and\n" +" --entry-nb <nb> limit the number of entries per domain,\n" +" --entry-size <size> limit the size of entry per domain, and\n" +" --entry-watch <nb> limit the number of watches per domain,\n" " --no-recovery to request that no recovery should be attempted when\n" " the store is corrupted (debug only),\n" " --preserve-local to request that /local is preserved on start-up,\n" @@ -1715,14 +1737,17 @@ static void usage(void) static struct option options[] = { { "no-domain-init", 0, NULL, 'D' }, + { "entry-nb", 1, NULL, 'E' }, { "pid-file", 1, NULL, 'F' }, { "help", 0, NULL, 'H' }, { "no-fork", 0, NULL, 'N' }, { "output-pid", 0, NULL, 'P' }, + { "entry-size", 1, NULL, 'S' }, { "trace-file", 1, NULL, 'T' }, { "no-recovery", 0, NULL, 'R' }, { "preserve-local", 0, NULL, 'L' }, { "verbose", 0, NULL, 'V' }, + { "watch-nb", 1, NULL, 'W' }, { NULL, 0, NULL, 0 } }; extern void dump_conn(struct connection *conn); @@ -1737,11 +1762,14 @@ int main(int argc, char *argv[]) bool no_domain_init = false; const char *pidfile = NULL; - while ((opt = getopt_long(argc, argv, "DF:HNPT:RLV", options, + while ((opt = getopt_long(argc, argv, "DE:F:HNPS:T:RLVW:", options, NULL)) != -1) { switch (opt) { case 'D': no_domain_init = true; + break; + case 'E': + quota_nb_entry_per_domain = strtol(optarg, NULL, 10); break; case 'F': pidfile = optarg; @@ -1761,11 +1789,17 @@ int main(int argc, char *argv[]) case 'L': remove_local = false; break; + case 'S': + quota_max_entry_size = strtol(optarg, NULL, 10); + break; case 'T': tracefile = optarg; break; case 'V': verbose = true; + break; + case 'W': 
+ quota_nb_watch_per_domain = strtol(optarg, NULL, 10); break; } } diff -r 4ed269e73e95 -r 41823e46d6ac tools/xenstore/xenstored_domain.c --- a/tools/xenstore/xenstored_domain.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/xenstore/xenstored_domain.c Tue Apr 18 09:35:40 2006 -0600 @@ -74,6 +74,12 @@ struct domain /* Have we noticed that this domain is shutdown? */ int shutdown; + + /* number of entry from this domain in the store */ + int nbentry; + + /* number of watch for this domain */ + int nbwatch; }; static LIST_HEAD(domains); @@ -285,6 +291,8 @@ static struct domain *new_domain(void *c domain->conn->id = domid; domain->remote_port = port; + domain->nbentry = 0; + domain->nbwatch = 0; return domain; } @@ -562,6 +570,50 @@ int domain_init(void) return eventchn_fd; } +void domain_entry_inc(struct connection *conn) +{ + if (!conn || !conn->domain) + return; + conn->domain->nbentry++; +} + +void domain_entry_dec(struct connection *conn) +{ + if (!conn || !conn->domain) + return; + if (conn->domain->nbentry) + conn->domain->nbentry--; +} + +int domain_entry(struct connection *conn) +{ + return (conn && conn->domain && conn->domain->domid) + ? conn->domain->nbentry + : 0; +} + +void domain_watch_inc(struct connection *conn) +{ + if (!conn || !conn->domain) + return; + conn->domain->nbwatch++; +} + +void domain_watch_dec(struct connection *conn) +{ + if (!conn || !conn->domain) + return; + if (conn->domain->nbwatch) + conn->domain->nbwatch--; +} + +int domain_watch(struct connection *conn) +{ + return (conn && conn->domain && conn->domain->domid) + ? 
conn->domain->nbwatch + : 0; +} + /* * Local variables: * c-file-style: "linux" diff -r 4ed269e73e95 -r 41823e46d6ac tools/xenstore/xenstored_domain.h --- a/tools/xenstore/xenstored_domain.h Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/xenstore/xenstored_domain.h Tue Apr 18 09:35:40 2006 -0600 @@ -47,4 +47,12 @@ bool domain_can_read(struct connection * bool domain_can_read(struct connection *conn); bool domain_can_write(struct connection *conn); +/* Quota manipulation */ +void domain_entry_inc(struct connection *conn); +void domain_entry_dec(struct connection *conn); +int domain_entry(struct connection *conn); +void domain_watch_inc(struct connection *conn); +void domain_watch_dec(struct connection *conn); +int domain_watch(struct connection *conn); + #endif /* _XENSTORED_DOMAIN_H */ diff -r 4ed269e73e95 -r 41823e46d6ac tools/xenstore/xenstored_watch.c --- a/tools/xenstore/xenstored_watch.c Mon Apr 17 08:47:36 2006 -0600 +++ b/tools/xenstore/xenstored_watch.c Tue Apr 18 09:35:40 2006 -0600 @@ -32,6 +32,8 @@ #include "xenstored_test.h" #include "xenstored_domain.h" +extern int quota_nb_watch_per_domain; + struct watch { /* Watches on this connection */ @@ -135,6 +137,11 @@ void do_watch(struct connection *conn, s } } + if (domain_watch(conn) > quota_nb_watch_per_domain) { + send_error(conn, E2BIG); + return; + } + watch = talloc(conn, struct watch); watch->node = talloc_strdup(watch, vec[0]); watch->token = talloc_strdup(watch, vec[1]); @@ -145,6 +152,7 @@ void do_watch(struct connection *conn, s INIT_LIST_HEAD(&watch->events); + domain_watch_inc(conn); list_add_tail(&watch->list, &conn->watches); trace_create(watch, "watch"); talloc_set_destructor(watch, destroy_watch); @@ -169,6 +177,7 @@ void do_unwatch(struct connection *conn, if (streq(watch->node, node) && streq(watch->token, vec[1])) { list_del(&watch->list); talloc_free(watch); + domain_watch_dec(conn); send_ack(conn, XS_UNWATCH); return; } diff -r 4ed269e73e95 -r 41823e46d6ac xen/Makefile --- a/xen/Makefile 
Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/Makefile Tue Apr 18 09:35:40 2006 -0600 @@ -10,19 +10,22 @@ export BASEDIR := $(CURDIR) .PHONY: default default: build -ifeq ($(XEN_ROOT),) +.PHONY: dist +dist: install -.PHONY: build install clean -build install clean: - make -f Rules.mk $@ +.PHONY: debug +debug: + objdump -D -S $(TARGET)-syms > $(TARGET).s -else +.PHONY: build install clean cscope TAGS tags +build install clean cscope TAGS tags:: + make -f Rules.mk _$@ -.PHONY: build -build: $(TARGET).gz +.PHONY: _build +_build: $(TARGET).gz -.PHONY: install -install: $(TARGET).gz +.PHONY: _install +_install: $(TARGET).gz [ -d $(DESTDIR)/boot ] || $(INSTALL_DIR) $(DESTDIR)/boot $(INSTALL_DATA) $(TARGET).gz $(DESTDIR)/boot/$(notdir $(TARGET))-$(XEN_FULLVERSION).gz ln -f -s $(notdir $(TARGET))-$(XEN_FULLVERSION).gz $(DESTDIR)/boot/$(notdir $(TARGET))-$(XEN_VERSION).$(XEN_SUBVERSION).gz @@ -35,8 +38,8 @@ install: $(TARGET).gz $(INSTALL_DATA) include/public/io/*.h $(DESTDIR)/usr/include/xen/io $(INSTALL_DATA) include/public/COPYING $(DESTDIR)/usr/include/xen -.PHONY: clean -clean:: delete-unfresh-files +.PHONY: _clean +_clean: delete-unfresh-files $(MAKE) -C tools clean $(MAKE) -f $(BASEDIR)/Rules.mk -C common clean $(MAKE) -f $(BASEDIR)/Rules.mk -C drivers clean @@ -45,15 +48,6 @@ clean:: delete-unfresh-files rm -f include/asm *.o $(TARGET)* *~ core rm -f include/asm-*/asm-offsets.h rm -f include/xen/acm_policy.h - -endif - -.PHONY: dist -dist: install - -.PHONY: debug -debug: - objdump -D -S $(TARGET)-syms > $(TARGET).s $(TARGET).gz: $(TARGET) gzip -f -9 < $< > $@.new @@ -135,16 +129,16 @@ define all_sources find $(SUBDIRS) -name SCCS -prune -o -name '*.[chS]' -print ) endef -.PHONY: TAGS -TAGS: +.PHONY: _TAGS +_TAGS: $(all_sources) | etags - -.PHONY: tags -tags: +.PHONY: _tags +_tags: $(all_sources) | xargs ctags -.PHONY: cscope -cscope: +.PHONY: _cscope +_cscope: $(all_sources) > cscope.files cscope -k -b -q diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/Makefile --- 
a/xen/arch/x86/Makefile Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/Makefile Tue Apr 18 09:35:40 2006 -0600 @@ -76,6 +76,7 @@ boot/mkelf32: boot/mkelf32.c $(HOSTCC) $(HOSTCFLAGS) -o $@ $< shadow_guest32.o: shadow.c +shadow_guest32pae.o: shadow.c .PHONY: clean clean:: diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/audit.c --- a/xen/arch/x86/audit.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/audit.c Tue Apr 18 09:35:40 2006 -0600 @@ -639,7 +639,7 @@ void _audit_domain(struct domain *d, int void scan_for_pfn_in_grant_table(struct domain *d, unsigned xmfn) { int i; - active_grant_entry_t *act = d->grant_table->active; + struct active_grant_entry *act = d->grant_table->active; spin_lock(&d->grant_table->lock); diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/hvm/intercept.c Tue Apr 18 09:35:40 2006 -0600 @@ -208,8 +208,9 @@ int register_io_handler(unsigned long ad static void pit_cal_count(struct hvm_virpit *vpit) { - u64 nsec_delta = (unsigned int)((NOW() - vpit->inject_point)); - + u64 nsec_delta = (unsigned int)((NOW() - vpit->count_point)); + + nsec_delta += vpit->count_advance; if (nsec_delta > vpit->period) HVM_DBG_LOG(DBG_LEVEL_1, "HVM_PIT: long time has passed from last injection!"); diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/hvm/svm/intr.c Tue Apr 18 09:35:40 2006 -0600 @@ -79,7 +79,8 @@ interrupt_post_injection(struct vcpu * v } else { vpit->pending_intr_nr--; } - vpit->inject_point = NOW(); + vpit->count_advance = 0; + vpit->count_point = NOW(); vpit->last_pit_gtime += vpit->period_cycles; svm_set_guest_time(v, vpit->last_pit_gtime); diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Tue Apr 18 09:35:40 2006 -0600 @@ -315,19 
+315,29 @@ static inline int long_mode_do_msr_write { case MSR_EFER: #ifdef __x86_64__ - if ((msr_content & EFER_LME) ^ test_bit(SVM_CPU_STATE_LME_ENABLED, - &vc->arch.hvm_svm.cpu_state)) + /* offending reserved bit will cause #GP */ + if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) ) { - if (test_bit(SVM_CPU_STATE_PG_ENABLED, &vc->arch.hvm_svm.cpu_state) - || !test_bit(SVM_CPU_STATE_PAE_ENABLED, - &vc->arch.hvm_svm.cpu_state)) + printk("trying to set reserved bit in EFER\n"); + svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0); + return 0; + } + + /* LME: 0 -> 1 */ + if ( msr_content & EFER_LME && + !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state) ) + { + if ( svm_paging_enabled(vc) || + !test_bit(SVM_CPU_STATE_PAE_ENABLED, + &vc->arch.hvm_svm.cpu_state) ) { + printk("trying to set LME bit when " + "in paging mode or PAE bit is not set\n"); svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0); + return 0; } - } - - if (msr_content & EFER_LME) set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state); + } /* We have already recorded that we want LME, so it will be set * next time CR0 gets updated. So we clear that bit and continue. 
@@ -669,6 +679,7 @@ static void svm_freeze_time(struct vcpu if ( vpit->first_injected && !v->domain->arch.hvm_domain.guest_time ) { v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v); + vpit->count_advance += (NOW() - vpit->count_point); stop_timer(&(vpit->pit_timer)); } } @@ -757,7 +768,8 @@ void arch_svm_do_resume(struct vcpu *v) reset_stack_and_jump( svm_asm_do_resume ); } else { - printk("VCPU core pinned: %d to %d\n", v->arch.hvm_svm.launch_core, smp_processor_id() ); + printk("VCPU core pinned: %d to %d\n", + v->arch.hvm_svm.launch_core, smp_processor_id() ); v->arch.hvm_svm.launch_core = smp_processor_id(); svm_migrate_timers( v ); svm_do_resume( v ); @@ -922,6 +934,7 @@ static void svm_vmexit_do_cpuid(struct v clear_bit(X86_FEATURE_APIC, &edx); #if CONFIG_PAGING_LEVELS < 3 + clear_bit(X86_FEATURE_NX, &edx); clear_bit(X86_FEATURE_PAE, &edx); clear_bit(X86_FEATURE_PSE, &edx); clear_bit(X86_FEATURE_PSE36, &edx); @@ -929,12 +942,14 @@ static void svm_vmexit_do_cpuid(struct v if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 ) { if ( !v->domain->arch.hvm_domain.pae_enabled ) - clear_bit(X86_FEATURE_PAE, &edx); + { + clear_bit(X86_FEATURE_PAE, &edx); + clear_bit(X86_FEATURE_NX, &edx); + } clear_bit(X86_FEATURE_PSE, &edx); clear_bit(X86_FEATURE_PSE36, &edx); } -#endif - +#endif /* Clear out reserved bits. */ ecx &= ~SVM_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */ clear_bit(X86_FEATURE_MWAIT & 31, &ecx); @@ -1312,8 +1327,7 @@ static int svm_set_cr0(unsigned long val unsigned long mfn; int paging_enabled; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - unsigned long crn; - + ASSERT(vmcb); /* We don't want to lose PG. 
ET is reserved and should be always be 1*/ @@ -1358,35 +1372,37 @@ static int svm_set_cr0(unsigned long val set_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state); vmcb->efer |= (EFER_LMA | EFER_LME); - -#if CONFIG_PAGING_LEVELS >= 4 - if (!shadow_set_guest_paging_levels(v->domain, 4)) + if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) ) { printk("Unsupported guest paging levels\n"); domain_crash_synchronous(); /* need to take a clean path */ } -#endif } else #endif /* __x86_64__ */ { #if CONFIG_PAGING_LEVELS >= 3 - if (!shadow_set_guest_paging_levels(v->domain, 2)) + /* seems it's a 32-bit or 32-bit PAE guest */ + if ( test_bit(SVM_CPU_STATE_PAE_ENABLED, + &v->arch.hvm_svm.cpu_state) ) { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); /* need to take a clean path */ + /* The guest enables PAE first and then it enables PG, it is + * really a PAE guest */ + if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) ) + { + printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); + } + } + else + { + if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) ) + { + printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); /* need to take a clean path */ + } } #endif - } - - /* update CR4's PAE if needed */ - crn = vmcb->cr4; - if ((!(crn & X86_CR4_PAE)) - && test_bit(SVM_CPU_STATE_PAE_ENABLED, - &v->arch.hvm_svm.cpu_state)) - { - HVM_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n"); - vmcb->cr4 |= X86_CR4_PAE; } /* Now arch.guest_table points to machine physical. 
*/ @@ -1402,7 +1418,16 @@ static int svm_set_cr0(unsigned long val /* arch->shadow_table should hold the next CR3 for shadow */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n", v->arch.hvm_svm.cpu_cr3, mfn); - } + + return 1; + } + + if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled ) + if ( v->arch.hvm_svm.cpu_cr3 ) { + put_page(mfn_to_page(get_mfn_from_gpfn( + v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))); + v->arch.guest_table = mk_pagetable(0); + } /* * SVM implements paged real-mode and when we return to real-mode @@ -1415,6 +1440,14 @@ static int svm_set_cr0(unsigned long val return 0; } + clear_all_shadow_status( v->domain ); + set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags); + vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table); + } + else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE ) + { + /* we should take care of this kind of situation */ + clear_all_shadow_status(v->domain); set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags); vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table); } @@ -1438,15 +1471,21 @@ static void mov_from_cr(int cr, int gp, { case 0: value = v->arch.hvm_svm.cpu_shadow_cr0; - break; + if (svm_dbg_on) + printk("CR0 read =%lx \n", value ); + break; case 2: value = vmcb->cr2; break; case 3: value = (unsigned long) v->arch.hvm_svm.cpu_cr3; - break; + if (svm_dbg_on) + printk("CR3 read =%lx \n", value ); + break; case 4: value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4; + if (svm_dbg_on) + printk( "CR4 read=%lx\n", value ); break; case 8: #if 0 @@ -1466,6 +1505,12 @@ static void mov_from_cr(int cr, int gp, } +static inline int svm_pgbit_test(struct vcpu *v) +{ + return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG; +} + + /* * Write to control registers */ @@ -1486,12 +1531,15 @@ static int mov_to_cr(int gpreg, int cr, switch (cr) { case 0: + if (svm_dbg_on) + printk("CR0 write =%lx \n", value ); return svm_set_cr0(value); case 3: { unsigned long 
old_base_mfn, mfn; - + if (svm_dbg_on) + printk("CR3 write =%lx \n", value ); /* If paging is not enabled yet, simply copy the value to CR3. */ if (!svm_paging_enabled(v)) { v->arch.hvm_svm.cpu_cr3 = value; @@ -1533,19 +1581,104 @@ static int mov_to_cr(int gpreg, int cr, if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); + /* + * arch.shadow_table should now hold the next CR3 for shadow + */ +#if CONFIG_PAGING_LEVELS >= 3 + if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 ) + shadow_sync_all(v->domain); +#endif + v->arch.hvm_svm.cpu_cr3 = value; update_pagetables(v); - - /* arch.shadow_table should now hold the next CR3 for shadow*/ - v->arch.hvm_svm.cpu_cr3 = value; HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table); } break; } - case 4: - /* CR4 */ - if (value & X86_CR4_PAE) { + case 4: /* CR4 */ + { + if (svm_dbg_on) + printk( "write cr4=%lx, cr0=%lx\n", + value, v->arch.hvm_svm.cpu_shadow_cr0 ); + old_cr = v->arch.hvm_svm.cpu_shadow_cr4; + if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) ) + { + set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); + if ( svm_pgbit_test(v) ) + { + /* The guest is a 32-bit PAE guest. */ +#if CONFIG_PAGING_LEVELS >= 4 + unsigned long mfn, old_base_mfn; + + if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) ) + { + printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); /* need to take a clean path */ + } + + if ( !VALID_MFN(mfn = get_mfn_from_gpfn( + v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) || + !get_page(mfn_to_page(mfn), v->domain) ) + { + printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3); + domain_crash_synchronous(); /* need to take a clean path */ + } + + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); + if ( old_base_mfn ) + put_page(mfn_to_page(old_base_mfn)); + + /* + * Now arch.guest_table points to machine physical. 
+ */ + + v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT); + update_pagetables(v); + + HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", + (unsigned long) (mfn << PAGE_SHIFT)); + + vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table); + + /* + * arch->shadow_table should hold the next CR3 for shadow + */ + + HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", + v->arch.hvm_svm.cpu_cr3, mfn); +#endif + } + else + { + /* The guest is a 64 bit or 32-bit PAE guest. */ +#if CONFIG_PAGING_LEVELS >= 4 + if ( (v->domain->arch.ops != NULL) && + v->domain->arch.ops->guest_paging_levels == PAGING_L2) + { + /* Seems the guest first enables PAE without enabling PG, + * it must enable PG after that, and it is a 32-bit PAE + * guest */ + + if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) ) + { + printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); + } + } + else + { + if ( !shadow_set_guest_paging_levels(v->domain, + PAGING_L4) ) + { + printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); + } + } +#endif + } + } + else if (value & X86_CR4_PAE) { set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); } else { if (test_bit(SVM_CPU_STATE_LMA_ENABLED, @@ -1555,7 +1688,6 @@ static int mov_to_cr(int gpreg, int cr, clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); } - old_cr = v->arch.hvm_svm.cpu_shadow_cr4; v->arch.hvm_svm.cpu_shadow_cr4 = value; vmcb->cr4 = value | SVM_CR4_HOST_MASK; @@ -1569,6 +1701,7 @@ static int mov_to_cr(int gpreg, int cr, shadow_sync_all(v->domain); } break; + } default: printk("invalid cr: %d\n", cr); @@ -1933,6 +2066,7 @@ static int svm_do_vmmcall_reset_to_realm vmcb->cr4 = SVM_CR4_HOST_MASK; v->arch.hvm_svm.cpu_shadow_cr4 = 0; + clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); /* This will jump to ROMBIOS */ vmcb->rip = 0xFFF0; @@ -1989,6 +2123,7 @@ static int svm_do_vmmcall_reset_to_realm vmcb->idtr.base = 0x00; vmcb->rax = 0; + 
vmcb->rsp = 0; return 0; } @@ -2280,7 +2415,8 @@ void walk_shadow_and_guest_pt(unsigned l gpte.l1 = 0; __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], sizeof(gpte) ); printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) ); - __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ], sizeof(spte) ); + __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ], + sizeof(spte) ); printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte)); } #endif /* SVM_WALK_GUEST_PAGES */ @@ -2313,6 +2449,17 @@ asmlinkage void svm_vmexit_handler(struc if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) { if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2))) + { + printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, gpa=%llx\n", + intercepts_counter, + exit_reasons[exit_reason], exit_reason, regs.cs, + (unsigned long long) regs.rip, + (unsigned long long) vmcb->exitinfo1, + (unsigned long long) vmcb->exitinfo2, + (unsigned long long) vmcb->exitintinfo.bytes, + (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) ); + } + else { printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n", intercepts_counter, @@ -2320,12 +2467,12 @@ asmlinkage void svm_vmexit_handler(struc (unsigned long long) regs.rip, (unsigned long long) vmcb->exitinfo1, (unsigned long long) vmcb->exitinfo2, - (unsigned long long) vmcb->exitintinfo.bytes); + (unsigned long long) vmcb->exitintinfo.bytes ); } } - else if (svm_dbg_on - && exit_reason != VMEXIT_IOIO - && exit_reason != VMEXIT_INTR) + else if ( svm_dbg_on + && exit_reason != VMEXIT_IOIO + && exit_reason != VMEXIT_INTR) { if (exit_reasons[exit_reason]) @@ -2350,7 +2497,9 @@ asmlinkage void svm_vmexit_handler(struc } #ifdef SVM_WALK_GUEST_PAGES - if( exit_reason == VMEXIT_EXCEPTION_PF && ( ( vmcb->exitinfo2 == vmcb->rip )|| vmcb->exitintinfo.bytes) ) + if( exit_reason == VMEXIT_EXCEPTION_PF + && ( ( vmcb->exitinfo2 == vmcb->rip ) + || vmcb->exitintinfo.bytes) ) { 
if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2))) walk_shadow_and_guest_pt( vmcb->exitinfo2 ); @@ -2434,13 +2583,24 @@ asmlinkage void svm_vmexit_handler(struc */ break; + case VMEXIT_INIT: + /* + * Nothing to do, in fact we should never get to this point. + */ + break; + + case VMEXIT_EXCEPTION_BP: #ifdef XEN_DEBUGGER - case VMEXIT_EXCEPTION_BP: svm_debug_save_cpu_user_regs(®s); pdb_handle_exception(3, ®s, 1); svm_debug_restore_cpu_user_regs(®s); - break; +#else + if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) ) + domain_pause_for_debugger(); + else + svm_inject_exception(vmcb, TRAP_int3, 0, 0); #endif + break; case VMEXIT_EXCEPTION_NM: svm_do_no_device_fault(vmcb); diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/hvm/svm/vmcb.c Tue Apr 18 09:35:40 2006 -0600 @@ -257,7 +257,8 @@ static int construct_init_vmcb_guest(str /* CR3 is set in svm_final_setup_guest */ __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) :); - arch_svm->cpu_shadow_cr4 = crn & ~(X86_CR4_PGE | X86_CR4_PSE); + crn &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE); + arch_svm->cpu_shadow_cr4 = crn; vmcb->cr4 = crn | SVM_CR4_HOST_MASK; vmcb->rsp = 0; @@ -484,6 +485,7 @@ void svm_do_resume(struct vcpu *v) if ( vpit->first_injected ) { if ( v->domain->arch.hvm_domain.guest_time ) { svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time); + vpit->count_point = NOW(); v->domain->arch.hvm_domain.guest_time = 0; } pickup_deactive_ticks(vpit); diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/io.c Tue Apr 18 09:35:40 2006 -0600 @@ -84,7 +84,8 @@ interrupt_post_injection(struct vcpu * v } else { vpit->pending_intr_nr--; } - vpit->inject_point = NOW(); + vpit->count_advance = 0; + vpit->count_point = NOW(); vpit->last_pit_gtime += vpit->period_cycles; set_guest_time(v, 
vpit->last_pit_gtime); @@ -208,6 +209,7 @@ void vmx_do_resume(struct vcpu *v) /* pick up the elapsed PIT ticks and re-enable pit_timer */ if ( vpit->first_injected ) { if ( v->domain->arch.hvm_domain.guest_time ) { + vpit->count_point = NOW(); set_guest_time(v, v->domain->arch.hvm_domain.guest_time); v->domain->arch.hvm_domain.guest_time = 0; } diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Apr 18 09:35:40 2006 -0600 @@ -362,6 +362,7 @@ static void vmx_freeze_time(struct vcpu if ( vpit->first_injected && !v->domain->arch.hvm_domain.guest_time ) { v->domain->arch.hvm_domain.guest_time = get_guest_time(v); + vpit->count_advance += (NOW() - vpit->count_point); stop_timer(&(vpit->pit_timer)); } } diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Tue Apr 18 09:35:40 2006 -0600 @@ -94,6 +94,7 @@ ENTRY(vmx_asm_vmexit_handler) ENTRY(vmx_asm_vmexit_handler) /* selectors are restored/saved by VMX */ HVM_SAVE_ALL_NOSEGREGS + call vmx_trace_vmexit call vmx_vmexit_handler jmp vmx_asm_do_resume @@ -114,6 +115,7 @@ 1: /* vmx_restore_all_guest */ call vmx_intr_assist call vmx_load_cr2 + call vmx_trace_vmentry .endif /* * Check if we are going back to VMX-based VM diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/i8259.c --- a/xen/arch/x86/i8259.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/i8259.c Tue Apr 18 09:35:40 2006 -0600 @@ -318,7 +318,7 @@ void __init init_8259A(int auto_eoi) * outb_p - this has to work on a wide range of PC hardware. 
*/ outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ - outb_p(0x20 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ + outb_p(FIRST_LEGACY_VECTOR + 0, 0x21); /* ICW2: 8259A-1 IR0-7 */ outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */ if (auto_eoi) outb_p(0x03, 0x21); /* master does Auto EOI */ @@ -326,7 +326,7 @@ void __init init_8259A(int auto_eoi) outb_p(0x01, 0x21); /* master expects normal EOI */ outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ - outb_p(0x20 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ + outb_p(FIRST_LEGACY_VECTOR + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 */ outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */ outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode is to be investigated) */ diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/io_apic.c Tue Apr 18 09:35:40 2006 -0600 @@ -657,11 +657,11 @@ static inline int IO_APIC_irq_trigger(in } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; int assign_irq_vector(int irq) { - static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; + static int current_vector = FIRST_DYNAMIC_VECTOR, offset = 0; BUG_ON(irq >= NR_IRQ_VECTORS); if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) @@ -677,11 +677,11 @@ next: if (current_vector == 0x80) goto next; - if (current_vector >= FIRST_SYSTEM_VECTOR) { + if (current_vector > LAST_DYNAMIC_VECTOR) { offset++; if (!(offset%8)) return -ENOSPC; - current_vector = FIRST_DEVICE_VECTOR + offset; + current_vector = FIRST_DYNAMIC_VECTOR + offset; } vector_irq[current_vector] = irq; @@ -1321,10 +1321,25 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } +int ioapic_ack_new = 1; +static void setup_ioapic_ack(char *s) +{ + if ( !strcmp(s, "old") ) + ioapic_ack_new = 0; + else if ( !strcmp(s, "new") ) + ioapic_ack_new = 1; + else + printk("Unknown ioapic_ack value specified: '%s'\n", s); +} +custom_param("ioapic_ack", setup_ioapic_ack); + static void mask_and_ack_level_ioapic_irq (unsigned int irq) { unsigned long v; int i; + + if ( ioapic_ack_new ) + return; mask_IO_APIC_irq(irq); /* @@ -1363,7 +1378,51 @@ static void mask_and_ack_level_ioapic_ir static void end_level_ioapic_irq (unsigned int irq) { - unmask_IO_APIC_irq(irq); + unsigned long v; + int i; + + if ( !ioapic_ack_new ) + { + if ( !(irq_desc[IO_APIC_VECTOR(irq)].status & IRQ_DISABLED) ) + unmask_IO_APIC_irq(irq); + return; + } + +/* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. 
As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = IO_APIC_VECTOR(irq); + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); + + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + spin_lock(&ioapic_lock); + __mask_IO_APIC_irq(irq); + __edge_IO_APIC_irq(irq); + __level_IO_APIC_irq(irq); + if ( !(irq_desc[IO_APIC_VECTOR(irq)].status & IRQ_DISABLED) ) + __unmask_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); + } } static unsigned int startup_edge_ioapic_vector(unsigned int vector) @@ -1695,6 +1754,7 @@ void __init setup_IO_APIC(void) io_apic_irqs = ~PIC_IRQS; printk("ENABLING IO-APIC IRQs\n"); + printk(" -> Using %s ACK method\n", ioapic_ack_new ? "new" : "old"); /* * Set up IO-APIC IRQ routing. 
@@ -1956,9 +2016,9 @@ int ioapic_guest_write(unsigned long phy return 0; } - if ( old_rte.vector >= FIRST_DEVICE_VECTOR ) + if ( old_rte.vector >= FIRST_DYNAMIC_VECTOR ) old_irq = vector_irq[old_rte.vector]; - if ( new_rte.vector >= FIRST_DEVICE_VECTOR ) + if ( new_rte.vector >= FIRST_DYNAMIC_VECTOR ) new_irq = vector_irq[new_rte.vector]; if ( (old_irq != new_irq) && (old_irq != -1) && IO_APIC_IRQ(old_irq) ) diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/irq.c Tue Apr 18 09:35:40 2006 -0600 @@ -148,8 +148,23 @@ typedef struct { u8 nr_guests; u8 in_flight; u8 shareable; + u8 ack_type; +#define ACKTYPE_NONE 0 /* No final acknowledgement is required */ +#define ACKTYPE_UNMASK 1 /* Unmask PIC hardware (from any CPU) */ +#define ACKTYPE_EOI 2 /* EOI on the CPU that was interrupted */ + cpumask_t cpu_eoi_map; /* CPUs that need to EOI this interrupt */ struct domain *guest[IRQ_MAX_GUESTS]; } irq_guest_action_t; + +/* + * Stack of interrupts awaiting EOI on each CPU. These must be popped in + * order, as only the current highest-priority pending irq can be EOIed. + */ +static struct { + u8 vector; /* Vector awaiting EOI */ + u8 ready; /* Ready for EOI now? */ +} pending_eoi[NR_CPUS][NR_VECTORS] __cacheline_aligned; +#define pending_eoi_sp(cpu) (pending_eoi[cpu][NR_VECTORS-1].vector) static void __do_IRQ_guest(int vector) { @@ -157,36 +172,209 @@ static void __do_IRQ_guest(int vector) irq_desc_t *desc = &irq_desc[vector]; irq_guest_action_t *action = (irq_guest_action_t *)desc->action; struct domain *d; - int i; + int i, sp, cpu = smp_processor_id(); + + if ( unlikely(action->nr_guests == 0) ) + { + /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. 
*/ + ASSERT(action->ack_type == ACKTYPE_EOI); + ASSERT(desc->status & IRQ_DISABLED); + desc->handler->end(vector); + return; + } + + if ( action->ack_type == ACKTYPE_EOI ) + { + sp = pending_eoi_sp(cpu); + ASSERT((sp == 0) || (pending_eoi[cpu][sp-1].vector < vector)); + ASSERT(sp < (NR_VECTORS-1)); + pending_eoi[cpu][sp].vector = vector; + pending_eoi[cpu][sp].ready = 0; + pending_eoi_sp(cpu) = sp+1; + cpu_set(cpu, action->cpu_eoi_map); + } for ( i = 0; i < action->nr_guests; i++ ) { d = action->guest[i]; - if ( !test_and_set_bit(irq, &d->pirq_mask) ) + if ( (action->ack_type != ACKTYPE_NONE) && + !test_and_set_bit(irq, &d->pirq_mask) ) action->in_flight++; send_guest_pirq(d, irq); } } +/* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */ +static void flush_ready_eoi(void *unused) +{ + irq_desc_t *desc; + int vector, sp, cpu = smp_processor_id(); + + ASSERT(!local_irq_is_enabled()); + + sp = pending_eoi_sp(cpu); + + while ( (--sp >= 0) && pending_eoi[cpu][sp].ready ) + { + vector = pending_eoi[cpu][sp].vector; + desc = &irq_desc[vector]; + spin_lock(&desc->lock); + desc->handler->end(vector); + spin_unlock(&desc->lock); + } + + pending_eoi_sp(cpu) = sp+1; +} + +static void __set_eoi_ready(irq_desc_t *desc) +{ + irq_guest_action_t *action = (irq_guest_action_t *)desc->action; + int vector, sp, cpu = smp_processor_id(); + + vector = desc - irq_desc; + + if ( !(desc->status & IRQ_GUEST) || + (action->in_flight != 0) || + !test_and_clear_bit(cpu, &action->cpu_eoi_map) ) + return; + + sp = pending_eoi_sp(cpu); + do { + ASSERT(sp > 0); + } while ( pending_eoi[cpu][--sp].vector != vector ); + ASSERT(!pending_eoi[cpu][sp].ready); + pending_eoi[cpu][sp].ready = 1; +} + +/* Mark specified IRQ as ready-for-EOI (if it really is) and attempt to EOI. 
*/ +static void set_eoi_ready(void *data) +{ + irq_desc_t *desc = data; + + ASSERT(!local_irq_is_enabled()); + + spin_lock(&desc->lock); + __set_eoi_ready(desc); + spin_unlock(&desc->lock); + + flush_ready_eoi(NULL); +} + +/* + * Forcibly flush all pending EOIs on this CPU by emulating end-of-ISR + * notifications from guests. The caller of this function must ensure that + * all CPUs execute flush_ready_eoi(). + */ +static void flush_all_pending_eoi(void *unused) +{ + irq_desc_t *desc; + irq_guest_action_t *action; + int i, vector, sp, cpu = smp_processor_id(); + + ASSERT(!local_irq_is_enabled()); + + sp = pending_eoi_sp(cpu); + while ( --sp >= 0 ) + { + if ( pending_eoi[cpu][sp].ready ) + continue; + vector = pending_eoi[cpu][sp].vector; + desc = &irq_desc[vector]; + spin_lock(&desc->lock); + action = (irq_guest_action_t *)desc->action; + ASSERT(action->ack_type == ACKTYPE_EOI); + ASSERT(desc->status & IRQ_GUEST); + for ( i = 0; i < action->nr_guests; i++ ) + clear_bit(vector_to_irq(vector), &action->guest[i]->pirq_mask); + action->in_flight = 0; + spin_unlock(&desc->lock); + } + + flush_ready_eoi(NULL); +} + int pirq_guest_unmask(struct domain *d) { - irq_desc_t *desc; - unsigned int pirq; - shared_info_t *s = d->shared_info; + irq_desc_t *desc; + irq_guest_action_t *action; + cpumask_t cpu_eoi_map = CPU_MASK_NONE; + unsigned int pirq, cpu = smp_processor_id(); + shared_info_t *s = d->shared_info; for ( pirq = find_first_bit(d->pirq_mask, NR_PIRQS); pirq < NR_PIRQS; pirq = find_next_bit(d->pirq_mask, NR_PIRQS, pirq+1) ) { - desc = &irq_desc[irq_to_vector(pirq)]; + desc = &irq_desc[irq_to_vector(pirq)]; + action = (irq_guest_action_t *)desc->action; + spin_lock_irq(&desc->lock); + if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) && - test_and_clear_bit(pirq, &d->pirq_mask) && - (--((irq_guest_action_t *)desc->action)->in_flight == 0) ) - desc->handler->end(irq_to_vector(pirq)); - spin_unlock_irq(&desc->lock); - } - + test_and_clear_bit(pirq, 
&d->pirq_mask) ) + { + ASSERT(action->ack_type != ACKTYPE_NONE); + if ( --action->in_flight == 0 ) + { + if ( action->ack_type == ACKTYPE_UNMASK ) + desc->handler->end(irq_to_vector(pirq)); + cpu_eoi_map = action->cpu_eoi_map; + } + } + + if ( __test_and_clear_bit(cpu, &cpu_eoi_map) ) + { + __set_eoi_ready(desc); + spin_unlock(&desc->lock); + flush_ready_eoi(NULL); + local_irq_enable(); + } + else + { + spin_unlock_irq(&desc->lock); + } + + if ( !cpus_empty(cpu_eoi_map) ) + { + on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 0); + cpu_eoi_map = CPU_MASK_NONE; + } + } + + return 0; +} + +extern int ioapic_ack_new; +int pirq_acktype(int irq) +{ + irq_desc_t *desc; + unsigned int vector; + + vector = irq_to_vector(irq); + if ( vector == 0 ) + return ACKTYPE_NONE; + + desc = &irq_desc[vector]; + + /* + * Edge-triggered IO-APIC interrupts need no final acknowledgement: + * we ACK early during interrupt processing. + */ + if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ) + return ACKTYPE_NONE; + + /* Legacy PIC interrupts can be acknowledged from any CPU. */ + if ( !strcmp(desc->handler->typename, "XT-PIC") ) + return ACKTYPE_UNMASK; + + /* + * Level-triggered IO-APIC interrupts need to be acknowledged on the CPU + * on which they were received. This is because we tickle the LAPIC to EOI. + */ + if ( !strcmp(desc->handler->typename, "IO-APIC-level") ) + return ioapic_ack_new ? 
ACKTYPE_EOI : ACKTYPE_UNMASK; + + BUG(); return 0; } @@ -202,6 +390,7 @@ int pirq_guest_bind(struct vcpu *v, int if ( (irq < 0) || (irq >= NR_IRQS) ) return -EINVAL; + retry: vector = irq_to_vector(irq); if ( vector == 0 ) return -EINVAL; @@ -230,10 +419,12 @@ int pirq_guest_bind(struct vcpu *v, int goto out; } - action->nr_guests = 0; - action->in_flight = 0; - action->shareable = will_share; - + action->nr_guests = 0; + action->in_flight = 0; + action->shareable = will_share; + action->ack_type = pirq_acktype(irq); + action->cpu_eoi_map = CPU_MASK_NONE; + desc->depth = 0; desc->status |= IRQ_GUEST; desc->status &= ~IRQ_DISABLED; @@ -251,6 +442,18 @@ int pirq_guest_bind(struct vcpu *v, int rc = -EBUSY; goto out; } + else if ( action->nr_guests == 0 ) + { + /* + * Indicates that an ACKTYPE_EOI interrupt is being released. + * Wait for that to happen before continuing. + */ + ASSERT(action->ack_type == ACKTYPE_EOI); + ASSERT(desc->status & IRQ_DISABLED); + spin_unlock_irqrestore(&desc->lock, flags); + cpu_relax(); + goto retry; + } if ( action->nr_guests == IRQ_MAX_GUESTS ) { @@ -271,6 +474,7 @@ int pirq_guest_unbind(struct domain *d, unsigned int vector = irq_to_vector(irq); irq_desc_t *desc = &irq_desc[vector]; irq_guest_action_t *action; + cpumask_t cpu_eoi_map; unsigned long flags; int i; @@ -280,28 +484,68 @@ int pirq_guest_unbind(struct domain *d, action = (irq_guest_action_t *)desc->action; - if ( test_and_clear_bit(irq, &d->pirq_mask) && - (--action->in_flight == 0) ) - desc->handler->end(vector); - - if ( action->nr_guests == 1 ) - { - desc->action = NULL; - xfree(action); - desc->depth = 1; - desc->status |= IRQ_DISABLED; - desc->status &= ~IRQ_GUEST; - desc->handler->shutdown(vector); - } - else - { - i = 0; - while ( action->guest[i] && (action->guest[i] != d) ) - i++; - memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1); - action->nr_guests--; - } - + i = 0; + while ( action->guest[i] && (action->guest[i] != d) ) + i++; + 
memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1); + action->nr_guests--; + + switch ( action->ack_type ) + { + case ACKTYPE_UNMASK: + if ( test_and_clear_bit(irq, &d->pirq_mask) && + (--action->in_flight == 0) ) + desc->handler->end(vector); + break; + case ACKTYPE_EOI: + /* NB. If #guests == 0 then we clear the eoi_map later on. */ + if ( test_and_clear_bit(irq, &d->pirq_mask) && + (--action->in_flight == 0) && + (action->nr_guests != 0) ) + { + cpu_eoi_map = action->cpu_eoi_map; + spin_unlock_irqrestore(&desc->lock, flags); + on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 0); + spin_lock_irqsave(&desc->lock, flags); + } + break; + } + + BUG_ON(test_bit(irq, &d->pirq_mask)); + + if ( action->nr_guests != 0 ) + goto out; + + BUG_ON(action->in_flight != 0); + + /* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */ + desc->depth = 1; + desc->status |= IRQ_DISABLED; + desc->handler->disable(vector); + + /* + * We may have a EOI languishing anywhere in one of the per-CPU + * EOI stacks. Forcibly flush the stack on every CPU where this might + * be the case. + */ + cpu_eoi_map = action->cpu_eoi_map; + if ( !cpus_empty(cpu_eoi_map) ) + { + BUG_ON(action->ack_type != ACKTYPE_EOI); + spin_unlock_irqrestore(&desc->lock, flags); + on_selected_cpus(cpu_eoi_map, flush_all_pending_eoi, NULL, 1, 1); + on_selected_cpus(cpu_online_map, flush_ready_eoi, NULL, 1, 1); + spin_lock_irqsave(&desc->lock, flags); + } + + BUG_ON(!cpus_empty(action->cpu_eoi_map)); + + desc->action = NULL; + xfree(action); + desc->status &= ~IRQ_GUEST; + desc->handler->shutdown(vector); + + out: spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -373,3 +617,41 @@ static int __init setup_dump_irqs(void) return 0; } __initcall(setup_dump_irqs); + +static struct timer end_irq_timer[NR_CPUS]; + +/* + * force_intack: Forcibly emit all pending EOIs on each CPU every second. + * Mainly useful for debugging or poking lazy guests ISRs. 
+ */ + +static void end_irq_timeout(void *unused) +{ + int cpu = smp_processor_id(); + + local_irq_disable(); + flush_all_pending_eoi(NULL); + local_irq_enable(); + + on_selected_cpus(cpu_online_map, flush_ready_eoi, NULL, 1, 0); + + set_timer(&end_irq_timer[cpu], NOW() + MILLISECS(1000)); +} + +static void __init __setup_irq_timeout(void *unused) +{ + int cpu = smp_processor_id(); + init_timer(&end_irq_timer[cpu], end_irq_timeout, NULL, cpu); + set_timer(&end_irq_timer[cpu], NOW() + MILLISECS(1000)); +} + +static int force_intack; +boolean_param("force_intack", force_intack); + +static int __init setup_irq_timeout(void) +{ + if ( force_intack ) + on_each_cpu(__setup_irq_timeout, NULL, 1, 1); + return 0; +} +__initcall(setup_irq_timeout); diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/physdev.c Tue Apr 18 09:35:40 2006 -0600 @@ -18,6 +18,9 @@ extern int extern int ioapic_guest_write( unsigned long physbase, unsigned int reg, u32 pval); +extern int +pirq_acktype( + int irq); /* * Demuxing hypercall. @@ -43,8 +46,7 @@ long do_physdev_op(GUEST_HANDLE(physdev_ if ( (irq < 0) || (irq >= NR_IRQS) ) break; op.u.irq_status_query.flags = 0; - /* Edge-triggered interrupts don't need an explicit unmask downcall. 
*/ - if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") ) + if ( pirq_acktype(irq) != 0 ) op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY; ret = 0; break; diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/shadow.c Tue Apr 18 09:35:40 2006 -0600 @@ -1531,14 +1531,10 @@ static void resync_pae_guest_l3(struct d idx = get_cr3_idxval(v); smfn = __shadow_status( - d, ((unsigned long)(idx << PGT_score_shift) | entry->gpfn), PGT_l4_shadow); - -#ifndef NDEBUG + d, ((unsigned long)(idx << PGT_pae_idx_shift) | entry->gpfn), PGT_l4_shadow); + if ( !smfn ) - { - BUG(); - } -#endif + continue; guest = (pgentry_64_t *)map_domain_page(entry->gmfn); snapshot = (pgentry_64_t *)map_domain_page(entry->snapshot_mfn); @@ -1550,9 +1546,35 @@ static void resync_pae_guest_l3(struct d if ( entry_has_changed( guest[index], snapshot[index], PAGE_FLAG_MASK) ) { + unsigned long gpfn; + + /* + * Looks like it's no longer a page table. 
+ */ + if ( unlikely(entry_get_value(guest[index]) & PAE_PDPT_RESERVED) ) + { + if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(shadow_l3[i])); + + shadow_l3[i] = entry_empty(); + continue; + } + + gpfn = entry_get_pfn(guest[index]); + + if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) ) + { + if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(shadow_l3[i])); + + shadow_l3[i] = entry_empty(); + continue; + } + validate_entry_change(d, &guest[index], &shadow_l3[i], PAGING_L3); } + if ( entry_get_value(guest[index]) != 0 ) max = i; @@ -1675,6 +1697,19 @@ static int resync_all(struct domain *d, guest_l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) ) { int error; + +#if CONFIG_PAGING_LEVELS == 4 + unsigned long gpfn; + + gpfn = guest_l1e_get_paddr(guest1[i]) >> PAGE_SHIFT; + + if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) ) + { + guest_l1_pgentry_t tmp_gl1e = guest_l1e_empty(); + validate_pte_change(d, tmp_gl1e, sl1e_p); + continue; + } +#endif error = validate_pte_change(d, guest1[i], sl1e_p); if ( error == -1 ) @@ -1698,6 +1733,7 @@ static int resync_all(struct domain *d, perfc_incrc(resync_l1); perfc_incr_histo(wpt_updates, changed, PT_UPDATES); perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES); + if ( d->arch.ops->guest_paging_levels >= PAGING_L3 && unshadow_l1 ) { pgentry_64_t l2e = { 0 }; @@ -1804,18 +1840,22 @@ static int resync_all(struct domain *d, for ( i = min_shadow; i <= max_shadow; i++ ) { if ( (i < min_snapshot) || (i > max_snapshot) || - entry_has_changed( - guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) ) + entry_has_changed( + guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) ) { - unsigned long gpfn; gpfn = entry_get_pfn(guest_pt[i]); /* - * Looks like it's longer a page table. + * Looks like it's no longer a page table. 
*/ if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) ) + { + if ( entry_get_flags(shadow_pt[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(shadow_pt[i])); + shadow_pt[i] = entry_empty(); continue; + } need_flush |= validate_entry_change( d, &guest_pt[i], &shadow_pt[i], @@ -1864,11 +1904,17 @@ static int resync_all(struct domain *d, unsigned long gpfn; gpfn = l4e_get_pfn(new_root_e); + /* - * Looks like it's longer a page table. + * Looks like it's no longer a page table. */ if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) ) + { + if ( l4e_get_flags(shadow4[i]) & _PAGE_PRESENT ) + put_shadow_ref(l4e_get_pfn(shadow4[i])); + shadow4[i] = l4e_empty(); continue; + } if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) { @@ -2372,7 +2418,7 @@ static void shadow_update_pagetables(str if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) { u32 index = get_cr3_idxval(v); - gpfn = (index << PGT_score_shift) | gpfn; + gpfn = ((unsigned long)index << PGT_pae_idx_shift) | gpfn; } #endif @@ -3233,8 +3279,35 @@ update_top_level_shadow(struct vcpu *v, int i; for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ ) + { + unsigned long gpfn; + + /* + * Looks like it's no longer a page table. 
+ */ + if ( unlikely(entry_get_value(gple[index*4+i]) & PAE_PDPT_RESERVED) ) + { + if ( entry_get_flags(sple[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(sple[i])); + + sple[i] = entry_empty(); + continue; + } + + gpfn = entry_get_pfn(gple[index*4+i]); + + if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) ) + { + if ( entry_get_flags(sple[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(sple[i])); + + sple[i] = entry_empty(); + continue; + } + validate_entry_change( v->domain, &gple[index*4+i], &sple[i], PAGING_L3); + } unmap_domain_page(sple); } diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/shadow32.c --- a/xen/arch/x86/shadow32.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/shadow32.c Tue Apr 18 09:35:40 2006 -0600 @@ -583,6 +583,13 @@ static void free_shadow_pages(struct dom { put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table)); v->arch.shadow_table = mk_pagetable(0); + + if ( shadow_mode_external(d) ) + { + if ( v->arch.shadow_vtable ) + unmap_domain_page_global(v->arch.shadow_vtable); + v->arch.shadow_vtable = NULL; + } } if ( v->arch.monitor_shadow_ref ) @@ -2886,7 +2893,7 @@ int shadow_fault(unsigned long va, struc SH_VVLOG("shadow_fault( va=%lx, code=%lu )", va, (unsigned long)regs->error_code); perfc_incrc(shadow_fault_calls); - + check_pagetable(v, "pre-sf"); /* @@ -2917,7 +2924,16 @@ int shadow_fault(unsigned long va, struc // the mapping is in-sync, so the check of the PDE's present bit, above, // covers this access. 
// - orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)]; + if ( __copy_from_user(&gpte, + &linear_pg_table[l1_linear_offset(va)], + sizeof(gpte)) ) { + printk("%s() failed, crashing domain %d " + "due to a unaccessible linear page table (gpde=%" PRIpte "), va=%lx\n", + __func__, d->domain_id, l2e_get_intpte(gpde), va); + domain_crash_synchronous(); + } + orig_gpte = gpte; + if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) ) { SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ") (gpde %" PRIpte ")", @@ -2928,7 +2944,7 @@ int shadow_fault(unsigned long va, struc } /* Write fault? */ - if ( regs->error_code & 2 ) + if ( regs->error_code & 2 ) { int allow_writes = 0; @@ -2942,7 +2958,7 @@ int shadow_fault(unsigned long va, struc else { /* Write fault on a read-only mapping. */ - SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")", + SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")", l1e_get_intpte(gpte)); perfc_incrc(shadow_fault_bail_ro_mapping); goto fail; @@ -2955,10 +2971,10 @@ int shadow_fault(unsigned long va, struc } /* User access violation in guest? */ - if ( unlikely((regs->error_code & 4) && + if ( unlikely((regs->error_code & 4) && !(l1e_get_flags(gpte) & _PAGE_USER))) { - SH_VVLOG("shadow_fault - EXIT: wr fault on super page (%" PRIpte ")", + SH_VVLOG("shadow_fault - EXIT: wr fault on super page (%" PRIpte ")", l1e_get_intpte(gpte)); goto fail; @@ -2980,7 +2996,7 @@ int shadow_fault(unsigned long va, struc /* Read-protection violation in guest? */ if ( unlikely((regs->error_code & 1) )) { - SH_VVLOG("shadow_fault - EXIT: read fault on super page (%" PRIpte ")", + SH_VVLOG("shadow_fault - EXIT: read fault on super page (%" PRIpte ")", l1e_get_intpte(gpte)); goto fail; @@ -3275,19 +3291,29 @@ void __update_pagetables(struct vcpu *v) void clear_all_shadow_status(struct domain *d) { + struct vcpu *v = current; + + /* + * Don't clean up while other vcpus are working. 
+ */ + if ( v->vcpu_id ) + return; + shadow_lock(d); + free_shadow_pages(d); free_shadow_ht_entries(d); - d->arch.shadow_ht = + d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets); if ( d->arch.shadow_ht == NULL ) { - printk("clear all shadow status:xmalloc fail\n"); + printk("clear all shadow status: xmalloc failed\n"); domain_crash_synchronous(); } memset(d->arch.shadow_ht, 0, shadow_ht_buckets * sizeof(struct shadow_status)); free_out_of_sync_entries(d); + shadow_unlock(d); } diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/shadow_public.c --- a/xen/arch/x86/shadow_public.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/shadow_public.c Tue Apr 18 09:35:40 2006 -0600 @@ -102,6 +102,15 @@ void free_shadow_pages(struct domain *d) int shadow_set_guest_paging_levels(struct domain *d, int levels) { + struct vcpu *v = current; + + /* + * Need to wait for VCPU0 to complete the on-going shadow ops. + */ + + if ( v->vcpu_id ) + return 1; + shadow_lock(d); switch(levels) { @@ -692,7 +701,6 @@ void free_shadow_page(unsigned long smfn void free_shadow_page(unsigned long smfn) { struct page_info *page = mfn_to_page(smfn); - unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask; struct domain *d = page_get_owner(mfn_to_page(gmfn)); unsigned long gpfn = mfn_to_gmfn(d, gmfn); @@ -709,10 +717,9 @@ void free_shadow_page(unsigned long smfn if ( !mfn ) gpfn |= (1UL << 63); } - if (d->arch.ops->guest_paging_levels == PAGING_L3) - if (type == PGT_l4_shadow ) { - gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_score_shift) | gpfn; - } + if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) + if ( type == PGT_l4_shadow ) + gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_pae_idx_shift) | gpfn; #endif delete_shadow_status(d, gpfn, gmfn, type); @@ -743,9 +750,24 @@ void free_shadow_page(unsigned long smfn #if CONFIG_PAGING_LEVELS >= 3 case PGT_l2_shadow: case PGT_l3_shadow: + shadow_demote(d, gpfn, gmfn); + free_shadow_tables(d, smfn, 
shadow_type_to_level(type)); + d->arch.shadow_page_count--; + break; + case PGT_l4_shadow: gpfn = gpfn & PGT_mfn_mask; - shadow_demote(d, gpfn, gmfn); + if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) + { + /* + * Since a single PDPT page can have multiple PDPs, it's possible + * that shadow_demote() has been already called for gmfn. + */ + if ( mfn_is_page_table(gmfn) ) + shadow_demote(d, gpfn, gmfn); + } else + shadow_demote(d, gpfn, gmfn); + free_shadow_tables(d, smfn, shadow_type_to_level(type)); d->arch.shadow_page_count--; break; @@ -898,6 +920,13 @@ void free_shadow_pages(struct domain *d) { put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table)); v->arch.shadow_table = mk_pagetable(0); + + if ( shadow_mode_external(d) ) + { + if ( v->arch.shadow_vtable ) + unmap_domain_page_global(v->arch.shadow_vtable); + v->arch.shadow_vtable = NULL; + } } if ( v->arch.monitor_shadow_ref ) @@ -2034,7 +2063,16 @@ void shadow_sync_and_drop_references( void clear_all_shadow_status(struct domain *d) { + struct vcpu *v = current; + + /* + * Don't clean up while other vcpus are working. 
+ */ + if ( v->vcpu_id ) + return; + shadow_lock(d); + free_shadow_pages(d); free_shadow_ht_entries(d); d->arch.shadow_ht = @@ -2047,6 +2085,7 @@ void clear_all_shadow_status(struct doma shadow_ht_buckets * sizeof(struct shadow_status)); free_out_of_sync_entries(d); + shadow_unlock(d); } diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/smp.c --- a/xen/arch/x86/smp.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/smp.c Tue Apr 18 09:35:40 2006 -0600 @@ -261,7 +261,7 @@ int smp_call_function( return on_selected_cpus(allbutself, func, info, retry, wait); } -extern int on_selected_cpus( +int on_selected_cpus( cpumask_t selected, void (*func) (void *info), void *info, diff -r 4ed269e73e95 -r 41823e46d6ac xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/arch/x86/smpboot.c Tue Apr 18 09:35:40 2006 -0600 @@ -41,6 +41,7 @@ #include <xen/irq.h> #include <xen/delay.h> #include <xen/softirq.h> +#include <xen/serial.h> #include <asm/current.h> #include <asm/mc146818rtc.h> #include <asm/desc.h> @@ -1231,12 +1232,25 @@ void __init smp_cpus_done(unsigned int m void __init smp_intr_init(void) { + int irq, seridx; + /* * IRQ0 must be given a fixed assignment and initialized, * because it's used before the IO-APIC is set up. */ - irq_vector[0] = FIRST_DEVICE_VECTOR; - vector_irq[FIRST_DEVICE_VECTOR] = 0; + irq_vector[0] = FIRST_HIPRIORITY_VECTOR; + vector_irq[FIRST_HIPRIORITY_VECTOR] = 0; + + /* + * Also ensure serial interrupts are high priority. We do not + * want them to be blocked by unacknowledged guest-bound interrupts. + */ + for (seridx = 0; seridx < 2; seridx++) { + if ((irq = serial_irq(seridx)) < 0) + continue; + irq_vector[irq] = FIRST_HIPRIORITY_VECTOR + seridx + 1; + vector_irq[FIRST_HIPRIORITY_VECTOR + seridx + 1] = irq; + } /* IPI for event checking. 
*/ set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt); diff -r 4ed269e73e95 -r 41823e46d6ac xen/common/dom0_ops.c --- a/xen/common/dom0_ops.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/common/dom0_ops.c Tue Apr 18 09:35:40 2006 -0600 @@ -581,20 +581,31 @@ long do_dom0_op(GUEST_HANDLE(dom0_op_t) case DOM0_SETDOMAINMAXMEM: { struct domain *d; + unsigned long new_max; + ret = -ESRCH; d = find_domain_by_id(op->u.setdomainmaxmem.domain); - if ( d != NULL ) - { - d->max_pages = op->u.setdomainmaxmem.max_memkb >> (PAGE_SHIFT-10); - put_domain(d); + if ( d == NULL ) + break; + + ret = -EINVAL; + new_max = op->u.setdomainmaxmem.max_memkb >> (PAGE_SHIFT-10); + + spin_lock(&d->page_alloc_lock); + if ( new_max >= d->tot_pages ) + { + d->max_pages = new_max; ret = 0; } + spin_unlock(&d->page_alloc_lock); + + put_domain(d); } break; case DOM0_SETDOMAINHANDLE: { - struct domain *d; + struct domain *d; ret = -ESRCH; d = find_domain_by_id(op->u.setdomainhandle.domain); if ( d != NULL ) diff -r 4ed269e73e95 -r 41823e46d6ac xen/common/grant_table.c --- a/xen/common/grant_table.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/common/grant_table.c Tue Apr 18 09:35:40 2006 -0600 @@ -41,21 +41,21 @@ static inline int get_maptrack_handle( - grant_table_t *t) + struct grant_table *t) { unsigned int h; if ( unlikely((h = t->maptrack_head) == (t->maptrack_limit - 1)) ) return -1; - t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT; + t->maptrack_head = t->maptrack[h].ref; t->map_count++; return h; } static inline void put_maptrack_handle( - grant_table_t *t, int handle) -{ - t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT; + struct grant_table *t, int handle) +{ + t->maptrack[handle].ref = t->maptrack_head; t->maptrack_head = handle; t->map_count--; } @@ -76,7 +76,7 @@ __gnttab_map_grant_ref( int handle; unsigned long frame = 0; int rc = GNTST_okay; - active_grant_entry_t *act; + struct active_grant_entry *act; /* Entry details from @rd's shared 
grant table. */ grant_entry_t *sha; @@ -123,9 +123,9 @@ __gnttab_map_grant_ref( /* Get a maptrack handle. */ if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) ) { - int i; - grant_mapping_t *new_mt; - grant_table_t *lgt = ld->grant_table; + int i; + struct grant_mapping *new_mt; + struct grant_table *lgt = ld->grant_table; if ( (lgt->maptrack_limit << 1) > MAPTRACK_MAX_ENTRIES ) { @@ -147,7 +147,7 @@ __gnttab_map_grant_ref( memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order); for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ ) - new_mt[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; + new_mt[i].ref = i+1; free_xenheap_pages(lgt->maptrack, lgt->maptrack_order); lgt->maptrack = new_mt; @@ -264,10 +264,9 @@ __gnttab_map_grant_ref( TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom); - ld->grant_table->maptrack[handle].domid = op->dom; - ld->grant_table->maptrack[handle].ref_and_flags = - (op->ref << MAPTRACK_REF_SHIFT) | - (op->flags & MAPTRACK_GNTMAP_MASK); + ld->grant_table->maptrack[handle].domid = op->dom; + ld->grant_table->maptrack[handle].ref = op->ref; + ld->grant_table->maptrack[handle].flags = op->flags; op->dev_bus_addr = (u64)frame << PAGE_SHIFT; op->handle = handle; @@ -326,9 +325,9 @@ __gnttab_unmap_grant_ref( domid_t dom; grant_ref_t ref; struct domain *ld, *rd; - active_grant_entry_t *act; + struct active_grant_entry *act; grant_entry_t *sha; - grant_mapping_t *map; + struct grant_mapping *map; u16 flags; s16 rc = 0; unsigned long frame; @@ -340,7 +339,7 @@ __gnttab_unmap_grant_ref( map = &ld->grant_table->maptrack[op->handle]; if ( unlikely(op->handle >= ld->grant_table->maptrack_limit) || - unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) ) + unlikely(!map->flags) ) { DPRINTK("Bad handle (%d).\n", op->handle); op->status = GNTST_bad_handle; @@ -348,8 +347,8 @@ __gnttab_unmap_grant_ref( } dom = map->domid; - ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; - flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK; 
+ ref = map->ref; + flags = map->flags; if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || unlikely(ld == rd) ) @@ -380,7 +379,7 @@ __gnttab_unmap_grant_ref( if ( flags & GNTMAP_device_map ) { ASSERT(act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask)); - map->ref_and_flags &= ~GNTMAP_device_map; + map->flags &= ~GNTMAP_device_map; if ( flags & GNTMAP_readonly ) { act->pin -= GNTPIN_devr_inc; @@ -401,7 +400,7 @@ __gnttab_unmap_grant_ref( goto unmap_out; ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)); - map->ref_and_flags &= ~GNTMAP_host_map; + map->flags &= ~GNTMAP_host_map; if ( flags & GNTMAP_readonly ) { act->pin -= GNTPIN_hstr_inc; @@ -414,9 +413,9 @@ __gnttab_unmap_grant_ref( } } - if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0 ) - { - map->ref_and_flags = 0; + if ( (map->flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0 ) + { + map->flags = 0; put_maptrack_handle(ld->grant_table, op->handle); } @@ -534,12 +533,12 @@ gnttab_prepare_for_transfer( gnttab_prepare_for_transfer( struct domain *rd, struct domain *ld, grant_ref_t ref) { - grant_table_t *rgt; - grant_entry_t *sha; - domid_t sdom; - u16 sflags; - u32 scombo, prev_scombo; - int retries = 0; + struct grant_table *rgt; + struct grant_entry *sha; + domid_t sdom; + u16 sflags; + u32 scombo, prev_scombo; + int retries = 0; if ( unlikely((rgt = rd->grant_table) == NULL) || unlikely(ref >= NR_GRANT_ENTRIES) ) @@ -775,10 +774,11 @@ grant_table_create( grant_table_create( struct domain *d) { - grant_table_t *t; - int i; - - if ( (t = xmalloc(grant_table_t)) == NULL ) + struct grant_table *t; + int i; + + BUG_ON(MAPTRACK_MAX_ENTRIES < NR_GRANT_ENTRIES); + if ( (t = xmalloc(struct grant_table)) == NULL ) goto no_mem; /* Simple stuff. */ @@ -786,19 +786,19 @@ grant_table_create( spin_lock_init(&t->lock); /* Active grant table. 
*/ - if ( (t->active = xmalloc_array(active_grant_entry_t, NR_GRANT_ENTRIES)) - == NULL ) + t->active = xmalloc_array(struct active_grant_entry, NR_GRANT_ENTRIES); + if ( t->active == NULL ) goto no_mem; - memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES); + memset(t->active, 0, sizeof(struct active_grant_entry) * NR_GRANT_ENTRIES); /* Tracking of mapped foreign frames table */ if ( (t->maptrack = alloc_xenheap_page()) == NULL ) goto no_mem; t->maptrack_order = 0; - t->maptrack_limit = PAGE_SIZE / sizeof(grant_mapping_t); + t->maptrack_limit = PAGE_SIZE / sizeof(struct grant_mapping); memset(t->maptrack, 0, PAGE_SIZE); for ( i = 0; i < t->maptrack_limit; i++ ) - t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; + t->maptrack[i].ref = i+1; /* Shared grant table. */ t->shared = alloc_xenheap_pages(ORDER_GRANT_FRAMES); @@ -828,27 +828,26 @@ gnttab_release_mappings( gnttab_release_mappings( struct domain *d) { - grant_table_t *gt = d->grant_table; - grant_mapping_t *map; + struct grant_table *gt = d->grant_table; + struct grant_mapping *map; grant_ref_t ref; grant_handle_t handle; struct domain *rd; - active_grant_entry_t *act; - grant_entry_t *sha; + struct active_grant_entry *act; + struct grant_entry *sha; BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags)); for ( handle = 0; handle < gt->maptrack_limit; handle++ ) { map = >->maptrack[handle]; - if ( !(map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) ) + if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ) continue; - ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; + ref = map->ref; DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n", - handle, ref, map->ref_and_flags & MAPTRACK_GNTMAP_MASK, - map->domid); + handle, ref, map->flags, map->domid); rd = find_domain_by_id(map->domid); BUG_ON(rd == NULL); @@ -858,16 +857,16 @@ gnttab_release_mappings( act = &rd->grant_table->active[ref]; sha = &rd->grant_table->shared[ref]; - if ( map->ref_and_flags & GNTMAP_readonly ) 
- { - if ( map->ref_and_flags & GNTMAP_device_map ) + if ( map->flags & GNTMAP_readonly ) + { + if ( map->flags & GNTMAP_device_map ) { BUG_ON(!(act->pin & GNTPIN_devr_mask)); act->pin -= GNTPIN_devr_inc; put_page(mfn_to_page(act->frame)); } - if ( map->ref_and_flags & GNTMAP_host_map ) + if ( map->flags & GNTMAP_host_map ) { BUG_ON(!(act->pin & GNTPIN_hstr_mask)); act->pin -= GNTPIN_hstr_inc; @@ -877,14 +876,14 @@ gnttab_release_mappings( } else { - if ( map->ref_and_flags & GNTMAP_device_map ) + if ( map->flags & GNTMAP_device_map ) { BUG_ON(!(act->pin & GNTPIN_devw_mask)); act->pin -= GNTPIN_devw_inc; put_page_and_type(mfn_to_page(act->frame)); } - if ( map->ref_and_flags & GNTMAP_host_map ) + if ( map->flags & GNTMAP_host_map ) { BUG_ON(!(act->pin & GNTPIN_hstw_mask)); act->pin -= GNTPIN_hstw_inc; @@ -903,7 +902,7 @@ gnttab_release_mappings( put_domain(rd); - map->ref_and_flags = 0; + map->flags = 0; } } @@ -912,7 +911,7 @@ grant_table_destroy( grant_table_destroy( struct domain *d) { - grant_table_t *t = d->grant_table; + struct grant_table *t = d->grant_table; if ( t == NULL ) return; diff -r 4ed269e73e95 -r 41823e46d6ac xen/drivers/char/console.c --- a/xen/drivers/char/console.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/drivers/char/console.c Tue Apr 18 09:35:40 2006 -0600 @@ -65,11 +65,12 @@ spinlock_t console_lock = SPIN_LOCK_UNLO #define COLUMNS 80 #define LINES 25 #define ATTRIBUTE 7 +#define VIDEO_SIZE (COLUMNS * LINES * 2) /* Clear the screen and initialize VIDEO, XPOS and YPOS. */ static void cls(void) { - memset(video, 0, COLUMNS * LINES * 2); + memset(video, 0, VIDEO_SIZE); xpos = ypos = 0; outw(10+(1<<(5+8)), 0x3d4); /* cursor off */ } @@ -107,9 +108,9 @@ static int detect_vga(void) * * These checks are basically to detect headless server boxes. 
*/ - return (detect_video(__va(0xA0000)) || - detect_video(__va(0xB0000)) || - detect_video(__va(0xB8000))); + return (detect_video(ioremap(0xA0000, VIDEO_SIZE)) || + detect_video(ioremap(0xB0000, VIDEO_SIZE)) || + detect_video(ioremap(0xB8000, VIDEO_SIZE))); } /* This is actually code from vgaHWRestore in an old version of XFree86 :-) */ @@ -143,7 +144,7 @@ static void init_vga(void) return; } - video = __va(0xB8000); + video = ioremap(0xB8000, VIDEO_SIZE); tmp = inb(0x3da); outb(0x00, 0x3c0); @@ -180,12 +181,10 @@ static void put_newline(void) if (ypos >= LINES) { - static char zeroarr[2*COLUMNS] = { 0 }; ypos = LINES-1; - memcpy((char*)video, - (char*)video + 2*COLUMNS, (LINES-1)*2*COLUMNS); - memcpy((char*)video + (LINES-1)*2*COLUMNS, - zeroarr, 2*COLUMNS); + memmove((char*)video, + (char*)video + 2*COLUMNS, (LINES-1)*2*COLUMNS); + memset((char*)video + (LINES-1)*2*COLUMNS, 0, 2*COLUMNS); } } diff -r 4ed269e73e95 -r 41823e46d6ac xen/drivers/char/ns16550.c --- a/xen/drivers/char/ns16550.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/drivers/char/ns16550.c Tue Apr 18 09:35:40 2006 -0600 @@ -260,13 +260,20 @@ static void ns16550_endboot(struct seria #define ns16550_endboot NULL #endif +static int ns16550_irq(struct serial_port *port) +{ + struct ns16550 *uart = port->uart; + return ((uart->irq > 0) ? 
uart->irq : -1); +} + static struct uart_driver ns16550_driver = { .init_preirq = ns16550_init_preirq, .init_postirq = ns16550_init_postirq, .endboot = ns16550_endboot, .tx_empty = ns16550_tx_empty, .putc = ns16550_putc, - .getc = ns16550_getc + .getc = ns16550_getc, + .irq = ns16550_irq }; static int parse_parity_char(int c) diff -r 4ed269e73e95 -r 41823e46d6ac xen/drivers/char/serial.c --- a/xen/drivers/char/serial.c Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/drivers/char/serial.c Tue Apr 18 09:35:40 2006 -0600 @@ -372,6 +372,15 @@ void serial_endboot(void) com[i].driver->endboot(&com[i]); } +int serial_irq(int idx) +{ + if ( (idx >= 0) && (idx < ARRAY_SIZE(com)) && + com[idx].driver && com[idx].driver->irq ) + return com[idx].driver->irq(&com[idx]); + + return -1; +} + void serial_register_uart(int idx, struct uart_driver *driver, void *uart) { /* Store UART-specific info. */ diff -r 4ed269e73e95 -r 41823e46d6ac xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Tue Apr 18 09:35:40 2006 -0600 @@ -61,8 +61,7 @@ extern unsigned int cpu_rev; CPU_BASED_MWAIT_EXITING | \ CPU_BASED_MOV_DR_EXITING | \ CPU_BASED_ACTIVATE_IO_BITMAP | \ - CPU_BASED_USE_TSC_OFFSETING | \ - CPU_BASED_UNCOND_IO_EXITING \ + CPU_BASED_USE_TSC_OFFSETING \ ) #define MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \ diff -r 4ed269e73e95 -r 41823e46d6ac xen/include/asm-x86/hvm/vpit.h --- a/xen/include/asm-x86/hvm/vpit.h Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/include/asm-x86/hvm/vpit.h Tue Apr 18 09:35:40 2006 -0600 @@ -38,7 +38,8 @@ struct hvm_virpit { struct hvm_virpit { /* for simulation of counter 0 in mode 2 */ u64 period_cycles; /* pit frequency in cpu cycles */ - s_time_t inject_point; /* the time inject virt intr */ + s_time_t count_advance; /* accumulated count advance since last fire */ + s_time_t count_point; /* last point accumulating count advance */ s_time_t scheduled; /* scheduled timer interrupt 
*/ struct timer pit_timer; /* periodic timer for mode 2*/ unsigned int channel; /* the pit channel, counter 0~2 */ diff -r 4ed269e73e95 -r 41823e46d6ac xen/include/asm-x86/irq.h --- a/xen/include/asm-x86/irq.h Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/include/asm-x86/irq.h Tue Apr 18 09:35:40 2006 -0600 @@ -11,8 +11,8 @@ #define IO_APIC_IRQ(irq) (((irq) >= 16) || ((1<<(irq)) & io_apic_irqs)) #define IO_APIC_VECTOR(irq) (irq_vector[irq]) -#define LEGACY_VECTOR(irq) ((irq) + FIRST_EXTERNAL_VECTOR) -#define LEGACY_IRQ_FROM_VECTOR(vec) ((vec) - FIRST_EXTERNAL_VECTOR) +#define LEGACY_VECTOR(irq) ((irq) + FIRST_LEGACY_VECTOR) +#define LEGACY_IRQ_FROM_VECTOR(vec) ((vec) - FIRST_LEGACY_VECTOR) #define irq_to_vector(irq) \ (IO_APIC_IRQ(irq) ? IO_APIC_VECTOR(irq) : LEGACY_VECTOR(irq)) diff -r 4ed269e73e95 -r 41823e46d6ac xen/include/asm-x86/mach-default/irq_vectors.h --- a/xen/include/asm-x86/mach-default/irq_vectors.h Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/include/asm-x86/mach-default/irq_vectors.h Tue Apr 18 09:35:40 2006 -0600 @@ -1,96 +1,36 @@ -/* - * This file should contain #defines for all of the interrupt vector - * numbers used by this architecture. - * - * In addition, there are some standard defines: - * - * FIRST_EXTERNAL_VECTOR: - * The first free place for external interrupts - * - * SYSCALL_VECTOR: - * The IRQ vector a syscall makes the user to kernel transition - * under. - * - * TIMER_IRQ: - * The IRQ number the timer interrupt comes in at. - * - * NR_IRQS: - * The total number of interrupt vectors (including all the - * architecture specific interrupts) needed. - * - */ #ifndef _ASM_IRQ_VECTORS_H #define _ASM_IRQ_VECTORS_H -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define HYPERCALL_VECTOR 0x82 - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. 
- */ - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). - */ +/* Processor-initiated interrupts are all high priority. */ #define SPURIOUS_APIC_VECTOR 0xff #define ERROR_APIC_VECTOR 0xfe #define INVALIDATE_TLB_VECTOR 0xfd #define EVENT_CHECK_VECTOR 0xfc #define CALL_FUNCTION_VECTOR 0xfb - -#define THERMAL_APIC_VECTOR 0xf0 -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef +#define THERMAL_APIC_VECTOR 0xfa +#define LOCAL_TIMER_VECTOR 0xf9 /* - * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) + * High-priority dynamically-allocated vectors. For interrupts that + * must be higher priority than any guest-bound interrupt. */ -#define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_HIPRIORITY_VECTOR 0xf0 +#define LAST_HIPRIORITY_VECTOR 0xf8 -#define TIMER_IRQ 0 +/* Legacy PIC uses vectors 0xe0-0xef. */ +#define FIRST_LEGACY_VECTOR 0xe0 +#define LAST_LEGACY_VECTOR 0xef -/* - * 16 8259A IRQ's, 208 potential APIC interrupt sources. - * Right now the APIC is mostly only used for SMP. - * 256 vectors is an architectural limit. (we can have - * more than 256 devices theoretically, but they will - * have to use shared interrupts) - * Since vectors 0x00-0x1f are used/reserved for the CPU, - * the usable vector space is 0x20-0xff (224 vectors) - */ +#define HYPERCALL_VECTOR 0x82 -/* - * The maximum number of vectors supported by i386 processors - * is limited to 256. 
For processors other than i386, NR_VECTORS - * should be changed accordingly. - */ +/* Dynamically-allocated vectors available to any driver. */ +#define FIRST_DYNAMIC_VECTOR 0x20 +#define LAST_DYNAMIC_VECTOR 0xdf + #define NR_VECTORS 256 -#include "irq_vectors_limits.h" - -#define FPU_IRQ 13 - -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) - +/* Limited by number of trap vectors. */ +#define NR_IRQS NR_VECTORS +#define NR_IRQ_VECTORS NR_IRQS #endif /* _ASM_IRQ_VECTORS_H */ diff -r 4ed269e73e95 -r 41823e46d6ac xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/include/asm-x86/mm.h Tue Apr 18 09:35:40 2006 -0600 @@ -103,11 +103,13 @@ struct page_info #define PGT_high_mfn_mask (0xfffUL << PGT_high_mfn_shift) #define PGT_mfn_mask (((1U<<23)-1) | PGT_high_mfn_mask) #define PGT_high_mfn_nx (0x800UL << PGT_high_mfn_shift) +#define PGT_pae_idx_shift PGT_high_mfn_shift #else /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. 
*/ #define PGT_mfn_mask ((1U<<23)-1) /* NX for PAE xen is not supported yet */ #define PGT_high_mfn_nx (1ULL << 63) +#define PGT_pae_idx_shift 23 #endif #define PGT_score_shift 23 diff -r 4ed269e73e95 -r 41823e46d6ac xen/include/asm-x86/shadow_64.h --- a/xen/include/asm-x86/shadow_64.h Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/include/asm-x86/shadow_64.h Tue Apr 18 09:35:40 2006 -0600 @@ -119,6 +119,8 @@ typedef struct { intpte_t lo; } pgentry_ #define PAE_CR3_IDX_MASK 0x7f #define PAE_CR3_IDX_NO 128 +#define PAE_PDPT_RESERVED 0x1e6 /* [8:5], [2,1] */ + /******************************************************************************/ static inline int table_offset_64(unsigned long va, int level) { diff -r 4ed269e73e95 -r 41823e46d6ac xen/include/public/xen.h --- a/xen/include/public/xen.h Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/include/public/xen.h Tue Apr 18 09:35:40 2006 -0600 @@ -286,7 +286,8 @@ typedef struct vcpu_time_info { uint64_t system_time; /* Time, in nanosecs, since boot. */ /* * Current system time: - * system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul + * system_time + + * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32) * CPU frequency (Hz): * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift */ diff -r 4ed269e73e95 -r 41823e46d6ac xen/include/xen/grant_table.h --- a/xen/include/xen/grant_table.h Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/include/xen/grant_table.h Tue Apr 18 09:35:40 2006 -0600 @@ -29,11 +29,11 @@ #include <asm/grant_table.h> /* Active grant entry - used for shadowing GTF_permit_access grants. */ -typedef struct { +struct active_grant_entry { u32 pin; /* Reference count information. */ domid_t domid; /* Domain being granted access. */ unsigned long frame; /* Frame being granted. */ -} active_grant_entry_t; +}; /* Count of writable host-CPU mappings. */ #define GNTPIN_hstw_shift (0) @@ -60,29 +60,30 @@ typedef struct { * Tracks a mapping of another domain's grant reference. 
Each domain has a * table of these, indexes into which are returned as a 'mapping handle'. */ -typedef struct { - u16 ref_and_flags; /* 0-4: GNTMAP_* ; 5-15: grant ref */ +struct grant_mapping { + u32 ref; /* grant ref */ + u16 flags; /* 0-4: GNTMAP_* ; 5-15: unused */ domid_t domid; /* granting domain */ -} grant_mapping_t; -#define MAPTRACK_GNTMAP_MASK 0x1f -#define MAPTRACK_REF_SHIFT 5 -#define MAPTRACK_MAX_ENTRIES (1 << (16 - MAPTRACK_REF_SHIFT)) +}; + +/* Fairly arbitrary. [POLICY] */ +#define MAPTRACK_MAX_ENTRIES 16384 /* Per-domain grant information. */ -typedef struct { +struct grant_table { /* Shared grant table (see include/public/grant_table.h). */ - grant_entry_t *shared; + struct grant_entry *shared; /* Active grant table. */ - active_grant_entry_t *active; + struct active_grant_entry *active; /* Mapping tracking table. */ - grant_mapping_t *maptrack; + struct grant_mapping *maptrack; unsigned int maptrack_head; unsigned int maptrack_order; unsigned int maptrack_limit; unsigned int map_count; /* Lock protecting updates to active and shared grant tables. */ spinlock_t lock; -} grant_table_t; +}; /* Create/destroy per-domain grant table context. */ int grant_table_create( diff -r 4ed269e73e95 -r 41823e46d6ac xen/include/xen/sched.h --- a/xen/include/xen/sched.h Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/include/xen/sched.h Tue Apr 18 09:35:40 2006 -0600 @@ -125,7 +125,7 @@ struct domain struct evtchn *evtchn[NR_EVTCHN_BUCKETS]; spinlock_t evtchn_lock; - grant_table_t *grant_table; + struct grant_table *grant_table; /* * Interrupt to event-channel mappings. Updates should be protected by the diff -r 4ed269e73e95 -r 41823e46d6ac xen/include/xen/serial.h --- a/xen/include/xen/serial.h Mon Apr 17 08:47:36 2006 -0600 +++ b/xen/include/xen/serial.h Tue Apr 18 09:35:40 2006 -0600 @@ -57,6 +57,8 @@ struct uart_driver { void (*putc)(struct serial_port *, char); /* Get a character from the serial line: returns 0 if none available. 
*/ int (*getc)(struct serial_port *, char *); + /* Get IRQ number for this port's serial line: returns -1 if none. */ + int (*irq)(struct serial_port *); }; /* 'Serial handles' are composed from the following fields. */ @@ -99,6 +101,9 @@ void serial_end_sync(int handle); /* Return number of bytes headroom in transmit buffer. */ int serial_tx_space(int handle); +/* Return irq number for specified serial port (identified by index). */ +int serial_irq(int idx); + /* * Initialisation and helper functions for uart drivers. */ diff -r 4ed269e73e95 -r 41823e46d6ac xen/include/asm-x86/mach-default/irq_vectors_limits.h --- a/xen/include/asm-x86/mach-default/irq_vectors_limits.h Mon Apr 17 08:47:36 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -#ifndef _ASM_IRQ_VECTORS_LIMITS_H -#define _ASM_IRQ_VECTORS_LIMITS_H - -/* Limited by number of trap vectors. */ -#define NR_IRQS FIRST_SYSTEM_VECTOR -#define NR_IRQ_VECTORS NR_IRQS - -#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our servers 24x7x365 and backed by RackSpace's Fanatical Support®. |