[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merged.
# HG changeset patch # User emellor@xxxxxxxxxxxxxxxxxxxxxx # Node ID 91da9a1b7196b093c6b44906992bcbcad92b30a1 # Parent 83eb8d81c96f6639d63bb93dabeabb813cbd822c # Parent b39365343de08af6c76fa3492b2cffb436470b3f Merged. diff -r 83eb8d81c96f -r 91da9a1b7196 .hgignore --- a/.hgignore Sat Apr 15 18:25:09 2006 +++ b/.hgignore Sat Apr 15 18:25:21 2006 @@ -184,6 +184,7 @@ ^tools/xm-test/ramdisk/buildroot ^xen/BLOG$ ^xen/TAGS$ +^xen/cscope\.*$ ^xen/arch/x86/asm-offsets\.s$ ^xen/arch/x86/boot/mkelf32$ ^xen/arch/x86/xen\.lds$ diff -r 83eb8d81c96f -r 91da9a1b7196 buildconfigs/Rules.mk --- a/buildconfigs/Rules.mk Sat Apr 15 18:25:09 2006 +++ b/buildconfigs/Rules.mk Sat Apr 15 18:25:21 2006 @@ -99,14 +99,14 @@ linux-2.6-xen.patch: ref-linux-$(LINUX_VER)/.valid-ref rm -rf tmp-$@ cp -al $(<D) tmp-$@ - ( cd linux-2.6-xen-sparse && ./mkbuildtree ../tmp-$@ ) + ( cd linux-2.6-xen-sparse && bash ./mkbuildtree ../tmp-$@ ) diff -Nurp $(patsubst ref%,pristine%,$(<D)) tmp-$@ > $@ || true rm -rf tmp-$@ %-xen.patch: ref-%/.valid-ref rm -rf tmp-$@ cp -al $(<D) tmp-$@ - ( cd $*-xen-sparse && ./mkbuildtree ../tmp-$@ ) + ( cd $*-xen-sparse && bash ./mkbuildtree ../tmp-$@ ) diff -Nurp $(patsubst ref%,pristine%,$(<D)) tmp-$@ > $@ || true rm -rf tmp-$@ diff -r 83eb8d81c96f -r 91da9a1b7196 buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Sat Apr 15 18:25:09 2006 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Sat Apr 15 18:25:21 2006 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-rc3-xen0 -# Mon Feb 20 11:37:43 2006 +# Linux kernel version: 2.6.16-xen0 +# Thu Apr 13 14:58:29 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -99,6 +99,8 @@ # CONFIG_MPSC is not set CONFIG_GENERIC_CPU=y CONFIG_X86_64_XEN=y +CONFIG_X86_NO_TSS=y +CONFIG_X86_NO_IDT=y CONFIG_X86_L1_CACHE_BYTES=128 CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_X86_GOOD_APIC=y @@ -176,6 +178,19 @@ CONFIG_XEN_PCIDEV_FRONTEND=y # CONFIG_XEN_PCIDEV_FE_DEBUG is not set # 
CONFIG_UNORDERED_IO is not set +# CONFIG_PCIEPORTBUS is not set +CONFIG_PCI_LEGACY_PROC=y +# CONFIG_PCI_DEBUG is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PCI Hotplug Support +# +# CONFIG_HOTPLUG_PCI is not set # # Executable file formats / Emulations @@ -1001,11 +1016,7 @@ CONFIG_INFINIBAND_SRP=y # -# SN Devices -# - -# -# EDAC - error detection and reporting (RAS) +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) # # CONFIG_EDAC is not set @@ -1239,7 +1250,7 @@ # Hardware crypto devices # CONFIG_XEN=y -CONFIG_NO_IDLE_HZ=y +CONFIG_XEN_INTERFACE_VERSION=0x00030101 # # XEN @@ -1266,6 +1277,7 @@ CONFIG_XEN_SYSFS=y CONFIG_HAVE_ARCH_ALLOC_SKB=y CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y +CONFIG_NO_IDLE_HZ=y # # Library routines diff -r 83eb8d81c96f -r 91da9a1b7196 buildconfigs/linux-defconfig_xenU_x86_64 --- a/buildconfigs/linux-defconfig_xenU_x86_64 Sat Apr 15 18:25:09 2006 +++ b/buildconfigs/linux-defconfig_xenU_x86_64 Sat Apr 15 18:25:21 2006 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-rc3-xen0 -# Thu Feb 16 22:56:02 2006 +# Linux kernel version: 2.6.16-xenU +# Thu Apr 13 14:59:16 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -103,6 +103,8 @@ CONFIG_MPSC=y # CONFIG_GENERIC_CPU is not set CONFIG_X86_64_XEN=y +CONFIG_X86_NO_TSS=y +CONFIG_X86_NO_IDT=y CONFIG_X86_L1_CACHE_BYTES=128 CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_X86_GOOD_APIC=y @@ -145,6 +147,15 @@ # # CONFIG_PCI is not set # CONFIG_UNORDERED_IO is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PCI Hotplug Support +# # # Executable file formats / Emulations @@ -844,11 +855,7 @@ # # -# SN Devices -# - -# -# EDAC - error detection and reporting (RAS) +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) # # CONFIG_EDAC is not set @@ -1128,7 +1135,7 @@ # Hardware crypto devices # CONFIG_XEN=y -CONFIG_NO_IDLE_HZ=y +CONFIG_XEN_INTERFACE_VERSION=0x00030101 # # XEN @@ -1144,6 
+1151,7 @@ CONFIG_XEN_SYSFS=y CONFIG_HAVE_ARCH_ALLOC_SKB=y CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y +CONFIG_NO_IDLE_HZ=y # # Library routines diff -r 83eb8d81c96f -r 91da9a1b7196 buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Sat Apr 15 18:25:09 2006 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Sat Apr 15 18:25:21 2006 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.16-xen -# Mon Mar 27 09:43:44 2006 +# Thu Apr 13 15:01:04 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -186,6 +186,41 @@ CONFIG_XEN_PCIDEV_FRONTEND=y # CONFIG_XEN_PCIDEV_FE_DEBUG is not set # CONFIG_UNORDERED_IO is not set +# CONFIG_PCIEPORTBUS is not set +# CONFIG_PCI_LEGACY_PROC is not set +# CONFIG_PCI_DEBUG is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +CONFIG_PCCARD=m +# CONFIG_PCMCIA_DEBUG is not set +CONFIG_PCMCIA=m +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y +CONFIG_CARDBUS=y + +# +# PC-card bridges +# +CONFIG_YENTA=m +CONFIG_YENTA_O2=y +CONFIG_YENTA_RICOH=y +CONFIG_YENTA_TI=y +CONFIG_YENTA_ENE_TUNE=y +CONFIG_YENTA_TOSHIBA=y +CONFIG_PD6729=m +CONFIG_I82092=m +CONFIG_PCCARD_NONSTATIC=m + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=m +# CONFIG_HOTPLUG_PCI_FAKE is not set +# CONFIG_HOTPLUG_PCI_ACPI is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set # # Executable file formats / Emulations @@ -625,6 +660,10 @@ CONFIG_BT_HCIBCM203X=m CONFIG_BT_HCIBPA10X=m CONFIG_BT_HCIBFUSB=m +# CONFIG_BT_HCIDTL1 is not set +# CONFIG_BT_HCIBT3C is not set +# CONFIG_BT_HCIBLUECARD is not set +# CONFIG_BT_HCIBTUART is not set CONFIG_BT_HCIVHCI=m CONFIG_IEEE80211=m # CONFIG_IEEE80211_DEBUG is not set @@ -769,6 +808,7 @@ CONFIG_PARPORT_PC=m # CONFIG_PARPORT_PC_FIFO is not set # CONFIG_PARPORT_PC_SUPERIO is not set +# CONFIG_PARPORT_PC_PCMCIA is not set CONFIG_PARPORT_NOT_PC=y # CONFIG_PARPORT_GSC is not set CONFIG_PARPORT_1284=y @@ -851,6 +891,7 @@ # CONFIG_BLK_DEV_HD_IDE is not set 
CONFIG_BLK_DEV_IDEDISK=y CONFIG_IDEDISK_MULTI_MODE=y +# CONFIG_BLK_DEV_IDECS is not set CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_IDETAPE is not set CONFIG_BLK_DEV_IDEFLOPPY=y @@ -1012,6 +1053,13 @@ # CONFIG_SCSI_DEBUG is not set # +# PCMCIA SCSI adapter support +# +# CONFIG_PCMCIA_FDOMAIN is not set +# CONFIG_PCMCIA_QLOGIC is not set +# CONFIG_PCMCIA_SYM53C500 is not set + +# # Multi-device support (RAID and LVM) # CONFIG_MD=y @@ -1141,6 +1189,7 @@ CONFIG_WINBOND_840=m CONFIG_DM9102=m CONFIG_ULI526X=m +# CONFIG_PCMCIA_XIRCOM is not set # CONFIG_HP100 is not set CONFIG_NET_PCI=y CONFIG_PCNET32=m @@ -1224,6 +1273,13 @@ # Obsolete Wireless cards support (pre-802.11) # # CONFIG_STRIP is not set +# CONFIG_PCMCIA_WAVELAN is not set +# CONFIG_PCMCIA_NETWAVE is not set + +# +# Wireless 802.11 Frequency Hopping cards support +# +# CONFIG_PCMCIA_RAYCS is not set # # Wireless 802.11b ISA/PCI cards support @@ -1243,6 +1299,15 @@ CONFIG_PCI_ATMEL=m # +# Wireless 802.11b Pcmcia/Cardbus cards support +# +# CONFIG_PCMCIA_HERMES is not set +# CONFIG_PCMCIA_SPECTRUM is not set +# CONFIG_AIRO_CS is not set +# CONFIG_PCMCIA_ATMEL is not set +# CONFIG_PCMCIA_WL3501 is not set + +# # Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support # CONFIG_PRISM54=m @@ -1250,7 +1315,13 @@ # CONFIG_HOSTAP_FIRMWARE is not set CONFIG_HOSTAP_PLX=m CONFIG_HOSTAP_PCI=m +# CONFIG_HOSTAP_CS is not set CONFIG_NET_WIRELESS=y + +# +# PCMCIA network device support +# +# CONFIG_NET_PCMCIA is not set # # Wan interfaces @@ -1376,6 +1447,10 @@ # # HiSax PCMCIA card service modules # +# CONFIG_HISAX_SEDLBAUER_CS is not set +# CONFIG_HISAX_ELSA_CS is not set +# CONFIG_HISAX_AVM_A1_CS is not set +# CONFIG_HISAX_TELES_CS is not set # # HiSax sub driver modules @@ -1412,6 +1487,7 @@ CONFIG_ISDN_DRV_AVMB1_B1PCI=m CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m +# CONFIG_ISDN_DRV_AVMB1_AVM_CS is not set CONFIG_ISDN_DRV_AVMB1_T1PCI=m CONFIG_ISDN_DRV_AVMB1_C4=m @@ -1600,6 +1676,13 @@ CONFIG_DRM_MGA=m 
CONFIG_DRM_VIA=m CONFIG_DRM_SAVAGE=m + +# +# PCMCIA character devices +# +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set # CONFIG_MWAVE is not set # CONFIG_RAW_DRIVER is not set # CONFIG_HPET is not set @@ -2101,6 +2184,10 @@ CONFIG_SND_USB_USX2Y=m # +# PCMCIA devices +# + +# # Open Sound System # # CONFIG_SOUND_PRIME is not set @@ -2134,6 +2221,7 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=m CONFIG_USB_SL811_HCD=m +# CONFIG_USB_SL811_CS is not set # # USB Device Class drivers @@ -2284,6 +2372,7 @@ CONFIG_USB_SERIAL_TI=m CONFIG_USB_SERIAL_CYBERJACK=m CONFIG_USB_SERIAL_XIRCOM=m +# CONFIG_USB_SERIAL_OPTION is not set CONFIG_USB_SERIAL_OMNINET=m CONFIG_USB_EZUSB=y @@ -2649,7 +2738,7 @@ # Hardware crypto devices # CONFIG_XEN=y -CONFIG_NO_IDLE_HZ=y +CONFIG_XEN_INTERFACE_VERSION=0x00030101 # # XEN @@ -2676,6 +2765,7 @@ CONFIG_XEN_SYSFS=m CONFIG_HAVE_ARCH_ALLOC_SKB=y CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y +CONFIG_NO_IDLE_HZ=y # # Library routines diff -r 83eb8d81c96f -r 91da9a1b7196 buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Sat Apr 15 18:25:09 2006 +++ b/buildconfigs/mk.linux-2.6-xen Sat Apr 15 18:25:21 2006 @@ -22,8 +22,8 @@ rm -rf $(LINUX_DIR) cp -al $(<D) $(LINUX_DIR) # Apply arch-xen patches - ( cd linux-$(LINUX_SERIES)-xen-sparse ; \ - LINUX_ARCH=$(LINUX_ARCH) ./mkbuildtree ../$(LINUX_DIR) ) + ( cd linux-$(LINUX_SERIES)-xen-sparse && \ + LINUX_ARCH=$(LINUX_ARCH) bash ./mkbuildtree ../$(LINUX_DIR) ) # Re-use config from install dir if one exits else use default config CONFIG_VERSION=$$(sed -ne 's/^EXTRAVERSION = //p' $(LINUX_DIR)/Makefile); \ [ -r $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) ] && \ diff -r 83eb8d81c96f -r 91da9a1b7196 docs/src/user.tex --- a/docs/src/user.tex Sat Apr 15 18:25:09 2006 +++ b/docs/src/user.tex Sat Apr 15 18:25:21 2006 @@ -1232,8 +1232,15 @@ \subsection{PCI} \label{ss:pcidd} -Individual PCI devices can be assigned to a given domain to 
allow that -domain direct access to the PCI hardware. To use this functionality, ensure +Individual PCI devices can be assigned to a given domain (a PCI driver domain) +to allow that domain direct access to the PCI hardware. + +While PCI Driver Domains can increase the stability and security of a system +by addressing a number of security concerns, there are some security issues +that remain that you can read about in Section~\ref{s:ddsecurity}. + +\subsubsection{Compile-Time Setup} +To use this functionality, ensure that the PCI Backend is compiled in to a privileged domain (e.g. domain 0) and that the domains which will be assigned PCI devices have the PCI Frontend compiled in. In XenLinux, the PCI Backend is available under the Xen @@ -1241,21 +1248,73 @@ architecture-specific "Bus Options" section. You may compile both the backend and the frontend into the same kernel; they will not affect each other. +\subsubsection{PCI Backend Configuration - Binding at Boot} The PCI devices you wish to assign to unprivileged domains must be "hidden" from your backend domain (usually domain 0) so that it does not load a driver for them. Use the \path{pciback.hide} kernel parameter which is specified on the kernel command-line and is configurable through GRUB (see Section~\ref{s:configure}). Note that devices are not really hidden from the -backend domain. The PCI Backend ensures that no other device driver loads -for those devices. PCI devices are identified by hexadecimal -slot/funciton numbers (on Linux, use \path{lspci} to determine slot/funciton -numbers of your devices) and can be specified with or without the PCI domain: \\ +backend domain. The PCI Backend appears to the Linux kernel as a regular PCI +device driver. The PCI Backend ensures that no other device driver loads +for the devices by binding itself as the device driver for those devices. 
+PCI devices are identified by hexadecimal slot/function numbers (on Linux, +use \path{lspci} to determine slot/function numbers of your devices) and +can be specified with or without the PCI domain: \\ \centerline{ {\tt ({\em bus}:{\em slot}.{\em func})} example {\tt (02:1d.3)}} \\ \centerline{ {\tt ({\em domain}:{\em bus}:{\em slot}.{\em func})} example {\tt (0000:02:1d.3)}} \\ An example kernel command-line which hides two PCI devices might be: \\ \centerline{ {\tt root=/dev/sda4 ro console=tty0 pciback.hide=(02:01.f)(0000:04:1d.0) } } \\ +\subsubsection{PCI Backend Configuration - Late Binding} +PCI devices can also be bound to the PCI Backend after boot through the manual +binding/unbinding facilities provided by the Linux kernel in sysfs (allowing +for a Xen user to give PCI devices to driver domains that were not specified +on the kernel command-line). There are several attributes within the PCI +Backend's sysfs directory (\path{/sys/bus/pci/drivers/pciback}) that can be +used to bind/unbind devices: + +\begin{description} +\item[slots] lists all of the PCI slots that the PCI Backend will try to seize + (or "hide" from Domain 0). A PCI slot must appear in this list before it can + be bound to the PCI Backend through the \path{bind} attribute. +\item[new\_slot] write the name of a slot here (in 0000:00:00.0 format) to + have the PCI Backend seize the device in this slot. +\item[remove\_slot] write the name of a slot here (same format as + \path{new\_slot}) to have the PCI Backend no longer try to seize devices in + this slot. Note that this does not unbind the driver from a device it has + already seized. +\item[bind] write the name of a slot here (in 0000:00:00.0 format) to have + the Linux kernel attempt to bind the device in that slot to the PCI Backend + driver. +\item[unbind] write the name of a slot here (same format as \path{bind}) to have + the Linux kernel unbind the device from the PCI Backend. 
DO NOT unbind a + device while it is currently given to a PCI driver domain! +\end{description} + +Some examples: + +Bind a device to the PCI Backend which is not bound to any other driver. +\begin{verbatim} +# # Add a new slot to the PCI Backend's list +# echo -n 0000:01:04.d > /sys/bus/pci/drivers/pciback/new_slot +# # Now that the backend is watching for the slot, bind to it +# echo -n 0000:01:04.d > /sys/bus/pci/drivers/pciback/bind +\end{verbatim} + +Unbind a device from its driver and bind to the PCI Backend. +\begin{verbatim} +# # Unbind a PCI network card from its network driver +# echo -n 0000:05:02.0 > /sys/bus/pci/drivers/3c905/unbind +# # And now bind it to the PCI Backend +# echo -n 0000:05:02.0 > /sys/bus/pci/drivers/pciback/new_slot +# echo -n 0000:05:02.0 > /sys/bus/pci/drivers/pciback/bind +\end{verbatim} + +Note that the "-n" option in the example is important as it causes echo to not +output a new-line. + +\subsubsection{PCI Frontend Configuration} To configure a domU to receive a PCI device: \begin{description} @@ -1281,9 +1340,6 @@ \end{verbatim} } \end{description} - -There are a number of security concerns associated with PCI Driver Domains -that you can read about in Section~\ref{s:ddsecurity}. %% There are two possible types of privileges: IO privileges and %% administration privileges. diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/arch/i386/kernel/fixup.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c Sat Apr 15 18:25:21 2006 @@ -68,6 +68,7 @@ DP(""); for (i = 5; i > 0; i--) { + touch_softlockup_watchdog(); printk("Pausing... 
%d", i); mdelay(1000); printk("\b\b\b\b\b\b\b\b\b\b\b\b"); diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Sat Apr 15 18:25:21 2006 @@ -206,8 +206,8 @@ } /* - * We use __copy_to_user to transfer to the host buffer because the buffer - * may be mapped read-only (e.g, in blkback driver) but lower-level + * We use __copy_to_user_inatomic to transfer to the host buffer because the + * buffer may be mapped read-only (e.g, in blkback driver) but lower-level * drivers map the buffer for DMA_BIDIRECTIONAL access. This causes an * unnecessary copy from the aperture to the host buffer, and a page fault. */ @@ -225,7 +225,7 @@ dev = dma_addr + size - len; host = kmp + buffer.offset; if (dir == DMA_FROM_DEVICE) { - if (__copy_to_user(host, dev, bytes)) + if (__copy_to_user_inatomic(host, dev, bytes)) /* inaccessible */; } else memcpy(dev, host, bytes); @@ -238,7 +238,7 @@ char *host = (char *)phys_to_virt( page_to_pseudophys(buffer.page)) + buffer.offset; if (dir == DMA_FROM_DEVICE) { - if (__copy_to_user(host, dma_addr, size)) + if (__copy_to_user_inatomic(host, dma_addr, size)) /* inaccessible */; } else if (dir == DMA_TO_DEVICE) memcpy(dma_addr, host, size); diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/arch/x86_64/Kconfig --- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig Sat Apr 15 18:25:21 2006 @@ -568,7 +568,6 @@ from i386. Requires that the driver writer used memory barriers properly. 
-if !X86_64_XEN source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" @@ -576,7 +575,6 @@ source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" -endif endmenu diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Sat Apr 15 18:25:21 2006 @@ -186,9 +186,8 @@ handle = pending_handle(req, i); if (handle == BLKBACK_INVALID_HANDLE) continue; - unmap[invcount].host_addr = vaddr(req, i); - unmap[invcount].dev_bus_addr = 0; - unmap[invcount].handle = handle; + gnttab_set_unmap_op(&unmap[i], vaddr(req, i), GNTMAP_host_map, + handle); pending_handle(req, i) = BLKBACK_INVALID_HANDLE; invcount++; } @@ -384,6 +383,8 @@ pending_req->nr_pages = nseg; for (i = 0; i < nseg; i++) { + uint32_t flags; + seg[i].nsec = req->seg[i].last_sect - req->seg[i].first_sect + 1; @@ -392,12 +393,11 @@ goto fail_response; preq.nr_sects += seg[i].nsec; - map[i].host_addr = vaddr(pending_req, i); - map[i].dom = blkif->domid; - map[i].ref = req->seg[i].gref; - map[i].flags = GNTMAP_host_map; + flags = GNTMAP_host_map; if ( operation == WRITE ) - map[i].flags |= GNTMAP_readonly; + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + req->seg[i].gref, blkif->domid); } ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Sat Apr 15 18:25:21 2006 @@ -58,10 +58,8 @@ struct gnttab_map_grant_ref op; int ret; - op.host_addr = (unsigned long)blkif->blk_ring_area->addr; - op.flags = GNTMAP_host_map; - op.ref = shared_page; - op.dom = blkif->domid; + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, 
shared_page, blkif->domid); lock_vm_area(blkif->blk_ring_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); @@ -90,9 +88,8 @@ struct gnttab_unmap_grant_ref op; int ret; - op.host_addr = (unsigned long)blkif->blk_ring_area->addr; - op.handle = blkif->shmem_handle; - op.dev_bus_addr = 0; + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); lock_vm_area(blkif->blk_ring_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sat Apr 15 18:25:21 2006 @@ -418,9 +418,9 @@ if (BLKTAP_INVALID_HANDLE(handle)) continue; - unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i); - unmap[op].dev_bus_addr = 0; - unmap[op].handle = handle->kernel; + gnttab_set_unmap_op(&unmap[op], + MMAP_VADDR(mmap_vstart, idx, i), + GNTMAP_host_map, handle->kernel); op++; if (create_lookup_pte_addr( @@ -430,9 +430,10 @@ DPRINTK("Couldn't get a pte addr!\n"); return; } - unmap[op].host_addr = ptep; - unmap[op].dev_bus_addr = 0; - unmap[op].handle = handle->user; + gnttab_set_unmap_grnat_ref(&unmap[op], ptep, + GNTMAP_host_map | + GNTMAP_application_map | + GNTMAP_contains_pte, handle->user); op++; BLKTAP_INVALIDATE_HANDLE(handle); @@ -703,21 +704,21 @@ unsigned long uvaddr; unsigned long kvaddr; uint64_t ptep; + uint32_t flags; uvaddr = MMAP_VADDR(user_vstart, pending_idx, i); kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i); - /* Map the remote page to kernel. 
*/ - map[op].host_addr = kvaddr; - map[op].dom = blkif->domid; - map[op].ref = req->seg[i].gref; - map[op].flags = GNTMAP_host_map; + flags = GNTMAP_host_map; /* This needs a bit more thought in terms of interposition: * If we want to be able to modify pages during write using * grant table mappings, the guest will either need to allow * it, or we'll need to incur a copy. Bit of an fbufs moment. ;) */ if (req->operation == BLKIF_OP_WRITE) - map[op].flags |= GNTMAP_readonly; + flags |= GNTMAP_readonly; + /* Map the remote page to kernel. */ + gnttab_set_map_op(&map[op], kvaddr, flags, req->seg[i].gref, + blkif->domid); op++; /* Now map it to user. */ @@ -728,14 +729,13 @@ goto bad_descriptor; } - map[op].host_addr = ptep; - map[op].dom = blkif->domid; - map[op].ref = req->seg[i].gref; - map[op].flags = GNTMAP_host_map | GNTMAP_application_map + flags = GNTMAP_host_map | GNTMAP_application_map | GNTMAP_contains_pte; /* Above interposition comment applies here as well. */ if (req->operation == BLKIF_OP_WRITE) - map[op].flags |= GNTMAP_readonly; + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[op], ptep, flags, req->seg[i].gref, + blkif->domid); op++; } diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Sat Apr 15 18:25:21 2006 @@ -33,10 +33,8 @@ struct gnttab_map_grant_ref op; int ret; - op.host_addr = (unsigned long)blkif->blk_ring_area->addr; - op.flags = GNTMAP_host_map; - op.ref = shared_page; - op.dom = blkif->domid; + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); lock_vm_area(blkif->blk_ring_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); @@ -59,9 +57,8 @@ struct gnttab_unmap_grant_ref op; int ret; - op.host_addr = (unsigned long)blkif->blk_ring_area->addr; - op.handle = blkif->shmem_handle; - 
op.dev_bus_addr = 0; + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); lock_vm_area(blkif->blk_ring_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/core/evtchn.c --- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Sat Apr 15 18:25:21 2006 @@ -513,6 +513,8 @@ { int evtchn = evtchn_from_irq(irq); + move_native_irq(irq); + if (VALID_EVTCHN(evtchn)) { mask_evtchn(evtchn); clear_evtchn(evtchn); @@ -635,6 +637,8 @@ static void ack_pirq(unsigned int irq) { int evtchn = evtchn_from_irq(irq); + + move_native_irq(irq); if (VALID_EVTCHN(evtchn)) { mask_evtchn(evtchn); diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Sat Apr 15 18:25:21 2006 @@ -65,6 +65,7 @@ EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); EXPORT_SYMBOL_GPL(gnttab_free_grant_references); EXPORT_SYMBOL_GPL(gnttab_free_grant_reference); +EXPORT_SYMBOL_GPL(gnttab_empty_grant_references); EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); EXPORT_SYMBOL_GPL(gnttab_release_grant_reference); EXPORT_SYMBOL_GPL(gnttab_request_free_callback); @@ -322,6 +323,12 @@ *head = h; return 0; +} + +int +gnttab_empty_grant_references(const grant_ref_t *private_head) +{ + return (*private_head == GNTTAB_LIST_END); } int diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/netback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Sat Apr 15 18:25:21 2006 @@ -150,10 +150,8 @@ struct gnttab_map_grant_ref op; int ret; - op.host_addr = (unsigned long)netif->tx_comms_area->addr; - op.flags = GNTMAP_host_map; - op.ref = 
tx_ring_ref; - op.dom = netif->domid; + gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr, + GNTMAP_host_map, tx_ring_ref, netif->domid); lock_vm_area(netif->tx_comms_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); @@ -168,10 +166,8 @@ netif->tx_shmem_ref = tx_ring_ref; netif->tx_shmem_handle = op.handle; - op.host_addr = (unsigned long)netif->rx_comms_area->addr; - op.flags = GNTMAP_host_map; - op.ref = rx_ring_ref; - op.dom = netif->domid; + gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr, + GNTMAP_host_map, rx_ring_ref, netif->domid); lock_vm_area(netif->rx_comms_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); @@ -194,18 +190,16 @@ struct gnttab_unmap_grant_ref op; int ret; - op.host_addr = (unsigned long)netif->tx_comms_area->addr; - op.handle = netif->tx_shmem_handle; - op.dev_bus_addr = 0; + gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr, + GNTMAP_host_map, netif->tx_shmem_handle); lock_vm_area(netif->tx_comms_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(netif->tx_comms_area); BUG_ON(ret); - op.host_addr = (unsigned long)netif->rx_comms_area->addr; - op.handle = netif->rx_shmem_handle; - op.dev_bus_addr = 0; + gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr, + GNTMAP_host_map, netif->rx_shmem_handle); lock_vm_area(netif->rx_comms_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sat Apr 15 18:25:21 2006 @@ -453,9 +453,9 @@ gop = tx_unmap_ops; while (dc != dp) { pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; - gop->host_addr = MMAP_VADDR(pending_idx); - gop->dev_bus_addr = 0; - gop->handle = grant_tx_handle[pending_idx]; + gnttab_set_unmap_op(gop, 
MMAP_VADDR(pending_idx), + GNTMAP_host_map, + grant_tx_handle[pending_idx]); gop++; } ret = HYPERVISOR_grant_table_op( @@ -579,10 +579,9 @@ /* Packets passed to netif_rx() must have some headroom. */ skb_reserve(skb, 16); - mop->host_addr = MMAP_VADDR(pending_idx); - mop->dom = netif->domid; - mop->ref = txreq.gref; - mop->flags = GNTMAP_host_map | GNTMAP_readonly; + gnttab_set_map_op(mop, MMAP_VADDR(pending_idx), + GNTMAP_host_map | GNTMAP_readonly, + txreq.gref, netif->domid); mop++; memcpy(&pending_tx_info[pending_idx].req, diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Sat Apr 15 18:25:21 2006 @@ -106,7 +106,7 @@ /* Receive-ring batched refills. */ #define RX_MIN_TARGET 8 #define RX_DFL_MIN_TARGET 64 -#define RX_MAX_TARGET NET_RX_RING_SIZE +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) int rx_min_target, rx_max_target, rx_target; struct sk_buff_head rx_batch; @@ -119,6 +119,7 @@ struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1]; struct sk_buff *rx_skbs[NET_RX_RING_SIZE+1]; +#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; grant_ref_t gref_rx_head; @@ -505,8 +506,9 @@ } while (prod != np->tx.sring->rsp_prod); out: - if (np->tx_full && - ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) { + if ((np->tx_full) && + ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE) && + !gnttab_empty_grant_references(&np->gref_tx_head)) { np->tx_full = 0; if (np->user_state == UST_OPEN) netif_wake_queue(dev); @@ -705,7 +707,8 @@ network_tx_buf_gc(dev); - if (RING_FULL(&np->tx)) { + if (RING_FULL(&np->tx) || + gnttab_empty_grant_references(&np->gref_tx_head)) { np->tx_full = 1; netif_stop_queue(dev); } @@ -1140,14 +1143,14 @@ } /* A grant for every tx ring slot */ - if 
(gnttab_alloc_grant_references(NET_TX_RING_SIZE, + if (gnttab_alloc_grant_references(TX_MAX_TARGET, &np->gref_tx_head) < 0) { printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); err = -ENOMEM; goto exit; } /* A grant for every rx ring slot */ - if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, + if (gnttab_alloc_grant_references(RX_MAX_TARGET, &np->gref_rx_head) < 0) { printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); gnttab_free_grant_references(np->gref_tx_head); diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Sat Apr 15 18:25:21 2006 @@ -13,6 +13,7 @@ #include "common.h" #include <xen/balloon.h> +#include <xen/gnttab.h> static kmem_cache_t *tpmif_cachep; int num_frontends = 0; @@ -72,12 +73,10 @@ static int map_frontend_page(tpmif_t *tpmif, unsigned long shared_page) { int ret; - struct gnttab_map_grant_ref op = { - .host_addr = (unsigned long)tpmif->tx_area->addr, - .flags = GNTMAP_host_map, - .ref = shared_page, - .dom = tpmif->domid, - }; + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)tpmif->tx_area->addr, + GNTMAP_host_map, shared_page, tpmif->domid); lock_vm_area(tpmif->tx_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); @@ -100,9 +99,8 @@ struct gnttab_unmap_grant_ref op; int ret; - op.host_addr = (unsigned long)tpmif->tx_area->addr; - op.handle = tpmif->shmem_handle; - op.dev_bus_addr = 0; + gnttab_set_unmap_op(&op, (unsigned long)tpmif->tx_area->addr, + GNTMAP_host_map, tpmif->shmem_handle); lock_vm_area(tpmif->tx_area); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Sat Apr 15 18:25:09 2006 +++ 
b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Sat Apr 15 18:25:21 2006 @@ -21,6 +21,7 @@ #include <asm/uaccess.h> #include <xen/xenbus.h> #include <xen/interface/grant_table.h> +#include <xen/gnttab.h> /* local data structures */ struct data_exchange { @@ -278,10 +279,8 @@ return 0; } - map_op.host_addr = MMAP_VADDR(tpmif, i); - map_op.flags = GNTMAP_host_map; - map_op.ref = tx->ref; - map_op.dom = tpmif->domid; + gnttab_set_map_op(&map_op, MMAP_VADDR(tpmif, i), + GNTMAP_host_map, tx->ref, tpmif->domid); if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map_op, 1))) { @@ -308,9 +307,8 @@ } tx->size = tocopy; - unmap_op.host_addr = MMAP_VADDR(tpmif, i); - unmap_op.handle = handle; - unmap_op.dev_bus_addr = 0; + gnttab_set_unmap_op(&unmap_op, MMAP_VADDR(tpmif, i), + GNTMAP_host_map, handle); if (unlikely (HYPERVISOR_grant_table_op @@ -422,10 +420,8 @@ tx = &tpmif->tx->ring[i].req; - map_op.host_addr = MMAP_VADDR(tpmif, i); - map_op.flags = GNTMAP_host_map; - map_op.ref = tx->ref; - map_op.dom = tpmif->domid; + gnttab_set_map_op(&map_op, MMAP_VADDR(tpmif, i), + GNTMAP_host_map, tx->ref, tpmif->domid); if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map_op, 1))) { @@ -461,9 +457,8 @@ tpmif->domid, buffer[offset], buffer[offset + 1], buffer[offset + 2], buffer[offset + 3]); - unmap_op.host_addr = MMAP_VADDR(tpmif, i); - unmap_op.handle = handle; - unmap_op.dev_bus_addr = 0; + gnttab_set_unmap_op(&unmap_op, MMAP_VADDR(tpmif, i), + GNTMAP_host_map, handle); if (unlikely (HYPERVISOR_grant_table_op diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Sat Apr 15 18:25:21 2006 @@ -164,10 +164,10 @@ switch (frontend_state) { case XenbusStateInitialising: + case XenbusStateInitialised: + break; + case XenbusStateConnected: - break; - - case XenbusStateInitialised: err = 
connect_ring(be); if (err) { return; diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Sat Apr 15 18:25:21 2006 @@ -334,12 +334,6 @@ goto abort_transaction; } - err = xenbus_printf(xbt, dev->nodename, - "state", "%d", XenbusStateInitialised); - if (err) { - goto abort_transaction; - } - err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) goto again; @@ -347,6 +341,9 @@ xenbus_dev_fatal(dev, err, "completing transaction"); goto destroy_tpmring; } + + xenbus_switch_state(dev, XenbusStateConnected); + return 0; abort_transaction: @@ -387,6 +384,7 @@ if (tp->is_suspended == 0) { device_unregister(&dev->dev); } + xenbus_switch_state(dev, XenbusStateClosed); break; } } @@ -439,6 +437,7 @@ /* lock, so no app can send */ mutex_lock(&suspend_lock); + xenbus_switch_state(dev, XenbusStateClosed); tp->is_suspended = 1; for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 25; ctr++) { diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c Sat Apr 15 18:25:21 2006 @@ -37,11 +37,7 @@ /* Based on Rusty Russell's skeleton driver's map_page */ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) { - struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; + struct gnttab_map_grant_ref op; struct vm_struct *area; *vaddr = NULL; @@ -50,8 +46,9 @@ if (!area) return -ENOMEM; - op.host_addr = (unsigned long)area->addr; - + gnttab_set_map_op(&op, (unsigned long)area->addr, GNTMAP_host_map, + gnt_ref, dev->otherend_id); + lock_vm_area(area); BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); 
unlock_vm_area(area); @@ -76,13 +73,10 @@ int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, grant_handle_t *handle, void *vaddr) { - struct gnttab_map_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; - + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, + gnt_ref, dev->otherend_id); BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); if (op.status != GNTST_okay) { @@ -101,9 +95,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) { struct vm_struct *area; - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - }; + struct gnttab_unmap_grant_ref op; /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) * method so that we don't have to muck with vmalloc internals here. @@ -124,7 +116,8 @@ return GNTST_bad_virt_addr; } - op.handle = (grant_handle_t)area->phys_addr; + gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, + (grant_handle_t)area->phys_addr); lock_vm_area(area); BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); @@ -145,11 +138,10 @@ int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t handle, void *vaddr) { - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .handle = handle, - }; + struct gnttab_unmap_grant_ref op; + gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, + handle); BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); if (op.status != GNTST_okay) diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/include/xen/gnttab.h --- a/linux-2.6-xen-sparse/include/xen/gnttab.h Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/include/xen/gnttab.h Sat Apr 15 18:25:21 2006 @@ -40,6 +40,7 @@ #include <linux/config.h> #include <asm/hypervisor.h> #include <xen/interface/grant_table.h> +#include <xen/features.h> /* NR_GRANT_FRAMES must be less 
than or equal to that configured in Xen */ #ifdef __ia64__ @@ -90,6 +91,8 @@ void gnttab_free_grant_references(grant_ref_t head); +int gnttab_empty_grant_references(const grant_ref_t *pprivate_head); + int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); void gnttab_release_grant_reference(grant_ref_t *private_head, @@ -113,6 +116,37 @@ int gnttab_suspend(void); int gnttab_resume(void); +static inline void +gnttab_set_map_op(struct gnttab_map_grant_ref *map, unsigned long addr, + uint32_t flags, grant_ref_t ref, domid_t domid) +{ + if (flags & GNTMAP_contains_pte) + map->host_addr = addr; + else if (xen_feature(XENFEAT_auto_translated_physmap)) + map->host_addr = __pa(addr); + else + map->host_addr = addr; + + map->flags = flags; + map->ref = ref; + map->dom = domid; +} + +static inline void +gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, unsigned long addr, + uint32_t flags, grant_handle_t handle) +{ + if (flags & GNTMAP_contains_pte) + unmap->host_addr = addr; + else if (xen_feature(XENFEAT_auto_translated_physmap)) + unmap->host_addr = __pa(addr); + else + unmap->host_addr = addr; + + unmap->handle = handle; + unmap->dev_bus_addr = 0; +} + #endif /* __ASM_GNTTAB_H__ */ /* diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/mkbuildtree --- a/linux-2.6-xen-sparse/mkbuildtree Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/mkbuildtree Sat Apr 15 18:25:21 2006 @@ -90,8 +90,8 @@ RS=$DESTPATH # Arch-specific pre-processing -if [ -x arch/${LINUX_ARCH}/xen-mkbuildtree-pre ]; then - arch/${LINUX_ARCH}/xen-mkbuildtree-pre +if [ -e arch/${LINUX_ARCH}/xen-mkbuildtree-pre ]; then + bash arch/${LINUX_ARCH}/xen-mkbuildtree-pre fi # Remove old copies of files and directories at the destination @@ -115,6 +115,6 @@ # Arch-specific post-processing cd ${AD} -if [ -x arch/${LINUX_ARCH}/xen-mkbuildtree-post ]; then - arch/${LINUX_ARCH}/xen-mkbuildtree-post +if [ -e arch/${LINUX_ARCH}/xen-mkbuildtree-post ]; then + bash 
arch/${LINUX_ARCH}/xen-mkbuildtree-post fi diff -r 83eb8d81c96f -r 91da9a1b7196 linux-2.6-xen-sparse/net/core/dev.c --- a/linux-2.6-xen-sparse/net/core/dev.c Sat Apr 15 18:25:09 2006 +++ b/linux-2.6-xen-sparse/net/core/dev.c Sat Apr 15 18:25:21 2006 @@ -1294,6 +1294,7 @@ if ((skb->h.raw + skb->csum + 2) > skb->tail) goto out_kfree_skb; skb->ip_summed = CHECKSUM_HW; + skb->proto_csum_blank = 0; } #endif diff -r 83eb8d81c96f -r 91da9a1b7196 tools/debugger/gdb/gdbbuild --- a/tools/debugger/gdb/gdbbuild Sat Apr 15 18:25:09 2006 +++ b/tools/debugger/gdb/gdbbuild Sat Apr 15 18:25:21 2006 @@ -7,7 +7,7 @@ tar xjf gdb-6.2.1.tar.bz2 cd gdb-6.2.1-xen-sparse -./mkbuildtree ../gdb-6.2.1 +bash ./mkbuildtree ../gdb-6.2.1 cd .. mkdir gdb-6.2.1-linux-i386-xen diff -r 83eb8d81c96f -r 91da9a1b7196 tools/examples/xend-config.sxp --- a/tools/examples/xend-config.sxp Sat Apr 15 18:25:09 2006 +++ b/tools/examples/xend-config.sxp Sat Apr 15 18:25:21 2006 @@ -127,3 +127,6 @@ # Whether to enable core-dumps when domains crash. #(enable-dump no) + +# The tool used for initiating virtual TPM migration +#(external-migration-tool '') diff -r 83eb8d81c96f -r 91da9a1b7196 tools/firmware/hvmloader/Makefile --- a/tools/firmware/hvmloader/Makefile Sat Apr 15 18:25:09 2006 +++ b/tools/firmware/hvmloader/Makefile Sat Apr 15 18:25:21 2006 @@ -21,7 +21,7 @@ # External CFLAGS can do more harm than good. CFLAGS := -XEN_TARGET_ARCH = x86_32 +override XEN_TARGET_ARCH = x86_32 XEN_ROOT = ../../.. include $(XEN_ROOT)/Config.mk diff -r 83eb8d81c96f -r 91da9a1b7196 tools/firmware/vmxassist/Makefile --- a/tools/firmware/vmxassist/Makefile Sat Apr 15 18:25:09 2006 +++ b/tools/firmware/vmxassist/Makefile Sat Apr 15 18:25:21 2006 @@ -21,7 +21,7 @@ # External CFLAGS can do more harm than good. CFLAGS := -XEN_TARGET_ARCH = x86_32 +override XEN_TARGET_ARCH = x86_32 XEN_ROOT = ../../.. 
include $(XEN_ROOT)/Config.mk diff -r 83eb8d81c96f -r 91da9a1b7196 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Sat Apr 15 18:25:09 2006 +++ b/tools/ioemu/vl.c Sat Apr 15 18:25:21 2006 @@ -138,7 +138,7 @@ int gus_enabled = 1; int pci_enabled = 1; int prep_enabled = 0; -int rtc_utc = 0; +int rtc_utc = 1; int cirrus_vga_enabled = 1; int vga_accelerate = 1; int graphic_width = 800; diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_bvtsched.c --- a/tools/libxc/xc_bvtsched.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_bvtsched.c Sat Apr 15 18:25:21 2006 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_bvtsched.c - * + * * API for manipulating parameters of the Borrowed Virtual Time scheduler. - * + * * Copyright (c) 2003, K A Fraser. */ @@ -26,7 +26,7 @@ { DECLARE_DOM0_OP; int ret; - + op.cmd = DOM0_SCHEDCTL; op.u.schedctl.sched_id = SCHED_BVT; op.u.schedctl.direction = SCHED_INFO_GET; @@ -71,7 +71,7 @@ long long *warpl, long long *warpu) { - + DECLARE_DOM0_OP; int ret; struct bvt_adjdom *adjptr = &op.u.adjustdom.u.bvt; diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_core.c --- a/tools/libxc/xc_core.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_core.c Sat Apr 15 18:25:21 2006 @@ -23,7 +23,7 @@ return 0; } -int +int xc_domain_dumpcore_via_callback(int xc_handle, uint32_t domid, void *args, @@ -45,13 +45,13 @@ PERROR("Could not allocate dump_mem"); goto error_out; } - + if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 ) { PERROR("Could not get info for domain"); goto error_out; } - + if ( domid != info.domid ) { PERROR("Domain %d does not exist", domid); @@ -61,10 +61,10 @@ for ( i = 0; i <= info.max_vcpu_id; i++ ) if ( xc_vcpu_getcontext(xc_handle, domid, i, &ctxt[nr_vcpus]) == 0) nr_vcpus++; - + nr_pages = info.nr_pages; - header.xch_magic = XC_CORE_MAGIC; + header.xch_magic = XC_CORE_MAGIC; header.xch_nr_vcpus = nr_vcpus; header.xch_nr_pages = nr_pages; header.xch_ctxt_offset = sizeof(struct 
xc_core_header); @@ -74,7 +74,7 @@ (sizeof(vcpu_guest_context_t) * nr_vcpus) + (nr_pages * sizeof(unsigned long))); header.xch_pages_offset = round_pgup(dummy_len); - + sts = dump_rtn(args, (char *)&header, sizeof(struct xc_core_header)); if ( sts != 0 ) goto error_out; @@ -150,7 +150,7 @@ return 0; } -int +int xc_domain_dumpcore(int xc_handle, uint32_t domid, const char *corename) @@ -163,7 +163,7 @@ PERROR("Could not open corefile %s: %s", corename, strerror(errno)); return -errno; } - + sts = xc_domain_dumpcore_via_callback( xc_handle, domid, &da, &local_file_dump); diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_domain.c Sat Apr 15 18:25:21 2006 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_domain.c - * + * * API for manipulating and obtaining information on domains. - * + * * Copyright (c) 2003, K A Fraser. */ @@ -26,17 +26,17 @@ *pdomid = (uint16_t)op.u.createdomain.domain; return 0; -} - - -int xc_domain_pause(int xc_handle, +} + + +int xc_domain_pause(int xc_handle, uint32_t domid) { DECLARE_DOM0_OP; op.cmd = DOM0_PAUSEDOMAIN; op.u.pausedomain.domain = (domid_t)domid; return do_dom0_op(xc_handle, &op); -} +} int xc_domain_unpause(int xc_handle, @@ -46,7 +46,7 @@ op.cmd = DOM0_UNPAUSEDOMAIN; op.u.unpausedomain.domain = (domid_t)domid; return do_dom0_op(xc_handle, &op); -} +} int xc_domain_destroy(int xc_handle, @@ -88,7 +88,7 @@ int xc_vcpu_setaffinity(int xc_handle, - uint32_t domid, + uint32_t domid, int vcpu, cpumap_t cpumap) { @@ -109,7 +109,7 @@ unsigned int nr_doms; uint32_t next_domid = first_domid; DECLARE_DOM0_OP; - int rc = 0; + int rc = 0; memset(info, 0, max_doms*sizeof(xc_dominfo_t)); @@ -127,8 +127,8 @@ info->blocked = !!(op.u.getdomaininfo.flags & DOMFLAGS_BLOCKED); info->running = !!(op.u.getdomaininfo.flags & DOMFLAGS_RUNNING); - info->shutdown_reason = - 
(op.u.getdomaininfo.flags>>DOMFLAGS_SHUTDOWNSHIFT) & + info->shutdown_reason = + (op.u.getdomaininfo.flags>>DOMFLAGS_SHUTDOWNSHIFT) & DOMFLAGS_SHUTDOWNMASK; if ( info->shutdown && (info->shutdown_reason == SHUTDOWN_crash) ) @@ -152,7 +152,7 @@ info++; } - if( !nr_doms ) return rc; + if( !nr_doms ) return rc; return nr_doms; } @@ -167,7 +167,7 @@ if ( mlock(info, max_domains*sizeof(xc_domaininfo_t)) != 0 ) return -1; - + op.cmd = DOM0_GETDOMAININFOLIST; op.u.getdomaininfolist.first_domain = first_domain; op.u.getdomaininfolist.max_domains = max_domains; @@ -177,10 +177,10 @@ ret = -1; else ret = op.u.getdomaininfolist.num_domains; - + if ( munlock(info, max_domains*sizeof(xc_domaininfo_t)) != 0 ) ret = -1; - + return ret; } @@ -209,7 +209,7 @@ int xc_shadow_control(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned int sop, unsigned long *dirty_bitmap, unsigned long pages, @@ -238,11 +238,11 @@ { int sched_id; int ret; - + /* Figure out which scheduler is currently used: */ if ( (ret = xc_sched_id(xc_handle, &sched_id)) != 0 ) return ret; - + switch ( sched_id ) { case SCHED_BVT: @@ -253,20 +253,20 @@ long long warpl; long long warpu; - /* Preserve all the scheduling parameters apart + /* Preserve all the scheduling parameters apart of MCU advance. */ if ( (ret = xc_bvtsched_domain_get( - xc_handle, domid, &mcuadv, + xc_handle, domid, &mcuadv, &warpback, &warpvalue, &warpl, &warpu)) != 0 ) return ret; - + /* The MCU advance is inverse of the weight. Default value of the weight is 1, default mcuadv 10. The scaling factor is therefore 10. 
*/ if ( weight > 0 ) mcuadv = 10 / weight; - - ret = xc_bvtsched_domain_set(xc_handle, domid, mcuadv, + + ret = xc_bvtsched_domain_set(xc_handle, domid, mcuadv, warpback, warpvalue, warpl, warpu); break; } @@ -276,7 +276,7 @@ } int xc_domain_setmaxmem(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned int max_memkb) { DECLARE_DOM0_OP; @@ -287,7 +287,7 @@ } int xc_domain_memory_increase_reservation(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, @@ -297,7 +297,7 @@ struct xen_memory_reservation reservation = { .extent_start = extent_start, /* may be NULL */ .nr_extents = nr_extents, - .extent_order = extent_order, + .extent_order = extent_order, .address_bits = address_bits, .domid = domid }; @@ -319,16 +319,16 @@ } int xc_domain_memory_decrease_reservation(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned long nr_extents, unsigned int extent_order, unsigned long *extent_start) { int err; struct xen_memory_reservation reservation = { - .extent_start = extent_start, + .extent_start = extent_start, .nr_extents = nr_extents, - .extent_order = extent_order, + .extent_order = extent_order, .address_bits = 0, .domid = domid }; @@ -411,7 +411,7 @@ return do_dom0_op(xc_handle, &op); } -int xc_domain_sethandle(int xc_handle, uint32_t domid, +int xc_domain_sethandle(int xc_handle, uint32_t domid, xen_domain_handle_t handle) { DECLARE_DOM0_OP; @@ -506,7 +506,7 @@ op.cmd = DOM0_IOMEM_PERMISSION; op.u.iomem_permission.domain = domid; op.u.iomem_permission.first_mfn = first_mfn; - op.u.iomem_permission.nr_mfns = nr_mfns; + op.u.iomem_permission.nr_mfns = nr_mfns; op.u.iomem_permission.allow_access = allow_access; return do_dom0_op(xc_handle, &op); diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_elf.h --- a/tools/libxc/xc_elf.h Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_elf.h Sat Apr 15 18:25:21 2006 @@ -46,7 +46,7 @@ typedef uint16_t Elf64_Quarter; /* - * e_ident[] 
identification indexes + * e_ident[] identification indexes * See http://www.caldera.com/developers/gabi/2000-07-17/ch4.eheader.html */ #define EI_MAG0 0 /* file ID */ @@ -57,7 +57,7 @@ #define EI_DATA 5 /* data encoding */ #define EI_VERSION 6 /* ELF header version */ #define EI_OSABI 7 /* OS/ABI ID */ -#define EI_ABIVERSION 8 /* ABI version */ +#define EI_ABIVERSION 8 /* ABI version */ #define EI_PAD 9 /* start of pad bytes */ #define EI_NIDENT 16 /* Size of e_ident[] */ @@ -119,7 +119,7 @@ Elf32_Half e_phnum; /* number of program header entries */ Elf32_Half e_shentsize; /* section header entry size */ Elf32_Half e_shnum; /* number of section header entries */ - Elf32_Half e_shstrndx; /* section header table's "section + Elf32_Half e_shstrndx; /* section header table's "section header string table" entry offset */ } Elf32_Ehdr; @@ -160,7 +160,7 @@ #define EM_486 6 /* Intel 80486 - unused? */ #define EM_860 7 /* Intel 80860 */ #define EM_MIPS 8 /* MIPS R3000 Big-Endian only */ -/* +/* * Don't know if EM_MIPS_RS4_BE, * EM_SPARC64, EM_PARISC, * or EM_PPC are ABI compliant @@ -441,7 +441,7 @@ #define DT_NUM 25 /* Number used. */ #define DT_LOPROC 0x70000000 /* reserved range for processor */ #define DT_HIPROC 0x7fffffff /* specific dynamic array tags */ - + /* Standard ELF hashing function */ unsigned int elf_hash(const unsigned char *name); diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_evtchn.c --- a/tools/libxc/xc_evtchn.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_evtchn.c Sat Apr 15 18:25:21 2006 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_evtchn.c - * + * * API for manipulating and accessing inter-domain event channels. - * + * * Copyright (c) 2004, K A Fraser. 
*/ @@ -44,7 +44,7 @@ if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 ) rc = op.u.alloc_unbound.port; - + return rc; } @@ -62,6 +62,6 @@ if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 ) memcpy(status, &op.u.status, sizeof(*status)); - + return rc; } diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_ia64_stubs.c --- a/tools/libxc/xc_ia64_stubs.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_ia64_stubs.c Sat Apr 15 18:25:21 2006 @@ -22,7 +22,7 @@ return FPSR_DEFAULT; } -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, +int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, int (*suspend)(int domid)) { @@ -50,8 +50,8 @@ } int xc_ia64_get_pfn_list(int xc_handle, - uint32_t domid, - unsigned long *pfn_buf, + uint32_t domid, + unsigned long *pfn_buf, unsigned int start_page, unsigned int nr_pages) { @@ -65,16 +65,16 @@ op.u.getmemlist.buffer = pfn_buf; if ( (max_pfns != -1UL) - && mlock(pfn_buf, nr_pages * sizeof(unsigned long)) != 0 ) + && mlock(pfn_buf, nr_pages * sizeof(unsigned long)) != 0 ) { PERROR("Could not lock pfn list buffer"); return -1; - } + } ret = do_dom0_op(xc_handle, &op); if (max_pfns != -1UL) - (void)munlock(pfn_buf, nr_pages * sizeof(unsigned long)); + (void)munlock(pfn_buf, nr_pages * sizeof(unsigned long)); return (ret < 0) ? -1 : op.u.getmemlist.num_pfns; } @@ -84,7 +84,7 @@ dom0_op_t op; op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)domid; - return (do_dom0_op(xc_handle, &op) < 0) ? + return (do_dom0_op(xc_handle, &op) < 0) ? -1 : op.u.getdomaininfo.max_pages; } @@ -92,7 +92,7 @@ void* src_page, unsigned long dst_pfn, int nr_pages) { // N.B. 
gva should be page aligned - + unsigned long *page_array = NULL; int i; @@ -107,13 +107,13 @@ } for ( i=0; i< nr_pages; i++ ){ - if (xc_copy_to_domain_page(xc_handle, domid, page_array[i], - src_page + (i << PAGE_SHIFT))) - goto error_out; + if (xc_copy_to_domain_page(xc_handle, domid, page_array[i], + src_page + (i << PAGE_SHIFT))) + goto error_out; } free(page_array); return 0; - + error_out: free(page_array); return -1; @@ -123,8 +123,8 @@ #define HOB_SIGNATURE 0x3436474953424f48 // "HOBSIG64" #define GFW_HOB_START ((4UL<<30)-(14UL<<20)) //4G -14M #define GFW_HOB_SIZE (1UL<<20) //1M -#define MEM_G (1UL << 30) -#define MEM_M (1UL << 20) +#define MEM_G (1UL << 30) +#define MEM_M (1UL << 20) typedef struct { unsigned long signature; @@ -136,7 +136,7 @@ * INFO HOB is the first data data in one HOB list * it contains the control information of the HOB list */ -typedef struct { +typedef struct { HOB_GENERIC_HEADER header; unsigned long length; // current length of hob unsigned long cur_pos; // current poisiton of hob @@ -216,7 +216,7 @@ // buffer too small return -1; } - + phit = (HOB_INFO*)buffer; phit->header.signature = HOB_SIGNATURE; phit->header.type = HOB_TYPE_INFO; @@ -224,7 +224,7 @@ phit->length = sizeof(HOB_INFO) + sizeof(HOB_GENERIC_HEADER); phit->cur_pos = 0; phit->buf_size = buf_size; - + terminal = (HOB_GENERIC_HEADER*) (buffer + sizeof(HOB_INFO)); terminal->signature= HOB_SIGNATURE; terminal->type = HOB_TYPE_TERMINAL; @@ -235,7 +235,7 @@ /* * Add a new HOB to the HOB List. 
- * + * * hob_start - start address of hob buffer * type - type of the hob to be added * data - data of the hob to be added @@ -250,8 +250,8 @@ ) { HOB_INFO *phit; - HOB_GENERIC_HEADER *newhob,*tail; - + HOB_GENERIC_HEADER *newhob,*tail; + phit = (HOB_INFO*)hob_start; if (phit->length + data_size > phit->buf_size){ @@ -259,7 +259,7 @@ return -1; } - //append new HOB + //append new HOB newhob = (HOB_GENERIC_HEADER*) (hob_start + phit->length - sizeof(HOB_GENERIC_HEADER)); newhob->signature = HOB_SIGNATURE; @@ -267,7 +267,7 @@ newhob->length = data_size + sizeof(HOB_GENERIC_HEADER); memcpy((void*)newhob + sizeof(HOB_GENERIC_HEADER), data, data_size); - // append terminal HOB + // append terminal HOB tail = (HOB_GENERIC_HEADER*) ( hob_start + phit->length + data_size); tail->signature = HOB_SIGNATURE; tail->type = HOB_TYPE_TERMINAL; @@ -281,9 +281,9 @@ } int get_hob_size(void* hob_buf){ - + HOB_INFO *phit = (HOB_INFO*)hob_buf; - + if (phit->header.signature != HOB_SIGNATURE){ PERROR("xc_get_hob_size:Incorrect signature"); return -1; @@ -293,30 +293,30 @@ int build_hob (void* hob_buf, unsigned long hob_buf_size, unsigned long dom_mem_size) -{ - //Init HOB List +{ + //Init HOB List if (hob_init (hob_buf, hob_buf_size)<0){ PERROR("buffer too small"); goto err_out; } - + if ( add_mem_hob( hob_buf,dom_mem_size) < 0){ PERROR("Add memory hob failed, buffer too small"); goto err_out; } - + if ( add_pal_hob( hob_buf ) < 0 ){ PERROR("Add PAL hob failed, buffer too small"); goto err_out; } - + return 0; err_out: - return -1; -} - -static int + return -1; +} + +static int load_hob(int xc_handle, uint32_t dom, void *hob_buf) { // hob_buf should be page aligned @@ -334,22 +334,22 @@ } nr_pages = (hob_size + PAGE_SIZE -1) >> PAGE_SHIFT; - + return xc_ia64_copy_to_domain_pages(xc_handle, dom, hob_buf, GFW_HOB_START, nr_pages ); } #define MIN(x, y) ((x) < (y)) ? 
(x) : (y) -static int +static int add_mem_hob(void* hob_buf, unsigned long dom_mem_size){ hob_mem_t memhob; // less than 3G memhob.start = 0; memhob.size = MIN(dom_mem_size, 0xC0000000); - + if (hob_add(hob_buf, HOB_TYPE_MEM, &memhob, sizeof(memhob)) < 0){ - return -1; + return -1; } if (dom_mem_size > 0xC0000000) { @@ -373,29 +373,29 @@ }; unsigned char config_pal_cache_info[152] = { 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 4, 6, 7, 255, 1, 0, 1, 0, 64, 0, 0, 12, 12, + 6, 4, 6, 7, 255, 1, 0, 1, 0, 64, 0, 0, 12, 12, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 6, 7, 0, 1, - 0, 1, 0, 64, 0, 0, 12, 12, 49, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 6, 8, 7, 7, 255, 7, 0, 11, 0, 0, 16, 0, - 12, 17, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 8, 7, + 0, 1, 0, 64, 0, 0, 12, 12, 49, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 6, 8, 7, 7, 255, 7, 0, 11, 0, 0, 16, 0, + 12, 17, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 8, 7, 7, 7, 5, 9, 11, 0, 0, 4, 0, 12, 15, 49, 0, 254, 255, - 255, 255, 255, 255, 255, 255, 2, 8, 7, 7, 7, 5, 9, - 11, 0, 0, 4, 0, 12, 15, 49, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 2, 8, 7, 7, 7, 5, 9, + 11, 0, 0, 4, 0, 12, 15, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 12, 7, 7, 7, 14, 1, 3, 0, 0, 192, 0, 12, 20, 49, 0 }; unsigned char config_pal_cache_prot_info[200] = { - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 16, 8, 0, 76, 12, 64, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 8, 0, 16, 4, 0, 76, 44, 68, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, - 0, 16, 8, 0, 81, 44, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 8, 0, 16, 4, 0, 76, 44, 68, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, + 0, 16, 8, 0, 81, 44, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, - 112, 12, 0, 79, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 254, 255, 255, 255, 255, 
255, 255, 255, + 112, 12, 0, 79, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255, 32, 0, 112, 12, 0, 79, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 160, - 12, 0, 84, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 160, + 12, 0, 84, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned char config_pal_debug_info[16] = { @@ -408,37 +408,37 @@ 109, 219, 182, 13, 0, 0, 0, 0 }; unsigned char config_pal_freq_ratios[24] = { - 11, 1, 0, 0, 77, 7, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 4, + 11, 1, 0, 0, 77, 7, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0 }; unsigned char config_pal_halt_info[64] = { - 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned char config_pal_perf_mon_info[136] = { - 12, 47, 18, 8, 0, 0, 0, 0, 241, 255, 0, 0, 255, 7, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 241, 255, 0, 0, 223, 0, 255, 255, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 240, 255, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 240, 255, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 12, 47, 18, 8, 0, 0, 0, 0, 241, 255, 0, 0, 255, 7, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 241, 255, 0, 0, 223, 0, 255, 255, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 240, 255, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 240, 255, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 
unsigned char config_pal_proc_get_features[104] = { - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 64, 6, 64, 49, 0, 0, 0, 0, 64, 6, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, - 231, 0, 0, 0, 0, 0, 0, 0, 228, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 64, 6, 64, 49, 0, 0, 0, 0, 64, 6, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, + 231, 0, 0, 0, 0, 0, 0, 0, 228, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned char config_pal_ptce_info[24] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned char config_pal_register_info[64] = { @@ -473,7 +473,7 @@ typedef struct{ hob_type_t type; void* data; - unsigned long size; + unsigned long size; }hob_batch_t; hob_batch_t hob_batch[]={ @@ -552,13 +552,13 @@ { HOB_TYPE_PAL_VM_PAGE_SIZE, &config_pal_vm_page_size, sizeof(config_pal_vm_page_size) - }, + }, }; static int add_pal_hob(void* hob_buf){ int i; for (i=0; i<sizeof(hob_batch)/sizeof(hob_batch_t); i++){ - if (hob_add(hob_buf, hob_batch[i].type, + if (hob_add(hob_buf, hob_batch[i].type, hob_batch[i].data, hob_batch[i].size)<0) return -1; @@ -579,17 +579,17 @@ // FIXME: initialize pfn list for a temp hack if (xc_ia64_get_pfn_list(xc_handle, dom, NULL, -1, -1) == -1) { - PERROR("Could not allocate continuous memory"); - goto error_out; - } - + PERROR("Could not allocate continuous memory"); + goto error_out; + } + if ((image_size > 12 * MEM_M) || (image_size & (PAGE_SIZE - 1))) { PERROR("Guest firmware size is incorrect [%ld]?", image_size); return -1; } /* Load guest firmware */ - if( xc_ia64_copy_to_domain_pages( xc_handle, dom, + if( xc_ia64_copy_to_domain_pages( xc_handle, dom, image, 4*MEM_G-image_size, image_size>>PAGE_SHIFT)) { PERROR("Could not 
load guest firmware into domain"); goto error_out; @@ -610,9 +610,9 @@ *store_mfn = page_array[1]; if ((sp = (shared_iopage_t *) xc_map_foreign_range( - xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, - page_array[0])) == 0) - goto error_out; + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + page_array[0])) == 0) + goto error_out; memset(sp, 0, PAGE_SIZE); for (i = 0; i < vcpus; i++) { @@ -665,14 +665,14 @@ image_size = (image_size + PAGE_SIZE - 1) & PAGE_MASK; - if ( mlock(&st_ctxt, sizeof(st_ctxt) ) ){ + if ( mlock(&st_ctxt, sizeof(st_ctxt) ) ){ PERROR("Unable to mlock ctxt"); return 1; } op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)domid; - if ( (do_dom0_op(xc_handle, &op) < 0) || + if ( (do_dom0_op(xc_handle, &op) < 0) || ((uint16_t)op.u.getdomaininfo.domain != domid) ) { PERROR("Could not get info on domain"); goto error_out; diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_linux_build.c Sat Apr 15 18:25:21 2006 @@ -237,7 +237,7 @@ else { *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT; - if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) && + if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) && (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) ) *vl1e &= ~_PAGE_RW; } @@ -314,7 +314,7 @@ else *vl2e++ = l1tab | L2_PROT; } - + if ( shadow_mode_enabled ) { *vl1e = (count << PAGE_SHIFT) | L1_PROT; @@ -323,12 +323,12 @@ { *vl1e = ((uint64_t)page_array[count] << PAGE_SHIFT) | L1_PROT; if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) && - (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) ) + (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) ) *vl1e &= ~_PAGE_RW; } vl1e++; } - + munmap(vl1tab, PAGE_SIZE); munmap(vl2tab, PAGE_SIZE); munmap(vl3tab, PAGE_SIZE); @@ -376,13 +376,13 @@ ctxt->ctrlreg[3] = pl4tab; else ctxt->ctrlreg[3] = l4tab; - + for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++) { if ( !((unsigned long)vl1e & 
(PAGE_SIZE-1)) ) { alloc_pt(l1tab, vl1tab, pl1tab); - + if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) ) { alloc_pt(l2tab, vl2tab, pl2tab); @@ -410,7 +410,7 @@ *vl2e = l1tab | L2_PROT; vl2e++; } - + if ( shadow_mode_enabled ) { *vl1e = (count << PAGE_SHIFT) | L1_PROT; @@ -419,14 +419,14 @@ { *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT; if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) && - (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) ) + (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) ) { *vl1e &= ~_PAGE_RW; } } vl1e++; } - + munmap(vl1tab, PAGE_SIZE); munmap(vl2tab, PAGE_SIZE); munmap(vl3tab, PAGE_SIZE); @@ -509,7 +509,7 @@ " Loaded kernel: %p->%p\n" " Init. ramdisk: %p->%p\n" " TOTAL: %p->%p\n", - _p(dsi.v_kernstart), _p(dsi.v_kernend), + _p(dsi.v_kernstart), _p(dsi.v_kernend), _p(vinitrd_start), _p(vinitrd_end), _p(dsi.v_start), _p(v_end)); printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry)); @@ -696,10 +696,10 @@ required_features); /* - * Why do we need this? The number of page-table frames depends on the - * size of the bootstrap address space. But the size of the address space - * depends on the number of page-table frames (since each one is mapped - * read-only). We have a pair of simultaneous equations in two unknowns, + * Why do we need this? The number of page-table frames depends on the + * size of the bootstrap address space. But the size of the address space + * depends on the number of page-table frames (since each one is mapped + * read-only). We have a pair of simultaneous equations in two unknowns, * which we solve by exhaustive search. 
*/ v_end = round_pgup(dsi.v_end); @@ -731,13 +731,13 @@ if ( dsi.pae_kernel ) { /* FIXME: assumes one L2 pgtable @ 0xc0000000 */ - if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >> + if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >> L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages ) break; } else { - if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >> + if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >> L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages ) break; } @@ -873,7 +873,7 @@ count) ) { fprintf(stderr,"m2p update failure p=%lx m=%lx\n", - count, page_array[count]); + count, page_array[count]); munmap(physmap, PAGE_SIZE); goto error_out; } @@ -982,7 +982,7 @@ start_info->mod_len = initrd->len; } if ( cmdline != NULL ) - { + { strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE); start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0'; } @@ -1073,14 +1073,14 @@ #endif if ( mlock(&st_ctxt, sizeof(st_ctxt) ) ) - { + { PERROR("%s: ctxt mlock failed", __func__); return 1; } op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)domid; - if ( (xc_dom0_op(xc_handle, &op) < 0) || + if ( (xc_dom0_op(xc_handle, &op) < 0) || ((uint16_t)op.u.getdomaininfo.domain != domid) ) { PERROR("Could not get info on domain"); @@ -1089,9 +1089,9 @@ memset(ctxt, 0, sizeof(*ctxt)); - if ( setup_guest(xc_handle, domid, image, image_size, + if ( setup_guest(xc_handle, domid, image, image_size, initrd, - nr_pages, + nr_pages, &vstartinfo_start, &vkern_entry, &vstack_start, ctxt, cmdline, op.u.getdomaininfo.shared_info_frame, @@ -1152,7 +1152,7 @@ /* No LDT. */ ctxt->ldt_ents = 0; - + /* Use the default Xen-provided GDT. 
*/ ctxt->gdt_ents = 0; @@ -1184,7 +1184,7 @@ launch_op.cmd = DOM0_SETVCPUCONTEXT; rc = xc_dom0_op(xc_handle, &launch_op); - + return rc; error_out: diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_linux_restore.c Sat Apr 15 18:25:21 2006 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_linux_restore.c - * + * * Restore the state of a Linux session. - * + * * Copyright (c) 2003, K A Fraser. */ @@ -13,13 +13,13 @@ #include "xg_save_restore.h" /* max mfn of the whole machine */ -static unsigned long max_mfn; +static unsigned long max_mfn; /* virtual starting address of the hypervisor */ -static unsigned long hvirt_start; +static unsigned long hvirt_start; /* #levels of page tables used by the currrent guest */ -static unsigned int pt_levels; +static unsigned int pt_levels; /* total number of pages used by the current guest */ static unsigned long max_pfn; @@ -41,84 +41,84 @@ s = read(fd, &b[r], count - r); if ((s == -1) && (errno == EINTR)) continue; - if (s <= 0) { + if (s <= 0) { break; - } + } r += s; } - return (r == count) ? 1 : 0; + return (r == count) ? 1 : 0; } /* -** In the state file (or during transfer), all page-table pages are -** converted into a 'canonical' form where references to actual mfns -** are replaced with references to the corresponding pfns. -** This function inverts that operation, replacing the pfn values with -** the (now known) appropriate mfn values. +** In the state file (or during transfer), all page-table pages are +** converted into a 'canonical' form where references to actual mfns +** are replaced with references to the corresponding pfns. +** This function inverts that operation, replacing the pfn values with +** the (now known) appropriate mfn values. 
*/ -int uncanonicalize_pagetable(unsigned long type, void *page) -{ - int i, pte_last; - unsigned long pfn; - uint64_t pte; - - pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); +int uncanonicalize_pagetable(unsigned long type, void *page) +{ + int i, pte_last; + unsigned long pfn; + uint64_t pte; + + pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); /* Now iterate through the page table, uncanonicalizing each PTE */ - for(i = 0; i < pte_last; i++) { - - if(pt_levels == 2) - pte = ((uint32_t *)page)[i]; - else - pte = ((uint64_t *)page)[i]; - - if(pte & _PAGE_PRESENT) { + for(i = 0; i < pte_last; i++) { + + if(pt_levels == 2) + pte = ((uint32_t *)page)[i]; + else + pte = ((uint64_t *)page)[i]; + + if(pte & _PAGE_PRESENT) { pfn = (pte >> PAGE_SHIFT) & 0xffffffff; - - if(pfn >= max_pfn) { + + if(pfn >= max_pfn) { /* This "page table page" is probably not one; bail. */ ERR("Frame number in type %lu page table is out of range: " - "i=%d pfn=0x%lx max_pfn=%lu", + "i=%d pfn=0x%lx max_pfn=%lu", type >> 28, i, pfn, max_pfn); - return 0; - } - - + return 0; + } + + pte &= 0xffffff0000000fffULL; pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT; - if(pt_levels == 2) - ((uint32_t *)page)[i] = (uint32_t)pte; - else - ((uint64_t *)page)[i] = (uint64_t)pte; - - - - } - } - - return 1; + if(pt_levels == 2) + ((uint32_t *)page)[i] = (uint32_t)pte; + else + ((uint64_t *)page)[i] = (uint64_t)pte; + + + + } + } + + return 1; } -int xc_linux_restore(int xc_handle, int io_fd, - uint32_t dom, unsigned long nr_pfns, +int xc_linux_restore(int xc_handle, int io_fd, + uint32_t dom, unsigned long nr_pfns, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn) { DECLARE_DOM0_OP; int rc = 1, i, n; - unsigned long mfn, pfn; + unsigned long mfn, pfn; unsigned int prev_pc, this_pc; int verify = 0; - int nraces = 0; + int nraces = 0; /* The new domain's shared-info frame number. 
*/ unsigned long shared_info_frame; unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ shared_info_t *shared_info = (shared_info_t *)shared_info_page; - + /* A copy of the CPU context of the guest. */ vcpu_guest_context_t ctxt; @@ -135,7 +135,7 @@ unsigned long *page = NULL; /* A copy of the pfn-to-mfn table frame list. */ - unsigned long *p2m_frame_list = NULL; + unsigned long *p2m_frame_list = NULL; /* A temporary mapping of the guest's start_info page. */ start_info_t *start_info; @@ -148,17 +148,17 @@ unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; struct mmuext_op pin[MAX_PIN_BATCH]; - unsigned int nr_pins; - - - max_pfn = nr_pfns; + unsigned int nr_pins; + + + max_pfn = nr_pfns; DPRINTF("xc_linux_restore start: max_pfn = %lx\n", max_pfn); - if(!get_platform_info(xc_handle, dom, + if(!get_platform_info(xc_handle, dom, &max_mfn, &hvirt_start, &pt_levels)) { - ERR("Unable to get platform info."); + ERR("Unable to get platform info."); return 1; } @@ -171,20 +171,20 @@ /* Read the saved P2M frame list */ - if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { + if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { ERR("Couldn't allocate p2m_frame_list array"); goto out; } - - if (!read_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { + + if (!read_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { ERR("read p2m_frame_list failed"); goto out; } - + /* We want zeroed memory so use calloc rather than malloc. 
*/ - p2m = calloc(sizeof(unsigned long), max_pfn); - pfn_type = calloc(sizeof(unsigned long), max_pfn); + p2m = calloc(sizeof(unsigned long), max_pfn); + pfn_type = calloc(sizeof(unsigned long), max_pfn); region_mfn = calloc(sizeof(unsigned long), MAX_BATCH_SIZE); if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) { @@ -192,7 +192,7 @@ errno = ENOMEM; goto out; } - + if (mlock(region_mfn, sizeof(unsigned long) * MAX_BATCH_SIZE)) { ERR("Could not mlock region_mfn"); goto out; @@ -207,27 +207,27 @@ } shared_info_frame = op.u.getdomaininfo.shared_info_frame; - if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) { + if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) { errno = ENOMEM; goto out; } - + if(xc_domain_memory_increase_reservation( - xc_handle, dom, max_pfn, 0, 0, NULL) != 0) { + xc_handle, dom, max_pfn, 0, 0, NULL) != 0) { ERR("Failed to increase reservation by %lx KB", PFN_TO_KB(max_pfn)); errno = ENOMEM; goto out; } - DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn)); + DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn)); /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. 
*/ if (xc_get_pfn_list(xc_handle, dom, p2m, max_pfn) != max_pfn) { ERR("Did not read correct number of frame numbers for new dom"); goto out; } - - if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { + + if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { ERR("Could not initialise for MMU updates"); goto out; } @@ -242,7 +242,7 @@ prev_pc = 0; n = 0; - while (1) { + while (1) { int j; @@ -253,13 +253,13 @@ prev_pc = this_pc; } - if (!read_exact(io_fd, &j, sizeof(int))) { + if (!read_exact(io_fd, &j, sizeof(int))) { ERR("Error when reading batch size"); goto out; } PPRINTF("batch %d\n",j); - + if (j == -1) { verify = 1; fprintf(stderr, "Entering page verify mode\n"); @@ -269,27 +269,27 @@ if (j == 0) break; /* our work here is done */ - if (j > MAX_BATCH_SIZE) { + if (j > MAX_BATCH_SIZE) { ERR("Max batch size exceeded. Giving up."); goto out; } - - if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { + + if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { ERR("Error when reading region pfn types"); goto out; } - for (i = 0; i < j; i++) { + for (i = 0; i < j; i++) { if ((region_pfn_type[i] & LTAB_MASK) == XTAB) region_mfn[i] = 0; /* we know map will fail, but don't care */ - else - region_mfn[i] = p2m[region_pfn_type[i] & ~LTAB_MASK]; - - } - + else + region_mfn[i] = p2m[region_pfn_type[i] & ~LTAB_MASK]; + + } + if (!(region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_WRITE, region_mfn, j))) { + xc_handle, dom, PROT_WRITE, region_mfn, j))) { ERR("map batch failed"); goto out; } @@ -297,12 +297,12 @@ for ( i = 0; i < j; i++ ) { void *page; - unsigned long pagetype; + unsigned long pagetype; pfn = region_pfn_type[i] & ~LTAB_MASK; - pagetype = region_pfn_type[i] & LTAB_MASK; - - if (pagetype == XTAB) + pagetype = region_pfn_type[i] & LTAB_MASK; + + if (pagetype == XTAB) /* a bogus/unmapped page: skip it */ continue; @@ -311,72 +311,72 @@ goto out; } - pfn_type[pfn] = pagetype; + pfn_type[pfn] = pagetype; mfn = p2m[pfn]; /* In verify 
mode, we use a copy; otherwise we work in place */ - page = verify ? (void *)buf : (region_base + i*PAGE_SIZE); - - if (!read_exact(io_fd, page, PAGE_SIZE)) { + page = verify ? (void *)buf : (region_base + i*PAGE_SIZE); + + if (!read_exact(io_fd, page, PAGE_SIZE)) { ERR("Error when reading page (type was %lx)", pagetype); goto out; } - pagetype &= LTABTYPE_MASK; - - if(pagetype >= L1TAB && pagetype <= L4TAB) { - - /* - ** A page table page - need to 'uncanonicalize' it, i.e. - ** replace all the references to pfns with the corresponding - ** mfns for the new domain. - ** - ** On PAE we need to ensure that PGDs are in MFNs < 4G, and - ** so we may need to update the p2m after the main loop. - ** Hence we defer canonicalization of L1s until then. + pagetype &= LTABTYPE_MASK; + + if(pagetype >= L1TAB && pagetype <= L4TAB) { + + /* + ** A page table page - need to 'uncanonicalize' it, i.e. + ** replace all the references to pfns with the corresponding + ** mfns for the new domain. + ** + ** On PAE we need to ensure that PGDs are in MFNs < 4G, and + ** so we may need to update the p2m after the main loop. + ** Hence we defer canonicalization of L1s until then. */ - if(pt_levels != 3 || pagetype != L1TAB) { + if(pt_levels != 3 || pagetype != L1TAB) { if(!uncanonicalize_pagetable(pagetype, page)) { - /* + /* ** Failing to uncanonicalize a page table can be ok ** under live migration since the pages type may have - ** changed by now (and we'll get an update later). + ** changed by now (and we'll get an update later). 
*/ - DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", - pagetype >> 28, pfn, mfn); - nraces++; - continue; + DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", + pagetype >> 28, pfn, mfn); + nraces++; + continue; } - } - - } else if(pagetype != NOTAB) { + } + + } else if(pagetype != NOTAB) { ERR("Bogus page type %lx page table is out of range: " "i=%d max_pfn=%lu", pagetype, i, max_pfn); goto out; - } + } if (verify) { int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); - if (res) { + if (res) { int v; DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx " - "actualcs=%08lx\n", pfn, pfn_type[pfn], - csum_page(region_base + i*PAGE_SIZE), + "actualcs=%08lx\n", pfn, pfn_type[pfn], + csum_page(region_base + i*PAGE_SIZE), csum_page(buf)); for (v = 0; v < 4; v++) { - - unsigned long *p = (unsigned long *) + + unsigned long *p = (unsigned long *) (region_base + i*PAGE_SIZE); if (buf[v] != p[v]) DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); @@ -384,8 +384,8 @@ } } - if (xc_add_mmu_update(xc_handle, mmu, - (((unsigned long long)mfn) << PAGE_SHIFT) + if (xc_add_mmu_update(xc_handle, mmu, + (((unsigned long long)mfn) << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn)) { ERR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn); goto out; @@ -398,149 +398,149 @@ DPRINTF("Received all pages (%d races)\n", nraces); - if(pt_levels == 3) { - - /* - ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This + if(pt_levels == 3) { + + /* + ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This ** is a little awkward and involves (a) finding all such PGDs and - ** replacing them with 'lowmem' versions; (b) upating the p2m[] + ** replacing them with 'lowmem' versions; (b) upating the p2m[] ** with the new info; and (c) canonicalizing all the L1s using the - ** (potentially updated) p2m[]. - ** + ** (potentially updated) p2m[]. + ** ** This is relatively slow (and currently involves two passes through ** the pfn_type[] array), but at least seems to be correct. 
May wish - ** to consider more complex approaches to optimize this later. + ** to consider more complex approaches to optimize this later. */ - int j, k; + int j, k; /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ for (i = 0; i < max_pfn; i++) { - + if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) { - unsigned long new_mfn; - uint64_t l3ptes[4]; - uint64_t *l3tab; + unsigned long new_mfn; + uint64_t l3ptes[4]; + uint64_t *l3tab; l3tab = (uint64_t *) - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ, p2m[i]); - - for(j = 0; j < 4; j++) - l3ptes[j] = l3tab[j]; - - munmap(l3tab, PAGE_SIZE); + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, p2m[i]); + + for(j = 0; j < 4; j++) + l3ptes[j] = l3tab[j]; + + munmap(l3tab, PAGE_SIZE); if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) { ERR("Couldn't get a page below 4GB :-("); goto out; } - + p2m[i] = new_mfn; - if (xc_add_mmu_update(xc_handle, mmu, - (((unsigned long long)new_mfn) - << PAGE_SHIFT) | + if (xc_add_mmu_update(xc_handle, mmu, + (((unsigned long long)new_mfn) + << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i)) { ERR("Couldn't m2p on PAE root pgdir"); goto out; } - + l3tab = (uint64_t *) - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ | PROT_WRITE, p2m[i]); - - for(j = 0; j < 4; j++) - l3tab[j] = l3ptes[j]; - - munmap(l3tab, PAGE_SIZE); - + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, p2m[i]); + + for(j = 0; j < 4; j++) + l3tab[j] = l3ptes[j]; + + munmap(l3tab, PAGE_SIZE); + } } /* Second pass: find all L1TABs and uncanonicalize them */ - j = 0; - - for(i = 0; i < max_pfn; i++) { - - if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) { - region_mfn[j] = p2m[i]; - j++; - } - - if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) { + j = 0; + + for(i = 0; i < max_pfn; i++) { + + if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) { + region_mfn[j] = p2m[i]; + j++; + } + + if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) { if 
(!(region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ | PROT_WRITE, - region_mfn, j))) { + xc_handle, dom, PROT_READ | PROT_WRITE, + region_mfn, j))) { ERR("map batch failed"); goto out; } for(k = 0; k < j; k++) { - if(!uncanonicalize_pagetable(L1TAB, + if(!uncanonicalize_pagetable(L1TAB, region_base + k*PAGE_SIZE)) { - ERR("failed uncanonicalize pt!"); - goto out; - } + ERR("failed uncanonicalize pt!"); + goto out; + } } - - munmap(region_base, j*PAGE_SIZE); - j = 0; - } - } - - } - - - if (xc_finish_mmu_updates(xc_handle, mmu)) { - ERR("Error doing finish_mmu_updates()"); - goto out; - } + + munmap(region_base, j*PAGE_SIZE); + j = 0; + } + } + + } + + + if (xc_finish_mmu_updates(xc_handle, mmu)) { + ERR("Error doing finish_mmu_updates()"); + goto out; + } /* * Pin page tables. Do this after writing to them as otherwise Xen * will barf when doing the type-checking. */ - nr_pins = 0; + nr_pins = 0; for (i = 0; i < max_pfn; i++) { if (i == (max_pfn-1) || nr_pins == MAX_PIN_BATCH) { - if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { - ERR("Failed to pin batch of %d page tables", nr_pins); + if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { + ERR("Failed to pin batch of %d page tables", nr_pins); goto out; - } + } nr_pins = 0; } if ( (pfn_type[i] & LPINTAB) == 0 ) continue; - switch(pfn_type[i]) { - - case (L1TAB|LPINTAB): + switch(pfn_type[i]) { + + case (L1TAB|LPINTAB): pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; - break; - - case (L2TAB|LPINTAB): + break; + + case (L2TAB|LPINTAB): pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; - break; - - case (L3TAB|LPINTAB): + break; + + case (L3TAB|LPINTAB): pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; - break; + break; case (L4TAB|LPINTAB): pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; - break; - - default: - continue; + break; + + default: + continue; } pin[nr_pins].arg1.mfn = p2m[i]; - nr_pins++; + nr_pins++; } @@ -553,17 +553,17 @@ unsigned long *pfntab; int rc; - if (!read_exact(io_fd, &count, sizeof(count))) { + if 
(!read_exact(io_fd, &count, sizeof(count))) { ERR("Error when reading pfn count"); goto out; } - if(!(pfntab = malloc(sizeof(unsigned long) * count))) { + if(!(pfntab = malloc(sizeof(unsigned long) * count))) { ERR("Out of memory"); goto out; } - - if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { + + if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { ERR("Error when reading pfntab"); goto out; } @@ -572,14 +572,14 @@ unsigned long pfn = pfntab[i]; - if(pfn > max_pfn) + if(pfn > max_pfn) /* shouldn't happen - continue optimistically */ - continue; - - pfntab[i] = p2m[pfn]; - p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map - } - + continue; + + pfntab[i] = p2m[pfn]; + p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map + } + if (count > 0) { struct xen_memory_reservation reservation = { @@ -590,16 +590,16 @@ }; if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, - &reservation)) != count) { + &reservation)) != count) { ERR("Could not decrease reservation : %d", rc); goto out; } else DPRINTF("Decreased reservation by %d pages\n", count); - } - } - - if (!read_exact(io_fd, &ctxt, sizeof(ctxt)) || - !read_exact(io_fd, shared_info_page, PAGE_SIZE)) { + } + } + + if (!read_exact(io_fd, &ctxt, sizeof(ctxt)) || + !read_exact(io_fd, shared_info_page, PAGE_SIZE)) { ERR("Error when reading ctxt or shared info page"); goto out; } @@ -642,15 +642,15 @@ if (pfn >= max_pfn) { ERR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx", - pfn, max_pfn, pfn_type[pfn]); - goto out; - } - - if ( (pfn_type[pfn] & LTABTYPE_MASK) != + pfn, max_pfn, pfn_type[pfn]); + goto out; + } + + if ( (pfn_type[pfn] & LTABTYPE_MASK) != ((unsigned long)pt_levels<<LTAB_SHIFT) ) { ERR("PT base is bad. 
pfn=%lu nr=%lu type=%08lx %08lx", - pfn, max_pfn, pfn_type[pfn], - (unsigned long)pt_levels<<LTAB_SHIFT); + pfn, max_pfn, pfn_type[pfn], + (unsigned long)pt_levels<<LTAB_SHIFT); goto out; } @@ -667,7 +667,7 @@ xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame); memcpy(page, shared_info, sizeof(shared_info_t)); munmap(page, PAGE_SIZE); - + /* Uncanonicalise the pfn-to-mfn table frame-number list. */ for (i = 0; i < P2M_FL_ENTRIES; i++) { pfn = p2m_frame_list[i]; @@ -678,16 +678,16 @@ p2m_frame_list[i] = p2m[pfn]; } - + /* Copy the P2M we've constructed to the 'live' P2M */ - if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE, + if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE, p2m_frame_list, P2M_FL_ENTRIES))) { ERR("Couldn't map p2m table"); goto out; } - memcpy(live_p2m, p2m, P2M_SIZE); - munmap(live_p2m, P2M_SIZE); + memcpy(live_p2m, p2m, P2M_SIZE); + munmap(live_p2m, P2M_SIZE); /* * Safety checking of saved context: diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_linux_save.c Sat Apr 15 18:25:21 2006 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_linux_save.c - * + * * Save the state of a running Linux session. - * + * * Copyright (c) 2003, K A Fraser. */ @@ -17,23 +17,23 @@ /* ** Default values for important tuning parameters. Can override by passing -** non-zero replacement values to xc_linux_save(). +** non-zero replacement values to xc_linux_save(). ** -** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. -** +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. 
+** */ -#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ +#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ #define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ /* max mfn of the whole machine */ -static unsigned long max_mfn; +static unsigned long max_mfn; /* virtual starting address of the hypervisor */ -static unsigned long hvirt_start; +static unsigned long hvirt_start; /* #levels of page tables used by the currrent guest */ -static unsigned int pt_levels; +static unsigned int pt_levels; /* total number of pages used by the current guest */ static unsigned long max_pfn; @@ -56,8 +56,8 @@ (((_mfn) < (max_mfn)) && \ ((mfn_to_pfn(_mfn) < (max_pfn)) && \ (live_p2m[mfn_to_pfn(_mfn)] == (_mfn)))) - - + + /* Returns TRUE if MFN is successfully converted to a PFN. */ #define translate_mfn_to_pfn(_pmfn) \ ({ \ @@ -70,12 +70,12 @@ _res; \ }) -/* -** During (live) save/migrate, we maintain a number of bitmaps to track -** which pages we have to send, to fixup, and to skip. +/* +** During (live) save/migrate, we maintain a number of bitmaps to track +** which pages we have to send, to fixup, and to skip. */ -#define BITS_PER_LONG (sizeof(unsigned long) * 8) +#define BITS_PER_LONG (sizeof(unsigned long) * 8) #define BITMAP_SIZE ((max_pfn + BITS_PER_LONG - 1) / 8) #define BITMAP_ENTRY(_nr,_bmap) \ @@ -85,17 +85,17 @@ static inline int test_bit (int nr, volatile void * addr) { - return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; + return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; } static inline void clear_bit (int nr, volatile void * addr) { - BITMAP_ENTRY(nr, addr) &= ~(1 << BITMAP_SHIFT(nr)); + BITMAP_ENTRY(nr, addr) &= ~(1 << BITMAP_SHIFT(nr)); } static inline void set_bit ( int nr, volatile void * addr) { - BITMAP_ENTRY(nr, addr) |= (1 << BITMAP_SHIFT(nr)); + BITMAP_ENTRY(nr, addr) |= (1 << BITMAP_SHIFT(nr)); } /* Returns the hamming weight (i.e. 
the number of bits set) in a N-bit word */ @@ -122,7 +122,7 @@ { /* Need a simple permutation function so that we scan pages in a pseudo random order, enabling us to get a better estimate of - the domain's page dirtying rate as we go (there are often + the domain's page dirtying rate as we go (there are often contiguous ranges of pfns that have similar behaviour, and we want to mix them up. */ @@ -130,21 +130,21 @@ /* 512MB domain, 128k pages, order 17 */ /* - QPONMLKJIHGFEDCBA - QPONMLKJIH - GFEDCBA + QPONMLKJIHGFEDCBA + QPONMLKJIH + GFEDCBA */ - + /* - QPONMLKJIHGFEDCBA - EDCBA + QPONMLKJIHGFEDCBA + EDCBA QPONM LKJIHGF */ do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ - + return i; } @@ -165,7 +165,7 @@ static uint64_t tv_delta(struct timeval *new, struct timeval *old) { - return ((new->tv_sec - old->tv_sec)*1000000 ) + + return ((new->tv_sec - old->tv_sec)*1000000 ) + (new->tv_usec - old->tv_usec); } @@ -175,7 +175,7 @@ /* ** We control the rate at which we transmit (or save) to minimize impact -** on running domains (including the target if we're doing live migrate). +** on running domains (including the target if we're doing live migrate). */ #define MAX_MBIT_RATE 500 /* maximum transmit rate for migrate */ @@ -193,10 +193,10 @@ static int mbit_rate, ombit_rate = 0; /* Have we reached the maximum transmission rate? */ -#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE) - - -static inline void initialize_mbit_rate() +#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE) + + +static inline void initialize_mbit_rate() { mbit_rate = START_MBIT_RATE; } @@ -213,7 +213,7 @@ if (START_MBIT_RATE == 0) return write(io_fd, buf, n); - + budget -= n; if (budget < 0) { if (mbit_rate != ombit_rate) { @@ -253,46 +253,46 @@ #else /* ! 
ADAPTIVE SAVE */ -#define RATE_IS_MAX() (0) -#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n)) -#define initialize_mbit_rate() +#define RATE_IS_MAX() (0) +#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n)) +#define initialize_mbit_rate() #endif static inline ssize_t write_exact(int fd, void *buf, size_t count) { - if(write(fd, buf, count) != count) - return 0; - return 1; -} - - - -static int print_stats(int xc_handle, uint32_t domid, int pages_sent, + if(write(fd, buf, count) != count) + return 0; + return 1; +} + + + +static int print_stats(int xc_handle, uint32_t domid, int pages_sent, xc_shadow_control_stats_t *stats, int print) { static struct timeval wall_last; static long long d0_cpu_last; static long long d1_cpu_last; - + struct timeval wall_now; long long wall_delta; long long d0_cpu_now, d0_cpu_delta; long long d1_cpu_now, d1_cpu_delta; - + gettimeofday(&wall_now, NULL); - + d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000; d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000; - if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) + if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) fprintf(stderr, "ARRHHH!!\n"); - + wall_delta = tv_delta(&wall_now,&wall_last)/1000; - + if (wall_delta == 0) wall_delta = 1; - + d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; @@ -300,14 +300,14 @@ fprintf(stderr, "delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " "dirtied %dMb/s %" PRId32 " pages\n", - wall_delta, + wall_delta, (int)((d0_cpu_delta*100)/wall_delta), (int)((d1_cpu_delta*100)/wall_delta), (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))), (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))), stats->dirty_count); -#ifdef ADAPTIVE_SAVE +#ifdef ADAPTIVE_SAVE if (((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate) { mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) + 50; @@ -315,16 +315,16 @@ mbit_rate 
= MAX_MBIT_RATE; } #endif - + d0_cpu_last = d0_cpu_now; d1_cpu_last = d1_cpu_now; - wall_last = wall_now; + wall_last = wall_now; return 0; } -static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn, +static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn, unsigned long *arr, int runs) { long long start, now; @@ -335,24 +335,24 @@ for (j = 0; j < runs; j++) { int i; - + xc_shadow_control(xc_handle, domid, DOM0_SHADOW_CONTROL_OP_CLEAN, arr, max_pfn, NULL); fprintf(stderr, "#Flush\n"); - for ( i = 0; i < 40; i++ ) { - usleep(50000); + for ( i = 0; i < 40; i++ ) { + usleep(50000); now = llgettimeofday(); xc_shadow_control(xc_handle, domid, DOM0_SHADOW_CONTROL_OP_PEEK, NULL, 0, &stats); - + fprintf(stderr, "now= %lld faults= %" PRId32 " dirty= %" PRId32 - " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n", - ((now-start)+500)/1000, + " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n", + ((now-start)+500)/1000, stats.fault_count, stats.dirty_count, stats.dirty_net_count, stats.dirty_block_count); } } - + return -1; } @@ -375,7 +375,7 @@ return -1; } - if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt)) + if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt)) ERR("Could not get vcpu context"); @@ -383,22 +383,22 @@ return 0; // success if (info->paused) { - // try unpausing domain, wait, and retest + // try unpausing domain, wait, and retest xc_domain_unpause( xc_handle, dom ); - + ERR("Domain was paused. Wait and re-test."); usleep(10000); // 10ms - + goto retry; } if( ++i < 100 ) { ERR("Retry suspend domain."); - usleep(10000); // 10ms + usleep(10000); // 10ms goto retry; } - + ERR("Unable to suspend domain."); return -1; @@ -406,173 +406,173 @@ /* -** During transfer (or in the state file), all page-table pages must be -** converted into a 'canonical' form where references to actual mfns -** are replaced with references to the corresponding pfns. 
+** During transfer (or in the state file), all page-table pages must be +** converted into a 'canonical' form where references to actual mfns +** are replaced with references to the corresponding pfns. ** -** This function performs the appropriate conversion, taking into account -** which entries do not require canonicalization (in particular, those -** entries which map the virtual address reserved for the hypervisor). +** This function performs the appropriate conversion, taking into account +** which entries do not require canonicalization (in particular, those +** entries which map the virtual address reserved for the hypervisor). */ -void canonicalize_pagetable(unsigned long type, unsigned long pfn, - const void *spage, void *dpage) -{ - +void canonicalize_pagetable(unsigned long type, unsigned long pfn, + const void *spage, void *dpage) +{ + int i, pte_last, xen_start, xen_end; uint64_t pte; - /* + /* ** We need to determine which entries in this page table hold ** reserved hypervisor mappings. This depends on the current - ** page table type as well as the number of paging levels. + ** page table type as well as the number of paging levels. */ - xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); - + xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); + if (pt_levels == 2 && type == L2TAB) - xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); - - if (pt_levels == 3 && type == L3TAB) - xen_start = L3_PAGETABLE_ENTRIES_PAE; - - /* - ** in PAE only the L2 mapping the top 1GB contains Xen mappings. + xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); + + if (pt_levels == 3 && type == L3TAB) + xen_start = L3_PAGETABLE_ENTRIES_PAE; + + /* + ** in PAE only the L2 mapping the top 1GB contains Xen mappings. ** We can spot this by looking for the guest linear mapping which - ** Xen always ensures is present in that L2. Guests must ensure - ** that this check will fail for other L2s. + ** Xen always ensures is present in that L2. 
Guests must ensure + ** that this check will fail for other L2s. */ if (pt_levels == 3 && type == L2TAB) { /* XXX index of the L2 entry in PAE mode which holds the guest LPT */ -#define PAE_GLPT_L2ENTRY (495) - pte = ((uint64_t*)spage)[PAE_GLPT_L2ENTRY]; +#define PAE_GLPT_L2ENTRY (495) + pte = ((uint64_t*)spage)[PAE_GLPT_L2ENTRY]; if(((pte >> PAGE_SHIFT) & 0x0fffffff) == live_p2m[pfn]) - xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; - } - - if (pt_levels == 4 && type == L4TAB) { + xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + } + + if (pt_levels == 4 && type == L4TAB) { /* - ** XXX SMH: should compute these from hvirt_start (which we have) - ** and hvirt_end (which we don't) + ** XXX SMH: should compute these from hvirt_start (which we have) + ** and hvirt_end (which we don't) */ - xen_start = 256; - xen_end = 272; + xen_start = 256; + xen_end = 272; } /* Now iterate through the page table, canonicalizing each PTE */ for (i = 0; i < pte_last; i++ ) { - unsigned long pfn, mfn; - + unsigned long pfn, mfn; + if (pt_levels == 2) pte = ((uint32_t*)spage)[i]; else pte = ((uint64_t*)spage)[i]; - + if (i >= xen_start && i < xen_end) pte = 0; - + if (pte & _PAGE_PRESENT) { - - mfn = (pte >> PAGE_SHIFT) & 0xfffffff; + + mfn = (pte >> PAGE_SHIFT) & 0xfffffff; if (!MFN_IS_IN_PSEUDOPHYS_MAP(mfn)) { - /* This will happen if the type info is stale which + /* This will happen if the type info is stale which is quite feasible under live migration */ DPRINTF("PT Race: [%08lx,%d] pte=%llx, mfn=%08lx\n", - type, i, (unsigned long long)pte, mfn); + type, i, (unsigned long long)pte, mfn); pfn = 0; /* zap it - we'll retransmit this page later */ - } else + } else pfn = mfn_to_pfn(mfn); - + pte &= 0xffffff0000000fffULL; pte |= (uint64_t)pfn << PAGE_SHIFT; } - + if (pt_levels == 2) ((uint32_t*)dpage)[i] = pte; else - ((uint64_t*)dpage)[i] = pte; - - } - - return; -} - - - -static unsigned long *xc_map_m2p(int xc_handle, - unsigned long max_mfn, - int prot) 
-{ + ((uint64_t*)dpage)[i] = pte; + + } + + return; +} + + + +static unsigned long *xc_map_m2p(int xc_handle, + unsigned long max_mfn, + int prot) +{ struct xen_machphys_mfn_list xmml; - privcmd_mmap_t ioctlx; - privcmd_mmap_entry_t *entries; - unsigned long m2p_chunks, m2p_size; - unsigned long *m2p; - int i, rc; - - m2p_size = M2P_SIZE(max_mfn); - m2p_chunks = M2P_CHUNKS(max_mfn); + privcmd_mmap_t ioctlx; + privcmd_mmap_entry_t *entries; + unsigned long m2p_chunks, m2p_size; + unsigned long *m2p; + int i, rc; + + m2p_size = M2P_SIZE(max_mfn); + m2p_chunks = M2P_CHUNKS(max_mfn); xmml.max_extents = m2p_chunks; - if (!(xmml.extent_start = malloc(m2p_chunks * sizeof(unsigned long)))) { - ERR("failed to allocate space for m2p mfns"); - return NULL; - } + if (!(xmml.extent_start = malloc(m2p_chunks * sizeof(unsigned long)))) { + ERR("failed to allocate space for m2p mfns"); + return NULL; + } if (xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) || (xmml.nr_extents != m2p_chunks)) { - ERR("xc_get_m2p_mfns"); + ERR("xc_get_m2p_mfns"); return NULL; } - if ((m2p = mmap(NULL, m2p_size, prot, + if ((m2p = mmap(NULL, m2p_size, prot, MAP_SHARED, xc_handle, 0)) == MAP_FAILED) { - ERR("failed to mmap m2p"); - return NULL; - } - - if (!(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t)))) { - ERR("failed to allocate space for mmap entries"); - return NULL; - } + ERR("failed to mmap m2p"); + return NULL; + } + + if (!(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t)))) { + ERR("failed to allocate space for mmap entries"); + return NULL; + } ioctlx.num = m2p_chunks; - ioctlx.dom = DOMID_XEN; - ioctlx.entry = entries; - - for (i=0; i < m2p_chunks; i++) { - entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); + ioctlx.dom = DOMID_XEN; + ioctlx.entry = entries; + + for (i=0; i < m2p_chunks; i++) { + entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); entries[i].mfn = xmml.extent_start[i]; entries[i].npages = 
M2P_CHUNK_SIZE >> PAGE_SHIFT; } if ((rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx)) < 0) { - ERR("ioctl_mmap failed (rc = %d)", rc); - return NULL; + ERR("ioctl_mmap failed (rc = %d)", rc); + return NULL; } free(xmml.extent_start); - free(entries); - - return m2p; -} - - - -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + free(entries); + + return m2p; +} + + + +int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags, int (*suspend)(int)) { xc_dominfo_t info; int rc = 1, i, j, last_iter, iter = 0; - int live = (flags & XCFLAGS_LIVE); - int debug = (flags & XCFLAGS_DEBUG); + int live = (flags & XCFLAGS_LIVE); + int debug = (flags & XCFLAGS_DEBUG); int sent_last_iter, skip_this_iter; /* The new domain's shared-info frame number. */ unsigned long shared_info_frame; - + /* A copy of the CPU context of the guest. */ vcpu_guest_context_t ctxt; @@ -581,7 +581,7 @@ unsigned long *pfn_batch = NULL; /* A temporary mapping, and a copy, of one frame of guest memory. 
*/ - char page[PAGE_SIZE]; + char page[PAGE_SIZE]; /* Double and single indirect references to the live P2M table */ unsigned long *live_p2m_frame_list_list = NULL; @@ -597,14 +597,14 @@ unsigned char *region_base = NULL; /* power of 2 order of max_pfn */ - int order_nr; + int order_nr; /* bitmap of pages: - - that should be sent this iteration (unless later marked as skip); + - that should be sent this iteration (unless later marked as skip); - to skip this iteration because already dirty; - to fixup by sending at the end if not already resent; */ unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL; - + xc_shadow_control_stats_t stats; unsigned long needed_to_fix = 0; @@ -612,29 +612,29 @@ /* If no explicit control parameters given, use defaults */ - if(!max_iters) - max_iters = DEF_MAX_ITERS; - if(!max_factor) - max_factor = DEF_MAX_FACTOR; - - initialize_mbit_rate(); - - if(!get_platform_info(xc_handle, dom, + if(!max_iters) + max_iters = DEF_MAX_ITERS; + if(!max_factor) + max_factor = DEF_MAX_FACTOR; + + initialize_mbit_rate(); + + if(!get_platform_info(xc_handle, dom, &max_mfn, &hvirt_start, &pt_levels)) { - ERR("Unable to get platform info."); + ERR("Unable to get platform info."); return 1; } if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { ERR("Could not get domain info"); - return 1; + return 1; } if (mlock(&ctxt, sizeof(ctxt))) { ERR("Unable to mlock ctxt"); return 1; } - + /* Only have to worry about vcpu 0 even for SMP */ if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { ERR("Could not get vcpu context"); @@ -648,16 +648,16 @@ ERR("Domain is not in a valid Linux guest OS state"); goto out; } - + /* cheesy sanity check */ if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) { - ERR("Invalid state record -- pfn count out of range: %lu", - (info.max_memkb >> (PAGE_SHIFT - 10))); + ERR("Invalid state record -- pfn count out of range: %lu", + (info.max_memkb >> (PAGE_SHIFT - 10))); goto out; } - + /* Map the shared info frame */ - 
if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, shared_info_frame))) { ERR("Couldn't map live_shinfo"); goto out; @@ -665,8 +665,8 @@ max_pfn = live_shinfo->arch.max_pfn; - live_p2m_frame_list_list = - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, + live_p2m_frame_list_list = + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, live_shinfo->arch.pfn_to_mfn_frame_list_list); if (!live_p2m_frame_list_list) { @@ -674,24 +674,24 @@ goto out; } - live_p2m_frame_list = + live_p2m_frame_list = xc_map_foreign_batch(xc_handle, dom, PROT_READ, live_p2m_frame_list_list, - P2M_FLL_ENTRIES); - + P2M_FLL_ENTRIES); + if (!live_p2m_frame_list) { ERR("Couldn't map p2m_frame_list"); goto out; } - /* Map all the frames of the pfn->mfn table. For migrate to succeed, - the guest must not change which frames are used for this purpose. + /* Map all the frames of the pfn->mfn table. For migrate to succeed, + the guest must not change which frames are used for this purpose. (its not clear why it would want to change them, and we'll be OK from a safety POV anyhow. 
*/ live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ, live_p2m_frame_list, - P2M_FL_ENTRIES); + P2M_FL_ENTRIES); if (!live_p2m) { ERR("Couldn't map p2m table"); @@ -699,25 +699,25 @@ } /* Setup the mfn_to_pfn table mapping */ - if(!(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ))) { - ERR("Failed to map live M2P table"); - goto out; - } - - + if(!(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ))) { + ERR("Failed to map live M2P table"); + goto out; + } + + /* Get a local copy of the live_P2M_frame_list */ - if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { + if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { ERR("Couldn't allocate p2m_frame_list array"); goto out; } - memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); + memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); /* Canonicalise the pfn-to-mfn table frame-number list. */ for (i = 0; i < max_pfn; i += ulpp) { - if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) { + if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) { ERR("Frame# in pfn-to-mfn frame list is not in pseudophys"); - ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp, - p2m_frame_list[i/ulpp]); + ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp, + p2m_frame_list[i/ulpp]); goto out; } } @@ -725,31 +725,31 @@ /* Domain is still running at this point */ if (live) { - if (xc_shadow_control(xc_handle, dom, + if (xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY, - NULL, 0, NULL ) < 0) { + NULL, 0, NULL ) < 0) { ERR("Couldn't enable shadow mode"); goto out; } - + last_iter = 0; - + } else { - + /* This is a non-live suspend. Issue the call back to get the domain suspended */ - + last_iter = 1; - + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) { ERR("Domain appears not to have suspended"); goto out; } - + } /* pretend we sent all the pages last iteration */ - sent_last_iter = max_pfn; + sent_last_iter = max_pfn; /* calculate the power of 2 order of max_pfn, e.g. 
@@ -758,15 +758,15 @@ continue; /* Setup to_send / to_fix and to_skip bitmaps */ - to_send = malloc(BITMAP_SIZE); - to_fix = calloc(1, BITMAP_SIZE); - to_skip = malloc(BITMAP_SIZE); - + to_send = malloc(BITMAP_SIZE); + to_fix = calloc(1, BITMAP_SIZE); + to_skip = malloc(BITMAP_SIZE); + if (!to_send || !to_fix || !to_skip) { ERR("Couldn't allocate to_send array"); goto out; } - + memset(to_send, 0xff, BITMAP_SIZE); if (mlock(to_send, BITMAP_SIZE)) { @@ -779,7 +779,7 @@ ERR("Unable to mlock to_skip"); return 1; } - + analysis_phase(xc_handle, dom, max_pfn, to_skip, 0); /* We want zeroed memory so use calloc rather than malloc. */ @@ -787,7 +787,7 @@ pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(unsigned long)); if ((pfn_type == NULL) || (pfn_batch == NULL)) { - ERR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); + ERR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); errno = ENOMEM; goto out; } @@ -803,12 +803,12 @@ */ { int err=0; - unsigned long mfn; + unsigned long mfn; for (i = 0; i < max_pfn; i++) { mfn = live_p2m[i]; - if((mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i)) { - DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i, + if((mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i)) { + DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i, mfn, mfn_to_pfn(mfn)); err++; } @@ -819,16 +819,16 @@ /* Start writing out the saved-domain record. */ - if(!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { + if(!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { ERR("write: max_pfn"); goto out; } - if(!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { + if(!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { ERR("write: p2m_frame_list"); goto out; } - + print_stats(xc_handle, dom, 0, &stats, 0); /* Now write out each data page, canonicalising page tables as we go... 
*/ @@ -853,8 +853,8 @@ DPRINTF("\b\b\b\b%3d%%", this_pc); prev_pc = this_pc; } - - /* slightly wasteful to peek the whole array evey time, + + /* slightly wasteful to peek the whole array evey time, but this is fast enough for the moment. */ if (!last_iter && xc_shadow_control( xc_handle, dom, DOM0_SHADOW_CONTROL_OP_PEEK, @@ -862,7 +862,7 @@ ERR("Error peeking shadow bitmap"); goto out; } - + /* load pfn_type[] with the mfn of all the pages we're doing in this batch. */ @@ -873,11 +873,11 @@ if (debug) { DPRINTF("%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n", iter, (unsigned long)n, live_p2m[n], - test_bit(n, to_send), + test_bit(n, to_send), mfn_to_pfn(live_p2m[n]&0xFFFFF)); } - - if (!last_iter && test_bit(n, to_send)&& test_bit(n, to_skip)) + + if (!last_iter && test_bit(n, to_send)&& test_bit(n, to_skip)) skip_this_iter++; /* stats keeping */ if (!((test_bit(n, to_send) && !test_bit(n, to_skip)) || @@ -885,13 +885,13 @@ (test_bit(n, to_fix) && last_iter))) continue; - /* + /* ** we get here if: ** 1. page is marked to_send & hasn't already been re-dirtied ** 2. (ignore to_skip in last iteration) ** 3. add in pages that still need fixup (net bufs) */ - + pfn_batch[batch] = n; pfn_type[batch] = live_p2m[n]; @@ -914,80 +914,80 @@ iter,n,pfn_type[batch]); } - clear_bit(n, to_fix); - + clear_bit(n, to_fix); + batch++; } - + if (batch == 0) goto skip; /* vanishingly unlikely... 
*/ - + if ((region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ, pfn_type, batch)) == 0) { + xc_handle, dom, PROT_READ, pfn_type, batch)) == 0) { ERR("map batch failed"); goto out; } - + if (xc_get_pfn_type_batch(xc_handle, dom, batch, pfn_type)) { ERR("get_pfn_type_batch failed"); goto out; } - + for (j = 0; j < batch; j++) { if ((pfn_type[j] & LTAB_MASK) == XTAB) { DPRINTF("type fail: page %i mfn %08lx\n", j, pfn_type[j]); continue; } - - if (debug) + + if (debug) fprintf(stderr, "%d pfn= %08lx mfn= %08lx [mfn]= %08lx" " sum= %08lx\n", - iter, + iter, (pfn_type[j] & LTAB_MASK) | pfn_batch[j], pfn_type[j], mfn_to_pfn(pfn_type[j]&(~LTAB_MASK)), csum_page(region_base + (PAGE_SIZE*j))); - + /* canonicalise mfn->pfn */ pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j]; } - if(!write_exact(io_fd, &batch, sizeof(unsigned int))) { + if(!write_exact(io_fd, &batch, sizeof(unsigned int))) { ERR("Error when writing to state file (2)"); goto out; } - if(!write_exact(io_fd, pfn_type, sizeof(unsigned long)*j)) { + if(!write_exact(io_fd, pfn_type, sizeof(unsigned long)*j)) { ERR("Error when writing to state file (3)"); goto out; } - + /* entering this loop, pfn_type is now in pfns (Not mfns) */ for (j = 0; j < batch; j++) { - - unsigned long pfn = pfn_type[j] & ~LTAB_MASK; - unsigned long pagetype = pfn_type[j] & LTAB_MASK; - void *spage = (void *) region_base + (PAGE_SIZE*j); + + unsigned long pfn = pfn_type[j] & ~LTAB_MASK; + unsigned long pagetype = pfn_type[j] & LTAB_MASK; + void *spage = (void *) region_base + (PAGE_SIZE*j); /* write out pages in batch */ if (pagetype == XTAB) continue; - pagetype &= LTABTYPE_MASK; - + pagetype &= LTABTYPE_MASK; + if (pagetype >= L1TAB && pagetype <= L4TAB) { - + /* We have a pagetable page: need to rewrite it. 
*/ - canonicalize_pagetable(pagetype, pfn, spage, page); - + canonicalize_pagetable(pagetype, pfn, spage, page); + if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) { ERR("Error when writing to state file (4)"); goto out; } - - } else { + + } else { /* We have a normal page: just write it directly. */ if (ratewrite(io_fd, spage, PAGE_SIZE) != PAGE_SIZE) { @@ -996,36 +996,36 @@ } } } /* end of the write out for this batch */ - + sent_this_iter += batch; munmap(region_base, batch*PAGE_SIZE); - + } /* end of this while loop for this iteration */ - - skip: - + + skip: + total_sent += sent_this_iter; - DPRINTF("\r %d: sent %d, skipped %d, ", + DPRINTF("\r %d: sent %d, skipped %d, ", iter, sent_this_iter, skip_this_iter ); if (last_iter) { print_stats( xc_handle, dom, sent_this_iter, &stats, 1); - DPRINTF("Total pages sent= %ld (%.2fx)\n", + DPRINTF("Total pages sent= %ld (%.2fx)\n", total_sent, ((float)total_sent)/max_pfn ); DPRINTF("(of which %ld were fixups)\n", needed_to_fix ); - } + } if (last_iter && debug){ int minusone = -1; - memset(to_send, 0xff, BITMAP_SIZE); + memset(to_send, 0xff, BITMAP_SIZE); debug = 0; fprintf(stderr, "Entering debug resend-all mode\n"); - + /* send "-1" to put receiver into debug mode */ - if(!write_exact(io_fd, &minusone, sizeof(int))) { + if(!write_exact(io_fd, &minusone, sizeof(int))) { ERR("Error when writing to state file (6)"); goto out; } @@ -1033,34 +1033,34 @@ continue; } - if (last_iter) break; + if (last_iter) break; if (live) { - if( + if( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || (iter >= max_iters) || (sent_this_iter+skip_this_iter < 50) || - (total_sent > max_pfn*max_factor) ) { + (total_sent > max_pfn*max_factor) ) { DPRINTF("Start last iteration\n"); last_iter = 1; - + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) { ERR("Domain appears not to have suspended"); goto out; } - - DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n", - info.shared_info_frame, - (unsigned 
long)ctxt.user_regs.eip, + + DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n", + info.shared_info_frame, + (unsigned long)ctxt.user_regs.eip, (unsigned long)ctxt.user_regs.edx); - } - + } + if (xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_CLEAN, - to_send, max_pfn, &stats ) != max_pfn) { + to_send, max_pfn, &stats ) != max_pfn) { ERR("Error flushing shadow PT"); goto out; } @@ -1068,7 +1068,7 @@ sent_last_iter = sent_this_iter; print_stats(xc_handle, dom, sent_this_iter, &stats, 1); - + } @@ -1077,8 +1077,8 @@ DPRINTF("All memory is saved\n"); /* Zero terminate */ - i = 0; - if (!write_exact(io_fd, &i, sizeof(int))) { + i = 0; + if (!write_exact(io_fd, &i, sizeof(int))) { ERR("Error when writing to state file (6)"); goto out; } @@ -1086,18 +1086,18 @@ /* Send through a list of all the PFNs that were not in map at the close */ { unsigned int i,j; - unsigned long pfntab[1024]; + unsigned long pfntab[1024]; for (i = 0, j = 0; i < max_pfn; i++) { if (!is_mapped(live_p2m[i])) j++; } - - if(!write_exact(io_fd, &j, sizeof(unsigned int))) { + + if(!write_exact(io_fd, &j, sizeof(unsigned int))) { ERR("Error when writing to state file (6a)"); goto out; - } - + } + for (i = 0, j = 0; i < max_pfn; ) { if (!is_mapped(live_p2m[i])) @@ -1105,16 +1105,16 @@ i++; if (j == 1024 || i == max_pfn) { - if(!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) { + if(!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) { ERR("Error when writing to state file (6b)"); goto out; - } + } j = 0; } } } - + /* Canonicalise the suspend-record frame number. 
*/ if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) ){ ERR("Suspend record is not in range of pseudophys map"); @@ -1138,7 +1138,7 @@ PAGE_SHIFT; if (!write_exact(io_fd, &ctxt, sizeof(ctxt)) || - !write_exact(io_fd, live_shinfo, PAGE_SIZE)) { + !write_exact(io_fd, live_shinfo, PAGE_SIZE)) { ERR("Error when writing to state file (1)"); goto out; } @@ -1149,26 +1149,26 @@ out: if (live) { - if(xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF, - NULL, 0, NULL ) < 0) { + if(xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF, + NULL, 0, NULL ) < 0) { DPRINTF("Warning - couldn't disable shadow mode"); } } - + if (live_shinfo) munmap(live_shinfo, PAGE_SIZE); - - if (live_p2m_frame_list_list) - munmap(live_p2m_frame_list_list, PAGE_SIZE); - - if (live_p2m_frame_list) - munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); - - if(live_p2m) - munmap(live_p2m, P2M_SIZE); - - if(live_m2p) - munmap(live_m2p, M2P_SIZE(max_mfn)); + + if (live_p2m_frame_list_list) + munmap(live_p2m_frame_list_list, PAGE_SIZE); + + if (live_p2m_frame_list) + munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); + + if(live_p2m) + munmap(live_p2m, P2M_SIZE); + + if(live_m2p) + munmap(live_m2p, M2P_SIZE(max_mfn)); free(pfn_type); free(pfn_batch); diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_load_aout9.c --- a/tools/libxc/xc_load_aout9.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_load_aout9.c Sat Apr 15 18:25:21 2006 @@ -22,7 +22,7 @@ struct Exec *get_header(const char *, unsigned long, struct Exec *); -int +int probe_aout9( const char *image, unsigned long image_size, @@ -40,7 +40,7 @@ return 0; } -static int +static int parseaout9image( const char *image, unsigned long image_size, @@ -74,7 +74,7 @@ return 0; } -static int +static int loadaout9image( const char *image, unsigned long image_size, @@ -123,7 +123,7 @@ if(chunksz > PAGE_SIZE - pgoff) chunksz = PAGE_SIZE - pgoff; - pg = xc_map_foreign_range(xch, dom, PAGE_SIZE, PROT_WRITE, + pg = 
xc_map_foreign_range(xch, dom, PAGE_SIZE, PROT_WRITE, parray[off>>PAGE_SHIFT]); memcpy(pg + pgoff, buf, chunksz); munmap(pg, PAGE_SIZE); diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_load_bin.c --- a/tools/libxc/xc_load_bin.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_load_bin.c Sat Apr 15 18:25:21 2006 @@ -161,7 +161,7 @@ return NULL; } -static int parsebinimage(const char *image, +static int parsebinimage(const char *image, unsigned long image_size, struct domain_setup_info *dsi) { diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_load_elf.c --- a/tools/libxc/xc_load_elf.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_load_elf.c Sat Apr 15 18:25:21 2006 @@ -51,7 +51,7 @@ ((phdr->p_flags & (PF_W|PF_X)) != 0)); } -static int parseelfimage(const char *image, +static int parseelfimage(const char *image, unsigned long elfsize, struct domain_setup_info *dsi) { @@ -102,10 +102,10 @@ ERROR("ELF image has no section-header strings table (shstrtab)."); return -EINVAL; } - shdr = (Elf_Shdr *)(image + ehdr->e_shoff + + shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (ehdr->e_shstrndx*ehdr->e_shentsize)); shstrtab = image + shdr->sh_offset; - + /* Find the special '__xen_guest' section and check its contents. 
*/ for ( h = 0; h < ehdr->e_shnum; h++ ) { @@ -148,7 +148,7 @@ dsi->xen_guest_string = guestinfo; - for ( h = 0; h < ehdr->e_phnum; h++ ) + for ( h = 0; h < ehdr->e_phnum; h++ ) { phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize)); if ( !is_loadable_phdr(phdr) ) @@ -159,8 +159,8 @@ kernend = phdr->p_paddr + phdr->p_memsz; } - if ( (kernstart > kernend) || - (ehdr->e_entry < kernstart) || + if ( (kernstart > kernend) || + (ehdr->e_entry < kernstart) || (ehdr->e_entry > kernend) ) { ERROR("Malformed ELF image."); @@ -196,12 +196,12 @@ char *va; unsigned long pa, done, chunksz; - for ( h = 0; h < ehdr->e_phnum; h++ ) + for ( h = 0; h < ehdr->e_phnum; h++ ) { phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize)); if ( !is_loadable_phdr(phdr) ) continue; - + for ( done = 0; done < phdr->p_filesz; done += chunksz ) { pa = (phdr->p_paddr + done) - dsi->v_start; @@ -265,7 +265,7 @@ shdr = (Elf_Shdr *)(p + sizeof(int) + sizeof(Elf_Ehdr)); memcpy(shdr, image + ehdr->e_shoff, ehdr->e_shnum * sizeof(Elf_Shdr)); - for ( h = 0; h < ehdr->e_shnum; h++ ) + for ( h = 0; h < ehdr->e_shnum; h++ ) { if ( shdr[h].sh_type == SHT_STRTAB ) { diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_misc.c --- a/tools/libxc/xc_misc.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_misc.c Sat Apr 15 18:25:21 2006 @@ -1,6 +1,6 @@ /****************************************************************************** * xc_misc.c - * + * * Miscellaneous control interface functions. 
*/ @@ -21,7 +21,7 @@ int xc_readconsolering(int xc_handle, char **pbuffer, - unsigned int *pnr_chars, + unsigned int *pnr_chars, int clear) { int ret; @@ -46,14 +46,14 @@ safe_munlock(buffer, nr_chars); return ret; -} +} int xc_physinfo(int xc_handle, xc_physinfo_t *put_info) { int ret; DECLARE_DOM0_OP; - + op.cmd = DOM0_PHYSINFO; op.interface_version = DOM0_INTERFACE_VERSION; @@ -70,15 +70,15 @@ { int ret; DECLARE_DOM0_OP; - + op.cmd = DOM0_SCHED_ID; op.interface_version = DOM0_INTERFACE_VERSION; - + if ( (ret = do_dom0_op(xc_handle, &op)) != 0 ) return ret; - + *sched_id = op.u.sched_id.sched_id; - + return 0; } @@ -100,9 +100,9 @@ long long xc_msr_read(int xc_handle, int cpu_mask, int msr) { - int rc; + int rc; DECLARE_DOM0_OP; - + op.cmd = DOM0_MSR; op.u.msr.write = 0; op.u.msr.msr = msr; @@ -116,9 +116,9 @@ int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low, unsigned int high) { - int rc; + int rc; DECLARE_DOM0_OP; - + op.cmd = DOM0_MSR; op.u.msr.write = 1; op.u.msr.msr = msr; @@ -127,7 +127,7 @@ op.u.msr.in2 = high; rc = do_dom0_op(xc_handle, &op); - + return rc; } diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_physdev.c --- a/tools/libxc/xc_physdev.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_physdev.c Sat Apr 15 18:25:21 2006 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_physdev.c - * + * * API for manipulating physical-device access permissions. - * + * * Copyright (c) 2004, Rolf Neugebauer (Intel Research Cambridge) * Copyright (c) 2004, K A Fraser (University of Cambridge) */ diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_private.c Sat Apr 15 18:25:21 2006 @@ -1,6 +1,6 @@ /****************************************************************************** * xc_private.c - * + * * Helper functions for the rest of the library. 
*/ @@ -10,7 +10,7 @@ void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot, unsigned long *arr, int num ) { - privcmd_mmapbatch_t ioctlx; + privcmd_mmapbatch_t ioctlx; void *addr; addr = mmap(NULL, num*PAGE_SIZE, prot, MAP_SHARED, xc_handle, 0); if ( addr == MAP_FAILED ) @@ -38,8 +38,8 @@ int size, int prot, unsigned long mfn ) { - privcmd_mmap_t ioctlx; - privcmd_mmap_entry_t entry; + privcmd_mmap_t ioctlx; + privcmd_mmap_entry_t entry; void *addr; addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0); if ( addr == MAP_FAILED ) @@ -64,7 +64,7 @@ /*******************/ /* NB: arr must be mlock'ed */ -int xc_get_pfn_type_batch(int xc_handle, +int xc_get_pfn_type_batch(int xc_handle, uint32_t dom, int num, unsigned long *arr) { DECLARE_DOM0_OP; @@ -76,8 +76,8 @@ } #define GETPFN_ERR (~0U) -unsigned int get_pfn_type(int xc_handle, - unsigned long mfn, +unsigned int get_pfn_type(int xc_handle, + unsigned long mfn, uint32_t dom) { DECLARE_DOM0_OP; @@ -119,7 +119,7 @@ out1: return ret; -} +} static int flush_mmu_updates(int xc_handle, xc_mmu_t *mmu) { @@ -166,7 +166,7 @@ return mmu; } -int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, +int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, unsigned long long ptr, unsigned long long val) { mmu->updates[mmu->idx].ptr = ptr; @@ -288,7 +288,7 @@ out1: return ret; -} +} long long xc_domain_get_cpu_usage( int xc_handle, domid_t domid, int vcpu ) @@ -308,8 +308,8 @@ int xc_get_pfn_list(int xc_handle, - uint32_t domid, - unsigned long *pfn_buf, + uint32_t domid, + unsigned long *pfn_buf, unsigned long max_pfns) { DECLARE_DOM0_OP; @@ -327,7 +327,7 @@ { PERROR("xc_get_pfn_list: pfn_buf mlock failed"); return -1; - } + } ret = do_dom0_op(xc_handle, &op); @@ -356,13 +356,13 @@ DECLARE_DOM0_OP; op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)domid; - return (do_dom0_op(xc_handle, &op) < 0) ? + return (do_dom0_op(xc_handle, &op) < 0) ? 
-1 : op.u.getdomaininfo.tot_pages; } int xc_copy_to_domain_page(int xc_handle, uint32_t domid, - unsigned long dst_pfn, + unsigned long dst_pfn, const char *src_page) { void *vaddr = xc_map_foreign_range( @@ -481,7 +481,7 @@ { unsigned long new_mfn; - if ( xc_domain_memory_decrease_reservation( + if ( xc_domain_memory_decrease_reservation( xc_handle, domid, 1, 0, &mfn) != 0 ) { fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn); diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_private.h --- a/tools/libxc/xc_private.h Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_private.h Sat Apr 15 18:25:21 2006 @@ -57,7 +57,7 @@ } static inline int do_privcmd(int xc_handle, - unsigned int cmd, + unsigned int cmd, unsigned long data) { return ioctl(xc_handle, cmd, data); @@ -67,7 +67,7 @@ privcmd_hypercall_t *hypercall) { return do_privcmd(xc_handle, - IOCTL_PRIVCMD_HYPERCALL, + IOCTL_PRIVCMD_HYPERCALL, (unsigned long)hypercall); } @@ -78,7 +78,7 @@ hypercall.op = __HYPERVISOR_xen_version; hypercall.arg[0] = (unsigned long) cmd; hypercall.arg[1] = (unsigned long) dest; - + return do_xen_hypercall(xc_handle, &hypercall); } @@ -121,13 +121,13 @@ unsigned long va; unsigned long mfn; unsigned long npages; -} privcmd_mmap_entry_t; +} privcmd_mmap_entry_t; typedef struct privcmd_mmap { int num; domid_t dom; privcmd_mmap_entry_t *entry; -} privcmd_mmap_t; +} privcmd_mmap_t; */ #endif /* __XC_PRIVATE_H__ */ diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_ptrace.c --- a/tools/libxc/xc_ptrace.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_ptrace.c Sat Apr 15 18:25:21 2006 @@ -46,7 +46,7 @@ static vcpu_guest_context_t ctxt[MAX_VIRT_CPUS]; extern int ffsll(long long int); -#define FOREACH_CPU(cpumap, i) for ( cpumap = online_cpumap; (i = ffsll(cpumap)); cpumap &= ~(1 << (index - 1)) ) +#define FOREACH_CPU(cpumap, i) for ( cpumap = online_cpumap; (i = ffsll(cpumap)); cpumap &= ~(1 << (index - 1)) ) static int @@ -58,22 +58,22 @@ if (online) *online = 0; if ( 
!(regs_valid & (1 << cpu)) ) - { - retval = xc_vcpu_getcontext(xc_handle, current_domid, - cpu, &ctxt[cpu]); - if ( retval ) + { + retval = xc_vcpu_getcontext(xc_handle, current_domid, + cpu, &ctxt[cpu]); + if ( retval ) goto done; - regs_valid |= (1 << cpu); - - } - if ( online == NULL ) - goto done; - - retval = xc_vcpu_getinfo(xc_handle, current_domid, cpu, &info); - *online = info.online; - + regs_valid |= (1 << cpu); + + } + if ( online == NULL ) + goto done; + + retval = xc_vcpu_getinfo(xc_handle, current_domid, cpu, &info); + *online = info.online; + done: - return retval; + return retval; } static struct thr_ev_handlers { @@ -81,8 +81,8 @@ thr_ev_handler_t td_death; } handlers; -void -xc_register_event_handler(thr_ev_handler_t h, +void +xc_register_event_handler(thr_ev_handler_t h, td_event_e e) { switch (e) { @@ -97,7 +97,7 @@ } } -static inline int +static inline int paging_enabled(vcpu_guest_context_t *v) { unsigned long cr0 = v->ctrlreg[0]; @@ -114,19 +114,19 @@ get_online_cpumap(int xc_handle, dom0_getdomaininfo_t *d, cpumap_t *cpumap) { int i, online, retval; - + *cpumap = 0; for (i = 0; i <= d->max_vcpu_id; i++) { if ((retval = fetch_regs(xc_handle, i, &online))) return retval; if (online) - *cpumap |= (1 << i); - } - + *cpumap |= (1 << i); + } + return 0; } -/* +/* * Notify GDB of any vcpus that have come online or gone offline * update online_cpumap * @@ -137,7 +137,7 @@ { cpumap_t changed_cpumap = cpumap ^ online_cpumap; int index; - + while ( (index = ffsll(changed_cpumap)) ) { if ( cpumap & (1 << (index - 1)) ) { @@ -149,7 +149,7 @@ changed_cpumap &= ~(1 << (index - 1)); } online_cpumap = cpumap; - + } /* --------------------- */ @@ -172,7 +172,7 @@ static unsigned long pde_phys[MAX_VIRT_CPUS]; static uint32_t *pde_virt[MAX_VIRT_CPUS]; static unsigned long page_phys[MAX_VIRT_CPUS]; - static uint32_t *page_virt[MAX_VIRT_CPUS]; + static uint32_t *page_virt[MAX_VIRT_CPUS]; static int prev_perm[MAX_VIRT_CPUS]; if (ctxt[cpu].ctrlreg[3] == 0) @@ 
-221,7 +221,7 @@ return NULL; } prev_perm[cpu] = perm; - } + } return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK)); } @@ -284,7 +284,7 @@ if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */ return map_domain_va_32(xc_handle, cpu, guest_va, perm); - l4 = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, + l4 = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT); if ( l4 == NULL ) return NULL; @@ -349,7 +349,7 @@ mode = MODE_64; else if ( strstr(caps, "-x86_32p") ) mode = MODE_PAE; - else if ( strstr(caps, "-x86_32") ) + else if ( strstr(caps, "-x86_32") ) mode = MODE_32; } @@ -374,7 +374,7 @@ if (fetch_regs(xc_handle, cpu, NULL)) return NULL; - if (!paging_enabled(&ctxt[cpu])) { + if (!paging_enabled(&ctxt[cpu])) { static void * v; unsigned long page; @@ -383,9 +383,9 @@ page = page_array[va >> PAGE_SHIFT] << PAGE_SHIFT; - v = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, + v = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, perm, page >> PAGE_SHIFT); - + if ( v == NULL ) return NULL; @@ -403,7 +403,7 @@ int control_c_pressed_flag = 0; -static int +static int __xc_waitdomain( int xc_handle, int domain, @@ -420,7 +420,7 @@ op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = domain; - + retry: retval = do_dom0_op(xc_handle, &op); if ( retval || (op.u.getdomaininfo.domain != domain) ) @@ -429,7 +429,7 @@ goto done; } *status = op.u.getdomaininfo.flags; - + if ( options & WNOHANG ) goto done; @@ -472,16 +472,16 @@ void *data = (char *)edata; cpu = (request != PTRACE_ATTACH) ? 
domid_tid : 0; - + switch ( request ) - { + { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: if (current_isfile) - guest_va = (unsigned long *)map_domain_va_core(current_domid, + guest_va = (unsigned long *)map_domain_va_core(current_domid, cpu, addr, ctxt); else - guest_va = (unsigned long *)map_domain_va(xc_handle, + guest_va = (unsigned long *)map_domain_va(xc_handle, cpu, addr, PROT_READ); if ( guest_va == NULL ) goto out_error; @@ -492,26 +492,26 @@ case PTRACE_POKEDATA: /* XXX assume that all CPUs have the same address space */ if (current_isfile) - guest_va = (unsigned long *)map_domain_va_core(current_domid, + guest_va = (unsigned long *)map_domain_va_core(current_domid, cpu, addr, ctxt); else - guest_va = (unsigned long *)map_domain_va(xc_handle, + guest_va = (unsigned long *)map_domain_va(xc_handle, cpu, addr, PROT_READ|PROT_WRITE); - if ( guest_va == NULL ) + if ( guest_va == NULL ) goto out_error; *guest_va = (unsigned long)data; break; case PTRACE_GETREGS: - if (!current_isfile && fetch_regs(xc_handle, cpu, NULL)) + if (!current_isfile && fetch_regs(xc_handle, cpu, NULL)) goto out_error; - SET_PT_REGS(pt, ctxt[cpu].user_regs); + SET_PT_REGS(pt, ctxt[cpu].user_regs); memcpy(data, &pt, sizeof(struct gdb_regs)); break; case PTRACE_GETFPREGS: case PTRACE_GETFPXREGS: - if (!current_isfile && fetch_regs(xc_handle, cpu, NULL)) + if (!current_isfile && fetch_regs(xc_handle, cpu, NULL)) goto out_error; memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt)); break; @@ -520,7 +520,7 @@ if (current_isfile) goto out_unspported; /* XXX not yet supported */ SET_XC_REGS(((struct gdb_regs *)data), ctxt[cpu].user_regs); - if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, + if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]))) goto out_error_dom0; break; @@ -531,8 +531,8 @@ /* XXX we can still have problems if the user switches threads * during single-stepping - but that just seems retarded */ - ctxt[cpu].user_regs.eflags |= 
PSL_T; - if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, + ctxt[cpu].user_regs.eflags |= PSL_T; + if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]))) goto out_error_dom0; /* FALLTHROUGH */ @@ -545,13 +545,13 @@ { FOREACH_CPU(cpumap, index) { cpu = index - 1; - if (fetch_regs(xc_handle, cpu, NULL)) + if (fetch_regs(xc_handle, cpu, NULL)) goto out_error; /* Clear trace flag */ - if ( ctxt[cpu].user_regs.eflags & PSL_T ) + if ( ctxt[cpu].user_regs.eflags & PSL_T ) { ctxt[cpu].user_regs.eflags &= ~PSL_T; - if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, + if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]))) goto out_error_dom0; } @@ -566,7 +566,7 @@ goto out_error_dom0; } regs_valid = 0; - if ((retval = xc_domain_unpause(xc_handle, current_domid > 0 ? + if ((retval = xc_domain_unpause(xc_handle, current_domid > 0 ? current_domid : -current_domid))) goto out_error_dom0; break; @@ -627,7 +627,7 @@ } -int +int xc_waitdomain( int xc_handle, int domain, diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_ptrace.h --- a/tools/libxc/xc_ptrace.h Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_ptrace.h Sat Apr 15 18:25:21 2006 @@ -107,7 +107,7 @@ long esi; /* 12 */ long edi; /* 16 */ long ebp; /* 20 */ - long eax; /* 24 */ + long eax; /* 24 */ int xds; /* 28 */ int xes; /* 32 */ int xfs; /* 36 */ @@ -116,7 +116,7 @@ long eip; /* 48 */ int xcs; /* 52 */ long eflags; /* 56 */ - long esp; /* 60 */ + long esp; /* 60 */ int xss; /* 64 */ }; @@ -169,20 +169,20 @@ typedef void (*thr_ev_handler_t)(long); void xc_register_event_handler( - thr_ev_handler_t h, + thr_ev_handler_t h, td_event_e e); long xc_ptrace( int xc_handle, - enum __ptrace_request request, + enum __ptrace_request request, uint32_t domid, - long addr, + long addr, long data); int xc_waitdomain( int xc_handle, - int domain, - int *status, + int domain, + int *status, int options); #endif /* XC_PTRACE */ diff -r 83eb8d81c96f -r 91da9a1b7196 
tools/libxc/xc_ptrace_core.c --- a/tools/libxc/xc_ptrace_core.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_ptrace_core.c Sat Apr 15 18:25:21 2006 @@ -39,7 +39,7 @@ static unsigned long page_phys[MAX_VIRT_CPUS]; static unsigned long *page_virt[MAX_VIRT_CPUS]; - if (cr3[cpu] != cr3_phys[cpu]) + if (cr3[cpu] != cr3_phys[cpu]) { cr3_phys[cpu] = cr3[cpu]; if (cr3_virt[cpu]) @@ -53,12 +53,12 @@ return NULL; } cr3_virt[cpu] = v; - } + } if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */ return NULL; if (ctxt[cpu].flags & VGCF_HVM_GUEST) pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT; - if (pde != pde_phys[cpu]) + if (pde != pde_phys[cpu]) { pde_phys[cpu] = pde; if (pde_virt[cpu]) @@ -74,7 +74,7 @@ return NULL; if (ctxt[cpu].flags & VGCF_HVM_GUEST) page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT; - if (page != page_phys[cpu]) + if (page != page_phys[cpu]) { page_phys[cpu] = page; if (page_virt[cpu]) @@ -89,11 +89,11 @@ return NULL; } page_virt[cpu] = v; - } + } return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK)); } -int +int xc_waitdomain_core( int xc_handle, int domfd, @@ -122,7 +122,7 @@ nr_vcpus = header.xch_nr_vcpus; pages_offset = header.xch_pages_offset; - if (read(domfd, ctxt, sizeof(vcpu_guest_context_t)*nr_vcpus) != + if (read(domfd, ctxt, sizeof(vcpu_guest_context_t)*nr_vcpus) != sizeof(vcpu_guest_context_t)*nr_vcpus) return -1; @@ -134,7 +134,7 @@ printf("Could not allocate p2m_array\n"); return -1; } - if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) != + if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) != sizeof(unsigned long)*nr_pages) return -1; diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_sedf.c --- a/tools/libxc/xc_sedf.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_sedf.c Sat Apr 15 18:25:21 2006 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_sedf.c - * + * * API for manipulating parameters of the Simple EDF scheduler. 
- * + * * changes by Stephan Diestelhorst * based on code * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge. @@ -35,7 +35,7 @@ int ret; struct sedf_adjdom *p = &op.u.adjustdom.u.sedf; - op.cmd = DOM0_ADJUSTDOM; + op.cmd = DOM0_ADJUSTDOM; op.u.adjustdom.domain = (domid_t)domid; op.u.adjustdom.sched_id = SCHED_SEDF; op.u.adjustdom.direction = SCHED_INFO_GET; diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xc_tbuf.c --- a/tools/libxc/xc_tbuf.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xc_tbuf.c Sat Apr 15 18:25:21 2006 @@ -1,8 +1,8 @@ /****************************************************************************** * xc_tbuf.c - * + * * API for manipulating and accessing trace buffer parameters - * + * * Copyright (c) 2005, Rob Gardner */ @@ -18,7 +18,7 @@ op.u.tbufcontrol.op = DOM0_TBUF_ENABLE; else op.u.tbufcontrol.op = DOM0_TBUF_DISABLE; - + return xc_dom0_op(xc_handle, &op); } @@ -30,10 +30,10 @@ op.interface_version = DOM0_INTERFACE_VERSION; op.u.tbufcontrol.op = DOM0_TBUF_SET_SIZE; op.u.tbufcontrol.size = size; - + return xc_dom0_op(xc_handle, &op); } - + int xc_tbuf_get_size(int xc_handle, uint32_t *size) { int rc; diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xenctrl.h Sat Apr 15 18:25:21 2006 @@ -1,8 +1,8 @@ /****************************************************************************** * xenctrl.h - * + * * A library for low-level access to the Xen control interfaces. - * + * * Copyright (c) 2003-2004, K A Fraser. */ @@ -30,7 +30,7 @@ /* * DEFINITIONS FOR CPU BARRIERS - */ + */ #if defined(__i386__) #define mb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" ) @@ -51,7 +51,7 @@ /* * INITIALIZATION FUNCTIONS - */ + */ /** * This function opens a handle to the hypervisor interface. 
This function can @@ -96,20 +96,20 @@ long xc_ptrace_core( int xc_handle, - enum __ptrace_request request, - uint32_t domid, - long addr, + enum __ptrace_request request, + uint32_t domid, + long addr, long data, vcpu_guest_context_t *ctxt); void * map_domain_va_core( - unsigned long domfd, - int cpu, + unsigned long domfd, + int cpu, void *guest_va, vcpu_guest_context_t *ctxt); int xc_waitdomain_core( int xc_handle, - int domain, - int *status, + int domain, + int *status, int options, vcpu_guest_context_t *ctxt); @@ -120,7 +120,7 @@ typedef struct { uint32_t domid; uint32_t ssidref; - unsigned int dying:1, crashed:1, shutdown:1, + unsigned int dying:1, crashed:1, shutdown:1, paused:1, blocked:1, running:1; unsigned int shutdown_reason; /* only meaningful if shutdown==1 */ unsigned long nr_pages; @@ -133,7 +133,7 @@ } xc_dominfo_t; typedef dom0_getdomaininfo_t xc_domaininfo_t; -int xc_domain_create(int xc_handle, +int xc_domain_create(int xc_handle, uint32_t ssidref, xen_domain_handle_t handle, uint32_t *pdomid); @@ -144,7 +144,7 @@ * xc_domain_dumpcore_via_callback - produces a dump, using a specified * callback function */ -int xc_domain_dumpcore(int xc_handle, +int xc_domain_dumpcore(int xc_handle, uint32_t domid, const char *corename); @@ -156,7 +156,7 @@ */ typedef int (dumpcore_rtn_t)(void *arg, char *buffer, unsigned int length); -int xc_domain_dumpcore_via_callback(int xc_handle, +int xc_domain_dumpcore_via_callback(int xc_handle, uint32_t domid, void *arg, dumpcore_rtn_t dump_rtn); @@ -170,7 +170,7 @@ * @return 0 on success, -1 on failure. */ int xc_domain_max_vcpus(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned int max); /** @@ -181,7 +181,7 @@ * @parm domid the domain id to pause * @return 0 on success, -1 on failure. */ -int xc_domain_pause(int xc_handle, +int xc_domain_pause(int xc_handle, uint32_t domid); /** * This function unpauses a domain. 
The domain should have been previously @@ -191,7 +191,7 @@ * @parm domid the domain id to unpause * return 0 on success, -1 on failure */ -int xc_domain_unpause(int xc_handle, +int xc_domain_unpause(int xc_handle, uint32_t domid); /** @@ -203,7 +203,7 @@ * @parm domid the domain id to destroy * @return 0 on success, -1 on failure */ -int xc_domain_destroy(int xc_handle, +int xc_domain_destroy(int xc_handle, uint32_t domid); /** @@ -217,7 +217,7 @@ * @parm reason is the reason (SHUTDOWN_xxx) for the shutdown * @return 0 on success, -1 on failure */ -int xc_domain_shutdown(int xc_handle, +int xc_domain_shutdown(int xc_handle, uint32_t domid, int reason); @@ -242,7 +242,7 @@ * @return the number of domains enumerated or -1 on error */ int xc_domain_getinfo(int xc_handle, - uint32_t first_domid, + uint32_t first_domid, unsigned int max_doms, xc_dominfo_t *info); @@ -307,12 +307,12 @@ domid_t domid, int vcpu); -int xc_domain_sethandle(int xc_handle, uint32_t domid, +int xc_domain_sethandle(int xc_handle, uint32_t domid, xen_domain_handle_t handle); typedef dom0_shadow_control_stats_t xc_shadow_control_stats_t; int xc_shadow_control(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned int sop, unsigned long *dirty_bitmap, unsigned long pages, @@ -386,7 +386,7 @@ int xc_readconsolering(int xc_handle, char **pbuffer, - unsigned int *pnr_chars, + unsigned int *pnr_chars, int clear); typedef dom0_physinfo_t xc_physinfo_t; @@ -397,18 +397,18 @@ int *sched_id); int xc_domain_setmaxmem(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned int max_memkb); int xc_domain_memory_increase_reservation(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, unsigned long *extent_start); int xc_domain_memory_decrease_reservation(int xc_handle, - uint32_t domid, + uint32_t domid, unsigned long nr_extents, unsigned int extent_order, unsigned long *extent_start); @@ -443,7 +443,7 @@ unsigned long 
nr_mfns, uint8_t allow_access); -unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid, +unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid, unsigned long mfn); typedef dom0_perfc_desc_t xc_perfc_desc_t; @@ -492,11 +492,11 @@ unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom, int vcpu, unsigned long long virt); -int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf, +int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf, unsigned long max_pfns); int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid, - unsigned long *pfn_buf, + unsigned long *pfn_buf, unsigned int start_page, unsigned int nr_pages); int xc_copy_to_domain_page(int xc_handle, uint32_t domid, @@ -551,7 +551,7 @@ int xc_tbuf_set_size(int xc_handle, uint32_t size); /** - * This function retrieves the current size of the trace buffers. + * This function retrieves the current size of the trace buffers. * Note that the size returned is in terms of bytes, not pages. * @parm xc_handle a handle to an open hypervisor interface @@ -577,7 +577,7 @@ }; typedef struct xc_mmu xc_mmu_t; xc_mmu_t *xc_init_mmu_updates(int xc_handle, domid_t dom); -int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, +int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, unsigned long long ptr, unsigned long long val); int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu); diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xenguest.h Sat Apr 15 18:25:21 2006 @@ -1,8 +1,8 @@ /****************************************************************************** * xenguest.h - * + * * A library for guest domain management in Xen. - * + * * Copyright (c) 2003-2004, K A Fraser. 
*/ @@ -21,7 +21,7 @@ * @parm dom the id of the domain * @return 0 on success, -1 on failure */ -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, +int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, int (*suspend)(int domid)); @@ -37,8 +37,8 @@ * @parm store_mfn returned with the mfn of the store page * @return 0 on success, -1 on failure */ -int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long nr_pfns, unsigned int store_evtchn, +int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, + unsigned long nr_pfns, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn); diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xg_private.c Sat Apr 15 18:25:21 2006 @@ -1,6 +1,6 @@ /****************************************************************************** * xg_private.c - * + * * Helper functions for the rest of the library. 
*/ diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xg_private.h --- a/tools/libxc/xg_private.h Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xg_private.h Sat Apr 15 18:25:21 2006 @@ -11,7 +11,7 @@ #include <sys/stat.h> #include "xenctrl.h" -#include "xenguest.h" +#include "xenguest.h" #include <xen/linux/privcmd.h> #include <xen/memory.h> @@ -62,7 +62,7 @@ #define L2_PAGETABLE_ENTRIES_PAE 512 #define L3_PAGETABLE_ENTRIES_PAE 4 -#if defined(__i386__) +#if defined(__i386__) #define L1_PAGETABLE_ENTRIES 1024 #define L2_PAGETABLE_ENTRIES 1024 #elif defined(__x86_64__) @@ -71,7 +71,7 @@ #define L3_PAGETABLE_ENTRIES 512 #define L4_PAGETABLE_ENTRIES 512 #endif - + #define PAGE_SHIFT XC_PAGE_SHIFT #define PAGE_SIZE (1UL << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) @@ -167,8 +167,8 @@ int error; int max_queue_size; void * addr; - privcmd_mmap_t ioctl; - + privcmd_mmap_t ioctl; + } mfn_mapper_t; int xc_copy_to_domain_page(int xc_handle, uint32_t domid, diff -r 83eb8d81c96f -r 91da9a1b7196 tools/libxc/xg_save_restore.h --- a/tools/libxc/xg_save_restore.h Sat Apr 15 18:25:09 2006 +++ b/tools/libxc/xg_save_restore.h Sat Apr 15 18:25:21 2006 @@ -1,7 +1,7 @@ /* ** xg_save_restore.h -** -** Defintions and utilities for save / restore. +** +** Defintions and utilities for save / restore. */ #include "xc_private.h" @@ -29,8 +29,8 @@ /* -** We process save/restore/migrate in batches of pages; the below -** determines how many pages we (at maximum) deal with in each batch. +** We process save/restore/migrate in batches of pages; the below +** determines how many pages we (at maximum) deal with in each batch. 
*/ #define MAX_BATCH_SIZE 1024 /* up to 1024 pages (4MB) at a time */ @@ -40,56 +40,56 @@ /* -** Determine various platform information required for save/restore, in -** particular: +** Determine various platform information required for save/restore, in +** particular: ** -** - the maximum MFN on this machine, used to compute the size of -** the M2P table; -** -** - the starting virtual address of the the hypervisor; we use this -** to determine which parts of guest address space(s) do and don't -** require canonicalization during save/restore; and -** -** - the number of page-table levels for save/ restore. This should -** be a property of the domain, but for the moment we just read it +** - the maximum MFN on this machine, used to compute the size of +** the M2P table; +** +** - the starting virtual address of the the hypervisor; we use this +** to determine which parts of guest address space(s) do and don't +** require canonicalization during save/restore; and +** +** - the number of page-table levels for save/ restore. This should +** be a property of the domain, but for the moment we just read it ** from the hypervisor. ** -** Returns 1 on success, 0 on failure. +** Returns 1 on success, 0 on failure. 
*/ -static int get_platform_info(int xc_handle, uint32_t dom, - /* OUT */ unsigned long *max_mfn, - /* OUT */ unsigned long *hvirt_start, +static int get_platform_info(int xc_handle, uint32_t dom, + /* OUT */ unsigned long *max_mfn, + /* OUT */ unsigned long *hvirt_start, /* OUT */ unsigned int *pt_levels) - -{ + +{ xen_capabilities_info_t xen_caps = ""; xen_platform_parameters_t xen_params; if (xc_version(xc_handle, XENVER_platform_parameters, &xen_params) != 0) return 0; - + if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) return 0; *max_mfn = xc_memory_op(xc_handle, XENMEM_maximum_ram_page, NULL); - + *hvirt_start = xen_params.virt_start; if (strstr(xen_caps, "xen-3.0-x86_64")) *pt_levels = 4; else if (strstr(xen_caps, "xen-3.0-x86_32p")) - *pt_levels = 3; + *pt_levels = 3; else if (strstr(xen_caps, "xen-3.0-x86_32")) - *pt_levels = 2; - else - return 0; - + *pt_levels = 2; + else + return 0; + return 1; -} +} -/* -** Save/restore deal with the mfn_to_pfn (M2P) and pfn_to_mfn (P2M) tables. +/* +** Save/restore deal with the mfn_to_pfn (M2P) and pfn_to_mfn (P2M) tables. ** The M2P simply holds the corresponding PFN, while the top bit of a P2M ** entry tell us whether or not the the PFN is currently mapped. */ @@ -98,18 +98,18 @@ #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1)) -/* -** The M2P is made up of some number of 'chunks' of at least 2MB in size. -** The below definitions and utility function(s) deal with mapping the M2P -** regarldess of the underlying machine memory size or architecture. +/* +** The M2P is made up of some number of 'chunks' of at least 2MB in size. +** The below definitions and utility function(s) deal with mapping the M2P +** regarldess of the underlying machine memory size or architecture. 
*/ -#define M2P_SHIFT L2_PAGETABLE_SHIFT_PAE -#define M2P_CHUNK_SIZE (1 << M2P_SHIFT) -#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT) +#define M2P_SHIFT L2_PAGETABLE_SHIFT_PAE +#define M2P_CHUNK_SIZE (1 << M2P_SHIFT) +#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT) #define M2P_CHUNKS(_m) (M2P_SIZE((_m)) >> M2P_SHIFT) /* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */ -#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) +#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) /* Number of unsigned longs in a page */ #define ulpp (PAGE_SIZE/sizeof(unsigned long)) @@ -127,12 +127,12 @@ #define NR_SLACK_ENTRIES ((8 * 1024 * 1024) / PAGE_SIZE) /* Is the given PFN within the 'slack' region at the top of the P2M? */ -#define IS_REAL_PFN(_pfn) ((max_pfn - (_pfn)) > NR_SLACK_ENTRIES) +#define IS_REAL_PFN(_pfn) ((max_pfn - (_pfn)) > NR_SLACK_ENTRIES) /* Returns TRUE if the PFN is currently mapped */ #define is_mapped(pfn_type) (!((pfn_type) & 0x80000000UL)) -#define INVALID_P2M_ENTRY (~0UL) +#define INVALID_P2M_ENTRY (~0UL) diff -r 83eb8d81c96f -r 91da9a1b7196 tools/misc/xen-clone --- a/tools/misc/xen-clone Sat Apr 15 18:25:09 2006 +++ b/tools/misc/xen-clone Sat Apr 15 18:25:21 2006 @@ -113,7 +113,7 @@ # Turn linux into xenolinux then build it cd xenolinux-${LINUX_VER}-sparse - ./mkbuildtree ../../linux-${LINUX_VER} + bash ./mkbuildtree ../../linux-${LINUX_VER} cd ../.. 
mv linux-${LINUX_VER} xenolinux-${LINUX_VER} cd xenolinux-${LINUX_VER} diff -r 83eb8d81c96f -r 91da9a1b7196 tools/python/xen/lowlevel/xs/xs.c --- a/tools/python/xen/lowlevel/xs/xs.c Sat Apr 15 18:25:09 2006 +++ b/tools/python/xen/lowlevel/xs/xs.c Sat Apr 15 18:25:21 2006 @@ -589,7 +589,7 @@ static PyObject *xspy_introduce_domain(XsHandle *self, PyObject *args) { - domid_t dom; + uint32_t dom; unsigned long page; unsigned int port; @@ -620,7 +620,7 @@ static PyObject *xspy_release_domain(XsHandle *self, PyObject *args) { - domid_t dom; + uint32_t dom; struct xs_handle *xh = xshandle(self); bool result = 0; @@ -677,7 +677,7 @@ static PyObject *xspy_get_domain_path(XsHandle *self, PyObject *args) { struct xs_handle *xh = xshandle(self); - int domid; + uint32_t domid; char *xsval; if (!xh) diff -r 83eb8d81c96f -r 91da9a1b7196 tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Sat Apr 15 18:25:09 2006 +++ b/tools/python/xen/xend/XendCheckpoint.py Sat Apr 15 18:25:21 2006 @@ -53,7 +53,7 @@ -def save(fd, dominfo, live): +def save(fd, dominfo, live, dst): write_exact(fd, SIGNATURE, "could not write guest state file: signature") config = sxp.to_string(dominfo.sxpr()) @@ -65,6 +65,8 @@ dominfo.setName('migrating-' + domain_name) try: + dominfo.migrateDevices(live, dst, 1, domain_name) + write_exact(fd, pack("!i", len(config)), "could not write guest state file: config len") write_exact(fd, config, "could not write guest state file: config") @@ -85,7 +87,9 @@ log.debug("Suspending %d ...", dominfo.getDomid()) dominfo.shutdown('suspend') dominfo.waitForShutdown() + dominfo.migrateDevices(live, dst, 2, domain_name) log.info("Domain %d suspended.", dominfo.getDomid()) + dominfo.migrateDevices(live, dst, 3, domain_name) tochild.write("done\n") tochild.flush() log.debug('Written done') diff -r 83eb8d81c96f -r 91da9a1b7196 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Sat Apr 15 18:25:09 2006 +++ 
b/tools/python/xen/xend/XendDomain.py Sat Apr 15 18:25:21 2006 @@ -405,6 +405,9 @@ if dominfo.getDomid() == PRIV_DOMAIN: raise XendError("Cannot migrate privileged domain %i" % domid) + """ The following call may raise a XendError exception """ + dominfo.testMigrateDevices(live, dst) + if port == 0: port = xroot.get_xend_relocation_port() try: @@ -414,8 +417,8 @@ raise XendError("can't connect: %s" % err[1]) sock.send("receive\n") - sock.recv(80) - XendCheckpoint.save(sock.fileno(), dominfo, live) + sock.recv(80) + XendCheckpoint.save(sock.fileno(), dominfo, live, dst) def domain_save(self, domid, dst): @@ -435,7 +438,7 @@ fd = os.open(dst, os.O_WRONLY | os.O_CREAT | os.O_TRUNC) try: # For now we don't support 'live checkpoint' - return XendCheckpoint.save(fd, dominfo, False) + return XendCheckpoint.save(fd, dominfo, False, dst) finally: os.close(fd) except OSError, ex: diff -r 83eb8d81c96f -r 91da9a1b7196 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Sat Apr 15 18:25:09 2006 +++ b/tools/python/xen/xend/XendDomainInfo.py Sat Apr 15 18:25:21 2006 @@ -1395,6 +1395,38 @@ if self.image: self.image.createDeviceModel() + ## public: + + def testMigrateDevices(self, live, dst): + """ Notify all device about intention of migration + @raise: XendError for a device that cannot be migrated + """ + for (n, c) in self.info['device']: + rc = self.migrateDevice(n, c, live, dst, 0) + if rc != 0: + raise XendError("Device of type '%s' refuses migration." 
% n) + + def migrateDevices(self, live, dst, step, domName=''): + """Notify the devices about migration + """ + ctr = 0 + try: + for (n, c) in self.info['device']: + self.migrateDevice(n, c, live, dst, step, domName) + ctr = ctr + 1 + except: + for (n, c) in self.info['device']: + if ctr == 0: + step = step - 1 + ctr = ctr - 1 + self.recoverMigrateDevice(n, c, live, dst, step, domName) + raise + + def migrateDevice(self, deviceClass, deviceConfig, live, dst, step, domName=''): + return self.getDeviceController(deviceClass).migrate(deviceConfig, live, dst, step, domName) + + def recoverMigrateDevice(self, deviceClass, deviceConfig, live, dst, step, domName=''): + return self.getDeviceController(deviceClass).recover_migrate(deviceConfig, live, dst, step, domName) def waitForDevices(self): """Wait for this domain's configured devices to connect. diff -r 83eb8d81c96f -r 91da9a1b7196 tools/python/xen/xend/XendRoot.py --- a/tools/python/xen/xend/XendRoot.py Sat Apr 15 18:25:09 2006 +++ b/tools/python/xen/xend/XendRoot.py Sat Apr 15 18:25:21 2006 @@ -85,6 +85,9 @@ """Default for the flag indicating whether xend should run a unix-domain server (deprecated).""" xend_unix_server_default = 'no' + + """Default external migration tool """ + external_migration_tool_default = '' """Default path the unix-domain server listens at.""" xend_unix_path_default = '/var/lib/xend/xend-socket' @@ -250,6 +253,9 @@ else: return None + def get_external_migration_tool(self): + """@return the name of the tool to handle virtual TPM migration.""" + return self.get_config_value('external-migration-tool', self.external_migration_tool_default) def get_enable_dump(self): return self.get_config_bool('enable-dump', 'no') diff -r 83eb8d81c96f -r 91da9a1b7196 tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Sat Apr 15 18:25:09 2006 +++ b/tools/python/xen/xend/server/DevController.py Sat Apr 15 18:25:21 2006 @@ -267,6 +267,41 @@ raise NotImplementedError() 
+ def migrate(self, deviceConfig, live, dst, step, domName): + """ Migration of a device. The 'live' parameter indicates + whether the device is live-migrated (live=1). 'dst' then gives + the hostname of the machine to migrate to. + This function is called for 4 steps: + If step == 0: Check whether the device is ready to be migrated + or can at all be migrated; return a '-1' if + the device is NOT ready, a '0' otherwise. If it is + not ready ( = not possible to migrate this device), + migration will not take place. + step == 1: Called immediately after step 0; migration + of the kernel has started; + step == 2: Called after the suspend has been issued + to the domain and the domain is not scheduled anymore. + Synchronize with what was started in step 1, if necessary. + Now the device should initiate its transfer to the + given target. Since there might be more than just + one device initiating a migration, this step should + put the process performing the transfer into the + background and return immediately to achieve as much + concurrency as possible. + step == 3: Synchronize with the migration of the device that + was initiated in step 2. + Make sure that the migration has finished and only + then return from the call. + """ + return 0 + + + def recover_migrate(self, deviceConfig, list, dst, step, domName): + """ Recover from device migration. The given step was the + last one that was successfully executed. 
+ """ + return 0 + def getDomid(self): """Stub to {@link XendDomainInfo.getDomid}, for use by our diff -r 83eb8d81c96f -r 91da9a1b7196 tools/python/xen/xend/server/tpmif.py --- a/tools/python/xen/xend/server/tpmif.py Sat Apr 15 18:25:09 2006 +++ b/tools/python/xen/xend/server/tpmif.py Sat Apr 15 18:25:21 2006 @@ -23,8 +23,16 @@ from xen.xend import sxp from xen.xend.XendLogging import log +from xen.xend.XendError import XendError +from xen.xend import XendRoot from xen.xend.server.DevController import DevController + +import os +import re + + +xroot = XendRoot.instance() class TPMifController(DevController): @@ -61,3 +69,43 @@ result.append(['instance', instance]) return result + + def migrate(self, deviceConfig, live, dst, step, domName): + """@see DevContoller.migrate""" + if live: + tool = xroot.get_external_migration_tool() + if tool != '': + log.info("Request to live-migrate device to %s. step=%d.", + dst, step) + + if step == 0: + """Assuming for now that everything is ok and migration + with the given tool can proceed. + """ + return 0 + else: + fd = os.popen("%s -type vtpm -step %d -host %s -domname %s" % + (tool, step, dst, domName), + 'r') + for line in fd.readlines(): + mo = re.search('Error', line) + if mo: + raise XendError("vtpm: Fatal error in migration step %d." % + step) + return 0 + else: + log.debug("External migration tool not in configuration.") + return -1 + return 0 + + def recover_migrate(self, deviceConfig, live, dst, step, domName): + """@see DevContoller.recover_migrate""" + if live: + tool = xroot.get_external_migration_tool() + if tool != '': + log.info("Request to recover live-migrated device. 
last good step=%d.", + step) + fd = os.popen("%s -type vtpm -step %d -host %s -domname %s -recover" % + (tool, step, dst, domName), + 'r') + return 0 diff -r 83eb8d81c96f -r 91da9a1b7196 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Sat Apr 15 18:25:09 2006 +++ b/tools/python/xen/xm/create.py Sat Apr 15 18:25:21 2006 @@ -158,7 +158,7 @@ use="CPU to run the VCPU0 on.") gopts.var('cpus', val='CPUS', - fn=set_int, default=None, + fn=set_value, default=None, use="CPUS to run the domain on.") gopts.var('pae', val='PAE', diff -r 83eb8d81c96f -r 91da9a1b7196 tools/xenmon/README --- a/tools/xenmon/README Sat Apr 15 18:25:09 2006 +++ b/tools/xenmon/README Sat Apr 15 18:25:21 2006 @@ -84,6 +84,16 @@ events cause a trace record to be emitted. - To exit xenmon, type 'q' - To cycle the display to other physical cpu's, type 'c' + - The first time xenmon is run, it attempts to allocate xen trace buffers + using a default size. If you wish to use a non-default value for the + trace buffer size, run the 'setsize' program (located in tools/xentrace) + and specify the number of memory pages as a parameter. The default is 20. + - Not well tested with domains using more than 1 virtual cpu + - If you create a lot of domains, or repeatedly kill a domain and restart it, + and the domain id's get to be bigger than NDOMAINS, then xenmon behaves badly. + This is a bug that is due to xenbaked's treatment of domain id's vs. domain + indices in a data array. Will be fixed in a future release; Workaround: + Increase NDOMAINS in xenbaked and rebuild. 
Future Work ----------- diff -r 83eb8d81c96f -r 91da9a1b7196 tools/xenmon/xenbaked.c --- a/tools/xenmon/xenbaked.c Sat Apr 15 18:25:09 2006 +++ b/tools/xenmon/xenbaked.c Sat Apr 15 18:25:21 2006 @@ -7,6 +7,7 @@ * * Copyright (C) 2004 by Intel Research Cambridge * Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins + * Copyright (C) 2006 by Hewlett Packard Fort Collins * * Authors: Diwaker Gupta, diwaker.gupta@xxxxxx * Rob Gardner, rob.gardner@xxxxxx @@ -42,6 +43,8 @@ #include <xenctrl.h> #include <xen/xen.h> #include <string.h> +#include <sys/select.h> +#include <xen/linux/evtchn.h> #include "xc_private.h" typedef struct { int counter; } atomic_t; @@ -81,14 +84,13 @@ int interrupted = 0; /* gets set if we get a SIGHUP */ int rec_count = 0; +int wakeups = 0; time_t start_time; int dom0_flips = 0; _new_qos_data *new_qos; _new_qos_data **cpu_qos_data; - -#define ID(X) ((X>NDOMAINS-1)?(NDOMAINS-1):X) // array of currently running domains, indexed by cpu int *running = NULL; @@ -223,6 +225,9 @@ printf("processed %d total records in %d seconds (%ld per second)\n", rec_count, (int)run_time, rec_count/run_time); + printf("woke up %d times in %d seconds (%ld per second)\n", wakeups, + (int) run_time, wakeups/run_time); + check_gotten_sum(); } @@ -243,6 +248,112 @@ stat_map[0].event_count++; // other } +#define EVTCHN_DEV_NAME "/dev/xen/evtchn" +#define EVTCHN_DEV_MAJOR 10 +#define EVTCHN_DEV_MINOR 201 + +int virq_port; +int eventchn_fd = -1; + +/* Returns the event channel handle. */ +/* Stolen from xenstore code */ +int eventchn_init(void) +{ + struct stat st; + struct ioctl_evtchn_bind_virq bind; + int rc; + + // to revert to old way: + if (0) + return -1; + + /* Make sure any existing device file links to correct device. 
*/ + if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) || + (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR))) + (void)unlink(EVTCHN_DEV_NAME); + + reopen: + eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR); + if (eventchn_fd == -1) { + if ((errno == ENOENT) && + ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) && + (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600, + makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0)) + goto reopen; + return -errno; + } + + if (eventchn_fd < 0) + perror("Failed to open evtchn device"); + + bind.virq = VIRQ_TBUF; + rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind); + if (rc == -1) + perror("Failed to bind to domain exception virq port"); + virq_port = rc; + + return eventchn_fd; +} + +void wait_for_event(void) +{ + int ret; + fd_set inset; + evtchn_port_t port; + struct timeval tv; + + if (eventchn_fd < 0) { + nanosleep(&opts.poll_sleep, NULL); + return; + } + + FD_ZERO(&inset); + FD_SET(eventchn_fd, &inset); + tv.tv_sec = 1; + tv.tv_usec = 0; + // tv = millis_to_timespec(&opts.poll_sleep); + ret = select(eventchn_fd+1, &inset, NULL, NULL, &tv); + + if ( (ret == 1) && FD_ISSET(eventchn_fd, &inset)) { + if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port)) + perror("Failed to read from event fd"); + + // if (port == virq_port) + // printf("got the event I was looking for\r\n"); + + if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port)) + perror("Failed to write to event fd"); + } +} + +void enable_tracing_or_die(int xc_handle) +{ + int enable = 1; + int tbsize = DEFAULT_TBUF_SIZE; + + if (xc_tbuf_enable(xc_handle, enable) != 0) { + if (xc_tbuf_set_size(xc_handle, tbsize) != 0) { + perror("set_size Hypercall failure"); + exit(1); + } + printf("Set default trace buffer allocation (%d pages)\n", tbsize); + if (xc_tbuf_enable(xc_handle, enable) != 0) { + perror("Could not enable trace buffers\n"); + exit(1); + } + } + else + printf("Tracing enabled\n"); +} + +void disable_tracing(void) +{ + int 
enable = 0; + int xc_handle = xc_interface_open(); + + xc_tbuf_enable(xc_handle, enable); + xc_interface_close(xc_handle); +} /** @@ -258,6 +369,17 @@ int ret; dom0_op_t op; /* dom0 op we'll build */ int xc_handle = xc_interface_open(); /* for accessing control interface */ + unsigned int tbsize; + + enable_tracing_or_die(xc_handle); + + if (xc_tbuf_get_size(xc_handle, &tbsize) != 0) { + perror("Failure to get tbuf info from Xen. Guess size is 0?"); + exit(1); + } + else + printf("Current tbuf size: 0x%x\n", tbsize); + op.cmd = DOM0_TBUFCONTROL; op.interface_version = DOM0_INTERFACE_VERSION; @@ -448,6 +570,11 @@ meta = init_bufs_ptrs (tbufs_mapped, num, size); data = init_rec_ptrs(meta, num); + // Set up event channel for select() + if (eventchn_init() < 0) { + fprintf(stderr, "Failed to initialize event channel; Using POLL method\r\n"); + } + /* now, scan buffers for events */ while ( !interrupted ) { @@ -460,7 +587,8 @@ meta[i]->cons++; } - nanosleep(&opts.poll_sleep, NULL); + wait_for_event(); + wakeups++; } /* cleanup */ @@ -640,6 +768,7 @@ dump_stats(); msync(new_qos, sizeof(_new_qos_data), MS_SYNC); + disable_tracing(); return ret; } @@ -737,7 +866,9 @@ start = new_qos->domain_info[id].start_time; if (start > now) { // wrapped around run_time = now + (~0ULL - start); - printf("warning: start > now\n"); + // this could happen if there is nothing going on within a cpu; + // in this case the idle domain would run forever + // printf("warning: start > now\n"); } else run_time = now - start; @@ -746,11 +877,11 @@ new_qos->domain_info[id].ns_oncpu_since_boot += run_time; new_qos->domain_info[id].start_time = now; new_qos->domain_info[id].ns_since_boot += time_since_update; -#if 1 + new_qos->qdata[n].ns_gotten[id] += run_time; - if (domid == 0 && cpu == 1) - printf("adding run time for dom0 on cpu1\r\n"); -#endif + // if (domid == 0 && cpu == 1) + // printf("adding run time for dom0 on cpu1\r\n"); + } new_qos->domain_info[id].runnable_at_last_update = 
domain_runnable(domid); @@ -916,13 +1047,13 @@ { int id = ID(domid); + qos_update_thread_stats(cpu, domid, now); + if (domain_runnable(id)) // double call? return; new_qos->domain_info[id].runnable = 1; update_blocked_time(domid, now); - qos_update_thread_stats(cpu, domid, now); - new_qos->domain_info[id].blocked_start_time = 0; /* invalidate */ new_qos->domain_info[id].runnable_start_time = now; // runnable_start_time[id] = now; @@ -951,7 +1082,7 @@ if (domid == IDLE_DOMAIN_ID) domid = NDOMAINS-1; if (domid < 0 || domid >= NDOMAINS) { - printf("bad domain id: %d\n", domid); + printf("bad domain id: %d\r\n", domid); return 0; } if (new_qos->domain_info[domid].in_use == 0) diff -r 83eb8d81c96f -r 91da9a1b7196 tools/xenmon/xenbaked.h --- a/tools/xenmon/xenbaked.h Sat Apr 15 18:25:09 2006 +++ b/tools/xenmon/xenbaked.h Sat Apr 15 18:25:21 2006 @@ -1,5 +1,5 @@ /****************************************************************************** - * tools/xenbaked.h + * TOOLS/xenbaked.h * * Header file for xenbaked * @@ -30,6 +30,7 @@ #define million 1000000LL #define billion 1000000000LL +// caution: don't use QOS_ADD with negative numbers! #define QOS_ADD(N,A) ((N+A)<(NSAMPLES-1) ? (N+A) : A) #define QOS_INCR(N) ((N<(NSAMPLES-2)) ? (N+1) : 0) #define QOS_DECR(N) ((N==0) ? (NSAMPLES-1) : (N-1)) @@ -43,6 +44,8 @@ /* Number of data points to keep */ #define NSAMPLES 100 +#define ID(X) ((X>NDOMAINS-1)?(NDOMAINS-1):X) +#define DEFAULT_TBUF_SIZE 20 // per domain stuff typedef struct diff -r 83eb8d81c96f -r 91da9a1b7196 tools/xenmon/xenmon.py --- a/tools/xenmon/xenmon.py Sat Apr 15 18:25:09 2006 +++ b/tools/xenmon/xenmon.py Sat Apr 15 18:25:21 2006 @@ -5,7 +5,7 @@ # There is a curses interface for live monitoring. XenMon also allows # logging to a file. 
For options, run python xenmon.py -h # -# Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins +# Copyright (C) 2005,2006 by Hewlett Packard, Palo Alto and Fort Collins # Authors: Lucy Cherkasova, lucy.cherkasova@xxxxxx # Rob Gardner, rob.gardner@xxxxxx # Diwaker Gupta, diwaker.gupta@xxxxxx @@ -85,6 +85,33 @@ parser.add_option("--ms_per_sample", dest="mspersample", action="store", type="int", default=100, help = "determines how many ms worth of data goes in a sample") + parser.add_option("--cpu", dest="cpu", action="store", type="int", default=0, + help = "specifies which cpu to display data for") + + parser.add_option("--allocated", dest="allocated", action="store_true", + default=False, help="Display allocated time for each domain") + parser.add_option("--noallocated", dest="allocated", action="store_false", + default=False, help="Don't display allocated time for each domain") + + parser.add_option("--blocked", dest="blocked", action="store_true", + default=True, help="Display blocked time for each domain") + parser.add_option("--noblocked", dest="blocked", action="store_false", + default=True, help="Don't display blocked time for each domain") + + parser.add_option("--waited", dest="waited", action="store_true", + default=True, help="Display waiting time for each domain") + parser.add_option("--nowaited", dest="waited", action="store_false", + default=True, help="Don't display waiting time for each domain") + + parser.add_option("--excount", dest="excount", action="store_true", + default=False, help="Display execution count for each domain") + parser.add_option("--noexcount", dest="excount", action="store_false", + default=False, help="Don't display execution count for each domain") + parser.add_option("--iocount", dest="iocount", action="store_true", + default=False, help="Display I/O count for each domain") + parser.add_option("--noiocount", dest="iocount", action="store_false", + default=False, help="Don't display I/O count for each domain") + 
return parser # encapsulate information about a domain @@ -227,19 +254,17 @@ # the live monitoring code -def show_livestats(): - cpu = 0 # cpu of interest to display data for +def show_livestats(cpu): ncpu = 1 # number of cpu's on this platform slen = 0 # size of shared data structure, incuding padding - global dom_in_use + cpu_1sec_usage = 0.0 + cpu_10sec_usage = 0.0 + heartbeat = 1 + global dom_in_use, options # mmap the (the first chunk of the) file shmf = open(SHM_FILE, "r+") shm = mmap.mmap(shmf.fileno(), QOS_DATA_SIZE) - - samples = [] - doms = [] - dom_in_use = [] # initialize curses stdscr = _c.initscr() @@ -253,7 +278,8 @@ # display in a loop while True: - for cpuidx in range(0, ncpu): + cpuidx = 0 + while cpuidx < ncpu: # calculate offset in mmap file to start from idx = cpuidx * slen @@ -261,6 +287,7 @@ samples = [] doms = [] + dom_in_use = [] # read in data for i in range(0, NSAMPLES): @@ -279,6 +306,8 @@ # dom_in_use.append(in_use) dom_in_use.append(dom[8]) idx += len +# print "dom_in_use(cpu=%d): " % cpuidx, dom_in_use + len = struct.calcsize("4i") oldncpu = ncpu @@ -294,6 +323,8 @@ # stop examining mmap data and start displaying stuff if cpuidx == cpu: break + + cpuidx = cpuidx + 1 # calculate starting and ending datapoints; never look at "next" since # it represents live data that may be in transition. 
@@ -312,12 +343,15 @@ row = 0 display(stdscr, row, 1, "CPU = %d" % cpu, _c.A_STANDOUT) - display(stdscr, row, 10, "%sLast 10 seconds%sLast 1 second" % (6*' ', 30*' '), _c.A_BOLD) + display(stdscr, row, 10, "%sLast 10 seconds (%3.2f%%)%sLast 1 second (%3.2f%%)" % (6*' ', cpu_10sec_usage, 30*' ', cpu_1sec_usage), _c.A_BOLD) row +=1 display(stdscr, row, 1, "%s" % ((maxx-2)*'=')) total_h1_cpu = 0 total_h2_cpu = 0 + + cpu_1sec_usage = 0.0 + cpu_10sec_usage = 0.0 for dom in range(0, NDOMAINS): if not dom_in_use[dom]: @@ -332,92 +366,102 @@ display(stdscr, row, col, "%s" % time_scale(h2[dom][0][0])) col += 12 display(stdscr, row, col, "%3.2f%%" % h2[dom][0][1]) + if dom != NDOMAINS - 1: + cpu_10sec_usage += h2[dom][0][1] col += 12 display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][0][2])) col += 18 display(stdscr, row, col, "%s" % time_scale(h1[dom][0][0])) col += 12 - display(stdscr, row, col, "%3.2f%%" % h1[dom][0][1]) + display(stdscr, row, col, "%3.2f%%" % h1[dom][0][1], _c.A_STANDOUT) col += 12 display(stdscr, row, col, "%s/ex" % time_scale(h1[dom][0][2])) col += 18 display(stdscr, row, col, "Gotten") + + if dom != NDOMAINS - 1: + cpu_1sec_usage = cpu_1sec_usage + h1[dom][0][1] # display allocated - row += 1 - col = 2 - display(stdscr, row, col, "%d" % dom) - col += 28 - display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][1])) - col += 42 - display(stdscr, row, col, "%s/ex" % time_scale(h1[dom][1])) - col += 18 - display(stdscr, row, col, "Allocated") + if options.allocated: + row += 1 + col = 2 + display(stdscr, row, col, "%d" % dom) + col += 28 + display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][1])) + col += 42 + display(stdscr, row, col, "%s/ex" % time_scale(h1[dom][1])) + col += 18 + display(stdscr, row, col, "Allocated") # display blocked - row += 1 - col = 2 - display(stdscr, row, col, "%d" % dom) - col += 4 - display(stdscr, row, col, "%s" % time_scale(h2[dom][2][0])) - col += 12 - display(stdscr, row, col, "%3.2f%%" % h2[dom][2][1]) - col += 12 - 
display(stdscr, row, col, "%s/io" % time_scale(h2[dom][2][2])) - col += 18 - display(stdscr, row, col, "%s" % time_scale(h1[dom][2][0])) - col += 12 - display(stdscr, row, col, "%3.2f%%" % h1[dom][2][1]) - col += 12 - display(stdscr, row, col, "%s/io" % time_scale(h1[dom][2][2])) - col += 18 - display(stdscr, row, col, "Blocked") + if options.blocked: + row += 1 + col = 2 + display(stdscr, row, col, "%d" % dom) + col += 4 + display(stdscr, row, col, "%s" % time_scale(h2[dom][2][0])) + col += 12 + display(stdscr, row, col, "%3.2f%%" % h2[dom][2][1]) + col += 12 + display(stdscr, row, col, "%s/io" % time_scale(h2[dom][2][2])) + col += 18 + display(stdscr, row, col, "%s" % time_scale(h1[dom][2][0])) + col += 12 + display(stdscr, row, col, "%3.2f%%" % h1[dom][2][1]) + col += 12 + display(stdscr, row, col, "%s/io" % time_scale(h1[dom][2][2])) + col += 18 + display(stdscr, row, col, "Blocked") # display waited - row += 1 - col = 2 - display(stdscr, row, col, "%d" % dom) - col += 4 - display(stdscr, row, col, "%s" % time_scale(h2[dom][3][0])) - col += 12 - display(stdscr, row, col, "%3.2f%%" % h2[dom][3][1]) - col += 12 - display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][3][2])) - col += 18 - display(stdscr, row, col, "%s" % time_scale(h1[dom][3][0])) - col += 12 - display(stdscr, row, col, "%3.2f%%" % h1[dom][3][1]) - col += 12 - display(stdscr, row, col, "%s/ex" % time_scale(h1[dom][3][2])) - col += 18 - display(stdscr, row, col, "Waited") + if options.waited: + row += 1 + col = 2 + display(stdscr, row, col, "%d" % dom) + col += 4 + display(stdscr, row, col, "%s" % time_scale(h2[dom][3][0])) + col += 12 + display(stdscr, row, col, "%3.2f%%" % h2[dom][3][1]) + col += 12 + display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][3][2])) + col += 18 + display(stdscr, row, col, "%s" % time_scale(h1[dom][3][0])) + col += 12 + display(stdscr, row, col, "%3.2f%%" % h1[dom][3][1]) + col += 12 + display(stdscr, row, col, "%s/ex" % time_scale(h1[dom][3][2])) + col += 18 + 
display(stdscr, row, col, "Waited") # display ex count - row += 1 - col = 2 - display(stdscr, row, col, "%d" % dom) - - col += 28 - display(stdscr, row, col, "%d/s" % h2[dom][4]) - col += 42 - display(stdscr, row, col, "%d" % h1[dom][4]) - col += 18 - display(stdscr, row, col, "Execution count") + if options.excount: + row += 1 + col = 2 + display(stdscr, row, col, "%d" % dom) + + col += 28 + display(stdscr, row, col, "%d/s" % h2[dom][4]) + col += 42 + display(stdscr, row, col, "%d" % h1[dom][4]) + col += 18 + display(stdscr, row, col, "Execution count") # display io count - row += 1 - col = 2 - display(stdscr, row, col, "%d" % dom) - col += 4 - display(stdscr, row, col, "%d/s" % h2[dom][5][0]) - col += 24 - display(stdscr, row, col, "%d/ex" % h2[dom][5][1]) - col += 18 - display(stdscr, row, col, "%d" % h1[dom][5][0]) - col += 24 - display(stdscr, row, col, "%3.2f/ex" % h1[dom][5][1]) - col += 18 - display(stdscr, row, col, "I/O Count") + if options.iocount: + row += 1 + col = 2 + display(stdscr, row, col, "%d" % dom) + col += 4 + display(stdscr, row, col, "%d/s" % h2[dom][5][0]) + col += 24 + display(stdscr, row, col, "%d/ex" % h2[dom][5][1]) + col += 18 + display(stdscr, row, col, "%d" % h1[dom][5][0]) + col += 24 + display(stdscr, row, col, "%3.2f/ex" % h1[dom][5][1]) + col += 18 + display(stdscr, row, col, "I/O Count") #row += 1 #stdscr.hline(row, 1, '-', maxx - 2) @@ -426,6 +470,9 @@ row += 1 + star = heartbeat * '*' + heartbeat = 1 - heartbeat + display(stdscr, row, 1, star) display(stdscr, row, 2, TOTALS % (total_h2_cpu, total_h1_cpu)) row += 1 # display(stdscr, row, 2, @@ -515,10 +562,10 @@ outfiles[dom].delayed_write("# passed cpu dom cpu(tot) cpu(%) cpu/ex allocated/ex blocked(tot) blocked(%) blocked/io waited(tot) waited(%) waited/ex ex/s io(tot) io/ex\n") while options.duration == 0 or interval < (options.duration * 1000): - for cpuidx in range(0, ncpu): + cpuidx = 0 + while cpuidx < ncpu: idx = cpuidx * slen # offset needed in mmap file - samples = [] 
doms = [] @@ -571,6 +618,7 @@ curr = time.time() interval += (curr - last) * 1000 last = curr + cpuidx = cpuidx + 1 time.sleep(options.interval / 1000.0) for dom in range(0, NDOMAINS): @@ -601,7 +649,7 @@ start_xenbaked() if options.live: - show_livestats() + show_livestats(options.cpu) else: try: writelog() diff -r 83eb8d81c96f -r 91da9a1b7196 tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Sat Apr 15 18:25:09 2006 +++ b/tools/xenstore/xenstored_core.c Sat Apr 15 18:25:21 2006 @@ -77,6 +77,10 @@ } while (0) +int quota_nb_entry_per_domain = 1000; +int quota_nb_watch_per_domain = 128; +int quota_max_entry_size = 2048; /* 2K */ + #ifdef TESTING static bool failtest = false; @@ -455,6 +459,10 @@ data.dsize = 3*sizeof(uint32_t) + node->num_perms*sizeof(node->perms[0]) + node->datalen + node->childlen; + + if (data.dsize >= quota_max_entry_size) + goto error; + data.dptr = talloc_size(node, data.dsize); ((uint32_t *)data.dptr)[0] = node->num_perms; ((uint32_t *)data.dptr)[1] = node->datalen; @@ -470,10 +478,12 @@ /* TDB should set errno, but doesn't even set ecode AFAICT. */ if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) { corrupt(conn, "Write of %s = %s failed", key, data); - errno = ENOSPC; - return false; + goto error; } return true; + error: + errno = ENOSPC; + return false; } static enum xs_perm_type perm_for_conn(struct connection *conn, @@ -765,8 +775,11 @@ key.dptr = (void *)node->name; key.dsize = strlen(node->name); - if (tdb_delete(tdb_context(conn), key) != 0) + if (tdb_delete(tdb_context(conn), key) != 0) { corrupt(conn, "Could not delete '%s'", node->name); + return; + } + domain_entry_dec(conn); } /* Must not be / */ @@ -788,7 +801,10 @@ parent = construct_node(conn, parentname); if (!parent) return NULL; - + + if (domain_entry(conn) >= quota_nb_entry_per_domain) + return NULL; + /* Add child to parent. 
*/ base = basename(name); baselen = strlen(base) + 1; @@ -814,6 +830,7 @@ node->children = node->data = NULL; node->childlen = node->datalen = 0; node->parent = parent; + domain_entry_inc(conn); return node; } @@ -848,8 +865,10 @@ /* We write out the nodes down, setting destructor in case * something goes wrong. */ for (i = node; i; i = i->parent) { - if (!write_node(conn, i)) + if (!write_node(conn, i)) { + domain_entry_dec(conn); return NULL; + } talloc_set_destructor(i, destroy_node); } @@ -1706,6 +1725,9 @@ " --no-fork to request that the daemon does not fork,\n" " --output-pid to request that the pid of the daemon is output,\n" " --trace-file <file> giving the file for logging, and\n" +" --entry-nb <nb> limit the number of entries per domain,\n" +" --entry-size <size> limit the size of entry per domain, and\n" +" --entry-watch <nb> limit the number of watches per domain,\n" " --no-recovery to request that no recovery should be attempted when\n" " the store is corrupted (debug only),\n" " --preserve-local to request that /local is preserved on start-up,\n" @@ -1715,14 +1737,17 @@ static struct option options[] = { { "no-domain-init", 0, NULL, 'D' }, + { "entry-nb", 1, NULL, 'E' }, { "pid-file", 1, NULL, 'F' }, { "help", 0, NULL, 'H' }, { "no-fork", 0, NULL, 'N' }, { "output-pid", 0, NULL, 'P' }, + { "entry-size", 1, NULL, 'S' }, { "trace-file", 1, NULL, 'T' }, { "no-recovery", 0, NULL, 'R' }, { "preserve-local", 0, NULL, 'L' }, { "verbose", 0, NULL, 'V' }, + { "watch-nb", 1, NULL, 'W' }, { NULL, 0, NULL, 0 } }; extern void dump_conn(struct connection *conn); @@ -1737,11 +1762,14 @@ bool no_domain_init = false; const char *pidfile = NULL; - while ((opt = getopt_long(argc, argv, "DF:HNPT:RLV", options, + while ((opt = getopt_long(argc, argv, "DE:F:HNPS:T:RLVW:", options, NULL)) != -1) { switch (opt) { case 'D': no_domain_init = true; + break; + case 'E': + quota_nb_entry_per_domain = strtol(optarg, NULL, 10); break; case 'F': pidfile = optarg; @@ -1761,11 
+1789,17 @@ case 'L': remove_local = false; break; + case 'S': + quota_max_entry_size = strtol(optarg, NULL, 10); + break; case 'T': tracefile = optarg; break; case 'V': verbose = true; + break; + case 'W': + quota_nb_watch_per_domain = strtol(optarg, NULL, 10); break; } } diff -r 83eb8d81c96f -r 91da9a1b7196 tools/xenstore/xenstored_domain.c --- a/tools/xenstore/xenstored_domain.c Sat Apr 15 18:25:09 2006 +++ b/tools/xenstore/xenstored_domain.c Sat Apr 15 18:25:21 2006 @@ -74,6 +74,12 @@ /* Have we noticed that this domain is shutdown? */ int shutdown; + + /* number of entry from this domain in the store */ + int nbentry; + + /* number of watch for this domain */ + int nbwatch; }; static LIST_HEAD(domains); @@ -285,6 +291,8 @@ domain->conn->id = domid; domain->remote_port = port; + domain->nbentry = 0; + domain->nbwatch = 0; return domain; } @@ -562,6 +570,50 @@ return eventchn_fd; } +void domain_entry_inc(struct connection *conn) +{ + if (!conn || !conn->domain) + return; + conn->domain->nbentry++; +} + +void domain_entry_dec(struct connection *conn) +{ + if (!conn || !conn->domain) + return; + if (conn->domain->nbentry) + conn->domain->nbentry--; +} + +int domain_entry(struct connection *conn) +{ + return (conn && conn->domain && conn->domain->domid) + ? conn->domain->nbentry + : 0; +} + +void domain_watch_inc(struct connection *conn) +{ + if (!conn || !conn->domain) + return; + conn->domain->nbwatch++; +} + +void domain_watch_dec(struct connection *conn) +{ + if (!conn || !conn->domain) + return; + if (conn->domain->nbwatch) + conn->domain->nbwatch--; +} + +int domain_watch(struct connection *conn) +{ + return (conn && conn->domain && conn->domain->domid) + ? 
conn->domain->nbwatch + : 0; +} + /* * Local variables: * c-file-style: "linux" diff -r 83eb8d81c96f -r 91da9a1b7196 tools/xenstore/xenstored_domain.h --- a/tools/xenstore/xenstored_domain.h Sat Apr 15 18:25:09 2006 +++ b/tools/xenstore/xenstored_domain.h Sat Apr 15 18:25:21 2006 @@ -47,4 +47,12 @@ bool domain_can_read(struct connection *conn); bool domain_can_write(struct connection *conn); +/* Quota manipulation */ +void domain_entry_inc(struct connection *conn); +void domain_entry_dec(struct connection *conn); +int domain_entry(struct connection *conn); +void domain_watch_inc(struct connection *conn); +void domain_watch_dec(struct connection *conn); +int domain_watch(struct connection *conn); + #endif /* _XENSTORED_DOMAIN_H */ diff -r 83eb8d81c96f -r 91da9a1b7196 tools/xenstore/xenstored_watch.c --- a/tools/xenstore/xenstored_watch.c Sat Apr 15 18:25:09 2006 +++ b/tools/xenstore/xenstored_watch.c Sat Apr 15 18:25:21 2006 @@ -32,6 +32,8 @@ #include "xenstored_test.h" #include "xenstored_domain.h" +extern int quota_nb_watch_per_domain; + struct watch { /* Watches on this connection */ @@ -135,6 +137,11 @@ } } + if (domain_watch(conn) > quota_nb_watch_per_domain) { + send_error(conn, E2BIG); + return; + } + watch = talloc(conn, struct watch); watch->node = talloc_strdup(watch, vec[0]); watch->token = talloc_strdup(watch, vec[1]); @@ -145,6 +152,7 @@ INIT_LIST_HEAD(&watch->events); + domain_watch_inc(conn); list_add_tail(&watch->list, &conn->watches); trace_create(watch, "watch"); talloc_set_destructor(watch, destroy_watch); @@ -169,6 +177,7 @@ if (streq(watch->node, node) && streq(watch->token, vec[1])) { list_del(&watch->list); talloc_free(watch); + domain_watch_dec(conn); send_ack(conn, XS_UNWATCH); return; } diff -r 83eb8d81c96f -r 91da9a1b7196 xen/Makefile --- a/xen/Makefile Sat Apr 15 18:25:09 2006 +++ b/xen/Makefile Sat Apr 15 18:25:21 2006 @@ -10,19 +10,22 @@ .PHONY: default default: build -ifeq ($(XEN_ROOT),) +.PHONY: dist +dist: install -.PHONY: build 
install clean -build install clean: - make -f Rules.mk $@ +.PHONY: debug +debug: + objdump -D -S $(TARGET)-syms > $(TARGET).s -else +.PHONY: build install clean cscope TAGS tags +build install clean cscope TAGS tags:: + make -f Rules.mk _$@ -.PHONY: build -build: $(TARGET).gz +.PHONY: _build +_build: $(TARGET).gz -.PHONY: install -install: $(TARGET).gz +.PHONY: _install +_install: $(TARGET).gz [ -d $(DESTDIR)/boot ] || $(INSTALL_DIR) $(DESTDIR)/boot $(INSTALL_DATA) $(TARGET).gz $(DESTDIR)/boot/$(notdir $(TARGET))-$(XEN_FULLVERSION).gz ln -f -s $(notdir $(TARGET))-$(XEN_FULLVERSION).gz $(DESTDIR)/boot/$(notdir $(TARGET))-$(XEN_VERSION).$(XEN_SUBVERSION).gz @@ -35,8 +38,8 @@ $(INSTALL_DATA) include/public/io/*.h $(DESTDIR)/usr/include/xen/io $(INSTALL_DATA) include/public/COPYING $(DESTDIR)/usr/include/xen -.PHONY: clean -clean:: delete-unfresh-files +.PHONY: _clean +_clean: delete-unfresh-files $(MAKE) -C tools clean $(MAKE) -f $(BASEDIR)/Rules.mk -C common clean $(MAKE) -f $(BASEDIR)/Rules.mk -C drivers clean @@ -45,15 +48,6 @@ rm -f include/asm *.o $(TARGET)* *~ core rm -f include/asm-*/asm-offsets.h rm -f include/xen/acm_policy.h - -endif - -.PHONY: dist -dist: install - -.PHONY: debug -debug: - objdump -D -S $(TARGET)-syms > $(TARGET).s $(TARGET).gz: $(TARGET) gzip -f -9 < $< > $@.new @@ -135,16 +129,16 @@ find $(SUBDIRS) -name SCCS -prune -o -name '*.[chS]' -print ) endef -.PHONY: TAGS -TAGS: +.PHONY: _TAGS +_TAGS: $(all_sources) | etags - -.PHONY: tags -tags: +.PHONY: _tags +_tags: $(all_sources) | xargs ctags -.PHONY: cscope -cscope: +.PHONY: _cscope +_cscope: $(all_sources) > cscope.files cscope -k -b -q diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/Makefile Sat Apr 15 18:25:21 2006 @@ -76,6 +76,7 @@ $(HOSTCC) $(HOSTCFLAGS) -o $@ $< shadow_guest32.o: shadow.c +shadow_guest32pae.o: shadow.c .PHONY: clean clean:: diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/audit.c --- 
a/xen/arch/x86/audit.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/audit.c Sat Apr 15 18:25:21 2006 @@ -639,7 +639,7 @@ void scan_for_pfn_in_grant_table(struct domain *d, unsigned xmfn) { int i; - active_grant_entry_t *act = d->grant_table->active; + struct active_grant_entry *act = d->grant_table->active; spin_lock(&d->grant_table->lock); diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/hvm/intercept.c Sat Apr 15 18:25:21 2006 @@ -208,8 +208,9 @@ static void pit_cal_count(struct hvm_virpit *vpit) { - u64 nsec_delta = (unsigned int)((NOW() - vpit->inject_point)); - + u64 nsec_delta = (unsigned int)((NOW() - vpit->count_point)); + + nsec_delta += vpit->count_advance; if (nsec_delta > vpit->period) HVM_DBG_LOG(DBG_LEVEL_1, "HVM_PIT: long time has passed from last injection!"); diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/hvm/svm/intr.c Sat Apr 15 18:25:21 2006 @@ -79,7 +79,8 @@ } else { vpit->pending_intr_nr--; } - vpit->inject_point = NOW(); + vpit->count_advance = 0; + vpit->count_point = NOW(); vpit->last_pit_gtime += vpit->period_cycles; svm_set_guest_time(v, vpit->last_pit_gtime); diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/hvm/svm/svm.c Sat Apr 15 18:25:21 2006 @@ -315,19 +315,29 @@ { case MSR_EFER: #ifdef __x86_64__ - if ((msr_content & EFER_LME) ^ test_bit(SVM_CPU_STATE_LME_ENABLED, - &vc->arch.hvm_svm.cpu_state)) + /* offending reserved bit will cause #GP */ + if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) ) { - if (test_bit(SVM_CPU_STATE_PG_ENABLED, &vc->arch.hvm_svm.cpu_state) - || !test_bit(SVM_CPU_STATE_PAE_ENABLED, - &vc->arch.hvm_svm.cpu_state)) + printk("trying to set reserved bit in EFER\n"); + svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0); + 
return 0; + } + + /* LME: 0 -> 1 */ + if ( msr_content & EFER_LME && + !test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state) ) + { + if ( svm_paging_enabled(vc) || + !test_bit(SVM_CPU_STATE_PAE_ENABLED, + &vc->arch.hvm_svm.cpu_state) ) { + printk("trying to set LME bit when " + "in paging mode or PAE bit is not set\n"); svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0); + return 0; } - } - - if (msr_content & EFER_LME) set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state); + } /* We have already recorded that we want LME, so it will be set * next time CR0 gets updated. So we clear that bit and continue. @@ -669,6 +679,7 @@ if ( vpit->first_injected && !v->domain->arch.hvm_domain.guest_time ) { v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v); + vpit->count_advance += (NOW() - vpit->count_point); stop_timer(&(vpit->pit_timer)); } } @@ -757,7 +768,8 @@ reset_stack_and_jump( svm_asm_do_resume ); } else { - printk("VCPU core pinned: %d to %d\n", v->arch.hvm_svm.launch_core, smp_processor_id() ); + printk("VCPU core pinned: %d to %d\n", + v->arch.hvm_svm.launch_core, smp_processor_id() ); v->arch.hvm_svm.launch_core = smp_processor_id(); svm_migrate_timers( v ); svm_do_resume( v ); @@ -922,6 +934,7 @@ clear_bit(X86_FEATURE_APIC, &edx); #if CONFIG_PAGING_LEVELS < 3 + clear_bit(X86_FEATURE_NX, &edx); clear_bit(X86_FEATURE_PAE, &edx); clear_bit(X86_FEATURE_PSE, &edx); clear_bit(X86_FEATURE_PSE36, &edx); @@ -929,12 +942,14 @@ if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 ) { if ( !v->domain->arch.hvm_domain.pae_enabled ) - clear_bit(X86_FEATURE_PAE, &edx); + { + clear_bit(X86_FEATURE_PAE, &edx); + clear_bit(X86_FEATURE_NX, &edx); + } clear_bit(X86_FEATURE_PSE, &edx); clear_bit(X86_FEATURE_PSE36, &edx); } -#endif - +#endif /* Clear out reserved bits. 
*/ ecx &= ~SVM_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */ clear_bit(X86_FEATURE_MWAIT & 31, &ecx); @@ -1312,8 +1327,7 @@ unsigned long mfn; int paging_enabled; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - unsigned long crn; - + ASSERT(vmcb); /* We don't want to lose PG. ET is reserved and should be always be 1*/ @@ -1358,35 +1372,37 @@ set_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state); vmcb->efer |= (EFER_LMA | EFER_LME); - -#if CONFIG_PAGING_LEVELS >= 4 - if (!shadow_set_guest_paging_levels(v->domain, 4)) + if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) ) { printk("Unsupported guest paging levels\n"); domain_crash_synchronous(); /* need to take a clean path */ } -#endif } else #endif /* __x86_64__ */ { #if CONFIG_PAGING_LEVELS >= 3 - if (!shadow_set_guest_paging_levels(v->domain, 2)) + /* seems it's a 32-bit or 32-bit PAE guest */ + if ( test_bit(SVM_CPU_STATE_PAE_ENABLED, + &v->arch.hvm_svm.cpu_state) ) { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); /* need to take a clean path */ + /* The guest enables PAE first and then it enables PG, it is + * really a PAE guest */ + if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) ) + { + printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); + } + } + else + { + if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) ) + { + printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); /* need to take a clean path */ + } } #endif - } - - /* update CR4's PAE if needed */ - crn = vmcb->cr4; - if ((!(crn & X86_CR4_PAE)) - && test_bit(SVM_CPU_STATE_PAE_ENABLED, - &v->arch.hvm_svm.cpu_state)) - { - HVM_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n"); - vmcb->cr4 |= X86_CR4_PAE; } /* Now arch.guest_table points to machine physical. 
*/ @@ -1402,7 +1418,16 @@ /* arch->shadow_table should hold the next CR3 for shadow */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n", v->arch.hvm_svm.cpu_cr3, mfn); - } + + return 1; + } + + if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled ) + if ( v->arch.hvm_svm.cpu_cr3 ) { + put_page(mfn_to_page(get_mfn_from_gpfn( + v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))); + v->arch.guest_table = mk_pagetable(0); + } /* * SVM implements paged real-mode and when we return to real-mode @@ -1415,6 +1440,14 @@ return 0; } + clear_all_shadow_status( v->domain ); + set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags); + vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table); + } + else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE ) + { + /* we should take care of this kind of situation */ + clear_all_shadow_status(v->domain); set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags); vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table); } @@ -1438,15 +1471,21 @@ { case 0: value = v->arch.hvm_svm.cpu_shadow_cr0; - break; + if (svm_dbg_on) + printk("CR0 read =%lx \n", value ); + break; case 2: value = vmcb->cr2; break; case 3: value = (unsigned long) v->arch.hvm_svm.cpu_cr3; - break; + if (svm_dbg_on) + printk("CR3 read =%lx \n", value ); + break; case 4: value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4; + if (svm_dbg_on) + printk( "CR4 read=%lx\n", value ); break; case 8: #if 0 @@ -1466,6 +1505,12 @@ } +static inline int svm_pgbit_test(struct vcpu *v) +{ + return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG; +} + + /* * Write to control registers */ @@ -1486,12 +1531,15 @@ switch (cr) { case 0: + if (svm_dbg_on) + printk("CR0 write =%lx \n", value ); return svm_set_cr0(value); case 3: { unsigned long old_base_mfn, mfn; - + if (svm_dbg_on) + printk("CR3 write =%lx \n", value ); /* If paging is not enabled yet, simply copy the value to CR3. 
*/ if (!svm_paging_enabled(v)) { v->arch.hvm_svm.cpu_cr3 = value; @@ -1533,19 +1581,104 @@ if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); + /* + * arch.shadow_table should now hold the next CR3 for shadow + */ +#if CONFIG_PAGING_LEVELS >= 3 + if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 ) + shadow_sync_all(v->domain); +#endif + v->arch.hvm_svm.cpu_cr3 = value; update_pagetables(v); - - /* arch.shadow_table should now hold the next CR3 for shadow*/ - v->arch.hvm_svm.cpu_cr3 = value; HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table); } break; } - case 4: - /* CR4 */ - if (value & X86_CR4_PAE) { + case 4: /* CR4 */ + { + if (svm_dbg_on) + printk( "write cr4=%lx, cr0=%lx\n", + value, v->arch.hvm_svm.cpu_shadow_cr0 ); + old_cr = v->arch.hvm_svm.cpu_shadow_cr4; + if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) ) + { + set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); + if ( svm_pgbit_test(v) ) + { + /* The guest is a 32-bit PAE guest. */ +#if CONFIG_PAGING_LEVELS >= 4 + unsigned long mfn, old_base_mfn; + + if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) ) + { + printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); /* need to take a clean path */ + } + + if ( !VALID_MFN(mfn = get_mfn_from_gpfn( + v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) || + !get_page(mfn_to_page(mfn), v->domain) ) + { + printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3); + domain_crash_synchronous(); /* need to take a clean path */ + } + + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); + if ( old_base_mfn ) + put_page(mfn_to_page(old_base_mfn)); + + /* + * Now arch.guest_table points to machine physical. 
+ */ + + v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT); + update_pagetables(v); + + HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", + (unsigned long) (mfn << PAGE_SHIFT)); + + vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table); + + /* + * arch->shadow_table should hold the next CR3 for shadow + */ + + HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", + v->arch.hvm_svm.cpu_cr3, mfn); +#endif + } + else + { + /* The guest is a 64 bit or 32-bit PAE guest. */ +#if CONFIG_PAGING_LEVELS >= 4 + if ( (v->domain->arch.ops != NULL) && + v->domain->arch.ops->guest_paging_levels == PAGING_L2) + { + /* Seems the guest first enables PAE without enabling PG, + * it must enable PG after that, and it is a 32-bit PAE + * guest */ + + if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) ) + { + printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); + } + } + else + { + if ( !shadow_set_guest_paging_levels(v->domain, + PAGING_L4) ) + { + printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); + } + } +#endif + } + } + else if (value & X86_CR4_PAE) { set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); } else { if (test_bit(SVM_CPU_STATE_LMA_ENABLED, @@ -1555,7 +1688,6 @@ clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); } - old_cr = v->arch.hvm_svm.cpu_shadow_cr4; v->arch.hvm_svm.cpu_shadow_cr4 = value; vmcb->cr4 = value | SVM_CR4_HOST_MASK; @@ -1569,6 +1701,7 @@ shadow_sync_all(v->domain); } break; + } default: printk("invalid cr: %d\n", cr); @@ -1933,6 +2066,7 @@ vmcb->cr4 = SVM_CR4_HOST_MASK; v->arch.hvm_svm.cpu_shadow_cr4 = 0; + clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); /* This will jump to ROMBIOS */ vmcb->rip = 0xFFF0; @@ -1989,6 +2123,7 @@ vmcb->idtr.base = 0x00; vmcb->rax = 0; + vmcb->rsp = 0; return 0; } @@ -2280,7 +2415,8 @@ gpte.l1 = 0; __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], sizeof(gpte) ); printk( "G-PTE = %x, 
flags=%x\n", gpte.l1, l1e_get_flags(gpte) ); - __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ], sizeof(spte) ); + __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ], + sizeof(spte) ); printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte)); } #endif /* SVM_WALK_GUEST_PAGES */ @@ -2313,6 +2449,17 @@ if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) { if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2))) + { + printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, gpa=%llx\n", + intercepts_counter, + exit_reasons[exit_reason], exit_reason, regs.cs, + (unsigned long long) regs.rip, + (unsigned long long) vmcb->exitinfo1, + (unsigned long long) vmcb->exitinfo2, + (unsigned long long) vmcb->exitintinfo.bytes, + (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) ); + } + else { printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n", intercepts_counter, @@ -2320,12 +2467,12 @@ (unsigned long long) regs.rip, (unsigned long long) vmcb->exitinfo1, (unsigned long long) vmcb->exitinfo2, - (unsigned long long) vmcb->exitintinfo.bytes); + (unsigned long long) vmcb->exitintinfo.bytes ); } } - else if (svm_dbg_on - && exit_reason != VMEXIT_IOIO - && exit_reason != VMEXIT_INTR) + else if ( svm_dbg_on + && exit_reason != VMEXIT_IOIO + && exit_reason != VMEXIT_INTR) { if (exit_reasons[exit_reason]) @@ -2350,7 +2497,9 @@ } #ifdef SVM_WALK_GUEST_PAGES - if( exit_reason == VMEXIT_EXCEPTION_PF && ( ( vmcb->exitinfo2 == vmcb->rip )|| vmcb->exitintinfo.bytes) ) + if( exit_reason == VMEXIT_EXCEPTION_PF + && ( ( vmcb->exitinfo2 == vmcb->rip ) + || vmcb->exitintinfo.bytes) ) { if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2))) walk_shadow_and_guest_pt( vmcb->exitinfo2 ); @@ -2434,13 +2583,24 @@ */ break; + case VMEXIT_INIT: + /* + * Nothing to do, in fact we should never get to this point. 
+ */ + break; + + case VMEXIT_EXCEPTION_BP: #ifdef XEN_DEBUGGER - case VMEXIT_EXCEPTION_BP: svm_debug_save_cpu_user_regs(®s); pdb_handle_exception(3, ®s, 1); svm_debug_restore_cpu_user_regs(®s); - break; +#else + if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) ) + domain_pause_for_debugger(); + else + svm_inject_exception(vmcb, TRAP_int3, 0, 0); #endif + break; case VMEXIT_EXCEPTION_NM: svm_do_no_device_fault(vmcb); diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/hvm/svm/vmcb.c Sat Apr 15 18:25:21 2006 @@ -257,7 +257,8 @@ /* CR3 is set in svm_final_setup_guest */ __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) :); - arch_svm->cpu_shadow_cr4 = crn & ~(X86_CR4_PGE | X86_CR4_PSE); + crn &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE); + arch_svm->cpu_shadow_cr4 = crn; vmcb->cr4 = crn | SVM_CR4_HOST_MASK; vmcb->rsp = 0; @@ -484,6 +485,7 @@ if ( vpit->first_injected ) { if ( v->domain->arch.hvm_domain.guest_time ) { svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time); + vpit->count_point = NOW(); v->domain->arch.hvm_domain.guest_time = 0; } pickup_deactive_ticks(vpit); diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/hvm/vmx/io.c Sat Apr 15 18:25:21 2006 @@ -84,7 +84,8 @@ } else { vpit->pending_intr_nr--; } - vpit->inject_point = NOW(); + vpit->count_advance = 0; + vpit->count_point = NOW(); vpit->last_pit_gtime += vpit->period_cycles; set_guest_time(v, vpit->last_pit_gtime); @@ -208,6 +209,7 @@ /* pick up the elapsed PIT ticks and re-enable pit_timer */ if ( vpit->first_injected ) { if ( v->domain->arch.hvm_domain.guest_time ) { + vpit->count_point = NOW(); set_guest_time(v, v->domain->arch.hvm_domain.guest_time); v->domain->arch.hvm_domain.guest_time = 0; } diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Sat Apr 15 
18:25:09 2006 +++ b/xen/arch/x86/hvm/vmx/vmx.c Sat Apr 15 18:25:21 2006 @@ -362,6 +362,7 @@ if ( vpit->first_injected && !v->domain->arch.hvm_domain.guest_time ) { v->domain->arch.hvm_domain.guest_time = get_guest_time(v); + vpit->count_advance += (NOW() - vpit->count_point); stop_timer(&(vpit->pit_timer)); } } diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Sat Apr 15 18:25:21 2006 @@ -94,6 +94,7 @@ ENTRY(vmx_asm_vmexit_handler) /* selectors are restored/saved by VMX */ HVM_SAVE_ALL_NOSEGREGS + call vmx_trace_vmexit call vmx_vmexit_handler jmp vmx_asm_do_resume @@ -114,6 +115,7 @@ /* vmx_restore_all_guest */ call vmx_intr_assist call vmx_load_cr2 + call vmx_trace_vmentry .endif /* * Check if we are going back to VMX-based VM diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/i8259.c --- a/xen/arch/x86/i8259.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/i8259.c Sat Apr 15 18:25:21 2006 @@ -318,7 +318,7 @@ * outb_p - this has to work on a wide range of PC hardware. 
*/ outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ - outb_p(0x20 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ + outb_p(FIRST_LEGACY_VECTOR + 0, 0x21); /* ICW2: 8259A-1 IR0-7 */ outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */ if (auto_eoi) outb_p(0x03, 0x21); /* master does Auto EOI */ @@ -326,7 +326,7 @@ outb_p(0x01, 0x21); /* master expects normal EOI */ outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ - outb_p(0x20 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ + outb_p(FIRST_LEGACY_VECTOR + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 */ outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */ outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode is to be investigated) */ diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/io_apic.c Sat Apr 15 18:25:21 2006 @@ -202,6 +202,18 @@ __modify_IO_APIC_irq(irq, 0x00008000, 0); } +/* mask = 1, trigger = 0 */ +static void __mask_and_edge_IO_APIC_irq (unsigned int irq) +{ + __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); +} + +/* mask = 0, trigger = 1 */ +static void __unmask_and_level_IO_APIC_irq (unsigned int irq) +{ + __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); +} + static void mask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -657,11 +669,11 @@ } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; int assign_irq_vector(int irq) { - static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; + static int current_vector = FIRST_DYNAMIC_VECTOR, offset = 0; BUG_ON(irq >= NR_IRQ_VECTORS); if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) @@ -677,11 +689,11 @@ if (current_vector == 0x80) goto next; - if (current_vector >= FIRST_SYSTEM_VECTOR) { + if (current_vector > LAST_DYNAMIC_VECTOR) { offset++; if (!(offset%8)) return -ENOSPC; - current_vector = FIRST_DEVICE_VECTOR + offset; + current_vector = FIRST_DYNAMIC_VECTOR + offset; } vector_irq[current_vector] = irq; @@ -1321,10 +1333,25 @@ return 0; /* don't check for pending */ } +int ioapic_ack_new = 1; +static void setup_ioapic_ack(char *s) +{ + if ( !strcmp(s, "old") ) + ioapic_ack_new = 0; + else if ( !strcmp(s, "new") ) + ioapic_ack_new = 1; + else + printk("Unknown ioapic_ack value specified: '%s'\n", s); +} +custom_param("ioapic_ack", setup_ioapic_ack); + static void mask_and_ack_level_ioapic_irq (unsigned int irq) { unsigned long v; int i; + + if ( ioapic_ack_new ) + return; mask_IO_APIC_irq(irq); /* @@ -1363,7 +1390,47 @@ static void end_level_ioapic_irq (unsigned int irq) { - unmask_IO_APIC_irq(irq); + unsigned long v; + int i; + + if ( !ioapic_ack_new ) + { + unmask_IO_APIC_irq(irq); + return; + } + +/* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. 
The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = IO_APIC_VECTOR(irq); + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); + + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(irq); + __unmask_and_level_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); + } } static unsigned int startup_edge_ioapic_vector(unsigned int vector) @@ -1695,6 +1762,7 @@ io_apic_irqs = ~PIC_IRQS; printk("ENABLING IO-APIC IRQs\n"); + printk(" -> Using %s ACK method\n", ioapic_ack_new ? "new" : "old"); /* * Set up IO-APIC IRQ routing. 
@@ -1956,9 +2024,9 @@ return 0; } - if ( old_rte.vector >= FIRST_DEVICE_VECTOR ) + if ( old_rte.vector >= FIRST_DYNAMIC_VECTOR ) old_irq = vector_irq[old_rte.vector]; - if ( new_rte.vector >= FIRST_DEVICE_VECTOR ) + if ( new_rte.vector >= FIRST_DYNAMIC_VECTOR ) new_irq = vector_irq[new_rte.vector]; if ( (old_irq != new_irq) && (old_irq != -1) && IO_APIC_IRQ(old_irq) ) diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/irq.c Sat Apr 15 18:25:21 2006 @@ -148,8 +148,23 @@ u8 nr_guests; u8 in_flight; u8 shareable; + u8 ack_type; +#define ACKTYPE_NONE 0 /* No final acknowledgement is required */ +#define ACKTYPE_UNMASK 1 /* Unmask PIC hardware (from any CPU) */ +#define ACKTYPE_LAPIC_EOI 2 /* EOI on the CPU that was interrupted */ + cpumask_t cpu_eoi_map; /* CPUs that need to EOI this interrupt */ struct domain *guest[IRQ_MAX_GUESTS]; } irq_guest_action_t; + +/* + * Stack of interrupts awaiting EOI on each CPU. These must be popped in + * order, as only the current highest-priority pending irq can be EOIed. + */ +static struct { + u8 vector; + u8 ready_to_end; +} pending_lapic_eoi[NR_CPUS][NR_VECTORS] __cacheline_aligned; +#define pending_lapic_eoi_sp(cpu) (pending_lapic_eoi[cpu][NR_VECTORS-1].vector) static void __do_IRQ_guest(int vector) { @@ -157,36 +172,153 @@ irq_desc_t *desc = &irq_desc[vector]; irq_guest_action_t *action = (irq_guest_action_t *)desc->action; struct domain *d; - int i; + int i, sp, cpu = smp_processor_id(); + + if ( unlikely(action->nr_guests == 0) ) + { + /* An interrupt may slip through while freeing a LAPIC_EOI irq. 
*/ + ASSERT(action->ack_type == ACKTYPE_LAPIC_EOI); + desc->handler->end(vector); + return; + } + + if ( action->ack_type == ACKTYPE_LAPIC_EOI ) + { + sp = pending_lapic_eoi_sp(cpu); + ASSERT((sp == 0) || (pending_lapic_eoi[cpu][sp-1].vector < vector)); + ASSERT(sp < (NR_VECTORS-1)); + pending_lapic_eoi[cpu][sp].vector = vector; + pending_lapic_eoi[cpu][sp].ready_to_end = 0; + pending_lapic_eoi_sp(cpu) = sp+1; + cpu_set(cpu, action->cpu_eoi_map); + } for ( i = 0; i < action->nr_guests; i++ ) { d = action->guest[i]; - if ( !test_and_set_bit(irq, &d->pirq_mask) ) + if ( (action->ack_type != ACKTYPE_NONE) && + !test_and_set_bit(irq, &d->pirq_mask) ) action->in_flight++; send_guest_pirq(d, irq); } } +static void end_guest_irq(void *data) +{ + irq_desc_t *desc = data; + irq_guest_action_t *action = (irq_guest_action_t *)desc->action; + unsigned long flags; + int vector, sp, cpu = smp_processor_id(); + + vector = desc - irq_desc; + + spin_lock_irqsave(&desc->lock, flags); + + if ( (desc->status & IRQ_GUEST) && + (action->in_flight == 0) && + test_and_clear_bit(cpu, &action->cpu_eoi_map) ) + { + sp = pending_lapic_eoi_sp(cpu); + do { + ASSERT(sp > 0); + } while ( pending_lapic_eoi[cpu][--sp].vector != vector ); + ASSERT(!pending_lapic_eoi[cpu][sp].ready_to_end); + pending_lapic_eoi[cpu][sp].ready_to_end = 1; + } + + for ( ; ; ) + { + sp = pending_lapic_eoi_sp(cpu); + if ( (sp == 0) || !pending_lapic_eoi[cpu][sp-1].ready_to_end ) + { + spin_unlock_irqrestore(&desc->lock, flags); + return; + } + if ( pending_lapic_eoi[cpu][sp-1].vector != vector ) + { + spin_unlock(&desc->lock); + vector = pending_lapic_eoi[cpu][sp-1].vector; + desc = &irq_desc[vector]; + spin_lock(&desc->lock); + } + desc->handler->end(vector); + pending_lapic_eoi_sp(cpu) = sp-1; + } +} + int pirq_guest_unmask(struct domain *d) { - irq_desc_t *desc; - unsigned int pirq; - shared_info_t *s = d->shared_info; + irq_desc_t *desc; + irq_guest_action_t *action; + cpumask_t cpu_eoi_map = CPU_MASK_NONE; + unsigned 
int pirq, cpu = smp_processor_id(); + shared_info_t *s = d->shared_info; for ( pirq = find_first_bit(d->pirq_mask, NR_PIRQS); pirq < NR_PIRQS; pirq = find_next_bit(d->pirq_mask, NR_PIRQS, pirq+1) ) { - desc = &irq_desc[irq_to_vector(pirq)]; + desc = &irq_desc[irq_to_vector(pirq)]; + action = (irq_guest_action_t *)desc->action; + spin_lock_irq(&desc->lock); if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) && - test_and_clear_bit(pirq, &d->pirq_mask) && - (--((irq_guest_action_t *)desc->action)->in_flight == 0) ) - desc->handler->end(irq_to_vector(pirq)); + test_and_clear_bit(pirq, &d->pirq_mask) ) + { + ASSERT(action->ack_type != ACKTYPE_NONE); + if ( --action->in_flight == 0 ) + { + if ( action->ack_type == ACKTYPE_UNMASK ) + desc->handler->end(irq_to_vector(pirq)); + cpu_eoi_map = action->cpu_eoi_map; + } + } spin_unlock_irq(&desc->lock); - } - + + if ( __test_and_clear_bit(cpu, &cpu_eoi_map) ) + end_guest_irq(desc); + + if ( !cpus_empty(cpu_eoi_map) ) + { + on_selected_cpus(cpu_eoi_map, end_guest_irq, desc, 1, 0); + cpu_eoi_map = CPU_MASK_NONE; + } + } + + return 0; +} + +extern int ioapic_ack_new; +int pirq_acktype(int irq) +{ + irq_desc_t *desc; + unsigned int vector; + + vector = irq_to_vector(irq); + if ( vector == 0 ) + return ACKTYPE_NONE; + + desc = &irq_desc[vector]; + + /* + * Edge-triggered IO-APIC interrupts need no final acknowledgement: + * we ACK early during interrupt processing. + */ + if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ) + return ACKTYPE_NONE; + + /* Legacy PIC interrupts can be acknowledged from any CPU. */ + if ( !strcmp(desc->handler->typename, "XT-PIC") ) + return ACKTYPE_UNMASK; + + /* + * Level-triggered IO-APIC interrupts need to be acknowledged on the CPU + * on which they were received. This is because we tickle the LAPIC to EOI. + */ + if ( !strcmp(desc->handler->typename, "IO-APIC-level") ) + return ioapic_ack_new ? 
ACKTYPE_LAPIC_EOI : ACKTYPE_UNMASK; + + BUG(); return 0; } @@ -230,10 +362,12 @@ goto out; } - action->nr_guests = 0; - action->in_flight = 0; - action->shareable = will_share; - + action->nr_guests = 0; + action->in_flight = 0; + action->shareable = will_share; + action->ack_type = pirq_acktype(irq); + action->cpu_eoi_map = CPU_MASK_NONE; + desc->depth = 0; desc->status |= IRQ_GUEST; desc->status &= ~IRQ_DISABLED; @@ -271,6 +405,7 @@ unsigned int vector = irq_to_vector(irq); irq_desc_t *desc = &irq_desc[vector]; irq_guest_action_t *action; + cpumask_t cpu_eoi_map; unsigned long flags; int i; @@ -280,28 +415,60 @@ action = (irq_guest_action_t *)desc->action; - if ( test_and_clear_bit(irq, &d->pirq_mask) && - (--action->in_flight == 0) ) - desc->handler->end(vector); - - if ( action->nr_guests == 1 ) - { - desc->action = NULL; - xfree(action); - desc->depth = 1; - desc->status |= IRQ_DISABLED; - desc->status &= ~IRQ_GUEST; - desc->handler->shutdown(vector); - } - else - { - i = 0; - while ( action->guest[i] && (action->guest[i] != d) ) - i++; - memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1); - action->nr_guests--; - } - + i = 0; + while ( action->guest[i] && (action->guest[i] != d) ) + i++; + memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1); + action->nr_guests--; + + switch ( action->ack_type ) + { + case ACKTYPE_UNMASK: + if ( test_and_clear_bit(irq, &d->pirq_mask) && + (--action->in_flight == 0) ) + desc->handler->end(vector); + break; + case ACKTYPE_LAPIC_EOI: + if ( test_and_clear_bit(irq, &d->pirq_mask) ) + --action->in_flight; + while ( action->in_flight == 0 ) + { + /* We cannot release guest info until all pending ACKs are done. */ + cpu_eoi_map = action->cpu_eoi_map; + if ( cpus_empty(cpu_eoi_map) ) + break; + + /* We cannot hold the lock while interrupting other CPUs. 
*/ + spin_unlock_irqrestore(&desc->lock, flags); + on_selected_cpus(cpu_eoi_map, end_guest_irq, desc, 1, 1); + spin_lock_irqsave(&desc->lock, flags); + + /* The world can change while we do not hold the lock. */ + if ( !(desc->status & IRQ_GUEST) ) + goto out; + if ( (action->ack_type != ACKTYPE_LAPIC_EOI) || + (action->nr_guests != 0) ) + break; + } + break; + } + + BUG_ON(test_bit(irq, &d->pirq_mask)); + + if ( action->nr_guests != 0 ) + goto out; + + BUG_ON(action->in_flight != 0); + BUG_ON(!cpus_empty(action->cpu_eoi_map)); + + desc->action = NULL; + xfree(action); + desc->depth = 1; + desc->status |= IRQ_DISABLED; + desc->status &= ~IRQ_GUEST; + desc->handler->shutdown(vector); + + out: spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -373,3 +540,61 @@ return 0; } __initcall(setup_dump_irqs); + +static struct timer end_irq_timer[NR_CPUS]; + +static void end_irq_timeout(void *unused) +{ + irq_desc_t *desc; + irq_guest_action_t *action; + cpumask_t cpu_eoi_map; + unsigned int cpu = smp_processor_id(); + int sp, vector, i; + + local_irq_disable(); + + if ( (sp = pending_lapic_eoi_sp(cpu)) == 0 ) + { + local_irq_enable(); + return; + } + + vector = pending_lapic_eoi[cpu][sp-1].vector; + ASSERT(!pending_lapic_eoi[cpu][sp-1].ready_to_end); + + desc = &irq_desc[vector]; + spin_lock(&desc->lock); + action = (irq_guest_action_t *)desc->action; + ASSERT(action->ack_type == ACKTYPE_LAPIC_EOI); + ASSERT(desc->status & IRQ_GUEST); + for ( i = 0; i < action->nr_guests; i++ ) + clear_bit(vector_to_irq(vector), &action->guest[i]->pirq_mask); + action->in_flight = 0; + cpu_eoi_map = action->cpu_eoi_map; + spin_unlock(&desc->lock); + + local_irq_enable(); + + if ( !cpus_empty(cpu_eoi_map) ) + on_selected_cpus(cpu_eoi_map, end_guest_irq, desc, 1, 0); + + set_timer(&end_irq_timer[cpu], NOW() + MILLISECS(1000)); +} + +static void __init __setup_irq_timeout(void *unused) +{ + int cpu = smp_processor_id(); + init_timer(&end_irq_timer[cpu], end_irq_timeout, NULL, cpu); + 
set_timer(&end_irq_timer[cpu], NOW() + MILLISECS(1000)); +} + +static int force_intack; +boolean_param("force_intack", force_intack); + +static int __init setup_irq_timeout(void) +{ + if ( force_intack ) + on_each_cpu(__setup_irq_timeout, NULL, 1, 1); + return 0; +} +__initcall(setup_irq_timeout); diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/physdev.c Sat Apr 15 18:25:21 2006 @@ -18,6 +18,9 @@ extern int ioapic_guest_write( unsigned long physbase, unsigned int reg, u32 pval); +extern int +pirq_acktype( + int irq); /* * Demuxing hypercall. @@ -43,8 +46,7 @@ if ( (irq < 0) || (irq >= NR_IRQS) ) break; op.u.irq_status_query.flags = 0; - /* Edge-triggered interrupts don't need an explicit unmask downcall. */ - if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") ) + if ( pirq_acktype(irq) != 0 ) op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY; ret = 0; break; diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/shadow.c Sat Apr 15 18:25:21 2006 @@ -1531,14 +1531,10 @@ idx = get_cr3_idxval(v); smfn = __shadow_status( - d, ((unsigned long)(idx << PGT_score_shift) | entry->gpfn), PGT_l4_shadow); - -#ifndef NDEBUG + d, ((unsigned long)(idx << PGT_pae_idx_shift) | entry->gpfn), PGT_l4_shadow); + if ( !smfn ) - { - BUG(); - } -#endif + continue; guest = (pgentry_64_t *)map_domain_page(entry->gmfn); snapshot = (pgentry_64_t *)map_domain_page(entry->snapshot_mfn); @@ -1550,9 +1546,35 @@ if ( entry_has_changed( guest[index], snapshot[index], PAGE_FLAG_MASK) ) { + unsigned long gpfn; + + /* + * Looks like it's no longer a page table. 
+ */ + if ( unlikely(entry_get_value(guest[index]) & PAE_PDPT_RESERVED) ) + { + if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(shadow_l3[i])); + + shadow_l3[i] = entry_empty(); + continue; + } + + gpfn = entry_get_pfn(guest[index]); + + if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) ) + { + if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(shadow_l3[i])); + + shadow_l3[i] = entry_empty(); + continue; + } + validate_entry_change(d, &guest[index], &shadow_l3[i], PAGING_L3); } + if ( entry_get_value(guest[index]) != 0 ) max = i; @@ -1675,6 +1697,19 @@ guest_l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) ) { int error; + +#if CONFIG_PAGING_LEVELS == 4 + unsigned long gpfn; + + gpfn = guest_l1e_get_paddr(guest1[i]) >> PAGE_SHIFT; + + if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) ) + { + guest_l1_pgentry_t tmp_gl1e = guest_l1e_empty(); + validate_pte_change(d, tmp_gl1e, sl1e_p); + continue; + } +#endif error = validate_pte_change(d, guest1[i], sl1e_p); if ( error == -1 ) @@ -1698,6 +1733,7 @@ perfc_incrc(resync_l1); perfc_incr_histo(wpt_updates, changed, PT_UPDATES); perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES); + if ( d->arch.ops->guest_paging_levels >= PAGING_L3 && unshadow_l1 ) { pgentry_64_t l2e = { 0 }; @@ -1804,18 +1840,22 @@ for ( i = min_shadow; i <= max_shadow; i++ ) { if ( (i < min_snapshot) || (i > max_snapshot) || - entry_has_changed( - guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) ) + entry_has_changed( + guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) ) { - unsigned long gpfn; gpfn = entry_get_pfn(guest_pt[i]); /* - * Looks like it's longer a page table. + * Looks like it's no longer a page table. 
*/ if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) ) + { + if ( entry_get_flags(shadow_pt[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(shadow_pt[i])); + shadow_pt[i] = entry_empty(); continue; + } need_flush |= validate_entry_change( d, &guest_pt[i], &shadow_pt[i], @@ -1864,11 +1904,17 @@ unsigned long gpfn; gpfn = l4e_get_pfn(new_root_e); + /* - * Looks like it's longer a page table. + * Looks like it's no longer a page table. */ if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) ) + { + if ( l4e_get_flags(shadow4[i]) & _PAGE_PRESENT ) + put_shadow_ref(l4e_get_pfn(shadow4[i])); + shadow4[i] = l4e_empty(); continue; + } if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) { @@ -2372,7 +2418,7 @@ if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) { u32 index = get_cr3_idxval(v); - gpfn = (index << PGT_score_shift) | gpfn; + gpfn = ((unsigned long)index << PGT_pae_idx_shift) | gpfn; } #endif @@ -3233,8 +3279,35 @@ int i; for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ ) + { + unsigned long gpfn; + + /* + * Looks like it's no longer a page table. 
+ */ + if ( unlikely(entry_get_value(gple[index*4+i]) & PAE_PDPT_RESERVED) ) + { + if ( entry_get_flags(sple[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(sple[i])); + + sple[i] = entry_empty(); + continue; + } + + gpfn = entry_get_pfn(gple[index*4+i]); + + if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) ) + { + if ( entry_get_flags(sple[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(sple[i])); + + sple[i] = entry_empty(); + continue; + } + validate_entry_change( v->domain, &gple[index*4+i], &sple[i], PAGING_L3); + } unmap_domain_page(sple); } diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/shadow32.c --- a/xen/arch/x86/shadow32.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/shadow32.c Sat Apr 15 18:25:21 2006 @@ -583,6 +583,13 @@ { put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table)); v->arch.shadow_table = mk_pagetable(0); + + if ( shadow_mode_external(d) ) + { + if ( v->arch.shadow_vtable ) + unmap_domain_page_global(v->arch.shadow_vtable); + v->arch.shadow_vtable = NULL; + } } if ( v->arch.monitor_shadow_ref ) @@ -2886,7 +2893,7 @@ SH_VVLOG("shadow_fault( va=%lx, code=%lu )", va, (unsigned long)regs->error_code); perfc_incrc(shadow_fault_calls); - + check_pagetable(v, "pre-sf"); /* @@ -2917,7 +2924,16 @@ // the mapping is in-sync, so the check of the PDE's present bit, above, // covers this access. // - orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)]; + if ( __copy_from_user(&gpte, + &linear_pg_table[l1_linear_offset(va)], + sizeof(gpte)) ) { + printk("%s() failed, crashing domain %d " + "due to a unaccessible linear page table (gpde=%" PRIpte "), va=%lx\n", + __func__, d->domain_id, l2e_get_intpte(gpde), va); + domain_crash_synchronous(); + } + orig_gpte = gpte; + if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) ) { SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ") (gpde %" PRIpte ")", @@ -2928,7 +2944,7 @@ } /* Write fault? 
*/ - if ( regs->error_code & 2 ) + if ( regs->error_code & 2 ) { int allow_writes = 0; @@ -2942,7 +2958,7 @@ else { /* Write fault on a read-only mapping. */ - SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")", + SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")", l1e_get_intpte(gpte)); perfc_incrc(shadow_fault_bail_ro_mapping); goto fail; @@ -2955,10 +2971,10 @@ } /* User access violation in guest? */ - if ( unlikely((regs->error_code & 4) && + if ( unlikely((regs->error_code & 4) && !(l1e_get_flags(gpte) & _PAGE_USER))) { - SH_VVLOG("shadow_fault - EXIT: wr fault on super page (%" PRIpte ")", + SH_VVLOG("shadow_fault - EXIT: wr fault on super page (%" PRIpte ")", l1e_get_intpte(gpte)); goto fail; @@ -2980,7 +2996,7 @@ /* Read-protection violation in guest? */ if ( unlikely((regs->error_code & 1) )) { - SH_VVLOG("shadow_fault - EXIT: read fault on super page (%" PRIpte ")", + SH_VVLOG("shadow_fault - EXIT: read fault on super page (%" PRIpte ")", l1e_get_intpte(gpte)); goto fail; diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/shadow_public.c --- a/xen/arch/x86/shadow_public.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/shadow_public.c Sat Apr 15 18:25:21 2006 @@ -102,6 +102,15 @@ int shadow_set_guest_paging_levels(struct domain *d, int levels) { + struct vcpu *v = current; + + /* + * Need to wait for VCPU0 to complete the on-going shadow ops. 
+ */ + + if ( v->vcpu_id ) + return 1; + shadow_lock(d); switch(levels) { @@ -692,7 +701,6 @@ void free_shadow_page(unsigned long smfn) { struct page_info *page = mfn_to_page(smfn); - unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask; struct domain *d = page_get_owner(mfn_to_page(gmfn)); unsigned long gpfn = mfn_to_gmfn(d, gmfn); @@ -709,10 +717,9 @@ if ( !mfn ) gpfn |= (1UL << 63); } - if (d->arch.ops->guest_paging_levels == PAGING_L3) - if (type == PGT_l4_shadow ) { - gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_score_shift) | gpfn; - } + if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) + if ( type == PGT_l4_shadow ) + gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_pae_idx_shift) | gpfn; #endif delete_shadow_status(d, gpfn, gmfn, type); @@ -743,9 +750,24 @@ #if CONFIG_PAGING_LEVELS >= 3 case PGT_l2_shadow: case PGT_l3_shadow: + shadow_demote(d, gpfn, gmfn); + free_shadow_tables(d, smfn, shadow_type_to_level(type)); + d->arch.shadow_page_count--; + break; + case PGT_l4_shadow: gpfn = gpfn & PGT_mfn_mask; - shadow_demote(d, gpfn, gmfn); + if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) + { + /* + * Since a single PDPT page can have multiple PDPs, it's possible + * that shadow_demote() has been already called for gmfn. + */ + if ( mfn_is_page_table(gmfn) ) + shadow_demote(d, gpfn, gmfn); + } else + shadow_demote(d, gpfn, gmfn); + free_shadow_tables(d, smfn, shadow_type_to_level(type)); d->arch.shadow_page_count--; break; @@ -898,6 +920,13 @@ { put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table)); v->arch.shadow_table = mk_pagetable(0); + + if ( shadow_mode_external(d) ) + { + if ( v->arch.shadow_vtable ) + unmap_domain_page_global(v->arch.shadow_vtable); + v->arch.shadow_vtable = NULL; + } } if ( v->arch.monitor_shadow_ref ) @@ -2034,7 +2063,16 @@ void clear_all_shadow_status(struct domain *d) { + struct vcpu *v = current; + + /* + * Don't clean up while other vcpus are working. 
+ */ + if ( v->vcpu_id ) + return; + shadow_lock(d); + free_shadow_pages(d); free_shadow_ht_entries(d); d->arch.shadow_ht = @@ -2047,6 +2085,7 @@ shadow_ht_buckets * sizeof(struct shadow_status)); free_out_of_sync_entries(d); + shadow_unlock(d); } diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/smp.c --- a/xen/arch/x86/smp.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/smp.c Sat Apr 15 18:25:21 2006 @@ -261,7 +261,7 @@ return on_selected_cpus(allbutself, func, info, retry, wait); } -extern int on_selected_cpus( +int on_selected_cpus( cpumask_t selected, void (*func) (void *info), void *info, diff -r 83eb8d81c96f -r 91da9a1b7196 xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Sat Apr 15 18:25:09 2006 +++ b/xen/arch/x86/smpboot.c Sat Apr 15 18:25:21 2006 @@ -41,6 +41,7 @@ #include <xen/irq.h> #include <xen/delay.h> #include <xen/softirq.h> +#include <xen/serial.h> #include <asm/current.h> #include <asm/mc146818rtc.h> #include <asm/desc.h> @@ -1231,12 +1232,25 @@ void __init smp_intr_init(void) { + int irq, seridx; + /* * IRQ0 must be given a fixed assignment and initialized, * because it's used before the IO-APIC is set up. */ - irq_vector[0] = FIRST_DEVICE_VECTOR; - vector_irq[FIRST_DEVICE_VECTOR] = 0; + irq_vector[0] = FIRST_HIPRIORITY_VECTOR; + vector_irq[FIRST_HIPRIORITY_VECTOR] = 0; + + /* + * Also ensure serial interrupts are high priority. We do not + * want them to be blocked by unacknowledged guest-bound interrupts. + */ + for (seridx = 0; seridx < 2; seridx++) { + if ((irq = serial_irq(seridx)) < 0) + continue; + irq_vector[irq] = FIRST_HIPRIORITY_VECTOR + seridx + 1; + vector_irq[FIRST_HIPRIORITY_VECTOR + seridx + 1] = irq; + } /* IPI for event checking. 
*/ set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt); diff -r 83eb8d81c96f -r 91da9a1b7196 xen/common/dom0_ops.c --- a/xen/common/dom0_ops.c Sat Apr 15 18:25:09 2006 +++ b/xen/common/dom0_ops.c Sat Apr 15 18:25:21 2006 @@ -581,20 +581,31 @@ case DOM0_SETDOMAINMAXMEM: { struct domain *d; + unsigned long new_max; + ret = -ESRCH; d = find_domain_by_id(op->u.setdomainmaxmem.domain); - if ( d != NULL ) - { - d->max_pages = op->u.setdomainmaxmem.max_memkb >> (PAGE_SHIFT-10); - put_domain(d); + if ( d == NULL ) + break; + + ret = -EINVAL; + new_max = op->u.setdomainmaxmem.max_memkb >> (PAGE_SHIFT-10); + + spin_lock(&d->page_alloc_lock); + if ( new_max >= d->tot_pages ) + { + d->max_pages = new_max; ret = 0; } + spin_unlock(&d->page_alloc_lock); + + put_domain(d); } break; case DOM0_SETDOMAINHANDLE: { - struct domain *d; + struct domain *d; ret = -ESRCH; d = find_domain_by_id(op->u.setdomainhandle.domain); if ( d != NULL ) diff -r 83eb8d81c96f -r 91da9a1b7196 xen/common/grant_table.c --- a/xen/common/grant_table.c Sat Apr 15 18:25:09 2006 +++ b/xen/common/grant_table.c Sat Apr 15 18:25:21 2006 @@ -41,21 +41,21 @@ static inline int get_maptrack_handle( - grant_table_t *t) + struct grant_table *t) { unsigned int h; if ( unlikely((h = t->maptrack_head) == (t->maptrack_limit - 1)) ) return -1; - t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT; + t->maptrack_head = t->maptrack[h].ref; t->map_count++; return h; } static inline void put_maptrack_handle( - grant_table_t *t, int handle) -{ - t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT; + struct grant_table *t, int handle) +{ + t->maptrack[handle].ref = t->maptrack_head; t->maptrack_head = handle; t->map_count--; } @@ -76,7 +76,7 @@ int handle; unsigned long frame = 0; int rc = GNTST_okay; - active_grant_entry_t *act; + struct active_grant_entry *act; /* Entry details from @rd's shared grant table. */ grant_entry_t *sha; @@ -123,9 +123,9 @@ /* Get a maptrack handle. 
*/ if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) ) { - int i; - grant_mapping_t *new_mt; - grant_table_t *lgt = ld->grant_table; + int i; + struct grant_mapping *new_mt; + struct grant_table *lgt = ld->grant_table; if ( (lgt->maptrack_limit << 1) > MAPTRACK_MAX_ENTRIES ) { @@ -147,7 +147,7 @@ memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order); for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ ) - new_mt[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; + new_mt[i].ref = i+1; free_xenheap_pages(lgt->maptrack, lgt->maptrack_order); lgt->maptrack = new_mt; @@ -264,10 +264,9 @@ TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom); - ld->grant_table->maptrack[handle].domid = op->dom; - ld->grant_table->maptrack[handle].ref_and_flags = - (op->ref << MAPTRACK_REF_SHIFT) | - (op->flags & MAPTRACK_GNTMAP_MASK); + ld->grant_table->maptrack[handle].domid = op->dom; + ld->grant_table->maptrack[handle].ref = op->ref; + ld->grant_table->maptrack[handle].flags = op->flags; op->dev_bus_addr = (u64)frame << PAGE_SHIFT; op->handle = handle; @@ -326,9 +325,9 @@ domid_t dom; grant_ref_t ref; struct domain *ld, *rd; - active_grant_entry_t *act; + struct active_grant_entry *act; grant_entry_t *sha; - grant_mapping_t *map; + struct grant_mapping *map; u16 flags; s16 rc = 0; unsigned long frame; @@ -340,7 +339,7 @@ map = &ld->grant_table->maptrack[op->handle]; if ( unlikely(op->handle >= ld->grant_table->maptrack_limit) || - unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) ) + unlikely(!map->flags) ) { DPRINTK("Bad handle (%d).\n", op->handle); op->status = GNTST_bad_handle; @@ -348,8 +347,8 @@ } dom = map->domid; - ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; - flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK; + ref = map->ref; + flags = map->flags; if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || unlikely(ld == rd) ) @@ -380,7 +379,7 @@ if ( flags & GNTMAP_device_map ) { ASSERT(act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask)); - 
map->ref_and_flags &= ~GNTMAP_device_map; + map->flags &= ~GNTMAP_device_map; if ( flags & GNTMAP_readonly ) { act->pin -= GNTPIN_devr_inc; @@ -401,7 +400,7 @@ goto unmap_out; ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)); - map->ref_and_flags &= ~GNTMAP_host_map; + map->flags &= ~GNTMAP_host_map; if ( flags & GNTMAP_readonly ) { act->pin -= GNTPIN_hstr_inc; @@ -414,9 +413,9 @@ } } - if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0 ) - { - map->ref_and_flags = 0; + if ( (map->flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0 ) + { + map->flags = 0; put_maptrack_handle(ld->grant_table, op->handle); } @@ -534,12 +533,12 @@ gnttab_prepare_for_transfer( struct domain *rd, struct domain *ld, grant_ref_t ref) { - grant_table_t *rgt; - grant_entry_t *sha; - domid_t sdom; - u16 sflags; - u32 scombo, prev_scombo; - int retries = 0; + struct grant_table *rgt; + struct grant_entry *sha; + domid_t sdom; + u16 sflags; + u32 scombo, prev_scombo; + int retries = 0; if ( unlikely((rgt = rd->grant_table) == NULL) || unlikely(ref >= NR_GRANT_ENTRIES) ) @@ -775,10 +774,11 @@ grant_table_create( struct domain *d) { - grant_table_t *t; - int i; - - if ( (t = xmalloc(grant_table_t)) == NULL ) + struct grant_table *t; + int i; + + BUG_ON(MAPTRACK_MAX_ENTRIES < NR_GRANT_ENTRIES); + if ( (t = xmalloc(struct grant_table)) == NULL ) goto no_mem; /* Simple stuff. */ @@ -786,19 +786,19 @@ spin_lock_init(&t->lock); /* Active grant table. 
*/ - if ( (t->active = xmalloc_array(active_grant_entry_t, NR_GRANT_ENTRIES)) - == NULL ) + t->active = xmalloc_array(struct active_grant_entry, NR_GRANT_ENTRIES); + if ( t->active == NULL ) goto no_mem; - memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES); + memset(t->active, 0, sizeof(struct active_grant_entry) * NR_GRANT_ENTRIES); /* Tracking of mapped foreign frames table */ if ( (t->maptrack = alloc_xenheap_page()) == NULL ) goto no_mem; t->maptrack_order = 0; - t->maptrack_limit = PAGE_SIZE / sizeof(grant_mapping_t); + t->maptrack_limit = PAGE_SIZE / sizeof(struct grant_mapping); memset(t->maptrack, 0, PAGE_SIZE); for ( i = 0; i < t->maptrack_limit; i++ ) - t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; + t->maptrack[i].ref = i+1; /* Shared grant table. */ t->shared = alloc_xenheap_pages(ORDER_GRANT_FRAMES); @@ -828,27 +828,26 @@ gnttab_release_mappings( struct domain *d) { - grant_table_t *gt = d->grant_table; - grant_mapping_t *map; + struct grant_table *gt = d->grant_table; + struct grant_mapping *map; grant_ref_t ref; grant_handle_t handle; struct domain *rd; - active_grant_entry_t *act; - grant_entry_t *sha; + struct active_grant_entry *act; + struct grant_entry *sha; BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags)); for ( handle = 0; handle < gt->maptrack_limit; handle++ ) { map = >->maptrack[handle]; - if ( !(map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) ) + if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ) continue; - ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; + ref = map->ref; DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n", - handle, ref, map->ref_and_flags & MAPTRACK_GNTMAP_MASK, - map->domid); + handle, ref, map->flags, map->domid); rd = find_domain_by_id(map->domid); BUG_ON(rd == NULL); @@ -858,16 +857,16 @@ act = &rd->grant_table->active[ref]; sha = &rd->grant_table->shared[ref]; - if ( map->ref_and_flags & GNTMAP_readonly ) - { - if ( map->ref_and_flags & GNTMAP_device_map 
) + if ( map->flags & GNTMAP_readonly ) + { + if ( map->flags & GNTMAP_device_map ) { BUG_ON(!(act->pin & GNTPIN_devr_mask)); act->pin -= GNTPIN_devr_inc; put_page(mfn_to_page(act->frame)); } - if ( map->ref_and_flags & GNTMAP_host_map ) + if ( map->flags & GNTMAP_host_map ) { BUG_ON(!(act->pin & GNTPIN_hstr_mask)); act->pin -= GNTPIN_hstr_inc; @@ -877,14 +876,14 @@ } else { - if ( map->ref_and_flags & GNTMAP_device_map ) + if ( map->flags & GNTMAP_device_map ) { BUG_ON(!(act->pin & GNTPIN_devw_mask)); act->pin -= GNTPIN_devw_inc; put_page_and_type(mfn_to_page(act->frame)); } - if ( map->ref_and_flags & GNTMAP_host_map ) + if ( map->flags & GNTMAP_host_map ) { BUG_ON(!(act->pin & GNTPIN_hstw_mask)); act->pin -= GNTPIN_hstw_inc; @@ -903,7 +902,7 @@ put_domain(rd); - map->ref_and_flags = 0; + map->flags = 0; } } @@ -912,7 +911,7 @@ grant_table_destroy( struct domain *d) { - grant_table_t *t = d->grant_table; + struct grant_table *t = d->grant_table; if ( t == NULL ) return; diff -r 83eb8d81c96f -r 91da9a1b7196 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Sat Apr 15 18:25:09 2006 +++ b/xen/drivers/char/console.c Sat Apr 15 18:25:21 2006 @@ -65,11 +65,12 @@ #define COLUMNS 80 #define LINES 25 #define ATTRIBUTE 7 +#define VIDEO_SIZE (COLUMNS * LINES * 2) /* Clear the screen and initialize VIDEO, XPOS and YPOS. */ static void cls(void) { - memset(video, 0, COLUMNS * LINES * 2); + memset(video, 0, VIDEO_SIZE); xpos = ypos = 0; outw(10+(1<<(5+8)), 0x3d4); /* cursor off */ } @@ -107,9 +108,9 @@ * * These checks are basically to detect headless server boxes. 
*/ - return (detect_video(__va(0xA0000)) || - detect_video(__va(0xB0000)) || - detect_video(__va(0xB8000))); + return (detect_video(ioremap(0xA0000, VIDEO_SIZE)) || + detect_video(ioremap(0xB0000, VIDEO_SIZE)) || + detect_video(ioremap(0xB8000, VIDEO_SIZE))); } /* This is actually code from vgaHWRestore in an old version of XFree86 :-) */ @@ -143,7 +144,7 @@ return; } - video = __va(0xB8000); + video = ioremap(0xB8000, VIDEO_SIZE); tmp = inb(0x3da); outb(0x00, 0x3c0); @@ -180,12 +181,10 @@ if (ypos >= LINES) { - static char zeroarr[2*COLUMNS] = { 0 }; ypos = LINES-1; - memcpy((char*)video, - (char*)video + 2*COLUMNS, (LINES-1)*2*COLUMNS); - memcpy((char*)video + (LINES-1)*2*COLUMNS, - zeroarr, 2*COLUMNS); + memmove((char*)video, + (char*)video + 2*COLUMNS, (LINES-1)*2*COLUMNS); + memset((char*)video + (LINES-1)*2*COLUMNS, 0, 2*COLUMNS); } } diff -r 83eb8d81c96f -r 91da9a1b7196 xen/drivers/char/ns16550.c --- a/xen/drivers/char/ns16550.c Sat Apr 15 18:25:09 2006 +++ b/xen/drivers/char/ns16550.c Sat Apr 15 18:25:21 2006 @@ -260,13 +260,20 @@ #define ns16550_endboot NULL #endif +static int ns16550_irq(struct serial_port *port) +{ + struct ns16550 *uart = port->uart; + return ((uart->irq > 0) ? 
uart->irq : -1); +} + static struct uart_driver ns16550_driver = { .init_preirq = ns16550_init_preirq, .init_postirq = ns16550_init_postirq, .endboot = ns16550_endboot, .tx_empty = ns16550_tx_empty, .putc = ns16550_putc, - .getc = ns16550_getc + .getc = ns16550_getc, + .irq = ns16550_irq }; static int parse_parity_char(int c) diff -r 83eb8d81c96f -r 91da9a1b7196 xen/drivers/char/serial.c --- a/xen/drivers/char/serial.c Sat Apr 15 18:25:09 2006 +++ b/xen/drivers/char/serial.c Sat Apr 15 18:25:21 2006 @@ -372,6 +372,15 @@ com[i].driver->endboot(&com[i]); } +int serial_irq(int idx) +{ + if ( (idx >= 0) && (idx < ARRAY_SIZE(com)) && + com[idx].driver && com[idx].driver->irq ) + return com[idx].driver->irq(&com[idx]); + + return -1; +} + void serial_register_uart(int idx, struct uart_driver *driver, void *uart) { /* Store UART-specific info. */ diff -r 83eb8d81c96f -r 91da9a1b7196 xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Sat Apr 15 18:25:09 2006 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Sat Apr 15 18:25:21 2006 @@ -61,8 +61,7 @@ CPU_BASED_MWAIT_EXITING | \ CPU_BASED_MOV_DR_EXITING | \ CPU_BASED_ACTIVATE_IO_BITMAP | \ - CPU_BASED_USE_TSC_OFFSETING | \ - CPU_BASED_UNCOND_IO_EXITING \ + CPU_BASED_USE_TSC_OFFSETING \ ) #define MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \ diff -r 83eb8d81c96f -r 91da9a1b7196 xen/include/asm-x86/hvm/vpit.h --- a/xen/include/asm-x86/hvm/vpit.h Sat Apr 15 18:25:09 2006 +++ b/xen/include/asm-x86/hvm/vpit.h Sat Apr 15 18:25:21 2006 @@ -38,7 +38,8 @@ struct hvm_virpit { /* for simulation of counter 0 in mode 2 */ u64 period_cycles; /* pit frequency in cpu cycles */ - s_time_t inject_point; /* the time inject virt intr */ + s_time_t count_advance; /* accumulated count advance since last fire */ + s_time_t count_point; /* last point accumulating count advance */ s_time_t scheduled; /* scheduled timer interrupt */ struct timer pit_timer; /* periodic timer for mode 2*/ unsigned int channel; /* the pit channel, counter 
0~2 */ diff -r 83eb8d81c96f -r 91da9a1b7196 xen/include/asm-x86/irq.h --- a/xen/include/asm-x86/irq.h Sat Apr 15 18:25:09 2006 +++ b/xen/include/asm-x86/irq.h Sat Apr 15 18:25:21 2006 @@ -11,8 +11,8 @@ #define IO_APIC_IRQ(irq) (((irq) >= 16) || ((1<<(irq)) & io_apic_irqs)) #define IO_APIC_VECTOR(irq) (irq_vector[irq]) -#define LEGACY_VECTOR(irq) ((irq) + FIRST_EXTERNAL_VECTOR) -#define LEGACY_IRQ_FROM_VECTOR(vec) ((vec) - FIRST_EXTERNAL_VECTOR) +#define LEGACY_VECTOR(irq) ((irq) + FIRST_LEGACY_VECTOR) +#define LEGACY_IRQ_FROM_VECTOR(vec) ((vec) - FIRST_LEGACY_VECTOR) #define irq_to_vector(irq) \ (IO_APIC_IRQ(irq) ? IO_APIC_VECTOR(irq) : LEGACY_VECTOR(irq)) diff -r 83eb8d81c96f -r 91da9a1b7196 xen/include/asm-x86/mach-default/irq_vectors.h --- a/xen/include/asm-x86/mach-default/irq_vectors.h Sat Apr 15 18:25:09 2006 +++ b/xen/include/asm-x86/mach-default/irq_vectors.h Sat Apr 15 18:25:21 2006 @@ -1,96 +1,36 @@ -/* - * This file should contain #defines for all of the interrupt vector - * numbers used by this architecture. - * - * In addition, there are some standard defines: - * - * FIRST_EXTERNAL_VECTOR: - * The first free place for external interrupts - * - * SYSCALL_VECTOR: - * The IRQ vector a syscall makes the user to kernel transition - * under. - * - * TIMER_IRQ: - * The IRQ number the timer interrupt comes in at. - * - * NR_IRQS: - * The total number of interrupt vectors (including all the - * architecture specific interrupts) needed. - * - */ #ifndef _ASM_IRQ_VECTORS_H #define _ASM_IRQ_VECTORS_H -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define HYPERCALL_VECTOR 0x82 - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. - */ - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. 
- * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). - */ +/* Processor-initiated interrupts are all high priority. */ #define SPURIOUS_APIC_VECTOR 0xff #define ERROR_APIC_VECTOR 0xfe #define INVALIDATE_TLB_VECTOR 0xfd #define EVENT_CHECK_VECTOR 0xfc #define CALL_FUNCTION_VECTOR 0xfb - -#define THERMAL_APIC_VECTOR 0xf0 -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef +#define THERMAL_APIC_VECTOR 0xfa +#define LOCAL_TIMER_VECTOR 0xf9 /* - * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) + * High-priority dynamically-allocated vectors. For interrupts that + * must be higher priority than any guest-bound interrupt. */ -#define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_HIPRIORITY_VECTOR 0xf0 +#define LAST_HIPRIORITY_VECTOR 0xf8 -#define TIMER_IRQ 0 +/* Legacy PIC uses vectors 0xe0-0xef. */ +#define FIRST_LEGACY_VECTOR 0xe0 +#define LAST_LEGACY_VECTOR 0xef -/* - * 16 8259A IRQ's, 208 potential APIC interrupt sources. - * Right now the APIC is mostly only used for SMP. - * 256 vectors is an architectural limit. (we can have - * more than 256 devices theoretically, but they will - * have to use shared interrupts) - * Since vectors 0x00-0x1f are used/reserved for the CPU, - * the usable vector space is 0x20-0xff (224 vectors) - */ +#define HYPERCALL_VECTOR 0x82 -/* - * The maximum number of vectors supported by i386 processors - * is limited to 256. For processors other than i386, NR_VECTORS - * should be changed accordingly. - */ +/* Dynamically-allocated vectors available to any driver. 
*/ +#define FIRST_DYNAMIC_VECTOR 0x20 +#define LAST_DYNAMIC_VECTOR 0xdf + #define NR_VECTORS 256 -#include "irq_vectors_limits.h" - -#define FPU_IRQ 13 - -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) - +/* Limited by number of trap vectors. */ +#define NR_IRQS NR_VECTORS +#define NR_IRQ_VECTORS NR_IRQS #endif /* _ASM_IRQ_VECTORS_H */ diff -r 83eb8d81c96f -r 91da9a1b7196 xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Sat Apr 15 18:25:09 2006 +++ b/xen/include/asm-x86/mm.h Sat Apr 15 18:25:21 2006 @@ -103,11 +103,13 @@ #define PGT_high_mfn_mask (0xfffUL << PGT_high_mfn_shift) #define PGT_mfn_mask (((1U<<23)-1) | PGT_high_mfn_mask) #define PGT_high_mfn_nx (0x800UL << PGT_high_mfn_shift) +#define PGT_pae_idx_shift PGT_high_mfn_shift #else /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */ #define PGT_mfn_mask ((1U<<23)-1) /* NX for PAE xen is not supported yet */ #define PGT_high_mfn_nx (1ULL << 63) +#define PGT_pae_idx_shift 23 #endif #define PGT_score_shift 23 diff -r 83eb8d81c96f -r 91da9a1b7196 xen/include/asm-x86/shadow_64.h --- a/xen/include/asm-x86/shadow_64.h Sat Apr 15 18:25:09 2006 +++ b/xen/include/asm-x86/shadow_64.h Sat Apr 15 18:25:21 2006 @@ -119,6 +119,8 @@ #define PAE_CR3_IDX_MASK 0x7f #define PAE_CR3_IDX_NO 128 +#define PAE_PDPT_RESERVED 0x1e6 /* [8:5], [2,1] */ + /******************************************************************************/ static inline int table_offset_64(unsigned long va, int level) { diff -r 83eb8d81c96f -r 91da9a1b7196 xen/include/public/xen.h --- a/xen/include/public/xen.h Sat Apr 15 18:25:09 2006 +++ b/xen/include/public/xen.h Sat Apr 15 18:25:21 2006 @@ -286,7 +286,8 @@ uint64_t system_time; /* Time, in nanosecs, since boot. 
*/ /* * Current system time: - * system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul + * system_time + + * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32) * CPU frequency (Hz): * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift */ diff -r 83eb8d81c96f -r 91da9a1b7196 xen/include/xen/grant_table.h --- a/xen/include/xen/grant_table.h Sat Apr 15 18:25:09 2006 +++ b/xen/include/xen/grant_table.h Sat Apr 15 18:25:21 2006 @@ -29,11 +29,11 @@ #include <asm/grant_table.h> /* Active grant entry - used for shadowing GTF_permit_access grants. */ -typedef struct { +struct active_grant_entry { u32 pin; /* Reference count information. */ domid_t domid; /* Domain being granted access. */ unsigned long frame; /* Frame being granted. */ -} active_grant_entry_t; +}; /* Count of writable host-CPU mappings. */ #define GNTPIN_hstw_shift (0) @@ -60,29 +60,30 @@ * Tracks a mapping of another domain's grant reference. Each domain has a * table of these, indexes into which are returned as a 'mapping handle'. */ -typedef struct { - u16 ref_and_flags; /* 0-4: GNTMAP_* ; 5-15: grant ref */ +struct grant_mapping { + u32 ref; /* grant ref */ + u16 flags; /* 0-4: GNTMAP_* ; 5-15: unused */ domid_t domid; /* granting domain */ -} grant_mapping_t; -#define MAPTRACK_GNTMAP_MASK 0x1f -#define MAPTRACK_REF_SHIFT 5 -#define MAPTRACK_MAX_ENTRIES (1 << (16 - MAPTRACK_REF_SHIFT)) +}; + +/* Fairly arbitrary. [POLICY] */ +#define MAPTRACK_MAX_ENTRIES 16384 /* Per-domain grant information. */ -typedef struct { +struct grant_table { /* Shared grant table (see include/public/grant_table.h). */ - grant_entry_t *shared; + struct grant_entry *shared; /* Active grant table. */ - active_grant_entry_t *active; + struct active_grant_entry *active; /* Mapping tracking table. 
*/ - grant_mapping_t *maptrack; + struct grant_mapping *maptrack; unsigned int maptrack_head; unsigned int maptrack_order; unsigned int maptrack_limit; unsigned int map_count; /* Lock protecting updates to active and shared grant tables. */ spinlock_t lock; -} grant_table_t; +}; /* Create/destroy per-domain grant table context. */ int grant_table_create( diff -r 83eb8d81c96f -r 91da9a1b7196 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Sat Apr 15 18:25:09 2006 +++ b/xen/include/xen/sched.h Sat Apr 15 18:25:21 2006 @@ -125,7 +125,7 @@ struct evtchn *evtchn[NR_EVTCHN_BUCKETS]; spinlock_t evtchn_lock; - grant_table_t *grant_table; + struct grant_table *grant_table; /* * Interrupt to event-channel mappings. Updates should be protected by the diff -r 83eb8d81c96f -r 91da9a1b7196 xen/include/xen/serial.h --- a/xen/include/xen/serial.h Sat Apr 15 18:25:09 2006 +++ b/xen/include/xen/serial.h Sat Apr 15 18:25:21 2006 @@ -57,6 +57,8 @@ void (*putc)(struct serial_port *, char); /* Get a character from the serial line: returns 0 if none available. */ int (*getc)(struct serial_port *, char *); + /* Get IRQ number for this port's serial line: returns -1 if none. */ + int (*irq)(struct serial_port *); }; /* 'Serial handles' are composed from the following fields. */ @@ -99,6 +101,9 @@ /* Return number of bytes headroom in transmit buffer. */ int serial_tx_space(int handle); +/* Return irq number for specified serial port (identified by index). */ +int serial_irq(int idx); + /* * Initialisation and helper functions for uart drivers. */ diff -r 83eb8d81c96f -r 91da9a1b7196 xen/include/asm-x86/mach-default/irq_vectors_limits.h --- a/xen/include/asm-x86/mach-default/irq_vectors_limits.h Sat Apr 15 18:25:09 2006 +++ /dev/null Sat Apr 15 18:25:21 2006 @@ -1,8 +0,0 @@ -#ifndef _ASM_IRQ_VECTORS_LIMITS_H -#define _ASM_IRQ_VECTORS_LIMITS_H - -/* Limited by number of trap vectors. 
*/ -#define NR_IRQS FIRST_SYSTEM_VECTOR -#define NR_IRQ_VECTORS NR_IRQS - -#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |