[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] [POWERPC] merge with xen-unstable.hg
# HG changeset patch # User Hollis Blanchard <hollisb@xxxxxxxxxx> # Node ID 0bdd578c417f0a3b50da35b3d6d1a196bb9abd7f # Parent ce9c34c049c541446d291b6839a83563a9f78a3d # Parent 2b8dc69744e3ae99d6c59eab7b229ae4259170e3 [POWERPC] merge with xen-unstable.hg Signed-off-by: Hollis Blanchard <hollisb@xxxxxxxxxx> --- buildconfigs/linux-defconfig_xen0_ia64 | 8 buildconfigs/linux-defconfig_xenU_ia64 | 6 buildconfigs/linux-defconfig_xen_ia64 | 8 docs/man/xm.pod.1 | 7 docs/src/user.tex | 64 +++- extras/mini-os/Makefile | 3 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 7 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c | 42 -- linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c | 113 ++++--- linux-2.6-xen-sparse/arch/ia64/Kconfig | 6 linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c | 2 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 5 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 10 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 4 linux-2.6-xen-sparse/drivers/xen/blktap/common.h | 1 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c | 23 - linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c | 16 + linux-2.6-xen-sparse/drivers/xen/console/console.c | 20 - linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c | 18 - linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 18 + linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 7 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c | 12 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c | 21 + linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 34 ++ linux-2.6-xen-sparse/include/asm-ia64/dma-mapping.h | 2 linux-2.6-xen-sparse/include/asm-ia64/hypercall.h | 4 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h | 14 linux-2.6-xen-sparse/include/asm-ia64/machvec.h | 15 - linux-2.6-xen-sparse/include/asm-ia64/machvec_dig.h | 33 ++ linux-2.6-xen-sparse/include/asm-ia64/maddr.h | 4 linux-2.6-xen-sparse/include/xen/xenbus.h | 6 
patches/linux-2.6.16.13/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 27 + tools/blktap/drivers/block-qcow.c | 29 + tools/blktap/drivers/tapdisk.c | 23 - tools/check/check_python | 2 tools/console/client/main.c | 3 tools/console/daemon/io.c | 10 tools/debugger/gdb/gdbbuild | 6 tools/examples/vtpm-common.sh | 5 tools/examples/vtpm-impl | 3 tools/libxc/xenctrl.h | 5 tools/python/xen/web/SrvBase.py | 1 tools/python/xen/xend/XendDomain.py | 4 tools/python/xen/xend/XendDomainInfo.py | 60 +++- tools/python/xen/xend/image.py | 47 ++- tools/python/xen/xend/server/DevController.py | 6 tools/python/xen/xend/server/blkif.py | 6 tools/python/xen/xend/server/pciquirk.py | 2 tools/python/xen/xm/addlabel.py | 6 tools/python/xen/xm/cfgbootpolicy.py | 6 tools/python/xen/xm/create.py | 7 tools/python/xen/xm/dry-run.py | 13 tools/python/xen/xm/dumppolicy.py | 8 tools/python/xen/xm/getlabel.py | 38 +- tools/python/xen/xm/labels.py | 4 tools/python/xen/xm/loadpolicy.py | 5 tools/python/xen/xm/makepolicy.py | 3 tools/python/xen/xm/resources.py | 21 - tools/python/xen/xm/rmlabel.py | 10 tools/python/xen/xm/sysrq.py | 5 tools/python/xen/xm/tests/test_create.py | 4 tools/xenmon/xenmon.py | 3 unmodified_drivers/linux-2.6/mkbuildtree | 6 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 37 +- xen/Makefile | 5 xen/arch/ia64/vmx/mmio.c | 1 xen/arch/ia64/vmx/pal_emul.c | 1 xen/arch/ia64/vmx/vmx_interrupt.c | 1 xen/arch/ia64/vmx/vmx_phy_mode.c | 1 xen/arch/ia64/vmx/vmx_process.c | 1 xen/arch/ia64/vmx/vmx_virt.c | 39 ++ xen/arch/ia64/xen/dom_fw.c | 12 xen/arch/ia64/xen/domain.c | 6 xen/arch/ia64/xen/fw_emul.c | 136 ++++++++- xen/arch/ia64/xen/hypercall.c | 1 xen/arch/ia64/xen/mm.c | 80 +++++ xen/arch/ia64/xen/vhpt.c | 29 + xen/arch/ia64/xen/xensetup.c | 8 xen/arch/x86/Rules.mk | 3 xen/arch/x86/hvm/io.c | 10 xen/arch/x86/hvm/platform.c | 32 +- xen/arch/x86/hvm/svm/intr.c | 43 +- xen/arch/x86/hvm/svm/svm.c | 5 xen/arch/x86/hvm/vioapic.c | 32 +- 
xen/arch/x86/hvm/vlapic.c | 49 ++- xen/arch/x86/hvm/vmx/io.c | 13 xen/arch/x86/hvm/vmx/vmx.c | 29 + xen/arch/x86/mm/shadow/multi.c | 72 +--- xen/arch/x86/oprofile/xenoprof.c | 75 ++--- xen/arch/x86/time.c | 4 xen/arch/x86/traps.c | 21 + xen/arch/x86/x86_32/traps.c | 46 ++- xen/arch/x86/x86_64/mm.c | 20 - xen/arch/x86/x86_64/traps.c | 43 ++ xen/common/grant_table.c | 148 ++++------ xen/common/perfc.c | 4 xen/include/asm-ia64/mm.h | 3 xen/include/asm-ia64/perfc_defn.h | 50 +++ xen/include/asm-x86/bitops.h | 57 +-- xen/include/asm-x86/hvm/vlapic.h | 30 +- xen/include/asm-x86/mm.h | 1 xen/include/asm-x86/page.h | 7 xen/include/asm-x86/processor.h | 7 xen/include/asm-x86/x86_32/page-2level.h | 3 xen/include/asm-x86/x86_32/page-3level.h | 2 xen/include/asm-x86/x86_32/page.h | 9 xen/include/asm-x86/x86_64/page.h | 11 xen/include/public/arch-ia64.h | 3 xen/include/public/arch-powerpc.h | 3 xen/include/public/arch-x86_32.h | 17 - xen/include/public/arch-x86_64.h | 3 xen/include/public/domctl.h | 18 - xen/include/public/sysctl.h | 16 - xen/include/public/xenoprof.h | 15 - xen/include/xen/compiler.h | 5 115 files changed, 1444 insertions(+), 734 deletions(-) diff -r ce9c34c049c5 -r 0bdd578c417f buildconfigs/linux-defconfig_xen0_ia64 --- a/buildconfigs/linux-defconfig_xen0_ia64 Mon Sep 18 09:23:51 2006 -0400 +++ b/buildconfigs/linux-defconfig_xen0_ia64 Mon Sep 18 14:28:16 2006 -0500 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.16.13-xen0 -# Fri Jul 28 16:33:47 2006 +# Fri Sep 1 11:03:26 2006 # # @@ -1512,12 +1512,10 @@ CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_XEN_BALLOON=y CONFIG_XEN_SKBUFF=y -CONFIG_XEN_NETDEV_BACKEND=y -CONFIG_XEN_NETDEV_FRONTEND=y # CONFIG_XEN_DEVMEM is not set CONFIG_XEN_REBOOT=y # CONFIG_XEN_SMPBOOT is not set -CONFIG_XEN_INTERFACE_VERSION=0x00030202 +CONFIG_XEN_INTERFACE_VERSION=0x00030203 # # XEN @@ -1529,6 +1527,7 @@ CONFIG_XEN_BACKEND=y CONFIG_XEN_BACKEND=y 
CONFIG_XEN_BLKDEV_BACKEND=y # CONFIG_XEN_BLKDEV_TAP is not set +CONFIG_XEN_NETDEV_BACKEND=y # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set CONFIG_XEN_NETDEV_LOOPBACK=y CONFIG_XEN_PCIDEV_BACKEND=y @@ -1538,6 +1537,7 @@ CONFIG_XEN_PCIDEV_BACKEND_SLOT=y # CONFIG_XEN_PCIDEV_BE_DEBUG is not set # CONFIG_XEN_TPMDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y +CONFIG_XEN_NETDEV_FRONTEND=y # CONFIG_XEN_SCRUB_PAGES is not set CONFIG_XEN_DISABLE_SERIAL=y CONFIG_XEN_SYSFS=y diff -r ce9c34c049c5 -r 0bdd578c417f buildconfigs/linux-defconfig_xenU_ia64 --- a/buildconfigs/linux-defconfig_xenU_ia64 Mon Sep 18 09:23:51 2006 -0400 +++ b/buildconfigs/linux-defconfig_xenU_ia64 Mon Sep 18 14:28:16 2006 -0500 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.16.13-xenU -# Fri Jul 28 16:32:18 2006 +# Fri Sep 1 10:50:54 2006 # # @@ -1387,11 +1387,10 @@ CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_XEN_BALLOON=y CONFIG_XEN_SKBUFF=y -CONFIG_XEN_NETDEV_FRONTEND=y # CONFIG_XEN_DEVMEM is not set CONFIG_XEN_REBOOT=y # CONFIG_XEN_SMPBOOT is not set -CONFIG_XEN_INTERFACE_VERSION=0x00030202 +CONFIG_XEN_INTERFACE_VERSION=0x00030203 # # XEN @@ -1402,6 +1401,7 @@ CONFIG_XEN_XENBUS_DEV=y CONFIG_XEN_XENBUS_DEV=y # CONFIG_XEN_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y +CONFIG_XEN_NETDEV_FRONTEND=y # CONFIG_XEN_SCRUB_PAGES is not set # CONFIG_XEN_DISABLE_SERIAL is not set CONFIG_XEN_SYSFS=y diff -r ce9c34c049c5 -r 0bdd578c417f buildconfigs/linux-defconfig_xen_ia64 --- a/buildconfigs/linux-defconfig_xen_ia64 Mon Sep 18 09:23:51 2006 -0400 +++ b/buildconfigs/linux-defconfig_xen_ia64 Mon Sep 18 14:28:16 2006 -0500 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.16.13-xen -# Fri Jul 28 16:33:08 2006 +# Fri Sep 1 10:58:55 2006 # # @@ -1518,12 +1518,10 @@ CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y CONFIG_XEN_BALLOON=y CONFIG_XEN_SKBUFF=y 
-CONFIG_XEN_NETDEV_BACKEND=y -CONFIG_XEN_NETDEV_FRONTEND=y # CONFIG_XEN_DEVMEM is not set CONFIG_XEN_REBOOT=y # CONFIG_XEN_SMPBOOT is not set -CONFIG_XEN_INTERFACE_VERSION=0x00030202 +CONFIG_XEN_INTERFACE_VERSION=0x00030203 # # XEN @@ -1535,6 +1533,7 @@ CONFIG_XEN_BACKEND=y CONFIG_XEN_BACKEND=y CONFIG_XEN_BLKDEV_BACKEND=y # CONFIG_XEN_BLKDEV_TAP is not set +CONFIG_XEN_NETDEV_BACKEND=y # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set CONFIG_XEN_NETDEV_LOOPBACK=y CONFIG_XEN_PCIDEV_BACKEND=y @@ -1544,6 +1543,7 @@ CONFIG_XEN_PCIDEV_BACKEND_SLOT=y # CONFIG_XEN_PCIDEV_BE_DEBUG is not set # CONFIG_XEN_TPMDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y +CONFIG_XEN_NETDEV_FRONTEND=y # CONFIG_XEN_SCRUB_PAGES is not set CONFIG_XEN_DISABLE_SERIAL=y CONFIG_XEN_SYSFS=y diff -r ce9c34c049c5 -r 0bdd578c417f docs/man/xm.pod.1 --- a/docs/man/xm.pod.1 Mon Sep 18 09:23:51 2006 -0400 +++ b/docs/man/xm.pod.1 Mon Sep 18 14:28:16 2006 -0500 @@ -432,7 +432,6 @@ Sample xen domain info looks as follows Sample xen domain info looks as follows (lines wrapped manually to make the man page more readable): - system : Linux host : talon release : 2.6.12.6-xen0 version : #1 Mon Nov 14 14:26:26 EST 2005 @@ -444,13 +443,14 @@ make the man page more readable): threads_per_core : 1 cpu_mhz : 696 hw_caps : 0383fbff:00000000:00000000:00000040 - memory : 767 + total_memory : 767 free_memory : 37 xen_major : 3 xen_minor : 0 xen_extra : -devel xen_caps : xen-3.0-x86_32 - xen_params : virt_start=0xfc000000 + xen_pagesize : 4096 + platform_params : virt_start=0xfc000000 xen_changeset : Mon Nov 14 18:13:38 2005 +0100 7793:090e44133d40 cc_compiler : gcc version 3.4.3 (Mandrakelinux @@ -458,6 +458,7 @@ make the man page more readable): cc_compile_by : sdague cc_compile_domain : (none) cc_compile_date : Mon Nov 14 14:16:48 EST 2005 + xend_config_format : 2 B<FIELDS> diff -r ce9c34c049c5 -r 0bdd578c417f docs/src/user.tex --- a/docs/src/user.tex Mon Sep 18 09:23:51 2006 -0400 +++ b/docs/src/user.tex Mon Sep 
18 14:28:16 2006 -0500 @@ -1654,26 +1654,58 @@ Now unmount (this is important!): In the configuration file set: \begin{quote} + \verb_disk = ['tap:aio:/full/path/to/vm1disk,sda1,w']_ +\end{quote} + +As the virtual machine writes to its `disk', the sparse file will be +filled in and consume more space up to the original 2GB. + +{\em{Note:}} Users that have worked with file-backed VBDs on Xen in previous +versions will be interested to know that this support is now provided through +the blktap driver instead of the loopback driver. This change results in +file-based block devices that are higher-performance, more scalable, and which +provide better safety properties for VBD data. All that is required to update +your existing file-backed VM configurations is to change VBD configuration +lines from: +\begin{quote} \verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_ \end{quote} - -As the virtual machine writes to its `disk', the sparse file will be -filled in and consume more space up to the original 2GB. - -{\bf Note that file-backed VBDs may not be appropriate for backing - I/O-intensive domains.} File-backed VBDs are known to experience +to: +\begin{quote} + \verb_disk = ['tap:aio:/full/path/to/vm1disk,sda1,w']_ +\end{quote} + + +\subsection{Loopback-mounted file-backed VBDs (deprecated)} + +{\em{{\bf{Note:}} Loopback mounted VBDs have now been replaced with + blktap-based support for raw image files, as described above. This + section remains to detail a configuration that was used by older Xen + versions.}} + +Raw image file-backed VBDs may also be attached to VMs using the +Linux loopback driver. 
The only required change to the raw file +instructions above is to specify the configuration entry as: +\begin{quote} + \verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_ +\end{quote} + +{\bf Note that loopback file-backed VBDs may not be appropriate for backing + I/O-intensive domains.} This approach is known to experience substantial slowdowns under heavy I/O workloads, due to the I/O handling by the loopback block device used to support file-backed VBDs -in dom0. Better I/O performance can be achieved by using either -LVM-backed VBDs (Section~\ref{s:using-lvm-backed-vbds}) or physical -devices as VBDs (Section~\ref{s:exporting-physical-devices-as-vbds}). - -Linux supports a maximum of eight file-backed VBDs across all domains -by default. This limit can be statically increased by using the -\emph{max\_loop} module parameter if CONFIG\_BLK\_DEV\_LOOP is -compiled as a module in the dom0 kernel, or by using the -\emph{max\_loop=n} boot option if CONFIG\_BLK\_DEV\_LOOP is compiled -directly into the dom0 kernel. +in dom0. Loopback support remains for old Xen installations, and users +are strongly encouraged to use the blktap-based file support (using +``{\tt{tap:aio}}'' as described above). + +Additionally, Linux supports a maximum of eight loopback file-backed +VBDs across all domains by default. This limit can be statically +increased by using the \emph{max\_loop} module parameter if +CONFIG\_BLK\_DEV\_LOOP is compiled as a module in the dom0 kernel, or +by using the \emph{max\_loop=n} boot option if CONFIG\_BLK\_DEV\_LOOP +is compiled directly into the dom0 kernel. Again, users are encouraged +to use the blktap-based file support described above which scales to a much +larger number of active VBDs. 
\section{Using LVM-backed VBDs} diff -r ce9c34c049c5 -r 0bdd578c417f extras/mini-os/Makefile --- a/extras/mini-os/Makefile Mon Sep 18 09:23:51 2006 -0400 +++ b/extras/mini-os/Makefile Mon Sep 18 14:28:16 2006 -0500 @@ -7,9 +7,12 @@ include $(XEN_ROOT)/Config.mk # Set TARGET_ARCH override TARGET_ARCH := $(XEN_TARGET_ARCH) +XEN_INTERFACE_VERSION := 0x00030203 + # NB. '-Wcast-qual' is nasty, so I omitted it. CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline +CFLAGS += -D__XEN_INTERFACE_VERSION__=$(XEN_INTERFACE_VERSION) ASFLAGS = -D__ASSEMBLY__ diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Mon Sep 18 14:28:16 2006 -0500 @@ -1380,8 +1380,10 @@ legacy_init_iomem_resources(struct e820e * so we try it repeatedly and let the resource manager * test it. */ +#ifndef CONFIG_XEN request_resource(res, code_resource); request_resource(res, data_resource); +#endif #ifdef CONFIG_KEXEC request_resource(res, &crashk_res); #endif @@ -1454,11 +1456,8 @@ static void __init register_memory(void) int i; /* Nothing to do if not running in dom0. */ - if (!is_initial_xendomain()) { - legacy_init_iomem_resources(e820.map, e820.nr_map, - &code_resource, &data_resource); + if (!is_initial_xendomain()) return; - } #ifdef CONFIG_XEN machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE); diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Mon Sep 18 14:28:16 2006 -0500 @@ -22,15 +22,6 @@ #define ISA_START_ADDRESS 0x0 #define ISA_END_ADDRESS 0x100000 -#if 0 /* not PAE safe */ -/* These hacky macros avoid phys->machine translations. 
*/ -#define __direct_pte(x) ((pte_t) { (x) } ) -#define __direct_mk_pte(page_nr,pgprot) \ - __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) -#define direct_mk_pte_phys(physpage, pgprot) \ - __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) -#endif - static int direct_remap_area_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long address, @@ -66,17 +57,16 @@ static int __direct_remap_pfn_range(stru for (i = 0; i < size; i += PAGE_SIZE) { if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) { - /* Fill in the PTE pointers. */ + /* Flush a full batch after filling in the PTE ptrs. */ rc = apply_to_page_range(mm, start_address, address - start_address, direct_remap_area_pte_fn, &w); if (rc) goto out; - w = u; rc = -EFAULT; if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) goto out; - v = u; + v = w = u; start_address = address; } @@ -92,7 +82,7 @@ static int __direct_remap_pfn_range(stru } if (v != u) { - /* get the ptep's filled in */ + /* Final batch. */ rc = apply_to_page_range(mm, start_address, address - start_address, direct_remap_area_pte_fn, &w); @@ -178,32 +168,6 @@ int touch_pte_range(struct mm_struct *mm } EXPORT_SYMBOL(touch_pte_range); - -void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot) -{ - int error; - - struct vm_struct *vma; - vma = get_vm_area (vm_size, VM_IOREMAP); - - if (vma == NULL) { - printk ("ioremap.c,vm_map_xen_pages(): " - "Failed to get VMA area\n"); - return NULL; - } - - error = direct_kernel_remap_pfn_range((unsigned long) vma->addr, - maddr >> PAGE_SHIFT, vm_size, - prot, DOMID_SELF ); - if (error == 0) { - return vma->addr; - } else { - printk ("ioremap.c,vm_map_xen_pages(): " - "Failed to map xen shared pages into kernel space\n"); - return NULL; - } -} -EXPORT_SYMBOL(vm_map_xen_pages); /* * Does @address reside within a non-highmem page that is local to this virtual diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c --- 
a/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c Mon Sep 18 14:28:16 2006 -0500 @@ -26,15 +26,16 @@ #include <xen/evtchn.h> #include "op_counter.h" +#include <xen/driver_util.h> #include <xen/interface/xen.h> #include <xen/interface/xenoprof.h> #include <../../../drivers/oprofile/cpu_buffer.h> #include <../../../drivers/oprofile/event_buffer.h> +#define MAX_XENOPROF_SAMPLES 16 + static int xenoprof_start(void); static void xenoprof_stop(void); - -void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot); static int xenoprof_enabled = 0; static unsigned int num_events = 0; @@ -44,7 +45,7 @@ static int active_defined; /* sample buffers shared with Xen */ xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS]; /* Shared buffer area */ -char * shared_buffer; +char * shared_buffer = NULL; /* Number of buffers in shared area (one per VCPU) */ int nbuf; /* Mappings of VIRQ_XENOPROF to irq number (per cpu) */ @@ -234,13 +235,57 @@ static int bind_virq(void) } +static int map_xenoprof_buffer(int max_samples) +{ + struct xenoprof_get_buffer get_buffer; + struct xenoprof_buf *buf; + int npages, ret, i; + struct vm_struct *area; + + if ( shared_buffer ) + return 0; + + get_buffer.max_samples = max_samples; + + if ( (ret = HYPERVISOR_xenoprof_op(XENOPROF_get_buffer, &get_buffer)) ) + return ret; + + nbuf = get_buffer.nbuf; + npages = (get_buffer.bufsize * nbuf - 1) / PAGE_SIZE + 1; + + area = alloc_vm_area(npages * PAGE_SIZE); + if (area == NULL) + return -ENOMEM; + + if ( (ret = direct_kernel_remap_pfn_range( + (unsigned long)area->addr, + get_buffer.buf_maddr >> PAGE_SHIFT, + npages * PAGE_SIZE, __pgprot(_KERNPG_TABLE), DOMID_SELF)) ) { + vunmap(area->addr); + return ret; + } + + shared_buffer = area->addr; + for (i=0; i< nbuf; i++) { + buf = (struct xenoprof_buf*) + &shared_buffer[i * get_buffer.bufsize]; + BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS); + xenoprof_buf[buf->vcpu_id] = 
buf; + } + + return 0; +} + + static int xenoprof_setup(void) { int ret; int i; - ret = bind_virq(); - if (ret) + if ( (ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES)) ) + return ret; + + if ( (ret = bind_virq()) ) return ret; if (is_primary) { @@ -373,9 +418,9 @@ static int xenoprof_set_passive(int * p_ { int ret; int i, j; - int vm_size; int npages; struct xenoprof_buf *buf; + struct vm_struct *area; pgprot_t prot = __pgprot(_KERNPG_TABLE); if (!is_primary) @@ -391,19 +436,29 @@ static int xenoprof_set_passive(int * p_ for (i = 0; i < pdoms; i++) { passive_domains[i].domain_id = p_domains[i]; passive_domains[i].max_samples = 2048; - ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, &passive_domains[i]); + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, + &passive_domains[i]); if (ret) - return ret; + goto out; npages = (passive_domains[i].bufsize * passive_domains[i].nbuf - 1) / PAGE_SIZE + 1; - vm_size = npages * PAGE_SIZE; - - p_shared_buffer[i] = (char *)vm_map_xen_pages(passive_domains[i].buf_maddr, - vm_size, prot); - if (!p_shared_buffer[i]) { + + area = alloc_vm_area(npages * PAGE_SIZE); + if (area == NULL) { ret = -ENOMEM; goto out; } + + ret = direct_kernel_remap_pfn_range( + (unsigned long)area->addr, + passive_domains[i].buf_maddr >> PAGE_SHIFT, + npages * PAGE_SIZE, prot, DOMID_SELF); + if (ret) { + vunmap(area->addr); + goto out; + } + + p_shared_buffer[i] = area->addr; for (j = 0; j < passive_domains[i].nbuf; j++) { buf = (struct xenoprof_buf *) @@ -473,43 +528,18 @@ int __init oprofile_arch_init(struct opr int __init oprofile_arch_init(struct oprofile_operations * ops) { struct xenoprof_init init; - struct xenoprof_buf * buf; - int vm_size; - int npages; - int ret; - int i; - - init.max_samples = 16; + int ret, i; + ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init); if (!ret) { - pgprot_t prot = __pgprot(_KERNPG_TABLE); - num_events = init.num_events; is_primary = init.is_primary; - nbuf = init.nbuf; /* just in case - make sure we do not 
overflow event list - (i.e. counter_config list) */ + (i.e. counter_config list) */ if (num_events > OP_MAX_COUNTER) num_events = OP_MAX_COUNTER; - - npages = (init.bufsize * nbuf - 1) / PAGE_SIZE + 1; - vm_size = npages * PAGE_SIZE; - - shared_buffer = (char *)vm_map_xen_pages(init.buf_maddr, - vm_size, prot); - if (!shared_buffer) { - ret = -ENOMEM; - goto out; - } - - for (i=0; i< nbuf; i++) { - buf = (struct xenoprof_buf*) - &shared_buffer[i * init.bufsize]; - BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS); - xenoprof_buf[buf->vcpu_id] = buf; - } /* cpu_type is detected by Xen */ cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0; @@ -525,7 +555,6 @@ int __init oprofile_arch_init(struct opr active_defined = 0; } - out: printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, " "is_primary %d\n", ret, num_events, is_primary); return ret; diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/arch/ia64/Kconfig --- a/linux-2.6-xen-sparse/arch/ia64/Kconfig Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig Mon Sep 18 14:28:16 2006 -0500 @@ -516,12 +516,6 @@ config XEN_SKBUFF config XEN_SKBUFF default y -config XEN_NETDEV_BACKEND - default y - -config XEN_NETDEV_FRONTEND - default y - config XEN_DEVMEM default n diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Mon Sep 18 14:28:16 2006 -0500 @@ -255,8 +255,10 @@ void __init e820_reserve_resources(struc * so we try it repeatedly and let the resource manager * test it. 
*/ +#ifndef CONFIG_XEN request_resource(res, &code_resource); request_resource(res, &data_resource); +#endif #ifdef CONFIG_KEXEC request_resource(res, &crashk_res); #endif diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Mon Sep 18 14:28:16 2006 -0500 @@ -944,9 +944,10 @@ void __init setup_arch(char **cmdline_p) BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)); e820_reserve_resources(machine_e820, memmap.nr_entries); - } else -#endif + } +#else e820_reserve_resources(e820.map, e820.nr_map); +#endif request_resource(&iomem_resource, &video_ram_resource); diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Sep 18 14:28:16 2006 -0500 @@ -301,11 +301,11 @@ static void frontend_changed(struct xenb struct backend_info *be = dev->dev.driver_data; int err; - DPRINTK(""); + DPRINTK("%s", xenbus_strstate(frontend_state)); switch (frontend_state) { case XenbusStateInitialising: - if (dev->state == XenbusStateClosing) { + if (dev->state == XenbusStateClosed) { printk("%s: %s: prepare for reconnect\n", __FUNCTION__, dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); @@ -331,8 +331,12 @@ static void frontend_changed(struct xenb xenbus_switch_state(dev, XenbusStateClosing); break; + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ case XenbusStateUnknown: - case XenbusStateClosed: device_unregister(&dev->dev); break; diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Mon Sep 18 09:23:51 2006 -0400 
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Mon Sep 18 14:28:16 2006 -0500 @@ -273,7 +273,7 @@ static void backend_changed(struct xenbu xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); down(&bd->bd_sem); - if (info->users > 0) + if (info->users > 0 && system_state == SYSTEM_RUNNING) xenbus_dev_error(dev, -EBUSY, "Device in use; refusing to close"); else @@ -360,7 +360,7 @@ static void blkfront_closing(struct xenb xlvbd_del(info); - xenbus_switch_state(dev, XenbusStateClosed); + xenbus_frontend_closed(dev); } diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/blktap/common.h --- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Mon Sep 18 14:28:16 2006 -0500 @@ -91,6 +91,7 @@ void tap_blkif_free(blkif_t *blkif); void tap_blkif_free(blkif_t *blkif); int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); +void tap_blkif_unmap(blkif_t *blkif); #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/blktap/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Mon Sep 18 14:28:16 2006 -0500 @@ -135,20 +135,25 @@ int tap_blkif_map(blkif_t *blkif, unsign return 0; } +void tap_blkif_unmap(blkif_t *blkif) +{ + if (blkif->irq) { + unbind_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; + } + if (blkif->blk_ring.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_ring.sring = NULL; + } +} + void tap_blkif_free(blkif_t *blkif) { atomic_dec(&blkif->refcnt); wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); - /* Already disconnected? 
*/ - if (blkif->irq) - unbind_from_irqhandler(blkif->irq, blkif); - - if (blkif->blk_ring.sring) { - unmap_frontend_page(blkif); - free_vm_area(blkif->blk_ring_area); - } - + tap_blkif_unmap(blkif); kmem_cache_free(blkif_cachep, blkif); } diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Mon Sep 18 14:28:16 2006 -0500 @@ -247,6 +247,11 @@ static void tap_frontend_changed(struct switch (frontend_state) { case XenbusStateInitialising: + if (dev->state == XenbusStateClosed) { + printk("%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); + xenbus_switch_state(dev, XenbusStateInitWait); + } break; case XenbusStateInitialised: @@ -264,11 +269,20 @@ static void tap_frontend_changed(struct break; case XenbusStateClosing: + if (be->blkif->xenblkd) { + kthread_stop(be->blkif->xenblkd); + be->blkif->xenblkd = NULL; + } + tap_blkif_unmap(be->blkif); xenbus_switch_state(dev, XenbusStateClosing); break; + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ case XenbusStateUnknown: - case XenbusStateClosed: device_unregister(&dev->dev); break; diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/console/console.c --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Mon Sep 18 14:28:16 2006 -0500 @@ -182,17 +182,18 @@ static struct console kcons_info = { .index = -1, }; -#define __RETCODE 0 static int __init xen_console_init(void) { if (!is_running_on_xen()) - return __RETCODE; + goto out; if (is_initial_xendomain()) { if (xc_mode == XC_DEFAULT) xc_mode = XC_SERIAL; kcons_info.write = kcons_write_dom0; } else { + if (!xen_start_info->console.domU.evtchn) + goto out; if (xc_mode == XC_DEFAULT) 
xc_mode = XC_TTY; kcons_info.write = kcons_write; @@ -212,14 +213,15 @@ static int __init xen_console_init(void) break; default: - return __RETCODE; + goto out; } wbuf = alloc_bootmem(wbuf_size); register_console(&kcons_info); - return __RETCODE; + out: + return 0; } console_initcall(xen_console_init); @@ -247,7 +249,9 @@ void xencons_force_flush(void) int sz; /* Emergency console is synchronous, so there's nothing to flush. */ - if (is_initial_xendomain()) + if (!is_running_on_xen() || + is_initial_xendomain() || + !xen_start_info->console.domU.evtchn) return; /* Spin until console data is flushed through to the daemon. */ @@ -582,7 +586,11 @@ static int __init xencons_init(void) if (xc_mode == XC_OFF) return 0; - xencons_ring_init(); + if (!is_initial_xendomain()) { + rc = xencons_ring_init(); + if (rc) + return rc; + } xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ? 1 : MAX_NR_CONSOLES); diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c --- a/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c Mon Sep 18 14:28:16 2006 -0500 @@ -110,24 +110,26 @@ static irqreturn_t handle_input(int irq, int xencons_ring_init(void) { - int err; + int irq; if (xencons_irq) unbind_from_irqhandler(xencons_irq, NULL); xencons_irq = 0; - if (!xen_start_info->console.domU.evtchn) - return 0; + if (!is_running_on_xen() || + is_initial_xendomain() || + !xen_start_info->console.domU.evtchn) + return -ENODEV; - err = bind_evtchn_to_irqhandler( + irq = bind_evtchn_to_irqhandler( xen_start_info->console.domU.evtchn, handle_input, 0, "xencons", NULL); - if (err <= 0) { - printk(KERN_ERR "XEN console request irq failed %i\n", err); - return err; + if (irq < 0) { + printk(KERN_ERR "XEN console request irq failed %i\n", irq); + return irq; } - xencons_irq = err; + xencons_irq = irq; /* In case we have in-flight data after save/restore... 
*/ notify_daemon(); diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Mon Sep 18 14:28:16 2006 -0500 @@ -228,13 +228,13 @@ static void frontend_changed(struct xenb { struct backend_info *be = dev->dev.driver_data; - DPRINTK(""); + DPRINTK("%s", xenbus_strstate(frontend_state)); be->frontend_state = frontend_state; switch (frontend_state) { case XenbusStateInitialising: - if (dev->state == XenbusStateClosing) { + if (dev->state == XenbusStateClosed) { printk("%s: %s: prepare for reconnect\n", __FUNCTION__, dev->nodename); if (be->netif) { @@ -260,8 +260,12 @@ static void frontend_changed(struct xenb xenbus_switch_state(dev, XenbusStateClosing); break; + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ case XenbusStateUnknown: - case XenbusStateClosed: if (be->netif != NULL) kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); device_unregister(&dev->dev); @@ -421,6 +425,14 @@ static int connect_rings(struct backend_ if (val) { be->netif->features |= NETIF_F_TSO; be->netif->dev->features |= NETIF_F_TSO; + } + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", + "%d", &val) < 0) + val = 0; + if (val) { + be->netif->features &= ~NETIF_F_IP_CSUM; + be->netif->dev->features &= ~NETIF_F_IP_CSUM; } /* Map the shared frame, irq etc. 
*/ diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Sep 18 14:28:16 2006 -0500 @@ -486,7 +486,7 @@ static void backend_changed(struct xenbu struct netfront_info *np = dev->dev.driver_data; struct net_device *netdev = np->netdev; - DPRINTK("\n"); + DPRINTK("%s\n", xenbus_strstate(backend_state)); switch (backend_state) { case XenbusStateInitialising: @@ -1936,11 +1936,10 @@ static void netfront_closing(struct xenb { struct netfront_info *info = dev->dev.driver_data; - DPRINTK("netfront_closing: %s removed\n", dev->nodename); + DPRINTK("%s\n", dev->nodename); close_netdev(info); - - xenbus_switch_state(dev, XenbusStateClosed); + xenbus_frontend_closed(dev); } diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c Mon Sep 18 14:28:16 2006 -0500 @@ -132,4 +132,16 @@ int xenbus_unmap_ring(struct xenbus_devi } EXPORT_SYMBOL_GPL(xenbus_unmap_ring); +int xenbus_dev_is_online(struct xenbus_device *dev) +{ + int rc, val; + + rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val); + if (rc != 1) + val = 0; /* no online node present */ + + return val; +} +EXPORT_SYMBOL_GPL(xenbus_dev_is_online); + MODULE_LICENSE("Dual BSD/GPL"); diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Mon Sep 18 14:28:16 2006 -0500 @@ -41,6 +41,20 @@ extern char *kasprintf(const char *fmt, #define DPRINTK(fmt, args...) 
\ pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +char *xenbus_strstate(enum xenbus_state state) +{ + static char *name[] = { + [ XenbusStateUnknown ] = "Unknown", + [ XenbusStateInitialising ] = "Initialising", + [ XenbusStateInitWait ] = "InitWait", + [ XenbusStateInitialised ] = "Initialised", + [ XenbusStateConnected ] = "Connected", + [ XenbusStateClosing ] = "Closing", + [ XenbusStateClosed ] = "Closed", + }; + return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; +} + int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, @@ -124,6 +138,13 @@ int xenbus_switch_state(struct xenbus_de } EXPORT_SYMBOL_GPL(xenbus_switch_state); +int xenbus_frontend_closed(struct xenbus_device *dev) +{ + xenbus_switch_state(dev, XenbusStateClosed); + complete(&dev->down); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_frontend_closed); /** * Return the path to the error node for the given device, or NULL on failure. diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Mon Sep 18 14:28:16 2006 -0500 @@ -73,6 +73,7 @@ static int xenbus_probe_backend(const ch static int xenbus_dev_probe(struct device *_dev); static int xenbus_dev_remove(struct device *_dev); +static void xenbus_dev_shutdown(struct device *_dev); /* If something in array of ids matches this device, return it. 
*/ static const struct xenbus_device_id * @@ -192,6 +193,7 @@ static struct xen_bus_type xenbus_fronte .match = xenbus_match, .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, }, .dev = { .bus_id = "xen", @@ -246,6 +248,7 @@ static struct xen_bus_type xenbus_backen .match = xenbus_match, .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, +// .shutdown = xenbus_dev_shutdown, .uevent = xenbus_uevent_backend, }, .dev = { @@ -316,8 +319,9 @@ static void otherend_changed(struct xenb state = xenbus_read_driver_state(dev->otherend); - DPRINTK("state is %d, %s, %s", - state, dev->otherend_watch.node, vec[XS_WATCH_PATH]); + DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state), + dev->otherend_watch.node, vec[XS_WATCH_PATH]); + if (drv->otherend_changed) drv->otherend_changed(dev, state); } @@ -348,7 +352,7 @@ static int xenbus_dev_probe(struct devic const struct xenbus_device_id *id; int err; - DPRINTK(""); + DPRINTK("%s", dev->nodename); if (!drv->probe) { err = -ENODEV; @@ -393,7 +397,7 @@ static int xenbus_dev_remove(struct devi struct xenbus_device *dev = to_xenbus_device(_dev); struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); - DPRINTK(""); + DPRINTK("%s", dev->nodename); free_otherend_watch(dev); free_otherend_details(dev); @@ -403,6 +407,27 @@ static int xenbus_dev_remove(struct devi xenbus_switch_state(dev, XenbusStateClosed); return 0; +} + +static void xenbus_dev_shutdown(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + unsigned long timeout = 5*HZ; + + DPRINTK("%s", dev->nodename); + + get_device(&dev->dev); + if (dev->state != XenbusStateConnected) { + printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__, + dev->nodename, xenbus_strstate(dev->state)); + goto out; + } + xenbus_switch_state(dev, XenbusStateClosing); + timeout = wait_for_completion_timeout(&dev->down, timeout); + if (!timeout) + printk("%s: %s timeout closing device\n", __FUNCTION__, 
dev->nodename); + out: + put_device(&dev->dev); } static int xenbus_register_driver_common(struct xenbus_driver *drv, @@ -587,6 +612,7 @@ static int xenbus_probe_node(struct xen_ tmpstring += strlen(tmpstring) + 1; strcpy(tmpstring, type); xendev->devicetype = tmpstring; + init_completion(&xendev->down); xendev->dev.parent = &bus->dev; xendev->dev.bus = &bus->bus; diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/include/asm-ia64/dma-mapping.h --- a/linux-2.6-xen-sparse/include/asm-ia64/dma-mapping.h Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/dma-mapping.h Mon Sep 18 14:28:16 2006 -0500 @@ -84,7 +84,9 @@ dma_sync_sg_for_device(struct device *de #define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir) \ dma_sync_single_for_device(dev, dma_handle, size, dir) +#ifndef CONFIG_XEN #define dma_supported platform_dma_supported +#endif static inline int dma_set_mask (struct device *dev, u64 mask) diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/include/asm-ia64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Mon Sep 18 14:28:16 2006 -0500 @@ -205,8 +205,12 @@ ____HYPERVISOR_memory_op( } #include <xen/interface/memory.h> +#ifdef CONFIG_VMX_GUEST +# define ia64_xenmem_reservation_op(op, xmr) (0) +#else int ia64_xenmem_reservation_op(unsigned long op, struct xen_memory_reservation* reservation__); +#endif static inline int HYPERVISOR_memory_op( unsigned int cmd, void *arg) diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Mon Sep 18 14:28:16 2006 -0500 @@ -33,13 +33,17 @@ #ifndef __HYPERVISOR_H__ #define __HYPERVISOR_H__ -#if !defined(CONFIG_XEN) && !defined(CONFIG_VMX_GUEST) -#define is_running_on_xen() (0) -#define 
HYPERVISOR_ioremap(offset, size) (offset) -#else +#ifdef CONFIG_XEN extern int running_on_xen; #define is_running_on_xen() (running_on_xen) -#endif +#else /* CONFIG_XEN */ +# ifdef CONFIG_VMX_GUEST +# define is_running_on_xen() (1) +# else /* CONFIG_VMX_GUEST */ +# define is_running_on_xen() (0) +# define HYPERVISOR_ioremap(offset, size) (offset) +# endif /* CONFIG_VMX_GUEST */ +#endif /* CONFIG_XEN */ #if defined(CONFIG_XEN) || defined(CONFIG_VMX_GUEST) #include <linux/config.h> diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/include/asm-ia64/machvec.h --- a/linux-2.6-xen-sparse/include/asm-ia64/machvec.h Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/machvec.h Mon Sep 18 14:28:16 2006 -0500 @@ -247,21 +247,6 @@ extern void machvec_init (const char *na # error Unknown configuration. Update asm-ia64/machvec.h. # endif /* CONFIG_IA64_GENERIC */ -#ifdef CONFIG_XEN -# define platform_dma_map_sg dma_map_sg -# define platform_dma_unmap_sg dma_unmap_sg -# define platform_dma_mapping_error dma_mapping_error -# define platform_dma_supported dma_supported -# define platform_dma_alloc_coherent dma_alloc_coherent -# define platform_dma_free_coherent dma_free_coherent -# define platform_dma_map_single dma_map_single -# define platform_dma_unmap_single dma_unmap_single -# define platform_dma_sync_single_for_cpu \ - dma_sync_single_for_cpu -# define platform_dma_sync_single_for_device \ - dma_sync_single_for_device -#endif - /* * Declare default routines which aren't declared anywhere else: */ diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/include/asm-ia64/maddr.h --- a/linux-2.6-xen-sparse/include/asm-ia64/maddr.h Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/maddr.h Mon Sep 18 14:28:16 2006 -0500 @@ -81,7 +81,11 @@ mfn_to_local_pfn(unsigned long mfn) #define virt_to_machine(virt) __pa(virt) // for tpmfront.c #define set_phys_to_machine(pfn, mfn) do { } while (0) +#ifdef CONFIG_VMX_GUEST +extern void 
xen_machphys_update(unsigned long mfn, unsigned long pfn); +#else /* CONFIG_VMX_GUEST */ #define xen_machphys_update(mfn, pfn) do { } while (0) +#endif /* CONFIG_VMX_GUEST */ typedef unsigned long maddr_t; // to compile netback, netfront diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/include/xen/xenbus.h --- a/linux-2.6-xen-sparse/include/xen/xenbus.h Mon Sep 18 09:23:51 2006 -0400 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Mon Sep 18 14:28:16 2006 -0500 @@ -37,6 +37,7 @@ #include <linux/device.h> #include <linux/notifier.h> #include <linux/mutex.h> +#include <linux/completion.h> #include <xen/interface/xen.h> #include <xen/interface/grant_table.h> #include <xen/interface/io/xenbus.h> @@ -74,6 +75,7 @@ struct xenbus_device { struct xenbus_watch otherend_watch; struct device dev; enum xenbus_state state; + struct completion down; }; static inline struct xenbus_device *to_xenbus_device(struct device *dev) @@ -297,4 +299,8 @@ void xenbus_dev_fatal(struct xenbus_devi int __init xenbus_dev_init(void); +char *xenbus_strstate(enum xenbus_state state); +int xenbus_dev_is_online(struct xenbus_device *dev); +int xenbus_frontend_closed(struct xenbus_device *dev); + #endif /* _XEN_XENBUS_H */ diff -r ce9c34c049c5 -r 0bdd578c417f patches/linux-2.6.16.13/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch --- a/patches/linux-2.6.16.13/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch Mon Sep 18 09:23:51 2006 -0400 +++ b/patches/linux-2.6.16.13/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch Mon Sep 18 14:28:16 2006 -0500 @@ -1,5 +1,3 @@ diff --git a/arch/x86_64/kernel/vmlinux. 
-diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S -index 7c4de31..ef418b3 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86 @@ -24,7 +22,7 @@ index 7c4de31..ef418b3 100644 /* out-of-line lock text */ .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) } -@@ -57,7 +63,7 @@ #endif +@@ -57,17 +63,10 @@ #endif .data : AT(ADDR(.data) - LOAD_OFFSET) { *(.data) CONSTRUCTORS @@ -33,7 +31,17 @@ index 7c4de31..ef418b3 100644 _edata = .; /* End of data section */ -@@ -89,7 +95,7 @@ #define VVIRT_OFFSET (VSYSCALL_ADDR - VS +- __bss_start = .; /* BSS */ +- .bss : AT(ADDR(.bss) - LOAD_OFFSET) { +- *(.bss.page_aligned) +- *(.bss) +- } +- __bss_stop = .; +- + . = ALIGN(PAGE_SIZE); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { +@@ -89,7 +88,7 @@ #define VVIRT_OFFSET (VSYSCALL_ADDR - VS #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) . = VSYSCALL_ADDR; @@ -42,7 +50,7 @@ index 7c4de31..ef418b3 100644 __vsyscall_0 = VSYSCALL_VIRT_ADDR; . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); -@@ -132,7 +138,7 @@ #undef VVIRT +@@ -132,7 +131,7 @@ #undef VVIRT . = ALIGN(8192); /* init_task */ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { *(.data.init_task) @@ -51,7 +59,22 @@ index 7c4de31..ef418b3 100644 . = ALIGN(4096); .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { -@@ -235,4 +241,6 @@ #endif +@@ -222,6 +221,14 @@ SECTIONS + . = ALIGN(4096); + __nosave_end = .; + ++ __bss_start = .; /* BSS */ ++ . = ALIGN(4096); ++ .bss : AT(ADDR(.bss) - LOAD_OFFSET) { ++ *(.bss.page_aligned) ++ *(.bss) ++ } ++ __bss_stop = .; ++ + _end = . 
; + + /* Sections to be discarded */ +@@ -235,4 +242,6 @@ #endif STABS_DEBUG DWARF_DEBUG diff -r ce9c34c049c5 -r 0bdd578c417f tools/blktap/drivers/block-qcow.c --- a/tools/blktap/drivers/block-qcow.c Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/blktap/drivers/block-qcow.c Mon Sep 18 14:28:16 2006 -0500 @@ -235,6 +235,25 @@ static uint32_t gen_cksum(char *ptr, int memcpy(&ret, md, sizeof(uint32_t)); free(md); return ret; +} + +static int get_filesize(char *filename, uint64_t *size, struct stat *st) +{ + int blockfd; + + /*Set to the backing file size*/ + if(S_ISBLK(st->st_mode)) { + blockfd = open(filename, O_RDONLY); + if (blockfd < 0) + return -1; + if (ioctl(blockfd,BLKGETSIZE,size)!=0) { + printf("Unable to get Block device size\n"); + close(blockfd); + return -1; + } + close(blockfd); + } else *size = (st->st_size >> SECTOR_SHIFT); + return 0; } static int qcow_set_key(struct td_state *bs, const char *key) @@ -1204,12 +1223,14 @@ int qcow_create(const char *filename, ui header_size += backing_filename_len; /*Set to the backing file size*/ - size = (st.st_size >> SECTOR_SHIFT); + if(get_filesize(backing_filename, &size, &st)) { + return -1; + } DPRINTF("Backing file size detected: %lld sectors" "(total %lld [%lld MB])\n", - (long long)total_size, - (long long)(total_size << SECTOR_SHIFT), - (long long)(total_size >> 11)); + (long long)size, + (long long)(size << SECTOR_SHIFT), + (long long)(size >> 11)); } else { backing_file = NULL; DPRINTF("Setting file size: %lld (total %lld)\n", diff -r ce9c34c049c5 -r 0bdd578c417f tools/blktap/drivers/tapdisk.c --- a/tools/blktap/drivers/tapdisk.c Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/blktap/drivers/tapdisk.c Mon Sep 18 14:28:16 2006 -0500 @@ -127,18 +127,15 @@ static inline int LOCAL_FD_SET(fd_set *r static inline int LOCAL_FD_SET(fd_set *readfds) { fd_list_entry_t *ptr; - int i; ptr = fd_start; while (ptr != NULL) { if (ptr->tap_fd) { FD_SET(ptr->tap_fd, readfds); - for (i = 0; i < MAX_IOFD; i++) { - if (ptr->io_fd[i]) 
- FD_SET(ptr->io_fd[i], readfds); - maxfds = (ptr->io_fd[i] > maxfds ? - ptr->io_fd[i]: maxfds); - } + if (ptr->io_fd[READ]) + FD_SET(ptr->io_fd[READ], readfds); + maxfds = (ptr->io_fd[READ] > maxfds ? + ptr->io_fd[READ]: maxfds); maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd: maxfds); } ptr = ptr->next; @@ -580,7 +577,7 @@ static void get_io_request(struct td_sta int main(int argc, char *argv[]) { - int len, msglen, ret, i; + int len, msglen, ret; char *p, *buf; fd_set readfds, writefds; struct timeval timeout; @@ -633,16 +630,14 @@ int main(int argc, char *argv[]) (fd_set *) 0, &timeout); if (ret > 0) - { + { ptr = fd_start; while (ptr != NULL) { if (FD_ISSET(ptr->tap_fd, &readfds)) get_io_request(ptr->s); - for (i = 0; i < MAX_IOFD; i++) { - if (ptr->io_fd[i] && - FD_ISSET(ptr->io_fd[i], &readfds)) - io_done(ptr->s, i); - } + if (ptr->io_fd[READ] && + FD_ISSET(ptr->io_fd[READ], &readfds)) + io_done(ptr->s, READ); ptr = ptr->next; } diff -r ce9c34c049c5 -r 0bdd578c417f tools/check/check_python --- a/tools/check/check_python Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/check/check_python Mon Sep 18 14:28:16 2006 -0500 @@ -7,4 +7,4 @@ function error { exit 1 } -python -V 2>&1 | cut -d ' ' -f 2 | grep -q -E '^2.2|^2.3|^2.4' || error +python -V 2>&1 | cut -d ' ' -f 2 | grep -q '^2.[2345]' || error diff -r ce9c34c049c5 -r 0bdd578c417f tools/console/client/main.c --- a/tools/console/client/main.c Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/console/client/main.c Mon Sep 18 14:28:16 2006 -0500 @@ -220,7 +220,8 @@ int main(int argc, char **argv) user friendly, we'll bail out here since no data will ever show up on domain-0. */ if (domid == 0) { - err(errno, "Could not read tty from store"); + fprintf(stderr, "Can't specify Domain-0\n"); + exit(EINVAL); } /* Wait a little bit for tty to appear. 
There is a race diff -r ce9c34c049c5 -r 0bdd578c417f tools/console/daemon/io.c --- a/tools/console/daemon/io.c Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/console/daemon/io.c Mon Sep 18 14:28:16 2006 -0500 @@ -293,12 +293,14 @@ static bool watch_domain(struct domain * bool success; sprintf(domid_str, "dom%u", dom->domid); - if (watch) + if (watch) { success = xs_watch(xs, dom->conspath, domid_str); - else + if (success) + domain_create_ring(dom); + } else { success = xs_unwatch(xs, dom->conspath, domid_str); - if (success) - domain_create_ring(dom); + } + return success; } diff -r ce9c34c049c5 -r 0bdd578c417f tools/debugger/gdb/gdbbuild --- a/tools/debugger/gdb/gdbbuild Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/debugger/gdb/gdbbuild Mon Sep 18 14:28:16 2006 -0500 @@ -1,4 +1,6 @@ #!/bin/sh + +set -e [ "$GDB_MIRROR" ] || GDB_MIRROR="ftp://ftp.gnu.org/gnu/gdb/" @@ -18,7 +20,7 @@ if [ "$MAKE" ]; then if [ "$MAKE" ]; then $MAKE elif which gmake ; then - gmake -j4 CFLAGS=-D__XEN_TOOLS__ + gmake -j4 else - make -j4 CFLAGS=-D__XEN_TOOLS__ + make -j4 fi diff -r ce9c34c049c5 -r 0bdd578c417f tools/examples/vtpm-common.sh --- a/tools/examples/vtpm-common.sh Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/examples/vtpm-common.sh Mon Sep 18 14:28:16 2006 -0500 @@ -47,6 +47,9 @@ else } function vtpm_migrate() { echo "Error: vTPM migration accross machines not implemented." 
+ } + function vtpm_migrate_local() { + echo "Error: local vTPM migration not supported" } function vtpm_migrate_recover() { true @@ -353,6 +356,8 @@ function vtpm_migration_step() { local res=$(vtpm_isLocalAddress $1) if [ "$res" == "0" ]; then vtpm_migrate $1 $2 $3 + else + vtpm_migrate_local fi } diff -r ce9c34c049c5 -r 0bdd578c417f tools/examples/vtpm-impl --- a/tools/examples/vtpm-impl Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/examples/vtpm-impl Mon Sep 18 14:28:16 2006 -0500 @@ -184,3 +184,6 @@ function vtpm_migrate_recover() { echo "Error: Recovery not supported yet" } +function vtpm_migrate_local() { + echo "Error: local vTPM migration not supported" +} diff -r ce9c34c049c5 -r 0bdd578c417f tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/libxc/xenctrl.h Mon Sep 18 14:28:16 2006 -0500 @@ -8,6 +8,11 @@ #ifndef XENCTRL_H #define XENCTRL_H + +/* Tell the Xen public headers we are a user-space tools build. */ +#ifndef __XEN_TOOLS__ +#define __XEN_TOOLS__ 1 +#endif #include <stddef.h> #include <stdint.h> diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/web/SrvBase.py --- a/tools/python/xen/web/SrvBase.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/web/SrvBase.py Mon Sep 18 14:28:16 2006 -0500 @@ -84,6 +84,7 @@ class SrvBase(resource.Resource): try: return op_method(op, req) except Exception, exn: + req.setResponseCode(http.INTERNAL_SERVER_ERROR, "Request failed: " + op) log.exception("Request %s failed.", op) if req.useSxp(): return ['xend.err', str(exn)] diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xend/XendDomain.py Mon Sep 18 14:28:16 2006 -0500 @@ -420,6 +420,10 @@ class XendDomain: """ The following call may raise a XendError exception """ dominfo.testMigrateDevices(True, dst) + if live: + """ Make sure there's memory free for enabling shadow mode """ + 
dominfo.checkLiveMigrateMemory() + if port == 0: port = xroot.get_xend_relocation_port() try: diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xend/XendDomainInfo.py Mon Sep 18 14:28:16 2006 -0500 @@ -49,6 +49,7 @@ from xen.xend.xenstore.xsutil import Get from xen.xend.xenstore.xsutil import GetDomainPath, IntroduceDomain from xen.xend.xenstore.xswatch import xswatch +from xen.xend import arch """Shutdown code for poweroff.""" DOMAIN_POWEROFF = 0 @@ -1087,6 +1088,15 @@ class XendDomainInfo: ## public: def destroyDevice(self, deviceClass, devid): + if type(devid) is str: + devicePath = '%s/device/%s' % (self.dompath, deviceClass) + for entry in xstransact.List(devicePath): + backend = xstransact.Read('%s/%s' % (devicePath, entry), "backend") + devName = xstransact.Read(backend, "dev") + if devName == devid: + # We found the integer matching our devid, use it instead + devid = entry + break return self.getDeviceController(deviceClass).destroyDevice(devid) @@ -1285,28 +1295,37 @@ class XendDomainInfo: for v in range(0, self.info['max_vcpu_id']+1): xc.vcpu_setaffinity(self.domid, v, self.info['cpus']) + # Use architecture- and image-specific calculations to determine + # the various headrooms necessary, given the raw configured + # values. + # reservation, maxmem, memory, and shadow are all in KiB. + reservation = self.image.getRequiredInitialReservation( + self.info['memory'] * 1024) + maxmem = self.image.getRequiredAvailableMemory( + self.info['maxmem'] * 1024) + memory = self.image.getRequiredAvailableMemory( + self.info['memory'] * 1024) + shadow = self.image.getRequiredShadowMemory( + self.info['shadow_memory'] * 1024, + self.info['maxmem'] * 1024) + + # Round shadow up to a multiple of a MiB, as shadow_mem_control + # takes MiB and we must not round down and end up under-providing. 
+ shadow = ((shadow + 1023) / 1024) * 1024 + # set memory limit - maxmem = self.image.getRequiredMemory(self.info['maxmem'] * 1024) xc.domain_setmaxmem(self.domid, maxmem) - mem_kb = self.image.getRequiredMemory(self.info['memory'] * 1024) - - # get the domain's shadow memory requirement - shadow_kb = self.image.getRequiredShadowMemory(mem_kb) - shadow_kb_req = self.info['shadow_memory'] * 1024 - if shadow_kb_req > shadow_kb: - shadow_kb = shadow_kb_req - shadow_mb = (shadow_kb + 1023) / 1024 - # Make sure there's enough RAM available for the domain - balloon.free(mem_kb + shadow_mb * 1024) + balloon.free(memory + shadow) # Set up the shadow memory - shadow_cur = xc.shadow_mem_control(self.domid, shadow_mb) + shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024) self.info['shadow_memory'] = shadow_cur - # initial memory allocation - xc.domain_memory_increase_reservation(self.domid, mem_kb, 0, 0) + # initial memory reservation + xc.domain_memory_increase_reservation(self.domid, reservation, 0, + 0) self.createChannels() @@ -1484,6 +1503,19 @@ class XendDomainInfo: self.image.createDeviceModel() ## public: + + def checkLiveMigrateMemory(self): + """ Make sure there's enough memory to migrate this domain """ + overhead_kb = 0 + if arch.type == "x86": + # 1MB per vcpu plus 4Kib/Mib of RAM. This is higher than + # the minimum that Xen would allocate if no value were given. 
+ overhead_kb = self.info['vcpus'] * 1024 + self.info['maxmem'] * 4 + overhead_kb = ((overhead_kb + 1023) / 1024) * 1024 + # The domain might already have some shadow memory + overhead_kb -= xc.shadow_mem_control(self.domid) * 1024 + if overhead_kb > 0: + balloon.free(overhead_kb) def testMigrateDevices(self, network, dst): """ Notify all device about intention of migration diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xend/image.py Mon Sep 18 14:28:16 2006 -0500 @@ -143,12 +143,27 @@ class ImageHandler: raise VmError('Building domain failed: ostype=%s dom=%d err=%s' % (self.ostype, self.vm.getDomid(), str(result))) - def getRequiredMemory(self, mem_kb): + def getRequiredAvailableMemory(self, mem_kb): + """@param mem_kb The configured maxmem or memory, in KiB. + @return The corresponding required amount of memory for the domain, + also in KiB. This is normally the given mem_kb, but architecture- or + image-specific code may override this to add headroom where + necessary.""" return mem_kb - def getRequiredShadowMemory(self, mem_kb): - """@return The minimum shadow memory required, in KiB, for a domain - with mem_kb KiB of RAM.""" + def getRequiredInitialReservation(self, mem_kb): + """@param mem_kb The configured memory, in KiB. + @return The corresponding required amount of memory to be free, also + in KiB. This is normally the same as getRequiredAvailableMemory, but + architecture- or image-specific code may override this to + add headroom where necessary.""" + return self.getRequiredAvailableMemory(mem_kb) + + def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): + """@param shadow_mem_kb The configured shadow memory, in KiB. + @param maxmem_kb The configured maxmem, in KiB. 
+ @return The corresponding required amount of shadow memory, also in + KiB.""" # PV domains don't need any shadow memory return 0 @@ -418,13 +433,13 @@ class IA64_HVM_ImageHandler(HVMImageHand ostype = "hvm" - def getRequiredMemory(self, mem_kb): + def getRequiredAvailableMemory(self, mem_kb): page_kb = 16 # ROM size for guest firmware, ioreq page and xenstore page extra_pages = 1024 + 2 return mem_kb + extra_pages * page_kb - def getRequiredShadowMemory(self, mem_kb): + def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): # Explicit shadow memory is not a concept return 0 @@ -432,19 +447,29 @@ class X86_HVM_ImageHandler(HVMImageHandl ostype = "hvm" - def getRequiredMemory(self, mem_kb): + def getRequiredAvailableMemory(self, mem_kb): + # Add 8 MiB overhead for QEMU's video RAM. + return self.getRequiredInitialReservation(mem_kb) + 8192 + + def getRequiredInitialReservation(self, mem_kb): page_kb = 4 # This was derived emperically: - # 2.4 MB overhead per 1024 MB RAM + 8 MB constant + # 2.4 MB overhead per 1024 MB RAM # + 4 to avoid low-memory condition - extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12; + extra_mb = (2.4/1024) * (mem_kb/1024.0) + 4; extra_pages = int( math.ceil( extra_mb*1024 / page_kb )) return mem_kb + extra_pages * page_kb - def getRequiredShadowMemory(self, mem_kb): + def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): + # The given value is the configured value -- we need to include the + # overhead due to getRequiredInitialReservation. + maxmem_kb = self.getRequiredInitialReservation(maxmem_kb) + # 1MB per vcpu plus 4Kib/Mib of RAM. This is higher than # the minimum that Xen would allocate if no value were given. 
- return 1024 * self.vm.getVCpuCount() + mem_kb / 256 + return max(1024 * self.vm.getVCpuCount() + maxmem_kb / 256, + shadow_mem_kb) + _handlers = { "powerpc": { diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xend/server/DevController.py Mon Sep 18 14:28:16 2006 -0500 @@ -207,6 +207,9 @@ class DevController: devid = int(devid) + # Modify online status /before/ updating state (latter is watched by + # drivers, so this ordering avoids a race). + self.writeBackend(devid, 'online', "0") self.writeBackend(devid, 'state', str(xenbusState['Closing'])) @@ -406,7 +409,8 @@ class DevController: 'domain' : self.vm.getName(), 'frontend' : frontpath, 'frontend-id' : "%i" % self.vm.getDomid(), - 'state' : str(xenbusState['Initialising']) + 'state' : str(xenbusState['Initialising']), + 'online' : "1" }) return (backpath, frontpath) diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xend/server/blkif.py --- a/tools/python/xen/xend/server/blkif.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xend/server/blkif.py Mon Sep 18 14:28:16 2006 -0500 @@ -64,10 +64,14 @@ class BlkifController(DevController): except ValueError: (typ, params) = ("", "") + mode = sxp.child_value(config, 'mode', 'r') + if mode not in ('r', 'w', 'w!'): + raise VmError('Invalid mode') + back = { 'dev' : dev, 'type' : typ, 'params' : params, - 'mode' : sxp.child_value(config, 'mode', 'r') + 'mode' : mode } if security.on(): diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xend/server/pciquirk.py --- a/tools/python/xen/xend/server/pciquirk.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xend/server/pciquirk.py Mon Sep 18 14:28:16 2006 -0500 @@ -1,5 +1,5 @@ from xen.xend.XendLogging import log from xen.xend.XendLogging import log -from xen.xend.XendError import XendError +from xen.xend.XendError import XendError, VmError import sys import os.path from 
xen.xend.sxp import * diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/addlabel.py --- a/tools/python/xen/xm/addlabel.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/addlabel.py Mon Sep 18 14:28:16 2006 -0500 @@ -20,8 +20,6 @@ """Labeling a domain configuration file or a resoruce. """ import sys, os -import string -import traceback from xen.util import dictio from xen.util import security @@ -33,6 +31,7 @@ def usage(): print " resource. It derives the policy from the running hypervisor" print " if it is not given (optional parameter). If a label already" print " exists for the given domain or resource, then addlabel fails.\n" + security.err("Usage") def validate_config_file(configfile): @@ -134,7 +133,6 @@ def main (argv): for prefix in [".", "/etc/xen"]: configfile = prefix + "/" + configfile if os.path.isfile(configfile): - fd = open(configfile, "rb") break if not validate_config_file(configfile): usage() @@ -147,7 +145,7 @@ def main (argv): usage() except security.ACMError: - traceback.print_exc(limit=1) + sys.exit(-1) if __name__ == '__main__': diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/cfgbootpolicy.py --- a/tools/python/xen/xm/cfgbootpolicy.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/cfgbootpolicy.py Mon Sep 18 14:28:16 2006 -0500 @@ -70,7 +70,7 @@ def determine_kernelversion(user_specifi within_xen_title = 0 within_xen_entry = 0 if len(version_list) > 1: - err("Cannot decide between entries for kernels: " + version_list) + err("Cannot decide between entries for kernels %s" % version_list) elif len(version_list) == 0: err("Cannot find a boot entry candidate (please create a Xen boot entry first).") else: @@ -87,7 +87,6 @@ def insert_policy(boot_file, kernel_vers within_xen_entry = 0 insert_at_end_of_entry = 0 path_prefix = '' - done = False (tmp_fd, tmp_grub) = tempfile.mkstemp() #follow symlink since menue.lst might be linked to grub.conf if stat.S_ISLNK(os.lstat(boot_file)[stat.ST_MODE]): @@ -175,9 +174,10 
@@ def main(argv): print "Boot entry created and \'%s\' copied to /boot" % (policy + ".bin") except ACMError: - pass + sys.exit(-1) except: traceback.print_exc(limit=1) + sys.exit(-1) diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/create.py Mon Sep 18 14:28:16 2006 -0500 @@ -1155,7 +1155,7 @@ def create_security_check(config): else: print "Checking resources: (skipped)" except security.ACMError: - traceback.print_exc(limit=1) + sys.exit(-1) return passed @@ -1169,11 +1169,14 @@ def main(argv): if not opts: return + if type(config) == str: + config = sxp.parse(file(config))[0] + if opts.vals.dryrun: PrettyPrint.prettyprint(config) else: if not create_security_check(config): - print "Security configuration prevents domain from starting" + err("Security configuration prevents domain from starting.") else: dom = make_domain(opts, config) if opts.vals.console_autoconnect: diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/dry-run.py --- a/tools/python/xen/xm/dry-run.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/dry-run.py Mon Sep 18 14:28:16 2006 -0500 @@ -18,6 +18,7 @@ """Tests the security settings for a domain and its resources. """ +import sys from xen.util import security from xen.xm import create from xen.xend import sxp @@ -28,14 +29,14 @@ def usage(): print "to see if the domain created by the configfile can access" print "the resources. 
The status of each resource is listed" print "individually along with the final security decision.\n" + security.err("Usage") def main (argv): - if len(argv) != 2: - usage() - return + try: + if len(argv) != 2: + usage() - try: passed = 0 (opts, config) = create.parseCommandLine(argv) if create.check_domain_label(config, verbose=1): @@ -48,8 +49,10 @@ def main (argv): print "Dry Run: PASSED" else: print "Dry Run: FAILED" + sys.exit(-1) + except security.ACMError: - pass + sys.exit(-1) if __name__ == '__main__': diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/dumppolicy.py --- a/tools/python/xen/xm/dumppolicy.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/dumppolicy.py Mon Sep 18 14:28:16 2006 -0500 @@ -18,7 +18,6 @@ """Display currently enforced policy (low-level hypervisor representation). """ import sys -import traceback from xen.util.security import ACMError, err, dump_policy @@ -31,12 +30,13 @@ def usage(): def main(argv): try: + if len(argv) != 1: + usage() + dump_policy() except ACMError: - pass - except: - traceback.print_exc(limit=1) + sys.exit(-1) if __name__ == '__main__': diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/getlabel.py --- a/tools/python/xen/xm/getlabel.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/getlabel.py Mon Sep 18 14:28:16 2006 -0500 @@ -19,8 +19,6 @@ """Show the label for a domain or resoruce. 
""" import sys, os, re -import string -import traceback from xen.util import dictio from xen.util import security @@ -28,6 +26,7 @@ def usage(): print "\nUsage: xm getlabel dom <configfile>" print " xm getlabel res <resource>\n" print " This program shows the label for a domain or resource.\n" + security.err("Usage") def get_resource_label(resource): @@ -38,8 +37,7 @@ def get_resource_label(resource): try: access_control = dictio.dict_read("resources", file) except: - print "Resource label file not found" - return + security.err("Resource label file not found") # get the entry and print label if access_control.has_key(resource): @@ -47,7 +45,7 @@ def get_resource_label(resource): label = access_control[resource][1] print "policy="+policy+",label="+label else: - print "Resource not labeled" + security.err("Resource not labeled") def get_domain_label(configfile): @@ -63,8 +61,7 @@ def get_domain_label(configfile): fd = open(file, "rb") break if not fd: - print "Configuration file '"+configfile+"' not found." - return + security.err("Configuration file '"+configfile+"' not found.") # read in the domain config file, finding the label line ac_entry_re = re.compile("^access_control\s*=.*", re.IGNORECASE) @@ -82,8 +79,7 @@ def get_domain_label(configfile): # send error message if we didn't find anything if acline == "": - print "Label does not exist in domain configuration file." 
- return + security.err("Domain not labeled") # print out the label (title, data) = acline.split("=", 1) @@ -94,19 +90,21 @@ def get_domain_label(configfile): def main (argv): - if len(argv) != 3: - usage() - return + try: + if len(argv) != 3: + usage() - if argv[1].lower() == "dom": - configfile = argv[2] - get_domain_label(configfile) - elif argv[1].lower() == "res": - resource = argv[2] - get_resource_label(resource) - else: - usage() + if argv[1].lower() == "dom": + configfile = argv[2] + get_domain_label(configfile) + elif argv[1].lower() == "res": + resource = argv[2] + get_resource_label(resource) + else: + usage() + except security.ACMError: + sys.exit(-1) if __name__ == '__main__': main(sys.argv) diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/labels.py --- a/tools/python/xen/xm/labels.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/labels.py Mon Sep 18 14:28:16 2006 -0500 @@ -70,10 +70,12 @@ def main(argv): labels.sort() for label in labels: print label + except ACMError: - pass + sys.exit(-1) except: traceback.print_exc(limit=1) + sys.exit(-1) if __name__ == '__main__': diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/loadpolicy.py --- a/tools/python/xen/xm/loadpolicy.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/loadpolicy.py Mon Sep 18 14:28:16 2006 -0500 @@ -34,11 +34,12 @@ def main(argv): if len(argv) != 2: usage() load_policy(argv[1]) + except ACMError: - pass + sys.exit(-1) except: traceback.print_exc(limit=1) - + sys.exit(-1) if __name__ == '__main__': diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/makepolicy.py --- a/tools/python/xen/xm/makepolicy.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/makepolicy.py Mon Sep 18 14:28:16 2006 -0500 @@ -37,9 +37,10 @@ def main(argv): make_policy(argv[1]) except ACMError: - pass + sys.exit(-1) except: traceback.print_exc(limit=1) + sys.exit(-1) diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/resources.py --- 
a/tools/python/xen/xm/resources.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/resources.py Mon Sep 18 14:28:16 2006 -0500 @@ -18,8 +18,7 @@ """List the resource label information from the global resource label file """ -import sys, os -import string +import sys from xen.util import dictio from xen.util import security @@ -27,6 +26,7 @@ def usage(): print "\nUsage: xm resource\n" print " This program lists information for each resource in the" print " global resource label file\n" + security.err("Usage") def print_resource_data(access_control): @@ -41,14 +41,19 @@ def print_resource_data(access_control): def main (argv): try: - file = security.res_label_filename - access_control = dictio.dict_read("resources", file) - except: - print "Resource file not found." - return + if len(argv) != 1: + usage() - print_resource_data(access_control) + try: + file = security.res_label_filename + access_control = dictio.dict_read("resources", file) + except: + security.err("Error reading resource file.") + print_resource_data(access_control) + + except security.ACMError: + sys.exit(-1) if __name__ == '__main__': main(sys.argv) diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/rmlabel.py --- a/tools/python/xen/xm/rmlabel.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/rmlabel.py Mon Sep 18 14:28:16 2006 -0500 @@ -19,8 +19,6 @@ """Remove a label from a domain configuration file or a resoruce. """ import sys, os, re -import string -import traceback from xen.util import dictio from xen.util import security @@ -31,6 +29,7 @@ def usage(): print " for a domain or from the global resource label file for a" print " resource. 
If the label does not exist for the given domain or" print " resource, then rmlabel fails.\n" + security.err("Usage") def rm_resource_label(resource): @@ -48,7 +47,7 @@ def rm_resource_label(resource): del access_control[resource] dictio.dict_write(access_control, "resources", file) else: - security.err("Label does not exist in resource label file.") + security.err("Resource not labeled.") def rm_domain_label(configfile): @@ -85,7 +84,7 @@ def rm_domain_label(configfile): # send error message if we didn't find anything to remove if not removed: - security.err("Label does not exist in domain configuration file.") + security.err("Domain not labeled.") # write the data back out to the file fd = open(file, "wb") @@ -97,7 +96,6 @@ def main (argv): try: if len(argv) != 3: usage() - return if argv[1].lower() == "dom": configfile = argv[2] @@ -109,7 +107,7 @@ def main (argv): usage() except security.ACMError: - traceback.print_exc(limit=1) + sys.exit(-1) if __name__ == '__main__': diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/sysrq.py --- a/tools/python/xen/xm/sysrq.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/sysrq.py Mon Sep 18 14:28:16 2006 -0500 @@ -24,8 +24,9 @@ def main(argv): return # no options for the moment - if len(args) < 1: opts.err('Missing domain') - if len(args) < 2: opts.err('Missing sysrq character') + if len(args) != 2: + opts.usage() + sys.exit(1) dom = args[0] req = ord(args[1][0]) server.xend.domain.send_sysrq(dom, req) diff -r ce9c34c049c5 -r 0bdd578c417f tools/python/xen/xm/tests/test_create.py --- a/tools/python/xen/xm/tests/test_create.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/python/xen/xm/tests/test_create.py Mon Sep 18 14:28:16 2006 -0500 @@ -51,6 +51,7 @@ class test_create(unittest.TestCase): 'path' : '.:/etc/xen', 'builder' : 'linux', 'nics' : -1, + 'vncunused' : 1, 'xauthority': xen.xm.create.get_xauthority(), }) @@ -101,6 +102,7 @@ on_crash = 'destroy' 'path' : '.:/etc/xen', 'builder' : 'linux', + 'vncunused' : 
1, 'xauthority' : xen.xm.create.get_xauthority(), }) @@ -140,6 +142,7 @@ cpu_weight = 0.75 'builder' : 'linux', 'nics' : -1, + 'vncunused' : 1, 'xauthority' : xen.xm.create.get_xauthority(), }) @@ -182,6 +185,7 @@ ne2000=0 xen.xm.create.VNC_BASE_PORT + xen.xm.create.choose_vnc_display())), 'vnc' : 1, + 'vncunused' : 1, 'vncviewer' : 1, 'xm_file' : fname, diff -r ce9c34c049c5 -r 0bdd578c417f tools/xenmon/xenmon.py --- a/tools/xenmon/xenmon.py Mon Sep 18 09:23:51 2006 -0400 +++ b/tools/xenmon/xenmon.py Mon Sep 18 14:28:16 2006 -0500 @@ -672,6 +672,9 @@ def main(): parser = setup_cmdline_parser() (options, args) = parser.parse_args() + if options.mspersample < 0: + parser.error("option --ms_per_sample: invalid negative value: '%d'" % + options.mspersample) start_xenbaked() if options.live: diff -r ce9c34c049c5 -r 0bdd578c417f unmodified_drivers/linux-2.6/mkbuildtree --- a/unmodified_drivers/linux-2.6/mkbuildtree Mon Sep 18 09:23:51 2006 -0400 +++ b/unmodified_drivers/linux-2.6/mkbuildtree Mon Sep 18 14:28:16 2006 -0500 @@ -42,6 +42,12 @@ i[34567]86) ln -sf ${XL}/include/asm-i386/mach-xen/asm/synch_bitops.h include/asm ln -sf ${XL}/include/asm-i386/mach-xen/asm/maddr.h include/asm ;; +"ia64") + ln -sf ${XL}/include/asm-ia64/hypervisor.h include/asm + ln -sf ${XL}/include/asm-ia64/hypercall.h include/asm + ln -sf ${XL}/include/asm-ia64/synch_bitops.h include/asm + ln -sf ${XL}/include/asm-ia64/maddr.h include/asm + ;; *) echo unknown architecture $uname exit 1 diff -r ce9c34c049c5 -r 0bdd578c417f unmodified_drivers/linux-2.6/platform-pci/platform-pci.c --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Mon Sep 18 09:23:51 2006 -0400 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Mon Sep 18 14:28:16 2006 -0500 @@ -17,6 +17,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. 
* */ + #include <linux/module.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -25,6 +26,8 @@ #include <linux/init.h> #include <linux/version.h> #include <linux/interrupt.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> #include <asm/system.h> #include <asm/io.h> #include <asm/irq.h> @@ -47,7 +50,6 @@ MODULE_DESCRIPTION("Xen platform PCI dev MODULE_DESCRIPTION("Xen platform PCI device"); MODULE_LICENSE("GPL"); - unsigned long *phys_to_machine_mapping; EXPORT_SYMBOL(phys_to_machine_mapping); @@ -115,10 +117,11 @@ unsigned long alloc_xen_mmio(unsigned lo return addr; } +#ifndef __ia64__ /* Lifted from hvmloader.c */ static int get_hypercall_stubs(void) { - uint32_t eax, ebx, ecx, edx, pages, msr, order, i; + uint32_t eax, ebx, ecx, edx, pages, msr, i; char signature[13]; cpuid(0x40000000, &eax, &ebx, &ecx, &edx); @@ -141,25 +144,28 @@ static int get_hypercall_stubs(void) cpuid(0x40000002, &pages, &msr, &ecx, &edx); - i = pages - 1; - for (order = 0; i != 0; order++) - i >>= 1; - - printk(KERN_INFO "Hypercall area is %u pages (order %u allocation)\n", - pages, order); - - hypercall_stubs = (void *)__get_free_pages(GFP_KERNEL, order); + printk(KERN_INFO "Hypercall area is %u pages.\n", pages); + + /* Use __vmalloc() because vmalloc_exec() is not an exported symbol. */ + /* PAGE_KERNEL_EXEC also is not exported, hence we use PAGE_KERNEL. 
*/ + /* hypercall_stubs = vmalloc_exec(pages * PAGE_SIZE); */ + hypercall_stubs = __vmalloc(pages * PAGE_SIZE, + GFP_KERNEL | __GFP_HIGHMEM, + __pgprot(__PAGE_KERNEL & ~_PAGE_NX)); if (hypercall_stubs == NULL) return -ENOMEM; - for (i = 0; i < pages; i++) - wrmsrl(ebx, - virt_to_phys(hypercall_stubs) + /* base address */ - (i << PAGE_SHIFT) + /* offset of page @i */ - i); /* request page @i */ + for (i = 0; i < pages; i++) { + unsigned long pfn; + pfn = vmalloc_to_pfn((char *)hypercall_stubs + i*PAGE_SIZE); + wrmsrl(msr, ((u64)pfn << PAGE_SHIFT) + i); + } return 0; } +#else /* __ia64__ */ +#define get_hypercall_stubs() (0) +#endif static int __devinit platform_pci_init(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -205,7 +211,6 @@ static int __devinit platform_pci_init(s if (ret < 0) goto out; - if ((ret = init_xen_info())) goto out; diff -r ce9c34c049c5 -r 0bdd578c417f xen/Makefile --- a/xen/Makefile Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/Makefile Mon Sep 18 14:28:16 2006 -0500 @@ -90,10 +90,9 @@ include/xen/acm_policy.h: echo "#endif") >$@ # compile.h contains dynamic build info. Rebuilt on every 'make' invocation. 
-include/xen/compile.h: LANG=C include/xen/compile.h: include/xen/compile.h.in - @sed -e 's/@@date@@/$(shell date)/g' \ - -e 's/@@time@@/$(shell date +%T)/g' \ + @sed -e 's/@@date@@/$(shell LC_ALL=C date)/g' \ + -e 's/@@time@@/$(shell LC_ALL=C date +%T)/g' \ -e 's/@@whoami@@/$(shell whoami)/g' \ -e 's/@@domain@@/$(shell ([ -x /bin/dnsdomainname ] && /bin/dnsdomainname) || ([ -x /bin/domainname ] && /bin/domainname || echo [unknown]))/g' \ -e 's/@@hostname@@/$(shell hostname)/g' \ diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/vmx/mmio.c --- a/xen/arch/ia64/vmx/mmio.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/vmx/mmio.c Mon Sep 18 14:28:16 2006 -0500 @@ -213,6 +213,7 @@ static void mmio_access(VCPU *vcpu, u64 iot=__gpfn_is_io(vcpu->domain, src_pa>>PAGE_SHIFT); v_plat = vmx_vcpu_get_plat(vcpu); + perfc_incra(vmx_mmio_access, iot >> 56); switch (iot) { case GPFN_PIB: if(!dir) diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/vmx/pal_emul.c --- a/xen/arch/ia64/vmx/pal_emul.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/vmx/pal_emul.c Mon Sep 18 14:28:16 2006 -0500 @@ -389,6 +389,7 @@ pal_emul(VCPU *vcpu) { vcpu_get_gr_nat(vcpu,28,&gr28); //bank1 + perfc_incrc(vmx_pal_emul); switch (gr28) { case PAL_CACHE_FLUSH: result = pal_cache_flush(vcpu); diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/vmx/vmx_interrupt.c --- a/xen/arch/ia64/vmx/vmx_interrupt.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_interrupt.c Mon Sep 18 14:28:16 2006 -0500 @@ -92,6 +92,7 @@ inject_guest_interruption(VCPU *vcpu, u6 u64 viva; REGS *regs; ISR pt_isr; + perfc_incra(vmx_inject_guest_interruption, vec >> 8); regs=vcpu_regs(vcpu); // clear cr.isr.ri pt_isr.val = VMX(vcpu,cr_isr); diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/vmx/vmx_phy_mode.c --- a/xen/arch/ia64/vmx/vmx_phy_mode.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_phy_mode.c Mon Sep 18 14:28:16 2006 -0500 @@ -262,6 +262,7 @@ switch_mm_mode(VCPU *vcpu, IA64_PSR old_ int act; REGS * 
regs=vcpu_regs(vcpu); act = mm_switch_action(old_psr, new_psr); + perfc_incra(vmx_switch_mm_mode, act); switch (act) { case SW_V2P: // printf("V -> P mode transition: (0x%lx -> 0x%lx)\n", diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/vmx/vmx_process.c --- a/xen/arch/ia64/vmx/vmx_process.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_process.c Mon Sep 18 14:28:16 2006 -0500 @@ -115,6 +115,7 @@ vmx_ia64_handle_break (unsigned long ifa struct domain *d = current->domain; struct vcpu *v = current; + perfc_incrc(vmx_ia64_handle_break); #ifdef CRASH_DEBUG if ((iim == 0 || iim == CDB_BREAK_NUM) && !user_mode(regs) && IS_VMM_ADDRESS(regs->cr_iip)) { diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/vmx/vmx_virt.c --- a/xen/arch/ia64/vmx/vmx_virt.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_virt.c Mon Sep 18 14:28:16 2006 -0500 @@ -1398,120 +1398,159 @@ if ( (cause == 0xff && opcode == 0x1e000 switch(cause) { case EVENT_RSM: + perfc_incrc(vmx_rsm); status=vmx_emul_rsm(vcpu, inst); break; case EVENT_SSM: + perfc_incrc(vmx_ssm); status=vmx_emul_ssm(vcpu, inst); break; case EVENT_MOV_TO_PSR: + perfc_incrc(vmx_mov_to_psr); status=vmx_emul_mov_to_psr(vcpu, inst); break; case EVENT_MOV_FROM_PSR: + perfc_incrc(vmx_mov_from_psr); status=vmx_emul_mov_from_psr(vcpu, inst); break; case EVENT_MOV_FROM_CR: + perfc_incrc(vmx_mov_from_cr); status=vmx_emul_mov_from_cr(vcpu, inst); break; case EVENT_MOV_TO_CR: + perfc_incrc(vmx_mov_to_cr); status=vmx_emul_mov_to_cr(vcpu, inst); break; case EVENT_BSW_0: + perfc_incrc(vmx_bsw0); status=vmx_emul_bsw0(vcpu, inst); break; case EVENT_BSW_1: + perfc_incrc(vmx_bsw1); status=vmx_emul_bsw1(vcpu, inst); break; case EVENT_COVER: + perfc_incrc(vmx_cover); status=vmx_emul_cover(vcpu, inst); break; case EVENT_RFI: + perfc_incrc(vmx_rfi); status=vmx_emul_rfi(vcpu, inst); break; case EVENT_ITR_D: + perfc_incrc(vmx_itr_d); status=vmx_emul_itr_d(vcpu, inst); break; case EVENT_ITR_I: + perfc_incrc(vmx_itr_i); 
status=vmx_emul_itr_i(vcpu, inst); break; case EVENT_PTR_D: + perfc_incrc(vmx_ptr_d); status=vmx_emul_ptr_d(vcpu, inst); break; case EVENT_PTR_I: + perfc_incrc(vmx_ptr_i); status=vmx_emul_ptr_i(vcpu, inst); break; case EVENT_ITC_D: + perfc_incrc(vmx_itc_d); status=vmx_emul_itc_d(vcpu, inst); break; case EVENT_ITC_I: + perfc_incrc(vmx_itc_i); status=vmx_emul_itc_i(vcpu, inst); break; case EVENT_PTC_L: + perfc_incrc(vmx_ptc_l); status=vmx_emul_ptc_l(vcpu, inst); break; case EVENT_PTC_G: + perfc_incrc(vmx_ptc_g); status=vmx_emul_ptc_g(vcpu, inst); break; case EVENT_PTC_GA: + perfc_incrc(vmx_ptc_ga); status=vmx_emul_ptc_ga(vcpu, inst); break; case EVENT_PTC_E: + perfc_incrc(vmx_ptc_e); status=vmx_emul_ptc_e(vcpu, inst); break; case EVENT_MOV_TO_RR: + perfc_incrc(vmx_mov_to_rr); status=vmx_emul_mov_to_rr(vcpu, inst); break; case EVENT_MOV_FROM_RR: + perfc_incrc(vmx_mov_from_rr); status=vmx_emul_mov_from_rr(vcpu, inst); break; case EVENT_THASH: + perfc_incrc(vmx_thash); status=vmx_emul_thash(vcpu, inst); break; case EVENT_TTAG: + perfc_incrc(vmx_ttag); status=vmx_emul_ttag(vcpu, inst); break; case EVENT_TPA: + perfc_incrc(vmx_tpa); status=vmx_emul_tpa(vcpu, inst); break; case EVENT_TAK: + perfc_incrc(vmx_tak); status=vmx_emul_tak(vcpu, inst); break; case EVENT_MOV_TO_AR_IMM: + perfc_incrc(vmx_mov_to_ar_imm); status=vmx_emul_mov_to_ar_imm(vcpu, inst); break; case EVENT_MOV_TO_AR: + perfc_incrc(vmx_mov_to_ar_reg); status=vmx_emul_mov_to_ar_reg(vcpu, inst); break; case EVENT_MOV_FROM_AR: + perfc_incrc(vmx_mov_from_ar_reg); status=vmx_emul_mov_from_ar_reg(vcpu, inst); break; case EVENT_MOV_TO_DBR: + perfc_incrc(vmx_mov_to_dbr); status=vmx_emul_mov_to_dbr(vcpu, inst); break; case EVENT_MOV_TO_IBR: + perfc_incrc(vmx_mov_to_ibr); status=vmx_emul_mov_to_ibr(vcpu, inst); break; case EVENT_MOV_TO_PMC: + perfc_incrc(vmx_mov_to_pmc); status=vmx_emul_mov_to_pmc(vcpu, inst); break; case EVENT_MOV_TO_PMD: + perfc_incrc(vmx_mov_to_pmd); status=vmx_emul_mov_to_pmd(vcpu, inst); break; 
case EVENT_MOV_TO_PKR: + perfc_incrc(vmx_mov_to_pkr); status=vmx_emul_mov_to_pkr(vcpu, inst); break; case EVENT_MOV_FROM_DBR: + perfc_incrc(vmx_mov_from_dbr); status=vmx_emul_mov_from_dbr(vcpu, inst); break; case EVENT_MOV_FROM_IBR: + perfc_incrc(vmx_mov_from_ibr); status=vmx_emul_mov_from_ibr(vcpu, inst); break; case EVENT_MOV_FROM_PMC: + perfc_incrc(vmx_mov_from_pmc); status=vmx_emul_mov_from_pmc(vcpu, inst); break; case EVENT_MOV_FROM_PKR: + perfc_incrc(vmx_mov_from_pkr); status=vmx_emul_mov_from_pkr(vcpu, inst); break; case EVENT_MOV_FROM_CPUID: + perfc_incrc(vmx_mov_from_cpuid); status=vmx_emul_mov_from_cpuid(vcpu, inst); break; case EVENT_VMSW: diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/xen/dom_fw.c --- a/xen/arch/ia64/xen/dom_fw.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/xen/dom_fw.c Mon Sep 18 14:28:16 2006 -0500 @@ -816,9 +816,12 @@ dom_fw_init(struct domain *d, FW_HYPERCALL_SAL_RETURN, 0, hypercalls_imva); /* Fill in the FPSWA interface: */ - tables->fpswa_inf.revision = fpswa_interface->revision; - dom_fpswa_hypercall_patch(d, hypercalls_imva); - tables->fpswa_inf.fpswa = (void *)FW_HYPERCALL_FPSWA_ENTRY_PADDR; + if (fpswa_interface) { + tables->fpswa_inf.revision = fpswa_interface->revision; + dom_fpswa_hypercall_patch(d, hypercalls_imva); + tables->fpswa_inf.fpswa = + (void *)FW_HYPERCALL_FPSWA_ENTRY_PADDR; + } i = 0; /* Used by MAKE_MD */ @@ -867,7 +870,8 @@ dom_fw_init(struct domain *d, bp->console_info.num_rows = 25; bp->console_info.orig_x = 0; bp->console_info.orig_y = 24; - bp->fpswa = FW_FIELD_MPA(fpswa_inf); + if (fpswa_interface) + bp->fpswa = FW_FIELD_MPA(fpswa_inf); } void dom_fw_setup(struct domain *d, unsigned long bp_mpa, unsigned long maxmem) diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/xen/domain.c Mon Sep 18 14:28:16 2006 -0500 @@ -54,6 +54,7 @@ static unsigned int dom0_max_vcpus = 1; static unsigned int 
dom0_max_vcpus = 1; integer_param("dom0_max_vcpus", dom0_max_vcpus); +extern int opt_dom0_vcpus_pin; extern unsigned long running_on_sim; extern char dom0_command_line[]; @@ -1020,9 +1021,12 @@ int construct_dom0(struct domain *d, dom0_max_vcpus = MAX_VIRT_CPUS; printf ("Dom0 max_vcpus=%d\n", dom0_max_vcpus); - for ( i = 1; i < dom0_max_vcpus; i++ ) + for ( i = 1; i < dom0_max_vcpus; i++ ) { if (alloc_vcpu(d, i, i) == NULL) printf ("Cannot allocate dom0 vcpu %d\n", i); + else if (opt_dom0_vcpus_pin) + d->vcpu[i]->cpu_affinity = cpumask_of_cpu(i); + } /* Copy the OS image. */ loaddomainelfimage(d,image_start); diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/xen/fw_emul.c --- a/xen/arch/ia64/xen/fw_emul.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/xen/fw_emul.c Mon Sep 18 14:28:16 2006 -0500 @@ -395,28 +395,124 @@ efi_emulate_get_time( unsigned long tv_addr, unsigned long tc_addr, IA64FAULT *fault) { - unsigned long tv = 0, tc = 0; + unsigned long tv, tc = 0; struct page_info *tv_page = NULL; struct page_info *tc_page = NULL; - efi_status_t status; + efi_status_t status = 0; //printf("efi_get_time(%016lx,%016lx) called\n", tv_addr, tc_addr); tv = efi_translate_domain_addr(tv_addr, fault, &tv_page); if (*fault != IA64_NO_FAULT) - return 0; + goto errout; if (tc_addr) { tc = efi_translate_domain_addr(tc_addr, fault, &tc_page); - if (*fault != IA64_NO_FAULT) { - put_page(tv_page); - return 0; - } - } + if (*fault != IA64_NO_FAULT) + goto errout; + } + //printf("efi_get_time(%016lx,%016lx) translated to xen virtual address\n", tv, tc); status = (*efi.get_time)((efi_time_t *) tv, (efi_time_cap_t *) tc); //printf("efi_get_time returns %lx\n", status); + +errout: if (tc_page != NULL) put_page(tc_page); - put_page(tv_page); + if (tv_page != NULL) + put_page(tv_page); + + return status; +} + +static efi_status_t +efi_emulate_set_time( + unsigned long tv_addr, IA64FAULT *fault) +{ + unsigned long tv; + struct page_info *tv_page = NULL; + efi_status_t status = 0; + + 
if (current->domain != dom0) + return EFI_UNSUPPORTED; + + tv = efi_translate_domain_addr(tv_addr, fault, &tv_page); + if (*fault != IA64_NO_FAULT) + goto errout; + + status = (*efi.set_time)((efi_time_t *)tv); + +errout: + if (tv_page != NULL) + put_page(tv_page); + + return status; +} + +static efi_status_t +efi_emulate_get_wakeup_time( + unsigned long e_addr, unsigned long p_addr, + unsigned long tv_addr, IA64FAULT *fault) +{ + unsigned long enabled, pending, tv; + struct page_info *e_page = NULL, *p_page = NULL, + *tv_page = NULL; + efi_status_t status = 0; + + if (current->domain != dom0) + return EFI_UNSUPPORTED; + + if (!e_addr || !p_addr || !tv_addr) + return EFI_INVALID_PARAMETER; + + enabled = efi_translate_domain_addr(e_addr, fault, &e_page); + if (*fault != IA64_NO_FAULT) + goto errout; + pending = efi_translate_domain_addr(p_addr, fault, &p_page); + if (*fault != IA64_NO_FAULT) + goto errout; + tv = efi_translate_domain_addr(tv_addr, fault, &tv_page); + if (*fault != IA64_NO_FAULT) + goto errout; + + status = (*efi.get_wakeup_time)((efi_bool_t *)enabled, + (efi_bool_t *)pending, + (efi_time_t *)tv); + +errout: + if (e_page != NULL) + put_page(e_page); + if (p_page != NULL) + put_page(p_page); + if (tv_page != NULL) + put_page(tv_page); + + return status; +} + +static efi_status_t +efi_emulate_set_wakeup_time( + unsigned long enabled, unsigned long tv_addr, + IA64FAULT *fault) +{ + unsigned long tv = 0; + struct page_info *tv_page = NULL; + efi_status_t status = 0; + + if (current->domain != dom0) + return EFI_UNSUPPORTED; + + if (tv_addr) { + tv = efi_translate_domain_addr(tv_addr, fault, &tv_page); + if (*fault != IA64_NO_FAULT) + goto errout; + } + + status = (*efi.set_wakeup_time)((efi_bool_t)enabled, + (efi_time_t *)tv); + +errout: + if (tv_page != NULL) + put_page(tv_page); + return status; } @@ -663,6 +759,24 @@ efi_emulator (struct pt_regs *regs, IA64 vcpu_get_gr(v,33), fault); break; + case FW_HYPERCALL_EFI_SET_TIME: + status = 
efi_emulate_set_time ( + vcpu_get_gr(v,32), + fault); + break; + case FW_HYPERCALL_EFI_GET_WAKEUP_TIME: + status = efi_emulate_get_wakeup_time ( + vcpu_get_gr(v,32), + vcpu_get_gr(v,33), + vcpu_get_gr(v,34), + fault); + break; + case FW_HYPERCALL_EFI_SET_WAKEUP_TIME: + status = efi_emulate_set_wakeup_time ( + vcpu_get_gr(v,32), + vcpu_get_gr(v,33), + fault); + break; case FW_HYPERCALL_EFI_GET_VARIABLE: status = efi_emulate_get_variable ( vcpu_get_gr(v,32), @@ -695,10 +809,6 @@ efi_emulator (struct pt_regs *regs, IA64 (u32) vcpu_get_gr(v,34), (efi_memory_desc_t *) vcpu_get_gr(v,35)); break; - case FW_HYPERCALL_EFI_SET_TIME: - case FW_HYPERCALL_EFI_GET_WAKEUP_TIME: - case FW_HYPERCALL_EFI_SET_WAKEUP_TIME: - // FIXME: need fixes in efi.h from 2.6.9 case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT: // FIXME: need fixes in efi.h from 2.6.9 status = EFI_UNSUPPORTED; diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/xen/hypercall.c --- a/xen/arch/ia64/xen/hypercall.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/xen/hypercall.c Mon Sep 18 14:28:16 2006 -0500 @@ -211,6 +211,7 @@ fw_hypercall (struct pt_regs *regs) IA64FAULT fault; unsigned long index = regs->r2 & FW_HYPERCALL_NUM_MASK_HIGH; + perfc_incra(fw_hypercall, index >> 8); switch (index) { case FW_HYPERCALL_PAL_CALL: //printf("*** PAL hypercall: index=%d\n",regs->r28); diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/xen/mm.c Mon Sep 18 14:28:16 2006 -0500 @@ -173,6 +173,9 @@ #include <asm/vcpu.h> #include <asm/shadow.h> #include <linux/efi.h> +#include <xen/guest_access.h> +#include <asm/page.h> +#include <public/memory.h> static void domain_page_flush(struct domain* d, unsigned long mpaddr, unsigned long old_mfn, unsigned long new_mfn); @@ -1752,6 +1755,83 @@ int memory_is_conventional_ram(paddr_t p return (efi_mem_type(p) == EFI_CONVENTIONAL_MEMORY); } + +long +arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) +{ + 
switch (op) { + case XENMEM_add_to_physmap: + { + struct xen_add_to_physmap xatp; + unsigned long prev_mfn, mfn = 0, gpfn; + struct domain *d; + + if (copy_from_guest(&xatp, arg, 1)) + return -EFAULT; + + if (xatp.domid == DOMID_SELF) { + d = current->domain; + get_knownalive_domain(d); + } + else if (!IS_PRIV(current->domain)) + return -EPERM; + else if ((d = find_domain_by_id(xatp.domid)) == NULL) + return -ESRCH; + + /* This hypercall is used for VT-i domain only */ + if (!VMX_DOMAIN(d->vcpu[0])) { + put_domain(d); + return -ENOSYS; + } + + switch (xatp.space) { + case XENMAPSPACE_shared_info: + if (xatp.idx == 0) + mfn = virt_to_mfn(d->shared_info); + break; + case XENMAPSPACE_grant_table: + if (xatp.idx < NR_GRANT_FRAMES) + mfn = virt_to_mfn(d->grant_table->shared) + xatp.idx; + break; + default: + break; + } + + LOCK_BIGLOCK(d); + + /* Remove previously mapped page if it was present. */ + prev_mfn = gmfn_to_mfn(d, xatp.gpfn); + if (prev_mfn && mfn_valid(prev_mfn)) { + if (IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn))) + /* Xen heap frames are simply unhooked from this phys slot. */ + guest_physmap_remove_page(d, xatp.gpfn, prev_mfn); + else + /* Normal domain memory is freed, to avoid leaking memory. */ + guest_remove_page(d, xatp.gpfn); + } + + /* Unmap from old location, if any. */ + gpfn = get_gpfn_from_mfn(mfn); + if (gpfn != INVALID_M2P_ENTRY) + guest_physmap_remove_page(d, gpfn, mfn); + + /* Map at new location. 
*/ + guest_physmap_add_page(d, xatp.gpfn, mfn); + + UNLOCK_BIGLOCK(d); + + put_domain(d); + + break; + } + + default: + return -ENOSYS; + } + + return 0; +} + /* * Local variables: * mode: C diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/xen/vhpt.c --- a/xen/arch/ia64/xen/vhpt.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/xen/vhpt.c Mon Sep 18 14:28:16 2006 -0500 @@ -14,6 +14,7 @@ #include <asm/page.h> #include <asm/vhpt.h> #include <asm/vcpu.h> +#include <asm/vmmu.h> /* Defined in tlb.c */ extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits); @@ -131,15 +132,25 @@ void vhpt_init(void) void vcpu_flush_vtlb_all(struct vcpu *v) { - /* First VCPU tlb. */ - vcpu_purge_tr_entry(&PSCBX(v,dtlb)); - vcpu_purge_tr_entry(&PSCBX(v,itlb)); - - /* Then VHPT. */ - vhpt_flush (); - - /* Then mTLB. */ - local_flush_tlb_all (); + if (VMX_DOMAIN(v)) { + /* This code may be call for remapping shared_info and + grant_table share page from guest_physmap_remove_page() + in arch_memory_op() XENMEM_add_to_physmap to realize + PV-on-HVM feature. */ + /* Purge vTLB for VT-i domain */ + thash_purge_all(v); + } + else { + /* First VCPU tlb. */ + vcpu_purge_tr_entry(&PSCBX(v,dtlb)); + vcpu_purge_tr_entry(&PSCBX(v,itlb)); + + /* Then VHPT. */ + vhpt_flush(); + + /* Then mTLB. */ + local_flush_tlb_all(); + } /* We could clear bit in d->domain_dirty_cpumask only if domain d in not running on this processor. There is currently no easy way to diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/ia64/xen/xensetup.c Mon Sep 18 14:28:16 2006 -0500 @@ -49,6 +49,10 @@ extern void init_IRQ(void); extern void init_IRQ(void); extern void trap_init(void); +/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */ +unsigned int opt_dom0_vcpus_pin = 0; +boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin); + /* opt_nosmp: If true, secondary processors are ignored. 
*/ static int opt_nosmp = 0; boolean_param("nosmp", opt_nosmp); @@ -517,6 +521,10 @@ printk("num_online_cpus=%d, max_cpus=%d\ 0) != 0) panic("Could not set up DOM0 guest OS\n"); + /* PIN domain0 VCPU 0 on CPU 0. */ + if (opt_dom0_vcpus_pin) + dom0->vcpu[0]->cpu_affinity = cpumask_of_cpu(0); + if (!running_on_sim) // slow on ski and pages are pre-initialized to zero scrub_heap_pages(); diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/Rules.mk --- a/xen/arch/x86/Rules.mk Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/Rules.mk Mon Sep 18 14:28:16 2006 -0500 @@ -42,6 +42,9 @@ ifeq ($(TARGET_SUBARCH),x86_64) ifeq ($(TARGET_SUBARCH),x86_64) CFLAGS += -mno-red-zone -fpic -fno-reorder-blocks CFLAGS += -fno-asynchronous-unwind-tables +# -fvisibility=hidden reduces -fpic cost, if it's available +CFLAGS += $(shell $(CC) -v --help 2>&1 | grep " -fvisibility=" | \ + grep -q hidden && echo "-fvisibility=hidden") LDFLAGS += -m elf_x86_64 x86_32 := n x86_64 := y diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/hvm/io.c Mon Sep 18 14:28:16 2006 -0500 @@ -646,9 +646,13 @@ static void hvm_mmio_assist(struct cpu_u break; case INSTR_BT: - index = operand_index(src); - value = get_reg_value(size, index, 0, regs); - + if ( src & REGISTER ) + { + index = operand_index(src); + value = get_reg_value(size, index, 0, regs); + } + else if ( src & IMMEDIATE ) + value = mmio_opp->immediate; if (p->u.data & (1 << (value & ((1 << 5) - 1)))) regs->eflags |= X86_EFLAGS_CF; else diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/hvm/platform.c Mon Sep 18 14:28:16 2006 -0500 @@ -652,6 +652,23 @@ static int hvm_decode(int realmode, unsi instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); return DECODE_success; + case 0xBA: + if (((opcode[1] >> 3) & 7) == 4) /* BT $imm8, m16/32/64 */ + { + instr->instr = 
INSTR_BT; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + instr->immediate = + (signed char)get_immediate(realmode, opcode+1, BYTE); + instr->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE); + instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); + return DECODE_success; + } + else + { + printf("0f %x, This opcode subtype isn't handled yet\n", *opcode); + return DECODE_failure; + } + default: printf("0f %x, This opcode isn't handled yet\n", *opcode); return DECODE_failure; @@ -1002,10 +1019,17 @@ void handle_mmio(unsigned long va, unsig mmio_opp->operand[0] = mmio_inst.operand[0]; /* bit offset */ mmio_opp->operand[1] = mmio_inst.operand[1]; /* bit base */ - index = operand_index(mmio_inst.operand[0]); - size = operand_size(mmio_inst.operand[0]); - value = get_reg_value(size, index, 0, regs); - + if ( mmio_inst.operand[0] & REGISTER ) + { + index = operand_index(mmio_inst.operand[0]); + size = operand_size(mmio_inst.operand[0]); + value = get_reg_value(size, index, 0, regs); + } + else if ( mmio_inst.operand[0] & IMMEDIATE ) + { + mmio_opp->immediate = mmio_inst.immediate; + value = mmio_inst.immediate; + } send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1, mmio_inst.op_size, 0, IOREQ_READ, 0); break; diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/hvm/svm/intr.c Mon Sep 18 14:28:16 2006 -0500 @@ -79,22 +79,22 @@ asmlinkage void svm_intr_assist(void) ASSERT(vmcb); /* Check if an Injection is active */ - /* Previous Interrupt delivery caused this Intercept? */ - if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) { - v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector; + /* Previous Interrupt delivery caused this Intercept? 
*/ + if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) { + v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector; // printk("Injecting PF#: saving IRQ from ExitInfo\n"); - vmcb->exitintinfo.bytes = 0; - re_injecting = 1; - } + vmcb->exitintinfo.bytes = 0; + re_injecting = 1; + } /* Guest's interrputs masked? */ rflags = vmcb->rflags; if (irq_masked(rflags)) { HVM_DBG_LOG(DBG_LEVEL_1, "Guest IRQs masked: rflags: %lx", rflags); - /* bail out, we won't be injecting an interrupt this time */ - return; + /* bail out, we won't be injecting an interrupt this time */ + return; } - + /* Previous interrupt still pending? */ if (vmcb->vintr.fields.irq) { // printk("Re-injecting IRQ from Vintr\n"); @@ -115,27 +115,24 @@ asmlinkage void svm_intr_assist(void) if ( v->vcpu_id == 0 ) hvm_pic_assist(v); + + if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) { + pic_set_irq(pic, pt->irq, 0); + pic_set_irq(pic, pt->irq, 1); + } + callback_irq = v->domain->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ]; - - /* Before we deal with PIT interrupts, let's check for - interrupts set by the device model or paravirtualised event - channel interrupts. - */ - if ( cpu_has_pending_irq(v) ) { - intr_vector = cpu_get_interrupt(v, &intr_type); - } - else if ( callback_irq != 0 && local_events_need_delivery() ) { + if ( callback_irq != 0 && + local_events_need_delivery() ) { /*inject para-device call back irq*/ v->vcpu_info->evtchn_upcall_mask = 1; pic_set_irq(pic, callback_irq, 0); pic_set_irq(pic, callback_irq, 1); - intr_vector = callback_irq; } - else if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) { - pic_set_irq(pic, pt->irq, 0); - pic_set_irq(pic, pt->irq, 1); + + if ( cpu_has_pending_irq(v) ) intr_vector = cpu_get_interrupt(v, &intr_type); - } + } /* have we got an interrupt to inject? 
*/ diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/hvm/svm/svm.c Mon Sep 18 14:28:16 2006 -0500 @@ -243,6 +243,7 @@ static void svm_store_cpu_guest_regs( { /* Returning the guest's regs */ crs[0] = v->arch.hvm_svm.cpu_shadow_cr0; + crs[2] = v->arch.hvm_svm.cpu_cr2; crs[3] = v->arch.hvm_svm.cpu_cr3; crs[4] = v->arch.hvm_svm.cpu_shadow_cr4; } @@ -2793,9 +2794,7 @@ asmlinkage void svm_vmexit_handler(struc break; case VMEXIT_INTR: - raise_softirq(SCHEDULE_SOFTIRQ); - break; - + break; case VMEXIT_INVD: svm_vmexit_do_invd(vmcb); diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/hvm/vioapic.c --- a/xen/arch/x86/hvm/vioapic.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/hvm/vioapic.c Mon Sep 18 14:28:16 2006 -0500 @@ -362,21 +362,35 @@ static uint32_t ioapic_get_delivery_bitm ASSERT(s); - if (dest_mode == 0) { /* Physical mode */ - for (i = 0; i < s->lapic_count; i++) { - if (VLAPIC_ID(s->lapic_info[i]) == dest) { + if ( dest_mode == 0 ) + { + /* Physical mode. */ + for ( i = 0; i < s->lapic_count; i++ ) + { + if ( VLAPIC_ID(s->lapic_info[i]) == dest ) + { mask = 1 << i; break; } } - } else { - /* logical destination. call match_logical_addr for each APIC. */ - if (dest != 0) { - for (i=0; i< s->lapic_count; i++) { + + /* Broadcast. */ + if ( dest == 0xFF ) + { + for ( i = 0; i < s->lapic_count; i++ ) + mask |= ( 1 << i ); + } + } + else + { + /* Logical destination. Call match_logical_addr for each APIC. 
*/ + if ( dest != 0 ) + { + for ( i = 0; i < s->lapic_count; i++ ) + { if ( s->lapic_info[i] && - ioapic_match_logical_addr(s, i, dest) ) { + ioapic_match_logical_addr(s, i, dest) ) mask |= (1<<i); - } } } } diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/hvm/vlapic.c Mon Sep 18 14:28:16 2006 -0500 @@ -66,12 +66,10 @@ int vlapic_find_highest_irr(struct vlapi { int result; - result = find_highest_bit((unsigned long *)(vlapic->regs + APIC_IRR), - MAX_VECTOR); - - ASSERT( result == -1 || result >= 16); - - return result; + result = vlapic_find_highest_vector(vlapic->regs + APIC_IRR); + ASSERT((result == -1) || (result >= 16)); + + return result; } s_time_t get_apictime_scheduled(struct vcpu *v) @@ -89,10 +87,8 @@ int vlapic_find_highest_isr(struct vlapi { int result; - result = find_highest_bit((unsigned long *)(vlapic->regs + APIC_ISR), - MAX_VECTOR); - - ASSERT( result == -1 || result >= 16); + result = vlapic_find_highest_vector(vlapic->regs + APIC_ISR); + ASSERT((result == -1) || (result >= 16)); return result; } @@ -221,7 +217,8 @@ static int vlapic_accept_irq(struct vcpu if ( unlikely(vlapic == NULL || !vlapic_enabled(vlapic)) ) break; - if ( test_and_set_bit(vector, vlapic->regs + APIC_IRR) && trig_mode) + if ( vlapic_test_and_set_vector(vector, vlapic->regs + APIC_IRR) && + trig_mode) { HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "level trig mode repeatedly for vector %d\n", vector); @@ -232,7 +229,7 @@ static int vlapic_accept_irq(struct vcpu { HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "level trig mode for vector %d\n", vector); - set_bit(vector, vlapic->regs + APIC_TMR); + vlapic_set_vector(vector, vlapic->regs + APIC_TMR); } hvm_prod_vcpu(v); @@ -358,10 +355,10 @@ void vlapic_EOI_set(struct vlapic *vlapi if ( vector == -1 ) return ; - clear_bit(vector, vlapic->regs + APIC_ISR); + vlapic_clear_vector(vector, vlapic->regs + APIC_ISR); vlapic_update_ppr(vlapic); - if ( 
test_and_clear_bit(vector, vlapic->regs + APIC_TMR) ) + if ( vlapic_test_and_clear_vector(vector, vlapic->regs + APIC_TMR) ) ioapic_update_EOI(vlapic->domain, vector); } @@ -816,7 +813,7 @@ void vlapic_timer_fn(void *data) vlapic->timer_last_update = now; - if ( test_and_set_bit(timer_vector, vlapic->regs + APIC_IRR )) + if ( vlapic_test_and_set_vector(timer_vector, vlapic->regs + APIC_IRR) ) vlapic->intr_pending_count[timer_vector]++; if ( vlapic_lvtt_period(vlapic) ) @@ -893,7 +890,7 @@ int cpu_get_apic_interrupt(struct vcpu * HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "Sending an illegal vector 0x%x.", highest_irr); - set_bit(err_vector, vlapic->regs + APIC_IRR); + vlapic_set_vector(err_vector, vlapic->regs + APIC_IRR); highest_irr = err_vector; } @@ -919,6 +916,20 @@ int cpu_has_apic_interrupt(struct vcpu* return 0; } +/* check to see if there is pending interrupt */ +int cpu_has_pending_irq(struct vcpu *v) +{ + struct hvm_domain *plat = &v->domain->arch.hvm_domain; + + /* APIC */ + if ( cpu_has_apic_interrupt(v) ) return 1; + + /* PIC */ + if ( !vlapic_accept_pic_intr(v) ) return 0; + + return plat->interrupt_request; +} + void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode) { struct vlapic *vlapic = VLAPIC(v); @@ -929,15 +940,15 @@ void vlapic_post_injection(struct vcpu * switch ( deliver_mode ) { case APIC_DM_FIXED: case APIC_DM_LOWEST: - set_bit(vector, vlapic->regs + APIC_ISR); - clear_bit(vector, vlapic->regs + APIC_IRR); + vlapic_set_vector(vector, vlapic->regs + APIC_ISR); + vlapic_clear_vector(vector, vlapic->regs + APIC_IRR); vlapic_update_ppr(vlapic); if ( vector == vlapic_lvt_vector(vlapic, APIC_LVTT) ) { vlapic->intr_pending_count[vector]--; if ( vlapic->intr_pending_count[vector] > 0 ) - test_and_set_bit(vector, vlapic->regs + APIC_IRR); + vlapic_test_and_set_vector(vector, vlapic->regs + APIC_IRR); } break; diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Mon Sep 18 09:23:51 2006 -0400 +++ 
b/xen/arch/x86/hvm/vmx/io.c Mon Sep 18 14:28:16 2006 -0500 @@ -68,19 +68,6 @@ static inline int is_interruptibility_st return interruptibility; } -/* check to see if there is pending interrupt */ -int cpu_has_pending_irq(struct vcpu *v) -{ - struct hvm_domain *plat = &v->domain->arch.hvm_domain; - - /* APIC */ - if ( cpu_has_apic_interrupt(v) ) return 1; - - /* PIC */ - if ( !vlapic_accept_pic_intr(v) ) return 0; - - return plat->interrupt_request; -} asmlinkage void vmx_intr_assist(void) { diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Sep 18 14:28:16 2006 -0500 @@ -46,6 +46,8 @@ #include <asm/hvm/vpic.h> #include <asm/hvm/vlapic.h> +extern uint32_t vlapic_update_ppr(struct vlapic *vlapic); + static DEFINE_PER_CPU(unsigned long, trace_values[5]); #define TRACE_VMEXIT(index,value) this_cpu(trace_values)[index]=value @@ -518,6 +520,7 @@ static void vmx_store_cpu_guest_regs( if ( crs != NULL ) { __vmread(CR0_READ_SHADOW, &crs[0]); + crs[2] = v->arch.hvm_vmx.cpu_cr2; __vmread(GUEST_CR3, &crs[3]); __vmread(CR4_READ_SHADOW, &crs[4]); } @@ -953,8 +956,6 @@ static void vmx_vmexit_do_cpuid(struct c bitmaskof(X86_FEATURE_MWAIT) ); edx &= ~( bitmaskof(X86_FEATURE_HT) | - bitmaskof(X86_FEATURE_MCA) | - bitmaskof(X86_FEATURE_MCE) | bitmaskof(X86_FEATURE_ACPI) | bitmaskof(X86_FEATURE_ACC) ); } @@ -1615,6 +1616,7 @@ static int mov_to_cr(int gp, int cr, str unsigned long value; unsigned long old_cr; struct vcpu *v = current; + struct vlapic *vlapic = VLAPIC(v); switch ( gp ) { CASE_GET_REG(EAX, eax); @@ -1758,6 +1760,12 @@ static int mov_to_cr(int gp, int cr, str shadow_update_paging_modes(v); break; } + case 8: + { + vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4)); + vlapic_update_ppr(vlapic); + break; + } default: printk("invalid cr: %d\n", gp); __hvm_bug(regs); @@ -1771,13 +1779,20 @@ static int mov_to_cr(int gp, int cr, str */ static void 
mov_from_cr(int cr, int gp, struct cpu_user_regs *regs) { - unsigned long value; + unsigned long value = 0; struct vcpu *v = current; - - if ( cr != 3 ) + struct vlapic *vlapic = VLAPIC(v); + + if ( cr != 3 && cr != 8) __hvm_bug(regs); - value = (unsigned long) v->arch.hvm_vmx.cpu_cr3; + if ( cr == 3 ) + value = (unsigned long) v->arch.hvm_vmx.cpu_cr3; + else if ( cr == 8 ) + { + value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI); + value = (value & 0xF0) >> 4; + } switch ( gp ) { CASE_SET_REG(EAX, eax); @@ -1888,7 +1903,7 @@ static inline void vmx_do_msr_read(struc } rdmsr_safe(regs->ecx, regs->eax, regs->edx); - break; + return; } regs->eax = msr_content & 0xFFFFFFFF; diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/mm/shadow/multi.c Mon Sep 18 14:28:16 2006 -0500 @@ -1792,8 +1792,10 @@ void sh_install_xen_entries_in_l2h(struc for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ ) { sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START) + i] = - shadow_l2e_from_mfn(_mfn(l3e_get_pfn(p2m[i])), - __PAGE_HYPERVISOR); + (l3e_get_flags(p2m[i]) & _PAGE_PRESENT) + ? shadow_l2e_from_mfn(_mfn(l3e_get_pfn(p2m[i])), + __PAGE_HYPERVISOR) + : shadow_l2e_empty(); } sh_unmap_domain_page(p2m); } @@ -2861,11 +2863,11 @@ static int sh_page_fault(struct vcpu *v, // bunch of 4K maps. 
// + shadow_lock(d); + SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n", v->domain->domain_id, v->vcpu_id, va, regs->error_code); - shadow_lock(d); - shadow_audit_tables(v); if ( guest_walk_tables(v, va, &gw, 1) != 0 ) @@ -3291,12 +3293,6 @@ sh_update_linear_entries(struct vcpu *v) { ml3e = __linear_l3_table; l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0])); -#if GUEST_PAGING_LEVELS == 2 - /* Shadow l3 tables are made up by update_cr3 */ - sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab; -#else - sl3e = v->arch.shadow_vtable; -#endif } else { @@ -3306,13 +3302,15 @@ sh_update_linear_entries(struct vcpu *v) l3mfn = _mfn(l4e_get_pfn(ml4e[0])); ml3e = sh_map_domain_page(l3mfn); sh_unmap_domain_page(ml4e); + } + #if GUEST_PAGING_LEVELS == 2 - /* Shadow l3 tables are made up by update_cr3 */ - sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab; + /* Shadow l3 tables are made up by update_cr3 */ + sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab; #else - sl3e = sh_map_domain_page(pagetable_get_mfn(v->arch.shadow_table)); -#endif - } + /* Always safe to use shadow_vtable, because it's globally mapped */ + sl3e = v->arch.shadow_vtable; +#endif for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) { @@ -3324,12 +3322,7 @@ sh_update_linear_entries(struct vcpu *v) } if ( v != current ) - { sh_unmap_domain_page(ml3e); -#if GUEST_PAGING_LEVELS != 2 - sh_unmap_domain_page(sl3e); -#endif - } } #elif CONFIG_PAGING_LEVELS == 3 @@ -3361,31 +3354,10 @@ sh_update_linear_entries(struct vcpu *v) #else /* GUEST_PAGING_LEVELS == 3 */ - /* Use local vcpu's mappings if we can; otherwise make new mappings */ - if ( v == current ) - { - shadow_l3e = v->arch.shadow_vtable; - if ( !shadow_mode_external(d) ) - guest_l3e = v->arch.guest_vtable; - } - else - { - mfn_t smfn; - int idx; - - /* Map the shadow l3 */ - smfn = pagetable_get_mfn(v->arch.shadow_table); - idx = shadow_l3_index(&smfn, guest_index(v->arch.shadow_vtable)); - shadow_l3e = sh_map_domain_page(smfn); - shadow_l3e += idx; - if ( !shadow_mode_external(d) ) - { - /* 
Also the guest l3 */ - mfn_t gmfn = pagetable_get_mfn(v->arch.guest_table); - guest_l3e = sh_map_domain_page(gmfn); - guest_l3e += guest_index(v->arch.guest_vtable); - } - } + /* Always safe to use *_vtable, because they're globally mapped */ + shadow_l3e = v->arch.shadow_vtable; + guest_l3e = v->arch.guest_vtable; + #endif /* GUEST_PAGING_LEVELS */ /* Choose where to write the entries, using linear maps if possible */ @@ -3443,14 +3415,6 @@ sh_update_linear_entries(struct vcpu *v) if ( v != current || !shadow_mode_external(d) ) sh_unmap_domain_page(l2e); -#if GUEST_PAGING_LEVELS == 3 - if ( v != current) - { - sh_unmap_domain_page(shadow_l3e); - if ( !shadow_mode_external(d) ) - sh_unmap_domain_page(guest_l3e); - } -#endif } #elif CONFIG_PAGING_LEVELS == 2 @@ -3601,7 +3565,7 @@ sh_detach_old_tables(struct vcpu *v) v->arch.shadow_vtable ) { // Q: why does this need to use (un)map_domain_page_*global* ? - // + /* A: so sh_update_linear_entries can operate on other vcpus */ sh_unmap_domain_page_global(v->arch.shadow_vtable); v->arch.shadow_vtable = NULL; } diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/oprofile/xenoprof.c --- a/xen/arch/x86/oprofile/xenoprof.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/oprofile/xenoprof.c Mon Sep 18 14:28:16 2006 -0500 @@ -437,54 +437,59 @@ int xenoprof_op_init(XEN_GUEST_HANDLE(vo int xenoprof_op_init(XEN_GUEST_HANDLE(void) arg) { struct xenoprof_init xenoprof_init; - int is_primary, num_events; + int ret; + + if ( copy_from_guest(&xenoprof_init, arg, 1) ) + return -EFAULT; + + if ( (ret = nmi_init(&xenoprof_init.num_events, + &xenoprof_init.is_primary, + xenoprof_init.cpu_type)) ) + return ret; + + if ( copy_to_guest(arg, &xenoprof_init, 1) ) + return -EFAULT; + + if ( xenoprof_init.is_primary ) + primary_profiler = current->domain; + + return 0; +} + +int xenoprof_op_get_buffer(XEN_GUEST_HANDLE(void) arg) +{ + struct xenoprof_get_buffer xenoprof_get_buffer; struct domain *d = current->domain; int ret; - if ( 
copy_from_guest(&xenoprof_init, arg, 1) ) + if ( copy_from_guest(&xenoprof_get_buffer, arg, 1) ) return -EFAULT; - ret = nmi_init(&num_events, - &is_primary, - xenoprof_init.cpu_type); - if ( ret < 0 ) - goto err; - - if ( is_primary ) - primary_profiler = current->domain; - /* - * We allocate xenoprof struct and buffers only at first time xenoprof_init + * We allocate xenoprof struct and buffers only at first time xenoprof_get_buffer * is called. Memory is then kept until domain is destroyed. */ if ( (d->xenoprof == NULL) && - ((ret = alloc_xenoprof_struct(d, xenoprof_init.max_samples, 0)) < 0) ) - goto err; + ((ret = alloc_xenoprof_struct(d, xenoprof_get_buffer.max_samples, 0)) < 0) ) + return ret; xenoprof_reset_buf(d); d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED; d->xenoprof->domain_ready = 0; - d->xenoprof->is_primary = is_primary; - - xenoprof_init.is_primary = is_primary; - xenoprof_init.num_events = num_events; - xenoprof_init.nbuf = d->xenoprof->nbuf; - xenoprof_init.bufsize = d->xenoprof->bufsize; - xenoprof_init.buf_maddr = __pa(d->xenoprof->rawbuf); - - if ( copy_to_guest(arg, &xenoprof_init, 1) ) - { - ret = -EFAULT; - goto err; - } - - return ret; - - err: if ( primary_profiler == current->domain ) - primary_profiler = NULL; - return ret; + d->xenoprof->is_primary = 1; + else + d->xenoprof->is_primary = 0; + + xenoprof_get_buffer.nbuf = d->xenoprof->nbuf; + xenoprof_get_buffer.bufsize = d->xenoprof->bufsize; + xenoprof_get_buffer.buf_maddr = __pa(d->xenoprof->rawbuf); + + if ( copy_to_guest(arg, &xenoprof_get_buffer, 1) ) + return -EFAULT; + + return 0; } #define PRIV_OP(op) ( (op == XENOPROF_set_active) \ @@ -510,6 +515,10 @@ int do_xenoprof_op(int op, XEN_GUEST_HAN { case XENOPROF_init: ret = xenoprof_op_init(arg); + break; + + case XENOPROF_get_buffer: + ret = xenoprof_op_get_buffer(arg); break; case XENOPROF_reset_active_list: diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/time.c --- a/xen/arch/x86/time.c Mon Sep 18 09:23:51 2006 -0400 
+++ b/xen/arch/x86/time.c Mon Sep 18 14:28:16 2006 -0500 @@ -676,7 +676,7 @@ static inline void __update_vcpu_system_ struct vcpu_time_info *u; t = &this_cpu(cpu_time); - u = &v->domain->shared_info->vcpu_info[v->vcpu_id].time; + u = &v->vcpu_info->time; version_update_begin(&u->version); @@ -690,7 +690,7 @@ static inline void __update_vcpu_system_ void update_vcpu_system_time(struct vcpu *v) { - if ( v->domain->shared_info->vcpu_info[v->vcpu_id].time.tsc_timestamp != + if ( v->vcpu_info->time.tsc_timestamp != this_cpu(cpu_time).local_tsc_stamp ) __update_vcpu_system_time(v); } diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/traps.c Mon Sep 18 14:28:16 2006 -0500 @@ -339,7 +339,6 @@ asmlinkage void fatal_trap(int trapnr, s asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs) { int cpu = smp_processor_id(); - unsigned long cr2; static char *trapstr[] = { "divide error", "debug", "nmi", "bkpt", "overflow", "bounds", "invalid opcode", "device not available", "double fault", @@ -356,7 +355,7 @@ asmlinkage void fatal_trap(int trapnr, s if ( trapnr == TRAP_page_fault ) { - __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : ); + unsigned long cr2 = read_cr2(); printk("Faulting linear address: %p\n", _p(cr2)); show_page_walk(cr2); } @@ -911,7 +910,7 @@ asmlinkage int do_page_fault(struct cpu_ ASSERT(!in_irq()); - __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : ); + addr = read_cr2(); DEBUGGER_trap_entry(TRAP_page_fault, regs); @@ -1004,7 +1003,21 @@ static inline int admin_io_okay( } /* Check admin limits. Silently fail the access if it is disallowed. */ -#define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0) +static inline unsigned char inb_user( + unsigned int port, struct vcpu *v, struct cpu_user_regs *regs) +{ + /* + * Allow read access to port 0x61. Bit 4 oscillates with period 30us, and + * so it is often used for timing loops in BIOS code. 
This hack can go + * away when we have separate read/write permission rangesets. + * Note that we could emulate bit 4 instead of directly reading port 0x61, + * but there's not really a good reason to do so. + */ + if ( admin_io_okay(port, 1, v, regs) || (port == 0x61) ) + return inb(port); + return ~0; +} +//#define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0) #define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0) #define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0) #define outb_user(_v, _p, _d, _r) \ diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/x86_32/traps.c Mon Sep 18 14:28:16 2006 -0500 @@ -21,11 +21,28 @@ /* All CPUs have their own IDT to allow int80 direct trap. */ idt_entry_t *idt_tables[NR_CPUS] __read_mostly; +static void print_xen_info(void) +{ + char taint_str[TAINT_STRING_MAX_LEN]; + char debug = 'n', *arch = "x86_32"; + +#ifndef NDEBUG + debug = 'y'; +#endif + +#ifdef CONFIG_X86_PAE + arch = "x86_32p"; +#endif + + printk("----[ Xen-%d.%d%s %s debug=%c %s ]----\n", + xen_major_version(), xen_minor_version(), xen_extra_version(), + arch, debug, print_tainted(taint_str)); +} + void show_registers(struct cpu_user_regs *regs) { struct cpu_user_regs fault_regs = *regs; unsigned long fault_crs[8]; - char taint_str[TAINT_STRING_MAX_LEN]; const char *context; if ( hvm_guest(current) && guest_mode(regs) ) @@ -35,25 +52,29 @@ void show_registers(struct cpu_user_regs } else { - context = guest_mode(regs) ? 
"guest" : "hypervisor"; - if ( !guest_mode(regs) ) { + context = "hypervisor"; fault_regs.esp = (unsigned long)®s->esp; fault_regs.ss = read_segment_register(ss); fault_regs.ds = read_segment_register(ds); fault_regs.es = read_segment_register(es); fault_regs.fs = read_segment_register(fs); fault_regs.gs = read_segment_register(gs); + fault_crs[2] = read_cr2(); + } + else + { + context = "guest"; + fault_crs[2] = current->vcpu_info->arch.cr2; } fault_crs[0] = read_cr0(); fault_crs[3] = read_cr3(); - } - - printk("----[ Xen-%d.%d%s %s ]----\n", - xen_major_version(), xen_minor_version(), xen_extra_version(), - print_tainted(taint_str)); + fault_crs[4] = read_cr4(); + } + + print_xen_info(); printk("CPU: %d\nEIP: %04x:[<%08x>]", smp_processor_id(), fault_regs.cs, fault_regs.eip); if ( !guest_mode(regs) ) @@ -63,7 +84,8 @@ void show_registers(struct cpu_user_regs fault_regs.eax, fault_regs.ebx, fault_regs.ecx, fault_regs.edx); printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n", fault_regs.esi, fault_regs.edi, fault_regs.ebp, fault_regs.esp); - printk("cr0: %08lx cr3: %08lx\n", fault_crs[0], fault_crs[3]); + printk("cr0: %08lx cr4: %08lx cr3: %08lx cr2: %08lx\n", + fault_crs[0], fault_crs[4], fault_crs[3], fault_crs[2]); printk("ds: %04x es: %04x fs: %04x gs: %04x " "ss: %04x cs: %04x\n", fault_regs.ds, fault_regs.es, fault_regs.fs, @@ -125,7 +147,6 @@ asmlinkage void do_double_fault(void) { struct tss_struct *tss = &doublefault_tss; unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1; - char taint_str[TAINT_STRING_MAX_LEN]; watchdog_disable(); @@ -133,9 +154,8 @@ asmlinkage void do_double_fault(void) /* Find information saved during fault and dump it to the console. 
*/ tss = &init_tss[cpu]; - printk("*** DOUBLE FAULT: Xen-%d.%d%s %s\n", - xen_major_version(), xen_minor_version(), xen_extra_version(), - print_tainted(taint_str)); + printk("*** DOUBLE FAULT ***\n"); + print_xen_info(); printk("CPU: %d\nEIP: %04x:[<%08x>]", cpu, tss->cs, tss->eip); print_symbol(" %s\n", tss->eip); diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/x86_64/mm.c Mon Sep 18 14:28:16 2006 -0500 @@ -78,7 +78,7 @@ void __init paging_init(void) { unsigned long i, mpt_size; l3_pgentry_t *l3_ro_mpt; - l2_pgentry_t *l2_ro_mpt; + l2_pgentry_t *l2_ro_mpt = NULL; struct page_info *pg; /* Create user-accessible L2 directory to map the MPT for guests. */ @@ -87,12 +87,6 @@ void __init paging_init(void) idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] = l4e_from_page( virt_to_page(l3_ro_mpt), __PAGE_HYPERVISOR | _PAGE_USER); - l2_ro_mpt = alloc_xenheap_page(); - clear_page(l2_ro_mpt); - l3_ro_mpt[l3_table_offset(RO_MPT_VIRT_START)] = - l3e_from_page( - virt_to_page(l2_ro_mpt), __PAGE_HYPERVISOR | _PAGE_USER); - l2_ro_mpt += l2_table_offset(RO_MPT_VIRT_START); /* * Allocate and map the machine-to-phys table. @@ -110,10 +104,20 @@ void __init paging_init(void) PAGE_HYPERVISOR); memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55, 1UL << L2_PAGETABLE_SHIFT); + if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) ) + { + unsigned long va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT); + + l2_ro_mpt = alloc_xenheap_page(); + clear_page(l2_ro_mpt); + l3_ro_mpt[l3_table_offset(va)] = + l3e_from_page( + virt_to_page(l2_ro_mpt), __PAGE_HYPERVISOR | _PAGE_USER); + l2_ro_mpt += l2_table_offset(va); + } /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */ *l2_ro_mpt++ = l2e_from_page( pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT); - BUG_ON(((unsigned long)l2_ro_mpt & ~PAGE_MASK) == 0); } /* Set up linear page table mapping. 
*/ diff -r ce9c34c049c5 -r 0bdd578c417f xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/arch/x86/x86_64/traps.c Mon Sep 18 14:28:16 2006 -0500 @@ -21,11 +21,24 @@ #include <public/callback.h> +static void print_xen_info(void) +{ + char taint_str[TAINT_STRING_MAX_LEN]; + char debug = 'n'; + +#ifndef NDEBUG + debug = 'y'; +#endif + + printk("----[ Xen-%d.%d%s x86_64 debug=%c %s ]----\n", + xen_major_version(), xen_minor_version(), xen_extra_version(), + debug, print_tainted(taint_str)); +} + void show_registers(struct cpu_user_regs *regs) { struct cpu_user_regs fault_regs = *regs; unsigned long fault_crs[8]; - char taint_str[TAINT_STRING_MAX_LEN]; const char *context; if ( hvm_guest(current) && guest_mode(regs) ) @@ -35,18 +48,27 @@ void show_registers(struct cpu_user_regs } else { - context = guest_mode(regs) ? "guest" : "hypervisor"; + if ( guest_mode(regs) ) + { + context = "guest"; + fault_crs[2] = current->vcpu_info->arch.cr2; + } + else + { + context = "hypervisor"; + fault_crs[2] = read_cr2(); + } + fault_crs[0] = read_cr0(); fault_crs[3] = read_cr3(); + fault_crs[4] = read_cr4(); fault_regs.ds = read_segment_register(ds); fault_regs.es = read_segment_register(es); fault_regs.fs = read_segment_register(fs); fault_regs.gs = read_segment_register(gs); } - printk("----[ Xen-%d.%d%s %s ]----\n", - xen_major_version(), xen_minor_version(), xen_extra_version(), - print_tainted(taint_str)); + print_xen_info(); printk("CPU: %d\nRIP: %04x:[<%016lx>]", smp_processor_id(), fault_regs.cs, fault_regs.rip); if ( !guest_mode(regs) ) @@ -62,8 +84,9 @@ void show_registers(struct cpu_user_regs fault_regs.r9, fault_regs.r10, fault_regs.r11); printk("r12: %016lx r13: %016lx r14: %016lx\n", fault_regs.r12, fault_regs.r13, fault_regs.r14); - printk("r15: %016lx cr0: %016lx cr3: %016lx\n", - fault_regs.r15, fault_crs[0], fault_crs[3]); + printk("r15: %016lx cr0: %016lx cr4: %016lx\n", + fault_regs.r15, fault_crs[0], 
fault_crs[4]); + printk("cr3: %016lx cr2: %016lx\n", fault_crs[3], fault_crs[2]); printk("ds: %04x es: %04x fs: %04x gs: %04x " "ss: %04x cs: %04x\n", fault_regs.ds, fault_regs.es, fault_regs.fs, @@ -121,7 +144,6 @@ asmlinkage void do_double_fault(struct c asmlinkage void do_double_fault(struct cpu_user_regs *regs) { unsigned int cpu, tr; - char taint_str[TAINT_STRING_MAX_LEN]; asm ( "str %0" : "=r" (tr) ); cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2; @@ -131,9 +153,8 @@ asmlinkage void do_double_fault(struct c console_force_unlock(); /* Find information saved during fault and dump it to the console. */ - printk("*** DOUBLE FAULT: Xen-%d.%d%s %s\n", - xen_major_version(), xen_minor_version(), xen_extra_version(), - print_tainted(taint_str)); + printk("*** DOUBLE FAULT ***\n"); + print_xen_info(); printk("CPU: %d\nRIP: %04x:[<%016lx>]", cpu, regs->cs, regs->rip); print_symbol(" %s", regs->rip); diff -r ce9c34c049c5 -r 0bdd578c417f xen/common/grant_table.c --- a/xen/common/grant_table.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/common/grant_table.c Mon Sep 18 14:28:16 2006 -0500 @@ -90,11 +90,8 @@ __gnttab_map_grant_ref( unsigned long frame = 0; int rc = GNTST_okay; struct active_grant_entry *act; - - /* Entry details from @rd's shared grant table. */ grant_entry_t *sha; - domid_t sdom; - u16 sflags; + union grant_combo scombo, prev_scombo, new_scombo; /* * We bound the number of times we retry CMPXCHG on memory locations that @@ -159,7 +156,10 @@ __gnttab_map_grant_ref( memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order); for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ ) + { new_mt[i].ref = i+1; + new_mt[i].flags = 0; + } free_xenheap_pages(lgt->maptrack, lgt->maptrack_order); lgt->maptrack = new_mt; @@ -175,12 +175,19 @@ __gnttab_map_grant_ref( spin_lock(&rd->grant_table->lock); + /* If already pinned, check the active domid and avoid refcnt overflow. 
*/ + if ( act->pin && + ((act->domid != ld->domain_id) || + (act->pin & 0x80808080U) != 0) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Bad domain (%d != %d), or risk of counter overflow %08x\n", + act->domid, ld->domain_id, act->pin); + if ( !act->pin || (!(op->flags & GNTMAP_readonly) && !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask))) ) { - sflags = sha->flags; - sdom = sha->domid; + scombo.word = *(u32 *)&sha->flags; /* * This loop attempts to set the access (reading/writing) flags @@ -190,33 +197,29 @@ __gnttab_map_grant_ref( */ for ( ; ; ) { - union grant_combo scombo, prev_scombo, new_scombo; - - if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) || - unlikely(sdom != led->domain->domain_id) ) - PIN_FAIL(unlock_out, GNTST_general_error, - "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", - sflags, sdom, led->domain->domain_id); - - /* Merge two 16-bit values into a 32-bit combined update. */ - scombo.shorts.flags = sflags; - scombo.shorts.domid = sdom; - + /* If not already pinned, check the grant domid and type. */ + if ( !act->pin && + (((scombo.shorts.flags & GTF_type_mask) != + GTF_permit_access) || + (scombo.shorts.domid != ld->domain_id)) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Bad flags (%x) or dom (%d). (expected dom %d)\n", + scombo.shorts.flags, scombo.shorts.domid, + ld->domain_id); + new_scombo = scombo; new_scombo.shorts.flags |= GTF_reading; if ( !(op->flags & GNTMAP_readonly) ) { new_scombo.shorts.flags |= GTF_writing; - if ( unlikely(sflags & GTF_readonly) ) + if ( unlikely(scombo.shorts.flags & GTF_readonly) ) PIN_FAIL(unlock_out, GNTST_general_error, "Attempt to write-pin a r/o grant entry.\n"); } prev_scombo.word = cmpxchg((u32 *)&sha->flags, scombo.word, new_scombo.word); - - /* Did the combined update work (did we see what we expected?). 
*/ if ( likely(prev_scombo.word == scombo.word) ) break; @@ -224,20 +227,15 @@ __gnttab_map_grant_ref( PIN_FAIL(unlock_out, GNTST_general_error, "Shared grant entry is unstable.\n"); - /* Didn't see what we expected. Split out the seen flags & dom. */ - sflags = prev_scombo.shorts.flags; - sdom = prev_scombo.shorts.domid; + scombo = prev_scombo; } if ( !act->pin ) { - act->domid = sdom; + act->domid = scombo.shorts.domid; act->frame = gmfn_to_mfn(rd, sha->frame); } } - else if ( (act->pin & 0x80808080U) != 0 ) - PIN_FAIL(unlock_out, ENOSPC, - "Risk of counter overflow %08x\n", act->pin); if ( op->flags & GNTMAP_device_map ) act->pin += (op->flags & GNTMAP_readonly) ? @@ -545,9 +543,7 @@ gnttab_prepare_for_transfer( { struct grant_table *rgt; struct grant_entry *sha; - domid_t sdom; - u16 sflags; - union grant_combo scombo, prev_scombo, tmp_scombo; + union grant_combo scombo, prev_scombo, new_scombo; int retries = 0; if ( unlikely((rgt = rd->grant_table) == NULL) || @@ -562,29 +558,24 @@ gnttab_prepare_for_transfer( sha = &rgt->shared[ref]; - sflags = sha->flags; - sdom = sha->domid; + scombo.word = *(u32 *)&sha->flags; for ( ; ; ) { - if ( unlikely(sflags != GTF_accept_transfer) || - unlikely(sdom != ld->domain_id) ) + if ( unlikely(scombo.shorts.flags != GTF_accept_transfer) || + unlikely(scombo.shorts.domid != ld->domain_id) ) { DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", - sflags, sdom, ld->domain_id); + scombo.shorts.flags, scombo.shorts.domid, + ld->domain_id); goto fail; } - /* Merge two 16-bit values into a 32-bit combined update. */ - scombo.shorts.flags = sflags; - scombo.shorts.domid = sdom; - - tmp_scombo = scombo; - tmp_scombo.shorts.flags |= GTF_transfer_committed; + new_scombo = scombo; + new_scombo.shorts.flags |= GTF_transfer_committed; + prev_scombo.word = cmpxchg((u32 *)&sha->flags, - scombo.word, tmp_scombo.word); - - /* Did the combined update work (did we see what we expected?). 
*/ + scombo.word, new_scombo.word); if ( likely(prev_scombo.word == scombo.word) ) break; @@ -594,9 +585,7 @@ gnttab_prepare_for_transfer( goto fail; } - /* Didn't see what we expected. Split out the seen flags & dom. */ - sflags = prev_scombo.shorts.flags; - sdom = prev_scombo.shorts.domid; + scombo = prev_scombo; } spin_unlock(&rgt->lock); @@ -734,16 +723,21 @@ __release_grant_for_copy( gnttab_mark_dirty(rd, r_frame); spin_lock(&rd->grant_table->lock); + if ( readonly ) + { act->pin -= GNTPIN_hstr_inc; + } else + { act->pin -= GNTPIN_hstw_inc; - - if ( !(act->pin & GNTPIN_hstw_mask) && !readonly ) - gnttab_clear_flag(_GTF_writing, &sha->flags); + if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) ) + gnttab_clear_flag(_GTF_writing, &sha->flags); + } if ( !act->pin ) gnttab_clear_flag(_GTF_reading, &sha->flags); + spin_unlock(&rd->grant_table->lock); } @@ -759,8 +753,7 @@ __acquire_grant_for_copy( struct active_grant_entry *act; s16 rc = GNTST_okay; int retries = 0; - u16 sflags; - domid_t sdom; + union grant_combo scombo, prev_scombo, new_scombo; if ( unlikely(gref >= NR_GRANT_ENTRIES) ) PIN_FAIL(error_out, GNTST_bad_gntref, @@ -771,36 +764,42 @@ __acquire_grant_for_copy( spin_lock(&rd->grant_table->lock); + /* If already pinned, check the active domid and avoid refcnt overflow. 
*/ + if ( act->pin && + ((act->domid != current->domain->domain_id) || + (act->pin & 0x80808080U) != 0) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Bad domain (%d != %d), or risk of counter overflow %08x\n", + act->domid, current->domain->domain_id, act->pin); + if ( !act->pin || - (!readonly && !(act->pin & GNTPIN_hstw_mask)) ) - { - sflags = sha->flags; - sdom = sha->domid; + (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) ) + { + scombo.word = *(u32 *)&sha->flags; for ( ; ; ) { - union grant_combo scombo, prev_scombo, new_scombo; - - if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access || - sdom != current->domain->domain_id ) ) - PIN_FAIL(unlock_out, GNTST_general_error, - "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", - sflags, sdom, current->domain->domain_id); - - /* Merge two 16-bit values into a 32-bit combined update. */ - scombo.shorts.flags = sflags; - scombo.shorts.domid = sdom; - + /* If not already pinned, check the grant domid and type. */ + if ( !act->pin && + (((scombo.shorts.flags & GTF_type_mask) != + GTF_permit_access) || + (scombo.shorts.domid != current->domain->domain_id)) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Bad flags (%x) or dom (%d). 
(expected dom %d)\n", + scombo.shorts.flags, scombo.shorts.domid, + current->domain->domain_id); + new_scombo = scombo; new_scombo.shorts.flags |= GTF_reading; if ( !readonly ) { new_scombo.shorts.flags |= GTF_writing; - if ( unlikely(sflags & GTF_readonly) ) + if ( unlikely(scombo.shorts.flags & GTF_readonly) ) PIN_FAIL(unlock_out, GNTST_general_error, "Attempt to write-pin a r/o grant entry.\n"); } + prev_scombo.word = cmpxchg((u32 *)&sha->flags, scombo.word, new_scombo.word); if ( likely(prev_scombo.word == scombo.word) ) @@ -809,19 +808,16 @@ __acquire_grant_for_copy( if ( retries++ == 4 ) PIN_FAIL(unlock_out, GNTST_general_error, "Shared grant entry is unstable.\n"); - sflags = prev_scombo.shorts.flags; - sdom = prev_scombo.shorts.flags; + + scombo = prev_scombo; } if ( !act->pin ) { - act->domid = sdom; + act->domid = scombo.shorts.domid; act->frame = gmfn_to_mfn(rd, sha->frame); } } - else if ( (act->pin & 0x80808080U) != 0 ) - PIN_FAIL(unlock_out, ENOSPC, - "Risk of counter overflow %08x\n", act->pin); act->pin += readonly ? 
GNTPIN_hstr_inc : GNTPIN_hstw_inc; diff -r ce9c34c049c5 -r 0bdd578c417f xen/common/perfc.c --- a/xen/common/perfc.c Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/common/perfc.c Mon Sep 18 14:28:16 2006 -0500 @@ -136,8 +136,8 @@ static xen_sysctl_perfc_val_t *perfc_val static xen_sysctl_perfc_val_t *perfc_vals; static int perfc_nbr_vals; static int perfc_init = 0; -static int perfc_copy_info(XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc, - XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val) +static int perfc_copy_info(XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc, + XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val) { unsigned int i, j; unsigned int v = 0; diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/asm-ia64/mm.h --- a/xen/include/asm-ia64/mm.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/asm-ia64/mm.h Mon Sep 18 14:28:16 2006 -0500 @@ -451,7 +451,6 @@ extern u64 translate_domain_pte(u64 ptev #define INVALID_M2P_ENTRY (~0UL) #define VALID_M2P(_e) (!((_e) & (1UL<<63))) -#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e)) #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn)) #define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)]) @@ -495,7 +494,7 @@ extern u64 translate_domain_pte(u64 ptev ((gmfn_to_mfn((_d),(gpa)>>PAGE_SHIFT)<<PAGE_SHIFT)|((gpa)&~PAGE_MASK)) /* Arch-specific portion of memory_op hypercall. 
*/ -#define arch_memory_op(op, arg) (-ENOSYS) +long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg); int steal_page( struct domain *d, struct page_info *page, unsigned int memflags); diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/asm-ia64/perfc_defn.h --- a/xen/include/asm-ia64/perfc_defn.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/asm-ia64/perfc_defn.h Mon Sep 18 14:28:16 2006 -0500 @@ -35,6 +35,48 @@ PERFCOUNTER_ARRAY(mov_from_cr, "p PERFCOUNTER_ARRAY(misc_privop, "privop misc", 64) +// privileged instructions to fall into vmx_entry +PERFCOUNTER_CPU(vmx_rsm, "vmx privop rsm") +PERFCOUNTER_CPU(vmx_ssm, "vmx privop ssm") +PERFCOUNTER_CPU(vmx_mov_to_psr, "vmx privop mov_to_psr") +PERFCOUNTER_CPU(vmx_mov_from_psr, "vmx privop mov_from_psr") +PERFCOUNTER_CPU(vmx_mov_from_cr, "vmx privop mov_from_cr") +PERFCOUNTER_CPU(vmx_mov_to_cr, "vmx privop mov_to_cr") +PERFCOUNTER_CPU(vmx_bsw0, "vmx privop bsw0") +PERFCOUNTER_CPU(vmx_bsw1, "vmx privop bsw1") +PERFCOUNTER_CPU(vmx_cover, "vmx privop cover") +PERFCOUNTER_CPU(vmx_rfi, "vmx privop rfi") +PERFCOUNTER_CPU(vmx_itr_d, "vmx privop itr_d") +PERFCOUNTER_CPU(vmx_itr_i, "vmx privop itr_i") +PERFCOUNTER_CPU(vmx_ptr_d, "vmx privop ptr_d") +PERFCOUNTER_CPU(vmx_ptr_i, "vmx privop ptr_i") +PERFCOUNTER_CPU(vmx_itc_d, "vmx privop itc_d") +PERFCOUNTER_CPU(vmx_itc_i, "vmx privop itc_i") +PERFCOUNTER_CPU(vmx_ptc_l, "vmx privop ptc_l") +PERFCOUNTER_CPU(vmx_ptc_g, "vmx privop ptc_g") +PERFCOUNTER_CPU(vmx_ptc_ga, "vmx privop ptc_ga") +PERFCOUNTER_CPU(vmx_ptc_e, "vmx privop ptc_e") +PERFCOUNTER_CPU(vmx_mov_to_rr, "vmx privop mov_to_rr") +PERFCOUNTER_CPU(vmx_mov_from_rr, "vmx privop mov_from_rr") +PERFCOUNTER_CPU(vmx_thash, "vmx privop thash") +PERFCOUNTER_CPU(vmx_ttag, "vmx privop ttag") +PERFCOUNTER_CPU(vmx_tpa, "vmx privop tpa") +PERFCOUNTER_CPU(vmx_tak, "vmx privop tak") +PERFCOUNTER_CPU(vmx_mov_to_ar_imm, "vmx privop mov_to_ar_imm") +PERFCOUNTER_CPU(vmx_mov_to_ar_reg, "vmx privop mov_to_ar_reg") 
+PERFCOUNTER_CPU(vmx_mov_from_ar_reg, "vmx privop mov_from_ar_reg") +PERFCOUNTER_CPU(vmx_mov_to_dbr, "vmx privop mov_to_dbr") +PERFCOUNTER_CPU(vmx_mov_to_ibr, "vmx privop mov_to_ibr") +PERFCOUNTER_CPU(vmx_mov_to_pmc, "vmx privop mov_to_pmc") +PERFCOUNTER_CPU(vmx_mov_to_pmd, "vmx privop mov_to_pmd") +PERFCOUNTER_CPU(vmx_mov_to_pkr, "vmx privop mov_to_pkr") +PERFCOUNTER_CPU(vmx_mov_from_dbr, "vmx privop mov_from_dbr") +PERFCOUNTER_CPU(vmx_mov_from_ibr, "vmx privop mov_from_ibr") +PERFCOUNTER_CPU(vmx_mov_from_pmc, "vmx privop mov_from_pmc") +PERFCOUNTER_CPU(vmx_mov_from_pkr, "vmx privop mov_from_pkr") +PERFCOUNTER_CPU(vmx_mov_from_cpuid, "vmx privop mov_from_cpuid") + + PERFCOUNTER_ARRAY(slow_hyperprivop, "slow hyperprivops", HYPERPRIVOP_MAX + 1) PERFCOUNTER_ARRAY(fast_hyperprivop, "fast hyperprivops", HYPERPRIVOP_MAX + 1) @@ -43,6 +85,14 @@ PERFCOUNTER_ARRAY(fast_reflect, "f PERFSTATUS(vhpt_nbr_entries, "nbr of entries per VHPT") PERFSTATUS_CPU(vhpt_valid_entries, "nbr of valid entries in VHPT") + +PERFCOUNTER_ARRAY(vmx_mmio_access, "vmx_mmio_access", 8) +PERFCOUNTER_CPU(vmx_pal_emul, "vmx_pal_emul") +PERFCOUNTER_ARRAY(vmx_switch_mm_mode, "vmx_switch_mm_mode", 8) +PERFCOUNTER_CPU(vmx_ia64_handle_break,"vmx_ia64_handle_break") +PERFCOUNTER_ARRAY(vmx_inject_guest_interruption, + "vmx_inject_guest_interruption", 0x80) +PERFCOUNTER_ARRAY(fw_hypercall, "fw_hypercall", 0x20) #ifdef CONFIG_PRIVOP_ADDRS #ifndef PERFPRIVOPADDR diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/asm-x86/bitops.h --- a/xen/include/asm-x86/bitops.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/asm-x86/bitops.h Mon Sep 18 14:28:16 2006 -0500 @@ -6,14 +6,6 @@ */ #include <xen/config.h> - -/* - * These have to be done with inline assembly: that way the bit-setting - * is guaranteed to be atomic. All bit operations return 0 if the bit - * was cleared before the operation and != 0 if it was not. - * - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
- */ #ifdef CONFIG_SMP #define LOCK_PREFIX "lock ; " @@ -21,6 +13,13 @@ #define LOCK_PREFIX "" #endif +/* + * We use the "+m" constraint because the memory operand is both read from + * and written to. Since the operand is in fact a word array, we also + * specify "memory" in the clobbers list to indicate that words other than + * the one directly addressed by the memory operand may be modified. + */ + #define ADDR (*(volatile long *) addr) /** @@ -37,8 +36,8 @@ static __inline__ void set_bit(int nr, v { __asm__ __volatile__( LOCK_PREFIX "btsl %1,%0" - :"=m" (ADDR) - :"dIr" (nr)); + :"+m" (ADDR) + :"dIr" (nr) : "memory"); } /** @@ -54,8 +53,8 @@ static __inline__ void __set_bit(int nr, { __asm__( "btsl %1,%0" - :"=m" (ADDR) - :"dIr" (nr)); + :"+m" (ADDR) + :"dIr" (nr) : "memory"); } /** @@ -72,8 +71,8 @@ static __inline__ void clear_bit(int nr, { __asm__ __volatile__( LOCK_PREFIX "btrl %1,%0" - :"=m" (ADDR) - :"dIr" (nr)); + :"+m" (ADDR) + :"dIr" (nr) : "memory"); } /** @@ -89,8 +88,8 @@ static __inline__ void __clear_bit(int n { __asm__( "btrl %1,%0" - :"=m" (ADDR) - :"dIr" (nr)); + :"+m" (ADDR) + :"dIr" (nr) : "memory"); } #define smp_mb__before_clear_bit() barrier() @@ -109,8 +108,8 @@ static __inline__ void __change_bit(int { __asm__ __volatile__( "btcl %1,%0" - :"=m" (ADDR) - :"dIr" (nr)); + :"+m" (ADDR) + :"dIr" (nr) : "memory"); } /** @@ -126,8 +125,8 @@ static __inline__ void change_bit(int nr { __asm__ __volatile__( LOCK_PREFIX "btcl %1,%0" - :"=m" (ADDR) - :"dIr" (nr)); + :"+m" (ADDR) + :"dIr" (nr) : "memory"); } /** @@ -144,7 +143,7 @@ static __inline__ int test_and_set_bit(i __asm__ __volatile__( LOCK_PREFIX "btsl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) + :"=r" (oldbit),"+m" (ADDR) :"dIr" (nr) : "memory"); return oldbit; } @@ -164,8 +163,8 @@ static __inline__ int __test_and_set_bit __asm__( "btsl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"dIr" (nr)); + :"=r" (oldbit),"+m" (ADDR) + :"dIr" (nr) : "memory"); return oldbit; } @@ -183,7 
+182,7 @@ static __inline__ int test_and_clear_bit __asm__ __volatile__( LOCK_PREFIX "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) + :"=r" (oldbit),"+m" (ADDR) :"dIr" (nr) : "memory"); return oldbit; } @@ -203,8 +202,8 @@ static __inline__ int __test_and_clear_b __asm__( "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"dIr" (nr)); + :"=r" (oldbit),"+m" (ADDR) + :"dIr" (nr) : "memory"); return oldbit; } @@ -215,7 +214,7 @@ static __inline__ int __test_and_change_ __asm__ __volatile__( "btcl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) + :"=r" (oldbit),"+m" (ADDR) :"dIr" (nr) : "memory"); return oldbit; } @@ -234,7 +233,7 @@ static __inline__ int test_and_change_bi __asm__ __volatile__( LOCK_PREFIX "btcl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) + :"=r" (oldbit),"+m" (ADDR) :"dIr" (nr) : "memory"); return oldbit; } @@ -242,7 +241,7 @@ static __inline__ int test_and_change_bi static __inline__ int constant_test_bit(int nr, const volatile void * addr) { - return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; + return ((1U << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; } static __inline__ int variable_test_bit(int nr, volatile void * addr) diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/asm-x86/hvm/vlapic.h --- a/xen/include/asm-x86/hvm/vlapic.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/asm-x86/hvm/vlapic.h Mon Sep 18 14:28:16 2006 -0500 @@ -23,12 +23,28 @@ #include <asm/msr.h> #include <public/hvm/ioreq.h> -static __inline__ int find_highest_bit(unsigned long *data, int nr_bits) +#define MAX_VECTOR 256 + +#define VEC_POS(v) ((v)%32) +#define REG_POS(v) (((v)/32)* 0x10) +#define vlapic_test_and_set_vector(vec, bitmap) \ + test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)) +#define vlapic_test_and_clear_vector(vec, bitmap) \ + test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)) +#define vlapic_set_vector(vec, bitmap) \ + set_bit(VEC_POS(vec), (bitmap) + 
REG_POS(vec)) +#define vlapic_clear_vector(vec, bitmap) \ + clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)) + +static inline int vlapic_find_highest_vector(u32 *bitmap) { - int length = BITS_TO_LONGS(nr_bits); - while ( length && !data[--length] ) + int word_offset = MAX_VECTOR / 32; + + /* Work backwards through the bitmap (first 32-bit word in every four). */ + while ( (word_offset != 0) && (bitmap[(--word_offset)*4] == 0) ) continue; - return (fls(data[length]) - 1) + (length * BITS_PER_LONG); + + return (fls(bitmap[word_offset*4]) - 1) + (word_offset * 32); } #define VLAPIC(v) (v->arch.hvm_vcpu.vlapic) @@ -83,8 +99,6 @@ typedef struct direct_intr_info { int source[6]; } direct_intr_info_t; -#define MAX_VECTOR 256 - struct vlapic { uint32_t status; uint32_t vcpu_id; @@ -108,9 +122,9 @@ static inline int vlapic_set_irq(struct { int ret; - ret = test_and_set_bit(vec, vlapic->regs + APIC_IRR); + ret = vlapic_test_and_set_vector(vec, vlapic->regs + APIC_IRR); if ( trig ) - set_bit(vec, vlapic->regs + APIC_TMR); + vlapic_set_vector(vec, vlapic->regs + APIC_TMR); /* We may need to wake up target vcpu, besides set pending bit here */ return ret; diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/asm-x86/mm.h Mon Sep 18 14:28:16 2006 -0500 @@ -338,7 +338,6 @@ int check_descriptor(struct desc_struct #define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START) #define INVALID_M2P_ENTRY (~0UL) #define VALID_M2P(_e) (!((_e) & (1UL<<(BITS_PER_LONG-1)))) -#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e)) #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn)) #define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)]) diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/asm-x86/page.h Mon Sep 18 14:28:16 2006 -0500 @@ -300,13 +300,6 @@ void 
setup_idle_pagetable(void); #define _PAGE_GNTTAB 0 #endif -/* - * Disallow unused flag bits plus PAT, PSE and GLOBAL. - * Also disallow GNTTAB if we are using it for grant-table debugging. - * Permit the NX bit if the hardware supports it. - */ -#define BASE_DISALLOW_MASK ((0xFFFFF180U | _PAGE_GNTTAB) & ~_PAGE_NX) - #define __PAGE_HYPERVISOR \ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) #define __PAGE_HYPERVISOR_NOCACHE \ diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/asm-x86/processor.h --- a/xen/include/asm-x86/processor.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/asm-x86/processor.h Mon Sep 18 14:28:16 2006 -0500 @@ -288,6 +288,13 @@ static inline void write_cr0(unsigned lo static inline void write_cr0(unsigned long val) { __asm__("mov %0,%%cr0": :"r" ((unsigned long)val)); +} + +static inline unsigned long read_cr2(void) +{ + unsigned long __cr2; + __asm__("mov %%cr2,%0\n\t" :"=r" (__cr2)); + return __cr2; } static inline unsigned long read_cr4(void) diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/asm-x86/x86_32/page-2level.h --- a/xen/include/asm-x86/x86_32/page-2level.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/asm-x86/x86_32/page-2level.h Mon Sep 18 14:28:16 2006 -0500 @@ -53,7 +53,4 @@ typedef l2_pgentry_t root_pgentry_t; #define get_pte_flags(x) ((int)(x) & 0xFFF) #define put_pte_flags(x) ((intpte_t)((x) & 0xFFF)) -#define L1_DISALLOW_MASK BASE_DISALLOW_MASK -#define L2_DISALLOW_MASK BASE_DISALLOW_MASK - #endif /* __X86_32_PAGE_2LEVEL_H__ */ diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/asm-x86/x86_32/page-3level.h --- a/xen/include/asm-x86/x86_32/page-3level.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/asm-x86/x86_32/page-3level.h Mon Sep 18 14:28:16 2006 -0500 @@ -66,8 +66,6 @@ typedef l3_pgentry_t root_pgentry_t; #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF)) #define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF)) -#define L1_DISALLOW_MASK BASE_DISALLOW_MASK 
-#define L2_DISALLOW_MASK BASE_DISALLOW_MASK #define L3_DISALLOW_MASK 0xFFFFF1E6U /* must-be-zero */ #endif /* __X86_32_PAGE_3LEVEL_H__ */ diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/asm-x86/x86_32/page.h --- a/xen/include/asm-x86/x86_32/page.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/asm-x86/x86_32/page.h Mon Sep 18 14:28:16 2006 -0500 @@ -26,6 +26,15 @@ extern unsigned int PAGE_HYPERVISOR_NOCA #define GRANT_PTE_FLAGS \ (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB) +/* + * Disallow unused flag bits plus PAT, PSE and GLOBAL. + * Permit the NX bit if the hardware supports it. + */ +#define BASE_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) + +#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB) +#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK) + #endif /* __X86_32_PAGE_H__ */ /* diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/asm-x86/x86_64/page.h --- a/xen/include/asm-x86/x86_64/page.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/asm-x86/x86_64/page.h Mon Sep 18 14:28:16 2006 -0500 @@ -75,8 +75,15 @@ typedef l4_pgentry_t root_pgentry_t; #define _PAGE_NX_BIT (1U<<23) #define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0U) -#define L1_DISALLOW_MASK BASE_DISALLOW_MASK -#define L2_DISALLOW_MASK BASE_DISALLOW_MASK +/* + * Disallow unused flag bits plus PAT, PSE and GLOBAL. + * Permit the NX bit if the hardware supports it. + * Note that range [62:52] is available for software use on x86/64. 
+ */ +#define BASE_DISALLOW_MASK (0xFF000180U & ~_PAGE_NX) + +#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB) +#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK) #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */) #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */) diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/public/arch-ia64.h Mon Sep 18 14:28:16 2006 -0500 @@ -18,15 +18,12 @@ #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define XEN_GUEST_HANDLE_64(name) __guest_handle_ ## name #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) #endif #ifndef __ASSEMBLY__ -typedef uint64_t uint64_aligned_t; - /* Guest handles for primitive C types. */ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); __DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/public/arch-powerpc.h --- a/xen/include/public/arch-powerpc.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/public/arch-powerpc.h Mon Sep 18 14:28:16 2006 -0500 @@ -29,7 +29,6 @@ #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define XEN_GUEST_HANDLE_64(name) __guest_handle_ ## name #define set_xen_guest_handle(hnd, val) \ do { \ if (sizeof ((hnd).__pad)) \ @@ -42,8 +41,6 @@ #endif #ifndef __ASSEMBLY__ -typedef uint64_t uint64_aligned_t; - /* Guest handles for primitive C types. 
*/ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); __DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/public/arch-x86_32.h --- a/xen/include/public/arch-x86_32.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/public/arch-x86_32.h Mon Sep 18 14:28:16 2006 -0500 @@ -28,14 +28,7 @@ #endif /* Structural guest handles introduced in 0x00030201. */ -#if (defined(__XEN__) || defined(__XEN_TOOLS__)) && !defined(__ASSEMBLY__) -typedef uint64_t __attribute__((aligned(8))) uint64_aligned_t; -#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ - typedef struct { type *p; } \ - __guest_handle_ ## name; \ - typedef struct { union { type *p; uint64_aligned_t q; }; } \ - __guest_handle_64_ ## name -#elif __XEN_INTERFACE_VERSION__ >= 0x00030201 +#if __XEN_INTERFACE_VERSION__ >= 0x00030201 #define __DEFINE_XEN_GUEST_HANDLE(name, type) \ typedef struct { type *p; } __guest_handle_ ## name #else @@ -45,15 +38,9 @@ typedef uint64_t __attribute__((aligned( #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name +#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -#define set_xen_guest_handle(hnd, val) \ - do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ - (hnd).p = val; \ - } while ( 0 ) -#else -#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) #endif #ifndef __ASSEMBLY__ diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/public/arch-x86_64.h --- a/xen/include/public/arch-x86_64.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/public/arch-x86_64.h Mon Sep 18 14:28:16 2006 -0500 @@ -39,15 +39,12 @@ #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define XEN_GUEST_HANDLE_64(name) 
__guest_handle_ ## name #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) #endif #ifndef __ASSEMBLY__ -typedef uint64_t uint64_aligned_t; - /* Guest handles for primitive C types. */ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); __DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/public/domctl.h --- a/xen/include/public/domctl.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/public/domctl.h Mon Sep 18 14:28:16 2006 -0500 @@ -16,12 +16,10 @@ #include "xen.h" -#define XEN_DOMCTL_INTERFACE_VERSION 0x00000002 - -#define uint64_t uint64_aligned_t +#define XEN_DOMCTL_INTERFACE_VERSION 0x00000003 struct xenctl_cpumap { - XEN_GUEST_HANDLE_64(uint8_t) bitmap; + XEN_GUEST_HANDLE(uint8_t) bitmap; uint32_t nr_cpus; }; @@ -76,7 +74,7 @@ struct xen_domctl_getmemlist { uint64_t max_pfns; /* Start index in guest's page list. */ uint64_t start_pfn; - XEN_GUEST_HANDLE_64(xen_pfn_t) buffer; + XEN_GUEST_HANDLE(xen_pfn_t) buffer; /* OUT variables. */ uint64_t num_pfns; }; @@ -113,7 +111,7 @@ struct xen_domctl_getpageframeinfo2 { /* IN variables. */ uint64_t num; /* IN/OUT variables. */ - XEN_GUEST_HANDLE_64(ulong) array; + XEN_GUEST_HANDLE(ulong) array; }; typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); @@ -187,7 +185,7 @@ struct xen_domctl_shadow_op { uint32_t mb; /* Shadow memory allocation in MB */ /* OP_PEEK / OP_CLEAN */ - XEN_GUEST_HANDLE_64(ulong) dirty_bitmap; + XEN_GUEST_HANDLE(ulong) dirty_bitmap; uint64_t pages; /* Size of buffer. Updated with actual size. 
*/ struct xen_domctl_shadow_op_stats stats; }; @@ -207,8 +205,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_m #define XEN_DOMCTL_setvcpucontext 12 #define XEN_DOMCTL_getvcpucontext 13 struct xen_domctl_vcpucontext { - uint32_t vcpu; /* IN */ - XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ + uint32_t vcpu; /* IN */ + XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt; /* IN/OUT */ }; typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t); @@ -381,8 +379,6 @@ typedef struct xen_domctl xen_domctl_t; typedef struct xen_domctl xen_domctl_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); -#undef uint64_t - #endif /* __XEN_PUBLIC_DOMCTL_H__ */ /* diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/public/sysctl.h --- a/xen/include/public/sysctl.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/public/sysctl.h Mon Sep 18 14:28:16 2006 -0500 @@ -16,9 +16,7 @@ #include "xen.h" #include "domctl.h" -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000001 - -#define uint64_t uint64_aligned_t +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000002 /* * Read console content from Xen buffer ring. @@ -26,8 +24,8 @@ #define XEN_SYSCTL_readconsole 1 struct xen_sysctl_readconsole { /* IN variables. */ - uint32_t clear; /* Non-zero -> clear after reading. */ - XEN_GUEST_HANDLE_64(char) buffer; /* Buffer start */ + uint32_t clear; /* Non-zero -> clear after reading. */ + XEN_GUEST_HANDLE(char) buffer; /* Buffer start */ /* IN/OUT variables. 
*/ uint32_t count; /* In: Buffer size; Out: Used buffer size */ }; @@ -105,9 +103,9 @@ struct xen_sysctl_perfc_op { uint32_t nr_counters; /* number of counters description */ uint32_t nr_vals; /* number of values */ /* counter information (or NULL) */ - XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc; + XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc; /* counter values (or NULL) */ - XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val; + XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val; }; typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); @@ -117,7 +115,7 @@ struct xen_sysctl_getdomaininfolist { /* IN variables. */ domid_t first_domain; uint32_t max_domains; - XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; + XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t) buffer; /* OUT variables. */ uint32_t num_domains; }; @@ -140,8 +138,6 @@ typedef struct xen_sysctl xen_sysctl_t; typedef struct xen_sysctl xen_sysctl_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t); -#undef uint64_t - #endif /* __XEN_PUBLIC_SYSCTL_H__ */ /* diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/public/xenoprof.h --- a/xen/include/public/xenoprof.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/public/xenoprof.h Mon Sep 18 14:28:16 2006 -0500 @@ -28,6 +28,8 @@ #define XENOPROF_disable_virq 11 #define XENOPROF_release_counters 12 #define XENOPROF_shutdown 13 +#define XENOPROF_get_buffer 14 +#define XENOPROF_last_op 14 #define MAX_OPROF_EVENTS 32 #define MAX_OPROF_DOMAINS 25 @@ -56,16 +58,21 @@ DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); struct xenoprof_init { - int32_t max_samples; int32_t num_events; int32_t is_primary; - int32_t nbuf; - int32_t bufsize; - uint64_t buf_maddr; char cpu_type[XENOPROF_CPU_TYPE_SIZE]; }; typedef struct xenoprof_init xenoprof_init_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t); + +struct xenoprof_get_buffer { + int32_t max_samples; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_maddr; +}; 
+typedef struct xenoprof_get_buffer xenoprof_get_buffer_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t); struct xenoprof_counter { uint32_t ind; diff -r ce9c34c049c5 -r 0bdd578c417f xen/include/xen/compiler.h --- a/xen/include/xen/compiler.h Mon Sep 18 09:23:51 2006 -0400 +++ b/xen/include/xen/compiler.h Mon Sep 18 14:28:16 2006 -0500 @@ -35,6 +35,11 @@ #define offsetof(a,b) ((unsigned long)&(((a *)0)->b)) #endif +#if defined(__x86_64__) && (__GNUC__ > 3) +/* Results in more efficient PIC code (no indirections through GOT or PLT). */ +#pragma GCC visibility push(hidden) +#endif + /* This macro obfuscates arithmetic on a variable address so that gcc shouldn't recognize the original var, and make assumptions about it */ /* diff -r ce9c34c049c5 -r 0bdd578c417f linux-2.6-xen-sparse/include/asm-ia64/machvec_dig.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/include/asm-ia64/machvec_dig.h Mon Sep 18 14:28:16 2006 -0500 @@ -0,0 +1,33 @@ +#ifndef _ASM_IA64_MACHVEC_DIG_h +#define _ASM_IA64_MACHVEC_DIG_h + +extern ia64_mv_setup_t dig_setup; +extern ia64_mv_irq_init_t dig_irq_init; + +/* + * This stuff has dual use! + * + * For a generic kernel, the macros are used to initialize the + * platform's machvec structure. When compiling a non-generic kernel, + * the macros are used directly. 
+ */ +#define platform_name "dig" +#define platform_setup dig_setup +#define platform_irq_init dig_irq_init + +#ifdef CONFIG_XEN +# define platform_dma_map_sg dma_map_sg +# define platform_dma_unmap_sg dma_unmap_sg +# define platform_dma_mapping_error dma_mapping_error +# define platform_dma_supported dma_supported +# define platform_dma_alloc_coherent dma_alloc_coherent +# define platform_dma_free_coherent dma_free_coherent +# define platform_dma_map_single dma_map_single +# define platform_dma_unmap_single dma_unmap_single +# define platform_dma_sync_single_for_cpu \ + dma_sync_single_for_cpu +# define platform_dma_sync_single_for_device \ + dma_sync_single_for_device +#endif + +#endif /* _ASM_IA64_MACHVEC_DIG_h */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |