[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1182365367 21600
# Node ID 810885428743660169e7382ec9596373ca6ce48f
# Parent  c20bc60f9243d08199cb0a9a837cbe11c6b3dcdc
# Parent  005dd6b1cf8e0008aba7984b828274a40e8d7d95
merge with xen-unstable.hg
---
 docs/src/user.tex                       |    1 
 tools/blktap/drivers/Makefile           |    1 
 tools/blktap/drivers/block-aio.c        |   49 ++++-----
 tools/blktap/drivers/block-qcow.c       |   48 ++++-----
 tools/blktap/drivers/tapaio.c           |  164 ++++++++++++++++++++++++++++++++
 tools/blktap/drivers/tapaio.h           |   58 +++++++++++
 tools/examples/init.d/xendomains        |   33 ++++--
 tools/ioemu/block-raw.c                 |    2 
 tools/ioemu/target-i386-dm/exec-dm.c    |   42 ++++++--
 tools/ioemu/vl.c                        |   12 ++
 tools/libxc/xc_core.c                   |    2 
 tools/python/xen/xend/XendDomainInfo.py |    2 
 tools/python/xen/xend/server/blkif.py   |    5 
 xen/arch/ia64/xen/domain.c              |    9 -
 xen/arch/ia64/xen/xenmem.c              |    2 
 xen/arch/x86/apic.c                     |    4 
 xen/arch/x86/boot/cmdline.S             |   40 +++++--
 xen/arch/x86/boot/trampoline.S          |   11 --
 xen/arch/x86/boot/video.S               |   59 ++++++-----
 xen/arch/x86/boot/video.h               |    9 -
 xen/arch/x86/boot/x86_32.S              |    4 
 xen/arch/x86/boot/x86_64.S              |    2 
 xen/arch/x86/domain.c                   |   37 +++++--
 xen/arch/x86/domain_build.c             |    6 -
 xen/arch/x86/flushtlb.c                 |    4 
 xen/arch/x86/hvm/hvm.c                  |   21 ++--
 xen/arch/x86/hvm/irq.c                  |   81 ++++++-------
 xen/arch/x86/hvm/svm/asid.c             |   72 +++++-------
 xen/arch/x86/hvm/svm/intr.c             |  146 ++++++++++++++----------
 xen/arch/x86/hvm/svm/svm.c              |   60 +++++------
 xen/arch/x86/hvm/svm/vmcb.c             |    6 -
 xen/arch/x86/hvm/vioapic.c              |   34 +++---
 xen/arch/x86/hvm/vlapic.c               |    9 -
 xen/arch/x86/hvm/vmx/intr.c             |  106 ++++++++----------
 xen/arch/x86/hvm/vmx/vmcs.c             |    2 
 xen/arch/x86/hvm/vmx/vmx.c              |   59 ++++++---
 xen/arch/x86/hvm/vpic.c                 |    3 
 xen/arch/x86/hvm/vpt.c                  |   40 ++++---
 xen/arch/x86/mm.c                       |   10 -
 xen/arch/x86/setup.c                    |   10 +
 xen/arch/x86/traps.c                    |   14 ++
 xen/arch/x86/x86_32/traps.c             |    1 
 xen/arch/x86/x86_64/compat_kexec.S      |   65 +++++++++++-
 xen/arch/x86/x86_64/traps.c             |    1 
 xen/common/compat/memory.c              |    7 +
 xen/common/domctl.c                     |    4 
 xen/common/grant_table.c                |   12 +-
 xen/common/kernel.c                     |   10 -
 xen/common/kexec.c                      |    4 
 xen/common/perfc.c                      |    2 
 xen/drivers/char/console.c              |    2 
 xen/drivers/video/vga.c                 |    3 
 xen/include/asm-ia64/guest_access.h     |   25 ++--
 xen/include/asm-x86/event.h             |    1 
 xen/include/asm-x86/guest_access.h      |   68 +++++------
 xen/include/asm-x86/hvm/hvm.h           |   33 +++++-
 xen/include/asm-x86/hvm/irq.h           |   12 +-
 xen/include/asm-x86/hvm/support.h       |    1 
 xen/include/asm-x86/hvm/svm/asid.h      |    1 
 xen/include/asm-x86/hvm/vcpu.h          |    4 
 xen/include/asm-x86/hvm/vlapic.h        |    2 
 xen/include/asm-x86/hvm/vmx/vmx.h       |   13 +-
 xen/include/asm-x86/hvm/vpic.h          |    2 
 xen/include/asm-x86/hvm/vpt.h           |    3 
 xen/include/xen/compat.h                |   62 +++++-----
 xen/include/xen/xencomm.h               |   43 ++++---
 66 files changed, 1080 insertions(+), 580 deletions(-)

diff -r c20bc60f9243 -r 810885428743 docs/src/user.tex
--- a/docs/src/user.tex	Wed Jun 20 12:47:52 2007 -0600
+++ b/docs/src/user.tex	Wed Jun 20 12:49:27 2007 -0600
@@ -3178,6 +3178,7 @@ editing \path{grub.conf}.
 \begin{description}
 \item[ ask ] Display a vga menu allowing manual selection of video mode.
+\item[ current ] Use existing vga mode without modification.
 \item[ text-$<$mode$>$ ] Select text-mode resolution, where mode is one of
   80x25, 80x28, 80x30, 80x34, 80x43, 80x50, 80x60.
\item[ gfx-$<$mode$>$ ] Select VESA graphics mode diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/Makefile --- a/tools/blktap/drivers/Makefile Wed Jun 20 12:47:52 2007 -0600 +++ b/tools/blktap/drivers/Makefile Wed Jun 20 12:49:27 2007 -0600 @@ -35,6 +35,7 @@ BLK-OBJS += block-ram.o BLK-OBJS += block-ram.o BLK-OBJS += block-qcow.o BLK-OBJS += aes.o +BLK-OBJS += tapaio.o all: $(IBIN) qcow-util diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/block-aio.c --- a/tools/blktap/drivers/block-aio.c Wed Jun 20 12:47:52 2007 -0600 +++ b/tools/blktap/drivers/block-aio.c Wed Jun 20 12:49:27 2007 -0600 @@ -43,14 +43,7 @@ #include <sys/ioctl.h> #include <linux/fs.h> #include "tapdisk.h" - - -/** - * We used a kernel patch to return an fd associated with the AIO context - * so that we can concurrently poll on synchronous and async descriptors. - * This is signalled by passing 1 as the io context to io_setup. - */ -#define REQUEST_ASYNC_FD 1 +#include "tapaio.h" #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ) @@ -65,14 +58,13 @@ struct tdaio_state { int fd; /* libaio state */ - io_context_t aio_ctx; + tap_aio_context_t aio_ctx; struct iocb iocb_list [MAX_AIO_REQS]; struct iocb *iocb_free [MAX_AIO_REQS]; struct pending_aio pending_aio[MAX_AIO_REQS]; int iocb_free_count; struct iocb *iocb_queue[MAX_AIO_REQS]; int iocb_queued; - int poll_fd; /* NB: we require aio_poll support */ struct io_event aio_events[MAX_AIO_REQS]; }; @@ -148,7 +140,7 @@ static inline void init_fds(struct disk_ for(i = 0; i < MAX_IOFD; i++) dd->io_fd[i] = 0; - dd->io_fd[0] = prv->poll_fd; + dd->io_fd[0] = prv->aio_ctx.pollfd; } /* Open the disk file and initialize aio state. */ @@ -162,12 +154,9 @@ int tdaio_open (struct disk_driver *dd, /* Initialize AIO */ prv->iocb_free_count = MAX_AIO_REQS; prv->iocb_queued = 0; - - prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD; - prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx); - - if (prv->poll_fd < 0) { - ret = prv->poll_fd; + + ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS); + if (ret < 0) { if (ret == -EAGAIN) { DPRINTF("Couldn't setup AIO context. If you are " "trying to concurrently use a large number " @@ -176,9 +165,7 @@ int tdaio_open (struct disk_driver *dd, "(e.g. 'echo echo 1048576 > /proc/sys/fs/" "aio-max-nr')\n"); } else { - DPRINTF("Couldn't get fd for AIO poll support. This " - "is probably because your kernel does not " - "have the aio-poll patch applied.\n"); + DPRINTF("Couldn't setup AIO context.\n"); } goto done; } @@ -286,7 +273,7 @@ int tdaio_submit(struct disk_driver *dd) if (!prv->iocb_queued) return 0; - ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue); + ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue); /* XXX: TODO: Handle error conditions here. */ @@ -300,7 +287,7 @@ int tdaio_close(struct disk_driver *dd) { struct tdaio_state *prv = (struct tdaio_state *)dd->private; - io_destroy(prv->aio_ctx); + io_destroy(prv->aio_ctx.aio_ctx); close(prv->fd); return 0; @@ -308,15 +295,13 @@ int tdaio_close(struct disk_driver *dd) int tdaio_do_callbacks(struct disk_driver *dd, int sid) { - int ret, i, rsp = 0; + int i, nr_events, rsp = 0; struct io_event *ep; struct tdaio_state *prv = (struct tdaio_state *)dd->private; - /* Non-blocking test for completed io. 
*/ - ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events, - NULL); - - for (ep=prv->aio_events,i=ret; i-->0; ep++) { + nr_events = tap_aio_get_events(&prv->aio_ctx); +repeat: + for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) { struct iocb *io = ep->obj; struct pending_aio *pio; @@ -327,6 +312,14 @@ int tdaio_do_callbacks(struct disk_drive prv->iocb_free[prv->iocb_free_count++] = io; } + + if (nr_events) { + nr_events = tap_aio_more_events(&prv->aio_ctx); + goto repeat; + } + + tap_aio_continue(&prv->aio_ctx); + return rsp; } diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/block-qcow.c --- a/tools/blktap/drivers/block-qcow.c Wed Jun 20 12:47:52 2007 -0600 +++ b/tools/blktap/drivers/block-qcow.c Wed Jun 20 12:49:27 2007 -0600 @@ -38,6 +38,7 @@ #include "bswap.h" #include "aes.h" #include "tapdisk.h" +#include "tapaio.h" #if 1 #define ASSERT(_p) \ @@ -52,9 +53,6 @@ (uint64_t)( \ (l + (s - 1)) - ((l + (s - 1)) % s)); \ }) - -/******AIO DEFINES******/ -#define REQUEST_ASYNC_FD 1 struct pending_aio { td_callback_t cb; @@ -145,7 +143,7 @@ struct tdqcow_state { AES_KEY aes_encrypt_key; /*AES key*/ AES_KEY aes_decrypt_key; /*AES key*/ /* libaio state */ - io_context_t aio_ctx; + tap_aio_context_t aio_ctx; int max_aio_reqs; struct iocb *iocb_list; struct iocb **iocb_free; @@ -153,7 +151,6 @@ struct tdqcow_state { int iocb_free_count; struct iocb **iocb_queue; int iocb_queued; - int poll_fd; /* NB: we require aio_poll support */ struct io_event *aio_events; }; @@ -179,7 +176,7 @@ static void free_aio_state(struct disk_d static int init_aio_state(struct disk_driver *dd) { - int i; + int i, ret; struct td_state *bs = dd->td_state; struct tdqcow_state *s = (struct tdqcow_state *)dd->private; long ioidx; @@ -216,12 +213,9 @@ static int init_aio_state(struct disk_dr goto fail; } - /*Signal kernel to create Poll FD for Asyc completion events*/ - s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD; - s->poll_fd = io_setup(s->max_aio_reqs, &s->aio_ctx); - - if (s->poll_fd < 0) { - if (s->poll_fd == -EAGAIN) { + ret = tap_aio_setup(&s->aio_ctx, s->aio_events, s->max_aio_reqs); + if (ret < 0) { + if (ret == -EAGAIN) { DPRINTF("Couldn't setup AIO context. If you are " "trying to concurrently use a large number " "of blktap-based disks, you may need to " @@ -229,9 +223,7 @@ static int init_aio_state(struct disk_dr "(e.g. 'echo echo 1048576 > /proc/sys/fs/" "aio-max-nr')\n"); } else { - DPRINTF("Couldn't get fd for AIO poll support. This " - "is probably because your kernel does not " - "have the aio-poll patch applied.\n"); + DPRINTF("Couldn't setup AIO context.\n"); } goto fail; } @@ -845,7 +837,7 @@ static inline void init_fds(struct disk_ for(i = 0; i < MAX_IOFD; i++) dd->io_fd[i] = 0; - dd->io_fd[0] = s->poll_fd; + dd->io_fd[0] = s->aio_ctx.pollfd; } /* Open the disk file and initialize qcow state. */ @@ -1144,7 +1136,7 @@ int tdqcow_submit(struct disk_driver *dd if (!prv->iocb_queued) return 0; - ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue); + ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue); /* XXX: TODO: Handle error conditions here. 
 */
@@ -1172,7 +1164,7 @@ int tdqcow_close(struct disk_driver *dd)
 		close(fd);
 	}
 
-	io_destroy(s->aio_ctx);
+	io_destroy(s->aio_ctx.aio_ctx);
 	free(s->name);
 	free(s->l1_table);
 	free(s->l2_cache);
@@ -1184,17 +1176,15 @@ int tdqcow_close(struct disk_driver *dd)
 
 int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
 {
-        int ret, i, rsp = 0,*ptr;
+        int ret, i, nr_events, rsp = 0,*ptr;
         struct io_event *ep;
         struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
 
         if (sid > MAX_IOFD) return 1;
-	
-	/* Non-blocking test for completed io. */
-        ret = io_getevents(prv->aio_ctx, 0, prv->max_aio_reqs, prv->aio_events,
-                           NULL);
-
-        for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
+
+	nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+        for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
                 struct iocb        *io  = ep->obj;
                 struct pending_aio *pio;
 
@@ -1215,6 +1205,14 @@ int tdqcow_do_callbacks(struct disk_driv
 
                 prv->iocb_free[prv->iocb_free_count++] = io;
         }
+
+	if (nr_events) {
+		nr_events = tap_aio_more_events(&prv->aio_ctx);
+		goto repeat;
+	}
+
+	tap_aio_continue(&prv->aio_ctx);
+
         return rsp;
 }
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/tapaio.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.c	Wed Jun 20 12:49:27 2007 -0600
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "tapaio.h"
+#include "tapdisk.h"
+#include <unistd.h>
+
+/**
+ * We used a kernel patch to return an fd associated with the AIO context
+ * so that we can concurrently poll on synchronous and async descriptors.
+ * This is signalled by passing 1 as the io context to io_setup.
+ */
+#define REQUEST_ASYNC_FD 1
+
+/*
+ * If we don't have any way to do epoll on aio events in a normal kernel,
+ * wait for aio events in a separate thread and return completion status
+ * via a pipe that can be waited on normally.
+ *
+ * To keep locking problems between the completion thread and the submit
+ * thread to a minimum, there's a handshake which allows only one thread
+ * to be doing work on the completion queue at a time:
+ *
+ * 1) main thread sends completion thread a command via the command pipe;
+ * 2) completion thread waits for aio events and returns the number
+ *    received on the completion pipe
+ * 3) main thread processes the received ctx->aio_events events
+ * 4) loop back to 1) to let the completion thread refill the aio_events
+ *    buffer.
+ *
+ * This workaround needs to disappear once the kernel provides a single
+ * mechanism for waiting on both aio and normal fd wakeups.
+ */
+static void *
+tap_aio_completion_thread(void *arg)
+{
+	tap_aio_context_t *ctx = (tap_aio_context_t *) arg;
+	int command;
+	int nr_events;
+	int rc;
+
+	while (1) {
+		rc = read(ctx->command_fd[0], &command, sizeof(command));
+
+		do {
+			rc = io_getevents(ctx->aio_ctx, 1,
+					  ctx->max_aio_events, ctx->aio_events,
+					  NULL);
+			if (rc) {
+				nr_events = rc;
+				rc = write(ctx->completion_fd[1], &nr_events,
+					   sizeof(nr_events));
+			}
+		} while (!rc);
+	}
+}
+
+void
+tap_aio_continue(tap_aio_context_t *ctx)
+{
+	int cmd = 0;
+
+	if (!ctx->poll_in_thread)
+		return;
+
+	if (write(ctx->command_fd[1], &cmd, sizeof(cmd)) < 0)
+		DPRINTF("Cannot write to command pipe\n");
+}
+
+int
+tap_aio_setup(tap_aio_context_t *ctx,
+	      struct io_event *aio_events,
+	      int max_aio_events)
+{
+	int ret;
+
+	ctx->aio_events = aio_events;
+	ctx->max_aio_events = max_aio_events;
+	ctx->poll_in_thread = 0;
+
+	ctx->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
+	ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+	if (ret < 0 && ret != -EINVAL)
+		return ret;
+	else if (ret > 0) {
+		ctx->pollfd = ret;
+		return ctx->pollfd;
+	}
+
+	ctx->aio_ctx = (io_context_t) 0;
+	ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+	if (ret < 0)
+		return ret;
+
+	if ((ret = pipe(ctx->command_fd)) < 0) {
+		DPRINTF("Unable to create command pipe\n");
+		return -1;
+	}
+	if ((ret = pipe(ctx->completion_fd)) < 0) {
+		DPRINTF("Unable to create completion pipe\n");
+		return -1;
+	}
+
+	if ((ret = pthread_create(&ctx->aio_thread, NULL,
+				  tap_aio_completion_thread, ctx)) != 0) {
+		DPRINTF("Unable to create completion thread\n");
+		return -1;
+	}
+
+	ctx->pollfd = ctx->completion_fd[0];
+	ctx->poll_in_thread = 1;
+
+	tap_aio_continue(ctx);
+
+	return 0;
+}
+
+int
+tap_aio_get_events(tap_aio_context_t *ctx)
+{
+	int nr_events = 0;
+
+	if (!ctx->poll_in_thread)
+		nr_events = io_getevents(ctx->aio_ctx, 1,
+					 ctx->max_aio_events, ctx->aio_events, NULL);
+	else
+		read(ctx->completion_fd[0], &nr_events, sizeof(nr_events));
+
+	return nr_events;
+}
+
+int tap_aio_more_events(tap_aio_context_t *ctx)
+{
+	return io_getevents(ctx->aio_ctx, 0,
+			    ctx->max_aio_events, ctx->aio_events, NULL);
+}
+
+
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/tapaio.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.h	Wed Jun 20 12:49:27 2007 -0600
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __TAPAIO_H__ +#define __TAPAIO_H__ + +#include <pthread.h> +#include <libaio.h> + +struct tap_aio_context { + io_context_t aio_ctx; + + struct io_event *aio_events; + int max_aio_events; + + pthread_t aio_thread; + int command_fd[2]; + int completion_fd[2]; + int pollfd; + unsigned int poll_in_thread : 1; +}; + +typedef struct tap_aio_context tap_aio_context_t; + +int tap_aio_setup (tap_aio_context_t *ctx, + struct io_event *aio_events, + int max_aio_events); +void tap_aio_continue (tap_aio_context_t *ctx); +int tap_aio_get_events (tap_aio_context_t *ctx); +int tap_aio_more_events(tap_aio_context_t *ctx); + +#endif /* __TAPAIO_H__ */ diff -r c20bc60f9243 -r 810885428743 tools/examples/init.d/xendomains --- a/tools/examples/init.d/xendomains Wed Jun 20 12:47:52 2007 -0600 +++ b/tools/examples/init.d/xendomains Wed Jun 20 12:49:27 2007 -0600 @@ -182,25 +182,31 @@ rdnames() parseln() { - name=`echo "$1" | cut -c0-17` - name=${name%% *} - rest=`echo "$1" | cut -c18- ` - read id mem cpu vcpu state tm < <(echo "$rest") + if [[ "$1" =~ "\(domain" ]]; then + name=;id= + else if [[ "$1" =~ "\(name" ]]; then + name=$(echo $1 | sed -e 's/^.*(name \(.*\))$/\1/') + else if [[ "$1" =~ "\(domid" ]]; then + id=$(echo $1 | sed -e 's/^.*(domid \(.*\))$/\1/') + fi; fi; fi + + [ -n "$name" -a -n "$id" ] && return 0 || return 1 } is_running() { rdname $1 RC=1 + name=;id= while read LN; do - parseln "$LN" + parseln "$LN" || continue if test $id = 0; then continue; fi case $name in ($NM) RC=0 ;; esac - done < <(xm list | grep -v '^Name') + done < <(xm list -l | grep '(\(domain\|domid\|name\)') return $RC } @@ -267,13 +273,14 @@ start() all_zombies() { + name=;id= while read LN; do - parseln "$LN" + parseln "$LN" || continue if test $id = 0; then continue; fi if test "$state" != "-b---d" -a "$state" != "-----d"; then return 1; fi - done < <(xm list | grep -v '^Name') + done < <(xm list -l | grep '(\(domain\|domid\|name\)') return 0 } @@ -309,8 +316,9 @@ stop() rdnames fi echo -n "Shutting down Xen domains:" + name=;id= while read LN; do - parseln "$LN" + parseln "$LN" || continue if test $id = 0; then continue; fi echo -n " 
$name" if test "$XENDOMAINS_AUTO_ONLY" = "true"; then @@ -384,7 +392,7 @@ stop() fi kill $WDOG_PID >/dev/null 2>&1 fi - done < <(xm list | grep -v '^Name') + done < <(xm list -l | grep '(\(domain\|domid\|name\)') # NB. this shuts down ALL Xen domains (politely), not just the ones in # AUTODIR/* @@ -409,15 +417,16 @@ stop() check_domain_up() { + name=;id= while read LN; do - parseln "$LN" + parseln "$LN" || continue if test $id = 0; then continue; fi case $name in ($1) return 0 ;; esac - done < <(xm list | grep -v "^Name") + done < <(xm list -l | grep '(\(domain\|domid\|name\)') return 1 } diff -r c20bc60f9243 -r 810885428743 tools/ioemu/block-raw.c --- a/tools/ioemu/block-raw.c Wed Jun 20 12:47:52 2007 -0600 +++ b/tools/ioemu/block-raw.c Wed Jun 20 12:49:27 2007 -0600 @@ -166,7 +166,7 @@ typedef struct RawAIOCB { struct RawAIOCB *next; } RawAIOCB; -static int aio_sig_num = SIGUSR2; +const int aio_sig_num = SIGUSR2; static RawAIOCB *first_aio; /* AIO issued */ static int aio_initialized = 0; diff -r c20bc60f9243 -r 810885428743 tools/ioemu/target-i386-dm/exec-dm.c --- a/tools/ioemu/target-i386-dm/exec-dm.c Wed Jun 20 12:47:52 2007 -0600 +++ b/tools/ioemu/target-i386-dm/exec-dm.c Wed Jun 20 12:49:27 2007 -0600 @@ -443,19 +443,40 @@ extern unsigned long logdirty_bitmap_siz * Forcing a word-sized read/write prevents the guest from seeing a partially * written word-sized atom. */ -void memcpy_words(void *dst, void *src, size_t n) -{ - while (n >= sizeof(long)) { - *((long *)dst) = *((long *)src); - dst = ((long *)dst) + 1; - src = ((long *)src) + 1; - n -= sizeof(long); - } - - if (n & 4) { +#if defined(__x86_64__) || defined(__i386__) +static void memcpy_words(void *dst, void *src, size_t n) +{ + asm ( + " movl %%edx,%%ecx \n" +#ifdef __x86_64 + " shrl $3,%%ecx \n" + " andl $7,%%edx \n" + " rep movsq \n" + " test $4,%%edx \n" + " jz 1f \n" + " movsl \n" +#else /* __i386__ */ + " shrl $2,%%ecx \n" + " andl $3,%%edx \n" + " rep movsl \n" +#endif + "1: test $2,%%edx \n" + " jz 1f \n" + " movsw \n" + "1: test $1,%%edx \n" + " jz 1f \n" + " movsb \n" + "1: \n" + : : "S" (src), "D" (dst), "d" (n) : "ecx" ); +} +#else +static void memcpy_words(void *dst, void *src, size_t n) +{ + while (n >= sizeof(uint32_t)) { *((uint32_t *)dst) = *((uint32_t *)src); dst = ((uint32_t *)dst) + 1; src = ((uint32_t *)src) + 1; + n -= sizeof(uint32_t); } if (n & 2) { @@ -470,6 +491,7 @@ void memcpy_words(void *dst, void *src, src = ((uint8_t *)src) + 1; } } +#endif void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, int len, int is_write) diff -r c20bc60f9243 -r 810885428743 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Wed Jun 20 12:47:52 2007 -0600 +++ b/tools/ioemu/vl.c Wed Jun 20 12:49:27 2007 -0600 @@ -7059,6 +7059,18 @@ int main(int argc, char **argv) #endif char qemu_dm_logfilename[128]; + + /* Ensure that SIGUSR2 is blocked by default when a new thread is created, + then only the threads that use the signal unblock it -- this fixes a + race condition in Qcow support where the AIO signal is misdelivered. 
*/ + { + extern const int aio_sig_num; + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, aio_sig_num); + sigprocmask(SIG_BLOCK, &set, NULL); + } LIST_INIT (&vm_change_state_head); #ifndef _WIN32 diff -r c20bc60f9243 -r 810885428743 tools/libxc/xc_core.c --- a/tools/libxc/xc_core.c Wed Jun 20 12:47:52 2007 -0600 +++ b/tools/libxc/xc_core.c Wed Jun 20 12:49:27 2007 -0600 @@ -156,7 +156,7 @@ struct xc_core_section_headers { Elf64_Shdr *shdrs; }; #define SHDR_INIT 16 -#define SHDR_INC 4 +#define SHDR_INC 4U static struct xc_core_section_headers* xc_core_shdr_init(void) diff -r c20bc60f9243 -r 810885428743 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Wed Jun 20 12:47:52 2007 -0600 +++ b/tools/python/xen/xend/XendDomainInfo.py Wed Jun 20 12:49:27 2007 -0600 @@ -983,7 +983,7 @@ class XendDomainInfo: self.info['VCPUs_live'] = vcpus self._writeDom(self._vcpuDomDetails()) else: - self.info['VCPUs_live'] = vcpus + self.info['VCPUs_max'] = vcpus xen.xend.XendDomain.instance().managed_config_save(self) log.info("Set VCPU count on domain %s to %d", self.info['name_label'], vcpus) diff -r c20bc60f9243 -r 810885428743 tools/python/xen/xend/server/blkif.py --- a/tools/python/xen/xend/server/blkif.py Wed Jun 20 12:47:52 2007 -0600 +++ b/tools/python/xen/xend/server/blkif.py Wed Jun 20 12:49:27 2007 -0600 @@ -98,6 +98,11 @@ class BlkifController(DevController): if (dev_type == 'cdrom' and new_front['device-type'] == 'cdrom' and dev == new_back['dev'] and mode == 'r'): + # dummy device + self.writeBackend(devid, + 'type', new_back['type'], + 'params', '') + # new backend-device self.writeBackend(devid, 'type', new_back['type'], 'params', new_back['params']) diff -r c20bc60f9243 -r 810885428743 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/ia64/xen/domain.c Wed Jun 20 12:49:27 2007 -0600 @@ -1146,9 +1146,8 @@ static void __init loaddomainelfimage(st dom_imva = __va_ul(page_to_maddr(p)); if (filesz > 0) { if (filesz >= PAGE_SIZE) - memcpy((void *) dom_imva, - (void *) elfaddr, - PAGE_SIZE); + copy_page((void *) dom_imva, + (void *) elfaddr); else { // copy partial page memcpy((void *) dom_imva, @@ -1166,7 +1165,7 @@ static void __init loaddomainelfimage(st } else if (memsz > 0) { /* always zero out entire page */ - memset((void *) dom_imva, 0, PAGE_SIZE); + clear_page((void *) dom_imva); } memsz -= PAGE_SIZE; filesz -= PAGE_SIZE; @@ -1367,7 +1366,7 @@ int __init construct_dom0(struct domain if (start_info_page == NULL) panic("can't allocate start info page"); si = page_to_virt(start_info_page); - memset(si, 0, PAGE_SIZE); + clear_page(si); snprintf(si->magic, sizeof(si->magic), "xen-%i.%i-ia64", xen_major_version(), xen_minor_version()); si->nr_pages = max_pages; diff -r c20bc60f9243 -r 810885428743 xen/arch/ia64/xen/xenmem.c --- a/xen/arch/ia64/xen/xenmem.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/ia64/xen/xenmem.c Wed Jun 20 12:49:27 2007 -0600 @@ -90,7 +90,7 @@ alloc_dir_page(void) panic("Not enough memory for virtual frame table!\n"); ++table_size; dir = mfn << PAGE_SHIFT; - memset(__va(dir), 0, PAGE_SIZE); + clear_page(__va(dir)); return dir; } diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/apic.c --- a/xen/arch/x86/apic.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/apic.c Wed Jun 20 12:49:27 2007 -0600 @@ -817,7 +817,7 @@ void __init init_apic_mappings(void) */ if (!smp_found_config && detect_init_APIC()) { apic_phys = __pa(alloc_xenheap_page()); - memset(__va(apic_phys), 0, 
PAGE_SIZE); + clear_page(__va(apic_phys)); } else apic_phys = mp_lapic_addr; @@ -852,7 +852,7 @@ void __init init_apic_mappings(void) } else { fake_ioapic_page: ioapic_phys = __pa(alloc_xenheap_page()); - memset(__va(ioapic_phys), 0, PAGE_SIZE); + clear_page(__va(ioapic_phys)); } set_fixmap_nocache(idx, ioapic_phys); apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/cmdline.S --- a/xen/arch/x86/boot/cmdline.S Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/boot/cmdline.S Wed Jun 20 12:49:27 2007 -0600 @@ -119,30 +119,31 @@ 3: pop %edi ret .Lfind_option: - push %ebx - push 4+8(%esp) - push 4+8(%esp) + mov 4(%esp),%eax + dec %eax + push %ebx +1: pushl 4+8(%esp) + inc %eax + push %eax call .Lstrstr add $8,%esp test %eax,%eax jz 3f cmp %eax,4+4(%esp) - je 1f + je 2f cmpb $' ',-1(%eax) - jne 2f -1: mov %eax,%ebx - push 4+8(%esp) + jne 1b +2: mov %eax,%ebx + pushl 4+8(%esp) call .Lstrlen add $4,%esp - xchg %eax,%ebx - add %eax,%ebx + xadd %eax,%ebx cmpb $'\0',(%ebx) je 3f cmpb $' ',(%ebx) je 3f cmpb $'=',(%ebx) - je 3f -2: xor %eax,%eax + jne 1b 3: pop %ebx ret @@ -297,7 +298,7 @@ 1: lodsw call .Lstr_prefix add $8,%esp test %eax,%eax - jnz .Lcmdline_exit + jnz .Lparse_vga_current /* We have 'vga=mode-<mode>'. */ add $5,%ebx @@ -305,6 +306,19 @@ 1: lodsw call .Latoi add $4,%esp mov %ax,bootsym_phys(boot_vid_mode) + jmp .Lcmdline_exit + +.Lparse_vga_current: + /* Check for 'vga=current'. */ + push %ebx + pushl $sym_phys(.Lvga_current) + call .Lstr_prefix + add $8,%esp + test %eax,%eax + jnz .Lcmdline_exit + + /* We have 'vga=current'. */ + movw $VIDEO_CURRENT_MODE,bootsym_phys(boot_vid_mode) .Lcmdline_exit: popa @@ -328,6 +342,8 @@ 1: lodsw .asciz "gfx-" .Lvga_mode: .asciz "mode-" +.Lvga_current: + .asciz "current" .Lno_rm_opt: .asciz "no-real-mode" .Ledid_opt: diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/trampoline.S --- a/xen/arch/x86/boot/trampoline.S Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/boot/trampoline.S Wed Jun 20 12:49:27 2007 -0600 @@ -13,12 +13,11 @@ trampoline_realmode_entry: cli lidt bootsym(idt_48) lgdt bootsym(gdt_48) + mov $1,%bl # EBX != 0 indicates we are an AP xor %ax, %ax inc %ax lmsw %ax # CR0.PE = 1 (enter protected mode) - mov $1,%bl # EBX != 0 indicates we are an AP - jmp 1f -1: ljmpl $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry) + ljmpl $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry) idt_48: .word 0, 0, 0 # base = limit = 0 gdt_48: .word 6*8-1 @@ -135,10 +134,9 @@ trampoline_boot_cpu_entry: ljmp $BOOT_PSEUDORM_CS,$bootsym(1f) .code16 1: mov %eax,%cr0 # CR0.PE = 0 (leave protected mode) - jmp 1f /* Load proper real-mode values into %cs, %ds, %es and %ss. */ -1: ljmp $(BOOT_TRAMPOLINE>>4),$bootsym(1f) + ljmp $(BOOT_TRAMPOLINE>>4),$bootsym(1f) 1: mov $(BOOT_TRAMPOLINE>>4),%ax mov %ax,%ds mov %ax,%es @@ -166,10 +164,9 @@ 1: mov $(BOOT_TRAMPOLINE>>4),%a xor %ax,%ax inc %ax lmsw %ax # CR0.PE = 1 (enter protected mode) - jmp 1f /* Load proper protected-mode values into all segment registers. */ -1: ljmpl $BOOT_CS32,$bootsym_phys(1f) + ljmpl $BOOT_CS32,$bootsym_phys(1f) .code32 1: mov $BOOT_DS,%eax mov %eax,%ds diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/video.S --- a/xen/arch/x86/boot/video.S Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/boot/video.S Wed Jun 20 12:49:27 2007 -0600 @@ -15,7 +15,10 @@ #include "video.h" -#define modelist (0x3000) +/* Scratch space layout. 
*/ +#define modelist (0x3000) +#define vesa_glob_info (modelist + 1024) +#define vesa_mode_info (vesa_glob_info + 1024) /* Retrieve Extended Display Identification Data. */ #define CONFIG_FIRMWARE_EDID @@ -109,7 +112,7 @@ mopar2: movb %al, _param(PARAM_VIDEO_ # Fetching of VESA frame buffer parameters mopar_gr: - leaw modelist+1024, %di + leaw vesa_mode_info, %di movb $0x23, _param(PARAM_HAVE_VGA) movw 16(%di), %ax movw %ax, _param(PARAM_LFB_LINELENGTH) @@ -128,9 +131,7 @@ mopar_gr: movl %eax, _param(PARAM_LFB_COLORS+4) # get video mem size - leaw modelist+1024, %di - movw $0x4f00, %ax - int $0x10 + leaw vesa_glob_info, %di xorl %eax, %eax movw 18(%di), %ax movl %eax, _param(PARAM_LFB_SIZE) @@ -183,7 +184,10 @@ dac_done: movw %es, _param(PARAM_VESAPM_SEG) movw %di, _param(PARAM_VESAPM_OFF) -no_pm: ret + +no_pm: pushw %ds + popw %es + ret # The video mode menu mode_menu: @@ -428,17 +432,13 @@ setmenu: jmp mode_set check_vesa: -#ifdef CONFIG_FIRMWARE_EDID - leaw modelist+1024, %di + leaw vesa_glob_info, %di movw $0x4f00, %ax int $0x10 cmpw $0x004f, %ax jnz setbad - movw 4(%di), %ax - movw %ax, bootsym(vbe_version) -#endif - leaw modelist+1024, %di + leaw vesa_mode_info, %di subb $VIDEO_FIRST_VESA>>8, %bh movw %bx, %cx # Get mode information structure movw $0x4f01, %ax @@ -447,7 +447,7 @@ check_vesa: cmpw $0x004f, %ax jnz setbad - movb (%di), %al # Check capabilities. + movb (%di), %al # Check mode attributes. andb $0x99, %al cmpb $0x99, %al jnz _setbad # Doh! No linear frame buffer. @@ -530,6 +530,7 @@ spec_inits: .word bootsym(set_8pixel) .word bootsym(set_80x43) .word bootsym(set_80x28) + .word bootsym(set_current) .word bootsym(set_80x30) .word bootsym(set_80x34) .word bootsym(set_80x60) @@ -575,6 +576,7 @@ set14: movw $0x1111, %ax movb $0x01, %ah # Define cursor scan lines 11-12 movw $0x0b0c, %cx int $0x10 +set_current: stc ret @@ -695,33 +697,34 @@ vga_modes_end: # Detect VESA modes. vesa_modes: movw %di, %bp # BP=original mode table end - addw $0x200, %di # Buffer space + leaw vesa_glob_info, %di movw $0x4f00, %ax # VESA Get card info call int $0x10 + movw %di, %si movw %bp, %di cmpw $0x004f, %ax # Successful? jnz ret0 - cmpw $0x4556, 0x200(%di) # 'VE' + cmpw $0x4556, (%si) # 'VE' jnz ret0 - cmpw $0x4153, 0x202(%di) # 'SA' + cmpw $0x4153, 2(%si) # 'SA' jnz ret0 movw $bootsym(vesa_name), bootsym(card_name) # Set name to "VESA VGA" pushw %gs - lgsw 0x20e(%di), %si # GS:SI=mode list + lgsw 0xe(%si), %si # GS:SI=mode list movw $128, %cx # Iteration limit vesa1: gs; lodsw - cmpw $0xffff, %ax # End of the table? + cmpw $0xffff, %ax # End of the table? jz vesar - cmpw $0x0080, %ax # Check validity of mode ID + cmpw $0x0080, %ax # Check validity of mode ID jc vesa2 - orb %ah, %ah # Valid IDs: 0x0000-0x007f/0x0100-0x07ff - jz vesan # Certain BIOSes report 0x80-0xff! + orb %ah, %ah # Valid IDs 0x0000-0x007f/0x0100-0x07ff + jz vesan # Certain BIOSes report 0x80-0xff! cmpw $0x0800, %ax jnc vesae @@ -891,8 +894,13 @@ store_edid: cmpb $1, bootsym(opt_edid) # EDID disabled on cmdline (edid=no)? je .Lno_edid - cmpw $0x0200, bootsym(vbe_version) # only do EDID on >= VBE2.0 - jl .Lno_edid + leaw vesa_glob_info, %di + movw $0x4f00, %ax + int $0x10 + cmpw $0x004f, %ax + jne .Lno_edid + cmpw $0x0200, 4(%di) # only do EDID on >= VBE2.0 + jb .Lno_edid xorw %di, %di # Report Capability pushw %di @@ -901,6 +909,8 @@ store_edid: xorw %bx, %bx xorw %cx, %cx int $0x10 + pushw %ds + popw %es cmpw $0x004f, %ax # Call failed? 
jne .Lno_edid @@ -920,8 +930,6 @@ store_edid: movw $0x01, %bx movw $0x00, %cx movw $0x00, %dx - pushw %ds - popw %es movw $bootsym(boot_edid_info), %di int $0x10 @@ -940,7 +948,6 @@ card_name: .word 0 # Pointe card_name: .word 0 # Pointer to adapter name graphic_mode: .byte 0 # Graphic mode with a linear frame buffer dac_size: .byte 6 # DAC bit depth -vbe_version: .word 0 # VBE bios version # Status messages keymsg: .ascii "Press <RETURN> to see video modes available," diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/video.h --- a/xen/arch/x86/boot/video.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/boot/video.h Wed Jun 20 12:49:27 2007 -0600 @@ -16,10 +16,11 @@ #define VIDEO_80x50 0x0f01 #define VIDEO_80x43 0x0f02 #define VIDEO_80x28 0x0f03 -#define VIDEO_80x30 0x0f04 -#define VIDEO_80x34 0x0f05 -#define VIDEO_80x60 0x0f06 -#define VIDEO_LAST_SPECIAL 0x0f07 +#define VIDEO_CURRENT_MODE 0x0f04 +#define VIDEO_80x30 0x0f05 +#define VIDEO_80x34 0x0f06 +#define VIDEO_80x60 0x0f07 +#define VIDEO_LAST_SPECIAL 0x0f08 #define ASK_VGA 0xfffd #define VIDEO_VESA_BY_SIZE 0xffff diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/x86_32.S --- a/xen/arch/x86/boot/x86_32.S Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/boot/x86_32.S Wed Jun 20 12:49:27 2007 -0600 @@ -30,9 +30,7 @@ 1: mov %eax,(%edi) loop 1b /* Pass off the Multiboot info structure to C land. */ - mov multiboot_ptr,%eax - add $__PAGE_OFFSET,%eax - push %eax + pushl multiboot_ptr call __start_xen ud2 /* Force a panic (invalid opcode). */ diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/x86_64.S --- a/xen/arch/x86/boot/x86_64.S Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/boot/x86_64.S Wed Jun 20 12:49:27 2007 -0600 @@ -51,8 +51,6 @@ 1: movq %rax,(%rdi) /* Pass off the Multiboot info structure to C land. */ mov multiboot_ptr(%rip),%edi - lea start-0x100000(%rip),%rax - add %rax,%rdi call __start_xen ud2 /* Force a panic (invalid opcode). 
*/ diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/domain.c Wed Jun 20 12:49:27 2007 -0600 @@ -232,26 +232,28 @@ static int setup_compat_l4(struct vcpu * l4_pgentry_t *l4tab; int rc; - if ( !pg ) + if ( pg == NULL ) return -ENOMEM; /* This page needs to look like a pagetable so that it can be shadowed */ pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated; l4tab = copy_page(page_to_virt(pg), idle_pg_table); + l4tab[0] = l4e_empty(); l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] = l4e_from_page(pg, __PAGE_HYPERVISOR); l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] = l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3), __PAGE_HYPERVISOR); + + if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 ) + { + free_domheap_page(pg); + return rc; + } + v->arch.guest_table = pagetable_from_page(pg); v->arch.guest_table_user = v->arch.guest_table; - - if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 ) - { - free_domheap_page(pg); - return rc; - } return 0; } @@ -318,11 +320,11 @@ int switch_compat(struct domain *d) gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR); for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ ) { + if ( (d->vcpu[vcpuid] != NULL) && + (setup_compat_l4(d->vcpu[vcpuid]) != 0) ) + goto undo_and_fail; d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) + FIRST_RESERVED_GDT_PAGE)] = gdt_l1e; - if (d->vcpu[vcpuid] - && setup_compat_l4(d->vcpu[vcpuid]) != 0) - return -ENOMEM; } d->arch.physaddr_bitsize = @@ -330,6 +332,19 @@ int switch_compat(struct domain *d) + (PAGE_SIZE - 2); return 0; + + undo_and_fail: + d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0; + release_arg_xlat_area(d); + gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR); + while ( vcpuid-- != 0 ) + { + if ( d->vcpu[vcpuid] != NULL ) + release_compat_l4(d->vcpu[vcpuid]); + d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) + + FIRST_RESERVED_GDT_PAGE)] = gdt_l1e; + } + return -ENOMEM; } #else @@ -461,7 +476,7 @@ int arch_domain_create(struct domain *d) if ( (d->shared_info = alloc_xenheap_page()) == NULL ) goto fail; - memset(d->shared_info, 0, PAGE_SIZE); + clear_page(d->shared_info); share_xen_page_with_guest( virt_to_page(d->shared_info), d, XENSHARE_writable); } diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/domain_build.c Wed Jun 20 12:49:27 2007 -0600 @@ -505,7 +505,7 @@ int __init construct_dom0( v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start); #else l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; - memcpy(l2tab, idle_pg_table, PAGE_SIZE); + copy_page(l2tab, idle_pg_table); l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR); v->arch.guest_table = pagetable_from_paddr((unsigned long)l2start); @@ -645,7 +645,7 @@ int __init construct_dom0( panic("Not enough RAM for domain 0 PML4.\n"); l4start = l4tab = page_to_virt(page); } - memcpy(l4tab, idle_pg_table, PAGE_SIZE); + copy_page(l4tab, idle_pg_table); l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] = l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR); l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] = @@ -823,7 +823,7 @@ int __init construct_dom0( /* Set up start info area. 
*/ si = (start_info_t *)vstartinfo_start; - memset(si, 0, PAGE_SIZE); + clear_page(si); si->nr_pages = nr_pages; si->shared_info = virt_to_maddr(d->shared_info); diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/flushtlb.c --- a/xen/arch/x86/flushtlb.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/flushtlb.c Wed Jun 20 12:49:27 2007 -0600 @@ -80,6 +80,8 @@ void write_cr3(unsigned long cr3) t = pre_flush(); + hvm_flush_guest_tlbs(); + #ifdef USER_MAPPINGS_ARE_GLOBAL __pge_off(); __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" ); @@ -103,6 +105,8 @@ void local_flush_tlb(void) t = pre_flush(); + hvm_flush_guest_tlbs(); + #ifdef USER_MAPPINGS_ARE_GLOBAL __pge_off(); __pge_on(); diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/hvm.c Wed Jun 20 12:49:27 2007 -0600 @@ -831,11 +831,24 @@ void hvm_update_guest_cr3(struct vcpu *v hvm_funcs.update_guest_cr3(v); } +static void hvm_latch_shinfo_size(struct domain *d) +{ + /* + * Called from operations which are among the very first executed by + * PV drivers on initialisation or after save/restore. These are sensible + * points at which to sample the execution mode of the guest and latch + * 32- or 64-bit format for shared state. + */ + if ( current->domain == d ) + d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8); +} + /* Initialise a hypercall transfer page for a VMX domain using paravirtualised drivers. */ void hvm_hypercall_page_initialise(struct domain *d, void *hypercall_page) { + hvm_latch_shinfo_size(d); hvm_funcs.init_hypercall_page(d, hypercall_page); } @@ -1065,13 +1078,7 @@ long do_hvm_op(unsigned long op, XEN_GUE break; case HVM_PARAM_CALLBACK_IRQ: hvm_set_callback_via(d, a.value); - /* - * Since this operation is one of the very first executed - * by PV drivers on initialisation or after save/restore, it - * is a sensible point at which to sample the execution mode of - * the guest and latch 32- or 64-bit format for shared state. - */ - d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8); + hvm_latch_shinfo_size(d); break; } d->arch.hvm_domain.params[a.index] = a.value; diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/irq.c --- a/xen/arch/x86/hvm/irq.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/irq.c Wed Jun 20 12:49:27 2007 -0600 @@ -285,43 +285,49 @@ void hvm_set_callback_via(struct domain } } -int cpu_has_pending_irq(struct vcpu *v) +enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v) { struct hvm_domain *plat = &v->domain->arch.hvm_domain; - /* APIC */ + if ( unlikely(v->arch.hvm_vcpu.nmi_pending) ) + return hvm_intack_nmi; + if ( vlapic_has_interrupt(v) != -1 ) - return 1; - - /* PIC */ + return hvm_intack_lapic; + if ( !vlapic_accept_pic_intr(v) ) - return 0; - - return plat->vpic[0].int_output; -} - -int cpu_get_interrupt(struct vcpu *v, int *type) -{ - int vector; - - if ( (vector = cpu_get_apic_interrupt(v, type)) != -1 ) - return vector; - - if ( (v->vcpu_id == 0) && - ((vector = cpu_get_pic_interrupt(v, type)) != -1) ) - return vector; - - return -1; -} - -int get_isa_irq_vector(struct vcpu *v, int isa_irq, int type) + return hvm_intack_none; + + return plat->vpic[0].int_output ? 
hvm_intack_pic : hvm_intack_none; +} + +int hvm_vcpu_ack_pending_irq(struct vcpu *v, enum hvm_intack type, int *vector) +{ + switch ( type ) + { + case hvm_intack_nmi: + return test_and_clear_bool(v->arch.hvm_vcpu.nmi_pending); + case hvm_intack_lapic: + return ((*vector = cpu_get_apic_interrupt(v)) != -1); + case hvm_intack_pic: + ASSERT(v->vcpu_id == 0); + return ((*vector = cpu_get_pic_interrupt(v)) != -1); + default: + break; + } + + return 0; +} + +int get_isa_irq_vector(struct vcpu *v, int isa_irq, enum hvm_intack src) { unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq); - if ( type == APIC_DM_EXTINT ) + if ( src == hvm_intack_pic ) return (v->domain->arch.hvm_domain.vpic[isa_irq >> 3].irq_base + (isa_irq & 7)); + ASSERT(src == hvm_intack_lapic); return domain_vioapic(v->domain)->redirtbl[gsi].fields.vector; } @@ -337,19 +343,20 @@ int is_isa_irq_masked(struct vcpu *v, in domain_vioapic(v->domain)->redirtbl[gsi].fields.mask); } -/* - * TODO: 1. Should not need special treatment of event-channel events. - * 2. Should take notice of interrupt shadows (or clear them). - */ int hvm_local_events_need_delivery(struct vcpu *v) { - int pending; - - pending = (vcpu_info(v, evtchn_upcall_pending) || cpu_has_pending_irq(v)); - if ( unlikely(pending) ) - pending = hvm_interrupts_enabled(v); - - return pending; + enum hvm_intack type; + + /* TODO: Get rid of event-channel special case. */ + if ( vcpu_info(v, evtchn_upcall_pending) ) + type = hvm_intack_pic; + else + type = hvm_vcpu_has_pending_irq(v); + + if ( likely(type == hvm_intack_none) ) + return 0; + + return hvm_interrupts_enabled(v, type); } #if 0 /* Keep for debugging */ diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/asid.c --- a/xen/arch/x86/hvm/svm/asid.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/svm/asid.c Wed Jun 20 12:49:27 2007 -0600 @@ -60,7 +60,7 @@ struct svm_asid_data { u64 core_asid_generation; u32 next_asid; u32 max_asid; - u32 erratum170; + u32 erratum170:1; }; static DEFINE_PER_CPU(struct svm_asid_data, svm_asid_data); @@ -140,25 +140,21 @@ void svm_asid_init_vcpu(struct vcpu *v) } /* - * Increase the Generation to make free ASIDs. Flush physical TLB and give - * ASID. - */ -static void svm_asid_handle_inc_generation(struct vcpu *v) -{ - struct svm_asid_data *data = svm_asid_core_data(); - - if ( likely(data->core_asid_generation < SVM_ASID_LAST_GENERATION) ) - { - /* Handle ASID overflow. */ + * Increase the Generation to make free ASIDs, and indirectly cause a + * TLB flush of all ASIDs on the next vmrun. + */ +void svm_asid_inc_generation(void) +{ + struct svm_asid_data *data = svm_asid_core_data(); + + if ( likely(data->core_asid_generation < SVM_ASID_LAST_GENERATION) ) + { + /* Move to the next generation. We can't flush the TLB now + * because you need to vmrun to do that, and current might not + * be a HVM vcpu, but the first HVM vcpu that runs after this + * will pick up ASID 1 and flush the TLBs. */ data->core_asid_generation++; - data->next_asid = SVM_ASID_FIRST_GUEST_ASID + 1; - - /* Handle VCPU. */ - v->arch.hvm_svm.vmcb->guest_asid = SVM_ASID_FIRST_GUEST_ASID; - v->arch.hvm_svm.asid_generation = data->core_asid_generation; - - /* Trigger flush of physical TLB. */ - v->arch.hvm_svm.vmcb->tlb_control = 1; + data->next_asid = SVM_ASID_FIRST_GUEST_ASID; return; } @@ -168,11 +164,12 @@ static void svm_asid_handle_inc_generati * this core (flushing TLB always). So correctness is established; it * only runs a bit slower. */ - printk("AMD SVM: ASID generation overrun. 
Disabling ASIDs.\n"); - data->erratum170 = 1; - data->core_asid_generation = SVM_ASID_INVALID_GENERATION; - - svm_asid_init_vcpu(v); + if ( !data->erratum170 ) + { + printk("AMD SVM: ASID generation overrun. Disabling ASIDs.\n"); + data->erratum170 = 1; + data->core_asid_generation = SVM_ASID_INVALID_GENERATION; + } } /* @@ -202,18 +199,21 @@ asmlinkage void svm_asid_handle_vmrun(vo return; } - /* Different ASID generations trigger fetching of a fresh ASID. */ - if ( likely(data->next_asid <= data->max_asid) ) - { - /* There is a free ASID. */ - v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++; - v->arch.hvm_svm.asid_generation = data->core_asid_generation; - v->arch.hvm_svm.vmcb->tlb_control = 0; - return; - } - - /* Slow path, may cause TLB flush. */ - svm_asid_handle_inc_generation(v); + /* If there are no free ASIDs, need to go to a new generation */ + if ( unlikely(data->next_asid > data->max_asid) ) + svm_asid_inc_generation(); + + /* Now guaranteed to be a free ASID. */ + v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++; + v->arch.hvm_svm.asid_generation = data->core_asid_generation; + + /* When we assign ASID 1, flush all TLB entries. We need to do it + * here because svm_asid_inc_generation() can be called at any time, + * but the TLB flush can only happen on vmrun. */ + if ( v->arch.hvm_svm.vmcb->guest_asid == SVM_ASID_FIRST_GUEST_ASID ) + v->arch.hvm_svm.vmcb->tlb_control = 1; + else + v->arch.hvm_svm.vmcb->tlb_control = 0; } void svm_asid_inv_asid(struct vcpu *v) diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/svm/intr.c Wed Jun 20 12:49:27 2007 -0600 @@ -15,7 +15,6 @@ * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 59 Temple * Place - Suite 330, Boston, MA 02111-1307 USA. - * */ #include <xen/config.h> @@ -39,100 +38,119 @@ #include <xen/domain_page.h> #include <asm/hvm/trace.h> -/* - * Most of this code is copied from vmx_io.c and modified - * to be suitable for SVM. - */ - -static inline int svm_inject_extint(struct vcpu *v, int trap) +static void svm_inject_dummy_vintr(struct vcpu *v) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; vintr_t intr = vmcb->vintr; - /* Update only relevant fields */ intr.fields.irq = 1; intr.fields.intr_masking = 1; - intr.fields.vector = trap; + intr.fields.vector = 0; intr.fields.prio = 0xF; intr.fields.ign_tpr = 1; vmcb->vintr = intr; +} + +static void svm_inject_nmi(struct vcpu *v) +{ + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + eventinj_t event; - return 0; + event.bytes = 0; + event.fields.v = 1; + event.fields.type = EVENTTYPE_NMI; + event.fields.vector = 2; + + ASSERT(vmcb->eventinj.fields.v == 0); + vmcb->eventinj = event; +} + +static void svm_inject_extint(struct vcpu *v, int vector) +{ + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + eventinj_t event; + + event.bytes = 0; + event.fields.v = 1; + event.fields.type = EVENTTYPE_INTR; + event.fields.vector = vector; + + ASSERT(vmcb->eventinj.fields.v == 0); + vmcb->eventinj = event; } asmlinkage void svm_intr_assist(void) { struct vcpu *v = current; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - int intr_type = APIC_DM_EXTINT; - int intr_vector = -1; + enum hvm_intack intr_source; + int intr_vector; /* - * Previous Interrupt delivery caused this intercept? + * Previous event delivery caused this intercept? 
* This will happen if the injection is latched by the processor (hence - * clearing vintr.fields.irq) but then subsequently a fault occurs (e.g., - * due to lack of shadow mapping of guest IDT or guest-kernel stack). - * - * NB. Exceptions that fault during delivery are lost. This needs to be - * fixed but we'll usually get away with it since faults are usually - * idempotent. But this isn't the case for e.g. software interrupts! + * clearing vintr.fields.irq or eventinj.v) but then subsequently a fault + * occurs (e.g., due to lack of shadow mapping of guest IDT or guest-kernel + * stack). */ - if ( vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0) ) + if ( vmcb->exitintinfo.fields.v ) { - intr_vector = vmcb->exitintinfo.fields.vector; + vmcb->eventinj = vmcb->exitintinfo; vmcb->exitintinfo.bytes = 0; HVMTRACE_1D(REINJ_VIRQ, v, intr_vector); - svm_inject_extint(v, intr_vector); return; } - /* - * Previous interrupt still pending? This occurs if we return from VMRUN - * very early in the entry-to-guest process. Usually this is because an - * external physical interrupt was pending when we executed VMRUN. - */ - if ( vmcb->vintr.fields.irq ) - return; - - /* Crank the handle on interrupt state and check for new interrrupts. */ + /* Crank the handle on interrupt state. */ pt_update_irq(v); hvm_set_callback_irq_level(); - if ( !cpu_has_pending_irq(v) ) - return; - /* - * If the guest can't take an interrupt right now, create a 'fake' - * virtual interrupt on to intercept as soon as the guest _can_ take - * interrupts. Do not obtain the next interrupt from the vlapic/pic - * if unable to inject. - * - * Also do this if there is an exception pending. This is because - * the delivery of the exception can arbitrarily delay the injection - * of the vintr (for example, if the exception is handled via an - * interrupt gate, hence zeroing RFLAGS.IF). In the meantime: - * - the vTPR could be modified upwards, so we need to wait until the - * exception is delivered before we can safely decide that an - * interrupt is deliverable; and - * - the guest might look at the APIC/PIC state, so we ought not to have - * cleared the interrupt out of the IRR. - */ - if ( irq_masked(vmcb->rflags) || vmcb->interrupt_shadow - || vmcb->eventinj.fields.v ) + do { + intr_source = hvm_vcpu_has_pending_irq(v); + if ( likely(intr_source == hvm_intack_none) ) + return; + + /* + * If the guest can't take an interrupt right now, create a 'fake' + * virtual interrupt on to intercept as soon as the guest _can_ take + * interrupts. Do not obtain the next interrupt from the vlapic/pic + * if unable to inject. + * + * Also do this if there is an injection already pending. This is + * because the event delivery can arbitrarily delay the injection + * of the vintr (for example, if the exception is handled via an + * interrupt gate, hence zeroing RFLAGS.IF). In the meantime: + * - the vTPR could be modified upwards, so we need to wait until the + * exception is delivered before we can safely decide that an + * interrupt is deliverable; and + * - the guest might look at the APIC/PIC state, so we ought not to + * have cleared the interrupt out of the IRR. + * + * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt + * shadow. This is hard to do without hardware support. We should also + * track 'NMI blocking' from NMI injection until IRET. This can be done + * quite easily in software by intercepting the unblocking IRET. 
+ */ + if ( !hvm_interrupts_enabled(v, intr_source) || + vmcb->eventinj.fields.v ) + { + vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR; + HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1); + svm_inject_dummy_vintr(v); + return; + } + } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) ); + + if ( intr_source == hvm_intack_nmi ) { - vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR; - HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1); - svm_inject_extint(v, 0x0); /* actual vector doesn't matter */ - return; + svm_inject_nmi(v); } - - /* Okay, we can deliver the interrupt: grab it and update PIC state. */ - intr_vector = cpu_get_interrupt(v, &intr_type); - BUG_ON(intr_vector < 0); - - HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0); - svm_inject_extint(v, intr_vector); - - pt_intr_post(v, intr_vector, intr_type); + else + { + HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0); + svm_inject_extint(v, intr_vector); + pt_intr_post(v, intr_vector, intr_source); + } } /* diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Wed Jun 20 12:49:27 2007 -0600 @@ -312,26 +312,8 @@ int svm_vmcb_save(struct vcpu *v, struct c->sysenter_esp = vmcb->sysenter_esp; c->sysenter_eip = vmcb->sysenter_eip; - /* Save any event/interrupt that was being injected when we last - * exited. Although there are three(!) VMCB fields that can contain - * active events, we only need to save at most one: because the - * intr_assist logic never delivers an IRQ when any other event is - * active, we know that the only possible collision is if we inject - * a fault while exitintinfo contains a valid event (the delivery of - * which caused the last exit). In that case replaying just the - * first event should cause the same behaviour when we restore. */ - if ( vmcb->vintr.fields.irq - && /* Check it's not a fake interrupt (see svm_intr_assist()) */ - !(vmcb->general1_intercepts & GENERAL1_INTERCEPT_VINTR) ) - { - c->pending_vector = vmcb->vintr.fields.vector; - c->pending_type = 0; /* External interrupt */ - c->pending_error_valid = 0; - c->pending_reserved = 0; - c->pending_valid = 1; - c->error_code = 0; - } - else if ( vmcb->exitintinfo.fields.v ) + /* Save any event/interrupt that was being injected when we last exited. */ + if ( vmcb->exitintinfo.fields.v ) { c->pending_event = vmcb->exitintinfo.bytes & 0xffffffff; c->error_code = vmcb->exitintinfo.fields.errorcode; @@ -569,10 +551,15 @@ static inline void svm_restore_dr(struct __restore_debug_registers(v); } -static int svm_interrupts_enabled(struct vcpu *v) -{ - unsigned long eflags = v->arch.hvm_svm.vmcb->rflags; - return !irq_masked(eflags); +static int svm_interrupts_enabled(struct vcpu *v, enum hvm_intack type) +{ + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + + if ( type == hvm_intack_nmi ) + return !vmcb->interrupt_shadow; + + ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic)); + return !irq_masked(vmcb->rflags) && !vmcb->interrupt_shadow; } static int svm_guest_x86_mode(struct vcpu *v) @@ -596,6 +583,14 @@ static void svm_update_guest_cr3(struct static void svm_update_guest_cr3(struct vcpu *v) { v->arch.hvm_svm.vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; +} + +static void svm_flush_guest_tlbs(void) +{ + /* Roll over the CPU's ASID generation, so it gets a clean TLB when we + * next VMRUN. (If ASIDs are disabled, the whole TLB is flushed on + * VMRUN anyway). 
*/ + svm_asid_inc_generation(); } static void svm_update_vtpr(struct vcpu *v, unsigned long value) @@ -770,8 +765,6 @@ static void svm_init_hypercall_page(stru { char *p; int i; - - memset(hypercall_page, 0, PAGE_SIZE); for ( i = 0; i < (PAGE_SIZE / 32); i++ ) { @@ -948,6 +941,7 @@ static struct hvm_function_table svm_fun .get_segment_register = svm_get_segment_register, .update_host_cr3 = svm_update_host_cr3, .update_guest_cr3 = svm_update_guest_cr3, + .flush_guest_tlbs = svm_flush_guest_tlbs, .update_vtpr = svm_update_vtpr, .stts = svm_stts, .set_tsc_offset = svm_set_tsc_offset, @@ -957,7 +951,7 @@ static struct hvm_function_table svm_fun .event_injection_faulted = svm_event_injection_faulted }; -void svm_npt_detect(void) +static void svm_npt_detect(void) { u32 eax, ebx, ecx, edx; @@ -1017,6 +1011,9 @@ int start_svm(struct cpuinfo_x86 *c) hvm_enable(&svm_function_table); + if ( opt_hap_enabled ) + printk("SVM: Nested paging enabled.\n"); + return 1; } @@ -1477,7 +1474,7 @@ static void svm_io_instruction(struct vc /* Copy current guest state into io instruction state structure. */ memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES); - hvm_store_cpu_guest_regs(v, regs, NULL); + svm_store_cpu_guest_regs(v, regs, NULL); info.bytes = vmcb->exitinfo1; @@ -2148,11 +2145,14 @@ static inline void svm_do_msr_access( static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb) { + enum hvm_intack type = hvm_vcpu_has_pending_irq(current); + __update_guest_eip(vmcb, 1); /* Check for interrupt not handled or new interrupt. */ - if ( (vmcb->rflags & X86_EFLAGS_IF) && - (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) ) { + if ( vmcb->eventinj.fields.v || + ((type != hvm_intack_none) && svm_interrupts_enabled(current, type)) ) + { HVMTRACE_1D(HLT, current, /*int pending=*/ 1); return; } diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/svm/vmcb.c Wed Jun 20 12:49:27 2007 -0600 @@ -56,7 +56,7 @@ struct vmcb_struct *alloc_vmcb(void) return NULL; } - memset(vmcb, 0, PAGE_SIZE); + clear_page(vmcb); return vmcb; } @@ -72,11 +72,11 @@ struct host_save_area *alloc_host_save_a hsa = alloc_xenheap_page(); if ( hsa == NULL ) { - printk(XENLOG_WARNING "Warning: failed to allocate vmcb.\n"); + printk(XENLOG_WARNING "Warning: failed to allocate hsa.\n"); return NULL; } - memset(hsa, 0, PAGE_SIZE); + clear_page(hsa); return hsa; } diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vioapic.c --- a/xen/arch/x86/hvm/vioapic.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/vioapic.c Wed Jun 20 12:49:27 2007 -0600 @@ -254,17 +254,11 @@ static void ioapic_inj_irq( HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "irq %d trig %d deliv %d", vector, trig_mode, delivery_mode); - switch ( delivery_mode ) - { - case dest_Fixed: - case dest_LowestPrio: - if ( vlapic_set_irq(target, vector, trig_mode) ) - vcpu_kick(vlapic_vcpu(target)); - break; - default: - gdprintk(XENLOG_WARNING, "error delivery mode %d\n", delivery_mode); - break; - } + ASSERT((delivery_mode == dest_Fixed) || + (delivery_mode == dest_LowestPrio)); + + if ( vlapic_set_irq(target, vector, trig_mode) ) + vcpu_kick(vlapic_vcpu(target)); } static uint32_t ioapic_get_delivery_bitmask( @@ -368,7 +362,6 @@ static void vioapic_deliver(struct hvm_h } case dest_Fixed: - case dest_ExtINT: { uint8_t bit; for ( bit = 0; deliver_bitmask != 0; bit++ ) @@ -393,10 +386,21 @@ static void vioapic_deliver(struct hvm_h break; } - case dest_SMI: case dest_NMI: - case 
dest_INIT: - case dest__reserved_2: + { + uint8_t bit; + for ( bit = 0; deliver_bitmask != 0; bit++ ) + { + if ( !(deliver_bitmask & (1 << bit)) ) + continue; + deliver_bitmask &= ~(1 << bit); + if ( ((v = vioapic_domain(vioapic)->vcpu[bit]) != NULL) && + !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) ) + vcpu_kick(v); + } + break; + } + default: gdprintk(XENLOG_WARNING, "Unsupported delivery mode %d\n", delivery_mode); diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/vlapic.c Wed Jun 20 12:49:27 2007 -0600 @@ -294,7 +294,8 @@ static int vlapic_accept_irq(struct vcpu break; case APIC_DM_NMI: - gdprintk(XENLOG_WARNING, "Ignoring guest NMI\n"); + if ( !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) ) + vcpu_kick(v); break; case APIC_DM_INIT: @@ -747,7 +748,7 @@ int vlapic_has_interrupt(struct vcpu *v) return highest_irr; } -int cpu_get_apic_interrupt(struct vcpu *v, int *mode) +int cpu_get_apic_interrupt(struct vcpu *v) { int vector = vlapic_has_interrupt(v); struct vlapic *vlapic = vcpu_vlapic(v); @@ -757,8 +758,6 @@ int cpu_get_apic_interrupt(struct vcpu * vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]); vlapic_clear_irr(vector, vlapic); - - *mode = APIC_DM_FIXED; return vector; } @@ -935,7 +934,7 @@ int vlapic_init(struct vcpu *v) return -ENOMEM; } - memset(vlapic->regs, 0, PAGE_SIZE); + clear_page(vlapic->regs); vlapic_reset(vlapic); diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/intr.c --- a/xen/arch/x86/hvm/vmx/intr.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/vmx/intr.c Wed Jun 20 12:49:27 2007 -0600 @@ -102,8 +102,8 @@ static void update_tpr_threshold(struct asmlinkage void vmx_intr_assist(void) { - int has_ext_irq, intr_vector, intr_type = 0; - unsigned long eflags, intr_shadow; + int intr_vector; + enum hvm_intack intr_source; struct vcpu *v = current; unsigned int idtv_info_field; unsigned long inst_len; @@ -114,65 +114,67 @@ asmlinkage void vmx_intr_assist(void) update_tpr_threshold(vcpu_vlapic(v)); - has_ext_irq = cpu_has_pending_irq(v); + do { + intr_source = hvm_vcpu_has_pending_irq(v); - if ( unlikely(v->arch.hvm_vmx.vector_injected) ) - { - v->arch.hvm_vmx.vector_injected = 0; - if ( unlikely(has_ext_irq) ) - enable_irq_window(v); - return; - } + if ( unlikely(v->arch.hvm_vmx.vector_injected) ) + { + v->arch.hvm_vmx.vector_injected = 0; + if ( unlikely(intr_source != hvm_intack_none) ) + enable_irq_window(v); + return; + } - /* This could be moved earlier in the VMX resume sequence. */ - idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD); - if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) ) - { - __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); + /* This could be moved earlier in the VMX resume sequence. */ + idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD); + if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) ) + { + __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); + + /* + * Safe: the length will only be interpreted for software + * exceptions and interrupts. If we get here then delivery of some + * event caused a fault, and this always results in defined + * VM_EXIT_INSTRUCTION_LEN. 
+ */ + inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */ + __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len); + + if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */ + __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, + __vmread(IDT_VECTORING_ERROR_CODE)); + if ( unlikely(intr_source != hvm_intack_none) ) + enable_irq_window(v); + + HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field); + return; + } + + if ( likely(intr_source == hvm_intack_none) ) + return; /* - * Safe: the length will only be interpreted for software exceptions - * and interrupts. If we get here then delivery of some event caused a - * fault, and this always results in defined VM_EXIT_INSTRUCTION_LEN. + * TODO: Better NMI handling. Shouldn't wait for EFLAGS.IF==1, but + * should wait for exit from 'NMI blocking' window (NMI injection to + * next IRET). This requires us to use the new 'virtual NMI' support. */ - inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */ - __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len); + if ( !hvm_interrupts_enabled(v, intr_source) ) + { + enable_irq_window(v); + return; + } + } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) ); - if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */ - __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, - __vmread(IDT_VECTORING_ERROR_CODE)); - if ( unlikely(has_ext_irq) ) - enable_irq_window(v); - - HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field); - return; + if ( intr_source == hvm_intack_nmi ) + { + vmx_inject_nmi(v); } - - if ( likely(!has_ext_irq) ) - return; - - intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO); - if ( unlikely(intr_shadow & (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS)) ) + else { - enable_irq_window(v); - HVM_DBG_LOG(DBG_LEVEL_1, "interruptibility"); - return; + HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0); + vmx_inject_extint(v, intr_vector); + pt_intr_post(v, intr_vector, intr_source); } - - eflags = __vmread(GUEST_RFLAGS); - if ( irq_masked(eflags) ) - { - enable_irq_window(v); - return; - } - - intr_vector = cpu_get_interrupt(v, &intr_type); - BUG_ON(intr_vector < 0); - - HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0); - vmx_inject_extint(v, intr_vector, VMX_DELIVER_NO_ERROR_CODE); - - pt_intr_post(v, intr_vector, intr_type); } /* diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Jun 20 12:49:27 2007 -0600 @@ -158,7 +158,7 @@ static struct vmcs_struct *vmx_alloc_vmc return NULL; } - memset(vmcs, 0, PAGE_SIZE); + clear_page(vmcs); vmcs->vmcs_revision_id = vmcs_revision_id; return vmcs; diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Jun 20 12:49:27 2007 -0600 @@ -1070,8 +1070,6 @@ static void vmx_init_hypercall_page(stru char *p; int i; - memset(hypercall_page, 0, PAGE_SIZE); - for ( i = 0; i < (PAGE_SIZE / 32); i++ ) { p = (char *)(hypercall_page + (i * 32)); @@ -1115,16 +1113,26 @@ static int vmx_nx_enabled(struct vcpu *v return v->arch.hvm_vmx.efer & EFER_NX; } -static int vmx_interrupts_enabled(struct vcpu *v) -{ - unsigned long eflags = __vmread(GUEST_RFLAGS); - return !irq_masked(eflags); -} - +static int vmx_interrupts_enabled(struct vcpu *v, enum hvm_intack type) +{ + unsigned long intr_shadow, eflags; + + ASSERT(v == current); + + intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO); + intr_shadow &= 
VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS; + + if ( type == hvm_intack_nmi ) + return !intr_shadow; + + ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic)); + eflags = __vmread(GUEST_RFLAGS); + return !irq_masked(eflags) && !intr_shadow; +} static void vmx_update_host_cr3(struct vcpu *v) { - ASSERT( (v == current) || !vcpu_runnable(v) ); + ASSERT((v == current) || !vcpu_runnable(v)); vmx_vmcs_enter(v); __vmwrite(HOST_CR3, v->arch.cr3); vmx_vmcs_exit(v); @@ -1132,12 +1140,18 @@ static void vmx_update_host_cr3(struct v static void vmx_update_guest_cr3(struct vcpu *v) { - ASSERT( (v == current) || !vcpu_runnable(v) ); + ASSERT((v == current) || !vcpu_runnable(v)); vmx_vmcs_enter(v); __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3); vmx_vmcs_exit(v); } +static void vmx_flush_guest_tlbs(void) +{ + /* No tagged TLB support on VMX yet. The fact that we're in Xen + * at all means any guest will have a clean TLB when it's next run, + * because VMRESUME will flush it for us. */ +} static void vmx_inject_exception( unsigned int trapnr, int errcode, unsigned long cr2) @@ -1205,6 +1219,7 @@ static struct hvm_function_table vmx_fun .get_segment_register = vmx_get_segment_register, .update_host_cr3 = vmx_update_host_cr3, .update_guest_cr3 = vmx_update_guest_cr3, + .flush_guest_tlbs = vmx_flush_guest_tlbs, .update_vtpr = vmx_update_vtpr, .stts = vmx_stts, .set_tsc_offset = vmx_set_tsc_offset, @@ -1837,7 +1852,7 @@ static void vmx_io_instruction(unsigned /* Copy current guest state into io instruction state structure. */ memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES); - hvm_store_cpu_guest_regs(current, regs, NULL); + vmx_store_cpu_guest_regs(current, regs, NULL); HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, " "exit_qualification = %lx", @@ -2549,7 +2564,8 @@ static inline int vmx_do_msr_read(struct HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx); - switch (ecx) { + switch ( ecx ) + { case MSR_IA32_TIME_STAMP_COUNTER: msr_content = hvm_get_guest_time(v); break; @@ -2565,6 +2581,8 @@ static inline int vmx_do_msr_read(struct case MSR_IA32_APICBASE: msr_content = vcpu_vlapic(v)->hw.apic_base_msr; break; + case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1: + goto gp_fault; default: if ( long_mode_do_msr_read(regs) ) goto done; @@ -2576,8 +2594,8 @@ static inline int vmx_do_msr_read(struct regs->edx = edx; goto done; } - vmx_inject_hw_exception(v, TRAP_gp_fault, 0); - return 0; + + goto gp_fault; } regs->eax = msr_content & 0xFFFFFFFF; @@ -2589,6 +2607,10 @@ done: ecx, (unsigned long)regs->eax, (unsigned long)regs->edx); return 1; + +gp_fault: + vmx_inject_hw_exception(v, TRAP_gp_fault, 0); + return 0; } static int vmx_alloc_vlapic_mapping(struct domain *d) @@ -2667,7 +2689,8 @@ static inline int vmx_do_msr_write(struc msr_content = (u32)regs->eax | ((u64)regs->edx << 32); HVMTRACE_2D(MSR_WRITE, v, ecx, msr_content); - switch (ecx) { + switch ( ecx ) + { case MSR_IA32_TIME_STAMP_COUNTER: hvm_set_guest_time(v, msr_content); pt_reset(v); @@ -2684,6 +2707,8 @@ static inline int vmx_do_msr_write(struc case MSR_IA32_APICBASE: vlapic_msr_set(vcpu_vlapic(v), msr_content); break; + case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1: + goto gp_fault; default: if ( !long_mode_do_msr_write(regs) ) wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx); @@ -2691,6 +2716,10 @@ static inline int vmx_do_msr_write(struc } return 1; + +gp_fault: + vmx_inject_hw_exception(v, TRAP_gp_fault, 0); + return 0; } static void vmx_do_hlt(void) diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vpic.c --- 
a/xen/arch/x86/hvm/vpic.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/vpic.c Wed Jun 20 12:49:27 2007 -0600 @@ -499,7 +499,7 @@ void vpic_irq_negative_edge(struct domai vpic_update_int_output(vpic); } -int cpu_get_pic_interrupt(struct vcpu *v, int *type) +int cpu_get_pic_interrupt(struct vcpu *v) { int irq, vector; struct hvm_hw_vpic *vpic = &v->domain->arch.hvm_domain.vpic[0]; @@ -512,6 +512,5 @@ int cpu_get_pic_interrupt(struct vcpu *v return -1; vector = vpic[irq >> 3].irq_base + (irq & 7); - *type = APIC_DM_EXTINT; return vector; } diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vpt.c --- a/xen/arch/x86/hvm/vpt.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/hvm/vpt.c Wed Jun 20 12:49:27 2007 -0600 @@ -155,7 +155,8 @@ void pt_update_irq(struct vcpu *v) } } -static struct periodic_time *is_pt_irq(struct vcpu *v, int vector, int type) +static struct periodic_time *is_pt_irq( + struct vcpu *v, int vector, enum hvm_intack src) { struct list_head *head = &v->arch.hvm_vcpu.tm_list; struct periodic_time *pt; @@ -174,7 +175,7 @@ static struct periodic_time *is_pt_irq(s return pt; } - vec = get_isa_irq_vector(v, pt->irq, type); + vec = get_isa_irq_vector(v, pt->irq, src); /* RTC irq need special care */ if ( (vector != vec) || (pt->irq == 8 && !is_rtc_periodic_irq(rtc)) ) @@ -186,7 +187,7 @@ static struct periodic_time *is_pt_irq(s return NULL; } -void pt_intr_post(struct vcpu *v, int vector, int type) +void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src) { struct periodic_time *pt; time_cb *cb; @@ -194,7 +195,7 @@ void pt_intr_post(struct vcpu *v, int ve spin_lock(&v->arch.hvm_vcpu.tm_lock); - pt = is_pt_irq(v, vector, type); + pt = is_pt_irq(v, vector, src); if ( pt == NULL ) { spin_unlock(&v->arch.hvm_vcpu.tm_lock); @@ -227,13 +228,10 @@ void pt_reset(struct vcpu *v) list_for_each_entry ( pt, head, list ) { - if ( pt->enabled ) - { - pt->pending_intr_nr = 0; - pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu); - pt->scheduled = NOW() + pt->period; - set_timer(&pt->timer, pt->scheduled); - } + pt->pending_intr_nr = 0; + pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu); + pt->scheduled = NOW() + pt->period; + set_timer(&pt->timer, pt->scheduled); } spin_unlock(&v->arch.hvm_vcpu.tm_lock); @@ -247,10 +245,7 @@ void pt_migrate(struct vcpu *v) spin_lock(&v->arch.hvm_vcpu.tm_lock); list_for_each_entry ( pt, head, list ) - { - if ( pt->enabled ) - migrate_timer(&pt->timer, v->processor); - } + migrate_timer(&pt->timer, v->processor); spin_unlock(&v->arch.hvm_vcpu.tm_lock); } @@ -263,8 +258,9 @@ void create_periodic_time( spin_lock(&v->arch.hvm_vcpu.tm_lock); - init_timer(&pt->timer, pt_timer_fn, pt, v->processor); pt->enabled = 1; + pt->pending_intr_nr = 0; + if ( period < 900000 ) /* < 0.9 ms */ { gdprintk(XENLOG_WARNING, @@ -283,6 +279,8 @@ void create_periodic_time( pt->priv = data; list_add(&pt->list, &v->arch.hvm_vcpu.tm_list); + + init_timer(&pt->timer, pt_timer_fn, pt, v->processor); set_timer(&pt->timer, pt->scheduled); spin_unlock(&v->arch.hvm_vcpu.tm_lock); @@ -295,8 +293,12 @@ void destroy_periodic_time(struct period pt_lock(pt); pt->enabled = 0; - pt->pending_intr_nr = 0; list_del(&pt->list); + pt_unlock(pt); + + /* + * pt_timer_fn() can run until this kill_timer() returns. We must do this + * outside pt_lock() otherwise we can deadlock with pt_timer_fn(). 
+ */ kill_timer(&pt->timer); - pt_unlock(pt); -} +} diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/mm.c Wed Jun 20 12:49:27 2007 -0600 @@ -2942,7 +2942,7 @@ long do_set_gdt(XEN_GUEST_HANDLE(ulong) if ( entries > FIRST_RESERVED_GDT_ENTRY ) return -EINVAL; - if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) ) + if ( copy_from_guest(frames, frame_list, nr_pages) ) return -EFAULT; LOCK_BIGLOCK(current->domain); @@ -3123,7 +3123,7 @@ long arch_memory_op(int op, XEN_GUEST_HA else if ( (d = rcu_lock_domain_by_id(fmap.domid)) == NULL ) return -ESRCH; - rc = copy_from_guest(&d->arch.e820[0], fmap.map.buffer, + rc = copy_from_guest(d->arch.e820, fmap.map.buffer, fmap.map.nr_entries) ? -EFAULT : 0; d->arch.nr_e820 = fmap.map.nr_entries; @@ -3144,7 +3144,7 @@ long arch_memory_op(int op, XEN_GUEST_HA return -EFAULT; map.nr_entries = min(map.nr_entries, d->arch.nr_e820); - if ( copy_to_guest(map.buffer, &d->arch.e820[0], map.nr_entries) || + if ( copy_to_guest(map.buffer, d->arch.e820, map.nr_entries) || copy_to_guest(arg, &map, 1) ) return -EFAULT; @@ -3168,7 +3168,7 @@ long arch_memory_op(int op, XEN_GUEST_HA buffer = guest_handle_cast(memmap.buffer, e820entry_t); count = min((unsigned int)e820.nr_map, memmap.nr_entries); - if ( copy_to_guest(buffer, &e820.map[0], count) < 0 ) + if ( copy_to_guest(buffer, e820.map, count) < 0 ) return -EFAULT; memmap.nr_entries = count; @@ -3181,7 +3181,7 @@ long arch_memory_op(int op, XEN_GUEST_HA case XENMEM_machphys_mapping: { - struct xen_machphys_mapping mapping = { + static const struct xen_machphys_mapping mapping = { .v_start = MACH2PHYS_VIRT_START, .v_end = MACH2PHYS_VIRT_END, .max_mfn = MACH2PHYS_NR_ENTRIES - 1 diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/setup.c Wed Jun 20 12:49:27 2007 -0600 @@ -295,14 +295,14 @@ static struct e820map __initdata boot_e8 /* Reserve area (@s,@e) in the temporary bootstrap e820 map. */ static void __init reserve_in_boot_e820(unsigned long s, unsigned long e) { - unsigned long rs, re; + uint64_t rs, re; int i; for ( i = 0; i < boot_e820.nr_map; i++ ) { /* Have we found the e820 region that includes the specified range? */ rs = boot_e820.map[i].addr; - re = boot_e820.map[i].addr + boot_e820.map[i].size; + re = rs + boot_e820.map[i].size; if ( (s < rs) || (e > re) ) continue; @@ -402,7 +402,7 @@ void init_done(void) startup_cpu_idle_loop(); } -void __init __start_xen(multiboot_info_t *mbi) +void __init __start_xen(unsigned long mbi_p) { char *memmap_type = NULL; char __cmdline[] = "", *cmdline = __cmdline; @@ -410,6 +410,7 @@ void __init __start_xen(multiboot_info_t unsigned int initrdidx = 1; char *_policy_start = NULL; unsigned long _policy_len = 0; + multiboot_info_t *mbi = __va(mbi_p); module_t *mod = (module_t *)__va(mbi->mods_addr); unsigned long nr_pages, modules_length; int i, e820_warn = 0, bytes = 0; @@ -678,6 +679,9 @@ void __init __start_xen(multiboot_info_t barrier(); move_memory(e, 0, __pa(&_end) - xen_phys_start); + /* Poison low 1MB to detect stray pointers to physical 0-1MB. */ + memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20); + /* Walk initial pagetables, relocating page directory entries. 
*/ pl4e = __va(__pa(idle_pg_table)); for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ ) diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/traps.c Wed Jun 20 12:49:27 2007 -0600 @@ -462,7 +462,17 @@ int rdmsr_hypervisor_regs( if ( idx > 0 ) return 0; - *eax = *edx = 0; + switch ( idx ) + { + case 0: + { + *eax = *edx = 0; + break; + } + default: + BUG(); + } + return 1; } @@ -1130,7 +1140,7 @@ static inline int guest_io_okay( * read as 0xff (no access allowed). */ TOGGLE_MODE(); - switch ( __copy_from_guest_offset(&x.bytes[0], v->arch.iobmp, + switch ( __copy_from_guest_offset(x.bytes, v->arch.iobmp, port>>3, 2) ) { default: x.bytes[0] = ~0; diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/x86_32/traps.c Wed Jun 20 12:49:27 2007 -0600 @@ -513,6 +513,7 @@ static void hypercall_page_initialise_ri void hypercall_page_initialise(struct domain *d, void *hypercall_page) { + memset(hypercall_page, 0xCC, PAGE_SIZE); if ( is_hvm_domain(d) ) hvm_hypercall_page_initialise(d, hypercall_page); else if ( supervisor_mode_kernel ) diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_64/compat_kexec.S --- a/xen/arch/x86/x86_64/compat_kexec.S Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/x86_64/compat_kexec.S Wed Jun 20 12:49:27 2007 -0600 @@ -1,5 +1,11 @@ /* * Compatibility kexec handler. + */ + +/* + * NOTE: We rely on Xen not relocating itself above the 4G boundary. This is + * currently true but if it ever changes then compat_pg_table will + * need to be moved back below 4G at run time. */ #include <xen/config.h> @@ -8,7 +14,20 @@ #include <asm/msr.h> #include <asm/page.h> -#define SYM_PHYS(sym) ((sym) - __XEN_VIRT_START) +/* The unrelocated physical address of a symbol. */ +#define SYM_PHYS(sym) ((sym) - __XEN_VIRT_START) + +/* Load physical address of symbol into register and relocate it. */ +#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \ + add xen_phys_start(%rip), reg + +/* + * Relocate a physical address in memory. Size of temporary register + * determines size of the value to relocate. + */ +#define RELOCATE_MEM(addr,reg) mov addr(%rip), reg ; \ + add xen_phys_start(%rip), reg ; \ + mov reg, addr(%rip) .text @@ -31,20 +50,35 @@ 1: dec %r9 test %r9,%r9 jnz 1b - mov $SYM_PHYS(compat_page_list),%rdx + RELOCATE_SYM(compat_page_list,%rdx) + + /* Relocate compatibility mode entry point address. */ + RELOCATE_MEM(compatibility_mode_far,%eax) + + /* Relocate compat_pg_table. */ + RELOCATE_MEM(compat_pg_table, %rax) + RELOCATE_MEM(compat_pg_table+0x8, %rax) + RELOCATE_MEM(compat_pg_table+0x10,%rax) + RELOCATE_MEM(compat_pg_table+0x18,%rax) /* * Setup an identity mapped region in PML4[0] of idle page * table. */ - lea l3_identmap(%rip),%rax - sub %rbx,%rax + RELOCATE_SYM(l3_identmap,%rax) or $0x63,%rax mov %rax, idle_pg_table(%rip) /* Switch to idle page table. */ - movq $SYM_PHYS(idle_pg_table), %rax + RELOCATE_SYM(idle_pg_table,%rax) movq %rax, %cr3 + + /* Switch to identity mapped compatibility stack. */ + RELOCATE_SYM(compat_stack,%rax) + movq %rax, %rsp + + /* Save xen_phys_start for 32 bit code. */ + movq xen_phys_start(%rip), %rbx /* Jump to low identity mapping in compatibility mode. */ ljmp *compatibility_mode_far(%rip) @@ -54,7 +88,26 @@ compatibility_mode_far: .long SYM_PHYS(compatibility_mode) .long __HYPERVISOR_CS32 + /* + * We use 5 words of stack for the arguments passed to the kernel. 
The + * kernel only uses 1 word before switching to its own stack. Allocate + * 16 words to give "plenty" of room. + */ + .fill 16,4,0 +compat_stack: + .code32 + +#undef RELOCATE_SYM +#undef RELOCATE_MEM + +/* + * Load physical address of symbol into register and relocate it. %rbx + * contains xen_phys_start(%rip) saved before jump to compatibility + * mode. + */ +#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \ + add %ebx, reg compatibility_mode: /* Setup some sane segments. */ @@ -78,7 +131,7 @@ compatibility_mode: movl %eax, %cr0 /* Switch to 32 bit page table. */ - movl $SYM_PHYS(compat_pg_table), %eax + RELOCATE_SYM(compat_pg_table, %eax) movl %eax, %cr3 /* Clear MSR_EFER[LME], disabling long mode */ diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/arch/x86/x86_64/traps.c Wed Jun 20 12:49:27 2007 -0600 @@ -510,6 +510,7 @@ static void hypercall_page_initialise_ri void hypercall_page_initialise(struct domain *d, void *hypercall_page) { + memset(hypercall_page, 0xCC, PAGE_SIZE); if ( is_hvm_domain(d) ) hvm_hypercall_page_initialise(d, hypercall_page); else if ( !is_pv_32bit_domain(d) ) diff -r c20bc60f9243 -r 810885428743 xen/common/compat/memory.c --- a/xen/common/compat/memory.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/common/compat/memory.c Wed Jun 20 12:49:27 2007 -0600 @@ -258,7 +258,8 @@ int compat_memory_op(unsigned int cmd, X compat_pfn_t pfn = nat.rsrv->extent_start.p[start_extent]; BUG_ON(pfn != nat.rsrv->extent_start.p[start_extent]); - if ( __copy_to_compat_offset(cmp.rsrv.extent_start, start_extent, &pfn, 1) ) + if ( __copy_to_compat_offset(cmp.rsrv.extent_start, + start_extent, &pfn, 1) ) { if ( split >= 0 ) { @@ -275,6 +276,10 @@ int compat_memory_op(unsigned int cmd, X break; } } + + /* Bail if there was an error. 
*/ + if ( (split >= 0) && (end_extent != nat.rsrv->nr_extents) ) + split = 0; } else start_extent = end_extent; diff -r c20bc60f9243 -r 810885428743 xen/common/domctl.c --- a/xen/common/domctl.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/common/domctl.c Wed Jun 20 12:49:27 2007 -0600 @@ -43,7 +43,7 @@ void cpumask_to_xenctl_cpumap( bitmap_long_to_byte(bytemap, cpus_addr(*cpumask), NR_CPUS); - copy_to_guest(xenctl_cpumap->bitmap, &bytemap[0], copy_bytes); + copy_to_guest(xenctl_cpumap->bitmap, bytemap, copy_bytes); for ( i = copy_bytes; i < guest_bytes; i++ ) copy_to_guest_offset(xenctl_cpumap->bitmap, i, &zero, 1); @@ -63,7 +63,7 @@ void xenctl_cpumap_to_cpumask( if ( guest_handle_is_null(xenctl_cpumap->bitmap) ) return; - copy_from_guest(&bytemap[0], xenctl_cpumap->bitmap, copy_bytes); + copy_from_guest(bytemap, xenctl_cpumap->bitmap, copy_bytes); bitmap_byte_to_long(cpus_addr(*cpumask), bytemap, NR_CPUS); } diff -r c20bc60f9243 -r 810885428743 xen/common/grant_table.c --- a/xen/common/grant_table.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/common/grant_table.c Wed Jun 20 12:49:27 2007 -0600 @@ -148,7 +148,7 @@ get_maptrack_handle( return -1; } - memset(new_mt, 0, PAGE_SIZE); + clear_page(new_mt); new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE; @@ -624,7 +624,7 @@ gnttab_grow_table(struct domain *d, unsi { if ( (gt->active[i] = alloc_xenheap_page()) == NULL ) goto active_alloc_failed; - memset(gt->active[i], 0, PAGE_SIZE); + clear_page(gt->active[i]); } /* Shared */ @@ -632,7 +632,7 @@ gnttab_grow_table(struct domain *d, unsi { if ( (gt->shared[i] = alloc_xenheap_page()) == NULL ) goto shared_alloc_failed; - memset(gt->shared[i], 0, PAGE_SIZE); + clear_page(gt->shared[i]); } /* Share the new shared frames with the recipient domain */ @@ -1365,7 +1365,7 @@ grant_table_create( { if ( (t->active[i] = alloc_xenheap_page()) == NULL ) goto no_mem_2; - memset(t->active[i], 0, PAGE_SIZE); + clear_page(t->active[i]); } /* Tracking of mapped foreign frames table */ @@ -1375,7 +1375,7 @@ grant_table_create( memset(t->maptrack, 0, max_nr_maptrack_frames() * sizeof(t->maptrack[0])); if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL ) goto no_mem_3; - memset(t->maptrack[0], 0, PAGE_SIZE); + clear_page(t->maptrack[0]); t->maptrack_limit = PAGE_SIZE / sizeof(struct grant_mapping); for ( i = 0; i < t->maptrack_limit; i++ ) t->maptrack[0][i].ref = i+1; @@ -1389,7 +1389,7 @@ grant_table_create( { if ( (t->shared[i] = alloc_xenheap_page()) == NULL ) goto no_mem_4; - memset(t->shared[i], 0, PAGE_SIZE); + clear_page(t->shared[i]); } for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ ) diff -r c20bc60f9243 -r 810885428743 xen/common/kernel.c --- a/xen/common/kernel.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/common/kernel.c Wed Jun 20 12:49:27 2007 -0600 @@ -142,7 +142,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL { xen_extraversion_t extraversion; safe_strcpy(extraversion, xen_extra_version()); - if ( copy_to_guest(arg, (char *)extraversion, sizeof(extraversion)) ) + if ( copy_to_guest(arg, extraversion, ARRAY_SIZE(extraversion)) ) return -EFAULT; return 0; } @@ -167,7 +167,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL memset(info, 0, sizeof(info)); arch_get_xen_caps(&info); - if ( copy_to_guest(arg, (char *)info, sizeof(info)) ) + if ( copy_to_guest(arg, info, ARRAY_SIZE(info)) ) return -EFAULT; return 0; } @@ -187,7 +187,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL { xen_changeset_info_t chgset; safe_strcpy(chgset, xen_changeset()); - if ( copy_to_guest(arg, (char *)chgset, sizeof(chgset)) ) + if ( 
copy_to_guest(arg, chgset, ARRAY_SIZE(chgset)) ) return -EFAULT; return 0; } @@ -229,8 +229,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL case XENVER_guest_handle: { - if ( copy_to_guest(arg, (char *)current->domain->handle, - sizeof(current->domain->handle)) ) + if ( copy_to_guest(arg, current->domain->handle, + ARRAY_SIZE(current->domain->handle)) ) return -EFAULT; return 0; } diff -r c20bc60f9243 -r 810885428743 xen/common/kexec.c --- a/xen/common/kexec.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/common/kexec.c Wed Jun 20 12:49:27 2007 -0600 @@ -169,7 +169,11 @@ static int kexec_get(reserve)(xen_kexec_ static int kexec_get(xen)(xen_kexec_range_t *range) { +#ifdef CONFIG_X86_64 + range->start = xenheap_phys_start; +#else range->start = virt_to_maddr(_start); +#endif range->size = (unsigned long)xenheap_phys_end - (unsigned long)range->start; return 0; } diff -r c20bc60f9243 -r 810885428743 xen/common/perfc.c --- a/xen/common/perfc.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/common/perfc.c Wed Jun 20 12:49:27 2007 -0600 @@ -227,7 +227,7 @@ static int perfc_copy_info(XEN_GUEST_HAN } BUG_ON(v != perfc_nbr_vals); - if ( copy_to_guest(desc, (xen_sysctl_perfc_desc_t *)perfc_d, NR_PERFCTRS) ) + if ( copy_to_guest(desc, perfc_d, NR_PERFCTRS) ) return -EFAULT; if ( copy_to_guest(val, perfc_vals, perfc_nbr_vals) ) return -EFAULT; diff -r c20bc60f9243 -r 810885428743 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/drivers/char/console.c Wed Jun 20 12:49:27 2007 -0600 @@ -326,7 +326,7 @@ static long guest_console_write(XEN_GUES CONSOLEIO_write, count, buffer); kcount = min_t(int, count, sizeof(kbuf)-1); - if ( copy_from_guest((char *)kbuf, buffer, kcount) ) + if ( copy_from_guest(kbuf, buffer, kcount) ) return -EFAULT; kbuf[kcount] = '\0'; diff -r c20bc60f9243 -r 810885428743 xen/drivers/video/vga.c --- a/xen/drivers/video/vga.c Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/drivers/video/vga.c Wed Jun 20 12:49:27 2007 -0600 @@ -32,6 +32,9 @@ static unsigned char *video; * * 'vga=ask': * display a vga menu of available modes + * + * 'vga=current': + * use the current vga mode without modification * * 'vga=text-80x<rows>': * text mode, where <rows> is one of {25,28,30,34,43,50,60} diff -r c20bc60f9243 -r 810885428743 xen/include/asm-ia64/guest_access.h --- a/xen/include/asm-ia64/guest_access.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-ia64/guest_access.h Wed Jun 20 12:49:27 2007 -0600 @@ -76,28 +76,31 @@ extern int xencomm_handle_is_null(void * __copy_field_from_guest(ptr, hnd, field) #define __copy_to_guest_offset(hnd, idx, ptr, nr) ({ \ - const typeof(ptr) _d = (hnd).p; \ - const typeof(ptr) _s = (ptr); \ + const typeof(*(ptr)) *_s = (ptr); \ + void *_d = (hnd).p; \ + ((void)((hnd).p == (ptr))); \ xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \ }) #define __copy_field_to_guest(hnd, ptr, field) ({ \ - const int _off = offsetof(typeof(*ptr), field); \ - const typeof(ptr) _d = (hnd).p; \ + unsigned int _off = offsetof(typeof(*(hnd).p), field); \ const typeof(&(ptr)->field) _s = &(ptr)->field; \ + void *_d = (hnd).p; \ + ((void)(&(hnd).p->field == &(ptr)->field)); \ xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off); \ }) -#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \ - const typeof(ptr) _s = (hnd).p; \ - const typeof(ptr) _d = (ptr); \ - xencomm_copy_from_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \ +#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \ + const typeof(*(ptr)) *_s = (hnd).p; \ + 
typeof(*(ptr)) *_d = (ptr); \ + xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \ }) #define __copy_field_from_guest(ptr, hnd, field) ({ \ - const int _off = offsetof(typeof(*ptr), field); \ - const typeof(ptr) _s = (hnd).p; \ - const typeof(&(ptr)->field) _d = &(ptr)->field; \ + unsigned int _off = offsetof(typeof(*(hnd).p), field); \ + const void *_s = (hnd).p; \ + typeof(&(ptr)->field) _d = &(ptr)->field; \ + ((void)(&(hnd).p->field == &(ptr)->field)); \ xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off); \ }) diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/event.h --- a/xen/include/asm-x86/event.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-x86/event.h Wed Jun 20 12:49:27 2007 -0600 @@ -10,7 +10,6 @@ #define __ASM_EVENT_H__ #include <xen/shared.h> -#include <asm/hvm/irq.h> /* cpu_has_pending_irq() */ static inline void vcpu_kick(struct vcpu *v) { diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/guest_access.h --- a/xen/include/asm-x86/guest_access.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-x86/guest_access.h Wed Jun 20 12:49:27 2007 -0600 @@ -32,11 +32,12 @@ * specifying an offset into the guest array. */ #define copy_to_guest_offset(hnd, off, ptr, nr) ({ \ - typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ + const typeof(*(ptr)) *_s = (ptr); \ + char (*_d)[sizeof(*_s)] = (void *)(hnd).p; \ + ((void)((hnd).p == (ptr))); \ is_hvm_vcpu(current) ? \ - copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \ - copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \ + copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) : \ + copy_to_user(_d+(off), _s, sizeof(*_s)*(nr)); \ }) /* @@ -44,29 +45,30 @@ * specifying an offset into the guest array. */ #define copy_from_guest_offset(ptr, hnd, off, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - typeof(ptr) _y = (ptr); \ + const typeof(*(ptr)) *_s = (hnd).p; \ + typeof(*(ptr)) *_d = (ptr); \ is_hvm_vcpu(current) ? \ - copy_from_user_hvm(_y, _x+(off), sizeof(*_x)*(nr)) :\ - copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ + copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\ + copy_from_user(_d, _s+(off), sizeof(*_d)*(nr)); \ }) /* Copy sub-field of a structure to guest context via a guest handle. */ #define copy_field_to_guest(hnd, ptr, field) ({ \ - typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ + const typeof(&(ptr)->field) _s = &(ptr)->field; \ + void *_d = &(hnd).p->field; \ + ((void)(&(hnd).p->field == &(ptr)->field)); \ is_hvm_vcpu(current) ? \ - copy_to_user_hvm(_x, _y, sizeof(*_x)) : \ - copy_to_user(_x, _y, sizeof(*_x)); \ + copy_to_user_hvm(_d, _s, sizeof(*_s)) : \ + copy_to_user(_d, _s, sizeof(*_s)); \ }) /* Copy sub-field of a structure from guest context via a guest handle. */ #define copy_field_from_guest(ptr, hnd, field) ({ \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - typeof(&(ptr)->field) _y = &(ptr)->field; \ + const typeof(&(ptr)->field) _s = &(hnd).p->field; \ + typeof(&(ptr)->field) _d = &(ptr)->field; \ is_hvm_vcpu(current) ? 
\ - copy_from_user_hvm(_y, _x, sizeof(*_x)) : \ - copy_from_user(_y, _x, sizeof(*_x)); \ + copy_from_user_hvm(_d, _s, sizeof(*_d)) : \ + copy_from_user(_d, _s, sizeof(*_d)); \ }) /* @@ -78,35 +80,37 @@ array_access_ok((hnd).p, (nr), sizeof(*(hnd).p))) #define __copy_to_guest_offset(hnd, off, ptr, nr) ({ \ - typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ + const typeof(*(ptr)) *_s = (ptr); \ + char (*_d)[sizeof(*_s)] = (void *)(hnd).p; \ + ((void)((hnd).p == (ptr))); \ is_hvm_vcpu(current) ? \ - copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \ - __copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \ + copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) : \ + __copy_to_user(_d+(off), _s, sizeof(*_s)*(nr)); \ }) #define __copy_from_guest_offset(ptr, hnd, off, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - typeof(ptr) _y = (ptr); \ + const typeof(*(ptr)) *_s = (hnd).p; \ + typeof(*(ptr)) *_d = (ptr); \ is_hvm_vcpu(current) ? \ - copy_from_user_hvm(_y, _x+(off),sizeof(*_x)*(nr)) : \ - __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ + copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\ + __copy_from_user(_d, _s+(off), sizeof(*_d)*(nr)); \ }) #define __copy_field_to_guest(hnd, ptr, field) ({ \ - typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ + const typeof(&(ptr)->field) _s = &(ptr)->field; \ + void *_d = &(hnd).p->field; \ + ((void)(&(hnd).p->field == &(ptr)->field)); \ is_hvm_vcpu(current) ? \ - copy_to_user_hvm(_x, _y, sizeof(*_x)) : \ - __copy_to_user(_x, _y, sizeof(*_x)); \ + copy_to_user_hvm(_d, _s, sizeof(*_s)) : \ + __copy_to_user(_d, _s, sizeof(*_s)); \ }) #define __copy_field_from_guest(ptr, hnd, field) ({ \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - typeof(&(ptr)->field) _y = &(ptr)->field; \ + const typeof(&(ptr)->field) _s = &(hnd).p->field; \ + typeof(&(ptr)->field) _d = &(ptr)->field; \ is_hvm_vcpu(current) ? \ - copy_from_user_hvm(_y, _x, sizeof(*_x)) : \ - __copy_from_user(_y, _x, sizeof(*_x)); \ + copy_from_user_hvm(_d, _s, sizeof(*_d)) : \ + __copy_from_user(_d, _s, sizeof(*_d)); \ }) #endif /* __ASM_X86_GUEST_ACCESS_H__ */ diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-x86/hvm/hvm.h Wed Jun 20 12:49:27 2007 -0600 @@ -55,6 +55,14 @@ typedef struct segment_register { u64 base; } __attribute__ ((packed)) segment_register_t; +/* Interrupt acknowledgement sources. */ +enum hvm_intack { + hvm_intack_none, + hvm_intack_pic, + hvm_intack_lapic, + hvm_intack_nmi +}; + /* * The hardware virtual machine (HVM) interface abstracts away from the * x86/x86_64 CPU virtualization assist specifics. Currently this interface @@ -106,7 +114,7 @@ struct hvm_function_table { int (*long_mode_enabled)(struct vcpu *v); int (*pae_enabled)(struct vcpu *v); int (*nx_enabled)(struct vcpu *v); - int (*interrupts_enabled)(struct vcpu *v); + int (*interrupts_enabled)(struct vcpu *v, enum hvm_intack); int (*guest_x86_mode)(struct vcpu *v); unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num); unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg); @@ -124,6 +132,13 @@ struct hvm_function_table { void (*update_guest_cr3)(struct vcpu *v); /* + * Called to ensure that all guest-specific mappings in a tagged TLB + * are flushed; does *not* flush Xen's TLB entries, and on + * processors without a tagged TLB it will be a noop.
+ */ + void (*flush_guest_tlbs)(void); + + /* * Reflect the virtual APIC's value in the guest's V_TPR register */ void (*update_vtpr)(struct vcpu *v, unsigned long value); @@ -148,6 +163,7 @@ struct hvm_function_table { }; extern struct hvm_function_table hvm_funcs; +extern int hvm_enabled; int hvm_domain_initialise(struct domain *d); void hvm_domain_relinquish_resources(struct domain *d); @@ -191,16 +207,16 @@ hvm_long_mode_enabled(struct vcpu *v) #define hvm_long_mode_enabled(v) (v,0) #endif - static inline int +static inline int hvm_pae_enabled(struct vcpu *v) { return hvm_funcs.pae_enabled(v); } static inline int -hvm_interrupts_enabled(struct vcpu *v) -{ - return hvm_funcs.interrupts_enabled(v); +hvm_interrupts_enabled(struct vcpu *v, enum hvm_intack type) +{ + return hvm_funcs.interrupts_enabled(v, type); } static inline int @@ -230,6 +246,13 @@ hvm_update_vtpr(struct vcpu *v, unsigned } void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3); + +static inline void +hvm_flush_guest_tlbs(void) +{ + if ( hvm_enabled ) + hvm_funcs.flush_guest_tlbs(); +} void hvm_hypercall_page_initialise(struct domain *d, void *hypercall_page); diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/irq.h --- a/xen/include/asm-x86/hvm/irq.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-x86/hvm/irq.h Wed Jun 20 12:49:27 2007 -0600 @@ -24,10 +24,10 @@ #include <xen/types.h> #include <xen/spinlock.h> +#include <asm/hvm/hvm.h> #include <asm/hvm/vpic.h> #include <asm/hvm/vioapic.h> #include <public/hvm/save.h> - struct hvm_irq { /* @@ -58,7 +58,6 @@ struct hvm_irq { HVMIRQ_callback_gsi, HVMIRQ_callback_pci_intx } callback_via_type; - uint32_t pad; /* So the next field will be aligned */ }; union { uint32_t gsi; @@ -115,9 +114,12 @@ void hvm_set_callback_irq_level(void); void hvm_set_callback_irq_level(void); void hvm_set_callback_via(struct domain *d, uint64_t via); -int cpu_get_interrupt(struct vcpu *v, int *type); -int cpu_has_pending_irq(struct vcpu *v); -int get_isa_irq_vector(struct vcpu *vcpu, int irq, int type); +/* Check/Acknowledge next pending interrupt. */ +enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v); +int hvm_vcpu_ack_pending_irq( + struct vcpu *v, enum hvm_intack type, int *vector); + +int get_isa_irq_vector(struct vcpu *vcpu, int irq, enum hvm_intack src); int is_isa_irq_masked(struct vcpu *v, int isa_irq); #endif /* __ASM_X86_HVM_IRQ_H__ */ diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-x86/hvm/support.h Wed Jun 20 12:49:27 2007 -0600 @@ -215,7 +215,6 @@ int hvm_load(struct domain *d, hvm_domai /* End of save/restore */ extern char hvm_io_bitmap[]; -extern int hvm_enabled; void hvm_enable(struct hvm_function_table *); void hvm_disable(void); diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/svm/asid.h --- a/xen/include/asm-x86/hvm/svm/asid.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-x86/hvm/svm/asid.h Wed Jun 20 12:49:27 2007 -0600 @@ -30,6 +30,7 @@ void svm_asid_init(struct cpuinfo_x86 *c void svm_asid_init(struct cpuinfo_x86 *c); void svm_asid_init_vcpu(struct vcpu *v); void svm_asid_inv_asid(struct vcpu *v); +void svm_asid_inc_generation(void); /* * ASID related, guest triggered events. 
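A note on the new flush_guest_tlbs() hook wired up above: on SVM it costs a single counter increment, because ASID allocation is generation-based; svm_asid_inc_generation() makes every previously handed-out ASID stale, so each vCPU picks up a fresh ASID (and with it a clean TLB view) at its next VMRUN. The sketch below is an editorial illustration of that generation scheme under assumed names (the example_* identifiers, struct layout, and exhaustion policy are all hypothetical); the real logic lives in xen/arch/x86/hvm/svm/asid.c.

    #include <stdint.h>

    /* Illustrative per-core ASID state (the real code keeps this per-CPU). */
    struct example_core {
        uint64_t generation;   /* bumped by the flush hook */
        uint32_t next_asid;    /* next ASID to hand out */
        uint32_t max_asid;     /* hardware limit, from CPUID on real SVM */
    };

    /* Per-VCPU tag: which ASID it holds, and from which generation. */
    struct example_vcpu {
        uint64_t generation;
        uint32_t asid;
    };

    /* What svm_asid_inc_generation() amounts to: one increment
     * invalidates every ASID assigned so far. */
    static void example_inc_generation(struct example_core *c)
    {
        c->generation++;
    }

    /* Called before VMRUN: keep the ASID if it is still current,
     * otherwise take a fresh one. When the ASID space is exhausted the
     * real code additionally asks VMRUN to flush the whole TLB before
     * numbers are recycled. */
    static uint32_t example_asid_for_vmrun(struct example_core *c,
                                           struct example_vcpu *v)
    {
        if ( v->generation == c->generation )
            return v->asid;              /* tagged TLB entries still valid */

        if ( c->next_asid > c->max_asid )
        {
            example_inc_generation(c);   /* recycle into a new generation */
            c->next_asid = 1;            /* ASID 0 is reserved for the host */
        }

        v->asid = c->next_asid++;
        v->generation = c->generation;
        return v->asid;
    }

This deferred design is also why vmx_flush_guest_tlbs() above can be an empty function: VMX has no tagged TLB support here, so merely being in Xen guarantees a VMRESUME, which flushes guest entries anyway.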
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vcpu.h --- a/xen/include/asm-x86/hvm/vcpu.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-x86/hvm/vcpu.h Wed Jun 20 12:49:27 2007 -0600 @@ -30,11 +30,13 @@ struct hvm_vcpu { unsigned long hw_cr3; /* value we give to HW to use */ - unsigned long ioflags; struct hvm_io_op io_op; struct vlapic vlapic; s64 cache_tsc_offset; u64 guest_time; + + /* Is an NMI pending for delivery to this VCPU core? */ + bool_t nmi_pending; /* NB. integrate flag with save/restore */ /* Lock and list for virtual platform timers. */ spinlock_t tm_lock; diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vlapic.h --- a/xen/include/asm-x86/hvm/vlapic.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-x86/hvm/vlapic.h Wed Jun 20 12:49:27 2007 -0600 @@ -76,7 +76,7 @@ int vlapic_find_highest_irr(struct vlapi int vlapic_find_highest_irr(struct vlapic *vlapic); int vlapic_has_interrupt(struct vcpu *v); -int cpu_get_apic_interrupt(struct vcpu *v, int *mode); +int cpu_get_apic_interrupt(struct vcpu *v); int vlapic_init(struct vcpu *v); void vlapic_destroy(struct vcpu *v); diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Wed Jun 20 12:49:27 2007 -0600 @@ -336,9 +336,16 @@ static inline void vmx_inject_sw_excepti instruction_len); } -static inline void vmx_inject_extint(struct vcpu *v, int trap, int error_code) -{ - __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code, 0); +static inline void vmx_inject_extint(struct vcpu *v, int trap) +{ + __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, + VMX_DELIVER_NO_ERROR_CODE, 0); +} + +static inline void vmx_inject_nmi(struct vcpu *v) +{ + __vmx_inject_exception(v, 2, INTR_TYPE_NMI, + VMX_DELIVER_NO_ERROR_CODE, 0); } #endif /* __ASM_X86_HVM_VMX_VMX_H__ */ diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vpic.h --- a/xen/include/asm-x86/hvm/vpic.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-x86/hvm/vpic.h Wed Jun 20 12:49:27 2007 -0600 @@ -32,7 +32,7 @@ void vpic_irq_positive_edge(struct domai void vpic_irq_positive_edge(struct domain *d, int irq); void vpic_irq_negative_edge(struct domain *d, int irq); void vpic_init(struct domain *d); -int cpu_get_pic_interrupt(struct vcpu *v, int *type); +int cpu_get_pic_interrupt(struct vcpu *v); int is_periodic_irq(struct vcpu *v, int irq, int type); #endif /* __ASM_X86_HVM_VPIC_H__ */ diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vpt.h --- a/xen/include/asm-x86/hvm/vpt.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/asm-x86/hvm/vpt.h Wed Jun 20 12:49:27 2007 -0600 @@ -29,6 +29,7 @@ #include <xen/timer.h> #include <xen/list.h> #include <asm/hvm/vpic.h> +#include <asm/hvm/irq.h> #include <public/hvm/save.h> struct HPETState; @@ -119,7 +120,7 @@ void pt_freeze_time(struct vcpu *v); void pt_freeze_time(struct vcpu *v); void pt_thaw_time(struct vcpu *v); void pt_update_irq(struct vcpu *v); -void pt_intr_post(struct vcpu *v, int vector, int type); +void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src); void pt_reset(struct vcpu *v); void pt_migrate(struct vcpu *v); void create_periodic_time( diff -r c20bc60f9243 -r 810885428743 xen/include/xen/compat.h --- a/xen/include/xen/compat.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/xen/compat.h Wed Jun 20 12:49:27 2007 -0600 @@ -44,9 +44,10 @@ * specifying an offset into the guest array. 
*/ #define copy_to_compat_offset(hnd, off, ptr, nr) ({ \ - const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \ - const typeof(*(ptr)) *const _y = (ptr); \ - copy_to_user(_x + (off), _y, sizeof(*_x) * (nr)); \ + const typeof(*(ptr)) *_s = (ptr); \ + char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c; \ + ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr))); \ + copy_to_user(_d + (off), _s, sizeof(*_s) * (nr)); \ }) /* @@ -54,9 +55,9 @@ * specifying an offset into the guest array. */ #define copy_from_compat_offset(ptr, hnd, off, nr) ({ \ - const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \ - const typeof(ptr) _y = (ptr); \ - copy_from_user(_y, _x + (off), sizeof(*_x) * (nr)); \ + const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \ + typeof(*(ptr)) *_d = (ptr); \ + copy_from_user(_d, _s + (off), sizeof(*_d) * (nr)); \ }) #define copy_to_compat(hnd, ptr, nr) \ @@ -67,16 +68,19 @@ /* Copy sub-field of a structure to guest context via a compat handle. */ #define copy_field_to_compat(hnd, ptr, field) ({ \ - typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ - const typeof((ptr)->field) *const _y = &(ptr)->field; \ - copy_to_user(_x, _y, sizeof(*_x)); \ + const typeof(&(ptr)->field) _s = &(ptr)->field; \ + void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ + ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field == \ + &(ptr)->field)); \ + copy_to_user(_d, _s, sizeof(*_s)); \ }) /* Copy sub-field of a structure from guest context via a compat handle. */ #define copy_field_from_compat(ptr, hnd, field) ({ \ - typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ - typeof((ptr)->field) *const _y = &(ptr)->field; \ - copy_from_user(_y, _x, sizeof(*_x)); \ + const typeof(&(ptr)->field) _s = \ + &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ + typeof(&(ptr)->field) _d = &(ptr)->field; \ + copy_from_user(_d, _s, sizeof(*_d)); \ }) /* @@ -84,18 +88,20 @@ * Allows use of faster __copy_* functions. 
*/ #define compat_handle_okay(hnd, nr) \ - compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr), sizeof(**(hnd)._)) + compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr), \ + sizeof(**(hnd)._)) #define __copy_to_compat_offset(hnd, off, ptr, nr) ({ \ - const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \ - const typeof(*(ptr)) *const _y = (ptr); \ - __copy_to_user(_x + (off), _y, sizeof(*_x) * (nr)); \ + const typeof(*(ptr)) *_s = (ptr); \ + char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c; \ + ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr))); \ + __copy_to_user(_d + (off), _s, sizeof(*_s) * (nr)); \ }) #define __copy_from_compat_offset(ptr, hnd, off, nr) ({ \ - const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \ - const typeof(ptr) _y = (ptr); \ - __copy_from_user(_y, _x + (off), sizeof(*_x) * (nr)); \ + const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \ + typeof(*(ptr)) *_d = (ptr); \ + __copy_from_user(_d, _s + (off), sizeof(*_d) * (nr)); \ }) #define __copy_to_compat(hnd, ptr, nr) \ @@ -105,15 +111,18 @@ __copy_from_compat_offset(ptr, hnd, 0, nr) #define __copy_field_to_compat(hnd, ptr, field) ({ \ - typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ - const typeof((ptr)->field) *const _y = &(ptr)->field; \ - __copy_to_user(_x, _y, sizeof(*_x)); \ + const typeof(&(ptr)->field) _s = &(ptr)->field; \ + void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ + ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field == \ + &(ptr)->field)); \ + __copy_to_user(_d, _s, sizeof(*_s)); \ }) #define __copy_field_from_compat(ptr, hnd, field) ({ \ - typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ - typeof((ptr)->field) *const _y = &(ptr)->field; \ - __copy_from_user(_y, _x, sizeof(*_x)); \ + const typeof(&(ptr)->field) _s = \ + &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ + typeof(&(ptr)->field) _d = &(ptr)->field; \ + __copy_from_user(_d, _s, sizeof(*_d)); \ }) @@ -169,7 +178,8 @@ int switch_compat(struct domain *); int switch_compat(struct domain *); int switch_native(struct domain *); -#define BITS_PER_GUEST_LONG(d) (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG) +#define BITS_PER_GUEST_LONG(d) \ + (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG) #else diff -r c20bc60f9243 -r 810885428743 xen/include/xen/xencomm.h --- a/xen/include/xen/xencomm.h Wed Jun 20 12:47:52 2007 -0600 +++ b/xen/include/xen/xencomm.h Wed Jun 20 12:49:27 2007 -0600 @@ -47,17 +47,17 @@ static inline unsigned long xencomm_inli ((hnd).p == NULL || xencomm_handle_is_null((hnd).p)) /* Offset the given guest handle into the array it refers to. */ -#define guest_handle_add_offset(hnd, nr) ({ \ - const typeof((hnd).p) _ptr; \ - xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr)); \ +#define guest_handle_add_offset(hnd, nr) ({ \ + const typeof((hnd).p) _ptr; \ + xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr)); \ }) /* Cast a guest handle to the specified type of handle. */ #define guest_handle_cast(hnd, type) ({ \ type *_x = (hnd).p; \ - XEN_GUEST_HANDLE(type) _y; \ - set_xen_guest_handle(_y, _x); \ - _y; \ + XEN_GUEST_HANDLE(type) _y; \ + set_xen_guest_handle(_y, _x); \ + _y; \ }) /* Since we run in real mode, we can safely access all addresses. 
That also @@ -87,29 +87,32 @@ static inline unsigned long xencomm_inli __copy_field_from_guest(ptr, hnd, field) #define __copy_to_guest_offset(hnd, idx, ptr, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - xencomm_copy_to_guest(_x, _y, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \ + const typeof(*(ptr)) *_s = (ptr); \ + void *_d = (hnd).p; \ + ((void)((hnd).p == (ptr))); \ + xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \ }) #define __copy_field_to_guest(hnd, ptr, field) ({ \ - const int _off = offsetof(typeof(*ptr), field); \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - xencomm_copy_to_guest(_x, _y, sizeof(*_x), sizeof(*_x)*(_off)); \ + unsigned int _off = offsetof(typeof(*(hnd).p), field); \ + const typeof(&(ptr)->field) _s = &(ptr)->field; \ + void *_d = (hnd).p; \ + ((void)(&(hnd).p->field == &(ptr)->field)); \ + xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off); \ }) #define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - xencomm_copy_from_guest(_y, _x, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \ + const typeof(*(ptr)) *_s = (hnd).p; \ + typeof(*(ptr)) *_d = (ptr); \ + xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \ }) #define __copy_field_from_guest(ptr, hnd, field) ({ \ - const int _off = offsetof(typeof(*ptr), field); \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - xencomm_copy_to_guest(_y, _x, sizeof(*_x), sizeof(*_x)*(_off)); \ + unsigned int _off = offsetof(typeof(*(hnd).p), field); \ + const void *_s = (hnd).p; \ + typeof(&(ptr)->field) _d = &(ptr)->field; \ + ((void)(&(hnd).p->field == &(ptr)->field)); \ + xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off); \ }) #endif /* __XENCOMM_H__ */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
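One closing illustration of the guest-access macro rework that runs through this changeset (guest_access.h, compat.h, xencomm.h): the rewritten copy macros all gain a dead pointer comparison that costs nothing at run time but forces the handle and the caller's pointer to have compatible types, plus a cast of the destination to a pointer-to-array so that offset arithmetic moves in whole-element strides. The stand-alone program below demonstrates the pattern with hypothetical example_* names; it is not the Xen header itself, though it leans on the same GCC extensions (typeof, statement expressions) that the headers use.

    #include <stdio.h>
    #include <string.h>

    struct widget { int a, b; };

    /* Stand-in for XEN_GUEST_HANDLE(widget): wrapping the raw pointer
     * in a struct keeps the element type attached to 'p'. */
    struct widget_handle { struct widget *p; };

    #define example_copy_to_guest_offset(hnd, off, ptr, nr) ({          \
        const typeof(*(ptr)) *_s = (ptr);                               \
        /* _d steps in whole-element strides, so _d + (off) lands on    \
         * an element boundary whatever the element type is. */         \
        char (*_d)[sizeof(*_s)] = (void *)(hnd).p;                      \
        /* Generates no code, but refuses to compile unless (hnd).p     \
         * and (ptr) point at compatible types. */                      \
        ((void)((hnd).p == (ptr)));                                     \
        memcpy(_d + (off), _s, sizeof(*_s) * (nr));                     \
    })

    int main(void)
    {
        struct widget dst[3] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
        struct widget src[2] = { { 1, 2 }, { 3, 4 } };
        struct widget_handle h = { dst };

        example_copy_to_guest_offset(h, 1, src, 2);   /* fills dst[1..2] */

        /* example_copy_to_guest_offset(h, 0, &h, 1); would not compile:
         * the dead comparison rejects a struct widget_handle *. */

        printf("%d %d\n", dst[1].a, dst[2].b);        /* prints "1 4" */
        return 0;
    }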