[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User Hollis Blanchard <hollisb@xxxxxxxxxx> # Date 1186066458 18000 # Node ID 04fb85a46dc555bc8f306dc98119858b7c5ad083 # Parent 976db28bcc43bfbb38728aa08e079e6c4d20b3bb # Parent 88bb0d305308a2cab31fd8559a6a2719db1ea55a merge with xen-unstable.hg --- tools/blktap/drivers/blktapctrl.c | 44 +- tools/blktap/lib/blktaplib.h | 4 tools/firmware/hvmloader/smbios.c | 2 tools/libxc/ia64/Makefile | 2 tools/libxc/ia64/dom_fw_acpi.c | 13 tools/python/xen/util/acmpolicy.py | 7 tools/xenstore/utils.c | 80 ---- tools/xenstore/utils.h | 27 - tools/xenstore/xenstored_core.c | 9 tools/xenstore/xenstored_domain.c | 9 tools/xenstore/xs_tdb_dump.c | 2 tools/xm-test/lib/XmTestLib/acm.py | 4 tools/xm-test/tests/security-acm/07_security-acm_pol_update.py | 9 tools/xm-test/tests/security-acm/09_security-acm_pol_update.py | 9 xen/arch/ia64/xen/dom_fw_common.c | 11 xen/arch/ia64/xen/dom_fw_dom0.c | 13 xen/arch/x86/acpi/boot.c | 15 xen/arch/x86/domain_build.c | 7 xen/arch/x86/hvm/instrlen.c | 113 +++--- xen/arch/x86/hvm/platform.c | 14 xen/arch/x86/hvm/svm/intr.c | 83 ++-- xen/arch/x86/hvm/svm/svm.c | 87 ++--- xen/arch/x86/hvm/vmx/intr.c | 78 +--- xen/arch/x86/hvm/vmx/vmcs.c | 17 - xen/arch/x86/hvm/vmx/vmx.c | 167 +++------- xen/arch/x86/mm/shadow/multi.c | 2 xen/common/libelf/libelf-dominfo.c | 101 +++++- xen/common/libelf/libelf-loader.c | 44 ++ xen/common/libelf/libelf-tools.c | 30 + xen/drivers/acpi/tables.c | 154 +++++++++ xen/include/asm-ia64/dom_fw_common.h | 1 xen/include/asm-x86/hvm/hvm.h | 70 +++- xen/include/asm-x86/hvm/svm/vmcb.h | 8 xen/include/asm-x86/hvm/vmx/vmcs.h | 7 xen/include/asm-x86/hvm/vmx/vmx.h | 36 -- xen/include/public/libelf.h | 76 ++-- xen/include/xen/acpi.h | 3 37 files changed, 797 insertions(+), 561 deletions(-) diff -r 976db28bcc43 -r 04fb85a46dc5 tools/blktap/drivers/blktapctrl.c --- a/tools/blktap/drivers/blktapctrl.c Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/blktap/drivers/blktapctrl.c Thu Aug 02 09:54:18 2007 -0500 @@ -42,6 +42,7 @@ #include <errno.h> #include <sys/types.h> #include <linux/types.h> +#include <sys/wait.h> #include <signal.h> #include <fcntl.h> #include <sys/poll.h> @@ -472,11 +473,38 @@ static int read_msg(int fd, int msgtype, } +int launch_tapdisk(char *wrctldev, char *rdctldev) +{ + char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL }; + pid_t child; + + if ((child = fork()) < 0) + return -1; + + if (!child) { + int i; + for (i = 0 ; i < sysconf(_SC_OPEN_MAX) ; i++) + if (i != STDIN_FILENO && + i != STDOUT_FILENO && + i != STDERR_FILENO) + close(i); + + execvp("tapdisk", argv); + _exit(1); + } else { + pid_t got; + do { + got = waitpid(child, NULL, 0); + } while (got != child); + } + return 0; +} + int blktapctrl_new_blkif(blkif_t *blkif) { blkif_info_t *blk; int major, minor, fd_read, fd_write, type, new; - char *rdctldev, *wrctldev, *cmd, *ptr; + char *rdctldev, *wrctldev, *ptr; image_t *image; blkif_t *exist = NULL; static uint16_t next_cookie = 0; @@ -504,12 +532,6 @@ int blktapctrl_new_blkif(blkif_t *blkif) free(rdctldev); return -1; } - if (asprintf(&cmd, "tapdisk %s %s", wrctldev, rdctldev) == -1) { - free(rdctldev); - free(wrctldev); - return -1; - } - blkif->fds[READ] = open_ctrl_socket(rdctldev); blkif->fds[WRITE] = open_ctrl_socket(wrctldev); @@ -517,15 +539,14 @@ int blktapctrl_new_blkif(blkif_t *blkif) goto fail; /*launch the new process*/ - DPRINTF("Launching process, CMDLINE [%s]\n",cmd); - if (system(cmd) == -1) { - DPRINTF("Unable to fork, cmdline: [%s]\n",cmd); + DPRINTF("Launching process, CMDLINE [tapdisk %s %s]\n",wrctldev, rdctldev); + if (launch_tapdisk(wrctldev, rdctldev) == -1) { + DPRINTF("Unable to fork, cmdline: [tapdisk %s %s]\n",wrctldev, rdctldev); return -1; } free(rdctldev); free(wrctldev); - free(cmd); } else { DPRINTF("Process exists!\n"); blkif->fds[READ] = exist->fds[READ]; @@ -605,7 +626,6 @@ int open_ctrl_socket(char *devname) { int ret; int ipc_fd; - char *cmd; fd_set socks; struct timeval timeout; diff -r 976db28bcc43 -r 04fb85a46dc5 tools/blktap/lib/blktaplib.h --- a/tools/blktap/lib/blktaplib.h Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/blktap/lib/blktaplib.h Thu Aug 02 09:54:18 2007 -0500 @@ -169,12 +169,14 @@ typedef struct image { unsigned int info; } image_t; +/* 16-byte message header, immediately followed by message payload. */ typedef struct msg_hdr { - uint16_t type; + uint16_t type; uint16_t len; uint16_t drivertype; uint16_t cookie; uint8_t readonly; + uint8_t pad[7]; } msg_hdr_t; typedef struct msg_newdev { diff -r 976db28bcc43 -r 04fb85a46dc5 tools/firmware/hvmloader/smbios.c --- a/tools/firmware/hvmloader/smbios.c Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/firmware/hvmloader/smbios.c Thu Aug 02 09:54:18 2007 -0500 @@ -169,7 +169,7 @@ hvm_write_smbios_tables(void) /* temporary variables used to build up Xen version string */ char *p = NULL; /* points to next point of insertion */ unsigned len = 0; /* length of string already composed */ - char *tmp = NULL; /* holds result of itoa() */ + char tmp[16]; /* holds result of itoa() */ unsigned tmp_len; /* length of next string to add */ hypercall_xen_version(XENVER_guest_handle, uuid); diff -r 976db28bcc43 -r 04fb85a46dc5 tools/libxc/ia64/Makefile --- a/tools/libxc/ia64/Makefile Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/libxc/ia64/Makefile Thu Aug 02 09:54:18 2007 -0500 @@ -5,6 +5,8 @@ GUEST_SRCS-y += ia64/xc_ia64_linux_resto GUEST_SRCS-y += ia64/xc_ia64_linux_restore.c GUEST_SRCS-y += ia64/xc_dom_ia64_util.c +GUEST_SRCS-y += ia64/dom_fw_acpi.c + DOMFW_SRCS_BASE := dom_fw_common.c dom_fw_domu.c dom_fw_asm.S DOMFW_SRCS := $(addprefix ia64/, $(DOMFW_SRCS_BASE)) $(DOMFW_SRCS): diff -r 976db28bcc43 -r 04fb85a46dc5 tools/libxc/ia64/dom_fw_acpi.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/ia64/dom_fw_acpi.c Thu Aug 02 09:54:18 2007 -0500 @@ -0,0 +1,13 @@ +#include <inttypes.h> +#include <xen/acpi.h> + +uint8_t +generate_acpi_checksum(void *tbl, unsigned long len) +{ + uint8_t *ptr, sum = 0; + + for ( ptr = tbl; len > 0 ; len--, ptr++ ) + sum += *ptr; + + return 0 - sum; +} diff -r 976db28bcc43 -r 04fb85a46dc5 tools/python/xen/util/acmpolicy.py --- a/tools/python/xen/util/acmpolicy.py Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/python/xen/util/acmpolicy.py Thu Aug 02 09:54:18 2007 -0500 @@ -818,12 +818,13 @@ class ACMPolicy(XSPolicy): if successful,the policy's flags will indicate that the policy is the one loaded into the hypervisor """ - (ret, output) = commands.getstatusoutput( + if not self.isloaded(): + (ret, output) = commands.getstatusoutput( security.xensec_tool + " loadpolicy " + self.get_filename(".bin")) - if ret != 0: - return -xsconstants.XSERR_POLICY_LOAD_FAILED + if ret != 0: + return -xsconstants.XSERR_POLICY_LOAD_FAILED return xsconstants.XSERR_SUCCESS def isloaded(self): diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xenstore/utils.c --- a/tools/xenstore/utils.c Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/xenstore/utils.c Thu Aug 02 09:54:18 2007 -0500 @@ -8,20 +8,19 @@ #include <fcntl.h> #include <sys/types.h> #include <signal.h> - #include "utils.h" void xprintf(const char *fmt, ...) { - static FILE *out = NULL; va_list args; - if (!out) - out = stderr; + + if (!stderr) + return; /* could trace()? */ va_start(args, fmt); - vfprintf(out, fmt, args); + vfprintf(stderr, fmt, args); va_end(args); - fflush(out); + fflush(stderr); } void barf(const char *fmt, ...) @@ -61,72 +60,3 @@ void barf_perror(const char *fmt, ...) } exit(1); } - -void *_realloc_array(void *ptr, size_t size, size_t num) -{ - if (num >= SIZE_MAX/size) - return NULL; - return realloc_nofail(ptr, size * num); -} - -void *realloc_nofail(void *ptr, size_t size) -{ - ptr = realloc(ptr, size); - if (ptr) - return ptr; - barf("realloc of %zu failed", size); -} - -void *malloc_nofail(size_t size) -{ - void *ptr = malloc(size); - if (ptr) - return ptr; - barf("malloc of %zu failed", size); -} - -/* This version adds one byte (for nul term) */ -void *grab_file(const char *filename, unsigned long *size) -{ - unsigned int max = 16384; - int ret, fd; - void *buffer; - - if (streq(filename, "-")) - fd = dup(STDIN_FILENO); - else - fd = open(filename, O_RDONLY, 0); - - if (fd == -1) - return NULL; - - buffer = malloc(max+1); - if (!buffer) - goto error; - *size = 0; - while ((ret = read(fd, buffer + *size, max - *size)) > 0) { - *size += ret; - if (*size == max) { - void *nbuffer; - max *= 2; - nbuffer = realloc(buffer, max + 1); - if (!nbuffer) - goto error; - buffer = nbuffer; - } - } - if (ret < 0) - goto error; - ((char *)buffer)[*size] = '\0'; - close(fd); - return buffer; -error: - free(buffer); - close(fd); - return NULL; -} - -void release_file(void *data, unsigned long size __attribute__((unused))) -{ - free(data); -} diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xenstore/utils.h --- a/tools/xenstore/utils.h Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/xenstore/utils.h Thu Aug 02 09:54:18 2007 -0500 @@ -21,39 +21,12 @@ static inline bool strends(const char *a #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -#define ___stringify(x) #x -#define __stringify(x) ___stringify(x) - -/* Convenient wrappers for malloc and realloc. Use them. */ -#define new(type) ((type *)malloc_nofail(sizeof(type))) -#define new_array(type, num) realloc_array((type *)0, (num)) -#define realloc_array(ptr, num) ((__typeof__(ptr))_realloc_array((ptr), sizeof((*ptr)), (num))) - -void *malloc_nofail(size_t size); -void *realloc_nofail(void *ptr, size_t size); -void *_realloc_array(void *ptr, size_t size, size_t num); - void barf(const char *fmt, ...) __attribute__((noreturn)); void barf_perror(const char *fmt, ...) __attribute__((noreturn)); - -/* This version adds one byte (for nul term) */ -void *grab_file(const char *filename, unsigned long *size); -void release_file(void *data, unsigned long size); - -/* Signal handling: returns fd to listen on. */ -int signal_to_fd(int signal); -void close_signal(int fd); void xprintf(const char *fmt, ...); #define eprintf(_fmt, _args...) xprintf("[ERR] %s" _fmt, __FUNCTION__, ##_args) -#define iprintf(_fmt, _args...) xprintf("[INF] %s" _fmt, __FUNCTION__, ##_args) - -#ifdef DEBUG -#define dprintf(_fmt, _args...) xprintf("[DBG] %s" _fmt, __FUNCTION__, ##_args) -#else -#define dprintf(_fmt, _args...) ((void)0) -#endif /* * Mux errno values onto returned pointers. diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/xenstore/xenstored_core.c Thu Aug 02 09:54:18 2007 -0500 @@ -1820,7 +1820,9 @@ int main(int argc, char *argv[]) if (pidfile) write_pidfile(pidfile); - talloc_enable_leak_report_full(); + /* Talloc leak reports go to stderr, which is closed if we fork. */ + if (!dofork) + talloc_enable_leak_report_full(); /* Create sockets for them to listen to. */ sock = talloc(talloc_autofree_context(), int); @@ -1881,6 +1883,11 @@ int main(int argc, char *argv[]) close(STDIN_FILENO); close(STDOUT_FILENO); close(STDERR_FILENO); + + /* Get ourselves a nice xenstored crash if these are used. */ + stdin = NULL; + stdout = NULL; + stderr = NULL; } signal(SIGHUP, trigger_reopen_log); diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xenstore/xenstored_domain.c --- a/tools/xenstore/xenstored_domain.c Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/xenstore/xenstored_domain.c Thu Aug 02 09:54:18 2007 -0500 @@ -621,13 +621,8 @@ void domain_entry_fix(unsigned int domid struct domain *d; d = find_domain_by_domid(domid); - if (d) { - if ((d->nbentry += num) < 0) { - eprintf("invalid domain entry number %d", - d->nbentry); - d->nbentry = 0; - } - } + if (d && ((d->nbentry += num) < 0)) + d->nbentry = 0; } int domain_entry(struct connection *conn) diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xenstore/xs_tdb_dump.c --- a/tools/xenstore/xs_tdb_dump.c Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/xenstore/xs_tdb_dump.c Thu Aug 02 09:54:18 2007 -0500 @@ -4,7 +4,7 @@ #include <fcntl.h> #include <stdio.h> #include <stdarg.h> - +#include <string.h> #include "xs_lib.h" #include "tdb.h" #include "talloc.h" diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xm-test/lib/XmTestLib/acm.py --- a/tools/xm-test/lib/XmTestLib/acm.py Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/xm-test/lib/XmTestLib/acm.py Thu Aug 02 09:54:18 2007 -0500 @@ -67,6 +67,10 @@ def ACMLoadPolicy(policy='xm-test'): if main.serverType == main.SERVER_XEN_API: ACMLoadPolicy_XenAPI() else: + cmd='xm dumppolicy | grep -E "^POLICY REFERENCE = ' + policy + '.$"' + s, o = traceCommand(cmd) + if o != "": + return s, o = traceCommand("xm makepolicy %s" % (policy)) if s != 0: FAIL("Need to be able to do 'xm makepolicy %s' but could not" % diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xm-test/tests/security-acm/07_security-acm_pol_update.py --- a/tools/xm-test/tests/security-acm/07_security-acm_pol_update.py Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/xm-test/tests/security-acm/07_security-acm_pol_update.py Thu Aug 02 09:54:18 2007 -0500 @@ -12,10 +12,19 @@ from xen.util import acmpolicy, security from xen.util import acmpolicy, security, xsconstants from xen.util.acmpolicy import ACMPolicy from xen.xend.XendDomain import DOM0_UUID +from XmTestLib.acm import * import commands import os import base64 + +if not isACMEnabled(): + SKIP("Not running this test since ACM not enabled.") + +try: + session = xapi.connect() +except: + SKIP("Skipping this test since xm is not using the Xen-API.") xm_test = {} xm_test['policyname'] = "xm-test" diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xm-test/tests/security-acm/09_security-acm_pol_update.py --- a/tools/xm-test/tests/security-acm/09_security-acm_pol_update.py Thu Aug 02 09:50:55 2007 -0500 +++ b/tools/xm-test/tests/security-acm/09_security-acm_pol_update.py Thu Aug 02 09:54:18 2007 -0500 @@ -7,6 +7,7 @@ from XmTestLib import xapi from XmTestLib.XenAPIDomain import XmTestAPIDomain +from XmTestLib.acm import * from XmTestLib import * from xen.xend import XendAPIConstants from xen.util import security, xsconstants @@ -15,6 +16,14 @@ import base64 import base64 import struct import time + +if not isACMEnabled(): + SKIP("Not running this test since ACM not enabled.") + +try: + session = xapi.connect() +except: + SKIP("Skipping this test since xm is not using the Xen-API.") def typestoxml(types): res = "" diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/ia64/xen/dom_fw_common.c --- a/xen/arch/ia64/xen/dom_fw_common.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/ia64/xen/dom_fw_common.c Thu Aug 02 09:54:18 2007 -0500 @@ -207,17 +207,6 @@ print_md(efi_memory_desc_t *md) printk("(%luKB)\n", size >> 10); } -uint8_t -generate_acpi_checksum(void *tbl, unsigned long len) -{ - uint8_t *ptr, sum = 0; - - for (ptr = tbl; len > 0 ; len--, ptr++) - sum += *ptr; - - return 0 - sum; -} - struct fake_acpi_tables { struct acpi20_table_rsdp rsdp; struct xsdt_descriptor_rev2 xsdt; diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/ia64/xen/dom_fw_dom0.c --- a/xen/arch/ia64/xen/dom_fw_dom0.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/ia64/xen/dom_fw_dom0.c Thu Aug 02 09:54:18 2007 -0500 @@ -103,6 +103,7 @@ acpi_update_madt_checksum(unsigned long /* base is physical address of acpi table */ static void __init touch_acpi_table(void) { + int result; lsapic_nbr = 0; if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, 0) < 0) @@ -110,6 +111,18 @@ static void __init touch_acpi_table(void if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_patch_plat_int_src, 0) < 0) printk("Error parsing MADT - no PLAT_INT_SRC entries\n"); + + result = acpi_table_disable(ACPI_SRAT); + if ( result == 0 ) + printk("Success Disabling SRAT\n"); + else if ( result != -ENOENT ) + printk("ERROR: Failed Disabling SRAT\n"); + + result = acpi_table_disable(ACPI_SLIT); + if ( result == 0 ) + printk("Success Disabling SLIT\n"); + else if ( result != -ENOENT ) + printk("ERROR: Failed Disabling SLIT\n"); acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum); diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/acpi/boot.c --- a/xen/arch/x86/acpi/boot.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/x86/acpi/boot.c Thu Aug 02 09:54:18 2007 -0500 @@ -371,11 +371,18 @@ extern u32 pmtmr_ioport; #ifdef CONFIG_ACPI_SLEEP /* Get pm1x_cnt and pm1x_evt information for ACPI sleep */ -static int __init +static void __init acpi_fadt_parse_sleep_info(struct fadt_descriptor_rev2 *fadt) { + struct acpi_table_rsdp *rsdp; + unsigned long rsdp_phys; struct facs_descriptor_rev2 *facs = NULL; uint64_t facs_pa; + + rsdp_phys = acpi_find_rsdp(); + if (!rsdp_phys || acpi_disabled) + goto bad; + rsdp = __va(rsdp_phys); if (fadt->revision >= FADT2_REVISION_ID) { /* Sanity check on FADT Rev. 2 */ @@ -432,8 +439,7 @@ acpi_fadt_parse_sleep_info(struct fadt_d "FACS is shorter than ACPI spec allow: 0x%x", facs->length); - if ((acpi_rsdp_rev < 2) || - (facs->length < 32)) { + if ((rsdp->revision < 2) || (facs->length < 32)) { acpi_sinfo.wakeup_vector = facs_pa + offsetof(struct facs_descriptor_rev2, firmware_waking_vector); @@ -451,10 +457,9 @@ acpi_fadt_parse_sleep_info(struct fadt_d acpi_sinfo.pm1a_cnt, acpi_sinfo.pm1b_cnt, acpi_sinfo.pm1a_evt, acpi_sinfo.pm1b_cnt, acpi_sinfo.wakeup_vector, acpi_sinfo.vector_width); - return 0; + return; bad: memset(&acpi_sinfo, 0, sizeof(acpi_sinfo)); - return 0; } #endif diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/x86/domain_build.c Thu Aug 02 09:54:18 2007 -0500 @@ -316,6 +316,9 @@ int __init construct_dom0( parms.pae ? ", PAE" : "", elf_msb(&elf) ? "msb" : "lsb", elf.pstart, elf.pend); + if ( parms.bsd_symtab ) + printk(" Dom0 symbol map 0x%" PRIx64 " -> 0x%" PRIx64 "\n", + elf.sstart, elf.send); if ( !compatible ) { @@ -385,7 +388,7 @@ int __init construct_dom0( v_start = parms.virt_base; vkern_start = parms.virt_kstart; vkern_end = parms.virt_kend; - vinitrd_start = round_pgup(vkern_end); + vinitrd_start = round_pgup(parms.virt_end); vinitrd_end = vinitrd_start + initrd_len; vphysmap_start = round_pgup(vinitrd_end); vphysmap_end = vphysmap_start + (nr_pages * (!is_pv_32on64_domain(d) ? @@ -795,7 +798,7 @@ int __init construct_dom0( /* Copy the OS image and free temporary buffer. */ elf.dest = (void*)vkern_start; - elf_load_binary(&elf); + elf_xen_dom_load_binary(&elf, &parms); if ( UNSET_ADDR != parms.virt_hypercall ) { diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/instrlen.c --- a/xen/arch/x86/hvm/instrlen.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/x86/hvm/instrlen.c Thu Aug 02 09:54:18 2007 -0500 @@ -7,14 +7,6 @@ * * Essentially a very, very stripped version of Keir Fraser's work in * x86_emulate.c. Used for MMIO. - */ - -/* - * TODO: The way in which we use hvm_instruction_length is very inefficient as - * it now stands. It will be worthwhile to return the actual instruction buffer - * along with the instruction length since one of the reasons we are getting - * the instruction length is to know how many instruction bytes we need to - * fetch. */ #include <xen/config.h> @@ -194,31 +186,51 @@ static uint8_t twobyte_table[256] = { /* * insn_fetch - fetch the next byte from instruction stream */ -#define insn_fetch() \ -({ uint8_t _x; \ - if ( length >= 15 ) \ - return -1; \ - if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) { \ - gdprintk(XENLOG_WARNING, \ - "Cannot read from address %lx (eip %lx, mode %d)\n", \ - pc, org_pc, address_bytes); \ - return -1; \ - } \ - pc += 1; \ - length += 1; \ - _x; \ +#define insn_fetch() \ +({ uint8_t _x; \ + if ( length >= 15 ) \ + return -1; \ + if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) { \ + unsigned long err; \ + struct segment_register cs; \ + gdprintk(XENLOG_WARNING, \ + "Cannot read from address %lx (eip %lx, mode %d)\n", \ + pc, org_pc, address_bytes); \ + err = 0; /* Must be not-present: we don't enforce reserved bits */ \ + if ( hvm_nx_enabled(current) ) \ + err |= PFEC_insn_fetch; \ + hvm_get_segment_register(current, x86_seg_cs, &cs); \ + if ( cs.attr.fields.dpl != 0 ) \ + err |= PFEC_user_mode; \ + hvm_inject_exception(TRAP_page_fault, err, pc); \ + return -1; \ + } \ + if ( buf ) \ + buf[length] = _x; \ + length += 1; \ + pc += 1; \ + _x; \ }) +#define insn_skip(_n) do { \ + int _i; \ + for ( _i = 0; _i < (_n); _i++) { \ + (void) insn_fetch(); \ + } \ +} while (0) + /** - * hvm_instruction_length - returns the current instructions length + * hvm_instruction_fetch - read the current instruction and return its length * * @org_pc: guest instruction pointer - * @mode: guest operating mode + * @address_bytes: guest address width + * @buf: (optional) buffer to load actual instruction bytes into * - * EXTERNAL this routine calculates the length of the current instruction - * pointed to by org_pc. The guest state is _not_ changed by this routine. + * Doesn't increment the guest's instruction pointer, but may + * issue faults to the guest. Returns -1 on failure. */ -int hvm_instruction_length(unsigned long org_pc, int address_bytes) +int hvm_instruction_fetch(unsigned long org_pc, int address_bytes, + unsigned char *buf) { uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0; unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp; @@ -317,18 +329,13 @@ done_prefixes: { case 0: if ( modrm_rm == 6 ) - { - length += 2; - pc += 2; /* skip disp16 */ - } + insn_skip(2); /* skip disp16 */ break; case 1: - length += 1; - pc += 1; /* skip disp8 */ + insn_skip(1); /* skip disp8 */ break; case 2: - length += 2; - pc += 2; /* skip disp16 */ + insn_skip(2); /* skip disp16 */ break; } } @@ -340,33 +347,19 @@ done_prefixes: case 0: if ( (modrm_rm == 4) && ((insn_fetch() & 7) == 5) ) - { - length += 4; - pc += 4; /* skip disp32 specified by SIB.base */ - } + insn_skip(4); /* skip disp32 specified by SIB.base */ else if ( modrm_rm == 5 ) - { - length += 4; - pc += 4; /* skip disp32 */ - } + insn_skip(4); /* skip disp32 */ break; case 1: if ( modrm_rm == 4 ) - { - length += 1; - pc += 1; - } - length += 1; - pc += 1; /* skip disp8 */ + insn_skip(1); + insn_skip(1); /* skip disp8 */ break; case 2: if ( modrm_rm == 4 ) - { - length += 1; - pc += 1; - } - length += 4; - pc += 4; /* skip disp32 */ + insn_skip(1); + insn_skip(4); /* skip disp32 */ break; } } @@ -387,12 +380,10 @@ done_prefixes: tmp = (d & ByteOp) ? 1 : op_bytes; if ( tmp == 8 ) tmp = 4; /* NB. Immediates are sign-extended as necessary. */ - length += tmp; - pc += tmp; + insn_skip(tmp); break; case SrcImmByte: - length += 1; - pc += 1; + insn_skip(1); break; } @@ -402,8 +393,7 @@ done_prefixes: switch ( b ) { case 0xa0 ... 0xa3: /* mov */ - length += ad_bytes; - pc += ad_bytes; /* skip src/dst displacement */ + insn_skip(ad_bytes); /* skip src/dst displacement */ break; case 0xf6 ... 0xf7: /* Grp3 */ switch ( modrm_reg ) @@ -412,8 +402,7 @@ done_prefixes: /* Special case in Grp3: test has an immediate source operand. */ tmp = (d & ByteOp) ? 1 : op_bytes; if ( tmp == 8 ) tmp = 4; - length += tmp; - pc += tmp; + insn_skip(tmp); break; } break; diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/x86/hvm/platform.c Thu Aug 02 09:54:18 2007 -0500 @@ -1041,17 +1041,13 @@ void handle_mmio(unsigned long gpa) /* real or vm86 modes */ address_bytes = 2; inst_addr = hvm_get_segment_base(v, x86_seg_cs) + regs->eip; - inst_len = hvm_instruction_length(inst_addr, address_bytes); + memset(inst, 0, MAX_INST_LEN); + inst_len = hvm_instruction_fetch(inst_addr, address_bytes, inst); if ( inst_len <= 0 ) { - printk("handle_mmio: failed to get instruction length\n"); - domain_crash_synchronous(); - } - - memset(inst, 0, MAX_INST_LEN); - if ( inst_copy_from_guest(inst, inst_addr, inst_len) != inst_len ) { - printk("handle_mmio: failed to copy instruction\n"); - domain_crash_synchronous(); + gdprintk(XENLOG_DEBUG, "handle_mmio: failed to get instruction\n"); + /* hvm_instruction_fetch() will have injected a #PF; get out now */ + return; } if ( mmio_decode(address_bytes, inst, mmio_op, &ad_size, diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/x86/hvm/svm/intr.c Thu Aug 02 09:54:18 2007 -0500 @@ -58,7 +58,7 @@ static void svm_inject_nmi(struct vcpu * event.bytes = 0; event.fields.v = 1; - event.fields.type = EVENTTYPE_NMI; + event.fields.type = X86_EVENTTYPE_NMI; event.fields.vector = 2; ASSERT(vmcb->eventinj.fields.v == 0); @@ -72,34 +72,39 @@ static void svm_inject_extint(struct vcp event.bytes = 0; event.fields.v = 1; - event.fields.type = EVENTTYPE_INTR; + event.fields.type = X86_EVENTTYPE_EXT_INTR; event.fields.vector = vector; ASSERT(vmcb->eventinj.fields.v == 0); vmcb->eventinj = event; } +static void enable_intr_window(struct vcpu *v, enum hvm_intack intr_source) +{ + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + + ASSERT(intr_source != hvm_intack_none); + + /* + * Create a dummy virtual interrupt to intercept as soon as the + * guest can accept the real interrupt. + * + * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt + * shadow. This is hard to do without hardware support. We should also + * track 'NMI blocking' from NMI injection until IRET. This can be done + * quite easily in software by intercepting the unblocking IRET. + */ + vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR; + HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1); + svm_inject_dummy_vintr(v); +} + asmlinkage void svm_intr_assist(void) { struct vcpu *v = current; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; enum hvm_intack intr_source; int intr_vector; - - /* - * Previous event delivery caused this intercept? - * This will happen if the injection is latched by the processor (hence - * clearing vintr.fields.irq or eventinj.v) but then subsequently a fault - * occurs (e.g., due to lack of shadow mapping of guest IDT or guest-kernel - * stack). - */ - if ( vmcb->exitintinfo.fields.v ) - { - vmcb->eventinj = vmcb->exitintinfo; - vmcb->exitintinfo.bytes = 0; - HVMTRACE_1D(REINJ_VIRQ, v, intr_vector); - return; - } /* Crank the handle on interrupt state. */ pt_update_irq(v); @@ -111,32 +116,23 @@ asmlinkage void svm_intr_assist(void) return; /* - * If the guest can't take an interrupt right now, create a 'fake' - * virtual interrupt on to intercept as soon as the guest _can_ take - * interrupts. Do not obtain the next interrupt from the vlapic/pic - * if unable to inject. - * - * Also do this if there is an injection already pending. This is - * because the event delivery can arbitrarily delay the injection - * of the vintr (for example, if the exception is handled via an - * interrupt gate, hence zeroing RFLAGS.IF). In the meantime: - * - the vTPR could be modified upwards, so we need to wait until the - * exception is delivered before we can safely decide that an - * interrupt is deliverable; and - * - the guest might look at the APIC/PIC state, so we ought not to - * have cleared the interrupt out of the IRR. - * - * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt - * shadow. This is hard to do without hardware support. We should also - * track 'NMI blocking' from NMI injection until IRET. This can be done - * quite easily in software by intercepting the unblocking IRET. + * Pending IRQs must be delayed if: + * 1. An event is already pending. This is despite the fact that SVM + * provides a VINTR delivery method quite separate from the EVENTINJ + * mechanism. The event delivery can arbitrarily delay the injection + * of the vintr (for example, if the exception is handled via an + * interrupt gate, hence zeroing RFLAGS.IF). In the meantime: + * - the vTPR could be modified upwards, so we need to wait until + * the exception is delivered before we can safely decide that an + * interrupt is deliverable; and + * - the guest might look at the APIC/PIC state, so we ought not to + * have cleared the interrupt out of the IRR. + * 2. The IRQ is masked. */ - if ( !hvm_interrupts_enabled(v, intr_source) || - vmcb->eventinj.fields.v ) + if ( unlikely(vmcb->eventinj.fields.v) || + !hvm_interrupts_enabled(v, intr_source) ) { - vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR; - HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1); - svm_inject_dummy_vintr(v); + enable_intr_window(v, intr_source); return; } } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) ); @@ -151,6 +147,11 @@ asmlinkage void svm_intr_assist(void) svm_inject_extint(v, intr_vector); pt_intr_post(v, intr_vector, intr_source); } + + /* Is there another IRQ to queue up behind this one? */ + intr_source = hvm_vcpu_has_pending_irq(v); + if ( unlikely(intr_source != hvm_intack_none) ) + enable_intr_window(v, intr_source); } /* diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/x86/hvm/svm/svm.c Thu Aug 02 09:54:18 2007 -0500 @@ -71,8 +71,8 @@ static void *root_vmcb[NR_CPUS] __read_m /* hardware assisted paging bits */ extern int opt_hap_enabled; -static void svm_inject_exception(struct vcpu *v, int trap, - int ev, int error_code) +static void svm_inject_exception( + struct vcpu *v, int trap, int ev, int error_code) { eventinj_t event; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; @@ -84,13 +84,11 @@ static void svm_inject_exception(struct event.bytes = 0; event.fields.v = 1; - event.fields.type = EVENTTYPE_EXCEPTION; + event.fields.type = X86_EVENTTYPE_HW_EXCEPTION; event.fields.vector = trap; event.fields.ev = ev; event.fields.errorcode = error_code; - ASSERT(vmcb->eventinj.fields.v == 0); - vmcb->eventinj = event; } @@ -362,21 +360,14 @@ int svm_vmcb_save(struct vcpu *v, struct c->sysenter_esp = vmcb->sysenter_esp; c->sysenter_eip = vmcb->sysenter_eip; - /* Save any event/interrupt that was being injected when we last exited. */ - if ( vmcb->exitintinfo.fields.v ) - { - c->pending_event = vmcb->exitintinfo.bytes & 0xffffffff; - c->error_code = vmcb->exitintinfo.fields.errorcode; - } - else if ( vmcb->eventinj.fields.v ) - { - c->pending_event = vmcb->eventinj.bytes & 0xffffffff; + c->pending_event = 0; + c->error_code = 0; + if ( vmcb->eventinj.fields.v && + hvm_event_needs_reinjection(vmcb->eventinj.fields.type, + vmcb->eventinj.fields.vector) ) + { + c->pending_event = (uint32_t)vmcb->eventinj.bytes; c->error_code = vmcb->eventinj.fields.errorcode; - } - else - { - c->pending_event = 0; - c->error_code = 0; } return 1; @@ -495,11 +486,11 @@ int svm_vmcb_restore(struct vcpu *v, str vmcb->sysenter_esp = c->sysenter_esp; vmcb->sysenter_eip = c->sysenter_eip; - /* update VMCB for nested paging restore */ - if ( paging_mode_hap(v->domain) ) { + if ( paging_mode_hap(v->domain) ) + { vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0; - vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 | - (HVM_CR4_HOST_MASK & ~X86_CR4_PAE); + vmcb->cr4 = (v->arch.hvm_svm.cpu_shadow_cr4 | + (HVM_CR4_HOST_MASK & ~X86_CR4_PAE)); vmcb->cr3 = c->cr3; vmcb->np_enable = 1; vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */ @@ -514,26 +505,23 @@ int svm_vmcb_restore(struct vcpu *v, str gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n", c->pending_event, c->error_code); - /* VMX uses a different type for #OF and #BP; fold into "Exception" */ - if ( c->pending_type == 6 ) - c->pending_type = 3; - /* Sanity check */ - if ( c->pending_type == 1 || c->pending_type > 4 - || c->pending_reserved != 0 ) + if ( (c->pending_type == 1) || (c->pending_type > 6) || + (c->pending_reserved != 0) ) { gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32"\n", c->pending_event); return -EINVAL; } - /* Put this pending event in exitintinfo and svm_intr_assist() - * will reinject it when we return to the guest. */ - vmcb->exitintinfo.bytes = c->pending_event; - vmcb->exitintinfo.fields.errorcode = c->error_code; + + if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) ) + { + vmcb->eventinj.bytes = c->pending_event; + vmcb->eventinj.fields.errorcode = c->error_code; + } } paging_update_paging_modes(v); - /* signal paging update to ASID handler */ - svm_asid_g_update_paging (v); + svm_asid_g_update_paging(v); return 0; @@ -965,10 +953,10 @@ static void svm_hvm_inject_exception( svm_inject_exception(v, trapnr, (errcode != -1), errcode); } -static int svm_event_injection_faulted(struct vcpu *v) -{ - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - return vmcb->exitintinfo.fields.v; +static int svm_event_pending(struct vcpu *v) +{ + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + return vmcb->eventinj.fields.v; } static struct hvm_function_table svm_function_table = { @@ -1000,7 +988,7 @@ static struct hvm_function_table svm_fun .inject_exception = svm_hvm_inject_exception, .init_ap_context = svm_init_ap_context, .init_hypercall_page = svm_init_hypercall_page, - .event_injection_faulted = svm_event_injection_faulted + .event_pending = svm_event_pending }; static void svm_npt_detect(void) @@ -1667,6 +1655,17 @@ static int svm_set_cr0(unsigned long val unsigned long old_base_mfn; HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value); + + if ( (u32)value != value ) + { + HVM_DBG_LOG(DBG_LEVEL_1, + "Guest attempts to set upper 32 bits in CR0: %lx", + value); + svm_inject_exception(v, TRAP_gp_fault, 1, 0); + return 0; + } + + value &= ~HVM_CR0_GUEST_RESERVED_BITS; /* ET is reserved and should be always be 1. */ value |= X86_CR0_ET; @@ -2420,6 +2419,7 @@ asmlinkage void svm_vmexit_handler(struc unsigned long eip; struct vcpu *v = current; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + eventinj_t eventinj; int inst_len, rc; exit_reason = vmcb->exitcode; @@ -2434,6 +2434,13 @@ asmlinkage void svm_vmexit_handler(struc perfc_incra(svmexits, exit_reason); eip = vmcb->rip; + + /* Event delivery caused this intercept? Queue for redelivery. */ + eventinj = vmcb->exitintinfo; + if ( unlikely(eventinj.fields.v) && + hvm_event_needs_reinjection(eventinj.fields.type, + eventinj.fields.vector) ) + vmcb->eventinj = eventinj; switch ( exit_reason ) { diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/vmx/intr.c --- a/xen/arch/x86/hvm/vmx/intr.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/x86/hvm/vmx/intr.c Thu Aug 02 09:54:18 2007 -0500 @@ -76,10 +76,9 @@ static void enable_intr_window(struct vc u32 *cpu_exec_control = &v->arch.hvm_vmx.exec_control; u32 ctl = CPU_BASED_VIRTUAL_INTR_PENDING; - if ( unlikely(intr_source == hvm_intack_none) ) - return; + ASSERT(intr_source != hvm_intack_none); - if ( unlikely(intr_source == hvm_intack_nmi) && cpu_has_vmx_vnmi ) + if ( (intr_source == hvm_intack_nmi) && cpu_has_vmx_vnmi ) { /* * We set MOV-SS blocking in lieu of STI blocking when delivering an @@ -131,68 +130,27 @@ asmlinkage void vmx_intr_assist(void) int intr_vector; enum hvm_intack intr_source; struct vcpu *v = current; - unsigned int idtv_info_field; - unsigned long inst_len; + unsigned int intr_info; + /* Crank the handle on interrupt state. */ pt_update_irq(v); - hvm_set_callback_irq_level(); - - update_tpr_threshold(vcpu_vlapic(v)); do { intr_source = hvm_vcpu_has_pending_irq(v); + if ( likely(intr_source == hvm_intack_none) ) + goto out; - if ( unlikely(v->arch.hvm_vmx.vector_injected) ) - { - v->arch.hvm_vmx.vector_injected = 0; - enable_intr_window(v, intr_source); - return; - } - - /* This could be moved earlier in the VMX resume sequence. */ - idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD); - if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) ) - { - /* See SDM 3B 25.7.1.1 and .2 for info about masking resvd bits. */ - __vmwrite(VM_ENTRY_INTR_INFO_FIELD, - idtv_info_field & ~INTR_INFO_RESVD_BITS_MASK); - - /* - * Safe: the length will only be interpreted for software - * exceptions and interrupts. If we get here then delivery of some - * event caused a fault, and this always results in defined - * VM_EXIT_INSTRUCTION_LEN. - */ - inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */ - __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len); - - if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */ - __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, - __vmread(IDT_VECTORING_ERROR_CODE)); - - /* - * Clear NMI-blocking interruptibility info if an NMI delivery - * faulted. Re-delivery will re-set it (see SDM 3B 25.7.1.2). - */ - if ( (idtv_info_field&INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI ) - __vmwrite(GUEST_INTERRUPTIBILITY_INFO, - __vmread(GUEST_INTERRUPTIBILITY_INFO) & - ~VMX_INTR_SHADOW_NMI); - - enable_intr_window(v, intr_source); - - HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field); - return; - } - - if ( likely(intr_source == hvm_intack_none) ) - return; - - if ( !hvm_interrupts_enabled(v, intr_source) ) + /* + * An event is already pending or the pending interrupt is masked? + * Then the pending interrupt must be delayed. + */ + intr_info = __vmread(VM_ENTRY_INTR_INFO); + if ( unlikely(intr_info & INTR_INFO_VALID_MASK) || + !hvm_interrupts_enabled(v, intr_source) ) { enable_intr_window(v, intr_source); - return; + goto out; } } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) ); @@ -206,6 +164,14 @@ asmlinkage void vmx_intr_assist(void) vmx_inject_extint(v, intr_vector); pt_intr_post(v, intr_vector, intr_source); } + + /* Is there another IRQ to queue up behind this one? */ + intr_source = hvm_vcpu_has_pending_irq(v); + if ( unlikely(intr_source != hvm_intack_none) ) + enable_intr_window(v, intr_source); + + out: + update_tpr_threshold(vcpu_vlapic(v)); } /* diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Thu Aug 02 09:54:18 2007 -0500 @@ -240,8 +240,23 @@ int vmx_cpu_up(void) { u32 eax, edx; int cpu = smp_processor_id(); + u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1; BUG_ON(!(read_cr4() & X86_CR4_VMXE)); + + /* + * Ensure the current processor operating mode meets + * the requred CRO fixed bits in VMX operation. + */ + cr0 = read_cr0(); + rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0); + rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1); + if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) ) + { + printk("CPU%d: some settings of host CR0 are " + "not allowed in VMX operation.\n", cpu); + return 0; + } rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx); @@ -418,7 +433,7 @@ static void construct_vmcs(struct vcpu * __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); - __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); + __vmwrite(VM_ENTRY_INTR_INFO, 0); __vmwrite(CR0_GUEST_HOST_MASK, ~0UL); __vmwrite(CR4_GUEST_HOST_MASK, ~0UL); diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Aug 02 09:54:18 2007 -0500 @@ -613,28 +613,13 @@ void vmx_vmcs_save(struct vcpu *v, struc c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP); c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP); - /* - * Save any event/interrupt that was being injected when we last - * exited. IDT_VECTORING_INFO_FIELD has priority, as anything in - * VM_ENTRY_INTR_INFO_FIELD is either a fault caused by the first - * event, which will happen the next time, or an interrupt, which we - * never inject when IDT_VECTORING_INFO_FIELD is valid. - */ - if ( (ev = __vmread(IDT_VECTORING_INFO_FIELD)) & INTR_INFO_VALID_MASK ) - { - c->pending_event = ev; - c->error_code = __vmread(IDT_VECTORING_ERROR_CODE); - } - else if ( (ev = __vmread(VM_ENTRY_INTR_INFO_FIELD)) & - INTR_INFO_VALID_MASK ) + c->pending_event = 0; + c->error_code = 0; + if ( ((ev = __vmread(VM_ENTRY_INTR_INFO)) & INTR_INFO_VALID_MASK) && + hvm_event_needs_reinjection((ev >> 8) & 7, ev & 0xff) ) { c->pending_event = ev; c->error_code = __vmread(VM_ENTRY_EXCEPTION_ERROR_CODE); - } - else - { - c->pending_event = 0; - c->error_code = 0; } vmx_vmcs_exit(v); @@ -754,34 +739,9 @@ int vmx_vmcs_restore(struct vcpu *v, str if ( c->pending_valid ) { - vmx_vmcs_enter(v); - gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n", c->pending_event, c->error_code); - /* SVM uses type 3 ("Exception") for #OF and #BP; VMX uses type 6 */ - if ( (c->pending_type == 3) && - ((c->pending_vector == 3) || (c->pending_vector == 4)) ) - c->pending_type = 6; - - /* For software exceptions, we need to tell the hardware the - * instruction length as well (hmmm). */ - if ( c->pending_type > 4 ) - { - int addrbytes, ilen; - if ( (c->cs_arbytes & X86_SEG_AR_CS_LM_ACTIVE) && - (c->msr_efer & EFER_LMA) ) - addrbytes = 8; - else if ( c->cs_arbytes & X86_SEG_AR_DEF_OP_SIZE ) - addrbytes = 4; - else - addrbytes = 2; - - ilen = hvm_instruction_length(c->rip, addrbytes); - __vmwrite(VM_ENTRY_INSTRUCTION_LEN, ilen); - } - - /* Sanity check */ if ( (c->pending_type == 1) || (c->pending_type > 6) || (c->pending_reserved != 0) ) { @@ -790,12 +750,13 @@ int vmx_vmcs_restore(struct vcpu *v, str return -EINVAL; } - /* Re-inject the exception */ - __vmwrite(VM_ENTRY_INTR_INFO_FIELD, c->pending_event); - __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, c->error_code); - v->arch.hvm_vmx.vector_injected = 1; - - vmx_vmcs_exit(v); + if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) ) + { + vmx_vmcs_enter(v); + __vmwrite(VM_ENTRY_INTR_INFO, c->pending_event); + __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, c->error_code); + vmx_vmcs_exit(v); + } } return 0; @@ -1203,14 +1164,10 @@ static void vmx_update_vtpr(struct vcpu /* VMX doesn't have a V_TPR field */ } -static int vmx_event_injection_faulted(struct vcpu *v) -{ - unsigned int idtv_info_field; - +static int vmx_event_pending(struct vcpu *v) +{ ASSERT(v == current); - - idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD); - return (idtv_info_field & INTR_INFO_VALID_MASK); + return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK); } static void disable_intercept_for_msr(u32 msr) @@ -1261,7 +1218,7 @@ static struct hvm_function_table vmx_fun .inject_exception = vmx_inject_exception, .init_ap_context = vmx_init_ap_context, .init_hypercall_page = vmx_init_hypercall_page, - .event_injection_faulted = vmx_event_injection_faulted, + .event_pending = vmx_event_pending, .cpu_up = vmx_cpu_up, .cpu_down = vmx_cpu_down, }; @@ -2199,6 +2156,17 @@ static int vmx_set_cr0(unsigned long val unsigned long old_base_mfn; HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value); + + if ( (u32)value != value ) + { + HVM_DBG_LOG(DBG_LEVEL_1, + "Guest attempts to set upper 32 bits in CR0: %lx", + value); + vmx_inject_hw_exception(v, TRAP_gp_fault, 0); + return 0; + } + + value &= ~HVM_CR0_GUEST_RESERVED_BITS; /* ET is reserved and should be always be 1. */ value |= X86_CR0_ET; @@ -2842,47 +2810,6 @@ static void vmx_do_extint(struct cpu_use } } -static void vmx_reflect_exception(struct vcpu *v) -{ - int error_code, intr_info, vector; - - intr_info = __vmread(VM_EXIT_INTR_INFO); - vector = intr_info & 0xff; - if ( intr_info & INTR_INFO_DELIVER_CODE_MASK ) - error_code = __vmread(VM_EXIT_INTR_ERROR_CODE); - else - error_code = VMX_DELIVER_NO_ERROR_CODE; - -#ifndef NDEBUG - { - unsigned long rip; - - rip = __vmread(GUEST_RIP); - HVM_DBG_LOG(DBG_LEVEL_1, "rip = %lx, error_code = %x", - rip, error_code); - } -#endif /* NDEBUG */ - - /* - * According to Intel Virtualization Technology Specification for - * the IA-32 Intel Architecture (C97063-002 April 2005), section - * 2.8.3, SW_EXCEPTION should be used for #BP and #OV, and - * HW_EXCEPTION used for everything else. The main difference - * appears to be that for SW_EXCEPTION, the EIP/RIP is incremented - * by VM_ENTER_INSTRUCTION_LEN bytes, whereas for HW_EXCEPTION, - * it is not. - */ - if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_SW_EXCEPTION ) - { - int ilen = __get_instruction_length(); /* Safe: software exception */ - vmx_inject_sw_exception(v, vector, ilen); - } - else - { - vmx_inject_hw_exception(v, vector, error_code); - } -} - static void vmx_failed_vmentry(unsigned int exit_reason, struct cpu_user_regs *regs) { @@ -2919,7 +2846,7 @@ static void vmx_failed_vmentry(unsigned asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs) { - unsigned int exit_reason; + unsigned int exit_reason, idtv_info; unsigned long exit_qualification, inst_len = 0; struct vcpu *v = current; @@ -2934,6 +2861,30 @@ asmlinkage void vmx_vmexit_handler(struc if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) ) return vmx_failed_vmentry(exit_reason, regs); + + /* Event delivery caused this intercept? Queue for redelivery. */ + idtv_info = __vmread(IDT_VECTORING_INFO); + if ( unlikely(idtv_info & INTR_INFO_VALID_MASK) ) + { + if ( hvm_event_needs_reinjection((idtv_info>>8)&7, idtv_info&0xff) ) + { + /* See SDM 3B 25.7.1.1 and .2 for info about masking resvd bits. */ + __vmwrite(VM_ENTRY_INTR_INFO, + idtv_info & ~INTR_INFO_RESVD_BITS_MASK); + if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK ) + __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, + __vmread(IDT_VECTORING_ERROR_CODE)); + } + + /* + * Clear NMI-blocking interruptibility info if an NMI delivery faulted. + * Re-delivery will re-set it (see SDM 3B 25.7.1.2). + */ + if ( (idtv_info & INTR_INFO_INTR_TYPE_MASK) == (X86_EVENTTYPE_NMI<<8) ) + __vmwrite(GUEST_INTERRUPTIBILITY_INFO, + __vmread(GUEST_INTERRUPTIBILITY_INFO) & + ~VMX_INTR_SHADOW_NMI); + } switch ( exit_reason ) { @@ -2957,7 +2908,7 @@ asmlinkage void vmx_vmexit_handler(struc * (NB. If we emulate this IRET for any reason, we should re-clear!) */ if ( unlikely(intr_info & INTR_INFO_NMI_UNBLOCKED_BY_IRET) && - !(__vmread(IDT_VECTORING_INFO_FIELD) & INTR_INFO_VALID_MASK) && + !(__vmread(IDT_VECTORING_INFO) & INTR_INFO_VALID_MASK) && (vector != TRAP_double_fault) ) __vmwrite(GUEST_INTERRUPTIBILITY_INFO, __vmread(GUEST_INTERRUPTIBILITY_INFO)|VMX_INTR_SHADOW_NMI); @@ -2995,14 +2946,12 @@ asmlinkage void vmx_vmexit_handler(struc vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code); break; case TRAP_nmi: - if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI ) - { - HVMTRACE_0D(NMI, v); - vmx_store_cpu_guest_regs(v, regs, NULL); - do_nmi(regs); /* Real NMI, vector 2: normal processing. */ - } - else - vmx_reflect_exception(v); + if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) != + (X86_EVENTTYPE_NMI << 8) ) + goto exit_and_crash; + HVMTRACE_0D(NMI, v); + vmx_store_cpu_guest_regs(v, regs, NULL); + do_nmi(regs); /* Real NMI, vector 2: normal processing. */ break; case TRAP_machine_check: HVMTRACE_0D(MCE, v); diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/arch/x86/mm/shadow/multi.c Thu Aug 02 09:54:18 2007 -0500 @@ -2905,7 +2905,7 @@ static int sh_page_fault(struct vcpu *v, * stack is currently considered to be a page table, so we should * unshadow the faulting page before exiting. */ - if ( unlikely(hvm_event_injection_faulted(v)) ) + if ( unlikely(hvm_event_pending(v)) ) { gdprintk(XENLOG_DEBUG, "write to pagetable during event " "injection: cr2=%#lx, mfn=%#lx\n", diff -r 976db28bcc43 -r 04fb85a46dc5 xen/common/libelf/libelf-dominfo.c --- a/xen/common/libelf/libelf-dominfo.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/common/libelf/libelf-dominfo.c Thu Aug 02 09:54:18 2007 -0500 @@ -333,6 +333,99 @@ static int elf_xen_note_check(struct elf return 0; } + +static void elf_xen_loadsymtab(struct elf_binary *elf, + struct elf_dom_parms *parms) +{ + unsigned long maxva, len; + + if ( !parms->bsd_symtab ) + return; + + /* Calculate the required additional kernel space for the elf image */ + + /* The absolute base address of the elf image */ + maxva = elf_round_up(elf, parms->virt_kend); + maxva += sizeof(long); /* Space to store the size of the elf image */ + /* Space for the elf and elf section headers */ + maxva += (elf_uval(elf, elf->ehdr, e_ehsize) + + elf_shdr_count(elf) * elf_uval(elf, elf->ehdr, e_shentsize)); + maxva = elf_round_up(elf, maxva); + + /* Space for the symbol and string tabs */ + len = (unsigned long)elf->send - (unsigned long)elf->sstart; + maxva = elf_round_up(elf, maxva + len); + + /* The address the kernel must expanded to */ + parms->virt_end = maxva; +} + +int elf_xen_dom_load_binary(struct elf_binary *elf, + struct elf_dom_parms *parms) +{ + elf_ehdr *sym_ehdr; + unsigned long shdr, symtab_addr; + unsigned long maxva, symbase; + uint8_t i; + char *p; + + elf_load_binary(elf); + + if ( !parms->bsd_symtab ) + return 0; + +#define elf_hdr_elm(_elf, _hdr, _elm, _val) \ +do { \ + if ( elf_64bit(_elf) ) \ + (_hdr)->e64._elm = _val; \ + else \ + (_hdr)->e32._elm = _val; \ +} while ( 0 ) + + /* ehdr right after the kernel image (4 byte aligned) */ + symbase = elf_round_up(elf, parms->virt_kend); + symtab_addr = maxva = symbase + sizeof(long); + + /* Set up Elf header. */ + sym_ehdr = (elf_ehdr *)symtab_addr; + maxva = elf_copy_ehdr(elf, sym_ehdr); + + elf_hdr_elm(elf, sym_ehdr, e_phoff, 0); + elf_hdr_elm(elf, sym_ehdr, e_shoff, elf_uval(elf, elf->ehdr, e_ehsize)); + elf_hdr_elm(elf, sym_ehdr, e_phentsize, 0); + elf_hdr_elm(elf, sym_ehdr, e_phnum, 0); + + /* Copy Elf section headers. */ + shdr = maxva; + maxva = elf_copy_shdr(elf, (elf_shdr *)shdr); + + for ( i = 0; i < elf_shdr_count(elf); i++ ) + { + uint8_t type; + unsigned long tmp; + type = elf_uval(elf, (elf_shdr *)shdr, sh_type); + if ( (type == SHT_STRTAB) || (type == SHT_SYMTAB) ) + { + elf_msg(elf, "%s: shdr %i at 0x%p -> 0x%p\n", __func__, i, + elf_section_start(elf, (elf_shdr *)shdr), (void *)maxva); + tmp = elf_copy_section(elf, (elf_shdr *)shdr, (void *)maxva); + /* Mangled to be based on ELF header location. */ + elf_hdr_elm(elf, (elf_shdr *)shdr, sh_offset, + maxva - symtab_addr); + maxva = tmp; + } + shdr += elf_uval(elf, elf->ehdr, e_shentsize); + } + + /* Write down the actual sym size. */ + p = (char *)symbase; + *(long *)p = maxva - symtab_addr; /* sym size */ + +#undef elf_ehdr_elm + + return 0; +} + static int elf_xen_addr_calc_check(struct elf_binary *elf, struct elf_dom_parms *parms) { @@ -374,9 +467,13 @@ static int elf_xen_addr_calc_check(struc parms->virt_offset = parms->virt_base - parms->elf_paddr_offset; parms->virt_kstart = elf->pstart + parms->virt_offset; parms->virt_kend = elf->pend + parms->virt_offset; + parms->virt_end = parms->virt_kend; if ( parms->virt_entry == UNSET_ADDR ) parms->virt_entry = elf_uval(elf, elf->ehdr, e_entry); + + if ( parms->bsd_symtab ) + elf_xen_loadsymtab(elf, parms); elf_msg(elf, "%s: addresses:\n", __FUNCTION__); elf_msg(elf, " virt_base = 0x%" PRIx64 "\n", parms->virt_base); @@ -384,12 +481,14 @@ static int elf_xen_addr_calc_check(struc elf_msg(elf, " virt_offset = 0x%" PRIx64 "\n", parms->virt_offset); elf_msg(elf, " virt_kstart = 0x%" PRIx64 "\n", parms->virt_kstart); elf_msg(elf, " virt_kend = 0x%" PRIx64 "\n", parms->virt_kend); + elf_msg(elf, " virt_end = 0x%" PRIx64 "\n", parms->virt_end); elf_msg(elf, " virt_entry = 0x%" PRIx64 "\n", parms->virt_entry); if ( (parms->virt_kstart > parms->virt_kend) || (parms->virt_entry < parms->virt_kstart) || (parms->virt_entry > parms->virt_kend) || - (parms->virt_base > parms->virt_kstart) ) + (parms->virt_base > parms->virt_kstart) || + (parms->virt_kend > parms->virt_end) ) { elf_err(elf, "%s: ERROR: ELF start or entries are out of bounds.\n", __FUNCTION__); diff -r 976db28bcc43 -r 04fb85a46dc5 xen/common/libelf/libelf-loader.c --- a/xen/common/libelf/libelf-loader.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/common/libelf/libelf-loader.c Thu Aug 02 09:54:18 2007 -0500 @@ -10,6 +10,8 @@ int elf_init(struct elf_binary *elf, con { const elf_shdr *shdr; uint64_t i, count, section, offset; + uint64_t low = -1; + uint64_t high = 0; if ( !elf_is_elfbinary(image) ) { @@ -24,7 +26,11 @@ int elf_init(struct elf_binary *elf, con elf->class = elf->ehdr->e32.e_ident[EI_CLASS]; elf->data = elf->ehdr->e32.e_ident[EI_DATA]; - /* sanity check phdr */ +#ifdef VERBOSE + elf_set_verbose(elf); +#endif + + /* Sanity check phdr. */ offset = elf_uval(elf, elf->ehdr, e_phoff) + elf_uval(elf, elf->ehdr, e_phentsize) * elf_phdr_count(elf); if ( offset > elf->size ) @@ -34,7 +40,7 @@ int elf_init(struct elf_binary *elf, con return -1; } - /* sanity check shdr */ + /* Sanity check shdr. */ offset = elf_uval(elf, elf->ehdr, e_shoff) + elf_uval(elf, elf->ehdr, e_shentsize) * elf_shdr_count(elf); if ( offset > elf->size ) @@ -44,29 +50,55 @@ int elf_init(struct elf_binary *elf, con return -1; } - /* find section string table */ + /* Find section string table. */ section = elf_uval(elf, elf->ehdr, e_shstrndx); shdr = elf_shdr_by_index(elf, section); if ( shdr != NULL ) elf->sec_strtab = elf_section_start(elf, shdr); - /* find symbol table, symbol string table */ + /* Find symbol table and symbol string table. */ count = elf_shdr_count(elf); for ( i = 0; i < count; i++ ) { + const char *sh_symend, *sh_strend; + shdr = elf_shdr_by_index(elf, i); if ( elf_uval(elf, shdr, sh_type) != SHT_SYMTAB ) continue; elf->sym_tab = shdr; + sh_symend = (const char *)elf_section_end(elf, shdr); shdr = elf_shdr_by_index(elf, elf_uval(elf, shdr, sh_link)); if ( shdr == NULL ) { elf->sym_tab = NULL; + sh_symend = 0; continue; } elf->sym_strtab = elf_section_start(elf, shdr); - break; - } + sh_strend = (const char *)elf_section_end(elf, shdr); + + if ( low > (unsigned long)elf->sym_tab ) + low = (unsigned long)elf->sym_tab; + if ( low > (unsigned long)shdr ) + low = (unsigned long)shdr; + + if ( high < ((unsigned long)sh_symend) ) + high = (unsigned long)sh_symend; + if ( high < ((unsigned long)sh_strend) ) + high = (unsigned long)sh_strend; + + elf_msg(elf, "%s: shdr: sym_tab=%p size=0x%" PRIx64 "\n", + __FUNCTION__, elf->sym_tab, + elf_uval(elf, elf->sym_tab, sh_size)); + elf_msg(elf, "%s: shdr: str_tab=%p size=0x%" PRIx64 "\n", + __FUNCTION__, elf->sym_strtab, elf_uval(elf, shdr, sh_size)); + + elf->sstart = low; + elf->send = high; + elf_msg(elf, "%s: symbol map: 0x%" PRIx64 " -> 0x%" PRIx64 "\n", + __FUNCTION__, elf->sstart, elf->send); + } + return 0; } diff -r 976db28bcc43 -r 04fb85a46dc5 xen/common/libelf/libelf-tools.c --- a/xen/common/libelf/libelf-tools.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/common/libelf/libelf-tools.c Thu Aug 02 09:54:18 2007 -0500 @@ -236,6 +236,36 @@ int elf_phdr_is_loadable(struct elf_bina uint64_t p_flags = elf_uval(elf, phdr, p_flags); return ((p_type == PT_LOAD) && (p_flags & (PF_W | PF_X)) != 0); +} + +unsigned long +elf_copy_ehdr(struct elf_binary *elf, void *dest) +{ + uint64_t size; + + size = elf_uval(elf, elf->ehdr, e_ehsize); + memcpy(dest, elf->ehdr, size); + return elf_round_up(elf, (unsigned long)(dest) + size); +} + +unsigned long +elf_copy_shdr(struct elf_binary *elf, void *dest) +{ + uint64_t size; + + size = elf_shdr_count(elf) * elf_uval(elf, elf->ehdr, e_shentsize); + memcpy(dest, elf->image + elf_uval(elf, elf->ehdr, e_shoff), size); + return elf_round_up(elf, (unsigned long)(dest) + size); +} + +unsigned long +elf_copy_section(struct elf_binary *elf, const elf_shdr *shdr, void *dest) +{ + uint64_t size; + + size = elf_uval(elf, shdr, sh_size); + memcpy(dest, elf_section_start(elf, shdr), size); + return elf_round_up(elf, (unsigned long)(dest) + size); } /* diff -r 976db28bcc43 -r 04fb85a46dc5 xen/drivers/acpi/tables.c --- a/xen/drivers/acpi/tables.c Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/drivers/acpi/tables.c Thu Aug 02 09:54:18 2007 -0500 @@ -73,7 +73,6 @@ struct acpi_table_sdt { static unsigned long sdt_pa; /* Physical Address */ static unsigned long sdt_count; /* Table count */ -unsigned char acpi_rsdp_rev; static struct acpi_table_sdt sdt_entry[ACPI_MAX_TABLES] __initdata; @@ -227,6 +226,17 @@ void acpi_table_print_madt_entry(acpi_ta } } +uint8_t +generate_acpi_checksum(void *tbl, unsigned long len) +{ + uint8_t *ptr, sum = 0; + + for (ptr = tbl; len > 0 ; len--, ptr++) + sum += *ptr; + + return 0 - sum; +} + static int acpi_table_compute_checksum(void *table_pointer, unsigned long length) { @@ -599,8 +609,6 @@ int __init acpi_table_init(void) "RSDP (v%3.3d %6.6s ) @ 0x%p\n", rsdp->revision, rsdp->oem_id, (void *)rsdp_phys); - acpi_rsdp_rev = rsdp->revision; - if (rsdp->revision < 2) result = acpi_table_compute_checksum(rsdp, @@ -623,3 +631,143 @@ int __init acpi_table_init(void) return 0; } + +int __init +acpi_table_disable(enum acpi_table_id table_id) +{ + struct acpi_table_header *header = NULL; + struct acpi_table_rsdp *rsdp; + unsigned long rsdp_phys; + char *table_name; + int id; + + rsdp_phys = acpi_find_rsdp(); + if (!rsdp_phys) + return -ENODEV; + + rsdp = (struct acpi_table_rsdp *)__acpi_map_table(rsdp_phys, + sizeof(struct acpi_table_rsdp)); + if (!rsdp) + return -ENODEV; + + for (id = 0; id < sdt_count; id++) + if (sdt_entry[id].id == table_id) + break; + + if (id == sdt_count) + return -ENOENT; + + table_name = acpi_table_signatures[table_id]; + + /* First check XSDT (but only on ACPI 2.0-compatible systems) */ + + if ((rsdp->revision >= 2) && + (((struct acpi20_table_rsdp *)rsdp)->xsdt_address)) { + + struct acpi_table_xsdt *mapped_xsdt = NULL; + + sdt_pa = ((struct acpi20_table_rsdp *)rsdp)->xsdt_address; + + /* map in just the header */ + header = (struct acpi_table_header *) + __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header)); + + if (!header) { + printk(KERN_WARNING PREFIX + "Unable to map XSDT header\n"); + return -ENODEV; + } + + /* remap in the entire table before processing */ + mapped_xsdt = (struct acpi_table_xsdt *) + __acpi_map_table(sdt_pa, header->length); + if (!mapped_xsdt) { + printk(KERN_WARNING PREFIX "Unable to map XSDT\n"); + return -ENODEV; + } + header = &mapped_xsdt->header; + + if (strncmp(header->signature, "XSDT", 4)) { + printk(KERN_WARNING PREFIX + "XSDT signature incorrect\n"); + return -ENODEV; + } + + if (acpi_table_compute_checksum(header, header->length)) { + printk(KERN_WARNING PREFIX "Invalid XSDT checksum\n"); + return -ENODEV; + } + + if (id < sdt_count) { + header = (struct acpi_table_header *) + __acpi_map_table(mapped_xsdt->entry[id], sizeof(struct acpi_table_header)); + } else { + printk(KERN_WARNING PREFIX + "Unable to disable entry %d\n", + id); + return -ENODEV; + } + } + + /* Then check RSDT */ + + else if (rsdp->rsdt_address) { + + struct acpi_table_rsdt *mapped_rsdt = NULL; + + sdt_pa = rsdp->rsdt_address; + + /* map in just the header */ + header = (struct acpi_table_header *) + __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header)); + if (!header) { + printk(KERN_WARNING PREFIX + "Unable to map RSDT header\n"); + return -ENODEV; + } + + /* remap in the entire table before processing */ + mapped_rsdt = (struct acpi_table_rsdt *) + __acpi_map_table(sdt_pa, header->length); + if (!mapped_rsdt) { + printk(KERN_WARNING PREFIX "Unable to map RSDT\n"); + return -ENODEV; + } + header = &mapped_rsdt->header; + + if (strncmp(header->signature, "RSDT", 4)) { + printk(KERN_WARNING PREFIX + "RSDT signature incorrect\n"); + return -ENODEV; + } + + if (acpi_table_compute_checksum(header, header->length)) { + printk(KERN_WARNING PREFIX "Invalid RSDT checksum\n"); + return -ENODEV; + } + if (id < sdt_count) { + header = (struct acpi_table_header *) + __acpi_map_table(mapped_rsdt->entry[id], sizeof(struct acpi_table_header)); + } else { + printk(KERN_WARNING PREFIX + "Unable to disable entry %d\n", + id); + return -ENODEV; + } + } + + else { + printk(KERN_WARNING PREFIX + "No System Description Table (RSDT/XSDT) specified in RSDP\n"); + return -ENODEV; + } + + memcpy(header->signature, "OEMx", 4); + memcpy(header->oem_id, "xxxxxx", 6); + memcpy(header->oem_id+1, table_name, 4); + memcpy(header->oem_table_id, "Xen ", 8); + header->checksum = 0; + header->checksum = generate_acpi_checksum(header, header->length); + + return 0; +} diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/asm-ia64/dom_fw_common.h --- a/xen/include/asm-ia64/dom_fw_common.h Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/include/asm-ia64/dom_fw_common.h Thu Aug 02 09:54:18 2007 -0500 @@ -85,7 +85,6 @@ xen_ia64_efi_make_md(efi_memory_desc_t * xen_ia64_efi_make_md(efi_memory_desc_t *md, uint32_t type, uint64_t attr, uint64_t start, uint64_t end); -uint8_t generate_acpi_checksum(void *tbl, unsigned long len); struct fake_acpi_tables; void dom_fw_fake_acpi(domain_t *d, struct fake_acpi_tables *tables); int efi_mdt_cmp(const void *a, const void *b); diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/include/asm-x86/hvm/hvm.h Thu Aug 02 09:54:18 2007 -0500 @@ -154,7 +154,7 @@ struct hvm_function_table { void (*init_hypercall_page)(struct domain *d, void *hypercall_page); - int (*event_injection_faulted)(struct vcpu *v); + int (*event_pending)(struct vcpu *v); int (*cpu_up)(void); void (*cpu_down)(void); @@ -229,7 +229,8 @@ hvm_guest_x86_mode(struct vcpu *v) return hvm_funcs.guest_x86_mode(v); } -int hvm_instruction_length(unsigned long pc, int address_bytes); +int hvm_instruction_fetch(unsigned long pc, int address_bytes, + unsigned char *buf); static inline void hvm_update_host_cr3(struct vcpu *v) @@ -295,24 +296,71 @@ hvm_inject_exception(unsigned int trapnr int hvm_bringup_ap(int vcpuid, int trampoline_vector); -static inline int hvm_event_injection_faulted(struct vcpu *v) -{ - return hvm_funcs.event_injection_faulted(v); -} +static inline int hvm_event_pending(struct vcpu *v) +{ + return hvm_funcs.event_pending(v); +} + +/* These reserved bits in lower 32 remain 0 after any load of CR0 */ +#define HVM_CR0_GUEST_RESERVED_BITS \ + (~((unsigned long) \ + (X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | \ + X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | \ + X86_CR0_WP | X86_CR0_AM | X86_CR0_NW | \ + X86_CR0_CD | X86_CR0_PG))) /* These bits in CR4 are owned by the host. */ #define HVM_CR4_HOST_MASK (mmu_cr4_features & \ (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE)) /* These bits in CR4 cannot be set by the guest. */ -#define HVM_CR4_GUEST_RESERVED_BITS \ - ~(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \ - X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \ - X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \ - X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT) +#define HVM_CR4_GUEST_RESERVED_BITS \ + (~((unsigned long) \ + (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \ + X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \ + X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \ + X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT))) /* These exceptions must always be intercepted. */ #define HVM_TRAP_MASK (1U << TRAP_machine_check) + +/* + * x86 event types. This enumeration is valid for: + * Intel VMX: {VM_ENTRY,VM_EXIT,IDT_VECTORING}_INTR_INFO[10:8] + * AMD SVM: eventinj[10:8] and exitintinfo[10:8] (types 0-4 only) + */ +#define X86_EVENTTYPE_EXT_INTR 0 /* external interrupt */ +#define X86_EVENTTYPE_NMI 2 /* NMI */ +#define X86_EVENTTYPE_HW_EXCEPTION 3 /* hardware exception */ +#define X86_EVENTTYPE_SW_INTERRUPT 4 /* software interrupt */ +#define X86_EVENTTYPE_SW_EXCEPTION 6 /* software exception */ + +/* + * Need to re-inject a given event? We avoid re-injecting software exceptions + * and interrupts because the faulting/trapping instruction can simply be + * re-executed (neither VMX nor SVM update RIP when they VMEXIT during + * INT3/INTO/INTn). + */ +static inline int hvm_event_needs_reinjection(uint8_t type, uint8_t vector) +{ + switch ( type ) + { + case X86_EVENTTYPE_EXT_INTR: + case X86_EVENTTYPE_NMI: + return 1; + case X86_EVENTTYPE_HW_EXCEPTION: + /* + * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly + * check for these vectors, as they are really SW Exceptions. SVM has + * not updated RIP to point after the trapping instruction (INT3/INTO). + */ + return (vector != 3) && (vector != 4); + default: + /* Software exceptions/interrupts can be re-executed (e.g., INT n). */ + break; + } + return 0; +} static inline int hvm_cpu_up(void) { diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/asm-x86/hvm/svm/vmcb.h --- a/xen/include/asm-x86/hvm/svm/vmcb.h Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h Thu Aug 02 09:54:18 2007 -0500 @@ -319,14 +319,6 @@ typedef union u64 errorcode:32; } fields; } __attribute__ ((packed)) eventinj_t; - -enum EVENTTYPES -{ - EVENTTYPE_INTR = 0, - EVENTTYPE_NMI = 2, - EVENTTYPE_EXCEPTION = 3, - EVENTTYPE_SWINT = 4, -}; typedef union { diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Thu Aug 02 09:54:18 2007 -0500 @@ -66,9 +66,6 @@ struct arch_vmx_struct { /* Cache of cpu execution control. */ u32 exec_control; - - /* If there is vector installed in the INTR_INFO_FIELD. */ - u32 vector_injected; unsigned long cpu_cr0; /* copy of guest CR0 */ unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */ @@ -198,7 +195,7 @@ enum vmcs_field { VM_EXIT_MSR_LOAD_COUNT = 0x00004010, VM_ENTRY_CONTROLS = 0x00004012, VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, - VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_INTR_INFO = 0x00004016, VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, TPR_THRESHOLD = 0x0000401c, @@ -207,7 +204,7 @@ enum vmcs_field { VM_EXIT_REASON = 0x00004402, VM_EXIT_INTR_INFO = 0x00004404, VM_EXIT_INTR_ERROR_CODE = 0x00004406, - IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_INFO = 0x00004408, IDT_VECTORING_ERROR_CODE = 0x0000440a, VM_EXIT_INSTRUCTION_LEN = 0x0000440c, VMX_INSTRUCTION_INFO = 0x0000440e, diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Thu Aug 02 09:54:18 2007 -0500 @@ -94,11 +94,6 @@ void vmx_vlapic_msr_changed(struct vcpu #define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ #define INTR_INFO_RESVD_BITS_MASK 0x7ffff000 -#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ -#define INTR_TYPE_NMI (2 << 8) /* NMI */ -#define INTR_TYPE_HW_EXCEPTION (3 << 8) /* hardware exception */ -#define INTR_TYPE_SW_EXCEPTION (6 << 8) /* software exception */ - /* * Exit Qualifications for MOV for Control Register Access */ @@ -263,8 +258,8 @@ static inline int __vmxon (u64 addr) return rc; } -static inline void __vmx_inject_exception(struct vcpu *v, int trap, int type, - int error_code, int ilen) +static inline void __vmx_inject_exception( + struct vcpu *v, int trap, int type, int error_code) { unsigned long intr_fields; @@ -276,16 +271,13 @@ static inline void __vmx_inject_exceptio * VM entry]", PRM Vol. 3, 22.6.1 (Interruptibility State). */ - intr_fields = (INTR_INFO_VALID_MASK | type | trap); + intr_fields = (INTR_INFO_VALID_MASK | (type<<8) | trap); if ( error_code != VMX_DELIVER_NO_ERROR_CODE ) { __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); intr_fields |= INTR_INFO_DELIVER_CODE_MASK; } - if ( ilen ) - __vmwrite(VM_ENTRY_INSTRUCTION_LEN, ilen); - - __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields); + __vmwrite(VM_ENTRY_INTR_INFO, intr_fields); if (trap == TRAP_page_fault) HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vmx.cpu_cr2, error_code); @@ -296,29 +288,19 @@ static inline void vmx_inject_hw_excepti static inline void vmx_inject_hw_exception( struct vcpu *v, int trap, int error_code) { - v->arch.hvm_vmx.vector_injected = 1; - __vmx_inject_exception(v, trap, INTR_TYPE_HW_EXCEPTION, error_code, 0); -} - -static inline void vmx_inject_sw_exception( - struct vcpu *v, int trap, int instruction_len) -{ - v->arch.hvm_vmx.vector_injected = 1; - __vmx_inject_exception(v, trap, INTR_TYPE_SW_EXCEPTION, - VMX_DELIVER_NO_ERROR_CODE, - instruction_len); + __vmx_inject_exception(v, trap, X86_EVENTTYPE_HW_EXCEPTION, error_code); } static inline void vmx_inject_extint(struct vcpu *v, int trap) { - __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, - VMX_DELIVER_NO_ERROR_CODE, 0); + __vmx_inject_exception(v, trap, X86_EVENTTYPE_EXT_INTR, + VMX_DELIVER_NO_ERROR_CODE); } static inline void vmx_inject_nmi(struct vcpu *v) { - __vmx_inject_exception(v, 2, INTR_TYPE_NMI, - VMX_DELIVER_NO_ERROR_CODE, 0); + __vmx_inject_exception(v, 2, X86_EVENTTYPE_NMI, + VMX_DELIVER_NO_ERROR_CODE); } #endif /* __ASM_X86_HVM_VMX_VMX_H__ */ diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/public/libelf.h --- a/xen/include/public/libelf.h Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/include/public/libelf.h Thu Aug 02 09:54:18 2007 -0500 @@ -65,6 +65,8 @@ struct elf_binary { /* loaded to */ char *dest; + uint64_t sstart; + uint64_t send; uint64_t pstart; uint64_t pend; uint64_t reloc_offset; @@ -91,33 +93,32 @@ struct elf_binary { #define elf_lsb(elf) (ELFDATA2LSB == (elf)->data) #define elf_swap(elf) (NATIVE_ELFDATA != (elf)->data) -#define elf_uval(elf, str, elem) \ - ((ELFCLASS64 == (elf)->class) \ - ? elf_access_unsigned((elf), (str), \ - offsetof(typeof(*(str)),e64.elem), \ - sizeof((str)->e64.elem)) \ - : elf_access_unsigned((elf), (str), \ - offsetof(typeof(*(str)),e32.elem), \ - sizeof((str)->e32.elem))) - -#define elf_sval(elf, str, elem) \ - ((ELFCLASS64 == (elf)->class) \ - ? elf_access_signed((elf), (str), \ - offsetof(typeof(*(str)),e64.elem), \ - sizeof((str)->e64.elem)) \ - : elf_access_signed((elf), (str), \ - offsetof(typeof(*(str)),e32.elem), \ - sizeof((str)->e32.elem))) - -#define elf_size(elf, str) \ - ((ELFCLASS64 == (elf)->class) \ - ? sizeof((str)->e64) \ - : sizeof((str)->e32)) +#define elf_uval(elf, str, elem) \ + ((ELFCLASS64 == (elf)->class) \ + ? elf_access_unsigned((elf), (str), \ + offsetof(typeof(*(str)),e64.elem), \ + sizeof((str)->e64.elem)) \ + : elf_access_unsigned((elf), (str), \ + offsetof(typeof(*(str)),e32.elem), \ + sizeof((str)->e32.elem))) + +#define elf_sval(elf, str, elem) \ + ((ELFCLASS64 == (elf)->class) \ + ? elf_access_signed((elf), (str), \ + offsetof(typeof(*(str)),e64.elem), \ + sizeof((str)->e64.elem)) \ + : elf_access_signed((elf), (str), \ + offsetof(typeof(*(str)),e32.elem), \ + sizeof((str)->e32.elem))) + +#define elf_size(elf, str) \ + ((ELFCLASS64 == (elf)->class) \ + ? sizeof((str)->e64) : sizeof((str)->e32)) uint64_t elf_access_unsigned(struct elf_binary *elf, const void *ptr, - uint64_t offset, size_t size); + uint64_t offset, size_t size); int64_t elf_access_signed(struct elf_binary *elf, const void *ptr, - uint64_t offset, size_t size); + uint64_t offset, size_t size); uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr); @@ -149,6 +150,11 @@ int elf_is_elfbinary(const void *image); int elf_is_elfbinary(const void *image); int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr); +unsigned long elf_copy_ehdr(struct elf_binary *elf, void *dest); +unsigned long elf_copy_shdr(struct elf_binary *elf, void *dest); +unsigned long elf_copy_section(struct elf_binary *elf, + const elf_shdr *shdr, void *dest); + /* ------------------------------------------------------------------------ */ /* xc_libelf_loader.c */ @@ -185,8 +191,8 @@ struct xen_elfnote { enum xen_elfnote_type type; const char *name; union { - const char *str; - uint64_t num; + const char *str; + uint64_t num; } data; }; @@ -215,7 +221,8 @@ struct elf_dom_parms { /* calculated */ uint64_t virt_offset; uint64_t virt_kstart; - uint64_t virt_kend; + uint64_t virt_kend; /* end of kernel image */ + uint64_t virt_end; /* end of kernel symtab (== virt_kend if none) */ }; static inline void elf_xen_feature_set(int nr, uint32_t * addr) @@ -228,14 +235,17 @@ static inline int elf_xen_feature_get(in } int elf_xen_parse_features(const char *features, - uint32_t *supported, - uint32_t *required); + uint32_t *supported, + uint32_t *required); int elf_xen_parse_note(struct elf_binary *elf, - struct elf_dom_parms *parms, - const elf_note *note); + struct elf_dom_parms *parms, + const elf_note *note); int elf_xen_parse_guest_info(struct elf_binary *elf, - struct elf_dom_parms *parms); + struct elf_dom_parms *parms); int elf_xen_parse(struct elf_binary *elf, - struct elf_dom_parms *parms); + struct elf_dom_parms *parms); + +int elf_xen_dom_load_binary(struct elf_binary *elf, + struct elf_dom_parms *parms); #endif /* __XC_LIBELF__ */ diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/xen/acpi.h --- a/xen/include/xen/acpi.h Thu Aug 02 09:50:55 2007 -0500 +++ b/xen/include/xen/acpi.h Thu Aug 02 09:54:18 2007 -0500 @@ -383,6 +383,7 @@ int acpi_numa_init (void); int acpi_numa_init (void); int acpi_table_init (void); +int acpi_table_disable(enum acpi_table_id table_id); int acpi_table_parse (enum acpi_table_id id, acpi_table_handler handler); int acpi_get_table_header_early (enum acpi_table_id id, struct acpi_table_header **header); int acpi_table_parse_madt (enum acpi_madt_entry_id id, acpi_madt_entry_handler handler, unsigned int max_entries); @@ -390,6 +391,7 @@ void acpi_table_print (struct acpi_table void acpi_table_print (struct acpi_table_header *header, unsigned long phys_addr); void acpi_table_print_madt_entry (acpi_table_entry_header *madt); void acpi_table_print_srat_entry (acpi_table_entry_header *srat); +uint8_t generate_acpi_checksum(void *tbl, unsigned long len); /* the following four functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); @@ -534,6 +536,5 @@ static inline int acpi_get_pxm(acpi_hand #endif extern int pnpacpi_disabled; -extern unsigned char acpi_rsdp_rev; #endif /*_LINUX_ACPI_H*/ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |