[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1183400366 21600
# Node ID d146700adf714cdc13f924ab0de1dc895b6927f8
# Parent 443ce7edad0e8a3a640960890a72ce530887b38e
# Parent 182446677b6b56d58523050a6225a73d87a86ab7
merge with xen-unstable.hg
---
 buildconfigs/mk.linux-2.6-xen | 5
 tools/Makefile | 1
 tools/console/daemon/io.c | 38 +++--
 tools/examples/init.d/xendomains | 6
 tools/examples/network-bridge | 12 +
 tools/firmware/vmxassist/head.S | 76 -----------
 tools/firmware/vmxassist/machine.h | 15 --
 tools/firmware/vmxassist/setup.c | 58 ---------
 tools/firmware/vmxassist/vm86.c | 75 +++++++----
 tools/ioemu/target-i386-dm/exec-dm.c | 2
 tools/libxc/xc_domain_restore.c | 11 -
 tools/libxc/xc_misc.c | 28 ++++
 tools/libxc/xenctrl.h | 4
 tools/misc/xen-python-path | 9 +
 tools/python/xen/util/auxbin.py | 9 +
 tools/python/xen/xend/XendCheckpoint.py | 2
 tools/python/xen/xend/XendConfig.py | 105 +++++++++++++---
 tools/python/xen/xend/XendDomain.py | 4
 tools/python/xen/xend/server/irqif.py | 2
 tools/python/xen/xend/server/pciif.py | 3
 tools/python/xen/xm/main.py | 14 +-
 xen/acm/acm_core.c | 2
 xen/arch/x86/Makefile | 1
 xen/arch/x86/boot/edd.S | 24 +-
 xen/arch/x86/boot/x86_32.S | 21 ++-
 xen/arch/x86/boot/x86_64.S | 15 ++
 xen/arch/x86/clear_page.S | 26 ++++
 xen/arch/x86/domain.c | 9 -
 xen/arch/x86/hvm/hvm.c | 16 --
 xen/arch/x86/hvm/io.c | 1
 xen/arch/x86/hvm/platform.c | 3
 xen/arch/x86/hvm/svm/svm.c | 56 +++++++-
 xen/arch/x86/hvm/svm/vmcb.c | 8 -
 xen/arch/x86/hvm/vmx/vmcs.c | 5
 xen/arch/x86/hvm/vmx/vmx.c | 154 ++++++++++++++----
 xen/arch/x86/io_apic.c | 4
 xen/arch/x86/mm.c | 3
 xen/arch/x86/platform_hypercall.c | 96 ++++++++++++++
 xen/arch/x86/setup.c | 34 ++++-
 xen/arch/x86/traps.c | 13 +-
 xen/arch/x86/x86_32/entry.S | 173 +++++++++++++--------------
 xen/arch/x86/x86_32/supervisor_mode_kernel.S | 27 ++--
 xen/arch/x86/x86_32/traps.c | 11 -
 xen/arch/x86/x86_64/Makefile | 2
 xen/arch/x86/x86_64/compat/entry.S | 10 -
 xen/arch/x86/x86_64/entry.S | 68 +++++-----
 xen/arch/x86/x86_64/mm.c | 9 -
 xen/arch/x86/x86_64/traps.c | 10 +
 xen/arch/x86/x86_emulate.c | 1
 xen/common/sysctl.c | 33 +++++
 xen/include/asm-x86/edd.h | 18 ++
 xen/include/asm-x86/hvm/hvm.h | 14 ++
 xen/include/asm-x86/hvm/svm/emulate.h | 1
 xen/include/asm-x86/hvm/svm/vmcb.h | 8 -
 xen/include/asm-x86/hvm/trace.h | 1
 xen/include/asm-x86/hvm/vmx/vmcs.h | 2
 xen/include/asm-x86/hvm/vmx/vmx.h | 7 -
 xen/include/asm-x86/page.h | 13 +-
 xen/include/asm-x86/processor.h | 4
 xen/include/asm-x86/x86_32/asm_defns.h | 76 +++++++----
 xen/include/public/platform.h | 40 ++++++
 xen/include/public/sysctl.h | 22 ++-
 xen/include/public/trace.h | 1
 63 files changed, 961 insertions(+), 560 deletions(-)

diff -r 443ce7edad0e -r d146700adf71 buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen Mon Jul 02 10:31:03 2007 -0600
+++ b/buildconfigs/mk.linux-2.6-xen Mon Jul 02 12:19:26 2007 -0600
@@ -27,6 +27,11 @@ _build: build
 _build: build
 
 include buildconfigs/src.$(XEN_LINUX_SOURCE)
+
+# Default to allowing interface mismatch
+ifndef XEN_LINUX_ALLOW_INTERFACE_MISMATCH
+XEN_LINUX_ALLOW_INTERFACE_MISMATCH := y
+endif
 
 # The real action starts here!
.PHONY: build diff -r 443ce7edad0e -r d146700adf71 tools/Makefile --- a/tools/Makefile Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/Makefile Mon Jul 02 12:19:26 2007 -0600 @@ -43,6 +43,7 @@ install: check $(MAKE) ioemuinstall $(INSTALL_DIR) $(DESTDIR)/var/xen/dump $(INSTALL_DIR) $(DESTDIR)/var/log/xen + $(INSTALL_DIR) $(DESTDIR)/var/lib/xen .PHONY: clean clean: check_clean diff -r 443ce7edad0e -r d146700adf71 tools/console/daemon/io.c --- a/tools/console/daemon/io.c Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/console/daemon/io.c Mon Jul 02 12:19:26 2007 -0600 @@ -764,27 +764,31 @@ void handle_io(void) /* XXX I wish we didn't have to busy wait for hypervisor logs * but there's no obvious way to get event channel notifications * for new HV log data as we can with guest */ - ret = select(max_fd + 1, &readfds, &writefds, 0, log_hv_fd != -1 ? &timeout : NULL); - + ret = select(max_fd + 1, &readfds, &writefds, 0, + log_hv_fd != -1 ? &timeout : NULL); + + if (log_reload) { + handle_log_reload(); + log_reload = 0; + } + + /* Abort if select failed, except for EINTR cases + which indicate a possible log reload */ if (ret == -1) { - if (errno == EINTR) { - if (log_reload) { - handle_log_reload(); - log_reload = 0; - } + if (errno == EINTR) continue; - } dolog(LOG_ERR, "Failure in select: %d (%s)", errno, strerror(errno)); break; } - /* Check for timeout */ - if (ret == 0) { - if (log_hv_fd != -1) - handle_hv_logs(); + /* Always process HV logs even if not a timeout */ + if (log_hv_fd != -1) + handle_hv_logs(); + + /* Must not check returned FDSET if it was a timeout */ + if (ret == 0) continue; - } if (FD_ISSET(xs_fileno(xs), &readfds)) handle_xs(); @@ -806,10 +810,14 @@ void handle_io(void) } } - if (log_hv_fd != -1) + if (log_hv_fd != -1) { close(log_hv_fd); - if (xc_handle != -1) + log_hv_fd = -1; + } + if (xc_handle != -1) { xc_interface_close(xc_handle); + xc_handle = -1; + } } /* diff -r 443ce7edad0e -r d146700adf71 tools/examples/init.d/xendomains --- a/tools/examples/init.d/xendomains Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/examples/init.d/xendomains Mon Jul 02 12:19:26 2007 -0600 @@ -221,11 +221,12 @@ start() if [ "$XENDOMAINS_RESTORE" = "true" ] && contains_something "$XENDOMAINS_SAVE" then - mkdir -p $(dirname "$LOCKFILE") + XENDOMAINS_SAVED=`/bin/ls $XENDOMAINS_SAVE/* | grep -v 'lost+found'` + mkdir -p $(dirname "$LOCKFILE") touch $LOCKFILE echo -n "Restoring Xen domains:" saved_domains=`ls $XENDOMAINS_SAVE` - for dom in $XENDOMAINS_SAVE/*; do + for dom in $XENDOMAINS_SAVED; do echo -n " ${dom##*/}" xm restore $dom if [ $? -ne 0 ]; then @@ -259,6 +260,7 @@ start() if [ $? -eq 0 ] || is_running $dom; then echo -n "(skip)" else + echo "(booting)" xm create --quiet --defconfig $dom if [ $? -ne 0 ]; then rc_failed $? 
diff -r 443ce7edad0e -r d146700adf71 tools/examples/network-bridge --- a/tools/examples/network-bridge Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/examples/network-bridge Mon Jul 02 12:19:26 2007 -0600 @@ -172,9 +172,21 @@ show_status () { echo '============================================================' } +is_network_root () { + local rootfs=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $3; }}' /etc/mtab) + local rootopts=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $4; }}' /etc/mtab) + + [[ "$rootfs" =~ "^nfs" ]] || [[ "$rootopts" =~ "_netdev" ]] && return 0 || return 1 +} + op_start () { if [ "${bridge}" = "null" ] ; then return + fi + + if is_network_root ; then + [ -x /usr/bin/logger ] && /usr/bin/logger "network-bridge: bridging not supported on network root; not starting" + return fi if link_exists "$pdev"; then diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/head.S --- a/tools/firmware/vmxassist/head.S Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/firmware/vmxassist/head.S Mon Jul 02 12:19:26 2007 -0600 @@ -25,80 +25,12 @@ * switch happens to the environment below. The magic indicates * that this is a valid context. */ -#ifdef TEST - .byte 0x55, 0xaa - .byte 0x80 - .code16 - jmp _start16 -#else jmp _start -#endif .align 8 .long VMXASSIST_MAGIC .long newctx /* new context */ .long oldctx /* old context */ - -#ifdef TEST -/* - * We are running in 16-bit. Get into the protected mode as soon as - * possible. We use our own (minimal) GDT to get started. - * - * ROM is a misnomer as this code isn't really rommable (although it - * only requires a few changes) but it does live in a BIOS ROM segment. - * This code allows me to debug vmxassists under (a modified version of) - * Bochs and load it as a "optromimage1". - */ - .code16 - .globl _start16 -_start16: - cli - - /* load our own global descriptor table */ - data32 addr32 lgdt %cs:(rom_gdtr - TEXTADDR) - - /* go to protected mode */ - movl %cr0, %eax - orl $(CR0_PE), %eax - movl %eax, %cr0 - data32 ljmp $0x08, $1f - - .align 32 - .globl rom_gdt -rom_gdt: - .word 0, 0 /* 0x00: reserved */ - .byte 0, 0, 0, 0 - - .word 0xFFFF, 0 /* 0x08: CS 32-bit */ - .byte 0, 0x9A, 0xCF, 0 - - .word 0xFFFF, 0 /* 0x10: CS 32-bit */ - .byte 0, 0x92, 0xCF, 0 -rom_gdt_end: - - .align 4 - .globl rom_gdtr -rom_gdtr: - .word rom_gdt_end - rom_gdt - 1 - .long rom_gdt - - .code32 -1: - /* welcome to the 32-bit world */ - movw $0x10, %ax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - movw %ax, %fs - movw %ax, %gs - - /* enable Bochs debug facilities */ - movw $0x8A00, %dx - movw $0x8A00, %ax - outw %ax, (%dx) - - jmp _start -#endif /* TEST */ /* * This is the real start. Control was transfered to this point @@ -111,9 +43,6 @@ _start: cli /* save register parameters to C land */ -#ifdef TEST - xorl %edx, %edx -#endif /* clear bss */ cld @@ -145,11 +74,6 @@ halt: halt: push $halt_msg call printf -#ifdef TEST - movw $0x8A00, %dx - movw $0x8AE0, %ax - outw %ax, (%dx) -#endif cli jmp . 
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/machine.h --- a/tools/firmware/vmxassist/machine.h Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/firmware/vmxassist/machine.h Mon Jul 02 12:19:26 2007 -0600 @@ -55,13 +55,6 @@ #define PGMASK (~(PGSIZE - 1)) /* page mask */ #define LPGSIZE (1 << LOG_PDSIZE) /* large page size */ #define LPGMASK (~(LPGSIZE - 1)) /* large page mask */ - -#ifdef TEST -#define PTE_P (1 << 0) /* Present */ -#define PTE_RW (1 << 1) /* Read/Write */ -#define PTE_US (1 << 2) /* User/Supervisor */ -#define PTE_PS (1 << 7) /* Page Size */ -#endif /* Programmable Interrupt Contoller (PIC) defines */ #define PIC_MASTER 0x20 @@ -195,14 +188,6 @@ set_cr4(unsigned value) __asm__ __volatile__("movl %0, %%cr4" : /* no outputs */ : "r"(value)); } -#ifdef TEST -static inline void -breakpoint(void) -{ - outw(0x8A00, 0x8AE0); -} -#endif /* TEST */ - #endif /* __ASSEMBLY__ */ #endif /* __MACHINE_H__ */ diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/setup.c --- a/tools/firmware/vmxassist/setup.c Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/firmware/vmxassist/setup.c Mon Jul 02 12:19:26 2007 -0600 @@ -46,19 +46,6 @@ unsigned long long idt[NR_TRAPS] __attri unsigned long long idt[NR_TRAPS] __attribute__ ((aligned(32))); struct dtr idtr = { sizeof(idt)-1, (unsigned long) &idt }; - -#ifdef TEST -unsigned pgd[NR_PGD] __attribute__ ((aligned(PGSIZE))) = { 0 }; - -struct e820entry e820map[] = { - { 0x0000000000000000ULL, 0x000000000009F800ULL, E820_RAM }, - { 0x000000000009F800ULL, 0x0000000000000800ULL, E820_RESERVED }, - { 0x00000000000C0000ULL, 0x0000000000040000ULL, E820_RESERVED }, - { 0x0000000000100000ULL, 0x0000000000000000ULL, E820_RAM }, - { 0x0000000000000000ULL, 0x0000000000003000ULL, E820_NVS }, - { 0x0000000000003000ULL, 0x000000000000A000ULL, E820_ACPI }, -}; -#endif /* TEST */ struct vmx_assist_context oldctx; struct vmx_assist_context newctx; @@ -84,38 +71,11 @@ banner(void) (((get_cmos(0x31) << 8) | get_cmos(0x30)) + 0x400) << 10; memory_size += 0x400 << 10; /* + 1MB */ -#ifdef TEST - /* Create an SMAP for our debug environment */ - e820map[4].size = memory_size - e820map[4].addr - PGSIZE; - e820map[5].addr = memory_size - PGSIZE; - e820map[6].addr = memory_size; - e820map[7].addr += memory_size; - - *HVM_E820_NR = sizeof(e820map)/sizeof(e820map[0]); - memcpy(HVM_E820, e820map, sizeof(e820map)); -#endif - printf("Memory size %ld MB\n", memory_size >> 20); printf("E820 map:\n"); print_e820_map(HVM_E820, *HVM_E820_NR); printf("\n"); } - -#ifdef TEST -void -setup_paging(void) -{ - unsigned long i; - - if (((unsigned)pgd & ~PGMASK) != 0) - panic("PGD not page aligned"); - set_cr4(get_cr4() | CR4_PSE); - for (i = 0; i < NR_PGD; i++) - pgd[i] = (i * LPGSIZE)| PTE_PS | PTE_US | PTE_RW | PTE_P; - set_cr3((unsigned) pgd); - set_cr0(get_cr0() | (CR0_PE|CR0_PG)); -} -#endif /* TEST */ void setup_gdt(void) @@ -211,11 +171,7 @@ enter_real_mode(struct regs *regs) regs->ves = regs->vds = regs->vfs = regs->vgs = 0xF000; if (booting_cpu == 0) { regs->cs = 0xF000; /* ROM BIOS POST entry point */ -#ifdef TEST - regs->eip = 0xFFE0; -#else regs->eip = 0xFFF0; -#endif } else { regs->cs = booting_vector << 8; /* AP entry point */ regs->eip = 0; @@ -242,9 +198,10 @@ enter_real_mode(struct regs *regs) } /* go from protected to real mode */ - regs->eflags |= EFLAGS_VM; set_mode(regs, VM86_PROTECTED_TO_REAL); emulate(regs); + if (mode != VM86_REAL) + panic("failed to emulate between clear PE and long jump.\n"); } /* @@ -269,13 +226,8 @@ setup_ctx(void) * more natural to 
enable CR0.PE to cause a world switch to * protected mode rather than disabling it. */ -#ifdef TEST - c->cr0 = (get_cr0() | CR0_NE | CR0_PG) & ~CR0_PE; - c->cr3 = (unsigned long) pgd; -#else c->cr0 = (get_cr0() | CR0_NE) & ~CR0_PE; c->cr3 = 0; -#endif c->cr4 = get_cr4(); c->idtr_limit = sizeof(idt)-1; @@ -369,16 +321,10 @@ main(void) if (booting_cpu == 0) banner(); -#ifdef TEST - setup_paging(); -#endif - setup_gdt(); setup_idt(); -#ifndef TEST set_cr4(get_cr4() | CR4_VME); -#endif setup_ctx(); diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/vm86.c --- a/tools/firmware/vmxassist/vm86.c Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/firmware/vmxassist/vm86.c Mon Jul 02 12:19:26 2007 -0600 @@ -561,11 +561,7 @@ lmsw(struct regs *regs, unsigned prefix, unsigned cr0 = (oldctx.cr0 & 0xFFFFFFF0) | ax; TRACE((regs, regs->eip - eip, "lmsw 0x%x", ax)); -#ifndef TEST oldctx.cr0 = cr0 | CR0_PE | CR0_NE; -#else - oldctx.cr0 = cr0 | CR0_PE | CR0_NE | CR0_PG; -#endif if (cr0 & CR0_PE) set_mode(regs, VM86_REAL_TO_PROTECTED); @@ -584,8 +580,13 @@ movr(struct regs *regs, unsigned prefix, unsigned addr = operand(prefix, regs, modrm); unsigned val, r = (modrm >> 3) & 7; - if ((modrm & 0xC0) == 0xC0) /* no registers */ - return 0; + if ((modrm & 0xC0) == 0xC0) { + /* + * Emulate all guest instructions in protected to real mode. + */ + if (mode != VM86_PROTECTED_TO_REAL) + return 0; + } switch (opc) { case 0x88: /* addr32 mov r8, r/m8 */ @@ -656,13 +657,8 @@ movcr(struct regs *regs, unsigned prefix TRACE((regs, regs->eip - eip, "movl %%cr%d, %%eax", cr)); switch (cr) { case 0: -#ifndef TEST setreg32(regs, modrm, oldctx.cr0 & ~(CR0_PE | CR0_NE)); -#else - setreg32(regs, modrm, - oldctx.cr0 & ~(CR0_PE | CR0_NE | CR0_PG)); -#endif break; case 2: setreg32(regs, modrm, get_cr2()); @@ -680,9 +676,6 @@ movcr(struct regs *regs, unsigned prefix switch (cr) { case 0: oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE | CR0_NE); -#ifdef TEST - oldctx.cr0 |= CR0_PG; -#endif if (getreg32(regs, modrm) & CR0_PE) set_mode(regs, VM86_REAL_TO_PROTECTED); else @@ -818,8 +811,13 @@ mov_to_seg(struct regs *regs, unsigned p { unsigned modrm = fetch8(regs); - /* Only need to emulate segment loads in real->protected mode. */ - if (mode != VM86_REAL_TO_PROTECTED) + /* + * Emulate segment loads in: + * 1) real->protected mode. + * 2) protected->real mode. + */ + if ((mode != VM86_REAL_TO_PROTECTED) && + (mode != VM86_PROTECTED_TO_REAL)) return 0; /* Register source only. 
*/ @@ -829,6 +827,8 @@ mov_to_seg(struct regs *regs, unsigned p switch ((modrm & 0x38) >> 3) { case 0: /* es */ regs->ves = getreg16(regs, modrm); + if (mode == VM86_PROTECTED_TO_REAL) + return 1; saved_rm_regs.ves = 0; oldctx.es_sel = regs->ves; return 1; @@ -837,21 +837,29 @@ mov_to_seg(struct regs *regs, unsigned p case 2: /* ss */ regs->uss = getreg16(regs, modrm); + if (mode == VM86_PROTECTED_TO_REAL) + return 1; saved_rm_regs.uss = 0; oldctx.ss_sel = regs->uss; return 1; case 3: /* ds */ regs->vds = getreg16(regs, modrm); + if (mode == VM86_PROTECTED_TO_REAL) + return 1; saved_rm_regs.vds = 0; oldctx.ds_sel = regs->vds; return 1; case 4: /* fs */ regs->vfs = getreg16(regs, modrm); + if (mode == VM86_PROTECTED_TO_REAL) + return 1; saved_rm_regs.vfs = 0; oldctx.fs_sel = regs->vfs; return 1; case 5: /* gs */ regs->vgs = getreg16(regs, modrm); + if (mode == VM86_PROTECTED_TO_REAL) + return 1; saved_rm_regs.vgs = 0; oldctx.gs_sel = regs->vgs; return 1; @@ -1067,7 +1075,8 @@ set_mode(struct regs *regs, enum vm86_mo } mode = newmode; - TRACE((regs, 0, states[mode])); + if (mode != VM86_PROTECTED) + TRACE((regs, 0, states[mode])); } static void @@ -1086,7 +1095,7 @@ jmpl(struct regs *regs, int prefix) if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected mode */ set_mode(regs, VM86_PROTECTED); - else if (mode == VM86_PROTECTED_TO_REAL)/* jump to real mode */ + else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real mode */ set_mode(regs, VM86_REAL); else panic("jmpl"); @@ -1280,6 +1289,12 @@ opcode(struct regs *regs) unsigned eip = regs->eip; unsigned opc, modrm, disp; unsigned prefix = 0; + + if (mode == VM86_PROTECTED_TO_REAL && + oldctx.cs_arbytes.fields.default_ops_size) { + prefix |= DATA32; + prefix |= ADDR32; + } for (;;) { switch ((opc = fetch8(regs))) { @@ -1391,17 +1406,29 @@ opcode(struct regs *regs) continue; case 0x66: - TRACE((regs, regs->eip - eip, "data32")); - prefix |= DATA32; + if (mode == VM86_PROTECTED_TO_REAL && + oldctx.cs_arbytes.fields.default_ops_size) { + TRACE((regs, regs->eip - eip, "data16")); + prefix &= ~DATA32; + } else { + TRACE((regs, regs->eip - eip, "data32")); + prefix |= DATA32; + } continue; case 0x67: - TRACE((regs, regs->eip - eip, "addr32")); - prefix |= ADDR32; + if (mode == VM86_PROTECTED_TO_REAL && + oldctx.cs_arbytes.fields.default_ops_size) { + TRACE((regs, regs->eip - eip, "addr16")); + prefix &= ~ADDR32; + } else { + TRACE((regs, regs->eip - eip, "addr32")); + prefix |= ADDR32; + } continue; - case 0x88: /* addr32 mov r8, r/m8 */ - case 0x8A: /* addr32 mov r/m8, r8 */ + case 0x88: /* mov r8, r/m8 */ + case 0x8A: /* mov r/m8, r8 */ if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED) goto invalid; if ((prefix & ADDR32) == 0) diff -r 443ce7edad0e -r d146700adf71 tools/ioemu/target-i386-dm/exec-dm.c --- a/tools/ioemu/target-i386-dm/exec-dm.c Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/ioemu/target-i386-dm/exec-dm.c Mon Jul 02 12:19:26 2007 -0600 @@ -448,7 +448,7 @@ static void memcpy_words(void *dst, void { asm ( " movl %%edx,%%ecx \n" -#ifdef __x86_64 +#ifdef __x86_64__ " shrl $3,%%ecx \n" " andl $7,%%edx \n" " rep movsq \n" diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xc_domain_restore.c --- a/tools/libxc/xc_domain_restore.c Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/libxc/xc_domain_restore.c Mon Jul 02 12:19:26 2007 -0600 @@ -465,7 +465,7 @@ int xc_domain_restore(int xc_handle, int if ( j == 0 ) break; /* our work here is done */ - if ( j > MAX_BATCH_SIZE ) + if ( (j > MAX_BATCH_SIZE) || (j < 0) ) { ERROR("Max batch size exceeded. 
Giving up."); goto out; @@ -903,13 +903,14 @@ int xc_domain_restore(int xc_handle, int /* Get the list of PFNs that are not in the psuedo-phys map */ { - unsigned int count; + unsigned int count = 0; unsigned long *pfntab; int nr_frees, rc; - if ( !read_exact(io_fd, &count, sizeof(count)) ) - { - ERROR("Error when reading pfn count"); + if ( !read_exact(io_fd, &count, sizeof(count)) || + (count > (1U << 28)) ) /* up to 1TB of address space */ + { + ERROR("Error when reading pfn count (= %u)", count); goto out; } diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xc_misc.c --- a/tools/libxc/xc_misc.c Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/libxc/xc_misc.c Mon Jul 02 12:19:26 2007 -0600 @@ -101,13 +101,37 @@ int xc_perfc_control(int xc_handle, rc = do_sysctl(xc_handle, &sysctl); - if (nbr_desc) + if ( nbr_desc ) *nbr_desc = sysctl.u.perfc_op.nr_counters; - if (nbr_val) + if ( nbr_val ) *nbr_val = sysctl.u.perfc_op.nr_vals; return rc; } + +int xc_getcpuinfo(int xc_handle, int max_cpus, + xc_cpuinfo_t *info, int *nr_cpus) +{ + int rc; + DECLARE_SYSCTL; + + sysctl.cmd = XEN_SYSCTL_getcpuinfo; + sysctl.u.getcpuinfo.max_cpus = max_cpus; + set_xen_guest_handle(sysctl.u.getcpuinfo.info, info); + + if ( (rc = lock_pages(info, max_cpus*sizeof(*info))) != 0 ) + return rc; + + rc = do_sysctl(xc_handle, &sysctl); + + unlock_pages(info, max_cpus*sizeof(*info)); + + if ( nr_cpus ) + *nr_cpus = sysctl.u.getcpuinfo.nr_cpus; + + return rc; +} + int xc_hvm_set_pci_intx_level( int xc_handle, domid_t dom, diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/libxc/xenctrl.h Mon Jul 02 12:19:26 2007 -0600 @@ -491,6 +491,10 @@ int xc_sched_id(int xc_handle, int xc_sched_id(int xc_handle, int *sched_id); +typedef xen_sysctl_cpuinfo_t xc_cpuinfo_t; +int xc_getcpuinfo(int xc_handle, int max_cpus, + xc_cpuinfo_t *info, int *nr_cpus); + int xc_domain_setmaxmem(int xc_handle, uint32_t domid, unsigned int max_memkb); diff -r 443ce7edad0e -r d146700adf71 tools/misc/xen-python-path --- a/tools/misc/xen-python-path Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/misc/xen-python-path Mon Jul 02 12:19:26 2007 -0600 @@ -28,8 +28,13 @@ import os.path import os.path import sys -for p in ['python%s' % sys.version[:3], 'python']: - for l in ['/usr/lib64', '/usr/lib']: +usr = os.path.dirname(os.path.dirname(sys.argv[0])) +list = [ os.path.join(usr,'lib64') ] +list += [ os.path.join(usr,'lib') ] +list += ['/usr/lib64', '/usr/lib'] + +for l in list: + for p in ['python%s' % sys.version[:3], 'python']: for k in ['', 'site-packages/']: d = os.path.join(l, p, k) if os.path.exists(os.path.join(d, AUXBIN)): diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/util/auxbin.py --- a/tools/python/xen/util/auxbin.py Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/python/xen/util/auxbin.py Mon Jul 02 12:19:26 2007 -0600 @@ -27,6 +27,7 @@ LIB_64_ARCHS = [ 'x86_64', 's390x', 'spa import os import os.path +import sys def execute(exe, args = None): @@ -47,6 +48,14 @@ def path(): def libpath(): machine = os.uname()[4] + if sys.argv[0] != '-c': + prefix = os.path.dirname(os.path.dirname(sys.argv[0])) + path = os.path.join(prefix, os.path.basename(LIB_64)) + if machine in LIB_64_ARCHS and os.path.exists(path): + return path + path = os.path.join(prefix, os.path.basename(LIB_32)) + if os.path.exists(path): + return path if machine in LIB_64_ARCHS and os.path.exists(LIB_64): return LIB_64 else: diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendCheckpoint.py --- 
a/tools/python/xen/xend/XendCheckpoint.py Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/python/xen/xend/XendCheckpoint.py Mon Jul 02 12:19:26 2007 -0600 @@ -148,6 +148,8 @@ def save(fd, dominfo, network, live, dst except: log.exception("Failed to reset the migrating domain's name") + raise exn + def restore(xd, fd, dominfo = None, paused = False): signature = read_exact(fd, len(SIGNATURE), diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/python/xen/xend/XendConfig.py Mon Jul 02 12:19:26 2007 -0600 @@ -28,6 +28,7 @@ from xen.xend.PrettyPrint import prettyp from xen.xend.PrettyPrint import prettyprintstring from xen.xend.XendConstants import DOM_STATE_HALTED from xen.xend.server.netif import randomMAC +from xen.util.blkif import blkdev_name_to_number log = logging.getLogger("xend.XendConfig") log.setLevel(logging.WARN) @@ -934,6 +935,62 @@ class XendConfig(dict): return sxpr + def _blkdev_name_to_number(self, dev): + if 'ioemu:' in dev: + _, dev = dev.split(':', 1) + try: + dev, _ = dev.split(':', 1) + except ValueError: + pass + + try: + devid = int(dev) + except ValueError: + # devid is not a number but a string containing either device + # name (e.g. xvda) or device_type/device_id (e.g. vbd/51728) + dev2 = type(dev) is str and dev.split('/')[-1] or None + if dev2 == None: + log.debug("Could not check the device %s", dev) + return None + try: + devid = int(dev2) + except ValueError: + devid = blkdev_name_to_number(dev2) + if devid == None: + log.debug("The device %s is not device name", dev2) + return None + return devid + + def device_duplicate_check(self, dev_type, dev_info, defined_config): + defined_devices_sxpr = self.all_devices_sxpr(target = defined_config) + + if dev_type == 'vbd': + dev_uname = dev_info.get('uname') + blkdev_name = dev_info.get('dev') + devid = self._blkdev_name_to_number(blkdev_name) + if devid == None: + return + + for o_dev_type, o_dev_info in defined_devices_sxpr: + if dev_type == o_dev_type: + if dev_uname == sxp.child_value(o_dev_info, 'uname'): + raise XendConfigError('The uname "%s" is already defined' % + dev_uname) + o_blkdev_name = sxp.child_value(o_dev_info, 'dev') + o_devid = self._blkdev_name_to_number(o_blkdev_name) + if o_devid != None and devid == o_devid: + raise XendConfigError('The device "%s" is already defined' % + blkdev_name) + + elif dev_type == 'vif': + dev_mac = dev_info.get('mac') + + for o_dev_type, o_dev_info in defined_devices_sxpr: + if dev_type == o_dev_type: + if dev_mac == sxp.child_value(o_dev_info, 'mac'): + raise XendConfigError('The mac "%s" is already defined' % + dev_mac) + def device_add(self, dev_type, cfg_sxp = None, cfg_xenapi = None, target = None): """Add a device configuration in SXP format or XenAPI struct format. @@ -997,6 +1054,8 @@ class XendConfig(dict): if dev_type == 'vif': if not dev_info.get('mac'): dev_info['mac'] = randomMAC() + + self.device_duplicate_check(dev_type, dev_info, target) # create uuid if it doesn't exist dev_uuid = dev_info.get('uuid', None) @@ -1275,15 +1334,19 @@ class XendConfig(dict): return False - def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None): + def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None, target = None): """Get Device SXPR by either giving the device UUID or (type, config). 
@rtype: list of lists @return: device config sxpr """ sxpr = [] - if dev_uuid != None and dev_uuid in self['devices']: - dev_type, dev_info = self['devices'][dev_uuid] + + if target == None: + target = self + + if dev_uuid != None and dev_uuid in target['devices']: + dev_type, dev_info = target['devices'][dev_uuid] if dev_type == None or dev_info == None: raise XendConfigError("Required either UUID or device type and " @@ -1300,8 +1363,12 @@ class XendConfig(dict): return sxpr - def ordered_device_refs(self): + def ordered_device_refs(self, target = None): result = [] + + if target == None: + target = self + # vkbd devices *must* be before vfb devices, otherwise # there is a race condition when setting up devices # where the daemon spawned for the vfb may write stuff @@ -1309,27 +1376,30 @@ class XendConfig(dict): # setup permissions on the vkbd backend path. This race # results in domain creation failing with 'device already # connected' messages - result.extend([u for u in self['devices'].keys() if self['devices'][u][0] == 'vkbd']) - - result.extend(self['console_refs'] + - self['vbd_refs'] + - self['vif_refs'] + - self['vtpm_refs']) - - result.extend([u for u in self['devices'].keys() if u not in result]) + result.extend([u for u in target['devices'].keys() if target['devices'][u][0] == 'vkbd']) + + result.extend(target.get('console_refs', []) + + target.get('vbd_refs', []) + + target.get('vif_refs', []) + + target.get('vtpm_refs', [])) + + result.extend([u for u in target['devices'].keys() if u not in result]) return result - def all_devices_sxpr(self): + def all_devices_sxpr(self, target = None): """Returns the SXPR for all devices in the current configuration.""" sxprs = [] pci_devs = [] - if 'devices' not in self: + if target == None: + target = self + + if 'devices' not in target: return sxprs - ordered_refs = self.ordered_device_refs() + ordered_refs = self.ordered_device_refs(target = target) for dev_uuid in ordered_refs: - dev_type, dev_info = self['devices'][dev_uuid] + dev_type, dev_info = target['devices'][dev_uuid] if dev_type == 'pci': # special case for pci devices sxpr = [['uuid', dev_info['uuid']]] for pci_dev_info in dev_info['devs']: @@ -1340,7 +1410,8 @@ class XendConfig(dict): sxprs.append((dev_type, sxpr)) else: sxpr = self.device_sxpr(dev_type = dev_type, - dev_info = dev_info) + dev_info = dev_info, + target = target) sxprs.append((dev_type, sxpr)) return sxprs diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/python/xen/xend/XendDomain.py Mon Jul 02 12:19:26 2007 -0600 @@ -1262,8 +1262,10 @@ class XendDomain: try: XendCheckpoint.save(fd, dominfo, False, False, dst, checkpoint=checkpoint) - finally: + except Exception, e: os.close(fd) + raise e + os.close(fd) except OSError, ex: raise XendError("can't write guest state file %s: %s" % (dst, ex[1])) diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/server/irqif.py --- a/tools/python/xen/xend/server/irqif.py Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/python/xen/xend/server/irqif.py Mon Jul 02 12:19:26 2007 -0600 @@ -61,7 +61,7 @@ class IRQController(DevController): pirq = get_param('irq') - rc = xc.domain_irq_permission(dom = self.getDomid(), + rc = xc.domain_irq_permission(domid = self.getDomid(), pirq = pirq, allow_access = True) diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py Mon Jul 02 10:31:03 2007 -0600 +++ 
b/tools/python/xen/xend/server/pciif.py Mon Jul 02 12:19:26 2007 -0600 @@ -185,3 +185,6 @@ class PciController(DevController): def waitForBackend(self,devid): return (0, "ok - no hotplug") + + def migrate(self, config, network, dst, step, domName): + raise XendError('Migration not permitted with assigned PCI device.') diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Mon Jul 02 10:31:03 2007 -0600 +++ b/tools/python/xen/xm/main.py Mon Jul 02 12:19:26 2007 -0600 @@ -2168,9 +2168,7 @@ def xm_network_attach(args): server.xend.domain.device_create(dom, vif) -def detach(args, command, deviceClass): - arg_check(args, command, 2, 3) - +def detach(args, deviceClass): dom = args[0] dev = args[1] try: @@ -2204,16 +2202,17 @@ def xm_block_detach(args): raise OptionError("Cannot find device '%s' in domain '%s'" % (dev,dom)) else: + arg_check(args, 'block-detach', 2, 3) try: - detach(args, 'block-detach', 'vbd') + detach(args, 'vbd') return except: pass - detach(args, 'block-detach', 'tap') + detach(args, 'tap') def xm_network_detach(args): if serverType == SERVER_XEN_API: - arg_check(args, "xm_block_detach", 2, 3) + arg_check(args, "xm_network_detach", 2, 3) dom = args[0] devid = args[1] vif_refs = server.xenapi.VM.get_VIFs(get_single_vm(dom)) @@ -2227,7 +2226,8 @@ def xm_network_detach(args): else: print "Cannot find device '%s' in domain '%s'" % (devid,dom) else: - detach(args, 'network-detach', 'vif') + arg_check(args, 'network-detach', 2, 3) + detach(args, 'vif') def xm_vnet_list(args): diff -r 443ce7edad0e -r d146700adf71 xen/acm/acm_core.c --- a/xen/acm/acm_core.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/acm/acm_core.c Mon Jul 02 12:19:26 2007 -0600 @@ -89,7 +89,7 @@ static void __init set_dom0_ssidref(cons if (hi < ACM_MAX_NUM_TYPES && hi >= 1) dom0_ste_ssidref = hi; for (i = 0; i < sizeof(polname); i++) { - polname[i] = c[7+i]; + polname[i] = c[5+i]; if (polname[i] == '\0' || polname[i] == '\t' || polname[i] == '\n' || polname[i] == ' ' || polname[i] == ':') { diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/Makefile Mon Jul 02 12:19:26 2007 -0600 @@ -10,6 +10,7 @@ subdir-$(x86_64) += x86_64 obj-y += apic.o obj-y += bitops.o +obj-y += clear_page.o obj-y += compat.o obj-y += delay.o obj-y += dmi_scan.o diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/edd.S --- a/xen/arch/x86/boot/edd.S Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/boot/edd.S Mon Jul 02 12:19:26 2007 -0600 @@ -24,7 +24,7 @@ /* Maximum number of EDD information structures at boot_edd_info. */ #define EDD_INFO_MAX 6 -/* Maximum number of MBR signatures at boot_edd_signature. */ +/* Maximum number of MBR signatures at boot_mbr_signature. */ #define EDD_MBR_SIG_MAX 16 /* Size of components of EDD information structure. */ @@ -40,10 +40,8 @@ get_edd: # Read the first sector of each BIOS disk device and store the 4-byte signature edd_mbr_sig_start: movb $0x80, %dl # from device 80 - movw $bootsym(boot_edd_signature),%bx # store buffer ptr in bx + movw $bootsym(boot_mbr_signature),%bx # store buffer ptr in bx edd_mbr_sig_read: - movl $0xFFFFFFFF, %eax - movl %eax, (%bx) # assume failure pushw %bx movb $0x02, %ah # 0x02 Read Sectors movb $1, %al # read 1 sector @@ -64,11 +62,12 @@ edd_mbr_sig_read: cmpb $0, %ah # some BIOSes do not set CF jne edd_mbr_sig_done # on failure, we're done. 
movl bootsym(boot_edd_info)+EDD_MBR_SIG_OFFSET,%eax - movl %eax, (%bx) # store signature from MBR - incb bootsym(boot_edd_signature_nr) # note that we stored something + movb %dl, (%bx) # store BIOS drive number + movl %eax, 4(%bx) # store signature from MBR + incb bootsym(boot_mbr_signature_nr) # note that we stored something incb %dl # increment to next device - addw $4, %bx # increment sig buffer ptr - cmpb $EDD_MBR_SIG_MAX,bootsym(boot_edd_signature_nr) + addw $8, %bx # increment sig buffer ptr + cmpb $EDD_MBR_SIG_MAX,bootsym(boot_mbr_signature_nr) jb edd_mbr_sig_read edd_mbr_sig_done: @@ -150,12 +149,13 @@ opt_edd: opt_edd: .byte 0 # edd=on/off/skipmbr -.globl boot_edd_info_nr, boot_edd_signature_nr +.globl boot_edd_info, boot_edd_info_nr +.globl boot_mbr_signature, boot_mbr_signature_nr boot_edd_info_nr: .byte 0 -boot_edd_signature_nr: +boot_mbr_signature_nr: .byte 0 -boot_edd_signature: - .fill EDD_MBR_SIG_MAX*4,1,0 +boot_mbr_signature: + .fill EDD_MBR_SIG_MAX*8,1,0 boot_edd_info: .fill 512,1,0 # big enough for a disc sector diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/x86_32.S --- a/xen/arch/x86/boot/x86_32.S Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/boot/x86_32.S Mon Jul 02 12:19:26 2007 -0600 @@ -36,15 +36,29 @@ 1: mov %eax,(%edi) /* This is the default interrupt handler. */ int_msg: - .asciz "Unknown interrupt\n" + .asciz "Unknown interrupt (cr2=%08x)\n" +hex_msg: + .asciz " %08x" ALIGN ignore_int: + pusha cld mov $(__HYPERVISOR_DS),%eax mov %eax,%ds mov %eax,%es + mov %cr2,%eax + push %eax pushl $int_msg call printk + add $8,%esp + mov %esp,%ebp +0: pushl (%ebp) + add $4,%ebp + pushl $hex_msg + call printk + add $8,%esp + test $0xffc,%ebp + jnz 0b 1: jmp 1b ENTRY(stack_start) @@ -65,11 +79,6 @@ gdt_descr: gdt_descr: .word LAST_RESERVED_GDT_BYTE .long gdt_table - FIRST_RESERVED_GDT_BYTE - - .word 0 -nopaging_gdt_descr: - .word LAST_RESERVED_GDT_BYTE - .long sym_phys(gdt_table) - FIRST_RESERVED_GDT_BYTE .align PAGE_SIZE, 0 /* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */ diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/x86_64.S --- a/xen/arch/x86/boot/x86_64.S Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/boot/x86_64.S Mon Jul 02 12:19:26 2007 -0600 @@ -56,12 +56,23 @@ 1: movq %rax,(%rdi) /* This is the default interrupt handler. 
*/ int_msg: - .asciz "Unknown interrupt\n" + .asciz "Unknown interrupt (cr2=%016lx)\n" +hex_msg: + .asciz " %016lx" ignore_int: - cld + SAVE_ALL + movq %cr2,%rsi leaq int_msg(%rip),%rdi xorl %eax,%eax call printk + movq %rsp,%rbp +0: movq (%rbp),%rsi + addq $8,%rbp + leaq hex_msg(%rip),%rdi + xorl %eax,%eax + call printk + testq $0xff8,%rbp + jnz 0b 1: jmp 1b diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/clear_page.S --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/clear_page.S Mon Jul 02 12:19:26 2007 -0600 @@ -0,0 +1,26 @@ +#include <xen/config.h> +#include <asm/page.h> + +#ifdef __i386__ +#define ptr_reg %edx +#else +#define ptr_reg %rdi +#endif + +ENTRY(clear_page_sse2) +#ifdef __i386__ + mov 4(%esp), ptr_reg +#endif + mov $PAGE_SIZE/16, %ecx + xor %eax,%eax + +0: dec %ecx + movnti %eax, (ptr_reg) + movnti %eax, 4(ptr_reg) + movnti %eax, 8(ptr_reg) + movnti %eax, 12(ptr_reg) + lea 16(ptr_reg), ptr_reg + jnz 0b + + sfence + ret diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/domain.c Mon Jul 02 12:19:26 2007 -0600 @@ -151,7 +151,8 @@ int setup_arg_xlat_area(struct vcpu *v, pg = alloc_domheap_page(NULL); if ( !pg ) return -ENOMEM; - d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg)); + d->arch.mm_arg_xlat_l3 = page_to_virt(pg); + clear_page(d->arch.mm_arg_xlat_l3); } l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] = @@ -444,7 +445,8 @@ int arch_domain_create(struct domain *d) if ( (pg = alloc_domheap_page(NULL)) == NULL ) goto fail; - d->arch.mm_perdomain_l2 = clear_page(page_to_virt(pg)); + d->arch.mm_perdomain_l2 = page_to_virt(pg); + clear_page(d->arch.mm_perdomain_l2); for ( i = 0; i < (1 << pdpt_order); i++ ) d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] = l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i, @@ -452,7 +454,8 @@ int arch_domain_create(struct domain *d) if ( (pg = alloc_domheap_page(NULL)) == NULL ) goto fail; - d->arch.mm_perdomain_l3 = clear_page(page_to_virt(pg)); + d->arch.mm_perdomain_l3 = page_to_virt(pg); + clear_page(d->arch.mm_perdomain_l3); d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] = l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2), __PAGE_HYPERVISOR); diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/hvm/hvm.c Mon Jul 02 12:19:26 2007 -0600 @@ -242,6 +242,11 @@ void hvm_domain_relinquish_resources(str { hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq); hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq); + + pit_deinit(d); + rtc_deinit(d); + pmtimer_deinit(d); + hpet_deinit(d); } void hvm_domain_destroy(struct domain *d) @@ -421,22 +426,11 @@ int hvm_vcpu_initialise(struct vcpu *v) void hvm_vcpu_destroy(struct vcpu *v) { - struct domain *d = v->domain; - vlapic_destroy(v); hvm_funcs.vcpu_destroy(v); /* Event channel is already freed by evtchn_destroy(). */ /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/ - - if ( v->vcpu_id == 0 ) - { - /* NB. All these really belong in hvm_domain_destroy(). */ - pit_deinit(d); - rtc_deinit(d); - pmtimer_deinit(d); - hpet_deinit(d); - } } diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/hvm/io.c Mon Jul 02 12:19:26 2007 -0600 @@ -858,6 +858,7 @@ void hvm_io_assist(void) } /* Copy register changes back into current guest state. 
*/ + regs->eflags &= ~X86_EFLAGS_RF; hvm_load_cpu_guest_regs(v, regs); memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES); diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/hvm/platform.c Mon Jul 02 12:19:26 2007 -0600 @@ -1065,6 +1065,7 @@ void handle_mmio(unsigned long gpa) } regs->eip += inst_len; /* advance %eip */ + regs->eflags &= ~X86_EFLAGS_RF; switch ( mmio_op->instr ) { case INSTR_MOV: @@ -1122,6 +1123,7 @@ void handle_mmio(unsigned long gpa) /* IO read --> memory write */ if ( dir == IOREQ_READ ) errcode |= PFEC_write_access; regs->eip -= inst_len; /* do not advance %eip */ + regs->eflags |= X86_EFLAGS_RF; /* RF was set by original #PF */ hvm_inject_exception(TRAP_page_fault, errcode, addr); return; } @@ -1150,6 +1152,7 @@ void handle_mmio(unsigned long gpa) /* Failed on the page-spanning copy. Inject PF into * the guest for the address where we failed */ regs->eip -= inst_len; /* do not advance %eip */ + regs->eflags |= X86_EFLAGS_RF; /* RF was set by #PF */ /* Must set CR2 at the failing address */ addr += size - rv; gdprintk(XENLOG_DEBUG, "Pagefault on non-io side of a " diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Mon Jul 02 12:19:26 2007 -0600 @@ -391,7 +391,7 @@ int svm_vmcb_restore(struct vcpu *v, str } skip_cr3: - vmcb->cr4 = c->cr4 | SVM_CR4_HOST_MASK; + vmcb->cr4 = c->cr4 | HVM_CR4_HOST_MASK; v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4; vmcb->idtr.limit = c->idtr_limit; @@ -448,7 +448,8 @@ int svm_vmcb_restore(struct vcpu *v, str /* update VMCB for nested paging restore */ if ( paging_mode_hap(v->domain) ) { vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0; - vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4; + vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 | + (HVM_CR4_HOST_MASK & ~X86_CR4_PAE); vmcb->cr3 = c->cr3; vmcb->np_enable = 1; vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */ @@ -805,8 +806,10 @@ static void svm_ctxt_switch_from(struct : : "a" (__pa(root_vmcb[cpu])) ); #ifdef __x86_64__ - /* Resume use of IST2 for NMIs now that the host TR is reinstated. */ - idt_tables[cpu][TRAP_nmi].a |= 2UL << 32; + /* Resume use of ISTs now that the host TR is reinstated. */ + idt_tables[cpu][TRAP_double_fault].a |= 1UL << 32; /* IST1 */ + idt_tables[cpu][TRAP_nmi].a |= 2UL << 32; /* IST2 */ + idt_tables[cpu][TRAP_machine_check].a |= 3UL << 32; /* IST3 */ #endif } @@ -826,10 +829,12 @@ static void svm_ctxt_switch_to(struct vc set_segment_register(ss, 0); /* - * Cannot use IST2 for NMIs while we are running with the guest TR. But - * this doesn't matter: the IST is only needed to handle SYSCALL/SYSRET. + * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR. + * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET. 
*/ - idt_tables[cpu][TRAP_nmi].a &= ~(2UL << 32); + idt_tables[cpu][TRAP_double_fault].a &= ~(3UL << 32); + idt_tables[cpu][TRAP_nmi].a &= ~(3UL << 32); + idt_tables[cpu][TRAP_machine_check].a &= ~(3UL << 32); #endif svm_restore_dr(v); @@ -1823,9 +1828,19 @@ static int mov_to_cr(int gpreg, int cr, break; case 4: /* CR4 */ + if ( value & HVM_CR4_GUEST_RESERVED_BITS ) + { + HVM_DBG_LOG(DBG_LEVEL_1, + "Guest attempts to set reserved bit in CR4: %lx", + value); + svm_inject_exception(v, TRAP_gp_fault, 1, 0); + break; + } + if ( paging_mode_hap(v->domain) ) { - vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 = value; + v->arch.hvm_svm.cpu_shadow_cr4 = value; + vmcb->cr4 = value | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE); paging_update_paging_modes(v); /* signal paging update to ASID handler */ svm_asid_g_update_paging (v); @@ -1875,7 +1890,7 @@ static int mov_to_cr(int gpreg, int cr, } v->arch.hvm_svm.cpu_shadow_cr4 = value; - vmcb->cr4 = value | SVM_CR4_HOST_MASK; + vmcb->cr4 = value | HVM_CR4_HOST_MASK; /* * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates @@ -2071,9 +2086,11 @@ static inline void svm_do_msr_access( case MSR_IA32_TIME_STAMP_COUNTER: msr_content = hvm_get_guest_time(v); break; + case MSR_IA32_APICBASE: msr_content = vcpu_vlapic(v)->hw.apic_base_msr; break; + case MSR_EFER: msr_content = v->arch.hvm_svm.cpu_shadow_efer; break; @@ -2093,6 +2110,10 @@ static inline void svm_do_msr_access( * particularly meaningful, but at least avoids the guest crashing! */ msr_content = 0; + break; + + case MSR_K8_VM_HSAVE_PA: + svm_inject_exception(v, TRAP_gp_fault, 1, 0); break; default: @@ -2128,9 +2149,15 @@ static inline void svm_do_msr_access( hvm_set_guest_time(v, msr_content); pt_reset(v); break; + case MSR_IA32_APICBASE: vlapic_msr_set(vcpu_vlapic(v), msr_content); break; + + case MSR_K8_VM_HSAVE_PA: + svm_inject_exception(v, TRAP_gp_fault, 1, 0); + break; + default: if ( !long_mode_do_msr_write(regs) ) wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx); @@ -2265,12 +2292,13 @@ static int svm_reset_to_realmode(struct vmcb->cr2 = 0; vmcb->efer = EFER_SVME; - vmcb->cr4 = SVM_CR4_HOST_MASK; + vmcb->cr4 = HVM_CR4_HOST_MASK; v->arch.hvm_svm.cpu_shadow_cr4 = 0; if ( paging_mode_hap(v->domain) ) { vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0; - vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4; + vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 | + (HVM_CR4_HOST_MASK & ~X86_CR4_PAE); } /* This will jump to ROMBIOS */ @@ -2411,6 +2439,12 @@ asmlinkage void svm_vmexit_handler(struc break; } + case VMEXIT_EXCEPTION_MC: + HVMTRACE_0D(MCE, v); + svm_store_cpu_guest_regs(v, regs, NULL); + do_machine_check(regs); + break; + case VMEXIT_VINTR: vmcb->vintr.fields.irq = 0; vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR; diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/hvm/svm/vmcb.c Mon Jul 02 12:19:26 2007 -0600 @@ -224,7 +224,7 @@ static int construct_vmcb(struct vcpu *v /* Guest CR4. 
*/ arch_svm->cpu_shadow_cr4 = read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE); - vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK; + vmcb->cr4 = arch_svm->cpu_shadow_cr4 | HVM_CR4_HOST_MASK; paging_update_paging_modes(v); vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; @@ -235,7 +235,9 @@ static int construct_vmcb(struct vcpu *v vmcb->np_enable = 1; /* enable nested paging */ vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */ vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table); - vmcb->cr4 = arch_svm->cpu_shadow_cr4 = 0; + vmcb->cr4 = arch_svm->cpu_shadow_cr4 = + (HVM_CR4_HOST_MASK & ~X86_CR4_PAE); + vmcb->exception_intercepts = HVM_TRAP_MASK; /* No point in intercepting CR0/3/4 reads, because the hardware * will return the guest versions anyway. */ @@ -249,7 +251,7 @@ static int construct_vmcb(struct vcpu *v } else { - vmcb->exception_intercepts = 1U << TRAP_page_fault; + vmcb->exception_intercepts = HVM_TRAP_MASK | (1U << TRAP_page_fault); } return 0; diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Mon Jul 02 12:19:26 2007 -0600 @@ -43,6 +43,7 @@ u32 vmx_secondary_exec_control __read_mo u32 vmx_secondary_exec_control __read_mostly; u32 vmx_vmexit_control __read_mostly; u32 vmx_vmentry_control __read_mostly; +bool_t cpu_has_vmx_ins_outs_instr_info __read_mostly; static u32 vmcs_revision_id __read_mostly; @@ -133,6 +134,7 @@ void vmx_init_vmcs_config(void) vmx_secondary_exec_control = _vmx_secondary_exec_control; vmx_vmexit_control = _vmx_vmexit_control; vmx_vmentry_control = _vmx_vmentry_control; + cpu_has_vmx_ins_outs_instr_info = !!(vmx_msr_high & (1U<<22)); } else { @@ -142,6 +144,7 @@ void vmx_init_vmcs_config(void) BUG_ON(vmx_secondary_exec_control != _vmx_secondary_exec_control); BUG_ON(vmx_vmexit_control != _vmx_vmexit_control); BUG_ON(vmx_vmentry_control != _vmx_vmentry_control); + BUG_ON(cpu_has_vmx_ins_outs_instr_info != !!(vmx_msr_high & (1U<<22))); } /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ @@ -421,7 +424,7 @@ static void construct_vmcs(struct vcpu * __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL); #endif - __vmwrite(EXCEPTION_BITMAP, 1U << TRAP_page_fault); + __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault)); /* Guest CR0. */ cr0 = read_cr0(); diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Jul 02 12:19:26 2007 -0600 @@ -560,6 +560,9 @@ int vmx_vmcs_restore(struct vcpu *v, str __vmwrite(GUEST_RSP, c->rsp); __vmwrite(GUEST_RFLAGS, c->rflags); + v->arch.hvm_vmx.cpu_cr0 = (c->cr0 | X86_CR0_PE | X86_CR0_PG + | X86_CR0_NE | X86_CR0_WP | X86_CR0_ET); + __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0); v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0; __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0); @@ -577,33 +580,17 @@ int vmx_vmcs_restore(struct vcpu *v, str goto skip_cr3; } - if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) { - /* - * This is simple TLB flush, implying the guest has - * removed some translation or changed page attributes. - * We simply invalidate the shadow. - */ - mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); - if (mfn != pagetable_get_pfn(v->arch.guest_table)) { - goto bad_cr3; - } - } else { - /* - * If different, make a shadow. Check if the PDBR is valid - * first. 
- */ - HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3); - /* current!=vcpu as not called by arch_vmx_do_launch */ - mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); - if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) { - goto bad_cr3; - } - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = pagetable_from_pfn(mfn); - if (old_base_mfn) - put_page(mfn_to_page(old_base_mfn)); - v->arch.hvm_vmx.cpu_cr3 = c->cr3; - } + HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3); + /* current!=vcpu as not called by arch_vmx_do_launch */ + mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); + if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) { + goto bad_cr3; + } + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); + v->arch.guest_table = pagetable_from_pfn(mfn); + if (old_base_mfn) + put_page(mfn_to_page(old_base_mfn)); + v->arch.hvm_vmx.cpu_cr3 = c->cr3; skip_cr3: #if defined(__x86_64__) @@ -615,7 +602,7 @@ int vmx_vmcs_restore(struct vcpu *v, str } #endif - __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK)); + __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK)); v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4; __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4); @@ -1315,16 +1302,20 @@ static int __get_instruction_length(void static void inline __update_guest_eip(unsigned long inst_len) { - unsigned long current_eip, intr_shadow; - - current_eip = __vmread(GUEST_RIP); - __vmwrite(GUEST_RIP, current_eip + inst_len); - - intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO); - if ( intr_shadow & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) ) - { - intr_shadow &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS); - __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow); + unsigned long x; + + x = __vmread(GUEST_RIP); + __vmwrite(GUEST_RIP, x + inst_len); + + x = __vmread(GUEST_RFLAGS); + if ( x & X86_EFLAGS_RF ) + __vmwrite(GUEST_RFLAGS, x & ~X86_EFLAGS_RF); + + x = __vmread(GUEST_INTERRUPTIBILITY_INFO); + if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) ) + { + x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS); + __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x); } } @@ -1475,16 +1466,34 @@ static void vmx_do_invlpg(unsigned long paging_invlpg(v, va); } -/* - * get segment for string pio according to guest instruction - */ -static void vmx_str_pio_get_segment(int long_mode, unsigned long eip, - int inst_len, enum x86_segment *seg) +/* Get segment for OUTS according to guest instruction. */ +static enum x86_segment vmx_outs_get_segment( + int long_mode, unsigned long eip, int inst_len) { unsigned char inst[MAX_INST_LEN]; + enum x86_segment seg = x86_seg_ds; int i; extern int inst_copy_from_guest(unsigned char *, unsigned long, int); + if ( likely(cpu_has_vmx_ins_outs_instr_info) ) + { + unsigned int instr_info = __vmread(VMX_INSTRUCTION_INFO); + + /* Get segment register according to bits 17:15. 
*/ + switch ( (instr_info >> 15) & 7 ) + { + case 0: seg = x86_seg_es; break; + case 1: seg = x86_seg_cs; break; + case 2: seg = x86_seg_ss; break; + case 3: seg = x86_seg_ds; break; + case 4: seg = x86_seg_fs; break; + case 5: seg = x86_seg_gs; break; + default: BUG(); + } + + goto out; + } + if ( !long_mode ) eip += __vmread(GUEST_CS_BASE); @@ -1493,7 +1502,7 @@ static void vmx_str_pio_get_segment(int { gdprintk(XENLOG_ERR, "Get guest instruction failed\n"); domain_crash(current->domain); - return; + goto out; } for ( i = 0; i < inst_len; i++ ) @@ -1510,25 +1519,28 @@ static void vmx_str_pio_get_segment(int #endif continue; case 0x2e: /* CS */ - *seg = x86_seg_cs; + seg = x86_seg_cs; continue; case 0x36: /* SS */ - *seg = x86_seg_ss; + seg = x86_seg_ss; continue; case 0x26: /* ES */ - *seg = x86_seg_es; + seg = x86_seg_es; continue; case 0x64: /* FS */ - *seg = x86_seg_fs; + seg = x86_seg_fs; continue; case 0x65: /* GS */ - *seg = x86_seg_gs; + seg = x86_seg_gs; continue; case 0x3e: /* DS */ - *seg = x86_seg_ds; + seg = x86_seg_ds; continue; } } + + out: + return seg; } static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip, @@ -1541,7 +1553,7 @@ static int vmx_str_pio_check_descriptor( *base = 0; *limit = 0; if ( seg != x86_seg_es ) - vmx_str_pio_get_segment(long_mode, eip, inst_len, &seg); + seg = vmx_outs_get_segment(long_mode, eip, inst_len); switch ( seg ) { @@ -1587,7 +1599,7 @@ static int vmx_str_pio_check_descriptor( } *ar_bytes = __vmread(ar_field); - return !(*ar_bytes & 0x10000); + return !(*ar_bytes & X86_SEG_AR_SEG_UNUSABLE); } @@ -1896,7 +1908,7 @@ static void vmx_world_save(struct vcpu * c->eip += __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */ c->esp = __vmread(GUEST_RSP); - c->eflags = __vmread(GUEST_RFLAGS); + c->eflags = __vmread(GUEST_RFLAGS) & ~X86_EFLAGS_RF; c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0; c->cr3 = v->arch.hvm_vmx.cpu_cr3; @@ -1997,7 +2009,7 @@ static int vmx_world_restore(struct vcpu else HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3); - __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK)); + __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK)); v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4; __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4); @@ -2272,7 +2284,6 @@ static int vmx_set_cr0(unsigned long val "Enabling CR0.PE at %%eip 0x%lx", eip); if ( vmx_assist(v, VMX_ASSIST_RESTORE) ) { - eip = __vmread(GUEST_RIP); HVM_DBG_LOG(DBG_LEVEL_1, "Restoring to %%eip 0x%lx", eip); return 0; /* do not update eip! 
*/ @@ -2397,6 +2408,15 @@ static int mov_to_cr(int gp, int cr, str case 4: /* CR4 */ old_cr = v->arch.hvm_vmx.cpu_shadow_cr4; + if ( value & HVM_CR4_GUEST_RESERVED_BITS ) + { + HVM_DBG_LOG(DBG_LEVEL_1, + "Guest attempts to set reserved bit in CR4: %lx", + value); + vmx_inject_hw_exception(v, TRAP_gp_fault, 0); + break; + } + if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) ) { if ( vmx_pgbit_test(v) ) @@ -2437,7 +2457,7 @@ static int mov_to_cr(int gp, int cr, str } } - __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK); + __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK); v->arch.hvm_vmx.cpu_shadow_cr4 = value; __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4); @@ -2581,7 +2601,7 @@ static inline int vmx_do_msr_read(struct case MSR_IA32_APICBASE: msr_content = vcpu_vlapic(v)->hw.apic_base_msr; break; - case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1: + case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2: goto gp_fault; default: if ( long_mode_do_msr_read(regs) ) @@ -2707,7 +2727,7 @@ static inline int vmx_do_msr_write(struc case MSR_IA32_APICBASE: vlapic_msr_set(vcpu_vlapic(v), msr_content); break; - case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1: + case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2: goto gp_fault; default: if ( !long_mode_do_msr_write(regs) ) @@ -2823,7 +2843,8 @@ static void vmx_reflect_exception(struct } } -static void vmx_failed_vmentry(unsigned int exit_reason) +static void vmx_failed_vmentry(unsigned int exit_reason, + struct cpu_user_regs *regs) { unsigned int failed_vmentry_reason = (uint16_t)exit_reason; unsigned long exit_qualification; @@ -2840,6 +2861,9 @@ static void vmx_failed_vmentry(unsigned break; case EXIT_REASON_MACHINE_CHECK: printk("caused by machine check.\n"); + HVMTRACE_0D(MCE, current); + vmx_store_cpu_guest_regs(current, regs, NULL); + do_machine_check(regs); break; default: printk("reason not known yet!"); @@ -2869,7 +2893,7 @@ asmlinkage void vmx_vmexit_handler(struc local_irq_enable(); if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) ) - return vmx_failed_vmentry(exit_reason); + return vmx_failed_vmentry(exit_reason, regs); switch ( exit_reason ) { @@ -2920,11 +2944,19 @@ asmlinkage void vmx_vmexit_handler(struc vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code); break; case TRAP_nmi: - HVMTRACE_0D(NMI, v); if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI ) + { + HVMTRACE_0D(NMI, v); + vmx_store_cpu_guest_regs(v, regs, NULL); do_nmi(regs); /* Real NMI, vector 2: normal processing. 
*/ + } else vmx_reflect_exception(v); + break; + case TRAP_machine_check: + HVMTRACE_0D(MCE, v); + vmx_store_cpu_guest_regs(v, regs, NULL); + do_machine_check(regs); break; default: goto exit_and_crash; diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/io_apic.c Mon Jul 02 12:19:26 2007 -0600 @@ -371,7 +371,7 @@ static int pin_2_irq(int idx, int apic, * so mask in all cases should simply be TARGET_CPUS */ #ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) +void /*__init*/ setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; @@ -849,7 +849,7 @@ static inline void UNEXPECTED_IO_APIC(vo { } -void __init __print_IO_APIC(void) +void /*__init*/ __print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/mm.c Mon Jul 02 12:19:26 2007 -0600 @@ -3240,6 +3240,7 @@ static int ptwr_emulated_update( struct ptwr_emulate_ctxt *ptwr_ctxt) { unsigned long mfn; + unsigned long unaligned_addr = addr; struct page_info *page; l1_pgentry_t pte, ol1e, nl1e, *pl1e; struct vcpu *v = current; @@ -3294,7 +3295,7 @@ static int ptwr_emulated_update( if ( unlikely(!get_page_from_l1e(nl1e, d)) ) { if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) && - (bytes == 4) && (addr & 4) && !do_cmpxchg && + (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) ) { /* diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/platform_hypercall.c --- a/xen/arch/x86/platform_hypercall.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/platform_hypercall.c Mon Jul 02 12:19:26 2007 -0600 @@ -20,12 +20,20 @@ #include <xen/guest_access.h> #include <asm/current.h> #include <public/platform.h> +#include <asm/edd.h> #include <asm/mtrr.h> #include "cpu/mtrr/mtrr.h" + +extern uint16_t boot_edid_caps; +extern uint8_t boot_edid_info[]; #ifndef COMPAT typedef long ret_t; DEFINE_SPINLOCK(xenpf_lock); +# undef copy_from_compat +# define copy_from_compat copy_from_guest +# undef copy_to_compat +# define copy_to_compat copy_to_guest #else extern spinlock_t xenpf_lock; #endif @@ -150,6 +158,94 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe } } break; + + case XENPF_firmware_info: + switch ( op->u.firmware_info.type ) + { + case XEN_FW_DISK_INFO: { + const struct edd_info *info; + u16 length; + + ret = -ESRCH; + if ( op->u.firmware_info.index >= bootsym(boot_edd_info_nr) ) + break; + + info = bootsym(boot_edd_info) + op->u.firmware_info.index; + + /* Transfer the EDD info block. */ + ret = -EFAULT; + if ( copy_from_compat(&length, op->u.firmware_info.u. + disk_info.edd_params, 1) ) + break; + if ( length > info->edd_device_params.length ) + length = info->edd_device_params.length; + if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params, + (u8 *)&info->edd_device_params, + length) ) + break; + if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params, + &length, 1) ) + break; + + /* Transfer miscellaneous other information values. */ +#define C(x) op->u.firmware_info.u.disk_info.x = info->x + C(device); + C(version); + C(interface_support); + C(legacy_max_cylinder); + C(legacy_max_head); + C(legacy_sectors_per_track); +#undef C + + ret = (copy_field_to_guest(u_xenpf_op, op, + u.firmware_info.u.disk_info) + ? 
-EFAULT : 0); + break; + } + case XEN_FW_DISK_MBR_SIGNATURE: { + const struct mbr_signature *sig; + + ret = -ESRCH; + if ( op->u.firmware_info.index >= bootsym(boot_mbr_signature_nr) ) + break; + + sig = bootsym(boot_mbr_signature) + op->u.firmware_info.index; + + op->u.firmware_info.u.disk_mbr_signature.device = sig->device; + op->u.firmware_info.u.disk_mbr_signature.mbr_signature = + sig->signature; + + ret = (copy_field_to_guest(u_xenpf_op, op, + u.firmware_info.u.disk_mbr_signature) + ? -EFAULT : 0); + break; + } + case XEN_FW_VBEDDC_INFO: + ret = -ESRCH; + if ( op->u.firmware_info.index != 0 ) + break; + if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 ) + break; + + op->u.firmware_info.u.vbeddc_info.capabilities = + bootsym(boot_edid_caps); + op->u.firmware_info.u.vbeddc_info.edid_transfer_time = + bootsym(boot_edid_caps) >> 8; + + ret = 0; + if ( copy_field_to_guest(u_xenpf_op, op, u.firmware_info. + u.vbeddc_info.capabilities) || + copy_field_to_guest(u_xenpf_op, op, u.firmware_info. + u.vbeddc_info.edid_transfer_time) || + copy_to_compat(op->u.firmware_info.u.vbeddc_info.edid, + bootsym(boot_edid_info), 128) ) + ret = -EFAULT; + break; + default: + ret = -EINVAL; + break; + } + break; default: ret = -ENOSYS; diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/setup.c Mon Jul 02 12:19:26 2007 -0600 @@ -405,7 +405,7 @@ void __init __start_xen(unsigned long mb void __init __start_xen(unsigned long mbi_p) { char *memmap_type = NULL; - char __cmdline[] = "", *cmdline = __cmdline; + char __cmdline[] = "", *cmdline = __cmdline, *kextra; unsigned long _initrd_start = 0, _initrd_len = 0; unsigned int initrdidx = 1; char *_policy_start = NULL; @@ -426,6 +426,17 @@ void __init __start_xen(unsigned long mb /* Parse the command-line options. */ if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) ) cmdline = __va(mbi->cmdline); + if ( (kextra = strstr(cmdline, " -- ")) != NULL ) + { + /* + * Options after ' -- ' separator belong to dom0. + * 1. Orphan dom0's options from Xen's command line. + * 2. Skip all but final leading space from dom0's options. + */ + *kextra = '\0'; + kextra += 3; + while ( kextra[1] == ' ' ) kextra++; + } cmdline_parse(cmdline); parse_video_info(); @@ -494,7 +505,7 @@ void __init __start_xen(unsigned long mb printk("Disc information:\n"); printk(" Found %d MBR signatures\n", - bootsym(boot_edd_signature_nr)); + bootsym(boot_mbr_signature_nr)); printk(" Found %d EDD information structures\n", bootsym(boot_edd_info_nr)); @@ -1009,17 +1020,26 @@ void __init __start_xen(unsigned long mb /* Grab the DOM0 command line. */ cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL); - if ( cmdline != NULL ) + if ( (cmdline != NULL) || (kextra != NULL) ) { static char dom0_cmdline[MAX_GUEST_CMDLINE]; - /* Skip past the image name and copy to a local buffer. */ - while ( *cmdline == ' ' ) cmdline++; - if ( (cmdline = strchr(cmdline, ' ')) != NULL ) + dom0_cmdline[0] = '\0'; + + if ( cmdline != NULL ) { + /* Skip past the image name and copy to a local buffer. */ while ( *cmdline == ' ' ) cmdline++; - safe_strcpy(dom0_cmdline, cmdline); + if ( (cmdline = strchr(cmdline, ' ')) != NULL ) + { + while ( *cmdline == ' ' ) cmdline++; + safe_strcpy(dom0_cmdline, cmdline); + } } + + if ( kextra != NULL ) + /* kextra always includes exactly one leading space. */ + safe_strcat(dom0_cmdline, kextra); /* Append any extra parameters. 
*/ if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") ) diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/traps.c Mon Jul 02 12:19:26 2007 -0600 @@ -86,6 +86,7 @@ asmlinkage int do_ ## _name(struct cpu_u asmlinkage int do_ ## _name(struct cpu_user_regs *regs) asmlinkage void nmi(void); +asmlinkage void machine_check(void); DECLARE_TRAP_HANDLER(divide_error); DECLARE_TRAP_HANDLER(debug); DECLARE_TRAP_HANDLER(int3); @@ -103,7 +104,6 @@ DECLARE_TRAP_HANDLER(simd_coprocessor_er DECLARE_TRAP_HANDLER(simd_coprocessor_error); DECLARE_TRAP_HANDLER(alignment_check); DECLARE_TRAP_HANDLER(spurious_interrupt_bug); -DECLARE_TRAP_HANDLER(machine_check); long do_set_debugreg(int reg, unsigned long value); unsigned long do_get_debugreg(int reg); @@ -631,6 +631,7 @@ static int emulate_forced_invalid_op(str regs->ecx = c; regs->edx = d; regs->eip = eip; + regs->eflags &= ~X86_EFLAGS_RF; return EXCRET_fault_fixed; } @@ -730,10 +731,11 @@ asmlinkage int do_int3(struct cpu_user_r return do_guest_trap(TRAP_int3, regs, 0); } -asmlinkage int do_machine_check(struct cpu_user_regs *regs) -{ - fatal_trap(TRAP_machine_check, regs); - return 0; +asmlinkage void do_machine_check(struct cpu_user_regs *regs) +{ + extern fastcall void (*machine_check_vector)( + struct cpu_user_regs *, long error_code); + machine_check_vector(regs, regs->error_code); } void propagate_page_fault(unsigned long addr, u16 error_code) @@ -1787,6 +1789,7 @@ static int emulate_privileged_op(struct done: regs->eip = eip; + regs->eflags &= ~X86_EFLAGS_RF; return EXCRET_fault_fixed; fail: diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/x86_32/entry.S Mon Jul 02 12:19:26 2007 -0600 @@ -72,48 +72,36 @@ andl $~3,reg; \ movl (reg),reg; - ALIGN restore_all_guest: ASSERT_INTERRUPTS_DISABLED testl $X86_EFLAGS_VM,UREGS_eflags(%esp) - jnz restore_all_vm86 + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + leal 4(%esp),%esp + jnz .Lrestore_iret_guest #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL - testl $2,UREGS_cs(%esp) - jnz 1f + testb $2,UREGS_cs-UREGS_eip(%esp) + jnz .Lrestore_sregs_guest call restore_ring0_guest - jmp restore_all_vm86 -1: + jmp .Lrestore_iret_guest #endif -.Lft1: mov UREGS_ds(%esp),%ds -.Lft2: mov UREGS_es(%esp),%es -.Lft3: mov UREGS_fs(%esp),%fs -.Lft4: mov UREGS_gs(%esp),%gs -restore_all_vm86: - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - popl %eax - addl $4,%esp +.Lrestore_sregs_guest: +.Lft1: mov UREGS_ds-UREGS_eip(%esp),%ds +.Lft2: mov UREGS_es-UREGS_eip(%esp),%es +.Lft3: mov UREGS_fs-UREGS_eip(%esp),%fs +.Lft4: mov UREGS_gs-UREGS_eip(%esp),%gs +.Lrestore_iret_guest: .Lft5: iret .section .fixup,"ax" -.Lfx5: subl $28,%esp - pushl 28(%esp) # error_code/entry_vector - movl %eax,UREGS_eax+4(%esp) - movl %ebp,UREGS_ebp+4(%esp) - movl %edi,UREGS_edi+4(%esp) - movl %esi,UREGS_esi+4(%esp) - movl %edx,UREGS_edx+4(%esp) - movl %ecx,UREGS_ecx+4(%esp) - movl %ebx,UREGS_ebx+4(%esp) -.Lfx1: SET_XEN_SEGMENTS(a) - movl %eax,%fs - movl %eax,%gs - sti - popl %esi +.Lfx1: sti + SAVE_ALL_GPRS + mov UREGS_error_code(%esp),%esi pushfl # EFLAGS movl $__HYPERVISOR_CS,%eax pushl %eax # CS @@ -147,7 +135,7 @@ 1: call create_bounce_frame .long .Lft2,.Lfx1 .long .Lft3,.Lfx1 .long .Lft4,.Lfx1 - .long .Lft5,.Lfx5 + .long .Lft5,.Lfx1 .previous .section __ex_table,"a" .long .Ldf1,failsafe_callback @@ -169,8 +157,8 @@ 
ENTRY(hypercall) ENTRY(hypercall) subl $4,%esp FIXUP_RING0_GUEST_STACK - SAVE_ALL(b) - sti + SAVE_ALL(1f,1f) +1: sti GET_CURRENT(%ebx) cmpl $NR_hypercalls,%eax jae bad_hypercall @@ -420,9 +408,14 @@ ENTRY(divide_error) ALIGN handle_exception: FIXUP_RING0_GUEST_STACK - SAVE_ALL_NOSEGREGS(a) - SET_XEN_SEGMENTS(a) - testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp) + SAVE_ALL(1f,2f) + .text 1 + /* Exception within Xen: make sure we have valid %ds,%es. */ +1: mov %ecx,%ds + mov %ecx,%es + jmp 2f + .previous +2: testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp) jz exception_with_ints_disabled sti # re-enable interrupts 1: xorl %eax,%eax @@ -533,71 +526,81 @@ ENTRY(page_fault) movw $TRAP_page_fault,2(%esp) jmp handle_exception -ENTRY(machine_check) - pushl $TRAP_machine_check<<16 - jmp handle_exception - ENTRY(spurious_interrupt_bug) pushl $TRAP_spurious_int<<16 jmp handle_exception ENTRY(early_page_fault) - SAVE_ALL_NOSEGREGS(a) - movl %esp,%edx - pushl %edx + SAVE_ALL(1f,1f) +1: movl %esp,%eax + pushl %eax call do_early_page_fault addl $4,%esp jmp restore_all_xen -ENTRY(nmi) +handle_nmi_mce: #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL - # NMI entry protocol is incompatible with guest kernel in ring 0. + # NMI/MCE entry protocol is incompatible with guest kernel in ring 0. + addl $4,%esp iret #else # Save state but do not trash the segment registers! - # We may otherwise be unable to reload them or copy them to ring 1. - pushl %eax - SAVE_ALL_NOSEGREGS(a) - - # We can only process the NMI if: - # A. We are the outermost Xen activation (in which case we have - # the selectors safely saved on our stack) - # B. DS and ES contain sane Xen values. - # In all other cases we bail without touching DS-GS, as we have - # interrupted an enclosing Xen activation in tricky prologue or - # epilogue code. - movl UREGS_eflags(%esp),%eax - movb UREGS_cs(%esp),%al - testl $(3|X86_EFLAGS_VM),%eax - jnz continue_nmi - movl %ds,%eax - cmpw $(__HYPERVISOR_DS),%ax - jne defer_nmi - movl %es,%eax - cmpw $(__HYPERVISOR_DS),%ax - jne defer_nmi - -continue_nmi: - SET_XEN_SEGMENTS(d) + SAVE_ALL(.Lnmi_mce_xen,.Lnmi_mce_common) +.Lnmi_mce_common: + xorl %eax,%eax + movw UREGS_entry_vector(%esp),%ax movl %esp,%edx pushl %edx - call do_nmi + call *exception_table(,%eax,4) addl $4,%esp + /* + * NB. We may return to Xen context with polluted %ds/%es. But in such + * cases we have put guest DS/ES on the guest stack frame, which will + * be detected by SAVE_ALL(), or we have rolled back restore_guest. + */ jmp ret_from_intr - -defer_nmi: - movl $FIXMAP_apic_base,%eax - # apic_wait_icr_idle() -1: movl %ss:APIC_ICR(%eax),%ebx - testl $APIC_ICR_BUSY,%ebx - jnz 1b - # __send_IPI_shortcut(APIC_DEST_SELF, TRAP_deferred_nmi) - movl $(APIC_DM_FIXED | APIC_DEST_SELF | APIC_DEST_PHYSICAL | \ - TRAP_deferred_nmi),%ss:APIC_ICR(%eax) - jmp restore_all_xen +.Lnmi_mce_xen: + /* Check the outer (guest) context for %ds/%es state validity. */ + GET_GUEST_REGS(%ebx) + testl $X86_EFLAGS_VM,%ss:UREGS_eflags(%ebx) + mov %ds,%eax + mov %es,%edx + jnz .Lnmi_mce_vm86 + /* We may have interrupted Xen while messing with %ds/%es... 
*/ + cmpw %ax,%cx + mov %ecx,%ds /* Ensure %ds is valid */ + cmove UREGS_ds(%ebx),%eax /* Grab guest DS if it wasn't in %ds */ + cmpw %dx,%cx + movl %eax,UREGS_ds(%ebx) /* Ensure guest frame contains guest DS */ + cmove UREGS_es(%ebx),%edx /* Grab guest ES if it wasn't in %es */ + mov %ecx,%es /* Ensure %es is valid */ + movl $.Lrestore_sregs_guest,%ecx + movl %edx,UREGS_es(%ebx) /* Ensure guest frame contains guest ES */ + cmpl %ecx,UREGS_eip(%esp) + jbe .Lnmi_mce_common + cmpl $.Lrestore_iret_guest,UREGS_eip(%esp) + ja .Lnmi_mce_common + /* Roll outer context restore_guest back to restoring %ds/%es. */ + movl %ecx,UREGS_eip(%esp) + jmp .Lnmi_mce_common +.Lnmi_mce_vm86: + /* vm86 is easy: the CPU saved %ds/%es so we can safely stomp them. */ + mov %ecx,%ds + mov %ecx,%es + jmp .Lnmi_mce_common #endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */ +ENTRY(nmi) + pushl $TRAP_nmi<<16 + jmp handle_nmi_mce + +ENTRY(machine_check) + pushl $TRAP_machine_check<<16 + jmp handle_nmi_mce + ENTRY(setup_vm86_frame) + mov %ecx,%ds + mov %ecx,%es # Copies the entire stack frame forwards by 16 bytes. .macro copy_vm86_words count=18 .if \count @@ -615,7 +618,7 @@ ENTRY(exception_table) ENTRY(exception_table) .long do_divide_error .long do_debug - .long 0 # nmi + .long do_nmi .long do_int3 .long do_overflow .long do_bounds diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/supervisor_mode_kernel.S --- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S Mon Jul 02 12:19:26 2007 -0600 @@ -20,40 +20,45 @@ #include <asm/asm_defns.h> #include <public/xen.h> +#define guestreg(field) ((field)-UREGS_eip+36) + # Upon entry the stack should be the Xen stack and contain: - # %ss, %esp, EFLAGS, %cs|1, %eip, ERROR, SAVE_ALL, RETURN + # %ss, %esp, EFLAGS, %cs|1, %eip, RETURN # On exit the stack should be %ss:%esp (i.e. the guest stack) # and contain: - # EFLAGS, %cs, %eip, ERROR, SAVE_ALL, RETURN + # EFLAGS, %cs, %eip, RETURN ALIGN ENTRY(restore_ring0_guest) + pusha + # Point %gs:%esi to guest stack. -RRG0: movw UREGS_ss+4(%esp),%gs - movl UREGS_esp+4(%esp),%esi +RRG0: movw guestreg(UREGS_ss)(%esp),%gs + movl guestreg(UREGS_esp)(%esp),%esi - # Copy EFLAGS...EBX, RETURN from Xen stack to guest stack. - movl $(UREGS_kernel_sizeof>>2)+1,%ecx + # Copy EFLAGS, %cs, %eip, RETURN, PUSHA from Xen stack to guest stack. + movl $12,%ecx /* 12 32-bit values */ 1: subl $4,%esi movl -4(%esp,%ecx,4),%eax RRG1: movl %eax,%gs:(%esi) loop 1b -RRG2: andl $~3,%gs:UREGS_cs+4(%esi) +RRG2: andl $~3,%gs:guestreg(UREGS_cs)(%esi) movl %gs,%eax # We need to do this because these registers are not present # on the guest stack so they cannot be restored by the code in # restore_all_guest. 
-RRG3: mov UREGS_ds+4(%esp),%ds -RRG4: mov UREGS_es+4(%esp),%es -RRG5: mov UREGS_fs+4(%esp),%fs -RRG6: mov UREGS_gs+4(%esp),%gs +RRG3: mov guestreg(UREGS_ds)(%esp),%ds +RRG4: mov guestreg(UREGS_es)(%esp),%es +RRG5: mov guestreg(UREGS_fs)(%esp),%fs +RRG6: mov guestreg(UREGS_gs)(%esp),%gs RRG7: movl %eax,%ss movl %esi,%esp + popa ret .section __ex_table,"a" .long RRG0,domain_crash_synchronous diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/x86_32/traps.c Mon Jul 02 12:19:26 2007 -0600 @@ -232,15 +232,6 @@ unsigned long do_iret(void) return 0; } -#include <asm/asm_defns.h> -BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi) -fastcall void smp_deferred_nmi(struct cpu_user_regs *regs) -{ - asmlinkage void do_nmi(struct cpu_user_regs *); - ack_APIC_irq(); - do_nmi(regs); -} - void __init percpu_traps_init(void) { struct tss_struct *tss = &doublefault_tss; @@ -251,8 +242,6 @@ void __init percpu_traps_init(void) /* The hypercall entry vector is only accessible from ring 1. */ _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall); - - set_intr_gate(TRAP_deferred_nmi, &deferred_nmi); /* * Make a separate task for double faults. This will get us debug output if diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/Makefile --- a/xen/arch/x86/x86_64/Makefile Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/x86_64/Makefile Mon Jul 02 12:19:26 2007 -0600 @@ -1,12 +1,12 @@ subdir-y += compat subdir-y += compat obj-y += entry.o -obj-y += compat_kexec.o obj-y += gpr_switch.o obj-y += mm.o obj-y += traps.o obj-$(CONFIG_COMPAT) += compat.o +obj-$(CONFIG_COMPAT) += compat_kexec.o obj-$(CONFIG_COMPAT) += domain.o obj-$(CONFIG_COMPAT) += physdev.o obj-$(CONFIG_COMPAT) += platform_hypercall.o diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/compat/entry.S --- a/xen/arch/x86/x86_64/compat/entry.S Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/x86_64/compat/entry.S Mon Jul 02 12:19:26 2007 -0600 @@ -143,12 +143,12 @@ compat_restore_all_guest: .Lft0: iretq .section .fixup,"ax" -.Lfx0: popq -15*8-8(%rsp) # error_code/entry_vector - SAVE_ALL # 15*8 bytes pushed - movq -8(%rsp),%rsi # error_code/entry_vector - sti # after stack abuse (-1024(%rsp)) +.Lfx0: sti + SAVE_ALL + movq UREGS_error_code(%rsp),%rsi + movq %rsp,%rax + andq $~0xf,%rsp pushq $__HYPERVISOR_DS # SS - leaq 8(%rsp),%rax pushq %rax # RSP pushfq # RFLAGS pushq $__HYPERVISOR_CS # CS diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/x86_64/entry.S Mon Jul 02 12:19:26 2007 -0600 @@ -57,23 +57,23 @@ 1: sysretl /* No special register assumptions. 
*/ iret_exit_to_guest: addq $8,%rsp -.Lft1: iretq +.Lft0: iretq .section .fixup,"ax" -.Lfx1: popq -15*8-8(%rsp) # error_code/entry_vector - SAVE_ALL # 15*8 bytes pushed - movq -8(%rsp),%rsi # error_code/entry_vector - sti # after stack abuse (-1024(%rsp)) +.Lfx0: sti + SAVE_ALL + movq UREGS_error_code(%rsp),%rsi + movq %rsp,%rax + andq $~0xf,%rsp pushq $__HYPERVISOR_DS # SS - leaq 8(%rsp),%rax pushq %rax # RSP - pushf # RFLAGS + pushfq # RFLAGS pushq $__HYPERVISOR_CS # CS - leaq .Ldf1(%rip),%rax + leaq .Ldf0(%rip),%rax pushq %rax # RIP pushq %rsi # error_code/entry_vector jmp handle_exception -.Ldf1: GET_CURRENT(%rbx) +.Ldf0: GET_CURRENT(%rbx) jmp test_all_events failsafe_callback: GET_CURRENT(%rbx) @@ -88,10 +88,10 @@ 1: call create_bounce_frame jmp test_all_events .previous .section __pre_ex_table,"a" - .quad .Lft1,.Lfx1 + .quad .Lft0,.Lfx0 .previous .section __ex_table,"a" - .quad .Ldf1,failsafe_callback + .quad .Ldf0,failsafe_callback .previous ALIGN @@ -505,11 +505,6 @@ ENTRY(page_fault) movl $TRAP_page_fault,4(%rsp) jmp handle_exception -ENTRY(machine_check) - pushq $0 - movl $TRAP_machine_check,4(%rsp) - jmp handle_exception - ENTRY(spurious_interrupt_bug) pushq $0 movl $TRAP_spurious_int,4(%rsp) @@ -527,31 +522,38 @@ ENTRY(early_page_fault) call do_early_page_fault jmp restore_all_xen +handle_ist_exception: + SAVE_ALL + testb $3,UREGS_cs(%rsp) + jz 1f + /* Interrupted guest context. Copy the context to stack bottom. */ + GET_GUEST_REGS(%rdi) + movq %rsp,%rsi + movl $UREGS_kernel_sizeof/8,%ecx + movq %rdi,%rsp + rep movsq +1: movq %rsp,%rdi + movl UREGS_entry_vector(%rsp),%eax + leaq exception_table(%rip),%rdx + callq *(%rdx,%rax,8) + jmp ret_from_intr + ENTRY(nmi) pushq $0 - SAVE_ALL - testb $3,UREGS_cs(%rsp) - jz nmi_in_hypervisor_mode - /* Interrupted guest context. Copy the context to stack bottom. */ - GET_GUEST_REGS(%rbx) - movl $UREGS_kernel_sizeof/8,%ecx -1: popq %rax - movq %rax,(%rbx) - addq $8,%rbx - loop 1b - subq $UREGS_kernel_sizeof,%rbx - movq %rbx,%rsp -nmi_in_hypervisor_mode: - movq %rsp,%rdi - call do_nmi - jmp ret_from_intr + movl $TRAP_nmi,4(%rsp) + jmp handle_ist_exception + +ENTRY(machine_check) + pushq $0 + movl $TRAP_machine_check,4(%rsp) + jmp handle_ist_exception .data ENTRY(exception_table) .quad do_divide_error .quad do_debug - .quad 0 # nmi + .quad do_nmi .quad do_int3 .quad do_overflow .quad do_bounds diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/x86_64/mm.c Mon Jul 02 12:19:26 2007 -0600 @@ -106,7 +106,8 @@ void __init paging_init(void) /* Create user-accessible L2 directory to map the MPT for guests. 
*/ if ( (l2_pg = alloc_domheap_page(NULL)) == NULL ) goto nomem; - l3_ro_mpt = clear_page(page_to_virt(l2_pg)); + l3_ro_mpt = page_to_virt(l2_pg); + clear_page(l3_ro_mpt); l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)], l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER)); @@ -132,7 +133,8 @@ void __init paging_init(void) if ( (l2_pg = alloc_domheap_page(NULL)) == NULL ) goto nomem; va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT); - l2_ro_mpt = clear_page(page_to_virt(l2_pg)); + l2_ro_mpt = page_to_virt(l2_pg); + clear_page(l2_ro_mpt); l3e_write(&l3_ro_mpt[l3_table_offset(va)], l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER)); l2_ro_mpt += l2_table_offset(va); @@ -152,7 +154,8 @@ void __init paging_init(void) l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]); if ( (l2_pg = alloc_domheap_page(NULL)) == NULL ) goto nomem; - compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg)); + compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg); + clear_page(l2_ro_mpt); l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)], l3e_from_page(l2_pg, __PAGE_HYPERVISOR)); l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START); diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/x86_64/traps.c Mon Jul 02 12:19:26 2007 -0600 @@ -294,8 +294,9 @@ void __init percpu_traps_init(void) { /* Specify dedicated interrupt stacks for NMIs and double faults. */ set_intr_gate(TRAP_double_fault, &double_fault); - idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */ - idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */ + idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */ + idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */ + idt_table[TRAP_machine_check].a |= 3UL << 32; /* IST3 */ /* * The 32-on-64 hypercall entry vector is only accessible from ring 1. @@ -310,7 +311,10 @@ void __init percpu_traps_init(void) stack_bottom = (char *)get_stack_bottom(); stack = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1)); - /* Double-fault handler has its own per-CPU 2kB stack. */ + /* Machine Check handler has its own per-CPU 1kB stack. */ + init_tss[cpu].ist[2] = (unsigned long)&stack[1024]; + + /* Double-fault handler has its own per-CPU 1kB stack. */ init_tss[cpu].ist[0] = (unsigned long)&stack[2048]; /* NMI handler has its own per-CPU 1kB stack. */ diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/arch/x86/x86_emulate.c Mon Jul 02 12:19:26 2007 -0600 @@ -1630,6 +1630,7 @@ x86_emulate( } /* Commit shadow register state. */ + _regs.eflags &= ~EF_RF; *ctxt->regs = _regs; done: diff -r 443ce7edad0e -r d146700adf71 xen/common/sysctl.c --- a/xen/common/sysctl.c Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/common/sysctl.c Mon Jul 02 12:19:26 2007 -0600 @@ -136,6 +136,39 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc } break; + case XEN_SYSCTL_getcpuinfo: + { + uint32_t i, nr_cpus; + struct xen_sysctl_cpuinfo cpuinfo; + struct vcpu *v; + + nr_cpus = min_t(uint32_t, op->u.getcpuinfo.max_cpus, NR_CPUS); + + for ( i = 0; i < nr_cpus; i++ ) + { + /* Assume no holes in idle-vcpu map. 
*/ + if ( (v = idle_vcpu[i]) == NULL ) + break; + + cpuinfo.idletime = v->runstate.time[RUNSTATE_running]; + if ( v->is_running ) + cpuinfo.idletime += NOW() - v->runstate.state_entry_time; + + if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) ) + { + ret = -EFAULT; + break; + } + } + + op->u.getcpuinfo.nr_cpus = i; + ret = 0; + + if ( copy_to_guest(u_sysctl, op, 1) ) + ret = -EFAULT; + } + break; + default: ret = arch_do_sysctl(op, u_sysctl); break; diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/edd.h --- a/xen/include/asm-x86/edd.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/asm-x86/edd.h Mon Jul 02 12:19:26 2007 -0600 @@ -32,12 +32,22 @@ struct edd_info { u16 legacy_max_cylinder; /* %cl[7:6]:%ch: maximum cylinder number */ u8 legacy_max_head; /* %dh: maximum head number */ u8 legacy_sectors_per_track; /* %cl[5:0]: maximum sector number */ - /* Int13, Fn41: Get Device Parameters */ - u8 edd_device_params[74]; /* as filled into %ds:%si */ + /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ + struct { + u16 length; + u8 data[72]; + } edd_device_params; } __attribute__ ((packed)); -extern u32 boot_edd_signature[]; -extern u8 boot_edd_signature_nr; +struct mbr_signature { + u8 device; + u8 pad[3]; + u32 signature; +} __attribute__ ((packed)); + +/* These all reside in the boot trampoline. Access via bootsym(). */ +extern struct mbr_signature boot_mbr_signature[]; +extern u8 boot_mbr_signature_nr; extern struct edd_info boot_edd_info[]; extern u8 boot_edd_info_nr; diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/asm-x86/hvm/hvm.h Mon Jul 02 12:19:26 2007 -0600 @@ -302,4 +302,18 @@ static inline int hvm_event_injection_fa return hvm_funcs.event_injection_faulted(v); } +/* These bits in CR4 are owned by the host. */ +#define HVM_CR4_HOST_MASK (mmu_cr4_features & \ + (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE)) + +/* These bits in CR4 cannot be set by the guest. */ +#define HVM_CR4_GUEST_RESERVED_BITS \ + ~(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \ + X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \ + X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \ + X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT) + +/* These exceptions must always be intercepted. 
*/ +#define HVM_TRAP_MASK (1U << TRAP_machine_check) + #endif /* __ASM_X86_HVM_HVM_H__ */ diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/svm/emulate.h --- a/xen/include/asm-x86/hvm/svm/emulate.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/asm-x86/hvm/svm/emulate.h Mon Jul 02 12:19:26 2007 -0600 @@ -138,6 +138,7 @@ static void inline __update_guest_eip( { ASSERT(inst_len > 0); vmcb->rip += inst_len; + vmcb->rflags &= ~X86_EFLAGS_RF; } #endif /* __ASM_X86_HVM_SVM_EMULATE_H__ */ diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/svm/vmcb.h --- a/xen/include/asm-x86/hvm/svm/vmcb.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h Mon Jul 02 12:19:26 2007 -0600 @@ -464,14 +464,6 @@ void svm_destroy_vmcb(struct vcpu *v); void setup_vmcb_dump(void); -/* These bits in the CR4 are owned by the host */ -#if CONFIG_PAGING_LEVELS >= 3 -#define SVM_CR4_HOST_MASK (X86_CR4_PAE) -#else -#define SVM_CR4_HOST_MASK 0 -#endif - - #endif /* ASM_X86_HVM_SVM_VMCS_H__ */ /* diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/trace.h --- a/xen/include/asm-x86/hvm/trace.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/asm-x86/hvm/trace.h Mon Jul 02 12:19:26 2007 -0600 @@ -21,6 +21,7 @@ #define DO_TRC_HVM_CPUID 1 #define DO_TRC_HVM_INTR 1 #define DO_TRC_HVM_NMI 1 +#define DO_TRC_HVM_MCE 1 #define DO_TRC_HVM_SMI 1 #define DO_TRC_HVM_VMMCALL 1 #define DO_TRC_HVM_HLT 1 diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jul 02 12:19:26 2007 -0600 @@ -130,6 +130,8 @@ extern u32 vmx_vmentry_control; #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 extern u32 vmx_secondary_exec_control; + +extern bool_t cpu_has_vmx_ins_outs_instr_info; #define cpu_has_vmx_virtualize_apic_accesses \ (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jul 02 12:19:26 2007 -0600 @@ -143,13 +143,6 @@ void vmx_vlapic_msr_changed(struct vcpu #define X86_SEG_AR_GRANULARITY (1u << 15) /* 15, granularity */ #define X86_SEG_AR_SEG_UNUSABLE (1u << 16) /* 16, segment unusable */ -/* These bits in the CR4 are owned by the host */ -#if CONFIG_PAGING_LEVELS >= 3 -#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE) -#else -#define VMX_CR4_HOST_MASK (X86_CR4_VMXE) -#endif - #define VMCALL_OPCODE ".byte 0x0f,0x01,0xc1\n" #define VMCLEAR_OPCODE ".byte 0x66,0x0f,0xc7\n" /* reg/opcode: /6 */ #define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n" diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/asm-x86/page.h Mon Jul 02 12:19:26 2007 -0600 @@ -192,8 +192,9 @@ static inline l4_pgentry_t l4e_from_padd #define pgentry_ptr_to_slot(_p) \ (((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p))) +#ifndef __ASSEMBLY__ + /* Page-table type. 
*/ -#ifndef __ASSEMBLY__ #if CONFIG_PAGING_LEVELS == 2 /* x86_32 default */ typedef struct { u32 pfn; } pagetable_t; @@ -214,9 +215,11 @@ typedef struct { u64 pfn; } pagetable_t; #define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg)) #define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT) #define pagetable_null() pagetable_from_pfn(0) -#endif - -#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE) + +void clear_page_sse2(void *); +#define clear_page(_p) (cpu_has_xmm2 ? \ + clear_page_sse2((void *)(_p)) : \ + (void)memset((void *)(_p), 0, PAGE_SIZE)) #define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE) #define mfn_valid(mfn) ((mfn) < max_page) @@ -244,6 +247,8 @@ typedef struct { u64 pfn; } pagetable_t; /* Convert between frame number and address formats. */ #define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT) #define paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT)) + +#endif /* !defined(__ASSEMBLY__) */ /* High table entries are reserved by the hypervisor. */ #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/processor.h --- a/xen/include/asm-x86/processor.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/asm-x86/processor.h Mon Jul 02 12:19:26 2007 -0600 @@ -104,7 +104,6 @@ #define TRAP_alignment_check 17 #define TRAP_machine_check 18 #define TRAP_simd_error 19 -#define TRAP_deferred_nmi 31 /* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */ /* NB. Same as VGCF_in_syscall. No bits in common with any other TRAP_ defn. */ @@ -567,7 +566,8 @@ extern void mtrr_ap_init(void); extern void mtrr_ap_init(void); extern void mtrr_bp_init(void); -extern void mcheck_init(struct cpuinfo_x86 *c); +void mcheck_init(struct cpuinfo_x86 *c); +asmlinkage void do_machine_check(struct cpu_user_regs *regs); int cpuid_hypervisor_leaves( uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/x86_32/asm_defns.h --- a/xen/include/asm-x86/x86_32/asm_defns.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/asm-x86/x86_32/asm_defns.h Mon Jul 02 12:19:26 2007 -0600 @@ -26,7 +26,7 @@ 1: addl $4,%esp; #define ASSERT_INTERRUPTS_ENABLED ASSERT_INTERRUPT_STATUS(nz) #define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z) -#define __SAVE_ALL_PRE \ +#define SAVE_ALL_GPRS \ cld; \ pushl %eax; \ pushl %ebp; \ @@ -35,32 +35,48 @@ 1: addl $4,%esp; pushl %esi; \ pushl %edx; \ pushl %ecx; \ - pushl %ebx; \ + pushl %ebx + +/* + * Saves all register state into an exception/interrupt stack frame. + * Returns to the caller at <xen_lbl> if the interrupted context is within + * Xen; at <vm86_lbl> if the interrupted context is vm86; or falls through + * if the interrupted context is an ordinary guest protected-mode context. + * In all cases %ecx contains __HYPERVISOR_DS. %ds/%es are guaranteed to + * contain __HYPERVISOR_DS unless control passes to <xen_lbl>, in which case + * the caller is reponsible for validity of %ds/%es. 
+ */ +#define SAVE_ALL(xen_lbl, vm86_lbl) \ + SAVE_ALL_GPRS; \ testl $(X86_EFLAGS_VM),UREGS_eflags(%esp); \ - jz 2f; \ - call setup_vm86_frame; \ - jmp 3f; \ - 2:testb $3,UREGS_cs(%esp); \ - jz 1f; \ - mov %ds,UREGS_ds(%esp); \ - mov %es,UREGS_es(%esp); \ - mov %fs,UREGS_fs(%esp); \ - mov %gs,UREGS_gs(%esp); \ - 3: - -#define SAVE_ALL_NOSEGREGS(_reg) \ - __SAVE_ALL_PRE \ - 1: - -#define SET_XEN_SEGMENTS(_reg) \ - movl $(__HYPERVISOR_DS),%e ## _reg ## x; \ - mov %e ## _reg ## x,%ds; \ - mov %e ## _reg ## x,%es; - -#define SAVE_ALL(_reg) \ - __SAVE_ALL_PRE \ - SET_XEN_SEGMENTS(_reg) \ - 1: + mov %ds,%edi; \ + mov %es,%esi; \ + mov $(__HYPERVISOR_DS),%ecx; \ + jnz 86f; \ + .text 1; \ + 86: call setup_vm86_frame; \ + jmp vm86_lbl; \ + .previous; \ + testb $3,UREGS_cs(%esp); \ + jz xen_lbl; \ + /* \ + * We are the outermost Xen context, but our \ + * life is complicated by NMIs and MCEs. These \ + * could occur in our critical section and \ + * pollute %ds and %es. We have to detect that \ + * this has occurred and avoid saving Xen DS/ES \ + * values to the guest stack frame. \ + */ \ + cmpw %cx,%di; \ + mov %ecx,%ds; \ + mov %fs,UREGS_fs(%esp); \ + cmove UREGS_ds(%esp),%edi; \ + cmpw %cx,%si; \ + mov %edi,UREGS_ds(%esp); \ + cmove UREGS_es(%esp),%esi; \ + mov %ecx,%es; \ + mov %gs,UREGS_gs(%esp); \ + mov %esi,UREGS_es(%esp) #ifdef PERF_COUNTERS #define PERFC_INCR(_name,_idx,_cur) \ @@ -97,8 +113,8 @@ __asm__( STR(x) ":\n\t" \ "pushl $"#v"<<16\n\t" \ STR(FIXUP_RING0_GUEST_STACK) \ - STR(SAVE_ALL(a)) \ - "movl %esp,%eax\n\t" \ + STR(SAVE_ALL(1f,1f)) "\n\t" \ + "1:movl %esp,%eax\n\t" \ "pushl %eax\n\t" \ "call "STR(smp_##x)"\n\t" \ "addl $4,%esp\n\t" \ @@ -109,8 +125,8 @@ __asm__( "\n" __ALIGN_STR"\n" \ "common_interrupt:\n\t" \ STR(FIXUP_RING0_GUEST_STACK) \ - STR(SAVE_ALL(a)) \ - "movl %esp,%eax\n\t" \ + STR(SAVE_ALL(1f,1f)) "\n\t" \ + "1:movl %esp,%eax\n\t" \ "pushl %eax\n\t" \ "call " STR(do_IRQ) "\n\t" \ "addl $4,%esp\n\t" \ diff -r 443ce7edad0e -r d146700adf71 xen/include/public/platform.h --- a/xen/include/public/platform.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/public/platform.h Mon Jul 02 12:19:26 2007 -0600 @@ -114,6 +114,45 @@ typedef struct xenpf_platform_quirk xenp typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); +#define XENPF_firmware_info 50 +#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ +#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ +#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ +struct xenpf_firmware_info { + /* IN variables. */ + uint32_t type; + uint32_t index; + /* OUT variables. */ + union { + struct { + /* Int13, Fn48: Check Extensions Present. */ + uint8_t device; /* %dl: bios device number */ + uint8_t version; /* %ah: major version */ + uint16_t interface_support; /* %cx: support bitmap */ + /* Int13, Fn08: Legacy Get Device Parameters. */ + uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ + uint8_t legacy_max_head; /* %dh: max head # */ + uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ + /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ + /* NB. First uint16_t of buffer must be set to buffer size. */ + XEN_GUEST_HANDLE(void) edd_params; + } disk_info; /* XEN_FW_DISK_INFO */ + struct { + uint8_t device; /* bios device number */ + uint32_t mbr_signature; /* offset 0x1b8 in mbr */ + } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ + struct { + /* Int10, AX=4F15: Get EDID info. 
*/ + uint8_t capabilities; + uint8_t edid_transfer_time; + /* must refer to 128-byte buffer */ + XEN_GUEST_HANDLE(uint8_t) edid; + } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + } u; +}; +typedef struct xenpf_firmware_info xenpf_firmware_info_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t); + struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ @@ -124,6 +163,7 @@ struct xen_platform_op { struct xenpf_read_memtype read_memtype; struct xenpf_microcode_update microcode; struct xenpf_platform_quirk platform_quirk; + struct xenpf_firmware_info firmware_info; uint8_t pad[128]; } u; }; diff -r 443ce7edad0e -r d146700adf71 xen/include/public/sysctl.h --- a/xen/include/public/sysctl.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/public/sysctl.h Mon Jul 02 12:19:26 2007 -0600 @@ -140,9 +140,7 @@ typedef struct xen_sysctl_getdomaininfol typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); -/* - * Inject debug keys into Xen. - */ +/* Inject debug keys into Xen. */ #define XEN_SYSCTL_debug_keys 7 struct xen_sysctl_debug_keys { /* IN variables. */ @@ -151,6 +149,23 @@ struct xen_sysctl_debug_keys { }; typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); + +/* Get physical CPU information. */ +#define XEN_SYSCTL_getcpuinfo 8 +struct xen_sysctl_cpuinfo { + uint64_t idletime; +}; +typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); +struct xen_sysctl_getcpuinfo { + /* IN variables. */ + uint32_t max_cpus; + XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info; + /* OUT variables. */ + uint32_t nr_cpus; +}; +typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); struct xen_sysctl { uint32_t cmd; @@ -163,6 +178,7 @@ struct xen_sysctl { struct xen_sysctl_perfc_op perfc_op; struct xen_sysctl_getdomaininfolist getdomaininfolist; struct xen_sysctl_debug_keys debug_keys; + struct xen_sysctl_getcpuinfo getcpuinfo; uint8_t pad[128]; } u; }; diff -r 443ce7edad0e -r d146700adf71 xen/include/public/trace.h --- a/xen/include/public/trace.h Mon Jul 02 10:31:03 2007 -0600 +++ b/xen/include/public/trace.h Mon Jul 02 12:19:26 2007 -0600 @@ -88,6 +88,7 @@ #define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12) #define TRC_HVM_HLT (TRC_HVM_HANDLER + 0x13) #define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14) +#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15) /* This structure represents a single trace buffer record. */ struct t_rec { _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog