[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User awilliam@xxxxxxxxxxx # Node ID f34e37d0742d80ccfefd017a91f93310ebc2dfe8 # Parent 9da2d9b48ff8711516a07f7a06120abedb4e24b2 # Parent bd811e94d293ebcb8fb15db0becacd36c65a4ac7 merge with xen-unstable.hg --- xen/arch/x86/hvm/svm/instrlen.c | 479 ------------------------------ docs/man/xm.pod.1 | 4 docs/src/user.tex | 2 tools/firmware/vmxassist/vm86.c | 37 -- tools/ioemu/hw/serial.c | 68 ++++ tools/ioemu/usb-linux.c | 3 tools/ioemu/vl.c | 2 tools/ioemu/vnc.c | 33 +- tools/python/xen/xend/XendCheckpoint.py | 4 tools/python/xen/xend/XendDomain.py | 17 - tools/python/xen/xend/server/SrvDomain.py | 2 tools/python/xen/xm/main.py | 8 xen/arch/ia64/xen/domain.c | 6 xen/arch/ia64/xen/xensetup.c | 8 xen/arch/x86/Rules.mk | 2 xen/arch/x86/boot/x86_32.S | 26 - xen/arch/x86/hvm/Makefile | 1 xen/arch/x86/hvm/hvm.c | 70 +++- xen/arch/x86/hvm/i8259.c | 8 xen/arch/x86/hvm/instrlen.c | 474 +++++++++++++++++++++++++++++ xen/arch/x86/hvm/platform.c | 48 +-- xen/arch/x86/hvm/svm/Makefile | 1 xen/arch/x86/hvm/svm/svm.c | 27 - xen/arch/x86/hvm/vmx/vmcs.c | 135 ++++++-- xen/arch/x86/hvm/vmx/vmx.c | 87 ++--- xen/arch/x86/irq.c | 12 xen/arch/x86/setup.c | 7 xen/common/domain.c | 6 xen/common/gdbstub.c | 3 xen/common/schedule.c | 26 + xen/include/asm-x86/hvm/hvm.h | 18 - xen/include/asm-x86/hvm/vmx/vmcs.h | 15 xen/include/asm-x86/hvm/vmx/vmx.h | 93 ----- xen/include/xen/compiler.h | 2 xen/include/xen/sched.h | 2 35 files changed, 919 insertions(+), 817 deletions(-) diff -r 9da2d9b48ff8 -r f34e37d0742d docs/man/xm.pod.1 --- a/docs/man/xm.pod.1 Tue Sep 26 16:15:45 2006 -0600 +++ b/docs/man/xm.pod.1 Tue Sep 26 19:11:33 2006 -0600 @@ -393,7 +393,9 @@ specified, VCPU information for all doma =item B<vcpu-pin> I<domain-id> I<vcpu> I<cpus> -Pins the the VCPU to only run on the specific CPUs. +Pins the the VCPU to only run on the specific CPUs. The keyword +I<all> can be used to apply the I<cpus> list to all VCPUs in the +domain. Normally VCPUs can float between available CPUs whenever Xen deems a different run state is appropriate. Pinning can be used to restrict diff -r 9da2d9b48ff8 -r f34e37d0742d docs/src/user.tex --- a/docs/src/user.tex Tue Sep 26 16:15:45 2006 -0600 +++ b/docs/src/user.tex Tue Sep 26 19:11:33 2006 -0600 @@ -3208,6 +3208,8 @@ editing \path{grub.conf}. respectively; if no suffix is specified, the parameter defaults to kilobytes. In previous versions of Xen, suffixes were not supported and the value is always interpreted as kilobytes. +\item [ dom0\_vcpus\_pin ] Pins domain 0 VCPUs on their respective + physical CPUS (default=false). \item [ tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in pages (default 0). \item [ sched=xxx ] Select the CPU scheduler Xen should use. 
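For illustration, the pinning syntax documented above might be used as
follows (domain name and CPU numbers are hypothetical):

    # Pin every VCPU of the domain to physical CPUs 0-3:
    xm vcpu-pin mydomain all 0-3

    # Pin VCPU 1 to CPUs 0-3 except CPU 2, using the '^' exclusion
    # syntax that xm's CPU-list parser gains later in this patch:
    xm vcpu-pin mydomain 1 0-3,^2

The new boot option is passed on Xen's kernel line when editing
grub.conf, e.g.:

    kernel /boot/xen.gz dom0_vcpus_pin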
The diff -r 9da2d9b48ff8 -r f34e37d0742d tools/firmware/vmxassist/vm86.c --- a/tools/firmware/vmxassist/vm86.c Tue Sep 26 16:15:45 2006 -0600 +++ b/tools/firmware/vmxassist/vm86.c Tue Sep 26 19:11:33 2006 -0600 @@ -69,28 +69,23 @@ guest_linear_to_real(uint32_t base) if (!(oldctx.cr4 & CR4_PAE)) { l1_mfn = ((uint32_t *)gcr3)[(base >> 22) & 0x3ff]; - - if (oldctx.cr4 & CR4_PSE || l1_mfn & PDE_PS) { - /* 1 level page table */ - l0_mfn = l1_mfn; - if (!(l0_mfn & PT_ENTRY_PRESENT)) - panic("l1 entry not present\n"); - - l0_mfn &= 0xffc00000; + if (!(l1_mfn & PT_ENTRY_PRESENT)) + panic("l2 entry not present\n"); + + if ((oldctx.cr4 & CR4_PSE) && (l1_mfn & PDE_PS)) { + l0_mfn = l1_mfn & 0xffc00000; return l0_mfn + (base & 0x3fffff); } - if (!(l1_mfn & PT_ENTRY_PRESENT)) - panic("l2 entry not present\n"); - l1_mfn &= 0xfffff000; + l0_mfn = ((uint32_t *)l1_mfn)[(base >> 12) & 0x3ff]; if (!(l0_mfn & PT_ENTRY_PRESENT)) panic("l1 entry not present\n"); l0_mfn &= 0xfffff000; return l0_mfn + (base & 0xfff); - } else if (oldctx.cr4 & CR4_PAE && !(oldctx.cr4 & CR4_PSE)) { + } else { l2_mfn = ((uint64_t *)gcr3)[(base >> 30) & 0x3]; if (!(l2_mfn & PT_ENTRY_PRESENT)) panic("l3 entry not present\n"); @@ -99,6 +94,12 @@ guest_linear_to_real(uint32_t base) l1_mfn = ((uint64_t *)l2_mfn)[(base >> 21) & 0x1ff]; if (!(l1_mfn & PT_ENTRY_PRESENT)) panic("l2 entry not present\n"); + + if (l1_mfn & PDE_PS) { /* CR4.PSE is ignored in PAE mode */ + l0_mfn = l1_mfn & 0x3ffe00000ULL; + return l0_mfn + (base & 0x1fffff); + } + l1_mfn &= 0x3fffff000ULL; l0_mfn = ((uint64_t *)l1_mfn)[(base >> 12) & 0x1ff]; @@ -107,18 +108,6 @@ guest_linear_to_real(uint32_t base) l0_mfn &= 0x3fffff000ULL; return l0_mfn + (base & 0xfff); - } else { /* oldctx.cr4 & CR4_PAE && oldctx.cr4 & CR4_PSE */ - l1_mfn = ((uint64_t *)gcr3)[(base >> 30) & 0x3]; - if (!(l1_mfn & PT_ENTRY_PRESENT)) - panic("l2 entry not present\n"); - l1_mfn &= 0x3fffff000ULL; - - l0_mfn = ((uint64_t *)l1_mfn)[(base >> 21) & 0x1ff]; - if (!(l0_mfn & PT_ENTRY_PRESENT)) - panic("l1 entry not present\n"); - l0_mfn &= 0x3ffe00000ULL; - - return l0_mfn + (base & 0x1fffff); } } diff -r 9da2d9b48ff8 -r f34e37d0742d tools/ioemu/hw/serial.c --- a/tools/ioemu/hw/serial.c Tue Sep 26 16:15:45 2006 -0600 +++ b/tools/ioemu/hw/serial.c Tue Sep 26 19:11:33 2006 -0600 @@ -22,6 +22,9 @@ * THE SOFTWARE. */ #include "vl.h" +#include <sys/time.h> +#include <time.h> +#include <assert.h> //#define DEBUG_SERIAL @@ -138,6 +141,67 @@ static void serial_update_parameters(Ser printf("speed=%d parity=%c data=%d stop=%d\n", speed, parity, data_bits, stop_bits); #endif +} + +/* Rate limit serial requests so that e.g. grub on a serial console + doesn't kill dom0. Simple token bucket. If we get some actual + data from the user, instantly refil the bucket. */ + +/* How long it takes to generate a token, in microseconds. 
*/ +#define TOKEN_PERIOD 1000 +/* Maximum and initial size of token bucket */ +#define TOKENS_MAX 100000 + +static int tokens_avail; + +static void serial_get_token(void) +{ + static struct timeval last_refil_time; + static int started; + + assert(tokens_avail >= 0); + if (!tokens_avail) { + struct timeval delta, now; + int generated; + + if (!started) { + gettimeofday(&last_refil_time, NULL); + tokens_avail = TOKENS_MAX; + started = 1; + return; + } + retry: + gettimeofday(&now, NULL); + delta.tv_sec = now.tv_sec - last_refil_time.tv_sec; + delta.tv_usec = now.tv_usec - last_refil_time.tv_usec; + if (delta.tv_usec < 0) { + delta.tv_usec += 1000000; + delta.tv_sec--; + } + assert(delta.tv_usec >= 0 && delta.tv_sec >= 0); + if (delta.tv_usec < TOKEN_PERIOD) { + struct timespec ts; + /* Wait until at least one token is available. */ + ts.tv_sec = TOKEN_PERIOD / 1000000; + ts.tv_nsec = (TOKEN_PERIOD % 1000000) * 1000; + while (nanosleep(&ts, &ts) < 0 && errno == EINTR) + ; + goto retry; + } + generated = (delta.tv_sec * 1000000) / TOKEN_PERIOD; + generated += + ((delta.tv_sec * 1000000) % TOKEN_PERIOD + delta.tv_usec) / TOKEN_PERIOD; + assert(generated > 0); + + last_refil_time.tv_usec += (generated * TOKEN_PERIOD) % 1000000; + last_refil_time.tv_sec += last_refil_time.tv_usec / 1000000; + last_refil_time.tv_usec %= 1000000; + last_refil_time.tv_sec += (generated * TOKEN_PERIOD) / 1000000; + if (generated > TOKENS_MAX) + generated = TOKENS_MAX; + tokens_avail = generated; + } + tokens_avail--; } static void serial_ioport_write(void *opaque, uint32_t addr, uint32_t val) @@ -245,9 +309,11 @@ static uint32_t serial_ioport_read(void ret = s->mcr; break; case 5: + serial_get_token(); ret = s->lsr; break; case 6: + serial_get_token(); if (s->mcr & UART_MCR_LOOP) { /* in loopback, the modem output pins are connected to the inputs */ @@ -296,12 +362,14 @@ static void serial_receive1(void *opaque static void serial_receive1(void *opaque, const uint8_t *buf, int size) { SerialState *s = opaque; + tokens_avail = TOKENS_MAX; serial_receive_byte(s, buf[0]); } static void serial_event(void *opaque, int event) { SerialState *s = opaque; + tokens_avail = TOKENS_MAX; if (event == CHR_EVENT_BREAK) serial_receive_break(s); } diff -r 9da2d9b48ff8 -r f34e37d0742d tools/ioemu/usb-linux.c --- a/tools/ioemu/usb-linux.c Tue Sep 26 16:15:45 2006 -0600 +++ b/tools/ioemu/usb-linux.c Tue Sep 26 19:11:33 2006 -0600 @@ -26,6 +26,9 @@ #if defined(__linux__) #include <dirent.h> #include <sys/ioctl.h> +/* Some versions of usbdevice_fs.h need __user to be defined for them. */ +/* This may (harmlessly) conflict with a definition in linux/compiler.h. */ +#define __user #include <linux/usbdevice_fs.h> #include <linux/version.h> diff -r 9da2d9b48ff8 -r f34e37d0742d tools/ioemu/vl.c --- a/tools/ioemu/vl.c Tue Sep 26 16:15:45 2006 -0600 +++ b/tools/ioemu/vl.c Tue Sep 26 19:11:33 2006 -0600 @@ -727,7 +727,7 @@ void qemu_del_timer(QEMUTimer *ts) void qemu_advance_timer(QEMUTimer *ts, int64_t expire_time) { - if (ts->expire_time > expire_time) + if (ts->expire_time > expire_time || !qemu_timer_pending(ts)) qemu_mod_timer(ts, expire_time); } diff -r 9da2d9b48ff8 -r f34e37d0742d tools/ioemu/vnc.c --- a/tools/ioemu/vnc.c Tue Sep 26 16:15:45 2006 -0600 +++ b/tools/ioemu/vnc.c Tue Sep 26 19:11:33 2006 -0600 @@ -26,6 +26,7 @@ #include "vl.h" #include "qemu_socket.h" +#include <assert.h> /* The refresh interval starts at BASE. If we scan the buffer and find no change, we increase by INC, up to MAX. 
If the mouse moves @@ -580,12 +581,16 @@ static void _vnc_update_client(void *opa interested (e.g. minimised) it'll ignore this, and we can stop scanning the buffer until it sends another update request. */ - /* Note that there are bugs in xvncviewer which prevent - this from actually working. Leave the code in place - for correct clients. */ + /* It turns out that there's a bug in realvncviewer 4.1.2 + which means that if you send a proper null update (with + no update rectangles), it gets a bit out of sync and + never sends any further requests, regardless of whether + it needs one or not. Fix this by sending a single 1x1 + update rectangle instead. */ vnc_write_u8(vs, 0); vnc_write_u8(vs, 0); - vnc_write_u16(vs, 0); + vnc_write_u16(vs, 1); + send_framebuffer_update(vs, 0, 0, 1, 1); vnc_flush(vs); vs->last_update_time = now; return; @@ -728,8 +733,10 @@ static void vnc_client_read(void *opaque memmove(vs->input.buffer, vs->input.buffer + len, vs->input.offset - len); vs->input.offset -= len; - } else + } else { + assert(ret > vs->read_handler_expect); vs->read_handler_expect = ret; + } } } @@ -1076,8 +1083,12 @@ static int protocol_client_msg(VncState if (len == 1) return 4; - if (len == 4) - return 4 + (read_u16(data, 2) * 4); + if (len == 4) { + uint16_t v; + v = read_u16(data, 2); + if (v) + return 4 + v * 4; + } limit = read_u16(data, 2); for (i = 0; i < limit; i++) { @@ -1117,8 +1128,12 @@ static int protocol_client_msg(VncState if (len == 1) return 8; - if (len == 8) - return 8 + read_u32(data, 4); + if (len == 8) { + uint32_t v; + v = read_u32(data, 4); + if (v) + return 8 + v; + } client_cut_text(vs, read_u32(data, 4), data + 8); break; diff -r 9da2d9b48ff8 -r f34e37d0742d tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Tue Sep 26 16:15:45 2006 -0600 +++ b/tools/python/xen/xend/XendCheckpoint.py Tue Sep 26 19:11:33 2006 -0600 @@ -161,8 +161,8 @@ def restore(xd, fd): if handler.store_mfn is None or handler.console_mfn is None: raise XendError('Could not read store/console MFN') - #Block until src closes connection - os.read(fd, 1) + os.read(fd, 1) # Wait for source to close connection + dominfo.waitForDevices() # Wait for backends to set up dominfo.unpause() dominfo.completeRestore(handler.store_mfn, handler.console_mfn) diff -r 9da2d9b48ff8 -r f34e37d0742d tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Tue Sep 26 16:15:45 2006 -0600 +++ b/tools/python/xen/xend/XendDomain.py Tue Sep 26 19:11:33 2006 -0600 @@ -487,10 +487,19 @@ class XendDomain: if not dominfo: raise XendInvalidDomain(str(domid)) - try: - return xc.vcpu_setaffinity(dominfo.getDomid(), vcpu, cpumap) - except Exception, ex: - raise XendError(str(ex)) + # if vcpu is keyword 'all', apply the cpumap to all vcpus + vcpus = [ vcpu ] + if str(vcpu).lower() == "all": + vcpus = range(0, int(dominfo.getVCpuCount())) + + # set the same cpumask for all vcpus + rc = 0 + for v in vcpus: + try: + rc = xc.vcpu_setaffinity(dominfo.getDomid(), int(v), cpumap) + except Exception, ex: + raise XendError(str(ex)) + return rc def domain_cpu_sedf_set(self, domid, period, slice_, latency, extratime, weight): diff -r 9da2d9b48ff8 -r f34e37d0742d tools/python/xen/xend/server/SrvDomain.py --- a/tools/python/xen/xend/server/SrvDomain.py Tue Sep 26 16:15:45 2006 -0600 +++ b/tools/python/xen/xend/server/SrvDomain.py Tue Sep 26 19:11:33 2006 -0600 @@ -97,7 +97,7 @@ class SrvDomain(SrvDir): def op_pincpu(self, _, req): fn = FormFn(self.xd.domain_pincpu, [['dom', 'int'], - ['vcpu', 
'int'], + ['vcpu', 'str'], ['cpumap', 'str']]) val = fn(req.args, {'dom': self.dom.domid}) return val diff -r 9da2d9b48ff8 -r f34e37d0742d tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Tue Sep 26 16:15:45 2006 -0600 +++ b/tools/python/xen/xm/main.py Tue Sep 26 19:11:33 2006 -0600 @@ -759,12 +759,16 @@ def xm_vcpu_pin(args): for i in range(int(x),int(y)+1): cpus.append(int(i)) else: - cpus.append(int(c)) + # remove this element from the list + if c[0] == '^': + cpus = [x for x in cpus if x != int(c[1:])] + else: + cpus.append(int(c)) cpus.sort() return cpus dom = args[0] - vcpu = int(args[1]) + vcpu = args[1] cpumap = cpu_make_map(args[2]) server.xend.domain.pincpu(dom, vcpu, cpumap) diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/ia64/xen/domain.c Tue Sep 26 19:11:33 2006 -0600 @@ -54,7 +54,6 @@ static unsigned int dom0_max_vcpus = 1; static unsigned int dom0_max_vcpus = 1; integer_param("dom0_max_vcpus", dom0_max_vcpus); -extern int opt_dom0_vcpus_pin; extern unsigned long running_on_sim; extern char dom0_command_line[]; @@ -1021,12 +1020,9 @@ int construct_dom0(struct domain *d, dom0_max_vcpus = MAX_VIRT_CPUS; printf ("Dom0 max_vcpus=%d\n", dom0_max_vcpus); - for ( i = 1; i < dom0_max_vcpus; i++ ) { + for ( i = 1; i < dom0_max_vcpus; i++ ) if (alloc_vcpu(d, i, i) == NULL) printf ("Cannot allocate dom0 vcpu %d\n", i); - else if (opt_dom0_vcpus_pin) - d->vcpu[i]->cpu_affinity = cpumask_of_cpu(i); - } /* Copy the OS image. */ loaddomainelfimage(d,image_start); diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/ia64/xen/xensetup.c Tue Sep 26 19:11:33 2006 -0600 @@ -49,10 +49,6 @@ extern void init_IRQ(void); extern void init_IRQ(void); extern void trap_init(void); -/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */ -unsigned int opt_dom0_vcpus_pin = 0; -boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin); - /* opt_nosmp: If true, secondary processors are ignored. */ static int opt_nosmp = 0; boolean_param("nosmp", opt_nosmp); @@ -521,10 +517,6 @@ printk("num_online_cpus=%d, max_cpus=%d\ 0) != 0) panic("Could not set up DOM0 guest OS\n"); - /* PIN domain0 VCPU 0 on CPU 0. */ - if (opt_dom0_vcpus_pin) - dom0->vcpu[0]->cpu_affinity = cpumask_of_cpu(0); - if (!running_on_sim) // slow on ski and pages are pre-initialized to zero scrub_heap_pages(); diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/Rules.mk --- a/xen/arch/x86/Rules.mk Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/Rules.mk Tue Sep 26 19:11:33 2006 -0600 @@ -44,7 +44,7 @@ CFLAGS += -fno-asynchronous-unwind-tabl CFLAGS += -fno-asynchronous-unwind-tables # -fvisibility=hidden reduces -fpic cost, if it's available CFLAGS += $(shell $(CC) -v --help 2>&1 | grep " -fvisibility=" | \ - grep -q hidden && echo "-fvisibility=hidden") + grep -q hidden && echo "-DGCC_HAS_VISIBILITY_ATTRIBUTE") LDFLAGS += -m elf_x86_64 x86_32 := n x86_64 := y diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/boot/x86_32.S --- a/xen/arch/x86/boot/x86_32.S Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/boot/x86_32.S Tue Sep 26 19:11:33 2006 -0600 @@ -218,28 +218,24 @@ nopaging_gdt_descr: .word LAST_RESERVED_GDT_BYTE .long gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET - .org 0x1000 -/* NB. Rings != 0 get access up to 0xFC400000. This allows access to the */ -/* machine->physical mapping table. Ring 0 can access all memory. 
*/ + .align PAGE_SIZE, 0 +/* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */ +/* the machine->physical mapping table. Ring 0 can access all memory. */ +#define GUEST_DESC(d) \ + .long ((MACH2PHYS_VIRT_END - 1) >> 12) & 0xffff, \ + ((MACH2PHYS_VIRT_END - 1) >> 12) & (0xf << 16) | (d) ENTRY(gdt_table) .quad 0x0000000000000000 /* unused */ .quad 0x00cf9a000000ffff /* 0xe008 ring 0 4.00GB code at 0x0 */ .quad 0x00cf92000000ffff /* 0xe010 ring 0 4.00GB data at 0x0 */ -#ifdef CONFIG_X86_PAE - .quad 0x00cfba00000067ff - .quad 0x00cfb200000067ff - .quad 0x00cffa00000067ff - .quad 0x00cff200000067ff -#else - .quad 0x00cfba000000c3ff /* 0xe019 ring 1 3.95GB code at 0x0 */ - .quad 0x00cfb2000000c3ff /* 0xe021 ring 1 3.95GB data at 0x0 */ - .quad 0x00cffa000000c3ff /* 0xe02b ring 3 3.95GB code at 0x0 */ - .quad 0x00cff2000000c3ff /* 0xe033 ring 3 3.95GB data at 0x0 */ -#endif + GUEST_DESC(0x00c0ba00) /* 0xe019 ring 1 3.xxGB code at 0x0 */ + GUEST_DESC(0x00c0b200) /* 0xe021 ring 1 3.xxGB data at 0x0 */ + GUEST_DESC(0x00c0fa00) /* 0xe02b ring 3 3.xxGB code at 0x0 */ + GUEST_DESC(0x00c0f200) /* 0xe033 ring 3 3.xxGB data at 0x0 */ .quad 0x0000000000000000 /* unused */ .fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */ - .org 0x2000 + .align PAGE_SIZE, 0 #ifdef CONFIG_X86_PAE ENTRY(idle_pg_table) diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/hvm/Makefile --- a/xen/arch/x86/hvm/Makefile Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/hvm/Makefile Tue Sep 26 19:11:33 2006 -0600 @@ -4,6 +4,7 @@ obj-y += hvm.o obj-y += hvm.o obj-y += i8254.o obj-y += i8259.o +obj-y += instrlen.o obj-y += intercept.o obj-y += io.o obj-y += platform.o diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/hvm/hvm.c Tue Sep 26 19:11:33 2006 -0600 @@ -337,6 +337,33 @@ int cpu_get_interrupt(struct vcpu *v, in return -1; } +static void hvm_vcpu_down(void) +{ + struct vcpu *v = current; + struct domain *d = v->domain; + int online_count = 0; + + DPRINTK("DOM%d/VCPU%d: going offline.\n", d->domain_id, v->vcpu_id); + + /* Doesn't halt us immediately, but we'll never return to guest context. */ + set_bit(_VCPUF_down, &v->vcpu_flags); + vcpu_sleep_nosync(v); + + /* Any other VCPUs online? ... */ + LOCK_BIGLOCK(d); + for_each_vcpu ( d, v ) + if ( !test_bit(_VCPUF_down, &v->vcpu_flags) ) + online_count++; + UNLOCK_BIGLOCK(d); + + /* ... Shut down the domain if not. */ + if ( online_count == 0 ) + { + DPRINTK("DOM%d: all CPUs offline -- powering off.\n", d->domain_id); + domain_shutdown(d, SHUTDOWN_poweroff); + } +} + void hvm_hlt(unsigned long rflags) { struct vcpu *v = current; @@ -344,18 +371,12 @@ void hvm_hlt(unsigned long rflags) s_time_t next_pit = -1, next_wakeup; /* - * Detect machine shutdown. Only do this for vcpu 0, to avoid potentially - * shutting down the domain early. If we halt with interrupts disabled, - * that's a pretty sure sign that we want to shut down. In a real - * processor, NMIs are the only way to break out of this. + * If we halt with interrupts disabled, that's a pretty sure sign that we + * want to shut down. In a real processor, NMIs are the only way to break + * out of this. 
*/ - if ( (v->vcpu_id == 0) && !(rflags & X86_EFLAGS_IF) ) - { - printk("D%d: HLT with interrupts disabled -- shutting down.\n", - current->domain->domain_id); - domain_shutdown(current->domain, SHUTDOWN_poweroff); - return; - } + if ( unlikely(!(rflags & X86_EFLAGS_IF)) ) + return hvm_vcpu_down(); if ( !v->vcpu_id ) next_pit = get_scheduled(v, pt->irq, pt); @@ -578,17 +599,20 @@ int hvm_bringup_ap(int vcpuid, int tramp struct vcpu_guest_context *ctxt; int rc = 0; - /* current must be HVM domain BSP */ - if ( !(hvm_guest(bsp) && bsp->vcpu_id == 0) ) { - printk("Not calling hvm_bringup_ap from BSP context.\n"); + BUG_ON(!hvm_guest(bsp)); + + if ( bsp->vcpu_id != 0 ) + { + DPRINTK("Not calling hvm_bringup_ap from BSP context.\n"); domain_crash_synchronous(); } if ( (v = d->vcpu[vcpuid]) == NULL ) return -ENOENT; - if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL ) { - printk("Failed to allocate memory in hvm_bringup_ap.\n"); + if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL ) + { + DPRINTK("Failed to allocate memory in hvm_bringup_ap.\n"); return -ENOMEM; } @@ -601,12 +625,14 @@ int hvm_bringup_ap(int vcpuid, int tramp UNLOCK_BIGLOCK(d); if ( rc != 0 ) - printk("AP %d bringup failed in boot_vcpu %x.\n", vcpuid, rc); - else { - if ( test_and_clear_bit(_VCPUF_down, &d->vcpu[vcpuid]->vcpu_flags) ) - vcpu_wake(d->vcpu[vcpuid]); - printk("AP %d bringup suceeded.\n", vcpuid); - } + { + DPRINTK("AP %d bringup failed in boot_vcpu %x.\n", vcpuid, rc); + return rc; + } + + if ( test_and_clear_bit(_VCPUF_down, &d->vcpu[vcpuid]->vcpu_flags) ) + vcpu_wake(d->vcpu[vcpuid]); + DPRINTK("AP %d bringup suceeded.\n", vcpuid); xfree(ctxt); diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/hvm/i8259.c --- a/xen/arch/x86/hvm/i8259.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/hvm/i8259.c Tue Sep 26 19:11:33 2006 -0600 @@ -447,6 +447,10 @@ static void pic_init1(int io_addr, int e ASSERT(spin_is_locked(&s->pics_state->lock)); pic_reset(s); + + /* XXX We set the ELCR to level triggered here, but that should + really be done by the BIOS, and only for PCI IRQs. 
*/ + s->elcr = 0xff & s->elcr_mask; } void pic_init(struct hvm_virpic *s, void (*irq_request)(void *, int), @@ -458,12 +462,12 @@ void pic_init(struct hvm_virpic *s, void spin_lock_init(&s->lock); s->pics[0].pics_state = s; s->pics[1].pics_state = s; + s->pics[0].elcr_mask = 0xf8; + s->pics[1].elcr_mask = 0xde; spin_lock_irqsave(&s->lock, flags); pic_init1(0x20, 0x4d0, &s->pics[0]); pic_init1(0xa0, 0x4d1, &s->pics[1]); spin_unlock_irqrestore(&s->lock, flags); - s->pics[0].elcr_mask = 0xf8; - s->pics[1].elcr_mask = 0xde; s->irq_request = irq_request; s->irq_request_opaque = irq_request_opaque; } diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/hvm/platform.c Tue Sep 26 19:11:33 2006 -0600 @@ -52,7 +52,7 @@ static inline long __get_reg_value(unsig case QUAD: return (long)(reg); default: - printf("Error: (__get_reg_value) Invalid reg size\n"); + printk("Error: (__get_reg_value) Invalid reg size\n"); domain_crash_synchronous(); } } @@ -78,7 +78,7 @@ long get_reg_value(int size, int index, case 7: /* %bh */ return (char)((regs->rbx & 0xFF00) >> 8); default: - printf("Error: (get_reg_value) Invalid index value\n"); + printk("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } /* NOTREACHED */ @@ -102,7 +102,7 @@ long get_reg_value(int size, int index, case 14: return __get_reg_value(regs->r14, size); case 15: return __get_reg_value(regs->r15, size); default: - printf("Error: (get_reg_value) Invalid index value\n"); + printk("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } } @@ -115,7 +115,7 @@ static inline long __get_reg_value(unsig case LONG: return (int)(reg & 0xFFFFFFFF); default: - printf("Error: (__get_reg_value) Invalid reg size\n"); + printk("Error: (__get_reg_value) Invalid reg size\n"); domain_crash_synchronous(); } } @@ -141,7 +141,7 @@ long get_reg_value(int size, int index, case 7: /* %bh */ return (char)((regs->ebx & 0xFF00) >> 8); default: - printf("Error: (get_reg_value) Invalid index value\n"); + printk("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } } @@ -156,7 +156,7 @@ long get_reg_value(int size, int index, case 6: return __get_reg_value(regs->esi, size); case 7: return __get_reg_value(regs->edi, size); default: - printf("Error: (get_reg_value) Invalid index value\n"); + printk("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } } @@ -464,7 +464,7 @@ static int hvm_decode(int realmode, unsi return DECODE_success; default: - printf("%x/%x, This opcode isn't handled yet!\n", + printk("%x/%x, This opcode isn't handled yet!\n", *opcode, ins_subtype); return DECODE_failure; } @@ -614,7 +614,7 @@ static int hvm_decode(int realmode, unsi break; default: - printf("%x, This opcode isn't handled yet!\n", *opcode); + printk("%x, This opcode isn't handled yet!\n", *opcode); return DECODE_failure; } @@ -675,12 +675,12 @@ static int hvm_decode(int realmode, unsi } else { - printf("0f %x, This opcode subtype isn't handled yet\n", *opcode); + printk("0f %x, This opcode subtype isn't handled yet\n", *opcode); return DECODE_failure; } default: - printf("0f %x, This opcode isn't handled yet\n", *opcode); + printk("0f %x, This opcode isn't handled yet\n", *opcode); return DECODE_failure; } } @@ -702,7 +702,7 @@ static void hvm_send_assist_req(struct v if ( unlikely(p->state != STATE_INVALID) ) { /* This indicates a bug in the device model. Crash the domain. 
*/ - printf("Device model set bad IO state %d.\n", p->state); + printk("Device model set bad IO state %d.\n", p->state); domain_crash(v->domain); return; } @@ -733,7 +733,7 @@ void send_pio_req(struct cpu_user_regs * p = &vio->vp_ioreq; if ( p->state != STATE_INVALID ) - printf("WARNING: send pio with something already pending (%d)?\n", + printk("WARNING: send pio with something already pending (%d)?\n", p->state); p->dir = dir; p->pdata_valid = pvalid; @@ -776,14 +776,14 @@ void send_mmio_req( vio = get_vio(v->domain, v->vcpu_id); if (vio == NULL) { - printf("bad shared page\n"); + printk("bad shared page\n"); domain_crash_synchronous(); } p = &vio->vp_ioreq; if ( p->state != STATE_INVALID ) - printf("WARNING: send mmio with something already pending (%d)?\n", + printk("WARNING: send mmio with something already pending (%d)?\n", p->state); p->dir = dir; p->pdata_valid = pvalid; @@ -841,7 +841,7 @@ static void mmio_operands(int type, unsi else send_mmio_req(type, gpa, 1, inst->op_size, 0, IOREQ_READ, 0); } else { - printf("mmio_operands: invalid operand\n"); + printk("mmio_operands: invalid operand\n"); domain_crash_synchronous(); } } @@ -866,8 +866,10 @@ void handle_mmio(unsigned long va, unsig memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES); hvm_store_cpu_guest_regs(v, regs, NULL); - if ((inst_len = hvm_instruction_length(v)) <= 0) { - printf("handle_mmio: failed to get instruction length\n"); + inst_len = hvm_instruction_length(regs, hvm_guest_x86_mode(v)); + if ( inst_len <= 0 ) + { + printk("handle_mmio: failed to get instruction length\n"); domain_crash_synchronous(); } @@ -880,19 +882,19 @@ void handle_mmio(unsigned long va, unsig memset(inst, 0, MAX_INST_LEN); ret = inst_copy_from_guest(inst, inst_addr, inst_len); if (ret != inst_len) { - printf("handle_mmio: failed to copy instruction\n"); + printk("handle_mmio: failed to copy instruction\n"); domain_crash_synchronous(); } init_instruction(&mmio_inst); if (hvm_decode(realmode, inst, &mmio_inst) == DECODE_failure) { - printf("handle_mmio: failed to decode instruction\n"); - printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %d:", + printk("handle_mmio: failed to decode instruction\n"); + printk("mmio opcode: va 0x%lx, gpa 0x%lx, len %d:", va, gpa, inst_len); for (i = 0; i < inst_len; i++) - printf(" %02x", inst[i] & 0xFF); - printf("\n"); + printk(" %02x", inst[i] & 0xFF); + printk("\n"); domain_crash_synchronous(); } @@ -1073,7 +1075,7 @@ void handle_mmio(unsigned long va, unsig break; default: - printf("Unhandled MMIO instruction\n"); + printk("Unhandled MMIO instruction\n"); domain_crash_synchronous(); } } diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/hvm/svm/Makefile --- a/xen/arch/x86/hvm/svm/Makefile Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/hvm/svm/Makefile Tue Sep 26 19:11:33 2006 -0600 @@ -2,7 +2,6 @@ subdir-$(x86_64) += x86_64 subdir-$(x86_64) += x86_64 obj-y += emulate.o -obj-y += instrlen.o obj-y += intr.o obj-y += svm.o obj-y += vmcb.o diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Tue Sep 26 19:11:33 2006 -0600 @@ -44,6 +44,7 @@ #include <asm/hvm/svm/emulate.h> #include <asm/hvm/svm/vmmcall.h> #include <asm/hvm/svm/intr.h> +#include <asm/x86_emulate.h> #include <public/sched.h> #define SVM_EXTRA_DEBUG @@ -60,7 +61,6 @@ extern asmlinkage void do_IRQ(struct cpu extern asmlinkage void do_IRQ(struct cpu_user_regs *); extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port, 
unsigned long count, int size, long value, int dir, int pvalid); -extern int svm_instrlen(struct cpu_user_regs *regs, int mode); extern void svm_dump_inst(unsigned long eip); extern int svm_dbg_on; void svm_dump_regs(const char *from, struct cpu_user_regs *regs); @@ -468,21 +468,19 @@ static int svm_realmode(struct vcpu *v) return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE); } -int svm_guest_x86_mode(struct vcpu *v) +static int svm_guest_x86_mode(struct vcpu *v) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode; - /* check which operating mode the guest is running */ - if( vmcb->efer & EFER_LMA ) - mode = vmcb->cs.attributes.fields.l ? 8 : 4; - else - mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4; - return mode; -} - -int svm_instruction_length(struct vcpu *v) -{ - return svm_instrlen(guest_cpu_user_regs(), svm_guest_x86_mode(v)); + + if ( vmcb->efer & EFER_LMA ) + return (vmcb->cs.attributes.fields.l ? + X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32); + + if ( svm_realmode(v) ) + return X86EMUL_MODE_REAL; + + return (vmcb->cs.attributes.fields.db ? + X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16); } void svm_update_host_cr3(struct vcpu *v) @@ -878,7 +876,6 @@ int start_svm(void) hvm_funcs.long_mode_enabled = svm_long_mode_enabled; hvm_funcs.pae_enabled = svm_pae_enabled; hvm_funcs.guest_x86_mode = svm_guest_x86_mode; - hvm_funcs.instruction_length = svm_instruction_length; hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg; hvm_funcs.update_host_cr3 = svm_update_host_cr3; diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Tue Sep 26 19:11:33 2006 -0600 @@ -37,36 +37,119 @@ #include <xen/keyhandler.h> #include <asm/shadow.h> -static int vmcs_size; -static int vmcs_order; +/* Basic flags for Pin-based VM-execution controls. */ +#define MONITOR_PIN_BASED_EXEC_CONTROLS \ + ( PIN_BASED_EXT_INTR_MASK | \ + PIN_BASED_NMI_EXITING ) + +/* Basic flags for CPU-based VM-execution controls. */ +#ifdef __x86_64__ +#define MONITOR_CPU_BASED_EXEC_CONTROLS_SUBARCH \ + ( CPU_BASED_CR8_LOAD_EXITING | \ + CPU_BASED_CR8_STORE_EXITING ) +#else +#define MONITOR_CPU_BASED_EXEC_CONTROLS_SUBARCH 0 +#endif +#define MONITOR_CPU_BASED_EXEC_CONTROLS \ + ( MONITOR_CPU_BASED_EXEC_CONTROLS_SUBARCH | \ + CPU_BASED_HLT_EXITING | \ + CPU_BASED_INVDPG_EXITING | \ + CPU_BASED_MWAIT_EXITING | \ + CPU_BASED_MOV_DR_EXITING | \ + CPU_BASED_ACTIVATE_IO_BITMAP | \ + CPU_BASED_USE_TSC_OFFSETING ) + +/* Basic flags for VM-Exit controls. */ +#ifdef __x86_64__ +#define MONITOR_VM_EXIT_CONTROLS_SUBARCH VM_EXIT_IA32E_MODE +#else +#define MONITOR_VM_EXIT_CONTROLS_SUBARCH 0 +#endif +#define MONITOR_VM_EXIT_CONTROLS \ + ( MONITOR_VM_EXIT_CONTROLS_SUBARCH | \ + VM_EXIT_ACK_INTR_ON_EXIT ) + +/* Basic flags for VM-Entry controls. */ +#define MONITOR_VM_ENTRY_CONTROLS 0x00000000 + +/* Dynamic (run-time adjusted) execution control flags. */ +static u32 vmx_pin_based_exec_control; +static u32 vmx_cpu_based_exec_control; +static u32 vmx_vmexit_control; +static u32 vmx_vmentry_control; + static u32 vmcs_revision_id; +static u32 adjust_vmx_controls(u32 ctrls, u32 msr) +{ + u32 vmx_msr_low, vmx_msr_high; + + rdmsr(msr, vmx_msr_low, vmx_msr_high); + + /* Bit == 0 means must be zero. */ + BUG_ON(ctrls & ~vmx_msr_high); + + /* Bit == 1 means must be one. 
*/ + ctrls |= vmx_msr_low; + + return ctrls; +} + void vmx_init_vmcs_config(void) { u32 vmx_msr_low, vmx_msr_high; - - if ( vmcs_size ) - return; + u32 _vmx_pin_based_exec_control; + u32 _vmx_cpu_based_exec_control; + u32 _vmx_vmexit_control; + u32 _vmx_vmentry_control; + + _vmx_pin_based_exec_control = + adjust_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS, + MSR_IA32_VMX_PINBASED_CTLS_MSR); + _vmx_cpu_based_exec_control = + adjust_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS, + MSR_IA32_VMX_PROCBASED_CTLS_MSR); + _vmx_vmexit_control = + adjust_vmx_controls(MONITOR_VM_EXIT_CONTROLS, + MSR_IA32_VMX_EXIT_CTLS_MSR); + _vmx_vmentry_control = + adjust_vmx_controls(MONITOR_VM_ENTRY_CONTROLS, + MSR_IA32_VMX_ENTRY_CTLS_MSR); rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high); - vmcs_revision_id = vmx_msr_low; - - vmcs_size = vmx_msr_high & 0x1fff; - vmcs_order = get_order_from_bytes(vmcs_size); + if ( smp_processor_id() == 0 ) + { + vmcs_revision_id = vmx_msr_low; + vmx_pin_based_exec_control = _vmx_pin_based_exec_control; + vmx_cpu_based_exec_control = _vmx_cpu_based_exec_control; + vmx_vmexit_control = _vmx_vmexit_control; + vmx_vmentry_control = _vmx_vmentry_control; + } + else + { + BUG_ON(vmcs_revision_id != vmx_msr_low); + BUG_ON(vmx_pin_based_exec_control != _vmx_pin_based_exec_control); + BUG_ON(vmx_cpu_based_exec_control != _vmx_cpu_based_exec_control); + BUG_ON(vmx_vmexit_control != _vmx_vmexit_control); + BUG_ON(vmx_vmentry_control != _vmx_vmentry_control); + } + + /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ + BUG_ON((vmx_msr_high & 0x1fff) > PAGE_SIZE); } static struct vmcs_struct *vmx_alloc_vmcs(void) { struct vmcs_struct *vmcs; - if ( (vmcs = alloc_xenheap_pages(vmcs_order)) == NULL ) + if ( (vmcs = alloc_xenheap_page()) == NULL ) { DPRINTK("Failed to allocate VMCS.\n"); return NULL; } - memset(vmcs, 0, vmcs_size); /* don't remove this */ + memset(vmcs, 0, PAGE_SIZE); vmcs->vmcs_revision_id = vmcs_revision_id; return vmcs; @@ -74,7 +157,7 @@ static struct vmcs_struct *vmx_alloc_vmc static void vmx_free_vmcs(struct vmcs_struct *vmcs) { - free_xenheap_pages(vmcs, vmcs_order); + free_xenheap_page(vmcs); } static void __vmx_clear_vmcs(void *info) @@ -156,12 +239,11 @@ static inline int construct_vmcs_control { int error = 0; - error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL, - MONITOR_PIN_BASED_EXEC_CONTROLS); - - error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS); - - error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS); + error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control); + + error |= __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control); + + error |= __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control); error |= __vmwrite(IO_BITMAP_A, virt_to_maddr(arch_vmx->io_bitmap_a)); error |= __vmwrite(IO_BITMAP_B, virt_to_maddr(arch_vmx->io_bitmap_b)); @@ -246,9 +328,8 @@ static void vmx_do_launch(struct vcpu *v error |= __vmwrite(GUEST_CR0, cr0); cr0 &= ~X86_CR0_PG; error |= __vmwrite(CR0_READ_SHADOW, cr0); - error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL, - MONITOR_CPU_BASED_EXEC_CONTROLS); - v->arch.hvm_vcpu.u.vmx.exec_control = MONITOR_CPU_BASED_EXEC_CONTROLS; + error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmx_cpu_based_exec_control); + v->arch.hvm_vcpu.u.vmx.exec_control = vmx_cpu_based_exec_control; __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (cr4) : ); @@ -297,21 +378,21 @@ static inline int construct_init_vmcs_gu /* MSR */ error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0); error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0); - error |= 
__vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); - /* interrupt */ + error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); - /* mask */ - error |= __vmwrite(CR0_GUEST_HOST_MASK, -1UL); - error |= __vmwrite(CR4_GUEST_HOST_MASK, -1UL); + + error |= __vmwrite(CR0_GUEST_HOST_MASK, ~0UL); + error |= __vmwrite(CR4_GUEST_HOST_MASK, ~0UL); error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0); - /* TSC */ error |= __vmwrite(CR3_TARGET_COUNT, 0); + + error |= __vmwrite(GUEST_ACTIVITY_STATE, 0); /* Guest Selectors */ error |= __vmwrite(GUEST_ES_SELECTOR, GUEST_LAUNCH_DS); diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Sep 26 19:11:33 2006 -0600 @@ -45,6 +45,7 @@ #include <public/hvm/ioreq.h> #include <asm/hvm/vpic.h> #include <asm/hvm/vlapic.h> +#include <asm/x86_emulate.h> extern uint32_t vlapic_update_ppr(struct vlapic *vlapic); @@ -593,15 +594,6 @@ static void vmx_load_cpu_guest_regs(stru vmx_vmcs_exit(v); } -static int vmx_instruction_length(struct vcpu *v) -{ - unsigned long inst_len; - - if ( __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len) ) /* XXX Unsafe XXX */ - return 0; - return inst_len; -} - static unsigned long vmx_get_ctrl_reg(struct vcpu *v, unsigned int num) { switch ( num ) @@ -692,21 +684,6 @@ static void vmx_init_ap_context(struct v void do_nmi(struct cpu_user_regs *); -static int check_vmx_controls(u32 ctrls, u32 msr) -{ - u32 vmx_msr_low, vmx_msr_high; - - rdmsr(msr, vmx_msr_low, vmx_msr_high); - if ( (ctrls < vmx_msr_low) || (ctrls > vmx_msr_high) ) - { - printk("Insufficient VMX capability 0x%x, " - "msr=0x%x,low=0x%8x,high=0x%x\n", - ctrls, msr, vmx_msr_low, vmx_msr_high); - return 0; - } - return 1; -} - static void vmx_init_hypercall_page(struct domain *d, void *hypercall_page) { char *p; @@ -729,6 +706,35 @@ static void vmx_init_hypercall_page(stru *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */ } +static int vmx_realmode(struct vcpu *v) +{ + unsigned long rflags; + + ASSERT(v == current); + + __vmread(GUEST_RFLAGS, &rflags); + return rflags & X86_EFLAGS_VM; +} + +static int vmx_guest_x86_mode(struct vcpu *v) +{ + unsigned long cs_ar_bytes; + + ASSERT(v == current); + + __vmread(GUEST_CS_AR_BYTES, &cs_ar_bytes); + + if ( vmx_long_mode_enabled(v) ) + return ((cs_ar_bytes & (1u<<13)) ? + X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32); + + if ( vmx_realmode(v) ) + return X86EMUL_MODE_REAL; + + return ((cs_ar_bytes & (1u<<14)) ? 
+ X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16); +} + /* Setup HVM interfaces */ static void vmx_setup_hvm_funcs(void) { @@ -748,7 +754,6 @@ static void vmx_setup_hvm_funcs(void) hvm_funcs.long_mode_enabled = vmx_long_mode_enabled; hvm_funcs.pae_enabled = vmx_pae_enabled; hvm_funcs.guest_x86_mode = vmx_guest_x86_mode; - hvm_funcs.instruction_length = vmx_instruction_length; hvm_funcs.get_guest_ctrl_reg = vmx_get_ctrl_reg; hvm_funcs.update_host_cr3 = vmx_update_host_cr3; @@ -771,7 +776,7 @@ int start_vmx(void) */ boot_cpu_data.x86_capability[4] = cpuid_ecx(1); - if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability))) + if ( !test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability) ) return 0; rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx); @@ -791,24 +796,11 @@ int start_vmx(void) IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0); } - if ( !check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS, - MSR_IA32_VMX_PINBASED_CTLS_MSR) ) - return 0; - if ( !check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS, - MSR_IA32_VMX_PROCBASED_CTLS_MSR) ) - return 0; - if ( !check_vmx_controls(MONITOR_VM_EXIT_CONTROLS, - MSR_IA32_VMX_EXIT_CTLS_MSR) ) - return 0; - if ( !check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS, - MSR_IA32_VMX_ENTRY_CTLS_MSR) ) - return 0; - set_in_cr4(X86_CR4_VMXE); vmx_init_vmcs_config(); - - if(!smp_processor_id()) + + if ( smp_processor_id() == 0 ) setup_vmcs_dump(); if ( (vmcs = vmx_alloc_host_vmcs()) == NULL ) @@ -1499,7 +1491,7 @@ static int vmx_set_cr0(unsigned long val &v->arch.hvm_vmx.cpu_state); __vmread(VM_ENTRY_CONTROLS, &vm_entry_value); - vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE; + vm_entry_value |= VM_ENTRY_IA32E_MODE; __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); } #endif @@ -1553,7 +1545,7 @@ static int vmx_set_cr0(unsigned long val clear_bit(VMX_CPU_STATE_LMA_ENABLED, &v->arch.hvm_vmx.cpu_state); __vmread(VM_ENTRY_CONTROLS, &vm_entry_value); - vm_entry_value &= ~VM_ENTRY_CONTROLS_IA32E_MODE; + vm_entry_value &= ~VM_ENTRY_IA32E_MODE; __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); } } @@ -2276,15 +2268,8 @@ asmlinkage void vmx_vmexit_handler(struc domain_crash_synchronous(); break; case EXIT_REASON_PENDING_INTERRUPT: - /* - * Not sure exactly what the purpose of this is. The only bits set - * and cleared at this point are CPU_BASED_VIRTUAL_INTR_PENDING. - * (in io.c:{enable,disable}_irq_window(). So presumably we want to - * set it to the original value... - */ + /* Disable the interrupt window. */ v->arch.hvm_vcpu.u.vmx.exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; - v->arch.hvm_vcpu.u.vmx.exec_control |= - (MONITOR_CPU_BASED_EXEC_CONTROLS & CPU_BASED_VIRTUAL_INTR_PENDING); __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vcpu.u.vmx.exec_control); break; diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/irq.c Tue Sep 26 19:11:33 2006 -0600 @@ -351,11 +351,15 @@ int pirq_acktype(int irq) desc = &irq_desc[vector]; + if ( desc->handler == &no_irq_type ) + return ACKTYPE_NONE; + /* - * Edge-triggered IO-APIC interrupts need no final acknowledgement: - * we ACK early during interrupt processing. + * Edge-triggered IO-APIC and LAPIC interrupts need no final + * acknowledgement: we ACK early during interrupt processing. 
*/ - if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ) + if ( !strcmp(desc->handler->typename, "IO-APIC-edge") || + !strcmp(desc->handler->typename, "local-APIC-edge") ) return ACKTYPE_NONE; /* @@ -376,7 +380,9 @@ int pirq_acktype(int irq) return ACKTYPE_NONE; /* edge-triggered => no final EOI */ } + printk("Unknown PIC type '%s' for IRQ %d\n", desc->handler->typename, irq); BUG(); + return 0; } diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/arch/x86/setup.c Tue Sep 26 19:11:33 2006 -0600 @@ -272,6 +272,13 @@ void __init __start_xen(multiboot_info_t EARLY_FAIL(); } + /* + * Since there are some stubs getting built on the stacks which use + * direct calls/jumps, the heap must be confined to the lower 2G so + * that those branches can reach their targets. + */ + if ( opt_xenheap_megabytes > 2048 ) + opt_xenheap_megabytes = 2048; xenheap_phys_end = opt_xenheap_megabytes << 20; if ( mbi->flags & MBI_MEMMAP ) diff -r 9da2d9b48ff8 -r f34e37d0742d xen/common/domain.c --- a/xen/common/domain.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/common/domain.c Tue Sep 26 19:11:33 2006 -0600 @@ -82,20 +82,16 @@ struct vcpu *alloc_vcpu( v->domain = d; v->vcpu_id = vcpu_id; - v->processor = cpu_id; v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id]; spin_lock_init(&v->pause_lock); - v->cpu_affinity = is_idle_domain(d) ? - cpumask_of_cpu(cpu_id) : CPU_MASK_ALL; - v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline; v->runstate.state_entry_time = NOW(); if ( (vcpu_id != 0) && !is_idle_domain(d) ) set_bit(_VCPUF_down, &v->vcpu_flags); - if ( sched_init_vcpu(v) < 0 ) + if ( sched_init_vcpu(v, cpu_id) < 0 ) { free_vcpu_struct(v); return NULL; diff -r 9da2d9b48ff8 -r f34e37d0742d xen/common/gdbstub.c --- a/xen/common/gdbstub.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/common/gdbstub.c Tue Sep 26 19:11:33 2006 -0600 @@ -53,6 +53,8 @@ static char opt_gdb[30] = "none"; static char opt_gdb[30] = "none"; string_param("gdb", opt_gdb); +static void gdbstub_console_puts(const char *str); + /* value <-> char (de)serialzers */ char hex2char(unsigned long x) @@ -360,7 +362,6 @@ static void static void gdbstub_attach(struct gdb_context *ctx) { - static void gdbstub_console_puts(const char *str); if ( ctx->currently_attached ) return; ctx->currently_attached = 1; diff -r 9da2d9b48ff8 -r f34e37d0742d xen/common/schedule.c --- a/xen/common/schedule.c Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/common/schedule.c Tue Sep 26 19:11:33 2006 -0600 @@ -37,6 +37,10 @@ static char opt_sched[10] = "credit"; static char opt_sched[10] = "credit"; string_param("sched", opt_sched); +/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */ +static unsigned int opt_dom0_vcpus_pin; +boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin); + #define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */ /* Various timer handlers. */ @@ -97,13 +101,26 @@ void vcpu_runstate_get(struct vcpu *v, s } } -int sched_init_vcpu(struct vcpu *v) -{ +int sched_init_vcpu(struct vcpu *v, unsigned int processor) +{ + struct domain *d = v->domain; + + /* + * Initialize processor and affinity settings. The idler, and potentially + * domain-0 VCPUs, are pinned onto their respective physical CPUs. + */ + v->processor = processor; + if ( is_idle_domain(d) || ((d->domain_id == 0) && opt_dom0_vcpus_pin) ) + v->cpu_affinity = cpumask_of_cpu(processor); + else + v->cpu_affinity = CPU_MASK_ALL; + /* Initialise the per-domain timers. 
*/ init_timer(&v->timer, vcpu_timer_fn, v, v->processor); init_timer(&v->poll_timer, poll_timer_fn, v, v->processor); - if ( is_idle_vcpu(v) ) + /* Idle VCPUs are scheduled immediately. */ + if ( is_idle_domain(d) ) { per_cpu(schedule_data, v->processor).curr = v; per_cpu(schedule_data, v->processor).idle = v; @@ -211,6 +228,9 @@ int vcpu_set_affinity(struct vcpu *v, cp { cpumask_t online_affinity; unsigned long flags; + + if ( (v->domain->domain_id == 0) && opt_dom0_vcpus_pin ) + return -EINVAL; cpus_and(online_affinity, *affinity, cpu_online_map); if ( cpus_empty(online_affinity) ) diff -r 9da2d9b48ff8 -r f34e37d0742d xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/include/asm-x86/hvm/hvm.h Tue Sep 26 19:11:33 2006 -0600 @@ -51,15 +51,13 @@ struct hvm_function_table { * Examine specifics of the guest state: * 1) determine whether the guest is in real or vm8086 mode, * 2) determine whether paging is enabled, - * 3) return the length of the instruction that caused an exit. - * 4) return the current guest control-register value + * 3) return the current guest control-register value */ int (*realmode)(struct vcpu *v); int (*paging_enabled)(struct vcpu *v); int (*long_mode_enabled)(struct vcpu *v); int (*pae_enabled)(struct vcpu *v); int (*guest_x86_mode)(struct vcpu *v); - int (*instruction_length)(struct vcpu *v); unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num); /* @@ -159,11 +157,7 @@ hvm_guest_x86_mode(struct vcpu *v) return hvm_funcs.guest_x86_mode(v); } -static inline int -hvm_instruction_length(struct vcpu *v) -{ - return hvm_funcs.instruction_length(v); -} +int hvm_instruction_length(struct cpu_user_regs *regs, int mode); static inline void hvm_update_host_cr3(struct vcpu *v) @@ -182,9 +176,9 @@ hvm_get_guest_ctrl_reg(struct vcpu *v, u return 0; /* force to fail */ } -extern void hvm_stts(struct vcpu *v); -extern void hvm_set_guest_time(struct vcpu *v, u64 gtime); -extern void hvm_do_resume(struct vcpu *v); +void hvm_stts(struct vcpu *v); +void hvm_set_guest_time(struct vcpu *v, u64 gtime); +void hvm_do_resume(struct vcpu *v); static inline void hvm_init_ap_context(struct vcpu_guest_context *ctxt, @@ -193,6 +187,6 @@ hvm_init_ap_context(struct vcpu_guest_co return hvm_funcs.init_ap_context(ctxt, vcpuid, trampoline_vector); } -extern int hvm_bringup_ap(int vcpuid, int trampoline_vector); +int hvm_bringup_ap(int vcpuid, int trampoline_vector); #endif /* __ASM_X86_HVM_HVM_H__ */ diff -r 9da2d9b48ff8 -r f34e37d0742d xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Tue Sep 26 19:11:33 2006 -0600 @@ -132,12 +132,16 @@ extern int vmcs_version; #define CPU_BASED_ACTIVATE_IO_BITMAP 0x02000000 #define CPU_BASED_MONITOR_EXITING 0x20000000 #define CPU_BASED_PAUSE_EXITING 0x40000000 -#define PIN_BASED_EXT_INTR_MASK 0x1 -#define PIN_BASED_NMI_EXITING 0x8 - + +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 + +#define VM_EXIT_IA32E_MODE 0x00000200 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 -#define VM_EXIT_HOST_ADD_SPACE_SIZE 0x00000200 - + +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 /* VMCS Encordings */ enum vmcs_field { @@ -217,6 +221,7 @@ enum vmcs_field { GUEST_LDTR_AR_BYTES = 0x00004820, GUEST_TR_AR_BYTES = 0x00004822, GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0x00004826, 
GUEST_SYSENTER_CS = 0x0000482A, HOST_IA32_SYSENTER_CS = 0x00004c00, CR0_GUEST_HOST_MASK = 0x00006000, diff -r 9da2d9b48ff8 -r f34e37d0742d xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Tue Sep 26 19:11:33 2006 -0600 @@ -35,73 +35,6 @@ extern void set_guest_time(struct vcpu * extern void set_guest_time(struct vcpu *v, u64 gtime); extern unsigned int cpu_rev; - -/* - * Need fill bits for SENTER - */ - -#define MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE 0x00000016 - -#define MONITOR_PIN_BASED_EXEC_CONTROLS \ - ( \ - MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE | \ - PIN_BASED_EXT_INTR_MASK | \ - PIN_BASED_NMI_EXITING \ - ) - -#define MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE 0x0401e172 - -#define _MONITOR_CPU_BASED_EXEC_CONTROLS \ - ( \ - MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE | \ - CPU_BASED_HLT_EXITING | \ - CPU_BASED_INVDPG_EXITING | \ - CPU_BASED_MWAIT_EXITING | \ - CPU_BASED_MOV_DR_EXITING | \ - CPU_BASED_ACTIVATE_IO_BITMAP | \ - CPU_BASED_USE_TSC_OFFSETING \ - ) - -#define MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \ - ( \ - CPU_BASED_CR8_LOAD_EXITING | \ - CPU_BASED_CR8_STORE_EXITING \ - ) - -#define MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE 0x0003edff - -#define MONITOR_VM_EXIT_CONTROLS_IA32E_MODE 0x00000200 - -#define _MONITOR_VM_EXIT_CONTROLS \ - ( \ - MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE | \ - VM_EXIT_ACK_INTR_ON_EXIT \ - ) - -#if defined (__x86_64__) -#define MONITOR_CPU_BASED_EXEC_CONTROLS \ - ( \ - _MONITOR_CPU_BASED_EXEC_CONTROLS | \ - MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \ - ) -#define MONITOR_VM_EXIT_CONTROLS \ - ( \ - _MONITOR_VM_EXIT_CONTROLS | \ - MONITOR_VM_EXIT_CONTROLS_IA32E_MODE \ - ) -#else -#define MONITOR_CPU_BASED_EXEC_CONTROLS \ - _MONITOR_CPU_BASED_EXEC_CONTROLS - -#define MONITOR_VM_EXIT_CONTROLS \ - _MONITOR_VM_EXIT_CONTROLS -#endif - -#define VM_ENTRY_CONTROLS_RESERVED_VALUE 0x000011ff -#define VM_ENTRY_CONTROLS_IA32E_MODE 0x00000200 - -#define MONITOR_VM_ENTRY_CONTROLS \ - VM_ENTRY_CONTROLS_RESERVED_VALUE /* * Exit Reasons @@ -425,36 +358,10 @@ static inline int vmx_pae_enabled(struct } /* Works only for vcpu == current */ -static inline int vmx_realmode(struct vcpu *v) -{ - unsigned long rflags; - ASSERT(v == current); - - __vmread(GUEST_RFLAGS, &rflags); - return rflags & X86_EFLAGS_VM; -} - -/* Works only for vcpu == current */ static inline void vmx_update_host_cr3(struct vcpu *v) { ASSERT(v == current); __vmwrite(HOST_CR3, v->arch.cr3); -} - -static inline int vmx_guest_x86_mode(struct vcpu *v) -{ - unsigned long cs_ar_bytes; - ASSERT(v == current); - - if ( vmx_long_mode_enabled(v) ) - { - __vmread(GUEST_CS_AR_BYTES, &cs_ar_bytes); - return (cs_ar_bytes & (1u<<13)) ? 8 : 4; - } - if ( vmx_realmode(v) ) - return 2; - __vmread(GUEST_CS_AR_BYTES, &cs_ar_bytes); - return (cs_ar_bytes & (1u<<14)) ? 4 : 2; } static inline int vmx_pgbit_test(struct vcpu *v) diff -r 9da2d9b48ff8 -r f34e37d0742d xen/include/xen/compiler.h --- a/xen/include/xen/compiler.h Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/include/xen/compiler.h Tue Sep 26 19:11:33 2006 -0600 @@ -35,7 +35,7 @@ #define offsetof(a,b) ((unsigned long)&(((a *)0)->b)) #endif -#if defined(__x86_64__) && (__GNUC__ > 3) +#ifdef GCC_HAS_VISIBILITY_ATTRIBUTE /* Results in more efficient PIC code (no indirections through GOT or PLT). 
*/ #pragma GCC visibility push(hidden) #endif diff -r 9da2d9b48ff8 -r f34e37d0742d xen/include/xen/sched.h --- a/xen/include/xen/sched.h Tue Sep 26 16:15:45 2006 -0600 +++ b/xen/include/xen/sched.h Tue Sep 26 19:11:33 2006 -0600 @@ -280,7 +280,7 @@ void new_thread(struct vcpu *d, #define set_current_state(_s) do { current->state = (_s); } while (0) void scheduler_init(void); void schedulers_start(void); -int sched_init_vcpu(struct vcpu *); +int sched_init_vcpu(struct vcpu *v, unsigned int processor); void sched_destroy_domain(struct domain *); long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *); int sched_id(void); diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/hvm/instrlen.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/instrlen.c Tue Sep 26 19:11:33 2006 -0600 @@ -0,0 +1,474 @@ +/* + * instrlen.c - calculates the instruction length for all operating modes + * + * Travis Betak, travis.betak@xxxxxxx + * Copyright (c) 2005,2006 AMD + * Copyright (c) 2005 Keir Fraser + * + * Essentially a very, very stripped version of Keir Fraser's work in + * x86_emulate.c. Used for MMIO. + */ + +/* + * TODO: The way in which we use hvm_instruction_length is very inefficient as + * it now stands. It will be worthwhile to return the actual instruction buffer + * along with the instruction length since one of the reasons we are getting + * the instruction length is to know how many instruction bytes we need to + * fetch. + */ + +#include <xen/config.h> +#include <xen/sched.h> +#include <xen/mm.h> +#include <asm/regs.h> +#include <asm-x86/x86_emulate.h> + +/* read from guest memory */ +extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip, + int length); + +/* + * Opcode effective-address decode tables. + * Note that we only emulate instructions that have at least one memory + * operand (excluding implicit stack references). We assume that stack + * references and instruction fetches will never occur in special memory + * areas that require emulation. So, for example, 'mov <imm>,<reg>' need + * not be handled. + */ + +/* Operand sizes: 8-bit operands or specified/overridden size. */ +#define ByteOp (1<<0) /* 8-bit operands. */ +/* Destination operand type. */ +#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ +#define DstReg (2<<1) /* Register operand. */ +#define DstMem (3<<1) /* Memory operand. */ +#define DstMask (3<<1) +/* Source operand type. */ +#define SrcNone (0<<3) /* No source operand. */ +#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */ +#define SrcReg (1<<3) /* Register operand. */ +#define SrcMem (2<<3) /* Memory operand. */ +#define SrcMem16 (3<<3) /* Memory operand (16-bit). */ +#define SrcMem32 (4<<3) /* Memory operand (32-bit). */ +#define SrcImm (5<<3) /* Immediate operand. */ +#define SrcImmByte (6<<3) /* 8-bit sign-extended immediate operand. */ +#define SrcMask (7<<3) +/* Generic ModRM decode. */ +#define ModRM (1<<6) +/* Destination is only written; never read. 
*/ +#define Mov (1<<7) + +static uint8_t opcode_table[256] = { + /* 0x00 - 0x07 */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + 0, 0, 0, 0, + /* 0x08 - 0x0F */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + 0, 0, 0, 0, + /* 0x10 - 0x17 */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + 0, 0, 0, 0, + /* 0x18 - 0x1F */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + 0, 0, 0, 0, + /* 0x20 - 0x27 */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + 0, 0, 0, 0, + /* 0x28 - 0x2F */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + 0, 0, 0, 0, + /* 0x30 - 0x37 */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + 0, 0, 0, 0, + /* 0x38 - 0x3F */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + 0, 0, 0, 0, + /* 0x40 - 0x4F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x50 - 0x5F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x60 - 0x6F */ + 0, 0, 0, DstReg|SrcMem32|ModRM|Mov /* movsxd (x86/64) */, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x70 - 0x7F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x80 - 0x87 */ + ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, + ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + /* 0x88 - 0x8F */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + 0, 0, 0, DstMem|SrcNone|ModRM|Mov, + /* 0x90 - 0x9F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xA0 - 0xA7 */ + ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov, + ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov, + ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|ImplicitOps, ImplicitOps, + /* 0xA8 - 0xAF */ + 0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|ImplicitOps, ImplicitOps, + /* 0xB0 - 0xBF */ + SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, + SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xC0 - 0xC7 */ + ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0, + 0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, + /* 0xC8 - 0xCF */ + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xD0 - 0xD7 */ + ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, + ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, + 0, 0, 0, 0, + /* 0xD8 - 0xDF */ + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xE0 - 0xEF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xF0 - 0xF7 */ + 0, 0, 0, 0, + 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM, + /* 0xF8 - 0xFF */ + 0, 0, 0, 0, + 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM +}; + +static uint8_t twobyte_table[256] = { + /* 0x00 - 0x0F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, + /* 0x10 - 0x1F */ + 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, + /* 0x20 - 0x2F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x30 - 0x3F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x40 - 0x47 */ + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + 
DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + /* 0x48 - 0x4F */ + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + /* 0x50 - 0x5F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x60 - 0x6F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x70 - 0x7F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x80 - 0x8F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x90 - 0x9F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xA0 - 0xA7 */ + 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, + /* 0xA8 - 0xAF */ + 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, + /* 0xB0 - 0xB7 */ + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM, + 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, + /* 0xB8 - 0xBF */ + 0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM, + 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, + /* 0xC0 - 0xCF */ + 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xD0 - 0xDF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xE0 - 0xEF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xF0 - 0xFF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* + * insn_fetch - fetch the next 1 to 4 bytes from the instruction stream + * + * @_type: u8, u16, u32, s8, s16, or s32 + * @_size: 1, 2, or 4 bytes + * @_eip: address to fetch from guest memory + * @_length: increments the current instruction length counter by _size + * + * This is used internally by hvm_instruction_length to fetch the next byte, + * word, or dword from guest memory at location _eip. We currently use a local + * unsigned long as the storage buffer since the most bytes we're going to get + * is limited to 4. + */ +#define insn_fetch(_type, _size, _eip, _length) \ +({ unsigned long _x; \ + if ((rc = inst_copy_from_guest((unsigned char *)(&(_x)), \ + (unsigned long)(_eip), _size)) \ + != _size) \ + goto done; \ + (_eip) += (_size); \ + (_length) += (_size); \ + (_type)_x; \ +}) + +/** + * hvm_instruction_length - returns the current instruction's length + * + * @regs: guest register state + * @mode: guest operating mode + * + * EXTERNAL this routine calculates the length of the current instruction + * pointed to by eip. The guest state is _not_ changed by this routine. + */ +int hvm_instruction_length(struct cpu_user_regs *regs, int mode) +{ + uint8_t b, d, twobyte = 0, rex_prefix = 0; + uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; + unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i; + int rc = 0; + int length = 0; + unsigned int tmp; + + /* Shadow copy of register state. Committed on successful emulation. */ + struct cpu_user_regs _regs = *regs; + + /* include CS for 16-bit modes */ + if (mode == X86EMUL_MODE_REAL || mode == X86EMUL_MODE_PROT16) + _regs.eip += (_regs.cs << 4); + + switch ( mode ) + { + case X86EMUL_MODE_REAL: + case X86EMUL_MODE_PROT16: + op_bytes = ad_bytes = 2; + break; + case X86EMUL_MODE_PROT32: + op_bytes = ad_bytes = 4; + break; +#ifdef __x86_64__ + case X86EMUL_MODE_PROT64: + op_bytes = 4; + ad_bytes = 8; + break; +#endif + default: + return -1; + } + + /* Legacy prefixes. 
*/ + for ( i = 0; i < 8; i++ ) + { + switch ( b = insn_fetch(uint8_t, 1, _regs.eip, length) ) + { + case 0x66: /* operand-size override */ + op_bytes ^= 6; /* switch between 2/4 bytes */ + break; + case 0x67: /* address-size override */ + if ( mode == X86EMUL_MODE_PROT64 ) + ad_bytes ^= 12; /* switch between 4/8 bytes */ + else + ad_bytes ^= 6; /* switch between 2/4 bytes */ + break; + case 0x2e: /* CS override */ + case 0x3e: /* DS override */ + case 0x26: /* ES override */ + case 0x64: /* FS override */ + case 0x65: /* GS override */ + case 0x36: /* SS override */ + break; + case 0xf0: /* LOCK */ + lock_prefix = 1; + break; + case 0xf3: /* REP/REPE/REPZ */ + rep_prefix = 1; + break; + case 0xf2: /* REPNE/REPNZ */ + break; + default: + goto done_prefixes; + } + } +done_prefixes: + + /* Not quite the same as 80386 real mode, but hopefully good enough. */ + if ( (mode == X86EMUL_MODE_REAL) && (ad_bytes != 2) ) { + printf("sonofabitch!! we don't support 32-bit addresses in realmode\n"); + goto cannot_emulate; + } + + /* REX prefix. */ + if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) ) + { + rex_prefix = b; + if ( b & 8 ) + op_bytes = 8; /* REX.W */ + modrm_reg = (b & 4) << 1; /* REX.R */ + /* REX.B and REX.X do not need to be decoded. */ + b = insn_fetch(uint8_t, 1, _regs.eip, length); + } + + /* Opcode byte(s). */ + d = opcode_table[b]; + if ( d == 0 ) + { + /* Two-byte opcode? */ + if ( b == 0x0f ) + { + twobyte = 1; + b = insn_fetch(uint8_t, 1, _regs.eip, length); + d = twobyte_table[b]; + } + + /* Unrecognised? */ + if ( d == 0 ) + goto cannot_emulate; + } + + /* ModRM and SIB bytes. */ + if ( d & ModRM ) + { + modrm = insn_fetch(uint8_t, 1, _regs.eip, length); + modrm_mod |= (modrm & 0xc0) >> 6; + modrm_reg |= (modrm & 0x38) >> 3; + modrm_rm |= (modrm & 0x07); + + if ( modrm_mod == 3 ) + { + DPRINTK("Cannot parse ModRM.mod == 3.\n"); + goto cannot_emulate; + } + + if ( ad_bytes == 2 ) + { + /* 16-bit ModR/M decode. */ + switch ( modrm_mod ) + { + case 0: + if ( modrm_rm == 6 ) + { + length += 2; + _regs.eip += 2; /* skip disp16 */ + } + break; + case 1: + length += 1; + _regs.eip += 1; /* skip disp8 */ + break; + case 2: + length += 2; + _regs.eip += 2; /* skip disp16 */ + break; + } + } + else + { + /* 32/64-bit ModR/M decode. */ + switch ( modrm_mod ) + { + case 0: + if ( (modrm_rm == 4) && + (((insn_fetch(uint8_t, 1, _regs.eip, length)) & 7) + == 5) ) + { + length += 4; + _regs.eip += 4; /* skip disp32 specified by SIB.base */ + } + else if ( modrm_rm == 5 ) + { + length += 4; + _regs.eip += 4; /* skip disp32 */ + } + break; + case 1: + if ( modrm_rm == 4 ) + { + insn_fetch(uint8_t, 1, _regs.eip, length); + } + length += 1; + _regs.eip += 1; /* skip disp8 */ + break; + case 2: + if ( modrm_rm == 4 ) + { + insn_fetch(uint8_t, 1, _regs.eip, length); + } + length += 4; + _regs.eip += 4; /* skip disp32 */ + break; + } + } + } + + /* Decode and fetch the destination operand: register or memory. */ + switch ( d & DstMask ) + { + case ImplicitOps: + /* Special instructions do their own operand decoding. */ + goto done; + } + + /* Decode and fetch the source operand: register, memory or immediate. */ + switch ( d & SrcMask ) + { + case SrcImm: + tmp = (d & ByteOp) ? 1 : op_bytes; + if ( tmp == 8 ) tmp = 4; + /* NB. Immediates are sign-extended as necessary. 
*/ + switch ( tmp ) + { + case 1: insn_fetch(int8_t, 1, _regs.eip, length); break; + case 2: insn_fetch(int16_t, 2, _regs.eip, length); break; + case 4: insn_fetch(int32_t, 4, _regs.eip, length); break; + } + break; + case SrcImmByte: + insn_fetch(int8_t, 1, _regs.eip, length); + break; + } + + if ( twobyte ) + goto done; + + switch ( b ) + { + case 0xa0 ... 0xa1: /* mov */ + length += ad_bytes; + _regs.eip += ad_bytes; /* skip src displacement */ + break; + case 0xa2 ... 0xa3: /* mov */ + length += ad_bytes; + _regs.eip += ad_bytes; /* skip dst displacement */ + break; + case 0xf6 ... 0xf7: /* Grp3 */ + switch ( modrm_reg ) + { + case 0 ... 1: /* test */ + /* Special case in Grp3: test has an immediate source operand. */ + tmp = (d & ByteOp) ? 1 : op_bytes; + if ( tmp == 8 ) tmp = 4; + switch ( tmp ) + { + case 1: insn_fetch(int8_t, 1, _regs.eip, length); break; + case 2: insn_fetch(int16_t, 2, _regs.eip, length); break; + case 4: insn_fetch(int32_t, 4, _regs.eip, length); break; + } + goto done; + } + break; + } + +done: + return length; + +cannot_emulate: + DPRINTK("Cannot emulate %02x at address %lx (eip %lx, mode %d)\n", + b, (unsigned long)_regs.eip, (unsigned long)regs->eip, mode); + return -1; +} diff -r 9da2d9b48ff8 -r f34e37d0742d xen/arch/x86/hvm/svm/instrlen.c --- a/xen/arch/x86/hvm/svm/instrlen.c Tue Sep 26 16:15:45 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,479 +0,0 @@ -/* - * instrlen.c - calculates the instruction length for all operating modes - * - * Travis Betak, travis.betak@xxxxxxx - * Copyright (c) 2005,2006 AMD - * Copyright (c) 2005 Keir Fraser - * - * Essentially a very, very stripped version of Keir Fraser's work in - * x86_emulate.c. Used for MMIO. - */ - -/* - * TODO: the way in which we use svm_instrlen is very inefficient as is now - * stands. It will be worth while to return the actual instruction buffer - * along with the instruction length since one of the reasons we are getting - * the instruction length is to know how many instruction bytes we need to - * fetch. - */ - -#include <xen/config.h> -#include <xen/types.h> -#include <xen/lib.h> -#include <xen/mm.h> -#include <asm/regs.h> -#define DPRINTF DPRINTK -#include <asm-x86/x86_emulate.h> - -/* read from guest memory */ -extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip, - int length); -extern void svm_dump_inst(unsigned long eip); - -/* - * Opcode effective-address decode tables. - * Note that we only emulate instructions that have at least one memory - * operand (excluding implicit stack references). We assume that stack - * references and instruction fetches will never occur in special memory - * areas that require emulation. So, for example, 'mov <imm>,<reg>' need - * not be handled. - */ - -/* Operand sizes: 8-bit operands or specified/overridden size. */ -#define ByteOp (1<<0) /* 8-bit operands. */ -/* Destination operand type. */ -#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ -#define DstReg (2<<1) /* Register operand. */ -#define DstMem (3<<1) /* Memory operand. */ -#define DstMask (3<<1) -/* Source operand type. */ -#define SrcNone (0<<3) /* No source operand. */ -#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */ -#define SrcReg (1<<3) /* Register operand. */ -#define SrcMem (2<<3) /* Memory operand. */ -#define SrcMem16 (3<<3) /* Memory operand (16-bit). */ -#define SrcMem32 (4<<3) /* Memory operand (32-bit). */ -#define SrcImm (5<<3) /* Immediate operand. 
*/ -#define SrcImmByte (6<<3) /* 8-bit sign-extended immediate operand. */ -#define SrcMask (7<<3) -/* Generic ModRM decode. */ -#define ModRM (1<<6) -/* Destination is only written; never read. */ -#define Mov (1<<7) - -static uint8_t opcode_table[256] = { - /* 0x00 - 0x07 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x08 - 0x0F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x10 - 0x17 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x18 - 0x1F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x20 - 0x27 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x28 - 0x2F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x30 - 0x37 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x38 - 0x3F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, - /* 0x40 - 0x4F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x50 - 0x5F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x60 - 0x6F */ - 0, 0, 0, DstReg|SrcMem32|ModRM|Mov /* movsxd (x86/64) */, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x70 - 0x7F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x80 - 0x87 */ - ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, - ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - /* 0x88 - 0x8F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, DstMem|SrcNone|ModRM|Mov, - /* 0x90 - 0x9F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xA0 - 0xA7 */ - ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov, - ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov, - ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, - ByteOp|ImplicitOps, ImplicitOps, - /* 0xA8 - 0xAF */ - 0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, - ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, - ByteOp|ImplicitOps, ImplicitOps, - /* 0xB0 - 0xBF */ - SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, - SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, - 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xC0 - 0xC7 */ - ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0, - 0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, - /* 0xC8 - 0xCF */ - 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xD0 - 0xD7 */ - ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, - ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, - 0, 0, 0, 0, - /* 0xD8 - 0xDF */ - 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xE0 - 0xEF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xF0 - 0xF7 */ - 0, 0, 0, 0, - 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM, - /* 0xF8 - 0xFF */ - 0, 0, 0, 0, - 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM -}; - -static uint8_t twobyte_table[256] = { - /* 0x00 - 0x0F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, - /* 0x10 - 0x1F */ - 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, - /* 0x20 - 0x2F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x30 
- 0x3F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x40 - 0x47 */ - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - /* 0x48 - 0x4F */ - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - /* 0x50 - 0x5F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x60 - 0x6F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x70 - 0x7F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x80 - 0x8F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x90 - 0x9F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xA0 - 0xA7 */ - 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, - /* 0xA8 - 0xAF */ - 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, - /* 0xB0 - 0xB7 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM, - 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, - /* 0xB8 - 0xBF */ - 0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM, - 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, - /* 0xC0 - 0xCF */ - 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xD0 - 0xDF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xE0 - 0xEF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xF0 - 0xFF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* - * insn_fetch - fetch the next 1 to 4 bytes from instruction stream - * - * @_type: u8, u16, u32, s8, s16, or s32 - * @_size: 1, 2, or 4 bytes - * @_eip: address to fetch from guest memory - * @_length: updated! increments the current instruction length counter by _size - * - * INTERNAL this is used internally by svm_instrlen to fetch the next byte, - * word, or dword from guest memory at location _eip. we currently use a local - * unsigned long as the storage buffer since the most bytes we're gonna get - * is limited to 4. - */ -#define insn_fetch(_type, _size, _eip, _length) \ -({ unsigned long _x; \ - if ((rc = inst_copy_from_guest((unsigned char *)(&(_x)), \ - (unsigned long)(_eip), _size)) \ - != _size) \ - goto done; \ - (_eip) += (_size); \ - (_length) += (_size); \ - (_type)_x; \ -}) - - -/** - * svn_instrlen - returns the current instructions length - * - * @regs: guest register state - * @mode: guest operating mode - * - * EXTERNAL this routine calculates the length of the current instruction - * pointed to by eip. The guest state is _not_ changed by this routine. - */ -int svm_instrlen(struct cpu_user_regs *regs, int mode) -{ - uint8_t b, d, twobyte = 0, rex_prefix = 0; - uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; - unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i; - int rc = 0; - int length = 0; - unsigned int tmp; - - /* Shadow copy of register state. Committed on successful emulation. 
*/ - struct cpu_user_regs _regs = *regs; - - /* include CS for 16-bit modes */ - if (mode == X86EMUL_MODE_REAL || mode == X86EMUL_MODE_PROT16) - _regs.eip += (_regs.cs << 4); - - switch ( mode ) - { - case X86EMUL_MODE_REAL: - case X86EMUL_MODE_PROT16: - op_bytes = ad_bytes = 2; - break; - case X86EMUL_MODE_PROT32: - op_bytes = ad_bytes = 4; - break; -#ifdef __x86_64__ - case X86EMUL_MODE_PROT64: - op_bytes = 4; - ad_bytes = 8; - break; -#endif - default: - return -1; - } - - /* Legacy prefixes. */ - for ( i = 0; i < 8; i++ ) - { - switch ( b = insn_fetch(uint8_t, 1, _regs.eip, length) ) - { - case 0x66: /* operand-size override */ - op_bytes ^= 6; /* switch between 2/4 bytes */ - break; - case 0x67: /* address-size override */ - if ( mode == X86EMUL_MODE_PROT64 ) - ad_bytes ^= 12; /* switch between 4/8 bytes */ - else - ad_bytes ^= 6; /* switch between 2/4 bytes */ - break; - case 0x2e: /* CS override */ - case 0x3e: /* DS override */ - case 0x26: /* ES override */ - case 0x64: /* FS override */ - case 0x65: /* GS override */ - case 0x36: /* SS override */ - break; - case 0xf0: /* LOCK */ - lock_prefix = 1; - break; - case 0xf3: /* REP/REPE/REPZ */ - rep_prefix = 1; - break; - case 0xf2: /* REPNE/REPNZ */ - break; - default: - goto done_prefixes; - } - } -done_prefixes: - - /* Note quite the same as 80386 real mode, but hopefully good enough. */ - if ( (mode == X86EMUL_MODE_REAL) && (ad_bytes != 2) ) { - printf("sonofabitch!! we don't support 32-bit addresses in realmode\n"); - goto cannot_emulate; - } - - /* REX prefix. */ - if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) ) - { - rex_prefix = b; - if ( b & 8 ) - op_bytes = 8; /* REX.W */ - modrm_reg = (b & 4) << 1; /* REX.R */ - /* REX.B and REX.X do not need to be decoded. */ - b = insn_fetch(uint8_t, 1, _regs.eip, length); - } - - /* Opcode byte(s). */ - d = opcode_table[b]; - if ( d == 0 ) - { - /* Two-byte opcode? */ - if ( b == 0x0f ) - { - twobyte = 1; - b = insn_fetch(uint8_t, 1, _regs.eip, length); - d = twobyte_table[b]; - } - - /* Unrecognised? */ - if ( d == 0 ) - goto cannot_emulate; - } - - /* ModRM and SIB bytes. */ - if ( d & ModRM ) - { - modrm = insn_fetch(uint8_t, 1, _regs.eip, length); - modrm_mod |= (modrm & 0xc0) >> 6; - modrm_reg |= (modrm & 0x38) >> 3; - modrm_rm |= (modrm & 0x07); - - if ( modrm_mod == 3 ) - { - DPRINTF("Cannot parse ModRM.mod == 3.\n"); - goto cannot_emulate; - } - - if ( ad_bytes == 2 ) - { - /* 16-bit ModR/M decode. */ - switch ( modrm_mod ) - { - case 0: - if ( modrm_rm == 6 ) - { - length += 2; - _regs.eip += 2; /* skip disp16 */ - } - break; - case 1: - length += 1; - _regs.eip += 1; /* skip disp8 */ - break; - case 2: - length += 2; - _regs.eip += 2; /* skip disp16 */ - break; - } - } - else - { - /* 32/64-bit ModR/M decode. */ - switch ( modrm_mod ) - { - case 0: - if ( (modrm_rm == 4) && - (((insn_fetch(uint8_t, 1, _regs.eip, length)) & 7) - == 5) ) - { - length += 4; - _regs.eip += 4; /* skip disp32 specified by SIB.base */ - } - else if ( modrm_rm == 5 ) - { - length += 4; - _regs.eip += 4; /* skip disp32 */ - } - break; - case 1: - if ( modrm_rm == 4 ) - { - insn_fetch(uint8_t, 1, _regs.eip, length); - } - length += 1; - _regs.eip += 1; /* skip disp8 */ - break; - case 2: - if ( modrm_rm == 4 ) - { - insn_fetch(uint8_t, 1, _regs.eip, length); - } - length += 4; - _regs.eip += 4; /* skip disp32 */ - break; - } - } - } - - /* Decode and fetch the destination operand: register or memory. 
*/ - switch ( d & DstMask ) - { - case ImplicitOps: - /* Special instructions do their own operand decoding. */ - goto done; - } - - /* Decode and fetch the source operand: register, memory or immediate. */ - switch ( d & SrcMask ) - { - case SrcImm: - tmp = (d & ByteOp) ? 1 : op_bytes; - if ( tmp == 8 ) tmp = 4; - /* NB. Immediates are sign-extended as necessary. */ - switch ( tmp ) - { - case 1: insn_fetch(int8_t, 1, _regs.eip, length); break; - case 2: insn_fetch(int16_t, 2, _regs.eip, length); break; - case 4: insn_fetch(int32_t, 4, _regs.eip, length); break; - } - break; - case SrcImmByte: - insn_fetch(int8_t, 1, _regs.eip, length); - break; - } - - if ( twobyte ) - goto done; - - switch ( b ) - { - case 0xa0 ... 0xa1: /* mov */ - length += ad_bytes; - _regs.eip += ad_bytes; /* skip src displacement */ - break; - case 0xa2 ... 0xa3: /* mov */ - length += ad_bytes; - _regs.eip += ad_bytes; /* skip dst displacement */ - break; - case 0xf6 ... 0xf7: /* Grp3 */ - switch ( modrm_reg ) - { - case 0 ... 1: /* test */ - /* Special case in Grp3: test has an immediate source operand. */ - tmp = (d & ByteOp) ? 1 : op_bytes; - if ( tmp == 8 ) tmp = 4; - switch ( tmp ) - { - case 1: insn_fetch(int8_t, 1, _regs.eip, length); break; - case 2: insn_fetch(int16_t, 2, _regs.eip, length); break; - case 4: insn_fetch(int32_t, 4, _regs.eip, length); break; - } - goto done; - } - break; - } - -done: - return length; - -cannot_emulate: - DPRINTF("Cannot emulate %02x at address %lx (eip %lx, mode %d)\n", - b, (unsigned long)_regs.eip, (unsigned long)regs->eip, mode); - svm_dump_inst(_regs.eip); - return -1; -} _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
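A note for readers following the new decoder above: everything funnels through insn_fetch(), a GCC statement expression that yields the fetched value while advancing both the shadow eip and the running length counter, and bails out via goto done on a short read from guest memory. The stand-alone sketch below compiles with gcc outside Xen; fake_guest[] and the simplified inst_copy_from_guest() are stand-ins invented for this sketch, not Xen code.

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    /* Flat byte array standing in for guest memory; the bytes encode
     * "movzx eax, byte ptr [eax]" (0f b6 00). */
    static const uint8_t fake_guest[] = { 0x0f, 0xb6, 0x00 };

    /* Simplified stand-in for Xen's inst_copy_from_guest(). */
    static int inst_copy_from_guest(unsigned char *buf, unsigned long eip,
                                    int length)
    {
        if (eip + length > sizeof(fake_guest))
            return 0;
        memcpy(buf, fake_guest + eip, length);
        return length;
    }

    /* Same shape as the patch's insn_fetch(): advance the fetch pointer
     * and the length together, abort via 'goto done' on failure. */
    #define insn_fetch(_type, _size, _eip, _length)                  \
    ({  unsigned long _x = 0;                                        \
        if ((rc = inst_copy_from_guest((unsigned char *)&_x,         \
                                       (unsigned long)(_eip),        \
                                       _size)) != _size)             \
            goto done;                                               \
        (_eip) += (_size);                                           \
        (_length) += (_size);                                        \
        (_type)_x;                                                   \
    })

    int main(void)
    {
        unsigned long eip = 0;
        int rc = 0, length = 0;
        uint8_t b = 0;

        b = insn_fetch(uint8_t, 1, eip, length);      /* 0x0f: two-byte escape */
        if (b == 0x0f)
            b = insn_fetch(uint8_t, 1, eip, length);  /* 0xb6: movzx r32,r/m8 */
        (void)insn_fetch(uint8_t, 1, eip, length);    /* ModRM byte */

    done:
        printf("opcode 0x%02x, length %d\n", b, length);  /* 0xb6, 3 */
        return 0;
    }

Run as-is it accumulates a length of 3 for the three-byte movzx, which is exactly the bookkeeping hvm_instruction_length() performs against real guest memory.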
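Also worth calling out: the prefix loop's op_bytes ^= 6 and ad_bytes ^= 12 rely on XOR with the sum of the two legal widths toggling between them, since 4^6 == 2, 2^6 == 4, 8^12 == 4 and 4^12 == 8. A minimal check of the arithmetic (real hardware treats a repeated prefix as redundant rather than as a second toggle, so the idiom is only exact for the single-override case the decoder expects):

    #include <stdio.h>

    int main(void)
    {
        unsigned int op_bytes = 4, ad_bytes = 8;  /* long-mode defaults, as in the patch */

        op_bytes ^= 6;                            /* 0x66 prefix seen: 4 -> 2 */
        printf("op_bytes = %u\n", op_bytes);      /* prints 2 */
        op_bytes ^= 6;                            /* a second 0x66 toggles back: 2 -> 4 */
        printf("op_bytes = %u\n", op_bytes);      /* prints 4 */

        ad_bytes ^= 12;                           /* 0x67 in long mode: 8 -> 4 */
        printf("ad_bytes = %u\n", ad_bytes);      /* prints 4 */
        return 0;
    }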
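Stripped of the eip arithmetic, the ModR/M handling in both the new and the deleted copy of the file reduces to one displacement-size rule. The sketch below restates it; disp_bytes() is a hypothetical helper written for illustration, it counts displacement bytes only (the SIB byte, when present, is fetched separately by the decoder), and mod == 3 is excluded because the decoder refuses register-only operands:

    #include <stdio.h>
    #include <stdint.h>

    /* Hypothetical helper, not in the patch: number of displacement bytes
     * that follow the ModRM (and SIB) byte(s). */
    static int disp_bytes(int ad_bytes, uint8_t mod, uint8_t rm, uint8_t sib)
    {
        if (ad_bytes == 2) {                    /* 16-bit addressing */
            if (mod == 0)
                return (rm == 6) ? 2 : 0;       /* mod=0, rm=6 is bare [disp16] */
            return (mod == 1) ? 1 : 2;          /* disp8 : disp16 */
        }
        if (mod == 0) {                         /* 32/64-bit addressing */
            if (rm == 5)
                return 4;                       /* bare [disp32] */
            if (rm == 4 && (sib & 7) == 5)
                return 4;                       /* SIB with no base register */
            return 0;
        }
        return (mod == 1) ? 1 : 4;              /* disp8 : disp32 */
    }

    int main(void)
    {
        printf("%d\n", disp_bytes(4, 1, 5, 0)); /* mov eax,[ebp]: disp8, prints 1 */
        printf("%d\n", disp_bytes(2, 0, 6, 0)); /* mov ax,[0x1234]: prints 2 */
        printf("%d\n", disp_bytes(4, 0, 0, 0)); /* mov eax,[eax]: prints 0 */
        return 0;
    }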