[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User Isaku Yamahata <yamahata@xxxxxxxxxxxxx> # Date 1212374139 -32400 # Node ID f1508348ffabedf2eab0b666a5b8e2c9515e52d7 # Parent d2a239224cb23f5a7a8059e4167c50bd3abeb9df # Parent 85fa199b4b7bd1e7511ce7fc2361bae808c27ba6 merge with xen-unstable.hg --- extras/mini-os/main-caml.c | 42 -- tools/examples/xend-config-xenapi.sxp | 196 --------- tools/examples/xm-config-xenapi.xml | 41 -- extras/mini-os/Config.mk | 13 extras/mini-os/Makefile | 35 - extras/mini-os/arch/ia64/minios-ia64.lds | 17 extras/mini-os/arch/ia64/mm.c | 8 extras/mini-os/arch/x86/minios-x86_32.lds | 18 extras/mini-os/arch/x86/minios-x86_64.lds | 18 extras/mini-os/arch/x86/mm.c | 17 extras/mini-os/hypervisor.c | 4 extras/mini-os/include/lib.h | 1 extras/mini-os/include/mm.h | 3 extras/mini-os/include/posix/pthread.h | 52 ++ extras/mini-os/include/x86/arch_mm.h | 5 extras/mini-os/lib/sys.c | 67 +-- extras/mini-os/lib/xmalloc.c | 2 extras/mini-os/main.c | 12 extras/mini-os/mm.c | 38 - extras/mini-os/sched.c | 2 stubdom/Makefile | 33 - stubdom/c/Makefile | 7 stubdom/c/main.c | 2 stubdom/caml/Makefile | 10 stubdom/caml/main-caml.c | 42 ++ tools/examples/Makefile | 2 tools/examples/xend-config.sxp | 16 tools/firmware/hvmloader/util.c | 12 tools/firmware/rombios/rombios.c | 26 - tools/ioemu/Makefile.target | 7 tools/ioemu/hw/cirrus_vga.c | 2 tools/ioemu/vl.c | 5 tools/ioemu/vl.h | 2 tools/ioemu/xenstore.c | 2 tools/libxc/Makefile | 8 tools/libxc/xc_core.c | 18 tools/libxc/xc_domain.c | 31 + tools/libxc/xc_minios.c | 17 tools/libxc/xenctrl.h | 7 tools/libxc/xg_private.c | 16 tools/python/xen/lowlevel/xc/xc.c | 230 +++++++++-- tools/python/xen/xend/XendAPI.py | 6 tools/python/xen/xend/XendDomain.py | 31 - tools/python/xen/xend/XendDomainInfo.py | 22 - tools/python/xen/xend/XendOptions.py | 27 + tools/python/xen/xend/server/SrvDomain.py | 13 tools/python/xen/xend/server/pciif.py | 52 ++ tools/python/xen/xend/server/relocate.py | 24 - tools/python/xen/xm/migrate.py | 10 
tools/xenstat/libxenstat/src/xenstat.c | 18 unmodified_drivers/linux-2.6/platform-pci/evtchn.c | 2 xen/arch/x86/acpi/cpu_idle.c | 2 xen/arch/x86/acpi/power.c | 14 xen/arch/x86/cpu/amd.c | 8 xen/arch/x86/crash.c | 1 xen/arch/x86/domain.c | 30 - xen/arch/x86/domctl.c | 63 ++- xen/arch/x86/hvm/hpet.c | 18 xen/arch/x86/hvm/hvm.c | 158 ++++--- xen/arch/x86/hvm/i8254.c | 26 - xen/arch/x86/hvm/pmtimer.c | 2 xen/arch/x86/hvm/svm/svm.c | 4 xen/arch/x86/hvm/vlapic.c | 24 - xen/arch/x86/hvm/vmx/vmx.c | 4 xen/arch/x86/hvm/vpt.c | 35 + xen/arch/x86/mm.c | 14 xen/arch/x86/mm/hap/p2m-ept.c | 6 xen/arch/x86/mm/p2m.c | 21 - xen/arch/x86/mm/shadow/common.c | 119 +++--- xen/arch/x86/msi.c | 7 xen/arch/x86/setup.c | 8 xen/arch/x86/smpboot.c | 5 xen/arch/x86/tboot.c | 12 xen/arch/x86/x86_emulate/x86_emulate.c | 8 xen/common/domain.c | 4 xen/common/grant_table.c | 57 ++ xen/common/libelf/libelf-private.h | 2 xen/common/memory.c | 17 xen/drivers/passthrough/amd/pci_amd_iommu.c | 11 xen/drivers/passthrough/iommu.c | 108 +++++ xen/drivers/passthrough/vtd/dmar.c | 33 - xen/drivers/passthrough/vtd/dmar.h | 1 xen/drivers/passthrough/vtd/extern.h | 3 xen/drivers/passthrough/vtd/intremap.c | 318 ++++++++++++---- xen/drivers/passthrough/vtd/iommu.c | 416 ++++++++++----------- xen/drivers/passthrough/vtd/iommu.h | 1 xen/drivers/passthrough/vtd/utils.c | 178 ++++---- xen/drivers/passthrough/vtd/vtd.h | 22 + xen/drivers/passthrough/vtd/x86/vtd.c | 184 --------- xen/include/asm-x86/hvm/hvm.h | 6 xen/include/asm-x86/hvm/vcpu.h | 3 xen/include/asm-x86/hvm/vmx/vmx.h | 1 xen/include/asm-x86/hvm/vpt.h | 7 xen/include/asm-x86/tboot.h | 15 xen/include/public/domctl.h | 11 xen/include/xen/elfcore.h | 1 xen/include/xen/hvm/iommu.h | 4 xen/include/xen/iommu.h | 14 xen/include/xen/sched.h | 3 xen/include/xen/time.h | 1 100 files changed, 1874 insertions(+), 1427 deletions(-) diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/Config.mk --- a/extras/mini-os/Config.mk Mon Jun 02 11:35:02 2008 +0900 +++ 
b/extras/mini-os/Config.mk Mon Jun 02 11:35:39 2008 +0900 @@ -41,10 +41,7 @@ extra_incl := $(foreach dir,$(EXTRA_INC) extra_incl := $(foreach dir,$(EXTRA_INC),-I$(MINI-OS_ROOT)/include/$(dir)) DEF_CPPFLAGS += -I$(MINI-OS_ROOT)/include - -ifeq ($(stubdom),y) -DEF_CPPFLAGS += -DCONFIG_STUBDOM -endif +DEF_CPPFLAGS += -D__MINIOS__ ifeq ($(libc),y) DEF_CPPFLAGS += -DHAVE_LIBC @@ -58,11 +55,3 @@ DEF_CPPFLAGS += -I$(LWIPDIR)/src/include DEF_CPPFLAGS += -I$(LWIPDIR)/src/include DEF_CPPFLAGS += -I$(LWIPDIR)/src/include/ipv4 endif - -ifneq ($(QEMUDIR),) -qemu=y -endif - -ifneq ($(CAMLDIR),) -caml=y -endif diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/Makefile --- a/extras/mini-os/Makefile Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/Makefile Mon Jun 02 11:35:39 2008 +0900 @@ -73,44 +73,25 @@ OBJS += lwip.a OBJS += lwip.a endif -OBJS := $(filter-out lwip%.o $(LWO), $(OBJS)) - -ifeq ($(caml),y) -CAMLLIB = $(shell ocamlc -where) -APP_OBJS += main-caml.o -APP_OBJS += $(CAMLDIR)/caml.o -APP_OBJS += $(CAMLLIB)/libasmrun.a -CFLAGS += -I$(CAMLLIB) -APP_LDLIBS += -lm -endif -OBJS := $(filter-out main-caml.o, $(OBJS)) - -ifeq ($(qemu),y) -APP_OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a $(QEMUDIR)/i386-dm-stubdom/libqemu.a -CFLAGS += -DCONFIG_QEMU -endif - -ifneq ($(CDIR),) -APP_OBJS += $(CDIR)/main.a -APP_LDLIBS += -endif +OBJS := $(filter-out main.o lwip%.o $(LWO), $(OBJS)) ifeq ($(libc),y) -LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -lxenctrl -lxenguest +APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -whole-archive -lxenguest -lxenctrl -no-whole-archive APP_LDLIBS += -lpci APP_LDLIBS += -lz +APP_LDLIBS += -lm LDLIBS += -lc endif -ifneq ($(caml)-$(qemu)-$(CDIR)-$(lwip),---y) +ifneq ($(APP_OBJS)-$(lwip),-y) OBJS := $(filter-out daytime.o, $(OBJS)) endif -app.o: $(APP_OBJS) app.lds - $(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined main -o $@ +$(TARGET)_app.o: $(APP_OBJS) app.lds + $(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined app_main -o $@ -$(TARGET): links $(OBJS) app.o 
arch_lib - $(LD) -r $(LDFLAGS) $(HEAD_OBJ) app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o +$(TARGET): links $(OBJS) $(TARGET)_app.o arch_lib + $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(TARGET)_app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@ gzip -f -9 -c $@ >$@.gz diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/ia64/minios-ia64.lds --- a/extras/mini-os/arch/ia64/minios-ia64.lds Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/arch/ia64/minios-ia64.lds Mon Jun 02 11:35:39 2008 +0900 @@ -52,6 +52,23 @@ SECTIONS .fini_array : { *(.fini_array) } PROVIDE (__fini_array_end = .); + .ctors : { + __CTOR_LIST__ = .; + QUAD((__CTOR_END__ - __CTOR_LIST__) / 8 - 2) + *(SORT_BY_NAME(.ctors)) + SORT_BY_NAME(CONSTRUCTORS) + QUAD(0) + __CTOR_END__ = .; + } + + .dtors : { + __DTOR_LIST__ = .; + QUAD((__DTOR_END__ - __DTOR_LIST__) / 8 - 2) + *(SORT_BY_NAME(.dtors)) + QUAD(0) + __DTOR_END__ = .; + } + .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - (((5<<(61))+0x100000000) - (1 << 20))) { *(.IA_64.unwind_info) } diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/ia64/mm.c --- a/extras/mini-os/arch/ia64/mm.c Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/arch/ia64/mm.c Mon Jun 02 11:35:39 2008 +0900 @@ -131,6 +131,14 @@ arch_init_demand_mapping_area(unsigned l } /* Helper function used in gnttab.c. 
*/ +void do_map_frames(unsigned long addr, + unsigned long *f, unsigned long n, unsigned long stride, + unsigned long increment, domid_t id, int may_fail, unsigned long prot) +{ + /* TODO */ + ASSERT(0); +} + void* map_frames_ex(unsigned long* frames, unsigned long n, unsigned long stride, unsigned long increment, unsigned long alignment, domid_t id, diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/x86/minios-x86_32.lds --- a/extras/mini-os/arch/x86/minios-x86_32.lds Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/arch/x86/minios-x86_32.lds Mon Jun 02 11:35:39 2008 +0900 @@ -28,9 +28,25 @@ SECTIONS .fini_array : { *(.fini_array) } PROVIDE (__fini_array_end = .); + .ctors : { + __CTOR_LIST__ = .; + LONG((__CTOR_END__ - __CTOR_LIST__) / 4 - 2) + *(SORT_BY_NAME(.ctors)) + SORT_BY_NAME(CONSTRUCTORS) + LONG(0) + __CTOR_END__ = .; + } + + .dtors : { + __DTOR_LIST__ = .; + LONG((__DTOR_END__ - __DTOR_LIST__) / 4 - 2) + *(SORT_BY_NAME(.dtors)) + LONG(0) + __DTOR_END__ = .; + } + .data : { /* Data */ *(.data) - CONSTRUCTORS } _edata = .; /* End of data section */ diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/x86/minios-x86_64.lds --- a/extras/mini-os/arch/x86/minios-x86_64.lds Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/arch/x86/minios-x86_64.lds Mon Jun 02 11:35:39 2008 +0900 @@ -28,9 +28,25 @@ SECTIONS .fini_array : { *(.fini_array) } PROVIDE (__fini_array_end = .); + .ctors : { + __CTOR_LIST__ = .; + QUAD((__CTOR_END__ - __CTOR_LIST__) / 8 - 2) + *(SORT_BY_NAME(.ctors)) + SORT_BY_NAME(CONSTRUCTORS) + QUAD(0) + __CTOR_END__ = .; + } + + .dtors : { + __DTOR_LIST__ = .; + QUAD((__DTOR_END__ - __DTOR_LIST__) / 8 - 2) + *(SORT_BY_NAME(.dtors)) + QUAD(0) + __DTOR_END__ = .; + } + .data : { /* Data */ *(.data) - CONSTRUCTORS } _edata = .; /* End of data section */ diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/x86/mm.c --- a/extras/mini-os/arch/x86/mm.c Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/arch/x86/mm.c Mon Jun 02 11:35:39 
2008 +0900 @@ -59,11 +59,10 @@ void new_pt_frame(unsigned long *pt_pfn, { pgentry_t *tab = (pgentry_t *)start_info.pt_base; unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); - unsigned long prot_e, prot_t, pincmd; + unsigned long prot_e, prot_t; mmu_update_t mmu_updates[1]; - struct mmuext_op pin_request; - prot_e = prot_t = pincmd = 0; + prot_e = prot_t = 0; DEBUG("Allocating new L%d pt frame for pt_pfn=%lx, " "prev_l_mfn=%lx, offset=%lx", level, *pt_pfn, prev_l_mfn, offset); @@ -77,18 +76,15 @@ void new_pt_frame(unsigned long *pt_pfn, case L1_FRAME: prot_e = L1_PROT; prot_t = L2_PROT; - pincmd = MMUEXT_PIN_L1_TABLE; break; case L2_FRAME: prot_e = L2_PROT; prot_t = L3_PROT; - pincmd = MMUEXT_PIN_L2_TABLE; break; #if defined(__x86_64__) case L3_FRAME: prot_e = L3_PROT; prot_t = L4_PROT; - pincmd = MMUEXT_PIN_L3_TABLE; break; #endif default: @@ -113,15 +109,6 @@ void new_pt_frame(unsigned long *pt_pfn, do_exit(); } - /* Pin the page to provide correct protection */ - pin_request.cmd = pincmd; - pin_request.arg1.mfn = pfn_to_mfn(*pt_pfn); - if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0) - { - printk("ERROR: pinning failed\n"); - do_exit(); - } - /* Now fill the new page table page with entries. Update the page directory as well. 
*/ mmu_updates[0].ptr = ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset; diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/hypervisor.c --- a/extras/mini-os/hypervisor.c Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/hypervisor.c Mon Jun 02 11:35:39 2008 +0900 @@ -55,12 +55,12 @@ void do_hypervisor_callback(struct pt_re while ( l1 != 0 ) { l1i = __ffs(l1); - l1 &= ~(1 << l1i); + l1 &= ~(1UL << l1i); while ( (l2 = active_evtchns(cpu, s, l1i)) != 0 ) { l2i = __ffs(l2); - l2 &= ~(1 << l2i); + l2 &= ~(1UL << l2i); port = (l1i * (sizeof(unsigned long) * 8)) + l2i; do_event(port, regs); diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/lib.h --- a/extras/mini-os/include/lib.h Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/include/lib.h Mon Jun 02 11:35:39 2008 +0900 @@ -136,6 +136,7 @@ enum fd_type { FTYPE_CONSOLE, FTYPE_FILE, FTYPE_XENBUS, + FTYPE_XC, FTYPE_EVTCHN, FTYPE_SOCKET, FTYPE_TAP, diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/mm.h --- a/extras/mini-os/include/mm.h Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/include/mm.h Mon Jun 02 11:35:39 2008 +0900 @@ -67,6 +67,9 @@ void *map_frames_ex(unsigned long *f, un void *map_frames_ex(unsigned long *f, unsigned long n, unsigned long stride, unsigned long increment, unsigned long alignment, domid_t id, int may_fail, unsigned long prot); +void do_map_frames(unsigned long addr, + unsigned long *f, unsigned long n, unsigned long stride, + unsigned long increment, domid_t id, int may_fail, unsigned long prot); #ifdef HAVE_LIBC extern unsigned long heap, brk, heap_mapped, heap_end; #endif diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/posix/pthread.h --- a/extras/mini-os/include/posix/pthread.h Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/include/posix/pthread.h Mon Jun 02 11:35:39 2008 +0900 @@ -1,18 +1,56 @@ #ifndef _POSIX_PTHREAD_H #define _POSIX_PTHREAD_H +#include <stdlib.h> + /* Let's be single-threaded for now. 
*/ -typedef void *pthread_key_t; -typedef struct {} pthread_mutex_t, pthread_once_t; +typedef struct { + void *ptr; +} *pthread_key_t; +static inline int pthread_key_create(pthread_key_t *key, void (*destr_function)(void*)) +{ + *key = malloc(sizeof(**key)); + (*key)->ptr = NULL; + return 0; +} +static inline int pthread_setspecific(pthread_key_t key, const void *pointer) +{ + key->ptr = (void*) pointer; + return 0; +} +static inline void *pthread_getspecific(pthread_key_t key) +{ + return key->ptr; +} +static inline int pthread_key_delete(pthread_key_t key) +{ + free(key); + return 0; +} + + + +typedef struct {} pthread_mutex_t; #define PTHREAD_MUTEX_INITIALIZER {} -#define PTHREAD_ONCE_INIT {} static inline int pthread_mutex_lock(pthread_mutex_t *mutex) { return 0; } static inline int pthread_mutex_unlock(pthread_mutex_t *mutex) { return 0; } -static inline int pthread_key_create(pthread_key_t *key, void (*destr_function)(void*)) { *key = NULL; return 0; } -static inline int pthread_setspecific(pthread_key_t *key, const void *pointer) { *key = (void*) pointer; return 0; } -static inline void *pthread_getspecific(pthread_key_t *key) { return *key; } -static inline int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) { init_routine(); return 0; } + + + +typedef struct { + int done; +} pthread_once_t; +#define PTHREAD_ONCE_INIT { 0 } + +static inline int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) +{ + if (!once_control->done) { + once_control->done = 1; + init_routine(); + } + return 0; +} #define __thread diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/x86/arch_mm.h --- a/extras/mini-os/include/x86/arch_mm.h Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/include/x86/arch_mm.h Mon Jun 02 11:35:39 2008 +0900 @@ -219,11 +219,6 @@ static __inline__ paddr_t machine_to_phy #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, L1_PROT) #define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, 
a, DOMID_SELF, 0, L1_PROT_RO) -#ifndef __ASSEMBLY__ -void do_map_frames(unsigned long addr, - unsigned long *f, unsigned long n, unsigned long stride, - unsigned long increment, domid_t id, int may_fail, unsigned long prot); -#endif #define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, 0, L1_PROT_RO) #endif /* _ARCH_MM_H_ */ diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/lib/sys.c --- a/extras/mini-os/lib/sys.c Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/lib/sys.c Mon Jun 02 11:35:39 2008 +0900 @@ -81,6 +81,7 @@ #define NOFILE 32 extern int xc_evtchn_close(int fd); +extern int xc_interface_close(int fd); pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER; struct file files[NOFILE] = { @@ -259,10 +260,7 @@ int read(int fd, void *buf, size_t nbyte } return ret * sizeof(union xenfb_in_event); } - case FTYPE_NONE: - case FTYPE_XENBUS: - case FTYPE_EVTCHN: - case FTYPE_BLK: + default: break; } printk("read(%d): Bad descriptor\n", fd); @@ -295,12 +293,7 @@ int write(int fd, const void *buf, size_ case FTYPE_TAP: netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes); return nbytes; - case FTYPE_NONE: - case FTYPE_XENBUS: - case FTYPE_EVTCHN: - case FTYPE_BLK: - case FTYPE_KBD: - case FTYPE_FB: + default: break; } printk("write(%d): Bad descriptor\n", fd); @@ -351,15 +344,7 @@ int fsync(int fd) { } return 0; } - case FTYPE_NONE: - case FTYPE_CONSOLE: - case FTYPE_SOCKET: - case FTYPE_XENBUS: - case FTYPE_EVTCHN: - case FTYPE_TAP: - case FTYPE_BLK: - case FTYPE_KBD: - case FTYPE_FB: + default: break; } printk("fsync(%d): Bad descriptor\n", fd); @@ -391,6 +376,9 @@ int close(int fd) files[fd].type = FTYPE_NONE; return res; } + case FTYPE_XC: + xc_interface_close(fd); + return 0; case FTYPE_EVTCHN: xc_evtchn_close(fd); return 0; @@ -495,13 +483,7 @@ int fstat(int fd, struct stat *buf) stat_from_fs(buf, &stat); return 0; } - case FTYPE_NONE: - case FTYPE_XENBUS: - case FTYPE_EVTCHN: - case FTYPE_TAP: - case FTYPE_BLK: - case 
FTYPE_KBD: - case FTYPE_FB: + default: break; } @@ -522,15 +504,7 @@ int ftruncate(int fd, off_t length) } return 0; } - case FTYPE_NONE: - case FTYPE_CONSOLE: - case FTYPE_SOCKET: - case FTYPE_XENBUS: - case FTYPE_EVTCHN: - case FTYPE_TAP: - case FTYPE_BLK: - case FTYPE_KBD: - case FTYPE_FB: + default: break; } @@ -636,9 +610,10 @@ static const char file_types[] = { [FTYPE_NONE] = 'N', [FTYPE_CONSOLE] = 'C', [FTYPE_FILE] = 'F', - [FTYPE_XENBUS] = 'X', + [FTYPE_XENBUS] = 'S', + [FTYPE_XC] = 'X', [FTYPE_EVTCHN] = 'E', - [FTYPE_SOCKET] = 'S', + [FTYPE_SOCKET] = 's', [FTYPE_TAP] = 'T', [FTYPE_BLK] = 'B', [FTYPE_KBD] = 'K', @@ -722,7 +697,7 @@ static int select_poll(int nfds, fd_set /* Then see others as well. */ for (i = 0; i < nfds; i++) { switch(files[i].type) { - case FTYPE_NONE: + default: if (FD_ISSET(i, readfds) || FD_ISSET(i, writefds) || FD_ISSET(i, exceptfds)) printk("bogus fd %d in select\n", i); /* Fallthrough. */ @@ -1083,14 +1058,20 @@ int clock_gettime(clockid_t clk_id, stru void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) { + unsigned long n = (length + PAGE_SIZE - 1) / PAGE_SIZE; + ASSERT(!start); - length = (length + PAGE_SIZE - 1) & PAGE_MASK; ASSERT(prot == (PROT_READ|PROT_WRITE)); - ASSERT(flags == (MAP_SHARED|MAP_ANON) || flags == (MAP_PRIVATE|MAP_ANON)); - ASSERT(fd == -1); + ASSERT((fd == -1 && (flags == (MAP_SHARED|MAP_ANON) || flags == (MAP_PRIVATE|MAP_ANON))) + || (fd != -1 && flags == MAP_SHARED)); ASSERT(offset == 0); - return map_zero(length / PAGE_SIZE, 1); + if (fd == -1) + return map_zero(n, 1); + else if (files[fd].type == FTYPE_XC) { + unsigned long zero = 0; + return map_frames_ex(&zero, n, 0, 0, 1, DOMID_SELF, 0, 0); + } else ASSERT(0); } #if defined(__x86_64__) || defined(__ia64__) __typeof__(mmap) mmap64 __attribute__((__alias__("mmap"))); @@ -1110,7 +1091,7 @@ int munmap(void *start, size_t length) call[i].args[0] = (unsigned long) &data[i]; call[i].args[1] = 0; call[i].args[2] = 0; - 
call[i].args[3] = UVMF_INVLPG | UVMF_ALL; + call[i].args[3] = UVMF_INVLPG; } ret = HYPERVISOR_multicall(call, n); diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/lib/xmalloc.c --- a/extras/mini-os/lib/xmalloc.c Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/lib/xmalloc.c Mon Jun 02 11:35:39 2008 +0900 @@ -127,7 +127,7 @@ static void *xmalloc_whole_pages(size_t if ( hdr == NULL ) return NULL; - hdr->size = (1 << (pageorder + PAGE_SHIFT)); + hdr->size = (1UL << (pageorder + PAGE_SHIFT)); /* Debugging aid. */ hdr->freelist.next = hdr->freelist.prev = NULL; diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/main-caml.c --- a/extras/mini-os/main-caml.c Mon Jun 02 11:35:02 2008 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -/* - * Caml bootstrap - * - * Samuel Thibault <Samuel.Thibault@xxxxxxxxxxxxx>, January 2008 - */ - -#include <stdio.h> -#include <errno.h> - -#include <caml/mlvalues.h> -#include <caml/callback.h> -#include <unistd.h> - -/* Ugly binary compatibility with Linux */ -FILE *_stderr asm("stderr"); -int *__errno_location; -/* Will probably break everything, probably need to fetch from glibc */ -void *__ctype_b_loc; - -int main(int argc, char *argv[], char *envp[]) -{ - value *val; - - /* Get current thread's value */ - _stderr = stderr; - __errno_location = &errno; - - printf("starting caml\n"); - - /* Wait before things might hang up */ - sleep(1); - - caml_startup(argv); - val = caml_named_value("main"); - if (!val) { - printf("Couldn't find Caml main"); - return 1; - } - caml_callback(*val, Val_int(0)); - printf("callback returned\n"); - return 0; -} diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/main.c --- a/extras/mini-os/main.c Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/main.c Mon Jun 02 11:35:39 2008 +0900 @@ -4,7 +4,6 @@ * Samuel Thibault <Samuel.Thibault@xxxxxxxxxxxxx>, October 2007 */ -#ifdef HAVE_LIBC #include <os.h> #include <sched.h> #include <console.h> @@ -19,8 +18,8 @@ extern int main(int argc, 
char *argv[], extern int main(int argc, char *argv[], char *envp[]); extern void __libc_init_array(void); extern void __libc_fini_array(void); - -struct thread *main_thread; +extern unsigned long __CTOR_LIST__[]; +extern unsigned long __DTOR_LIST__[]; #if 0 #include <stdio.h> @@ -147,6 +146,8 @@ static void call_main(void *p) __libc_init_array(); environ = envp; + for (i = 1; i <= __CTOR_LIST__[0]; i++) + ((void((*)(void)))__CTOR_LIST__[i]) (); tzset(); exit(main(argc, argv, envp)); @@ -154,6 +155,10 @@ static void call_main(void *p) void _exit(int ret) { + int i; + + for (i = 1; i <= __DTOR_LIST__[0]; i++) + ((void((*)(void)))__DTOR_LIST__[i]) (); close_all_files(); __libc_fini_array(); printk("main returned %d\n", ret); @@ -172,4 +177,3 @@ int app_main(start_info_t *si) main_thread = create_thread("main", call_main, si); return 0; } -#endif diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/mm.c --- a/extras/mini-os/mm.c Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/mm.c Mon Jun 02 11:35:39 2008 +0900 @@ -58,7 +58,7 @@ static unsigned long *alloc_bitmap; #define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8) #define allocated_in_map(_pn) \ -(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1)))) +(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1UL<<((_pn)&(PAGES_PER_MAPWORD-1)))) /* * Hint regarding bitwise arithmetic in map_{alloc,free}: @@ -80,13 +80,13 @@ static void map_alloc(unsigned long firs if ( curr_idx == end_idx ) { - alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off); + alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off); } else { - alloc_bitmap[curr_idx] |= -(1<<start_off); - while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L; - alloc_bitmap[curr_idx] |= (1<<end_off)-1; + alloc_bitmap[curr_idx] |= -(1UL<<start_off); + while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL; + alloc_bitmap[curr_idx] |= (1UL<<end_off)-1; } } @@ -102,13 +102,13 @@ static void map_free(unsigned long first if ( 
curr_idx == end_idx ) { - alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1); + alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1); } else { - alloc_bitmap[curr_idx] &= (1<<start_off)-1; + alloc_bitmap[curr_idx] &= (1UL<<start_off)-1; while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0; - alloc_bitmap[curr_idx] &= -(1<<end_off); + alloc_bitmap[curr_idx] &= -(1UL<<end_off); } } @@ -178,7 +178,7 @@ USED static void print_chunks(void *star head = free_head[order]; while(!FREELIST_EMPTY(head)) { - for(count = 0; count < 1<< head->level; count++) + for(count = 0; count < 1UL<< head->level; count++) { if(count + virt_to_pfn(head) - pfn_start < 1000) chunks[count + virt_to_pfn(head) - pfn_start] = current; @@ -235,13 +235,13 @@ static void init_page_allocator(unsigned * Next chunk is limited by alignment of min, but also * must not be bigger than remaining range. */ - for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ ) - if ( min & (1<<i) ) break; + for ( i = PAGE_SHIFT; (1UL<<(i+1)) <= range; i++ ) + if ( min & (1UL<<i) ) break; ch = (chunk_head_t *)min; - min += (1<<i); - range -= (1<<i); + min += (1UL<<i); + range -= (1UL<<i); ct = (chunk_tail_t *)min-1; i -= PAGE_SHIFT; ch->level = i; @@ -280,8 +280,8 @@ unsigned long alloc_pages(int order) { /* Split into two equal parts. */ i--; - spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT))); - spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1; + spare_ch = (chunk_head_t *)((char *)alloc_ch + (1UL<<(i+PAGE_SHIFT))); + spare_ct = (chunk_tail_t *)((char *)spare_ch + (1UL<<(i+PAGE_SHIFT)))-1; /* Create new header for spare chunk. 
*/ spare_ch->level = i; @@ -294,7 +294,7 @@ unsigned long alloc_pages(int order) free_head[i] = spare_ch; } - map_alloc(PHYS_PFN(to_phys(alloc_ch)), 1<<order); + map_alloc(PHYS_PFN(to_phys(alloc_ch)), 1UL<<order); return((unsigned long)alloc_ch); @@ -312,16 +312,16 @@ void free_pages(void *pointer, int order unsigned long mask; /* First free the chunk */ - map_free(virt_to_pfn(pointer), 1 << order); + map_free(virt_to_pfn(pointer), 1UL << order); /* Create free chunk */ freed_ch = (chunk_head_t *)pointer; - freed_ct = (chunk_tail_t *)((char *)pointer + (1<<(order + PAGE_SHIFT)))-1; + freed_ct = (chunk_tail_t *)((char *)pointer + (1UL<<(order + PAGE_SHIFT)))-1; /* Now, possibly we can conseal chunks together */ while(order < FREELIST_SIZE) { - mask = 1 << (order + PAGE_SHIFT); + mask = 1UL << (order + PAGE_SHIFT); if((unsigned long)freed_ch & mask) { to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask); diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/sched.c --- a/extras/mini-os/sched.c Mon Jun 02 11:35:02 2008 +0900 +++ b/extras/mini-os/sched.c Mon Jun 02 11:35:39 2008 +0900 @@ -57,6 +57,8 @@ struct thread *idle_thread = NULL; struct thread *idle_thread = NULL; LIST_HEAD(exited_threads); static int threads_started; + +struct thread *main_thread; void inline print_runqueue(void) { diff -r d2a239224cb2 -r f1508348ffab stubdom/Makefile --- a/stubdom/Makefile Mon Jun 02 11:35:02 2008 +0900 +++ b/stubdom/Makefile Mon Jun 02 11:35:39 2008 +0900 @@ -37,7 +37,7 @@ export PATH:=$(CROSS_PREFIX)/bin:$(PATH) export PATH:=$(CROSS_PREFIX)/bin:$(PATH) .PHONY: all -all: qemu-stubdom +all: ioemu-stubdom c-stubdom ################ # Cross-binutils @@ -174,6 +174,7 @@ mk-symlinks: ([ ! -h config-host.h ] || rm -f config-host.h) && \ ([ ! -h config-host.mak ] || rm -f config-host.mak) ) [ -h mini-os ] || ln -sf ../extras/mini-os . 
+ [ -h mini-os/include/xen ] || ln -sf ../../../xen/include/public mini-os/include/xen ####### # libxc @@ -198,40 +199,41 @@ ioemu: cross-zlib cross-libpci mk-symlin ###### .PHONY: caml -caml: - $(MAKE) -C $@ +caml: mk-symlinks + $(MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-cvs ### # C ### .PHONY: c -c: - $(MAKE) -C $@ +c: mk-symlinks + $(MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-cvs ######## # minios ######## -.PHONY: qemu-stubdom -qemu-stubdom: mk-symlinks lwip-cvs libxc ioemu - $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs QEMUDIR=$(CURDIR)/ioemu - +.PHONY: ioemu-stubdom +ioemu-stubdom: lwip-cvs libxc ioemu + $(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs APP_OBJS="$(CURDIR)/ioemu/i386-dm-stubdom/qemu.a $(CURDIR)/ioemu/i386-dm-stubdom/libqemu.a" + +CAMLLIB = $(shell ocamlc -where) .PHONY: caml-stubdom -caml-stubdom: mk-symlinks lwip-cvs libxc cross-libpci caml - $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs CAMLDIR=$(CURDIR)/caml +caml-stubdom: lwip-cvs libxc caml + $(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs APP_OBJS="$(CURDIR)/caml/main-c.o $(CURDIR)/caml/main-caml.o $(CURDIR)/caml/caml.o $(CAMLLIB)/libasmrun.a" .PHONY: c-stubdom -c-stubdom: mk-symlinks lwip-cvs libxc cross-libpci c - $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs CDIR=$(CURDIR)/c +c-stubdom: lwip-cvs libxc c + $(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs APP_OBJS=$(CURDIR)/c/main.a ######### # install ######### -install: mini-os/mini-os.gz +install: mini-os/ioemu-stubdom.gz $(INSTALL_PROG) stubdom-dm "$(DESTDIR)/usr/lib/xen/bin" - $(INSTALL_PROG) mini-os/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/stubdom.gz" + $(INSTALL_PROG) $< "$(DESTDIR)/usr/lib/xen/boot/stubdom.gz" ####### # clean @@ -242,6 +244,7 @@ clean: clean: -$(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs clean $(MAKE) -C caml clean + $(MAKE) -C c clean rm -fr libxc ioemu mini-os include # clean the cross-compilation result diff -r d2a239224cb2 -r f1508348ffab stubdom/c/Makefile --- a/stubdom/c/Makefile Mon Jun 02 
11:35:02 2008 +0900 +++ b/stubdom/c/Makefile Mon Jun 02 11:35:39 2008 +0900 @@ -2,7 +2,12 @@ XEN_ROOT = ../.. include $(XEN_ROOT)/Config.mk -main.a: main.o +all: main.a + +main-c.c: + ln -sf $(XEN_ROOT)/extras/mini-os/main.c $@ + +main.a: main-c.o main.o $(AR) cr $@ $^ clean: diff -r d2a239224cb2 -r f1508348ffab stubdom/c/main.c --- a/stubdom/c/main.c Mon Jun 02 11:35:02 2008 +0900 +++ b/stubdom/c/main.c Mon Jun 02 11:35:39 2008 +0900 @@ -1,4 +1,6 @@ #include <stdio.h> +#include <unistd.h> + int main(void) { sleep(2); printf("Hello, world!\n"); diff -r d2a239224cb2 -r f1508348ffab stubdom/caml/Makefile --- a/stubdom/caml/Makefile Mon Jun 02 11:35:02 2008 +0900 +++ b/stubdom/caml/Makefile Mon Jun 02 11:35:39 2008 +0900 @@ -1,12 +1,20 @@ XEN_ROOT = ../.. XEN_ROOT = ../.. include $(XEN_ROOT)/Config.mk + +CAMLLIB = $(shell ocamlc -where) +DEF_CPPFLAGS += -I$(CAMLLIB) OCAMLFIND=ocamlfind OCAMLOPT=ocamlopt OBJS := hello.cmx LIBS := + +all: main-c.o main-caml.o caml.o + +main-c.c: + ln -sf $(XEN_ROOT)/extras/mini-os/main.c $@ %.cmx: %.ml $(OCAMLFIND) $(OCAMLOPT) -c $< -o $@ @@ -15,4 +23,4 @@ caml.o: $(OBJS) $(OCAMLFIND) $(OCAMLOPT) $(LIBS) $^ -output-obj -o $@ clean: - rm -f *.o *.cmx *.cmi + rm -f *.a *.o *.cmx *.cmi diff -r d2a239224cb2 -r f1508348ffab stubdom/caml/main-caml.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/stubdom/caml/main-caml.c Mon Jun 02 11:35:39 2008 +0900 @@ -0,0 +1,42 @@ +/* + * Caml bootstrap + * + * Samuel Thibault <Samuel.Thibault@xxxxxxxxxxxxx>, January 2008 + */ + +#include <stdio.h> +#include <errno.h> + +#include <caml/mlvalues.h> +#include <caml/callback.h> +#include <unistd.h> + +/* Ugly binary compatibility with Linux */ +FILE *_stderr asm("stderr"); +int *__errno_location; +/* Will probably break everything, probably need to fetch from glibc */ +void *__ctype_b_loc; + +int main(int argc, char *argv[], char *envp[]) +{ + value *val; + + /* Get current thread's value */ + _stderr = stderr; + __errno_location = &errno; + + 
printf("starting caml\n"); + + /* Wait before things might hang up */ + sleep(1); + + caml_startup(argv); + val = caml_named_value("main"); + if (!val) { + printf("Couldn't find Caml main"); + return 1; + } + caml_callback(*val, Val_int(0)); + printf("callback returned\n"); + return 0; +} diff -r d2a239224cb2 -r f1508348ffab tools/examples/Makefile --- a/tools/examples/Makefile Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/examples/Makefile Mon Jun 02 11:35:39 2008 +0900 @@ -9,9 +9,7 @@ XENDOMAINS_SYSCONFIG = init.d/sysconfig. # Xen configuration dir and configs to go there. XEN_CONFIG_DIR = /etc/xen XEN_CONFIGS = xend-config.sxp -XEN_CONFIGS += xend-config-xenapi.sxp XEN_CONFIGS += xm-config.xml -XEN_CONFIGS += xm-config-xenapi.xml XEN_CONFIGS += xmexample1 XEN_CONFIGS += xmexample2 XEN_CONFIGS += xmexample.hvm diff -r d2a239224cb2 -r f1508348ffab tools/examples/xend-config-xenapi.sxp --- a/tools/examples/xend-config-xenapi.sxp Mon Jun 02 11:35:02 2008 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,196 +0,0 @@ -# -*- sh -*- - -# -# Xend configuration file. -# - -# This example configuration is appropriate for an installation that -# utilizes a bridged network configuration. Access to xend via http -# is disabled. - -# Commented out entries show the default for that entry, unless otherwise -# specified. - -#(logfile /var/log/xen/xend.log) -#(loglevel DEBUG) - - -# The Xen-API server configuration. (Please note that this server is -# available as an UNSUPPORTED PREVIEW in Xen 3.0.4, and should not be relied -# upon). -# -# This value configures the ports, interfaces, and access controls for the -# Xen-API server. Each entry in the list starts with either unix, a port -# number, or an address:port pair. If this is "unix", then a UDP socket is -# opened, and this entry applies to that. 
If it is a port, then Xend will -# listen on all interfaces on that TCP port, and if it is an address:port -# pair, then Xend will listen on the specified port, using the interface with -# the specified address. -# -# The subsequent string configures the user-based access control for the -# listener in question. This can be one of "none" or "pam", indicating either -# that users should be allowed access unconditionally, or that the local -# Pluggable Authentication Modules configuration should be used. If this -# string is missing or empty, then "pam" is used. -# -# The final string gives the host-based access control for that listener. If -# this is missing or empty, then all connections are accepted. Otherwise, -# this should be a space-separated sequence of regular expressions; any host -# with a fully-qualified domain name or an IP address that matches one of -# these regular expressions will be accepted. -# -# Example: listen on TCP port 9363 on all interfaces, accepting connections -# only from machines in example.com or localhost, and allow access through -# the unix domain socket unconditionally: -# - (xen-api-server ((9363 none))) -# (unix none))) -# -# Optionally, the TCP Xen-API server can use SSL by specifying the private -# key and certificate location: -# -# (9367 pam '' /etc/xen/xen-api.key /etc/xen/xen-api.crt) -# -# Default: -# (xen-api-server ((unix))) - - -#(xend-http-server no) -#(xend-unix-server no) -#(xend-tcp-xmlrpc-server no) -#(xend-unix-xmlrpc-server yes) -#(xend-relocation-server no) -(xend-relocation-server yes) - -#(xend-unix-path /var/lib/xend/xend-socket) - - -# Address and port xend should use for the legacy TCP XMLRPC interface, -# if xend-tcp-xmlrpc-server is set. -#(xend-tcp-xmlrpc-server-address 'localhost') -#(xend-tcp-xmlrpc-server-port 8006) - -# SSL key and certificate to use for the legacy TCP XMLRPC interface. -# Setting these will mean that this port serves only SSL connections as -# opposed to plaintext ones. 
-#(xend-tcp-xmlrpc-server-ssl-key-file /etc/xen/xmlrpc.key) -#(xend-tcp-xmlrpc-server-ssl-cert-file /etc/xen/xmlrpc.crt) - - -# Port xend should use for the HTTP interface, if xend-http-server is set. -#(xend-port 8000) - -# Port xend should use for the relocation interface, if xend-relocation-server -# is set. -#(xend-relocation-port 8002) - -# Address xend should listen on for HTTP connections, if xend-http-server is -# set. -# Specifying 'localhost' prevents remote connections. -# Specifying the empty string '' (the default) allows all connections. -#(xend-address '') -#(xend-address localhost) - -# Address xend should listen on for relocation-socket connections, if -# xend-relocation-server is set. -# Meaning and default as for xend-address above. -#(xend-relocation-address '') - -# The hosts allowed to talk to the relocation port. If this is empty (the -# default), then all connections are allowed (assuming that the connection -# arrives on a port and interface on which we are listening; see -# xend-relocation-port and xend-relocation-address above). Otherwise, this -# should be a space-separated sequence of regular expressions. Any host with -# a fully-qualified domain name or an IP address that matches one of these -# regular expressions will be accepted. -# -# For example: -# (xend-relocation-hosts-allow '^localhost$ ^.*\\.example\\.org$') -# -#(xend-relocation-hosts-allow '') -(xend-relocation-hosts-allow '^localhost$ ^localhost\\.localdomain$') - -# The limit (in kilobytes) on the size of the console buffer -#(console-limit 1024) - -## -# To bridge network traffic, like this: -# -# dom0: ----------------- bridge -> real eth0 -> the network -# | -# domU: fake eth0 -> vifN.0 -+ -# -# use -# -# (network-script network-bridge) -# -# Your default ethernet device is used as the outgoing interface, by default. -# To use a different one (e.g. eth1) use -# -# (network-script 'network-bridge netdev=eth1') -# -# The bridge is named xenbr0, by default. 
To rename the bridge, use -# -# (network-script 'network-bridge bridge=<name>') -# -# It is possible to use the network-bridge script in more complicated -# scenarios, such as having two outgoing interfaces, with two bridges, and -# two fake interfaces per guest domain. To do things like this, write -# yourself a wrapper script, and call network-bridge from it, as appropriate. -# -(network-script network-bridge) - -# The script used to control virtual interfaces. This can be overridden on a -# per-vif basis when creating a domain or configuring a new vif. The -# vif-bridge script is designed for use with the network-bridge script, or -# similar configurations. -# -# If you have overridden the bridge name using -# (network-script 'network-bridge bridge=<name>') then you may wish to do the -# same here. The bridge name can also be set when creating a domain or -# configuring a new vif, but a value specified here would act as a default. -# -# If you are using only one bridge, the vif-bridge script will discover that, -# so there is no need to specify it explicitly. -# -(vif-script vif-bridge) - - -## Use the following if network traffic is routed, as an alternative to the -# settings for bridged networking given above. -#(network-script network-route) -#(vif-script vif-route) - - -## Use the following if network traffic is routed with NAT, as an alternative -# to the settings for bridged networking given above. -#(network-script network-nat) -#(vif-script vif-nat) - -# dom0-min-mem is the lowest permissible memory level (in MB) for dom0. -# This is a minimum both for auto-ballooning (as enabled by -# enable-dom0-ballooning below) and for xm mem-set when applied to dom0. -(dom0-min-mem 196) - -# Whether to enable auto-ballooning of dom0 to allow domUs to be created. -# If enable-dom0-ballooning = no, dom0 will never balloon out. 
-(enable-dom0-ballooning yes) - -# In SMP system, dom0 will use dom0-cpus # of CPUS -# If dom0-cpus = 0, dom0 will take all cpus available -(dom0-cpus 0) - -# Whether to enable core-dumps when domains crash. -#(enable-dump no) - -# The tool used for initiating virtual TPM migration -#(external-migration-tool '') - -# The interface for VNC servers to listen on. Defaults -# to 127.0.0.1 To restore old 'listen everywhere' behaviour -# set this to 0.0.0.0 -#(vnc-listen '127.0.0.1') - -# The default password for VNC console on HVM domain. -# Empty string is no authentication. -(vncpasswd '') diff -r d2a239224cb2 -r f1508348ffab tools/examples/xend-config.sxp --- a/tools/examples/xend-config.sxp Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/examples/xend-config.sxp Mon Jun 02 11:35:39 2008 +0900 @@ -59,6 +59,7 @@ #(xend-unix-xmlrpc-server yes) #(xend-relocation-server no) (xend-relocation-server yes) +#(xend-relocation-ssl-server no) #(xend-unix-path /var/lib/xend/xend-socket) @@ -82,14 +83,17 @@ # is set. #(xend-relocation-port 8002) -# Whether to use tls when relocating. -#(xend-relocation-tls no) - -# SSL key and certificate to use for the relocation interface. -# Setting these will mean that this port serves only SSL connections as -# opposed to plaintext ones. +# Port xend should use for the ssl relocation interface, if +# xend-relocation-ssl-server is set. +#(xend-relocation-ssl-port 8003) + +# SSL key and certificate to use for the ssl relocation interface, if +# xend-relocation-ssl-server is set. #(xend-relocation-server-ssl-key-file /etc/xen/xmlrpc.key) #(xend-relocation-server-ssl-cert-file /etc/xen/xmlrpc.crt) + +# Whether to use ssl as default when relocating. +#(xend-relocation-ssl no) # Address xend should listen on for HTTP connections, if xend-http-server is # set. 
diff -r d2a239224cb2 -r f1508348ffab tools/examples/xm-config-xenapi.xml --- a/tools/examples/xm-config-xenapi.xml Mon Jun 02 11:35:02 2008 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -<!-- - -Copyright (C) 2006 XenSource Inc. - -This library is free software; you can redistribute it and/or -modify it under the terms of version 2.1 of the GNU Lesser General Public -License as published by the Free Software Foundation. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - ---> - -<!-- - -This is a configuration file for xm; it should be placed in -/etc/xen/xm-config.xml. If this file is missing, then xm will fall back to -the normal behaviour that's in Xen 3.0.4 and below. The settings here are -most useful for experimenting with the Xen-API preview in Xen 3.0.4. - ---> - -<xm> - <!-- The server element describes how to talk to Xend. The type may be - Xen-API or LegacyXMLRPC (the default). The URI is that of the - server; you might try http://server:9363/ or - httpu:///var/run/xend/xen-api.sock for the Xen-API, or - httpu:///var/run/xend/xmlrpc.sock for the legacy server. - - The username and password attributes will be used to log in if Xen-API - is being used. 
- --> - <server type='Xen-API' - uri='http://localhost:9363/' - username='me' - password='mypassword' /> -</xm> diff -r d2a239224cb2 -r f1508348ffab tools/firmware/hvmloader/util.c --- a/tools/firmware/hvmloader/util.c Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/firmware/hvmloader/util.c Mon Jun 02 11:35:39 2008 +0900 @@ -609,7 +609,7 @@ uint16_t get_cpu_mhz(void) uint16_t get_cpu_mhz(void) { struct xen_add_to_physmap xatp; - struct shared_info *shared_info = (struct shared_info *)0xa0000; + struct shared_info *shared_info = (struct shared_info *)0xfffff000; struct vcpu_time_info *info = &shared_info->vcpu_info[0].time; uint64_t cpu_khz; uint32_t tsc_to_nsec_mul, version; @@ -619,7 +619,7 @@ uint16_t get_cpu_mhz(void) if ( cpu_mhz != 0 ) return cpu_mhz; - /* Map shared-info page to 0xa0000 (i.e., overlap VGA hole). */ + /* Map shared-info page. */ xatp.domid = DOMID_SELF; xatp.space = XENMAPSPACE_shared_info; xatp.idx = 0; @@ -643,14 +643,6 @@ uint16_t get_cpu_mhz(void) cpu_khz = cpu_khz << -tsc_shift; else cpu_khz = cpu_khz >> tsc_shift; - - /* Get the VGA MMIO hole back by remapping shared info to scratch. 
*/ - xatp.domid = DOMID_SELF; - xatp.space = XENMAPSPACE_shared_info; - xatp.idx = 0; - xatp.gpfn = 0xfffff; /* scratch pfn */ - if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) - BUG(); cpu_mhz = (uint16_t)(((uint32_t)cpu_khz + 500) / 1000); return cpu_mhz; diff -r d2a239224cb2 -r f1508348ffab tools/firmware/rombios/rombios.c --- a/tools/firmware/rombios/rombios.c Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/firmware/rombios/rombios.c Mon Jun 02 11:35:39 2008 +0900 @@ -2225,26 +2225,12 @@ void interactive_bootkey() Bit16u i; Bit8u scan = 0; - bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\nPress F10 to select boot device.\n"); - for (i = 3; i > 0; i--) - { - scan = wait(WAIT_HZ, 0); - switch (scan) { - case 0x3D: - case 0x3E: - case 0x3F: - case 0x58: - break; - case 0x44: - scan = bootmenu(inb_cmos(0x3d) & 0x0f); - break; - default: - scan = 0; - break; - } - if (scan != 0) - break; - } + bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, + "\n\nPress F10 to select boot device.\n"); + + scan = wait(1, 0); + if (scan == 0x44) + scan = bootmenu(inb_cmos(0x3d) & 0x0f); /* set the default based on the keypress or menu */ switch(scan) { diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/Makefile.target --- a/tools/ioemu/Makefile.target Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/ioemu/Makefile.target Mon Jun 02 11:35:39 2008 +0900 @@ -358,6 +358,13 @@ endif endif ifdef CONFIG_STUBDOM +VL_OBJS+=main-qemu.o +CFLAGS += -DCONFIG_QEMU +main-qemu.c: + ln -s $(XEN_ROOT)/extras/mini-os/main.c $@ +endif + +ifdef CONFIG_STUBDOM #CONFIG_PASSTHROUGH=1 else ifeq (,$(wildcard /usr/include/pci)) diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/hw/cirrus_vga.c --- a/tools/ioemu/hw/cirrus_vga.c Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/ioemu/hw/cirrus_vga.c Mon Jun 02 11:35:39 2008 +0900 @@ -281,8 +281,6 @@ typedef struct PCICirrusVGAState { static uint8_t rop_to_index[256]; -void *shared_vram; - /*************************************** * * prototypes. 
diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/vl.c --- a/tools/ioemu/vl.c Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/ioemu/vl.c Mon Jun 02 11:35:39 2008 +0900 @@ -7807,8 +7807,9 @@ int main(int argc, char **argv) bdrv_set_type_hint(fd_table[i], BDRV_TYPE_FLOPPY); } if (fd_filename[i] != '\0') { - if (bdrv_open(fd_table[i], fd_filename[i], - snapshot ? BDRV_O_SNAPSHOT : 0) < 0) { + if (bdrv_open2(fd_table[i], fd_filename[i], + snapshot ? BDRV_O_SNAPSHOT : 0, + &bdrv_raw) < 0) { fprintf(stderr, "qemu: could not open floppy disk image '%s'\n", fd_filename[i]); exit(1); diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/vl.h --- a/tools/ioemu/vl.h Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/ioemu/vl.h Mon Jun 02 11:35:39 2008 +0900 @@ -153,8 +153,6 @@ int unset_mm_mapping(int xc_handle, uint unsigned int address_bits, unsigned long *extent_start); int set_mm_mapping(int xc_handle, uint32_t domid, unsigned long nr_pages, unsigned int address_bits, unsigned long *extent_start); - -extern void *shared_vram; extern FILE *logfile; diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/xenstore.c --- a/tools/ioemu/xenstore.c Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/ioemu/xenstore.c Mon Jun 02 11:35:39 2008 +0900 @@ -112,7 +112,7 @@ void xenstore_parse_domain_config(int hv e = xs_directory(xsh, XBT_NULL, buf, &num); if (e == NULL) - goto out; + num = 0; for (i = 0; i < num; i++) { /* read the backend path */ diff -r d2a239224cb2 -r f1508348ffab tools/libxc/Makefile --- a/tools/libxc/Makefile Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/libxc/Makefile Mon Jun 02 11:35:39 2008 +0900 @@ -5,11 +5,9 @@ MINOR = 0 MINOR = 0 CTRL_SRCS-y := -ifneq ($(stubdom),y) CTRL_SRCS-y += xc_core.c CTRL_SRCS-$(CONFIG_X86) += xc_core_x86.c CTRL_SRCS-$(CONFIG_IA64) += xc_core_ia64.c -endif CTRL_SRCS-y += xc_domain.c CTRL_SRCS-y += xc_evtchn.c CTRL_SRCS-y += xc_misc.c @@ -21,9 +19,7 @@ CTRL_SRCS-y += xc_csched.c CTRL_SRCS-y += xc_csched.c CTRL_SRCS-y += xc_tbuf.c CTRL_SRCS-y += xc_pm.c -ifneq 
($(stubdom),y) CTRL_SRCS-y += xc_resume.c -endif CTRL_SRCS-$(CONFIG_X86) += xc_pagetab.c CTRL_SRCS-$(CONFIG_Linux) += xc_linux.c CTRL_SRCS-$(CONFIG_SunOS) += xc_solaris.c @@ -33,15 +29,12 @@ CTRL_SRCS-$(CONFIG_MiniOS) += xc_minios. GUEST_SRCS-y := GUEST_SRCS-y += xg_private.c -ifneq ($(stubdom),y) GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c -endif VPATH = ../../xen/common/libelf CFLAGS += -I../../xen/common/libelf -ifneq ($(stubdom),y) GUEST_SRCS-y += libelf-tools.c libelf-loader.c GUEST_SRCS-y += libelf-dominfo.c libelf-relocate.c @@ -55,7 +48,6 @@ GUEST_SRCS-$(CONFIG_X86) += xc_dom_x GUEST_SRCS-$(CONFIG_X86) += xc_dom_x86.c GUEST_SRCS-$(CONFIG_X86) += xc_cpuid_x86.c GUEST_SRCS-$(CONFIG_IA64) += xc_dom_ia64.c -endif -include $(XEN_TARGET_ARCH)/Makefile diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xc_core.c --- a/tools/libxc/xc_core.c Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/libxc/xc_core.c Mon Jun 02 11:35:39 2008 +0900 @@ -64,7 +64,7 @@ /* string table */ struct xc_core_strtab { char *strings; - uint16_t current; + uint16_t length; uint16_t max; }; @@ -89,7 +89,7 @@ xc_core_strtab_init(void) /* index 0 represents none */ strtab->strings[0] = '\0'; - strtab->current = 1; + strtab->length = 1; return strtab; } @@ -107,14 +107,14 @@ xc_core_strtab_get(struct xc_core_strtab uint16_t ret = 0; uint16_t len = strlen(name) + 1; - if ( strtab->current > UINT16_MAX - len ) + if ( strtab->length > UINT16_MAX - len ) { PERROR("too long string table"); errno = E2BIG; return ret; } - if ( strtab->current + len > strtab->max ) + if ( strtab->length + len > strtab->max ) { char *tmp; if ( strtab->max > UINT16_MAX / 2 ) @@ -135,9 +135,9 @@ xc_core_strtab_get(struct xc_core_strtab strtab->max *= 2; } - ret = strtab->current; - strcpy(strtab->strings + strtab->current, name); - strtab->current += len; + ret = strtab->length; + strcpy(strtab->strings + strtab->length, name); + strtab->length += len; return 
ret; } @@ -669,7 +669,7 @@ xc_domain_dumpcore_via_callback(int xc_h offset += filesz; /* fixing up section header string table section header */ - filesz = strtab->current; + filesz = strtab->length; sheaders->shdrs[strtab_idx].sh_offset = offset; sheaders->shdrs[strtab_idx].sh_size = filesz; @@ -829,7 +829,7 @@ copy_done: goto out; /* elf section header string table: .shstrtab */ - sts = dump_rtn(args, strtab->strings, strtab->current); + sts = dump_rtn(args, strtab->strings, strtab->length); if ( sts != 0 ) goto out; diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/libxc/xc_domain.c Mon Jun 02 11:35:39 2008 +0900 @@ -767,6 +767,37 @@ int xc_assign_device( return do_domctl(xc_handle, &domctl); } +int xc_get_device_group( + int xc_handle, + uint32_t domid, + uint32_t machine_bdf, + uint32_t max_sdevs, + uint32_t *num_sdevs, + uint32_t *sdev_array) +{ + int rc; + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_get_device_group; + domctl.domain = (domid_t)domid; + + domctl.u.get_device_group.machine_bdf = machine_bdf; + domctl.u.get_device_group.max_sdevs = max_sdevs; + + set_xen_guest_handle(domctl.u.get_device_group.sdev_array, sdev_array); + + if ( lock_pages(sdev_array, max_sdevs * sizeof(*sdev_array)) != 0 ) + { + PERROR("Could not lock memory for xc_get_device_group\n"); + return -ENOMEM; + } + rc = do_domctl(xc_handle, &domctl); + unlock_pages(sdev_array, max_sdevs * sizeof(*sdev_array)); + + *num_sdevs = domctl.u.get_device_group.num_sdevs; + return rc; +} + int xc_test_assign_device( int xc_handle, uint32_t domid, diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xc_minios.c --- a/tools/libxc/xc_minios.c Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/libxc/xc_minios.c Mon Jun 02 11:35:39 2008 +0900 @@ -35,11 +35,12 @@ extern struct wait_queue_head event_queu int xc_interface_open(void) { - return 0; + return alloc_fd(FTYPE_XC); } int xc_interface_close(int xc_handle) { + 
files[xc_handle].type = FTYPE_NONE; return 0; } @@ -79,8 +80,12 @@ int xc_map_foreign_ranges(int xc_handle, int xc_map_foreign_ranges(int xc_handle, uint32_t dom, privcmd_mmap_entry_t *entries, int nr) { - printf("xc_map_foreign_ranges, TODO\n"); - do_exit(); + int i; + for (i = 0; i < nr; i++) { + unsigned long mfn = entries[i].mfn; + do_map_frames(entries[i].va, &mfn, entries[i].npages, 0, 1, dom, 0, L1_PROT); + } + return 0; } int do_xen_hypercall(int xc_handle, privcmd_hypercall_t *hypercall) @@ -294,6 +299,12 @@ int xc_evtchn_unmask(int xce_handle, evt return 0; } +/* Optionally flush file to disk and discard page cache */ +void discard_file_cache(int fd, int flush) +{ + if (flush) + fsync(fd); +} /* * Local variables: * mode: C diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/libxc/xenctrl.h Mon Jun 02 11:35:39 2008 +0900 @@ -955,6 +955,13 @@ int xc_assign_device(int xc_handle, uint32_t domid, uint32_t machine_bdf); +int xc_get_device_group(int xc_handle, + uint32_t domid, + uint32_t machine_bdf, + uint32_t max_sdevs, + uint32_t *num_sdevs, + uint32_t *sdev_array); + int xc_test_assign_device(int xc_handle, uint32_t domid, uint32_t machine_bdf); diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/libxc/xg_private.c Mon Jun 02 11:35:39 2008 +0900 @@ -11,22 +11,6 @@ #include <malloc.h> #include "xg_private.h" - -int lock_pages(void *addr, size_t len) -{ - int e = 0; -#ifndef __sun__ - e = mlock(addr, len); -#endif - return (e); -} - -void unlock_pages(void *addr, size_t len) -{ -#ifndef __sun__ - safe_munlock(addr, len); -#endif -} char *xc_read_image(const char *filename, unsigned long *size) { diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/python/xen/lowlevel/xc/xc.c Mon Jun 02 11:35:39 2008 
+0900 @@ -106,7 +106,7 @@ static PyObject *pyxc_domain_create(XcOb static char *kwd_list[] = { "domid", "ssidref", "handle", "flags", "target", NULL }; if ( !PyArg_ParseTupleAndKeywords(args, kwds, "|iiOii", kwd_list, - &dom, &ssidref, &pyhandle, &flags, &target)) + &dom, &ssidref, &pyhandle, &flags, &target)) return NULL; if ( pyhandle != NULL ) { @@ -434,44 +434,44 @@ static PyObject *pyxc_linux_build(XcObje dom->vhpt_size_log2 = vhpt; if ( xc_dom_linux_build(self->xc_handle, dom, domid, mem_mb, image, - ramdisk, flags, store_evtchn, &store_mfn, - console_evtchn, &console_mfn) != 0 ) { - goto out; + ramdisk, flags, store_evtchn, &store_mfn, + console_evtchn, &console_mfn) != 0 ) { + goto out; } if ( !(elfnote_dict = PyDict_New()) ) - goto out; + goto out; for ( i = 0; i < ARRAY_SIZE(dom->parms.elf_notes); i++ ) { - switch ( dom->parms.elf_notes[i].type ) + switch ( dom->parms.elf_notes[i].type ) { - case XEN_ENT_NONE: - continue; - case XEN_ENT_LONG: - elfnote = Py_BuildValue("k", dom->parms.elf_notes[i].data.num); - break; - case XEN_ENT_STR: - elfnote = Py_BuildValue("s", dom->parms.elf_notes[i].data.str); - break; - } - PyDict_SetItemString(elfnote_dict, - dom->parms.elf_notes[i].name, - elfnote); - Py_DECREF(elfnote); + case XEN_ENT_NONE: + continue; + case XEN_ENT_LONG: + elfnote = Py_BuildValue("k", dom->parms.elf_notes[i].data.num); + break; + case XEN_ENT_STR: + elfnote = Py_BuildValue("s", dom->parms.elf_notes[i].data.str); + break; + } + PyDict_SetItemString(elfnote_dict, + dom->parms.elf_notes[i].name, + elfnote); + Py_DECREF(elfnote); } ret = Py_BuildValue("{s:i,s:i,s:N}", - "store_mfn", store_mfn, - "console_mfn", console_mfn, - "notes", elfnote_dict); + "store_mfn", store_mfn, + "console_mfn", console_mfn, + "notes", elfnote_dict); if ( dom->arch_hooks->native_protocol ) { - PyObject *native_protocol = - Py_BuildValue("s", dom->arch_hooks->native_protocol); - PyDict_SetItemString(ret, "native_protocol", native_protocol); - 
Py_DECREF(native_protocol); + PyObject *native_protocol = + Py_BuildValue("s", dom->arch_hooks->native_protocol); + PyDict_SetItemString(ret, "native_protocol", native_protocol); + Py_DECREF(native_protocol); } xc_dom_release(dom); @@ -556,7 +556,7 @@ static PyObject *pyxc_test_assign_device { uint32_t dom; char *pci_str; - uint32_t bdf = 0; + int32_t bdf = 0; int seg, bus, dev, func; static char *kwd_list[] = { "domid", "pci", NULL }; @@ -571,12 +571,141 @@ static PyObject *pyxc_test_assign_device bdf |= (func & 0x7) << 8; if ( xc_test_assign_device(self->xc_handle, dom, bdf) != 0 ) + { + if (errno == ENOSYS) + bdf = -1; break; - + } bdf = 0; } return Py_BuildValue("i", bdf); +} + +static PyObject *pyxc_assign_device(XcObject *self, + PyObject *args, + PyObject *kwds) +{ + uint32_t dom; + char *pci_str; + int32_t bdf = 0; + int seg, bus, dev, func; + + static char *kwd_list[] = { "domid", "pci", NULL }; + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list, + &dom, &pci_str) ) + return NULL; + + while ( next_bdf(&pci_str, &seg, &bus, &dev, &func) ) + { + bdf |= (bus & 0xff) << 16; + bdf |= (dev & 0x1f) << 11; + bdf |= (func & 0x7) << 8; + + if ( xc_assign_device(self->xc_handle, dom, bdf) != 0 ) + { + if (errno == ENOSYS) + bdf = -1; + break; + } + bdf = 0; + } + + return Py_BuildValue("i", bdf); +} + +static PyObject *pyxc_deassign_device(XcObject *self, + PyObject *args, + PyObject *kwds) +{ + uint32_t dom; + char *pci_str; + int32_t bdf = 0; + int seg, bus, dev, func; + + static char *kwd_list[] = { "domid", "pci", NULL }; + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list, + &dom, &pci_str) ) + return NULL; + + while ( next_bdf(&pci_str, &seg, &bus, &dev, &func) ) + { + bdf |= (bus & 0xff) << 16; + bdf |= (dev & 0x1f) << 11; + bdf |= (func & 0x7) << 8; + + if ( xc_deassign_device(self->xc_handle, dom, bdf) != 0 ) + { + if (errno == ENOSYS) + bdf = -1; + break; + } + bdf = 0; + } + + return Py_BuildValue("i", bdf); +} + +static PyObject 
*pyxc_get_device_group(XcObject *self, + PyObject *args) +{ + domid_t domid; + uint32_t bdf = 0; + uint32_t max_sdevs, num_sdevs; + int seg, bus, dev, func, rc, i; + PyObject *Pystr; + char *group_str; + char dev_str[9]; + uint32_t *sdev_array; + + if ( !PyArg_ParseTuple(args, "iiiii", &domid, &seg, &bus, &dev, &func) ) + return NULL; + + /* Maximum allowed siblings device number per group */ + max_sdevs = 1024; + + if ( (sdev_array = malloc(max_sdevs * sizeof(*sdev_array))) == NULL ) + return PyErr_NoMemory(); + memset(sdev_array, 0, max_sdevs * sizeof(*sdev_array)); + + bdf |= (bus & 0xff) << 16; + bdf |= (dev & 0x1f) << 11; + bdf |= (func & 0x7) << 8; + + rc = xc_get_device_group(self->xc_handle, + domid, bdf, max_sdevs, &num_sdevs, sdev_array); + + if ( rc < 0 ) + { + free(sdev_array); + return pyxc_error_to_exception(); + } + + if ( !num_sdevs ) + { + free(sdev_array); + return Py_BuildValue("s", ""); + } + + if ( (group_str = malloc(num_sdevs * sizeof(dev_str))) == NULL ) + return PyErr_NoMemory(); + memset(group_str, '\0', num_sdevs * sizeof(dev_str)); + + for ( i = 0; i < num_sdevs; i++ ) + { + bus = (sdev_array[i] >> 16) & 0xff; + dev = (sdev_array[i] >> 11) & 0x1f; + func = (sdev_array[i] >> 8) & 0x7; + sprintf(dev_str, "%02x:%02x.%x,", bus, dev, func); + strcat(group_str, dev_str); + } + + Pystr = Py_BuildValue("s", group_str); + + free(sdev_array); + free(group_str); + + return Pystr; } #ifdef __ia64__ @@ -729,8 +858,8 @@ static PyObject *pyxc_hvm_build(XcObject int memsize, vcpus = 1, acpi = 0, apic = 1; static char *kwd_list[] = { "domid", - "memsize", "image", "vcpus", "acpi", - "apic", NULL }; + "memsize", "image", "vcpus", "acpi", + "apic", NULL }; if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iii", kwd_list, &dom, &memsize, &image, &vcpus, &acpi, &apic) ) @@ -782,8 +911,8 @@ static PyObject *pyxc_evtchn_alloc_unbou } static PyObject *pyxc_evtchn_reset(XcObject *self, - PyObject *args, - PyObject *kwds) + PyObject *args, + PyObject *kwds) { 
uint32_t dom; @@ -947,11 +1076,11 @@ static PyObject *pyxc_physinfo(XcObject for ( i = 0; i < info.nr_nodes; i++ ) { - xc_availheap(self->xc_handle, 0, 0, i, &free_heap); - PyList_Append(node_to_memory_obj, - PyInt_FromLong(free_heap / 1024)); - } - + xc_availheap(self->xc_handle, 0, 0, i, &free_heap); + PyList_Append(node_to_memory_obj, + PyInt_FromLong(free_heap / 1024)); + } + PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj); PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj); @@ -1517,6 +1646,17 @@ static PyMethodDef pyxc_methods[] = { " value [long]: Value of param.\n" "Returns: [int] 0 on success.\n" }, + { "get_device_group", + (PyCFunction)pyxc_get_device_group, + METH_VARARGS, "\n" + "get sibling devices infomation.\n" + " dom [int]: Domain to assign device to.\n" + " seg [int]: PCI segment.\n" + " bus [int]: PCI bus.\n" + " dev [int]: PCI dev.\n" + " func [int]: PCI func.\n" + "Returns: [string]: Sibling devices \n" }, + { "test_assign_device", (PyCFunction)pyxc_test_assign_device, METH_VARARGS | METH_KEYWORDS, "\n" @@ -1524,6 +1664,22 @@ static PyMethodDef pyxc_methods[] = { " dom [int]: Identifier of domain to build into.\n" " pci_str [str]: PCI devices.\n" "Returns: [int] 0 on success, or device bdf that can't be assigned.\n" }, + + { "assign_device", + (PyCFunction)pyxc_assign_device, + METH_VARARGS | METH_KEYWORDS, "\n" + "Assign device to IOMMU domain.\n" + " dom [int]: Domain to assign device to.\n" + " pci_str [str]: PCI devices.\n" + "Returns: [int] 0 on success, or device bdf that can't be assigned.\n" }, + + { "deassign_device", + (PyCFunction)pyxc_deassign_device, + METH_VARARGS | METH_KEYWORDS, "\n" + "Deassign device from IOMMU domain.\n" + " dom [int]: Domain to deassign device from.\n" + " pci_str [str]: PCI devices.\n" + "Returns: [int] 0 on success, or device bdf that can't be deassigned.\n" }, { "sched_id_get", (PyCFunction)pyxc_sched_id_get, diff -r d2a239224cb2 -r f1508348ffab 
tools/python/xen/xend/XendAPI.py --- a/tools/python/xen/xend/XendAPI.py Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/python/xen/xend/XendAPI.py Mon Jun 02 11:35:39 2008 +0900 @@ -1759,12 +1759,12 @@ class XendAPI(object): xendom = XendDomain.instance() xeninfo = xendom.get_vm_by_uuid(vm_ref) - resource = other_config.get("resource", 0) port = other_config.get("port", 0) - node = other_config.get("node", 0) + node = other_config.get("node", -1) + ssl = other_config.get("ssl", None) xendom.domain_migrate(xeninfo.getDomid(), destination_url, - bool(live), resource, port, node) + bool(live), port, node, ssl) return xen_api_success_void() def VM_save(self, _, vm_ref, dest, checkpoint): diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/python/xen/xend/XendDomain.py Mon Jun 02 11:35:39 2008 +0900 @@ -43,8 +43,8 @@ from xen.xend.XendConstants import DOM_S from xen.xend.XendConstants import DOM_STATE_HALTED, DOM_STATE_PAUSED from xen.xend.XendConstants import DOM_STATE_RUNNING, DOM_STATE_SUSPENDED from xen.xend.XendConstants import DOM_STATE_SHUTDOWN, DOM_STATE_UNKNOWN -from xen.xend.XendConstants import DOM_STATE_CRASHED -from xen.xend.XendConstants import TRIGGER_TYPE +from xen.xend.XendConstants import DOM_STATE_CRASHED, HVM_PARAM_ACPI_S_STATE +from xen.xend.XendConstants import TRIGGER_TYPE, TRIGGER_S3RESUME from xen.xend.XendDevices import XendDevices from xen.xend.XendAPIConstants import * @@ -1258,22 +1258,24 @@ class XendDomain: return val - def domain_migrate(self, domid, dst, live=False, port=0, node=-1): + def domain_migrate(self, domid, dst, live=False, port=0, node=-1, ssl=None): """Start domain migration. @param domid: Domain ID or Name @type domid: int or string. 
@param dst: Destination IP address @type dst: string - @keyword port: relocation port on destination - @type port: int @keyword live: Live migration @type live: bool + @keyword port: relocation port on destination + @type port: int + @keyword node: use node number for target + @type node: int + @keyword ssl: use ssl connection + @type ssl: bool @rtype: None - @keyword node: use node number for target - @rtype: int @raise XendError: Failed to migrate - @raise XendInvalidDomain: Domain is not valid + @raise XendInvalidDomain: Domain is not valid """ dominfo = self.domain_lookup_nr(domid) @@ -1294,13 +1296,14 @@ class XendDomain: """ Make sure there's memory free for enabling shadow mode """ dominfo.checkLiveMigrateMemory() - if port == 0: - port = xoptions.get_xend_relocation_port() - - tls = xoptions.get_xend_relocation_tls() - if tls: + if ssl is None: + ssl = xoptions.get_xend_relocation_ssl() + + if ssl: from OpenSSL import SSL from xen.web import connection + if port == 0: + port = xoptions.get_xend_relocation_ssl_port() try: ctx = SSL.Context(SSL.SSLv23_METHOD) sock = SSL.Connection(ctx, @@ -1328,6 +1331,8 @@ class XendDomain: os.close(p2cread) os.close(p2cwrite) else: + if port == 0: + port = xoptions.get_xend_relocation_port() try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # When connecting to our ssl enabled relocation server using a diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/python/xen/xend/XendDomainInfo.py Mon Jun 02 11:35:39 2008 +0900 @@ -2091,28 +2091,28 @@ class XendDomainInfo: xc.vcpu_setaffinity(self.domid, v, self.info['cpus'][v]) else: def find_relaxed_node(node_list): - import sys + import sys + nr_nodes = info['nr_nodes'] if node_list is None: - node_list = range(0, info['nr_nodes']) + node_list = range(0, nr_nodes) nodeload = [0] - nodeload = nodeload * info['nr_nodes'] + nodeload = nodeload * nr_nodes from 
xen.xend import XendDomain doms = XendDomain.instance().list('all') - for dom in doms: + for dom in filter (lambda d: d.domid != self.domid, doms): cpuinfo = dom.getVCPUInfo() for vcpu in sxp.children(cpuinfo, 'vcpu'): - def vinfo(n, t): - return t(sxp.child_value(vcpu, n)) - cpumap = vinfo('cpumap', list) - for i in node_list: + if sxp.child_value(vcpu, 'online') == 0: continue + cpumap = list(sxp.child_value(vcpu,'cpumap')) + for i in range(0, nr_nodes): node_cpumask = info['node_to_cpu'][i] for j in node_cpumask: if j in cpumap: nodeload[i] += 1 break - for i in node_list: - if len(info['node_to_cpu'][i]) > 0: - nodeload[i] = int(nodeload[i] / len(info['node_to_cpu'][i])) + for i in range(0, nr_nodes): + if len(info['node_to_cpu'][i]) > 0 and i in node_list: + nodeload[i] = int(nodeload[i] * 16 / len(info['node_to_cpu'][i])) else: nodeload[i] = sys.maxint index = nodeload.index( min(nodeload) ) diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/XendOptions.py --- a/tools/python/xen/xend/XendOptions.py Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/python/xen/xend/XendOptions.py Mon Jun 02 11:35:39 2008 +0900 @@ -72,6 +72,9 @@ class XendOptions: """Default for the flag indicating whether xend should run a relocation server.""" xend_relocation_server_default = 'no' + """Default for the flag indicating whether xend should run a ssl relocation server.""" + xend_relocation_ssl_server_default = 'no' + """Default interface address the xend relocation server listens at. """ xend_relocation_address_default = '' @@ -80,6 +83,9 @@ class XendOptions: """Default port xend serves relocation at. """ xend_relocation_port_default = 8002 + + """Default port xend serves ssl relocation at. 
""" + xend_relocation_ssl_port_default = 8003 xend_relocation_hosts_allow_default = '' @@ -192,6 +198,12 @@ class XendOptions: return self.get_config_bool("xend-relocation-server", self.xend_relocation_server_default) + def get_xend_relocation_ssl_server(self): + """Get the flag indicating whether xend should run a ssl relocation server. + """ + return self.get_config_bool("xend-relocation-ssl-server", + self.xend_relocation_ssl_server_default) + def get_xend_relocation_server_ssl_key_file(self): return self.get_config_string("xend-relocation-server-ssl-key-file") @@ -209,10 +221,17 @@ class XendOptions: return self.get_config_int('xend-relocation-port', self.xend_relocation_port_default) - def get_xend_relocation_tls(self): - """Whether to use tls when relocating. - """ - return self.get_config_bool('xend-relocation-tls', 'no') + def get_xend_relocation_ssl_port(self): + """Get the port xend listens at for ssl connection to its relocation + server. + """ + return self.get_config_int('xend-relocation-ssl-port', + self.xend_relocation_ssl_port_default) + + def get_xend_relocation_ssl(self): + """Whether to use ssl when relocating. 
+ """ + return self.get_config_bool('xend-relocation-ssl', 'no') def get_xend_relocation_hosts_allow(self): return self.get_config_string("xend-relocation-hosts-allow", diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/server/SrvDomain.py --- a/tools/python/xen/xend/server/SrvDomain.py Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/python/xen/xend/server/SrvDomain.py Mon Jun 02 11:35:39 2008 +0900 @@ -115,7 +115,9 @@ class SrvDomain(SrvDir): [['dom', 'int'], ['destination', 'str'], ['live', 'int'], - ['port', 'int']]) + ['port', 'int'], + ['node', 'int'], + ['ssl', 'int']]) return fn(req.args, {'dom': self.dom.domid}) def op_pincpu(self, _, req): @@ -215,6 +217,11 @@ class SrvDomain(SrvDir): def op_vcpuinfo(self, _1, req): return self.call(self.dom.getVCPUInfo, [], req) + + + def op_reset(self, _, req): + self.acceptCommand(req) + return self.xd.domain_reset(self.dom.getName()) def render_POST(self, req): @@ -257,6 +264,10 @@ class SrvDomain(SrvDir): req.write('</form>') req.write('<form method="post" action="%s">' % url) + req.write('<input type="submit" name="op" value="reset">') + req.write('</form>') + + req.write('<form method="post" action="%s">' % url) req.write('<input type="submit" name="op" value="shutdown">') req.write('<input type="radio" name="reason" value="poweroff" checked>Poweroff') req.write('<input type="radio" name="reason" value="halt">Halt') diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/python/xen/xend/server/pciif.py Mon Jun 02 11:35:39 2008 +0900 @@ -226,6 +226,39 @@ class PciController(DevController): return sxpr + def CheckSiblingDevices(self, domid, dev): + """ Check if all sibling devices of dev are owned by pciback + """ + if not self.vm.info.is_hvm(): + return + + group_str = xc.get_device_group(domid, dev.domain, dev.bus, dev.slot, dev.func) + if group_str == "": + return + + #group string format xx:xx.x,xx:xx.x, + 
devstr_len = group_str.find(',') + for i in range(0, len(group_str), devstr_len + 1): + (bus, slotfunc) = group_str[i:i + devstr_len].split(':') + (slot, func) = slotfunc.split('.') + b = parse_hex(bus) + d = parse_hex(slot) + f = parse_hex(func) + try: + sdev = PciDevice(dev.domain, b, d, f) + except Exception, e: + #no dom0 drivers bound to sdev + continue + + if sdev.driver!='pciback': + raise VmError(("pci: PCI Backend does not own\n "+ \ + "sibling device %s of device %s\n"+ \ + "See the pciback.hide kernel "+ \ + "command-line parameter or\n"+ \ + "bind your slot/device to the PCI backend using sysfs" \ + )%(sdev.name, dev.name)) + return + def setupOneDevice(self, domain, bus, slot, func): """ Attach I/O resources for device to frontend domain """ @@ -245,8 +278,19 @@ class PciController(DevController): "bind your slot/device to the PCI backend using sysfs" \ )%(dev.name)) + self.CheckSiblingDevices(fe_domid, dev) + PCIQuirk(dev.vendor, dev.device, dev.subvendor, dev.subdevice, domain, bus, slot, func) + + if not self.vm.info.is_hvm(): + # Setup IOMMU device assignment + pci_str = "0x%x, 0x%x, 0x%x, 0x%x" % (domain, bus, slot, func) + bdf = xc.assign_device(fe_domid, pci_str) + if bdf > 0: + raise VmError("Failed to assign device to IOMMU (%x:%x.%x)" + % (bus, slot, func)) + log.debug("pci: assign device %x:%x.%x" % (bus, slot, func)) for (start, size) in dev.ioports: log.debug('pci: enabling ioport 0x%x/0x%x'%(start,size)) @@ -329,6 +373,14 @@ class PciController(DevController): "command-line parameter or\n"+ \ "bind your slot/device to the PCI backend using sysfs" \ )%(dev.name)) + + if not self.vm.info.is_hvm(): + pci_str = "0x%x, 0x%x, 0x%x, 0x%x" % (domain, bus, slot, func) + bdf = xc.deassign_device(fe_domid, pci_str) + if bdf > 0: + raise VmError("Failed to deassign device from IOMMU (%x:%x.%x)" + % (bus, slot, func)) + log.debug("pci: deassign device %x:%x.%x" % (bus, slot, func)) for (start, size) in dev.ioports: log.debug('pci: disabling ioport 
0x%x/0x%x'%(start,size)) diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/server/relocate.py --- a/tools/python/xen/xend/server/relocate.py Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/python/xen/xend/server/relocate.py Mon Jun 02 11:35:39 2008 +0900 @@ -142,16 +142,22 @@ def listenRelocation(): if xoptions.get_xend_unix_server(): path = '/var/lib/xend/relocation-socket' unix.UnixListener(path, RelocationProtocol) + + interface = xoptions.get_xend_relocation_address() + + hosts_allow = xoptions.get_xend_relocation_hosts_allow() + if hosts_allow == '': + hosts_allow = None + else: + hosts_allow = map(re.compile, hosts_allow.split(" ")) + if xoptions.get_xend_relocation_server(): port = xoptions.get_xend_relocation_port() - interface = xoptions.get_xend_relocation_address() + tcp.TCPListener(RelocationProtocol, port, interface = interface, + hosts_allow = hosts_allow) - hosts_allow = xoptions.get_xend_relocation_hosts_allow() - if hosts_allow == '': - hosts_allow = None - else: - hosts_allow = map(re.compile, hosts_allow.split(" ")) - + if xoptions.get_xend_relocation_ssl_server(): + port = xoptions.get_xend_relocation_ssl_port() ssl_key_file = xoptions.get_xend_relocation_server_ssl_key_file() ssl_cert_file = xoptions.get_xend_relocation_server_ssl_cert_file() @@ -161,5 +167,5 @@ def listenRelocation(): ssl_key_file = ssl_key_file, ssl_cert_file = ssl_cert_file) else: - tcp.TCPListener(RelocationProtocol, port, interface = interface, - hosts_allow = hosts_allow) + raise XendError("ssl_key_file or ssl_cert_file for ssl relocation server is missing.") + diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xm/migrate.py --- a/tools/python/xen/xm/migrate.py Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/python/xen/xm/migrate.py Mon Jun 02 11:35:39 2008 +0900 @@ -47,6 +47,10 @@ gopts.opt('node', short='n', val='nodenu fn=set_int, default=-1, use="Use specified NUMA node on target.") +gopts.opt('ssl', short='s', + fn=set_true, default=None, + use="Use ssl connection 
for migration.") + def help(): return str(gopts) @@ -65,11 +69,13 @@ def main(argv): vm_ref = get_single_vm(dom) other_config = { "port": opts.vals.port, - "node": opts.vals.node + "node": opts.vals.node, + "ssl": opts.vals.ssl } server.xenapi.VM.migrate(vm_ref, dst, bool(opts.vals.live), other_config) else: server.xend.domain.migrate(dom, dst, opts.vals.live, opts.vals.port, - opts.vals.node) + opts.vals.node, + opts.vals.ssl) diff -r d2a239224cb2 -r f1508348ffab tools/xenstat/libxenstat/src/xenstat.c --- a/tools/xenstat/libxenstat/src/xenstat.c Mon Jun 02 11:35:02 2008 +0900 +++ b/tools/xenstat/libxenstat/src/xenstat.c Mon Jun 02 11:35:39 2008 +0900 @@ -655,12 +655,20 @@ unsigned long long xenstat_vbd_wr_reqs(x static char *xenstat_get_domain_name(xenstat_handle *handle, unsigned int domain_id) { - char path[80]; - - snprintf(path, sizeof(path),"/local/domain/%i/name", domain_id); - + char path[80], *vmpath; + + snprintf(path, sizeof(path),"/local/domain/%i/vm", domain_id); + + vmpath = xs_read(handle->xshandle, XBT_NULL, path, NULL); + + if (vmpath == NULL) + return NULL; + + snprintf(path, sizeof(path),"%s/name", vmpath); + free(vmpath); + return xs_read(handle->xshandle, XBT_NULL, path, NULL); -} +} /* Remove specified entry from list of domains */ static void xenstat_prune_domain(xenstat_node *node, unsigned int entry) diff -r d2a239224cb2 -r f1508348ffab unmodified_drivers/linux-2.6/platform-pci/evtchn.c --- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c Mon Jun 02 11:35:02 2008 +0900 +++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c Mon Jun 02 11:35:39 2008 +0900 @@ -284,7 +284,7 @@ static irqreturn_t evtchn_interrupt(int #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ /* Clear master flag /before/ clearing selector flag. 
*/ - rmb(); + wmb(); #endif l1 = xchg(&v->evtchn_pending_sel, 0); diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/acpi/cpu_idle.c --- a/xen/arch/x86/acpi/cpu_idle.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/acpi/cpu_idle.c Mon Jun 02 11:35:39 2008 +0900 @@ -173,6 +173,8 @@ static inline u32 ticks_elapsed(u32 t1, { if ( t2 >= t1 ) return (t2 - t1); + else if ( !(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER) ) + return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF); else return ((0xFFFFFFFF - t1) + t2); } diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/acpi/power.c --- a/xen/arch/x86/acpi/power.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/acpi/power.c Mon Jun 02 11:35:39 2008 +0900 @@ -238,9 +238,17 @@ static void tboot_sleep(u8 sleep_state) static void tboot_sleep(u8 sleep_state) { uint32_t shutdown_type; - - *((struct acpi_sleep_info *)(unsigned long)g_tboot_shared->acpi_sinfo) = - acpi_sinfo; + + g_tboot_shared->acpi_sinfo.pm1a_cnt = + (uint16_t)acpi_sinfo.pm1a_cnt_blk.address; + g_tboot_shared->acpi_sinfo.pm1b_cnt = + (uint16_t)acpi_sinfo.pm1b_cnt_blk.address; + g_tboot_shared->acpi_sinfo.pm1a_evt = + (uint16_t)acpi_sinfo.pm1a_evt_blk.address; + g_tboot_shared->acpi_sinfo.pm1b_evt = + (uint16_t)acpi_sinfo.pm1b_evt_blk.address; + g_tboot_shared->acpi_sinfo.pm1a_cnt_val = acpi_sinfo.pm1a_cnt_val; + g_tboot_shared->acpi_sinfo.pm1b_cnt_val = acpi_sinfo.pm1b_cnt_val; switch ( sleep_state ) { diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/cpu/amd.c --- a/xen/arch/x86/cpu/amd.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/cpu/amd.c Mon Jun 02 11:35:39 2008 +0900 @@ -74,9 +74,11 @@ static void disable_c1_ramping(void) static void disable_c1_ramping(void) { u8 pmm7; - int node; - - for (node=0; node < NR_CPUS; node++) { + int node, nr_nodes; + + /* Read the number of nodes from the first Northbridge. 
*/ + nr_nodes = ((pci_conf_read32(0, 0x18, 0x0, 0x60)>>4)&0x07)+1; + for (node = 0; node < nr_nodes; node++) { /* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */ pmm7 = pci_conf_read8(0, 0x18+node, 0x3, 0x87); /* Invalid read means we've updated every Northbridge. */ diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/crash.c --- a/xen/arch/x86/crash.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/crash.c Mon Jun 02 11:35:39 2008 +0900 @@ -102,6 +102,7 @@ void machine_crash_shutdown(void) hvm_cpu_down(); info = kexec_crash_save_info(); + info->xen_phys_start = xen_phys_start; info->dom0_pfn_to_mfn_frame_list_list = arch_get_pfn_to_mfn_frame_list_list(dom0); } diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/domain.c Mon Jun 02 11:35:39 2008 +0900 @@ -59,8 +59,6 @@ static void default_idle(void); static void default_idle(void); void (*pm_idle) (void) = default_idle; -static void unmap_vcpu_info(struct vcpu *v); - static void paravirt_ctxt_switch_from(struct vcpu *v); static void paravirt_ctxt_switch_to(struct vcpu *v); @@ -432,8 +430,6 @@ void vcpu_destroy(struct vcpu *v) { if ( is_pv_32on64_vcpu(v) ) release_compat_l4(v); - - unmap_vcpu_info(v); if ( is_hvm_vcpu(v) ) hvm_vcpu_destroy(v); @@ -825,8 +821,15 @@ int arch_set_info_guest( void arch_vcpu_reset(struct vcpu *v) { - destroy_gdt(v); - vcpu_destroy_pagetables(v); + if ( !is_hvm_vcpu(v) ) + { + destroy_gdt(v); + vcpu_destroy_pagetables(v); + } + else + { + vcpu_end_shutdown_deferral(v); + } } /* @@ -1857,16 +1860,19 @@ int domain_relinquish_resources(struct d /* Tear down paging-assistance stuff. */ paging_teardown(d); - /* Drop the in-use references to page-table bases. */ for_each_vcpu ( d, v ) + { + /* Drop the in-use references to page-table bases. */ vcpu_destroy_pagetables(v); - /* - * Relinquish GDT mappings. 
No need for explicit unmapping of the LDT - * as it automatically gets squashed when the guest's mappings go away. - */ - for_each_vcpu(d, v) + /* + * Relinquish GDT mappings. No need for explicit unmapping of the + * LDT as it automatically gets squashed with the guest mappings. + */ destroy_gdt(v); + + unmap_vcpu_info(v); + } d->arch.relmem = RELMEM_xen_l4; /* fallthrough */ diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/domctl.c Mon Jun 02 11:35:39 2008 +0900 @@ -526,14 +526,54 @@ long arch_do_domctl( } break; + case XEN_DOMCTL_get_device_group: + { + struct domain *d; + u32 max_sdevs; + u8 bus, devfn; + XEN_GUEST_HANDLE_64(uint32) sdevs; + int num_sdevs; + + ret = -ENOSYS; + if ( !iommu_enabled ) + break; + + ret = -EINVAL; + if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL ) + break; + + bus = (domctl->u.get_device_group.machine_bdf >> 16) & 0xff; + devfn = (domctl->u.get_device_group.machine_bdf >> 8) & 0xff; + max_sdevs = domctl->u.get_device_group.max_sdevs; + sdevs = domctl->u.get_device_group.sdev_array; + + num_sdevs = iommu_get_device_group(d, bus, devfn, sdevs, max_sdevs); + if ( num_sdevs < 0 ) + { + dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n"); + ret = -EFAULT; + domctl->u.get_device_group.num_sdevs = 0; + } + else + { + ret = 0; + domctl->u.get_device_group.num_sdevs = num_sdevs; + } + if ( copy_to_guest(u_domctl, domctl, 1) ) + ret = -EFAULT; + rcu_unlock_domain(d); + } + break; + case XEN_DOMCTL_test_assign_device: { u8 bus, devfn; - ret = -EINVAL; + ret = -ENOSYS; if ( !iommu_enabled ) break; + ret = -EINVAL; bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff; devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff; @@ -553,10 +593,11 @@ long arch_do_domctl( struct domain *d; u8 bus, devfn; - ret = -EINVAL; + ret = -ENOSYS; if ( !iommu_enabled ) break; + ret = -EINVAL; if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) 
) { gdprintk(XENLOG_ERR, @@ -565,6 +606,12 @@ long arch_do_domctl( } bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff; devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff; + + if ( !iommu_pv_enabled && !is_hvm_domain(d) ) + { + ret = -ENOSYS; + break; + } if ( device_assigned(bus, devfn) ) { @@ -576,7 +623,7 @@ long arch_do_domctl( ret = assign_device(d, bus, devfn); gdprintk(XENLOG_INFO, "XEN_DOMCTL_assign_device: bdf = %x:%x:%x\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); put_domain(d); } break; @@ -586,10 +633,11 @@ long arch_do_domctl( struct domain *d; u8 bus, devfn; - ret = -EINVAL; + ret = -ENOSYS; if ( !iommu_enabled ) break; + ret = -EINVAL; if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) ) { gdprintk(XENLOG_ERR, @@ -599,9 +647,16 @@ long arch_do_domctl( bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff; devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff; + if ( !iommu_pv_enabled && !is_hvm_domain(d) ) + { + ret = -ENOSYS; + break; + } + if ( !device_assigned(bus, devfn) ) break; + ret = 0; deassign_device(d, bus, devfn); gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/hpet.c --- a/xen/arch/x86/hvm/hpet.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/hvm/hpet.c Mon Jun 02 11:35:39 2008 +0900 @@ -29,9 +29,9 @@ #define S_TO_NS 1000000000ULL /* 1s = 10^9 ns */ #define S_TO_FS 1000000000000000ULL /* 1s = 10^15 fs */ -/* Frequency_of_TSC / frequency_of_HPET = 32 */ -#define TSC_PER_HPET_TICK 32 -#define guest_time_hpet(v) (hvm_get_guest_time(v) / TSC_PER_HPET_TICK) +/* Frequency_of_Xen_systeme_time / frequency_of_HPET = 16 */ +#define STIME_PER_HPET_TICK 16 +#define guest_time_hpet(v) (hvm_get_guest_time(v) / STIME_PER_HPET_TICK) #define HPET_ID 0x000 #define HPET_PERIOD 0x004 @@ -192,7 +192,7 @@ static void hpet_stop_timer(HPETState *h /* the number of HPET 
tick that stands for * 1/(2^10) second, namely, 0.9765625 milliseconds */ -#define HPET_TINY_TIME_SPAN ((h->tsc_freq >> 10) / TSC_PER_HPET_TICK) +#define HPET_TINY_TIME_SPAN ((h->stime_freq >> 10) / STIME_PER_HPET_TICK) static void hpet_set_timer(HPETState *h, unsigned int tn) { @@ -558,17 +558,17 @@ void hpet_init(struct vcpu *v) spin_lock_init(&h->lock); h->vcpu = v; - h->tsc_freq = ticks_per_sec(v); - - h->hpet_to_ns_scale = ((S_TO_NS * TSC_PER_HPET_TICK) << 10) / h->tsc_freq; + h->stime_freq = S_TO_NS; + + h->hpet_to_ns_scale = ((S_TO_NS * STIME_PER_HPET_TICK) << 10) / h->stime_freq; h->hpet_to_ns_limit = ~0ULL / h->hpet_to_ns_scale; /* 64-bit main counter; 3 timers supported; LegacyReplacementRoute. */ h->hpet.capability = 0x8086A201ULL; /* This is the number of femptoseconds per HPET tick. */ - /* Here we define HPET's frequency to be 1/32 of the TSC's */ - h->hpet.capability |= ((S_TO_FS*TSC_PER_HPET_TICK/h->tsc_freq) << 32); + /* Here we define HPET's frequency to be 1/16 of Xen system time */ + h->hpet.capability |= ((S_TO_FS*STIME_PER_HPET_TICK/h->stime_freq) << 32); for ( i = 0; i < HPET_TIMER_NUM; i++ ) { diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/hvm/hvm.c Mon Jun 02 11:35:39 2008 +0900 @@ -296,6 +296,8 @@ int hvm_domain_initialise(struct domain spin_lock_init(&d->arch.hvm_domain.irq_lock); spin_lock_init(&d->arch.hvm_domain.uc_lock); + hvm_init_guest_time(d); + d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1; hvm_init_cacheattr_region_list(d); @@ -661,7 +663,7 @@ int hvm_vcpu_initialise(struct vcpu *v) hpet_init(v); /* Init guest TSC to start from zero. */ - hvm_set_guest_time(v, 0); + hvm_set_guest_tsc(v, 0); /* Can start up without SIPI-SIPI or setvcpucontext domctl. 
*/ v->is_initialised = 1; @@ -1098,16 +1100,17 @@ int hvm_virtual_to_linear_addr( return 0; } -static void *hvm_map(unsigned long va, int size) +static void *hvm_map_entry(unsigned long va) { unsigned long gfn, mfn; p2m_type_t p2mt; uint32_t pfec; - if ( ((va & ~PAGE_MASK) + size) > PAGE_SIZE ) - { - hvm_inject_exception(TRAP_page_fault, PFEC_write_access, - (va + PAGE_SIZE - 1) & PAGE_MASK); + if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE ) + { + gdprintk(XENLOG_ERR, "Descriptor table entry " + "straddles page boundary\n"); + domain_crash(current->domain); return NULL; } @@ -1119,7 +1122,8 @@ static void *hvm_map(unsigned long va, i mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt)); if ( !p2m_is_ram(p2mt) ) { - hvm_inject_exception(TRAP_page_fault, pfec, va); + gdprintk(XENLOG_ERR, "Failed to look up descriptor table entry\n"); + domain_crash(current->domain); return NULL; } @@ -1130,7 +1134,7 @@ static void *hvm_map(unsigned long va, i return (char *)map_domain_page(mfn) + (va & ~PAGE_MASK); } -static void hvm_unmap(void *p) +static void hvm_unmap_entry(void *p) { if ( p ) unmap_domain_page(p); @@ -1166,7 +1170,7 @@ static int hvm_load_segment_selector( if ( ((sel & 0xfff8) + 7) > desctab.limit ) goto fail; - pdesc = hvm_map(desctab.base + (sel & 0xfff8), 8); + pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8)); if ( pdesc == NULL ) goto hvm_map_fail; @@ -1226,7 +1230,7 @@ static int hvm_load_segment_selector( desc.b |= 0x100; skip_accessed_flag: - hvm_unmap(pdesc); + hvm_unmap_entry(pdesc); segr.base = (((desc.b << 0) & 0xff000000u) | ((desc.b << 16) & 0x00ff0000u) | @@ -1242,7 +1246,7 @@ static int hvm_load_segment_selector( return 0; unmap_and_fail: - hvm_unmap(pdesc); + hvm_unmap_entry(pdesc); fail: hvm_inject_exception(fault_type, sel & 0xfffc, 0); hvm_map_fail: @@ -1258,7 +1262,7 @@ void hvm_task_switch( struct segment_register gdt, tr, prev_tr, segr; struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc; unsigned long eflags; - int exn_raised; + int 
exn_raised, rc; struct { u16 back_link,__blh; u32 esp0; @@ -1270,7 +1274,7 @@ void hvm_task_switch( u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi; u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9; u16 trace, iomap; - } *ptss, tss; + } tss = { 0 }; hvm_get_segment_register(v, x86_seg_gdtr, &gdt); hvm_get_segment_register(v, x86_seg_tr, &prev_tr); @@ -1283,11 +1287,11 @@ void hvm_task_switch( goto out; } - optss_desc = hvm_map(gdt.base + (prev_tr.sel & 0xfff8), 8); + optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8)); if ( optss_desc == NULL ) goto out; - nptss_desc = hvm_map(gdt.base + (tss_sel & 0xfff8), 8); + nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8)); if ( nptss_desc == NULL ) goto out; @@ -1322,84 +1326,89 @@ void hvm_task_switch( goto out; } - ptss = hvm_map(prev_tr.base, sizeof(tss)); - if ( ptss == NULL ) + rc = hvm_copy_from_guest_virt( + &tss, prev_tr.base, sizeof(tss), PFEC_page_present); + if ( rc == HVMCOPY_bad_gva_to_gfn ) goto out; eflags = regs->eflags; if ( taskswitch_reason == TSW_iret ) eflags &= ~X86_EFLAGS_NT; - ptss->cr3 = v->arch.hvm_vcpu.guest_cr[3]; - ptss->eip = regs->eip; - ptss->eflags = eflags; - ptss->eax = regs->eax; - ptss->ecx = regs->ecx; - ptss->edx = regs->edx; - ptss->ebx = regs->ebx; - ptss->esp = regs->esp; - ptss->ebp = regs->ebp; - ptss->esi = regs->esi; - ptss->edi = regs->edi; + tss.cr3 = v->arch.hvm_vcpu.guest_cr[3]; + tss.eip = regs->eip; + tss.eflags = eflags; + tss.eax = regs->eax; + tss.ecx = regs->ecx; + tss.edx = regs->edx; + tss.ebx = regs->ebx; + tss.esp = regs->esp; + tss.ebp = regs->ebp; + tss.esi = regs->esi; + tss.edi = regs->edi; hvm_get_segment_register(v, x86_seg_es, &segr); - ptss->es = segr.sel; + tss.es = segr.sel; hvm_get_segment_register(v, x86_seg_cs, &segr); - ptss->cs = segr.sel; + tss.cs = segr.sel; hvm_get_segment_register(v, x86_seg_ss, &segr); - ptss->ss = segr.sel; + tss.ss = segr.sel; hvm_get_segment_register(v, x86_seg_ds, &segr); - ptss->ds = 
segr.sel; + tss.ds = segr.sel; hvm_get_segment_register(v, x86_seg_fs, &segr); - ptss->fs = segr.sel; + tss.fs = segr.sel; hvm_get_segment_register(v, x86_seg_gs, &segr); - ptss->gs = segr.sel; + tss.gs = segr.sel; hvm_get_segment_register(v, x86_seg_ldtr, &segr); - ptss->ldt = segr.sel; - - hvm_unmap(ptss); - - ptss = hvm_map(tr.base, sizeof(tss)); - if ( ptss == NULL ) + tss.ldt = segr.sel; + + rc = hvm_copy_to_guest_virt( + prev_tr.base, &tss, sizeof(tss), PFEC_page_present); + if ( rc == HVMCOPY_bad_gva_to_gfn ) goto out; - if ( hvm_set_cr3(ptss->cr3) ) - { - hvm_unmap(ptss); + rc = hvm_copy_from_guest_virt( + &tss, tr.base, sizeof(tss), PFEC_page_present); + if ( rc == HVMCOPY_bad_gva_to_gfn ) goto out; - } - - regs->eip = ptss->eip; - regs->eflags = ptss->eflags | 2; - regs->eax = ptss->eax; - regs->ecx = ptss->ecx; - regs->edx = ptss->edx; - regs->ebx = ptss->ebx; - regs->esp = ptss->esp; - regs->ebp = ptss->ebp; - regs->esi = ptss->esi; - regs->edi = ptss->edi; + + if ( hvm_set_cr3(tss.cr3) ) + goto out; + + regs->eip = tss.eip; + regs->eflags = tss.eflags | 2; + regs->eax = tss.eax; + regs->ecx = tss.ecx; + regs->edx = tss.edx; + regs->ebx = tss.ebx; + regs->esp = tss.esp; + regs->ebp = tss.ebp; + regs->esi = tss.esi; + regs->edi = tss.edi; if ( (taskswitch_reason == TSW_call_or_int) ) { regs->eflags |= X86_EFLAGS_NT; - ptss->back_link = prev_tr.sel; + tss.back_link = prev_tr.sel; } exn_raised = 0; - if ( hvm_load_segment_selector(v, x86_seg_es, ptss->es) || - hvm_load_segment_selector(v, x86_seg_cs, ptss->cs) || - hvm_load_segment_selector(v, x86_seg_ss, ptss->ss) || - hvm_load_segment_selector(v, x86_seg_ds, ptss->ds) || - hvm_load_segment_selector(v, x86_seg_fs, ptss->fs) || - hvm_load_segment_selector(v, x86_seg_gs, ptss->gs) || - hvm_load_segment_selector(v, x86_seg_ldtr, ptss->ldt) ) + if ( hvm_load_segment_selector(v, x86_seg_es, tss.es) || + hvm_load_segment_selector(v, x86_seg_cs, tss.cs) || + hvm_load_segment_selector(v, x86_seg_ss, tss.ss) || + 
hvm_load_segment_selector(v, x86_seg_ds, tss.ds) || + hvm_load_segment_selector(v, x86_seg_fs, tss.fs) || + hvm_load_segment_selector(v, x86_seg_gs, tss.gs) || + hvm_load_segment_selector(v, x86_seg_ldtr, tss.ldt) ) exn_raised = 1; - if ( (ptss->trace & 1) && !exn_raised ) + rc = hvm_copy_to_guest_virt( + tr.base, &tss, sizeof(tss), PFEC_page_present); + if ( rc == HVMCOPY_bad_gva_to_gfn ) + exn_raised = 1; + + if ( (tss.trace & 1) && !exn_raised ) hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0); - - hvm_unmap(ptss); tr.attr.fields.type = 0xb; /* busy 32-bit tss */ hvm_set_segment_register(v, x86_seg_tr, &tr); @@ -1428,8 +1437,8 @@ void hvm_task_switch( } out: - hvm_unmap(optss_desc); - hvm_unmap(nptss_desc); + hvm_unmap_entry(optss_desc); + hvm_unmap_entry(nptss_desc); } #define HVMCOPY_from_guest (0u<<0) @@ -1632,7 +1641,7 @@ int hvm_msr_read_intercept(struct cpu_us switch ( ecx ) { case MSR_IA32_TSC: - msr_content = hvm_get_guest_time(v); + msr_content = hvm_get_guest_tsc(v); break; case MSR_IA32_APICBASE: @@ -1725,7 +1734,7 @@ int hvm_msr_write_intercept(struct cpu_u switch ( ecx ) { case MSR_IA32_TSC: - hvm_set_guest_time(v, msr_content); + hvm_set_guest_tsc(v, msr_content); pt_reset(v); break; @@ -2071,6 +2080,13 @@ void hvm_vcpu_reset_state(struct vcpu *v if ( v->is_initialised ) goto out; + if ( !paging_mode_hap(d) ) + { + if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG ) + put_page(pagetable_get_page(v->arch.guest_table)); + v->arch.guest_table = pagetable_null(); + } + ctxt = &v->arch.guest_context; memset(ctxt, 0, sizeof(*ctxt)); ctxt->flags = VGCF_online; @@ -2122,6 +2138,8 @@ void hvm_vcpu_reset_state(struct vcpu *v v->arch.hvm_vcpu.cache_tsc_offset = v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset; hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset); + + paging_update_paging_modes(v); v->arch.flags |= TF_kernel_mode; v->is_initialised = 1; diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/i8254.c --- a/xen/arch/x86/hvm/i8254.c 
Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/hvm/i8254.c Mon Jun 02 11:35:39 2008 +0900 @@ -31,6 +31,7 @@ #include <xen/lib.h> #include <xen/errno.h> #include <xen/sched.h> +#include <asm/time.h> #include <asm/hvm/hvm.h> #include <asm/hvm/io.h> #include <asm/hvm/support.h> @@ -52,6 +53,9 @@ static int handle_pit_io( int dir, uint32_t port, uint32_t bytes, uint32_t *val); static int handle_speaker_io( int dir, uint32_t port, uint32_t bytes, uint32_t *val); + +#define get_guest_time(v) \ + (is_hvm_vcpu(v) ? hvm_get_guest_time(v) : (u64)get_s_time()) /* Compute with 96 bit intermediate result: (a*b)/c */ static uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) @@ -86,8 +90,8 @@ static int pit_get_count(PITState *pit, ASSERT(spin_is_locked(&pit->lock)); - d = muldiv64(hvm_get_guest_time(v) - pit->count_load_time[channel], - PIT_FREQ, ticks_per_sec(v)); + d = muldiv64(get_guest_time(v) - pit->count_load_time[channel], + PIT_FREQ, SYSTEM_TIME_HZ); switch ( c->mode ) { @@ -117,8 +121,8 @@ static int pit_get_out(PITState *pit, in ASSERT(spin_is_locked(&pit->lock)); - d = muldiv64(hvm_get_guest_time(v) - pit->count_load_time[channel], - PIT_FREQ, ticks_per_sec(v)); + d = muldiv64(get_guest_time(v) - pit->count_load_time[channel], + PIT_FREQ, SYSTEM_TIME_HZ); switch ( s->mode ) { @@ -164,7 +168,7 @@ static void pit_set_gate(PITState *pit, case 3: /* Restart counting on rising edge. 
*/ if ( s->gate < val ) - pit->count_load_time[channel] = hvm_get_guest_time(v); + pit->count_load_time[channel] = get_guest_time(v); break; } @@ -180,7 +184,7 @@ static void pit_time_fired(struct vcpu * static void pit_time_fired(struct vcpu *v, void *priv) { uint64_t *count_load_time = priv; - *count_load_time = hvm_get_guest_time(v); + *count_load_time = get_guest_time(v); } static void pit_load_count(PITState *pit, int channel, int val) @@ -195,11 +199,11 @@ static void pit_load_count(PITState *pit val = 0x10000; if ( v == NULL ) - rdtscll(pit->count_load_time[channel]); - else - pit->count_load_time[channel] = hvm_get_guest_time(v); + pit->count_load_time[channel] = 0; + else + pit->count_load_time[channel] = get_guest_time(v); s->count = val; - period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ); + period = DIV_ROUND(val * SYSTEM_TIME_HZ, PIT_FREQ); if ( (v == NULL) || !is_hvm_vcpu(v) || (channel != 0) ) return; @@ -435,7 +439,7 @@ static int pit_load(struct domain *d, hv * time jitter here, but the wall-clock will have jumped massively, so * we hope the guest can handle it. 
*/ - pit->pt0.last_plt_gtime = hvm_get_guest_time(d->vcpu[0]); + pit->pt0.last_plt_gtime = get_guest_time(d->vcpu[0]); for ( i = 0; i < 3; i++ ) pit_load_count(pit, i, pit->hw.channels[i].count); diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/pmtimer.c --- a/xen/arch/x86/hvm/pmtimer.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/hvm/pmtimer.c Mon Jun 02 11:35:39 2008 +0900 @@ -257,7 +257,7 @@ void pmtimer_init(struct vcpu *v) spin_lock_init(&s->lock); - s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / ticks_per_sec(v); + s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / SYSTEM_TIME_HZ; s->vcpu = v; /* Intercept port I/O (need two handlers because PM1a_CNT is between diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/hvm/svm/svm.c Mon Jun 02 11:35:39 2008 +0900 @@ -299,7 +299,7 @@ static void svm_save_cpu_state(struct vc data->msr_efer = v->arch.hvm_vcpu.guest_efer; data->msr_flags = -1ULL; - data->tsc = hvm_get_guest_time(v); + data->tsc = hvm_get_guest_tsc(v); } @@ -315,7 +315,7 @@ static void svm_load_cpu_state(struct vc v->arch.hvm_vcpu.guest_efer = data->msr_efer; svm_update_guest_efer(v); - hvm_set_guest_time(v, data->tsc); + hvm_set_guest_tsc(v, data->tsc); } static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt) diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/hvm/vlapic.c Mon Jun 02 11:35:39 2008 +0900 @@ -22,18 +22,19 @@ #include <xen/types.h> #include <xen/mm.h> #include <xen/xmalloc.h> +#include <xen/domain.h> #include <xen/domain_page.h> -#include <asm/page.h> #include <xen/event.h> #include <xen/trace.h> +#include <xen/lib.h> +#include <xen/sched.h> +#include <xen/numa.h> +#include <asm/current.h> +#include <asm/page.h> #include <asm/hvm/hvm.h> #include <asm/hvm/io.h> #include <asm/hvm/support.h> -#include <xen/lib.h> -#include 
<xen/sched.h> -#include <asm/current.h> #include <asm/hvm/vmx/vmx.h> -#include <xen/numa.h> #include <public/hvm/ioreq.h> #include <public/hvm/params.h> @@ -259,6 +260,7 @@ static void vlapic_init_action(unsigned { struct vcpu *v = (struct vcpu *)_vcpu; struct domain *d = v->domain; + bool_t fpu_initialised; /* If the VCPU is not on its way down we have nothing to do. */ if ( !test_bit(_VPF_down, &v->pause_flags) ) @@ -270,15 +272,12 @@ static void vlapic_init_action(unsigned return; } + /* Reset necessary VCPU state. This does not include FPU state. */ domain_lock(d); - - /* Paranoia makes us re-assert VPF_down under the domain lock. */ - set_bit(_VPF_down, &v->pause_flags); - v->is_initialised = 0; - clear_bit(_VPF_blocked, &v->pause_flags); - + fpu_initialised = v->fpu_initialised; + vcpu_reset(v); + v->fpu_initialised = fpu_initialised; vlapic_reset(vcpu_vlapic(v)); - domain_unlock(d); vcpu_unpause(v); @@ -474,7 +473,6 @@ static uint32_t vlapic_get_tmcct(struct uint64_t counter_passed; counter_passed = ((hvm_get_guest_time(v) - vlapic->timer_last_update) - * 1000000000ULL / ticks_per_sec(v) / APIC_BUS_CYCLE_NS / vlapic->hw.timer_divisor); tmcct = tmict - counter_passed; diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Jun 02 11:35:39 2008 +0900 @@ -607,7 +607,7 @@ static void vmx_save_cpu_state(struct vc data->msr_syscall_mask = guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK]; #endif - data->tsc = hvm_get_guest_time(v); + data->tsc = hvm_get_guest_tsc(v); } static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data) @@ -625,7 +625,7 @@ static void vmx_load_cpu_state(struct vc v->arch.hvm_vmx.shadow_gs = data->shadow_gs; #endif - hvm_set_guest_time(v, data->tsc); + hvm_set_guest_tsc(v, data->tsc); } diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/vpt.c --- a/xen/arch/x86/hvm/vpt.c Mon Jun 02 11:35:02 2008 +0900 +++ 
b/xen/arch/x86/hvm/vpt.c Mon Jun 02 11:35:39 2008 +0900 @@ -25,6 +25,39 @@ #define mode_is(d, name) \ ((d)->arch.hvm_domain.params[HVM_PARAM_TIMER_MODE] == HVMPTM_##name) +void hvm_init_guest_time(struct domain *d) +{ + struct pl_time *pl = &d->arch.hvm_domain.pl_time; + + spin_lock_init(&pl->pl_time_lock); + pl->stime_offset = -(u64)get_s_time(); + pl->last_guest_time = 0; +} + +u64 hvm_get_guest_time(struct vcpu *v) +{ + struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time; + u64 now; + + /* Called from device models shared with PV guests. Be careful. */ + ASSERT(is_hvm_vcpu(v)); + + spin_lock(&pl->pl_time_lock); + now = get_s_time() + pl->stime_offset; + if ( (int64_t)(now - pl->last_guest_time) >= 0 ) + pl->last_guest_time = now; + else + now = pl->last_guest_time; + spin_unlock(&pl->pl_time_lock); + + return now + v->arch.hvm_vcpu.stime_offset; +} + +void hvm_set_guest_time(struct vcpu *v, u64 guest_time) +{ + v->arch.hvm_vcpu.stime_offset += guest_time - hvm_get_guest_time(v); +} + static int pt_irq_vector(struct periodic_time *pt, enum hvm_intsrc src) { struct vcpu *v = pt->vcpu; @@ -348,7 +381,7 @@ void create_periodic_time( pt->vcpu = v; pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu); pt->irq = irq; - pt->period_cycles = (u64)period * cpu_khz / 1000000L; + pt->period_cycles = (u64)period; pt->one_shot = one_shot; pt->scheduled = NOW() + period; /* diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/mm.c Mon Jun 02 11:35:39 2008 +0900 @@ -1939,6 +1939,20 @@ int get_page_type(struct page_info *page } while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); + if ( unlikely((x & PGT_type_mask) != type) ) + { + /* Special pages should not be accessible from devices. 
*/ + struct domain *d = page_get_owner(page); + if ( d && unlikely(need_iommu(d)) ) + { + if ( (x & PGT_type_mask) == PGT_writable_page ) + iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page))); + else if ( type == PGT_writable_page ) + iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)), + page_to_mfn(page)); + } + } + if ( unlikely(!(nx & PGT_validated)) ) { /* Try to validate page type; drop the new reference on failure. */ diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm/hap/p2m-ept.c --- a/xen/arch/x86/mm/hap/p2m-ept.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/mm/hap/p2m-ept.c Mon Jun 02 11:35:39 2008 +0900 @@ -266,12 +266,6 @@ out: iommu_unmap_page(d, gfn); } } - -#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE - /* If p2m table is shared with vtd page-table. */ - if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) ) - iommu_flush(d, gfn, (u64*)ept_entry); -#endif return rv; } diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/mm/p2m.c Mon Jun 02 11:35:39 2008 +0900 @@ -325,7 +325,7 @@ p2m_set_entry(struct domain *d, unsigned if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) ) d->arch.p2m->max_mapped_pfn = gfn; - if ( iommu_enabled && is_hvm_domain(d) ) + if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) ) { if ( p2mt == p2m_ram_rw ) for ( i = 0; i < (1UL << page_order); i++ ) @@ -868,7 +868,12 @@ p2m_remove_page(struct domain *d, unsign unsigned long i; if ( !paging_mode_translate(d) ) + { + if ( need_iommu(d) ) + for ( i = 0; i < (1 << page_order); i++ ) + iommu_unmap_page(d, mfn + i); return; + } P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn); @@ -899,7 +904,19 @@ guest_physmap_add_entry(struct domain *d int rc = 0; if ( !paging_mode_translate(d) ) - return -EINVAL; + { + if ( need_iommu(d) && t == p2m_ram_rw ) + { + for ( i = 0; i < (1 << page_order); i++ ) + if ( (rc = iommu_map_page(d, mfn + i, mfn + i)) != 0 ) + { + while ( i-- 
> 0 ) + iommu_unmap_page(d, mfn + i); + return rc; + } + } + return 0; + } #if CONFIG_PAGING_LEVELS == 3 /* diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/mm/shadow/common.c Mon Jun 02 11:35:39 2008 +0900 @@ -2799,8 +2799,11 @@ int shadow_track_dirty_vram(struct domai if ( !d->dirty_vram ) { /* Just recount from start. */ - for ( i = begin_pfn; i < end_pfn; i++ ) - flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, i, &t)); + for ( i = begin_pfn; i < end_pfn; i++ ) { + mfn_t mfn = gfn_to_mfn(d, i, &t); + if (mfn_x(mfn) != INVALID_MFN) + flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn); + } gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn); @@ -2840,61 +2843,70 @@ int shadow_track_dirty_vram(struct domai /* Iterate over VRAM to track dirty bits. */ for ( i = 0; i < nr; i++ ) { mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t); - struct page_info *page = mfn_to_page(mfn); - u32 count_info = page->u.inuse.type_info & PGT_count_mask; + struct page_info *page; + u32 count_info; int dirty = 0; paddr_t sl1ma = d->dirty_vram->sl1ma[i]; - switch (count_info) + if (mfn_x(mfn) == INVALID_MFN) { - case 0: - /* No guest reference, nothing to track. */ - break; - case 1: - /* One guest reference. */ - if ( sl1ma == INVALID_PADDR ) + dirty = 1; + } + else + { + page = mfn_to_page(mfn); + count_info = page->u.inuse.type_info & PGT_count_mask; + switch (count_info) { - /* We don't know which sl1e points to this, too bad. */ + case 0: + /* No guest reference, nothing to track. */ + break; + case 1: + /* One guest reference. */ + if ( sl1ma == INVALID_PADDR ) + { + /* We don't know which sl1e points to this, too bad. 
*/ + dirty = 1; + /* TODO: Heuristics for finding the single mapping of + * this gmfn */ + flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn); + } + else + { + /* Hopefully the most common case: only one mapping, + * whose dirty bit we can use. */ + l1_pgentry_t *sl1e; +#ifdef __i386__ + void *sl1p = map_sl1p; + unsigned long sl1mfn = paddr_to_pfn(sl1ma); + + if ( sl1mfn != map_mfn ) { + if ( map_sl1p ) + sh_unmap_domain_page(map_sl1p); + map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn)); + map_mfn = sl1mfn; + } + sl1e = sl1p + (sl1ma & ~PAGE_MASK); +#else + sl1e = maddr_to_virt(sl1ma); +#endif + + if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY ) + { + dirty = 1; + /* Note: this is atomic, so we may clear a + * _PAGE_ACCESSED set by another processor. */ + l1e_remove_flags(*sl1e, _PAGE_DIRTY); + flush_tlb = 1; + } + } + break; + default: + /* More than one guest reference, + * we don't afford tracking that. */ dirty = 1; - /* TODO: Heuristics for finding the single mapping of - * this gmfn */ - flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, begin_pfn + i, &t)); + break; } - else - { - /* Hopefully the most common case: only one mapping, - * whose dirty bit we can use. */ - l1_pgentry_t *sl1e; -#ifdef __i386__ - void *sl1p = map_sl1p; - unsigned long sl1mfn = paddr_to_pfn(sl1ma); - - if ( sl1mfn != map_mfn ) { - if ( map_sl1p ) - sh_unmap_domain_page(map_sl1p); - map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn)); - map_mfn = sl1mfn; - } - sl1e = sl1p + (sl1ma & ~PAGE_MASK); -#else - sl1e = maddr_to_virt(sl1ma); -#endif - - if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY ) - { - dirty = 1; - /* Note: this is atomic, so we may clear a - * _PAGE_ACCESSED set by another processor. */ - l1e_remove_flags(*sl1e, _PAGE_DIRTY); - flush_tlb = 1; - } - } - break; - default: - /* More than one guest reference, - * we don't afford tracking that. 
*/ - dirty = 1; - break; } if ( dirty ) @@ -2916,8 +2928,11 @@ int shadow_track_dirty_vram(struct domai { /* was clean for more than two seconds, try to disable guest * write access */ - for ( i = begin_pfn; i < end_pfn; i++ ) - flush_tlb |= sh_remove_write_access(d->vcpu[0], gfn_to_mfn(d, i, &t), 1, 0); + for ( i = begin_pfn; i < end_pfn; i++ ) { + mfn_t mfn = gfn_to_mfn(d, i, &t); + if (mfn_x(mfn) != INVALID_MFN) + flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0); + } d->dirty_vram->last_dirty = -1; } rc = 0; diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/msi.c --- a/xen/arch/x86/msi.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/msi.c Mon Jun 02 11:35:39 2008 +0900 @@ -25,6 +25,7 @@ #include <mach_apic.h> #include <io_ports.h> #include <public/physdev.h> +#include <xen/iommu.h> extern int msi_irq_enable; @@ -156,6 +157,9 @@ void read_msi_msg(unsigned int irq, stru default: BUG(); } + + if ( vtd_enabled ) + msi_msg_read_remap_rte(entry, msg); } static int set_vector_msi(struct msi_desc *entry) @@ -201,6 +205,9 @@ void write_msi_msg(unsigned int irq, str void write_msi_msg(unsigned int irq, struct msi_msg *msg) { struct msi_desc *entry = irq_desc[irq].msi_desc; + + if ( vtd_enabled ) + msi_msg_write_remap_rte(entry, msg); switch ( entry->msi_attrib.type ) { diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/setup.c Mon Jun 02 11:35:39 2008 +0900 @@ -1100,6 +1100,14 @@ void arch_get_xen_caps(xen_capabilities_ #endif } +int xen_in_range(paddr_t start, paddr_t end) +{ + start = max_t(paddr_t, start, xenheap_phys_start); + end = min_t(paddr_t, end, xenheap_phys_end); + + return start < end; +} + /* * Local variables: * mode: C diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/smpboot.c Mon Jun 02 11:35:39 2008 +0900 @@ -1391,6 +1391,11 @@ void enable_nonboot_cpus(void) panic("Not 
enough cpus"); } cpus_clear(frozen_cpus); + + /* + * Cleanup possible dangling ends after sleep... + */ + smpboot_restore_warm_reset_vector(); } #else /* ... !CONFIG_HOTPLUG_CPU */ int __cpu_disable(void) diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/tboot.c --- a/xen/arch/x86/tboot.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/tboot.c Mon Jun 02 11:35:39 2008 +0900 @@ -96,6 +96,18 @@ int tboot_in_measured_env(void) return (g_tboot_shared != NULL); } +int tboot_in_range(paddr_t start, paddr_t end) +{ + if ( g_tboot_shared == NULL || g_tboot_shared->version < 0x02 ) + return 0; + + start = max_t(paddr_t, start, g_tboot_shared->tboot_base); + end = min_t(paddr_t, end, + g_tboot_shared->tboot_base + g_tboot_shared->tboot_size); + + return start < end; +} + /* * Local variables: * mode: C diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/x86_emulate/x86_emulate.c --- a/xen/arch/x86/x86_emulate/x86_emulate.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/arch/x86/x86_emulate/x86_emulate.c Mon Jun 02 11:35:39 2008 +0900 @@ -2105,12 +2105,14 @@ x86_emulate( break; } + /* Inject #DB if single-step tracing was enabled at instruction start. */ + if ( (ctxt->regs->eflags & EFLG_TF) && (rc == X86EMUL_OKAY) && + (ops->inject_hw_exception != NULL) ) + rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION; + /* Commit shadow register state. */ _regs.eflags &= ~EFLG_RF; *ctxt->regs = _regs; - if ( (_regs.eflags & EFLG_TF) && (rc == X86EMUL_OKAY) && - (ops->inject_hw_exception != NULL) ) - rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? 
: X86EMUL_EXCEPTION; done: return rc; diff -r d2a239224cb2 -r f1508348ffab xen/common/domain.c --- a/xen/common/domain.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/common/domain.c Mon Jun 02 11:35:39 2008 +0900 @@ -637,7 +637,7 @@ void vcpu_reset(struct vcpu *v) { struct domain *d = v->domain; - domain_pause(d); + vcpu_pause(v); domain_lock(d); arch_vcpu_reset(v); @@ -653,7 +653,7 @@ void vcpu_reset(struct vcpu *v) clear_bit(_VPF_blocked, &v->pause_flags); domain_unlock(v->domain); - domain_unpause(d); + vcpu_unpause(v); } diff -r d2a239224cb2 -r f1508348ffab xen/common/grant_table.c --- a/xen/common/grant_table.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/common/grant_table.c Mon Jun 02 11:35:39 2008 +0900 @@ -32,6 +32,8 @@ #include <xen/trace.h> #include <xen/guest_access.h> #include <xen/domain_page.h> +#include <xen/iommu.h> +#include <xen/paging.h> #include <xsm/xsm.h> #ifndef max_nr_grant_frames @@ -196,8 +198,9 @@ __gnttab_map_grant_ref( struct domain *ld, *rd; struct vcpu *led; int handle; - unsigned long frame = 0; + unsigned long frame = 0, nr_gets = 0; int rc = GNTST_okay; + u32 old_pin; unsigned int cache_flags; struct active_grant_entry *act; struct grant_mapping *mt; @@ -318,6 +321,7 @@ __gnttab_map_grant_ref( } } + old_pin = act->pin; if ( op->flags & GNTMAP_device_map ) act->pin += (op->flags & GNTMAP_readonly) ? 
GNTPIN_devr_inc : GNTPIN_devw_inc; @@ -361,20 +365,17 @@ __gnttab_map_grant_ref( rc = GNTST_general_error; goto undo_out; } - + + nr_gets++; if ( op->flags & GNTMAP_host_map ) { rc = create_grant_host_mapping(op->host_addr, frame, op->flags, 0); if ( rc != GNTST_okay ) - { - if ( gnttab_host_mapping_get_page_type(op, ld, rd) ) - put_page_type(mfn_to_page(frame)); - put_page(mfn_to_page(frame)); goto undo_out; - } if ( op->flags & GNTMAP_device_map ) { + nr_gets++; (void)get_page(mfn_to_page(frame), rd); if ( !(op->flags & GNTMAP_readonly) ) get_page_type(mfn_to_page(frame), PGT_writable_page); @@ -382,6 +383,17 @@ __gnttab_map_grant_ref( } } + if ( need_iommu(ld) && + !(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) && + (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) ) + { + if ( iommu_map_page(ld, mfn_to_gmfn(ld, frame), frame) ) + { + rc = GNTST_general_error; + goto undo_out; + } + } + TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom); mt = &maptrack_entry(ld->grant_table, handle); @@ -397,6 +409,19 @@ __gnttab_map_grant_ref( return; undo_out: + if ( nr_gets > 1 ) + { + if ( !(op->flags & GNTMAP_readonly) ) + put_page_type(mfn_to_page(frame)); + put_page(mfn_to_page(frame)); + } + if ( nr_gets > 0 ) + { + if ( gnttab_host_mapping_get_page_type(op, ld, rd) ) + put_page_type(mfn_to_page(frame)); + put_page(mfn_to_page(frame)); + } + spin_lock(&rd->grant_table->lock); act = &active_entry(rd->grant_table, op->ref); @@ -451,6 +476,7 @@ __gnttab_unmap_common( struct active_grant_entry *act; grant_entry_t *sha; s16 rc = 0; + u32 old_pin; ld = current->domain; @@ -497,6 +523,7 @@ __gnttab_unmap_common( act = &active_entry(rd->grant_table, op->map->ref); sha = &shared_entry(rd->grant_table, op->map->ref); + old_pin = act->pin; if ( op->frame == 0 ) { @@ -532,6 +559,17 @@ __gnttab_unmap_common( act->pin -= GNTPIN_hstr_inc; else act->pin -= GNTPIN_hstw_inc; + } + + if ( need_iommu(ld) && + (old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) && + !(act->pin & 
(GNTPIN_hstw_mask|GNTPIN_devw_mask)) ) + { + if ( iommu_unmap_page(ld, mfn_to_gmfn(ld, op->frame)) ) + { + rc = GNTST_general_error; + goto unmap_out; + } } /* If just unmapped a writable mapping, mark as dirtied */ @@ -1073,6 +1111,11 @@ gnttab_transfer( gop.status = GNTST_bad_page; goto copyback; } + +#ifndef __ia64__ /* IA64 implicitly replaces the old page in steal_page(). */ + guest_physmap_remove_page(d, gop.mfn, mfn, 0); +#endif + flush_tlb_mask(d->domain_dirty_cpumask); /* Find the target domain. */ if ( unlikely((e = rcu_lock_domain_by_id(gop.domid)) == NULL) ) diff -r d2a239224cb2 -r f1508348ffab xen/common/libelf/libelf-private.h --- a/xen/common/libelf/libelf-private.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/common/libelf/libelf-private.h Mon Jun 02 11:35:39 2008 +0900 @@ -43,7 +43,7 @@ #define bswap_16(x) swap16(x) #define bswap_32(x) swap32(x) #define bswap_64(x) swap64(x) -#elif defined(__linux__) || defined(__Linux__) +#elif defined(__linux__) || defined(__Linux__) || defined(__MINIOS__) #include <byteswap.h> #else #error Unsupported OS diff -r d2a239224cb2 -r f1508348ffab xen/common/memory.c --- a/xen/common/memory.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/common/memory.c Mon Jun 02 11:35:39 2008 +0900 @@ -124,12 +124,9 @@ static void populate_physmap(struct memo } mfn = page_to_mfn(page); - - if ( unlikely(paging_mode_translate(d)) ) - { - guest_physmap_add_page(d, gpfn, mfn, a->extent_order); - } - else + guest_physmap_add_page(d, gpfn, mfn, a->extent_order); + + if ( !paging_mode_translate(d) ) { for ( j = 0; j < (1 << a->extent_order); j++ ) set_gpfn_from_mfn(mfn + j, gpfn + j); @@ -436,11 +433,9 @@ static long memory_exchange(XEN_GUEST_HA &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1); mfn = page_to_mfn(page); - if ( unlikely(paging_mode_translate(d)) ) - { - guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order); - } - else + guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order); + + if ( !paging_mode_translate(d) ) { 
for ( k = 0; k < (1UL << exch.out.extent_order); k++ ) set_gpfn_from_mfn(mfn + k, gpfn + k); diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Mon Jun 02 11:35:39 2008 +0900 @@ -635,6 +635,16 @@ static void amd_iommu_return_device( reassign_device(s, t, bus, devfn); } +static int amd_iommu_group_id(u8 bus, u8 devfn) +{ + int rt; + int bdf = (bus << 8) | devfn; + rt = ( bdf < ivrs_bdf_entries ) ? + ivrs_mappings[bdf].dte_requestor_id : + bdf; + return rt; +} + struct iommu_ops amd_iommu_ops = { .init = amd_iommu_domain_init, .assign_device = amd_iommu_assign_device, @@ -642,4 +652,5 @@ struct iommu_ops amd_iommu_ops = { .map_page = amd_iommu_map_page, .unmap_page = amd_iommu_unmap_page, .reassign_device = amd_iommu_return_device, + .get_device_group_id = amd_iommu_group_id, }; diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/drivers/passthrough/iommu.c Mon Jun 02 11:35:39 2008 +0900 @@ -15,14 +15,20 @@ #include <xen/sched.h> #include <xen/iommu.h> +#include <xen/paging.h> +#include <xen/guest_access.h> extern struct iommu_ops intel_iommu_ops; extern struct iommu_ops amd_iommu_ops; +static int iommu_populate_page_table(struct domain *d); int intel_vtd_setup(void); int amd_iov_detect(void); int iommu_enabled = 1; boolean_param("iommu", iommu_enabled); + +int iommu_pv_enabled = 0; +boolean_param("iommu_pv", iommu_pv_enabled); int iommu_domain_init(struct domain *domain) { @@ -54,11 +60,46 @@ int assign_device(struct domain *d, u8 b int assign_device(struct domain *d, u8 bus, u8 devfn) { struct hvm_iommu *hd = domain_hvm_iommu(d); - - if ( !iommu_enabled || !hd->platform_ops ) - return 0; - - return hd->platform_ops->assign_device(d, bus, devfn); + int rc; + + if ( !iommu_enabled || !hd->platform_ops ) + return 0; 
+ + if ( (rc = hd->platform_ops->assign_device(d, bus, devfn)) ) + return rc; + + if ( has_iommu_pdevs(d) && !need_iommu(d) ) + { + d->need_iommu = 1; + return iommu_populate_page_table(d); + } + return 0; +} + +static int iommu_populate_page_table(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct page_info *page; + int rc; + + spin_lock(&d->page_alloc_lock); + + list_for_each_entry ( page, &d->page_list, list ) + { + if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page ) + { + rc = hd->platform_ops->map_page( + d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page)); + if (rc) + { + spin_unlock(&d->page_alloc_lock); + hd->platform_ops->teardown(d); + return rc; + } + } + } + spin_unlock(&d->page_alloc_lock); + return 0; } void iommu_domain_destroy(struct domain *d) @@ -137,7 +178,13 @@ void deassign_device(struct domain *d, u if ( !iommu_enabled || !hd->platform_ops ) return; - return hd->platform_ops->reassign_device(d, dom0, bus, devfn); + hd->platform_ops->reassign_device(d, dom0, bus, devfn); + + if ( !has_iommu_pdevs(d) && need_iommu(d) ) + { + d->need_iommu = 0; + hd->platform_ops->teardown(d); + } } static int iommu_setup(void) @@ -160,7 +207,56 @@ static int iommu_setup(void) iommu_enabled = (rc == 0); out: + if ( !iommu_enabled || !vtd_enabled ) + iommu_pv_enabled = 0; printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis"); + if (iommu_enabled) + printk("I/O virtualisation for PV guests %sabled\n", + iommu_pv_enabled ? 
"en" : "dis"); return rc; } __initcall(iommu_setup); + +int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, + XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct pci_dev *pdev; + int group_id, sdev_id; + u32 bdf; + int i = 0; + struct iommu_ops *ops = hd->platform_ops; + + if ( !iommu_enabled || !ops || !ops->get_device_group_id ) + return 0; + + group_id = ops->get_device_group_id(bus, devfn); + + list_for_each_entry(pdev, + &(dom0->arch.hvm_domain.hvm_iommu.pdev_list), list) + { + if ( (pdev->bus == bus) && (pdev->devfn == devfn) ) + continue; + + sdev_id = ops->get_device_group_id(pdev->bus, pdev->devfn); + if ( (sdev_id == group_id) && (i < max_sdevs) ) + { + bdf = 0; + bdf |= (pdev->bus & 0xff) << 16; + bdf |= (pdev->devfn & 0xff) << 8; + if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) ) + return -1; + i++; + } + } + + return i; +} +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/dmar.c --- a/xen/drivers/passthrough/vtd/dmar.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/drivers/passthrough/vtd/dmar.c Mon Jun 02 11:35:39 2008 +0900 @@ -147,39 +147,6 @@ struct acpi_drhd_unit * acpi_find_matche return NULL; } -struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev) -{ - struct acpi_rmrr_unit *rmrr; - - list_for_each_entry ( rmrr, &acpi_rmrr_units, list ) - if ( acpi_pci_device_match(rmrr->devices, - rmrr->devices_cnt, dev) ) - return rmrr; - - return NULL; -} - -struct acpi_atsr_unit * acpi_find_matched_atsr_unit(struct pci_dev *dev) -{ - struct acpi_atsr_unit *atsru; - struct acpi_atsr_unit *all_ports_atsru; - - all_ports_atsru = NULL; - list_for_each_entry ( atsru, &acpi_atsr_units, list ) - { - if ( atsru->all_ports ) - all_ports_atsru = atsru; - if ( acpi_pci_device_match(atsru->devices, - atsru->devices_cnt, dev) ) - return atsru; - } - - 
if ( all_ports_atsru ) - return all_ports_atsru;; - - return NULL; -} - static int scope_device_count(void *start, void *end) { struct acpi_dev_scope *scope; diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/dmar.h --- a/xen/drivers/passthrough/vtd/dmar.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/drivers/passthrough/vtd/dmar.h Mon Jun 02 11:35:39 2008 +0900 @@ -86,7 +86,6 @@ struct acpi_atsr_unit { } struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev); -struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev); #define DMAR_TYPE 1 #define RMRR_TYPE 2 diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/extern.h --- a/xen/drivers/passthrough/vtd/extern.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/drivers/passthrough/vtd/extern.h Mon Jun 02 11:35:39 2008 +0900 @@ -27,8 +27,7 @@ extern struct ir_ctrl *ir_ctrl; extern struct ir_ctrl *ir_ctrl; void print_iommu_regs(struct acpi_drhd_unit *drhd); -void print_vtd_entries(struct domain *d, struct iommu *iommu, - int bus, int devfn, unsigned long gmfn); +void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn); void pdev_flr(u8 bus, u8 devfn); int qinval_setup(struct iommu *iommu); diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/intremap.c --- a/xen/drivers/passthrough/vtd/intremap.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/drivers/passthrough/vtd/intremap.c Mon Jun 02 11:35:39 2008 +0900 @@ -48,14 +48,14 @@ static void remap_entry_to_ioapic_rte( { struct iremap_entry *iremap_entry = NULL, *iremap_entries; struct IO_APIC_route_remap_entry *remap_rte; - unsigned int index; + int index = 0; unsigned long flags; struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); - if ( ir_ctrl == NULL ) + if ( ir_ctrl == NULL || ir_ctrl->iremap_index < 0 ) { dprintk(XENLOG_ERR VTDPREFIX, - "remap_entry_to_ioapic_rte: ir_ctl == NULL"); + "remap_entry_to_ioapic_rte: ir_ctl is not ready\n"); return; } @@ -63,11 +63,8 @@ static void remap_entry_to_ioapic_rte( 
index = (remap_rte->index_15 << 15) + remap_rte->index_0_14; if ( index > ir_ctrl->iremap_index ) - { - dprintk(XENLOG_ERR VTDPREFIX, - "Index is larger than remap table entry size. Error!\n"); - return; - } + panic("%s: index (%d) is larger than remap table entry size (%d)!\n", + __func__, index, ir_ctrl->iremap_index); spin_lock_irqsave(&ir_ctrl->iremap_lock, flags); @@ -81,79 +78,90 @@ static void remap_entry_to_ioapic_rte( old_rte->trigger = iremap_entry->lo.tm; old_rte->__reserved_2 = 0; old_rte->dest.logical.__reserved_1 = 0; - old_rte->dest.logical.logical_dest = iremap_entry->lo.dst; + old_rte->dest.logical.logical_dest = iremap_entry->lo.dst >> 8; unmap_vtd_domain_page(iremap_entries); spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags); } static void ioapic_rte_to_remap_entry(struct iommu *iommu, - int apic_id, struct IO_APIC_route_entry *old_rte) + int apic_id, struct IO_APIC_route_entry *old_rte, + unsigned int rte_upper, unsigned int value) { struct iremap_entry *iremap_entry = NULL, *iremap_entries; + struct iremap_entry new_ire; struct IO_APIC_route_remap_entry *remap_rte; - unsigned int index; + struct IO_APIC_route_entry new_rte; + int index; unsigned long flags; - int ret = 0; struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); remap_rte = (struct IO_APIC_route_remap_entry *) old_rte; spin_lock_irqsave(&ir_ctrl->iremap_lock, flags); - index = ir_ctrl->iremap_index; + + if ( remap_rte->format == 0 ) + { + ir_ctrl->iremap_index++; + index = ir_ctrl->iremap_index; + } + else + index = (remap_rte->index_15 << 15) | remap_rte->index_0_14; + if ( index > IREMAP_ENTRY_NR - 1 ) - { - dprintk(XENLOG_ERR VTDPREFIX, - "The interrupt number is more than 256!\n"); - goto out; - } + panic("ioapic_rte_to_remap_entry: intremap index is more than 256!\n"); iremap_entries = (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr); iremap_entry = &iremap_entries[index]; - if ( *(u64 *)iremap_entry != 0 ) - dprintk(XENLOG_WARNING VTDPREFIX, - "Interrupt 
remapping entry is in use already!\n"); - iremap_entry->lo.fpd = 0; - iremap_entry->lo.dm = old_rte->dest_mode; - iremap_entry->lo.rh = 0; - iremap_entry->lo.tm = old_rte->trigger; - iremap_entry->lo.dlm = old_rte->delivery_mode; - iremap_entry->lo.avail = 0; - iremap_entry->lo.res_1 = 0; - iremap_entry->lo.vector = old_rte->vector; - iremap_entry->lo.res_2 = 0; - iremap_entry->lo.dst = (old_rte->dest.logical.logical_dest << 8); - iremap_entry->hi.sid = apicid_to_bdf(apic_id); - iremap_entry->hi.sq = 0; /* comparing all 16-bit of SID */ - iremap_entry->hi.svt = 1; /* turn on requestor ID verification SID/SQ */ - iremap_entry->hi.res_1 = 0; - iremap_entry->lo.p = 1; /* finally, set present bit */ - ir_ctrl->iremap_index++; + memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry)); + + if ( rte_upper ) + new_ire.lo.dst = (value >> 24) << 8; + else + { + *(((u32 *)&new_rte) + 0) = value; + new_ire.lo.fpd = 0; + new_ire.lo.dm = new_rte.dest_mode; + new_ire.lo.rh = 0; + new_ire.lo.tm = new_rte.trigger; + new_ire.lo.dlm = new_rte.delivery_mode; + new_ire.lo.avail = 0; + new_ire.lo.res_1 = 0; + new_ire.lo.vector = new_rte.vector; + new_ire.lo.res_2 = 0; + new_ire.hi.sid = apicid_to_bdf(apic_id); + + new_ire.hi.sq = 0; /* comparing all 16-bit of SID */ + new_ire.hi.svt = 1; /* requestor ID verification SID/SQ */ + new_ire.hi.res_1 = 0; + new_ire.lo.p = 1; /* finally, set present bit */ + + /* now construct new ioapic rte entry */ + remap_rte->vector = new_rte.vector; + remap_rte->delivery_mode = 0; /* has to be 0 for remap format */ + remap_rte->index_15 = index & 0x8000; + remap_rte->index_0_14 = index & 0x7fff; + + remap_rte->delivery_status = new_rte.delivery_status; + remap_rte->polarity = new_rte.polarity; + remap_rte->irr = new_rte.irr; + remap_rte->trigger = new_rte.trigger; + remap_rte->mask = new_rte.mask; + remap_rte->reserved = 0; + remap_rte->format = 1; /* indicate remap format */ + } + + memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry)); + 
iommu_flush_iec_index(iommu, 0, index); + invalidate_sync(iommu); unmap_vtd_domain_page(iremap_entries); - iommu_flush_iec_index(iommu, 0, index); - ret = invalidate_sync(iommu); - - /* now construct new ioapic rte entry */ - remap_rte->vector = old_rte->vector; - remap_rte->delivery_mode = 0; /* has to be 0 for remap format */ - remap_rte->index_15 = index & 0x8000; - remap_rte->index_0_14 = index & 0x7fff; - remap_rte->delivery_status = old_rte->delivery_status; - remap_rte->polarity = old_rte->polarity; - remap_rte->irr = old_rte->irr; - remap_rte->trigger = old_rte->trigger; - remap_rte->mask = 1; - remap_rte->reserved = 0; - remap_rte->format = 1; /* indicate remap format */ -out: spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags); return; } -unsigned int -io_apic_read_remap_rte( +unsigned int io_apic_read_remap_rte( unsigned int apic, unsigned int reg) { struct IO_APIC_route_entry old_rte = { 0 }; @@ -198,15 +206,15 @@ io_apic_read_remap_rte( } } -void -io_apic_write_remap_rte( +void io_apic_write_remap_rte( unsigned int apic, unsigned int reg, unsigned int value) { struct IO_APIC_route_entry old_rte = { 0 }; struct IO_APIC_route_remap_entry *remap_rte; - int rte_upper = (reg & 1) ? 1 : 0; + unsigned int rte_upper = (reg & 1) ? 1 : 0; struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid); struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); + int saved_mask; if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 ) { @@ -225,21 +233,192 @@ io_apic_write_remap_rte( *(((u32 *)&old_rte) + 1) = *(IO_APIC_BASE(apic)+4); remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte; - if ( remap_rte->mask || (remap_rte->format == 0) ) - { - *IO_APIC_BASE(apic) = rte_upper ? 
++reg : reg; - *(IO_APIC_BASE(apic)+4) = value; - return; - } - - *(((u32 *)&old_rte) + rte_upper) = value; - ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid, &old_rte); + + /* mask the interrupt while we change the intremap table */ + saved_mask = remap_rte->mask; + remap_rte->mask = 1; + *IO_APIC_BASE(apic) = reg; + *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0); + remap_rte->mask = saved_mask; + + ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid, + &old_rte, rte_upper, value); /* write new entry to ioapic */ *IO_APIC_BASE(apic) = reg; - *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0); + *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+0); *IO_APIC_BASE(apic) = reg + 1; - *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+1); + *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+1); +} + +static void remap_entry_to_msi_msg( + struct iommu *iommu, struct msi_msg *msg) +{ + struct iremap_entry *iremap_entry = NULL, *iremap_entries; + struct msi_msg_remap_entry *remap_rte; + int index; + unsigned long flags; + struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); + + if ( ir_ctrl == NULL ) + { + dprintk(XENLOG_ERR VTDPREFIX, + "remap_entry_to_msi_msg: ir_ctl == NULL"); + return; + } + + remap_rte = (struct msi_msg_remap_entry *) msg; + index = (remap_rte->address_lo.index_15 << 15) | + remap_rte->address_lo.index_0_14; + + if ( index > ir_ctrl->iremap_index ) + panic("%s: index (%d) is larger than remap table entry size (%d)\n", + __func__, index, ir_ctrl->iremap_index); + + spin_lock_irqsave(&ir_ctrl->iremap_lock, flags); + + iremap_entries = + (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr); + iremap_entry = &iremap_entries[index]; + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = + MSI_ADDR_BASE_LO | + ((iremap_entry->lo.dm == 0) ? + MSI_ADDR_DESTMODE_PHYS: + MSI_ADDR_DESTMODE_LOGIC) | + ((iremap_entry->lo.dlm != dest_LowestPrio) ? 
+ MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + iremap_entry->lo.dst >> 8; + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((iremap_entry->lo.dlm != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + iremap_entry->lo.vector; + + unmap_vtd_domain_page(iremap_entries); + spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags); +} + +static void msi_msg_to_remap_entry( + struct iommu *iommu, struct pci_dev *pdev, struct msi_msg *msg) +{ + struct iremap_entry *iremap_entry = NULL, *iremap_entries; + struct iremap_entry new_ire; + struct msi_msg_remap_entry *remap_rte; + unsigned int index; + unsigned long flags; + struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); + int i = 0; + + remap_rte = (struct msi_msg_remap_entry *) msg; + spin_lock_irqsave(&ir_ctrl->iremap_lock, flags); + + iremap_entries = + (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr); + + /* If the entry for a PCI device has been there, use the old entry, + * Or, assign a new entry for it. 
+ */ + for ( i = 0; i <= ir_ctrl->iremap_index; i++ ) + { + iremap_entry = &iremap_entries[i]; + if ( iremap_entry->hi.sid == + ((pdev->bus << 8) | pdev->devfn) ) + break; + } + + if ( i > ir_ctrl->iremap_index ) + { + ir_ctrl->iremap_index++; + index = ir_ctrl->iremap_index; + } + else + index = i; + + if ( index > IREMAP_ENTRY_NR - 1 ) + panic("msi_msg_to_remap_entry: intremap index is more than 256!\n"); + + iremap_entry = &iremap_entries[index]; + memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry)); + + /* Set interrupt remapping table entry */ + new_ire.lo.fpd = 0; + new_ire.lo.dm = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1; + new_ire.lo.rh = 0; + new_ire.lo.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; + new_ire.lo.dlm = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1; + new_ire.lo.avail = 0; + new_ire.lo.res_1 = 0; + new_ire.lo.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & + MSI_DATA_VECTOR_MASK; + new_ire.lo.res_2 = 0; + new_ire.lo.dst = ((msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) + & 0xff) << 8; + + new_ire.hi.sid = (pdev->bus << 8) | pdev->devfn; + new_ire.hi.sq = 0; + new_ire.hi.svt = 1; + new_ire.hi.res_1 = 0; + new_ire.lo.p = 1; /* finally, set present bit */ + + /* now construct new MSI/MSI-X rte entry */ + remap_rte->address_lo.dontcare = 0; + remap_rte->address_lo.index_15 = index & 0x8000; + remap_rte->address_lo.index_0_14 = index & 0x7fff; + remap_rte->address_lo.SHV = 1; + remap_rte->address_lo.format = 1; + + remap_rte->address_hi = 0; + remap_rte->data = 0; + + memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry)); + iommu_flush_iec_index(iommu, 0, index); + invalidate_sync(iommu); + + unmap_vtd_domain_page(iremap_entries); + spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags); + return; +} + +void msi_msg_read_remap_rte( + struct msi_desc *msi_desc, struct msi_msg *msg) +{ + struct pci_dev *pdev = msi_desc->dev; + struct acpi_drhd_unit *drhd = NULL; + struct iommu *iommu = NULL; + struct ir_ctrl *ir_ctrl; + + 
drhd = acpi_find_matched_drhd_unit(pdev); + iommu = drhd->iommu; + + ir_ctrl = iommu_ir_ctrl(iommu); + if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 ) + return; + + remap_entry_to_msi_msg(iommu, msg); +} + +void msi_msg_write_remap_rte( + struct msi_desc *msi_desc, struct msi_msg *msg) +{ + struct pci_dev *pdev = msi_desc->dev; + struct acpi_drhd_unit *drhd = NULL; + struct iommu *iommu = NULL; + struct ir_ctrl *ir_ctrl; + + drhd = acpi_find_matched_drhd_unit(msi_desc->dev); + iommu = drhd->iommu; + + ir_ctrl = iommu_ir_ctrl(iommu); + if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 ) + return; + + msi_msg_to_remap_entry(iommu, pdev, msg); } int intremap_setup(struct iommu *iommu) @@ -260,6 +439,7 @@ int intremap_setup(struct iommu *iommu) "Cannot allocate memory for ir_ctrl->iremap_maddr\n"); return -ENODEV; } + ir_ctrl->iremap_index = -1; } #if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT) diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/drivers/passthrough/vtd/iommu.c Mon Jun 02 11:35:39 2008 +0900 @@ -112,28 +112,27 @@ struct iommu_flush *iommu_get_flush(stru return iommu ? 
&iommu->intel->flush : NULL; } -unsigned int clflush_size; -void clflush_cache_range(void *adr, int size) +static unsigned int clflush_size; +static int iommus_incoherent; +static void __iommu_flush_cache(void *addr, int size) { int i; + + if ( !iommus_incoherent ) + return; + for ( i = 0; i < size; i += clflush_size ) - clflush(adr + i); -} - -static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size) -{ - if ( !ecap_coherent(iommu->ecap) ) - clflush_cache_range(addr, size); -} - -void iommu_flush_cache_entry(struct iommu *iommu, void *addr) -{ - __iommu_flush_cache(iommu, addr, 8); -} - -void iommu_flush_cache_page(struct iommu *iommu, void *addr) -{ - __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K); + clflush((char *)addr + i); +} + +void iommu_flush_cache_entry(void *addr) +{ + __iommu_flush_cache(addr, 8); +} + +void iommu_flush_cache_page(void *addr) +{ + __iommu_flush_cache(addr, PAGE_SIZE_4K); } int nr_iommus; @@ -157,7 +156,7 @@ static u64 bus_to_context_maddr(struct i } set_root_value(*root, maddr); set_root_present(*root); - iommu_flush_cache_entry(iommu, root); + iommu_flush_cache_entry(root); } maddr = (u64) get_context_addr(*root); unmap_vtd_domain_page(root_entries); @@ -191,30 +190,22 @@ static int device_context_mapped(struct return ret; } -static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr) +static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc) { struct hvm_iommu *hd = domain_hvm_iommu(domain); - struct acpi_drhd_unit *drhd; - struct iommu *iommu; int addr_width = agaw_to_width(hd->agaw); struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(hd->agaw); int offset; unsigned long flags; - u64 pte_maddr = 0; + u64 pte_maddr = 0, maddr; u64 *vaddr = NULL; - - drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); - iommu = drhd->iommu; addr &= (((u64)1) << addr_width) - 1; spin_lock_irqsave(&hd->mapping_lock, flags); if ( hd->pgd_maddr == 0 ) - { - hd->pgd_maddr = 
alloc_pgtable_maddr(); - if ( hd->pgd_maddr == 0 ) - return 0; - } + if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr()) == 0) ) + goto out; parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr); while ( level > 1 ) @@ -224,7 +215,9 @@ static u64 addr_to_dma_page_maddr(struct if ( dma_pte_addr(*pte) == 0 ) { - u64 maddr = alloc_pgtable_maddr(); + if ( !alloc ) + break; + maddr = alloc_pgtable_maddr(); dma_set_pte_addr(*pte, maddr); vaddr = map_vtd_domain_page(maddr); if ( !vaddr ) @@ -236,7 +229,7 @@ static u64 addr_to_dma_page_maddr(struct */ dma_set_pte_readable(*pte); dma_set_pte_writable(*pte); - iommu_flush_cache_entry(iommu, pte); + iommu_flush_cache_entry(pte); } else { @@ -259,43 +252,9 @@ static u64 addr_to_dma_page_maddr(struct } unmap_vtd_domain_page(parent); + out: spin_unlock_irqrestore(&hd->mapping_lock, flags); return pte_maddr; -} - -/* return address's page at specific level */ -static u64 dma_addr_level_page_maddr( - struct domain *domain, u64 addr, int level) -{ - struct hvm_iommu *hd = domain_hvm_iommu(domain); - struct dma_pte *parent, *pte = NULL; - int total = agaw_to_level(hd->agaw); - int offset; - u64 pg_maddr = hd->pgd_maddr; - - if ( pg_maddr == 0 ) - return 0; - - parent = (struct dma_pte *)map_vtd_domain_page(pg_maddr); - while ( level <= total ) - { - offset = address_level_offset(addr, total); - pte = &parent[offset]; - if ( dma_pte_addr(*pte) == 0 ) - break; - - pg_maddr = pte->val & PAGE_MASK_4K; - unmap_vtd_domain_page(parent); - - if ( level == total ) - return pg_maddr; - - parent = map_vtd_domain_page(pte->val); - total--; - } - - unmap_vtd_domain_page(parent); - return 0; } static void iommu_flush_write_buffer(struct iommu *iommu) @@ -485,9 +444,12 @@ static int flush_iotlb_reg(void *_iommu, /* check IOTLB invalidation granularity */ if ( DMA_TLB_IAIG(val) == 0 ) printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n"); + +#ifdef VTD_DEBUG if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) ) printk(KERN_ERR VTDPREFIX 
"IOMMU: tlb flush request %x, actual %x\n", (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val)); +#endif /* flush context entry will implictly flush write buffer */ return 0; } @@ -572,34 +534,36 @@ void iommu_flush_all(void) /* clear one page's page table */ static void dma_pte_clear_one(struct domain *domain, u64 addr) { + struct hvm_iommu *hd = domain_hvm_iommu(domain); struct acpi_drhd_unit *drhd; struct iommu *iommu; struct dma_pte *page = NULL, *pte = NULL; u64 pg_maddr; - drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); - /* get last level pte */ - pg_maddr = dma_addr_level_page_maddr(domain, addr, 1); + pg_maddr = addr_to_dma_page_maddr(domain, addr, 0); if ( pg_maddr == 0 ) return; page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); pte = page + address_level_offset(addr, 1); - if ( pte ) - { - dma_clear_pte(*pte); - iommu_flush_cache_entry(drhd->iommu, pte); - - for_each_drhd_unit ( drhd ) - { - iommu = drhd->iommu; - if ( cap_caching_mode(iommu->cap) ) - iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain), - addr, 1, 0); - else if (cap_rwbf(iommu->cap)) - iommu_flush_write_buffer(iommu); - } - } + + if ( !dma_pte_present(*pte) ) + { + unmap_vtd_domain_page(page); + return; + } + + dma_clear_pte(*pte); + iommu_flush_cache_entry(pte); + + for_each_drhd_unit ( drhd ) + { + iommu = drhd->iommu; + if ( test_bit(iommu->index, &hd->iommu_bitmap) ) + iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain), + addr, 1, 0); + } + unmap_vtd_domain_page(page); } @@ -626,7 +590,6 @@ static void iommu_free_next_pagetable(u6 static void iommu_free_next_pagetable(u64 pt_maddr, unsigned long index, int level) { - struct acpi_drhd_unit *drhd; unsigned long next_index; struct dma_pte *pt_vaddr, *pde; int next_level; @@ -636,50 +599,38 @@ static void iommu_free_next_pagetable(u6 pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr); pde = &pt_vaddr[index]; - if ( dma_pte_addr(*pde) != 0 ) - { - next_level = level - 1; - if ( next_level > 1 ) - { - 
next_index = 0; - do - { - iommu_free_next_pagetable(pde->val, - next_index, next_level); - next_index++; - } while ( next_index < PTE_NUM ); - } - - dma_clear_pte(*pde); - drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); - iommu_flush_cache_entry(drhd->iommu, pde); - free_pgtable_maddr(pde->val); - unmap_vtd_domain_page(pt_vaddr); - } - else - unmap_vtd_domain_page(pt_vaddr); + if ( dma_pte_addr(*pde) == 0 ) + goto out; + + next_level = level - 1; + if ( next_level > 1 ) + { + for ( next_index = 0; next_index < PTE_NUM; next_index++ ) + iommu_free_next_pagetable(pde->val, next_index, next_level); + } + + dma_clear_pte(*pde); + iommu_flush_cache_entry(pde); + free_pgtable_maddr(pde->val); + + out: + unmap_vtd_domain_page(pt_vaddr); } /* free all VT-d page tables when shut down or destroy domain. */ static void iommu_free_pagetable(struct domain *domain) { - unsigned long index; struct hvm_iommu *hd = domain_hvm_iommu(domain); - int total_level = agaw_to_level(hd->agaw); - - if ( hd->pgd_maddr != 0 ) - { - index = 0; - do - { - iommu_free_next_pagetable(hd->pgd_maddr, - index, total_level + 1); - index++; - } while ( index < PTE_NUM ); - - free_pgtable_maddr(hd->pgd_maddr); - hd->pgd_maddr = 0; - } + int i, total_level = agaw_to_level(hd->agaw); + + if ( hd->pgd_maddr == 0 ) + return; + + for ( i = 0; i < PTE_NUM; i++ ) + iommu_free_next_pagetable(hd->pgd_maddr, i, total_level + 1); + + free_pgtable_maddr(hd->pgd_maddr); + hd->pgd_maddr = 0; } static int iommu_set_root_entry(struct iommu *iommu) @@ -777,16 +728,17 @@ int iommu_disable_translation(struct iom static struct iommu *vector_to_iommu[NR_VECTORS]; static int iommu_page_fault_do_one(struct iommu *iommu, int type, - u8 fault_reason, u16 source_id, u32 addr) + u8 fault_reason, u16 source_id, u64 addr) { dprintk(XENLOG_WARNING VTDPREFIX, - "iommu_fault:%s: %x:%x.%x addr %x REASON %x iommu->reg = %p\n", + "iommu_fault:%s: %x:%x.%x addr %"PRIx64" REASON %x " + "iommu->reg = %p\n", (type ? 
"DMA Read" : "DMA Write"), (source_id >> 8), PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr, fault_reason, iommu->reg); if ( fault_reason < 0x20 ) - print_vtd_entries(current->domain, iommu, (source_id >> 8), + print_vtd_entries(iommu, (source_id >> 8), (source_id & 0xff), (addr >> PAGE_SHIFT)); return 0; @@ -844,7 +796,8 @@ static void iommu_page_fault(int vector, { u8 fault_reason; u16 source_id; - u32 guest_addr, data; + u32 data; + u64 guest_addr; int type; /* highest 32 bits */ @@ -998,6 +951,8 @@ static int iommu_alloc(struct acpi_drhd_ static int iommu_alloc(struct acpi_drhd_unit *drhd) { struct iommu *iommu; + unsigned long sagaw; + int agaw; if ( nr_iommus > MAX_IOMMUS ) { @@ -1020,10 +975,27 @@ static int iommu_alloc(struct acpi_drhd_ set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address); iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus); - nr_iommus++; + iommu->index = nr_iommus++; iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG); iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG); + + /* Calculate number of pagetable levels: between 2 and 4. 
*/ + sagaw = cap_sagaw(iommu->cap); + for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- ) + if ( test_bit(agaw, &sagaw) ) + break; + if ( agaw < 0 ) + { + gdprintk(XENLOG_ERR VTDPREFIX, + "IOMMU: unsupported sagaw %lx\n", sagaw); + xfree(iommu); + return -ENODEV; + } + iommu->nr_pt_levels = agaw_to_level(agaw); + + if ( !ecap_coherent(iommu->ecap) ) + iommus_incoherent = 1; spin_lock_init(&iommu->lock); spin_lock_init(&iommu->register_lock); @@ -1066,9 +1038,7 @@ static int intel_iommu_domain_init(struc { struct hvm_iommu *hd = domain_hvm_iommu(d); struct iommu *iommu = NULL; - int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH; - int i, adjust_width, agaw; - unsigned long sagaw; + u64 i; struct acpi_drhd_unit *drhd; INIT_LIST_HEAD(&hd->pdev_list); @@ -1076,28 +1046,25 @@ static int intel_iommu_domain_init(struc drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); iommu = drhd->iommu; - /* Calculate AGAW. */ - if ( guest_width > cap_mgaw(iommu->cap) ) - guest_width = cap_mgaw(iommu->cap); - adjust_width = guestwidth_to_adjustwidth(guest_width); - agaw = width_to_agaw(adjust_width); - /* FIXME: hardware doesn't support it, choose a bigger one? */ - sagaw = cap_sagaw(iommu->cap); - if ( !test_bit(agaw, &sagaw) ) - { - gdprintk(XENLOG_ERR VTDPREFIX, - "IOMMU: hardware doesn't support the agaw\n"); - agaw = find_next_bit(&sagaw, 5, agaw); - if ( agaw >= 5 ) - return -ENODEV; - } - hd->agaw = agaw; + hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); if ( d->domain_id == 0 ) { - /* Set up 1:1 page table for dom0. */ + extern int xen_in_range(paddr_t start, paddr_t end); + extern int tboot_in_range(paddr_t start, paddr_t end); + + /* + * Set up 1:1 page table for dom0 except the critical segments + * like Xen and tboot. 
+ */ for ( i = 0; i < max_page; i++ ) + { + if ( xen_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) || + tboot_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) ) + continue; + iommu_map_page(d, i, i); + } setup_dom0_devices(d); setup_dom0_rmrr(d); @@ -1123,7 +1090,8 @@ static int domain_context_mapping_one( struct hvm_iommu *hd = domain_hvm_iommu(domain); struct context_entry *context, *context_entries; unsigned long flags; - u64 maddr; + u64 maddr, pgd_maddr; + int agaw; maddr = bus_to_context_maddr(iommu, bus); context_entries = (struct context_entry *)map_vtd_domain_page(maddr); @@ -1136,38 +1104,64 @@ static int domain_context_mapping_one( } spin_lock_irqsave(&iommu->lock, flags); + +#ifdef CONTEXT_PASSTHRU + if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) ) + context_set_translation_type(*context, CONTEXT_TT_PASS_THRU); + else + { +#endif + /* Ensure we have pagetables allocated down to leaf PTE. */ + if ( hd->pgd_maddr == 0 ) + { + addr_to_dma_page_maddr(domain, 0, 1); + if ( hd->pgd_maddr == 0 ) + { + nomem: + unmap_vtd_domain_page(context_entries); + spin_unlock_irqrestore(&iommu->lock, flags); + return -ENOMEM; + } + } + + /* Skip top levels of page tables for 2- and 3-level DRHDs. */ + pgd_maddr = hd->pgd_maddr; + for ( agaw = level_to_agaw(4); + agaw != level_to_agaw(iommu->nr_pt_levels); + agaw-- ) + { + struct dma_pte *p = map_vtd_domain_page(pgd_maddr); + pgd_maddr = dma_pte_addr(*p); + unmap_vtd_domain_page(p); + if ( pgd_maddr == 0 ) + goto nomem; + } + + context_set_address_root(*context, pgd_maddr); + context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); +#ifdef CONTEXT_PASSTHRU + } +#endif + /* * domain_id 0 is not valid on Intel's IOMMU, force domain_id to * be 1 based as required by intel's iommu hw. 
*/ context_set_domain_id(context, domain); - context_set_address_width(*context, hd->agaw); - - if ( ecap_pass_thru(iommu->ecap) ) - context_set_translation_type(*context, CONTEXT_TT_PASS_THRU); -#ifdef CONTEXT_PASSTHRU - else - { -#endif - ASSERT(hd->pgd_maddr != 0); - context_set_address_root(*context, hd->pgd_maddr); - context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); -#ifdef CONTEXT_PASSTHRU - } -#endif - + context_set_address_width(*context, agaw); context_set_fault_enable(*context); context_set_present(*context); - iommu_flush_cache_entry(iommu, context); + iommu_flush_cache_entry(context); unmap_vtd_domain_page(context_entries); - if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain), - (((u16)bus) << 8) | devfn, - DMA_CCMD_MASK_NOBIT, 1) ) + /* Context entry was previously non-present (with domid 0). */ + iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, 1); + if ( iommu_flush_iotlb_dsi(iommu, 0, 1) ) iommu_flush_write_buffer(iommu); - else - iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0); + + set_bit(iommu->index, &hd->iommu_bitmap); spin_unlock_irqrestore(&iommu->lock, flags); return 0; @@ -1314,7 +1308,7 @@ static int domain_context_unmap_one( spin_lock_irqsave(&iommu->lock, flags); context_clear_present(*context); context_clear_entry(*context); - iommu_flush_cache_entry(iommu, context); + iommu_flush_cache_entry(context); iommu_flush_context_global(iommu, 0); iommu_flush_iotlb_global(iommu, 0); unmap_vtd_domain_page(context_entries); @@ -1395,11 +1389,12 @@ void reassign_device_ownership( { struct hvm_iommu *source_hd = domain_hvm_iommu(source); struct hvm_iommu *target_hd = domain_hvm_iommu(target); - struct pci_dev *pdev; + struct pci_dev *pdev, *pdev2; struct acpi_drhd_unit *drhd; struct iommu *iommu; int status; unsigned long flags; + int found = 0; pdev_flr(bus, devfn); @@ -1420,6 +1415,18 @@ void reassign_device_ownership( list_move(&pdev->list, &target_hd->pdev_list); 
spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags); spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags); + + for_each_pdev ( source, pdev2 ) + { + drhd = acpi_find_matched_drhd_unit(pdev2); + if ( drhd->iommu == iommu ) + { + found = 1; + break; + } + } + if ( !found ) + clear_bit(iommu->index, &source_hd->iommu_bitmap); status = domain_context_mapping(target, iommu, pdev); if ( status != 0 ) @@ -1477,13 +1484,12 @@ int intel_iommu_map_page( int intel_iommu_map_page( struct domain *d, unsigned long gfn, unsigned long mfn) { + struct hvm_iommu *hd = domain_hvm_iommu(d); struct acpi_drhd_unit *drhd; struct iommu *iommu; struct dma_pte *page = NULL, *pte = NULL; u64 pg_maddr; - - drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); - iommu = drhd->iommu; + int pte_present; #ifdef CONTEXT_PASSTHRU /* do nothing if dom0 and iommu supports pass thru */ @@ -1491,23 +1497,27 @@ int intel_iommu_map_page( return 0; #endif - pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K); + pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1); if ( pg_maddr == 0 ) return -ENOMEM; page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); pte = page + (gfn & LEVEL_MASK); + pte_present = dma_pte_present(*pte); dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K); dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE); - iommu_flush_cache_entry(iommu, pte); + iommu_flush_cache_entry(pte); unmap_vtd_domain_page(page); for_each_drhd_unit ( drhd ) { iommu = drhd->iommu; - if ( cap_caching_mode(iommu->cap) ) - iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d), - (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0); - else if ( cap_rwbf(iommu->cap) ) + + if ( !test_bit(iommu->index, &hd->iommu_bitmap) ) + continue; + + if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d), + (paddr_t)gfn << PAGE_SHIFT_4K, 1, + !pte_present) ) iommu_flush_write_buffer(iommu); } @@ -1536,6 +1546,7 @@ int iommu_page_mapping(struct domain *do int iommu_page_mapping(struct 
domain *domain, paddr_t iova, paddr_t hpa, size_t size, int prot) { + struct hvm_iommu *hd = domain_hvm_iommu(domain); struct acpi_drhd_unit *drhd; struct iommu *iommu; u64 start_pfn, end_pfn; @@ -1543,24 +1554,23 @@ int iommu_page_mapping(struct domain *do int index; u64 pg_maddr; - drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); - iommu = drhd->iommu; if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 ) return -EINVAL; + iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K; start_pfn = hpa >> PAGE_SHIFT_4K; end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K; index = 0; while ( start_pfn < end_pfn ) { - pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K * index); + pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K*index, 1); if ( pg_maddr == 0 ) return -ENOMEM; page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); pte = page + (start_pfn & LEVEL_MASK); dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K); dma_set_pte_prot(*pte, prot); - iommu_flush_cache_entry(iommu, pte); + iommu_flush_cache_entry(pte); unmap_vtd_domain_page(page); start_pfn++; index++; @@ -1569,10 +1579,12 @@ int iommu_page_mapping(struct domain *do for_each_drhd_unit ( drhd ) { iommu = drhd->iommu; - if ( cap_caching_mode(iommu->cap) ) - iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain), - iova, index, 0); - else if ( cap_rwbf(iommu->cap) ) + + if ( !test_bit(iommu->index, &hd->iommu_bitmap) ) + continue; + + if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain), + iova, index, 1) ) iommu_flush_write_buffer(iommu); } @@ -1584,25 +1596,6 @@ int iommu_page_unmapping(struct domain * dma_pte_clear_range(domain, addr, addr + size); return 0; -} - -void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry) -{ - struct acpi_drhd_unit *drhd; - struct iommu *iommu = NULL; - struct dma_pte *pte = (struct dma_pte *) p2m_entry; - - for_each_drhd_unit ( drhd ) - { - iommu = drhd->iommu; - if ( cap_caching_mode(iommu->cap) ) - 
iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d), - (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0); - else if ( cap_rwbf(iommu->cap) ) - iommu_flush_write_buffer(iommu); - } - - iommu_flush_cache_entry(iommu, pte); } static int iommu_prepare_rmrr_dev( @@ -1916,6 +1909,7 @@ struct iommu_ops intel_iommu_ops = { .map_page = intel_iommu_map_page, .unmap_page = intel_iommu_unmap_page, .reassign_device = reassign_device_ownership, + .get_device_group_id = NULL, }; /* diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/iommu.h --- a/xen/drivers/passthrough/vtd/iommu.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/drivers/passthrough/vtd/iommu.h Mon Jun 02 11:35:39 2008 +0900 @@ -236,6 +236,7 @@ struct context_entry { #define LEVEL_STRIDE (9) #define LEVEL_MASK ((1 << LEVEL_STRIDE) - 1) #define PTE_NUM (1 << LEVEL_STRIDE) +#define level_to_agaw(val) ((val) - 2) #define agaw_to_level(val) ((val) + 2) #define agaw_to_width(val) (30 + val * LEVEL_STRIDE) #define width_to_agaw(w) ((w - 30)/LEVEL_STRIDE) diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/utils.c --- a/xen/drivers/passthrough/vtd/utils.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/drivers/passthrough/vtd/utils.c Mon Jun 02 11:35:39 2008 +0900 @@ -213,109 +213,97 @@ u32 get_level_index(unsigned long gmfn, return gmfn & LEVEL_MASK; } -void print_vtd_entries( - struct domain *d, - struct iommu *iommu, - int bus, int devfn, - unsigned long gmfn) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - struct acpi_drhd_unit *drhd; +void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn) +{ struct context_entry *ctxt_entry; struct root_entry *root_entry; struct dma_pte pte; u64 *l; - u32 l_index; - u32 i = 0; - int level = agaw_to_level(hd->agaw); - - printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x gmfn = %lx\n", - d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn); - - if ( hd->pgd_maddr == 0 ) - { - printk(" hd->pgd_maddr == 0\n"); - return; - } - printk(" hd->pgd_maddr = 
%"PRIx64"\n", hd->pgd_maddr); - - for_each_drhd_unit ( drhd ) - { - printk("---- print_vtd_entries %d ----\n", i++); - - if ( iommu->root_maddr == 0 ) - { - printk(" iommu->root_maddr = 0\n"); - continue; - } - - root_entry = - (struct root_entry *)map_vtd_domain_page(iommu->root_maddr); + u32 l_index, level; + + printk("print_vtd_entries: iommu = %p bdf = %x:%x:%x gmfn = %"PRIx64"\n", + iommu, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn); + + if ( iommu->root_maddr == 0 ) + { + printk(" iommu->root_maddr = 0\n"); + return; + } + + root_entry = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr); - printk(" root_entry = %p\n", root_entry); - printk(" root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val); - if ( !root_present(root_entry[bus]) ) - { - unmap_vtd_domain_page(root_entry); - printk(" root_entry[%x] not present\n", bus); - continue; - } - - ctxt_entry = - (struct context_entry *)map_vtd_domain_page(root_entry[bus].val); - if ( ctxt_entry == NULL ) - { - unmap_vtd_domain_page(root_entry); - printk(" ctxt_entry == NULL\n"); - continue; - } - - printk(" context = %p\n", ctxt_entry); - printk(" context[%x] = %"PRIx64" %"PRIx64"\n", - devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo); - if ( !context_present(ctxt_entry[devfn]) ) + printk(" root_entry = %p\n", root_entry); + printk(" root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val); + if ( !root_present(root_entry[bus]) ) + { + unmap_vtd_domain_page(root_entry); + printk(" root_entry[%x] not present\n", bus); + return; + } + + ctxt_entry = + (struct context_entry *)map_vtd_domain_page(root_entry[bus].val); + if ( ctxt_entry == NULL ) + { + unmap_vtd_domain_page(root_entry); + printk(" ctxt_entry == NULL\n"); + return; + } + + printk(" context = %p\n", ctxt_entry); + printk(" context[%x] = %"PRIx64"_%"PRIx64"\n", + devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo); + if ( !context_present(ctxt_entry[devfn]) ) + { + unmap_vtd_domain_page(ctxt_entry); + unmap_vtd_domain_page(root_entry); + 
printk(" ctxt_entry[%x] not present\n", devfn); + return; + } + + level = agaw_to_level(context_address_width(ctxt_entry[devfn])); + if ( level != VTD_PAGE_TABLE_LEVEL_3 && + level != VTD_PAGE_TABLE_LEVEL_4) + { + unmap_vtd_domain_page(ctxt_entry); + unmap_vtd_domain_page(root_entry); + printk("Unsupported VTD page table level (%d)!\n", level); + } + + l = maddr_to_virt(ctxt_entry[devfn].lo); + do + { + l = (u64*)(((unsigned long)l >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); + printk(" l%d = %p\n", level, l); + if ( l == NULL ) { unmap_vtd_domain_page(ctxt_entry); unmap_vtd_domain_page(root_entry); - printk(" ctxt_entry[%x] not present\n", devfn); - continue; - } - - if ( level != VTD_PAGE_TABLE_LEVEL_3 && - level != VTD_PAGE_TABLE_LEVEL_4) + printk(" l%d == NULL\n", level); + break; + } + l_index = get_level_index(gmfn, level); + printk(" l%d_index = %x\n", level, l_index); + printk(" l%d[%x] = %"PRIx64"\n", level, l_index, l[l_index]); + + pte.val = l[l_index]; + if ( !dma_pte_present(pte) ) { unmap_vtd_domain_page(ctxt_entry); unmap_vtd_domain_page(root_entry); - printk("Unsupported VTD page table level (%d)!\n", level); - continue; - } - - l = maddr_to_virt(ctxt_entry[devfn].lo); - do - { - l = (u64*)(((unsigned long)l >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K); - printk(" l%d = %p\n", level, l); - if ( l == NULL ) - { - unmap_vtd_domain_page(ctxt_entry); - unmap_vtd_domain_page(root_entry); - printk(" l%d == NULL\n", level); - break; - } - l_index = get_level_index(gmfn, level); - printk(" l%d_index = %x\n", level, l_index); - printk(" l%d[%x] = %"PRIx64"\n", level, l_index, l[l_index]); - - pte.val = l[l_index]; - if ( !dma_pte_present(pte) ) - { - unmap_vtd_domain_page(ctxt_entry); - unmap_vtd_domain_page(root_entry); - printk(" l%d[%x] not present\n", level, l_index); - break; - } - - l = maddr_to_virt(l[l_index]); - } while ( --level ); - } -} + printk(" l%d[%x] not present\n", level, l_index); + break; + } + + l = maddr_to_virt(l[l_index]); + } while ( --level ); +} + 
+/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/vtd.h --- a/xen/drivers/passthrough/vtd/vtd.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/drivers/passthrough/vtd/vtd.h Mon Jun 02 11:35:39 2008 +0900 @@ -42,13 +42,31 @@ struct IO_APIC_route_remap_entry { }; }; +struct msi_msg_remap_entry { + union { + u32 val; + struct { + u32 dontcare:2, + index_15:1, + SHV:1, + format:1, + index_0_14:15, + addr_id_val:12; /* Interrupt address identifier value, + must be 0FEEh */ + }; + } address_lo; /* low 32 bits of msi message address */ + + u32 address_hi; /* high 32 bits of msi message address */ + u32 data; /* msi message data */ +}; + unsigned int get_clflush_size(void); u64 alloc_pgtable_maddr(void); void free_pgtable_maddr(u64 maddr); void *map_vtd_domain_page(u64 maddr); void unmap_vtd_domain_page(void *va); -void iommu_flush_cache_entry(struct iommu *iommu, void *addr); -void iommu_flush_cache_page(struct iommu *iommu, void *addr); +void iommu_flush_cache_entry(void *addr); +void iommu_flush_cache_page(void *addr); #endif // _VTD_H_ diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/x86/vtd.c --- a/xen/drivers/passthrough/vtd/x86/vtd.c Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/drivers/passthrough/vtd/x86/vtd.c Mon Jun 02 11:35:39 2008 +0900 @@ -41,8 +41,6 @@ u64 alloc_pgtable_maddr(void) { struct page_info *pg; u64 *vaddr; - struct acpi_drhd_unit *drhd; - struct iommu *iommu; pg = alloc_domheap_page(NULL, 0); vaddr = map_domain_page(page_to_mfn(pg)); @@ -50,9 +48,7 @@ u64 alloc_pgtable_maddr(void) return 0; memset(vaddr, 0, PAGE_SIZE); - drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); - iommu = drhd->iommu; - iommu_flush_cache_page(iommu, vaddr); + iommu_flush_cache_page(vaddr); unmap_domain_page(vaddr); return page_to_maddr(pg); @@ -123,181 +119,3 @@ void hvm_dpci_isairq_eoi(struct domain * } } } - -void 
iommu_set_pgd(struct domain *d) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - unsigned long p2m_table; - - p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table)); - - if ( paging_mode_hap(d) ) - { - int level = agaw_to_level(hd->agaw); - struct dma_pte *dpte = NULL; - mfn_t pgd_mfn; - - switch ( level ) - { - case VTD_PAGE_TABLE_LEVEL_3: - dpte = map_domain_page(p2m_table); - if ( !dma_pte_present(*dpte) ) - { - gdprintk(XENLOG_ERR VTDPREFIX, - "iommu_set_pgd: second level wasn't there\n"); - unmap_domain_page(dpte); - return; - } - pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K); - hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K; - unmap_domain_page(dpte); - break; - case VTD_PAGE_TABLE_LEVEL_4: - pgd_mfn = _mfn(p2m_table); - hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K; - break; - default: - gdprintk(XENLOG_ERR VTDPREFIX, - "iommu_set_pgd:Unsupported p2m table sharing level!\n"); - break; - } - } - else - { -#if CONFIG_PAGING_LEVELS == 3 - struct dma_pte *pte = NULL, *pgd_vaddr = NULL, *pmd_vaddr = NULL; - int i; - u64 pmd_maddr; - unsigned long flags; - l3_pgentry_t *l3e; - int level = agaw_to_level(hd->agaw); - - spin_lock_irqsave(&hd->mapping_lock, flags); - hd->pgd_maddr = alloc_pgtable_maddr(); - if ( hd->pgd_maddr == 0 ) - { - spin_unlock_irqrestore(&hd->mapping_lock, flags); - gdprintk(XENLOG_ERR VTDPREFIX, - "Allocate pgd memory failed!\n"); - return; - } - - pgd_vaddr = map_vtd_domain_page(hd->pgd_maddr); - l3e = map_domain_page(p2m_table); - switch ( level ) - { - case VTD_PAGE_TABLE_LEVEL_3: /* Weybridge */ - /* We only support 8 entries for the PAE L3 p2m table */ - for ( i = 0; i < 8 ; i++ ) - { - /* Don't create new L2 entry, use ones from p2m table */ - pgd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW; - } - break; - - case VTD_PAGE_TABLE_LEVEL_4: /* Stoakley */ - /* We allocate one more page for the top vtd page table. 
*/ - pmd_maddr = alloc_pgtable_maddr(); - if ( pmd_maddr == 0 ) - { - unmap_vtd_domain_page(pgd_vaddr); - unmap_domain_page(l3e); - spin_unlock_irqrestore(&hd->mapping_lock, flags); - gdprintk(XENLOG_ERR VTDPREFIX, - "Allocate pmd memory failed!\n"); - return; - } - - pte = &pgd_vaddr[0]; - dma_set_pte_addr(*pte, pmd_maddr); - dma_set_pte_readable(*pte); - dma_set_pte_writable(*pte); - - pmd_vaddr = map_vtd_domain_page(pmd_maddr); - for ( i = 0; i < 8; i++ ) - { - /* Don't create new L2 entry, use ones from p2m table */ - pmd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW; - } - - unmap_vtd_domain_page(pmd_vaddr); - break; - default: - gdprintk(XENLOG_ERR VTDPREFIX, - "iommu_set_pgd:Unsupported p2m table sharing level!\n"); - break; - } - - unmap_vtd_domain_page(pgd_vaddr); - unmap_domain_page(l3e); - spin_unlock_irqrestore(&hd->mapping_lock, flags); - -#elif CONFIG_PAGING_LEVELS == 4 - mfn_t pgd_mfn; - l3_pgentry_t *l3e; - int level = agaw_to_level(hd->agaw); - - switch ( level ) - { - case VTD_PAGE_TABLE_LEVEL_3: - l3e = map_domain_page(p2m_table); - if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) - { - gdprintk(XENLOG_ERR VTDPREFIX, - "iommu_set_pgd: second level wasn't there\n"); - unmap_domain_page(l3e); - return; - } - - pgd_mfn = _mfn(l3e_get_pfn(*l3e)); - hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K; - unmap_domain_page(l3e); - break; - case VTD_PAGE_TABLE_LEVEL_4: - pgd_mfn = _mfn(p2m_table); - hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K; - break; - default: - gdprintk(XENLOG_ERR VTDPREFIX, - "iommu_set_pgd:Unsupported p2m table sharing level!\n"); - break; - } -#endif - } -} - -void iommu_free_pgd(struct domain *d) -{ -#if CONFIG_PAGING_LEVELS == 3 - struct hvm_iommu *hd = domain_hvm_iommu(d); - int level = agaw_to_level(hd->agaw); - struct dma_pte *pgd_vaddr = NULL; - - switch ( level ) - { - case VTD_PAGE_TABLE_LEVEL_3: - if ( hd->pgd_maddr != 0 ) - { - free_pgtable_maddr(hd->pgd_maddr); - hd->pgd_maddr = 0; - } - 
break; - case VTD_PAGE_TABLE_LEVEL_4: - if ( hd->pgd_maddr != 0 ) - { - pgd_vaddr = (struct dma_pte*)map_vtd_domain_page(hd->pgd_maddr); - if ( pgd_vaddr[0].val != 0 ) - free_pgtable_maddr(pgd_vaddr[0].val); - unmap_vtd_domain_page(pgd_vaddr); - free_pgtable_maddr(hd->pgd_maddr); - hd->pgd_maddr = 0; - } - break; - default: - gdprintk(XENLOG_ERR VTDPREFIX, - "Unsupported p2m table sharing level!\n"); - break; - } -#endif -} - diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/include/asm-x86/hvm/hvm.h Mon Jun 02 11:35:39 2008 +0900 @@ -147,8 +147,10 @@ void hvm_send_assist_req(struct vcpu *v) void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc); u64 hvm_get_guest_tsc(struct vcpu *v); -#define hvm_set_guest_time(vcpu, gtime) hvm_set_guest_tsc(vcpu, gtime) -#define hvm_get_guest_time(vcpu) hvm_get_guest_tsc(vcpu) + +void hvm_init_guest_time(struct domain *d); +void hvm_set_guest_time(struct vcpu *v, u64 guest_time); +u64 hvm_get_guest_time(struct vcpu *v); #define hvm_paging_enabled(v) \ (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG)) diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/vcpu.h --- a/xen/include/asm-x86/hvm/vcpu.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/include/asm-x86/hvm/vcpu.h Mon Jun 02 11:35:39 2008 +0900 @@ -68,6 +68,9 @@ struct hvm_vcpu { struct mtrr_state mtrr; u64 pat_cr; + /* In mode delay_for_missed_ticks, VCPUs have differing guest times. */ + int64_t stime_offset; + /* Which cache mode is this VCPU in (CR0:CD/NW)? 
*/ u8 cache_mode; diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jun 02 11:35:39 2008 +0900 @@ -49,7 +49,6 @@ void vmx_asm_do_vmentry(void); void vmx_asm_do_vmentry(void); void vmx_intr_assist(void); void vmx_do_resume(struct vcpu *); -void set_guest_time(struct vcpu *v, u64 gtime); void vmx_vlapic_msr_changed(struct vcpu *v); void vmx_realmode(struct cpu_user_regs *regs); diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/vpt.h --- a/xen/include/asm-x86/hvm/vpt.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/include/asm-x86/hvm/vpt.h Mon Jun 02 11:35:39 2008 +0900 @@ -57,7 +57,7 @@ typedef struct HPETState { typedef struct HPETState { struct hpet_registers hpet; struct vcpu *vcpu; - uint64_t tsc_freq; + uint64_t stime_freq; uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */ uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns */ uint64_t mc_offset; @@ -137,6 +137,11 @@ struct pl_time { /* platform time */ struct RTCState vrtc; struct HPETState vhpet; struct PMTState vpmt; + /* guest_time = Xen sys time + stime_offset */ + int64_t stime_offset; + /* Ensures monotonicity in appropriate timer modes. */ + uint64_t last_guest_time; + spinlock_t pl_time_lock; }; #define ticks_per_sec(v) (v->domain->arch.hvm_domain.tsc_frequency) diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/tboot.h --- a/xen/include/asm-x86/tboot.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/include/asm-x86/tboot.h Mon Jun 02 11:35:39 2008 +0900 @@ -46,7 +46,15 @@ typedef struct __attribute__ ((__packed_ } uuid_t; /* used to communicate between tboot and the launched kernel (i.e. 
Xen) */ -#define MAX_TB_ACPI_SINFO_SIZE 64 + +typedef struct __attribute__ ((__packed__)) { + uint16_t pm1a_cnt; + uint16_t pm1b_cnt; + uint16_t pm1a_evt; + uint16_t pm1b_evt; + uint16_t pm1a_cnt_val; + uint16_t pm1b_cnt_val; +} tboot_acpi_sleep_info; typedef struct __attribute__ ((__packed__)) { /* version 0x01+ fields: */ @@ -58,8 +66,9 @@ typedef struct __attribute__ ((__packed_ uint32_t shutdown_type; /* type of shutdown (TB_SHUTDOWN_*) */ uint32_t s3_tb_wakeup_entry;/* entry point for tboot s3 wake up */ uint32_t s3_k_wakeup_entry; /* entry point for xen s3 wake up */ - uint8_t acpi_sinfo[MAX_TB_ACPI_SINFO_SIZE]; - /* where kernel put acpi sleep info in Sx */ + tboot_acpi_sleep_info + acpi_sinfo; /* where kernel put acpi sleep info in Sx */ + uint8_t reserved[52]; /* this pad is for compat with old field */ /* version 0x02+ fields: */ uint32_t tboot_base; /* starting addr for tboot */ uint32_t tboot_size; /* size of tboot */ diff -r d2a239224cb2 -r f1508348ffab xen/include/public/domctl.h --- a/xen/include/public/domctl.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/include/public/domctl.h Mon Jun 02 11:35:39 2008 +0900 @@ -448,6 +448,16 @@ typedef struct xen_domctl_assign_device typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); +/* Retrieve sibling devices infomation of machine_bdf */ +#define XEN_DOMCTL_get_device_group 50 +struct xen_domctl_get_device_group { + uint32_t machine_bdf; /* IN */ + uint32_t max_sdevs; /* IN */ + uint32_t num_sdevs; /* OUT */ + XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */ +}; +typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t); /* Pass-through interrupts: bind real irq -> hvm devfn. 
*/ #define XEN_DOMCTL_bind_pt_irq 38 @@ -619,6 +629,7 @@ struct xen_domctl { struct xen_domctl_hvmcontext hvmcontext; struct xen_domctl_address_size address_size; struct xen_domctl_sendtrigger sendtrigger; + struct xen_domctl_get_device_group get_device_group; struct xen_domctl_assign_device assign_device; struct xen_domctl_bind_pt_irq bind_pt_irq; struct xen_domctl_memory_mapping memory_mapping; diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/elfcore.h --- a/xen/include/xen/elfcore.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/include/xen/elfcore.h Mon Jun 02 11:35:39 2008 +0900 @@ -66,6 +66,7 @@ typedef struct { unsigned long xen_compile_time; unsigned long tainted; #if defined(__i386__) || defined(__x86_64__) + unsigned long xen_phys_start; unsigned long dom0_pfn_to_mfn_frame_list_list; #endif #if defined(__ia64__) diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/hvm/iommu.h --- a/xen/include/xen/hvm/iommu.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/include/xen/hvm/iommu.h Mon Jun 02 11:35:39 2008 +0900 @@ -43,6 +43,7 @@ struct hvm_iommu { int agaw; /* adjusted guest address width, 0 is level 2 30-bit */ struct list_head g2m_ioport_list; /* guest to machine ioport mapping */ domid_t iommu_domid; /* domain id stored in iommu */ + u64 iommu_bitmap; /* bitmap of iommu(s) that the domain uses */ /* amd iommu support */ int domain_id; @@ -54,4 +55,7 @@ struct hvm_iommu { struct iommu_ops *platform_ops; }; +#define has_iommu_pdevs(domain) \ + (!list_empty(&(domain->arch.hvm_domain.hvm_iommu.pdev_list))) + #endif /* __ASM_X86_HVM_IOMMU_H__ */ diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/include/xen/iommu.h Mon Jun 02 11:35:39 2008 +0900 @@ -29,6 +29,7 @@ extern int vtd_enabled; extern int iommu_enabled; +extern int iommu_pv_enabled; #define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu) #define domain_vmx_iommu(d) (&d->arch.hvm_domain.hvm_iommu.vmx_iommu) @@ -43,7 +44,9 @@ 
struct iommu { struct iommu { struct list_head list; void __iomem *reg; /* Pointer to hardware regs, virtual addr */ + u32 index; /* Sequence number of iommu */ u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ + u32 nr_pt_levels; u64 cap; u64 ecap; spinlock_t lock; /* protect context, domain ids */ @@ -58,14 +61,13 @@ int device_assigned(u8 bus, u8 devfn); int device_assigned(u8 bus, u8 devfn); int assign_device(struct domain *d, u8 bus, u8 devfn); void deassign_device(struct domain *d, u8 bus, u8 devfn); +int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, + XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs); void reassign_device_ownership(struct domain *source, struct domain *target, u8 bus, u8 devfn); int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn); int iommu_unmap_page(struct domain *d, unsigned long gfn); -void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry); -void iommu_set_pgd(struct domain *d); -void iommu_free_pgd(struct domain *d); void iommu_domain_teardown(struct domain *d); int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq); int dpci_ioport_intercept(ioreq_t *p); @@ -76,6 +78,11 @@ unsigned int io_apic_read_remap_rte(unsi unsigned int io_apic_read_remap_rte(unsigned int apic, unsigned int reg); void io_apic_write_remap_rte(unsigned int apic, unsigned int reg, unsigned int value); + +struct msi_desc; +struct msi_msg; +void msi_msg_read_remap_rte(struct msi_desc *msi_desc, struct msi_msg *msg); +void msi_msg_write_remap_rte(struct msi_desc *msi_desc, struct msi_msg *msg); struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu); struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu); struct iommu_flush *iommu_get_flush(struct iommu *iommu); @@ -94,6 +101,7 @@ struct iommu_ops { int (*unmap_page)(struct domain *d, unsigned long gfn); void (*reassign_device)(struct domain *s, struct domain *t, u8 bus, u8 devfn); + int (*get_device_group_id)(u8 bus, u8 devfn); }; #endif /* _IOMMU_H_ */ diff -r 
d2a239224cb2 -r f1508348ffab xen/include/xen/sched.h --- a/xen/include/xen/sched.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/include/xen/sched.h Mon Jun 02 11:35:39 2008 +0900 @@ -186,6 +186,8 @@ struct domain /* Is this an HVM guest? */ bool_t is_hvm; + /* Does this guest need iommu mappings? */ + bool_t need_iommu; /* Is this guest fully privileged (aka dom0)? */ bool_t is_privileged; /* Which guest this guest has privileges on */ @@ -515,6 +517,7 @@ static inline void vcpu_unblock(struct v #define is_hvm_domain(d) ((d)->is_hvm) #define is_hvm_vcpu(v) (is_hvm_domain(v->domain)) +#define need_iommu(d) ((d)->need_iommu && !(d)->is_hvm) extern enum cpufreq_controller { FREQCTL_none, FREQCTL_dom0_kernel diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/time.h --- a/xen/include/xen/time.h Mon Jun 02 11:35:02 2008 +0900 +++ b/xen/include/xen/time.h Mon Jun 02 11:35:39 2008 +0900 @@ -47,6 +47,7 @@ struct tm { }; struct tm gmtime(unsigned long t); +#define SYSTEM_TIME_HZ 1000000000ULL #define NOW() ((s_time_t)get_s_time()) #define SECONDS(_s) ((s_time_t)((_s) * 1000000000ULL)) #define MILLISECS(_ms) ((s_time_t)((_ms) * 1000000ULL)) _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |