[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User Alex Williamson <alex.williamson@xxxxxx> # Date 1206038140 21600 # Node ID 42f6c206c951568c8969c32f643235fe98f44410 # Parent 8c921adf4833a0359775c8f20f9549f6cb11df7b # Parent c8c8b5c5f6d77a47bfa1869ac60e4975fa764ad6 merge with xen-unstable.hg --- xen/arch/x86/hvm/iommu.c | 145 -- xen/drivers/passthrough/vtd/io.c | 296 ---- xen/include/acpi/actbl2.h | 232 --- xen/include/asm-x86/hvm/iommu.h | 56 xen/include/asm-x86/hvm/vmx/intel-iommu.h | 460 ------- xen/include/asm-x86/iommu.h | 109 - config/StdGNU.mk | 5 config/SunOS.mk | 5 docs/src/interface.tex | 6 extras/mini-os/Makefile | 7 extras/mini-os/README | 34 extras/mini-os/arch/ia64/time.c | 9 extras/mini-os/arch/x86/time.c | 6 extras/mini-os/blkfront.c | 19 extras/mini-os/console/xencons_ring.c | 1 extras/mini-os/daytime.c | 11 extras/mini-os/events.c | 6 extras/mini-os/fbfront.c | 28 extras/mini-os/fs-front.c | 1 extras/mini-os/include/blkfront.h | 2 extras/mini-os/include/fcntl.h | 6 extras/mini-os/include/netfront.h | 2 extras/mini-os/kernel.c | 13 extras/mini-os/lib/sys.c | 53 extras/mini-os/lwip-net.c | 17 extras/mini-os/main.c | 74 - extras/mini-os/netfront.c | 29 extras/mini-os/xenbus/xenbus.c | 3 stubdom/Makefile | 14 stubdom/README | 2 stubdom/c/Makefile | 9 stubdom/c/main.c | 6 stubdom/stubdom-dm | 18 tools/blktap/drivers/Makefile | 26 tools/blktap/drivers/blktapctrl.c | 104 + tools/blktap/drivers/block-qcow2.c | 5 tools/blktap/drivers/tapaio.c | 1 tools/blktap/drivers/tapdisk.h | 12 tools/blktap/lib/Makefile | 29 tools/blktap/lib/blktaplib.h | 10 tools/examples/xmexample.hvm | 5 tools/firmware/rombios/rombios.c | 2 tools/include/Makefile | 2 tools/include/xen-sys/Linux/gntdev.h | 14 tools/ioemu/Makefile.target | 4 tools/ioemu/block-vbd.c | 4 tools/ioemu/configure | 32 tools/ioemu/hw/ide.c | 24 tools/ioemu/hw/ne2000.c | 4 tools/ioemu/hw/scsi-disk.c | 5 tools/ioemu/hw/vga.c | 4 tools/ioemu/hw/xen_blktap.c | 686 +++++++++++ tools/ioemu/hw/xen_blktap.h | 57 tools/ioemu/hw/xen_machine_pv.c | 9 tools/ioemu/sdl.c | 171 ++ tools/ioemu/vl.c | 29 tools/ioemu/vl.h | 2 tools/ioemu/vnc.c | 23 tools/ioemu/xenstore.c | 4 tools/libxc/ia64/dom_fw_acpi.c | 1 tools/libxc/ia64/xc_dom_ia64_util.h | 4 tools/libxc/xc_domain_save.c | 40 tools/libxc/xc_linux.c | 15 tools/libxc/xc_minios.c | 14 tools/libxc/xenctrl.h | 16 tools/python/xen/xend/XendConfig.py | 101 + tools/python/xen/xend/XendDomainInfo.py | 277 +++- tools/python/xen/xend/image.py | 9 tools/python/xen/xend/server/BlktapController.py | 4 tools/python/xen/xend/server/DevController.py | 50 tools/python/xen/xend/server/SrvDomain.py | 16 tools/python/xen/xend/server/pciif.py | 231 +++ tools/python/xen/xend/server/vfbif.py | 2 tools/python/xen/xm/create.py | 16 tools/python/xen/xm/main.py | 60 tools/python/xen/xm/xenapi_create.py | 2 tools/xcutils/xc_save.c | 1 tools/xenstat/xentop/xentop.c | 1 tools/xenstore/Makefile | 32 xen/arch/ia64/linux-xen/setup.c | 15 xen/arch/ia64/xen/acpi.c | 47 xen/arch/ia64/xen/dom_fw_common.c | 107 - xen/arch/ia64/xen/pcdp.c | 19 xen/arch/x86/acpi/boot.c | 125 +- xen/arch/x86/acpi/power.c | 31 xen/arch/x86/domain.c | 11 xen/arch/x86/domctl.c | 2 xen/arch/x86/hvm/Makefile | 1 xen/arch/x86/hvm/hvm.c | 4 xen/arch/x86/hvm/intercept.c | 2 xen/arch/x86/hvm/svm/emulate.c | 4 xen/arch/x86/hvm/svm/svm.c | 10 xen/arch/x86/hvm/svm/vmcb.c | 14 xen/arch/x86/hvm/vlapic.c | 20 xen/arch/x86/hvm/vmx/vmcs.c | 14 xen/arch/x86/hvm/vmx/vmx.c | 14 xen/arch/x86/hvm/vmx/vpmu_core2.c | 16 xen/arch/x86/irq.c | 11 xen/arch/x86/mm.c | 12 xen/arch/x86/mm/p2m.c | 4 
xen/arch/x86/mm/paging.c | 11 xen/arch/x86/mm/shadow/private.h | 2 xen/arch/x86/numa.c | 6 xen/arch/x86/srat.c | 4 xen/arch/x86/traps.c | 84 + xen/arch/x86/x86_emulate.c | 27 xen/common/domain.c | 10 xen/common/event_channel.c | 14 xen/common/keyhandler.c | 6 xen/common/schedule.c | 2 xen/drivers/acpi/Makefile | 3 xen/drivers/acpi/hwregs.c | 385 ++++++ xen/drivers/acpi/numa.c | 2 xen/drivers/acpi/osl.c | 183 ++ xen/drivers/acpi/tables.c | 45 xen/drivers/passthrough/Makefile | 3 xen/drivers/passthrough/amd/iommu_acpi.c | 297 ++-- xen/drivers/passthrough/amd/iommu_detect.c | 168 +- xen/drivers/passthrough/amd/iommu_init.c | 90 - xen/drivers/passthrough/amd/iommu_map.c | 32 xen/drivers/passthrough/amd/pci_amd_iommu.c | 78 - xen/drivers/passthrough/io.c | 272 ++++ xen/drivers/passthrough/iommu.c | 136 ++ xen/drivers/passthrough/vtd/Makefile | 1 xen/drivers/passthrough/vtd/dmar.c | 8 xen/drivers/passthrough/vtd/dmar.h | 3 xen/drivers/passthrough/vtd/extern.h | 2 xen/drivers/passthrough/vtd/intremap.c | 26 xen/drivers/passthrough/vtd/iommu.c | 145 +- xen/drivers/passthrough/vtd/iommu.h | 454 +++++++ xen/drivers/passthrough/vtd/qinval.c | 10 xen/drivers/passthrough/vtd/utils.c | 20 xen/drivers/passthrough/vtd/vtd.h | 11 xen/drivers/video/vesa.c | 2 xen/include/acpi/acconfig.h | 85 - xen/include/acpi/acexcep.h | 69 - xen/include/acpi/acglobal.h | 404 +++--- xen/include/acpi/achware.h | 179 +- xen/include/acpi/aclocal.h | 1225 +++++++++---------- xen/include/acpi/acmacros.h | 615 +++++----- xen/include/acpi/acnames.h | 83 + xen/include/acpi/acobject.h | 657 ++++------ xen/include/acpi/acoutput.h | 32 xen/include/acpi/acpi.h | 36 xen/include/acpi/acpi_bus.h | 381 +++--- xen/include/acpi/acpiosxf.h | 426 ++---- xen/include/acpi/acpixf.h | 679 ++++------- xen/include/acpi/acstruct.h | 297 ++-- xen/include/acpi/actbl.h | 548 ++++---- xen/include/acpi/actbl1.h | 763 +++++++++++- xen/include/acpi/actypes.h | 1414 ++++++++++------------- xen/include/acpi/acutils.h | 1280 +++++++------------- xen/include/acpi/pdc_intel.h | 33 xen/include/acpi/platform/acenv.h | 171 +- xen/include/acpi/platform/acgcc.h | 18 xen/include/acpi/platform/aclinux.h | 63 - xen/include/asm-x86/acpi.h | 134 -- xen/include/asm-x86/apic.h | 1 xen/include/asm-x86/bitops.h | 361 +++-- xen/include/asm-x86/cpufeature.h | 4 xen/include/asm-x86/event.h | 5 xen/include/asm-x86/fixmap.h | 2 xen/include/asm-x86/grant_table.h | 2 xen/include/asm-x86/hvm/domain.h | 4 xen/include/asm-x86/hvm/support.h | 2 xen/include/asm-x86/hvm/svm/vmcb.h | 2 xen/include/asm-x86/hvm/vmx/vmcs.h | 2 xen/include/asm-x86/io_apic.h | 2 xen/include/asm-x86/msr-index.h | 2 xen/include/asm-x86/numa.h | 2 xen/include/asm-x86/shared.h | 8 xen/include/public/arch-x86/hvm/save.h | 3 xen/include/public/arch-x86/xen-x86_64.h | 5 xen/include/public/vcpu.h | 10 xen/include/xen/acpi.h | 133 -- xen/include/xen/hvm/iommu.h | 56 xen/include/xen/iommu.h | 115 + xen/include/xen/shared.h | 36 178 files changed, 9592 insertions(+), 7626 deletions(-) diff -r 8c921adf4833 -r 42f6c206c951 config/StdGNU.mk --- a/config/StdGNU.mk Fri Mar 14 15:07:45 2008 -0600 +++ b/config/StdGNU.mk Thu Mar 20 12:35:40 2008 -0600 @@ -41,9 +41,8 @@ SHLIB_CFLAGS = -shared SHLIB_CFLAGS = -shared ifneq ($(debug),y) -# Optimisation flags are overridable -CFLAGS ?= -O2 -fomit-frame-pointer +CFLAGS += -O2 -fomit-frame-pointer else # Less than -O1 produces bad code and large stack frames -CFLAGS ?= -O1 -fno-omit-frame-pointer +CFLAGS += -O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls endif diff -r 
8c921adf4833 -r 42f6c206c951 config/SunOS.mk --- a/config/SunOS.mk Fri Mar 14 15:07:45 2008 -0600 +++ b/config/SunOS.mk Thu Mar 20 12:35:40 2008 -0600 @@ -45,11 +45,10 @@ SHLIB_CFLAGS = -R $(SunOS_LIBDIR) -share SHLIB_CFLAGS = -R $(SunOS_LIBDIR) -shared ifneq ($(debug),y) -# Optimisation flags are overridable -CFLAGS ?= -O2 -fno-omit-frame-pointer +CFLAGS += -O2 -fno-omit-frame-pointer else # Less than -O1 produces bad code and large stack frames -CFLAGS ?= -O1 -fno-omit-frame-pointer +CFLAGS += -O1 -fno-omit-frame-pointer endif CFLAGS += -Wa,--divide -D_POSIX_C_SOURCE=200112L -D__EXTENSIONS__ diff -r 8c921adf4833 -r 42f6c206c951 docs/src/interface.tex --- a/docs/src/interface.tex Fri Mar 14 15:07:45 2008 -0600 +++ b/docs/src/interface.tex Thu Mar 20 12:35:40 2008 -0600 @@ -1,6 +1,6 @@ -\documentclass[11pt,twoside,final,openright]{report} -\usepackage{a4,graphicx,html,setspace,times} -\usepackage{comment,parskip} +\documentclass[11pt,twoside,final,openright,a4paper]{report} +\usepackage{graphicx,html,setspace,times} +\usepackage{parskip} \setstretch{1.15} % LIBRARY FUNCTIONS diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/Makefile --- a/extras/mini-os/Makefile Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/Makefile Thu Mar 20 12:35:40 2008 -0600 @@ -88,6 +88,11 @@ CFLAGS += -DCONFIG_QEMU CFLAGS += -DCONFIG_QEMU endif +ifneq ($(CDIR),) +OBJS += $(CDIR)/main.a +LDLIBS += +endif + ifeq ($(libc),y) LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -lxenctrl -lxenguest LDLIBS += -lpci @@ -95,7 +100,7 @@ LDLIBS += -lc LDLIBS += -lc endif -ifneq ($(caml)-$(qemu)-$(lwip),--y) +ifneq ($(caml)-$(qemu)-$(CDIR)-$(lwip),---y) OBJS := $(filter-out daytime.o, $(OBJS)) endif diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/README --- a/extras/mini-os/README Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/README Thu Mar 20 12:35:40 2008 -0600 @@ -13,18 +13,34 @@ This includes: * registering virtual interrupt handlers (for timer interrupts) * a simple page and memory allocator * minimal libc support - -Stuff it doesn't show: - - * modifying page tables - * network code - * block-device code - + * minimal Copy-on-Write support + * network, block, framebuffer support + * transparent access to FileSystem exports (see tools/fs-back) - to build it just type make. + +- to build it with TCP/IP support, download LWIP 1.3 source code and type + + make LWIPDIR=/path/to/lwip/source + +- to build it with much better libc support, see the stubdom/ directory - to start it do the following in domain0 (assuming xend is running) # xm create domain_config -this starts the kernel and prints out a bunch of stuff and then once -every second the system time. +This starts the kernel and prints out a bunch of stuff and then once every +second the system time. + +If you have setup a disk in the config file (e.g. +disk = [ 'file:/tmp/foo,hda,r' ] ), it will loop reading it. If that disk is +writable (e.g. disk = [ 'file:/tmp/foo,hda,w' ] ), it will write data patterns +and re-read them. + +If you have setup a network in the config file (e.g. vif = [''] ), it will +print incoming packets. + +If you have setup a VFB in the config file (e.g. vfb = ['type=sdl'] ), it will +show a mouse with which you can draw color squares. + +If you have compiled it with TCP/IP support, it will run a daytime server on +TCP port 13. 
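
The daytime service mentioned in the README above amounts to a handful of netconn calls. A minimal sketch, assuming the lwIP 1.3 netconn API that Mini-OS links against (in 1.3, netconn_accept() returns the new connection directly); the real daytime.c additionally formats the current time into the reply:

    #include <lwip/api.h>

    /* Minimal daytime-style TCP responder on port 13 (sketch only). */
    static void daytime_server(void *arg)
    {
        struct netconn *listener, *session;
        static const char msg[] = "time goes here\r\n"; /* real code formats wall-clock time */

        listener = netconn_new(NETCONN_TCP);
        netconn_bind(listener, NULL, 13);        /* NULL binds to any local address */
        netconn_listen(listener);

        for (;;) {
            session = netconn_accept(listener);  /* lwIP 1.3: NULL on error */
            if (session == NULL)
                continue;
            netconn_write(session, msg, sizeof(msg) - 1, NETCONN_COPY);
            netconn_close(session);
            netconn_delete(session);
        }
    }
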
diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/arch/ia64/time.c --- a/extras/mini-os/arch/ia64/time.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/arch/ia64/time.c Thu Mar 20 12:35:40 2008 -0600 @@ -246,7 +246,7 @@ init_time(void) { uint64_t new; efi_time_t tm; - int err = 0; + evtchn_port_t port = 0; printk("Initialising time\n"); calculate_frequencies(); @@ -267,11 +267,12 @@ init_time(void) } else printk("efi_get_time() failed\n"); - err = bind_virq(VIRQ_ITC, timer_interrupt, NULL); - if (err == -1) { - printk("XEN timer request chn bind failed %i\n", err); + port = bind_virq(VIRQ_ITC, timer_interrupt, NULL); + if (port == -1) { + printk("XEN timer request chn bind failed %i\n", port); return; } + unmask_evtchn(port); itc_alt = ia64_get_itc(); itc_at_boot = itc_alt; new = ia64_get_itc() + itm_val; diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/arch/x86/time.c --- a/extras/mini-os/arch/x86/time.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/arch/x86/time.c Thu Mar 20 12:35:40 2008 -0600 @@ -222,6 +222,8 @@ static void timer_handler(evtchn_port_t void init_time(void) { + evtchn_port_t port; printk("Initialising timer interface\n"); - bind_virq(VIRQ_TIMER, &timer_handler, NULL); -} + port = bind_virq(VIRQ_TIMER, &timer_handler, NULL); + unmask_evtchn(port); +} diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/blkfront.c --- a/extras/mini-os/blkfront.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/blkfront.c Thu Mar 20 12:35:40 2008 -0600 @@ -43,7 +43,7 @@ struct blkfront_dev { struct blkif_front_ring ring; grant_ref_t ring_ref; - evtchn_port_t evtchn, local_port; + evtchn_port_t evtchn; blkif_vdev_t handle; char *nodename; @@ -70,7 +70,7 @@ void blkfront_handler(evtchn_port_t port wake_up(&blkfront_queue); } -struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode) +struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info) { xenbus_transaction_t xbt; char* err; @@ -92,14 +92,9 @@ struct blkfront_dev *init_blkfront(char dev = malloc(sizeof(*dev)); dev->nodename = strdup(nodename); - evtchn_alloc_unbound_t op; - op.dom = DOMID_SELF; snprintf(path, sizeof(path), "%s/backend-id", nodename); - dev->dom = op.remote_dom = xenbus_read_integer(path); - HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op); - clear_evtchn(op.port); /* Without, handler gets invoked now! 
*/ - dev->local_port = bind_evtchn(op.port, blkfront_handler, dev); - dev->evtchn=op.port; + dev->dom = xenbus_read_integer(path); + evtchn_alloc_unbound(dev->dom, blkfront_handler, dev, &dev->evtchn); s = (struct blkif_sring*) alloc_page(); memset(s,0,PAGE_SIZE); @@ -181,6 +176,9 @@ done: xenbus_unwatch_path(XBT_NIL, path); + snprintf(path, sizeof(path), "%s/info", dev->backend); + *info = xenbus_read_integer(path); + snprintf(path, sizeof(path), "%s/sectors", dev->backend); // FIXME: read_integer returns an int, so disk size limited to 1TB for now *sectors = dev->sectors = xenbus_read_integer(path); @@ -194,6 +192,7 @@ done: snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend); dev->flush = xenbus_read_integer(path); } + unmask_evtchn(dev->evtchn); printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size); printk("**************************\n"); @@ -219,7 +218,7 @@ void shutdown_blkfront(struct blkfront_d err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); xenbus_wait_for_value(path,"6"); - unbind_evtchn(dev->local_port); + unbind_evtchn(dev->evtchn); free(nodename); free(dev->backend); diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/console/xencons_ring.c --- a/extras/mini-os/console/xencons_ring.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/console/xencons_ring.c Thu Mar 20 12:35:40 2008 -0600 @@ -86,6 +86,7 @@ int xencons_ring_init(void) printk("XEN console request chn bind failed %i\n", err); return err; } + unmask_evtchn(start_info.console.domU.evtchn); /* In case we have in-flight data after save/restore... */ notify_daemon(); diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/daytime.c --- a/extras/mini-os/daytime.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/daytime.c Thu Mar 20 12:35:40 2008 -0600 @@ -15,16 +15,19 @@ void run_server(void *p) void run_server(void *p) { struct ip_addr listenaddr = { 0 }; - struct ip_addr ipaddr = { htonl(0x0a000001) }; - struct ip_addr netmask = { htonl(0xff000000) }; - struct ip_addr gw = { 0 }; struct netconn *listener; struct netconn *session; struct timeval tv; err_t rc; start_networking(); - networking_set_addr(&ipaddr, &netmask, &gw); + + if (0) { + struct ip_addr ipaddr = { htonl(0x0a000001) }; + struct ip_addr netmask = { htonl(0xff000000) }; + struct ip_addr gw = { 0 }; + networking_set_addr(&ipaddr, &netmask, &gw); + } tprintk("Opening connection\n"); diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/events.c --- a/extras/mini-os/events.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/events.c Thu Mar 20 12:35:40 2008 -0600 @@ -86,9 +86,6 @@ evtchn_port_t bind_evtchn(evtchn_port_t ev_actions[port].data = data; wmb(); ev_actions[port].handler = handler; - - /* Finally unmask the port */ - unmask_evtchn(port); return port; } @@ -191,8 +188,7 @@ int evtchn_bind_interdomain(domid_t pal, if (err) return err; set_bit(op.local_port,bound_ports); - evtchn_port_t port = op.local_port; - clear_evtchn(port); /* Without, handler gets invoked now! 
*/ + evtchn_port_t port = op.local_port; *local_port = bind_evtchn(port, handler, data); return err; } diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/fbfront.c --- a/extras/mini-os/fbfront.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/fbfront.c Thu Mar 20 12:35:40 2008 -0600 @@ -26,7 +26,7 @@ struct kbdfront_dev { domid_t dom; struct xenkbd_page *page; - evtchn_port_t evtchn, local_port; + evtchn_port_t evtchn; char *nodename; char *backend; @@ -68,14 +68,9 @@ struct kbdfront_dev *init_kbdfront(char dev = malloc(sizeof(*dev)); dev->nodename = strdup(nodename); - evtchn_alloc_unbound_t op; - op.dom = DOMID_SELF; snprintf(path, sizeof(path), "%s/backend-id", nodename); - dev->dom = op.remote_dom = xenbus_read_integer(path); - HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op); - clear_evtchn(op.port); /* Without, handler gets invoked now! */ - dev->local_port = bind_evtchn(op.port, kbdfront_handler, dev); - dev->evtchn=op.port; + dev->dom = xenbus_read_integer(path); + evtchn_alloc_unbound(dev->dom, kbdfront_handler, dev, &dev->evtchn); dev->page = s = (struct xenkbd_page*) alloc_page(); memset(s,0,PAGE_SIZE); @@ -151,6 +146,7 @@ done: err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 4); /* connected */ } + unmask_evtchn(dev->evtchn); printk("************************** KBDFRONT\n"); @@ -208,7 +204,7 @@ void shutdown_kbdfront(struct kbdfront_d err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); xenbus_wait_for_value(path,"6"); - unbind_evtchn(dev->local_port); + unbind_evtchn(dev->evtchn); free_pages(dev->page,0); free(nodename); @@ -241,7 +237,7 @@ struct fbfront_dev { domid_t dom; struct xenfb_page *page; - evtchn_port_t evtchn, local_port; + evtchn_port_t evtchn; char *nodename; char *backend; @@ -281,14 +277,9 @@ struct fbfront_dev *init_fbfront(char *n dev = malloc(sizeof(*dev)); dev->nodename = strdup(nodename); - evtchn_alloc_unbound_t op; - op.dom = DOMID_SELF; snprintf(path, sizeof(path), "%s/backend-id", nodename); - dev->dom = op.remote_dom = xenbus_read_integer(path); - HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op); - clear_evtchn(op.port); /* Without, handler gets invoked now! 
*/ - dev->local_port = bind_evtchn(op.port, fbfront_handler, dev); - dev->evtchn=op.port; + dev->dom = xenbus_read_integer(path); + evtchn_alloc_unbound(dev->dom, fbfront_handler, dev, &dev->evtchn); dev->page = s = (struct xenfb_page*) alloc_page(); memset(s,0,PAGE_SIZE); @@ -397,6 +388,7 @@ done: err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 4); /* connected */ } + unmask_evtchn(dev->evtchn); printk("************************** FBFRONT\n"); @@ -462,7 +454,7 @@ void shutdown_fbfront(struct fbfront_dev err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); xenbus_wait_for_value(path,"6"); - unbind_evtchn(dev->local_port); + unbind_evtchn(dev->evtchn); free_pages(dev->page,0); free(nodename); diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/fs-front.c --- a/extras/mini-os/fs-front.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/fs-front.c Thu Mar 20 12:35:40 2008 -0600 @@ -943,6 +943,7 @@ static int init_fs_import(struct fs_impo //ANY_CPU, import, &import->local_port)); + unmask_evtchn(import->local_port); self_id = get_self_id(); diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/include/blkfront.h --- a/extras/mini-os/include/blkfront.h Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/include/blkfront.h Thu Mar 20 12:35:40 2008 -0600 @@ -15,7 +15,7 @@ struct blkfront_aiocb void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret); }; -struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode); +struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info); #ifdef HAVE_LIBC int blkfront_open(struct blkfront_dev *dev); #endif diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/include/fcntl.h --- a/extras/mini-os/include/fcntl.h Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/include/fcntl.h Thu Mar 20 12:35:40 2008 -0600 @@ -1,5 +1,9 @@ #ifndef _I386_FCNTL_H #define _I386_FCNTL_H + +#ifdef HAVE_LIBC +#include_next <fcntl.h> +#else /* open/fcntl - O_SYNC is only implemented on blocks devices and on files located on an ext2 file system */ @@ -90,3 +94,5 @@ int open(const char *path, int flags, .. 
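
A pattern recurring through the blkfront, kbdfront and fbfront hunks above: bind_evtchn()/bind_virq() no longer unmask the port, so frontends allocate it with the new evtchn_alloc_unbound() helper and unmask it only once the shared ring and XenStore handshake are in place, which keeps the handler from firing against half-initialised state. A minimal sketch of that pattern, assuming the Mini-OS helpers used in this patch (the device structure and names here are illustrative):

    #include <events.h>              /* Mini-OS event-channel API; header name assumed */

    struct mydev {                   /* illustrative frontend state */
        domid_t       backend_dom;
        evtchn_port_t evtchn;
    };

    static void mydev_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
    {
        /* wake whichever thread waits on this device */
    }

    static void mydev_setup_evtchn(struct mydev *dev)
    {
        /* Allocate an unbound port for the backend domain and install the
         * handler; the port starts masked, so nothing can be delivered yet. */
        evtchn_alloc_unbound(dev->backend_dom, mydev_handler, dev, &dev->evtchn);

        /* ... grant the ring, write the frontend XenStore nodes, wait for
         *     the backend to reach the connected state ... */

        unmask_evtchn(dev->evtchn);  /* only now accept notifications */
    }
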
int open(const char *path, int flags, ...); int fcntl(int fd, int cmd, ...); #endif + +#endif diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/include/netfront.h --- a/extras/mini-os/include/netfront.h Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/include/netfront.h Thu Mar 20 12:35:40 2008 -0600 @@ -3,7 +3,7 @@ #include <lwip/netif.h> #endif struct netfront_dev; -struct netfront_dev *init_netfront(char *nodename, void (*netif_rx)(unsigned char *data, int len), unsigned char rawmac[6]); +struct netfront_dev *init_netfront(char *nodename, void (*netif_rx)(unsigned char *data, int len), unsigned char rawmac[6], char **ip); void netfront_xmit(struct netfront_dev *dev, unsigned char* data,int len); void shutdown_netfront(struct netfront_dev *dev); #ifdef HAVE_LIBC diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/kernel.c Thu Mar 20 12:35:40 2008 -0600 @@ -87,7 +87,7 @@ static void periodic_thread(void *p) static void netfront_thread(void *p) { - init_netfront(NULL, NULL, NULL); + init_netfront(NULL, NULL, NULL, NULL); } static struct blkfront_dev *blk_dev; @@ -207,9 +207,18 @@ static void blkfront_thread(void *p) static void blkfront_thread(void *p) { time_t lasttime = 0; - blk_dev = init_blkfront(NULL, &blk_sectors, &blk_sector_size, &blk_mode); + int blk_info; + + blk_dev = init_blkfront(NULL, &blk_sectors, &blk_sector_size, &blk_mode, &blk_info); if (!blk_dev) return; + + if (blk_info & VDISK_CDROM) + printk("Block device is a CDROM\n"); + if (blk_info & VDISK_REMOVABLE) + printk("Block device is removable\n"); + if (blk_info & VDISK_READONLY) + printk("Block device is read-only\n"); #ifdef BLKTEST_WRITE if (blk_mode == O_RDWR) { diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/lib/sys.c --- a/extras/mini-os/lib/sys.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/lib/sys.c Thu Mar 20 12:35:40 2008 -0600 @@ -171,8 +171,25 @@ int open(const char *pathname, int flags printk("open(%s) -> %d\n", pathname, fd); return fd; } - printk("open(%s)", pathname); - fs_fd = fs_open(fs_import, (void *) pathname); + printk("open(%s, %x)", pathname, flags); + switch (flags & ~O_ACCMODE) { + case 0: + fs_fd = fs_open(fs_import, (void *) pathname); + break; + case O_CREAT|O_TRUNC: + { + va_list ap; + mode_t mode; + va_start(ap, flags); + mode = va_arg(ap, mode_t); + va_end(ap); + fs_fd = fs_create(fs_import, (void *) pathname, 0, mode); + break; + } + default: + printk(" unsupported flags\n"); + do_exit(); + } if (fs_fd < 0) { errno = EIO; return -1; @@ -819,9 +836,19 @@ int select(int nfds, fd_set *readfds, fd add_waiter(w4, xenbus_watch_queue); add_waiter(w5, kbdfront_queue); - myread = *readfds; - mywrite = *writefds; - myexcept = *exceptfds; + if (readfds) + myread = *readfds; + else + FD_ZERO(&myread); + if (writefds) + mywrite = *writefds; + else + FD_ZERO(&mywrite); + if (exceptfds) + myexcept = *exceptfds; + else + FD_ZERO(&myexcept); + DEBUG("polling "); dump_set(nfds, &myread, &mywrite, &myexcept, timeout); DEBUG("\n"); @@ -860,9 +887,19 @@ int select(int nfds, fd_set *readfds, fd thread->wakeup_time = stop; schedule(); - myread = *readfds; - mywrite = *writefds; - myexcept = *exceptfds; + if (readfds) + myread = *readfds; + else + FD_ZERO(&myread); + if (writefds) + mywrite = *writefds; + else + FD_ZERO(&mywrite); + if (exceptfds) + myexcept = *exceptfds; + else + FD_ZERO(&myexcept); + n = select_poll(nfds, &myread, &mywrite, &myexcept); if (n) { diff -r 8c921adf4833 -r 42f6c206c951 
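
The lib/sys.c hunk above also starts honouring open()'s optional third argument. Since open() is variadic, the mode exists only when O_CREAT is passed and has to be fetched with va_arg, exactly as the patch does. A small standalone sketch of the idiom (the function name is illustrative and the body is a placeholder):

    #include <fcntl.h>
    #include <stdarg.h>
    #include <sys/types.h>

    int my_open(const char *path, int flags, ...)
    {
        mode_t mode = 0;

        if (flags & O_CREAT) {
            /* The mode argument is only present when O_CREAT is given. */
            va_list ap;
            va_start(ap, flags);
            mode = va_arg(ap, mode_t);
            va_end(ap);
        }

        /* A real implementation dispatches on (flags & ~O_ACCMODE), as
         * lib/sys.c does with fs_open()/fs_create(). */
        (void)path; (void)mode;
        return -1;
    }
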
extras/mini-os/lwip-net.c --- a/extras/mini-os/lwip-net.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/lwip-net.c Thu Mar 20 12:35:40 2008 -0600 @@ -339,10 +339,25 @@ void start_networking(void) struct ip_addr ipaddr = { htonl(IF_IPADDR) }; struct ip_addr netmask = { htonl(IF_NETMASK) }; struct ip_addr gw = { 0 }; + char *ip; tprintk("Waiting for network.\n"); - dev = init_netfront(NULL, NULL, rawmac); + dev = init_netfront(NULL, NULL, rawmac, &ip); + + if (ip) { + ipaddr.addr = inet_addr(ip); + if (IN_CLASSA(ntohl(ipaddr.addr))) + netmask.addr = htonl(IN_CLASSA_NET); + else if (IN_CLASSB(ntohl(ipaddr.addr))) + netmask.addr = htonl(IN_CLASSB_NET); + else if (IN_CLASSC(ntohl(ipaddr.addr))) + netmask.addr = htonl(IN_CLASSC_NET); + else + tprintk("Strange IP %s, leaving netmask to 0.\n", ip); + } + tprintk("IP %x netmask %x gateway %x.\n", + ntohl(ipaddr.addr), ntohl(netmask.addr), ntohl(gw.addr)); tprintk("TCP/IP bringup begins.\n"); diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/main.c --- a/extras/mini-os/main.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/main.c Thu Mar 20 12:35:40 2008 -0600 @@ -42,6 +42,9 @@ static void call_main(void *p) static void call_main(void *p) { char *args, /**path,*/ *msg, *c; +#ifdef CONFIG_QEMU + char *domargs; +#endif int argc; char **argv; char *envp[] = { NULL }; @@ -63,14 +66,12 @@ static void call_main(void *p) } /* Fetch argc, argv from XenStore */ - char domid_s[10]; int domid; domid = xenbus_read_integer("target"); if (domid == -1) { printk("Couldn't read target\n"); do_exit(); } - snprintf(domid_s, sizeof(domid_s), "%d", domid); snprintf(path, sizeof(path), "/local/domain/%d/vm", domid); msg = xenbus_read(XBT_NIL, path, &vm); @@ -78,59 +79,64 @@ static void call_main(void *p) printk("Couldn't read vm path\n"); do_exit(); } - printk("vm is at %s\n", vm); -#else + printk("dom vm is at %s\n", vm); + + snprintf(path, sizeof(path), "%s/image/dmargs", vm); + free(vm); + msg = xenbus_read(XBT_NIL, path, &domargs); + + if (msg) { + printk("Couldn't get stubdom args: %s\n", msg); + domargs = strdup(""); + } +#endif + msg = xenbus_read(XBT_NIL, "vm", &vm); if (msg) { printk("Couldn't read vm path\n"); do_exit(); } -#endif - snprintf(path, sizeof(path), "%s/image/dmargs", vm); + printk("my vm is at %s\n", vm); + snprintf(path, sizeof(path), "%s/image/cmdline", vm); free(vm); msg = xenbus_read(XBT_NIL, path, &args); if (msg) { - printk("Couldn't get stubdom args: %s\n", msg); + printk("Couldn't get my args: %s\n", msg); args = strdup(""); } argc = 1; + +#define PARSE_ARGS(ARGS,START,END) \ + c = ARGS; \ + while (*c) { \ + if (*c != ' ') { \ + START; \ + while (*c && *c != ' ') \ + c++; \ + } else { \ + END; \ + while (*c == ' ') \ + c++; \ + } \ + } + + PARSE_ARGS(args, argc++, ); #ifdef CONFIG_QEMU - argc += 2; + PARSE_ARGS(domargs, argc++, ); #endif - c = args; - while (*c) { - if (*c != ' ') { - argc++; - while (*c && *c != ' ') - c++; - } else { - while (*c == ' ') - c++; - } - } + argv = alloca((argc + 1) * sizeof(char *)); argv[0] = "main"; argc = 1; + + PARSE_ARGS(args, argv[argc++] = c, *c++ = 0) #ifdef CONFIG_QEMU - argv[1] = "-d"; - argv[2] = domid_s; - argc += 2; + PARSE_ARGS(domargs, argv[argc++] = c, *c++ = 0) #endif - c = args; - while (*c) { - if (*c != ' ') { - argv[argc++] = c; - while (*c && *c != ' ') - c++; - } else { - *c++ = 0; - while (*c == ' ') - c++; - } - } + argv[argc] = NULL; for (i = 0; i < argc; i++) diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/netfront.c --- a/extras/mini-os/netfront.c Fri Mar 14 
15:07:45 2008 -0600 +++ b/extras/mini-os/netfront.c Thu Mar 20 12:35:40 2008 -0600 @@ -48,7 +48,7 @@ struct netfront_dev { struct netif_rx_front_ring rx; grant_ref_t tx_ring_ref; grant_ref_t rx_ring_ref; - evtchn_port_t evtchn, local_port; + evtchn_port_t evtchn; char *nodename; char *backend; @@ -259,7 +259,7 @@ void netfront_select_handler(evtchn_port } #endif -struct netfront_dev *init_netfront(char *nodename, void (*thenetif_rx)(unsigned char* data, int len), unsigned char rawmac[6]) +struct netfront_dev *init_netfront(char *nodename, void (*thenetif_rx)(unsigned char* data, int len), unsigned char rawmac[6], char **ip) { xenbus_transaction_t xbt; char* err; @@ -301,19 +301,14 @@ struct netfront_dev *init_netfront(char dev->rx_buffers[i].page = (char*)alloc_page(); } - evtchn_alloc_unbound_t op; - op.dom = DOMID_SELF; snprintf(path, sizeof(path), "%s/backend-id", nodename); - dev->dom = op.remote_dom = xenbus_read_integer(path); - HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op); - clear_evtchn(op.port); /* Without, handler gets invoked now! */ + dev->dom = xenbus_read_integer(path); #ifdef HAVE_LIBC if (thenetif_rx == NETIF_SELECT_RX) - dev->local_port = bind_evtchn(op.port, netfront_select_handler, dev); + evtchn_alloc_unbound(dev->dom, netfront_select_handler, dev, &dev->evtchn); else #endif - dev->local_port = bind_evtchn(op.port, netfront_handler, dev); - dev->evtchn=op.port; + evtchn_alloc_unbound(dev->dom, netfront_handler, dev, &dev->evtchn); txs = (struct netif_tx_sring*) alloc_page(); rxs = (struct netif_rx_sring *) alloc_page(); @@ -388,9 +383,9 @@ done: msg = xenbus_read(XBT_NIL, path, &mac); if ((dev->backend == NULL) || (mac == NULL)) { - struct evtchn_close op = { dev->local_port }; + struct evtchn_close op = { dev->evtchn }; printk("%s: backend/mac failed\n", __func__); - unbind_evtchn(dev->local_port); + unbind_evtchn(dev->evtchn); HYPERVISOR_event_channel_op(EVTCHNOP_close, &op); return NULL; } @@ -407,11 +402,17 @@ done: xenbus_wait_for_value(path,"4"); xenbus_unwatch_path(XBT_NIL, path); + + if (ip) { + snprintf(path, sizeof(path), "%s/ip", dev->backend); + xenbus_read(XBT_NIL, path, ip); + } } printk("**************************\n"); init_rx_buffers(dev); + unmask_evtchn(dev->evtchn); /* Special conversion specifier 'hh' needed for __ia64__. Without this mini-os panics with 'Unaligned reference'. 
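
The lwip-net.c and netfront.c hunks above let init_netfront() hand back the backend-supplied "ip" node, from which start_networking() derives a classful default netmask when the backend provides an address. A sketch of that derivation, assuming the lwIP 1.3 headers (struct ip_addr, inet_addr() and the IN_CLASSx macros):

    #include <lwip/inet.h>      /* inet_addr(), IN_CLASSA/B/C, *_NET masks */
    #include <lwip/ip_addr.h>   /* struct ip_addr */

    /* Derive a default classful netmask from an IP string, mirroring
     * start_networking() above; the netmask is left untouched if the
     * address fits no class, as in the patch. */
    static void set_default_netmask(const char *ip, struct ip_addr *ipaddr,
                                    struct ip_addr *netmask)
    {
        ipaddr->addr = inet_addr(ip);
        if (IN_CLASSA(ntohl(ipaddr->addr)))
            netmask->addr = htonl(IN_CLASSA_NET);   /* 255.0.0.0 */
        else if (IN_CLASSB(ntohl(ipaddr->addr)))
            netmask->addr = htonl(IN_CLASSB_NET);   /* 255.255.0.0 */
        else if (IN_CLASSC(ntohl(ipaddr->addr)))
            netmask->addr = htonl(IN_CLASSC_NET);   /* 255.255.255.0 */
    }
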
*/ @@ -431,7 +432,7 @@ int netfront_tap_open(char *nodename) { int netfront_tap_open(char *nodename) { struct netfront_dev *dev; - dev = init_netfront(nodename, NETIF_SELECT_RX, NULL); + dev = init_netfront(nodename, NETIF_SELECT_RX, NULL, NULL); if (!dev) { printk("TAP open failed\n"); errno = EIO; @@ -460,7 +461,7 @@ void shutdown_netfront(struct netfront_d err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); xenbus_wait_for_value(path,"6"); - unbind_evtchn(dev->local_port); + unbind_evtchn(dev->evtchn); free(nodename); free(dev->backend); diff -r 8c921adf4833 -r 42f6c206c951 extras/mini-os/xenbus/xenbus.c --- a/extras/mini-os/xenbus/xenbus.c Fri Mar 14 15:07:45 2008 -0600 +++ b/extras/mini-os/xenbus/xenbus.c Thu Mar 20 12:35:40 2008 -0600 @@ -257,6 +257,7 @@ void init_xenbus(void) err = bind_evtchn(start_info.store_evtchn, xenbus_evtchn_handler, NULL); + unmask_evtchn(start_info.store_evtchn); DEBUG("xenbus on irq %d\n", err); } @@ -452,7 +453,7 @@ char *xenbus_write(xenbus_transaction_t { struct write_req req[] = { {path, strlen(path) + 1}, - {value, strlen(value) + 1}, + {value, strlen(value)}, }; struct xsd_sockmsg *rep; rep = xenbus_msg_reply(XS_WRITE, xbt, req, ARRAY_SIZE(req)); diff -r 8c921adf4833 -r 42f6c206c951 stubdom/Makefile --- a/stubdom/Makefile Fri Mar 14 15:07:45 2008 -0600 +++ b/stubdom/Makefile Thu Mar 20 12:35:40 2008 -0600 @@ -201,6 +201,14 @@ caml: caml: $(MAKE) -C $@ +### +# C +### + +.PHONY: c +c: + $(MAKE) -C $@ + ######## # minios ######## @@ -211,7 +219,11 @@ qemu-stubdom: mk-symlinks lwip-cvs libxc .PHONY: caml-stubdom caml-stubdom: mk-symlinks lwip-cvs libxc cross-libpci caml - $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwia-cvs CAMLDIR=$(CURDIR)/caml + $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs CAMLDIR=$(CURDIR)/caml + +.PHONY: c-stubdom +c-stubdom: mk-symlinks lwip-cvs libxc cross-libpci c + $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs CDIR=$(CURDIR)/c ######### # install diff -r 8c921adf4833 -r 42f6c206c951 stubdom/README --- a/stubdom/README Fri Mar 14 15:07:45 2008 -0600 +++ b/stubdom/README Thu Mar 20 12:35:40 2008 -0600 @@ -78,6 +78,8 @@ To run mkdir -p /exports/usr/share/qemu ln -s /usr/share/qemu/keymaps /exports/usr/share/qemu +mkdir -p /exports/var/lib +ln -s /var/lib/xen /exports/var/lib /usr/sbin/fs-backend & xm create hvmconfig diff -r 8c921adf4833 -r 42f6c206c951 stubdom/c/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/stubdom/c/Makefile Thu Mar 20 12:35:40 2008 -0600 @@ -0,0 +1,9 @@ +XEN_ROOT = ../.. 
+ +include $(XEN_ROOT)/Config.mk + +main.a: main.o + $(AR) cr $@ $^ + +clean: + rm -f *.a *.o diff -r 8c921adf4833 -r 42f6c206c951 stubdom/c/main.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/stubdom/c/main.c Thu Mar 20 12:35:40 2008 -0600 @@ -0,0 +1,6 @@ +#include <stdio.h> +int main(void) { + sleep(2); + printf("Hello, world!\n"); + return 0; +} diff -r 8c921adf4833 -r 42f6c206c951 stubdom/stubdom-dm --- a/stubdom/stubdom-dm Fri Mar 14 15:07:45 2008 -0600 +++ b/stubdom/stubdom-dm Thu Mar 20 12:35:40 2008 -0600 @@ -14,18 +14,30 @@ domname= domname= vncviewer=0 vncpid= +extra= while [ "$#" -gt 0 ]; do if [ "$#" -ge 2 ]; then case "$1" in - -d) domid=$2; shift ;; - -domain-name) domname=$2; shift ;; + -d) + domid=$2; + extra="$extra -d $domid"; + shift + ;; + -domain-name) + domname=$2; + shift + ;; -vnc) ip=${2%:*}; vnc_port=${2#*:}; shift ;; + -loadvm) + extra="$extra -loadvm $2"; + shift + ;; esac fi case "$1" in @@ -60,7 +72,7 @@ do sleep 1 done -creation="xm create -c stubdom-$domname target=$domid memory=32" +creation="xm create -c stubdom-$domname target=$domid memory=32 extra=\"$extra\"" (while true ; do sleep 60 ; done) | $creation & #xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" & diff -r 8c921adf4833 -r 42f6c206c951 tools/blktap/drivers/Makefile --- a/tools/blktap/drivers/Makefile Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/blktap/drivers/Makefile Thu Mar 20 12:35:40 2008 -0600 @@ -1,7 +1,5 @@ XEN_ROOT = ../../.. XEN_ROOT = ../../.. include $(XEN_ROOT)/tools/Rules.mk - -INCLUDES += -I.. -I../lib -I$(XEN_INCLUDE) IBIN = blktapctrl tapdisk QCOW_UTIL = img2qcow qcow2raw qcow-create @@ -9,18 +7,18 @@ LIBAIO_DIR = ../../libaio/src CFLAGS += -Werror CFLAGS += -Wno-unused -CFLAGS += $(CFLAGS_libxenctrl) -I $(LIBAIO_DIR) -CFLAGS += $(INCLUDES) -I. -I../../xenstore +CFLAGS += -I../lib +CFLAGS += $(CFLAGS_libxenctrl) +CFLAGS += $(CFLAGS_libxenstore) +CFLAGS += -I $(LIBAIO_DIR) CFLAGS += -D_GNU_SOURCE # Get gcc to generate the dependencies for us. CFLAGS += -Wp,-MD,.$(@F).d DEPS = .*.d -LIBS := -L. -L.. 
-L../lib -LIBS += -Wl,-rpath-link,$(XEN_XENSTORE) - -LIBS_IMG := $(LIBAIO_DIR)/libaio.a -lcrypto -lpthread -lz +LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) -L../lib -lblktap +LDFLAGS_img := $(LIBAIO_DIR)/libaio.a -lcrypto -lpthread -lz BLK-OBJS-y := block-aio.o BLK-OBJS-y += block-sync.o @@ -34,17 +32,17 @@ BLK-OBJS-$(CONFIG_Linux) += blk_linux.c all: $(IBIN) qcow-util -blktapctrl: blktapctrl.c - $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) -lblktap $(LDFLAGS_libxenctrl) +blktapctrl: blktapctrl.o + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LDFLAGS_blktapctrl) -tapdisk: tapdisk.c $(BLK-OBJS-y) tapdisk.c - $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(LIBS_IMG) +tapdisk: tapdisk.o $(BLK-OBJS-y) + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LDFLAGS_img) .PHONY: qcow-util qcow-util: img2qcow qcow2raw qcow-create -img2qcow qcow2raw qcow-create: %: $(BLK-OBJS-y) - $(CC) $(CFLAGS) -o $* $(BLK-OBJS-y) $*.c $(LDFLAGS) $(LIBS_IMG) +img2qcow qcow2raw qcow-create: %: %.o $(BLK-OBJS-y) + $(CC) $(CFLAGS) -o $* $^ $(LDFLAGS) $(LDFLAGS_img) install: all $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(VHD_UTIL) $(DESTDIR)$(SBINDIR) diff -r 8c921adf4833 -r 42f6c206c951 tools/blktap/drivers/blktapctrl.c --- a/tools/blktap/drivers/blktapctrl.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/blktap/drivers/blktapctrl.c Thu Mar 20 12:35:40 2008 -0600 @@ -501,6 +501,80 @@ int launch_tapdisk(char *wrctldev, char return 0; } +/* Connect to qemu-dm */ +static int connect_qemu(blkif_t *blkif) +{ + char *rdctldev, *wrctldev; + + if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", + blkif->domid) < 0) + return -1; + + if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", + blkif->domid) < 0) { + free(rdctldev); + return -1; + } + + DPRINTF("Using qemu blktap pipe: %s\n", rdctldev); + + blkif->fds[READ] = open_ctrl_socket(wrctldev); + blkif->fds[WRITE] = open_ctrl_socket(rdctldev); + + free(rdctldev); + free(wrctldev); + + if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) + return -1; + + DPRINTF("Attached to qemu blktap pipes\n"); + return 0; +} + +/* Launch tapdisk instance */ +static int connect_tapdisk(blkif_t *blkif, int minor) +{ + char *rdctldev = NULL, *wrctldev = NULL; + int ret = -1; + + DPRINTF("tapdisk process does not exist:\n"); + + if (asprintf(&rdctldev, + "%s/tapctrlread%d", BLKTAP_CTRL_DIR, minor) == -1) + goto fail; + + if (asprintf(&wrctldev, + "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, minor) == -1) + goto fail; + + blkif->fds[READ] = open_ctrl_socket(rdctldev); + blkif->fds[WRITE] = open_ctrl_socket(wrctldev); + + if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) + goto fail; + + /*launch the new process*/ + DPRINTF("Launching process, CMDLINE [tapdisk %s %s]\n", + wrctldev, rdctldev); + + if (launch_tapdisk(wrctldev, rdctldev) == -1) { + DPRINTF("Unable to fork, cmdline: [tapdisk %s %s]\n", + wrctldev, rdctldev); + goto fail; + } + + ret = 0; + +fail: + if (rdctldev) + free(rdctldev); + + if (wrctldev) + free(wrctldev); + + return ret; +} + int blktapctrl_new_blkif(blkif_t *blkif) { blkif_info_t *blk; @@ -524,30 +598,14 @@ int blktapctrl_new_blkif(blkif_t *blkif) blkif->cookie = next_cookie++; if (!exist) { - DPRINTF("Process does not exist:\n"); - if (asprintf(&rdctldev, - "%s/tapctrlread%d", BLKTAP_CTRL_DIR, minor) == -1) - goto fail; - if (asprintf(&wrctldev, - "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, minor) == -1) { - free(rdctldev); - goto fail; + if (type == DISK_TYPE_IOEMU) { + if (connect_qemu(blkif)) + goto fail; + } else { + if (connect_tapdisk(blkif, minor)) + goto fail; } - 
blkif->fds[READ] = open_ctrl_socket(rdctldev); - blkif->fds[WRITE] = open_ctrl_socket(wrctldev); - - if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) - goto fail; - - /*launch the new process*/ - DPRINTF("Launching process, CMDLINE [tapdisk %s %s]\n",wrctldev, rdctldev); - if (launch_tapdisk(wrctldev, rdctldev) == -1) { - DPRINTF("Unable to fork, cmdline: [tapdisk %s %s]\n",wrctldev, rdctldev); - goto fail; - } - - free(rdctldev); - free(wrctldev); + } else { DPRINTF("Process exists!\n"); blkif->fds[READ] = exist->fds[READ]; diff -r 8c921adf4833 -r 42f6c206c951 tools/blktap/drivers/block-qcow2.c --- a/tools/blktap/drivers/block-qcow2.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/blktap/drivers/block-qcow2.c Thu Mar 20 12:35:40 2008 -0600 @@ -43,6 +43,11 @@ #ifndef O_BINARY #define O_BINARY 0 +#endif + +/* *BSD has no O_LARGEFILE */ +#ifndef O_LARGEFILE +#define O_LARGEFILE 0 #endif #define BLOCK_FLAG_ENCRYPT 1 diff -r 8c921adf4833 -r 42f6c206c951 tools/blktap/drivers/tapaio.c --- a/tools/blktap/drivers/tapaio.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/blktap/drivers/tapaio.c Thu Mar 20 12:35:40 2008 -0600 @@ -82,6 +82,7 @@ tap_aio_completion_thread(void *arg) } } while (!rc); } + return NULL; } void diff -r 8c921adf4833 -r 42f6c206c951 tools/blktap/drivers/tapdisk.h --- a/tools/blktap/drivers/tapdisk.h Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/blktap/drivers/tapdisk.h Thu Mar 20 12:35:40 2008 -0600 @@ -167,6 +167,7 @@ extern struct tap_disk tapdisk_qcow2; #define DISK_TYPE_RAM 3 #define DISK_TYPE_QCOW 4 #define DISK_TYPE_QCOW2 5 +#define DISK_TYPE_IOEMU 6 /*Define Individual Disk Parameters here */ @@ -227,6 +228,16 @@ static disk_info_t qcow2_disk = { 0, #ifdef TAPDISK &tapdisk_qcow2, +#endif +}; + +static disk_info_t ioemu_disk = { + DISK_TYPE_IOEMU, + "ioemu disk", + "ioemu", + 0, +#ifdef TAPDISK + NULL #endif }; @@ -238,6 +249,7 @@ static disk_info_t *dtypes[] = { &ram_disk, &qcow_disk, &qcow2_disk, + &ioemu_disk, }; typedef struct driver_list_entry { diff -r 8c921adf4833 -r 42f6c206c951 tools/blktap/lib/Makefile --- a/tools/blktap/lib/Makefile Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/blktap/lib/Makefile Thu Mar 20 12:35:40 2008 -0600 @@ -5,8 +5,9 @@ MINOR = 0 MINOR = 0 SONAME = libblktap.so.$(MAJOR) -INCLUDES += -I. -I.. -I $(XEN_LIBXC) -I$(XEN_INCLUDE) $(CFLAGS_libxenstore) - +CFLAGS += -I. +CFLAGS += $(CFLAGS_libxenctrl) +CFLAGS += $(CFLAGS_libxenstore) LDFLAGS += $(LDFLAGS_libxenstore) SRCS := @@ -20,23 +21,18 @@ CFLAGS += -D _GNU_SOURCE # Get gcc to generate the dependencies for us. 
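
The tapdisk.h hunk above registers the new "ioemu" type by adding a DISK_TYPE_* constant, a positionally-initialised disk_info_t (with a NULL driver, since qemu itself services these disks), and an entry in the dtypes[] table. A further disk type would follow the same shape; the entry below is purely hypothetical:

    #include "tapdisk.h"                 /* disk_info_t, DISK_TYPE_*, struct tap_disk */

    #define DISK_TYPE_MYTYPE 7           /* next unused type number (illustrative) */

    static disk_info_t mytype_disk = {
        DISK_TYPE_MYTYPE,
        "my example disk",               /* human-readable description */
        "mytype",                        /* handle used in tap:mytype:<path> */
        0,
    #ifdef TAPDISK
        NULL,                            /* driver ops, or NULL if served elsewhere */
    #endif
    };

    /* ... and &mytype_disk would be appended to the dtypes[] array. */
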
CFLAGS += -Wp,-MD,.$(@F).d -CFLAGS += $(INCLUDES) DEPS = .*.d -OBJS = $(patsubst %.c,%.o,$(SRCS)) +OBJS = $(SRCS:.c=.o) +OBJS_PIC = $(SRCS:.c=.opic) IBINS := LIB = libblktap.a libblktap.so.$(MAJOR).$(MINOR) .PHONY: all -all: build +all: $(LIB) -.PHONY: build -build: libblktap.a - -.PHONY: libblktap -libblktap: libblktap.a - +.PHONY: install install: all $(INSTALL_DIR) $(DESTDIR)$(LIBDIR) $(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR) @@ -45,19 +41,20 @@ install: all ln -sf libblktap.so.$(MAJOR) $(DESTDIR)$(LIBDIR)/libblktap.so $(INSTALL_DATA) blktaplib.h $(DESTDIR)$(INCLUDEDIR) +.PHONY: clean clean: rm -rf *.a *.so* *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS -libblktap.a: $(OBJS) +libblktap.so.$(MAJOR).$(MINOR): $(OBJS_PIC) $(CC) $(CFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,$(SONAME) $(SHLIB_CFLAGS) \ - -L$(XEN_XENSTORE) $(LDFLAGS) \ - -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS) + $(LDFLAGS) -o $@ $^ ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR) ln -sf libblktap.so.$(MAJOR) libblktap.so - $(AR) rc $@ libblktap.so -.PHONY: TAGS all build clean install libblktap +libblktap.a: $(OBJS) + $(AR) rc $@ $^ +.PHONY: TAGS TAGS: etags -t $(SRCS) *.h diff -r 8c921adf4833 -r 42f6c206c951 tools/blktap/lib/blktaplib.h --- a/tools/blktap/lib/blktaplib.h Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/blktap/lib/blktaplib.h Thu Mar 20 12:35:40 2008 -0600 @@ -221,15 +221,5 @@ int xs_fire_next_watch(struct xs_handle ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * getpagesize()) + \ ((_seg) * getpagesize())) -/* Defines that are only used by library clients */ - -#ifndef __COMPILING_BLKTAP_LIB - -static char *blkif_op_name[] = { - [BLKIF_OP_READ] = "READ", - [BLKIF_OP_WRITE] = "WRITE", -}; - -#endif /* __COMPILING_BLKTAP_LIB */ #endif /* __BLKTAPLIB_H__ */ diff -r 8c921adf4833 -r 42f6c206c951 tools/examples/xmexample.hvm --- a/tools/examples/xmexample.hvm Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/examples/xmexample.hvm Thu Mar 20 12:35:40 2008 -0600 @@ -135,6 +135,11 @@ sdl=0 sdl=0 #---------------------------------------------------------------------------- +# enable OpenGL for texture rendering inside the SDL window, default = 1 +# valid only if sdl is enabled. 
+opengl=1 + +#---------------------------------------------------------------------------- # enable VNC library for graphics, default = 1 vnc=1 diff -r 8c921adf4833 -r 42f6c206c951 tools/firmware/rombios/rombios.c --- a/tools/firmware/rombios/rombios.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/firmware/rombios/rombios.c Thu Mar 20 12:35:40 2008 -0600 @@ -779,8 +779,8 @@ typedef struct { Bit32u sector_count1; Bit32u sector_count2; Bit16u blksize; + Bit16u dpte_offset; Bit16u dpte_segment; - Bit16u dpte_offset; Bit16u key; Bit8u dpi_length; Bit8u reserved1; diff -r 8c921adf4833 -r 42f6c206c951 tools/include/Makefile --- a/tools/include/Makefile Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/include/Makefile Thu Mar 20 12:35:40 2008 -0600 @@ -22,6 +22,7 @@ install: all install: all $(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/arch-ia64 $(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/arch-x86 + $(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/arch-x86/hvm $(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/foreign $(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/hvm $(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/io @@ -31,6 +32,7 @@ install: all $(INSTALL_DATA) xen/*.h $(DESTDIR)$(INCLUDEDIR)/xen $(INSTALL_DATA) xen/arch-ia64/*.h $(DESTDIR)$(INCLUDEDIR)/xen/arch-ia64 $(INSTALL_DATA) xen/arch-x86/*.h $(DESTDIR)$(INCLUDEDIR)/xen/arch-x86 + $(INSTALL_DATA) xen/arch-x86/hvm/*.h $(DESTDIR)$(INCLUDEDIR)/xen/arch-x86/hvm $(INSTALL_DATA) xen/foreign/*.h $(DESTDIR)$(INCLUDEDIR)/xen/foreign $(INSTALL_DATA) xen/hvm/*.h $(DESTDIR)$(INCLUDEDIR)/xen/hvm $(INSTALL_DATA) xen/io/*.h $(DESTDIR)$(INCLUDEDIR)/xen/io diff -r 8c921adf4833 -r 42f6c206c951 tools/include/xen-sys/Linux/gntdev.h --- a/tools/include/xen-sys/Linux/gntdev.h Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/include/xen-sys/Linux/gntdev.h Thu Mar 20 12:35:40 2008 -0600 @@ -102,4 +102,18 @@ struct ioctl_gntdev_get_offset_for_vaddr uint32_t pad; }; +/* + * Sets the maximum number of grants that may mapped at once by this gntdev + * instance. + * + * N.B. This must be called before any other ioctl is performed on the device. + */ +#define IOCTL_GNTDEV_SET_MAX_GRANTS \ +_IOC(_IOC_NONE, 'G', 3, sizeof(struct ioctl_gntdev_set_max_grants)) +struct ioctl_gntdev_set_max_grants { + /* IN parameter */ + /* The maximum number of grants that may be mapped at once. */ + uint32_t count; +}; + #endif /* __LINUX_PUBLIC_GNTDEV_H__ */ diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/Makefile.target --- a/tools/ioemu/Makefile.target Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/Makefile.target Thu Mar 20 12:35:40 2008 -0600 @@ -17,6 +17,7 @@ VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_P VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_PATH)/hw:$(SRC_PATH)/audio CPPFLAGS+=-I. -I.. 
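
The new gntdev ioctl above has to be issued before any grant is mapped through that file descriptor. A minimal caller, assuming the usual /dev/xen/gntdev device node and header install path (both may differ by platform):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <xen/sys/gntdev.h>   /* the header shown above; install path assumed */

    int main(void)
    {
        struct ioctl_gntdev_set_max_grants arg = { .count = 128 };
        int fd = open("/dev/xen/gntdev", O_RDWR);    /* device path assumed */

        if (fd < 0) {
            perror("open gntdev");
            return 1;
        }
        /* Must be the first ioctl performed on this instance. */
        if (ioctl(fd, IOCTL_GNTDEV_SET_MAX_GRANTS, &arg) < 0)
            perror("IOCTL_GNTDEV_SET_MAX_GRANTS");

        /* grant mapping (IOCTL_GNTDEV_MAP_GRANT_REF + mmap) would follow */
        close(fd);
        return 0;
    }
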
-I$(TARGET_PATH) -I$(SRC_PATH) CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc +CPPFLAGS+= -I$(XEN_ROOT)/tools/blktap/lib CPPFLAGS+= -I$(XEN_ROOT)/tools/xenstore CPPFLAGS+= -I$(XEN_ROOT)/tools/include ifdef CONFIG_DARWIN_USER @@ -429,6 +430,9 @@ VL_OBJS+= usb-uhci.o smbus_eeprom.o VL_OBJS+= usb-uhci.o smbus_eeprom.o VL_OBJS+= piix4acpi.o VL_OBJS+= xenstore.o +ifndef CONFIG_STUBDOM +VL_OBJS+= xen_blktap.o +endif VL_OBJS+= xen_platform.o VL_OBJS+= xen_machine_fv.o VL_OBJS+= xen_machine_pv.o diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/block-vbd.c --- a/tools/ioemu/block-vbd.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/block-vbd.c Thu Mar 20 12:35:40 2008 -0600 @@ -51,6 +51,7 @@ typedef struct BDRVVbdState { int fd; int type; int mode; + int info; uint64_t sectors; unsigned sector_size; QEMU_LIST_ENTRY(BDRVVbdState) list; @@ -80,7 +81,7 @@ static int vbd_open(BlockDriverState *bs //handy to test posix access //return -EIO; - s->dev = init_blkfront((char *) filename, &s->sectors, &s->sector_size, &s->mode); + s->dev = init_blkfront((char *) filename, &s->sectors, &s->sector_size, &s->mode, &s->info); if (!s->dev) return -EIO; @@ -271,6 +272,7 @@ static void vbd_close(BlockDriverState * BDRVVbdState *s = bs->opaque; bs->total_sectors = 0; if (s->fd >= 0) { + qemu_set_fd_handler(s->fd, NULL, NULL, NULL); close(s->fd); s->fd = -1; } diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/configure --- a/tools/ioemu/configure Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/configure Thu Mar 20 12:35:40 2008 -0600 @@ -189,6 +189,8 @@ for opt do --static) static="yes" ;; --disable-sdl) sdl="no" + ;; + --disable-opengl) opengl="no" ;; --enable-coreaudio) coreaudio="yes" ;; @@ -539,6 +541,26 @@ fi # -z $sdl fi # -z $sdl ########################################## +# OpenGL test + +if test -z "$opengl" && test "$sdl" = "yes" +then +cat > $TMPC << EOF +#include <SDL_opengl.h> +#ifndef GL_TEXTURE_RECTANGLE_ARB +#error "Opengl doesn't support GL_TEXTURE_RECTANGLE_ARB" +#endif +int main( void ) { return (int) glGetString(GL_EXTENSIONS); } +EOF +if $cc -o $TMPE `$sdl_config --cflags --libs 2> /dev/null` -I/usr/include/GL $TMPC -lXext -lGL 2> /dev/null +then +opengl="yes" +else +opengl="no" +fi +fi + +########################################## # alsa sound support libraries if test "$alsa" = "yes" ; then @@ -612,6 +634,7 @@ if test "$sdl" != "no" ; then if test "$sdl" != "no" ; then echo "SDL static link $sdl_static" fi +echo "OpenGL support $opengl" echo "mingw32 support $mingw32" echo "Adlib support $adlib" echo "CoreAudio support $coreaudio" @@ -995,6 +1018,15 @@ if test "$target_user_only" = "no"; then fi fi +if test $opengl = "yes" +then + echo "#define CONFIG_OPENGL 1" >> $config_h + echo "CONFIG_OPENGL=yes" >> $config_mak + echo "SDL_CFLAGS+=-I/usr/include/GL" >> $config_mak + echo "SDL_LIBS+=-lXext" >> $config_mak + echo "SDL_LIBS+=-lGL" >> $config_mak +fi + if test "$cocoa" = "yes" ; then echo "#define CONFIG_COCOA 1" >> $config_h echo "CONFIG_COCOA=yes" >> $config_mak diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/hw/ide.c --- a/tools/ioemu/hw/ide.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/hw/ide.c Thu Mar 20 12:35:40 2008 -0600 @@ -189,6 +189,15 @@ /* set to 1 set disable mult support */ #define MAX_MULT_SECTORS 16 +#ifdef CONFIG_STUBDOM +#include <xen/io/blkif.h> +#define IDE_DMA_BUF_SIZE (BLKIF_MAX_SEGMENTS_PER_REQUEST * TARGET_PAGE_SIZE) +#else +#define IDE_DMA_BUF_SIZE 131072 +#endif +#if (IDE_DMA_BUF_SIZE < MAX_MULT_SECTORS * 512) +#error "IDE_DMA_BUF_SIZE must be bigger or equal to 
MAX_MULT_SECTORS * 512" +#endif /* ATAPI defines */ @@ -932,8 +941,8 @@ static void ide_read_dma_cb(void *opaque /* launch next transfer */ n = s->nsector; - if (n > MAX_MULT_SECTORS) - n = MAX_MULT_SECTORS; + if (n > IDE_DMA_BUF_SIZE / 512) + n = IDE_DMA_BUF_SIZE / 512; s->io_buffer_index = 0; s->io_buffer_size = n * 512; #ifdef DEBUG_AIO @@ -1041,8 +1050,8 @@ static void ide_write_dma_cb(void *opaqu /* launch next transfer */ n = s->nsector; - if (n > MAX_MULT_SECTORS) - n = MAX_MULT_SECTORS; + if (n > IDE_DMA_BUF_SIZE / 512) + n = IDE_DMA_BUF_SIZE / 512; s->io_buffer_index = 0; s->io_buffer_size = n * 512; @@ -1336,8 +1345,8 @@ static void ide_atapi_cmd_read_dma_cb(vo data_offset = 16; } else { n = s->packet_transfer_size >> 11; - if (n > (MAX_MULT_SECTORS / 4)) - n = (MAX_MULT_SECTORS / 4); + if (n > (IDE_DMA_BUF_SIZE / 2048)) + n = (IDE_DMA_BUF_SIZE / 2048); s->io_buffer_size = n * 2048; data_offset = 0; } @@ -1823,7 +1832,6 @@ static void ide_ioport_write(void *opaqu break; case WIN_SETMULT: if (s->nsector > MAX_MULT_SECTORS || - s->nsector == 0 || (s->nsector & (s->nsector - 1)) != 0) { ide_abort_command(s); } else { @@ -2306,7 +2314,7 @@ static void ide_init2(IDEState *ide_stat for(i = 0; i < 2; i++) { s = ide_state + i; - s->io_buffer = qemu_memalign(getpagesize(), MAX_MULT_SECTORS*512 + 4); + s->io_buffer = qemu_memalign(getpagesize(), IDE_DMA_BUF_SIZE + 4); if (i == 0) s->bs = hd0; else diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/hw/ne2000.c --- a/tools/ioemu/hw/ne2000.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/hw/ne2000.c Thu Mar 20 12:35:40 2008 -0600 @@ -834,6 +834,10 @@ void pci_ne2000_init(PCIBus *bus, NICInf pci_conf[0x0a] = 0x00; // ethernet network controller pci_conf[0x0b] = 0x02; pci_conf[0x0e] = 0x00; // header_type + pci_conf[0x2c] = 0x53; /* subsystem vendor: XenSource */ + pci_conf[0x2d] = 0x58; + pci_conf[0x2e] = 0x01; /* subsystem device */ + pci_conf[0x2f] = 0x00; pci_conf[0x3d] = 1; // interrupt pin 0 pci_register_io_region(&d->dev, 0, 0x100, diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/hw/scsi-disk.c --- a/tools/ioemu/hw/scsi-disk.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/hw/scsi-disk.c Thu Mar 20 12:35:40 2008 -0600 @@ -34,9 +34,10 @@ do { fprintf(stderr, "scsi-disk: " fmt , #define SENSE_ILLEGAL_REQUEST 5 #ifdef CONFIG_STUBDOM -#define SCSI_DMA_BUF_SIZE 32768 +#include <xen/io/blkif.h> +#define SCSI_DMA_BUF_SIZE (BLKIF_MAX_SEGMENTS_PER_REQUEST * TARGET_PAGE_SIZE) #else -#define SCSI_DMA_BUF_SIZE 65536 +#define SCSI_DMA_BUF_SIZE 131072 #endif typedef struct SCSIRequest { diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/hw/vga.c --- a/tools/ioemu/hw/vga.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/hw/vga.c Thu Mar 20 12:35:40 2008 -0600 @@ -1148,7 +1148,7 @@ static void vga_draw_text(VGAState *s, i cw != s->last_cw || cheight != s->last_ch) { s->last_scr_width = width * cw; s->last_scr_height = height * cheight; - dpy_resize(s->ds, s->last_scr_width, s->last_scr_height, s->last_scr_width * (depth / 8)); + dpy_resize(s->ds, s->last_scr_width, s->last_scr_height, s->last_scr_width * (s->ds->depth / 8)); s->last_width = width; s->last_height = height; s->last_ch = cheight; @@ -1571,7 +1571,7 @@ static void vga_draw_graphic(VGAState *s vga_draw_line = vga_draw_line_table[v * NB_DEPTHS + get_depth_index(s->ds)]; if (disp_width != s->last_width || height != s->last_height) { - dpy_resize(s->ds, disp_width, height, disp_width * (depth / 8)); + dpy_resize(s->ds, disp_width, height, s->line_offset); s->last_scr_width = disp_width; 
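
The ide.c and scsi-disk.c changes above size the DMA bounce buffer from what a single blkif request can carry when qemu runs as a stub domain, and raise it to 128 KiB otherwise, instead of tying it to MAX_MULT_SECTORS. The arithmetic, assuming the usual BLKIF_MAX_SEGMENTS_PER_REQUEST of 11 and 4 KiB target pages:

    #include <stdio.h>

    int main(void)
    {
        const int stubdom_buf = 11 * 4096;   /* 45056 bytes =  88 sectors per transfer */
        const int native_buf  = 131072;      /* 128 KiB     = 256 sectors per transfer */

        printf("stubdom: %d bytes (%d sectors)\n", stubdom_buf, stubdom_buf / 512);
        printf("native:  %d bytes (%d sectors)\n", native_buf,  native_buf  / 512);

        /* Both satisfy the compile-time check that the buffer holds at least
         * MAX_MULT_SECTORS * 512 = 16 * 512 = 8192 bytes. */
        return 0;
    }
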
s->last_scr_height = height; s->last_width = disp_width; diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/hw/xen_blktap.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ioemu/hw/xen_blktap.c Thu Mar 20 12:35:40 2008 -0600 @@ -0,0 +1,686 @@ +/* xen_blktap.c + * + * Interface to blktapctrl to allow use of qemu block drivers with blktap. + * This file is based on tools/blktap/drivers/tapdisk.c + * + * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield. + * Copyright (c) 2008 Kevin Wolf + */ + +/* + * There are several communication channels which are used by this interface: + * + * - A pair of pipes for receiving and sending general control messages + * (qemu-read-N and qemu-writeN in /var/run/tap, where N is the domain ID). + * These control messages are handled by handle_blktap_ctrlmsg(). + * + * - One file descriptor per attached disk (/dev/xen/blktapN) for disk + * specific control messages. A callback is triggered on this fd if there + * is a new IO request. The callback function is handle_blktap_iomsg(). + * + * - A shared ring for each attached disk containing the actual IO requests + * and responses. Whenever handle_blktap_iomsg() is triggered it processes + * the requests on this ring. + */ + +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> + +#include "vl.h" +#include "blktaplib.h" +#include "xen_blktap.h" +#include "block_int.h" + +#define MSG_SIZE 4096 + +#define BLKTAP_CTRL_DIR "/var/run/tap" + +/* If enabled, print debug messages to stderr */ +#if 1 +#define DPRINTF(_f, _a...) fprintf(stderr, __FILE__ ":%d: " _f, __LINE__, ##_a) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + +#if 1 +#define ASSERT(_p) \ + if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s\n", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#else +#define ASSERT(_p) ((void)0) +#endif + + +extern int domid; + +int read_fd; +int write_fd; + +static pid_t process; +fd_list_entry_t *fd_start = NULL; + +static void handle_blktap_iomsg(void* private); + +struct aiocb_info { + struct td_state *s; + uint64_t sector; + int nr_secs; + int idx; + long i; +}; + +static void unmap_disk(struct td_state *s) +{ + tapdev_info_t *info = s->ring_info; + fd_list_entry_t *entry; + + bdrv_close(s->bs); + + if (info != NULL && info->mem > 0) + munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE); + + entry = s->fd_entry; + *entry->pprev = entry->next; + if (entry->next) + entry->next->pprev = entry->pprev; + + qemu_set_fd_handler2(info->fd, NULL, NULL, NULL, NULL); + close(info->fd); + + free(s->fd_entry); + free(s->blkif); + free(s->ring_info); + free(s); + + return; +} + +static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s) +{ + fd_list_entry_t **pprev, *entry; + + DPRINTF("Adding fd_list_entry\n"); + + /*Add to linked list*/ + s->fd_entry = entry = malloc(sizeof(fd_list_entry_t)); + entry->tap_fd = tap_fd; + entry->s = s; + entry->next = NULL; + + pprev = &fd_start; + while (*pprev != NULL) + pprev = &(*pprev)->next; + + *pprev = entry; + entry->pprev = pprev; + + return entry; +} + +static inline struct td_state *get_state(int cookie) +{ + fd_list_entry_t *ptr; + + ptr = fd_start; + while (ptr != NULL) { + if (ptr->cookie == cookie) return ptr->s; + ptr = ptr->next; + } + return NULL; +} + +static struct td_state *state_init(void) +{ + int i; + struct td_state *s; + blkif_t *blkif; + + s = malloc(sizeof(struct td_state)); + blkif = s->blkif = 
malloc(sizeof(blkif_t)); + s->ring_info = calloc(1, sizeof(tapdev_info_t)); + + for (i = 0; i < MAX_REQUESTS; i++) { + blkif->pending_list[i].secs_pending = 0; + blkif->pending_list[i].submitting = 0; + } + + return s; +} + +static int map_new_dev(struct td_state *s, int minor) +{ + int tap_fd; + tapdev_info_t *info = s->ring_info; + char *devname; + fd_list_entry_t *ptr; + int page_size; + + if (asprintf(&devname,"%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, minor) == -1) + return -1; + tap_fd = open(devname, O_RDWR); + if (tap_fd == -1) + { + DPRINTF("open failed on dev %s!\n",devname); + goto fail; + } + info->fd = tap_fd; + + /*Map the shared memory*/ + page_size = getpagesize(); + info->mem = mmap(0, page_size * BLKTAP_MMAP_REGION_SIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, info->fd, 0); + if ((long int)info->mem == -1) + { + DPRINTF("mmap failed on dev %s!\n",devname); + goto fail; + } + + /* assign the rings to the mapped memory */ + info->sring = (blkif_sring_t *)((unsigned long)info->mem); + BACK_RING_INIT(&info->fe_ring, info->sring, page_size); + + info->vstart = + (unsigned long)info->mem + (BLKTAP_RING_PAGES * page_size); + + ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process ); + ioctl(info->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE ); + free(devname); + + /*Update the fd entry*/ + ptr = fd_start; + while (ptr != NULL) { + if (s == ptr->s) { + ptr->tap_fd = tap_fd; + + /* Setup fd_handler for qemu main loop */ + DPRINTF("set tap_fd = %d\n", tap_fd); + qemu_set_fd_handler2(tap_fd, NULL, &handle_blktap_iomsg, NULL, s); + + break; + } + ptr = ptr->next; + } + + + DPRINTF("map_new_dev = %d\n", minor); + return minor; + + fail: + free(devname); + return -1; +} + +static int open_disk(struct td_state *s, char *path, int readonly) +{ + struct disk_id id; + BlockDriverState* bs; + + DPRINTF("Opening %s\n", path); + bs = calloc(1, sizeof(*bs)); + + memset(&id, 0, sizeof(struct disk_id)); + + if (bdrv_open(bs, path, 0) != 0) { + fprintf(stderr, "Could not open image file %s\n", path); + return -ENOMEM; + } + + s->bs = bs; + s->flags = readonly ? TD_RDONLY : 0; + s->size = bs->total_sectors; + s->sector_size = 512; + + s->info = ((s->flags & TD_RDONLY) ? 
VDISK_READONLY : 0); + + return 0; +} + +static inline void write_rsp_to_ring(struct td_state *s, blkif_response_t *rsp) +{ + tapdev_info_t *info = s->ring_info; + blkif_response_t *rsp_d; + + rsp_d = RING_GET_RESPONSE(&info->fe_ring, info->fe_ring.rsp_prod_pvt); + memcpy(rsp_d, rsp, sizeof(blkif_response_t)); + info->fe_ring.rsp_prod_pvt++; +} + +static inline void kick_responses(struct td_state *s) +{ + tapdev_info_t *info = s->ring_info; + + if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) + { + RING_PUSH_RESPONSES(&info->fe_ring); + ioctl(info->fd, BLKTAP_IOCTL_KICK_FE); + } +} + +static int send_responses(struct td_state *s, int res, + uint64_t sector, int nr_secs, int idx, void *private) +{ + pending_req_t *preq; + blkif_request_t *req; + int responses_queued = 0; + blkif_t *blkif = s->blkif; + int secs_done = nr_secs; + + if ( (idx > MAX_REQUESTS-1) ) + { + DPRINTF("invalid index returned(%u)!\n", idx); + return 0; + } + preq = &blkif->pending_list[idx]; + req = &preq->req; + + preq->secs_pending -= secs_done; + + if (res == -EBUSY && preq->submitting) + return -EBUSY; /* propagate -EBUSY back to higher layers */ + if (res) + preq->status = BLKIF_RSP_ERROR; + + if (!preq->submitting && preq->secs_pending == 0) + { + blkif_request_t tmp; + blkif_response_t *rsp; + + tmp = preq->req; + rsp = (blkif_response_t *)req; + + rsp->id = tmp.id; + rsp->operation = tmp.operation; + rsp->status = preq->status; + + write_rsp_to_ring(s, rsp); + responses_queued++; + + kick_responses(s); + } + + return responses_queued; +} + +static void qemu_send_responses(void* opaque, int ret) +{ + struct aiocb_info* info = opaque; + + if (ret != 0) { + DPRINTF("ERROR: ret = %d (%s)\n", ret, strerror(-ret)); + } + + send_responses(info->s, ret, info->sector, info->nr_secs, + info->idx, (void*) info->i); + free(info); +} + +/** + * Callback function for the IO message pipe. Reads requests from the ring + * and processes them (call qemu read/write functions). + * + * The private parameter points to the struct td_state representing the + * disk the request is targeted at. 
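
[Editor's note, not part of the patch] The subtle part of send_responses() above is the per-request completion accounting: every AIO completion decrements a sector count, and the response is pushed to the ring only once the count reaches zero and handle_blktap_iomsg() has finished submitting segments. A standalone Python sketch of that bookkeeping, with illustrative names and status codes:

RSP_OKAY, RSP_ERROR = 0, -1              # illustrative status codes

class PendingReq:
    def __init__(self, req_id, total_secs):
        self.req_id = req_id
        self.secs_pending = total_secs   # sectors still in flight
        self.submitting = True           # the submission loop is still queueing
        self.status = RSP_OKAY

    def segment_done(self, nsecs, error=False):
        """Called once per completed AIO segment; returns the response to
        push to the ring once the whole request is finished, else None."""
        self.secs_pending -= nsecs
        if error:
            self.status = RSP_ERROR
        if not self.submitting and self.secs_pending == 0:
            return {'id': self.req_id, 'status': self.status}
        return None

req = PendingReq(req_id=7, total_secs=24)
for n in (8, 8):
    assert req.segment_done(n) is None   # two of three segments done, keep waiting
req.submitting = False                   # submission loop has finished
print(req.segment_done(8))               # -> {'id': 7, 'status': 0}
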
+ */ +static void handle_blktap_iomsg(void* private) +{ + struct td_state* s = private; + + RING_IDX rp, j, i; + blkif_request_t *req; + int idx, nsects, ret; + uint64_t sector_nr; + uint8_t *page; + blkif_t *blkif = s->blkif; + tapdev_info_t *info = s->ring_info; + int page_size = getpagesize(); + + struct aiocb_info *aiocb_info; + + if (info->fe_ring.sring == NULL) { + DPRINTF(" sring == NULL, ignoring IO request\n"); + return; + } + + rp = info->fe_ring.sring->req_prod; + xen_rmb(); + + for (j = info->fe_ring.req_cons; j != rp; j++) + { + int start_seg = 0; + + req = NULL; + req = RING_GET_REQUEST(&info->fe_ring, j); + ++info->fe_ring.req_cons; + + if (req == NULL) + continue; + + idx = req->id; + + ASSERT(blkif->pending_list[idx].secs_pending == 0); + memcpy(&blkif->pending_list[idx].req, req, sizeof(*req)); + blkif->pending_list[idx].status = BLKIF_RSP_OKAY; + blkif->pending_list[idx].submitting = 1; + sector_nr = req->sector_number; + + /* Don't allow writes on readonly devices */ + if ((s->flags & TD_RDONLY) && + (req->operation == BLKIF_OP_WRITE)) { + blkif->pending_list[idx].status = BLKIF_RSP_ERROR; + goto send_response; + } + + for (i = start_seg; i < req->nr_segments; i++) { + nsects = req->seg[i].last_sect - + req->seg[i].first_sect + 1; + + if ((req->seg[i].last_sect >= page_size >> 9) || + (nsects <= 0)) + continue; + + page = (uint8_t*) MMAP_VADDR(info->vstart, + (unsigned long)req->id, i); + page += (req->seg[i].first_sect << SECTOR_SHIFT); + + if (sector_nr >= s->size) { + DPRINTF("Sector request failed:\n"); + DPRINTF("%s request, idx [%d,%d] size [%llu], " + "sector [%llu,%llu]\n", + (req->operation == BLKIF_OP_WRITE ? + "WRITE" : "READ"), + idx,i, + (long long unsigned) + nsects<<SECTOR_SHIFT, + (long long unsigned) + sector_nr<<SECTOR_SHIFT, + (long long unsigned) sector_nr); + continue; + } + + blkif->pending_list[idx].secs_pending += nsects; + + switch (req->operation) + { + case BLKIF_OP_WRITE: + aiocb_info = malloc(sizeof(*aiocb_info)); + + aiocb_info->s = s; + aiocb_info->sector = sector_nr; + aiocb_info->nr_secs = nsects; + aiocb_info->idx = idx; + aiocb_info->i = i; + + ret = (NULL == bdrv_aio_write(s->bs, sector_nr, + page, nsects, + qemu_send_responses, + aiocb_info)); + + if (ret) { + blkif->pending_list[idx].status = BLKIF_RSP_ERROR; + DPRINTF("ERROR: bdrv_write() == NULL\n"); + goto send_response; + } + break; + + case BLKIF_OP_READ: + aiocb_info = malloc(sizeof(*aiocb_info)); + + aiocb_info->s = s; + aiocb_info->sector = sector_nr; + aiocb_info->nr_secs = nsects; + aiocb_info->idx = idx; + aiocb_info->i = i; + + ret = (NULL == bdrv_aio_read(s->bs, sector_nr, + page, nsects, + qemu_send_responses, + aiocb_info)); + + if (ret) { + blkif->pending_list[idx].status = BLKIF_RSP_ERROR; + DPRINTF("ERROR: bdrv_read() == NULL\n"); + goto send_response; + } + break; + + default: + DPRINTF("Unknown block operation\n"); + break; + } + sector_nr += nsects; + } + send_response: + blkif->pending_list[idx].submitting = 0; + + /* force write_rsp_to_ring for synchronous case */ + if (blkif->pending_list[idx].secs_pending == 0) + send_responses(s, 0, 0, 0, idx, (void *)(long)0); + } +} + +/** + * Callback function for the qemu-read pipe. Reads and processes control + * message from the pipe. + * + * The parameter private is unused. 
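
[Editor's note, not part of the patch] The control-message dispatch implemented by handle_blktap_ctrlmsg() below is a simple request/reply exchange over the two pipes. A condensed Python sketch of that flow; the dict-based message layout is only a stand-in for the real msg_hdr_t framing:

import os

def handle_ctrlmsg(msg, disks, open_disk):
    """msg: dict with 'type' and 'cookie'; disks: cookie -> disk state."""
    if msg['type'] == 'CTLMSG_PARAMS':
        # open the image named in the payload and reply with its geometry
        disk = open_disk(msg['path'], msg['readonly'])
        if disk is None:
            return {'type': 'CTLMSG_IMG_FAIL'}
        disks[msg['cookie']] = disk
        return {'type': 'CTLMSG_IMG',
                'size': disk['size'], 'secsize': disk['sector_size']}
    if msg['type'] == 'CTLMSG_NEWDEV':
        # map /dev/xen/blktapN for the disk identified by the cookie
        ok = msg['cookie'] in disks
        return {'type': 'CTLMSG_NEWDEV_RSP' if ok else 'CTLMSG_NEWDEV_FAIL'}
    if msg['type'] == 'CTLMSG_CLOSE':
        disks.pop(msg['cookie'], None)       # unmap_disk() in the C code
        return None                          # CLOSE is not acknowledged
    if msg['type'] == 'CTLMSG_PID':
        return {'type': 'CTLMSG_PID_RSP', 'pid': os.getpid()}
    return None

disks = {}
opener = lambda path, ro: {'size': 8192, 'sector_size': 512}
print(handle_ctrlmsg({'type': 'CTLMSG_PARAMS', 'cookie': 1,
                      'path': 'disk.qcow2', 'readonly': 0}, disks, opener))
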
+ */ +static void handle_blktap_ctrlmsg(void* private) +{ + int length, len, msglen; + char *ptr, *path; + image_t *img; + msg_hdr_t *msg; + msg_newdev_t *msg_dev; + msg_pid_t *msg_pid; + int ret = -1; + struct td_state *s = NULL; + fd_list_entry_t *entry; + + char buf[MSG_SIZE]; + + length = read(read_fd, buf, MSG_SIZE); + + if (length > 0 && length >= sizeof(msg_hdr_t)) + { + msg = (msg_hdr_t *)buf; + DPRINTF("blktap: Received msg, len %d, type %d, UID %d\n", + length,msg->type,msg->cookie); + + switch (msg->type) { + case CTLMSG_PARAMS: + ptr = buf + sizeof(msg_hdr_t); + len = (length - sizeof(msg_hdr_t)); + path = calloc(1, len + 1); + + memcpy(path, ptr, len); + DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path); + + /* Allocate the disk structs */ + s = state_init(); + + /*Open file*/ + if (s == NULL || open_disk(s, path, msg->readonly)) { + msglen = sizeof(msg_hdr_t); + msg->type = CTLMSG_IMG_FAIL; + msg->len = msglen; + } else { + entry = add_fd_entry(0, s); + entry->cookie = msg->cookie; + DPRINTF("Entered cookie %d\n", entry->cookie); + + memset(buf, 0x00, MSG_SIZE); + + msglen = sizeof(msg_hdr_t) + sizeof(image_t); + msg->type = CTLMSG_IMG; + img = (image_t *)(buf + sizeof(msg_hdr_t)); + img->size = s->size; + img->secsize = s->sector_size; + img->info = s->info; + DPRINTF("Writing (size, secsize, info) = " + "(%#" PRIx64 ", %#" PRIx64 ", %d)\n", + s->size, s->sector_size, s->info); + } + len = write(write_fd, buf, msglen); + free(path); + break; + + case CTLMSG_NEWDEV: + msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t)); + + s = get_state(msg->cookie); + DPRINTF("Retrieving state, cookie %d.....[%s]\n", + msg->cookie, (s == NULL ? "FAIL":"OK")); + if (s != NULL) { + ret = ((map_new_dev(s, msg_dev->devnum) + == msg_dev->devnum ? 0: -1)); + } + + memset(buf, 0x00, MSG_SIZE); + msglen = sizeof(msg_hdr_t); + msg->type = (ret == 0 ? CTLMSG_NEWDEV_RSP + : CTLMSG_NEWDEV_FAIL); + msg->len = msglen; + + len = write(write_fd, buf, msglen); + break; + + case CTLMSG_CLOSE: + s = get_state(msg->cookie); + if (s) unmap_disk(s); + break; + + case CTLMSG_PID: + memset(buf, 0x00, MSG_SIZE); + msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t); + msg->type = CTLMSG_PID_RSP; + msg->len = msglen; + + msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t)); + process = getpid(); + msg_pid->pid = process; + + len = write(write_fd, buf, msglen); + break; + + default: + break; + } + } +} + +/** + * Opens a control socket, i.e. a pipe to communicate with blktapctrl. 
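
[Editor's note, not part of the patch] Setting up each qemu-read-N / qemu-write-N pipe boils down to a mkfifo-then-open-nonblocking sequence, performed once per direction by the helper described here. A minimal Python equivalent; paths mirror the C code, everything else is illustrative:

import errno, os

def open_ctrl_fifo(path):
    # make sure /var/run/tap and the FIFO exist, then open it non-blocking
    # so qemu's main loop is never stalled waiting for blktapctrl
    os.makedirs('/var/run/tap', exist_ok=True)
    try:
        os.mkfifo(path, 0o777)           # S_IRWXU|S_IRWXG|S_IRWXO in the C code
    except OSError as e:
        if e.errno != errno.EEXIST:      # an already-existing FIFO is fine
            raise
    return os.open(path, os.O_RDWR | os.O_NONBLOCK)

# one FIFO per direction, keyed by the domain ID:
#   read_fd  = open_ctrl_fifo('/var/run/tap/qemu-read-%d' % domid)
#   write_fd = open_ctrl_fifo('/var/run/tap/qemu-write-%d' % domid)
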
+ * + * Returns the file descriptor number for the pipe; -1 in error case + */ +static int open_ctrl_socket(char *devname) +{ + int ret; + int ipc_fd; + + if (mkdir(BLKTAP_CTRL_DIR, 0755) == 0) + DPRINTF("Created %s directory\n", BLKTAP_CTRL_DIR); + + ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO); + if ( (ret != 0) && (errno != EEXIST) ) { + DPRINTF("ERROR: pipe failed (%d)\n", errno); + return -1; + } + + ipc_fd = open(devname,O_RDWR|O_NONBLOCK); + + if (ipc_fd < 0) { + DPRINTF("FD open failed\n"); + return -1; + } + + return ipc_fd; +} + +/** + * Unmaps all disks and closes their pipes + */ +void shutdown_blktap(void) +{ + fd_list_entry_t *ptr; + struct td_state *s; + char *devname; + + DPRINTF("Shutdown blktap\n"); + + /* Unmap all disks */ + ptr = fd_start; + while (ptr != NULL) { + s = ptr->s; + unmap_disk(s); + close(ptr->tap_fd); + ptr = ptr->next; + } + + /* Delete control pipes */ + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) { + DPRINTF("Delete %s\n", devname); + if (unlink(devname)) + DPRINTF("Could not delete: %s\n", strerror(errno)); + free(devname); + } + + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) { + DPRINTF("Delete %s\n", devname); + if (unlink(devname)) + DPRINTF("Could not delete: %s\n", strerror(errno)); + free(devname); + } +} + +/** + * Initialize the blktap interface, i.e. open a pair of pipes in /var/run/tap + * and register a fd handler. + * + * Returns 0 on success. + */ +int init_blktap(void) +{ + char* devname; + + DPRINTF("Init blktap pipes\n"); + + /* Open the read pipe */ + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) { + read_fd = open_ctrl_socket(devname); + free(devname); + + if (read_fd == -1) { + fprintf(stderr, "Could not open %s/qemu-read-%d\n", + BLKTAP_CTRL_DIR, domid); + return -1; + } + } + + /* Open the write pipe */ + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) { + write_fd = open_ctrl_socket(devname); + free(devname); + + if (write_fd == -1) { + fprintf(stderr, "Could not open %s/qemu-write-%d\n", + BLKTAP_CTRL_DIR, domid); + close(read_fd); + return -1; + } + } + + /* Attach a handler to the read pipe (called from qemu main loop) */ + qemu_set_fd_handler2(read_fd, NULL, &handle_blktap_ctrlmsg, NULL, NULL); + + /* Register handler to clean up when the domain is destroyed */ + atexit(&shutdown_blktap); + + return 0; +} diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/hw/xen_blktap.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ioemu/hw/xen_blktap.h Thu Mar 20 12:35:40 2008 -0600 @@ -0,0 +1,57 @@ +/* xen_blktap.h + * + * Generic disk interface for blktap-based image adapters. + * + * (c) 2006 Andrew Warfield and Julian Chesterfield + */ + +#ifndef XEN_BLKTAP_H_ +#define XEN_BLKTAP_H_ + +#include <stdint.h> +#include <syslog.h> +#include <stdio.h> + +#include "block_int.h" + +/* Things disks need to know about, these should probably be in a higher-level + * header. */ +#define MAX_SEGMENTS_PER_REQ 11 +#define SECTOR_SHIFT 9 +#define DEFAULT_SECTOR_SIZE 512 + +#define MAX_IOFD 2 + +#define BLK_NOT_ALLOCATED 99 +#define TD_NO_PARENT 1 + +typedef uint32_t td_flag_t; + +#define TD_RDONLY 1 + +struct disk_id { + char *name; + int drivertype; +}; + +/* This structure represents the state of an active virtual disk. 
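
[Editor's note, not part of the patch] The td_state and fd_list_entry structures below form a small registry of attached disks, looked up by the cookie carried in every control message (get_state() earlier in the file walks it). The same idea in a few lines of Python, purely illustrative:

fd_entries = []                          # one entry per attached disk

def add_fd_entry(tap_fd, state, cookie):
    fd_entries.append({'cookie': cookie, 'tap_fd': tap_fd, 's': state})

def get_state(cookie):
    for entry in fd_entries:             # linear walk, like the C linked list
        if entry['cookie'] == cookie:
            return entry['s']
    return None

add_fd_entry(0, {'path': 'disk.qcow2'}, cookie=3)
print(get_state(3))                      # -> {'path': 'disk.qcow2'}
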
*/ +struct td_state { + BlockDriverState* bs; + td_flag_t flags; + void *blkif; + void *image; + void *ring_info; + void *fd_entry; + uint64_t sector_size; + uint64_t size; + unsigned int info; +}; + +typedef struct fd_list_entry { + int cookie; + int tap_fd; + struct td_state *s; + struct fd_list_entry **pprev, *next; +} fd_list_entry_t; + +#endif /*XEN_BLKTAP_H_*/ diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/hw/xen_machine_pv.c --- a/tools/ioemu/hw/xen_machine_pv.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/hw/xen_machine_pv.c Thu Mar 20 12:35:40 2008 -0600 @@ -26,6 +26,9 @@ #include "xen_console.h" #include "xenfb.h" +extern void init_blktap(void); + + /* The Xen PV machine currently provides * - a virtual framebuffer * - .... @@ -40,6 +43,12 @@ static void xen_init_pv(uint64_t ram_siz { struct xenfb *xenfb; extern int domid; + + +#ifndef CONFIG_STUBDOM + /* Initialize tapdisk client */ + init_blktap(); +#endif /* Connect to text console */ if (serial_hds[0]) { diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/sdl.c --- a/tools/ioemu/sdl.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/sdl.c Thu Mar 20 12:35:40 2008 -0600 @@ -29,6 +29,10 @@ #include <signal.h> #endif +#ifdef CONFIG_OPENGL +#include <SDL_opengl.h> +#endif + static SDL_Surface *screen; static SDL_Surface *shared = NULL; static int gui_grab; /* if true, all keyboard/mouse events are grabbed */ @@ -44,6 +48,99 @@ static SDL_Cursor *sdl_cursor_normal; static SDL_Cursor *sdl_cursor_normal; static SDL_Cursor *sdl_cursor_hidden; static int absolute_enabled = 0; +static int opengl_enabled; + +#ifdef CONFIG_OPENGL +static GLint tex_format; +static GLint tex_type; +static GLuint texture_ref = 0; +static GLint gl_format; + +static void opengl_setdata(DisplayState *ds, void *pixels) +{ + glEnable(GL_TEXTURE_RECTANGLE_ARB); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + glClearColor(0, 0, 0, 0); + glDisable(GL_BLEND); + glDisable(GL_LIGHTING); + glDisable(GL_DEPTH_TEST); + glDepthMask(GL_FALSE); + glDisable(GL_CULL_FACE); + glViewport( 0, 0, screen->w, screen->h); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(0, screen->w, screen->h, 0, -1,1); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + glClear(GL_COLOR_BUFFER_BIT); + ds->data = pixels; + + if (texture_ref) { + glDeleteTextures(1, &texture_ref); + texture_ref = 0; + } + + glGenTextures(1, &texture_ref); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texture_ref); + glPixelStorei(GL_UNPACK_LSB_FIRST, 1); + switch (ds->depth) { + case 8: + tex_format = GL_RGB; + tex_type = GL_UNSIGNED_BYTE_3_3_2; + glPixelStorei (GL_UNPACK_ALIGNMENT, 1); + break; + case 16: + tex_format = GL_RGB; + tex_type = GL_UNSIGNED_SHORT_5_6_5; + glPixelStorei (GL_UNPACK_ALIGNMENT, 2); + break; + case 24: + tex_format = GL_BGR; + tex_type = GL_UNSIGNED_BYTE; + glPixelStorei (GL_UNPACK_ALIGNMENT, 1); + break; + case 32: + if (!ds->bgr) { + tex_format = GL_BGRA; + tex_type = GL_UNSIGNED_BYTE; + } else { + tex_format = GL_RGBA; + tex_type = GL_UNSIGNED_BYTE; + } + glPixelStorei (GL_UNPACK_ALIGNMENT, 4); + break; + } + glPixelStorei(GL_UNPACK_ROW_LENGTH, (ds->linesize * 8) / ds->depth); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, gl_format, ds->width, ds->height, 0, tex_format, tex_type, pixels); + glTexParameterf(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_PRIORITY, 1.0); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, 
GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); +} + +static void opengl_update(DisplayState *ds, int x, int y, int w, int h) +{ + int bpp = ds->depth / 8; + GLvoid *pixels = ds->data + y * ds->linesize + x * bpp; + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texture_ref); + glPixelStorei(GL_UNPACK_ROW_LENGTH, ds->linesize / bpp); + glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, x, y, w, h, tex_format, tex_type, pixels); + glBegin(GL_QUADS); + glTexCoord2d(0, 0); + glVertex2d(0, 0); + glTexCoord2d(ds->width, 0); + glVertex2d(screen->w, 0); + glTexCoord2d(ds->width, ds->height); + glVertex2d(screen->w, screen->h); + glTexCoord2d(0, ds->height); + glVertex2d(0, screen->h); + glEnd(); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); + SDL_GL_SwapBuffers(); +} +#endif static void sdl_update(DisplayState *ds, int x, int y, int w, int h) { @@ -96,17 +193,26 @@ static void sdl_resize(DisplayState *ds, // printf("resizing to %d %d\n", w, h); - flags = SDL_HWSURFACE|SDL_ASYNCBLIT|SDL_HWACCEL|SDL_DOUBLEBUF|SDL_HWPALETTE; - if (gui_fullscreen) +#ifdef CONFIG_OPENGL + if (ds->shared_buf && opengl_enabled) + flags = SDL_OPENGL|SDL_RESIZABLE; + else +#endif + flags = SDL_HWSURFACE|SDL_ASYNCBLIT|SDL_HWACCEL|SDL_DOUBLEBUF|SDL_HWPALETTE; + + if (gui_fullscreen) { flags |= SDL_FULLSCREEN; - + flags &= ~SDL_RESIZABLE; + } + width = w; height = h; again: screen = SDL_SetVideoMode(w, h, 0, flags); +#ifndef CONFIG_OPENGL if (!screen) { - fprintf(stderr, "Could not open SDL display\n"); + fprintf(stderr, "Could not open SDL display: %s\n", SDL_GetError()); exit(1); } if (!screen->pixels && (flags & SDL_HWSURFACE) && (flags & SDL_FULLSCREEN)) { @@ -115,9 +221,10 @@ static void sdl_resize(DisplayState *ds, } if (!screen->pixels) { - fprintf(stderr, "Could not open SDL display\n"); + fprintf(stderr, "Could not open SDL display: %s\n", SDL_GetError()); exit(1); } +#endif ds->width = w; ds->height = h; if (!ds->shared_buf) { @@ -131,6 +238,25 @@ static void sdl_resize(DisplayState *ds, ds->linesize = screen->pitch; } else { ds->linesize = linesize; +#ifdef CONFIG_OPENGL + switch(screen->format->BitsPerPixel) { + case 8: + gl_format = GL_RGB; + break; + case 16: + gl_format = GL_RGB; + break; + case 24: + gl_format = GL_RGB; + break; + case 32: + if (!screen->format->Rshift) + gl_format = GL_BGRA; + else + gl_format = GL_RGBA; + break; + }; +#endif } } @@ -139,7 +265,13 @@ static void sdl_colourdepth(DisplayState if (!depth || !ds->depth) return; ds->shared_buf = 1; ds->depth = depth; - ds->linesize = width * depth / 8; + ds->linesize = width * depth / 8; +#ifdef CONFIG_OPENGL + if (opengl_enabled) { + ds->dpy_update = opengl_update; + ds->dpy_setdata = opengl_setdata; + } +#endif } /* generic keyboard conversion */ @@ -331,8 +463,8 @@ static void sdl_send_mouse_event(int dx, } SDL_GetMouseState(&dx, &dy); - dx = dx * 0x7FFF / (width - 1); - dy = dy * 0x7FFF / (height - 1); + dx = dx * 0x7FFF / (screen->w - 1); + dy = dy * 0x7FFF / (screen->h - 1); } else if (absolute_enabled) { sdl_show_cursor(); absolute_enabled = 0; @@ -344,7 +476,8 @@ static void toggle_full_screen(DisplaySt static void toggle_full_screen(DisplayState *ds) { gui_fullscreen = !gui_fullscreen; - sdl_resize(ds, screen->w, screen->h, ds->linesize); + sdl_resize(ds, ds->width, ds->height, ds->linesize); + ds->dpy_setdata(ds, ds->data); if (gui_fullscreen) { gui_saved_grab = gui_grab; sdl_grab_start(); @@ -371,7 +504,7 @@ static void 
sdl_refresh(DisplayState *ds while (SDL_PollEvent(ev)) { switch (ev->type) { case SDL_VIDEOEXPOSE: - sdl_update(ds, 0, 0, screen->w, screen->h); + ds->dpy_update(ds, 0, 0, ds->width, ds->height); break; case SDL_KEYDOWN: case SDL_KEYUP: @@ -528,6 +661,18 @@ static void sdl_refresh(DisplayState *ds } } break; +#ifdef CONFIG_OPENGL + case SDL_VIDEORESIZE: + { + if (ds->shared_buf && opengl_enabled) { + SDL_ResizeEvent *rev = &ev->resize; + screen = SDL_SetVideoMode(rev->w, rev->h, 0, SDL_OPENGL|SDL_RESIZABLE); + opengl_setdata(ds, ds->data); + opengl_update(ds, 0, 0, ds->width, ds->height); + } + break; + } +#endif default: break; } @@ -536,13 +681,17 @@ static void sdl_refresh(DisplayState *ds static void sdl_cleanup(void) { +#ifdef CONFIG_OPENGL + if (texture_ref) glDeleteTextures(1, &texture_ref); +#endif SDL_Quit(); } -void sdl_display_init(DisplayState *ds, int full_screen) +void sdl_display_init(DisplayState *ds, int full_screen, int opengl) { int flags; uint8_t data = 0; + opengl_enabled = opengl; #if defined(__APPLE__) /* always use generic keymaps */ diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/vl.c Thu Mar 20 12:35:40 2008 -0600 @@ -174,6 +174,11 @@ int graphic_height = 600; #endif int graphic_depth = 15; int full_screen = 0; +#ifdef CONFIG_OPENGL +int opengl_enabled = 1; +#else +int opengl_enabled = 0; +#endif int no_quit = 0; CharDriverState *serial_hds[MAX_SERIAL_PORTS]; CharDriverState *parallel_hds[MAX_PARALLEL_PORTS]; @@ -276,9 +281,9 @@ void default_ioport_writel(void *opaque, void init_ioports(void) { - ioport_opaque = malloc(MAX_IOPORTS * sizeof(*ioport_opaque)); - ioport_read_table = malloc(3 * MAX_IOPORTS * sizeof(**ioport_read_table)); - ioport_write_table = malloc(3 * MAX_IOPORTS * sizeof(**ioport_write_table)); + ioport_opaque = calloc(MAX_IOPORTS, sizeof(*ioport_opaque)); + ioport_read_table = calloc(3 * MAX_IOPORTS, sizeof(**ioport_read_table)); + ioport_write_table = calloc(3 * MAX_IOPORTS, sizeof(**ioport_write_table)); } /* size is the word size in byte */ @@ -6272,6 +6277,12 @@ void qemu_system_powerdown_request(void) cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT); } +static void qemu_sighup_handler(int signal) +{ + fprintf(stderr, "Received SIGHUP, terminating.\n"); + exit(0); +} + void main_loop_wait(int timeout) { IOHandlerRecord *ioh; @@ -6482,6 +6493,9 @@ void help(void) #ifdef CONFIG_SDL "-no-quit disable SDL window close capability\n" #endif +#ifdef CONFIG_OPENGL + "-disable-opengl disable OpenGL rendering, using SDL" +#endif #ifdef TARGET_I386 "-no-fd-bootchk disable boot signature checking for floppy disks\n" #endif @@ -6660,6 +6674,7 @@ enum { QEMU_OPTION_loadvm, QEMU_OPTION_full_screen, QEMU_OPTION_no_quit, + QEMU_OPTION_disable_opengl, QEMU_OPTION_pidfile, QEMU_OPTION_no_kqemu, QEMU_OPTION_kernel_kqemu, @@ -6757,6 +6772,7 @@ const QEMUOption qemu_options[] = { #ifdef CONFIG_SDL { "no-quit", 0, QEMU_OPTION_no_quit }, #endif + { "disable-opengl", 0, QEMU_OPTION_disable_opengl }, { "pidfile", HAS_ARG, QEMU_OPTION_pidfile }, { "win2k-hack", 0, QEMU_OPTION_win2k_hack }, { "usbdevice", HAS_ARG, QEMU_OPTION_usbdevice }, @@ -7528,6 +7544,9 @@ int main(int argc, char **argv) no_quit = 1; break; #endif + case QEMU_OPTION_disable_opengl: + opengl_enabled = 0; + break; case QEMU_OPTION_pidfile: create_pidfile(optarg); break; @@ -7854,7 +7873,7 @@ int main(int argc, char **argv) xenstore_write_vncport(vnc_display_port); } else { #if defined(CONFIG_SDL) - sdl_display_init(ds, 
full_screen); + sdl_display_init(ds, full_screen, opengl_enabled); #elif defined(CONFIG_COCOA) cocoa_display_init(ds, full_screen); #else @@ -7980,7 +7999,7 @@ int main(int argc, char **argv) #ifndef CONFIG_STUBDOM /* Unblock SIGTERM and SIGHUP, which may have been blocked by the caller */ - signal(SIGHUP, SIG_DFL); + signal(SIGHUP, qemu_sighup_handler); sigemptyset(&set); sigaddset(&set, SIGTERM); sigaddset(&set, SIGHUP); diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/vl.h Thu Mar 20 12:35:40 2008 -0600 @@ -982,7 +982,7 @@ void isa_cirrus_vga_init(DisplayState *d unsigned long vga_ram_offset, int vga_ram_size); /* sdl.c */ -void sdl_display_init(DisplayState *ds, int full_screen); +void sdl_display_init(DisplayState *ds, int full_screen, int opengl_enable); /* cocoa.m */ void cocoa_display_init(DisplayState *ds, int full_screen); diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/vnc.c --- a/tools/ioemu/vnc.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/vnc.c Thu Mar 20 12:35:40 2008 -0600 @@ -369,20 +369,21 @@ static void vnc_dpy_resize(DisplayState VncState *vs = ds->opaque; int o; - if (linesize != w * vs->depth) - ds->shared_buf = 0; - if (!ds->shared_buf) { + ds->linesize = w * vs->depth; if (allocated) - ds->data = realloc(ds->data, w * h * vs->depth); + ds->data = realloc(ds->data, h * ds->linesize); else - ds->data = malloc(w * h * vs->depth); + ds->data = malloc(h * ds->linesize); allocated = 1; - } else if (allocated) { - free(ds->data); - allocated = 0; - } - vs->old_data = realloc(vs->old_data, w * h * vs->depth); + } else { + ds->linesize = linesize; + if (allocated) { + free(ds->data); + allocated = 0; + } + } + vs->old_data = realloc(vs->old_data, h * ds->linesize); vs->dirty_row = realloc(vs->dirty_row, h * sizeof(vs->dirty_row[0])); vs->update_row = realloc(vs->update_row, h * sizeof(vs->dirty_row[0])); @@ -399,7 +400,6 @@ static void vnc_dpy_resize(DisplayState size_changed = ds->width != w || ds->height != h; ds->width = w; ds->height = h; - ds->linesize = w * vs->depth; if (vs->csock != -1 && vs->has_resize && size_changed) { vs->width = ds->width; vs->height = ds->height; @@ -2494,6 +2494,7 @@ void vnc_display_init(DisplayState *ds) vs->ds->width = 640; vs->ds->height = 400; + vs->ds->linesize = 640 * 4; vnc_dpy_colourdepth(vs->ds, 24); } diff -r 8c921adf4833 -r 42f6c206c951 tools/ioemu/xenstore.c --- a/tools/ioemu/xenstore.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/ioemu/xenstore.c Thu Mar 20 12:35:40 2008 -0600 @@ -81,7 +81,7 @@ static void waitForDevice(char *fn) #define DIRECT_PCI_STR_LEN 160 char direct_pci_str[DIRECT_PCI_STR_LEN]; -void xenstore_parse_domain_config(int domid) +void xenstore_parse_domain_config(int hvm_domid) { char **e = NULL; char *buf = NULL, *path; @@ -100,7 +100,7 @@ void xenstore_parse_domain_config(int do return; } - path = xs_get_domain_path(xsh, domid); + path = xs_get_domain_path(xsh, hvm_domid); if (path == NULL) { fprintf(logfile, "xs_get_domain_path() error\n"); goto out; diff -r 8c921adf4833 -r 42f6c206c951 tools/libxc/ia64/dom_fw_acpi.c --- a/tools/libxc/ia64/dom_fw_acpi.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/libxc/ia64/dom_fw_acpi.c Thu Mar 20 12:35:40 2008 -0600 @@ -1,4 +1,5 @@ #include <inttypes.h> +#include "xc_dom_ia64_util.h" #include <xen/acpi.h> uint8_t diff -r 8c921adf4833 -r 42f6c206c951 tools/libxc/ia64/xc_dom_ia64_util.h --- a/tools/libxc/ia64/xc_dom_ia64_util.h Fri Mar 14 15:07:45 2008 -0600 +++ 
b/tools/libxc/ia64/xc_dom_ia64_util.h Thu Mar 20 12:35:40 2008 -0600 @@ -23,4 +23,8 @@ extern int shared_info_ia64(struct xc_do #define FW_MEM_BASE 0xff000000UL #define FW_MEM_SIZE 0x01000000UL +#ifdef __XEN_TOOLS__ +/* Necessary for including the acpi header chain when not in kernel context */ +typedef struct { } spinlock_t; +#endif #endif /* XC_IA64_DOM_IA64_UTIL_H */ diff -r 8c921adf4833 -r 42f6c206c951 tools/libxc/xc_domain_save.c --- a/tools/libxc/xc_domain_save.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/libxc/xc_domain_save.c Thu Mar 20 12:35:40 2008 -0600 @@ -123,36 +123,6 @@ static inline int count_bits ( int nr, v for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ ) count += hweight32(*p); return count; -} - -static inline int permute( int i, int nr, int order_nr ) -{ - /* Need a simple permutation function so that we scan pages in a - pseudo random order, enabling us to get a better estimate of - the domain's page dirtying rate as we go (there are often - contiguous ranges of pfns that have similar behaviour, and we - want to mix them up. */ - - /* e.g. nr->oder 15->4 16->4 17->5 */ - /* 512MB domain, 128k pages, order 17 */ - - /* - QPONMLKJIHGFEDCBA - QPONMLKJIH - GFEDCBA - */ - - /* - QPONMLKJIHGFEDCBA - EDCBA - QPONM - LKJIHGF - */ - - do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } - while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ - - return i; } static uint64_t tv_to_us(struct timeval *new) @@ -859,9 +829,6 @@ int xc_domain_save(int xc_handle, int io /* base of the region in which domain memory is mapped */ unsigned char *region_base = NULL; - /* power of 2 order of p2m_size */ - int order_nr; - /* bitmap of pages: - that should be sent this iteration (unless later marked as skip); - to skip this iteration because already dirty; @@ -970,11 +937,6 @@ int xc_domain_save(int xc_handle, int io /* pretend we sent all the pages last iteration */ sent_last_iter = p2m_size; - - /* calculate the power of 2 order of p2m_size, e.g. - 15->4 16->4 17->5 */ - for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ ) - continue; /* Setup to_send / to_fix and to_skip bitmaps */ to_send = malloc(BITMAP_SIZE); @@ -1126,7 +1088,7 @@ int xc_domain_save(int xc_handle, int io (batch < MAX_BATCH_SIZE) && (N < p2m_size); N++ ) { - int n = permute(N, p2m_size, order_nr); + int n = N; if ( debug ) { diff -r 8c921adf4833 -r 42f6c206c951 tools/libxc/xc_linux.c --- a/tools/libxc/xc_linux.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/libxc/xc_linux.c Thu Mar 20 12:35:40 2008 -0600 @@ -4,7 +4,7 @@ * Use is subject to license terms. 
* * xc_gnttab functions: - * Copyright (c) 2007, D G Murray <Derek.Murray@xxxxxxxxxxxx> + * Copyright (c) 2007-2008, D G Murray <Derek.Murray@xxxxxxxxxxxx> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -505,6 +505,19 @@ int xc_gnttab_munmap(int xcg_handle, return 0; } +int xc_gnttab_set_max_grants(int xcg_handle, + uint32_t count) +{ + struct ioctl_gntdev_set_max_grants set_max; + int rc; + + set_max.count = count; + if ( (rc = ioctl(xcg_handle, IOCTL_GNTDEV_SET_MAX_GRANTS, &set_max)) ) + return rc; + + return 0; +} + /* * Local variables: * mode: C diff -r 8c921adf4833 -r 42f6c206c951 tools/libxc/xc_minios.c --- a/tools/libxc/xc_minios.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/libxc/xc_minios.c Thu Mar 20 12:35:40 2008 -0600 @@ -165,14 +165,6 @@ static int port_alloc(int xce_handle) { return i; } -static void poke_port(int xce_handle, evtchn_port_t port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - printk("poking port %d\n", port); - synch_set_bit(port, &s->evtchn_pending[0]); - xc_evtchn_unmask(xce_handle, port); -} - static void evtchn_handler(evtchn_port_t port, struct pt_regs *regs, void *data) { int xce_handle = (intptr_t) data; @@ -211,6 +203,7 @@ evtchn_port_or_error_t xc_evtchn_bind_un } files[xce_handle].evtchn.ports[i].bound = 1; files[xce_handle].evtchn.ports[i].port = port; + unmask_evtchn(port); return port; } @@ -235,9 +228,7 @@ evtchn_port_or_error_t xc_evtchn_bind_in } files[xce_handle].evtchn.ports[i].bound = 1; files[xce_handle].evtchn.ports[i].port = local_port; -/* Poke port on start: HVM won't send an event for the very first request since - * we were not ready yet */ - poke_port(xce_handle, local_port); + unmask_evtchn(local_port); return local_port; } @@ -275,6 +266,7 @@ evtchn_port_or_error_t xc_evtchn_bind_vi } files[xce_handle].evtchn.ports[i].bound = 1; files[xce_handle].evtchn.ports[i].port = port; + unmask_evtchn(port); return port; } diff -r 8c921adf4833 -r 42f6c206c951 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/libxc/xenctrl.h Thu Mar 20 12:35:40 2008 -0600 @@ -6,7 +6,7 @@ * Copyright (c) 2003-2004, K A Fraser. * * xc_gnttab functions: - * Copyright (c) 2007, D G Murray <Derek.Murray@xxxxxxxxxxxx> + * Copyright (c) 2007-2008, D G Murray <Derek.Murray@xxxxxxxxxxxx> */ #ifndef XENCTRL_H @@ -832,6 +832,20 @@ int xc_gnttab_munmap(int xcg_handle, void *start_address, uint32_t count); +/* + * Sets the maximum number of grants that may be mapped by the given instance + * to @count. + * + * N.B. This function must be called after opening the handle, and before any + * other functions are invoked on it. + * + * N.B. When variable-length grants are mapped, fragmentation may be observed, + * and it may not be possible to satisfy requests up to the maximum number + * of grants. + */ +int xc_gnttab_set_max_grants(int xcg_handle, + uint32_t count); + int xc_hvm_set_pci_intx_level( int xc_handle, domid_t dom, uint8_t domain, uint8_t bus, uint8_t device, uint8_t intx, diff -r 8c921adf4833 -r 42f6c206c951 tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/python/xen/xend/XendConfig.py Thu Mar 20 12:35:40 2008 -0600 @@ -123,19 +123,49 @@ XENAPI_CFG_TO_LEGACY_CFG = { LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(XENAPI_CFG_TO_LEGACY_CFG) -# Platform configuration keys. 
-XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'loader', 'display', - 'fda', 'fdb', 'keymap', 'isa', 'localtime', 'monitor', - 'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl', - 'soundhw','stdvga', 'usb', 'usbdevice', 'hpet', 'vnc', - 'vncconsole', 'vncdisplay', 'vnclisten', 'timer_mode', - 'vncpasswd', 'vncunused', 'xauthority', 'pci', 'vhpt', - 'guest_os_type', 'hap'] +# Platform configuration keys and their types. +XENAPI_PLATFORM_CFG_TYPES = { + 'acpi': int, + 'apic': int, + 'boot': str, + 'device_model': str, + 'loader': str, + 'display' : str, + 'fda': str, + 'fdb': str, + 'keymap': str, + 'isa' : int, + 'localtime': int, + 'monitor': int, + 'nographic': int, + 'pae' : int, + 'rtc_timeoffset': int, + 'serial': str, + 'sdl': int, + 'opengl': int, + 'soundhw': str, + 'stdvga': int, + 'usb': int, + 'usbdevice': str, + 'hpet': int, + 'vnc': int, + 'vncconsole': int, + 'vncdisplay': int, + 'vnclisten': str, + 'timer_mode': int, + 'vncpasswd': str, + 'vncunused': int, + 'xauthority': str, + 'pci': str, + 'vhpt': int, + 'guest_os_type': str, + 'hap': int, +} # Xen API console 'other_config' keys. XENAPI_CONSOLE_OTHER_CFG = ['vncunused', 'vncdisplay', 'vnclisten', 'vncpasswd', 'type', 'display', 'xauthority', - 'keymap'] + 'keymap', 'opengl'] # List of XendConfig configuration keys that have no direct equivalent # in the old world. @@ -405,6 +435,12 @@ class XendConfig(dict): self['platform']['device_model'] = xen.util.auxbin.pathTo("qemu-dm") if self.is_hvm(): + if 'timer_mode' not in self['platform']: + self['platform']['timer_mode'] = 0 + if 'rtc_timeoffset' not in self['platform']: + self['platform']['rtc_timeoffset'] = 0 + if 'hpet' not in self['platform']: + self['platform']['hpet'] = 0 if 'loader' not in self['platform']: # Old configs may have hvmloader set as PV_kernel param if self.has_key('PV_kernel') and re.search('hvmloader', self['PV_kernel']): @@ -534,7 +570,7 @@ class XendConfig(dict): cfg['platform']['localtime'] = localtime # Compatibility hack -- can go soon. - for key in XENAPI_PLATFORM_CFG: + for key in XENAPI_PLATFORM_CFG_TYPES.keys(): val = sxp.child_value(sxp_cfg, "platform_" + key, None) if val is not None: self['platform'][key] = val @@ -713,7 +749,7 @@ class XendConfig(dict): self.update_with_image_sxp(image_sxp) # Convert Legacy HVM parameters to Xen API configuration - for key in XENAPI_PLATFORM_CFG: + for key in XENAPI_PLATFORM_CFG_TYPES.keys(): if key in cfg: self['platform'][key] = cfg[key] @@ -763,7 +799,7 @@ class XendConfig(dict): if image_type != 'hvm' and image_type != 'linux': self['platform']['image_type'] = image_type - for key in XENAPI_PLATFORM_CFG: + for key in XENAPI_PLATFORM_CFG_TYPES.keys(): val = sxp.child_value(image_sxp, key, None) if val is not None and val != '': self['platform'][key] = val @@ -847,6 +883,19 @@ class XendConfig(dict): self[key] = type_conv(val) else: self[key] = val + + # XenAPI defines platform as a string-string map. If platform + # configuration exists, convert values to appropriate type. 
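
[Editor's note, not part of the patch] Because the Xen API transports 'platform' as a string-to-string map, each value has to be coerced back to the type recorded in XENAPI_PLATFORM_CFG_TYPES, falling back to a lower-cased key when the exact key is not found; the hunk just below does this. A standalone sketch with a cut-down table:

XENAPI_PLATFORM_CFG_TYPES = {'acpi': int, 'vnc': int, 'opengl': int,
                             'keymap': str, 'device_model': str}

def coerce_platform(platform):
    out = {}
    for key, val in platform.items():
        conv = (XENAPI_PLATFORM_CFG_TYPES.get(key)
                or XENAPI_PLATFORM_CFG_TYPES.get(key.lower()))
        out[key] = conv(val) if callable(conv) else val
    return out

print(coerce_platform({'acpi': '1', 'opengl': '0', 'keymap': 'en-us'}))
# -> {'acpi': 1, 'opengl': 0, 'keymap': 'en-us'}
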
+ if 'platform' in xapi: + for key, val in xapi['platform'].items(): + type_conv = XENAPI_PLATFORM_CFG_TYPES.get(key) + if type_conv is None: + key = key.lower() + type_conv = XENAPI_PLATFORM_CFG_TYPES.get(key) + if callable(type_conv): + self['platform'][key] = type_conv(val) + else: + self['platform'][key] = val self['vcpus_params']['weight'] = \ int(self['vcpus_params'].get('weight', 256)) @@ -942,6 +991,7 @@ class XendConfig(dict): dev_type, dev_cfg = self['devices'][dev_uuid] is_bootable = dev_cfg.get('bootable', 0) config.append(['bootable', int(is_bootable)]) + config.append(['VDI', dev_cfg.get('VDI', '')]) sxpr.append(['device', config]) @@ -1276,6 +1326,12 @@ class XendConfig(dict): target['devices'][dev_uuid] = ('vfb', dev_info) target['console_refs'].append(dev_uuid) + # if console is rfb, set device_model ensuring qemu + # is invoked for pvfb services + if 'device_model' not in target['platform']: + target['platform']['device_model'] = \ + xen.util.auxbin.pathTo("qemu-dm") + # Finally, if we are a pvfb, we need to make a vkbd # as well that is not really exposed to Xen API vkbd_uuid = uuid.createString() @@ -1407,6 +1463,23 @@ class XendConfig(dict): config = cfg_sxp dev_type, dev_info = self['devices'][dev_uuid] + + if dev_type == 'pci': # Special case for pci + pci_devs = [] + for pci_dev in sxp.children(config, 'dev'): + pci_dev_info = {} + for opt_val in pci_dev[1:]: + try: + opt, val = opt_val + pci_dev_info[opt] = val + except TypeError: + pass + pci_devs.append(pci_dev_info) + self['devices'][dev_uuid] = (dev_type, + {'devs': pci_devs, + 'uuid': dev_uuid}) + return True + for opt_val in config[1:]: try: opt, val = opt_val @@ -1519,7 +1592,7 @@ class XendConfig(dict): if self.has_key('PV_args') and self['PV_args']: image.append(['args', self['PV_args']]) - for key in XENAPI_PLATFORM_CFG: + for key in XENAPI_PLATFORM_CFG_TYPES.keys(): if key in self['platform']: image.append([key, self['platform'][key]]) @@ -1555,7 +1628,7 @@ class XendConfig(dict): self['PV_ramdisk'] = sxp.child_value(image_sxp, 'ramdisk','') self['PV_args'] = kernel_args - for key in XENAPI_PLATFORM_CFG: + for key in XENAPI_PLATFORM_CFG_TYPES.keys(): val = sxp.child_value(image_sxp, key, None) if val is not None and val != '': self['platform'][key] = val diff -r 8c921adf4833 -r 42f6c206c951 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Mar 20 12:35:40 2008 -0600 @@ -558,18 +558,17 @@ class XendDomainInfo: count += 1 - def pci_device_create(self, dev_config): - log.debug("XendDomainInfo.pci_device_create: %s" % scrub_password(dev_config)) + def hvm_pci_device_create(self, dev_config): + log.debug("XendDomainInfo.hvm_pci_device_create: %s" + % scrub_password(dev_config)) if not self.info.is_hvm(): - raise VmError("only HVM guest support pci attach") + raise VmError("hvm_pci_device_create called on non-HVM guest") #all the PCI devs share one conf node devid = '0' - dev_type = sxp.name(dev_config) - new_devs = sxp.child_value(dev_config, 'devs') - new_dev = new_devs[0] + new_dev = dev_config['devs'][0] dev_info = self._getDeviceInfo_pci(devid)#from self.info['devices'] #check conflict before trigger hotplug event @@ -611,35 +610,6 @@ class XendDomainInfo: new_dev['vslt']) self.image.signalDeviceModel('pci-ins', 'pci-inserted', bdf_str) - # update the virtual pci slot - vslt = xstransact.Read("/local/domain/0/device-model/%i/parameter" - % self.getDomid()) - new_dev['vslt'] = vslt - - if 
dev_info is None: - # create a new one from scrach - dev_cfg_sxp = [dev_type, - ['dev', - ['domain', new_dev['domain']], - ['bus', new_dev['bus']], - ['slot', new_dev['slot']], - ['func', new_dev['func']], - ['vslt', new_dev['vslt']] - ]] - dev_uuid = self.info.device_add(dev_type, cfg_sxp = dev_cfg_sxp) - dev_config_dict = self.info['devices'][dev_uuid][1] - try: - dev_config_dict['devid'] = devid = \ - self._createDevice(dev_type, dev_config_dict) - self._waitForDevice(dev_type, devid) - except VmError, ex: - raise ex - else: - # update the pci config to add the new dev - pci_devs.extend(new_devs) - self._reconfigureDevice('pci', devid, pci_conf) - - return self.getDeviceController('pci').sxpr(devid) def device_create(self, dev_config): """Create a new device. @@ -649,11 +619,6 @@ class XendDomainInfo: """ log.debug("XendDomainInfo.device_create: %s" % scrub_password(dev_config)) dev_type = sxp.name(dev_config) - - if dev_type == 'pci': - rc = self.pci_device_create(dev_config) - return rc - dev_uuid = self.info.device_add(dev_type, cfg_sxp = dev_config) dev_config_dict = self.info['devices'][dev_uuid][1] log.debug("XendDomainInfo.device_create: %s" % scrub_password(dev_config_dict)) @@ -676,6 +641,151 @@ class XendDomainInfo: xen.xend.XendDomain.instance().managed_config_save(self) return self.getDeviceController(dev_type).sxpr(devid) + def pci_convert_sxp_to_dict(self, dev_sxp): + """Convert pci device sxp to dict + @param dev_sxp: device configuration + @type dev_sxp: SXP object (parsed config) + @return: dev_config + @rtype: dictionary + """ + # In reconfigure phase, config of PCI device looks like below: + # + # sxp: + # [device, [pci, [dev, [domain, '0x0'], [bus, '0x0'], [slot, '0x0'], + # [func, '0x0'], [vslt, '0x0']], + # [state, 'Initialising']]] + # + # dict: + # {devs: [{domain: '0x0', bus: '0x0', slot: '0x0', func: '0x0', + # vslt: '0x0'}], + # states: ['Initialising']} + # + # state 'Initialising' means the device is being attached. + # state 'Closing' means the device is being detached. + + dev_config = {} + pci_devs = [] + for pci_dev in sxp.children(dev_sxp, 'dev'): + pci_dev_info = {} + for opt_val in pci_dev[1:]: + try: + opt, val = opt_val + pci_dev_info[opt] = val + except TypeError: + pass + pci_devs.append(pci_dev_info) + dev_config['devs'] = pci_devs + pci_states = [] + for pci_state in sxp.children(dev_sxp, 'state'): + try: + pci_states.append(pci_state[1]) + except IndexError: + raise XendError("Error reading state while parsing pci sxp") + dev_config['states'] = pci_states + + return dev_config + + def pci_device_configure(self, dev_sxp, devid = 0): + """Configure an existing pci device. 
+ + @param dev_sxp: device configuration + @type dev_sxp: SXP object (parsed config) + @param devid: device id + @type devid: int + @return: Returns True if successfully updated device + @rtype: boolean + """ + log.debug("XendDomainInfo.pci_device_configure: %s" + % scrub_password(dev_sxp)) + + dev_class = sxp.name(dev_sxp) + + if dev_class != 'pci': + return False + + pci_state = sxp.child_value(dev_sxp, 'state') + existing_dev_info = self._getDeviceInfo_pci(devid) + + if existing_dev_info is None and pci_state != 'Initialising': + raise XendError("Cannot detach when pci platform does not exist") + + pci_dev = sxp.children(dev_sxp, 'dev')[0] + dev_config = self.pci_convert_sxp_to_dict(dev_sxp) + dev = dev_config['devs'][0] + + # Do HVM specific processing + if self.info.is_hvm(): + if pci_state == 'Initialising': + # HVM PCI device attachment + self.hvm_pci_device_create(dev_config) + # Update vslt + vslt = xstransact.Read("/local/domain/0/device-model/%i/parameter" + % self.getDomid()) + dev['vslt'] = vslt + for n in sxp.children(pci_dev): + if(n[0] == 'vslt'): + n[1] = vslt + else: + # HVM PCI device detachment + existing_dev_uuid = sxp.child_value(existing_dev_info, 'uuid') + existing_pci_conf = self.info['devices'][existing_dev_uuid][1] + existing_pci_devs = existing_pci_conf['devs'] + vslt = '0x0' + for x in existing_pci_devs: + if ( int(x['domain'], 16) == int(dev['domain'], 16) and + int(x['bus'], 16) == int(dev['bus'], 16) and + int(x['slot'], 16) == int(dev['slot'], 16) and + int(x['func'], 16) == int(dev['func'], 16) ): + vslt = x['vslt'] + break + if vslt == '0x0': + raise VmError("Device %04x:%02x:%02x.%02x is not connected" + % (int(dev['domain'],16), int(dev['bus'],16), + int(dev['slot'],16), int(dev['func'],16))) + self.hvm_destroyPCIDevice(int(vslt, 16)) + # Update vslt + dev['vslt'] = vslt + for n in sxp.children(pci_dev): + if(n[0] == 'vslt'): + n[1] = vslt + + # If pci platform does not exist, create and exit. + if existing_dev_info is None: + self.device_create(dev_sxp) + return True + + # use DevController.reconfigureDevice to change device config + dev_control = self.getDeviceController(dev_class) + dev_uuid = dev_control.reconfigureDevice(devid, dev_config) + if not self.info.is_hvm(): + # in PV case, wait until backend state becomes connected. + dev_control.waitForDevice_reconfigure(devid) + num_devs = dev_control.cleanupDevice(devid) + + # update XendConfig with new device info + if dev_uuid: + new_dev_sxp = dev_control.configuration(devid) + self.info.device_update(dev_uuid, new_dev_sxp) + + # If there is no device left, destroy pci and remove config. + if num_devs == 0: + if self.info.is_hvm(): + self.destroyDevice('pci', devid, True) + del self.info['devices'][dev_uuid] + platform = self.info['platform'] + orig_dev_num = len(platform['pci']) + # TODO: can use this to keep some info to ask high level + # management tools to hot insert a new passthrough dev + # after migration + if orig_dev_num != 0: + #platform['pci'] = ["%dDEVs" % orig_dev_num] + platform['pci'] = [] + else: + self.destroyDevice('pci', devid) + del self.info['devices'][dev_uuid] + + return True + def device_configure(self, dev_sxp, devid = None): """Configure an existing device. 
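
[Editor's note, not part of the patch] pci_convert_sxp_to_dict() above is a straight reshaping of the parsed sxp into the {devs: [...], states: [...]} dictionary the rest of the code expects. A standalone sketch of the same conversion, with plain nested lists standing in for xen.xend.sxp output:

def children(sxp, name):
    return [c for c in sxp[1:] if isinstance(c, list) and c and c[0] == name]

def pci_sxp_to_dict(dev_sxp):
    devs = [dict(p for p in d[1:] if len(p) == 2)
            for d in children(dev_sxp, 'dev')]
    states = [s[1] for s in children(dev_sxp, 'state')]
    return {'devs': devs, 'states': states}

sxp_cfg = ['pci',
           ['dev', ['domain', '0x0'], ['bus', '0x0'],
                   ['slot', '0x2'], ['func', '0x1'], ['vslt', '0x0']],
           ['state', 'Initialising']]
print(pci_sxp_to_dict(sxp_cfg))
# -> {'devs': [{'domain': '0x0', 'bus': '0x0', 'slot': '0x2',
#               'func': '0x1', 'vslt': '0x0'}], 'states': ['Initialising']}
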
@@ -690,6 +800,10 @@ class XendDomainInfo: # convert device sxp to a dict dev_class = sxp.name(dev_sxp) dev_config = {} + + if dev_class == 'pci': + return self.pci_device_configure(dev_sxp) + for opt_val in dev_sxp[1:]: try: dev_config[opt_val[0]] = opt_val[1] @@ -714,11 +828,11 @@ class XendDomainInfo: for devclass in XendDevices.valid_devices(): self.getDeviceController(devclass).waitForDevices() - def destroyPCIDevice(self, vslot): - log.debug("destroyPCIDevice called %s", vslot) + def hvm_destroyPCIDevice(self, vslot): + log.debug("hvm_destroyPCIDevice called %s", vslot) if not self.info.is_hvm(): - raise VmError("only HVM guest support pci detach") + raise VmError("hvm_destroyPCIDevice called on non-HVM guest") #all the PCI devs share one conf node devid = '0' @@ -744,34 +858,15 @@ class XendDomainInfo: raise VmError("Device @ vslot 0x%x do not support hotplug." % (vslot)) bdf_str = "%s:%s:%s.%s" % (x['domain'], x['bus'], x['slot'], x['func']) - log.info("destroyPCIDevice:%s:%s!", x, bdf_str) + log.info("hvm_destroyPCIDevice:%s:%s!", x, bdf_str) self.image.signalDeviceModel('pci-rem', 'pci-removed', bdf_str) - - if pci_len > 1: - del pci_conf['devs'][devnum] - self._reconfigureDevice('pci', devid, pci_conf) - else: - self.getDeviceController('pci').destroyDevice(devid, True) - del self.info['devices'][dev_uuid] - platform = self.info['platform'] - orig_dev_num = len(platform['pci']) - - #need remove the pci config - #TODO:can use this to keep some info to ask high level management tools to hot insert a new passthrough dev after migration - if orig_dev_num != 0: -# platform['pci'] = ["%dDEVs" % orig_dev_num] - platform['pci'] = [] return 0 def destroyDevice(self, deviceClass, devid, force = False, rm_cfg = False): log.debug("XendDomainInfo.destroyDevice: deviceClass = %s, device = %s", deviceClass, devid) - - if deviceClass == 'dpci': - rc = self.destroyPCIDevice(devid) - return rc if rm_cfg: # Convert devid to device number. 
A device number is @@ -1967,36 +2062,44 @@ class XendDomainInfo: for v in range(0, self.info['VCPUs_max']): xc.vcpu_setaffinity(self.domid, v, self.info['cpus']) else: + def find_relaxed_node(node_list): + import sys + if node_list is None: + node_list = range(0, info['nr_nodes']) + nodeload = [0] + nodeload = nodeload * info['nr_nodes'] + from xen.xend import XendDomain + doms = XendDomain.instance().list('all') + for dom in doms: + cpuinfo = dom.getVCPUInfo() + for vcpu in sxp.children(cpuinfo, 'vcpu'): + def vinfo(n, t): + return t(sxp.child_value(vcpu, n)) + cpumap = vinfo('cpumap', list) + for i in node_list: + node_cpumask = info['node_to_cpu'][i] + for j in node_cpumask: + if j in cpumap: + nodeload[i] += 1 + break + for i in node_list: + if len(info['node_to_cpu'][i]) > 0: + nodeload[i] = int(nodeload[i] / len(info['node_to_cpu'][i])) + else: + nodeload[i] = sys.maxint + index = nodeload.index( min(nodeload) ) + return index + info = xc.physinfo() if info['nr_nodes'] > 1: node_memory_list = info['node_to_memory'] needmem = self.image.getRequiredAvailableMemory(self.info['memory_dynamic_max']) / 1024 candidate_node_list = [] for i in range(0, info['nr_nodes']): - if node_memory_list[i] >= needmem: + if node_memory_list[i] >= needmem and len(info['node_to_cpu'][i]) > 0: candidate_node_list.append(i) - if candidate_node_list is None or len(candidate_node_list) == 1: - index = node_memory_list.index( max(node_memory_list) ) - cpumask = info['node_to_cpu'][index] - else: - nodeload = [0] - nodeload = nodeload * info['nr_nodes'] - from xen.xend import XendDomain - doms = XendDomain.instance().list('all') - for dom in doms: - cpuinfo = dom.getVCPUInfo() - for vcpu in sxp.children(cpuinfo, 'vcpu'): - def vinfo(n, t): - return t(sxp.child_value(vcpu, n)) - cpumap = vinfo('cpumap', list) - for i in candidate_node_list: - node_cpumask = info['node_to_cpu'][i] - for j in node_cpumask: - if j in cpumap: - nodeload[i] += 1 - break - index = nodeload.index( min(nodeload) ) - cpumask = info['node_to_cpu'][index] + index = find_relaxed_node(candidate_node_list) + cpumask = info['node_to_cpu'][index] for v in range(0, self.info['VCPUs_max']): xc.vcpu_setaffinity(self.domid, v, cpumask) @@ -2104,7 +2207,7 @@ class XendDomainInfo: self.state_updated.acquire() try: while self._stateGet() in (DOM_STATE_RUNNING,DOM_STATE_PAUSED): - self.state_updated.wait() + self.state_updated.wait(timeout=1.0) finally: self.state_updated.release() diff -r 8c921adf4833 -r 42f6c206c951 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/python/xen/xend/image.py Thu Mar 20 12:35:40 2008 -0600 @@ -90,6 +90,7 @@ class ImageHandler: ("image/kernel", self.kernel), ("image/cmdline", self.cmdline), ("image/ramdisk", self.ramdisk)) + self.vm.permissionsVm("image/cmdline", { 'dom': self.vm.getDomid(), 'read': True } ) self.device_model = vmConfig['platform'].get('device_model') @@ -201,6 +202,7 @@ class ImageHandler: vnc_config = {} has_vnc = int(vmConfig['platform'].get('vnc', 0)) != 0 has_sdl = int(vmConfig['platform'].get('sdl', 0)) != 0 + opengl = 1 for dev_uuid in vmConfig['console_refs']: dev_type, dev_info = vmConfig['devices'][dev_uuid] if dev_type == 'vfb': @@ -208,6 +210,7 @@ class ImageHandler: if vfb_type == 'sdl': self.display = dev_info.get('display', {}) self.xauthority = dev_info.get('xauthority', {}) + opengl = int(dev_info.get('opengl', opengl)) has_sdl = True else: vnc_config = dev_info.get('other_config', {}) @@ -262,7 +265,8 @@ class ImageHandler: 
elif has_sdl: # SDL is default in QEMU. - pass + if int(vmConfig['platform'].get('opengl', opengl)) != 1 : + ret.append('-disable-opengl') else: ret.append('-nographic') @@ -580,7 +584,8 @@ class HVMImageHandler(ImageHandler): ret.append("nic,vlan=%d,macaddr=%s,model=%s" % (nics, mac, model)) ret.append("-net") - ret.append("tap,vlan=%d,bridge=%s" % (nics, bridge)) + ret.append("tap,vlan=%d,ifname=tap%d.%d,bridge=%s" % + (nics, self.vm.getDomid(), nics-1, bridge)) return ret diff -r 8c921adf4833 -r 42f6c206c951 tools/python/xen/xend/server/BlktapController.py --- a/tools/python/xen/xend/server/BlktapController.py Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/python/xen/xend/server/BlktapController.py Thu Mar 20 12:35:40 2008 -0600 @@ -13,7 +13,9 @@ blktap_disk_types = [ 'vmdk', 'ram', 'qcow', - 'qcow2' + 'qcow2', + + 'ioemu' ] class BlktapController(BlkifController): diff -r 8c921adf4833 -r 42f6c206c951 tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/python/xen/xend/server/DevController.py Thu Mar 20 12:35:40 2008 -0600 @@ -51,6 +51,8 @@ xenbusState = { 'Connected' : 4, 'Closing' : 5, 'Closed' : 6, + 'Reconfiguring': 7, + 'Reconfigured' : 8, } xoptions = XendOptions.instance() @@ -88,6 +90,8 @@ class DevController: (devid, back, front) = self.getDeviceDetails(config) if devid is None: return 0 + + self.setupDevice(config) (backpath, frontpath) = self.addStoreEntries(config, devid, back, front) @@ -198,6 +202,15 @@ class DevController: if status == Timeout: raise VmError("Device %s (%s) could not be disconnected. " % + (devid, self.deviceClass)) + + def waitForDevice_reconfigure(self, devid): + log.debug("Waiting for %s - reconfigureDevice.", devid) + + (status, err) = self.waitForBackend_reconfigure(devid) + + if status == Timeout: + raise VmError("Device %s (%s) could not be reconfigured. " % (devid, self.deviceClass)) @@ -325,6 +338,11 @@ class DevController: """ raise NotImplementedError() + + def setupDevice(self, config): + """ Setup device from config. + """ + return def migrate(self, deviceConfig, network, dst, step, domName): """ Migration of a device. The 'network' parameter indicates @@ -569,6 +587,22 @@ class DevController: return result['status'] + def waitForBackend_reconfigure(self, devid): + frontpath = self.frontendPath(devid) + backpath = xstransact.Read(frontpath, "backend") + if backpath: + statusPath = backpath + '/' + "state" + ev = Event() + result = { 'status': Timeout } + + xswatch(statusPath, xenbusStatusCallback, ev, result) + + ev.wait(DEVICE_CREATE_TIMEOUT) + + return (result['status'], None) + else: + return (Missing, None) + def backendPath(self, backdom, devid): """Construct backend path given the backend domain and device id. 
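
[Editor's note, not part of the patch] The new Reconfiguring/Reconfigured states and waitForDevice_reconfigure() give the toolstack a way to block until the backend acknowledges a PCI reconfigure: a watch on the backend's state node sets an event once it reads Connected, and the caller times out otherwise. The control flow, sketched in standalone Python with the xenstore watch replaced by a plain callback:

import threading

XENBUS = {'Connected': 4, 'Closing': 5, 'Reconfiguring': 7, 'Reconfigured': 8}
TIMEOUT, CONNECTED = -1, 1

def wait_for_state(read_state, register_watch, timeout=100):
    ev, result = threading.Event(), {'status': TIMEOUT}

    def on_change():
        # the real code registers an xswatch on <backend>/state and re-reads it
        if read_state() == str(XENBUS['Connected']):
            result['status'] = CONNECTED
            ev.set()

    register_watch(on_change)
    ev.wait(timeout)
    return result['status']

backend_state = {'value': str(XENBUS['Connected'])}
print(wait_for_state(lambda: backend_state['value'],
                     lambda cb: cb(), timeout=1))   # -> 1 (connected)
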
@@ -634,3 +668,19 @@ def deviceDestroyCallback(statusPath, ev ev.set() return 0 + + +def xenbusStatusCallback(statusPath, ev, result): + log.debug("xenbusStatusCallback %s.", statusPath) + + status = xstransact.Read(statusPath) + + if status == str(xenbusState['Connected']): + result['status'] = Connected + else: + return 1 + + log.debug("xenbusStatusCallback %d.", result['status']) + + ev.set() + return 0 diff -r 8c921adf4833 -r 42f6c206c951 tools/python/xen/xend/server/SrvDomain.py --- a/tools/python/xen/xend/server/SrvDomain.py Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/python/xen/xend/server/SrvDomain.py Thu Mar 20 12:35:40 2008 -0600 @@ -121,10 +121,10 @@ class SrvDomain(SrvDir): def op_pincpu(self, _, req): fn = FormFn(self.xd.domain_pincpu, - [['dom', 'int'], + [['dom', 'str'], ['vcpu', 'str'], ['cpumap', 'str']]) - val = fn(req.args, {'dom': self.dom.domid}) + val = fn(req.args, {'dom': self.dom.getName()}) return val def op_cpu_sedf_get(self, _, req): @@ -147,17 +147,17 @@ class SrvDomain(SrvDir): def op_domain_sched_credit_get(self, _, req): fn = FormFn(self.xd.domain_sched_credit_get, - [['dom', 'int']]) - val = fn(req.args, {'dom': self.dom.domid}) + [['dom', 'str']]) + val = fn(req.args, {'dom': self.dom.getName()}) return val def op_domain_sched_credit_set(self, _, req): fn = FormFn(self.xd.domain_sched_credit_set, - [['dom', 'int'], + [['dom', 'str'], ['weight', 'int'], ['cap', 'int']]) - val = fn(req.args, {'dom': self.dom.domid}) + val = fn(req.args, {'dom': self.dom.getName()}) return val def op_maxmem_set(self, _, req): @@ -187,7 +187,9 @@ class SrvDomain(SrvDir): def op_device_destroy(self, _, req): return self.call(self.dom.destroyDevice, [['type', 'str'], - ['dev', 'str']], + ['dev', 'str'], + ['force', 'int'], + ['rm_cfg', 'int']], req) def op_device_configure(self, _, req): diff -r 8c921adf4833 -r 42f6c206c951 tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/python/xen/xend/server/pciif.py Thu Mar 20 12:35:40 2008 -0600 @@ -24,7 +24,7 @@ from xen.xend.XendError import VmError from xen.xend.XendError import VmError from xen.xend.XendLogging import log -from xen.xend.server.DevController import DevController +from xen.xend.server.DevController import DevController, xenbusState import xen.lowlevel.xc @@ -44,6 +44,15 @@ while not (t&1): t>>=1 PAGE_SHIFT+=1 +def parse_hex(val): + try: + if isinstance(val, types.StringTypes): + return int(val, 16) + else: + return val + except ValueError: + return None + class PciController(DevController): def __init__(self, vm): @@ -52,15 +61,6 @@ class PciController(DevController): def getDeviceDetails(self, config): """@see DevController.getDeviceDetails""" - def parse_hex(val): - try: - if isinstance(val, types.StringTypes): - return int(val, 16) - else: - return val - except ValueError: - return None - back = {} pcidevid = 0 vslots = "" @@ -74,7 +74,6 @@ class PciController(DevController): if vslt is not None: vslots = vslots + vslt + ";" - self.setupDevice(domain, bus, slot, func) back['dev-%i' % pcidevid] = "%04x:%02x:%02x.%02x" % \ (domain, bus, slot, func) pcidevid += 1 @@ -86,27 +85,80 @@ class PciController(DevController): back['uuid'] = config.get('uuid','') return (0, back, {}) + def reconfigureDevice(self, _, config): """@see DevController.reconfigureDevice""" - #currently only support config changes by hot insert/remove pass-through dev - #delete all the devices in xenstore - (devid, new_back, new_front) = self.getDeviceDetails(config) - num_devs = 
self.readBackend(devid, 'num_devs') - for i in range(int(num_devs)): - self.removeBackend(devid, 'dev-%d' % i) - self.removeBackend(devid, 'num_devs') - - #create new devices config - num_devs = new_back['num_devs'] - for i in range(int(num_devs)): - dev_no = 'dev-%d' % i - self.writeBackend(devid, dev_no, new_back[dev_no]) - self.writeBackend(devid, 'num_devs', num_devs) - - if new_back['vslots'] is not None: - self.writeBackend(devid, 'vslots', new_back['vslots']) - - return new_back.get('uuid') + (devid, back, front) = self.getDeviceDetails(config) + num_devs = int(back['num_devs']) + states = config.get('states', []) + + old_vslots = self.readBackend(devid, 'vslots') + if old_vslots is None: + old_vslots = '' + num_olddevs = int(self.readBackend(devid, 'num_devs')) + + for i in range(num_devs): + try: + dev = back['dev-%i' % i] + state = states[i] + except: + raise XendError('Error reading config') + + if state == 'Initialising': + # PCI device attachment + for j in range(num_olddevs): + if dev == self.readBackend(devid, 'dev-%i' % j): + raise XendError('Device %s is already connected.' % dev) + log.debug('Attaching PCI device %s.' % dev) + (domain, bus, slotfunc) = dev.split(':') + (slot, func) = slotfunc.split('.') + domain = parse_hex(domain) + bus = parse_hex(bus) + slot = parse_hex(slot) + func = parse_hex(func) + self.setupOneDevice(domain, bus, slot, func) + + self.writeBackend(devid, 'dev-%i' % (num_olddevs + i), dev) + self.writeBackend(devid, 'state-%i' % (num_olddevs + i), + str(xenbusState['Initialising'])) + self.writeBackend(devid, 'num_devs', str(num_olddevs + i + 1)) + + # Update vslots + if back['vslots'] is not None: + vslots = old_vslots + back['vslots'] + self.writeBackend(devid, 'vslots', vslots) + + elif state == 'Closing': + # PCI device detachment + found = False + for j in range(num_olddevs): + if dev == self.readBackend(devid, 'dev-%i' % j): + found = True + log.debug('Detaching device %s' % dev) + self.writeBackend(devid, 'state-%i' % j, + str(xenbusState['Closing'])) + if not found: + raise XendError('Device %s is not connected' % dev) + + # Update vslots + if back['vslots'] is not None: + vslots = old_vslots + for vslt in back['vslots'].split(';'): + if vslt != '': + vslots = vslots.replace(vslt + ';', '', 1) + if vslots == '': + self.removeBackend(devid, 'vslots') + else: + self.writeBackend(devid, 'vslots', vslots) + + else: + raise XendError('Error configuring device %s: invalid state %s' + % (dev,state)) + + self.writeBackend(devid, 'state', str(xenbusState['Reconfiguring'])) + + return self.readBackend(devid, 'uuid') + def getDeviceConfiguration(self, devid, transaction = None): result = DevController.getDeviceConfiguration(self, devid, transaction) @@ -125,7 +177,7 @@ class PciController(DevController): pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" 
+ r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + r"(?P<slot>[0-9a-fA-F]{1,2})[.,]" + - r"(?P<func>[0-9a-fA-F]{1,2})", dev_config) + r"(?P<func>[0-7]{1,2})$", dev_config) if pci_match!=None: pci_dev_info = pci_match.groupdict() @@ -136,7 +188,10 @@ class PciController(DevController): #append vslot info if vslots is not None: - dev_dict['vslt'] = slot_list[i] + try: + dev_dict['vslt'] = slot_list[i] + except IndexError: + dev_dict['vslt'] = '0x0' pci_devs.append(dev_dict) @@ -171,7 +226,7 @@ class PciController(DevController): return sxpr - def setupDevice(self, domain, bus, slot, func): + def setupOneDevice(self, domain, bus, slot, func): """ Attach I/O resources for device to frontend domain """ fe_domid = self.getDomid() @@ -225,6 +280,116 @@ class PciController(DevController): raise VmError(('pci: failed to configure irq on device '+ '%s - errno=%d')%(dev.name,rc)) + def setupDevice(self, config): + """Setup devices from config + """ + for pci_config in config.get('devs', []): + domain = parse_hex(pci_config.get('domain', 0)) + bus = parse_hex(pci_config.get('bus', 0)) + slot = parse_hex(pci_config.get('slot', 0)) + func = parse_hex(pci_config.get('func', 0)) + self.setupOneDevice(domain, bus, slot, func) + + return + + def cleanupOneDevice(self, domain, bus, slot, func): + """ Detach I/O resources for device from frontend domain + """ + fe_domid = self.getDomid() + + try: + dev = PciDevice(domain, bus, slot, func) + except Exception, e: + raise VmError("pci: failed to locate device and "+ + "parse it's resources - "+str(e)) + + if dev.driver!='pciback': + raise VmError(("pci: PCI Backend does not own device "+ \ + "%s\n"+ \ + "See the pciback.hide kernel "+ \ + "command-line parameter or\n"+ \ + "bind your slot/device to the PCI backend using sysfs" \ + )%(dev.name)) + + for (start, size) in dev.ioports: + log.debug('pci: disabling ioport 0x%x/0x%x'%(start,size)) + rc = xc.domain_ioport_permission(domid = fe_domid, first_port = start, + nr_ports = size, allow_access = False) + if rc<0: + raise VmError(('pci: failed to configure I/O ports on device '+ + '%s - errno=%d')%(dev.name,rc)) + + for (start, size) in dev.iomem: + # Convert start/size from bytes to page frame sizes + start_pfn = start>>PAGE_SHIFT + # Round number of pages up to nearest page boundary (if not on one) + nr_pfns = (size+(PAGE_SIZE-1))>>PAGE_SHIFT + + log.debug('pci: disabling iomem 0x%x/0x%x pfn 0x%x/0x%x'% \ + (start,size,start_pfn,nr_pfns)) + rc = xc.domain_iomem_permission(domid = fe_domid, + first_pfn = start_pfn, + nr_pfns = nr_pfns, + allow_access = False) + if rc<0: + raise VmError(('pci: failed to configure I/O memory on device '+ + '%s - errno=%d')%(dev.name,rc)) + + if dev.irq>0: + log.debug('pci: disabling irq %d'%dev.irq) + rc = xc.domain_irq_permission(domid = fe_domid, pirq = dev.irq, + allow_access = False) + if rc<0: + raise VmError(('pci: failed to configure irq on device '+ + '%s - errno=%d')%(dev.name,rc)) + + def cleanupDevice(self, devid): + """ Detach I/O resources for device and cleanup xenstore nodes + after reconfigure. + + @param devid: The device ID + @type devid: int + @return: Return the number of devices connected + @rtype: int + """ + num_devs = int(self.readBackend(devid, 'num_devs')) + new_num_devs = 0 + for i in range(num_devs): + state = int(self.readBackend(devid, 'state-%i' % i)) + if state == xenbusState['Closing']: + # Detach I/O resources. 
+ dev = self.readBackend(devid, 'dev-%i' % i) + (domain, bus, slotfunc) = dev.split(':') + (slot, func) = slotfunc.split('.') + domain = parse_hex(domain) + bus = parse_hex(bus) + slot = parse_hex(slot) + func = parse_hex(func) + # In HVM case, I/O resources are disabled in ioemu. + self.cleanupOneDevice(domain, bus, slot, func) + # Remove xenstore nodes. + self.removeBackend(devid, 'dev-%i' % i) + self.removeBackend(devid, 'vdev-%i' % i) + self.removeBackend(devid, 'state-%i' % i) + else: + if new_num_devs != i: + tmpdev = self.readBackend(devid, 'dev-%i' % i) + self.writeBackend(devid, 'dev-%i' % new_num_devs, tmpdev) + self.removeBackend(devid, 'dev-%i' % i) + tmpvdev = self.readBackend(devid, 'vdev-%i' % i) + if tmpvdev is not None: + self.writeBackend(devid, 'vdev-%i' % new_num_devs, + tmpvdev) + self.removeBackend(devid, 'vdev-%i' % i) + tmpstate = self.readBackend(devid, 'state-%i' % i) + self.writeBackend(devid, 'state-%i' % new_num_devs, tmpstate) + self.removeBackend(devid, 'state-%i' % i) + new_num_devs = new_num_devs + 1 + + self.writeBackend(devid, 'num_devs', str(new_num_devs)) + + return new_num_devs + def waitForBackend(self,devid): return (0, "ok - no hotplug") diff -r 8c921adf4833 -r 42f6c206c951 tools/python/xen/xend/server/vfbif.py --- a/tools/python/xen/xend/server/vfbif.py Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/python/xen/xend/server/vfbif.py Thu Mar 20 12:35:40 2008 -0600 @@ -7,7 +7,7 @@ import os CONFIG_ENTRIES = ['type', 'vncdisplay', 'vnclisten', 'vncpasswd', 'vncunused', 'display', 'xauthority', 'keymap', - 'uuid', 'location', 'protocol'] + 'uuid', 'location', 'protocol', 'opengl'] class VfbifController(DevController): """Virtual frame buffer controller. Handles all vfb devices for a domain. diff -r 8c921adf4833 -r 42f6c206c951 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/python/xen/xm/create.py Thu Mar 20 12:35:40 2008 -0600 @@ -304,7 +304,7 @@ gopts.var('pci', val='BUS:DEV.FUNC', gopts.var('pci', val='BUS:DEV.FUNC', fn=append_value, default=[], use="""Add a PCI device to a domain, using given params (in hex). - For example 'pci=c0:02.1a'. + For example 'pci=c0:02.1'. The option may be repeated to add more than one pci device.""") gopts.var('ioports', val='FROM[-TO]', @@ -319,7 +319,7 @@ gopts.var('irq', val='IRQ', For example 'irq=7'. This option may be repeated to add more than one IRQ.""") -gopts.var('vfb', val="type={vnc,sdl},vncunused=1,vncdisplay=N,vnclisten=ADDR,display=DISPLAY,xauthority=XAUTHORITY,vncpasswd=PASSWORD", +gopts.var('vfb', val="type={vnc,sdl},vncunused=1,vncdisplay=N,vnclisten=ADDR,display=DISPLAY,xauthority=XAUTHORITY,vncpasswd=PASSWORD,opengl=1", fn=append_value, default=[], use="""Make the domain a framebuffer backend. The backend type should be either sdl or vnc. @@ -330,7 +330,7 @@ gopts.var('vfb', val="type={vnc,sdl},vnc default password. For type=sdl, a viewer will be started automatically using the given DISPLAY and XAUTHORITY, which default to the current user's - ones.""") + ones. 
OpenGL will be used by default unless opengl is set to 0.""") gopts.var('vif', val="type=TYPE,mac=MAC,bridge=BRIDGE,ip=IPADDR,script=SCRIPT," + \ "backend=DOM,vifname=NAME,rate=RATE,model=MODEL,accel=ACCEL", @@ -504,6 +504,10 @@ gopts.var('sdl', val='', fn=set_value, default=None, use="""Should the device model use SDL?""") +gopts.var('opengl', val='', + fn=set_value, default=None, + use="""Enable\Disable OpenGL""") + gopts.var('display', val='DISPLAY', fn=set_value, default=None, use="X11 display to use") @@ -641,7 +645,7 @@ def configure_vfbs(config_devs, vals): d['type'] = 'sdl' for (k,v) in d.iteritems(): if not k in [ 'vnclisten', 'vncunused', 'vncdisplay', 'display', - 'xauthority', 'type', 'vncpasswd' ]: + 'xauthority', 'type', 'vncpasswd', 'opengl' ]: err("configuration option %s unknown to vfbs" % k) config.append([k,v]) if not d.has_key("keymap"): @@ -745,7 +749,7 @@ def configure_hvm(config_image, vals): 'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten', 'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor', 'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci', 'hpet', - 'guest_os_type', 'hap'] + 'guest_os_type', 'hap', 'opengl'] for a in args: if a in vals.__dict__ and vals.__dict__[a] is not None: @@ -840,7 +844,7 @@ def preprocess_pci(vals): pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" + \ r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + \ r"(?P<slot>[0-9a-fA-F]{1,2})[.,]" + \ - r"(?P<func>[0-9a-fA-F])", pci_dev_str) + r"(?P<func>[0-7])$", pci_dev_str) if pci_match!=None: pci_dev_info = pci_match.groupdict('0') try: diff -r 8c921adf4833 -r 42f6c206c951 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/python/xen/xm/main.py Thu Mar 20 12:35:40 2008 -0600 @@ -175,11 +175,11 @@ SUBCOMMAND_HELP = { 'vnet-delete' : ('<VnetId>', 'Delete a Vnet.'), 'vnet-list' : ('[-l|--long]', 'List Vnets.'), 'vtpm-list' : ('<Domain> [--long]', 'List virtual TPM devices.'), - 'pci-attach ' : ('<Domain> <dom> <bus> <slot> <func> [virtual slot]', + 'pci-attach' : ('<Domain> <domain:bus:slot.func> [virtual slot]', 'Insert a new pass-through pci device.'), - 'pci-detach ' : ('<Domain> <virtual slot>', + 'pci-detach' : ('<Domain> <domain:bus:slot.func>', 'Remove a domain\'s pass-through pci device.'), - 'pci-list' : ('<Domain>', + 'pci-list' : ('<Domain>', 'List pass-through pci devices for a domain.'), # security @@ -626,8 +626,11 @@ class Shell(cmd.Cmd): def preloop(self): cmd.Cmd.preloop(self) - import readline - readline.set_completer_delims(' ') + try: + import readline + readline.set_completer_delims(' ') + except ImportError: + pass def default(self, line): words = shlex.split(line) @@ -2229,29 +2232,37 @@ def xm_network_attach(args): vif.append(vif_param) server.xend.domain.device_create(dom, vif) -def parse_pci_configuration(args): +def parse_pci_configuration(args, state): dom = args[0] - - if len(args) == 6: - vslt = args[5] + pci_dev_str = args[1] + if len(args) == 3: + vslt = args[2] else: vslt = '0x0' #chose a free virtual PCI slot - - pci = ['pci', - ['devs', - [{'domain': "0x%x" % int(args[1], 16), - 'bus': "0x%x" % int(args[2], 16), - 'slot': "0x%x" % int(args[3], 16), - 'func': "0x%x" % int(args[4], 16), - 'vslt': "0x%x" % int(vslt, 16)}] - ]] + pci=['pci'] + pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" 
+ \ + r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + \ + r"(?P<slot>[0-9a-fA-F]{1,2})[.,]" + \ + r"(?P<func>[0-7])$", pci_dev_str) + if pci_match == None: + raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt)) + pci_dev_info = pci_match.groupdict('0') + try: + pci.append(['dev', ['domain', '0x'+ pci_dev_info['domain']], \ + ['bus', '0x'+ pci_dev_info['bus']], + ['slot', '0x'+ pci_dev_info['slot']], + ['func', '0x'+ pci_dev_info['func']], + ['vslt', '0x%x' % int(vslt, 16)]]) + except: + raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt)) + pci.append(['state', state]) return (dom, pci) def xm_pci_attach(args): - arg_check(args, 'pci-attach', 5, 6) - (dom, pci) = parse_pci_configuration(args) - server.xend.domain.device_create(dom, pci) + arg_check(args, 'pci-attach', 2, 3) + (dom, pci) = parse_pci_configuration(args, 'Initialising') + server.xend.domain.device_configure(dom, pci) def detach(args, deviceClass): rm_cfg = True @@ -2316,12 +2327,11 @@ def xm_network_detach(args): arg_check(args, 'network-detach', 2, 3) detach(args, 'vif') - def xm_pci_detach(args): arg_check(args, 'pci-detach', 2) - dom = args[0] - dev = args[1] - server.xend.domain.destroyDevice(dom, 'dpci', dev) + (dom, pci) = parse_pci_configuration(args, 'Closing') + server.xend.domain.device_configure(dom, pci) + def xm_vnet_list(args): xenapi_unsupported() diff -r 8c921adf4833 -r 42f6c206c951 tools/python/xen/xm/xenapi_create.py --- a/tools/python/xen/xm/xenapi_create.py Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/python/xen/xm/xenapi_create.py Thu Mar 20 12:35:40 2008 -0600 @@ -810,7 +810,7 @@ class sxp2xml: get_child_by_name(image, "vxauthority", "127.0.0.1"), document)) console.appendChild(self.mk_other_config( - "vncpasswd", get_child_by_name(image, "vncpasswd", ""), + "opengl", get_child_by_name(image, "opengl", "1"), document)) consoles.append(console) diff -r 8c921adf4833 -r 42f6c206c951 tools/xcutils/xc_save.c --- a/tools/xcutils/xc_save.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/xcutils/xc_save.c Thu Mar 20 12:35:40 2008 -0600 @@ -17,6 +17,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include <err.h> #include <xs.h> #include <xenctrl.h> diff -r 8c921adf4833 -r 42f6c206c951 tools/xenstat/xentop/xentop.c --- a/tools/xenstat/xentop/xentop.c Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/xenstat/xentop/xentop.c Thu Mar 20 12:35:40 2008 -0600 @@ -1121,6 +1121,7 @@ int main(int argc, char **argv) do { gettimeofday(&curtime, NULL); top(); + fflush(stdout); oldtime = curtime; if ((!loop) && !(--iterations)) break; diff -r 8c921adf4833 -r 42f6c206c951 tools/xenstore/Makefile --- a/tools/xenstore/Makefile Fri Mar 14 15:07:45 2008 -0600 +++ b/tools/xenstore/Makefile Thu Mar 20 12:35:40 2008 -0600 @@ -1,21 +1,16 @@ XEN_ROOT=../.. XEN_ROOT=../.. include $(XEN_ROOT)/tools/Rules.mk -XEN_LIBXC = $(XEN_ROOT)/tools/libxc MAJOR = 3.0 MINOR = 0 -PROFILE=#-pg -BASECFLAGS=-Werror +CFLAGS += -Werror +CFLAGS += -I. +CFLAGS += $(CFLAGS_libxenctrl) + # Make gcc generate dependencies. -BASECFLAGS += -Wp,-MD,.$(@F).d -PROG_DEP = .*.d -BASECFLAGS+= $(PROFILE) -BASECFLAGS+= $(CFLAGS_libxenctrl) -BASECFLAGS+= -I. 
- -CFLAGS += $(BASECFLAGS) -LDFLAGS += $(PROFILE) +CFLAGS += -Wp,-MD,.$(@F).d +DEP = .*.d CLIENTS := xenstore-exists xenstore-list xenstore-read xenstore-rm xenstore-chmod CLIENTS += xenstore-write @@ -45,22 +40,22 @@ endif endif xenstored: $(XENSTORED_OBJS) - $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) $(LDFLAGS_libxenctrl) $(SOCKET_LIBS) -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDFLAGS_libxenctrl) $(SOCKET_LIBS) -o $@ $(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so - $(CC) $(CFLAGS) $(LDFLAGS) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore $(SOCKET_LIBS) -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $< -L. -lxenstore $(SOCKET_LIBS) -o $@ $(CLIENTS_OBJS): xenstore_%.o: xenstore_client.c $(COMPILE.c) -DCLIENT_$(*F) -o $@ $< xenstore-control: xenstore_control.o libxenstore.so - $(CC) $(CFLAGS) $(LDFLAGS) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore $(SOCKET_LIBS) -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $< -L. -lxenstore $(SOCKET_LIBS) -o $@ xenstore-ls: xsls.o libxenstore.so - $(CC) $(CFLAGS) $(LDFLAGS) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore $(SOCKET_LIBS) -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $< -L. -lxenstore $(SOCKET_LIBS) -o $@ xs_tdb_dump: xs_tdb_dump.o utils.o tdb.o talloc.o - $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ libxenstore.so: libxenstore.so.$(MAJOR) ln -sf $< $@ @@ -71,7 +66,7 @@ libxenstore.so.$(MAJOR).$(MINOR): xs.opi $(CC) $(CFLAGS) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenstore.so.$(MAJOR) $(SHLIB_CFLAGS) -o $@ $^ $(SOCKET_LIBS) -lpthread libxenstore.a: xs.o xs_lib.o - $(AR) rcs libxenstore.a $^ + $(AR) rcs $@ $^ .PHONY: clean clean: @@ -79,7 +74,7 @@ clean: rm -f xenstored xs_random xs_stress xs_crashme rm -f xs_tdb_dump xenstore-control xenstore-ls rm -f $(CLIENTS) - $(RM) $(PROG_DEP) + $(RM) $(DEP) .PHONY: TAGS TAGS: @@ -108,7 +103,7 @@ install: all $(INSTALL_DATA) xs.h $(DESTDIR)$(INCLUDEDIR) $(INSTALL_DATA) xs_lib.h $(DESTDIR)$(INCLUDEDIR) --include $(PROG_DEP) +-include $(DEP) # never delete any intermediate files. 
.SECONDARY: diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/ia64/linux-xen/setup.c --- a/xen/arch/ia64/linux-xen/setup.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/ia64/linux-xen/setup.c Thu Mar 20 12:35:40 2008 -0600 @@ -358,7 +358,7 @@ acpi_oem_console_setup(void) extern struct ns16550_defaults ns16550_com1; efi_system_table_t *systab; efi_config_table_t *tables; - struct acpi20_table_rsdp *rsdp = NULL; + struct acpi_table_rsdp *rsdp = NULL; struct acpi_table_xsdt *xsdt; struct acpi_table_header *hdr; int i; @@ -378,16 +378,17 @@ acpi_oem_console_setup(void) for (i = 0 ; i < (int)systab->nr_tables && !rsdp ; i++) { if (efi_guidcmp(tables[i].guid, ACPI_20_TABLE_GUID) == 0) rsdp = - (struct acpi20_table_rsdp *)__va(tables[i].table); - } - - if (!rsdp || strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) + (struct acpi_table_rsdp *)__va(tables[i].table); + } + + if (!rsdp || + strncmp(rsdp->signature, ACPI_SIG_RSDP, sizeof(ACPI_SIG_RSDP) - 1)) return -ENODEV; - xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_address); + xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_physical_address); hdr = &xsdt->header; - if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) + if (strncmp(hdr->signature, ACPI_SIG_XSDT, sizeof(ACPI_SIG_XSDT) - 1)) return -ENODEV; /* Looking for Fujitsu PRIMEQUEST systems */ diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/ia64/xen/acpi.c --- a/xen/arch/ia64/xen/acpi.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/ia64/xen/acpi.c Thu Mar 20 12:35:40 2008 -0600 @@ -74,7 +74,7 @@ acpi_get_sysname (void) { /* #ifdef CONFIG_IA64_GENERIC */ unsigned long rsdp_phys; - struct acpi20_table_rsdp *rsdp; + struct acpi_table_rsdp *rsdp; struct acpi_table_xsdt *xsdt; struct acpi_table_header *hdr; @@ -84,15 +84,15 @@ acpi_get_sysname (void) return "dig"; } - rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys); - if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) { + rsdp = (struct acpi_table_rsdp *) __va(rsdp_phys); + if (strncmp(rsdp->signature, ACPI_SIG_RSDP, sizeof(ACPI_SIG_RSDP) - 1)) { printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n"); return "dig"; } - xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address); + xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_physical_address); hdr = &xsdt->header; - if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) { + if (strncmp(hdr->signature, ACPI_SIG_XSDT, sizeof(ACPI_SIG_XSDT) - 1)) { printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n"); return "dig"; } @@ -356,14 +356,14 @@ acpi_parse_madt (unsigned long phys_addr #ifdef CONFIG_ITANIUM has_8259 = 1; /* Firmware on old Itanium systems is broken */ #else - has_8259 = acpi_madt->flags.pcat_compat; + has_8259 = acpi_madt->flags & ACPI_MADT_PCAT_COMPAT; #endif iosapic_system_init(has_8259); /* Get base address of IPI Message Block */ - if (acpi_madt->lapic_address) - ipi_base_addr = (void __iomem *) ioremap(acpi_madt->lapic_address, 0); + if (acpi_madt->address) + ipi_base_addr = (void __iomem *)ioremap(acpi_madt->address, 0); printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr); @@ -416,7 +416,7 @@ acpi_numa_slit_init (struct acpi_table_s u32 len; len = sizeof(struct acpi_table_header) + 8 - + slit->localities * slit->localities; + + slit->locality_count * slit->locality_count; if (slit->header.length != len) { printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", len, slit->header.length); @@ -519,21 +519,24 @@ acpi_numa_arch_fixup (void) for (i = 0; i < srat_num_cpus; i++) 
node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid]; - printk(KERN_INFO "Number of logical nodes in system = %d\n", numnodes); - printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks); - - if (!slit_table) return; + printk(KERN_INFO "Number of logical nodes in system = %d\n", + numnodes); + printk(KERN_INFO "Number of memory chunks in system = %d\n", + num_node_memblks); + + if (!slit_table) + return; memset(numa_slit, -1, sizeof(numa_slit)); - for (i=0; i<slit_table->localities; i++) { + for (i = 0; i < slit_table->locality_count; i++) { if (!pxm_bit_test(i)) continue; node_from = pxm_to_nid_map[i]; - for (j=0; j<slit_table->localities; j++) { + for (j=0; j < slit_table->locality_count; j++) { if (!pxm_bit_test(j)) continue; node_to = pxm_to_nid_map[j]; node_distance(node_from, node_to) = - slit_table->entry[i*slit_table->localities + j]; + slit_table->entry[i * slit_table->locality_count + j]; } } @@ -560,7 +563,7 @@ acpi_parse_fadt (unsigned long phys_addr acpi_parse_fadt (unsigned long phys_addr, unsigned long size) { struct acpi_table_header *fadt_header; - struct fadt_descriptor_rev2 *fadt; + struct acpi_table_fadt *fadt; if (!phys_addr || !size) return -EINVAL; @@ -569,16 +572,16 @@ acpi_parse_fadt (unsigned long phys_addr if (fadt_header->revision != 3) return -ENODEV; /* Only deal with ACPI 2.0 FADT */ - fadt = (struct fadt_descriptor_rev2 *) fadt_header; - - if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER)) + fadt = (struct acpi_table_fadt *) fadt_header; + + if (!(fadt->boot_flags & BAF_8042_KEYBOARD_CONTROLLER)) acpi_kbd_controller_present = 0; - if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES) + if (fadt->boot_flags & BAF_LEGACY_DEVICES) acpi_legacy_devices = 1; #if 0 - acpi_register_gsi(fadt->sci_int, ACPI_ACTIVE_LOW, ACPI_LEVEL_SENSITIVE); + acpi_register_gsi(fadt->sci_interrupt, ACPI_ACTIVE_LOW, ACPI_LEVEL_SENSITIVE); #endif return 0; } diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/ia64/xen/dom_fw_common.c --- a/xen/arch/ia64/xen/dom_fw_common.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/ia64/xen/dom_fw_common.c Thu Mar 20 12:35:40 2008 -0600 @@ -208,18 +208,18 @@ print_md(efi_memory_desc_t *md) } struct fake_acpi_tables { - struct acpi20_table_rsdp rsdp; - struct xsdt_descriptor_rev2 xsdt; + struct acpi_table_rsdp rsdp; + struct acpi_table_xsdt xsdt; uint64_t madt_ptr; - struct fadt_descriptor_rev2 fadt; - struct facs_descriptor_rev2 facs; + struct acpi_table_fadt fadt; + struct acpi_table_facs facs; struct acpi_table_header dsdt; uint8_t aml[8 + 11 * MAX_VIRT_CPUS]; struct acpi_table_madt madt; struct acpi_table_lsapic lsapic[MAX_VIRT_CPUS]; - uint8_t pm1a_evt_blk[4]; - uint8_t pm1a_cnt_blk[1]; - uint8_t pm_tmr_blk[4]; + uint8_t pm1a_event_block[4]; + uint8_t pm1a_control_block[1]; + uint8_t pm_timer_block[4]; }; #define ACPI_TABLE_MPA(field) \ FW_ACPI_BASE_PADDR + offsetof(struct fake_acpi_tables, field); @@ -228,10 +228,10 @@ void void dom_fw_fake_acpi(domain_t *d, struct fake_acpi_tables *tables) { - struct acpi20_table_rsdp *rsdp = &tables->rsdp; - struct xsdt_descriptor_rev2 *xsdt = &tables->xsdt; - struct fadt_descriptor_rev2 *fadt = &tables->fadt; - struct facs_descriptor_rev2 *facs = &tables->facs; + struct acpi_table_rsdp *rsdp = &tables->rsdp; + struct acpi_table_xsdt *xsdt = &tables->xsdt; + struct acpi_table_fadt *fadt = &tables->fadt; + struct acpi_table_facs *facs = &tables->facs; struct acpi_table_header *dsdt = &tables->dsdt; struct acpi_table_madt *madt = &tables->madt; struct acpi_table_lsapic *lsapic = 
tables->lsapic; @@ -245,34 +245,37 @@ dom_fw_fake_acpi(domain_t *d, struct fak memset(tables, 0, sizeof(struct fake_acpi_tables)); /* setup XSDT (64bit version of RSDT) */ - memcpy(xsdt->signature, XSDT_SIG, sizeof(xsdt->signature)); + memcpy(xsdt->header.signature, ACPI_SIG_XSDT, + sizeof(xsdt->header.signature)); /* XSDT points to both the FADT and the MADT, so add one entry */ - xsdt->length = sizeof(struct xsdt_descriptor_rev2) + sizeof(uint64_t); - xsdt->revision = 1; - memcpy(xsdt->oem_id, "XEN", 3); - memcpy(xsdt->oem_table_id, "Xen/ia64", 8); - memcpy(xsdt->asl_compiler_id, "XEN", 3); - xsdt->asl_compiler_revision = xen_ia64_version(d); + xsdt->header.length = sizeof(struct acpi_table_xsdt) + sizeof(uint64_t); + xsdt->header.revision = 1; + memcpy(xsdt->header.oem_id, "XEN", 3); + memcpy(xsdt->header.oem_table_id, "Xen/ia64", 8); + memcpy(xsdt->header.asl_compiler_id, "XEN", 3); + xsdt->header.asl_compiler_revision = xen_ia64_version(d); xsdt->table_offset_entry[0] = ACPI_TABLE_MPA(fadt); tables->madt_ptr = ACPI_TABLE_MPA(madt); - xsdt->checksum = generate_acpi_checksum(xsdt, xsdt->length); + xsdt->header.checksum = generate_acpi_checksum(xsdt, + xsdt->header.length); /* setup FADT */ - memcpy(fadt->signature, FADT_SIG, sizeof(fadt->signature)); - fadt->length = sizeof(struct fadt_descriptor_rev2); - fadt->revision = FADT2_REVISION_ID; - memcpy(fadt->oem_id, "XEN", 3); - memcpy(fadt->oem_table_id, "Xen/ia64", 8); - memcpy(fadt->asl_compiler_id, "XEN", 3); - fadt->asl_compiler_revision = xen_ia64_version(d); - - memcpy(facs->signature, FACS_SIG, sizeof(facs->signature)); + memcpy(fadt->header.signature, ACPI_SIG_FADT, + sizeof(fadt->header.signature)); + fadt->header.length = sizeof(struct acpi_table_fadt); + fadt->header.revision = FADT2_REVISION_ID; + memcpy(fadt->header.oem_id, "XEN", 3); + memcpy(fadt->header.oem_table_id, "Xen/ia64", 8); + memcpy(fadt->header.asl_compiler_id, "XEN", 3); + fadt->header.asl_compiler_revision = xen_ia64_version(d); + + memcpy(facs->signature, ACPI_SIG_FACS, sizeof(facs->signature)); facs->version = 1; - facs->length = sizeof(struct facs_descriptor_rev2); - - fadt->xfirmware_ctrl = ACPI_TABLE_MPA(facs); + facs->length = sizeof(struct acpi_table_facs); + + fadt->Xfacs = ACPI_TABLE_MPA(facs); fadt->Xdsdt = ACPI_TABLE_MPA(dsdt); /* @@ -280,34 +283,35 @@ dom_fw_fake_acpi(domain_t *d, struct fak * from sanity checks in the ACPI CA. Emulate required ACPI hardware * registers in system memory. 
*/ - fadt->pm1_evt_len = 4; - fadt->xpm1a_evt_blk.address_space_id = ACPI_ADR_SPACE_SYSTEM_MEMORY; - fadt->xpm1a_evt_blk.register_bit_width = 8; - fadt->xpm1a_evt_blk.address = ACPI_TABLE_MPA(pm1a_evt_blk); - fadt->pm1_cnt_len = 1; - fadt->xpm1a_cnt_blk.address_space_id = ACPI_ADR_SPACE_SYSTEM_MEMORY; - fadt->xpm1a_cnt_blk.register_bit_width = 8; - fadt->xpm1a_cnt_blk.address = ACPI_TABLE_MPA(pm1a_cnt_blk); - fadt->pm_tm_len = 4; - fadt->xpm_tmr_blk.address_space_id = ACPI_ADR_SPACE_SYSTEM_MEMORY; - fadt->xpm_tmr_blk.register_bit_width = 8; - fadt->xpm_tmr_blk.address = ACPI_TABLE_MPA(pm_tmr_blk); - - fadt->checksum = generate_acpi_checksum(fadt, fadt->length); + fadt->pm1_event_length = 4; + fadt->xpm1a_event_block.space_id = ACPI_ADR_SPACE_SYSTEM_MEMORY; + fadt->xpm1a_event_block.bit_width = 8; + fadt->xpm1a_event_block.address = ACPI_TABLE_MPA(pm1a_event_block); + fadt->pm1_control_length = 1; + fadt->xpm1a_control_block.space_id = ACPI_ADR_SPACE_SYSTEM_MEMORY; + fadt->xpm1a_control_block.bit_width = 8; + fadt->xpm1a_control_block.address = ACPI_TABLE_MPA(pm1a_control_block); + fadt->pm_timer_length = 4; + fadt->xpm_timer_block.space_id = ACPI_ADR_SPACE_SYSTEM_MEMORY; + fadt->xpm_timer_block.bit_width = 8; + fadt->xpm_timer_block.address = ACPI_TABLE_MPA(pm_timer_block); + + fadt->header.checksum = generate_acpi_checksum(fadt, + fadt->header.length); /* setup RSDP */ - memcpy(rsdp->signature, RSDP_SIG, strlen(RSDP_SIG)); + memcpy(rsdp->signature, ACPI_SIG_RSDP, strlen(ACPI_SIG_RSDP)); memcpy(rsdp->oem_id, "XEN", 3); rsdp->revision = 2; /* ACPI 2.0 includes XSDT */ - rsdp->length = sizeof(struct acpi20_table_rsdp); - rsdp->xsdt_address = ACPI_TABLE_MPA(xsdt); + rsdp->length = sizeof(struct acpi_table_rsdp); + rsdp->xsdt_physical_address = ACPI_TABLE_MPA(xsdt); rsdp->checksum = generate_acpi_checksum(rsdp, ACPI_RSDP_CHECKSUM_LENGTH); - rsdp->ext_checksum = generate_acpi_checksum(rsdp, rsdp->length); + rsdp->extended_checksum = generate_acpi_checksum(rsdp, rsdp->length); /* setup DSDT with trivial namespace. 
*/ - memcpy(dsdt->signature, DSDT_SIG, strlen(DSDT_SIG)); + memcpy(dsdt->signature, ACPI_SIG_DSDT, strlen(ACPI_SIG_DSDT)); dsdt->revision = 1; memcpy(dsdt->oem_id, "XEN", 3); memcpy(dsdt->oem_table_id, "Xen/ia64", 8); @@ -346,7 +350,8 @@ dom_fw_fake_acpi(domain_t *d, struct fak dsdt->checksum = generate_acpi_checksum(dsdt, dsdt->length); /* setup MADT */ - memcpy(madt->header.signature, APIC_SIG, sizeof(madt->header.signature)); + memcpy(madt->header.signature, ACPI_SIG_MADT, + sizeof(madt->header.signature)); madt->header.revision = 2; memcpy(madt->header.oem_id, "XEN", 3); memcpy(madt->header.oem_table_id, "Xen/ia64", 8); diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/ia64/xen/pcdp.c --- a/xen/arch/ia64/xen/pcdp.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/ia64/xen/pcdp.c Thu Mar 20 12:35:40 2008 -0600 @@ -43,7 +43,7 @@ pcdp_hp_irq_fixup(struct pcdp *pcdp, str { efi_system_table_t *systab; efi_config_table_t *tables; - struct acpi20_table_rsdp *rsdp = NULL; + struct acpi_table_rsdp *rsdp = NULL; struct acpi_table_xsdt *xsdt; struct acpi_table_header *hdr; int i; @@ -66,16 +66,17 @@ pcdp_hp_irq_fixup(struct pcdp *pcdp, str for (i = 0 ; i < (int)systab->nr_tables && !rsdp ; i++) { if (efi_guidcmp(tables[i].guid, ACPI_20_TABLE_GUID) == 0) rsdp = - (struct acpi20_table_rsdp *)__va(tables[i].table); - } - - if (!rsdp || strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) - return; - - xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_address); + (struct acpi_table_rsdp *)__va(tables[i].table); + } + + if (!rsdp || + strncmp(rsdp->signature, ACPI_SIG_RSDP, sizeof(ACPI_SIG_RSDP) - 1)) + return; + + xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_physical_address); hdr = &xsdt->header; - if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) + if (strncmp(hdr->signature, ACPI_SIG_XSDT, sizeof(ACPI_SIG_XSDT) - 1)) return; /* Sanity check; are we still looking at HP firmware tables? 
*/ diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/acpi/boot.c --- a/xen/arch/x86/acpi/boot.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/acpi/boot.c Thu Mar 20 12:35:40 2008 -0600 @@ -63,7 +63,7 @@ int acpi_strict; int acpi_strict; EXPORT_SYMBOL(acpi_strict); -acpi_interrupt_flags acpi_sci_flags __initdata; +u8 acpi_sci_flags __initdata; int acpi_sci_override_gsi __initdata; int acpi_skip_timer_override __initdata; @@ -148,11 +148,11 @@ static int __init acpi_parse_madt(unsign return -ENODEV; } - if (madt->lapic_address) { - acpi_lapic_addr = (u64) madt->lapic_address; + if (madt->address) { + acpi_lapic_addr = (u64) madt->address; printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n", - madt->lapic_address); + madt->address); } acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); @@ -342,23 +342,22 @@ static int __init acpi_parse_hpet(unsign return -ENODEV; } - if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) { + if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) { printk(KERN_WARNING PREFIX "HPET timers must be located in " "memory.\n"); return -1; } #if 0/*def CONFIG_X86_64*/ - vxtime.hpet_address = hpet_tbl->addr.addrl | - ((long) hpet_tbl->addr.addrh << 32); - - printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, vxtime.hpet_address); + vxtime.hpet_address = hpet_tbl->address.address; + + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", + hpet_tbl->id, vxtime.hpet_address); #else /* X86 */ { extern unsigned long hpet_address; - hpet_address = hpet_tbl->addr.addrl; + hpet_address = hpet_tbl->address.address; printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet_address); } @@ -377,11 +376,11 @@ extern u32 pmtmr_ioport; #ifdef CONFIG_ACPI_SLEEP /* Get pm1x_cnt and pm1x_evt information for ACPI sleep */ static void __init -acpi_fadt_parse_sleep_info(struct fadt_descriptor_rev2 *fadt) +acpi_fadt_parse_sleep_info(struct acpi_table_fadt *fadt) { struct acpi_table_rsdp *rsdp; unsigned long rsdp_phys; - struct facs_descriptor_rev2 *facs = NULL; + struct acpi_table_facs *facs = NULL; uint64_t facs_pa; rsdp_phys = acpi_find_rsdp(); @@ -389,41 +388,46 @@ acpi_fadt_parse_sleep_info(struct fadt_d goto bad; rsdp = __va(rsdp_phys); - if (fadt->revision >= FADT2_REVISION_ID) { - /* Sanity check on FADT Rev. 
2 */ - if ((fadt->xpm1a_cnt_blk.address_space_id != - ACPI_ADR_SPACE_SYSTEM_IO) || - (fadt->xpm1b_cnt_blk.address_space_id != - ACPI_ADR_SPACE_SYSTEM_IO) || - (fadt->xpm1a_evt_blk.address_space_id != - ACPI_ADR_SPACE_SYSTEM_IO) || - (fadt->xpm1b_evt_blk.address_space_id != - ACPI_ADR_SPACE_SYSTEM_IO)) - goto bad; - - acpi_sinfo.pm1a_cnt = (uint16_t)fadt->xpm1a_cnt_blk.address; - acpi_sinfo.pm1b_cnt = (uint16_t)fadt->xpm1b_cnt_blk.address; - acpi_sinfo.pm1a_evt = (uint16_t)fadt->xpm1a_evt_blk.address; - acpi_sinfo.pm1b_evt = (uint16_t)fadt->xpm1b_evt_blk.address; - } - - if (!acpi_sinfo.pm1a_cnt) - acpi_sinfo.pm1a_cnt = (uint16_t)fadt->V1_pm1a_cnt_blk; - if (!acpi_sinfo.pm1b_cnt) - acpi_sinfo.pm1b_cnt = (uint16_t)fadt->V1_pm1b_cnt_blk; - if (!acpi_sinfo.pm1a_evt) - acpi_sinfo.pm1a_evt = (uint16_t)fadt->V1_pm1a_evt_blk; - if (!acpi_sinfo.pm1b_evt) - acpi_sinfo.pm1b_evt = (uint16_t)fadt->V1_pm1b_evt_blk; + if (fadt->header.revision >= FADT2_REVISION_ID) { + memcpy(&acpi_sinfo.pm1a_cnt_blk, &fadt->xpm1a_control_block, + sizeof(struct acpi_generic_address)); + memcpy(&acpi_sinfo.pm1b_cnt_blk, &fadt->xpm1b_control_block, + sizeof(struct acpi_generic_address)); + memcpy(&acpi_sinfo.pm1a_evt_blk, &fadt->xpm1a_event_block, + sizeof(struct acpi_generic_address)); + memcpy(&acpi_sinfo.pm1b_evt_blk, &fadt->xpm1b_event_block, + sizeof(struct acpi_generic_address)); + } else { + acpi_sinfo.pm1a_cnt_blk.address = fadt->pm1a_control_block; + acpi_sinfo.pm1b_cnt_blk.address = fadt->pm1b_control_block; + acpi_sinfo.pm1a_evt_blk.address = fadt->pm1a_event_block; + acpi_sinfo.pm1b_evt_blk.address = fadt->pm1b_event_block; + acpi_sinfo.pm1a_cnt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO; + acpi_sinfo.pm1b_cnt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO; + acpi_sinfo.pm1a_evt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO; + acpi_sinfo.pm1b_evt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO; + acpi_sinfo.pm1a_cnt_blk.bit_width = 16; + acpi_sinfo.pm1b_cnt_blk.bit_width = 16; + acpi_sinfo.pm1a_evt_blk.bit_width = 16; + acpi_sinfo.pm1b_evt_blk.bit_width = 16; + acpi_sinfo.pm1a_cnt_blk.bit_offset = 0; + acpi_sinfo.pm1b_cnt_blk.bit_offset = 0; + acpi_sinfo.pm1a_evt_blk.bit_offset = 0; + acpi_sinfo.pm1b_evt_blk.bit_offset = 0; + acpi_sinfo.pm1a_cnt_blk.access_width = 0; + acpi_sinfo.pm1b_cnt_blk.access_width = 0; + acpi_sinfo.pm1a_evt_blk.access_width = 0; + acpi_sinfo.pm1b_evt_blk.access_width = 0; + } /* Now FACS... 
*/ - if (fadt->revision >= FADT2_REVISION_ID) - facs_pa = fadt->xfirmware_ctrl; + if (fadt->header.revision >= FADT2_REVISION_ID) + facs_pa = fadt->Xfacs; else - facs_pa = (uint64_t)fadt->V1_firmware_ctrl; - - facs = (struct facs_descriptor_rev2 *) - __acpi_map_table(facs_pa, sizeof(struct facs_descriptor_rev2)); + facs_pa = (uint64_t)fadt->facs; + + facs = (struct acpi_table_facs *) + __acpi_map_table(facs_pa, sizeof(struct acpi_table_facs)); if (!facs) goto bad; @@ -446,20 +450,23 @@ acpi_fadt_parse_sleep_info(struct fadt_d if ((rsdp->revision < 2) || (facs->length < 32)) { acpi_sinfo.wakeup_vector = facs_pa + - offsetof(struct facs_descriptor_rev2, + offsetof(struct acpi_table_facs, firmware_waking_vector); acpi_sinfo.vector_width = 32; } else { acpi_sinfo.wakeup_vector = facs_pa + - offsetof(struct facs_descriptor_rev2, + offsetof(struct acpi_table_facs, xfirmware_waking_vector); acpi_sinfo.vector_width = 64; } printk(KERN_INFO PREFIX - "ACPI SLEEP INFO: pm1x_cnt[%x,%x], pm1x_evt[%x,%x]\n", - acpi_sinfo.pm1a_cnt, acpi_sinfo.pm1b_cnt, - acpi_sinfo.pm1a_evt, acpi_sinfo.pm1b_cnt); + "ACPI SLEEP INFO: pm1x_cnt[%"PRIx64",%"PRIx64"], " + "pm1x_evt[%"PRIx64",%"PRIx64"]\n", + acpi_sinfo.pm1a_cnt_blk.address, + acpi_sinfo.pm1b_cnt_blk.address, + acpi_sinfo.pm1a_evt_blk.address, + acpi_sinfo.pm1b_evt_blk.address); printk(KERN_INFO PREFIX " wakeup_vec[%"PRIx64"], vec_size[%x]\n", acpi_sinfo.wakeup_vector, acpi_sinfo.vector_width); @@ -471,9 +478,9 @@ bad: static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) { - struct fadt_descriptor_rev2 *fadt = NULL; - - fadt = (struct fadt_descriptor_rev2 *)__acpi_map_table(phys, size); + struct acpi_table_fadt *fadt = NULL; + + fadt = (struct acpi_table_fadt *)__acpi_map_table(phys, size); if (!fadt) { printk(KERN_WARNING PREFIX "Unable to map FADT\n"); return 0; @@ -491,28 +498,28 @@ static int __init acpi_parse_fadt(unsign #ifdef CONFIG_X86_PM_TIMER /* detect the location of the ACPI PM Timer */ - if (fadt->revision >= FADT2_REVISION_ID) { + if (fadt->header.revision >= FADT2_REVISION_ID) { /* FADT rev. 2 */ - if (fadt->xpm_tmr_blk.address_space_id == + if (fadt->xpm_timer_block.space_id == ACPI_ADR_SPACE_SYSTEM_IO) - pmtmr_ioport = fadt->xpm_tmr_blk.address; + pmtmr_ioport = fadt->xpm_timer_block.address; /* * "X" fields are optional extensions to the original V1.0 * fields, so we must selectively expand V1.0 fields if the * corresponding X field is zero. */ if (!pmtmr_ioport) - pmtmr_ioport = fadt->V1_pm_tmr_blk; + pmtmr_ioport = fadt->pm_timer_block; } else { /* FADT rev. 1 */ - pmtmr_ioport = fadt->V1_pm_tmr_blk; + pmtmr_ioport = fadt->pm_timer_block; } if (pmtmr_ioport) printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport); #endif - acpi_smi_cmd = fadt->smi_cmd; + acpi_smi_cmd = fadt->smi_command; acpi_enable_value = fadt->acpi_enable; acpi_disable_value = fadt->acpi_disable; diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/acpi/power.c --- a/xen/arch/x86/acpi/power.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/acpi/power.c Thu Mar 20 12:35:40 2008 -0600 @@ -106,7 +106,7 @@ static void acpi_sleep_prepare(u32 state *(uint64_t *)wakeup_vector_va = tboot_in_measured_env() ? 
(uint64_t)g_tboot_shared->s3_tb_wakeup_entry : - (uint64_t)bootsym_phys(wakeup_start); + (uint64_t)bootsym_phys(wakeup_start); } } @@ -198,7 +198,7 @@ static long enter_state_helper(void *dat */ int acpi_enter_sleep(struct xenpf_enter_acpi_sleep *sleep) { - if ( !IS_PRIV(current->domain) || !acpi_sinfo.pm1a_cnt ) + if ( !IS_PRIV(current->domain) || !acpi_sinfo.pm1a_cnt_blk.address ) return -EPERM; /* Sanity check */ @@ -222,10 +222,14 @@ int acpi_enter_sleep(struct xenpf_enter_ static int acpi_get_wake_status(void) { - uint16_t val; + uint32_t val; + acpi_status status; /* Wake status is the 15th bit of PM1 status register. (ACPI spec 3.0) */ - val = inw(acpi_sinfo.pm1a_evt) | inw(acpi_sinfo.pm1b_evt); + status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &val); + if ( ACPI_FAILURE(status) ) + return 0; + val &= ACPI_BITMASK_WAKE_STATUS; val >>= ACPI_BITPOSITION_WAKE_STATUS; return val; @@ -243,7 +247,7 @@ static void tboot_sleep(u8 sleep_state) case ACPI_STATE_S3: shutdown_type = TB_SHUTDOWN_S3; g_tboot_shared->s3_k_wakeup_entry = - (uint32_t)bootsym_phys(wakeup_start); + (uint32_t)bootsym_phys(wakeup_start); break; case ACPI_STATE_S4: shutdown_type = TB_SHUTDOWN_S4; @@ -261,6 +265,8 @@ static void tboot_sleep(u8 sleep_state) /* System is really put into sleep state by this stub */ acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state) { + acpi_status status; + if ( tboot_in_measured_env() ) { tboot_sleep(sleep_state); @@ -270,9 +276,18 @@ acpi_status asmlinkage acpi_enter_sleep_ ACPI_FLUSH_CPU_CACHE(); - outw((u16)acpi_sinfo.pm1a_cnt_val, acpi_sinfo.pm1a_cnt); - if ( acpi_sinfo.pm1b_cnt ) - outw((u16)acpi_sinfo.pm1b_cnt_val, acpi_sinfo.pm1b_cnt); + status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL, + acpi_sinfo.pm1a_cnt_val); + if ( ACPI_FAILURE(status) ) + return_ACPI_STATUS(AE_ERROR); + + if ( acpi_sinfo.pm1b_cnt_blk.address ) + { + status = acpi_hw_register_write(ACPI_REGISTER_PM1B_CONTROL, + acpi_sinfo.pm1b_cnt_val); + if ( ACPI_FAILURE(status) ) + return_ACPI_STATUS(AE_ERROR); + } /* Wait until we enter sleep state, and spin until we wake */ while ( !acpi_get_wake_status() ) diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/domain.c Thu Mar 20 12:35:40 2008 -0600 @@ -46,7 +46,7 @@ #include <asm/debugreg.h> #include <asm/msr.h> #include <asm/nmi.h> -#include <asm/iommu.h> +#include <xen/iommu.h> #ifdef CONFIG_COMPAT #include <compat/vcpu.h> #endif @@ -830,7 +830,7 @@ unmap_vcpu_info(struct vcpu *v) mfn = v->arch.vcpu_info_mfn; unmap_domain_page_global(v->vcpu_info); - v->vcpu_info = shared_info_addr(d, vcpu_info[v->vcpu_id]); + v->vcpu_info = (void *)&shared_info(d, vcpu_info[v->vcpu_id]); v->arch.vcpu_info_mfn = INVALID_MFN; put_page_and_type(mfn_to_page(mfn)); @@ -888,7 +888,7 @@ map_vcpu_info(struct vcpu *v, unsigned l */ vcpu_info(v, evtchn_upcall_pending) = 1; for ( i = 0; i < BITS_PER_GUEST_LONG(d); i++ ) - set_bit(i, vcpu_info_addr(v, evtchn_pending_sel)); + set_bit(i, &vcpu_info(v, evtchn_pending_sel)); /* * Only bother to update time for the current vcpu. 
If we're @@ -961,8 +961,9 @@ arch_do_vcpu_op( if ( !v->domain->is_pinned ) break; - cpu_id.phys_id = (x86_cpu_to_apicid[v->vcpu_id] | - (acpi_get_processor_id(v->vcpu_id) << 8)); + cpu_id.phys_id = + (uint64_t)x86_cpu_to_apicid[v->vcpu_id] | + ((uint64_t)acpi_get_processor_id(v->vcpu_id) << 32); rc = -EFAULT; if ( copy_to_guest(arg, &cpu_id, 1) ) diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/domctl.c Thu Mar 20 12:35:40 2008 -0600 @@ -26,7 +26,7 @@ #include <asm/hvm/cacheattr.h> #include <asm/processor.h> #include <xsm/xsm.h> -#include <asm/iommu.h> +#include <xen/iommu.h> long arch_do_domctl( struct xen_domctl *domctl, diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/hvm/Makefile --- a/xen/arch/x86/hvm/Makefile Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/hvm/Makefile Thu Mar 20 12:35:40 2008 -0600 @@ -6,7 +6,6 @@ obj-y += i8254.o obj-y += i8254.o obj-y += intercept.o obj-y += io.o -obj-y += iommu.o obj-y += irq.o obj-y += mtrr.o obj-y += pmtimer.o diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/hvm/hvm.c Thu Mar 20 12:35:40 2008 -0600 @@ -59,8 +59,8 @@ struct hvm_function_table hvm_funcs __re struct hvm_function_table hvm_funcs __read_mostly; /* I/O permission bitmap is globally shared by all HVM guests. */ -char __attribute__ ((__section__ (".bss.page_aligned"))) - hvm_io_bitmap[3*PAGE_SIZE]; +unsigned long __attribute__ ((__section__ (".bss.page_aligned"))) + hvm_io_bitmap[3*PAGE_SIZE/BYTES_PER_LONG]; void hvm_enable(struct hvm_function_table *fns) { diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/hvm/intercept.c Thu Mar 20 12:35:40 2008 -0600 @@ -30,7 +30,7 @@ #include <asm/current.h> #include <io_ports.h> #include <xen/event.h> -#include <asm/iommu.h> +#include <xen/iommu.h> extern struct hvm_mmio_handler hpet_mmio_handler; extern struct hvm_mmio_handler vlapic_mmio_handler; diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/hvm/iommu.c --- a/xen/arch/x86/hvm/iommu.c Fri Mar 14 15:07:45 2008 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,145 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- */ - -#include <xen/init.h> -#include <xen/irq.h> -#include <xen/spinlock.h> -#include <xen/sched.h> -#include <xen/xmalloc.h> -#include <xen/domain_page.h> -#include <asm/delay.h> -#include <asm/string.h> -#include <asm/mm.h> -#include <asm/iommu.h> -#include <asm/hvm/vmx/intel-iommu.h> - -extern struct iommu_ops intel_iommu_ops; -extern struct iommu_ops amd_iommu_ops; - -int iommu_domain_init(struct domain *domain) -{ - struct hvm_iommu *hd = domain_hvm_iommu(domain); - - spin_lock_init(&hd->mapping_lock); - spin_lock_init(&hd->iommu_list_lock); - INIT_LIST_HEAD(&hd->pdev_list); - INIT_LIST_HEAD(&hd->g2m_ioport_list); - - if ( !iommu_enabled ) - return 0; - - switch ( boot_cpu_data.x86_vendor ) - { - case X86_VENDOR_INTEL: - hd->platform_ops = &intel_iommu_ops; - break; - case X86_VENDOR_AMD: - hd->platform_ops = &amd_iommu_ops; - break; - default: - BUG(); - } - - return hd->platform_ops->init(domain); -} - -int assign_device(struct domain *d, u8 bus, u8 devfn) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - - if ( !iommu_enabled || !hd->platform_ops) - return 0; - - return hd->platform_ops->assign_device(d, bus, devfn); -} - -void iommu_domain_destroy(struct domain *d) -{ - struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; - uint32_t i; - struct hvm_iommu *hd = domain_hvm_iommu(d); - struct list_head *ioport_list, *digl_list, *tmp; - struct g2m_ioport *ioport; - struct dev_intx_gsi_link *digl; - - if ( !iommu_enabled || !hd->platform_ops) - return; - - if ( hvm_irq_dpci != NULL ) - { - for ( i = 0; i < NR_IRQS; i++ ) - { - if ( !hvm_irq_dpci->mirq[i].valid ) - continue; - - pirq_guest_unbind(d, i); - kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(i)]); - - list_for_each_safe ( digl_list, tmp, - &hvm_irq_dpci->mirq[i].digl_list ) - { - digl = list_entry(digl_list, - struct dev_intx_gsi_link, list); - list_del(&digl->list); - xfree(digl); - } - } - - d->arch.hvm_domain.irq.dpci = NULL; - xfree(hvm_irq_dpci); - } - - if ( hd ) - { - list_for_each_safe ( ioport_list, tmp, &hd->g2m_ioport_list ) - { - ioport = list_entry(ioport_list, struct g2m_ioport, list); - list_del(&ioport->list); - xfree(ioport); - } - } - - return hd->platform_ops->teardown(d); -} - -int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - - if ( !iommu_enabled || !hd->platform_ops) - return 0; - - return hd->platform_ops->map_page(d, gfn, mfn); -} - -int iommu_unmap_page(struct domain *d, unsigned long gfn) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - - if ( !iommu_enabled || !hd->platform_ops) - return 0; - - return hd->platform_ops->unmap_page(d, gfn); -} - -void deassign_device(struct domain *d, u8 bus, u8 devfn) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - - if ( !iommu_enabled || !hd->platform_ops) - return; - - return hd->platform_ops->reassign_device(d, dom0, bus, devfn); -} diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/hvm/svm/emulate.c --- a/xen/arch/x86/hvm/svm/emulate.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/hvm/svm/emulate.c Thu Mar 20 12:35:40 2008 -0600 @@ -117,7 +117,9 @@ int __get_instruction_length_from_list(s } else { - inst_copy_from_guest(buffer, svm_rip2pointer(v), MAX_INST_LEN); + if ( inst_copy_from_guest(buffer, svm_rip2pointer(v), MAX_INST_LEN) + != MAX_INST_LEN ) + return 0; buf = buffer; } diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Thu Mar 20 12:35:40 2008 
-0600 @@ -495,6 +495,7 @@ static void svm_get_segment_register(str break; case x86_seg_ss: memcpy(reg, &vmcb->ss, sizeof(*reg)); + reg->attr.fields.dpl = vmcb->cpl; break; case x86_seg_tr: svm_sync_vmcb(v); @@ -943,6 +944,10 @@ static void svm_vmexit_do_cpuid(struct c { unsigned int eax, ebx, ecx, edx, inst_len; + inst_len = __get_instruction_length(current, INSTR_CPUID, NULL); + if ( inst_len == 0 ) + return; + eax = regs->eax; ebx = regs->ebx; ecx = regs->ecx; @@ -955,7 +960,6 @@ static void svm_vmexit_do_cpuid(struct c regs->ecx = ecx; regs->edx = edx; - inst_len = __get_instruction_length(current, INSTR_CPUID, NULL); __update_guest_eip(regs, inst_len); } @@ -1166,6 +1170,8 @@ static void svm_vmexit_do_hlt(struct vmc unsigned int inst_len; inst_len = __get_instruction_length(curr, INSTR_HLT, NULL); + if ( inst_len == 0 ) + return; __update_guest_eip(regs, inst_len); /* Check for pending exception or new interrupt. */ @@ -1354,6 +1360,8 @@ asmlinkage void svm_vmexit_handler(struc case VMEXIT_VMMCALL: inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL); + if ( inst_len == 0 ) + break; HVMTRACE_1D(VMMCALL, v, regs->eax); rc = hvm_do_hypercall(regs); if ( rc != HVM_HCALL_preempted ) diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/hvm/svm/vmcb.c Thu Mar 20 12:35:40 2008 -0600 @@ -80,27 +80,27 @@ struct host_save_area *alloc_host_save_a void svm_disable_intercept_for_msr(struct vcpu *v, u32 msr) { - char *msr_bitmap = v->arch.hvm_svm.msrpm; + unsigned long *msr_bitmap = v->arch.hvm_svm.msrpm; /* * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address). */ if ( msr <= 0x1fff ) { - __clear_bit(msr*2, msr_bitmap + 0x000); - __clear_bit(msr*2+1, msr_bitmap + 0x000); + __clear_bit(msr*2, msr_bitmap + 0x000/BYTES_PER_LONG); + __clear_bit(msr*2+1, msr_bitmap + 0x000/BYTES_PER_LONG); } else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) ) { msr &= 0x1fff; - __clear_bit(msr*2, msr_bitmap + 0x800); - __clear_bit(msr*2+1, msr_bitmap + 0x800); + __clear_bit(msr*2, msr_bitmap + 0x800/BYTES_PER_LONG); + __clear_bit(msr*2+1, msr_bitmap + 0x800/BYTES_PER_LONG); } else if ( (msr >= 0xc001000) && (msr <= 0xc0011fff) ) { msr &= 0x1fff; - __clear_bit(msr*2, msr_bitmap + 0x1000); - __clear_bit(msr*2+1, msr_bitmap + 0x1000); + __clear_bit(msr*2, msr_bitmap + 0x1000/BYTES_PER_LONG); + __clear_bit(msr*2+1, msr_bitmap + 0x1000/BYTES_PER_LONG); } } diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/hvm/vlapic.c Thu Mar 20 12:35:40 2008 -0600 @@ -83,15 +83,17 @@ static unsigned int vlapic_lvt_mask[VLAP */ #define VEC_POS(v) ((v)%32) -#define REG_POS(v) (((v)/32)* 0x10) -#define vlapic_test_and_set_vector(vec, bitmap) \ - test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)) -#define vlapic_test_and_clear_vector(vec, bitmap) \ - test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)) -#define vlapic_set_vector(vec, bitmap) \ - set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)) -#define vlapic_clear_vector(vec, bitmap) \ - clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)) +#define REG_POS(v) (((v)/32) * 0x10) +#define vlapic_test_and_set_vector(vec, bitmap) \ + test_and_set_bit(VEC_POS(vec), \ + (unsigned long *)((bitmap) + REG_POS(vec))) +#define vlapic_test_and_clear_vector(vec, bitmap) \ + test_and_clear_bit(VEC_POS(vec), \ + (unsigned long *)((bitmap) + REG_POS(vec))) +#define vlapic_set_vector(vec, 
bitmap) \ + set_bit(VEC_POS(vec), (unsigned long *)((bitmap) + REG_POS(vec))) +#define vlapic_clear_vector(vec, bitmap) \ + clear_bit(VEC_POS(vec), (unsigned long *)((bitmap) + REG_POS(vec))) static int vlapic_find_highest_vector(void *bitmap) { diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Thu Mar 20 12:35:40 2008 -0600 @@ -413,7 +413,7 @@ static void vmx_set_host_env(struct vcpu void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr) { - char *msr_bitmap = v->arch.hvm_vmx.msr_bitmap; + unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap; /* VMX MSR bitmap supported? */ if ( msr_bitmap == NULL ) @@ -426,14 +426,14 @@ void vmx_disable_intercept_for_msr(struc */ if ( msr <= 0x1fff ) { - __clear_bit(msr, msr_bitmap + 0x000); /* read-low */ - __clear_bit(msr, msr_bitmap + 0x800); /* write-low */ + __clear_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */ + __clear_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low */ } else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) ) { msr &= 0x1fff; - __clear_bit(msr, msr_bitmap + 0x400); /* read-high */ - __clear_bit(msr, msr_bitmap + 0xc00); /* write-high */ + __clear_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high */ + __clear_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high */ } } @@ -456,7 +456,7 @@ static int construct_vmcs(struct vcpu *v /* MSR access bitmap. */ if ( cpu_has_vmx_msr_bitmap ) { - char *msr_bitmap = alloc_xenheap_page(); + unsigned long *msr_bitmap = alloc_xenheap_page(); if ( msr_bitmap == NULL ) return -ENOMEM; @@ -870,7 +870,7 @@ void vmcs_dump_vcpu(struct vcpu *v) x = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32; x |= (uint32_t)vmr(TSC_OFFSET); printk("TSC Offset = %016llx\n", x); - x = (unsigned long long)vmr(GUEST_IA32_DEBUGCTL) << 32; + x = (unsigned long long)vmr(GUEST_IA32_DEBUGCTL_HIGH) << 32; x |= (uint32_t)vmr(GUEST_IA32_DEBUGCTL); printk("DebugCtl=%016llx DebugExceptions=%016llx\n", x, (unsigned long long)vmr(GUEST_PENDING_DBG_EXCEPTIONS)); diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Mar 20 12:35:40 2008 -0600 @@ -1512,8 +1512,10 @@ static int vmx_msr_read_intercept(struct msr_content = var_range_base[index]; break; case MSR_IA32_DEBUGCTLMSR: - if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0 ) - msr_content = 0; + msr_content = __vmread(GUEST_IA32_DEBUGCTL); +#ifdef __i386__ + msr_content |= (u64)__vmread(GUEST_IA32_DEBUGCTL_HIGH) << 32; +#endif break; case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2: goto gp_fault; @@ -1732,11 +1734,15 @@ static int vmx_msr_write_intercept(struc } if ( (rc < 0) || - (vmx_add_guest_msr(v, ecx) < 0) || (vmx_add_host_load_msr(v, ecx) < 0) ) vmx_inject_hw_exception(v, TRAP_machine_check, 0); else - vmx_write_guest_msr(v, ecx, msr_content); + { + __vmwrite(GUEST_IA32_DEBUGCTL, msr_content); +#ifdef __i386__ + __vmwrite(GUEST_IA32_DEBUGCTL_HIGH, msr_content >> 32); +#endif + } break; } diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/hvm/vmx/vpmu_core2.c --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c Thu Mar 20 12:35:40 2008 -0600 @@ -101,7 +101,7 @@ static int is_core2_vpmu_msr(u32 msr_ind return 0; } -static void core2_vpmu_set_msr_bitmap(char *msr_bitmap) +static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap) { int i; @@ 
-109,12 +109,14 @@ static void core2_vpmu_set_msr_bitmap(ch for ( i = 0; i < core2_counters.num; i++ ) { clear_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap); - clear_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap+0x800); + clear_bit(msraddr_to_bitpos(core2_counters.msr[i]), + msr_bitmap + 0x800/BYTES_PER_LONG); } for ( i = 0; i < core2_get_pmc_count(); i++ ) { clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap+0x800); + clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), + msr_bitmap + 0x800/BYTES_PER_LONG); } /* Allow Read PMU Non-global Controls Directly. */ @@ -124,19 +126,21 @@ static void core2_vpmu_set_msr_bitmap(ch clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0+i), msr_bitmap); } -static void core2_vpmu_unset_msr_bitmap(char *msr_bitmap) +static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap) { int i; for ( i = 0; i < core2_counters.num; i++ ) { set_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap); - set_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap+0x800); + set_bit(msraddr_to_bitpos(core2_counters.msr[i]), + msr_bitmap + 0x800/BYTES_PER_LONG); } for ( i = 0; i < core2_get_pmc_count(); i++ ) { set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap+0x800); + set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), + msr_bitmap + 0x800/BYTES_PER_LONG); } for ( i = 0; i < core2_ctrls.num; i++ ) set_bit(msraddr_to_bitpos(core2_ctrls.msr[i]), msr_bitmap); diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/irq.c Thu Mar 20 12:35:40 2008 -0600 @@ -15,7 +15,7 @@ #include <xen/keyhandler.h> #include <xen/compat.h> #include <asm/current.h> -#include <asm/iommu.h> +#include <xen/iommu.h> /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */ int opt_noirqbalance = 0; @@ -362,13 +362,12 @@ int pirq_guest_unmask(struct domain *d) int pirq_guest_unmask(struct domain *d) { unsigned int irq; - shared_info_t *s = d->shared_info; for ( irq = find_first_bit(d->pirq_mask, NR_IRQS); irq < NR_IRQS; irq = find_next_bit(d->pirq_mask, NR_IRQS, irq+1) ) { - if ( !test_bit(d->pirq_to_evtchn[irq], __shared_info_addr(d, s, evtchn_mask)) ) + if ( !test_bit(d->pirq_to_evtchn[irq], &shared_info(d, evtchn_mask)) ) __pirq_guest_eoi(d, irq); } @@ -660,13 +659,13 @@ static void dump_irqs(unsigned char key) printk("%u(%c%c%c%c)", d->domain_id, (test_bit(d->pirq_to_evtchn[irq], - shared_info_addr(d, evtchn_pending)) ? + &shared_info(d, evtchn_pending)) ? 'P' : '-'), (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_GUEST_LONG(d), - vcpu_info_addr(d->vcpu[0], evtchn_pending_sel)) ? + &vcpu_info(d->vcpu[0], evtchn_pending_sel)) ? 'S' : '-'), (test_bit(d->pirq_to_evtchn[irq], - shared_info_addr(d, evtchn_mask)) ? + &shared_info(d, evtchn_mask)) ? 'M' : '-'), (test_bit(irq, d->pirq_mask) ? 'M' : '-')); diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/mm.c Thu Mar 20 12:35:40 2008 -0600 @@ -645,6 +645,7 @@ get_page_from_l1e( struct page_info *page = mfn_to_page(mfn); uint32_t l1f = l1e_get_flags(l1e); struct vcpu *curr = current; + struct domain *owner; int okay; if ( !(l1f & _PAGE_PRESENT) ) @@ -672,6 +673,17 @@ get_page_from_l1e( return 1; } + + /* + * Let privileged domains transfer the right to map their target + * domain's pages. 
This is used to allow stub-domain pvfb export to dom0, + * until pvfb supports granted mappings. At that time this minor hack + * can go away. + */ + owner = page_get_owner(page); + if ( unlikely(d != owner) && (owner != NULL) && + (d != curr->domain) && IS_PRIV_FOR(d, owner) ) + d = owner; /* Foreign mappings into guests in shadow external mode don't * contribute to writeable mapping refcounts. (This allows the diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/mm/p2m.c Thu Mar 20 12:35:40 2008 -0600 @@ -27,7 +27,7 @@ #include <asm/page.h> #include <asm/paging.h> #include <asm/p2m.h> -#include <asm/iommu.h> +#include <xen/iommu.h> /* Debugging and auditing of the P2M code? */ #define P2M_AUDIT 0 @@ -229,7 +229,7 @@ set_p2m_entry(struct domain *d, unsigned if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, L3_PAGETABLE_SHIFT - PAGE_SHIFT, ((CONFIG_PAGING_LEVELS == 3) - ? (hvm_funcs.hap_supported ? 4 : 8) + ? (d->arch.hvm_domain.hap_enabled ? 4 : 8) : L3_PAGETABLE_ENTRIES), PGT_l2_page_table) ) goto out; diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/mm/paging.c --- a/xen/arch/x86/mm/paging.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/mm/paging.c Thu Mar 20 12:35:40 2008 -0600 @@ -114,7 +114,8 @@ static mfn_t paging_new_log_dirty_page(s return mfn; } -static mfn_t paging_new_log_dirty_leaf(struct domain *d, uint8_t **leaf_p) +static mfn_t paging_new_log_dirty_leaf( + struct domain *d, unsigned long **leaf_p) { mfn_t mfn = paging_new_log_dirty_page(d, (void **)leaf_p); if ( mfn_valid(mfn) ) @@ -264,7 +265,7 @@ void paging_mark_dirty(struct domain *d, mfn_t gmfn; int changed; mfn_t mfn, *l4, *l3, *l2; - uint8_t *l1; + unsigned long *l1; int i1, i2, i3, i4; gmfn = _mfn(guest_mfn); @@ -341,7 +342,7 @@ int paging_log_dirty_op(struct domain *d int rv = 0, clean = 0, peek = 1; unsigned long pages = 0; mfn_t *l4, *l3, *l2; - uint8_t *l1; + unsigned long *l1; int i4, i3, i2; domain_pause(d); @@ -399,7 +400,7 @@ int paging_log_dirty_op(struct domain *d (pages < sc->pages) && (i2 < LOGDIRTY_NODE_ENTRIES); i2++ ) { - static uint8_t zeroes[PAGE_SIZE]; + static unsigned long zeroes[PAGE_SIZE/BYTES_PER_LONG]; unsigned int bytes = PAGE_SIZE; l1 = ((l2 && mfn_valid(l2[i2])) ? 
map_domain_page(mfn_x(l2[i2])) : zeroes); @@ -408,7 +409,7 @@ int paging_log_dirty_op(struct domain *d if ( likely(peek) ) { if ( copy_to_guest_offset(sc->dirty_bitmap, pages >> 3, - l1, bytes) != 0 ) + (uint8_t *)l1, bytes) != 0 ) { rv = -EFAULT; goto out; diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/mm/shadow/private.h Thu Mar 20 12:35:40 2008 -0600 @@ -483,7 +483,7 @@ sh_mfn_is_dirty(struct domain *d, mfn_t { unsigned long pfn; mfn_t mfn, *l4, *l3, *l2; - uint8_t *l1; + unsigned long *l1; int rv; ASSERT(shadow_mode_log_dirty(d)); diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/numa.c --- a/xen/arch/x86/numa.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/numa.c Thu Mar 20 12:35:40 2008 -0600 @@ -57,7 +57,7 @@ populate_memnodemap(const struct node *n { int i; int res = -1; - unsigned long addr, end; + paddr_t addr, end; if (shift >= 64) return -1; @@ -286,13 +286,13 @@ static void dump_numa(unsigned char key) (u32)(now>>32), (u32)now); for_each_online_node(i) { - unsigned long pa = (NODE_DATA(i)->node_start_pfn + 1)<< PAGE_SHIFT; + paddr_t pa = (NODE_DATA(i)->node_start_pfn + 1)<< PAGE_SHIFT; printk("idx%d -> NODE%d start->%lu size->%lu\n", i, NODE_DATA(i)->node_id, NODE_DATA(i)->node_start_pfn, NODE_DATA(i)->node_spanned_pages); /* sanity check phys_to_nid() */ - printk("phys_to_nid(%lx) -> %d should be %d\n", pa, phys_to_nid(pa), + printk("phys_to_nid(%"PRIpaddr") -> %d should be %d\n", pa, phys_to_nid(pa), NODE_DATA(i)->node_id); } for_each_online_cpu(i) diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/srat.c --- a/xen/arch/x86/srat.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/srat.c Thu Mar 20 12:35:40 2008 -0600 @@ -106,7 +106,7 @@ static __init int slit_valid(struct acpi static __init int slit_valid(struct acpi_table_slit *slit) { int i, j; - int d = slit->localities; + int d = slit->locality_count; for (i = 0; i < d; i++) { for (j = 0; j < d; j++) { u8 val = slit->entry[d*i + j]; @@ -308,7 +308,7 @@ int __node_distance(int a, int b) if (!acpi_slit) return a == b ? 10 : 20; - index = acpi_slit->localities * node_to_pxm(a); + index = acpi_slit->locality_count * node_to_pxm(a); return acpi_slit->entry[index + node_to_pxm(b)]; } diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/traps.c Thu Mar 20 12:35:40 2008 -0600 @@ -677,32 +677,75 @@ static int emulate_forced_invalid_op(str : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (a), "1" (b), "2" (c), "3" (d) ); - if ( regs->eax == 1 ) + if ( (regs->eax & 0x7fffffff) == 1 ) { /* Modify Feature Information. */ __clear_bit(X86_FEATURE_VME, &d); __clear_bit(X86_FEATURE_PSE, &d); __clear_bit(X86_FEATURE_PGE, &d); + __clear_bit(X86_FEATURE_MCE, &d); + __clear_bit(X86_FEATURE_MCA, &d); + if ( !IS_PRIV(current->domain) ) + __clear_bit(X86_FEATURE_MTRR, &d); + __clear_bit(X86_FEATURE_PSE36, &d); + } + switch ( (uint32_t)regs->eax ) + { + case 1: + /* Modify Feature Information. 
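The leaf-1/0x80000001 masking in the traps.c hunk above relies on the Xen/Linux convention that each X86_FEATURE_* constant encodes 32*word + bit, where the word selects which CPUID output register holds the flag; since __clear_bit() is applied to one 32-bit register at a time, flags from the higher words are reduced with "% 32" before being cleared. The earlier "(regs->eax & 0x7fffffff) == 1" block catches both leaf 1 and leaf 0x80000001, presumably because the AMD extended leaf mirrors those flag positions. A minimal sketch of the clearing idiom, with an illustrative feature value rather than the real cpufeature.h definition:

    #include <stdint.h>

    /* Illustrative only: in cpufeature.h the value is 32*word + bit. */
    #define DEMO_X86_FEATURE_MWAIT (4*32 + 3)   /* word 4 = CPUID.1:ECX */

    /* Clear one feature flag in the 32-bit register that holds it. */
    static inline void demo_clear_feature(uint32_t *reg, unsigned int feature)
    {
        *reg &= ~(1u << (feature % 32));
    }

    /* e.g. hide MONITOR/MWAIT from the guest's view of CPUID.1:ECX:
     *     demo_clear_feature(&c, DEMO_X86_FEATURE_MWAIT);
     */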
*/ if ( !cpu_has_sep ) __clear_bit(X86_FEATURE_SEP, &d); #ifdef __i386__ if ( !supervisor_mode_kernel ) __clear_bit(X86_FEATURE_SEP, &d); #endif - if ( !IS_PRIV(current->domain) ) - __clear_bit(X86_FEATURE_MTRR, &d); - } - else if ( regs->eax == 0x80000001 ) - { + __clear_bit(X86_FEATURE_DS, &d); + __clear_bit(X86_FEATURE_ACC, &d); + __clear_bit(X86_FEATURE_PBE, &d); + + __clear_bit(X86_FEATURE_DTES64 % 32, &c); + __clear_bit(X86_FEATURE_MWAIT % 32, &c); + __clear_bit(X86_FEATURE_DSCPL % 32, &c); + __clear_bit(X86_FEATURE_VMXE % 32, &c); + __clear_bit(X86_FEATURE_SMXE % 32, &c); + __clear_bit(X86_FEATURE_EST % 32, &c); + __clear_bit(X86_FEATURE_TM2 % 32, &c); + if ( is_pv_32bit_vcpu(current) ) + __clear_bit(X86_FEATURE_CX16 % 32, &c); + __clear_bit(X86_FEATURE_XTPR % 32, &c); + __clear_bit(X86_FEATURE_PDCM % 32, &c); + __clear_bit(X86_FEATURE_DCA % 32, &c); + break; + case 0x80000001: /* Modify Feature Information. */ -#ifdef __i386__ - __clear_bit(X86_FEATURE_SYSCALL % 32, &d); -#endif + if ( is_pv_32bit_vcpu(current) ) + { + __clear_bit(X86_FEATURE_LM % 32, &d); + __clear_bit(X86_FEATURE_LAHF_LM % 32, &c); + } +#ifndef __i386__ + if ( is_pv_32on64_vcpu(current) && + boot_cpu_data.x86_vendor != X86_VENDOR_AMD ) +#endif + __clear_bit(X86_FEATURE_SYSCALL % 32, &d); + __clear_bit(X86_FEATURE_PAGE1GB % 32, &d); __clear_bit(X86_FEATURE_RDTSCP % 32, &d); - } - else - { + + __clear_bit(X86_FEATURE_SVME % 32, &c); + __clear_bit(X86_FEATURE_OSVW % 32, &c); + __clear_bit(X86_FEATURE_IBS % 32, &c); + __clear_bit(X86_FEATURE_SKINIT % 32, &c); + __clear_bit(X86_FEATURE_WDT % 32, &c); + break; + case 5: /* MONITOR/MWAIT */ + case 0xa: /* Architectural Performance Monitor Features */ + case 0x8000000a: /* SVM revision and features */ + case 0x8000001b: /* Instruction Based Sampling */ + a = b = c = d = 0; + break; + default: (void)cpuid_hypervisor_leaves(regs->eax, &a, &b, &c, &d); + break; } regs->eax = a; @@ -1801,6 +1844,8 @@ static int emulate_privileged_op(struct case 0x20: /* MOV CR?,<reg> */ opcode = insn_fetch(u8, code_base, eip, code_limit); + if ( opcode < 0xc0 ) + goto fail; modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); @@ -1841,6 +1886,8 @@ static int emulate_privileged_op(struct case 0x21: /* MOV DR?,<reg> */ opcode = insn_fetch(u8, code_base, eip, code_limit); + if ( opcode < 0xc0 ) + goto fail; modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); @@ -1851,6 +1898,8 @@ static int emulate_privileged_op(struct case 0x22: /* MOV <reg>,CR? */ opcode = insn_fetch(u8, code_base, eip, code_limit); + if ( opcode < 0xc0 ) + goto fail; modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); @@ -1897,6 +1946,8 @@ static int emulate_privileged_op(struct case 0x23: /* MOV <reg>,DR? 
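The new "if ( opcode < 0xc0 ) goto fail;" guards in emulate_privileged_op() above reject malformed MOV to/from CR/DR instructions before the ModRM byte is decoded: opcodes 0F 20-0F 23 are only defined with a register operand, so the ModRM mod field must be 11b, and any byte below 0xc0 would otherwise be misread as a memory form. A small self-contained sketch of the same check and of how the register numbers fall out of the byte (names here are illustrative, not the emulator's own):

    #include <stdbool.h>
    #include <stdint.h>

    /* ModRM layout: mod(7:6) reg(5:3) rm(2:0).  mod == 3 <=> byte >= 0xc0. */
    static bool demo_modrm_is_register_form(uint8_t modrm)
    {
        return (modrm >> 6) == 3;
    }

    static int demo_decode_mov_crdr(uint8_t modrm, int *crdr, int *gpr)
    {
        if (!demo_modrm_is_register_form(modrm))
            return -1;               /* reject, as the 'goto fail' above does */
        *crdr = (modrm >> 3) & 7;    /* CR/DR number */
        *gpr  = modrm & 7;           /* general-purpose register number */
        return 0;
    }

The real decode additionally folds the LOCK prefix (used to reach CR8) and REX bits into the register numbers, which this sketch leaves out.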
*/ opcode = insn_fetch(u8, code_base, eip, code_limit); + if ( opcode < 0xc0 ) + goto fail; modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); @@ -2026,6 +2077,15 @@ static int emulate_privileged_op(struct case MSR_EFER: if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) ) goto fail; + break; + case MSR_IA32_MISC_ENABLE: + if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) ) + goto fail; + regs->eax &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL | + MSR_IA32_MISC_ENABLE_MONITOR_ENABLE); + regs->eax |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL | + MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | + MSR_IA32_MISC_ENABLE_XTPR_DISABLE; break; default: if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) ) diff -r 8c921adf4833 -r 42f6c206c951 xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/arch/x86/x86_emulate.c Thu Mar 20 12:35:40 2008 -0600 @@ -785,11 +785,21 @@ _mode_iopl( struct x86_emulate_ops *ops) { int cpl = get_cpl(ctxt, ops); + if ( cpl == -1 ) + return -1; return ((cpl >= 0) && (cpl <= ((ctxt->regs->eflags >> 12) & 3))); } -#define mode_ring0() (get_cpl(ctxt, ops) == 0) -#define mode_iopl() _mode_iopl(ctxt, ops) +#define mode_ring0() ({ \ + int _cpl = get_cpl(ctxt, ops); \ + fail_if(_cpl < 0); \ + (_cpl == 0); \ +}) +#define mode_iopl() ({ \ + int _iopl = _mode_iopl(ctxt, ops); \ + fail_if(_iopl < 0); \ + _iopl; \ +}) static int in_realmode( @@ -2255,7 +2265,6 @@ x86_emulate( case 0x6c ... 0x6d: /* ins %dx,%es:%edi */ { unsigned long nr_reps = get_rep_prefix(); - generate_exception_if(!mode_iopl(), EXC_GP, 0); dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes; dst.mem.seg = x86_seg_es; dst.mem.off = truncate_ea(_regs.edi); @@ -2285,7 +2294,6 @@ x86_emulate( case 0x6e ... 0x6f: /* outs %esi,%dx */ { unsigned long nr_reps = get_rep_prefix(); - generate_exception_if(!mode_iopl(), EXC_GP, 0); dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes; if ( (nr_reps > 1) && (ops->rep_outs != NULL) && ((rc = ops->rep_outs(ea.mem.seg, truncate_ea(_regs.esi), @@ -2394,8 +2402,10 @@ x86_emulate( case 0x9d: /* popf */ { uint32_t mask = EFLG_VIP | EFLG_VIF | EFLG_VM; + if ( !mode_ring0() ) + mask |= EFLG_IOPL; if ( !mode_iopl() ) - mask |= EFLG_IOPL; + mask |= EFLG_IF; /* 64-bit mode: POP defaults to a 64-bit operand. */ if ( mode_64bit() && (op_bytes == 4) ) op_bytes = 8; @@ -2640,8 +2650,10 @@ x86_emulate( case 0xcf: /* iret */ { unsigned long cs, eip, eflags; uint32_t mask = EFLG_VIP | EFLG_VIF | EFLG_VM; + if ( !mode_ring0() ) + mask |= EFLG_IOPL; if ( !mode_iopl() ) - mask |= EFLG_IOPL; + mask |= EFLG_IF; fail_if(!in_realmode(ctxt, ops)); if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes), &eip, op_bytes, ctxt)) || @@ -2818,7 +2830,6 @@ x86_emulate( unsigned int port = ((b < 0xe8) ? insn_fetch_type(uint8_t) : (uint16_t)_regs.edx); - generate_exception_if(!mode_iopl(), EXC_GP, 0); op_bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 
4 : op_bytes; if ( b & 2 ) { @@ -3219,8 +3230,8 @@ x86_emulate( case 0x21: /* mov dr,reg */ case 0x22: /* mov reg,cr */ case 0x23: /* mov reg,dr */ + generate_exception_if(ea.type != OP_REG, EXC_UD, -1); generate_exception_if(!mode_ring0(), EXC_GP, 0); - modrm_rm |= (rex_prefix & 1) << 3; modrm_reg |= lock_prefix << 3; if ( b & 2 ) { diff -r 8c921adf4833 -r 42f6c206c951 xen/common/domain.c --- a/xen/common/domain.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/common/domain.c Thu Mar 20 12:35:40 2008 -0600 @@ -110,10 +110,6 @@ static void __domain_finalise_shutdown(s return; d->is_shut_down = 1; - - for_each_vcpu ( d, v ) - vcpu_sleep_nosync(v); - send_guest_global_virq(dom0, VIRQ_DOM_EXC); } @@ -126,7 +122,7 @@ static void vcpu_check_shutdown(struct v if ( d->is_shutting_down ) { if ( !v->paused_for_shutdown ) - atomic_inc(&v->pause_count); + vcpu_pause_nosync(v); v->paused_for_shutdown = 1; v->defer_shutdown = 0; __domain_finalise_shutdown(d); @@ -154,7 +150,7 @@ struct vcpu *alloc_vcpu( if ( !is_idle_domain(d) ) { set_bit(_VPF_down, &v->pause_flags); - v->vcpu_info = shared_info_addr(d, vcpu_info[vcpu_id]); + v->vcpu_info = (void *)&shared_info(d, vcpu_info[vcpu_id]); } if ( sched_init_vcpu(v, cpu_id) != 0 ) @@ -426,7 +422,7 @@ void domain_shutdown(struct domain *d, u { if ( v->defer_shutdown ) continue; - atomic_inc(&v->pause_count); + vcpu_pause_nosync(v); v->paused_for_shutdown = 1; } diff -r 8c921adf4833 -r 42f6c206c951 xen/common/event_channel.c --- a/xen/common/event_channel.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/common/event_channel.c Thu Mar 20 12:35:40 2008 -0600 @@ -539,7 +539,6 @@ void evtchn_set_pending(struct vcpu *v, void evtchn_set_pending(struct vcpu *v, int port) { struct domain *d = v->domain; - shared_info_t *s = d->shared_info; /* * The following bit operations must happen in strict order. @@ -548,12 +547,12 @@ void evtchn_set_pending(struct vcpu *v, * others may require explicit memory barriers. */ - if ( test_and_set_bit(port, __shared_info_addr(d, s, evtchn_pending)) ) + if ( test_and_set_bit(port, &shared_info(d, evtchn_pending)) ) return; - if ( !test_bit (port, __shared_info_addr(d, s, evtchn_mask)) && + if ( !test_bit (port, &shared_info(d, evtchn_mask)) && !test_and_set_bit(port / BITS_PER_GUEST_LONG(d), - vcpu_info_addr(v, evtchn_pending_sel)) ) + &vcpu_info(v, evtchn_pending_sel)) ) { vcpu_mark_events_pending(v); } @@ -750,7 +749,6 @@ static long evtchn_unmask(evtchn_unmask_ static long evtchn_unmask(evtchn_unmask_t *unmask) { struct domain *d = current->domain; - shared_info_t *s = d->shared_info; int port = unmask->port; struct vcpu *v; @@ -768,10 +766,10 @@ static long evtchn_unmask(evtchn_unmask_ * These operations must happen in strict order. Based on * include/xen/event.h:evtchn_set_pending(). 
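The evtchn_set_pending()/evtchn_unmask() conversions above drop the explicit shared_info_t pointer and the *_addr() helpers in favour of the shared_info()/vcpu_info() accessors, taking an address with "&" only where a bit operation needs one. The delivery test itself is unchanged: set the pending bit, and only if the port was not already pending, is not masked, and its selector bit was not already set does the vCPU get notified. A self-contained model of that sequence (plain arrays and illustrative names stand in for the shared-info layout):

    #include <stdbool.h>

    #define BPL (8 * sizeof(unsigned long))     /* bits per long */

    static bool demo_test_and_set(unsigned long *w, unsigned int bit)
    {
        unsigned long m = 1ul << (bit % BPL);
        bool was_set = (w[bit / BPL] & m) != 0;
        w[bit / BPL] |= m;
        return was_set;
    }

    static bool demo_test(const unsigned long *w, unsigned int bit)
    {
        return (w[bit / BPL] & (1ul << (bit % BPL))) != 0;
    }

    /* Returns true if the vCPU needs an upcall (vcpu_mark_events_pending()). */
    static bool demo_set_pending(unsigned long *pending, unsigned long *mask,
                                 unsigned long *sel, unsigned int port,
                                 unsigned int bits_per_guest_long)
    {
        if (demo_test_and_set(pending, port))
            return false;                        /* already pending */
        return !demo_test(mask, port) &&
               !demo_test_and_set(sel, port / bits_per_guest_long);
    }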
*/ - if ( test_and_clear_bit(port, __shared_info_addr(d, s, evtchn_mask)) && - test_bit (port, __shared_info_addr(d, s, evtchn_pending)) && + if ( test_and_clear_bit(port, &shared_info(d, evtchn_mask)) && + test_bit (port, &shared_info(d, evtchn_pending)) && !test_and_set_bit (port / BITS_PER_GUEST_LONG(d), - vcpu_info_addr(v, evtchn_pending_sel)) ) + &vcpu_info(v, evtchn_pending_sel)) ) { vcpu_mark_events_pending(v); } diff -r 8c921adf4833 -r 42f6c206c951 xen/common/keyhandler.c --- a/xen/common/keyhandler.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/common/keyhandler.c Thu Mar 20 12:35:40 2008 -0600 @@ -201,12 +201,12 @@ static void dump_domains(unsigned char k printk(" Notifying guest (virq %d, port %d, stat %d/%d/%d)\n", VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG], test_bit(v->virq_to_evtchn[VIRQ_DEBUG], - shared_info_addr(d, evtchn_pending)), + &shared_info(d, evtchn_pending)), test_bit(v->virq_to_evtchn[VIRQ_DEBUG], - shared_info_addr(d, evtchn_mask)), + &shared_info(d, evtchn_mask)), test_bit(v->virq_to_evtchn[VIRQ_DEBUG] / BITS_PER_GUEST_LONG(d), - vcpu_info_addr(v, evtchn_pending_sel))); + &vcpu_info(v, evtchn_pending_sel))); send_guest_vcpu_virq(v, VIRQ_DEBUG); } } diff -r 8c921adf4833 -r 42f6c206c951 xen/common/schedule.c --- a/xen/common/schedule.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/common/schedule.c Thu Mar 20 12:35:40 2008 -0600 @@ -365,7 +365,7 @@ static long do_poll(struct sched_poll *s goto out; rc = 0; - if ( test_bit(port, shared_info_addr(d, evtchn_pending)) ) + if ( test_bit(port, &shared_info(d, evtchn_pending)) ) goto out; } diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/acpi/Makefile --- a/xen/drivers/acpi/Makefile Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/acpi/Makefile Thu Mar 20 12:35:40 2008 -0600 @@ -1,2 +1,5 @@ obj-y += tables.o obj-y += tables.o obj-y += numa.o +obj-y += osl.o + +obj-$(x86) += hwregs.o diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/acpi/hwregs.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/acpi/hwregs.c Thu Mar 20 12:35:40 2008 -0600 @@ -0,0 +1,385 @@ + +/******************************************************************************* + * + * Module Name: hwregs - Read/write access functions for the various ACPI + * control and status registers. + * + ******************************************************************************/ + +/* + * Copyright (C) 2000 - 2006, R. Byron Moore + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + */ + +#include <asm/io.h> +#include <xen/config.h> +#include <xen/init.h> +#include <xen/types.h> +#include <xen/errno.h> +#include <acpi/acpi.h> + +#define _COMPONENT ACPI_HARDWARE +ACPI_MODULE_NAME("hwregs") + +/****************************************************************************** + * + * FUNCTION: acpi_hw_register_read + * + * PARAMETERS: register_id - ACPI Register ID + * return_value - Where the register value is returned + * + * RETURN: Status and the value read. + * + * DESCRIPTION: Read from the specified ACPI register + * + ******************************************************************************/ +acpi_status +acpi_hw_register_read(u32 register_id, u32 * return_value) +{ + u32 value1 = 0; + u32 value2 = 0; + acpi_status status; + + ACPI_FUNCTION_TRACE(hw_register_read); + + switch (register_id) { + case ACPI_REGISTER_PM1_STATUS: /* 16-bit access */ + + status = + acpi_hw_low_level_read(16, &value1, + &acpi_sinfo.pm1a_evt_blk); + if (ACPI_FAILURE(status)) { + goto exit; + } + + /* PM1B is optional */ + + status = + acpi_hw_low_level_read(16, &value2, + &acpi_sinfo.pm1b_evt_blk); + value1 |= value2; + break; + + + case ACPI_REGISTER_PM1_CONTROL: /* 16-bit access */ + + status = + acpi_hw_low_level_read(16, &value1, + &acpi_sinfo.pm1a_cnt_blk); + if (ACPI_FAILURE(status)) { + goto exit; + } + + status = + acpi_hw_low_level_read(16, &value2, + &acpi_sinfo.pm1b_cnt_blk); + value1 |= value2; + break; + + + default: + status = AE_BAD_PARAMETER; + break; + } + + exit: + + if (ACPI_SUCCESS(status)) { + *return_value = value1; + } + + return_ACPI_STATUS(status); +} + +/****************************************************************************** + * + * FUNCTION: acpi_hw_register_write + * + * PARAMETERS: register_id - ACPI Register ID + * Value - The value to write + * + * RETURN: Status + * + * DESCRIPTION: Write to the specified ACPI register + * + * NOTE: In accordance with the ACPI specification, this function automatically + * preserves the value of the following bits, meaning that these bits cannot be + * changed via this interface: + * + * PM1_CONTROL[0] = SCI_EN + * PM1_CONTROL[9] + * PM1_STATUS[11] + * + * ACPI References: + * 1) Hardware Ignored Bits: When software writes to a register with ignored + * bit fields, it preserves the ignored bit fields + * 2) SCI_EN: OSPM always preserves this bit position + * + ******************************************************************************/ + +acpi_status acpi_hw_register_write(u32 register_id, u32 value) +{ + acpi_status status; + u32 read_value; + + ACPI_FUNCTION_TRACE(hw_register_write); + + switch (register_id) { //By now we just need handle PM1 status/PM1 control + case ACPI_REGISTER_PM1_STATUS: /* 16-bit access */ + + /* Perform a read 
first to preserve certain bits (per ACPI spec) */ + + status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, + &read_value); + if (ACPI_FAILURE(status)) { + goto exit; + } + + /* Insert the bits to be preserved */ + + ACPI_INSERT_BITS(value, ACPI_PM1_STATUS_PRESERVED_BITS, + read_value); + + /* Now we can write the data */ + + status = + acpi_hw_low_level_write(16, value, + &acpi_sinfo.pm1a_evt_blk); + if (ACPI_FAILURE(status)) { + goto exit; + } + + /* PM1B is optional */ + + status = + acpi_hw_low_level_write(16, value, + &acpi_sinfo.pm1b_evt_blk); + break; + + + case ACPI_REGISTER_PM1_CONTROL: /* 16-bit access */ + + /* + * Perform a read first to preserve certain bits (per ACPI spec) + * + * Note: This includes SCI_EN, we never want to change this bit + */ + status = acpi_hw_register_read(ACPI_REGISTER_PM1_CONTROL, + &read_value); + if (ACPI_FAILURE(status)) { + goto exit; + } + + /* Insert the bits to be preserved */ + + ACPI_INSERT_BITS(value, ACPI_PM1_CONTROL_PRESERVED_BITS, + read_value); + + /* Now we can write the data */ + + status = + acpi_hw_low_level_write(16, value, + &acpi_sinfo.pm1a_cnt_blk); + if (ACPI_FAILURE(status)) { + goto exit; + } + + status = + acpi_hw_low_level_write(16, value, + &acpi_sinfo.pm1b_cnt_blk); + break; + + case ACPI_REGISTER_PM1A_CONTROL: /* 16-bit access */ + + status = + acpi_hw_low_level_write(16, value, + &acpi_sinfo.pm1a_cnt_blk); + break; + + case ACPI_REGISTER_PM1B_CONTROL: /* 16-bit access */ + + status = + acpi_hw_low_level_write(16, value, + &acpi_sinfo.pm1b_cnt_blk); + break; + + + default: + status = AE_BAD_PARAMETER; + break; + } + + exit: + + return_ACPI_STATUS(status); +} + +/****************************************************************************** + * + * FUNCTION: acpi_hw_low_level_read + * + * PARAMETERS: Width - 8, 16, or 32 + * Value - Where the value is returned + * Reg - GAS register structure + * + * RETURN: Status + * + * DESCRIPTION: Read from either memory or IO space. + * + ******************************************************************************/ + +acpi_status +acpi_hw_low_level_read(u32 width, u32 * value, struct acpi_generic_address *reg) +{ + u64 address; + acpi_status status; + + ACPI_FUNCTION_NAME(hw_low_level_read); + + /* + * Must have a valid pointer to a GAS structure, and + * a non-zero address within. However, don't return an error + * because the PM1A/B code must not fail if B isn't present. + */ + if (!reg) { + return (AE_OK); + } + + /* Get a local copy of the address. Handles possible alignment issues */ + + ACPI_MOVE_64_TO_64(&address, ®->address); + if (!address) { + return (AE_OK); + } + *value = 0; + + /* + * Two address spaces supported: Memory or IO. 
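acpi_hw_register_write() above implements the read-modify-write that the ACPI spec requires for PM1_STATUS and PM1_CONTROL: the current value is read back and the bits listed in the function header (SCI_EN in PM1_CONTROL[0], PM1_CONTROL[9], PM1_STATUS[11]) are re-inserted before the value is written to the A block and, if present, the B block. A minimal sketch of that merge, using an illustrative mask rather than the real ACPI_PM1_CONTROL_PRESERVED_BITS definition:

    #include <stdint.h>

    /* Illustrative preserved-bit mask: SCI_EN (bit 0) plus reserved bit 9. */
    #define DEMO_PM1_CONTROL_PRESERVED 0x0201u

    /* Same shape as ACPI_INSERT_BITS(value, mask, read_value): keep the
     * preserved positions from the current register contents, take every
     * other bit from the caller's new value. */
    static uint16_t demo_merge_preserved(uint16_t new_val, uint16_t current)
    {
        return (uint16_t)((new_val & ~DEMO_PM1_CONTROL_PRESERVED) |
                          (current &  DEMO_PM1_CONTROL_PRESERVED));
    }

The merged value is then written to pm1a_cnt_blk and pm1b_cnt_blk in turn, with the B block silently skipped when absent, since acpi_hw_low_level_write() treats a null or zero-address GAS as success.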
+ * PCI_Config is not supported here because the GAS struct is insufficient + */ + switch (reg->space_id) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + + status = acpi_os_read_memory((acpi_physical_address) address, + value, width); + break; + + case ACPI_ADR_SPACE_SYSTEM_IO: + + status = acpi_os_read_port((acpi_io_address) address, + value, width); + break; + + default: + + return (AE_BAD_PARAMETER); + } + + ACPI_DEBUG_PRINT((ACPI_DB_IO, + "Read: %8.8X width %2d from %8.8X%8.8X (%s)\n", + *value, width, + ACPI_FORMAT_UINT64(address), + acpi_ut_get_region_name(reg->address_space_id))); + + return (status); +} + +/****************************************************************************** + * + * FUNCTION: acpi_hw_low_level_write + * + * PARAMETERS: Width - 8, 16, or 32 + * Value - To be written + * Reg - GAS register structure + * + * RETURN: Status + * + * DESCRIPTION: Write to either memory or IO space. + * + ******************************************************************************/ + +acpi_status +acpi_hw_low_level_write(u32 width, u32 value, struct acpi_generic_address * reg) +{ + u64 address; + acpi_status status; + + ACPI_FUNCTION_NAME(hw_low_level_write); + + /* + * Must have a valid pointer to a GAS structure, and + * a non-zero address within. However, don't return an error + * because the PM1A/B code must not fail if B isn't present. + */ + if (!reg) { + return (AE_OK); + } + + /* Get a local copy of the address. Handles possible alignment issues */ + + ACPI_MOVE_64_TO_64(&address, ®->address); + if (!address) { + return (AE_OK); + } + + /* + * Two address spaces supported: Memory or IO. + * PCI_Config is not supported here because the GAS struct is insufficient + */ + switch (reg->space_id) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + + status = acpi_os_write_memory((acpi_physical_address) address, + value, width); + break; + + case ACPI_ADR_SPACE_SYSTEM_IO: + + status = acpi_os_write_port((acpi_io_address) address, + value, width); + break; + + default: + return (AE_BAD_PARAMETER); + } + + ACPI_DEBUG_PRINT((ACPI_DB_IO, + "Wrote: %8.8X width %2d to %8.8X%8.8X (%s)\n", + value, width, + ACPI_FORMAT_UINT64(address), + acpi_ut_get_region_name(reg->address_space_id))); + + return (status); +} diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/acpi/numa.c --- a/xen/drivers/acpi/numa.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/acpi/numa.c Thu Mar 20 12:35:40 2008 -0600 @@ -104,7 +104,7 @@ static int __init acpi_parse_slit(unsign slit = (struct acpi_table_slit *)__acpi_map_table(phys_addr, size); /* downcast just for %llu vs %lu for i386/ia64 */ - localities = (u32) slit->localities; + localities = (u32) slit->locality_count; acpi_numa_slit_init(slit); diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/acpi/osl.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/acpi/osl.c Thu Mar 20 12:35:40 2008 -0600 @@ -0,0 +1,183 @@ +/* + * acpi_osl.c - OS-dependent functions ($Revision: 83 $) + * + * Copyright (C) 2000 Andrew Henroid + * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@xxxxxxxxx> + * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + */ +#include <asm/io.h> +#include <xen/config.h> +#include <xen/init.h> +#include <xen/types.h> +#include <xen/errno.h> +#include <xen/acpi.h> +#include <xen/numa.h> +#include <acpi/acpi_bus.h> +#include <acpi/acmacros.h> +#include <acpi/acpiosxf.h> +#include <acpi/platform/aclinux.h> +#include <xen/spinlock.h> +#include <xen/domain_page.h> + +#define _COMPONENT ACPI_OS_SERVICES +ACPI_MODULE_NAME("osl") +#define PREFIX "ACPI: " +struct acpi_os_dpc { + acpi_osd_exec_callback function; + void *context; +}; + +#ifdef CONFIG_ACPI_CUSTOM_DSDT +#include CONFIG_ACPI_CUSTOM_DSDT_FILE +#endif + +#ifdef ENABLE_DEBUGGER +#include <linux/kdb.h> + +/* stuff for debugger support */ +int acpi_in_debugger; +EXPORT_SYMBOL(acpi_in_debugger); + +extern char line_buf[80]; +#endif /*ENABLE_DEBUGGER */ + +int acpi_specific_hotkey_enabled = TRUE; +EXPORT_SYMBOL(acpi_specific_hotkey_enabled); + + +acpi_status acpi_os_read_port(acpi_io_address port, u32 * value, u32 width) +{ + u32 dummy; + + if (!value) + value = &dummy; + + *value = 0; + if (width <= 8) { + *(u8 *) value = inb(port); + } else if (width <= 16) { + *(u16 *) value = inw(port); + } else if (width <= 32) { + *(u32 *) value = inl(port); + } else { + BUG(); + } + + return AE_OK; +} + +EXPORT_SYMBOL(acpi_os_read_port); + +acpi_status acpi_os_write_port(acpi_io_address port, u32 value, u32 width) +{ + if (width <= 8) { + outb(value, port); + } else if (width <= 16) { + outw(value, port); + } else if (width <= 32) { + outl(value, port); + } else { + BUG(); + } + + return AE_OK; +} + +EXPORT_SYMBOL(acpi_os_write_port); + +acpi_status +acpi_os_read_memory(acpi_physical_address phys_addr, u32 * value, u32 width) +{ + u32 dummy; + void __iomem *virt_addr; + + virt_addr = map_domain_page(phys_addr>>PAGE_SHIFT); + if (!value) + value = &dummy; + + switch (width) { + case 8: + *(u8 *) value = readb(virt_addr); + break; + case 16: + *(u16 *) value = readw(virt_addr); + break; + case 32: + *(u32 *) value = readl(virt_addr); + break; + default: + BUG(); + } + + unmap_domain_page(virt_addr); + + return AE_OK; +} + +acpi_status +acpi_os_write_memory(acpi_physical_address phys_addr, u32 value, u32 width) +{ + void __iomem *virt_addr; + + virt_addr = map_domain_page(phys_addr>>PAGE_SHIFT); + + switch (width) { + case 8: + writeb(value, virt_addr); + break; + case 16: + writew(value, virt_addr); + break; + case 32: + writel(value, virt_addr); + break; + default: + BUG(); + } + + unmap_domain_page(virt_addr); + + return AE_OK; +} + +/* + * Acquire a spinlock. + * + * handle is a pointer to the spinlock_t. + */ + +acpi_cpu_flags acpi_os_acquire_lock(acpi_spinlock lockp) +{ + acpi_cpu_flags flags; + spin_lock_irqsave(lockp, flags); + return flags; +} + +/* + * Release a spinlock. See above. 
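acpi_os_read_memory()/acpi_os_write_memory() above map the page containing the target physical address with map_domain_page() and then dispatch on the GAS access width, much as the port variants dispatch on width with inb/inw/inl. A self-contained model of the width dispatch (an ordinary buffer stands in for the MMIO mapping, and memcpy for readb/readw/readl):

    #include <stdint.h>
    #include <string.h>

    static uint32_t demo_width_read(const void *virt, unsigned int width)
    {
        uint8_t v8; uint16_t v16; uint32_t v32;

        switch (width) {
        case 8:  memcpy(&v8,  virt, sizeof(v8));  return v8;
        case 16: memcpy(&v16, virt, sizeof(v16)); return v16;
        case 32: memcpy(&v32, virt, sizeof(v32)); return v32;
        default: return 0;  /* the real code BUG()s on any other width */
        }
    }

acpi_os_read_port() accepts any width up to each boundary (width <= 8/16/32) rather than exact matches, but the effect is the same three access sizes.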
+ */ + +void acpi_os_release_lock(acpi_spinlock lockp, acpi_cpu_flags flags) +{ + spin_unlock_irqrestore(lockp, flags); +} + diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/acpi/tables.c --- a/xen/drivers/acpi/tables.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/acpi/tables.c Thu Mar 20 12:35:40 2008 -0600 @@ -295,15 +295,15 @@ acpi_get_table_header_early(enum acpi_ta /* Map the DSDT header via the pointer in the FADT */ if (id == ACPI_DSDT) { - struct fadt_descriptor_rev2 *fadt = - (struct fadt_descriptor_rev2 *)*header; - - if (fadt->revision == 3 && fadt->Xdsdt) { + struct acpi_table_fadt *fadt = + (struct acpi_table_fadt *)*header; + + if (fadt->header.revision == 3 && fadt->Xdsdt) { *header = (void *)__acpi_map_table(fadt->Xdsdt, sizeof(struct acpi_table_header)); - } else if (fadt->V1_dsdt) { - *header = (void *)__acpi_map_table(fadt->V1_dsdt, + } else if (fadt->dsdt) { + *header = (void *)__acpi_map_table(fadt->dsdt, sizeof(struct acpi_table_header)); } else @@ -424,12 +424,11 @@ static int __init acpi_table_get_sdt(str /* First check XSDT (but only on ACPI 2.0-compatible systems) */ - if ((rsdp->revision >= 2) && - (((struct acpi20_table_rsdp *)rsdp)->xsdt_address)) { + if ((rsdp->revision >= 2) && rsdp->xsdt_physical_address) { struct acpi_table_xsdt *mapped_xsdt = NULL; - sdt_pa = ((struct acpi20_table_rsdp *)rsdp)->xsdt_address; + sdt_pa = rsdp->xsdt_physical_address; /* map in just the header */ header = (struct acpi_table_header *) @@ -471,16 +470,16 @@ static int __init acpi_table_get_sdt(str } for (i = 0; i < sdt_count; i++) - sdt_entry[i].pa = (unsigned long)mapped_xsdt->entry[i]; + sdt_entry[i].pa = (unsigned long)mapped_xsdt->table_offset_entry[i]; } /* Then check RSDT */ - else if (rsdp->rsdt_address) { + else if (rsdp->rsdt_physical_address) { struct acpi_table_rsdt *mapped_rsdt = NULL; - sdt_pa = rsdp->rsdt_address; + sdt_pa = rsdp->rsdt_physical_address; /* map in just the header */ header = (struct acpi_table_header *) @@ -521,7 +520,7 @@ static int __init acpi_table_get_sdt(str } for (i = 0; i < sdt_count; i++) - sdt_entry[i].pa = (unsigned long)mapped_rsdt->entry[i]; + sdt_entry[i].pa = (unsigned long)mapped_rsdt->table_offset_entry[i]; } else { @@ -613,13 +612,10 @@ int __init acpi_table_init(void) if (rsdp->revision < 2) result = - acpi_table_compute_checksum(rsdp, - sizeof(struct acpi_table_rsdp)); + acpi_table_compute_checksum(rsdp, 20); else result = - acpi_table_compute_checksum(rsdp, - ((struct acpi20_table_rsdp *) - rsdp)->length); + acpi_table_compute_checksum(rsdp, rsdp->length); if (result) { printk(KERN_WARNING " >>> ERROR: Invalid checksum\n"); @@ -663,12 +659,11 @@ acpi_table_disable(enum acpi_table_id ta /* First check XSDT (but only on ACPI 2.0-compatible systems) */ - if ((rsdp->revision >= 2) && - (((struct acpi20_table_rsdp *)rsdp)->xsdt_address)) { + if ((rsdp->revision >= 2) && rsdp->xsdt_physical_address) { struct acpi_table_xsdt *mapped_xsdt = NULL; - sdt_pa = ((struct acpi20_table_rsdp *)rsdp)->xsdt_address; + sdt_pa = rsdp->xsdt_physical_address; /* map in just the header */ header = (struct acpi_table_header *) @@ -702,7 +697,7 @@ acpi_table_disable(enum acpi_table_id ta if (id < sdt_count) { header = (struct acpi_table_header *) - __acpi_map_table(mapped_xsdt->entry[id], sizeof(struct acpi_table_header)); + __acpi_map_table(mapped_xsdt->table_offset_entry[id], sizeof(struct acpi_table_header)); } else { printk(KERN_WARNING PREFIX "Unable to disable entry %d\n", @@ -713,11 +708,11 @@ acpi_table_disable(enum acpi_table_id ta /* Then 
check RSDT */ - else if (rsdp->rsdt_address) { + else if (rsdp->rsdt_physical_address) { struct acpi_table_rsdt *mapped_rsdt = NULL; - sdt_pa = rsdp->rsdt_address; + sdt_pa = rsdp->rsdt_physical_address; /* map in just the header */ header = (struct acpi_table_header *) @@ -749,7 +744,7 @@ acpi_table_disable(enum acpi_table_id ta } if (id < sdt_count) { header = (struct acpi_table_header *) - __acpi_map_table(mapped_rsdt->entry[id], sizeof(struct acpi_table_header)); + __acpi_map_table(mapped_rsdt->table_offset_entry[id], sizeof(struct acpi_table_header)); } else { printk(KERN_WARNING PREFIX "Unable to disable entry %d\n", diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/Makefile --- a/xen/drivers/passthrough/Makefile Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/Makefile Thu Mar 20 12:35:40 2008 -0600 @@ -1,2 +1,5 @@ subdir-$(x86) += vtd subdir-$(x86) += vtd subdir-$(x86) += amd + +obj-y += iommu.o +obj-y += io.o diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/amd/iommu_acpi.c --- a/xen/drivers/passthrough/amd/iommu_acpi.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/amd/iommu_acpi.c Thu Mar 20 12:35:40 2008 -0600 @@ -29,12 +29,12 @@ extern struct ivrs_mappings *ivrs_mappin extern struct ivrs_mappings *ivrs_mappings; static struct amd_iommu * __init find_iommu_from_bdf_cap( - u16 bdf, u8 cap_offset) + u16 bdf, u8 cap_offset) { struct amd_iommu *iommu; - for_each_amd_iommu( iommu ) - if ( iommu->bdf == bdf && iommu->cap_offset == cap_offset ) + for_each_amd_iommu ( iommu ) + if ( (iommu->bdf == bdf) && (iommu->cap_offset == cap_offset) ) return iommu; return NULL; @@ -57,15 +57,17 @@ static void __init reserve_iommu_exclusi iommu->exclusion_limit = limit; } -static void __init reserve_iommu_exclusion_range_all(struct amd_iommu *iommu, - unsigned long base, unsigned long limit) +static void __init reserve_iommu_exclusion_range_all( + struct amd_iommu *iommu, + unsigned long base, unsigned long limit) { reserve_iommu_exclusion_range(iommu, base, limit); iommu->exclusion_allow_all = IOMMU_CONTROL_ENABLED; } -static void __init reserve_unity_map_for_device(u16 bdf, unsigned long base, - unsigned long length, u8 iw, u8 ir) +static void __init reserve_unity_map_for_device( + u16 bdf, unsigned long base, + unsigned long length, u8 iw, u8 ir) { unsigned long old_top, new_top; @@ -80,7 +82,7 @@ static void __init reserve_unity_map_for if ( ivrs_mappings[bdf].addr_range_start < base ) base = ivrs_mappings[bdf].addr_range_start; length = new_top - base; - } + } /* extend r/w permissioms and keep aggregate */ if ( iw ) @@ -93,7 +95,7 @@ static void __init reserve_unity_map_for } static int __init register_exclusion_range_for_all_devices( - unsigned long base, unsigned long limit, u8 iw, u8 ir) + unsigned long base, unsigned long limit, u8 iw, u8 ir) { unsigned long range_top, iommu_top, length; struct amd_iommu *iommu; @@ -105,7 +107,7 @@ static int __init register_exclusion_ran iommu_top = max_page * PAGE_SIZE; if ( base < iommu_top ) { - if (range_top > iommu_top) + if ( range_top > iommu_top ) range_top = iommu_top; length = range_top - base; /* reserve r/w unity-mapped page entries for devices */ @@ -116,7 +118,7 @@ static int __init register_exclusion_ran base = iommu_top; } /* register IOMMU exclusion range settings */ - if (limit >= iommu_top) + if ( limit >= iommu_top ) { for_each_amd_iommu( iommu ) reserve_iommu_exclusion_range_all(iommu, base, limit); @@ -125,8 +127,8 @@ static int __init register_exclusion_ran return 0; } -static int 
__init register_exclusion_range_for_device(u16 bdf, - unsigned long base, unsigned long limit, u8 iw, u8 ir) +static int __init register_exclusion_range_for_device( + u16 bdf, unsigned long base, unsigned long limit, u8 iw, u8 ir) { unsigned long range_top, iommu_top, length; struct amd_iommu *iommu; @@ -147,7 +149,7 @@ static int __init register_exclusion_ran iommu_top = max_page * PAGE_SIZE; if ( base < iommu_top ) { - if (range_top > iommu_top) + if ( range_top > iommu_top ) range_top = iommu_top; length = range_top - base; /* reserve unity-mapped page entries for device */ @@ -159,8 +161,8 @@ static int __init register_exclusion_ran base = iommu_top; } - /* register IOMMU exclusion range settings for device */ - if ( limit >= iommu_top ) + /* register IOMMU exclusion range settings for device */ + if ( limit >= iommu_top ) { reserve_iommu_exclusion_range(iommu, base, limit); ivrs_mappings[bdf].dte_allow_exclusion = IOMMU_CONTROL_ENABLED; @@ -171,8 +173,8 @@ static int __init register_exclusion_ran } static int __init register_exclusion_range_for_iommu_devices( - struct amd_iommu *iommu, - unsigned long base, unsigned long limit, u8 iw, u8 ir) + struct amd_iommu *iommu, + unsigned long base, unsigned long limit, u8 iw, u8 ir) { unsigned long range_top, iommu_top, length; u16 bus, devfn, bdf, req; @@ -183,7 +185,7 @@ static int __init register_exclusion_ran iommu_top = max_page * PAGE_SIZE; if ( base < iommu_top ) { - if (range_top > iommu_top) + if ( range_top > iommu_top ) range_top = iommu_top; length = range_top - base; /* reserve r/w unity-mapped page entries for devices */ @@ -205,19 +207,19 @@ static int __init register_exclusion_ran } /* register IOMMU exclusion range settings */ - if (limit >= iommu_top) + if ( limit >= iommu_top ) reserve_iommu_exclusion_range_all(iommu, base, limit); return 0; } static int __init parse_ivmd_device_select( - struct acpi_ivmd_block_header *ivmd_block, - unsigned long base, unsigned long limit, u8 iw, u8 ir) + struct acpi_ivmd_block_header *ivmd_block, + unsigned long base, unsigned long limit, u8 iw, u8 ir) { u16 bdf; bdf = ivmd_block->header.dev_id; - if (bdf >= ivrs_bdf_entries) + if ( bdf >= ivrs_bdf_entries ) { dprintk(XENLOG_ERR, "IVMD Error: Invalid Dev_Id 0x%x\n", bdf); return -ENODEV; @@ -227,44 +229,41 @@ static int __init parse_ivmd_device_sele } static int __init parse_ivmd_device_range( - struct acpi_ivmd_block_header *ivmd_block, - unsigned long base, unsigned long limit, u8 iw, u8 ir) + struct acpi_ivmd_block_header *ivmd_block, + unsigned long base, unsigned long limit, u8 iw, u8 ir) { u16 first_bdf, last_bdf, bdf; int error; first_bdf = ivmd_block->header.dev_id; - if (first_bdf >= ivrs_bdf_entries) - { - dprintk(XENLOG_ERR, "IVMD Error: " - "Invalid Range_First Dev_Id 0x%x\n", first_bdf); - return -ENODEV; + if ( first_bdf >= ivrs_bdf_entries ) + { + dprintk(XENLOG_ERR, "IVMD Error: " + "Invalid Range_First Dev_Id 0x%x\n", first_bdf); + return -ENODEV; } last_bdf = ivmd_block->last_dev_id; - if (last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf) + if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) ) { dprintk(XENLOG_ERR, "IVMD Error: " - "Invalid Range_Last Dev_Id 0x%x\n", last_bdf); - return -ENODEV; - } - - dprintk(XENLOG_ERR, " Dev_Id Range: 0x%x -> 0x%x\n", - first_bdf, last_bdf); - - for ( bdf = first_bdf, error = 0; - bdf <= last_bdf && !error; ++bdf ) - { - error = register_exclusion_range_for_device( - bdf, base, limit, iw, ir); - } - - return error; + "Invalid Range_Last Dev_Id 0x%x\n", last_bdf); + 
return -ENODEV; + } + + dprintk(XENLOG_ERR, " Dev_Id Range: 0x%x -> 0x%x\n", + first_bdf, last_bdf); + + for ( bdf = first_bdf, error = 0; (bdf <= last_bdf) && !error; bdf++ ) + error = register_exclusion_range_for_device( + bdf, base, limit, iw, ir); + + return error; } static int __init parse_ivmd_device_iommu( - struct acpi_ivmd_block_header *ivmd_block, - unsigned long base, unsigned long limit, u8 iw, u8 ir) + struct acpi_ivmd_block_header *ivmd_block, + unsigned long base, unsigned long limit, u8 iw, u8 ir) { struct amd_iommu *iommu; @@ -273,14 +272,14 @@ static int __init parse_ivmd_device_iomm ivmd_block->cap_offset); if ( !iommu ) { - dprintk(XENLOG_ERR, - "IVMD Error: No IOMMU for Dev_Id 0x%x Cap 0x%x\n", - ivmd_block->header.dev_id, ivmd_block->cap_offset); - return -ENODEV; + dprintk(XENLOG_ERR, + "IVMD Error: No IOMMU for Dev_Id 0x%x Cap 0x%x\n", + ivmd_block->header.dev_id, ivmd_block->cap_offset); + return -ENODEV; } return register_exclusion_range_for_iommu_devices( - iommu, base, limit, iw, ir); + iommu, base, limit, iw, ir); } static int __init parse_ivmd_block(struct acpi_ivmd_block_header *ivmd_block) @@ -288,11 +287,11 @@ static int __init parse_ivmd_block(struc unsigned long start_addr, mem_length, base, limit; u8 iw, ir; - if (ivmd_block->header.length < - sizeof(struct acpi_ivmd_block_header)) - { - dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Length!\n"); - return -ENODEV; + if ( ivmd_block->header.length < + sizeof(struct acpi_ivmd_block_header) ) + { + dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Length!\n"); + return -ENODEV; } start_addr = (unsigned long)ivmd_block->start_addr; @@ -301,7 +300,7 @@ static int __init parse_ivmd_block(struc limit = (start_addr + mem_length - 1) & PAGE_MASK; dprintk(XENLOG_INFO, "IVMD Block: Type 0x%x\n", - ivmd_block->header.type); + ivmd_block->header.type); dprintk(XENLOG_INFO, " Start_Addr_Phys 0x%lx\n", start_addr); dprintk(XENLOG_INFO, " Mem_Length 0x%lx\n", mem_length); @@ -322,27 +321,27 @@ static int __init parse_ivmd_block(struc } else { - dprintk(KERN_ERR, "IVMD Error: Invalid Flag Field!\n"); - return -ENODEV; + dprintk(KERN_ERR, "IVMD Error: Invalid Flag Field!\n"); + return -ENODEV; } switch( ivmd_block->header.type ) { case AMD_IOMMU_ACPI_IVMD_ALL_TYPE: return register_exclusion_range_for_all_devices( - base, limit, iw, ir); + base, limit, iw, ir); case AMD_IOMMU_ACPI_IVMD_ONE_TYPE: return parse_ivmd_device_select(ivmd_block, - base, limit, iw, ir); + base, limit, iw, ir); case AMD_IOMMU_ACPI_IVMD_RANGE_TYPE: return parse_ivmd_device_range(ivmd_block, - base, limit, iw, ir); + base, limit, iw, ir); case AMD_IOMMU_ACPI_IVMD_IOMMU_TYPE: return parse_ivmd_device_iommu(ivmd_block, - base, limit, iw, ir); + base, limit, iw, ir); default: dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Type!\n"); @@ -350,8 +349,8 @@ static int __init parse_ivmd_block(struc } } -static u16 __init parse_ivhd_device_padding(u16 pad_length, - u16 header_length, u16 block_length) +static u16 __init parse_ivhd_device_padding( + u16 pad_length, u16 header_length, u16 block_length) { if ( header_length < (block_length + pad_length) ) { @@ -363,7 +362,7 @@ static u16 __init parse_ivhd_device_padd } static u16 __init parse_ivhd_device_select( - union acpi_ivhd_device *ivhd_device) + union acpi_ivhd_device *ivhd_device) { u16 bdf; @@ -385,8 +384,8 @@ static u16 __init parse_ivhd_device_sele } static u16 __init parse_ivhd_device_range( - union acpi_ivhd_device *ivhd_device, - u16 header_length, u16 block_length) + union acpi_ivhd_device 
*ivhd_device, + u16 header_length, u16 block_length) { u16 dev_length, first_bdf, last_bdf, bdf; u8 sys_mgt; @@ -399,7 +398,8 @@ static u16 __init parse_ivhd_device_rang } if ( ivhd_device->range.trailer.type != - AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END) { + AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END ) + { dprintk(XENLOG_ERR, "IVHD Error: " "Invalid Range: End_Type 0x%x\n", ivhd_device->range.trailer.type); @@ -409,35 +409,35 @@ static u16 __init parse_ivhd_device_rang first_bdf = ivhd_device->header.dev_id; if ( first_bdf >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Range: First Dev_Id 0x%x\n", first_bdf); - return 0; + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Range: First Dev_Id 0x%x\n", first_bdf); + return 0; } last_bdf = ivhd_device->range.trailer.dev_id; - if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf ) - { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Range: Last Dev_Id 0x%x\n", last_bdf); - return 0; + if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Range: Last Dev_Id 0x%x\n", last_bdf); + return 0; } dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n", - first_bdf, last_bdf); + first_bdf, last_bdf); /* override flags for range of devices */ sys_mgt = get_field_from_byte(ivhd_device->header.flags, - AMD_IOMMU_ACPI_SYS_MGT_MASK, - AMD_IOMMU_ACPI_SYS_MGT_SHIFT); - for ( bdf = first_bdf; bdf <= last_bdf; ++bdf ) + AMD_IOMMU_ACPI_SYS_MGT_MASK, + AMD_IOMMU_ACPI_SYS_MGT_SHIFT); + for ( bdf = first_bdf; bdf <= last_bdf; bdf++ ) ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt; return dev_length; } static u16 __init parse_ivhd_device_alias( - union acpi_ivhd_device *ivhd_device, - u16 header_length, u16 block_length) + union acpi_ivhd_device *ivhd_device, + u16 header_length, u16 block_length) { u16 dev_length, alias_id, bdf; @@ -445,7 +445,7 @@ static u16 __init parse_ivhd_device_alia if ( header_length < (block_length + dev_length) ) { dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Device_Entry Length!\n"); + "Invalid Device_Entry Length!\n"); return 0; } @@ -460,9 +460,9 @@ static u16 __init parse_ivhd_device_alia alias_id = ivhd_device->alias.dev_id; if ( alias_id >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Alias Dev_Id 0x%x\n", alias_id); - return 0; + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Alias Dev_Id 0x%x\n", alias_id); + return 0; } dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id); @@ -470,18 +470,18 @@ static u16 __init parse_ivhd_device_alia /* override requestor_id and flags for device */ ivrs_mappings[bdf].dte_requestor_id = alias_id; ivrs_mappings[bdf].dte_sys_mgt_enable = - get_field_from_byte(ivhd_device->header.flags, - AMD_IOMMU_ACPI_SYS_MGT_MASK, - AMD_IOMMU_ACPI_SYS_MGT_SHIFT); + get_field_from_byte(ivhd_device->header.flags, + AMD_IOMMU_ACPI_SYS_MGT_MASK, + AMD_IOMMU_ACPI_SYS_MGT_SHIFT); ivrs_mappings[alias_id].dte_sys_mgt_enable = - ivrs_mappings[bdf].dte_sys_mgt_enable; + ivrs_mappings[bdf].dte_sys_mgt_enable; return dev_length; } static u16 __init parse_ivhd_device_alias_range( - union acpi_ivhd_device *ivhd_device, - u16 header_length, u16 block_length) + union acpi_ivhd_device *ivhd_device, + u16 header_length, u16 block_length) { u16 dev_length, first_bdf, last_bdf, alias_id, bdf; @@ -496,7 +496,7 @@ static u16 __init parse_ivhd_device_alia } if ( ivhd_device->alias_range.trailer.type != - AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END ) + AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END ) { dprintk(XENLOG_ERR, "IVHD Error: " "Invalid Range: End_Type 
0x%x\n", @@ -536,7 +536,7 @@ static u16 __init parse_ivhd_device_alia sys_mgt = get_field_from_byte(ivhd_device->header.flags, AMD_IOMMU_ACPI_SYS_MGT_MASK, AMD_IOMMU_ACPI_SYS_MGT_SHIFT); - for ( bdf = first_bdf; bdf <= last_bdf; ++bdf ) + for ( bdf = first_bdf; bdf <= last_bdf; bdf++ ) { ivrs_mappings[bdf].dte_requestor_id = alias_id; ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt; @@ -547,8 +547,8 @@ static u16 __init parse_ivhd_device_alia } static u16 __init parse_ivhd_device_extended( - union acpi_ivhd_device *ivhd_device, - u16 header_length, u16 block_length) + union acpi_ivhd_device *ivhd_device, + u16 header_length, u16 block_length) { u16 dev_length, bdf; @@ -578,8 +578,8 @@ static u16 __init parse_ivhd_device_exte } static u16 __init parse_ivhd_device_extended_range( - union acpi_ivhd_device *ivhd_device, - u16 header_length, u16 block_length) + union acpi_ivhd_device *ivhd_device, + u16 header_length, u16 block_length) { u16 dev_length, first_bdf, last_bdf, bdf; u8 sys_mgt; @@ -593,7 +593,7 @@ static u16 __init parse_ivhd_device_exte } if ( ivhd_device->extended_range.trailer.type != - AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END ) + AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END ) { dprintk(XENLOG_ERR, "IVHD Error: " "Invalid Range: End_Type 0x%x\n", @@ -604,13 +604,13 @@ static u16 __init parse_ivhd_device_exte first_bdf = ivhd_device->header.dev_id; if ( first_bdf >= ivrs_bdf_entries ) { - dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Range: First Dev_Id 0x%x\n", first_bdf); - return 0; + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Range: First Dev_Id 0x%x\n", first_bdf); + return 0; } last_bdf = ivhd_device->extended_range.trailer.dev_id; - if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf ) + if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) ) { dprintk(XENLOG_ERR, "IVHD Error: " "Invalid Range: Last Dev_Id 0x%x\n", last_bdf); @@ -624,7 +624,7 @@ static u16 __init parse_ivhd_device_exte sys_mgt = get_field_from_byte(ivhd_device->header.flags, AMD_IOMMU_ACPI_SYS_MGT_MASK, AMD_IOMMU_ACPI_SYS_MGT_SHIFT); - for ( bdf = first_bdf; bdf <= last_bdf; ++bdf ) + for ( bdf = first_bdf; bdf <= last_bdf; bdf++ ) ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt; return dev_length; @@ -637,20 +637,20 @@ static int __init parse_ivhd_block(struc struct amd_iommu *iommu; if ( ivhd_block->header.length < - sizeof(struct acpi_ivhd_block_header) ) + sizeof(struct acpi_ivhd_block_header) ) { dprintk(XENLOG_ERR, "IVHD Error: Invalid Block Length!\n"); return -ENODEV; } iommu = find_iommu_from_bdf_cap(ivhd_block->header.dev_id, - ivhd_block->cap_offset); + ivhd_block->cap_offset); if ( !iommu ) { dprintk(XENLOG_ERR, "IVHD Error: No IOMMU for Dev_Id 0x%x Cap 0x%x\n", ivhd_block->header.dev_id, ivhd_block->cap_offset); - return -ENODEV; + return -ENODEV; } dprintk(XENLOG_INFO, "IVHD Block:\n"); @@ -668,29 +668,29 @@ static int __init parse_ivhd_block(struc AMD_IOMMU_ACPI_COHERENT_MASK, AMD_IOMMU_ACPI_COHERENT_SHIFT); iommu->iotlb_support = get_field_from_byte(ivhd_block->header.flags, - AMD_IOMMU_ACPI_IOTLB_SUP_MASK, - AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT); + AMD_IOMMU_ACPI_IOTLB_SUP_MASK, + AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT); iommu->isochronous = get_field_from_byte(ivhd_block->header.flags, - AMD_IOMMU_ACPI_ISOC_MASK, - AMD_IOMMU_ACPI_ISOC_SHIFT); + AMD_IOMMU_ACPI_ISOC_MASK, + AMD_IOMMU_ACPI_ISOC_SHIFT); iommu->res_pass_pw = get_field_from_byte(ivhd_block->header.flags, - AMD_IOMMU_ACPI_RES_PASS_PW_MASK, - AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT); + AMD_IOMMU_ACPI_RES_PASS_PW_MASK, + 
AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT); iommu->pass_pw = get_field_from_byte(ivhd_block->header.flags, - AMD_IOMMU_ACPI_PASS_PW_MASK, - AMD_IOMMU_ACPI_PASS_PW_SHIFT); + AMD_IOMMU_ACPI_PASS_PW_MASK, + AMD_IOMMU_ACPI_PASS_PW_SHIFT); iommu->ht_tunnel_enable = get_field_from_byte( - ivhd_block->header.flags, - AMD_IOMMU_ACPI_HT_TUN_ENB_MASK, - AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT); + ivhd_block->header.flags, + AMD_IOMMU_ACPI_HT_TUN_ENB_MASK, + AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT); /* parse Device Entries */ block_length = sizeof(struct acpi_ivhd_block_header); - while( ivhd_block->header.length >= - (block_length + sizeof(struct acpi_ivhd_device_header)) ) + while ( ivhd_block->header.length >= + (block_length + sizeof(struct acpi_ivhd_device_header)) ) { ivhd_device = (union acpi_ivhd_device *) - ((u8 *)ivhd_block + block_length); + ((u8 *)ivhd_block + block_length); dprintk(XENLOG_INFO, "IVHD Device Entry:\n"); dprintk(XENLOG_INFO, " Type 0x%x\n", @@ -700,7 +700,7 @@ static int __init parse_ivhd_block(struc dprintk(XENLOG_INFO, " Flags 0x%x\n", ivhd_device->header.flags); - switch( ivhd_device->header.type ) + switch ( ivhd_device->header.type ) { case AMD_IOMMU_ACPI_IVHD_DEV_U32_PAD: dev_length = parse_ivhd_device_padding( @@ -716,7 +716,8 @@ static int __init parse_ivhd_block(struc dev_length = parse_ivhd_device_select(ivhd_device); break; case AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START: - dev_length = parse_ivhd_device_range(ivhd_device, + dev_length = parse_ivhd_device_range( + ivhd_device, ivhd_block->header.length, block_length); break; case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT: @@ -741,7 +742,7 @@ static int __init parse_ivhd_block(struc break; default: dprintk(XENLOG_ERR, "IVHD Error: " - "Invalid Device Type!\n"); + "Invalid Device Type!\n"); dev_length = 0; break; } @@ -759,7 +760,7 @@ static int __init parse_ivrs_block(struc struct acpi_ivhd_block_header *ivhd_block; struct acpi_ivmd_block_header *ivmd_block; - switch(ivrs_block->type) + switch ( ivrs_block->type ) { case AMD_IOMMU_ACPI_IVHD_TYPE: ivhd_block = (struct acpi_ivhd_block_header *)ivrs_block; @@ -786,7 +787,7 @@ void __init dump_acpi_table_header(struc printk(XENLOG_INFO "AMD IOMMU: ACPI Table:\n"); printk(XENLOG_INFO " Signature "); - for ( i = 0; i < ACPI_NAME_SIZE; ++i ) + for ( i = 0; i < ACPI_NAME_SIZE; i++ ) printk("%c", table->signature[i]); printk("\n"); @@ -795,28 +796,27 @@ void __init dump_acpi_table_header(struc printk(" CheckSum 0x%x\n", table->checksum); printk(" OEM_Id "); - for ( i = 0; i < ACPI_OEM_ID_SIZE; ++i ) + for ( i = 0; i < ACPI_OEM_ID_SIZE; i++ ) printk("%c", table->oem_id[i]); printk("\n"); printk(" OEM_Table_Id "); - for ( i = 0; i < ACPI_OEM_TABLE_ID_SIZE; ++i ) + for ( i = 0; i < ACPI_OEM_TABLE_ID_SIZE; i++ ) printk("%c", table->oem_table_id[i]); printk("\n"); printk(" OEM_Revision 0x%x\n", table->oem_revision); printk(" Creator_Id "); - for ( i = 0; i < ACPI_NAME_SIZE; ++i ) + for ( i = 0; i < ACPI_NAME_SIZE; i++ ) printk("%c", table->asl_compiler_id[i]); printk("\n"); printk(" Creator_Revision 0x%x\n", - table->asl_compiler_revision); -} - -int __init parse_ivrs_table(unsigned long phys_addr, - unsigned long size) + table->asl_compiler_revision); +} + +int __init parse_ivrs_table(unsigned long phys_addr, unsigned long size) { struct acpi_ivrs_block_header *ivrs_block; unsigned long length, i; @@ -834,7 +834,7 @@ int __init parse_ivrs_table(unsigned lon /* validate checksum: sum of entire table == 0 */ checksum = 0; raw_table = (u8 *)table; - for ( i = 0; i < table->length; ++i ) + for ( i = 0; i < 
table->length; i++ ) checksum += raw_table[i]; if ( checksum ) { @@ -845,11 +845,10 @@ int __init parse_ivrs_table(unsigned lon /* parse IVRS blocks */ length = sizeof(struct acpi_ivrs_table_header); - while( error == 0 && table->length > - (length + sizeof(struct acpi_ivrs_block_header)) ) + while ( (error == 0) && (table->length > (length + sizeof(*ivrs_block))) ) { ivrs_block = (struct acpi_ivrs_block_header *) - ((u8 *)table + length); + ((u8 *)table + length); dprintk(XENLOG_INFO, "IVRS Block:\n"); dprintk(XENLOG_INFO, " Type 0x%x\n", ivrs_block->type); @@ -857,16 +856,16 @@ int __init parse_ivrs_table(unsigned lon dprintk(XENLOG_INFO, " Length 0x%x\n", ivrs_block->length); dprintk(XENLOG_INFO, " Dev_Id 0x%x\n", ivrs_block->dev_id); - if (table->length >= (length + ivrs_block->length)) - error = parse_ivrs_block(ivrs_block); - else + if ( table->length < (length + ivrs_block->length) ) { - dprintk(XENLOG_ERR, "IVRS Error: " - "Table Length Exceeded: 0x%x -> 0x%lx\n", - table->length, - (length + ivrs_block->length)); - return -ENODEV; + dprintk(XENLOG_ERR, "IVRS Error: " + "Table Length Exceeded: 0x%x -> 0x%lx\n", + table->length, + (length + ivrs_block->length)); + return -ENODEV; } + + error = parse_ivrs_block(ivrs_block); length += ivrs_block->length; } diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/amd/iommu_detect.c --- a/xen/drivers/passthrough/amd/iommu_detect.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/amd/iommu_detect.c Thu Mar 20 12:35:40 2008 -0600 @@ -20,14 +20,14 @@ #include <xen/config.h> #include <xen/errno.h> -#include <asm/iommu.h> +#include <xen/iommu.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> #include "../pci-direct.h" #include "../pci_regs.h" -static int __init valid_bridge_bus_config(int bus, int dev, int func, - int *sec_bus, int *sub_bus) +static int __init valid_bridge_bus_config( + int bus, int dev, int func, int *sec_bus, int *sub_bus) { int pri_bus; @@ -35,7 +35,7 @@ static int __init valid_bridge_bus_confi *sec_bus = read_pci_config_byte(bus, dev, func, PCI_SECONDARY_BUS); *sub_bus = read_pci_config_byte(bus, dev, func, PCI_SUBORDINATE_BUS); - return ( pri_bus == bus && *sec_bus > bus && *sub_bus >= *sec_bus ); + return ((pri_bus == bus) && (*sec_bus > bus) && (*sub_bus >= *sec_bus)); } int __init get_iommu_last_downstream_bus(struct amd_iommu *iommu) @@ -49,9 +49,11 @@ int __init get_iommu_last_downstream_bus iommu->downstream_bus_present[bus] = 1; dev = PCI_SLOT(iommu->first_devfn); multi_func = PCI_FUNC(iommu->first_devfn) > 0; - for ( devfn = iommu->first_devfn; devfn <= iommu->last_devfn; ++devfn ) { + for ( devfn = iommu->first_devfn; devfn <= iommu->last_devfn; devfn++ ) + { /* skipping to next device#? 
*/ - if ( dev != PCI_SLOT(devfn) ) { + if ( dev != PCI_SLOT(devfn) ) + { dev = PCI_SLOT(devfn); multi_func = 0; } @@ -62,14 +64,15 @@ int __init get_iommu_last_downstream_bus continue; hdr_type = read_pci_config_byte(bus, dev, func, - PCI_HEADER_TYPE); + PCI_HEADER_TYPE); if ( func == 0 ) multi_func = IS_PCI_MULTI_FUNCTION(hdr_type); if ( (func == 0 || multi_func) && - IS_PCI_TYPE1_HEADER(hdr_type) ) { - if (!valid_bridge_bus_config(bus, dev, func, - &sec_bus, &sub_bus)) + IS_PCI_TYPE1_HEADER(hdr_type) ) + { + if ( !valid_bridge_bus_config(bus, dev, func, + &sec_bus, &sub_bus) ) return -ENODEV; if ( sub_bus > iommu->last_downstream_bus ) @@ -84,18 +87,18 @@ int __init get_iommu_last_downstream_bus } int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr, - struct amd_iommu *iommu) + struct amd_iommu *iommu) { u32 cap_header, cap_range, misc_info; u64 mmio_bar; - mmio_bar = (u64)read_pci_config(bus, dev, func, - cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32; + mmio_bar = (u64)read_pci_config( + bus, dev, func, cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32; mmio_bar |= read_pci_config(bus, dev, func, - cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET); + cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET); iommu->mmio_base_phys = mmio_bar & (u64)~0x3FFF; - if ( (mmio_bar & 0x1) == 0 || iommu->mmio_base_phys == 0 ) + if ( ((mmio_bar & 0x1) == 0) || (iommu->mmio_base_phys == 0) ) { dprintk(XENLOG_ERR , "AMD IOMMU: Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar); @@ -106,42 +109,37 @@ int __init get_iommu_capabilities(u8 bus iommu->cap_offset = cap_ptr; cap_header = read_pci_config(bus, dev, func, cap_ptr); - iommu->revision = get_field_from_reg_u32(cap_header, - PCI_CAP_REV_MASK, PCI_CAP_REV_SHIFT); - iommu->iotlb_support = get_field_from_reg_u32(cap_header, - PCI_CAP_IOTLB_MASK, PCI_CAP_IOTLB_SHIFT); - iommu->ht_tunnel_support = get_field_from_reg_u32(cap_header, - PCI_CAP_HT_TUNNEL_MASK, - PCI_CAP_HT_TUNNEL_SHIFT); - iommu->pte_not_present_cached = get_field_from_reg_u32(cap_header, - PCI_CAP_NP_CACHE_MASK, - PCI_CAP_NP_CACHE_SHIFT); + iommu->revision = get_field_from_reg_u32( + cap_header, PCI_CAP_REV_MASK, PCI_CAP_REV_SHIFT); + iommu->iotlb_support = get_field_from_reg_u32( + cap_header, PCI_CAP_IOTLB_MASK, PCI_CAP_IOTLB_SHIFT); + iommu->ht_tunnel_support = get_field_from_reg_u32( + cap_header, PCI_CAP_HT_TUNNEL_MASK, PCI_CAP_HT_TUNNEL_SHIFT); + iommu->pte_not_present_cached = get_field_from_reg_u32( + cap_header, PCI_CAP_NP_CACHE_MASK, PCI_CAP_NP_CACHE_SHIFT); cap_range = read_pci_config(bus, dev, func, - cap_ptr + PCI_CAP_RANGE_OFFSET); - iommu->unit_id = get_field_from_reg_u32(cap_range, - PCI_CAP_UNIT_ID_MASK, - PCI_CAP_UNIT_ID_SHIFT); - iommu->root_bus = get_field_from_reg_u32(cap_range, - PCI_CAP_BUS_NUMBER_MASK, - PCI_CAP_BUS_NUMBER_SHIFT); - iommu->first_devfn = get_field_from_reg_u32(cap_range, - PCI_CAP_FIRST_DEVICE_MASK, - PCI_CAP_FIRST_DEVICE_SHIFT); - iommu->last_devfn = get_field_from_reg_u32(cap_range, - PCI_CAP_LAST_DEVICE_MASK, - PCI_CAP_LAST_DEVICE_SHIFT); + cap_ptr + PCI_CAP_RANGE_OFFSET); + iommu->unit_id = get_field_from_reg_u32( + cap_range, PCI_CAP_UNIT_ID_MASK, PCI_CAP_UNIT_ID_SHIFT); + iommu->root_bus = get_field_from_reg_u32( + cap_range, PCI_CAP_BUS_NUMBER_MASK, PCI_CAP_BUS_NUMBER_SHIFT); + iommu->first_devfn = get_field_from_reg_u32( + cap_range, PCI_CAP_FIRST_DEVICE_MASK, PCI_CAP_FIRST_DEVICE_SHIFT); + iommu->last_devfn = get_field_from_reg_u32( + cap_range, PCI_CAP_LAST_DEVICE_MASK, PCI_CAP_LAST_DEVICE_SHIFT); misc_info = read_pci_config(bus, dev, func, - cap_ptr + 
PCI_MISC_INFO_OFFSET); - iommu->msi_number = get_field_from_reg_u32(misc_info, - PCI_CAP_MSI_NUMBER_MASK, - PCI_CAP_MSI_NUMBER_SHIFT); + cap_ptr + PCI_MISC_INFO_OFFSET); + iommu->msi_number = get_field_from_reg_u32( + misc_info, PCI_CAP_MSI_NUMBER_MASK, PCI_CAP_MSI_NUMBER_SHIFT); + return 0; } -static int __init scan_caps_for_iommu(int bus, int dev, int func, - iommu_detect_callback_ptr_t iommu_detect_callback) +static int __init scan_caps_for_iommu( + int bus, int dev, int func, + iommu_detect_callback_ptr_t iommu_detect_callback) { int cap_ptr, cap_id, cap_type; u32 cap_header; @@ -149,32 +147,35 @@ static int __init scan_caps_for_iommu(in count = 0; cap_ptr = read_pci_config_byte(bus, dev, func, - PCI_CAPABILITY_LIST); - while ( cap_ptr >= PCI_MIN_CAP_OFFSET && - count < PCI_MAX_CAP_BLOCKS && !error ) { + PCI_CAPABILITY_LIST); + while ( (cap_ptr >= PCI_MIN_CAP_OFFSET) && + (count < PCI_MAX_CAP_BLOCKS) && + !error ) + { cap_ptr &= PCI_CAP_PTR_MASK; cap_header = read_pci_config(bus, dev, func, cap_ptr); - cap_id = get_field_from_reg_u32(cap_header, - PCI_CAP_ID_MASK, PCI_CAP_ID_SHIFT); - - if ( cap_id == PCI_CAP_ID_SECURE_DEVICE ) { - cap_type = get_field_from_reg_u32(cap_header, - PCI_CAP_TYPE_MASK, PCI_CAP_TYPE_SHIFT); - if ( cap_type == PCI_CAP_TYPE_IOMMU ) { + cap_id = get_field_from_reg_u32( + cap_header, PCI_CAP_ID_MASK, PCI_CAP_ID_SHIFT); + + if ( cap_id == PCI_CAP_ID_SECURE_DEVICE ) + { + cap_type = get_field_from_reg_u32( + cap_header, PCI_CAP_TYPE_MASK, PCI_CAP_TYPE_SHIFT); + if ( cap_type == PCI_CAP_TYPE_IOMMU ) error = iommu_detect_callback( - bus, dev, func, cap_ptr); - } - } - - cap_ptr = get_field_from_reg_u32(cap_header, - PCI_CAP_NEXT_PTR_MASK, PCI_CAP_NEXT_PTR_SHIFT); - ++count; } - - return error; -} - -static int __init scan_functions_for_iommu(int bus, int dev, - iommu_detect_callback_ptr_t iommu_detect_callback) + bus, dev, func, cap_ptr); + } + + cap_ptr = get_field_from_reg_u32( + cap_header, PCI_CAP_NEXT_PTR_MASK, PCI_CAP_NEXT_PTR_SHIFT); + count++; + } + + return error; +} + +static int __init scan_functions_for_iommu( + int bus, int dev, iommu_detect_callback_ptr_t iommu_detect_callback) { int func, hdr_type; int count, error = 0; @@ -182,19 +183,20 @@ static int __init scan_functions_for_iom func = 0; count = 1; while ( VALID_PCI_VENDOR_ID(read_pci_config_16(bus, dev, func, - PCI_VENDOR_ID)) && !error && func < count ) { + PCI_VENDOR_ID)) && + !error && (func < count) ) + { hdr_type = read_pci_config_byte(bus, dev, func, - PCI_HEADER_TYPE); + PCI_HEADER_TYPE); if ( func == 0 && IS_PCI_MULTI_FUNCTION(hdr_type) ) count = PCI_MAX_FUNC_COUNT; if ( IS_PCI_TYPE0_HEADER(hdr_type) || - IS_PCI_TYPE1_HEADER(hdr_type) ) { - error = scan_caps_for_iommu(bus, dev, func, - iommu_detect_callback); - } - ++func; + IS_PCI_TYPE1_HEADER(hdr_type) ) + error = scan_caps_for_iommu(bus, dev, func, + iommu_detect_callback); + func++; } return error; @@ -205,13 +207,11 @@ int __init scan_for_iommu(iommu_detect_c { int bus, dev, error = 0; - for ( bus = 0; bus < PCI_MAX_BUS_COUNT && !error; ++bus ) { - for ( dev = 0; dev < PCI_MAX_DEV_COUNT && !error; ++dev ) { - error = scan_functions_for_iommu(bus, dev, - iommu_detect_callback); - } - } - - return error; -} - + for ( bus = 0; bus < PCI_MAX_BUS_COUNT && !error; ++bus ) + for ( dev = 0; dev < PCI_MAX_DEV_COUNT && !error; ++dev ) + error = scan_functions_for_iommu(bus, dev, + iommu_detect_callback); + + return error; +} + diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/amd/iommu_init.c --- 
a/xen/drivers/passthrough/amd/iommu_init.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/amd/iommu_init.c Thu Mar 20 12:35:40 2008 -0600 @@ -32,26 +32,28 @@ int __init map_iommu_mmio_region(struct { unsigned long mfn; - if ( nr_amd_iommus > MAX_AMD_IOMMUS ) { + if ( nr_amd_iommus > MAX_AMD_IOMMUS ) + { gdprintk(XENLOG_ERR, - "IOMMU: nr_amd_iommus %d > MAX_IOMMUS\n", nr_amd_iommus); + "IOMMU: nr_amd_iommus %d > MAX_IOMMUS\n", nr_amd_iommus); return -ENOMEM; } - iommu->mmio_base = (void *) fix_to_virt(FIX_IOMMU_MMIO_BASE_0 + - nr_amd_iommus * MMIO_PAGES_PER_IOMMU); - mfn = (unsigned long)iommu->mmio_base_phys >> PAGE_SHIFT; + iommu->mmio_base = (void *)fix_to_virt( + FIX_IOMMU_MMIO_BASE_0 + nr_amd_iommus * MMIO_PAGES_PER_IOMMU); + mfn = (unsigned long)(iommu->mmio_base_phys >> PAGE_SHIFT); map_pages_to_xen((unsigned long)iommu->mmio_base, mfn, - MMIO_PAGES_PER_IOMMU, PAGE_HYPERVISOR_NOCACHE); + MMIO_PAGES_PER_IOMMU, PAGE_HYPERVISOR_NOCACHE); - memset((u8*)iommu->mmio_base, 0, IOMMU_MMIO_REGION_LENGTH); + memset(iommu->mmio_base, 0, IOMMU_MMIO_REGION_LENGTH); return 0; } void __init unmap_iommu_mmio_region(struct amd_iommu *iommu) { - if ( iommu->mmio_base ) { + if ( iommu->mmio_base ) + { iounmap(iommu->mmio_base); iommu->mmio_base = NULL; } @@ -67,16 +69,16 @@ void __init register_iommu_dev_table_in_ addr_hi = addr_64 >> 32; set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_DEV_TABLE_BASE_LOW_MASK, - IOMMU_DEV_TABLE_BASE_LOW_SHIFT, &entry); + IOMMU_DEV_TABLE_BASE_LOW_MASK, + IOMMU_DEV_TABLE_BASE_LOW_SHIFT, &entry); set_field_in_reg_u32((iommu->dev_table.alloc_size / PAGE_SIZE) - 1, - entry, IOMMU_DEV_TABLE_SIZE_MASK, - IOMMU_DEV_TABLE_SIZE_SHIFT, &entry); + entry, IOMMU_DEV_TABLE_SIZE_MASK, + IOMMU_DEV_TABLE_SIZE_SHIFT, &entry); writel(entry, iommu->mmio_base + IOMMU_DEV_TABLE_BASE_LOW_OFFSET); set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_DEV_TABLE_BASE_HIGH_MASK, - IOMMU_DEV_TABLE_BASE_HIGH_SHIFT, &entry); + IOMMU_DEV_TABLE_BASE_HIGH_MASK, + IOMMU_DEV_TABLE_BASE_HIGH_SHIFT, &entry); writel(entry, iommu->mmio_base + IOMMU_DEV_TABLE_BASE_HIGH_OFFSET); } @@ -91,49 +93,49 @@ void __init register_iommu_cmd_buffer_in addr_hi = addr_64 >> 32; set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_CMD_BUFFER_BASE_LOW_MASK, - IOMMU_CMD_BUFFER_BASE_LOW_SHIFT, &entry); + IOMMU_CMD_BUFFER_BASE_LOW_MASK, + IOMMU_CMD_BUFFER_BASE_LOW_SHIFT, &entry); writel(entry, iommu->mmio_base + IOMMU_CMD_BUFFER_BASE_LOW_OFFSET); power_of2_entries = get_order_from_bytes(iommu->cmd_buffer.alloc_size) + IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE; set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_CMD_BUFFER_BASE_HIGH_MASK, - IOMMU_CMD_BUFFER_BASE_HIGH_SHIFT, &entry); + IOMMU_CMD_BUFFER_BASE_HIGH_MASK, + IOMMU_CMD_BUFFER_BASE_HIGH_SHIFT, &entry); set_field_in_reg_u32(power_of2_entries, entry, - IOMMU_CMD_BUFFER_LENGTH_MASK, - IOMMU_CMD_BUFFER_LENGTH_SHIFT, &entry); + IOMMU_CMD_BUFFER_LENGTH_MASK, + IOMMU_CMD_BUFFER_LENGTH_SHIFT, &entry); writel(entry, iommu->mmio_base+IOMMU_CMD_BUFFER_BASE_HIGH_OFFSET); } static void __init set_iommu_translation_control(struct amd_iommu *iommu, - int enable) + int enable) { u32 entry; entry = readl(iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); set_field_in_reg_u32(iommu->ht_tunnel_support ? 
IOMMU_CONTROL_ENABLED : - IOMMU_CONTROL_ENABLED, entry, - IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_MASK, - IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT, &entry); + IOMMU_CONTROL_ENABLED, entry, + IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_MASK, + IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT, &entry); set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED : - IOMMU_CONTROL_ENABLED, entry, - IOMMU_CONTROL_TRANSLATION_ENABLE_MASK, - IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT, &entry); + IOMMU_CONTROL_ENABLED, entry, + IOMMU_CONTROL_TRANSLATION_ENABLE_MASK, + IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT, &entry); writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); } static void __init set_iommu_command_buffer_control(struct amd_iommu *iommu, - int enable) + int enable) { u32 entry; entry = readl(iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED : - IOMMU_CONTROL_ENABLED, entry, - IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK, - IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT, &entry); + IOMMU_CONTROL_ENABLED, entry, + IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK, + IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT, &entry); writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); } @@ -146,34 +148,34 @@ static void __init register_iommu_exclus addr_hi = iommu->exclusion_limit >> 32; set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_EXCLUSION_LIMIT_HIGH_MASK, - IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT, &entry); + IOMMU_EXCLUSION_LIMIT_HIGH_MASK, + IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT, &entry); writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_LIMIT_HIGH_OFFSET); set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_EXCLUSION_LIMIT_LOW_MASK, - IOMMU_EXCLUSION_LIMIT_LOW_SHIFT, &entry); + IOMMU_EXCLUSION_LIMIT_LOW_MASK, + IOMMU_EXCLUSION_LIMIT_LOW_SHIFT, &entry); writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_LIMIT_LOW_OFFSET); addr_lo = iommu->exclusion_base & DMA_32BIT_MASK; addr_hi = iommu->exclusion_base >> 32; set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_EXCLUSION_BASE_HIGH_MASK, - IOMMU_EXCLUSION_BASE_HIGH_SHIFT, &entry); + IOMMU_EXCLUSION_BASE_HIGH_MASK, + IOMMU_EXCLUSION_BASE_HIGH_SHIFT, &entry); writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_HIGH_OFFSET); set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_EXCLUSION_BASE_LOW_MASK, - IOMMU_EXCLUSION_BASE_LOW_SHIFT, &entry); + IOMMU_EXCLUSION_BASE_LOW_MASK, + IOMMU_EXCLUSION_BASE_LOW_SHIFT, &entry); set_field_in_reg_u32(iommu->exclusion_allow_all, entry, - IOMMU_EXCLUSION_ALLOW_ALL_MASK, - IOMMU_EXCLUSION_ALLOW_ALL_SHIFT, &entry); + IOMMU_EXCLUSION_ALLOW_ALL_MASK, + IOMMU_EXCLUSION_ALLOW_ALL_SHIFT, &entry); set_field_in_reg_u32(iommu->exclusion_enable, entry, - IOMMU_EXCLUSION_RANGE_ENABLE_MASK, - IOMMU_EXCLUSION_RANGE_ENABLE_SHIFT, &entry); + IOMMU_EXCLUSION_RANGE_ENABLE_MASK, + IOMMU_EXCLUSION_RANGE_ENABLE_SHIFT, &entry); writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_LOW_OFFSET); } @@ -184,5 +186,3 @@ void __init enable_iommu(struct amd_iomm set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED); printk("AMD IOMMU %d: Enabled\n", nr_amd_iommus); } - - diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/amd/iommu_map.c --- a/xen/drivers/passthrough/amd/iommu_map.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/amd/iommu_map.c Thu Mar 20 12:35:40 2008 -0600 @@ -19,7 +19,7 @@ */ #include <xen/sched.h> -#include <asm/hvm/iommu.h> +#include <xen/hvm/iommu.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> @@ -132,7 +132,8 @@ void flush_command_buffer(struct amd_iom 
send_iommu_command(iommu, cmd); /* wait for 'ComWaitInt' to signal comp#endifletion? */ - if ( amd_iommu_poll_comp_wait ) { + if ( amd_iommu_poll_comp_wait ) + { loop_count = amd_iommu_poll_comp_wait; do { status = readl(iommu->mmio_base + @@ -152,8 +153,10 @@ void flush_command_buffer(struct amd_iom IOMMU_STATUS_MMIO_OFFSET); } else + { dprintk(XENLOG_WARNING, "AMD IOMMU: Warning:" " ComWaitInt bit did not assert!\n"); + } } } @@ -234,7 +237,7 @@ static void amd_iommu_set_page_directory } void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr, u16 domain_id, - u8 sys_mgt, u8 dev_ex, u8 paging_mode) + u8 sys_mgt, u8 dev_ex, u8 paging_mode) { u64 addr_hi, addr_lo; u32 entry; @@ -397,7 +400,7 @@ int amd_iommu_map_page(struct domain *d, spin_lock_irqsave(&hd->mapping_lock, flags); pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn); - if ( pte == 0 ) + if ( pte == NULL ) { dprintk(XENLOG_ERR, "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn); @@ -428,7 +431,7 @@ int amd_iommu_unmap_page(struct domain * spin_lock_irqsave(&hd->mapping_lock, flags); pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn); - if ( pte == 0 ) + if ( pte == NULL ) { dprintk(XENLOG_ERR, "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn); @@ -441,7 +444,7 @@ int amd_iommu_unmap_page(struct domain * spin_unlock_irqrestore(&hd->mapping_lock, flags); /* send INVALIDATE_IOMMU_PAGES command */ - for_each_amd_iommu(iommu) + for_each_amd_iommu ( iommu ) { spin_lock_irqsave(&iommu->lock, flags); invalidate_iommu_page(iommu, io_addr, requestor_id); @@ -453,9 +456,9 @@ int amd_iommu_unmap_page(struct domain * } int amd_iommu_reserve_domain_unity_map( - struct domain *domain, - unsigned long phys_addr, - unsigned long size, int iw, int ir) + struct domain *domain, + unsigned long phys_addr, + unsigned long size, int iw, int ir) { unsigned long flags, npages, i; void *pte; @@ -466,17 +469,18 @@ int amd_iommu_reserve_domain_unity_map( spin_lock_irqsave(&hd->mapping_lock, flags); for ( i = 0; i < npages; ++i ) { - pte = get_pte_from_page_tables(hd->root_table, - hd->paging_mode, phys_addr>>PAGE_SHIFT); - if ( pte == 0 ) + pte = get_pte_from_page_tables( + hd->root_table, hd->paging_mode, phys_addr >> PAGE_SHIFT); + if ( pte == NULL ) { dprintk(XENLOG_ERR, - "AMD IOMMU: Invalid IO pagetable entry phys_addr = %lx\n", phys_addr); + "AMD IOMMU: Invalid IO pagetable entry " + "phys_addr = %lx\n", phys_addr); spin_unlock_irqrestore(&hd->mapping_lock, flags); return -EFAULT; } set_page_table_entry_present((u32 *)pte, - phys_addr, iw, ir); + phys_addr, iw, ir); phys_addr += PAGE_SIZE; } spin_unlock_irqrestore(&hd->mapping_lock, flags); diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Mar 20 12:35:40 2008 -0600 @@ -168,7 +168,7 @@ int iommu_detect_callback(u8 bus, u8 dev list_add_tail(&iommu->list, &amd_iommu_head); /* allocate resources for this IOMMU */ - if (allocate_iommu_resources(iommu) != 0) + if ( allocate_iommu_resources(iommu) != 0 ) goto error_out; return 0; @@ -208,7 +208,7 @@ static int __init amd_iommu_init(void) } /* assign default values for device entries */ - for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf ) + for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) { ivrs_mappings[bdf].dte_requestor_id = bdf; ivrs_mappings[bdf].dte_sys_mgt_enable = @@ -288,7 +288,8 @@ void amd_iommu_setup_domain_device( sys_mgt = 
ivrs_mappings[req_id].dte_sys_mgt_enable; dev_ex = ivrs_mappings[req_id].dte_allow_exclusion; amd_iommu_set_dev_table_entry((u32 *)dte, root_ptr, - req_id, sys_mgt, dev_ex, hd->paging_mode); + req_id, sys_mgt, dev_ex, + hd->paging_mode); invalidate_dev_table_entry(iommu, req_id); flush_command_buffer(iommu); @@ -317,8 +318,8 @@ void __init amd_iommu_setup_dom0_devices { l = read_pci_config(bus, dev, func, PCI_VENDOR_ID); /* some broken boards return 0 or ~0 if a slot is empty: */ - if ( l == 0xffffffff || l == 0x00000000 || - l == 0x0000ffff || l == 0xffff0000 ) + if ( (l == 0xffffffff) || (l == 0x00000000) || + (l == 0x0000ffff) || (l == 0xffff0000) ) continue; pdev = xmalloc(struct pci_dev); @@ -368,22 +369,22 @@ int amd_iommu_detect(void) /* allocate 'ivrs mappings' table */ /* note: the table has entries to accomodate all IOMMUs */ last_bus = 0; - for_each_amd_iommu (iommu) - if (iommu->last_downstream_bus > last_bus) - last_bus = iommu->last_downstream_bus; + for_each_amd_iommu ( iommu ) + if ( iommu->last_downstream_bus > last_bus ) + last_bus = iommu->last_downstream_bus; ivrs_bdf_entries = (last_bus + 1) * - IOMMU_DEV_TABLE_ENTRIES_PER_BUS; + IOMMU_DEV_TABLE_ENTRIES_PER_BUS; ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries); if ( !ivrs_mappings ) { dprintk(XENLOG_ERR, "AMD IOMMU:" - " Error allocating IVRS DevMappings table\n"); + " Error allocating IVRS DevMappings table\n"); goto error_out; } memset(ivrs_mappings, 0, - ivrs_bdf_entries * sizeof(struct ivrs_mappings)); + ivrs_bdf_entries * sizeof(struct ivrs_mappings)); } if ( amd_iommu_init() != 0 ) @@ -424,6 +425,7 @@ static int allocate_domain_resources(str spin_unlock_irqrestore(&hd->mapping_lock, flags); return 0; + error_out: spin_unlock_irqrestore(&hd->mapping_lock, flags); return -ENOMEM; @@ -433,7 +435,7 @@ static int get_paging_mode(unsigned long { int level = 1; - BUG_ON ( !max_page ); + BUG_ON(!max_page); if ( entries > max_page ) entries = max_page; @@ -441,8 +443,7 @@ static int get_paging_mode(unsigned long while ( entries > PTE_PER_TABLE_SIZE ) { entries = PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT; - ++level; - if ( level > 6 ) + if ( ++level > 6 ) return -ENOMEM; } @@ -509,7 +510,7 @@ static int reassign_device( struct domai int bdf; unsigned long flags; - for_each_pdev( source, pdev ) + for_each_pdev ( source, pdev ) { if ( (pdev->bus != bus) || (pdev->devfn != devfn) ) continue; @@ -522,23 +523,7 @@ static int reassign_device( struct domai iommu = (bdf < ivrs_bdf_entries) ? find_iommu_for_device(bus, pdev->devfn) : NULL; - if ( iommu ) - { - amd_iommu_disable_domain_device(source, iommu, bdf); - /* Move pci device from the source domain to target domain. */ - spin_lock_irqsave(&source_hd->iommu_list_lock, flags); - spin_lock_irqsave(&target_hd->iommu_list_lock, flags); - list_move(&pdev->list, &target_hd->pdev_list); - spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags); - spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags); - - amd_iommu_setup_domain_device(target, iommu, bdf); - gdprintk(XENLOG_INFO , - "AMD IOMMU: reassign %x:%x.%x domain %d -> domain %d\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn), - source->domain_id, target->domain_id); - } - else + if ( !iommu ) { gdprintk(XENLOG_ERR , "AMD IOMMU: fail to find iommu." " %x:%x.%x cannot be assigned to domain %d\n", @@ -546,6 +531,20 @@ static int reassign_device( struct domai return -ENODEV; } + amd_iommu_disable_domain_device(source, iommu, bdf); + /* Move pci device from the source domain to target domain. 
*/ + spin_lock_irqsave(&source_hd->iommu_list_lock, flags); + spin_lock_irqsave(&target_hd->iommu_list_lock, flags); + list_move(&pdev->list, &target_hd->pdev_list); + spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags); + spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags); + + amd_iommu_setup_domain_device(target, iommu, bdf); + gdprintk(XENLOG_INFO , + "AMD IOMMU: reassign %x:%x.%x domain %d -> domain %d\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + source->domain_id, target->domain_id); + break; } return 0; @@ -557,9 +556,10 @@ int amd_iommu_assign_device(struct domai int req_id; req_id = ivrs_mappings[bdf].dte_requestor_id; - if (ivrs_mappings[req_id].unity_map_enable) - { - amd_iommu_reserve_domain_unity_map(d, + if ( ivrs_mappings[req_id].unity_map_enable ) + { + amd_iommu_reserve_domain_unity_map( + d, ivrs_mappings[req_id].addr_range_start, ivrs_mappings[req_id].addr_range_length, ivrs_mappings[req_id].write_permission, @@ -606,7 +606,7 @@ static void deallocate_next_page_table(v { deallocate_next_page_table(next_table, next_index, next_level); - ++next_index; + next_index++; } while (next_index < PTE_PER_TABLE_SIZE); } @@ -622,11 +622,12 @@ static void deallocate_iommu_page_tables if ( hd ->root_table ) { index = 0; + do { deallocate_next_page_table(hd->root_table, index, hd->paging_mode); - ++index; + index++; } while ( index < PTE_PER_TABLE_SIZE ); free_xenheap_page(hd ->root_table); @@ -644,7 +645,8 @@ void amd_iommu_domain_destroy(struct dom release_domain_devices(d); } -void amd_iommu_return_device(struct domain *s, struct domain *t, u8 bus, u8 devfn) +void amd_iommu_return_device( + struct domain *s, struct domain *t, u8 bus, u8 devfn) { pdev_flr(bus, devfn); reassign_device(s, t, bus, devfn); diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/io.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/passthrough/io.c Thu Mar 20 12:35:40 2008 -0600 @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> + * Copyright (C) Xiaohui Xin <xiaohui.xin@xxxxxxxxx> + */ + +#include <xen/event.h> +#include <xen/iommu.h> + +static void pt_irq_time_out(void *data) +{ + struct hvm_mirq_dpci_mapping *irq_map = data; + unsigned int guest_gsi, machine_gsi = 0; + struct hvm_irq_dpci *dpci = irq_map->dom->arch.hvm_domain.irq.dpci; + struct dev_intx_gsi_link *digl; + uint32_t device, intx; + + list_for_each_entry ( digl, &irq_map->digl_list, list ) + { + guest_gsi = digl->gsi; + machine_gsi = dpci->girq[guest_gsi].machine_gsi; + device = digl->device; + intx = digl->intx; + hvm_pci_intx_deassert(irq_map->dom, device, intx); + } + + clear_bit(machine_gsi, dpci->dirq_mask); + stop_timer(&dpci->hvm_timer[irq_to_vector(machine_gsi)]); + spin_lock(&dpci->dirq_lock); + dpci->mirq[machine_gsi].pending = 0; + spin_unlock(&dpci->dirq_lock); + pirq_guest_eoi(irq_map->dom, machine_gsi); +} + +int pt_irq_create_bind_vtd( + struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind) +{ + struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; + uint32_t machine_gsi, guest_gsi; + uint32_t device, intx, link; + struct dev_intx_gsi_link *digl; + + if ( hvm_irq_dpci == NULL ) + { + hvm_irq_dpci = xmalloc(struct hvm_irq_dpci); + if ( hvm_irq_dpci == NULL ) + return -ENOMEM; + + memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci)); + spin_lock_init(&hvm_irq_dpci->dirq_lock); + for ( int i = 0; i < NR_IRQS; i++ ) + INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list); + + if ( cmpxchg((unsigned long *)&d->arch.hvm_domain.irq.dpci, + 0, (unsigned long)hvm_irq_dpci) != 0 ) + xfree(hvm_irq_dpci); + + hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; + } + + machine_gsi = pt_irq_bind->machine_irq; + device = pt_irq_bind->u.pci.device; + intx = pt_irq_bind->u.pci.intx; + guest_gsi = hvm_pci_intx_gsi(device, intx); + link = hvm_pci_intx_link(device, intx); + hvm_irq_dpci->link_cnt[link]++; + + digl = xmalloc(struct dev_intx_gsi_link); + if ( !digl ) + return -ENOMEM; + + digl->device = device; + digl->intx = intx; + digl->gsi = guest_gsi; + digl->link = link; + list_add_tail(&digl->list, + &hvm_irq_dpci->mirq[machine_gsi].digl_list); + + hvm_irq_dpci->girq[guest_gsi].valid = 1; + hvm_irq_dpci->girq[guest_gsi].device = device; + hvm_irq_dpci->girq[guest_gsi].intx = intx; + hvm_irq_dpci->girq[guest_gsi].machine_gsi = machine_gsi; + + /* Bind the same mirq once in the same domain */ + if ( !hvm_irq_dpci->mirq[machine_gsi].valid ) + { + hvm_irq_dpci->mirq[machine_gsi].valid = 1; + hvm_irq_dpci->mirq[machine_gsi].dom = d; + + init_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)], + pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0); + /* Deal with gsi for legacy devices */ + pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE); + } + + gdprintk(XENLOG_INFO VTDPREFIX, + "VT-d irq bind: m_irq = %x device = %x intx = %x\n", + machine_gsi, device, intx); + return 0; +} + +int pt_irq_destroy_bind_vtd( + struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind) +{ + struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; + uint32_t machine_gsi, guest_gsi; + uint32_t device, intx, link; + struct list_head *digl_list, *tmp; + struct dev_intx_gsi_link *digl; + + if ( hvm_irq_dpci == NULL ) + return 0; + + machine_gsi = pt_irq_bind->machine_irq; + device = pt_irq_bind->u.pci.device; + intx = pt_irq_bind->u.pci.intx; + guest_gsi = hvm_pci_intx_gsi(device, intx); + link = hvm_pci_intx_link(device, intx); + hvm_irq_dpci->link_cnt[link]--; + + gdprintk(XENLOG_INFO, + 
"pt_irq_destroy_bind_vtd: machine_gsi=%d, guest_gsi=%d, device=%d, intx=%d.\n", + machine_gsi, guest_gsi, device, intx); + memset(&hvm_irq_dpci->girq[guest_gsi], 0, sizeof(struct hvm_girq_dpci_mapping)); + + /* clear the mirq info */ + if ( hvm_irq_dpci->mirq[machine_gsi].valid ) + { + + list_for_each_safe ( digl_list, tmp, + &hvm_irq_dpci->mirq[machine_gsi].digl_list ) + { + digl = list_entry(digl_list, + struct dev_intx_gsi_link, list); + if ( digl->device == device && + digl->intx == intx && + digl->link == link && + digl->gsi == guest_gsi ) + { + list_del(&digl->list); + xfree(digl); + } + } + + if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) ) + { + pirq_guest_unbind(d, machine_gsi); + kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)]); + hvm_irq_dpci->mirq[machine_gsi].dom = NULL; + hvm_irq_dpci->mirq[machine_gsi].valid = 0; + } + } + + gdprintk(XENLOG_INFO, + "XEN_DOMCTL_irq_unmapping: m_irq = %x device = %x intx = %x\n", + machine_gsi, device, intx); + + return 0; +} + +int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq) +{ + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + + if ( !iommu_enabled || (d == dom0) || (hvm_irq->dpci == NULL) || + !hvm_irq->dpci->mirq[mirq].valid ) + return 0; + + /* + * Set a timer here to avoid situations where the IRQ line is shared, and + * the device belonging to the pass-through guest is not yet active. In + * this case the guest may not pick up the interrupt (e.g., masked at the + * PIC) and we need to detect that. + */ + set_bit(mirq, hvm_irq->dpci->dirq_mask); + set_timer(&hvm_irq->dpci->hvm_timer[irq_to_vector(mirq)], + NOW() + PT_IRQ_TIME_OUT); + vcpu_kick(d->vcpu[0]); + + return 1; +} + +static void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq) +{ + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + struct hvm_irq_dpci *dpci = hvm_irq->dpci; + struct dev_intx_gsi_link *digl, *tmp; + int i; + + ASSERT(isairq < NR_ISAIRQS); + if ( !iommu_enabled || !dpci || + !test_bit(isairq, dpci->isairq_map) ) + return; + + /* Multiple mirq may be mapped to one isa irq */ + for ( i = 0; i < NR_IRQS; i++ ) + { + if ( !dpci->mirq[i].valid ) + continue; + + list_for_each_entry_safe ( digl, tmp, + &dpci->mirq[i].digl_list, list ) + { + if ( hvm_irq->pci_link.route[digl->link] == isairq ) + { + hvm_pci_intx_deassert(d, digl->device, digl->intx); + spin_lock(&dpci->dirq_lock); + if ( --dpci->mirq[i].pending == 0 ) + { + spin_unlock(&dpci->dirq_lock); + gdprintk(XENLOG_INFO VTDPREFIX, + "hvm_dpci_isairq_eoi:: mirq = %x\n", i); + stop_timer(&dpci->hvm_timer[irq_to_vector(i)]); + pirq_guest_eoi(d, i); + } + else + spin_unlock(&dpci->dirq_lock); + } + } + } +} + +void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi, + union vioapic_redir_entry *ent) +{ + struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; + uint32_t device, intx, machine_gsi; + + if ( !iommu_enabled || (hvm_irq_dpci == NULL) || + (guest_gsi >= NR_ISAIRQS && + !hvm_irq_dpci->girq[guest_gsi].valid) ) + return; + + if ( guest_gsi < NR_ISAIRQS ) + { + hvm_dpci_isairq_eoi(d, guest_gsi); + return; + } + + machine_gsi = hvm_irq_dpci->girq[guest_gsi].machine_gsi; + device = hvm_irq_dpci->girq[guest_gsi].device; + intx = hvm_irq_dpci->girq[guest_gsi].intx; + hvm_pci_intx_deassert(d, device, intx); + + spin_lock(&hvm_irq_dpci->dirq_lock); + if ( --hvm_irq_dpci->mirq[machine_gsi].pending == 0 ) + { + spin_unlock(&hvm_irq_dpci->dirq_lock); + + gdprintk(XENLOG_INFO VTDPREFIX, + "hvm_dpci_eoi:: mirq = %x\n", machine_gsi); + 
stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)]); + if ( (ent == NULL) || !ent->fields.mask ) + pirq_guest_eoi(d, machine_gsi); + } + else + spin_unlock(&hvm_irq_dpci->dirq_lock); +} diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/iommu.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/passthrough/iommu.c Thu Mar 20 12:35:40 2008 -0600 @@ -0,0 +1,136 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <xen/sched.h> +#include <xen/iommu.h> + +extern struct iommu_ops intel_iommu_ops; +extern struct iommu_ops amd_iommu_ops; + +int iommu_domain_init(struct domain *domain) +{ + struct hvm_iommu *hd = domain_hvm_iommu(domain); + + spin_lock_init(&hd->mapping_lock); + spin_lock_init(&hd->iommu_list_lock); + INIT_LIST_HEAD(&hd->pdev_list); + INIT_LIST_HEAD(&hd->g2m_ioport_list); + + if ( !iommu_enabled ) + return 0; + + switch ( boot_cpu_data.x86_vendor ) + { + case X86_VENDOR_INTEL: + hd->platform_ops = &intel_iommu_ops; + break; + case X86_VENDOR_AMD: + hd->platform_ops = &amd_iommu_ops; + break; + default: + BUG(); + } + + return hd->platform_ops->init(domain); +} + +int assign_device(struct domain *d, u8 bus, u8 devfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !iommu_enabled || !hd->platform_ops ) + return 0; + + return hd->platform_ops->assign_device(d, bus, devfn); +} + +void iommu_domain_destroy(struct domain *d) +{ + struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; + uint32_t i; + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct list_head *ioport_list, *digl_list, *tmp; + struct g2m_ioport *ioport; + struct dev_intx_gsi_link *digl; + + if ( !iommu_enabled || !hd->platform_ops ) + return; + + if ( hvm_irq_dpci != NULL ) + { + for ( i = 0; i < NR_IRQS; i++ ) + { + if ( !hvm_irq_dpci->mirq[i].valid ) + continue; + + pirq_guest_unbind(d, i); + kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(i)]); + + list_for_each_safe ( digl_list, tmp, + &hvm_irq_dpci->mirq[i].digl_list ) + { + digl = list_entry(digl_list, + struct dev_intx_gsi_link, list); + list_del(&digl->list); + xfree(digl); + } + } + + d->arch.hvm_domain.irq.dpci = NULL; + xfree(hvm_irq_dpci); + } + + if ( hd ) + { + list_for_each_safe ( ioport_list, tmp, &hd->g2m_ioport_list ) + { + ioport = list_entry(ioport_list, struct g2m_ioport, list); + list_del(&ioport->list); + xfree(ioport); + } + } + + return hd->platform_ops->teardown(d); +} + +int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !iommu_enabled || !hd->platform_ops ) + return 0; + + return hd->platform_ops->map_page(d, gfn, mfn); +} + +int iommu_unmap_page(struct domain *d, unsigned long gfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !iommu_enabled || !hd->platform_ops ) + return 0; + + return hd->platform_ops->unmap_page(d, gfn); +} + +void deassign_device(struct 
domain *d, u8 bus, u8 devfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !iommu_enabled || !hd->platform_ops ) + return; + + return hd->platform_ops->reassign_device(d, dom0, bus, devfn); +} diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/vtd/Makefile --- a/xen/drivers/passthrough/vtd/Makefile Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/vtd/Makefile Thu Mar 20 12:35:40 2008 -0600 @@ -1,6 +1,5 @@ obj-y += iommu.o obj-y += iommu.o obj-y += dmar.o obj-y += utils.o -obj-y += io.o obj-y += qinval.o obj-y += intremap.o diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/vtd/dmar.c --- a/xen/drivers/passthrough/vtd/dmar.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/vtd/dmar.c Thu Mar 20 12:35:40 2008 -0600 @@ -555,13 +555,13 @@ static int __init acpi_parse_dmar(unsign return -ENODEV; } - if ( !dmar->haw ) - { - dprintk(XENLOG_WARNING VTDPREFIX, "Zero: Invalid DMAR haw\n"); + if ( !dmar->width ) + { + dprintk(XENLOG_WARNING VTDPREFIX, "Zero: Invalid DMAR width\n"); return -EINVAL; } - dmar_host_address_width = dmar->haw; + dmar_host_address_width = dmar->width; dprintk(XENLOG_INFO VTDPREFIX, "Host address width %d\n", dmar_host_address_width); diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/vtd/dmar.h --- a/xen/drivers/passthrough/vtd/dmar.h Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/vtd/dmar.h Thu Mar 20 12:35:40 2008 -0600 @@ -22,7 +22,7 @@ #define _DMAR_H_ #include <xen/list.h> -#include <asm/iommu.h> +#include <xen/iommu.h> extern u8 dmar_host_address_width; @@ -100,5 +100,6 @@ struct acpi_rmrr_unit * acpi_find_matche int vtd_hw_check(void); void disable_pmr(struct iommu *iommu); +int is_usb_device(struct pci_dev *pdev); #endif // _DMAR_H_ diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/vtd/extern.h --- a/xen/drivers/passthrough/vtd/extern.h Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/vtd/extern.h Thu Mar 20 12:35:40 2008 -0600 @@ -42,8 +42,6 @@ int invalidate_sync(struct iommu *iommu) int invalidate_sync(struct iommu *iommu); int iommu_flush_iec_global(struct iommu *iommu); int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx); -void print_iommu_regs(struct acpi_drhd_unit *drhd); -int vtd_hw_check(void); struct iommu * ioapic_to_iommu(unsigned int apic_id); struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id); void clear_fault_bits(struct iommu *iommu); diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/vtd/intremap.c --- a/xen/drivers/passthrough/vtd/intremap.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/vtd/intremap.c Thu Mar 20 12:35:40 2008 -0600 @@ -18,28 +18,10 @@ * Copyright (C) Xiaohui Xin <xiaohui.xin@xxxxxxxxx> */ -#include <xen/config.h> -#include <xen/lib.h> -#include <xen/init.h> #include <xen/irq.h> -#include <xen/delay.h> #include <xen/sched.h> -#include <xen/acpi.h> -#include <xen/keyhandler.h> -#include <xen/spinlock.h> -#include <asm/io.h> -#include <asm/mc146818rtc.h> -#include <asm/smp.h> -#include <asm/desc.h> -#include <mach_apic.h> -#include <io_ports.h> - -#include <xen/spinlock.h> -#include <xen/xmalloc.h> -#include <xen/domain_page.h> -#include <asm/delay.h> -#include <asm/string.h> -#include <asm/iommu.h> +#include <xen/iommu.h> +#include "iommu.h" #include "dmar.h" #include "vtd.h" #include "../pci-direct.h" @@ -172,7 +154,7 @@ io_apic_read_remap_rte( struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid); struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); - if ( !iommu || 
!(ir_ctrl->iremap) ) + if ( !iommu || !ir_ctrl || !(ir_ctrl->iremap) ) { *IO_APIC_BASE(apic) = reg; return *(IO_APIC_BASE(apic)+4); @@ -218,7 +200,7 @@ io_apic_write_remap_rte( struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid); struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); - if ( !iommu || !(ir_ctrl->iremap) ) + if ( !iommu || !ir_ctrl || !(ir_ctrl->iremap) ) { *IO_APIC_BASE(apic) = reg; *(IO_APIC_BASE(apic)+4) = value; diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/vtd/io.c --- a/xen/drivers/passthrough/vtd/io.c Fri Mar 14 15:07:45 2008 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,296 +0,0 @@ -/* - * Copyright (c) 2006, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> - * Copyright (C) Xiaohui Xin <xiaohui.xin@xxxxxxxxx> - */ - -#include <xen/init.h> -#include <xen/config.h> -#include <xen/init.h> -#include <xen/mm.h> -#include <xen/lib.h> -#include <xen/errno.h> -#include <xen/trace.h> -#include <xen/event.h> -#include <xen/hypercall.h> -#include <asm/current.h> -#include <asm/cpufeature.h> -#include <asm/processor.h> -#include <asm/msr.h> -#include <asm/apic.h> -#include <asm/paging.h> -#include <asm/shadow.h> -#include <asm/p2m.h> -#include <asm/hvm/hvm.h> -#include <asm/hvm/support.h> -#include <asm/hvm/vpt.h> -#include <asm/hvm/vpic.h> -#include <asm/hvm/vlapic.h> -#include <public/sched.h> -#include <xen/iocap.h> -#include <public/hvm/ioreq.h> -#include <public/domctl.h> - -static void pt_irq_time_out(void *data) -{ - struct hvm_mirq_dpci_mapping *irq_map = data; - unsigned int guest_gsi, machine_gsi = 0; - struct hvm_irq_dpci *dpci = irq_map->dom->arch.hvm_domain.irq.dpci; - struct dev_intx_gsi_link *digl; - uint32_t device, intx; - - list_for_each_entry ( digl, &irq_map->digl_list, list ) - { - guest_gsi = digl->gsi; - machine_gsi = dpci->girq[guest_gsi].machine_gsi; - device = digl->device; - intx = digl->intx; - hvm_pci_intx_deassert(irq_map->dom, device, intx); - } - - clear_bit(machine_gsi, dpci->dirq_mask); - stop_timer(&dpci->hvm_timer[irq_to_vector(machine_gsi)]); - spin_lock(&dpci->dirq_lock); - dpci->mirq[machine_gsi].pending = 0; - spin_unlock(&dpci->dirq_lock); - pirq_guest_eoi(irq_map->dom, machine_gsi); -} - -int pt_irq_create_bind_vtd( - struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind) -{ - struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; - uint32_t machine_gsi, guest_gsi; - uint32_t device, intx, link; - struct dev_intx_gsi_link *digl; - - if ( hvm_irq_dpci == NULL ) - { - hvm_irq_dpci = xmalloc(struct hvm_irq_dpci); - if ( hvm_irq_dpci == NULL ) - return -ENOMEM; - - memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci)); - spin_lock_init(&hvm_irq_dpci->dirq_lock); - for ( int i = 0; i < NR_IRQS; i++ ) - INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list); - - if ( cmpxchg((unsigned long *)&d->arch.hvm_domain.irq.dpci, - 
0, (unsigned long)hvm_irq_dpci) != 0 ) - xfree(hvm_irq_dpci); - - hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; - } - - machine_gsi = pt_irq_bind->machine_irq; - device = pt_irq_bind->u.pci.device; - intx = pt_irq_bind->u.pci.intx; - guest_gsi = hvm_pci_intx_gsi(device, intx); - link = hvm_pci_intx_link(device, intx); - hvm_irq_dpci->link_cnt[link]++; - - digl = xmalloc(struct dev_intx_gsi_link); - if ( !digl ) - return -ENOMEM; - - digl->device = device; - digl->intx = intx; - digl->gsi = guest_gsi; - digl->link = link; - list_add_tail(&digl->list, - &hvm_irq_dpci->mirq[machine_gsi].digl_list); - - hvm_irq_dpci->girq[guest_gsi].valid = 1; - hvm_irq_dpci->girq[guest_gsi].device = device; - hvm_irq_dpci->girq[guest_gsi].intx = intx; - hvm_irq_dpci->girq[guest_gsi].machine_gsi = machine_gsi; - - /* Bind the same mirq once in the same domain */ - if ( !hvm_irq_dpci->mirq[machine_gsi].valid ) - { - hvm_irq_dpci->mirq[machine_gsi].valid = 1; - hvm_irq_dpci->mirq[machine_gsi].dom = d; - - init_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)], - pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0); - /* Deal with gsi for legacy devices */ - pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE); - } - - gdprintk(XENLOG_INFO VTDPREFIX, - "VT-d irq bind: m_irq = %x device = %x intx = %x\n", - machine_gsi, device, intx); - return 0; -} - -int pt_irq_destroy_bind_vtd( - struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind) -{ - struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; - uint32_t machine_gsi, guest_gsi; - uint32_t device, intx, link; - struct list_head *digl_list, *tmp; - struct dev_intx_gsi_link *digl; - - if ( hvm_irq_dpci == NULL ) - return 0; - - machine_gsi = pt_irq_bind->machine_irq; - device = pt_irq_bind->u.pci.device; - intx = pt_irq_bind->u.pci.intx; - guest_gsi = hvm_pci_intx_gsi(device, intx); - link = hvm_pci_intx_link(device, intx); - hvm_irq_dpci->link_cnt[link]--; - - gdprintk(XENLOG_INFO, - "pt_irq_destroy_bind_vtd: machine_gsi=%d, guest_gsi=%d, device=%d, intx=%d.\n", - machine_gsi, guest_gsi, device, intx); - memset(&hvm_irq_dpci->girq[guest_gsi], 0, sizeof(struct hvm_girq_dpci_mapping)); - - /* clear the mirq info */ - if ( hvm_irq_dpci->mirq[machine_gsi].valid ) - { - - list_for_each_safe ( digl_list, tmp, - &hvm_irq_dpci->mirq[machine_gsi].digl_list ) - { - digl = list_entry(digl_list, - struct dev_intx_gsi_link, list); - if ( digl->device == device && - digl->intx == intx && - digl->link == link && - digl->gsi == guest_gsi ) - { - list_del(&digl->list); - xfree(digl); - } - } - - if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) ) - { - pirq_guest_unbind(d, machine_gsi); - kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)]); - hvm_irq_dpci->mirq[machine_gsi].dom = NULL; - hvm_irq_dpci->mirq[machine_gsi].valid = 0; - } - } - - gdprintk(XENLOG_INFO, - "XEN_DOMCTL_irq_unmapping: m_irq = %x device = %x intx = %x\n", - machine_gsi, device, intx); - - return 0; -} - -int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq) -{ - struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; - - if ( !iommu_enabled || (d == dom0) || (hvm_irq->dpci == NULL) || - !hvm_irq->dpci->mirq[mirq].valid ) - return 0; - - /* - * Set a timer here to avoid situations where the IRQ line is shared, and - * the device belonging to the pass-through guest is not yet active. In - * this case the guest may not pick up the interrupt (e.g., masked at the - * PIC) and we need to detect that. 
- */ - set_bit(mirq, hvm_irq->dpci->dirq_mask); - set_timer(&hvm_irq->dpci->hvm_timer[irq_to_vector(mirq)], - NOW() + PT_IRQ_TIME_OUT); - vcpu_kick(d->vcpu[0]); - - return 1; -} - -static void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq) -{ - struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; - struct hvm_irq_dpci *dpci = hvm_irq->dpci; - struct dev_intx_gsi_link *digl, *tmp; - int i; - - ASSERT(isairq < NR_ISAIRQS); - if ( !iommu_enabled || !dpci || - !test_bit(isairq, dpci->isairq_map) ) - return; - - /* Multiple mirq may be mapped to one isa irq */ - for ( i = 0; i < NR_IRQS; i++ ) - { - if ( !dpci->mirq[i].valid ) - continue; - - list_for_each_entry_safe ( digl, tmp, - &dpci->mirq[i].digl_list, list ) - { - if ( hvm_irq->pci_link.route[digl->link] == isairq ) - { - hvm_pci_intx_deassert(d, digl->device, digl->intx); - spin_lock(&dpci->dirq_lock); - if ( --dpci->mirq[i].pending == 0 ) - { - spin_unlock(&dpci->dirq_lock); - gdprintk(XENLOG_INFO VTDPREFIX, - "hvm_dpci_isairq_eoi:: mirq = %x\n", i); - stop_timer(&dpci->hvm_timer[irq_to_vector(i)]); - pirq_guest_eoi(d, i); - } - else - spin_unlock(&dpci->dirq_lock); - } - } - } -} - -void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi, - union vioapic_redir_entry *ent) -{ - struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; - uint32_t device, intx, machine_gsi; - - if ( !iommu_enabled || (hvm_irq_dpci == NULL) || - (guest_gsi >= NR_ISAIRQS && - !hvm_irq_dpci->girq[guest_gsi].valid) ) - return; - - if ( guest_gsi < NR_ISAIRQS ) - { - hvm_dpci_isairq_eoi(d, guest_gsi); - return; - } - - machine_gsi = hvm_irq_dpci->girq[guest_gsi].machine_gsi; - device = hvm_irq_dpci->girq[guest_gsi].device; - intx = hvm_irq_dpci->girq[guest_gsi].intx; - hvm_pci_intx_deassert(d, device, intx); - - spin_lock(&hvm_irq_dpci->dirq_lock); - if ( --hvm_irq_dpci->mirq[machine_gsi].pending == 0 ) - { - spin_unlock(&hvm_irq_dpci->dirq_lock); - - gdprintk(XENLOG_INFO VTDPREFIX, - "hvm_dpci_eoi:: mirq = %x\n", machine_gsi); - stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)]); - if ( (ent == NULL) || !ent->fields.mask ) - pirq_guest_eoi(d, machine_gsi); - } - else - spin_unlock(&hvm_irq_dpci->dirq_lock); -} diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/vtd/iommu.c Thu Mar 20 12:35:40 2008 -0600 @@ -19,17 +19,12 @@ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> - adapted to xen */ -#include <xen/init.h> #include <xen/irq.h> -#include <xen/spinlock.h> #include <xen/sched.h> #include <xen/xmalloc.h> #include <xen/domain_page.h> -#include <asm/delay.h> -#include <asm/string.h> -#include <asm/mm.h> -#include <asm/iommu.h> -#include <asm/hvm/vmx/intel-iommu.h> +#include <xen/iommu.h> +#include "iommu.h" #include "dmar.h" #include "../pci-direct.h" #include "../pci_regs.h" @@ -39,8 +34,8 @@ #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid) static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */ -static int domid_bitmap_size; /* domain id bitmap size in bit */ -static void *domid_bitmap; /* iommu domain id bitmap */ +static int domid_bitmap_size; /* domain id bitmap size in bits */ +static unsigned long *domid_bitmap; /* iommu domain id bitmap */ #define DID_FIELD_WIDTH 16 #define DID_HIGH_OFFSET 8 @@ -72,6 +67,93 @@ static void iommu_domid_release(struct d d->arch.hvm_domain.hvm_iommu.iommu_domid = 0; clear_bit(iommu_domid, domid_bitmap); } +} + +static struct 
intel_iommu *alloc_intel_iommu(void) +{ + struct intel_iommu *intel; + + intel = xmalloc(struct intel_iommu); + if ( !intel ) + { + gdprintk(XENLOG_ERR VTDPREFIX, + "Allocate intel_iommu failed.\n"); + return NULL; + } + memset(intel, 0, sizeof(struct intel_iommu)); + + spin_lock_init(&intel->qi_ctrl.qinval_lock); + spin_lock_init(&intel->qi_ctrl.qinval_poll_lock); + + spin_lock_init(&intel->ir_ctrl.iremap_lock); + + return intel; +} + +static void free_intel_iommu(struct intel_iommu *intel) +{ + if ( intel ) + { + xfree(intel); + intel = NULL; + } +} + +struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu) +{ + if ( !iommu ) + return NULL; + + if ( !iommu->intel ) + { + iommu->intel = alloc_intel_iommu(); + if ( !iommu->intel ) + { + dprintk(XENLOG_ERR VTDPREFIX, + "iommu_qi_ctrl: Allocate iommu->intel failed.\n"); + return NULL; + } + } + + return &(iommu->intel->qi_ctrl); +} + +struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu) +{ + if ( !iommu ) + return NULL; + + if ( !iommu->intel ) + { + iommu->intel = alloc_intel_iommu(); + if ( !iommu->intel ) + { + dprintk(XENLOG_ERR VTDPREFIX, + "iommu_ir_ctrl: Allocate iommu->intel failed.\n"); + return NULL; + } + } + + return &(iommu->intel->ir_ctrl); +} + +struct iommu_flush *iommu_get_flush(struct iommu *iommu) +{ + if ( !iommu ) + return NULL; + + if ( !iommu->intel ) + { + iommu->intel = alloc_intel_iommu(); + if ( !iommu->intel ) + { + dprintk(XENLOG_ERR VTDPREFIX, + "iommu_get_flush: Allocate iommu->intel failed.\n"); + return NULL; + } + } + + return &(iommu->intel->flush); } unsigned int x86_clflush_size; @@ -756,40 +838,34 @@ static int iommu_page_fault_do_one(struc PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr, fault_reason, iommu->reg); - if (fault_reason < 0x20) + if ( fault_reason < 0x20 ) print_vtd_entries(current->domain, iommu, (source_id >> 8), - (source_id & 0xff), (addr >> PAGE_SHIFT)); + (source_id & 0xff), (addr >> PAGE_SHIFT)); return 0; } static void iommu_fault_status(u32 fault_status) { - if (fault_status & DMA_FSTS_PFO) + if ( fault_status & DMA_FSTS_PFO ) dprintk(XENLOG_ERR VTDPREFIX, "iommu_fault_status: Fault Overflow\n"); - else - if (fault_status & DMA_FSTS_PPF) + else if ( fault_status & DMA_FSTS_PPF ) dprintk(XENLOG_ERR VTDPREFIX, "iommu_fault_status: Primary Pending Fault\n"); - else - if (fault_status & DMA_FSTS_AFO) + else if ( fault_status & DMA_FSTS_AFO ) dprintk(XENLOG_ERR VTDPREFIX, "iommu_fault_status: Advanced Fault Overflow\n"); - else - if (fault_status & DMA_FSTS_APF) + else if ( fault_status & DMA_FSTS_APF ) dprintk(XENLOG_ERR VTDPREFIX, "iommu_fault_status: Advanced Pending Fault\n"); - else - if (fault_status & DMA_FSTS_IQE) + else if ( fault_status & DMA_FSTS_IQE ) dprintk(XENLOG_ERR VTDPREFIX, "iommu_fault_status: Invalidation Queue Error\n"); - else - if (fault_status & DMA_FSTS_ICE) + else if ( fault_status & DMA_FSTS_ICE ) dprintk(XENLOG_ERR VTDPREFIX, "iommu_fault_status: Invalidation Completion Error\n"); - else - if (fault_status & DMA_FSTS_ITE) + else if ( fault_status & DMA_FSTS_ITE ) dprintk(XENLOG_ERR VTDPREFIX, "iommu_fault_status: Invalidation Time-out Error\n"); } @@ -976,8 +1052,6 @@ struct iommu *iommu_alloc(void *hw_data) { struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data; struct iommu *iommu; - struct qi_ctrl *qi_ctrl; - struct ir_ctrl *ir_ctrl; if ( nr_iommus > MAX_IOMMUS ) { @@ -1014,12 +1088,7 @@ struct iommu *iommu_alloc(void *hw_data) spin_lock_init(&iommu->lock); spin_lock_init(&iommu->register_lock); - qi_ctrl = iommu_qi_ctrl(iommu); - 
spin_lock_init(&qi_ctrl->qinval_lock); - spin_lock_init(&qi_ctrl->qinval_poll_lock); - - ir_ctrl = iommu_ir_ctrl(iommu); - spin_lock_init(&ir_ctrl->iremap_lock); + iommu->intel = alloc_intel_iommu(); drhd->iommu = iommu; return iommu; @@ -1036,6 +1105,7 @@ static void free_iommu(struct iommu *iom free_xenheap_page((void *)iommu->root_entry); if ( iommu->reg ) iounmap(iommu->reg); + free_intel_iommu(iommu->intel); free_irq(iommu->vector); xfree(iommu); } @@ -1063,7 +1133,7 @@ int intel_iommu_domain_init(struct domai iommu = drhd->iommu ? : iommu_alloc(drhd); /* calculate AGAW */ - if (guest_width > cap_mgaw(iommu->cap)) + if ( guest_width > cap_mgaw(iommu->cap) ) guest_width = cap_mgaw(iommu->cap); adjust_width = guestwidth_to_adjustwidth(guest_width); agaw = width_to_agaw(adjust_width); @@ -1885,7 +1955,8 @@ int iommu_setup(void) /* Allocate domain id bitmap, and set bit 0 as reserved */ domid_bitmap_size = cap_ndoms(iommu->cap); - domid_bitmap = xmalloc_bytes(domid_bitmap_size / 8); + domid_bitmap = xmalloc_array(unsigned long, + BITS_TO_LONGS(domid_bitmap_size)); if ( domid_bitmap == NULL ) goto error; memset(domid_bitmap, 0, domid_bitmap_size / 8); @@ -1948,6 +2019,12 @@ int intel_iommu_assign_device(struct dom for_each_rmrr_device( rmrr, pdev ) if ( pdev->bus == bus && pdev->devfn == devfn ) { + /* FIXME: Because USB RMRR conflicts with guest bios region, + * ignore USB RMRR temporarily. + */ + if ( is_usb_device(pdev) ) + return 0; + ret = iommu_prepare_rmrr_dev(d, rmrr, pdev); if ( ret ) { diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/vtd/iommu.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/passthrough/vtd/iommu.h Thu Mar 20 12:35:40 2008 -0600 @@ -0,0 +1,454 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) Ashok Raj <ashok.raj@xxxxxxxxx> + */ + +#ifndef _INTEL_IOMMU_H_ +#define _INTEL_IOMMU_H_ + +#include <xen/types.h> + +/* + * Intel IOMMU register specification per version 1.0 public spec. 
+ */ + +#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ +#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ +#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ +#define DMAR_GCMD_REG 0x18 /* Global command register */ +#define DMAR_GSTS_REG 0x1c /* Global status register */ +#define DMAR_RTADDR_REG 0x20 /* Root entry table */ +#define DMAR_CCMD_REG 0x28 /* Context command reg */ +#define DMAR_FSTS_REG 0x34 /* Fault Status register */ +#define DMAR_FECTL_REG 0x38 /* Fault control register */ +#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ +#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ +#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ +#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ +#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ +#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ +#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ +#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ +#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ +#define DMAR_IQH_REG 0x80 /* invalidation queue head */ +#define DMAR_IQT_REG 0x88 /* invalidation queue tail */ +#define DMAR_IQA_REG 0x90 /* invalidation queue addr */ +#define DMAR_IRTA_REG 0xB8 /* intr remap */ + +#define OFFSET_STRIDE (9) +#define dmar_readl(dmar, reg) readl(dmar + reg) +#define dmar_writel(dmar, reg, val) writel(val, dmar + reg) +#define dmar_readq(dmar, reg) ({ \ + u32 lo, hi; \ + lo = dmar_readl(dmar, reg); \ + hi = dmar_readl(dmar, reg + 4); \ + (((u64) hi) << 32) + lo; }) +#define dmar_writeq(dmar, reg, val) do {\ + dmar_writel(dmar, reg, (u32)val); \ + dmar_writel(dmar, reg + 4, (u32)((u64) val >> 32)); \ + } while (0) + +#define VER_MAJOR(v) (((v) & 0xf0) >> 4) +#define VER_MINOR(v) ((v) & 0x0f) + +/* + * Decoding Capability Register + */ +#define cap_read_drain(c) (((c) >> 55) & 1) +#define cap_write_drain(c) (((c) >> 54) & 1) +#define cap_max_amask_val(c) (((c) >> 48) & 0x3f) +#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) +#define cap_pgsel_inv(c) (((c) >> 39) & 1) + +#define cap_super_page_val(c) (((c) >> 34) & 0xf) +#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ + * OFFSET_STRIDE) + 21) + +#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) + +#define cap_isoch(c) (((c) >> 23) & 1) +#define cap_qos(c) (((c) >> 22) & 1) +#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) +#define cap_sagaw(c) (((c) >> 8) & 0x1f) +#define cap_caching_mode(c) (((c) >> 7) & 1) +#define cap_phmr(c) (((c) >> 6) & 1) +#define cap_plmr(c) (((c) >> 5) & 1) +#define cap_rwbf(c) (((c) >> 4) & 1) +#define cap_afl(c) (((c) >> 3) & 1) +#define cap_ndoms(c) (1 << (4 + 2 * ((c) & 0x7))) + +/* + * Extended Capability Register + */ + +#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) +#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) +#define ecap_coherent(e) ((e >> 0) & 0x1) +#define ecap_queued_inval(e) ((e >> 1) & 0x1) +#define ecap_dev_iotlb(e) ((e >> 2) & 0x1) +#define ecap_intr_remap(e) ((e >> 3) & 0x1) +#define ecap_ext_intr(e) ((e >> 4) & 0x1) +#define ecap_cache_hints(e) ((e >> 5) & 0x1) +#define ecap_pass_thru(e) ((e >> 6) & 0x1) + +/* IOTLB_REG */ +#define DMA_TLB_FLUSH_GRANU_OFFSET 60 +#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) +#define DMA_TLB_DSI_FLUSH (((u64)2) << 60) +#define DMA_TLB_PSI_FLUSH (((u64)3) << 60) +#define DMA_TLB_IIRG(x) (((x) >> 60) & 7) +#define DMA_TLB_IAIG(val) (((val) >> 57) & 7) +#define DMA_TLB_DID(x) (((u64)(x & 0xffff)) << 32) + 
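As an aside (not part of the changeset): the cap_*() macros above are plain shift-and-mask decodes of the 64-bit value that dmar_readq() assembles from two 32-bit reads of DMAR_CAP_REG. A minimal host-side sketch of the same arithmetic follows; the uppercase CAP_* names and the sample register value are invented for the illustration.

#include <stdint.h>
#include <stdio.h>

/* Sketch only: mirrors cap_mgaw()/cap_sagaw()/cap_ndoms() above; the CAP_*
 * names and the sample register value are made up for illustration. */
#define CAP_MGAW(c)   ((unsigned int)(((c) >> 16) & 0x3f) + 1)    /* max guest address width */
#define CAP_SAGAW(c)  ((unsigned int)(((c) >> 8) & 0x1f))         /* supported AGAW bit-mask */
#define CAP_NDOMS(c)  (1u << (4 + 2 * (unsigned int)((c) & 0x7))) /* number of domain ids */

int main(void)
{
    uint64_t cap = 0x00000000002f0402ULL;  /* made-up DMAR_CAP_REG value */

    /* prints: mgaw=48 sagaw=0x4 ndoms=256 */
    printf("mgaw=%u sagaw=0x%x ndoms=%u\n",
           CAP_MGAW(cap), CAP_SAGAW(cap), CAP_NDOMS(cap));
    return 0;
}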
+#define DMA_TLB_READ_DRAIN (((u64)1) << 49) +#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) +#define DMA_TLB_IVT (((u64)1) << 63) + +#define DMA_TLB_IVA_ADDR(x) ((((u64)x) >> 12) << 12) +#define DMA_TLB_IVA_HINT(x) ((((u64)x) & 1) << 6) + +/* GCMD_REG */ +#define DMA_GCMD_TE (((u64)1) << 31) +#define DMA_GCMD_SRTP (((u64)1) << 30) +#define DMA_GCMD_SFL (((u64)1) << 29) +#define DMA_GCMD_EAFL (((u64)1) << 28) +#define DMA_GCMD_WBF (((u64)1) << 27) +#define DMA_GCMD_QIE (((u64)1) << 26) +#define DMA_GCMD_IRE (((u64)1) << 25) +#define DMA_GCMD_SIRTP (((u64)1) << 24) +#define DMA_GCMD_CFI (((u64)1) << 23) + +/* GSTS_REG */ +#define DMA_GSTS_TES (((u64)1) << 31) +#define DMA_GSTS_RTPS (((u64)1) << 30) +#define DMA_GSTS_FLS (((u64)1) << 29) +#define DMA_GSTS_AFLS (((u64)1) << 28) +#define DMA_GSTS_WBFS (((u64)1) << 27) +#define DMA_GSTS_QIES (((u64)1) <<26) +#define DMA_GSTS_IRES (((u64)1) <<25) +#define DMA_GSTS_SIRTPS (((u64)1) << 24) +#define DMA_GSTS_CFIS (((u64)1) <<23) + +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1) << 31) +#define DMA_PMEN_PRS (((u32)1) << 0) + +/* CCMD_REG */ +#define DMA_CCMD_INVL_GRANU_OFFSET 61 +#define DMA_CCMD_ICC (((u64)1) << 63) +#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) +#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) +#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) +#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) +#define DMA_CCMD_CIRG(x) ((((u64)3) << 61) & x) +#define DMA_CCMD_MASK_NOBIT 0 +#define DMA_CCMD_MASK_1BIT 1 +#define DMA_CCMD_MASK_2BIT 2 +#define DMA_CCMD_MASK_3BIT 3 +#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) +#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) + +#define DMA_CCMD_CAIG_MASK(x) (((u64)x) & ((u64) 0x3 << 59)) + +/* FECTL_REG */ +#define DMA_FECTL_IM (((u64)1) << 31) + +/* FSTS_REG */ +#define DMA_FSTS_PFO ((u64)1 << 0) +#define DMA_FSTS_PPF ((u64)1 << 1) +#define DMA_FSTS_AFO ((u64)1 << 2) +#define DMA_FSTS_APF ((u64)1 << 3) +#define DMA_FSTS_IQE ((u64)1 << 4) +#define DMA_FSTS_ICE ((u64)1 << 5) +#define DMA_FSTS_ITE ((u64)1 << 6) +#define DMA_FSTS_FAULTS DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_AFO | DMA_FSTS_APF | DMA_FSTS_IQE | DMA_FSTS_ICE | DMA_FSTS_ITE +#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) + +/* FRCD_REG, 32 bits access */ +#define DMA_FRCD_F (((u64)1) << 31) +#define dma_frcd_type(d) ((d >> 30) & 1) +#define dma_frcd_fault_reason(c) (c & 0xff) +#define dma_frcd_source_id(c) (c & 0xffff) +#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ + +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 val; + u64 rsvd1; +}; +#define root_present(root) ((root).val & 1) +#define set_root_present(root) do {(root).val |= 1;} while(0) +#define get_context_addr(root) ((root).val & PAGE_MASK_4K) +#define set_root_value(root, value) \ + do {(root).val |= ((value) & PAGE_MASK_4K);} while(0) + +struct context_entry { + u64 lo; + u64 hi; +}; +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +#define context_present(c) ((c).lo & 1) +#define context_fault_disable(c) (((c).lo >> 1) & 1) +#define context_translation_type(c) (((c).lo >> 2) & 3) +#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_width(c) ((c).hi & 7) +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) + +#define context_set_present(c) do {(c).lo |= 1;} while(0) +#define context_clear_present(c) do {(c).lo &= ~1;} while(0) +#define context_set_fault_enable(c) \ + do {(c).lo &= (((u64)-1) << 2) | 1;} while(0) + 
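Another aside, not part of the patch: the root_entry/context_entry accessors above are meant to be composed into a bus/devfn lookup, with a root entry per bus pointing at a page-aligned table of 256 context entries, one per devfn. A rough self-contained sketch of that walk is below; the helper names, sample devfn and domain id are invented, and the real lookup lives in the VT-d iommu.c code rather than in this hunk.

#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT_4K 12
#define PAGE_MASK_4K  (~((1ULL << PAGE_SHIFT_4K) - 1))

struct root_entry    { uint64_t val; uint64_t rsvd1; };
struct context_entry { uint64_t lo;  uint64_t hi; };

/* Same bit positions as the root and context entry macros above. */
static int      root_present(struct root_entry r)   { return (int)(r.val & 1); }
static uint64_t root_ctx_addr(struct root_entry r)  { return r.val & PAGE_MASK_4K; }
static int      ctx_present(struct context_entry c) { return (int)(c.lo & 1); }
static unsigned ctx_domid(struct context_entry c)   { return (unsigned)((c.hi >> 8) & 0xffff); }

/* One context table per bus: 256 entries, one per devfn, page aligned. */
static alignas(4096) struct context_entry ctx_table[256];

int main(void)
{
    struct root_entry root = { .val = (uint64_t)(uintptr_t)ctx_table | 1 };
    uint8_t devfn = 0x10;                    /* device 02, function 0; arbitrary */

    ctx_table[devfn].lo = 1;                 /* present */
    ctx_table[devfn].hi = (uint64_t)5 << 8;  /* domain id 5; arbitrary */

    if ( root_present(root) )
    {
        struct context_entry *tbl =
            (struct context_entry *)(uintptr_t)root_ctx_addr(root);
        if ( ctx_present(tbl[devfn]) )
            printf("devfn %02x -> domain %u\n", devfn, ctx_domid(tbl[devfn]));
    }
    return 0;
}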
+#define context_set_translation_type(c, val) do { \ + (c).lo &= (((u64)-1) << 4) | 3; \ + (c).lo |= (val & 3) << 2; \ + } while(0) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define CONTEXT_TT_DEV_IOTLB 1 +#define CONTEXT_TT_PASS_THRU 2 + +#define context_set_address_root(c, val) \ + do {(c).lo &= 0xfff; (c).lo |= (val) & PAGE_MASK_4K ;} while(0) +#define context_set_address_width(c, val) \ + do {(c).hi &= 0xfffffff8; (c).hi |= (val) & 7;} while(0) +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while(0) + +/* page table handling */ +#define LEVEL_STRIDE (9) +#define LEVEL_MASK ((1 << LEVEL_STRIDE) - 1) +#define agaw_to_level(val) ((val) + 2) +#define agaw_to_width(val) (30 + val * LEVEL_STRIDE) +#define width_to_agaw(w) ((w - 30)/LEVEL_STRIDE) +#define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) +#define address_level_offset(addr, level) \ + ((addr >> level_to_offset_bits(level)) & LEVEL_MASK) +#define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l)) +#define level_size(l) (1 << level_to_offset_bits(l)) +#define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l)) + +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-11: available + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; +#define dma_clear_pte(p) do {(p).val = 0;} while(0) +#define dma_set_pte_readable(p) do {(p).val |= 1;} while(0) +#define dma_set_pte_writable(p) do {(p).val |= 2;} while(0) +#define dma_set_pte_superpage(p) do {(p).val |= 8;} while(0) +#define dma_set_pte_prot(p, prot) do { (p).val = (((p).val >> 2) << 2) | ((prot) & 3);} while (0) +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_set_pte_addr(p, addr) do {(p).val |= ((addr) >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;} while(0) +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) +#define dma_pte_present(p) (((p).val & 3) != 0) + +/* interrupt remap entry */ +struct iremap_entry { + union { + u64 lo_val; + struct { + u64 p : 1, + fpd : 1, + dm : 1, + rh : 1, + tm : 1, + dlm : 3, + avail : 4, + res_1 : 4, + vector : 8, + res_2 : 8, + dst : 32; + }lo; + }; + union { + u64 hi_val; + struct { + u64 sid : 16, + sq : 2, + svt : 2, + res_1 : 44; + }hi; + }; +}; +#define IREMAP_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct iremap_entry)) +#define iremap_present(v) ((v).lo & 1) +#define iremap_fault_disable(v) (((v).lo >> 1) & 1) + +#define iremap_set_present(v) do {(v).lo |= 1;} while(0) +#define iremap_clear_present(v) do {(v).lo &= ~1;} while(0) + +/* queue invalidation entry */ +struct qinval_entry { + union { + struct { + struct { + u64 type : 4, + granu : 2, + res_1 : 10, + did : 16, + sid : 16, + fm : 2, + res_2 : 14; + }lo; + struct { + u64 res; + }hi; + }cc_inv_dsc; + struct { + struct { + u64 type : 4, + granu : 2, + dw : 1, + dr : 1, + res_1 : 8, + did : 16, + res_2 : 32; + }lo; + struct { + u64 am : 6, + ih : 1, + res_1 : 5, + addr : 52; + }hi; + }iotlb_inv_dsc; + struct { + struct { + u64 type : 4, + res_1 : 12, + max_invs_pend: 5, + res_2 : 11, + sid : 16, + res_3 : 16; + }lo; + struct { + u64 size : 1, + res_1 : 11, + addr : 52; + }hi; + }dev_iotlb_inv_dsc; + struct { + struct { + u64 type : 4, + granu : 1, + res_1 : 22, + im : 5, + iidx : 16, + res_2 : 16; + }lo; + struct { + u64 res; + }hi; + }iec_inv_dsc; + struct { + struct { + u64 type : 4, + iflag : 1, + sw : 1, + fn : 1, + res_1 : 25, + sdata : 32; + }lo; + struct { + u64 res_1 : 2, + saddr : 62; + }hi; + }inv_wait_dsc; + }q; +}; + +struct poll_info { + u64 saddr; + u32 udata; +}; + +#define QINVAL_ENTRY_NR 
(PAGE_SIZE_4K/sizeof(struct qinval_entry)) +#define qinval_present(v) ((v).lo & 1) +#define qinval_fault_disable(v) (((v).lo >> 1) & 1) + +#define qinval_set_present(v) do {(v).lo |= 1;} while(0) +#define qinval_clear_present(v) do {(v).lo &= ~1;} while(0) + +#define RESERVED_VAL 0 + +#define TYPE_INVAL_CONTEXT 0x1 +#define TYPE_INVAL_IOTLB 0x2 +#define TYPE_INVAL_DEVICE_IOTLB 0x3 +#define TYPE_INVAL_IEC 0x4 +#define TYPE_INVAL_WAIT 0x5 + +#define NOTIFY_TYPE_POLL 1 +#define NOTIFY_TYPE_INTR 1 +#define INTERRUTP_FLAG 1 +#define STATUS_WRITE 1 +#define FENCE_FLAG 1 + +#define IEC_GLOBAL_INVL 0 +#define IEC_INDEX_INVL 1 +#define IRTA_REG_EIME_SHIFT 11 +#define IRTA_REG_TABLE_SIZE 7 // 4k page = 256 * 16 byte entries + // 2^^(IRTA_REG_TABLE_SIZE + 1) = 256 + // IRTA_REG_TABLE_SIZE = 7 + +#define VTD_PAGE_TABLE_LEVEL_3 3 +#define VTD_PAGE_TABLE_LEVEL_4 4 + +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 +#define MAX_IOMMU_REGS 0xc0 + +extern struct list_head acpi_drhd_units; +extern struct list_head acpi_rmrr_units; +extern struct list_head acpi_ioapic_units; + +struct qi_ctrl { + struct qinval_entry *qinval; /* queue invalidation page */ + int qinval_index; /* queue invalidation index */ + spinlock_t qinval_lock; /* lock for queue invalidation page */ + spinlock_t qinval_poll_lock; /* lock for queue invalidation poll addr */ + volatile u32 qinval_poll_status; /* used by poll methord to sync */ +}; + +struct ir_ctrl { + struct iremap_entry *iremap; /* interrupt remap table */ + int iremap_index; /* interrupt remap index */ + spinlock_t iremap_lock; /* lock for irq remappping table */ +}; + +struct iommu_flush { + int (*context)(void *iommu, u16 did, u16 source_id, + u8 function_mask, u64 type, int non_present_entry_flush); + int (*iotlb)(void *iommu, u16 did, u64 addr, unsigned int size_order, + u64 type, int non_present_entry_flush); +}; + +struct intel_iommu { + struct qi_ctrl qi_ctrl; + struct ir_ctrl ir_ctrl; + struct iommu_flush flush; +}; + +#endif diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/vtd/qinval.c --- a/xen/drivers/passthrough/vtd/qinval.c Fri Mar 14 15:07:45 2008 -0600 +++ b/xen/drivers/passthrough/vtd/qinval.c Thu Mar 20 12:35:40 2008 -0600 @@ -19,15 +19,9 @@ */ -#include <xen/init.h> -#include <xen/irq.h> -#include <xen/spinlock.h> #include <xen/sched.h> -#include <xen/xmalloc.h> -#include <xen/domain_page.h> -#include <asm/delay.h> -#include <asm/string.h> -#include <asm/iommu.h> +#include <xen/iommu.h> +#include "iommu.h" #include "dmar.h" #include "vtd.h" #include "../pci-direct.h" diff -r 8c921adf4833 -r 42f6c206c951 xen/drivers/passthrough/vtd/utils.c _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
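
To see how the root_entry/context_entry accessors introduced by this header compose, here is a standalone sketch (not part of the changeset) that builds one root entry and one context entry in plain user-space C. The structures and macros are copied from the hunk above; PAGE_SHIFT_4K/PAGE_MASK_4K are assumed to have their usual 4K values, the machine addresses and the domain id are invented constants, and the domain-id store is open-coded because the hunk only defines a context_domain_id() reader.

/*
 * Standalone illustration (not part of the changeset): building one VT-d
 * root entry and one context entry with the accessors defined above.
 * PAGE_SHIFT_4K/PAGE_MASK_4K are assumed 4K values; the machine addresses
 * and the domain id are invented for the example.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

#define PAGE_SHIFT_4K 12
#define PAGE_MASK_4K  (((u64)-1) << PAGE_SHIFT_4K)

struct root_entry    { u64 val; u64 rsvd1; };
struct context_entry { u64 lo;  u64 hi; };

/* Copied from the header hunk above. */
#define set_root_present(root)      do { (root).val |= 1; } while (0)
#define set_root_value(root, value) \
    do { (root).val |= ((value) & PAGE_MASK_4K); } while (0)
#define context_set_present(c)      do { (c).lo |= 1; } while (0)
#define context_set_address_root(c, val) \
    do { (c).lo &= 0xfff; (c).lo |= (val) & PAGE_MASK_4K; } while (0)
#define context_set_address_width(c, val) \
    do { (c).hi &= 0xfffffff8; (c).hi |= (val) & 7; } while (0)
#define context_domain_id(c)     (((c).hi >> 8) & ((1 << 16) - 1))
#define context_address_width(c) ((c).hi & 7)

int main(void)
{
    u64 context_table_maddr = 0x12345000ULL;  /* invented context-table page */
    u64 pgtbl_maddr         = 0x6789a000ULL;  /* invented I/O page-table root */

    struct root_entry    re = { 0, 0 };
    struct context_entry ce = { 0, 0 };

    /* Root entry: 4K-aligned context-table pointer plus the present bit. */
    set_root_value(re, context_table_maddr);
    set_root_present(re);

    /* Context entry: page-table root, address width 2 (4-level), domain id,
     * present bit.  The domain id is stored by hand since the hunk above
     * only provides a context_domain_id() reader. */
    context_set_address_root(ce, pgtbl_maddr);
    context_set_address_width(ce, 2);
    ce.hi |= (u64)7 << 8;
    context_set_present(ce);

    printf("root.val = %#llx\n", (unsigned long long)re.val);
    printf("ctx.lo=%#llx ctx.hi=%#llx (did=%llu, aw=%llu)\n",
           (unsigned long long)ce.lo, (unsigned long long)ce.hi,
           (unsigned long long)context_domain_id(ce),
           (unsigned long long)context_address_width(ce));
    return 0;
}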
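
The page-table geometry macros (LEVEL_STRIDE, agaw_to_level, width_to_agaw, address_level_offset) encode the 9-bits-per-level layout used by the VT-d second-level page tables. A short sketch, again with the macros copied from the hunk above and an arbitrary sample address, shows the per-level indices that fall out of a 48-bit address width:

/*
 * Standalone illustration of the page-table geometry macros above:
 * a 48-bit address width gives agaw 2 and a 4-level table, and
 * address_level_offset() yields the 9-bit index used at each level.
 * The sample address is arbitrary.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* Copied from the header hunk above. */
#define LEVEL_STRIDE  (9)
#define LEVEL_MASK    ((1 << LEVEL_STRIDE) - 1)
#define agaw_to_level(val)       ((val) + 2)
#define width_to_agaw(w)         ((w - 30)/LEVEL_STRIDE)
#define level_to_offset_bits(l)  (12 + (l - 1) * LEVEL_STRIDE)
#define address_level_offset(addr, level) \
    ((addr >> level_to_offset_bits(level)) & LEVEL_MASK)

int main(void)
{
    int agaw  = width_to_agaw(48);    /* 48-bit width -> agaw 2 */
    int level = agaw_to_level(agaw);  /* agaw 2 -> 4-level table */
    u64 addr  = 0x0000123456789000ULL;

    printf("agaw=%d levels=%d\n", agaw, level);
    for ( int l = level; l >= 1; l-- )
        printf("level %d index = %llu\n", l,
               (unsigned long long)address_level_offset(addr, l));
    return 0;
}

A 48-bit width is consistent with the DEFAULT_DOMAIN_ADDRESS_WIDTH and VTD_PAGE_TABLE_LEVEL_4 constants defined above.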
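
Finally, struct iommu_flush keeps the context-cache and IOTLB invalidation routines behind function pointers, presumably so that the register-based and queued-invalidation (qinval.c) paths can be swapped without touching callers. The sketch below only illustrates that wiring: flush_context_reg()/flush_iotlb_reg() are hypothetical stand-ins that print their arguments, and DMA_TLB_DSI_FLUSH is an assumed granularity constant not shown in this hunk, while DMA_CCMD_DOMAIN_INVL is taken from the definitions above.

/*
 * Illustration of the iommu_flush callback table above.  The two flush
 * functions are hypothetical stand-ins; a real implementation would
 * program CCMD_REG / the IOTLB registers or queue an invalidation
 * descriptor.  DMA_TLB_DSI_FLUSH is an assumed encoding (not part of
 * the hunk shown); DMA_CCMD_DOMAIN_INVL comes from the hunk above.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;
typedef uint16_t u16;
typedef uint8_t  u8;

struct iommu_flush {
    int (*context)(void *iommu, u16 did, u16 source_id,
                   u8 function_mask, u64 type, int non_present_entry_flush);
    int (*iotlb)(void *iommu, u16 did, u64 addr, unsigned int size_order,
                 u64 type, int non_present_entry_flush);
};

#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)   /* from the hunk above */
#define DMA_TLB_DSI_FLUSH    (((u64)2) << 60)   /* assumed, for illustration */

static int flush_context_reg(void *iommu, u16 did, u16 sid, u8 fm,
                             u64 type, int npef)
{
    (void)iommu; (void)sid; (void)fm; (void)npef;
    printf("context flush: did=%u type=%#llx\n",
           (unsigned)did, (unsigned long long)type);
    return 0;
}

static int flush_iotlb_reg(void *iommu, u16 did, u64 addr,
                           unsigned int size_order, u64 type, int npef)
{
    (void)iommu; (void)size_order; (void)npef;
    printf("iotlb flush: did=%u addr=%#llx type=%#llx\n",
           (unsigned)did, (unsigned long long)addr,
           (unsigned long long)type);
    return 0;
}

int main(void)
{
    struct iommu_flush flush = {
        .context = flush_context_reg,
        .iotlb   = flush_iotlb_reg,
    };

    /* Domain-selective invalidation for an example domain id 7. */
    flush.context(NULL, 7, 0, 0, DMA_CCMD_DOMAIN_INVL, 0);
    flush.iotlb(NULL, 7, 0, 0, DMA_TLB_DSI_FLUSH, 0);
    return 0;
}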