[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User Isaku Yamahata <yamahata@xxxxxxxxxxxxx> # Date 1235986016 -32400 # Node ID 6227bf629626105380f8f192da78a9e25adf4a27 # Parent c4c4ba857d8b4b045f169b33c7e36dcdc07af720 # Parent e5c696aaf2a6e8805231c0c0f1414560262e7005 merge with xen-unstable.hg --- buildconfigs/enable-xen-config | 57 +++++- buildconfigs/mk.linux-2.6-pvops | 14 + docs/misc/xsm-flask.txt | 148 ++++++++++++++++ tools/console/daemon/io.c | 20 +- tools/hotplug/Linux/init.d/xendomains | 63 ++++-- tools/libxc/xc_ptrace.c | 255 ++-------------------------- tools/misc/xenpm.c | 38 ++-- tools/pygrub/src/pygrub | 6 tools/python/xen/util/vscsi_util.py | 55 +++++- tools/python/xen/xend/XendAPI.py | 9 tools/python/xen/xend/XendBootloader.py | 73 ++++++-- tools/python/xen/xend/XendConfig.py | 41 +++- tools/python/xen/xend/XendConstants.py | 1 tools/python/xen/xend/XendDomainInfo.py | 17 + tools/python/xen/xend/server/vscsiif.py | 15 + tools/python/xen/xm/create.py | 115 +++++++----- tools/python/xen/xm/main.py | 61 ++++-- tools/python/xen/xm/xenapi_create.py | 1 tools/xenstore/xenstored_core.c | 6 xen/arch/ia64/xen/domain.c | 10 - xen/arch/x86/acpi/power.c | 2 xen/arch/x86/hvm/hvm.c | 16 + xen/arch/x86/hvm/mtrr.c | 19 +- xen/arch/x86/hvm/vpic.c | 2 xen/arch/x86/hvm/vpt.c | 26 ++ xen/arch/x86/io_apic.c | 71 ++----- xen/arch/x86/irq.c | 18 + xen/arch/x86/mm/shadow/multi.c | 19 +- xen/arch/x86/msi.c | 111 ++++++++---- xen/arch/x86/shutdown.c | 1 xen/arch/x86/time.c | 7 xen/common/timer.c | 7 xen/drivers/passthrough/amd/iommu_init.c | 6 xen/drivers/passthrough/amd/iommu_intr.c | 14 - xen/drivers/passthrough/amd/iommu_map.c | 31 +-- xen/drivers/passthrough/amd/pci_amd_iommu.c | 23 +- xen/drivers/passthrough/pci.c | 1 xen/drivers/passthrough/vtd/ia64/vtd.c | 31 +++ xen/drivers/passthrough/vtd/iommu.c | 45 ++-- xen/drivers/passthrough/vtd/iommu.h | 2 xen/drivers/passthrough/vtd/qinval.c | 5 xen/drivers/passthrough/vtd/x86/vtd.c | 20 ++ xen/include/asm-ia64/msi.h | 20 ++ 
xen/include/asm-x86/fixmap.h | 2 xen/include/asm-x86/io_apic.h | 8 xen/include/asm-x86/msi.h | 8 xen/include/asm-x86/mtrr.h | 3 xen/include/public/hvm/params.h | 5 xen/include/xen/iommu.h | 2 xen/include/xen/pci.h | 6 xen/include/xen/timer.h | 3 xen/xsm/flask/ss/policydb.c | 4 52 files changed, 965 insertions(+), 578 deletions(-) diff -r c4c4ba857d8b -r 6227bf629626 buildconfigs/enable-xen-config --- a/buildconfigs/enable-xen-config Mon Mar 02 16:52:22 2009 +0900 +++ b/buildconfigs/enable-xen-config Mon Mar 02 18:26:56 2009 +0900 @@ -19,29 +19,70 @@ setopt() # Then append the new value case ${VALUE} in - y|m) echo "${OPTION}=${VALUE}" >> "${CONFIG}" ;; - n) echo "# ${OPTION} is not set" >> "${CONFIG}" ;; - *) echo "Invalid value ${VALUE} for ${OPTION}" 1>&2 ; exit 1 ;; + n) echo "# ${OPTION} is not set" >> "${CONFIG}" ;; + y|m|*) echo "${OPTION}=${VALUE}" >> "${CONFIG}" ;; esac } setopt CONFIG_PARAVIRT y +setopt CONFIG_PARAVIRT_DEBUG y setopt CONFIG_PARAVIRT_GUEST y + setopt CONFIG_XEN y +setopt CONFIG_XEN_BLKDEV_FRONTEND y +setopt CONFIG_XEN_NETDEV_FRONTEND y +setopt CONFIG_XEN_KBDDEV_FRONTEND y +setopt CONFIG_XEN_FBDEV_FRONTEND y +setopt CONFIG_XEN_BALLOON y +setopt CONFIG_XEN_SCRUB_PAGES y +setopt CONFIG_XEN_DEV_EVTCHN y +setopt CONFIG_XEN_BACKEND y +setopt CONFIG_XEN_BLKDEV_BACKEND y +setopt CONFIG_XEN_NETDEV_BACKEND y +setopt CONFIG_XENFS y +setopt CONFIG_XEN_COMPAT_XENFS y +setopt CONFIG_HVC_XEN y +setopt CONFIG_XEN_MAX_DOMAIN_MEMORY 32 +setopt CONFIG_XEN_DEBUG_FS y +setopt CONFIG_XEN_DOM0 y + setopt CONFIG_VMI y + setopt CONFIG_KVM y setopt CONFIG_KVM_INTEL y setopt CONFIG_KVM_AMD y +setopt CONFIG_KVM_CLOCK y +setopt CONFIG_KVM_GUEST n +setopt CONFIG_KVM_TRACE n + setopt CONFIG_LGUEST n -setopt CONFIG_XEN_BLKDEV_FRONTEND y -setopt CONFIG_XEN_NETDEV_FRONTEND y -setopt CONFIG_HVC_XEN y + +setopt CONFIG_LOCALVERSION_AUTO n + +# Should all be set one way or another in defconfig but aren't setopt CONFIG_NUMA n -setopt CONFIG_LOCALVERSION_AUTO n +setopt 
CONFIG_X86_VSMP n +setopt CONFIG_X86_UV n +setopt CONFIG_CALGARY_IOMMU n +setopt CONFIG_AMD_IOMMU n +setopt CONFIG_MAXSMP n +setopt CONFIG_SPARSEMEM_VMEMMAP n +setopt CONFIG_I7300_IDLE n +setopt CONFIG_DMAR n +setopt CONFIG_INTR_REMAP n +setopt CONFIG_GFS2_FS n +setopt CONFIG_IOMMU_DEBUG n case ${XEN_TARGET_ARCH} in x86_32) setopt CONFIG_64BIT n ;; - x86_64) setopt CONFIG_64BIT y ;; + x86_64) + setopt CONFIG_64BIT y + setopt CONFIG_IA32_EMULATION y + setopt CONFIG_IA32_AOUT n + setopt CONFIG_CRYPTO_AES_X86_64 n + setopt CONFIG_CRYPTO_SALSA20_X86_64 n + setopt CONFIG_CRYPTO_TWOFISH_X86_64 n + ;; *) ;; esac diff -r c4c4ba857d8b -r 6227bf629626 buildconfigs/mk.linux-2.6-pvops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/buildconfigs/mk.linux-2.6-pvops Mon Mar 02 18:26:56 2009 +0900 @@ -0,0 +1,14 @@ +XEN_LINUX_SOURCE ?= git-clone +LINUX_VER ?= 2.6-pvops + +IMAGE_TARGET ?= bzImage + +XEN_LINUX_CONFIG_UPDATE := buildconfigs/enable-xen-config + +XEN_LINUX_GIT_URL ?= git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git +XEN_LINUX_GIT_REMOTENAME ?= xen +XEN_LINUX_GIT_REMOTEBRANCH ?= xen/dom0/hackery + +EXTRAVERSION ?= + +include buildconfigs/mk.linux-2.6-common diff -r c4c4ba857d8b -r 6227bf629626 docs/misc/xsm-flask.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docs/misc/xsm-flask.txt Mon Mar 02 18:26:56 2009 +0900 @@ -0,0 +1,148 @@ +These notes are compiled from xen-devel questions and postings that have occurred +since the inclusion of XSM. These notes are not intended to be definitive +documentation but should address many common problems that arise when +experimenting with XSM:FLASK. + +Xen XSM:FLASK configuration +--------------------------- + +1) cd xen-unstable.hg +2) edit Config.mk in the toplevel xen directory as follows: + + XSM_ENABLE ?= y + FLASK_ENABLE ?= y + ACM_SECURITY ?= n + +NB: Only one security module can be selected at a time. If no module is +selected, then the default DUMMY module will be enforced. 
The DUMMY module +only exercises the security framework and does not enforce any security +policies. Changing the security module selection will require recompiling xen. +These settings will also configure the corresponding toolchain support. + +3) make xen +4) make tools + + +Xen XSM:FLASK policy +-------------------- + +These instructions will enable the configuration and build of the sample policy. +The sample policy provides the MINIMUM policy necessary to boot a +paravirtualized dom0 and create a paravirtualized domU. Many of the +default capabilities and usages supported by dom0/domU are disallowed by the +sample policy. Further, the policy is comprised of a limited number of types and +must be adjusted to meet the specific security goals of the installation. +Modification of the policy is straightforward and is covered in a later section. + +NB: The policy is not automatically built as part of the tool support because +of an external dependency on the checkpolicy compiler. The FLASK policy uses +the same syntax and structure as SELinux and compiling the policy relies on +the SELinux policy toolchain. This toolchain is available under many +distributions as well as the following URL, + + http://userspace.selinuxproject.org/releases/20080909/stable/checkpolicy-1.34.7.tar.gz + +1) cd xen-unstable.hg/tools/flask/policy +2) make policy +3) cp policy.20 /boot/xenpolicy.20 +4) edit /etc/grub.conf, add a module line to the xen entry, + + module /xenpolicy.20 + +5) reboot, and select the updated xen entry + +NB: The module entry can be inserted on any line after the xen kernel line. Typical +configurations use the last module entry or the module entry that immediately +follows the xen kernel entry. 
+ +Xen configuration of xend +------------------------- + +1) cd /etc/xen +2) edit xend-config.sxp +3) uncomment the line containing the key:value pair entry, + + #(xsm_module_name dummy) + +4) change the value entry to 'flask' + + (xsm_module_name flask) + +5) restart xend + +Creating policy controlled domains +---------------------------------- + +1) Edit the domain config file and add the following entry, + + access_control = ["policy=,label=system_u:object_r:domU_t"] + +NB: The 'policy' field is not used by XSM:FLASK. The 'label' must exist in the +loaded policy. 'system_u:object_r:domU_t' is one of the existing labels from +the sample policy and is shown for example purposes. + +2) Create the domain using the 'xm create' command. +3) Use the 'xm list -l' command to list the running domains and their labels. + +Updating the XSM:FLASK policy +----------------------------- + +It is recommended that the XSM:FLASK policy be tailored to meet the specific +security goals of the platform. The policy is tailored by editing the xen.te +file in the 'policy' subdirectory. + +1) cd xen-unstable.hg/tools/flask/policy +2) edit policy/modules/xen/xen.te - make changes to support platform security goals. +3) make policy +4) cp policy.20 /boot/xenpolicy.20 +5) reboot + +Alternatively, one may reload the policy using the 'flask_loadpolicy' tool +installed by the xen tools. + +1) flask_loadpolicy policy.20 + +NB: The sample policy permits policy reloads as well as general manipulation of +the Flask security server only from dom0. The policy can be tailored further to +restrict policy reloads and other manipulations to boot-time only, by removing +the corresponding statements from the policy. + +Enforcing the XSM:FLASK policy +------------------------------ + +By default, XSM:FLASK is compiled and installed in permissive mode. The +configuration below will allow an XSM:FLASK system to start in enforcing mode. 
+ +1) edit /etc/grub.conf +2) append the parameter 'flask_enforcing=1' to the xen kernel line. +3) reboot, and select the updated xen entry + + +Additional notes on XSM:FLASK +----------------------------- + +1) xen command line parameters + + a) flask_enforcing + + The default value for flask_enforcing is '0'. This value causes the + platform to boot in permissive mode which means that the policy is loaded + but not enforced. This mode is often helpful for developing new systems + and policies as the policy violations are reported on the xen console and + may be viewed in dom0 through 'xm dmesg'. + + To boot the platform into enforcing mode, which means that the policy is + loaded and enforced, append 'flask_enforcing=1' on the grub line. + + This parameter may also be changed through the flask hypercall. + + b) flask_enabled + + The default value for flask_enabled is '1'. This parameter causes the + platform to enable the FLASK security module under the XSM framework. + The parameter may be enabled/disabled only once per boot. If the parameter + is set to '0', only a reboot can re-enable flask. When flask_enabled is '0' + the DUMMY module is enforced. + + This parameter may also be changed through the flask hypercall, but this may + only be done once per boot. 
diff -r c4c4ba857d8b -r 6227bf629626 tools/console/daemon/io.c --- a/tools/console/daemon/io.c Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/console/daemon/io.c Mon Mar 02 18:26:56 2009 +0900 @@ -402,14 +402,28 @@ static int domain_create_tty(struct doma assert(dom->slave_fd == -1); assert(dom->master_fd == -1); - cfmakeraw(&term); - - if (openpty(&dom->master_fd, &dom->slave_fd, NULL, &term, NULL) < 0) { + if (openpty(&dom->master_fd, &dom->slave_fd, NULL, NULL, NULL) < 0) { err = errno; dolog(LOG_ERR, "Failed to create tty for domain-%d " "(errno = %i, %s)", dom->domid, err, strerror(err)); return 0; + } + + if (tcgetattr(dom->slave_fd, &term) < 0) { + err = errno; + dolog(LOG_ERR, "Failed to get tty attributes for domain-%d " + "(errno = %i, %s)", + dom->domid, err, strerror(err)); + goto out; + } + cfmakeraw(&term); + if (tcsetattr(dom->slave_fd, TCSANOW, &term) < 0) { + err = errno; + dolog(LOG_ERR, "Failed to set tty attributes for domain-%d " + "(errno = %i, %s)", + dom->domid, err, strerror(err)); + goto out; } if ((slave = ptsname(dom->master_fd)) == NULL) { diff -r c4c4ba857d8b -r 6227bf629626 tools/hotplug/Linux/init.d/xendomains --- a/tools/hotplug/Linux/init.d/xendomains Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/hotplug/Linux/init.d/xendomains Mon Mar 02 18:26:56 2009 +0900 @@ -213,7 +213,7 @@ start() start() { if [ -f $LOCKFILE ]; then - echo -n "xendomains already running (lockfile exists)" + echo -e "xendomains already running (lockfile exists)" return; fi @@ -230,10 +230,12 @@ start() HEADER=`head -c 16 $dom | head -n 1 2> /dev/null` if [ $HEADER = "LinuxGuestRecord" ]; then echo -n " ${dom##*/}" - xm restore $dom + XMR=`xm restore $dom 2>&1 1>/dev/null` + #xm restore $dom if [ $? -ne 0 ]; then + echo -e "\nAn error occurred while restoring domain ${dom##*/}:\n$XMR" rc_failed $? - echo -n '!' + echo -e '!' else # mv $dom ${dom%/*}/.${dom##*/} rm $dom @@ -241,7 +243,7 @@ start() fi fi done - echo . 
+ echo -e fi if contains_something "$XENDOMAINS_AUTO" @@ -264,16 +266,17 @@ start() if [ $? -eq 0 ] || is_running $dom; then echo -n "(skip)" else - xm create --quiet --defconfig $dom + XMC=`xm create --quiet --defconfig $dom` if [ $? -ne 0 ]; then + echo -e "\nAn error occurred while creating domain ${dom##*/}: $XMC\n" rc_failed $? - echo -n '!' + echo -e '!' else usleep $XENDOMAINS_CREATE_USLEEP fi fi done - fi + fi } all_zombies() @@ -293,18 +296,21 @@ all_zombies() # if it has not exited by that time kill it, so the init script will # succeed within a finite amount of time; if $2 is nonnull, it will # kill the command as well as soon as no domain (except for zombies) -# are left (used for shutdown --all). +# are left (used for shutdown --all). Third parameter, if any, suppresses +# output of dots per working state (formatting issues) watchdog_xm() { if test -z "$XENDOMAINS_STOP_MAXWAIT" -o "$XENDOMAINS_STOP_MAXWAIT" = "0"; then exit fi + usleep 20000 for no in `seq 0 $XENDOMAINS_STOP_MAXWAIT`; do # exit if xm save/migrate/shutdown is finished PSAX=`ps axlw | grep "xm $1" | grep -v grep` if test -z "$PSAX"; then exit; fi - echo -n "."; sleep 1 + if ! test -n "$3"; then echo -n '.'; fi + sleep 1 # go to kill immediately if there's only zombies left if all_zombies && test -n "$2"; then break; fi done @@ -312,10 +318,14 @@ watchdog_xm() read PSF PSUID PSPID PSPPID < <(echo "$PSAX") # kill xm $1 kill $PSPID >/dev/null 2>&1 + + echo -e . } stop() { + exec 3>&2 2> /dev/null + # Collect list of domains to shut down if test "$XENDOMAINS_AUTO_ONLY" = "true"; then rdnames @@ -333,7 +343,7 @@ stop() # nothing ;; (*) - echo -n '(skip)' + echo -e '(skip)' continue ;; esac @@ -345,8 +355,9 @@ stop() if test -n "$XENDOMAINS_SYSRQ"; then for sysrq in $XENDOMAINS_SYSRQ; do echo -n "(SR-$sysrq)" - xm sysrq $id $sysrq + XMR=`xm sysrq $id $sysrq 2>&1 1>/dev/null` if test $? -ne 0; then + echo -e "\nAn error occurred while doing sysrq on domain:\n$XMR\n" rc_failed $? echo -n '!' 
fi @@ -362,13 +373,18 @@ stop() echo -n "(migr)" watchdog_xm migrate & WDOG_PID=$! - xm migrate $id $XENDOMAINS_MIGRATE + XMR=`xm migrate $id $XENDOMAINS_MIGRATE 2>&1 1>/dev/null` if test $? -ne 0; then + echo -e "\nAn error occurred while migrating domain:\n$XMR\n" rc_failed $? - echo -n '!' + echo -e '!' + kill $WDOG_PID >/dev/null 2>&1 else kill $WDOG_PID >/dev/null 2>&1 + + echo -e . + usleep 1000 continue fi fi @@ -377,13 +393,16 @@ stop() watchdog_xm save & WDOG_PID=$! mkdir -p "$XENDOMAINS_SAVE" - xm save $id $XENDOMAINS_SAVE/$name + XMR=`xm save $id $XENDOMAINS_SAVE/$name 2>&1 1>/dev/null` if test $? -ne 0; then + echo -e "\nAn error occurred while saving domain:\n$XMR\n" rc_failed $? - echo -n '!' + echo -e '!' kill $WDOG_PID >/dev/null 2>&1 else kill $WDOG_PID >/dev/null 2>&1 + echo -e . + usleep 1000 continue fi fi @@ -392,10 +411,11 @@ stop() echo -n "(shut)" watchdog_xm shutdown & WDOG_PID=$! - xm shutdown $id $XENDOMAINS_SHUTDOWN + XMR=`xm shutdown $id $XENDOMAINS_SHUTDOWN 2>&1 1>/dev/null` if test $? -ne 0; then + echo -e "\nAn error occurred while shutting down domain:\n$XMR\n" rc_failed $? - echo -n '!' + echo -e '!' fi kill $WDOG_PID >/dev/null 2>&1 fi @@ -408,18 +428,21 @@ stop() if ! all_zombies && test -n "$XENDOMAINS_SHUTDOWN_ALL"; then # XENDOMAINS_SHUTDOWN_ALL should be "--all --halt --wait" echo -n " SHUTDOWN_ALL " - watchdog_xm shutdown 1 & + watchdog_xm shutdown 1 false & WDOG_PID=$! - xm shutdown $XENDOMAINS_SHUTDOWN_ALL + XMR=`xm shutdown $XENDOMAINS_SHUTDOWN_ALL 2>&1 1>/dev/null` if test $? -ne 0; then + echo -e "\nAn error occurred while shutting down all domains: $XMR\n" rc_failed $? - echo -n '!' + echo -e '!' 
fi kill $WDOG_PID >/dev/null 2>&1 fi # Unconditionally delete lock file rm -f $LOCKFILE + + exec 2>&3 } check_domain_up() diff -r c4c4ba857d8b -r 6227bf629626 tools/libxc/xc_ptrace.c --- a/tools/libxc/xc_ptrace.c Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/libxc/xc_ptrace.c Mon Mar 02 18:26:56 2009 +0900 @@ -149,173 +149,6 @@ online_vcpus_changed(uint64_t cpumap) } -/* --------------------- */ -/* XXX application state */ -static long nr_pages = 0; -static uint64_t *page_array = NULL; - -static uint64_t to_ma(int cpu, uint64_t maddr) -{ - return maddr; -} - -static void * -map_domain_va_32( - int xc_handle, - int cpu, - void *guest_va, - int perm) -{ - unsigned long l2e, l1e, l1p, p, va = (unsigned long)guest_va; - uint32_t *l2, *l1; - static void *v[MAX_VIRT_CPUS]; - - l2 = xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, PROT_READ, - xen_cr3_to_pfn(ctxt[cpu].c.ctrlreg[3])); - if ( l2 == NULL ) - return NULL; - - l2e = l2[l2_table_offset_i386(va)]; - munmap(l2, PAGE_SIZE); - if ( !(l2e & _PAGE_PRESENT) ) - return NULL; - l1p = to_ma(cpu, l2e); - l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l1p >> PAGE_SHIFT); - if ( l1 == NULL ) - return NULL; - - l1e = l1[l1_table_offset_i386(va)]; - munmap(l1, PAGE_SIZE); - if ( !(l1e & _PAGE_PRESENT) ) - return NULL; - p = to_ma(cpu, l1e); - if ( v[cpu] != NULL ) - munmap(v[cpu], PAGE_SIZE); - v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT); - if ( v[cpu] == NULL ) - return NULL; - - return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1))); -} - - -static void * -map_domain_va_pae( - int xc_handle, - int cpu, - void *guest_va, - int perm) -{ - uint64_t l3e, l2e, l1e, l2p, l1p, p; - unsigned long va = (unsigned long)guest_va; - uint64_t *l3, *l2, *l1; - static void *v[MAX_VIRT_CPUS]; - - l3 = xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, PROT_READ, - xen_cr3_to_pfn(ctxt[cpu].c.ctrlreg[3])); - if ( l3 == NULL ) - return NULL; - 
- l3e = l3[l3_table_offset_pae(va)]; - munmap(l3, PAGE_SIZE); - if ( !(l3e & _PAGE_PRESENT) ) - return NULL; - l2p = to_ma(cpu, l3e); - l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p >> PAGE_SHIFT); - if ( l2 == NULL ) - return NULL; - - l2e = l2[l2_table_offset_pae(va)]; - munmap(l2, PAGE_SIZE); - if ( !(l2e & _PAGE_PRESENT) ) - return NULL; - l1p = to_ma(cpu, l2e); - l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l1p >> PAGE_SHIFT); - if ( l1 == NULL ) - return NULL; - - l1e = l1[l1_table_offset_pae(va)]; - munmap(l1, PAGE_SIZE); - if ( !(l1e & _PAGE_PRESENT) ) - return NULL; - p = to_ma(cpu, l1e); - if ( v[cpu] != NULL ) - munmap(v[cpu], PAGE_SIZE); - v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT); - if ( v[cpu] == NULL ) - return NULL; - - return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1))); -} - -#ifdef __x86_64__ -static void * -map_domain_va_64( - int xc_handle, - int cpu, - void *guest_va, - int perm) -{ - unsigned long l4e, l3e, l2e, l1e, l3p, l2p, l1p, p, va = (unsigned long)guest_va; - uint64_t *l4, *l3, *l2, *l1; - static void *v[MAX_VIRT_CPUS]; - - if ((ctxt[cpu].c.ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */ - return map_domain_va_32(xc_handle, cpu, guest_va, perm); - - l4 = xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, PROT_READ, - xen_cr3_to_pfn(ctxt[cpu].c.ctrlreg[3])); - if ( l4 == NULL ) - return NULL; - - l4e = l4[l4_table_offset(va)]; - munmap(l4, PAGE_SIZE); - if ( !(l4e & _PAGE_PRESENT) ) - return NULL; - l3p = to_ma(cpu, l4e); - l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l3p >> PAGE_SHIFT); - if ( l3 == NULL ) - return NULL; - - l3e = l3[l3_table_offset(va)]; - munmap(l3, PAGE_SIZE); - if ( !(l3e & _PAGE_PRESENT) ) - return NULL; - l2p = to_ma(cpu, l3e); - l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p >> PAGE_SHIFT); - if ( l2 == NULL ) - return 
NULL; - - l2e = l2[l2_table_offset(va)]; - munmap(l2, PAGE_SIZE); - if ( !(l2e & _PAGE_PRESENT) ) - return NULL; - l1p = to_ma(cpu, l2e); - if (l2e & 0x80) { /* 2M pages */ - p = to_ma(cpu, l1p + (l1_table_offset(va) << PAGE_SHIFT)); - } else { /* 4K pages */ - l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l1p >> PAGE_SHIFT); - if ( l1 == NULL ) - return NULL; - - l1e = l1[l1_table_offset(va)]; - munmap(l1, PAGE_SIZE); - if ( !(l1e & _PAGE_PRESENT) ) - return NULL; - p = to_ma(cpu, l1e); - } - if ( v[cpu] != NULL ) - munmap(v[cpu], PAGE_SIZE); - v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT); - if ( v[cpu] == NULL ) - return NULL; - - return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1))); -} -#endif static void * map_domain_va( @@ -324,68 +157,30 @@ map_domain_va( void *guest_va, int perm) { - unsigned long va = (unsigned long) guest_va; - long npgs = xc_get_tot_pages(xc_handle, current_domid); - static enum { MODE_UNKNOWN, MODE_64, MODE_32, MODE_PAE } mode; - - if ( mode == MODE_UNKNOWN ) - { - xen_capabilities_info_t caps; - (void)xc_version(xc_handle, XENVER_capabilities, caps); - if ( strstr(caps, "-x86_64") ) - mode = MODE_64; - else if ( strstr(caps, "-x86_32p") ) - mode = MODE_PAE; - else if ( strstr(caps, "-x86_32") ) - mode = MODE_32; - } - - if ( nr_pages != npgs ) - { - if ( nr_pages > 0 ) - free(page_array); - nr_pages = npgs; - if ( (page_array = malloc(nr_pages * sizeof(*page_array))) == NULL ) - { - IPRINTF("Could not allocate memory\n"); - return NULL; - } - if ( xc_get_pfn_list(xc_handle, current_domid, - page_array, nr_pages) != nr_pages ) - { - IPRINTF("Could not get the page frame list\n"); - return NULL; - } - } - - if (fetch_regs(xc_handle, cpu, NULL)) + unsigned long va = (unsigned long)guest_va; + unsigned long mfn; + void *map; + + /* cross page boundary */ + if ( (va & ~PAGE_MASK) + sizeof(long) > PAGE_SIZE ) return NULL; - if (!paging_enabled(&ctxt[cpu])) { - 
static void * v; - uint64_t page; - - if ( v != NULL ) - munmap(v, PAGE_SIZE); - - page = to_ma(cpu, va); - - v = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, - perm, page >> PAGE_SHIFT); - - if ( v == NULL ) - return NULL; - - return (void *)(((unsigned long)v) | (va & BSD_PAGE_MASK)); - } -#ifdef __x86_64__ - if ( mode == MODE_64 ) - return map_domain_va_64(xc_handle, cpu, guest_va, perm); -#endif - if ( mode == MODE_PAE ) - return map_domain_va_pae(xc_handle, cpu, guest_va, perm); - /* else ( mode == MODE_32 ) */ - return map_domain_va_32(xc_handle, cpu, guest_va, perm); + mfn = xc_translate_foreign_address(xc_handle, current_domid, cpu, va); + if ( mfn == 0 ) + return NULL; + + map = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, + perm, mfn); + if (map == NULL) + return NULL; + + return map + (va & ~PAGE_MASK); +} + +static void +unmap_domain_va(void *guest_va) +{ + munmap((void *)((unsigned long)guest_va & PAGE_MASK), PAGE_SIZE); } int control_c_pressed_flag = 0; @@ -473,6 +268,8 @@ xc_ptrace( if ( guest_va == NULL ) goto out_error; retval = *guest_va; + if (!current_isfile) + unmap_domain_va(guest_va); break; case PTRACE_POKETEXT: @@ -486,7 +283,9 @@ xc_ptrace( xc_handle, cpu, addr, PROT_READ|PROT_WRITE); if ( guest_va == NULL ) goto out_error; - *guest_va = (unsigned long)data; + *guest_va = edata; + if (!current_isfile) + unmap_domain_va(guest_va); break; case PTRACE_GETREGS: diff -r c4c4ba857d8b -r 6227bf629626 tools/misc/xenpm.c --- a/tools/misc/xenpm.c Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/misc/xenpm.c Mon Mar 02 18:26:56 2009 +0900 @@ -303,35 +303,34 @@ static void signal_int_handler(int signo printf("Elapsed time (ms): %"PRIu64"\n", (usec_end - usec_start) / 1000UL); for ( i = 0; i < max_cpu_nr; i++ ) { - uint64_t temp; - printf("CPU%d:\n\tresidency\tpercentage\n", i); - if ( cx_cap ) + uint64_t res, triggers; + double avg_res; + + printf("\nCPU%d:\tResidency(ms)\t\tAvg Res(ms)\n",i); + if ( cx_cap && sum_cx[i] > 0 ) { 
for ( j = 0; j < cxstat_end[i].nr; j++ ) { - if ( sum_cx[i] > 0 ) - { - temp = cxstat_end[i].residencies[j] - - cxstat_start[i].residencies[j]; - printf(" C%d\t%"PRIu64" ms\t%.2f%%\n", j, - temp / 1000000UL, 100UL * temp / (double)sum_cx[i]); - } + res = cxstat_end[i].residencies[j] - + cxstat_start[i].residencies[j]; + triggers = cxstat_end[i].triggers[j] - + cxstat_start[i].triggers[j]; + avg_res = (triggers==0) ? 0: (double)res/triggers/1000000.0; + printf(" C%d\t%"PRIu64"\t(%5.2f%%)\t%.2f\n", j, res/1000000UL, + 100 * res / (double)sum_cx[i], avg_res ); } + printf("\n"); } - if ( px_cap ) + if ( px_cap && sum_px[i]>0 ) { for ( j = 0; j < pxstat_end[i].total; j++ ) { - if ( sum_px[i] > 0 ) - { - temp = pxstat_end[i].pt[j].residency - - pxstat_start[i].pt[j].residency; - printf(" P%d\t%"PRIu64" ms\t%.2f%%\n", j, - temp / 1000000UL, 100UL * temp / (double)sum_px[i]); - } + res = pxstat_end[i].pt[j].residency - + pxstat_start[i].pt[j].residency; + printf(" P%d\t%"PRIu64"\t(%5.2f%%)\n", j, + res / 1000000UL, 100UL * res / (double)sum_px[i]); } } - printf("\n"); } /* some clean up and then exits */ @@ -408,6 +407,7 @@ void start_gather_func(int argc, char *a free(cxstat); return ; } + printf("Start sampling, waiting for CTRL-C or SIGINT signal ...\n"); pause(); } diff -r c4c4ba857d8b -r 6227bf629626 tools/pygrub/src/pygrub --- a/tools/pygrub/src/pygrub Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/pygrub/src/pygrub Mon Mar 02 18:26:56 2009 +0900 @@ -501,7 +501,7 @@ def get_entry_idx(cf, entry): return None -def run_grub(file, entry, fs): +def run_grub(file, entry, fs, arg): global g global sel @@ -534,7 +534,7 @@ def run_grub(file, entry, fs): if img.initrd: grubcfg["ramdisk"] = img.initrd[1] if img.args: - grubcfg["args"] = img.args + grubcfg["args"] = img.args + " " + arg return grubcfg @@ -659,7 +659,7 @@ if __name__ == "__main__": chosencfg = sniff_solaris(fs, incfg) if not chosencfg["kernel"]: - chosencfg = run_grub(file, entry, fs) + chosencfg = run_grub(file, 
entry, fs, incfg["args"]) data = fs.open_file(chosencfg["kernel"]).read() (tfd, bootcfg["kernel"]) = tempfile.mkstemp(prefix="boot_kernel.", diff -r c4c4ba857d8b -r 6227bf629626 tools/python/xen/util/vscsi_util.py --- a/tools/python/xen/util/vscsi_util.py Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/python/xen/util/vscsi_util.py Mon Mar 02 18:26:56 2009 +0900 @@ -78,8 +78,38 @@ def _vscsi_get_hctl_by(phyname, scsi_dev return (None, None) -def vscsi_get_scsidevices(): - """ get all scsi devices""" +def _vscsi_get_scsiid(sg): + scsi_id = os.popen('/sbin/scsi_id -gu -s /class/scsi_generic/' + sg).read().split() + if len(scsi_id): + return scsi_id[0] + return None + + +def _vscsi_get_scsidevices_by_lsscsi(option = ""): + """ get all scsi devices information by lsscsi """ + + devices = [] + + for scsiinfo in os.popen('lsscsi -g %s' % option).readlines(): + s = scsiinfo.split() + hctl = s[0][1:-1] + try: + devname = s[-2].split('/dev/')[1] + except IndexError: + devname = None + try: + sg = s[-1].split('/dev/')[1] + scsi_id = _vscsi_get_scsiid(sg) + except IndexError: + sg = None + scsi_id = None + devices.append([hctl, devname, sg, scsi_id]) + + return devices + + +def _vscsi_get_scsidevices_by_sysfs(): + """ get all scsi devices information by sysfs """ devices = [] sysfs_mnt = utils.find_sysfs_mount() @@ -100,18 +130,29 @@ def vscsi_get_scsidevices(): if re.match('^scsi_generic', f): sg = os.path.basename(realpath) - lines = os.popen('/sbin/scsi_id -gu -s /class/scsi_generic/' + sg).read().split() - if len(lines): - scsi_id = lines[0] - + scsi_id = _vscsi_get_scsiid(sg) devices.append([hctl, devname, sg, scsi_id]) return devices + + +def vscsi_get_scsidevices(): + """ get all scsi devices information """ + + devices = _vscsi_get_scsidevices_by_lsscsi("") + if devices: + return devices + return _vscsi_get_scsidevices_by_sysfs() def vscsi_get_hctl_and_devname_by(target, scsi_devices = None): if scsi_devices is None: - scsi_devices = vscsi_get_scsidevices() + if 
len(target.split(':')) == 4: + scsi_devices = _vscsi_get_scsidevices_by_lsscsi(target) + elif target.startswith('/dev/'): + scsi_devices = _vscsi_get_scsidevices_by_lsscsi("| grep %s" % target) + else: + scsi_devices = vscsi_get_scsidevices() if len(target.split(':')) == 4: return _vscsi_get_devname_by(target, scsi_devices) diff -r c4c4ba857d8b -r 6227bf629626 tools/python/xen/xend/XendAPI.py --- a/tools/python/xen/xend/XendAPI.py Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/python/xen/xend/XendAPI.py Mon Mar 02 18:26:56 2009 +0900 @@ -29,6 +29,7 @@ import XendDomain, XendDomainInfo, XendN import XendDomain, XendDomainInfo, XendNode, XendDmesg import XendLogging, XendTaskManager, XendAPIStore +from xen.xend import uuid as genuuid from XendAPIVersion import * from XendAuthSessions import instance as auth_manager from XendError import * @@ -1867,7 +1868,7 @@ class XendAPI(object): dom = xendom.get_vm_by_uuid(vbd_struct['VM']) vdi = xennode.get_vdi_by_uuid(vbd_struct['VDI']) if not vdi: - return xen_api_error(['HANDLE_INVALID', 'VDI', vdi_ref]) + return xen_api_error(['HANDLE_INVALID', 'VDI', vbd_struct['VDI']]) # new VBD via VDI/SR vdi_image = vdi.get_location() @@ -2392,7 +2393,7 @@ class XendAPI(object): tpmif.destroy_vtpmstate(dom.getName()) return xen_api_success_void() else: - return xen_api_error(['HANDLE_INVALID', 'VM', vtpm_struct['VM']]) + return xen_api_error(['HANDLE_INVALID', 'VTPM', vtpm_ref]) # class methods def VTPM_create(self, session, vtpm_struct): @@ -2614,7 +2615,7 @@ class XendAPI(object): return xen_api_success_void() def event_unregister(self, session, unreg_classes): - event_unregister(session, reg_classes) + event_unregister(session, unreg_classes) return xen_api_success_void() def event_next(self, session): @@ -2641,7 +2642,7 @@ class XendAPI(object): return xen_api_error(['DEBUG_FAIL', session]) def debug_create(self, session): - debug_uuid = uuid.createString() + debug_uuid = genuuid.createString() self._debug[debug_uuid] = None return 
xen_api_success(debug_uuid) diff -r c4c4ba857d8b -r 6227bf629626 tools/python/xen/xend/XendBootloader.py --- a/tools/python/xen/xend/XendBootloader.py Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/python/xen/xend/XendBootloader.py Mon Mar 02 18:26:56 2009 +0900 @@ -67,9 +67,23 @@ def bootloader(blexec, disk, dom, quiet # listening on the bootloader's fifo for the results. (m1, s1) = pty.openpty() - tty.setraw(m1); - fcntl.fcntl(m1, fcntl.F_SETFL, os.O_NDELAY); - os.close(s1) + + # On Solaris, the pty master side will get cranky if we try + # to write to it while there is no slave. To work around this, + # keep the slave descriptor open until we're done. Set it + # to raw terminal parameters, otherwise it will echo back + # characters, which will confuse the I/O loop below. + # Furthermore, a raw master pty device has no terminal + # semantics on Solaris, so don't try to set any attributes + # for it. + if os.uname()[0] != 'SunOS' and os.uname()[0] != 'NetBSD': + tty.setraw(m1) + os.close(s1) + else: + tty.setraw(s1) + + fcntl.fcntl(m1, fcntl.F_SETFL, os.O_NDELAY) + slavename = ptsname.ptsname(m1) dom.storeDom("console/tty", slavename) @@ -108,7 +122,11 @@ def bootloader(blexec, disk, dom, quiet # record that this domain is bootloading dom.bootloader_pid = child - tty.setraw(m2); + # On Solaris, the master pty side does not have terminal semantics, + # so don't try to set any attributes, as it will fail. 
+ if os.uname()[0] != 'SunOS': + tty.setraw(m2); + fcntl.fcntl(m2, fcntl.F_SETFL, os.O_NDELAY); while True: try: @@ -117,32 +135,55 @@ def bootloader(blexec, disk, dom, quiet if e.errno == errno.EINTR: continue break + + fcntl.fcntl(r, fcntl.F_SETFL, os.O_NDELAY); + ret = "" inbuf=""; outbuf=""; - while True: - sel = select.select([r, m1, m2], [m1, m2], []) + # filedescriptors: + # r - input from the bootloader (bootstring output) + # m1 - input/output from/to xenconsole + # m2 - input/output from/to pty that controls the bootloader + # The filedescriptors are NDELAY, so it's ok to try to read + # bigger chunks than may be available, to keep e.g. curses + # screen redraws in the bootloader efficient. m1 is the side that + # gets xenconsole input, which will be keystrokes, so a small number + # is sufficient. m2 is pygrub output, which will be curses screen + # updates, so a larger number (1024) is appropriate there. + # + # For writeable descriptors, only include them in the set for select + # if there is actual data to write, otherwise this would loop too fast, + # eating up CPU time. 
+ + while True: + wsel = [] + if len(outbuf) != 0: + wsel = wsel + [m1] + if len(inbuf) != 0: + wsel = wsel + [m2] + sel = select.select([r, m1, m2], wsel, []) try: if m1 in sel[0]: - s = os.read(m1, 1) + s = os.read(m1, 16) inbuf += s - if m2 in sel[1] and len(inbuf) != 0: - os.write(m2, inbuf[0]) - inbuf = inbuf[1:] + if m2 in sel[1]: + n = os.write(m2, inbuf) + inbuf = inbuf[n:] except OSError, e: if e.errno == errno.EIO: pass try: if m2 in sel[0]: - s = os.read(m2, 1) + s = os.read(m2, 1024) outbuf += s - if m1 in sel[1] and len(outbuf) != 0: - os.write(m1, outbuf[0]) - outbuf = outbuf[1:] + if m1 in sel[1]: + n = os.write(m1, outbuf) + outbuf = outbuf[n:] except OSError, e: if e.errno == errno.EIO: pass if r in sel[0]: - s = os.read(r, 1) + s = os.read(r, 128) ret = ret + s if len(s) == 0: break @@ -152,6 +193,8 @@ def bootloader(blexec, disk, dom, quiet os.close(r) os.close(m2) os.close(m1) + if os.uname()[0] == 'SunOS' or os.uname()[0] == 'NetBSD': + os.close(s1) os.unlink(fifo) # Re-acquire the lock to cover the changes we're about to make diff -r c4c4ba857d8b -r 6227bf629626 tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/python/xen/xend/XendConfig.py Mon Mar 02 18:26:56 2009 +0900 @@ -158,6 +158,7 @@ XENAPI_PLATFORM_CFG_TYPES = { 'vncdisplay': int, 'vnclisten': str, 'timer_mode': int, + 'vpt_align': int, 'viridian': int, 'vncpasswd': str, 'vncunused': int, @@ -430,6 +431,8 @@ class XendConfig(dict): def _vcpus_sanity_check(self): if 'VCPUs_max' in self and 'vcpu_avail' not in self: self['vcpu_avail'] = (1 << self['VCPUs_max']) - 1 + if 'online_vcpus' in self: + self['VCPUs_live'] = self['online_vcpus'] def _uuid_sanity_check(self): """Make sure UUID is in proper string format with hyphens.""" @@ -459,6 +462,8 @@ class XendConfig(dict): self['platform']['rtc_timeoffset'] = 0 if 'hpet' not in self['platform']: self['platform']['hpet'] = 0 + if 'vpt_align' not in self['platform']: + 
self['platform']['vpt_align'] = 1 if 'loader' not in self['platform']: # Old configs may have hvmloader set as PV_kernel param if self.has_key('PV_kernel') and self['PV_kernel'] != '': @@ -1269,6 +1274,7 @@ class XendConfig(dict): uuid.createString()) vscsi_dict = self.vscsi_convert_sxp_to_dict(config) vscsi_devs = vscsi_dict['devs'] + vscsi_mode = vscsi_dict['feature-host'] # create XenAPI DSCSI objects. for vscsi_dev in vscsi_devs: @@ -1283,9 +1289,14 @@ class XendConfig(dict): } XendDSCSI(dscsi_uuid, dscsi_record) - target['devices'][vscsi_devs_uuid] = \ - (dev_type, {'devs': vscsi_devs, 'uuid': vscsi_devs_uuid} ) - log.debug("XendConfig: reading device: %s" % vscsi_devs) + vscsi_info = { + 'devs': vscsi_devs, + 'feature-host': vscsi_mode, + 'uuid': vscsi_devs_uuid + } + target['devices'][vscsi_devs_uuid] = (dev_type, vscsi_info) + log.debug("XendConfig: reading device: %s,%s" % \ + (vscsi_devs, vscsi_mode)) return vscsi_devs_uuid for opt_val in config[1:]: @@ -1575,7 +1586,7 @@ class XendConfig(dict): try: opt, val = opt_val pci_dev_info[opt] = val - except TypeError: + except (TypeError, ValueError): pass # append uuid for each pci device. 
dpci_uuid = pci_dev_info.get('uuid', uuid.createString()) @@ -1609,6 +1620,7 @@ class XendConfig(dict): # # [device, # [vscsi, + # [feature-host, 0], # [dev, # [devid, 0], [p-devname, sdb], [p-dev, 1:0:0:1], # [v-dev, 0:0:0:0], [state, 1] @@ -1619,6 +1631,7 @@ class XendConfig(dict): # ] # ], # [vscsi, + # [feature-host, 1], # [dev, # [devid, 1], [p-devname, sdg], [p-dev, 2:0:0:0], # [v-dev, 1:0:0:0], [state, 1] @@ -1639,6 +1652,7 @@ class XendConfig(dict): # # [device, # [vscsi, + # [feature-host, 0], # [dev, # [devid, 0], [p-devname, sdd], [p-dev, 1:0:0:3], # [v-dev, 0:0:0:2], [state, 1] @@ -1653,7 +1667,8 @@ class XendConfig(dict): # The Dict looks like this: # # { devs: [ {devid: 0, p-devname: sdd, p-dev: 1:0:0:3, - # v-dev: 0:0:0:2, state: 1} ] } + # v-dev: 0:0:0:2, state: 1} ], + # feature-host: 1 } dev_config = {} @@ -1672,6 +1687,9 @@ class XendConfig(dict): vscsi_devs.append(vscsi_dev_info) dev_config['devs'] = vscsi_devs + vscsi_mode = sxp.children(dev_sxp, 'feature-host')[0] + dev_config['feature-host'] = vscsi_mode[1] + return dev_config def console_add(self, protocol, location, other_config = {}): @@ -1784,6 +1802,7 @@ class XendConfig(dict): if dev_type == 'vscsi': # Special case for vscsi vscsi_dict = self.vscsi_convert_sxp_to_dict(config) vscsi_devs = vscsi_dict['devs'] + vscsi_mode = vscsi_dict['feature-host'] # destroy existing XenAPI DSCSI objects for dscsi_uuid in XendDSCSI.get_by_VM(self['uuid']): @@ -1802,8 +1821,12 @@ class XendConfig(dict): } XendDSCSI(dscsi_uuid, dscsi_record) - self['devices'][dev_uuid] = \ - (dev_type, {'devs': vscsi_devs, 'uuid': dev_uuid} ) + vscsi_info = { + 'devs': vscsi_devs, + 'feature-host': vscsi_mode, + 'uuid': dev_uuid + } + self['devices'][dev_uuid] = (dev_type, vscsi_info) return True for opt_val in config[1:]: @@ -1880,7 +1903,6 @@ class XendConfig(dict): def all_devices_sxpr(self, target = None): """Returns the SXPR for all devices in the current configuration.""" sxprs = [] - pci_devs = [] if target == 
None: target = self @@ -1895,7 +1917,8 @@ class XendConfig(dict): if dev_type == 'pci': sxpr = ['pci', ['uuid', dev_info['uuid']]] elif dev_type == 'vscsi': - sxpr = ['vscsi', ['uuid', dev_info['uuid']]] + sxpr = ['vscsi', ['uuid', dev_info['uuid']], + ['feature-host', dev_info['feature-host']]] for pci_dev_info in dev_info['devs']: pci_dev_sxpr = ['dev'] for opt, val in pci_dev_info.items(): diff -r c4c4ba857d8b -r 6227bf629626 tools/python/xen/xend/XendConstants.py --- a/tools/python/xen/xend/XendConstants.py Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/python/xen/xend/XendConstants.py Mon Mar 02 18:26:56 2009 +0900 @@ -50,6 +50,7 @@ HVM_PARAM_TIMER_MODE = 10 HVM_PARAM_TIMER_MODE = 10 HVM_PARAM_HPET_ENABLED = 11 HVM_PARAM_ACPI_S_STATE = 14 +HVM_PARAM_VPT_ALIGN = 16 restart_modes = [ "restart", diff -r c4c4ba857d8b -r 6227bf629626 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/python/xen/xend/XendDomainInfo.py Mon Mar 02 18:26:56 2009 +0900 @@ -898,15 +898,21 @@ class XendDomainInfo: else: cur_dev_sxp = self._getDeviceInfo_vscsi(req_devid, None) new_dev_sxp = ['vscsi'] + cur_mode = sxp.children(cur_dev_sxp, 'feature-host')[0] + new_dev_sxp.append(cur_mode) + for cur_dev in sxp.children(cur_dev_sxp, 'dev'): if state == xenbusState['Closing']: + if int(cur_mode[1]) == 1: + continue cur_dev_vdev = sxp.child_value(cur_dev, 'v-dev') if cur_dev_vdev == dev['v-dev']: continue new_dev_sxp.append(cur_dev) if state == xenbusState['Initialising']: - new_dev_sxp.append(sxp.child0(dev_sxp, 'dev')) + for new_dev in sxp.children(dev_sxp, 'dev'): + new_dev_sxp.append(new_dev) dev_uuid = sxp.child_value(cur_dev_sxp, 'uuid') self.info.device_update(dev_uuid, new_dev_sxp) @@ -1112,7 +1118,8 @@ class XendDomainInfo: vscsi_dev.append(['frontstate', None]) vscsi_devs[1].append(vscsi_dev) dev_num = int(sxp.child_value(vscsi_dev, 'devid')) - sxprs.append([dev_num, [vscsi_devs]]) + vscsi_mode = 
sxp.children(dev_info, 'feature-host')[0] + sxprs.append([dev_num, [vscsi_devs, vscsi_mode]]) elif deviceClass == 'vbd': dev = sxp.child_value(dev_info, 'dev') if 'ioemu:' in dev: @@ -2236,6 +2243,12 @@ class XendDomainInfo: if hvm and hpet is not None: xc.hvm_set_param(self.domid, HVM_PARAM_HPET_ENABLED, long(hpet)) + + # Optionally enable periodic vpt aligning + vpt_align = self.info["platform"].get("vpt_align") + if hvm and vpt_align is not None: + xc.hvm_set_param(self.domid, HVM_PARAM_VPT_ALIGN, + long(vpt_align)) # Set maximum number of vcpus in domain xc.domain_max_vcpus(self.domid, int(self.info['VCPUs_max'])) diff -r c4c4ba857d8b -r 6227bf629626 tools/python/xen/xend/server/vscsiif.py --- a/tools/python/xen/xend/server/vscsiif.py Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/python/xen/xend/server/vscsiif.py Mon Mar 02 18:26:56 2009 +0900 @@ -68,6 +68,8 @@ class VSCSIController(DevController): vscsi_config.append(['devs', devs]) state = self.readFrontend(devid, 'state') vscsi_config.append(['state', state]) + hostmode = self.readBackend(devid, 'feature-host') + vscsi_config.append(['feature-host', hostmode]) backid = self.readFrontend(devid, 'backend-id') vscsi_config.append(['backend-id', backid]) backpath = self.readFrontend(devid, 'backend') @@ -98,6 +100,8 @@ class VSCSIController(DevController): devid = vscsi_config.get('devid', '') back[devpath + '/devid'] = str(devid) + host_mode = config.get('feature-host','') + back['feature-host'] = str(host_mode) back['uuid'] = config.get('uuid','') devid = int(devid) return (devid, back, {}) @@ -133,6 +137,7 @@ class VSCSIController(DevController): vscsi_devs.append(dev_dict) config['devs'] = vscsi_devs + config['feature-host'] = self.readBackend(devid, 'feature-host') config['uuid'] = self.readBackend(devid, 'uuid') return config @@ -171,6 +176,7 @@ class VSCSIController(DevController): vscsi_config = config['devs'][0] state = vscsi_config.get('state', xenbusState['Unknown']) driver_state = 
self.readBackend(devid, 'state') + if str(xenbusState['Connected']) != driver_state: raise VmError("Driver status is not connected") @@ -182,13 +188,20 @@ class VSCSIController(DevController): elif state == xenbusState['Closing']: found = False devs = self.readBackendList(devid, "vscsi-devs") + hostmode = int(self.readBackend(devid, 'feature-host')) vscsipath = "vscsi-devs/" vdev = vscsi_config.get('v-dev', '') for dev in devs: devpath = vscsipath + dev old_vdev = self.readBackend(devid, devpath + '/v-dev') - if vdev == old_vdev: + + if hostmode == 1: + #At hostmode, all v-dev that belongs to devid is deleted. + found = True + self.writeBackend(devid, devpath + '/state', \ + str(xenbusState['Closing'])) + elif vdev == old_vdev: found = True self.writeBackend(devid, devpath + '/state', \ str(xenbusState['Closing'])) diff -r c4c4ba857d8b -r 6227bf629626 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/python/xen/xm/create.py Mon Mar 02 18:26:56 2009 +0900 @@ -218,6 +218,10 @@ gopts.var('timer_mode', val='TIMER_MODE' fn=set_int, default=1, use="""Timer mode (0=delay virtual time when ticks are missed; 1=virtual time is always wallclock time.""") + +gopts.var('vpt_align', val='VPT_ALIGN', + fn=set_int, default=1, + use="Enable aligning all periodic vpt to reduce timer interrupts.") gopts.var('viridian', val='VIRIDIAN', fn=set_int, default=0, @@ -699,32 +703,19 @@ def configure_pci(config_devs, vals): config_pci.insert(0, 'pci') config_devs.append(['device', config_pci]) -def vscsi_convert_sxp_to_dict(dev_sxp): - dev_dict = {} - for opt_val in dev_sxp[1:]: - try: - opt, val = opt_val - dev_dict[opt] = val - except TypeError: - pass - return dev_dict - -def vscsi_lookup_devid(devlist, req_devid): - if len(devlist) == 0: - return 0 - else: - for devid, backend in devlist: - if devid == req_devid: - return 1 - return 0 - def configure_vscsis(config_devs, vals): """Create the config for vscsis (virtual scsi 
devices). """ - devidlist = [] - config_scsi = [] + + def get_devid(hctl): + return int(hctl.split(':')[0]) + if len(vals.vscsi) == 0: return 0 + + config_scsi = {} + pHCTL_list = [] + vHCTL_list = [] scsi_devices = vscsi_util.vscsi_get_scsidevices() for (p_dev, v_dev, backend) in vals.vscsi: @@ -734,34 +725,55 @@ def configure_vscsis(config_devs, vals): if p_hctl == None: raise ValueError('Cannot find device "%s"' % p_dev) - for config in config_scsi: - dev = vscsi_convert_sxp_to_dict(config) - if dev['v-dev'] == v_dev: - raise ValueError('The virtual device "%s" is already defined' % v_dev) - - v_hctl = v_dev.split(':') - devid = int(v_hctl[0]) - config_scsi.append(['dev', \ - ['state', xenbusState['Initialising']], \ - ['devid', devid], \ - ['p-dev', p_hctl], \ - ['p-devname', devname], \ - ['v-dev', v_dev] ]) - - if vscsi_lookup_devid(devidlist, devid) == 0: - devidlist.append([devid, backend]) - - for devid, backend in devidlist: - tmp = [] - for config in config_scsi: - dev = vscsi_convert_sxp_to_dict(config) - if dev['devid'] == devid: - tmp.append(config) - - tmp.insert(0, 'vscsi') - if backend: - tmp.append(['backend', backend]) - config_devs.append(['device', tmp]) + feature_host = 0 + if v_dev == 'host': + feature_host = 1 + scsi_info = [] + devid = get_devid(p_hctl) + for (pHCTL, devname, _, _) in scsi_devices: + if get_devid(pHCTL) == devid: + scsi_info.append([devid, pHCTL, devname, pHCTL]) + else: + scsi_info = [[get_devid(v_dev), p_hctl, devname, v_dev]] + + devid_key = scsi_info[0][0] + try: + config = config_scsi[devid_key] + except KeyError: + config = {'feature-host': feature_host, 'backend': backend, 'devs': []} + + devs = config['devs'] + for (devid, pHCTL, devname, vHCTL) in scsi_info: + if pHCTL in pHCTL_list: + raise ValueError('The physical device "%s" is already defined' % pHCTL) + if vHCTL in vHCTL_list: + raise ValueError('The virtual device "%s" is already defined' % vHCTL) + pHCTL_list.append(pHCTL) + vHCTL_list.append(vHCTL) + 
devs.append(['dev', \ + ['state', xenbusState['Initialising']], \ + ['devid', devid], \ + ['p-dev', pHCTL], \ + ['p-devname', devname], \ + ['v-dev', vHCTL] ]) + + if config['feature-host'] != feature_host: + raise ValueError('The physical device "%s" cannot define ' + 'because mode is different' % scsi_info[0][1]) + if config['backend'] != backend: + raise ValueError('The physical device "%s" cannot define ' + 'because backend is different' % scsi_info[0][1]) + + config['devs'] = devs + config_scsi[devid_key] = config + + for config in config_scsi.values(): + device = ['vscsi', ['feature-host', config['feature-host']]] + for dev in config['devs']: + device.append(dev) + if config['backend']: + device.append(['backend', config['backend']]) + config_devs.append(['device', device]) def configure_ioports(config_devs, vals): """Create the config for legacy i/o ranges. @@ -891,7 +903,8 @@ def configure_hvm(config_image, vals): 'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor', 'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci', 'hpet', 'guest_os_type', 'hap', 'opengl', 'cpuid', 'cpuid_check', - 'viridian', 'xen_extended_power_mgmt', 'pci_msitranslate' ] + 'viridian', 'xen_extended_power_mgmt', 'pci_msitranslate', + 'vpt_align' ] for a in args: if a in vals.__dict__ and vals.__dict__[a] is not None: @@ -1039,7 +1052,7 @@ def preprocess_vscsi(vals): n = len(d) if n == 2: tmp = d[1].split(':') - if len(tmp) != 4: + if d[1] != 'host' and len(tmp) != 4: err('vscsi syntax error "%s"' % d[1]) else: d.append(None) diff -r c4c4ba857d8b -r 6227bf629626 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/python/xen/xm/main.py Mon Mar 02 18:26:56 2009 +0900 @@ -2032,6 +2032,8 @@ def parse_dev_info(info): 'mac' : get_info('mac', str, '??'), #block-device specific 'ring-ref' : get_info('ring-ref', int, -1), + #vscsi specific + 'feature-host' : get_info('feature-host', int, -1), } def arg_check_for_resource_list(args, 
name): @@ -2275,14 +2277,14 @@ def xm_scsi_list(args): hdr = 0 for x in devs: if hdr == 0: - print "%-3s %-3s %-5s %-10s %-5s %-10s %-4s" \ - % ('Idx', 'BE', 'state', 'phy-hctl', 'phy', 'vir-hctl', 'devstate') + print "%-3s %-3s %-5s %-4s %-10s %-5s %-10s %-4s" \ + % ('Idx', 'BE', 'state', 'host', 'phy-hctl', 'phy', 'vir-hctl', 'devstate') hdr = 1 ni = parse_dev_info(x[1]) ni['idx'] = int(x[0]) for dev in x[1][0][1]: mi = vscsi_convert_sxp_to_dict(dev) - print "%(idx)-3d %(backend-id)-3d %(state)-5d " % ni, + print "%(idx)-3d %(backend-id)-3d %(state)-5d %(feature-host)-4d " % ni, print "%(p-dev)-10s %(p-devname)-5s %(v-dev)-10s %(frontstate)-4s" % mi def parse_block_configuration(args): @@ -2512,27 +2514,46 @@ def xm_pci_attach(args): server.xend.domain.device_configure(dom, pci) def parse_scsi_configuration(p_scsi, v_hctl, state): - v = v_hctl.split(':') - if len(v) != 4: - raise OptionError("Invalid argument: %s" % v_hctl) - - p_hctl = None - devname = None + def get_devid(hctl): + return int(hctl.split(':')[0]) + + host_mode = 0 + scsi_devices = None + if p_scsi is not None: + # xm scsi-attach + if v_hctl == "host": + host_mode = 1 + scsi_devices = vscsi_util.vscsi_get_scsidevices() + elif len(v_hctl.split(':')) != 4: + raise OptionError("Invalid argument: %s" % v_hctl) (p_hctl, devname) = \ - vscsi_util.vscsi_get_hctl_and_devname_by(p_scsi) + vscsi_util.vscsi_get_hctl_and_devname_by(p_scsi, scsi_devices) if p_hctl is None: raise OptionError("Cannot find device '%s'" % p_scsi) - - scsi = ['vscsi'] - scsi.append(['dev', \ - ['state', state], \ - ['devid', int(v[0])], \ - ['p-dev', p_hctl], \ - ['p-devname', devname], \ - ['v-dev', v_hctl] \ - ]) - + if host_mode: + scsi_info = [] + devid = get_devid(p_hctl) + for pHCTL, devname, _, _ in scsi_devices: + if get_devid(pHCTL) == devid: + scsi_info.append([devid, pHCTL, devname, pHCTL]) + else: + scsi_info = [[get_devid(v_hctl), p_hctl, devname, v_hctl]] + else: + # xm scsi-detach + if len(v_hctl.split(':')) != 4: + 
raise OptionError("Invalid argument: %s" % v_hctl) + scsi_info = [[get_devid(v_hctl), None, None, v_hctl]] + + scsi = ['vscsi', ['feature-host', host_mode]] + for devid, pHCTL, devname, vHCTL in scsi_info: + scsi.append(['dev', \ + ['state', state], \ + ['devid', devid], \ + ['p-dev', pHCTL], \ + ['p-devname', devname], \ + ['v-dev', vHCTL] \ + ]) return scsi def xm_scsi_attach(args): diff -r c4c4ba857d8b -r 6227bf629626 tools/python/xen/xm/xenapi_create.py --- a/tools/python/xen/xm/xenapi_create.py Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/python/xen/xm/xenapi_create.py Mon Mar 02 18:26:56 2009 +0900 @@ -1037,6 +1037,7 @@ class sxp2xml: 'usbdevice', 'hpet', 'timer_mode', + 'vpt_align', 'viridian', 'vhpt', 'guest_os_type', diff -r c4c4ba857d8b -r 6227bf629626 tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Mon Mar 02 16:52:22 2009 +0900 +++ b/tools/xenstore/xenstored_core.c Mon Mar 02 18:26:56 2009 +0900 @@ -1937,14 +1937,17 @@ int main(int argc, char *argv[]) handle_event(); next = list_entry(connections.next, typeof(*conn), list); + if (&next->list != &connections) + talloc_increase_ref_count(next); while (&next->list != &connections) { conn = next; next = list_entry(conn->list.next, typeof(*conn), list); + if (&next->list != &connections) + talloc_increase_ref_count(next); if (conn->domain) { - talloc_increase_ref_count(conn); if (domain_can_read(conn)) handle_input(conn); if (talloc_free(conn) == 0) @@ -1957,7 +1960,6 @@ int main(int argc, char *argv[]) if (talloc_free(conn) == 0) continue; } else { - talloc_increase_ref_count(conn); if (FD_ISSET(conn->fd, &inset)) handle_input(conn); if (talloc_free(conn) == 0) diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/ia64/xen/domain.c Mon Mar 02 18:26:56 2009 +0900 @@ -2023,6 +2023,7 @@ static void __init calc_dom0_size(void) unsigned long p2m_pages; unsigned long spare_hv_pages; unsigned long 
max_dom0_size; + unsigned long iommu_pg_table_pages = 0; /* Estimate maximum memory we can safely allocate for dom0 * by subtracting the p2m table allocation and a chunk of memory @@ -2033,8 +2034,13 @@ static void __init calc_dom0_size(void) domheap_pages = avail_domheap_pages(); p2m_pages = domheap_pages / PTRS_PER_PTE; spare_hv_pages = 8192 + (domheap_pages / 4096); - max_dom0_size = (domheap_pages - (p2m_pages + spare_hv_pages)) - * PAGE_SIZE; + + if (iommu_enabled) + iommu_pg_table_pages = domheap_pages * 4 / 512; + /* There are 512 ptes in one 4K vtd page. */ + + max_dom0_size = (domheap_pages - (p2m_pages + spare_hv_pages) - + iommu_pg_table_pages) * PAGE_SIZE; printk("Maximum permitted dom0 size: %luMB\n", max_dom0_size / (1024*1024)); diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/x86/acpi/power.c --- a/xen/arch/x86/acpi/power.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/x86/acpi/power.c Mon Mar 02 18:26:56 2009 +0900 @@ -175,6 +175,7 @@ static int enter_state(u32 state) printk("Entering ACPI S%d state.\n", state); local_irq_save(flags); + spin_debug_disable(); if ( (error = device_power_down()) ) { @@ -208,6 +209,7 @@ static int enter_state(u32 state) printk(XENLOG_INFO "Finishing wakeup from ACPI S%d state.", state); done: + spin_debug_enable(); local_irq_restore(flags); console_end_sync(); acpi_sleep_post(state); diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/x86/hvm/hvm.c Mon Mar 02 18:26:56 2009 +0900 @@ -542,6 +542,22 @@ static int hvm_load_cpu_ctxt(struct doma return -EINVAL; } + /* Older Xen versions used to save the segment arbytes directly + * from the VMCS on Intel hosts. Detect this and rearrange them + * into the struct segment_register format. 
*/ +#define UNFOLD_ARBYTES(_r) \ + if ( (_r & 0xf000) && !(_r & 0x0f00) ) \ + _r = ((_r & 0xff) | ((_r >> 4) & 0xf00)) + UNFOLD_ARBYTES(ctxt.cs_arbytes); + UNFOLD_ARBYTES(ctxt.ds_arbytes); + UNFOLD_ARBYTES(ctxt.es_arbytes); + UNFOLD_ARBYTES(ctxt.fs_arbytes); + UNFOLD_ARBYTES(ctxt.gs_arbytes); + UNFOLD_ARBYTES(ctxt.ss_arbytes); + UNFOLD_ARBYTES(ctxt.tr_arbytes); + UNFOLD_ARBYTES(ctxt.ldtr_arbytes); +#undef UNFOLD_ARBYTES + /* Architecture-specific vmcs/vmcb bits */ if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 ) return -EINVAL; diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/x86/hvm/mtrr.c --- a/xen/arch/x86/hvm/mtrr.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/x86/hvm/mtrr.c Mon Mar 02 18:26:56 2009 +0900 @@ -351,11 +351,18 @@ static uint8_t effective_mm_type(struct static uint8_t effective_mm_type(struct mtrr_state *m, uint64_t pat, paddr_t gpa, - uint32_t pte_flags) + uint32_t pte_flags, + uint8_t gmtrr_mtype) { uint8_t mtrr_mtype, pat_value, effective; - - mtrr_mtype = get_mtrr_type(m, gpa); + + /* if get_pat_flags() gives a dedicated MTRR type, + * just use it + */ + if ( gmtrr_mtype == NO_HARDCODE_MEM_TYPE ) + mtrr_mtype = get_mtrr_type(m, gpa); + else + mtrr_mtype = gmtrr_mtype; pat_value = page_pat_type(pat, pte_flags); @@ -367,7 +374,8 @@ uint32_t get_pat_flags(struct vcpu *v, uint32_t get_pat_flags(struct vcpu *v, uint32_t gl1e_flags, paddr_t gpaddr, - paddr_t spaddr) + paddr_t spaddr, + uint8_t gmtrr_mtype) { uint8_t guest_eff_mm_type; uint8_t shadow_mtrr_type; @@ -378,7 +386,8 @@ uint32_t get_pat_flags(struct vcpu *v, /* 1. Get the effective memory type of guest physical address, * with the pair of guest MTRR and PAT */ - guest_eff_mm_type = effective_mm_type(g, pat, gpaddr, gl1e_flags); + guest_eff_mm_type = effective_mm_type(g, pat, gpaddr, + gl1e_flags, gmtrr_mtype); /* 2. 
Get the memory type of host physical address, with MTRR */ shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr); diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/x86/hvm/vpic.c --- a/xen/arch/x86/hvm/vpic.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/x86/hvm/vpic.c Mon Mar 02 18:26:56 2009 +0900 @@ -56,7 +56,7 @@ static int vpic_get_priority(struct hvm_ /* prio = ffs(mask ROR vpic->priority_add); */ asm ( "ror %%cl,%b1 ; bsf %1,%0" - : "=r" (prio) : "r" ((uint32_t)mask), "c" (vpic->priority_add) ); + : "=r" (prio) : "q" ((uint32_t)mask), "c" (vpic->priority_add) ); return prio; } diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/x86/hvm/vpt.c --- a/xen/arch/x86/hvm/vpt.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/x86/hvm/vpt.c Mon Mar 02 18:26:56 2009 +0900 @@ -384,13 +384,25 @@ void create_periodic_time( pt->period_cycles = (u64)period; pt->one_shot = !period; pt->scheduled = NOW() + delta; - /* - * Offset LAPIC ticks from other timer ticks. Otherwise guests which use - * LAPIC ticks for process accounting can see long sequences of process - * ticks incorrectly accounted to interrupt processing. - */ - if ( !pt->one_shot && (pt->source == PTSRC_lapic) ) - pt->scheduled += delta >> 1; + + if ( !pt->one_shot ) + { + if ( v->domain->arch.hvm_domain.params[HVM_PARAM_VPT_ALIGN] ) + { + pt->scheduled = align_timer(pt->scheduled, pt->period); + } + else if ( pt->source == PTSRC_lapic ) + { + /* + * Offset LAPIC ticks from other timer ticks. Otherwise guests + * which use LAPIC ticks for process accounting can see long + * sequences of process ticks incorrectly accounted to interrupt + * processing (seen with RHEL3 guest). 
+ */ + pt->scheduled += delta >> 1; + } + } + pt->cb = cb; pt->priv = data; diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/x86/io_apic.c Mon Mar 02 18:26:56 2009 +0900 @@ -1779,6 +1779,20 @@ static inline void check_timer(void) */ #define PIC_IRQS (1 << PIC_CASCADE_IR) +static struct IO_APIC_route_entry *ioapic_pm_state; + +void ioapic_pm_state_alloc(void) +{ + int i, nr_entry = 0; + + for (i = 0; i < nr_ioapics; i++) + nr_entry += nr_ioapic_registers[i]; + + ioapic_pm_state = _xmalloc(sizeof(struct IO_APIC_route_entry)*nr_entry, + sizeof(struct IO_APIC_route_entry)); + BUG_ON(ioapic_pm_state == NULL); +} + void __init setup_IO_APIC(void) { enable_IO_APIC(); @@ -1801,40 +1815,16 @@ void __init setup_IO_APIC(void) init_IO_APIC_traps(); check_timer(); print_IO_APIC(); + ioapic_pm_state_alloc(); register_keyhandler('z', print_IO_APIC_keyhandler, "print ioapic info"); } -struct IO_APIC_route_entry *ioapic_pm_state=NULL; - -void ioapic_pm_state_alloc(void) -{ - int i, nr_entry = 0; - - if (ioapic_pm_state != NULL) - return; - - for (i = 0; i < nr_ioapics; i++) - nr_entry += nr_ioapic_registers[i]; - - ioapic_pm_state = _xmalloc(sizeof(struct IO_APIC_route_entry)*nr_entry, - sizeof(struct IO_APIC_route_entry)); -} - -int ioapic_suspend(void) -{ - struct IO_APIC_route_entry *entry; +void ioapic_suspend(void) +{ + struct IO_APIC_route_entry *entry = ioapic_pm_state; unsigned long flags; - int apic,i; - - ioapic_pm_state_alloc(); - - if (ioapic_pm_state == NULL) { - printk("Cannot suspend ioapic due to lack of memory\n"); - return 1; - } - - entry = ioapic_pm_state; + int apic, i; spin_lock_irqsave(&ioapic_lock, flags); for (apic = 0; apic < nr_ioapics; apic++) { @@ -1844,23 +1834,14 @@ int ioapic_suspend(void) } } spin_unlock_irqrestore(&ioapic_lock, flags); - - return 0; -} - -int ioapic_resume(void) -{ - struct IO_APIC_route_entry *entry; +} + +void ioapic_resume(void) +{ + struct 
IO_APIC_route_entry *entry = ioapic_pm_state; unsigned long flags; union IO_APIC_reg_00 reg_00; - int i,apic; - - if (ioapic_pm_state == NULL){ - printk("Cannot resume ioapic due to lack of memory\n"); - return 1; - } - - entry = ioapic_pm_state; + int i, apic; spin_lock_irqsave(&ioapic_lock, flags); for (apic = 0; apic < nr_ioapics; apic++){ @@ -1875,8 +1856,6 @@ int ioapic_resume(void) } } spin_unlock_irqrestore(&ioapic_lock, flags); - - return 0; } /* -------------------------------------------------------------------------- diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/x86/irq.c Mon Mar 02 18:26:56 2009 +0900 @@ -1142,6 +1142,8 @@ void fixup_irqs(cpumask_t map) static int warned; irq_guest_action_t *action; struct pending_eoi *peoi; + irq_desc_t *desc; + unsigned long flags; /* Direct all future interrupts away from this CPU. */ for ( vector = 0; vector < NR_VECTORS; vector++ ) @@ -1150,18 +1152,24 @@ void fixup_irqs(cpumask_t map) if ( vector_to_irq(vector) == 2 ) continue; - cpus_and(mask, irq_desc[vector].affinity, map); + desc = &irq_desc[vector]; + + spin_lock_irqsave(&desc->lock, flags); + + cpus_and(mask, desc->affinity, map); if ( any_online_cpu(mask) == NR_CPUS ) { printk("Breaking affinity for vector %u (irq %i)\n", vector, vector_to_irq(vector)); mask = map; } - if ( irq_desc[vector].handler->set_affinity ) - irq_desc[vector].handler->set_affinity(vector, mask); - else if ( irq_desc[vector].action && !(warned++) ) - printk("Cannot set affinity for irq %u (irq %i)\n", + if ( desc->handler->set_affinity ) + desc->handler->set_affinity(vector, mask); + else if ( desc->action && !(warned++) ) + printk("Cannot set affinity for vector %u (irq %i)\n", vector, vector_to_irq(vector)); + + spin_unlock_irqrestore(&desc->lock, flags); } /* Service any interrupts that beat us in the re-direction race. 
*/ diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/x86/mm/shadow/multi.c Mon Mar 02 18:26:56 2009 +0900 @@ -546,15 +546,32 @@ _sh_propagate(struct vcpu *v, !is_xen_heap_mfn(mfn_x(target_mfn)) ) { unsigned int type; + + /* compute the PAT index for shadow page entry when VT-d is enabled + * and device assigned. + * 1) direct MMIO: compute the PAT index with gMTRR=UC and gPAT. + * 2) if enables snoop control, compute the PAT index as WB. + * 3) if disables snoop control, compute the PAT index with + * gMTRR and gPAT. + */ if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(target_gfn), &type) ) sflags |= pat_type_2_pte_flags(type); else if ( d->arch.hvm_domain.is_in_uc_mode ) sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE); + else if ( p2mt == p2m_mmio_direct ) + sflags |= get_pat_flags(v, + gflags, + gfn_to_paddr(target_gfn), + ((paddr_t)mfn_x(target_mfn)) << PAGE_SHIFT, + MTRR_TYPE_UNCACHABLE); + else if ( iommu_snoop ) + sflags |= pat_type_2_pte_flags(PAT_TYPE_WRBACK); else sflags |= get_pat_flags(v, gflags, gfn_to_paddr(target_gfn), - ((paddr_t)mfn_x(target_mfn)) << PAGE_SHIFT); + ((paddr_t)mfn_x(target_mfn)) << PAGE_SHIFT, + NO_HARDCODE_MEM_TYPE); } // Set the A&D bits for higher level shadows. 
diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/x86/msi.c --- a/xen/arch/x86/msi.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/x86/msi.c Mon Mar 02 18:26:56 2009 +0900 @@ -29,17 +29,17 @@ /* bitmap indicate which fixed map is free */ DEFINE_SPINLOCK(msix_fixmap_lock); -DECLARE_BITMAP(msix_fixmap_pages, MAX_MSIX_PAGES); +DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES); static int msix_fixmap_alloc(void) { - int i, rc = -1; + int i, rc = -ENOMEM; spin_lock(&msix_fixmap_lock); - for ( i = 0; i < MAX_MSIX_PAGES; i++ ) + for ( i = 0; i < FIX_MSIX_MAX_PAGES; i++ ) if ( !test_bit(i, &msix_fixmap_pages) ) break; - if ( i == MAX_MSIX_PAGES ) + if ( i == FIX_MSIX_MAX_PAGES ) goto out; rc = FIX_MSIX_IO_RESERV_BASE + i; set_bit(i, &msix_fixmap_pages); @@ -51,8 +51,66 @@ static int msix_fixmap_alloc(void) static void msix_fixmap_free(int idx) { + spin_lock(&msix_fixmap_lock); if ( idx >= FIX_MSIX_IO_RESERV_BASE ) clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages); + spin_unlock(&msix_fixmap_lock); +} + +static int msix_get_fixmap(struct pci_dev *dev, unsigned long table_paddr, + unsigned long entry_paddr) +{ + int nr_page, idx; + + nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT); + + if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES ) + return -EINVAL; + + spin_lock(&dev->msix_table_lock); + if ( dev->msix_table_refcnt[nr_page]++ == 0 ) + { + idx = msix_fixmap_alloc(); + if ( idx < 0 ) + { + dev->msix_table_refcnt[nr_page]--; + goto out; + } + set_fixmap_nocache(idx, entry_paddr); + dev->msix_table_idx[nr_page] = idx; + } + else + idx = dev->msix_table_idx[nr_page]; + + out: + spin_unlock(&dev->msix_table_lock); + return idx; +} + +static void msix_put_fixmap(struct pci_dev *dev, int idx) +{ + int i; + unsigned long start; + + spin_lock(&dev->msix_table_lock); + for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ ) + { + if ( dev->msix_table_idx[i] == idx ) + break; + } + if ( i == MAX_MSIX_TABLE_PAGES ) + goto out; + + if ( 
--dev->msix_table_refcnt[i] == 0 ) + { + start = fix_to_virt(idx); + destroy_xen_mappings(start, start + PAGE_SIZE); + msix_fixmap_free(idx); + dev->msix_table_idx[i] = 0; + } + + out: + spin_unlock(&dev->msix_table_lock); } /* @@ -122,8 +180,7 @@ static void read_msi_msg(struct msi_desc case PCI_CAP_ID_MSIX: { void __iomem *base; - base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + base = entry->mask_base; msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); @@ -199,8 +256,7 @@ static void write_msi_msg(struct msi_des case PCI_CAP_ID_MSIX: { void __iomem *base; - base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + base = entry->mask_base; writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); @@ -288,8 +344,7 @@ static void msix_flush_writes(unsigned i break; case PCI_CAP_ID_MSIX: { - int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; + int offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; readl(entry->mask_base + offset); break; } @@ -330,8 +385,7 @@ static void msi_set_mask_bit(unsigned in break; case PCI_CAP_ID_MSIX: { - int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; + int offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; writel(flag, entry->mask_base + offset); readl(entry->mask_base + offset); break; @@ -392,13 +446,10 @@ int msi_free_vector(struct msi_desc *ent { unsigned long start; - writel(1, entry->mask_base + entry->msi_attrib.entry_nr - * PCI_MSIX_ENTRY_SIZE - + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1); - msix_fixmap_free(virt_to_fix(start)); - destroy_xen_mappings(start, start + PAGE_SIZE); + msix_put_fixmap(entry->dev, virt_to_fix(start)); } list_del(&entry->list); xfree(entry); @@ -500,8 +551,8 
@@ static int msix_capability_init(struct p struct msi_desc *entry; int pos; u16 control; - unsigned long phys_addr; - u32 table_offset; + unsigned long table_paddr, entry_paddr; + u32 table_offset, entry_offset; u8 bir; void __iomem *base; int idx; @@ -525,15 +576,17 @@ static int msix_capability_init(struct p table_offset = pci_conf_read32(bus, slot, func, msix_table_offset_reg(pos)); bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK); table_offset &= ~PCI_MSIX_FLAGS_BIRMASK; - phys_addr = msi->table_base + table_offset; - idx = msix_fixmap_alloc(); + entry_offset = msi->entry_nr * PCI_MSIX_ENTRY_SIZE; + + table_paddr = msi->table_base + table_offset; + entry_paddr = table_paddr + entry_offset; + idx = msix_get_fixmap(dev, table_paddr, entry_paddr); if ( idx < 0 ) { xfree(entry); - return -ENOMEM; - } - set_fixmap_nocache(idx, phys_addr); - base = (void *)(fix_to_virt(idx) + (phys_addr & ((1UL << PAGE_SHIFT) - 1))); + return idx; + } + base = (void *)(fix_to_virt(idx) + (entry_paddr & ((1UL << PAGE_SHIFT) - 1))); entry->msi_attrib.type = PCI_CAP_ID_MSIX; entry->msi_attrib.is_64 = 1; @@ -548,9 +601,7 @@ static int msix_capability_init(struct p list_add_tail(&entry->list, &dev->msi_list); /* Mask interrupt here */ - writel(1, entry->mask_base + entry->msi_attrib.entry_nr - * PCI_MSIX_ENTRY_SIZE - + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); *desc = entry; /* Restore MSI-X enabled bits */ @@ -675,9 +726,7 @@ static void __pci_disable_msix(struct ms BUG_ON(list_empty(&dev->msi_list)); - writel(1, entry->mask_base + entry->msi_attrib.entry_nr - * PCI_MSIX_ENTRY_SIZE - + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); pci_conf_write16(bus, slot, func, msix_control_reg(pos), control); } diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/x86/shutdown.c --- a/xen/arch/x86/shutdown.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/x86/shutdown.c Mon Mar 02 18:26:56 
2009 +0900 @@ -302,6 +302,7 @@ void machine_restart(unsigned int delay_ watchdog_disable(); console_start_sync(); + spin_debug_disable(); local_irq_enable(); diff -r c4c4ba857d8b -r 6227bf629626 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/arch/x86/time.c Mon Mar 02 18:26:56 2009 +0900 @@ -607,13 +607,14 @@ static void platform_time_calibration(vo { u64 count; s_time_t stamp; - - spin_lock_irq(&platform_timer_lock); + unsigned long flags; + + spin_lock_irqsave(&platform_timer_lock, flags); count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask); stamp = __read_platform_stime(count); stime_platform_stamp = stamp; platform_timer_stamp = count; - spin_unlock_irq(&platform_timer_lock); + spin_unlock_irqrestore(&platform_timer_lock, flags); } static void resume_platform_timer(void) diff -r c4c4ba857d8b -r 6227bf629626 xen/common/timer.c --- a/xen/common/timer.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/common/timer.c Mon Mar 02 18:26:56 2009 +0900 @@ -473,6 +473,13 @@ void process_pending_timers(void) timer_softirq_action(); } +s_time_t align_timer(s_time_t firsttick, uint64_t period) +{ + if ( !period ) + return firsttick; + + return firsttick + (period - 1) - ((firsttick - 1) % period); +} static void dump_timerq(unsigned char key) { diff -r c4c4ba857d8b -r 6227bf629626 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/drivers/passthrough/amd/iommu_init.c Mon Mar 02 18:26:56 2009 +0900 @@ -37,9 +37,6 @@ struct list_head amd_iommu_head; struct list_head amd_iommu_head; struct table_struct device_table; -extern void *int_remap_table; -extern spinlock_t int_remap_table_lock; - static int __init map_iommu_mmio_region(struct amd_iommu *iommu) { unsigned long mfn; @@ -487,11 +484,13 @@ static int set_iommu_interrupt_handler(s } irq_desc[vector].handler = &iommu_msi_type; + vector_to_iommu[vector] = iommu; ret = request_irq_vector(vector, 
amd_iommu_page_fault, 0, "amd_iommu", iommu); if ( ret ) { irq_desc[vector].handler = &no_irq_type; + vector_to_iommu[vector] = NULL; free_irq_vector(vector); amd_iov_error("can't request irq\n"); return 0; @@ -499,7 +498,6 @@ static int set_iommu_interrupt_handler(s /* Make sure that vector is never re-used. */ vector_irq[vector] = NEVER_ASSIGN_IRQ; - vector_to_iommu[vector] = iommu; iommu->vector = vector; return vector; } diff -r c4c4ba857d8b -r 6227bf629626 xen/drivers/passthrough/amd/iommu_intr.c --- a/xen/drivers/passthrough/amd/iommu_intr.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/drivers/passthrough/amd/iommu_intr.c Mon Mar 02 18:26:56 2009 +0900 @@ -23,7 +23,7 @@ #include <asm/hvm/svm/amd-iommu-proto.h> #define INTREMAP_TABLE_ORDER 1 -DEFINE_SPINLOCK(int_remap_table_lock); +static DEFINE_SPINLOCK(int_remap_table_lock); void *int_remap_table = NULL; static u8 *get_intremap_entry(u8 vector, u8 dm) @@ -110,21 +110,13 @@ static void update_intremap_entry_from_i int __init amd_iommu_setup_intremap_table(void) { - unsigned long flags; - - spin_lock_irqsave(&int_remap_table_lock, flags); - if ( int_remap_table == NULL ) { int_remap_table = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER); if ( int_remap_table == NULL ) - { - spin_unlock_irqrestore(&int_remap_table_lock, flags); return -ENOMEM; - } memset(int_remap_table, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER)); } - spin_unlock_irqrestore(&int_remap_table_lock, flags); return 0; } @@ -210,15 +202,11 @@ void amd_iommu_msi_msg_update_ire( int __init deallocate_intremap_table(void) { - unsigned long flags; - - spin_lock_irqsave(&int_remap_table_lock, flags); if ( int_remap_table ) { __free_amd_iommu_tables(int_remap_table, INTREMAP_TABLE_ORDER); int_remap_table = NULL; } - spin_unlock_irqrestore(&int_remap_table_lock, flags); return 0; } diff -r c4c4ba857d8b -r 6227bf629626 xen/drivers/passthrough/amd/iommu_map.c --- a/xen/drivers/passthrough/amd/iommu_map.c Mon Mar 02 16:52:22 2009 +0900 +++ 
b/xen/drivers/passthrough/amd/iommu_map.c Mon Mar 02 18:26:56 2009 +0900 @@ -446,14 +446,13 @@ int amd_iommu_map_page(struct domain *d, int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn) { u64 iommu_l2e; - unsigned long flags; struct hvm_iommu *hd = domain_hvm_iommu(d); int iw = IOMMU_IO_WRITE_ENABLED; int ir = IOMMU_IO_READ_ENABLED; BUG_ON( !hd->root_table ); - spin_lock_irqsave(&hd->mapping_lock, flags); + spin_lock(&hd->mapping_lock); if ( is_hvm_domain(d) && !hd->p2m_synchronized ) goto out; @@ -461,14 +460,14 @@ int amd_iommu_map_page(struct domain *d, iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn); if ( iommu_l2e == 0 ) { - spin_unlock_irqrestore(&hd->mapping_lock, flags); + spin_unlock(&hd->mapping_lock); amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); return -EFAULT; } set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir); out: - spin_unlock_irqrestore(&hd->mapping_lock, flags); + spin_unlock(&hd->mapping_lock); return 0; } @@ -481,11 +480,11 @@ int amd_iommu_unmap_page(struct domain * BUG_ON( !hd->root_table ); - spin_lock_irqsave(&hd->mapping_lock, flags); + spin_lock(&hd->mapping_lock); if ( is_hvm_domain(d) && !hd->p2m_synchronized ) { - spin_unlock_irqrestore(&hd->mapping_lock, flags); + spin_unlock(&hd->mapping_lock); return 0; } @@ -493,14 +492,14 @@ int amd_iommu_unmap_page(struct domain * if ( iommu_l2e == 0 ) { - spin_unlock_irqrestore(&hd->mapping_lock, flags); + spin_unlock(&hd->mapping_lock); amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); return -EFAULT; } /* mark PTE as 'page not present' */ clear_iommu_l1e_present(iommu_l2e, gfn); - spin_unlock_irqrestore(&hd->mapping_lock, flags); + spin_unlock(&hd->mapping_lock); /* send INVALIDATE_IOMMU_PAGES command */ for_each_amd_iommu ( iommu ) @@ -520,12 +519,12 @@ int amd_iommu_reserve_domain_unity_map( unsigned long size, int iw, int ir) { u64 iommu_l2e; - unsigned long flags, npages, i; + unsigned long 
npages, i; struct hvm_iommu *hd = domain_hvm_iommu(domain); npages = region_to_pages(phys_addr, size); - spin_lock_irqsave(&hd->mapping_lock, flags); + spin_lock(&hd->mapping_lock); for ( i = 0; i < npages; ++i ) { iommu_l2e = iommu_l2e_from_pfn( @@ -533,7 +532,7 @@ int amd_iommu_reserve_domain_unity_map( if ( iommu_l2e == 0 ) { - spin_unlock_irqrestore(&hd->mapping_lock, flags); + spin_unlock(&hd->mapping_lock); amd_iov_error("Invalid IO pagetable entry phys_addr = %lx\n", phys_addr); return -EFAULT; @@ -544,13 +543,13 @@ int amd_iommu_reserve_domain_unity_map( phys_addr += PAGE_SIZE; } - spin_unlock_irqrestore(&hd->mapping_lock, flags); + spin_unlock(&hd->mapping_lock); return 0; } int amd_iommu_sync_p2m(struct domain *d) { - unsigned long mfn, gfn, flags; + unsigned long mfn, gfn; u64 iommu_l2e; struct page_info *page; struct hvm_iommu *hd; @@ -562,7 +561,7 @@ int amd_iommu_sync_p2m(struct domain *d) hd = domain_hvm_iommu(d); - spin_lock_irqsave(&hd->mapping_lock, flags); + spin_lock(&hd->mapping_lock); if ( hd->p2m_synchronized ) goto out; @@ -582,7 +581,7 @@ int amd_iommu_sync_p2m(struct domain *d) if ( iommu_l2e == 0 ) { spin_unlock(&d->page_alloc_lock); - spin_unlock_irqrestore(&hd->mapping_lock, flags); + spin_unlock(&hd->mapping_lock); amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); return -EFAULT; } @@ -595,7 +594,7 @@ int amd_iommu_sync_p2m(struct domain *d) hd->p2m_synchronized = 1; out: - spin_unlock_irqrestore(&hd->mapping_lock, flags); + spin_unlock(&hd->mapping_lock); return 0; } diff -r c4c4ba857d8b -r 6227bf629626 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Mon Mar 02 18:26:56 2009 +0900 @@ -172,22 +172,18 @@ static int allocate_domain_resources(str static int allocate_domain_resources(struct hvm_iommu *hd) { /* allocate root table */ - unsigned long flags; - - spin_lock_irqsave(&hd->mapping_lock, flags); 
+ spin_lock(&hd->mapping_lock); if ( !hd->root_table ) { hd->root_table = alloc_amd_iommu_pgtable(); if ( !hd->root_table ) - goto error_out; - } - spin_unlock_irqrestore(&hd->mapping_lock, flags); - - return 0; - - error_out: - spin_unlock_irqrestore(&hd->mapping_lock, flags); - return -ENOMEM; + { + spin_unlock(&hd->mapping_lock); + return -ENOMEM; + } + } + spin_unlock(&hd->mapping_lock); + return 0; } static int get_paging_mode(unsigned long entries) @@ -298,7 +294,6 @@ static int reassign_device( struct domai bus, PCI_SLOT(devfn), PCI_FUNC(devfn), source->domain_id, target->domain_id); - spin_unlock(&pcidevs_lock); return 0; } @@ -352,11 +347,13 @@ static void deallocate_iommu_page_tables { struct hvm_iommu *hd = domain_hvm_iommu(d); + spin_lock(&hd->mapping_lock); if ( hd->root_table ) { deallocate_next_page_table(hd->root_table, hd->paging_mode); hd->root_table = NULL; } + spin_unlock(&hd->mapping_lock); } diff -r c4c4ba857d8b -r 6227bf629626 xen/drivers/passthrough/pci.c --- a/xen/drivers/passthrough/pci.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/drivers/passthrough/pci.c Mon Mar 02 18:26:56 2009 +0900 @@ -48,6 +48,7 @@ struct pci_dev *alloc_pdev(u8 bus, u8 de pdev->domain = NULL; INIT_LIST_HEAD(&pdev->msi_list); list_add(&pdev->alldevs_list, &alldevs_list); + spin_lock_init(&pdev->msix_table_lock); return pdev; } diff -r c4c4ba857d8b -r 6227bf629626 xen/drivers/passthrough/vtd/ia64/vtd.c --- a/xen/drivers/passthrough/vtd/ia64/vtd.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/drivers/passthrough/vtd/ia64/vtd.c Mon Mar 02 18:26:56 2009 +0900 @@ -114,3 +114,34 @@ void hvm_dpci_isairq_eoi(struct domain * { /* dummy */ } + +static int do_dom0_iommu_mapping(unsigned long start, unsigned long end, + void *arg) +{ + unsigned long tmp, pfn, j, page_addr = start; + struct domain *d = (struct domain *)arg; + + extern int xen_in_range(paddr_t start, paddr_t end); + /* Set up 1:1 page table for dom0 for all Ram except Xen bits.*/ + + while (page_addr < end) + { + if 
(xen_in_range(page_addr, page_addr + PAGE_SIZE)) + continue; + + pfn = page_addr >> PAGE_SHIFT; + tmp = 1 << (PAGE_SHIFT - PAGE_SHIFT_4K); + for ( j = 0; j < tmp; j++ ) + iommu_map_page(d, (pfn*tmp+j), (pfn*tmp+j)); + + page_addr += PAGE_SIZE; + } + return 0; +} + +void iommu_set_dom0_mapping(struct domain *d) +{ + if (dom0) + BUG_ON(d != dom0); + efi_memmap_walk(do_dom0_iommu_mapping, d); +} diff -r c4c4ba857d8b -r 6227bf629626 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/drivers/passthrough/vtd/iommu.c Mon Mar 02 18:26:56 2009 +0900 @@ -30,6 +30,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/keyhandler.h> +#include <asm/msi.h> #include "iommu.h" #include "dmar.h" #include "extern.h" @@ -40,6 +41,7 @@ static spinlock_t domid_bitmap_lock; static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */ static int domid_bitmap_size; /* domain id bitmap size in bits */ static unsigned long *domid_bitmap; /* iommu domain id bitmap */ +static bool_t rwbf_quirk; static void setup_dom0_devices(struct domain *d); static void setup_dom0_rmrr(struct domain *d); @@ -230,7 +232,7 @@ static void iommu_flush_write_buffer(str unsigned long flag; s_time_t start_time; - if ( !cap_rwbf(iommu->cap) ) + if ( !rwbf_quirk && !cap_rwbf(iommu->cap) ) return; val = iommu->gcmd | DMA_GCMD_WBF; @@ -829,7 +831,6 @@ static void dma_msi_data_init(struct iom spin_unlock_irqrestore(&iommu->register_lock, flags); } -#ifdef SUPPORT_MSI_REMAPPING static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu) { u64 msi_address; @@ -846,12 +847,6 @@ static void dma_msi_addr_init(struct iom dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32)); spin_unlock_irqrestore(&iommu->register_lock, flags); } -#else -static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu) -{ - /* ia64: TODO */ -} -#endif static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest) { @@ -870,7 +865,7 @@ 
static struct hw_interrupt_type dma_msi_ .set_affinity = dma_msi_set_affinity, }; -int iommu_set_interrupt(struct iommu *iommu) +static int iommu_set_interrupt(struct iommu *iommu) { int vector, ret; @@ -882,10 +877,12 @@ int iommu_set_interrupt(struct iommu *io } irq_desc[vector].handler = &dma_msi_type; + vector_to_iommu[vector] = iommu; ret = request_irq_vector(vector, iommu_page_fault, 0, "dmar", iommu); if ( ret ) { irq_desc[vector].handler = &no_irq_type; + vector_to_iommu[vector] = NULL; free_irq_vector(vector); gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n"); return ret; @@ -893,7 +890,6 @@ int iommu_set_interrupt(struct iommu *io /* Make sure that vector is never re-used. */ vector_irq[vector] = NEVER_ASSIGN_IRQ; - vector_to_iommu[vector] = iommu; return vector; } @@ -987,7 +983,6 @@ static int intel_iommu_domain_init(struc { struct hvm_iommu *hd = domain_hvm_iommu(d); struct iommu *iommu = NULL; - u64 i, j, tmp; struct acpi_drhd_unit *drhd; drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); @@ -999,17 +994,8 @@ static int intel_iommu_domain_init(struc { extern int xen_in_range(paddr_t start, paddr_t end); - /* Set up 1:1 page table for dom0 for all RAM except Xen bits. */ - for ( i = 0; i < max_page; i++ ) - { - if ( !page_is_conventional_ram(i) || - xen_in_range(i << PAGE_SHIFT, (i + 1) << PAGE_SHIFT) ) - continue; - - tmp = 1 << (PAGE_SHIFT - PAGE_SHIFT_4K); - for ( j = 0; j < tmp; j++ ) - iommu_map_page(d, (i*tmp+j), (i*tmp+j)); - } + /* Set up 1:1 page table for dom0 */ + iommu_set_dom0_mapping(d); setup_dom0_devices(d); setup_dom0_rmrr(d); @@ -1734,6 +1720,19 @@ static void setup_dom0_rmrr(struct domai spin_unlock(&pcidevs_lock); } +static void platform_quirks(void) +{ + u32 id; + + /* Mobile 4 Series Chipset neglects to set RWBF capability. 
*/ + id = pci_conf_read32(0, 0, 0, 0); + if ( id == 0x2a408086 ) + { + dprintk(XENLOG_INFO VTDPREFIX, "DMAR: Forcing write-buffer flush\n"); + rwbf_quirk = 1; + } +} + int intel_vtd_setup(void) { struct acpi_drhd_unit *drhd; @@ -1741,6 +1740,8 @@ int intel_vtd_setup(void) if ( !vtd_enabled ) return -ENODEV; + + platform_quirks(); spin_lock_init(&domid_bitmap_lock); clflush_size = get_cache_line_size(); diff -r c4c4ba857d8b -r 6227bf629626 xen/drivers/passthrough/vtd/iommu.h --- a/xen/drivers/passthrough/vtd/iommu.h Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/drivers/passthrough/vtd/iommu.h Mon Mar 02 18:26:56 2009 +0900 @@ -397,8 +397,8 @@ struct poll_info { u32 udata; }; -#define MAX_QINVAL_PAGES 8 #define NUM_QINVAL_PAGES 1 +#define IQA_REG_QS 0 // derived from NUM_QINVAL_PAGES per VT-d spec. #define QINVAL_ENTRY_NR (PAGE_SIZE_4K*NUM_QINVAL_PAGES/sizeof(struct qinval_entry)) #define qinval_present(v) ((v).lo & 1) #define qinval_fault_disable(v) (((v).lo >> 1) & 1) diff -r c4c4ba857d8b -r 6227bf629626 xen/drivers/passthrough/vtd/qinval.c --- a/xen/drivers/passthrough/vtd/qinval.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/drivers/passthrough/vtd/qinval.c Mon Mar 02 18:26:56 2009 +0900 @@ -317,9 +317,9 @@ int queue_invalidate_iec(struct iommu *i return ret; } -u64 iec_cap; int __iommu_flush_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx) { + u64 iec_cap; int ret; ret = queue_invalidate_iec(iommu, granu, im, iidx); ret |= invalidate_sync(iommu); @@ -445,8 +445,7 @@ int qinval_setup(struct iommu *iommu) * registers are automatically reset to 0 with write * to IQA register. 
*/ - if ( NUM_QINVAL_PAGES <= MAX_QINVAL_PAGES ) - qi_ctrl->qinval_maddr |= NUM_QINVAL_PAGES - 1; + qi_ctrl->qinval_maddr |= IQA_REG_QS; dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr); /* enable queued invalidation hardware */ diff -r c4c4ba857d8b -r 6227bf629626 xen/drivers/passthrough/vtd/x86/vtd.c --- a/xen/drivers/passthrough/vtd/x86/vtd.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/drivers/passthrough/vtd/x86/vtd.c Mon Mar 02 18:26:56 2009 +0900 @@ -143,3 +143,23 @@ void hvm_dpci_isairq_eoi(struct domain * } spin_unlock(&d->event_lock); } + +void iommu_set_dom0_mapping(struct domain *d) +{ + u64 i, j, tmp; + extern int xen_in_range(paddr_t start, paddr_t end); + + BUG_ON(d->domain_id != 0); + + for ( i = 0; i < max_page; i++ ) + { + /* Set up 1:1 mapping for dom0 for all RAM except Xen bits. */ + if ( !page_is_conventional_ram(i) || + xen_in_range(i << PAGE_SHIFT, (i + 1) << PAGE_SHIFT) ) + continue; + + tmp = 1 << (PAGE_SHIFT - PAGE_SHIFT_4K); + for ( j = 0; j < tmp; j++ ) + iommu_map_page(d, (i*tmp+j), (i*tmp+j)); + } +} diff -r c4c4ba857d8b -r 6227bf629626 xen/include/asm-ia64/msi.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-ia64/msi.h Mon Mar 02 18:26:56 2009 +0900 @@ -0,0 +1,20 @@ +#ifndef __ASM_MSI_H +#define __ASM_MSI_H + +/* + * MSI Defined Data Structures + */ +#define MSI_ADDRESS_HEADER 0xfee +#define MSI_ADDRESS_HEADER_SHIFT 12 +#define MSI_ADDRESS_HEADER_MASK 0xfff000 +#define MSI_ADDRESS_DEST_ID_MASK 0xfff0000f +#define MSI_TARGET_CPU_MASK 0xff +#define MSI_TARGET_CPU_SHIFT 4 +#define MSI_DELIVERY_MODE 0 +#define MSI_LEVEL_MODE 1 /* Edge always assert */ +#define MSI_TRIGGER_MODE 0 /* MSI is edge sensitive */ +#define MSI_PHYSICAL_MODE 0 +#define MSI_LOGICAL_MODE 1 +#define MSI_REDIRECTION_HINT_MODE 0 + +#endif /* __ASM_MSI_H */ diff -r c4c4ba857d8b -r 6227bf629626 xen/include/asm-x86/fixmap.h --- a/xen/include/asm-x86/fixmap.h Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/include/asm-x86/fixmap.h Mon Mar 02 18:26:56 
2009 +0900 @@ -50,7 +50,7 @@ enum fixed_addresses { FIX_IOMMU_MMIO_END = FIX_IOMMU_MMIO_BASE_0 + IOMMU_PAGES -1, FIX_TBOOT_SHARED_BASE, FIX_MSIX_IO_RESERV_BASE, - FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + MAX_MSIX_PAGES -1, + FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1, __end_of_fixed_addresses }; diff -r c4c4ba857d8b -r 6227bf629626 xen/include/asm-x86/io_apic.h --- a/xen/include/asm-x86/io_apic.h Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/include/asm-x86/io_apic.h Mon Mar 02 18:26:56 2009 +0900 @@ -182,13 +182,13 @@ extern int timer_uses_ioapic_pin_0; #endif /*CONFIG_ACPI_BOOT*/ extern int (*ioapic_renumber_irq)(int ioapic, int irq); -extern int ioapic_suspend(void); -extern int ioapic_resume(void); +extern void ioapic_suspend(void); +extern void ioapic_resume(void); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 -static inline int ioapic_suspend(void) {return 0}; -static inline int ioapic_resume(void) {return 0}; +static inline void ioapic_suspend(void) {} +static inline void ioapic_resume(void) {} #endif extern int assign_irq_vector(int irq); diff -r c4c4ba857d8b -r 6227bf629626 xen/include/asm-x86/msi.h --- a/xen/include/asm-x86/msi.h Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/include/asm-x86/msi.h Mon Mar 02 18:26:56 2009 +0900 @@ -49,9 +49,9 @@ /* MAX fixed pages reserved for mapping MSIX tables. 
*/ #if defined(__x86_64__) -#define MAX_MSIX_PAGES 512 -#else -#define MAX_MSIX_PAGES 32 +#define FIX_MSIX_MAX_PAGES 512 +#else +#define FIX_MSIX_MAX_PAGES 32 #endif struct msi_info { @@ -93,7 +93,7 @@ struct msi_desc { struct list_head list; - void __iomem *mask_base; + void __iomem *mask_base; /* va for the entry in mask table */ struct pci_dev *dev; int vector; diff -r c4c4ba857d8b -r 6227bf629626 xen/include/asm-x86/mtrr.h --- a/xen/include/asm-x86/mtrr.h Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/include/asm-x86/mtrr.h Mon Mar 02 18:26:56 2009 +0900 @@ -11,6 +11,7 @@ #define MTRR_TYPE_WRBACK 6 #define MTRR_NUM_TYPES 7 #define MEMORY_NUM_TYPES MTRR_NUM_TYPES +#define NO_HARDCODE_MEM_TYPE MTRR_NUM_TYPES #define NORMAL_CACHE_MODE 0 #define NO_FILL_CACHE_MODE 2 @@ -63,7 +64,7 @@ extern int mtrr_del_page(int reg, unsign extern int mtrr_del_page(int reg, unsigned long base, unsigned long size); extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr, - paddr_t spaddr); + paddr_t spaddr, uint8_t gmtrr_mtype); extern uint8_t epte_get_entry_emt( struct domain *d, unsigned long gfn, unsigned long mfn, uint8_t *igmt, int direct_mmio); diff -r c4c4ba857d8b -r 6227bf629626 xen/include/public/hvm/params.h --- a/xen/include/public/hvm/params.h Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/include/public/hvm/params.h Mon Mar 02 18:26:56 2009 +0900 @@ -103,6 +103,9 @@ /* TSS used on Intel when CR0.PE=0. 
*/ #define HVM_PARAM_VM86_TSS 15 -#define HVM_NR_PARAMS 16 +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ +#define HVM_PARAM_VPT_ALIGN 16 + +#define HVM_NR_PARAMS 17 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff -r c4c4ba857d8b -r 6227bf629626 xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/include/xen/iommu.h Mon Mar 02 18:26:56 2009 +0900 @@ -114,4 +114,6 @@ void iommu_suspend(void); void iommu_suspend(void); void iommu_resume(void); +void iommu_set_dom0_mapping(struct domain *d); + #endif /* _IOMMU_H_ */ diff -r c4c4ba857d8b -r 6227bf629626 xen/include/xen/pci.h --- a/xen/include/xen/pci.h Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/include/xen/pci.h Mon Mar 02 18:26:56 2009 +0900 @@ -29,10 +29,16 @@ #define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f)) #define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff)) +#define MAX_MSIX_TABLE_PAGES 8 /* 2048 entries */ struct pci_dev { struct list_head alldevs_list; struct list_head domain_list; + struct list_head msi_list; + int msix_table_refcnt[MAX_MSIX_TABLE_PAGES]; + int msix_table_idx[MAX_MSIX_TABLE_PAGES]; + spinlock_t msix_table_lock; + struct domain *domain; const u8 bus; const u8 devfn; diff -r c4c4ba857d8b -r 6227bf629626 xen/include/xen/timer.h --- a/xen/include/xen/timer.h Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/include/xen/timer.h Mon Mar 02 18:26:56 2009 +0900 @@ -122,6 +122,9 @@ DECLARE_PER_CPU(s_time_t, timer_deadline /* Arch-defined function to reprogram timer hardware for new deadline. 
*/ extern int reprogram_timer(s_time_t timeout); +/* calculate the aligned first tick time for a given periodic timer */ +extern s_time_t align_timer(s_time_t firsttick, uint64_t period); + #endif /* _TIMER_H_ */ /* diff -r c4c4ba857d8b -r 6227bf629626 xen/xsm/flask/ss/policydb.c --- a/xen/xsm/flask/ss/policydb.c Mon Mar 02 16:52:22 2009 +0900 +++ b/xen/xsm/flask/ss/policydb.c Mon Mar 02 18:26:56 2009 +0900 @@ -1515,8 +1515,8 @@ int policydb_read(struct policydb *p, vo if ( len != strlen(POLICYDB_STRING) ) { printk(KERN_ERR "security: policydb string length %d does not " - "match expected length %Zu\n", - len, (u32) strlen(POLICYDB_STRING)); + "match expected length %lu\n", + len, strlen(POLICYDB_STRING)); goto bad; } policydb_str = xmalloc_array(char, len + 1); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |