[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Merge with xen-ia64-unstable.hg



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 2dcfb85f5bacc852e37d4fd0b121f1cd3093232f
# Parent  3a45893315046e7af731a80970b8fa29b82e664c
# Parent  5ff5117291add571df9d7f3973b0a69a0515c04f
Merge with xen-ia64-unstable.hg

diff -r 3a4589331504 -r 2dcfb85f5bac docs/man/xmdomain.cfg.pod.5
--- a/docs/man/xmdomain.cfg.pod.5       Fri Dec  2 21:52:41 2005
+++ b/docs/man/xmdomain.cfg.pod.5       Sat Dec  3 09:44:38 2005
@@ -156,6 +156,16 @@
 the first cpu, 1 the second, and so on.  This defaults to -1, which
 means Xen is free to pick which CPU to start on.
 
+=item B<cpus>
+
+Specifies a list of CPUs on which the domain's VCPUs are allowed to
+execute.  The syntax supports ranges (0-3) and negation (^1).
+For instance:
+
+    cpus = "0-3,5,^1"
+
+Will result in CPUs 0, 2, 3, 5 being available for use by the domain.
+
 =item B<extra>
 
 Extra information to append to the end of the kernel parameter line.
diff -r 3a4589331504 -r 2dcfb85f5bac linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Fri Dec  2 21:52:41 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Sat Dec  3 09:44:38 2005
@@ -560,7 +560,8 @@
        }
        while (!time_values_up_to_date(cpu));
 
-       if (unlikely(delta < -1000000LL) || unlikely(delta_cpu < 0)) {
+       if ((unlikely(delta < -1000000LL) || unlikely(delta_cpu < 0))
+           && printk_ratelimit()) {
                printk("Timer ISR/%d: Time went backwards: "
                       "delta=%lld cpu_delta=%lld shadow=%lld "
                       "off=%lld processed=%lld cpu_processed=%lld\n",
diff -r 3a4589331504 -r 2dcfb85f5bac linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Fri Dec  2 21:52:41 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Sat Dec  3 09:44:38 2005
@@ -59,6 +59,8 @@
 
 #define streq(a, b) (strcmp((a), (b)) == 0)
 
+static char *kasprintf(const char *fmt, ...);
+
 static struct notifier_block *xenstore_chain;
 
 /* If something in array of ids matches this device, return it. */
@@ -226,8 +228,11 @@
                                  int num_envp, char *buffer, int buffer_size)
 {
        struct xenbus_device *xdev;
+       struct xenbus_driver *drv = NULL;
        int i = 0;
        int length = 0;
+       char *basepath_end;
+       char *frontend_id;
 
        DPRINTK("");
 
@@ -237,6 +242,9 @@
        xdev = to_xenbus_device(dev);
        if (xdev == NULL)
                return -ENODEV;
+
+       if (dev->driver)
+               drv = to_xenbus_driver(dev->driver);
 
        /* stuff we want to pass to /sbin/hotplug */
        add_hotplug_env_var(envp, num_envp, &i,
@@ -246,6 +254,25 @@
        add_hotplug_env_var(envp, num_envp, &i,
                            buffer, buffer_size, &length,
                            "XENBUS_PATH=%s", xdev->nodename);
+
+       add_hotplug_env_var(envp, num_envp, &i,
+                           buffer, buffer_size, &length,
+                           "XENBUS_BASE_PATH=%s", xdev->nodename);
+
+       basepath_end = strrchr(envp[i - 1], '/');
+       length -= strlen(basepath_end);
+       *basepath_end = '\0';
+       basepath_end = strrchr(envp[i - 1], '/');
+       length -= strlen(basepath_end);
+       *basepath_end = '\0';
+
+       basepath_end++;
+       frontend_id = kmalloc(strlen(basepath_end) + 1, GFP_KERNEL);
+       strcpy(frontend_id, basepath_end);
+       add_hotplug_env_var(envp, num_envp, &i,
+                           buffer, buffer_size, &length,
+                           "XENBUS_FRONTEND_ID=%s", frontend_id);
+       kfree(frontend_id);
 
        /* terminate, set to next free slot, shrink available space */
        envp[i] = NULL;
@@ -254,9 +281,9 @@
        buffer = &buffer[length];
        buffer_size -= length;
 
-       if (dev->driver && to_xenbus_driver(dev->driver)->hotplug)
-               return to_xenbus_driver(dev->driver)->hotplug
-                       (xdev, envp, num_envp, buffer, buffer_size);
+       if (drv && drv->hotplug)
+               return drv->hotplug(xdev, envp, num_envp, buffer,
+                                   buffer_size);
 
        return 0;
 }
diff -r 3a4589331504 -r 2dcfb85f5bac tools/examples/block
--- a/tools/examples/block      Fri Dec  2 21:52:41 2005
+++ b/tools/examples/block      Sat Dec  3 09:44:38 2005
@@ -89,36 +89,51 @@
     fi
   done
 
-##
-## XXX SMH: the below causes live migration on localhost to fail sometimes
-## since the source domain may still appear to be using a local device. 
-## For now simply comment it out - a proper fix will come in due course. 
-
-#   for file in /sys/devices/xen-backend/*/physical_device
-#   do
-#     if [ -e "$file" ] # Cope with no devices, i.e. the * above did not expand.
-#     then
-#       local d=$(cat "$file")
-#       if [ "$d" == "$devmm" ]
-#       then
-#         if [ "$mode" == 'w' ]
-#         then
-#           echo 'guest'
-#           return
-#         else
-#           local m=$(cat "${file/physical_device/mode}")
-
-#           if expr index "$m" 'w' >/dev/null
-#           then
-#             echo 'guest'
-#             return
-#           fi
-#         fi
-#       fi
-#     fi
-#   done
+  for dom in $(xenstore-list "$XENBUS_BASE_PATH")
+  do
+    for dev in $(xenstore-list "$XENBUS_BASE_PATH/$dom")
+    do
+      d=$(xenstore_read_default \
+            "$XENBUS_BASE_PATH/$dom/$dev/physical-device" "")
+
+      if [ "$d" == "$devmm" ]
+      then
+        if [ "$mode" == 'w' ]
+        then
+          if ! same_vm $dom
+          then
+            echo 'guest'
+            return
+          fi
+        else
+          local m=$(xenstore_read "$XENBUS_BASE_PATH/$dom/$dev/mode")
+          m=$(canonicalise_mode "$m")
+
+          if [ "$m" == 'w' ]
+          then
+            if ! same_vm $dom
+            then
+              echo 'guest'
+              return
+            fi
+          fi
+        fi
+      fi
+    done
+  done
 
   echo 'ok'
+}
+
+
+same_vm()
+{
+  local thisdom="$XENBUS_FRONTEND_ID"
+  local otherdom="$1"
+  local thisvm=$(xenstore-read "/local/domain/$thisdom/vm")
+  local othervm=$(xenstore-read "/local/domain/$otherdom/vm")
+  # The exit status of the test is the result: 0 iff both domains share a VM.
+  [ "$thisvm" == "$othervm" ]
+}
 
 
@@ -200,6 +215,7 @@
     m2='read-only '
   fi
 
+  release_lock "block"
   ebusy \
 "${prefix}${m1}in ${dom}domain,
 and so cannot be mounted ${m2}${when}."
@@ -224,79 +240,106 @@
     case $t in 
       phy)
         dev=$(expand_dev $p)
+        claim_lock "block"
         check_device_sharing "$dev" "$mode"
        write_dev "$dev"
+        release_lock "block"
        exit 0
        ;;
 
       file)
         # Canonicalise the file, for sharing check comparison, and the mode
         # for ease of use here.
-        file=$(readlink -f "$p")
+        file=$(readlink -f "$p") || fatal "$p does not exist."
         mode=$(canonicalise_mode "$mode")
+
+        claim_lock "block"
 
         if [ "$mode" == 'w' ] && ! stat "$file" -c %A | grep -q w
         then
+          release_lock "block"
           ebusy \
 "File $file is read-only, and so I will not
 mount it read-write in a guest domain."
         fi
 
-
-       while true
-        do 
-          loopdev=''
-          for dev in /dev/loop*
-          do
-            if [ ! -b "$dev" ]
+        loopdev=''
+        for dev in /dev/loop*
+        do
+          if [ ! -b "$dev" ]
+          then
+            continue
+          fi
+
+          f=$(losetup "$dev" 2>/dev/null) || f=''
+
+          if [ "$f" ]
+          then
+            # $dev is in use.  Check sharing.
+            if [ "$mode" == '!' ]
             then
               continue
             fi
 
-            f=$(losetup "$dev" 2>/dev/null) || f='()'
             f=$(echo "$f" | sed -e 's/.*(\(.*\)).*/\1/g')
 
-            log err "$file $f $dev"
-
-            if [ "$f" ]
-            then
-              # $dev is in use.  Check sharing.
-              if [ "$mode" == '!' ]
+            # $f is the filename, as read from losetup, but the loopback
+            # driver truncates filenames at 64 characters, so we need to go
+            # trawling through the store if it's longer than that.  Truncation
+            # is indicated by an asterisk at the end of the filename.
+            if expr index "$f" '*' >/dev/null
+            then
+              found=""
+              for dom in $(xenstore-list "$XENBUS_BASE_PATH")
+              do
+                for domdev in $(xenstore-list "$XENBUS_BASE_PATH/$dom")
+                do
+                  d=$(xenstore_read_default \
+                        "$XENBUS_BASE_PATH/$dom/$domdev/node" "")
+                  if [ "$d" == "$dev" ]
+                  then
+                    f=$(xenstore_read "$XENBUS_BASE_PATH/$dom/$domdev/params")
+                    found=1
+                    break 2
+                  fi
+                done
+              done
+
+              if [ ! "$found" ]
               then
+                # This loopback device is in use by someone else, so skip it.
+                log debug "Loopback sharing check skips device $dev."
                 continue
               fi
-
-              f=$(readlink -f "$f")
-
-              if [ "$f" == "$file" ]
-              then
-                check_file_sharing "$file" "$dev" "$mode"
-              fi
-            else
-              # $dev is not in use, so we'll remember it for use later; we want
-              # to finish the sharing check first.
-              
-              if [ "$loopdev" == '' ]
-              then
-                loopdev="$dev"
-              fi
-            fi
-          done
-
-          if [ "$loopdev" == '' ]
-          then
-            fatal 'Failed to find an unused loop device'
+            fi
+
+            f=$(readlink -f "$f")
+
+            if [ "$f" == "$file" ]
+            then
+              check_file_sharing "$file" "$dev" "$mode"
+            fi
+          else
+            # $dev is not in use, so we'll remember it for use later; we want
+            # to finish the sharing check first.
+
+            if [ "$loopdev" == '' ]
+            then
+              loopdev="$dev"
+            fi
           fi
-          if losetup "$loopdev" "$file"
-          then
-           log err "mapped $file using $loopdev"
-            xenstore_write "$XENBUS_PATH/node" "$loopdev"
-            write_dev "$loopdev"
-            exit 0
-          else
-            log err "losetup $loopdev $file failed, retry"
-          fi
-       done
+        done
+
+        if [ "$loopdev" == '' ]
+        then
+          fatal 'Failed to find an unused loop device'
+        fi
+
+        do_or_die losetup "$loopdev" "$file"
+        xenstore_write "$XENBUS_PATH/node" "$loopdev"
+        write_dev "$loopdev"
+        release_lock "block"
+        exit 0
        ;;
     esac
     ;;
diff -r 3a4589331504 -r 2dcfb85f5bac tools/examples/vif-nat
--- a/tools/examples/vif-nat    Fri Dec  2 21:52:41 2005
+++ b/tools/examples/vif-nat    Sat Dec  3 09:44:38 2005
@@ -91,8 +91,6 @@
 netmask=$(dotted_quad $intmask)
 network=$(dotted_quad $(( $vif_int & $intmask )) )
 
-main_ip=$(dom0_ip)
-
 
 dhcp_remove_entry()
 {
@@ -140,7 +138,7 @@
 
         do_or_die ip link set "$vif" up arp on
         do_or_die ip addr add "$router_ip" dev "$vif"
-        do_or_die ip route add "$vif_ip" dev "$vif" src "$main_ip"
+        do_or_die ip route add "$vif_ip" dev "$vif" src "$router_ip"
         echo 1 >/proc/sys/net/ipv4/conf/${vif}/proxy_arp
         [ "$dhcp" != 'no' ] && dhcp_up
         ;;
diff -r 3a4589331504 -r 2dcfb85f5bac tools/examples/xen-hotplug-common.sh
--- a/tools/examples/xen-hotplug-common.sh      Fri Dec  2 21:52:41 2005
+++ b/tools/examples/xen-hotplug-common.sh      Sat Dec  3 09:44:38 2005
@@ -18,6 +18,8 @@
 
 dir=$(dirname "$0")
 . "$dir/xen-script-common.sh"
+
+exec 2>>/var/log/xen-hotplug.log
 
 export PATH="/sbin:/bin:/usr/bin:/usr/sbin:$PATH"
 export LANG="POSIX"
@@ -45,7 +47,7 @@
 }
 
 sigerr() {
-  fatal "$0" "$@" "failed; error detected."
+  fatal "$0 failed; error detected."
 }
 
 trap sigerr ERR
@@ -86,4 +88,88 @@
   xenstore-write "$@" || fatal "Writing $@ to xenstore failed."
 }
 
+
+#
+# Serialisation
+#
+
+LOCK_SLEEPTIME=1
+LOCK_SPINNING_RETRIES=5
+LOCK_RETRIES=10
+LOCK_BASEDIR=/var/run/xen-hotplug
+
+
+claim_lock()
+{
+  local lockdir="$LOCK_BASEDIR/$1"
+  mkdir -p "$LOCK_BASEDIR"
+  _claim_lock "$lockdir"
+}
+
+
+release_lock()
+{
+  _release_lock "$LOCK_BASEDIR/$1"
+}
+
+
+_claim_lock()
+{
+  local lockdir="$1"
+  local owner=$(_lock_owner "$lockdir")
+  local retries=0
+  # $1 is already the full lock path, so the ERR trap uses _release_lock.
+  while [ $retries -lt $LOCK_RETRIES ]
+  do
+    mkdir "$lockdir" && trap "_release_lock '$lockdir'; sigerr" ERR &&
+      _update_lock_info "$lockdir" && return
+    # A change of owner means the lock is moving on: restart the retry count.
+    local new_owner=$(_lock_owner "$lockdir")
+    if [ "$new_owner" != "$owner" ]
+    then
+      owner="$new_owner"
+      retries=0
+    fi
+    # Spin for the first few retries, then sleep between attempts.
+    if [ $retries -gt $LOCK_SPINNING_RETRIES ]
+    then
+      sleep $LOCK_SLEEPTIME
+    else
+      sleep 0
+    fi
+    retries=$(($retries + 1))
+  done
+  _steal_lock "$lockdir"
+}
+
+
+_release_lock()
+{
+  trap sigerr ERR
+  rm -rf "$1" || true
+}
+
+
+_steal_lock()
+{
+  local lockdir="$1"
+  local owner=$(cat "$lockdir/owner" 2>/dev/null || echo "unknown")
+  log err "Forced to steal lock on $lockdir from $owner!"
+  _release_lock "$lockdir"
+  _claim_lock "$lockdir"
+}
+
+
+_lock_owner()
+{
+  cat "$1/owner" 2>/dev/null || echo "unknown"
+}
+
+
+_update_lock_info()
+{
+  echo "$$: $0" >"$1/owner"
+}
+
+
 log debug "$@" "XENBUS_PATH=$XENBUS_PATH"
diff -r 3a4589331504 -r 2dcfb85f5bac tools/examples/xen-network-common.sh
--- a/tools/examples/xen-network-common.sh      Fri Dec  2 21:52:41 2005
+++ b/tools/examples/xen-network-common.sh      Sat Dec  3 09:44:38 2005
@@ -93,5 +93,5 @@
 
 find_dhcpd_init_file()
 {
-  first_file -x /etc/init.d/dhcp3-server /etc/init.d/dhcp
+  first_file -x /etc/init.d/{dhcp3-server,dhcp,dhcpd}
 }
diff -r 3a4589331504 -r 2dcfb85f5bac tools/examples/xmexample.vmx
--- a/tools/examples/xmexample.vmx      Fri Dec  2 21:52:41 2005
+++ b/tools/examples/xmexample.vmx      Sat Dec  3 09:44:38 2005
@@ -30,8 +30,10 @@
 # the number of cpus guest platform has, default=1
 vcpus=1
 
-# Which CPU to start domain on? 
-#cpu = -1   # leave to Xen to pick
+# List of which CPUS this domain is allowed to use, default Xen picks
+#cpus = ""         # leave to Xen to pick
+#cpus = "0"        # all vcpus run on CPU0
+#cpus = "0-3,5,^1" # run on cpus 0,2,3,5
 
 # Optionally define mac and/or bridge for the network interfaces.
 # Random MACs are assigned if not given.
diff -r 3a4589331504 -r 2dcfb85f5bac tools/examples/xmexample.vti
--- a/tools/examples/xmexample.vti      Fri Dec  2 21:52:41 2005
+++ b/tools/examples/xmexample.vti      Sat Dec  3 09:44:38 2005
@@ -23,8 +23,10 @@
 # A name for your domain. All domains must have different names.
 name = "ExampleVMXDomain"
 
-# Which CPU to start domain on? 
-#cpu = -1   # leave to Xen to pick
+# List of which CPUS this domain is allowed to use, default Xen picks
+#cpus = ""         # leave to Xen to pick
+#cpus = "0"        # all vcpus run on CPU0
+#cpus = "0-3,5,^1" # run on cpus 0,2,3,5
 
 # Disable vif for now
 nics=0
diff -r 3a4589331504 -r 2dcfb85f5bac tools/examples/xmexample1
--- a/tools/examples/xmexample1 Fri Dec  2 21:52:41 2005
+++ b/tools/examples/xmexample1 Sat Dec  3 09:44:38 2005
@@ -22,8 +22,10 @@
 # A name for your domain. All domains must have different names.
 name = "ExampleDomain"
 
-# Which CPU to start domain on? 
-#cpu = -1   # leave to Xen to pick
+# List of which CPUS this domain is allowed to use, default Xen picks
+#cpus = ""         # leave to Xen to pick
+#cpus = "0"        # all vcpus run on CPU0
+#cpus = "0-3,5,^1" # run on cpus 0,2,3,5
 
 # Number of Virtual CPUS to use, default is 1
 #vcpus = 1
diff -r 3a4589331504 -r 2dcfb85f5bac tools/examples/xmexample2
--- a/tools/examples/xmexample2 Fri Dec  2 21:52:41 2005
+++ b/tools/examples/xmexample2 Sat Dec  3 09:44:38 2005
@@ -51,9 +51,11 @@
 # so we use the vmid to create a name.
 name = "VM%d" % vmid
 
-# Which CPU to start domain on? 
-#cpu = -1   # leave to Xen to pick
-cpu = vmid  # set based on vmid (mod number of CPUs)
+# List of which CPUS this domain is allowed to use, default Xen picks
+#cpus = ""         # leave to Xen to pick
+#cpus = "0"        # all vcpus run on CPU0
+#cpus = "0-3,5,^1" # run on cpus 0,2,3,5
+#cpus = "%s" % vmid # set based on vmid (mod number of CPUs)
 
 # Number of Virtual CPUS to use, default is 1
 #vcpus = 1
diff -r 3a4589331504 -r 2dcfb85f5bac tools/examples/xmexample3
--- a/tools/examples/xmexample3 Fri Dec  2 21:52:41 2005
+++ b/tools/examples/xmexample3 Sat Dec  3 09:44:38 2005
@@ -51,9 +51,11 @@
 # so we use the vmid to create a name.
 name = "VM%d" % vmid
 
-# Which CPU to start domain on? 
-#cpu = -1   # leave to Xen to pick
-cpu = vmid  # set based on vmid (mod number of CPUs)
+# List of which CPUS this domain is allowed to use, default Xen picks
+#cpus = ""         # leave to Xen to pick
+#cpus = "0"        # all vcpus run on CPU0
+#cpus = "0-3,5,^1" # run on cpus 0,2,3,5
+cpus = "%s" % vmid # set based on vmid (mod number of CPUs)
 
 #----------------------------------------------------------------------------
 # Define network interfaces.
diff -r 3a4589331504 -r 2dcfb85f5bac tools/python/xen/util/bugtool.py
--- a/tools/python/xen/util/bugtool.py  Fri Dec  2 21:52:41 2005
+++ b/tools/python/xen/util/bugtool.py  Sat Dec  3 09:44:38 2005
@@ -41,9 +41,10 @@
 
 TITLE_RE = re.compile(r'<title>(.*)</title>')
 
-FILES_TO_SEND = [ '/var/log/syslog', '/var/log/messages', '/var/log/debug',
-                  '/var/log/xend.log', '/var/log/xend-debug.log',
-                  '/var/log/xenstored-trace.log' ]
+FILES_TO_SEND = [ '/var/log/' + x for x in 
+                  [ 'syslog', 'messages', 'debug',
+                    'xend.log', 'xend-debug.log', 'xenstored-trace.log',
+                    'xen-hotplug.log' ] ]
 #FILES_TO_SEND = [  ]
 
 
diff -r 3a4589331504 -r 2dcfb85f5bac tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Fri Dec  2 21:52:41 2005
+++ b/tools/python/xen/xend/XendDomain.py       Sat Dec  3 09:44:38 2005
@@ -36,6 +36,7 @@
 from xen.xend import XendCheckpoint
 from xen.xend.XendError import XendError
 from xen.xend.XendLogging import log
+from xen.xend.xenstore.xstransact import xstransact
 from xen.xend.xenstore.xswatch import xswatch
 
 
@@ -46,6 +47,8 @@
 __all__ = [ "XendDomain" ]
 
 PRIV_DOMAIN = 0
+VMROOT = '/vm/'
+
 
 class XendDomain:
     """Index of all domains. Singleton.
@@ -64,6 +67,9 @@
     # instance() must be able to return a valid instance of this class even
     # during this initialisation.
     def init(self):
+        xstransact.Mkdir(VMROOT)
+        xstransact.SetPermissions(VMROOT, { 'dom' : PRIV_DOMAIN })
+
         self.domains_lock.acquire()
         try:
             self._add_domain(
diff -r 3a4589331504 -r 2dcfb85f5bac tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Fri Dec  2 21:52:41 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py   Sat Dec  3 09:44:38 2005
@@ -43,7 +43,7 @@
 from xen.xend.XendBootloader import bootloader
 from xen.xend.XendError import XendError, VmError
 
-from xen.xend.xenstore.xstransact import xstransact
+from xen.xend.xenstore.xstransact import xstransact, complete
 from xen.xend.xenstore.xsutil import GetDomainPath, IntroduceDomain
 from xen.xend.xenstore.xswatch import xswatch
 
@@ -83,8 +83,6 @@
 STATE_DOM_SHUTDOWN = 2
 
 SHUTDOWN_TIMEOUT = 30
-
-VMROOT  = '/vm/'
 
 ZOMBIE_PREFIX = 'Zombie-'
 
@@ -234,7 +232,7 @@
             log.warn(str(exn))
 
         vm = XendDomainInfo(xeninfo, domid, dompath, True, priv)
-        vm.removeDom()
+        vm.recreateDom()
         vm.removeVm()
         vm.storeVmDetails()
         vm.storeDomDetails()
@@ -288,6 +286,7 @@
         result[e[0]] = get_cfg(e[0], e[1])
 
     result['cpu']       = get_cfg('cpu',       int)
+    result['cpus']      = get_cfg('cpus',      str)
     result['image']     = get_cfg('image')
 
     try:
@@ -300,6 +299,43 @@
         raise VmError(
             'Invalid configuration setting: vcpus = %s: %s' %
             (sxp.child_value(result['image'], 'vcpus', 1), str(exn)))
+
+    try:
+        # support legacy config files with 'cpu' parameter; None and -1
+        # mean "Xen picks" and are skipped, but CPU 0 is a valid value.
+        # NB: prepended so that, as before, 'cpu' decides where VCPU0 goes.
+        if result['cpu'] not in (None, -1):
+            if result['cpus']:
+                result['cpus'] = "%s,%s" % (str(result['cpu']), result['cpus'])
+            else:
+                result['cpus'] = str(result['cpu'])
+
+        # convert 'cpus' string to list of ints
+        # 'cpus' supports a list of ranges (0-3), separated by
+        # commas, and negation, (^1); negated ids may be multi-digit (^12).
+        # Precedence is settled by order of the string:
+        #     "0-3,^1"   -> [0,2,3]
+        #     "0-3,^1,1" -> [0,1,2,3]
+        if result['cpus']:
+            cpus = []
+            for c in result['cpus'].split(','):
+                if c.find('-') != -1:
+                    (x,y) = c.split('-')
+                    for i in range(int(x),int(y)+1):
+                        cpus.append(int(i))
+                else:
+                    # remove this element from the list
+                    if c.startswith('^'):
+                        cpus = [x for x in cpus if x != int(c[1:])]
+                    else:
+                        cpus.append(int(c))
+
+            result['cpus'] = cpus
+
+    except ValueError, exn:
+        raise VmError(
+            'Invalid configuration setting: cpus = %s: %s' %
+            (result['cpus'], exn))
 
     result['backend'] = []
     for c in sxp.children(config, 'backend'):
@@ -385,7 +421,7 @@
         else:
             self.domid = None
 
-        self.vmpath  = VMROOT + self.info['uuid']
+        self.vmpath  = XendDomain.VMROOT + self.info['uuid']
         self.dompath = dompath
 
         if augment:
@@ -488,6 +524,7 @@
             defaultInfo('on_reboot',    lambda: "restart")
             defaultInfo('on_crash',     lambda: "restart")
             defaultInfo('cpu',          lambda: None)
+            defaultInfo('cpus',         lambda: [])
             defaultInfo('cpu_weight',   lambda: 1.0)
 
             # some domains don't have a config file (e.g. dom0 )
@@ -569,6 +606,14 @@
 
     def removeDom(self, *args):
         return xstransact.Remove(self.dompath, *args)
+
+    def recreateDom(self):
+        complete(self.dompath, lambda t: self._recreateDom(t))
+
+    def _recreateDom(self, t):
+        t.remove()
+        t.mkdir()
+        t.set_permissions({ 'dom' : self.domid })
 
 
     ## private:
@@ -769,7 +814,10 @@
                     if reason == 'suspend':
                         self.state_set(STATE_DOM_SHUTDOWN)
                         # Don't destroy the domain.  XendCheckpoint will do
-                        # this once it has finished.
+                        # this once it has finished.  However, stop watching
+                        # the VM path now, otherwise we will end up with one
+                        # watch for the old domain, and one for the new.
+                        self.unwatchVm()
                     elif reason in ['poweroff', 'reboot']:
                         restart_reason = reason
                     else:
@@ -968,9 +1016,9 @@
         if self.infoIsSet('image'):
             sxpr.append(['image', self.info['image']])
 
-        if self.infoIsSet('device'):
-            for (_, c) in self.info['device']:
-                sxpr.append(['device', c])
+        for cls in controllerClasses:
+            for config in self.getDeviceConfigurations(cls):
+                sxpr.append(['device', config])
 
         def stateChar(name):
             if name in self.info:
@@ -1084,7 +1132,7 @@
 
         self.dompath = GetDomainPath(self.domid)
 
-        self.removeDom()
+        self.recreateDom()
 
         # Set maximum number of vcpus in domain
         xc.domain_max_vcpus(self.domid, int(self.info['vcpus']))
@@ -1121,9 +1169,15 @@
             xc.domain_setmaxmem(self.domid, m)
             xc.domain_memory_increase_reservation(self.domid, m, 0, 0)
 
-            cpu = self.info['cpu']
-            if cpu is not None and cpu != -1:
-                xc.domain_pincpu(self.domid, 0, 1 << cpu)
+            # repin domain vcpus if a restricted cpus list is provided
+            # this is done prior to memory allocation to aid in memory
+            # distribution for NUMA systems.
+            cpus = self.info['cpus']
+            if cpus is not None and len(cpus) > 0:
+                for v in range(0, self.info['max_vcpu_id']+1):
+                    # pincpu takes a list of ints
+                    cpu = [ int( cpus[v % len(cpus)] ) ]
+                    xc.domain_pincpu(self.domid, v, cpu)
 
             self.createChannels()
 
@@ -1179,18 +1233,31 @@
     def cleanupVm(self):
         """Cleanup VM resources.  Idempotent.  Nothrow guarantee."""
 
+        self.unwatchVm()
+
+        try:
+            self.removeVm()
+        except:
+            log.exception("Removing VM path failed.")
+
+
+    ## private:
+
+    def unwatchVm(self):
+        """Remove the watch on the VM path, if any.  Idempotent.  Nothrow
+        guarantee."""
+
         try:
             try:
                 if self.vmWatch:
                     self.vmWatch.unwatch()
+            finally:
                 self.vmWatch = None
-            except:
-                log.exception("Unwatching VM path failed.")
-
-            self.removeVm()
         except:
-            log.exception("Removing VM path failed.")
-
+            log.exception("Unwatching VM path failed.")
+
+
+    ## public:
 
     def destroy(self):
         """Cleanup VM and destroy domain.  Nothrow guarantee."""
@@ -1345,6 +1412,7 @@
             if rename:
                 self.preserveForRestart()
             else:
+                self.unwatchVm()
                 self.destroyDomain()
 
             # new_dom's VM will be the same as this domain's VM, except where
@@ -1381,10 +1449,11 @@
         log.info("Renaming dead domain %s (%d, %s) to %s (%s).",
                  self.info['name'], self.domid, self.info['uuid'],
                  new_name, new_uuid)
+        self.unwatchVm()
         self.release_devices()
         self.info['name'] = new_name
         self.info['uuid'] = new_uuid
-        self.vmpath = VMROOT + new_uuid
+        self.vmpath = XendDomain.VMROOT + new_uuid
         self.storeVmDetails()
         self.preserve()
 
@@ -1392,6 +1461,7 @@
     def preserve(self):
         log.info("Preserving dead domain %s (%d).", self.info['name'],
                  self.domid)
+        self.unwatchVm()
         self.storeDom('xend/shutdown_completed', 'True')
         self.state_set(STATE_DOM_SHUTDOWN)
 
diff -r 3a4589331504 -r 2dcfb85f5bac tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py     Fri Dec  2 21:52:41 2005
+++ b/tools/python/xen/xend/server/DevController.py     Sat Dec  3 09:44:38 2005
@@ -22,7 +22,7 @@
 from xen.xend.XendError import VmError
 from xen.xend.XendLogging import log
 
-from xen.xend.xenstore.xstransact import xstransact
+from xen.xend.xenstore.xstransact import xstransact, complete
 from xen.xend.xenstore.xswatch import xswatch
 
 DEVICE_CREATE_TIMEOUT = 10
@@ -85,6 +85,8 @@
         (backpath, frontpath) = self.addStoreEntries(config, devid, back,
                                                      front)
 
+        import xen.xend.XendDomain
+        count = 0
         while True:
             t = xstransact()
             try:
@@ -97,19 +99,31 @@
                     
                     raise VmError("Device %s is already connected." % dev_str)
 
-                log.debug('DevController: writing %s to %s.', str(front),
-                          frontpath)
-                log.debug('DevController: writing %s to %s.', str(back),
-                          backpath)
+                if count == 0:
+                    log.debug('DevController: writing %s to %s.', str(front),
+                              frontpath)
+                    log.debug('DevController: writing %s to %s.', str(back),
+                              backpath)
+                elif count % 50 == 0:
+                    log.debug(
+                      'DevController: still waiting to write device entries.')
 
                 t.remove(frontpath)
                 t.remove(backpath)
 
+                t.mkdir(backpath)
+                t.set_permissions(backpath,
+                                  {'dom': xen.xend.XendDomain.PRIV_DOMAIN },
+                                  {'dom'  : self.vm.getDomid(),
+                                   'read' : True })
+
                 t.write2(frontpath, front)
                 t.write2(backpath,  back)
 
                 if t.commit():
                     return devid
+
+                count += 1
             except:
                 t.abort()
                 raise
@@ -266,20 +280,17 @@
         the device configuration instead.
         """
         path = self.frontendMiscPath()
-        while True:
-            t = xstransact(path)
-            try:
-                result = t.read("nextDeviceID")
-                if result:
-                    result = int(result)
-                else:
-                    result = 0
-                t.write("nextDeviceID", str(result + 1))
-                if t.commit():
-                    return result
-            except:
-                t.abort()
-                raise
+        return complete(path, self._allocateDeviceID)
+
+
+    def _allocateDeviceID(self, t):
+        result = t.read("nextDeviceID")
+        if result:
+            result = int(result)
+        else:
+            result = 0
+        t.write("nextDeviceID", str(result + 1))
+        return result
 
 
     def readBackend(self, devid, *args):
diff -r 3a4589331504 -r 2dcfb85f5bac tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py      Fri Dec  2 21:52:41 2005
+++ b/tools/python/xen/xend/xenstore/xstransact.py      Sat Dec  3 09:44:38 2005
@@ -213,6 +213,27 @@
                 self._write(key, fmt % val)
 
 
+    def mkdir(self, *args):
+        if len(args) == 0:
+            xshandle().mkdir(self.transaction, self.path)
+        else:
+            for key in args:
+                xshandle().mkdir(self.transaction, self.prependPath(key))
+
+
+    def set_permissions(self, *args):
+        if len(args) == 0:
+            raise TypeError
+        elif isinstance(args[0], str):
+            self.callRebased(args[0], self.set_permissions, *args[1:])
+        else:
+            if not self.path:
+                raise RuntimeError('Cannot set permissions on the root')
+
+            xshandle().set_permissions(self.transaction, self.path,
+                                       list(args))
+
+
     def remove2(self, middlePath, *args):
         self.callRebased(middlePath, self.remove, *args)
 
@@ -245,29 +266,11 @@
         given path, and return a list composed of the values at each of those
         instead.  This operation is performed inside a transaction.
         """
-        while True:
-            t = cls(path)
-            try:
-                v = t.read(*args)
-                t.abort()
-                return v
-            except:
-                t.abort()
-                raise
-
+        return complete(path, lambda t: t.read(*args))
     Read = classmethod(Read)
 
     def Write(cls, path, *args):
-        while True:
-            t = cls(path)
-            try:
-                t.write(*args)
-                if t.commit():
-                    return
-            except:
-                t.abort()
-                raise
-
+        complete(path, lambda t: t.write(*args))
     Write = classmethod(Write)
 
     def Remove(cls, path, *args):
@@ -275,16 +278,7 @@
         each further argument as a subpath to the given path, and remove each
         of those instead.  This operation is performed inside a transaction.
         """
-        while True:
-            t = cls(path)
-            try:
-                t.remove(*args)
-                if t.commit():
-                    return
-            except:
-                t.abort()
-                raise
-
+        complete(path, lambda t: t.remove(*args))
     Remove = classmethod(Remove)
 
     def List(cls, path, *args):
@@ -294,16 +288,7 @@
         and return the cumulative listing of each of those instead.  This
         operation is performed inside a transaction.
         """
-        while True:
-            t = cls(path)
-            try:
-                v = t.list(*args)
-                if t.commit():
-                    return v
-            except:
-                t.abort()
-                raise
-
+        return complete(path, lambda t: t.list(*args))
     List = classmethod(List)
 
     def ListRecursive(cls, path, *args):
@@ -313,40 +298,33 @@
         subpath to the given path, and return the cumulative listing of each
         of those instead.  This operation is performed inside a transaction.
         """
-        while True:
-            t = cls(path)
-            try:
-                v = t.list_recursive(*args)
-                if t.commit():
-                    return v
-            except:
-                t.abort()
-                raise
-
+        return complete(path, lambda t: t.list_recursive(*args))
     ListRecursive = classmethod(ListRecursive)
 
     def Gather(cls, path, *args):
-        while True:
-            t = cls(path)
-            try:
-                v = t.gather(*args)
-                if t.commit():
-                    return v
-            except:
-                t.abort()
-                raise
-
+        return complete(path, lambda t: t.gather(*args))
     Gather = classmethod(Gather)
 
     def Store(cls, path, *args):
-        while True:
-            t = cls(path)
-            try:
-                v = t.store(*args)
-                if t.commit():
-                    return v
-            except:
-                t.abort()
-                raise
-
+        complete(path, lambda t: t.store(*args))
     Store = classmethod(Store)
+
+    def SetPermissions(cls, path, *args):
+        complete(path, lambda t: t.set_permissions(*args))
+    SetPermissions = classmethod(SetPermissions)
+
+    def Mkdir(cls, path, *args):
+        complete(path, lambda t: t.mkdir(*args))
+    Mkdir = classmethod(Mkdir)
+
+
+def complete(path, f):
+    while True:
+        t = xstransact(path)
+        try:
+            result = f(t)
+            if t.commit():
+                return result
+        except:
+            t.abort()
+            raise
diff -r 3a4589331504 -r 2dcfb85f5bac tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Fri Dec  2 21:52:41 2005
+++ b/tools/python/xen/xm/create.py     Sat Dec  3 09:44:38 2005
@@ -154,7 +154,11 @@
 
 gopts.var('cpu', val='CPU',
           fn=set_int, default=None,
-          use="CPU to run the domain on.")
+          use="CPU to run the VCPU0 on.")
+
+gopts.var('cpus', val='CPUS',
+          fn=set_int, default=None,
+          use="CPUS to run the domain on.")
 
 gopts.var('lapic', val='LAPIC',
           fn=set_int, default=0,
@@ -572,6 +576,8 @@
     
     if vals.cpu is not None:
         config.append(['cpu', vals.cpu])
+    if vals.cpus is not None:
+        config.append(['cpus', vals.cpus])
     if vals.cpu_weight is not None:
         config.append(['cpu_weight', vals.cpu_weight])
     if vals.blkif:
diff -r 3a4589331504 -r 2dcfb85f5bac tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Fri Dec  2 21:52:41 2005
+++ b/tools/python/xen/xm/main.py       Sat Dec  3 09:44:38 2005
@@ -459,7 +459,9 @@
             for x in server.xend_node()[1:]:
                 if len(x) > 1 and x[0] == 'nr_cpus':
                     nr_cpus = int(x[1])
-                    cpumap = filter(lambda x: x < nr_cpus, cpumap)
+                    # normalize cpumap by modulus nr_cpus, and drop duplicates
+                    cpumap = dict.fromkeys(
+                                map(lambda x: x % nr_cpus, cpumap)).keys()
                     if len(cpumap) == nr_cpus:
                         return "any cpu"
                     break
diff -r 3a4589331504 -r 2dcfb85f5bac tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c   Fri Dec  2 21:52:41 2005
+++ b/tools/xenstore/xenstored_core.c   Sat Dec  3 09:44:38 2005
@@ -1401,7 +1401,7 @@
 static void manual_node(const char *name, const char *child)
 {
        struct node *node;
-       struct xs_permissions perms = { .id = 0, .perms = XS_PERM_READ };
+       struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
 
        node = talloc(NULL, struct node);
        node->name = name;
@@ -1442,6 +1442,7 @@
                   the balloon driver, this can be fatal.
                */
                internal_rm("/local");
+               manual_node("/", "local");
        }
        else {
                tdb_ctx = tdb_open(tdbname, 7919, TDB_FLAGS, O_RDWR|O_CREAT,
diff -r 3a4589331504 -r 2dcfb85f5bac tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Fri Dec  2 21:52:41 2005
+++ b/tools/xenstore/xenstored_domain.c Sat Dec  3 09:44:38 2005
@@ -287,6 +287,7 @@
 
        domain->conn = new_connection(writechn, readchn);
        domain->conn->domain = domain;
+       domain->conn->id = domid;
 
        domain->remote_port = port;
        domain->mfn = mfn;
diff -r 3a4589331504 -r 2dcfb85f5bac tools/xm-test/tests/network-attach/network_utils.py
--- a/tools/xm-test/tests/network-attach/network_utils.py       Fri Dec  2 21:52:41 2005
+++ b/tools/xm-test/tests/network-attach/network_utils.py       Sat Dec  3 09:44:38 2005
@@ -28,7 +28,7 @@
     eths_before = count_eth(console)
     status, output = traceCommand("xm network-detach %s %d" % (domain_name, num))
     if status != 0:
-        return -1, "xm network-attach returned invalid %i != 0" % status
+        return -1, "xm network-detach returned invalid %i != 0" % status
 
     eths_after = count_eth(console)
     if eths_after != (eths_before-1):
diff -r 3a4589331504 -r 2dcfb85f5bac xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Fri Dec  2 21:52:41 2005
+++ b/xen/arch/x86/Makefile     Sat Dec  3 09:44:38 2005
@@ -37,7 +37,8 @@
 default: $(TARGET)
 
 $(TARGET): $(TARGET)-syms boot/mkelf32
-       ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000
+       ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \
+       `nm $(TARGET)-syms | sort | tail -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'`
 
 $(CURDIR)/arch.o: $(OBJS)
        $(LD) $(LDFLAGS) -r -o $@ $(OBJS)
diff -r 3a4589331504 -r 2dcfb85f5bac xen/arch/x86/boot/mkelf32.c
--- a/xen/arch/x86/boot/mkelf32.c       Fri Dec  2 21:52:41 2005
+++ b/xen/arch/x86/boot/mkelf32.c       Sat Dec  3 09:44:38 2005
@@ -222,6 +222,7 @@
 
 int main(int argc, char **argv)
 {
+    u64        final_exec_addr;
     u32        loadbase, dat_siz, mem_siz;
     char      *inimage, *outimage;
     int        infd, outfd;
@@ -234,15 +235,17 @@
     Elf64_Ehdr in64_ehdr;
     Elf64_Phdr in64_phdr;
 
-    if ( argc != 4 )
-    {
-        fprintf(stderr, "Usage: mkelf32 <in-image> <out-image> <load-base>\n");
+    if ( argc != 5 )
+    {
+        fprintf(stderr, "Usage: mkelf32 <in-image> <out-image> "
+                "<load-base> <final-exec-addr>\n");
         return 1;
     }
 
     inimage  = argv[1];
     outimage = argv[2];
     loadbase = strtoul(argv[3], NULL, 16);
+    final_exec_addr = strtoul(argv[4], NULL, 16);
 
     infd = open(inimage, O_RDONLY);
     if ( infd == -1 )
@@ -286,7 +289,10 @@
 
         (void)lseek(infd, in32_phdr.p_offset, SEEK_SET);
         dat_siz = (u32)in32_phdr.p_filesz;
-        mem_siz = (u32)in32_phdr.p_memsz;
+
+        /* Do not use p_memsz: it does not include BSS alignment padding. */
+        /*mem_siz = (u32)in32_phdr.p_memsz;*/
+        mem_siz = (u32)(final_exec_addr - in32_phdr.p_vaddr);
         break;
 
     case ELFCLASS64:
@@ -314,7 +320,10 @@
 
         (void)lseek(infd, in64_phdr.p_offset, SEEK_SET);
         dat_siz = (u32)in64_phdr.p_filesz;
-        mem_siz = (u32)in64_phdr.p_memsz;
+
+        /* Do not use p_memsz: it does not include BSS alignment padding. */
+        /*mem_siz = (u32)in64_phdr.p_memsz;*/
+        mem_siz = (u32)(final_exec_addr - in64_phdr.p_vaddr);
         break;
 
     default:
diff -r 3a4589331504 -r 2dcfb85f5bac xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Fri Dec  2 21:52:41 2005
+++ b/xen/arch/x86/boot/x86_32.S        Sat Dec  3 09:44:38 2005
@@ -74,16 +74,16 @@
         cmp     $0x2BADB002,%eax
         jne     not_multiboot
 
-        /* Save the Multiboot info structure for later use. */
-       add     $__PAGE_OFFSET,%ebx
-        push    %ebx
-
         /* Initialize BSS (no nasty surprises!) */
         mov     $__bss_start-__PAGE_OFFSET,%edi
         mov     $_end-__PAGE_OFFSET,%ecx
         sub     %edi,%ecx
         xor     %eax,%eax
         rep     stosb
+
+        /* Save the Multiboot info structure for later use. */
+        add     $__PAGE_OFFSET,%ebx
+        push    %ebx
 
 #ifdef CONFIG_X86_PAE
         /* Initialize low and high mappings of all memory with 2MB pages */
@@ -238,27 +238,28 @@
         .fill 2*NR_CPUS,8,0          /* space for TSS and LDT per CPU    */
 
         .org 0x2000
-/* Maximum STACK_ORDER for x86/32 is 1. We must therefore ensure that the */
-/* CPU0 stack is aligned on an even page boundary!                        */
-ENTRY(cpu0_stack)
-        .org 0x2000 + STACK_SIZE
-
-#ifdef CONFIG_X86_PAE
-
+
+#ifdef CONFIG_X86_PAE
 ENTRY(idle_pg_table)
 ENTRY(idle_pg_table_l3)
-        .quad 0x100000 + 0x2000 + STACK_SIZE + 1*PAGE_SIZE + 0x01
-        .quad 0x100000 + 0x2000 + STACK_SIZE + 2*PAGE_SIZE + 0x01
-        .quad 0x100000 + 0x2000 + STACK_SIZE + 3*PAGE_SIZE + 0x01
-        .quad 0x100000 + 0x2000 + STACK_SIZE + 4*PAGE_SIZE + 0x01
-        .org 0x2000 + STACK_SIZE + 1*PAGE_SIZE
+        .long idle_pg_table_l2 + 0*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
+        .long idle_pg_table_l2 + 1*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
+        .long idle_pg_table_l2 + 2*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
+        .long idle_pg_table_l2 + 3*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
+.section ".bss.page_aligned","w"
 ENTRY(idle_pg_table_l2)
-        .org 0x2000 + STACK_SIZE + 5*PAGE_SIZE
-
-#else /* CONFIG_X86_PAE */
-
+        .fill 4*PAGE_SIZE,1,0
+#else
+.section ".bss.page_aligned","w"
 ENTRY(idle_pg_table)
-ENTRY(idle_pg_table_l2) # Initial page directory is 4kB
-        .org 0x2000 + STACK_SIZE + PAGE_SIZE
-
-#endif /* CONFIG_X86_PAE */
+ENTRY(idle_pg_table_l2)
+        .fill 1*PAGE_SIZE,1,0
+#endif
+
+#if (STACK_ORDER == 0)
+.section ".bss.page_aligned","w"
+#else
+.section ".bss.twopage_aligned","w"
+#endif
+ENTRY(cpu0_stack)
+        .fill STACK_SIZE,1,0
diff -r 3a4589331504 -r 2dcfb85f5bac xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Fri Dec  2 21:52:41 2005
+++ b/xen/arch/x86/boot/x86_64.S        Sat Dec  3 09:44:38 2005
@@ -249,13 +249,8 @@
 ENTRY(idle_pg_table_l3)
         .quad idle_pg_table_l2 - __PAGE_OFFSET + 7
 
+/* Initial PDE -- level-2 page table. Maps first 64MB physical memory. */
         .org 0x4000
-/* Maximum STACK_ORDER for x86/64 is 2. We must therefore ensure that the */
-/* CPU0 stack is aligned on a 4-page boundary.                            */
-ENTRY(cpu0_stack)
-
-/* Initial PDE -- level-2 page table. Maps first 64MB physical memory. */
-        .org 0x4000 + STACK_SIZE
 ENTRY(idle_pg_table_l2)
         .macro identmap from=0, count=32
         .if \count-1
@@ -265,7 +260,15 @@
         .quad 0x00000000000001e3 + \from
         .endif
         .endm
-        identmap /* Too orangey for crows :-) */
-
-        .org 0x4000 + STACK_SIZE + PAGE_SIZE
+        identmap
+
+        .org 0x4000 + PAGE_SIZE
         .code64
+
+#if (STACK_ORDER == 0)
+.section ".bss.page_aligned","w"
+#else
+.section ".bss.twopage_aligned","w"
+#endif
+ENTRY(cpu0_stack)
+        .fill STACK_SIZE,1,0
diff -r 3a4589331504 -r 2dcfb85f5bac xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Dec  2 21:52:41 2005
+++ b/xen/arch/x86/mm.c Sat Dec  3 09:44:38 2005
@@ -128,8 +128,9 @@
 
 /* Used to defer flushing of memory structures. */
 static struct {
-#define DOP_FLUSH_TLB   (1<<0) /* Flush the TLB.                 */
-#define DOP_RELOAD_LDT  (1<<1) /* Reload the LDT shadow mapping. */
+#define DOP_FLUSH_TLB      (1<<0) /* Flush the local TLB.                    */
+#define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */
+#define DOP_RELOAD_LDT     (1<<2) /* Reload the LDT shadow mapping.          */
     unsigned int   deferred_ops;
     /* If non-NULL, specifies a foreign subject domain for some operations. */
     struct domain *foreign;
@@ -1323,14 +1324,28 @@
     struct domain *owner = page_get_owner(page);
     unsigned long gpfn;
 
-    if ( unlikely((owner != NULL) && shadow_mode_enabled(owner)) )
-    {
-        mark_dirty(owner, page_to_pfn(page));
-        if ( unlikely(shadow_mode_refcounts(owner)) )
-            return;
-        gpfn = __mfn_to_gpfn(owner, page_to_pfn(page));
-        ASSERT(VALID_M2P(gpfn));
-        remove_shadow(owner, gpfn, type & PGT_type_mask);
+    if ( likely(owner != NULL) )
+    {
+        /*
+         * We have to flush before the next use of the linear mapping
+         * (e.g., update_va_mapping()) or we could end up modifying a page
+         * that is no longer a page table (and hence screw up ref counts).
+         */
+        percpu_info[smp_processor_id()].deferred_ops |= DOP_FLUSH_ALL_TLBS;
+
+        if ( unlikely(shadow_mode_enabled(owner)) )
+        {
+            /* Raw page tables are rewritten during save/restore. */
+            if ( !shadow_mode_translate(owner) )
+                mark_dirty(owner, page_to_pfn(page));
+
+            if ( shadow_mode_refcounts(owner) )
+                return;
+
+            gpfn = __mfn_to_gpfn(owner, page_to_pfn(page));
+            ASSERT(VALID_M2P(gpfn));
+            remove_shadow(owner, gpfn, type & PGT_type_mask);
+        }
     }
 
     switch ( type & PGT_type_mask )
@@ -1600,11 +1615,14 @@
     deferred_ops = percpu_info[cpu].deferred_ops;
     percpu_info[cpu].deferred_ops = 0;
 
-    if ( deferred_ops & DOP_FLUSH_TLB )
+    if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) )
     {
         if ( shadow_mode_enabled(d) )
             shadow_sync_all(d);
-        local_flush_tlb();
+        if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
+            flush_tlb_mask(d->cpumask);
+        else
+            local_flush_tlb();
     }
         
     if ( deferred_ops & DOP_RELOAD_LDT )
diff -r 3a4589331504 -r 2dcfb85f5bac xen/arch/x86/x86_32/xen.lds
--- a/xen/arch/x86/x86_32/xen.lds       Fri Dec  2 21:52:41 2005
+++ b/xen/arch/x86/x86_32/xen.lds       Sat Dec  3 09:44:38 2005
@@ -23,7 +23,6 @@
   _etext = .;                  /* End of text section */
 
   .rodata : { *(.rodata) *(.rodata.*) } :text
-  .kstrtab : { *(.kstrtab) } :text
 
   . = ALIGN(32);               /* Exception table */
   __start___ex_table = .;
@@ -35,23 +34,10 @@
   __pre_ex_table : { *(__pre_ex_table) } :text
   __stop___pre_ex_table = .;
 
-  __start___ksymtab = .;       /* Kernel symbol table */
-  __ksymtab : { *(__ksymtab) } :text
-  __stop___ksymtab = .;
-
-  __start___kallsyms = .;      /* All kernel symbols */
-  __kallsyms : { *(__kallsyms) } :text
-  __stop___kallsyms = .;
-
   .data : {                    /* Data */
        *(.data)
        CONSTRUCTORS
        } :text
-
-  _edata = .;                  /* End of data section */
-
-  . = ALIGN(8192);             /* init_task */
-  .data.init_task : { *(.data.init_task) } :text
 
   . = ALIGN(4096);             /* Init code and data */
   __init_begin = .;
@@ -64,10 +50,13 @@
   __initcall_start = .;
   .initcall.init : { *(.initcall.init) } :text
   __initcall_end = .;
+  . = ALIGN(8192);
   __init_end = .;
 
   __bss_start = .;             /* BSS */
   .bss : {
+       *(.bss.twopage_aligned)
+       *(.bss.page_aligned)
        *(.bss)
        } :text
   _end = . ;
diff -r 3a4589331504 -r 2dcfb85f5bac xen/arch/x86/x86_64/xen.lds
--- a/xen/arch/x86/x86_64/xen.lds       Fri Dec  2 21:52:41 2005
+++ b/xen/arch/x86/x86_64/xen.lds       Sat Dec  3 09:44:38 2005
@@ -21,7 +21,6 @@
   _etext = .;                  /* End of text section */
 
   .rodata : { *(.rodata) *(.rodata.*) } :text
-  .kstrtab : { *(.kstrtab) } :text
 
   . = ALIGN(32);               /* Exception table */
   __start___ex_table = .;
@@ -33,23 +32,10 @@
   __pre_ex_table : { *(__pre_ex_table) } :text
   __stop___pre_ex_table = .;
 
-  __start___ksymtab = .;       /* Kernel symbol table */
-  __ksymtab : { *(__ksymtab) } :text
-  __stop___ksymtab = .;
-
-  __start___kallsyms = .;      /* All kernel symbols */
-  __kallsyms : { *(__kallsyms) } :text
-  __stop___kallsyms = .;
-
   .data : {                    /* Data */
        *(.data)
        CONSTRUCTORS
        } :text
-
-  _edata = .;                  /* End of data section */
-
-  . = ALIGN(8192);             /* init_task */
-  .data.init_task : { *(.data.init_task) } :text
 
   . = ALIGN(4096);             /* Init code and data */
   __init_begin = .;
@@ -62,10 +48,13 @@
   __initcall_start = .;
   .initcall.init : { *(.initcall.init) } :text
   __initcall_end = .;
+  . = ALIGN(8192);
   __init_end = .;
 
   __bss_start = .;             /* BSS */
   .bss : {
+       *(.bss.twopage_aligned)
+       *(.bss.page_aligned)
        *(.bss)
        } :text
   _end = . ;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.