[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Within the store, split the persistent information regarding a VM from the transient information regarding a domain.



# HG changeset patch
# User emellor@ewan
# Node ID 4a2c162d3e7caf3016fd728f7b0a63b997d09427
# Parent  a39510ad5c591ee84592924e718c90d746f90097
Within the store, split the persistent information regarding a VM from the
transient information regarding a domain.  This allows live localhost migration,
which is important for testing migration, and also allows migration when we
have a distributed store.  The backend paths in the store now refer to the
frontend domain ID, not its UUID, and blktap has changed to match.

To support this split in the information, the cleanup procedure has been split
to match.

Change the save-restore interface between XendDomain, XendDomainInfo, and 
XendCheckpoint, to remove some intermingling, in particular taking XendDomain
out of the loop for the restore procedure.

Improved the recovery procedure to avoid trying to destroy dom0 when recovering.

Added a lock around XendDomain.refresh and one around
XendDomainInfo.refreshShutdown, to improve the behaviour when recreating
domains at startup.  There are still races here, so more thought needs to be
given to the locking.

Live reconfiguration of an existing domain is temporarily broken.

Signed-off-by: Ewan Mellor <ewan@xxxxxxxxxxxxx>

diff -r a39510ad5c59 -r 4a2c162d3e7c tools/blktap/xenbus.c
--- a/tools/blktap/xenbus.c     Fri Sep 30 10:55:49 2005
+++ b/tools/blktap/xenbus.c     Fri Sep 30 12:41:10 2005
@@ -116,25 +116,25 @@
 
 
 /* This assumes that the domain name we are looking for is unique! */
-char *get_dom_uuid(struct xs_handle *h, const char *name)
-{
-    char **e, *val, *uuid = NULL;
+char *get_dom_domid(struct xs_handle *h, const char *name)
+{
+    char **e, *val, *domid = NULL;
     int num, i, len;
     char *path;
 
-    e = xs_directory(h, "/domain", &num);
+    e = xs_directory(h, "/local/domain", &num);
 
     i=0;
     while (i < num) {
-        asprintf(&path, "/domain/%s/name", e[i]);
+        asprintf(&path, "/local/domain/%s/name", e[i]);
         val = xs_read(h, path, &len);
         free(path);
         if (val == NULL)
             continue;
         if (strcmp(val, name) == 0) {
             /* match! */
-            asprintf(&path, "/domain/%s/uuid", e[i]);
-            uuid = xs_read(h, path, &len);
+            asprintf(&path, "/local/domain/%s/domid", e[i]);
+            domid = xs_read(h, path, &len);
             free(val);
             free(path);
             break;
@@ -144,7 +144,7 @@
     }
 
     free(e);
-    return uuid;
+    return domid;
 }
 
 static int strsep_len(const char *str, char c, unsigned int len)
@@ -553,15 +553,15 @@
 
 int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname)
 {
-    char *uuid, *path;
+    char *domid, *path;
     struct xenbus_watch *vbd_watch;
     int er;
 
-    uuid = get_dom_uuid(h, domname);
-
-    DPRINTF("%s: %s\n", domname, (uuid != NULL) ? uuid : "[ not found! ]");
-
-    asprintf(&path, "/domain/%s/backend/vbd", uuid);
+    domid = get_dom_domid(h, domname);
+
+    DPRINTF("%s: %s\n", domname, (domid != NULL) ? domid : "[ not found! ]");
+
+    asprintf(&path, "/local/domain/%s/backend/vbd", domid);
     if (path == NULL) 
         return -ENOMEM;
 
diff -r a39510ad5c59 -r 4a2c162d3e7c tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Fri Sep 30 10:55:49 2005
+++ b/tools/python/xen/xend/XendCheckpoint.py   Fri Sep 30 12:41:10 2005
@@ -56,7 +56,7 @@
     # simply uses the defaults compiled into libxenguest; see the comments 
     # and/or code in xc_linux_save() for more information. 
     cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
-           str(dominfo.domid), "0", "0", str(int(live)) ]
+           str(dominfo.getDomid()), "0", "0", str(int(live)) ]
     log.info("[xc_save] " + join(cmd))
     child = xPopen3(cmd, True, -1, [fd, xc.handle()])
     
@@ -76,10 +76,10 @@
             if fd == child.fromchild.fileno():
                 l = child.fromchild.readline()
                 if l.rstrip() == "suspend":
-                    log.info("suspending %d" % dominfo.domid)
-                    xd.domain_shutdown(dominfo.domid, reason='suspend')
+                    log.info("suspending %d" % dominfo.getDomid())
+                    xd.domain_shutdown(dominfo.getDomid(), reason='suspend')
                     dominfo.state_wait(XendDomainInfo.STATE_VM_SUSPENDED)
-                    log.info("suspend %d done" % dominfo.domid)
+                    log.info("suspend %d done" % dominfo.getDomid())
                     child.tochild.write("done\n")
                     child.tochild.flush()
         if filter(lambda (fd, event): event & select.POLLHUP, r):
@@ -90,11 +90,10 @@
     if child.wait() != 0:
         raise XendError("xc_save failed: %s" % lasterr)
 
-    dominfo.closeStoreChannel()
-    xd.domain_destroy(dominfo.domid)
+    dominfo.destroy()
     return None
 
-def restore(xd, fd):
+def restore(fd):
     signature = read_exact(fd, len(SIGNATURE),
         "not a valid guest state file: signature read")
     if signature != SIGNATURE:
@@ -113,7 +112,7 @@
         raise XendError("not a valid guest state file: config parse")
 
     vmconfig = p.get_val()
-    dominfo = xd.domain_configure(vmconfig)
+    dominfo = XendDomainInfo.restore(vmconfig)
 
     l = read_exact(fd, sizeof_unsigned_long,
                    "not a valid guest state file: pfn count read")
@@ -133,7 +132,7 @@
         console_evtchn = 0
 
     cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
-           str(dominfo.domid), str(nr_pfns),
+           str(dominfo.getDomid()), str(nr_pfns),
            str(store_evtchn), str(console_evtchn)]
     log.info("[xc_restore] " + join(cmd))
     child = xPopen3(cmd, True, -1, [fd, xc.handle()])
@@ -161,10 +160,10 @@
                         if dominfo.store_channel:
                             dominfo.setStoreRef(int(m.group(2)))
                             if dominfo.store_mfn >= 0:
-                                IntroduceDomain(dominfo.domid,
+                                IntroduceDomain(dominfo.getDomid(),
                                                 dominfo.store_mfn,
                                                 dominfo.store_channel.port1,
-                                                dominfo.path)
+                                                dominfo.getDomainPath())
                     m = re.match(r"^(console-mfn) (\d+)\n$", l)
                     if m:
                         dominfo.setConsoleRef(int(m.group(2)))
diff -r a39510ad5c59 -r 4a2c162d3e7c tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Fri Sep 30 10:55:49 2005
+++ b/tools/python/xen/xend/XendDomain.py       Fri Sep 30 12:41:10 2005
@@ -22,6 +22,7 @@
  Needs to be persistent for one uptime.
 """
 import os
+import threading
 
 import xen.lowlevel.xc
 
@@ -57,6 +58,9 @@
 
     """Dict of domain info indexed by domain id."""
     domains = None
+
+
+    ## public:
     
     def __init__(self):
         # Hack alert. Python does not support mutual imports, but XendDomainInfo
@@ -65,6 +69,7 @@
         # So we stuff the XendDomain instance (self) into xroot's components.
         xroot.add_component("xen.xend.XendDomain", self)
         self.domains = XendDomainDict()
+        self.refresh_lock = threading.Condition()
         self.watchReleaseDomain()
         self.refresh()
         self.dom0_setup()
@@ -93,6 +98,9 @@
         """
         doms = self.list_sorted()
         return map(lambda x: x.getName(), doms)
+
+
+    ## private:
 
     def onReleaseDomain(self):
         self.refresh()
@@ -135,9 +143,6 @@
 
     def dom0_setup(self):
         dom0 = self.domain_lookup(PRIV_DOMAIN)
-        if not dom0:
-            dom0 = self.recreate_domain(self.xen_domain(PRIV_DOMAIN))
-        dom0.dom0_init_store()
         dom0.dom0_enforce_vcpus()
 
 
@@ -150,10 +155,10 @@
         if info.getDomid() in self.domains:
             notify = False
         self.domains[info.getDomid()] = info
-        info.exportToDB()
-        if notify:
-            eserver.inject('xend.domain.create', [info.getName(),
-                                                  info.getDomid()])
+        #info.exportToDB()
+        #if notify:
+        #    eserver.inject('xend.domain.create', [info.getName(),
+        #                                          info.getDomid()])
 
     def _delete_domain(self, domid, notify=True):
         """Remove a domain from the tables.
@@ -164,8 +169,8 @@
         info = self.domains.get(domid)
         if info:
             del self.domains[domid]
-            info.cleanup()
-            info.delete()
+            info.cleanupDomain()
+            info.cleanupVm()
             if notify:
                 eserver.inject('xend.domain.died', [info.getName(),
                                                     info.getDomid()])
@@ -174,25 +179,36 @@
     def refresh(self):
         """Refresh domain list from Xen.
         """
-        doms = self.xen_domains()
-        for d in self.domains.values():
-            info = doms.get(d.getDomid())
-            if info:
-                d.update(info)
-            else:
-                self._delete_domain(d.getDomid())
-        for d in doms:
-            if d not in self.domains:
-                try:
-                    self.recreate_domain(doms[d])
-                except:
-                    log.exception(
-                        "Failed to recreate information for domain %d.  "
-                        "Destroying it in the hope of recovery.", d)
+        self.refresh_lock.acquire()
+        try:
+            doms = self.xen_domains()
+            for d in self.domains.values():
+                info = doms.get(d.getDomid())
+                if info:
+                    d.update(info)
+                else:
+                    self._delete_domain(d.getDomid())
+            for d in doms:
+                if d not in self.domains and not doms[d]['dying']:
                     try:
-                        xc.domain_destroy(dom = d)
+                        self.recreate_domain(doms[d])
                     except:
-                        log.exception('Destruction of %d failed.', d)
+                        if d == PRIV_DOMAIN:
+                            log.exception(
+                                "Failed to recreate information for domain "
+                                "%d.  Doing nothing except crossing my "
+                                "fingers.", d)
+                        else:
+                            log.exception(
+                                "Failed to recreate information for domain "
+                                "%d.  Destroying it in the hope of "
+                                "recovery.", d)
+                            try:
+                                xc.domain_destroy(dom = d)
+                            except:
+                                log.exception('Destruction of %d failed.', d)
+        finally:
+            self.refresh_lock.release()
 
 
     def update_domain(self, id):
@@ -208,6 +224,9 @@
         else:
             self._delete_domain(id)
 
+
+    ## public:
+
     def domain_create(self, config):
         """Create a domain from a configuration.
 
@@ -219,19 +238,12 @@
         return dominfo
 
     def domain_configure(self, config):
-        """Configure an existing domain. This is intended for internal
-        use by domain restore and migrate.
+        """Configure an existing domain.
 
         @param vmconfig: vm configuration
         """
-        # We accept our configuration specified as ['config' [...]], which
-        # some tools or configuration files may be using.  For save-restore,
-        # we use the value of XendDomainInfo.sxpr() directly, which has no
-        # such item.
-        nested = sxp.child_value(config, 'config')
-        if nested:
-            config = nested
-        return XendDomainInfo.restore(config)
+        # !!!
+        raise XendError("Unsupported")
 
     def domain_restore(self, src):
         """Restore a domain from file.
@@ -241,7 +253,7 @@
 
         try:
             fd = os.open(src, os.O_RDONLY)
-            dominfo = XendCheckpoint.restore(self, fd)
+            dominfo = XendCheckpoint.restore(fd)
             self._add_domain(dominfo)
             return dominfo
         except OSError, ex:
diff -r a39510ad5c59 -r 4a2c162d3e7c tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Fri Sep 30 10:55:49 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri Sep 30 12:41:10 2005
@@ -94,8 +94,8 @@
 SHUTDOWN_TIMEOUT = 30
 
 
-DOMROOT = '/domain'
-VMROOT  = '/domain'
+DOMROOT = '/local/domain/'
+VMROOT  = '/vm/'
 
 
 xc = xen.lowlevel.xc.new()
@@ -116,6 +116,31 @@
     ]
 
 
+def restore(config):
+    """Create a domain and a VM object to do a restore.
+
+    @param config:    domain configuration
+    """
+
+    log.debug("XendDomainInfo.restore(%s)", config)
+
+    try:
+        uuid    =     sxp.child_value(config, 'uuid')
+        ssidref = int(sxp.child_value(config, 'ssidref'))
+    except TypeError, exn:
+        raise VmError('Invalid ssidref in config: %s' % exn)
+
+    vm = XendDomainInfo(uuid, XendDomainInfo.parseConfig(config),
+                        xc.domain_create(ssidref = ssidref))
+    vm.storeVmDetails()
+    vm.configure()
+    vm.create_channel()
+#         vm.exportToDB()
+#    vm.refreshShutdown()
+    vm.storeDomDetails()
+    return vm
+
+
 def domain_exists(name):
     # See comment in XendDomain constructor.
     xd = get_component('xen.xend.XendDomain')
@@ -161,7 +186,7 @@
         @raise: VmError for invalid configuration
         """
 
-        log.debug("XendDomainInfo.create(...)")
+        log.debug("XendDomainInfo.create(%s)", config)
         
         vm = cls(getUuid(), cls.parseConfig(config))
         vm.construct()
@@ -172,9 +197,13 @@
 
 
     def recreate(cls, xeninfo):
-        """Create the VM object for an existing domain."""
+        """Create the VM object for an existing domain.  The domain must not
+        be dying, as the paths in the store should already have been removed,
+        and asking us to recreate them causes problems."""
 
         log.debug("XendDomainInfo.recreate(%s)", xeninfo)
+
+        assert not xeninfo['dying']
 
         domid = xeninfo['dom']
         try:
@@ -191,45 +220,29 @@
                 raise XendError(
                     'No vm/uuid path in store for existing domain %d' % domid)
 
+            log.info("Recreating domain %d, UUID %s.", domid, uuid)
+
+            vm = cls(uuid, xeninfo, domid, True)
+
         except Exception, exn:
             log.warn(str(exn))
+
             uuid = getUuid()
 
-        log.info("Recreating domain %d, uuid %s", domid, uuid)
-
-        vm = cls(uuid, xeninfo, domid, True)
+            log.info("Recreating domain %d with new UUID %s.", domid, uuid)
+
+            vm = cls(uuid, xeninfo, domid, True)
+            vm.storeVmDetails()
+            vm.storeDomDetails()
+
+        vm.create_channel()
+        if domid == 0:
+            vm.initStoreConnection()
+
         vm.refreshShutdown(xeninfo)
         return vm
 
     recreate = classmethod(recreate)
-
-
-    def restore(cls, config, uuid = None):
-        """Create a domain and a VM object to do a restore.
-
-        @param config:    domain configuration
-        @param uuid:      uuid to use
-        """
-        
-        log.debug("XendDomainInfo.restore(%s, %s)", config, uuid)
-
-        if not uuid:
-            uuid = getUuid()
-
-        try:
-            ssidref = int(sxp.child_value(config, 'ssidref'))
-        except TypeError, exn:
-            raise VmError('Invalid ssidref in config: %s' % exn)
-
-        vm = cls(uuid, cls.parseConfig(config),
-                 xc.domain_create(ssidref = ssidref))
-        vm.create_channel()
-        vm.configure()
-        vm.exportToDB()
-        vm.refreshShutdown()
-        return vm
-
-    restore = classmethod(restore)
 
 
     def parseConfig(cls, config):
@@ -294,8 +307,6 @@
         self.uuid = uuid
         self.info = info
 
-        self.path = DOMROOT + "/" + uuid
-
         if domid:
             self.domid = domid
         elif 'dom' in info:
@@ -303,6 +314,12 @@
         else:
             self.domid = None
 
+        self.vmpath  = VMROOT + uuid
+        if self.domid is None:
+            self.dompath = None
+        else:
+            self.dompath = DOMROOT + str(self.domid)
+
         if augment:
             self.augmentInfo()
 
@@ -317,9 +334,7 @@
 
         self.state = STATE_VM_OK
         self.state_updated = threading.Condition()
-
-        self.writeVm("uuid", self.uuid)
-        self.storeDom("vm", self.path)
+        self.refresh_shutdown_lock = threading.Condition()
 
 
     def augmentInfo(self):
@@ -332,13 +347,21 @@
                 self.info[name] = val
 
         params = (("name", str),
-                  ("restart-mode", str),
+                  ("restart_mode", str),
                   ("image",        str),
-                  ("start-time", float))
+                  ("start_time", float))
 
         from_store = self.gatherVm(*params)
 
         map(lambda x, y: useIfNeeded(x[0], y), params, from_store)
+
+        device = []
+        for c in controllerClasses:
+            devconfig = self.getDeviceConfigurations(c)
+            if devconfig:
+                device.extend(map(lambda x: (c, x), devconfig))
+
+        useIfNeeded('device', device)
 
 
     def validateInfo(self):
@@ -377,7 +400,7 @@
             # mem_kb.
 
             def discard_negatives(name):
-                if self.infoIsSet(name) and self.info[name] <= 0:
+                if self.infoIsSet(name) and self.info[name] < 0:
                     del self.info[name]
 
             def valid_KiB_(mb_name, kb_name):
@@ -403,7 +426,7 @@
 
             def valid_KiB(mb_name, kb_name):
                 result = valid_KiB_(mb_name, kb_name)
-                if result <= 0:
+                if result is None or result < 0:
                     raise VmError('Invalid %s / %s: %s' %
                                   (mb_name, kb_name, result))
                 else:
@@ -452,42 +475,60 @@
 
 
     def readVm(self, *args):
-        return xstransact.Read(self.path, *args)
+        return xstransact.Read(self.vmpath, *args)
 
     def writeVm(self, *args):
-        return xstransact.Write(self.path, *args)
+        return xstransact.Write(self.vmpath, *args)
 
     def removeVm(self, *args):
-        return xstransact.Remove(self.path, *args)
+        return xstransact.Remove(self.vmpath, *args)
 
     def gatherVm(self, *args):
-        return xstransact.Gather(self.path, *args)
+        return xstransact.Gather(self.vmpath, *args)
 
     def storeVm(self, *args):
-        return xstransact.Store(self.path, *args)
+        return xstransact.Store(self.vmpath, *args)
 
     def readDom(self, *args):
-        return xstransact.Read(self.path, *args)
+        return xstransact.Read(self.dompath, *args)
 
     def writeDom(self, *args):
-        return xstransact.Write(self.path, *args)
+        return xstransact.Write(self.dompath, *args)
 
     def removeDom(self, *args):
-        return xstransact.Remove(self.path, *args)
+        return xstransact.Remove(self.dompath, *args)
 
     def gatherDom(self, *args):
-        return xstransact.Gather(self.path, *args)
+        return xstransact.Gather(self.dompath, *args)
 
     def storeDom(self, *args):
-        return xstransact.Store(self.path, *args)
-
-
-    def exportToDB(self):
+        return xstransact.Store(self.dompath, *args)
+
+
+    def storeVmDetails(self):
+        to_store = {
+            'uuid':               self.uuid,
+
+            # !!!
+            'memory/target':      str(self.info['memory_KiB'])
+            }
+
+        if self.infoIsSet('image'):
+            to_store['image'] = sxp.to_string(self.info['image'])
+
+        for k in ['name', 'ssidref', 'restart_mode']:
+            if self.infoIsSet(k):
+                to_store[k] = str(self.info[k])
+
+        log.debug("Storing VM details: %s" % str(to_store))
+
+        self.writeVm(to_store)
+
+
+    def storeDomDetails(self):
         to_store = {
             'domid':              str(self.domid),
-            'uuid':               self.uuid,
-
-            'xend/restart_mode':  str(self.info['restart_mode']),
+            'vm':                 self.vmpath,
 
             'memory/target':      str(self.info['memory_KiB'])
             }
@@ -496,11 +537,9 @@
             if v:
                 to_store[k] = str(v)
 
-        to_store['image'] = sxp.to_string(self.info['image'])
-
-        log.debug("Storing %s" % str(to_store))
-
-        self.writeVm(to_store)
+        log.debug("Storing domain details: %s" % str(to_store))
+
+        self.writeDom(to_store)
 
 
     def setDomid(self, domid):
@@ -522,8 +561,8 @@
     def getName(self):
         return self.info['name']
 
-    def getPath(self):
-        return self.path
+    def getDomainPath(self):
+        return self.dompath
 
     def getUuid(self):
         return self.uuid
@@ -549,78 +588,97 @@
 
 
     def refreshShutdown(self, xeninfo = None):
-        if xeninfo is None:
-            xeninfo = dom_get(self.domid)
+        # If set at the end of this method, a restart is required, with the
+        # given reason.  This restart has to be done out of the scope of
+        # refresh_shutdown_lock.
+        restart_reason = None
+        
+        self.refresh_shutdown_lock.acquire()
+        try:
             if xeninfo is None:
-                # The domain no longer exists.  This will occur if we have
-                # scheduled a timer to check for shutdown timeouts and the
-                # shutdown succeeded.
+                xeninfo = dom_get(self.domid)
+                if xeninfo is None:
+                    # The domain no longer exists.  This will occur if we have
+                    # scheduled a timer to check for shutdown timeouts and the
+                    # shutdown succeeded.  It will also occur if someone
+                    # destroys a domain beneath us.  We clean up, just in
+                    # case.
+                    self.cleanupDomain()
+                    self.cleanupVm()
+                    return
+
+            if xeninfo['dying']:
+                # Dying means that a domain has been destroyed, but has not
+                # yet been cleaned up by Xen.  This could persist indefinitely
+                # if, for example, another domain has some of its pages
+                # mapped.  We might like to diagnose this problem in the
+                # future, but for now all we do is make sure that it's not
+                # us holding the pages, by calling the cleanup methods.
+                self.cleanupDomain()
+                self.cleanupVm()
                 return
 
-        if xeninfo['dying']:
-            # Dying means that a domain has been destroyed, but has not yet
-            # been cleaned up by Xen.  This could persist indefinitely if,
-            # for example, another domain has some of its pages mapped.
-            # We might like to diagnose this problem in the future, but for
-            # now all we can sensibly do is ignore it.
-            pass
-
-        elif xeninfo['crashed']:
-            log.warn('Domain has crashed: name=%s id=%d.',
-                     self.info['name'], self.domid)
-
-            if xroot.get_enable_dump():
-                self.dumpCore()
-
-            self.maybeRestart('crashed')
-
-        elif xeninfo['shutdown']:
-            reason = shutdown_reason(xeninfo['shutdown_reason'])
-
-            log.info('Domain has shutdown: name=%s id=%d reason=%s.',
-                     self.info['name'], self.domid, reason)
-
-            self.clearRestart()
-
-            if reason == 'suspend':
-                self.state_set(STATE_VM_SUSPENDED)
-                # Don't destroy the domain.  XendCheckpoint will do this once
-                # it has finished.
-            elif reason in ['poweroff', 'reboot']:
-                self.maybeRestart(reason)
+            elif xeninfo['crashed']:
+                log.warn('Domain has crashed: name=%s id=%d.',
+                         self.info['name'], self.domid)
+
+                if xroot.get_enable_dump():
+                    self.dumpCore()
+
+                restart_reason = 'crashed'
+
+            elif xeninfo['shutdown']:
+                reason = shutdown_reason(xeninfo['shutdown_reason'])
+
+                log.info('Domain has shutdown: name=%s id=%d reason=%s.',
+                         self.info['name'], self.domid, reason)
+
+                self.clearRestart()
+
+                if reason == 'suspend':
+                    self.state_set(STATE_VM_SUSPENDED)
+                    # Don't destroy the domain.  XendCheckpoint will do this
+                    # once it has finished.
+                elif reason in ['poweroff', 'reboot']:
+                    restart_reason = reason
+                else:
+                    self.destroy()
+
             else:
-                self.destroy()
-
-        else:
-            # Domain is alive.  If we are shutting it down, then check
-            # the timeout on that, and destroy it if necessary.
-
-            sst = self.readVm('xend/shutdown_start_time')
-            if sst:
-                sst = float(sst)
-                timeout = SHUTDOWN_TIMEOUT - time.time() + sst
-                if timeout < 0:
-                    log.info(
-                        "Domain shutdown timeout expired: name=%s id=%s",
-                        self.info['name'], self.domid)
-                    self.destroy()
-                else:
-                    log.debug(
-                        "Scheduling refreshShutdown on domain %d in %ds.",
-                        self.domid, timeout)
-                    scheduler.later(timeout, self.refreshShutdown)
+                # Domain is alive.  If we are shutting it down, then check
+                # the timeout on that, and destroy it if necessary.
+
+                sst = self.readDom('xend/shutdown_start_time')
+                if sst:
+                    sst = float(sst)
+                    timeout = SHUTDOWN_TIMEOUT - time.time() + sst
+                    if timeout < 0:
+                        log.info(
+                            "Domain shutdown timeout expired: name=%s id=%s",
+                            self.info['name'], self.domid)
+                        self.destroy()
+                    else:
+                        log.debug(
+                            "Scheduling refreshShutdown on domain %d in %ds.",
+                            self.domid, timeout)
+                        scheduler.later(timeout, self.refreshShutdown)
+        finally:
+            self.refresh_shutdown_lock.release()
+
+        if restart_reason:
+            self.maybeRestart(restart_reason)
 
 
     def shutdown(self, reason):
         if not reason in shutdown_reasons.values():
             raise XendError('invalid reason:' + reason)
-        self.storeVm("control/shutdown", reason)
+        self.storeDom("control/shutdown", reason)
         if not reason == 'suspend':
-            self.storeVm('xend/shutdown_start_time', time.time())
+            self.storeDom('xend/shutdown_start_time', time.time())
 
 
     def clearRestart(self):
-        self.removeVm("xend/shutdown_start_time")
+        self.removeDom("xend/shutdown_start_time")
 
 
     def maybeRestart(self, reason):
@@ -647,12 +705,10 @@
         """Close the given channel, if set, and remove the given entry in the
         store.  Nothrow guarantee."""
         
+        if channel:
+            channel.close()
         try:
-            try:
-                if channel:
-                    channel.close()
-            finally:
-                self.removeDom(entry)
+            self.removeDom(entry)
         except Exception, exn:
             log.exception(exn)
         
@@ -752,6 +808,10 @@
 
 
     ## private:
+
+    def getDeviceConfigurations(self, deviceClass):
+        return self.getDeviceController(deviceClass).configurations()
+
 
     def getDeviceController(self, name):
         if name not in controllerClasses:
@@ -864,9 +924,8 @@
 
 
     def construct(self):
-        """Construct the vm instance from its configuration.
-
-        @param config: configuration
+        """Construct the domain.
+
         @raise: VmError on error
         """
 
@@ -881,17 +940,18 @@
                           self.info['name'])
 
         try:
+            self.dompath = DOMROOT + str(self.domid)
+
             self.initDomain()
             self.construct_image()
             self.configure()
-            self.exportToDB()
-        except Exception, ex:
-            # Catch errors, cleanup and re-raise.
-            print 'Domain construction error:', ex
-            import traceback
-            traceback.print_exc()
+            self.storeVmDetails()
+            self.storeDomDetails()
+        except Exception:
+            log.exception('Domain construction failed')
             self.destroy()
-            raise
+            raise VmError('Creating domain failed: name=%s' %
+                          self.info['name'])
 
 
     def initDomain(self):
@@ -926,38 +986,29 @@
                   self.domid, self.info['name'], self.info['memory_KiB'])
 
 
-    def configure_vcpus(self, vcpus):
+    def configure_vcpus(self):
         d = {}
-        for v in range(0, vcpus):
+        for v in range(0, self.info['vcpus']):
             d["cpu/%d/availability" % v] = "online"
         self.writeVm(d)
 
+
     def construct_image(self):
         """Construct the boot image for the domain.
         """
         self.create_channel()
         self.image.createImage()
-        self.exportToDB()
-        if self.store_channel and self.store_mfn >= 0:
-            IntroduceDomain(self.domid, self.store_mfn,
-                            self.store_channel.port1, self.path)
-        # get the configured value of vcpus and update store
-        self.configure_vcpus(self.info['vcpus'])
+#        !!! self.exportToDB()
+        IntroduceDomain(self.domid, self.store_mfn,
+                        self.store_channel.port1, self.dompath)
+        self.configure_vcpus()
 
 
     ## public:
 
-    def delete(self):
-        """Delete the vm's db.
-        """
-        try:
-            xstransact.Remove(self.path, 'domid')
-        except Exception, ex:
-            log.warning("error in domain db delete: %s", ex)
-
-
-    def cleanup(self):
-        """Cleanup vm resources: release devices.  Nothrow guarantee."""
+    def cleanupDomain(self):
+        """Cleanup domain resources; release devices.  Idempotent.  Nothrow
+        guarantee."""
 
         self.state_set(STATE_VM_TERMINATED)
         self.release_devices()
@@ -972,24 +1023,29 @@
                     "XendDomainInfo.cleanup: image.destroy() failed.")
             self.image = None
 
-
-    def destroy(self):
-        """Cleanup vm and destroy domain.  Nothrow guarantee."""
-
-        log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
-
-        self.cleanup()
+        try:
+            self.removeDom()
+        except Exception:
+            log.exception("Removing domain path failed.")
+
+
+    def cleanupVm(self):
+        """Cleanup VM resources.  Idempotent.  Nothrow guarantee."""
 
         try:
             self.removeVm()
         except Exception:
             log.exception("Removing VM path failed.")
 
-        try:
-            self.removeDom()
-        except Exception:
-            log.exception("Removing domain path failed.")
-
+
+    def destroy(self):
+        """Cleanup VM and destroy domain.  Nothrow guarantee."""
+
+        log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
+
+        self.cleanupDomain()
+        self.cleanupVm()
+        
         try:
             if self.domid is not None:
                 xc.domain_destroy(dom=self.domid)
@@ -1002,11 +1058,12 @@
         """
         return self.state == STATE_VM_TERMINATED
 
+
     def release_devices(self):
-        """Release all vm devices.  Nothrow guarantee."""
+        """Release all domain's devices.  Nothrow guarantee."""
 
         while True:
-            t = xstransact("%s/device" % self.path)
+            t = xstransact("%s/device" % self.dompath)
             for n in controllerClasses.keys():
                 for d in t.list(n):
                     try:
@@ -1020,6 +1077,7 @@
             if t.commit():
                 break
 
+
     def eventChannel(self, path=None):
         """Create an event channel to the domain.
         
@@ -1030,9 +1088,29 @@
             try:
                 port = int(self.readDom(path))
             except:
-                # if anything goes wrong, assume the port was not yet set
+                # The port is not yet set, i.e. the channel has not yet been
+                # created.
                 pass
         ret = channel.eventChannel(0, self.domid, port1=port, port2=0)
+
+        # Stale port information from above causes an Invalid Argument to be
+        # thrown by the eventChannel call below.  To recover, we throw away
+        # port if it turns out to be bad, and just create a new channel.
+        # If creating a new channel with two new ports fails, then something
+        # else is going wrong, so we bail.
+        while True:
+            try:
+                ret = channel.eventChannel(0, self.domid, port1 = port,
+                                           port2 = 0)
+                break
+            except:
+                log.exception("Exception in eventChannel(0, %d, %d, %d)",
+                              self.domid, port, 0)
+                if port == 0:
+                    raise
+                else:
+                    port = 0
+                    log.error("Recovering from above exception.")
         self.storeDom(path, ret.port1)
         return ret
         
@@ -1113,9 +1191,10 @@
         """Restart the domain after it has exited. """
 
         #            self.restart_check()
-        self.cleanup()
 
         config = self.sxpr()
+
+        self.cleanupDomain()
 
         if self.readVm('xend/restart_in_progress'):
             log.error('Xend failed during restart of domain %d.  '
@@ -1188,26 +1267,23 @@
         self.storeVm("cpu/%d/availability" % vcpu, availability)
 
     def send_sysrq(self, key=0):
-        self.storeVm("control/sysrq", '%c' % key)
-
-    def dom0_init_store(self):
-        if not self.store_channel:
-            self.store_channel = self.eventChannel("store/port")
-            if not self.store_channel:
-                return
+        self.storeDom("control/sysrq", '%c' % key)
+
+
+    def initStoreConnection(self):
         ref = xc.init_store(self.store_channel.port2)
         if ref and ref >= 0:
             self.setStoreRef(ref)
             try:
                 IntroduceDomain(self.domid, ref, self.store_channel.port1,
-                                self.path)
+                                self.dompath)
             except RuntimeError, ex:
                 if ex.args[0] == errno.EISCONN:
                     pass
                 else:
                     raise
-            # get run-time value of vcpus and update store
-            self.configure_vcpus(dom_get(self.domid)['vcpus'])
+        self.configure_vcpus()
+
 
     def dom0_enforce_vcpus(self):
         dom = 0
diff -r a39510ad5c59 -r 4a2c162d3e7c tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py     Fri Sep 30 10:55:49 2005
+++ b/tools/python/xen/xend/server/DevController.py     Fri Sep 30 12:41:10 2005
@@ -219,7 +219,7 @@
     def backendPath(self, backdom, devid):
         """@param backdom [XendDomainInfo] The backend domain info."""
 
-        return "%s/backend/%s/%s/%d" % (backdom.getPath(),
+        return "%s/backend/%s/%s/%d" % (backdom.getDomainPath(),
                                         self.deviceClass,
                                         self.vm.getUuid(), devid)
 
@@ -229,9 +229,9 @@
 
 
     def frontendRoot(self):
-        return "%s/device/%s" % (self.vm.getPath(), self.deviceClass)
+        return "%s/device/%s" % (self.vm.getDomainPath(), self.deviceClass)
 
 
     def frontendMiscPath(self):
-        return "%s/device-misc/%s" % (self.vm.getPath(),
+        return "%s/device-misc/%s" % (self.vm.getDomainPath(),
                                       self.deviceClass)

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.