[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-3.2-testing] xend: Fix and improve error handling for failed suspend/migrate
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1210688219 -3600 # Node ID fd285b18158e8bc355ac036cf9d305d06bbfbce3 # Parent 9bb37012b63e58890d4b8eab956dc2722ddb433d xend: Fix and improve error handling for failed suspend/migrate This has been broken since cset 16964:5d84464dc1fc Also deal better with very early errors (close sender side socket) Signed-off-by: Steven Hand <steven.hand@xxxxxxxxxxxx> xen-unstable changeset: 17601:c99a88623eda83d8e02f4b6d7c32bc4c6d298d8a xen-unstable date: Thu May 08 14:33:31 2008 +0100 --- tools/python/xen/xend/XendCheckpoint.py | 14 ++------------ tools/python/xen/xend/XendDomain.py | 6 ++++-- tools/python/xen/xend/XendDomainInfo.py | 13 ++++++++++++- 3 files changed, 18 insertions(+), 15 deletions(-) diff -r 9bb37012b63e -r fd285b18158e tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Tue May 13 15:15:02 2008 +0100 +++ b/tools/python/xen/xend/XendCheckpoint.py Tue May 13 15:16:59 2008 +0100 @@ -67,8 +67,6 @@ def save(fd, dominfo, network, live, dst # thing is useful for debugging. dominfo.setName('migrating-' + domain_name) - done_suspend = 0 - try: dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name) @@ -96,7 +94,6 @@ def save(fd, dominfo, network, live, dst log.debug("Suspending %d ...", dominfo.getDomid()) dominfo.shutdown('suspend') dominfo.waitForShutdown() - done_suspend = 1 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2, domain_name) log.info("Domain %d suspended.", dominfo.getDomid()) @@ -140,16 +137,9 @@ def save(fd, dominfo, network, live, dst pass except Exception, exn: - log.exception("Save failed on domain %s (%s).", domain_name, + log.exception("Save failed on domain %s (%s) - resuming.", domain_name, dominfo.getDomid()) - - # If we didn't get as far as suspending the domain (for - # example, we couldn't balloon enough memory for the new - # domain), then we don't want to re-plumb the devices, as the - # domU will not be expecting it. 
- if done_suspend: - log.debug("XendCheckpoint.save: resumeDomain") - dominfo.resumeDomain() + dominfo.resumeDomain() try: dominfo.setName(domain_name) diff -r 9bb37012b63e -r fd285b18158e tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Tue May 13 15:15:02 2008 +0100 +++ b/tools/python/xen/xend/XendDomain.py Tue May 13 15:16:59 2008 +0100 @@ -1296,8 +1296,10 @@ class XendDomain: sock.send("receive\n") sock.recv(80) - XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst) - sock.close() + try: + XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst) + finally: + sock.close() def domain_save(self, domid, dst, checkpoint=False): """Start saving a domain to file. diff -r 9bb37012b63e -r fd285b18158e tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Tue May 13 15:15:02 2008 +0100 +++ b/tools/python/xen/xend/XendDomainInfo.py Tue May 13 15:16:59 2008 +0100 @@ -1935,8 +1935,19 @@ class XendDomainInfo: def resumeDomain(self): log.debug("XendDomainInfo.resumeDomain(%s)", str(self.domid)) - if self.domid is None: + # resume a suspended domain (e.g. after live checkpoint, or after + # a later error during save or migrate); checks that the domain + # is currently suspended first so safe to call from anywhere + + xeninfo = dom_get(self.domid) + if xeninfo is None: return + if not xeninfo['shutdown']: + return + reason = shutdown_reason(xeninfo['shutdown_reason']) + if reason != 'suspend': + return + try: # could also fetch a parsed note from xenstore fast = self.info.get_notes().get('SUSPEND_CANCEL') and 1 or 0 _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |