[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] xend: Fix and improve error handling for failed suspend/migrate
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1210253611 -3600 # Node ID c99a88623eda83d8e02f4b6d7c32bc4c6d298d8a # Parent 8bd776540ab319d73b8e55656ad2c342c178a5b1 xend: Fix and improve error handling for failed suspend/migrate This has been broken since cset 16964:5d84464dc1fc Also deal better with very early errors (close sender side socket) Signed-off-by: Steven Hand <steven.hand@xxxxxxxxxxxx> --- tools/python/xen/xend/XendCheckpoint.py | 14 ++------------ tools/python/xen/xend/XendDomain.py | 6 ++++-- tools/python/xen/xend/XendDomainInfo.py | 13 ++++++++++++- 3 files changed, 18 insertions(+), 15 deletions(-) diff -r 8bd776540ab3 -r c99a88623eda tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Thu May 08 14:32:11 2008 +0100 +++ b/tools/python/xen/xend/XendCheckpoint.py Thu May 08 14:33:31 2008 +0100 @@ -81,8 +81,6 @@ def save(fd, dominfo, network, live, dst # thing is useful for debugging. dominfo.setName('migrating-' + domain_name) - done_suspend = 0 - try: dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name) @@ -110,7 +108,6 @@ def save(fd, dominfo, network, live, dst log.debug("Suspending %d ...", dominfo.getDomid()) dominfo.shutdown('suspend') dominfo.waitForShutdown() - done_suspend = 1 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2, domain_name) log.info("Domain %d suspended.", dominfo.getDomid()) @@ -154,16 +151,9 @@ def save(fd, dominfo, network, live, dst pass except Exception, exn: - log.exception("Save failed on domain %s (%s).", domain_name, + log.exception("Save failed on domain %s (%s) - resuming.", domain_name, dominfo.getDomid()) - - # If we didn't get as far as suspending the domain (for - # example, we couldn't balloon enough memory for the new - # domain), then we don't want to re-plumb the devices, as the - # domU will not be expecting it. 
- if done_suspend: - log.debug("XendCheckpoint.save: resumeDomain") - dominfo.resumeDomain() + dominfo.resumeDomain() try: dominfo.setName(domain_name) diff -r 8bd776540ab3 -r c99a88623eda tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Thu May 08 14:32:11 2008 +0100 +++ b/tools/python/xen/xend/XendDomain.py Thu May 08 14:33:31 2008 +0100 @@ -1308,8 +1308,10 @@ class XendDomain: sock.send("receive\n") sock.recv(80) - XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst, node=node) - sock.close() + try: + XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst, node=node) + finally: + sock.close() def domain_save(self, domid, dst, checkpoint=False): """Start saving a domain to file. diff -r 8bd776540ab3 -r c99a88623eda tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu May 08 14:32:11 2008 +0100 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu May 08 14:33:31 2008 +0100 @@ -2378,8 +2378,19 @@ class XendDomainInfo: def resumeDomain(self): log.debug("XendDomainInfo.resumeDomain(%s)", str(self.domid)) - if self.domid is None: + # resume a suspended domain (e.g. after live checkpoint, or after + # a later error during save or migrate); checks that the domain + # is currently suspended first so safe to call from anywhere + + xeninfo = dom_get(self.domid) + if xeninfo is None: return + if not xeninfo['shutdown']: + return + reason = shutdown_reason(xeninfo['shutdown_reason']) + if reason != 'suspend': + return + try: # could also fetch a parsed note from xenstore fast = self.info.get_notes().get('SUSPEND_CANCEL') and 1 or 0 _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |