[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] Fix HVM shutdown when xend is restarted.



# HG changeset patch
# User Ewan Mellor <ewan@xxxxxxxxxxxxx>
# Node ID cefb1f761f0b9894459d571618846141ec6f36e2
# Parent  dde9e37c0671cde4dd3508e434bb4322a200d85b
Fix HVM shutdown when xend is restarted.

Added a recreate call to ImageHandler, allowing its subclasses to hook into
the code that runs when xend restarts.  In particular, this allows us to
re-register the watches for HVM shutdown and to read the PID of qemu-dm from
the store.

Signed-off-by: Ewan Mellor <ewan@xxxxxxxxxxxxx>
---
 tools/python/xen/xend/XendConstants.py  |    2 
 tools/python/xen/xend/XendDomain.py     |    1 
 tools/python/xen/xend/XendDomainInfo.py |   40 ++++++++++++-------
 tools/python/xen/xend/image.py          |   66 ++++++++++++++++++--------------
 4 files changed, 66 insertions(+), 43 deletions(-)

diff -r dde9e37c0671 -r cefb1f761f0b tools/python/xen/xend/XendConstants.py
--- a/tools/python/xen/xend/XendConstants.py    Thu Nov 30 18:05:19 2006 +0000
+++ b/tools/python/xen/xend/XendConstants.py    Thu Nov 30 18:08:34 2006 +0000
@@ -34,6 +34,8 @@ DOMAIN_SHUTDOWN_REASONS = {
     DOMAIN_CRASH   : "crash",
     DOMAIN_HALT    : "halt"
 }
+REVERSE_DOMAIN_SHUTDOWN_REASONS = \
+    dict([(y, x) for x, y in DOMAIN_SHUTDOWN_REASONS.items()])
 
 restart_modes = [
     "restart",
diff -r dde9e37c0671 -r cefb1f761f0b tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu Nov 30 18:05:19 2006 +0000
+++ b/tools/python/xen/xend/XendDomain.py       Thu Nov 30 18:08:34 2006 +0000
@@ -421,7 +421,6 @@ class XendDomain:
                 self._remove_domain(dom, domid)
 
 
-
     def _add_domain(self, info):
         """Add a domain to the list of running domains
         
diff -r dde9e37c0671 -r cefb1f761f0b tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu Nov 30 18:05:19 2006 +0000
+++ b/tools/python/xen/xend/XendDomainInfo.py   Thu Nov 30 18:08:34 2006 +0000
@@ -226,6 +226,15 @@ def recreate(info, priv):
         vm._storeVmDetails()
         vm._storeDomDetails()
         
+    if vm.info['image']: # Only dom0 should be without an image entry when
+                         # recreating, but we cope with missing ones
+                         # elsewhere just in case.
+        vm.image = image.create(vm,
+                                vm.info,
+                                vm.info['image'],
+                                vm.info['devices'])
+        vm.image.recreate()
+
     vm._registerWatches()
     vm.refreshShutdown(xeninfo)
     return vm
@@ -470,7 +479,7 @@ class XendDomainInfo:
         
         if reason not in DOMAIN_SHUTDOWN_REASONS.values():
             raise XendError('Invalid reason: %s' % reason)
-        self._storeDom("control/shutdown", reason)
+        self.storeDom("control/shutdown", reason)
                 
     def pause(self):
         """Pause domain
@@ -497,7 +506,7 @@ class XendDomainInfo:
     def send_sysrq(self, key):
         """ Send a Sysrq equivalent key via xenstored."""
         asserts.isCharConvertible(key)
-        self._storeDom("control/sysrq", '%c' % key)
+        self.storeDom("control/sysrq", '%c' % key)
 
     def device_create(self, dev_config):
         """Create a new device.
@@ -581,7 +590,7 @@ class XendDomainInfo:
         
         self.info['memory_static_min'] = target
         self.storeVm("memory", target)
-        self._storeDom("memory/target", target << 10)
+        self.storeDom("memory/target", target << 10)
 
     def getVCPUInfo(self):
         try:
@@ -648,7 +657,7 @@ class XendDomainInfo:
         for devclass in XendDevices.valid_devices():
             devconfig = self.getDeviceController(devclass).configurations()
             if devconfig:
-                devices.extend(map(lambda conf: (devclass, conf), devconfig))
+                devices.extend(devconfig)
 
         if not self.info['devices'] and devices is not None:
             for device in devices:
@@ -677,8 +686,11 @@ class XendDomainInfo:
     # Function to update xenstore /dom/*
     #
 
-    def _readDom(self, *args):
+    def readDom(self, *args):
         return xstransact.Read(self.dompath, *args)
+
+    def gatherDom(self, *args):
+        return xstransact.Gather(self.dompath, *args)
 
     def _writeDom(self, *args):
         return xstransact.Write(self.dompath, *args)
@@ -686,7 +698,7 @@ class XendDomainInfo:
     def _removeDom(self, *args):
         return xstransact.Remove(self.dompath, *args)
 
-    def _storeDom(self, *args):
+    def storeDom(self, *args):
         return xstransact.Store(self.dompath, *args)
 
     def _recreateDom(self):
@@ -787,17 +799,17 @@ class XendDomainInfo:
     def _handleShutdownWatch(self, _):
         log.debug('XendDomainInfo.handleShutdownWatch')
         
-        reason = self._readDom('control/shutdown')
+        reason = self.readDom('control/shutdown')
 
         if reason and reason != 'suspend':
-            sst = self._readDom('xend/shutdown_start_time')
+            sst = self.readDom('xend/shutdown_start_time')
             now = time.time()
             if sst:
                 self.shutdownStartTime = float(sst)
                 timeout = float(sst) + SHUTDOWN_TIMEOUT - now
             else:
                 self.shutdownStartTime = now
-                self._storeDom('xend/shutdown_start_time', now)
+                self.storeDom('xend/shutdown_start_time', now)
                 timeout = SHUTDOWN_TIMEOUT
 
             log.trace(
@@ -828,7 +840,7 @@ class XendDomainInfo:
         return self.dompath
 
     def getShutdownReason(self):
-        return self._readDom('control/shutdown')
+        return self.readDom('control/shutdown')
 
     def getStorePort(self):
         """For use only by image.py and XendCheckpoint.py."""
@@ -914,7 +926,7 @@ class XendDomainInfo:
                 return
 
             elif xeninfo['crashed']:
-                if self._readDom('xend/shutdown_completed'):
+                if self.readDom('xend/shutdown_completed'):
                     # We've seen this shutdown already, but we are preserving
                     # the domain for debugging.  Leave it alone.
                     return
@@ -930,7 +942,7 @@ class XendDomainInfo:
 
             elif xeninfo['shutdown']:
                 self._stateSet(DOM_STATE_SHUTDOWN)
-                if self._readDom('xend/shutdown_completed'):
+                if self.readDom('xend/shutdown_completed'):
                     # We've seen this shutdown already, but we are preserving
                     # the domain for debugging.  Leave it alone.
                     return
@@ -1111,7 +1123,7 @@ class XendDomainInfo:
         log.info("Preserving dead domain %s (%d).", self.info['name_label'],
                  self.domid)
         self._unwatchVm()
-        self._storeDom('xend/shutdown_completed', 'True')
+        self.storeDom('xend/shutdown_completed', 'True')
         self._stateSet(DOM_STATE_HALTED)
 
     #
@@ -1724,7 +1736,7 @@ class XendDomainInfo:
                                    ignore_devices = ignore_store)
 
         if not ignore_store and self.dompath:
-            vnc_port = self._readDom('console/vnc-port')
+            vnc_port = self.readDom('console/vnc-port')
             if vnc_port is not None:
                 result.append(['device',
                                ['console', ['vnc-port', str(vnc_port)]]])
diff -r dde9e37c0671 -r cefb1f761f0b tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Nov 30 18:05:19 2006 +0000
+++ b/tools/python/xen/xend/image.py    Thu Nov 30 18:08:34 2006 +0000
@@ -23,6 +23,7 @@ import signal
 import signal
 
 import xen.lowlevel.xc
+from xen.xend.XendConstants import REVERSE_DOMAIN_SHUTDOWN_REASONS
 from xen.xend.XendError import VmError, XendError
 from xen.xend.XendLogging import log
 from xen.xend.server.netif import randomMAC
@@ -165,6 +166,10 @@ class ImageHandler:
         pass
 
 
+    def recreate(self):
+        pass
+
+
 class LinuxImageHandler(ImageHandler):
 
     ostype = "linux"
@@ -232,9 +237,12 @@ class PPC_LinuxImageHandler(LinuxImageHa
 
 class HVMImageHandler(ImageHandler):
 
+    ostype = "hvm"
+
     def __init__(self, vm, vmConfig, imageConfig, deviceConfig):
         ImageHandler.__init__(self, vm, vmConfig, imageConfig, deviceConfig)
         self.shutdownWatch = None
+        self.rebootFeatureWatch = None
 
     def configure(self, vmConfig, imageConfig, deviceConfig):
         ImageHandler.configure(self, vmConfig, imageConfig, deviceConfig)
@@ -257,7 +265,7 @@ class HVMImageHandler(ImageHandler):
                         ("image/device-model", self.device_model),
                         ("image/display", self.display))
 
-        self.pid = 0
+        self.pid = None
 
         self.dmargs += self.configVNC(imageConfig)
 
@@ -417,20 +425,30 @@ class HVMImageHandler(ImageHandler):
         log.info("spawning device models: %s %s", self.device_model, args)
         # keep track of pid and spawned options to kill it later
         self.pid = os.spawnve(os.P_NOWAIT, self.device_model, args, env)
+        self.vm.storeDom("image/device-model-pid", self.pid)
         log.info("device model pid: %d", self.pid)
+
+    def recreate(self):
+        self.register_shutdown_watch()
+        self.register_reboot_feature_watch()
+        self.pid = self.vm.gatherDom(('image/device-model-pid', int))
 
     def destroy(self):
         self.unregister_shutdown_watch()
         self.unregister_reboot_feature_watch();
-        if not self.pid:
-            return
-        try:
-            os.kill(self.pid, signal.SIGKILL)
-            os.waitpid(self.pid, 0)
-        except OSError, e:
-            log.warning("Unable to kill device model (pid: %d)" % self.pid)
-            
-        self.pid = 0
+        if self.pid:
+            try:
+                os.kill(self.pid, signal.SIGKILL)
+            except OSError, exn:
+                log.exception(exn)
+            try:
+                os.waitpid(self.pid, 0)
+            except OSError, exn:
+                # This is expected if Xend has been restarted within the
+                # life of this domain.  In this case, we can kill the process,
+                # but we can't wait for it because it's not our child.
+                pass
+            self.pid = None
 
     def register_shutdown_watch(self):
         """ add xen store watch on control/shutdown """
@@ -454,23 +472,22 @@ class HVMImageHandler(ImageHandler):
         """ watch call back on node control/shutdown,
             if node changed, this function will be called
         """
-        from xen.xend.XendConstants import DOMAIN_SHUTDOWN_REASONS
         xd = xen.xend.XendDomain.instance()
         try:
             vm = xd.domain_lookup( self.vm.getDomid() )
         except XendError:
             # domain isn't registered, no need to clean it up.
-            return
+            return False
 
         reason = vm.getShutdownReason()
         log.debug("hvm_shutdown fired, shutdown reason=%s", reason)
-        for x in DOMAIN_SHUTDOWN_REASONS.keys():
-            if DOMAIN_SHUTDOWN_REASONS[x] == reason:
-                vm.info['shutdown'] = 1
-                vm.info['shutdown_reason'] = x
-                vm.refreshShutdown(vm.info)
-
-        return 1 # Keep watching
+        if reason in REVERSE_DOMAIN_SHUTDOWN_REASONS:
+            vm.info['shutdown'] = 1
+            vm.info['shutdown_reason'] = \
+                REVERSE_DOMAIN_SHUTDOWN_REASONS[reason]
+            vm.refreshShutdown(vm.info)
+
+        return True # Keep watching
 
     def register_reboot_feature_watch(self):
         """ add xen store watch on control/feature-reboot """
@@ -494,20 +511,15 @@ class HVMImageHandler(ImageHandler):
         """ watch call back on node control/feature-reboot,
             if node changed, this function will be called
         """
-        xd = xen.xend.XendDomain.instance()
-        vm = xd.domain_lookup( self.vm.getDomid() )
-
-        status = vm.readDom('control/feature-reboot')
+        status = self.vm.readDom('control/feature-reboot')
         log.debug("hvm_reboot_feature fired, module status=%s", status)
         if status == '1':
             self.unregister_shutdown_watch()
 
-        return 1 # Keep watching
+        return True # Keep watching
 
 
 class IA64_HVM_ImageHandler(HVMImageHandler):
-
-    ostype = "hvm"
 
     def getRequiredAvailableMemory(self, mem_kb):
         page_kb = 16
@@ -520,8 +532,6 @@ class IA64_HVM_ImageHandler(HVMImageHand
         return 0
 
 class X86_HVM_ImageHandler(HVMImageHandler):
-
-    ostype = "hvm"
 
     def getRequiredAvailableMemory(self, mem_kb):
         # Add 8 MiB overhead for QEMU's video RAM.

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.