[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] Domain core-dumping fixes



Hi John,

Please don't reset the domain after it has been unpaused.
Could you move the reset into xend?
e.g.

            try:
                log.info("Domain core dump requested for domain %s (%d) "
                         "live=%d crash=%d.",
                         dominfo.getName(), dominfo.getDomid(), live, crash)
                dominfo.dumpCore(filename)
                if crash:
                    self.domain_destroy(domid)
+               elif reset:
+                   self.domain_reset(domid)

Best regards,
 Kan

Wed, 11 Mar 2009 18:45:49 -0700, John Levon wrote:

># HG changeset patch
># User John Levon <john.levon@xxxxxxx>
># Date 1236822336 25200
># Node ID 88b3a560b0fb2a5adca969d9b192220d64dfd105
># Parent  e92a56f3581975496d5d9f250823e46493e58548
>Domain core-dumping fixes
>
>The code was attempting to use the domain's current number of pages
>(info.nr_pages) as a maximum index.  We then walk the memory map and can
>easily over-write past the end of the nr_pages-sized array, if the
>domain has more pages mapped in than earlier (live dump).  Restrict
>ourselves to the current number of pages.
>
>Also fix the dump core method in xend to actually implement the crash
>and live options.  In particular this means that xend clients other than
>xm now get non-live dumps by default.
>
>Signed-off-by: John Levon <john.levon@xxxxxxx>
>
>diff --git a/tools/libxc/xc_core.c b/tools/libxc/xc_core.c
>--- a/tools/libxc/xc_core.c
>+++ b/tools/libxc/xc_core.c
>@@ -518,7 +518,17 @@ xc_domain_dumpcore_via_callback(int xc_h
>     if ( sts != 0 )
>         goto out;
> 
>+    /*
>+     * Note: this is the *current* number of pages and may change under
>+     * a live dump-core.  We'll just take this value, and if more pages
>+     * exist, we'll skip them.  If there's less, then we'll just not use
>+     * all the array...
>+     *
>+     * We don't want to use the total potential size of the memory map
>+     * since that is usually much higher than info.nr_pages.
>+     */
>     nr_pages = info.nr_pages;
>+
>     if ( !auto_translated_physmap )
>     {
>         /* obtain p2m table */
>@@ -770,7 +780,7 @@ xc_domain_dumpcore_via_callback(int xc_h
> 
>         pfn_start = memory_map[map_idx].addr >> PAGE_SHIFT;
>         pfn_end = pfn_start + (memory_map[map_idx].size >> PAGE_SHIFT);
>-        for ( i = pfn_start; i < pfn_end; i++ )
>+        for ( i = pfn_start; i < pfn_end && j < nr_pages; i++ )
>         {
>             uint64_t gmfn;
>             void *vaddr;
>diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
>--- a/tools/libxc/xenctrl.h
>+++ b/tools/libxc/xenctrl.h
>@@ -158,7 +158,7 @@ typedef struct xc_dominfo {
>                   paused:1, blocked:1, running:1,
>                   hvm:1, debugged:1;
>     unsigned int  shutdown_reason; /* only meaningful if shutdown==1 */
>-    unsigned long nr_pages;
>+    unsigned long nr_pages; /* current number, not maximum */
>     unsigned long shared_info_frame;
>     uint64_t      cpu_time;
>     unsigned long max_memkb;
>diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py
>--- a/tools/python/xen/xend/XendDomain.py
>+++ b/tools/python/xen/xend/XendDomain.py
>@@ -1252,13 +1252,23 @@ class XendDomain:
>                              POWER_STATE_NAMES[DOM_STATE_PAUSED],
>                              POWER_STATE_NAMES[dominfo._stateGet()])
> 
>-        try:
>-            log.info("Domain core dump requested for domain %s (%d) "
>-                     "live=%d crash=%d.",
>-                     dominfo.getName(), dominfo.getDomid(), live, crash)
>-            return dominfo.dumpCore(filename)
>-        except Exception, ex:
>-            raise XendError(str(ex))
>+        dopause = (not live and dominfo._stateGet() == DOM_STATE_RUNNING)
>+        if dopause:
>+            dominfo.pause()
>+
>+        try:
>+            try:
>+                log.info("Domain core dump requested for domain %s (%d) "
>+                         "live=%d crash=%d.",
>+                         dominfo.getName(), dominfo.getDomid(), live, crash)
>+                dominfo.dumpCore(filename)
>+                if crash:
>+                    self.domain_destroy(domid)
>+            except Exception, ex:
>+                raise XendError(str(ex))
>+        finally:
>+            if dopause and not crash:
>+                dominfo.unpause()
> 
>     def domain_destroy(self, domid):
>         """Terminate domain immediately.
>diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
>--- a/tools/python/xen/xend/XendDomainInfo.py
>+++ b/tools/python/xen/xend/XendDomainInfo.py
>@@ -2036,26 +2036,31 @@ class XendDomainInfo:
>         @raise: XendError if core dumping failed.
>         """
>         
>+        if not corefile:
>+            this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime())
>+            corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time,
>+                              self.info['name_label'], self.domid)
>+                
>+        if os.path.isdir(corefile):
>+            raise XendError("Cannot dump core in a directory: %s" %
>+                            corefile)
>+
>         try:
>-            if not corefile:
>-                this_time = time.strftime("%Y-%m%d-%H%M.%S", time.
>localtime())
>-                corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time,
>-                                  self.info['name_label'], self.domid)
>-                
>-            if os.path.isdir(corefile):
>-                raise XendError("Cannot dump core in a directory: %s" %
>-                                corefile)
>-            
>-            self._writeVm(DUMPCORE_IN_PROGRESS, 'True')
>-            xc.domain_dumpcore(self.domid, corefile)
>+            try:
>+                self._writeVm(DUMPCORE_IN_PROGRESS, 'True')
>+                xc.domain_dumpcore(self.domid, corefile)
>+            except RuntimeError, ex:
>+                corefile_incomp = corefile+'-incomplete'
>+                try:
>+                    os.rename(corefile, corefile_incomp)
>+                except:
>+                    pass
>+
>+                log.error("core dump failed: id = %s name = %s: %s",
>+                          self.domid, self.info['name_label'], str(ex))
>+                raise XendError("Failed to dump core: %s" %  str(ex))
>+        finally:
>             self._removeVm(DUMPCORE_IN_PROGRESS)
>-        except RuntimeError, ex:
>-            corefile_incomp = corefile+'-incomplete'
>-            os.rename(corefile, corefile_incomp)
>-            self._removeVm(DUMPCORE_IN_PROGRESS)
>-            log.exception("XendDomainInfo.dumpCore failed: id = %s name = 
>%s",
>-                          self.domid, self.info['name_label'])
>-            raise XendError("Failed to dump core: %s" %  str(ex))
> 
>     #
>     # Device creation/deletion functions
>diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py
>--- a/tools/python/xen/xm/main.py
>+++ b/tools/python/xen/xm/main.py
>@@ -1351,22 +1351,10 @@ def xm_dump_core(args):
>     else:
>         filename = None
> 
>-    if not live:
>-        ds = server.xend.domain.pause(dom, True)
>-
>-    try:
>-        print "Dumping core of domain: %s ..." % str(dom)
>-        server.xend.domain.dump(dom, filename, live, crash)
>-
>-        if crash:
>-            print "Destroying domain: %s ..." % str(dom)
>-            server.xend.domain.destroy(dom)
>-        elif reset:
>-            print "Resetting domain: %s ..." % str(dom)
>-            server.xend.domain.reset(dom)
>-    finally:
>-        if not live and not crash and not reset and ds == DOM_STATE_RUNNING:
>-            server.xend.domain.unpause(dom)
>+    print "Dumping core of domain: %s ..." % str(dom)
>+    server.xend.domain.dump(dom, filename, live, crash)
>+    if reset:
>+        server.xend.domain.reset(dom)
> 
> def xm_rename(args):
>     arg_check(args, "rename", 2)
>
>_______________________________________________
>Xen-devel mailing list
>Xen-devel@xxxxxxxxxxxxxxxxxxx
>http://lists.xensource.com/xen-devel


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.