Domain core-dumping fixes
author Keir Fraser <keir.fraser@citrix.com>
Thu, 12 Mar 2009 11:09:57 +0000 (11:09 +0000)
committer Keir Fraser <keir.fraser@citrix.com>
Thu, 12 Mar 2009 11:09:57 +0000 (11:09 +0000)
The code was attempting to use the domain's current number of pages
(info.nr_pages) as a maximum index.  We then walk the memory map and
can easily write past the end of the nr_pages-sized array if the
domain has more pages mapped in than it had when nr_pages was read
(as can happen during a live dump).  Restrict ourselves to the current
number of pages and skip any pages beyond it.

Also fix the dump-core method in xend so that it actually implements
the crash and live options.  In particular, this means that xend clients
other than xm now get non-live dumps by default.

Signed-off-by: John Levon <john.levon@sun.com>
tools/libxc/xc_core.c
tools/libxc/xenctrl.h
tools/python/xen/xend/XendDomain.py
tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xm/main.py

index a7fd1647f3881d082773f6405ffb4c6e9497152a..ef1e8dd7a1bde8a4c84b3c5558ccddbc04da4d7b 100644 (file)
@@ -518,7 +518,17 @@ xc_domain_dumpcore_via_callback(int xc_handle,
     if ( sts != 0 )
         goto out;
 
+    /*
+     * Note: this is the *current* number of pages and may change under
+     * a live dump-core.  We'll just take this value, and if more pages
+     * exist, we'll skip them.  If there are fewer, then we'll just not
+     * use all of the array...
+     *
+     * We don't want to use the total potential size of the memory map
+     * since that is usually much higher than info.nr_pages.
+     */
     nr_pages = info.nr_pages;
+
     if ( !auto_translated_physmap )
     {
         /* obtain p2m table */
index 93fbd8be679910366486cb2ef47ccc9f0fbc7a8a..75995b1a922ec278b0674afd0ff819491bd5e0a6 100644 (file)
@@ -158,7 +158,7 @@ typedef struct xc_dominfo {
                   paused:1, blocked:1, running:1,
                   hvm:1, debugged:1;
     unsigned int  shutdown_reason; /* only meaningful if shutdown==1 */
-    unsigned long nr_pages;
+    unsigned long nr_pages; /* current number, not maximum */
     unsigned long shared_info_frame;
     uint64_t      cpu_time;
     unsigned long max_memkb;
index d1bfaa3f43e24d4761bc4872f240794a64366550..bba98f596b831f9bc0aecb1465d85933a46a638e 100644 (file)
@@ -1237,13 +1237,23 @@ class XendDomain:
                              POWER_STATE_NAMES[DOM_STATE_PAUSED],
                              POWER_STATE_NAMES[dominfo._stateGet()])
 
+        dopause = (not live and dominfo._stateGet() == DOM_STATE_RUNNING)
+        if dopause:
+            dominfo.pause()
+
         try:
-            log.info("Domain core dump requested for domain %s (%d) "
-                     "live=%d crash=%d.",
-                     dominfo.getName(), dominfo.getDomid(), live, crash)
-            return dominfo.dumpCore(filename)
-        except Exception, ex:
-            raise XendError(str(ex))
+            try:
+                log.info("Domain core dump requested for domain %s (%d) "
+                         "live=%d crash=%d.",
+                         dominfo.getName(), dominfo.getDomid(), live, crash)
+                dominfo.dumpCore(filename)
+                if crash:
+                    self.domain_destroy(domid)
+            except Exception, ex:
+                raise XendError(str(ex))
+        finally:
+            if dopause and not crash:
+                dominfo.unpause()
 
     def domain_destroy(self, domid):
         """Terminate domain immediately.
index 4ec5679fb8132c9d8c2fcfeefdfc971d45d63677..7b40d46de581b7c1fbdac1fd2651013fdbedeb5f 100644 (file)
@@ -2027,26 +2027,31 @@ class XendDomainInfo:
         @raise: XendError if core dumping failed.
         """
         
-        try:
-            if not corefile:
-                this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime())
-                corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time,
-                                  self.info['name_label'], self.domid)
+        if not corefile:
+            this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime())
+            corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time,
+                              self.info['name_label'], self.domid)
                 
-            if os.path.isdir(corefile):
-                raise XendError("Cannot dump core in a directory: %s" %
-                                corefile)
-            
-            self._writeVm(DUMPCORE_IN_PROGRESS, 'True')
-            xc.domain_dumpcore(self.domid, corefile)
-            self._removeVm(DUMPCORE_IN_PROGRESS)
-        except RuntimeError, ex:
-            corefile_incomp = corefile+'-incomplete'
-            os.rename(corefile, corefile_incomp)
+        if os.path.isdir(corefile):
+            raise XendError("Cannot dump core in a directory: %s" %
+                            corefile)
+
+        try:
+            try:
+                self._writeVm(DUMPCORE_IN_PROGRESS, 'True')
+                xc.domain_dumpcore(self.domid, corefile)
+            except RuntimeError, ex:
+                corefile_incomp = corefile+'-incomplete'
+                try:
+                    os.rename(corefile, corefile_incomp)
+                except:
+                    pass
+
+                log.error("core dump failed: id = %s name = %s: %s",
+                          self.domid, self.info['name_label'], str(ex))
+                raise XendError("Failed to dump core: %s" %  str(ex))
+        finally:
             self._removeVm(DUMPCORE_IN_PROGRESS)
-            log.exception("XendDomainInfo.dumpCore failed: id = %s name = %s",
-                          self.domid, self.info['name_label'])
-            raise XendError("Failed to dump core: %s" %  str(ex))
 
     #
     # Device creation/deletion functions
index 2ffa50a0fe653d5d724080649eb25216ffb10e43..c8bf32174cea1d58217b7dfc520f0c576efed77e 100644 (file)
@@ -1351,22 +1351,10 @@ def xm_dump_core(args):
     else:
         filename = None
 
-    if not live:
-        ds = server.xend.domain.pause(dom, True)
-
-    try:
-        print "Dumping core of domain: %s ..." % str(dom)
-        server.xend.domain.dump(dom, filename, live, crash)
-
-        if crash:
-            print "Destroying domain: %s ..." % str(dom)
-            server.xend.domain.destroy(dom)
-        elif reset:
-            print "Resetting domain: %s ..." % str(dom)
-            server.xend.domain.reset(dom)
-    finally:
-        if not live and not crash and not reset and ds == DOM_STATE_RUNNING:
-            server.xend.domain.unpause(dom)
+    print "Dumping core of domain: %s ..." % str(dom)
+    server.xend.domain.dump(dom, filename, live, crash)
+    if reset:
+        server.xend.domain.reset(dom)
 
 def xm_rename(args):
     arg_check(args, "rename", 2)