[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH for-4.5 v2] libxc: don't leak buffer containing the uncompressed PV kernel



On 11/20/2014 03:21 PM, Konrad Rzeszutek Wilk wrote:
On Thu, Nov 20, 2014 at 03:48:47PM +0000, Ian Campbell wrote:
The libxc xc_dom_* infrastructure uses a very simple malloc memory pool which
is freed by xc_dom_release. However the various xc_try_*_decode routines (other
than the gzip one) just use plain malloc/realloc and therefore the buffer ends
up leaked.

The memory pool currently supports mmap'd buffers as well as a directly
allocated buffers, however the try decode routines make use of realloc and do
not fit well into this model. Introduce a concept of an external memory block
to the memory pool and provide an interface to register such memory.

The mmap_ptr and mmap_len fields of the memblock tracking struct lose their
mmap_ prefix since they are now also used for external memory blocks.

We are only seeing this now because the gzip decoder doesn't leak and it's only
relatively recently that kernels in the wild have switched to better
compression.

This is https://bugs.debian.org/767295

Reported by: Gedalya <gedalya@xxxxxxxxxxx>
Gedelya,

Could you also test this patch to make sure it does fix the
reported issue please?

So here's what happens now.
1. Starts up tiny
2. reboot: leak
3. reboot: freed (process larger, but the delta is all/mostly shared pages)
4. reboot: leak
5. reboot: freed
etc..

root@xen:~/xen-pkgs# xl cr /etc/xen/auto/asterisk_deb80.cfg
Parsing config from /etc/xen/auto/asterisk_deb80.cfg
root@xen:~/xen-pkgs# ps aux | grep asterisk_deb80
root 22981 0.0 0.0 95968 588 ? SLsl 21:55 0:00 /usr/lib/xen-4.4/bin/xl cr /etc/xen/auto/asterisk_deb80.cfg
root@xen:~/xen-pkgs# pmap -x 22981
22981:   /usr/lib/xen-4.4/bin/xl cr /etc/xen/auto/asterisk_deb80.cfg
Address           Kbytes     RSS   Dirty Mode  Mapping
0000000000400000     144     128       0 r-x-- xl
0000000000623000       4       4       4 r---- xl
0000000000624000       8       8       8 rw--- xl
0000000000626000       4       4       4 rw---   [ anon ]
00000000009a6000     288     240     240 rw---   [ anon ]
00007f14d4000000     132       8       8 rw---   [ anon ]
00007f14d4021000   65404       0       0 -----   [ anon ]
<< snip >>
---------------- ------- ------- -------
total kB           95968    2728     596

--- reboot domu ---

root@xen:~/xen-pkgs# ps aux | grep asterisk_deb80
root 22981 0.6 3.3 131652 20008 ? SLsl 21:55 0:00 /usr/lib/xen-4.4/bin/xl cr /etc/xen/auto/asterisk_deb80.cfg
root@xen:~/xen-pkgs# pmap -x 22981
22981:   /usr/lib/xen-4.4/bin/xl cr /etc/xen/auto/asterisk_deb80.cfg
Address           Kbytes     RSS   Dirty Mode  Mapping
0000000000400000     144     144       0 r-x-- xl
0000000000623000       4       4       4 r---- xl
0000000000624000       8       8       8 rw--- xl
0000000000626000       4       4       4 rw---   [ anon ]
00000000009a6000     288     288     288 rw---   [ anon ]
00000000009ee000   35676   16772   16772 rw---   [ anon ]
00007f14d4000000     132       8       8 rw---   [ anon ]
00007f14d4021000   65404       0       0 -----   [ anon ]
00007f14d9ae0000     104     100       0 r-x-- libz.so.1.2.8
<< snip >>
---------------- ------- ------- -------
total kB          131652   20072   17424

--- reboot domu ---

root@xen:~/xen-pkgs# ps aux | grep asterisk_deb80
root 22981 0.5 0.5 95876 3136 ? SLsl 21:55 0:01 /usr/lib/xen-4.4/bin/xl cr /etc/xen/auto/asterisk_deb80.cfg
root@xen:~/xen-pkgs# pmap -x 22981
22981:   /usr/lib/xen-4.4/bin/xl cr /etc/xen/auto/asterisk_deb80.cfg
Address           Kbytes     RSS   Dirty Mode  Mapping
0000000000400000     144     144       0 r-x-- xl
0000000000623000       4       4       4 r---- xl
0000000000624000       8       8       8 rw--- xl
0000000000626000       4       4       4 rw---   [ anon ]
00000000009a6000     188     188     188 rw---   [ anon ]
00007f14d4000000     132       8       8 rw---   [ anon ]
00007f14d4021000   65404       0       0 -----   [ anon ]
00007f14d9ae0000     104     100       0 r-x-- libz.so.1.2.8
<< snip >>
---------------- ------- ------- -------
total kB           95876    3200     552

--- reboot domu ---

root@xen:~/xen-pkgs# ps aux | grep asterisk_deb80
root 22981 0.6 3.4 134660 20548 ? SLsl 21:55 0:02 /usr/lib/xen-4.4/bin/xl cr /etc/xen/auto/asterisk_deb80.cfg
root@xen:~/xen-pkgs# pmap -x 22981
22981:   /usr/lib/xen-4.4/bin/xl cr /etc/xen/auto/asterisk_deb80.cfg
Address           Kbytes     RSS   Dirty Mode  Mapping
0000000000400000     144     144       0 r-x-- xl
0000000000623000       4       4       4 r---- xl
0000000000624000       8       8       8 rw--- xl
0000000000626000       4       4       4 rw---   [ anon ]
00000000009a6000     188     188     188 rw---   [ anon ]
00000000009d5000   38784   17348   17348 rw---   [ anon ]
00007f14d4000000     132       8       8 rw---   [ anon ]
00007f14d4021000   65404       0       0 -----   [ anon ]
00007f14d9ae0000     104     100       0 r-x-- libz.so.1.2.8
<< snip >>
---------------- ------- ------- -------
total kB          134660   20548   17900

--- reboot domu ---

root@xen:~/xen-pkgs# ps aux | grep asterisk_deb80
root 22981 0.6 0.5 95876 3200 ? SLsl 21:55 0:03 /usr/lib/xen-4.4/bin/xl cr /etc/xen/auto/asterisk_deb80.cfg
root@xen:~/xen-pkgs# pmap -x 22981
22981:   /usr/lib/xen-4.4/bin/xl cr /etc/xen/auto/asterisk_deb80.cfg
Address           Kbytes     RSS   Dirty Mode  Mapping
0000000000400000     144     144       0 r-x-- xl
0000000000623000       4       4       4 r---- xl
0000000000624000       8       8       8 rw--- xl
0000000000626000       4       4       4 rw---   [ anon ]
00000000009a6000     188     188     188 rw---   [ anon ]
00007f14d4000000     132       8       8 rw---   [ anon ]
00007f14d4021000   65404       0       0 -----   [ anon ]
00007f14d9ae0000     104     100       0 r-x-- libz.so.1.2.8
<< snip >>
---------------- ------- ------- -------
total kB           95876    3200     552




Tried valgrind, it doesn't look like it was able to see what was going on

root@xen:~# valgrind --leak-check=full xl cr -F /etc/xen/auto/asterisk_deb80.cfg
==24967== Memcheck, a memory error detector
==24967== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==24967== Using Valgrind-3.10.0 and LibVEX; rerun with -h for copyright info
==24967== Command: /usr/sbin/xl cr -F /etc/xen/auto/asterisk_deb80.cfg
==24967==
==24971==
==24971== HEAP SUMMARY:
==24971==     in use at exit: 11,695 bytes in 41 blocks
==24971==   total heap usage: 75 allocs, 34 frees, 44,602 bytes allocated
==24971==
==24971== LEAK SUMMARY:
==24971==    definitely lost: 0 bytes in 0 blocks
==24971==    indirectly lost: 0 bytes in 0 blocks
==24971==      possibly lost: 0 bytes in 0 blocks
==24971==    still reachable: 11,695 bytes in 41 blocks
==24971==         suppressed: 0 bytes in 0 blocks
==24971== Reachable blocks (those to which a pointer was found) are not shown.
==24971== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==24971==
==24971== For counts of detected and suppressed errors, rerun with: -v
==24971== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
==24970==
==24970== HEAP SUMMARY:
==24970==     in use at exit: 10,743 bytes in 36 blocks
==24970==   total heap usage: 64 allocs, 28 frees, 35,289 bytes allocated
==24970==
==24970== LEAK SUMMARY:
==24970==    definitely lost: 0 bytes in 0 blocks
==24970==    indirectly lost: 0 bytes in 0 blocks
==24970==      possibly lost: 0 bytes in 0 blocks
==24970==    still reachable: 10,743 bytes in 36 blocks
==24970==         suppressed: 0 bytes in 0 blocks
==24970== Reachable blocks (those to which a pointer was found) are not shown.
==24970== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==24970==
==24970== For counts of detected and suppressed errors, rerun with: -v
==24970== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
==24975==
==24975== HEAP SUMMARY:
==24975==     in use at exit: 4,859 bytes in 43 blocks
==24975==   total heap usage: 92 allocs, 49 frees, 38,375 bytes allocated
==24975==
==24975== LEAK SUMMARY:
==24975==    definitely lost: 0 bytes in 0 blocks
==24975==    indirectly lost: 0 bytes in 0 blocks
==24975==      possibly lost: 0 bytes in 0 blocks
==24975==    still reachable: 4,859 bytes in 43 blocks
==24975==         suppressed: 0 bytes in 0 blocks
==24975== Reachable blocks (those to which a pointer was found) are not shown.
==24975== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==24975==
==24975== For counts of detected and suppressed errors, rerun with: -v
==24975== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
==24976==
==24976== HEAP SUMMARY:
==24976==     in use at exit: 13,066 bytes in 45 blocks
==24976==   total heap usage: 98 allocs, 53 frees, 48,704 bytes allocated
==24976==
==24976== LEAK SUMMARY:
==24976==    definitely lost: 0 bytes in 0 blocks
==24976==    indirectly lost: 0 bytes in 0 blocks
==24976==      possibly lost: 0 bytes in 0 blocks
==24976==    still reachable: 13,066 bytes in 45 blocks
==24976==         suppressed: 0 bytes in 0 blocks
==24976== Reachable blocks (those to which a pointer was found) are not shown.
==24976== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==24976==
==24976== For counts of detected and suppressed errors, rerun with: -v
==24976== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
==24969==
==24969== HEAP SUMMARY:
==24969==     in use at exit: 11,049 bytes in 42 blocks
==24969==   total heap usage: 92 allocs, 50 frees, 48,587 bytes allocated
==24969==
==24969== LEAK SUMMARY:
==24969==    definitely lost: 0 bytes in 0 blocks
==24969==    indirectly lost: 0 bytes in 0 blocks
==24969==      possibly lost: 0 bytes in 0 blocks
==24969==    still reachable: 11,049 bytes in 42 blocks
==24969==         suppressed: 0 bytes in 0 blocks
==24969== Reachable blocks (those to which a pointer was found) are not shown.
==24969== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==24969==
==24969== For counts of detected and suppressed errors, rerun with: -v
==24969== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Parsing config from /etc/xen/auto/asterisk_deb80.cfg
Waiting for domain asterisk_deb80 (domid 31) to die [pid 24967]
Domain 31 has shut down, reason code 1 0x1
Action for shutdown reason code 1 is restart
Domain 31 needs to be cleaned up: destroying the domain
Done. Rebooting now
Waiting for domain asterisk_deb80 (domid 32) to die [pid 24967]
Domain 32 has shut down, reason code 1 0x1
Action for shutdown reason code 1 is restart
Domain 32 needs to be cleaned up: destroying the domain
Done. Rebooting now
Waiting for domain asterisk_deb80 (domid 33) to die [pid 24967]
Domain 33 has shut down, reason code 1 0x1
Action for shutdown reason code 1 is restart
Domain 33 needs to be cleaned up: destroying the domain
Done. Rebooting now
Waiting for domain asterisk_deb80 (domid 34) to die [pid 24967]
Domain 34 has shut down, reason code 0 0x0
Action for shutdown reason code 0 is destroy
Domain 34 needs to be cleaned up: destroying the domain
Done. Exiting now



Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
---
v2: Correct handling in xc_try_lzo1x_decode.

This is a bug fix and should go into 4.5.

I have a sneaking suspicion this is going to need to backport a very long way...
---
  tools/libxc/include/xc_dom.h        |   10 ++++--
  tools/libxc/xc_dom_bzimageloader.c  |   20 ++++++++++++
  tools/libxc/xc_dom_core.c           |   61 +++++++++++++++++++++++++++--------
  tools/libxc/xc_dom_decompress_lz4.c |    5 +++
  4 files changed, 80 insertions(+), 16 deletions(-)

diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
index 6ae6a9f..07d7224 100644
--- a/tools/libxc/include/xc_dom.h
+++ b/tools/libxc/include/xc_dom.h
@@ -33,8 +33,13 @@ struct xc_dom_seg {
struct xc_dom_mem {
      struct xc_dom_mem *next;
-    void *mmap_ptr;
-    size_t mmap_len;
+    void *ptr;
+    enum {
+        XC_DOM_MEM_TYPE_MALLOC_INTERNAL,
+        XC_DOM_MEM_TYPE_MALLOC_EXTERNAL,
+        XC_DOM_MEM_TYPE_MMAP,
+    } type;
+    size_t len;
      unsigned char memory[0];
  };
@@ -298,6 +303,7 @@ void xc_dom_log_memory_footprint(struct xc_dom_image *dom);
  /* --- simple memory pool ------------------------------------------ */
void *xc_dom_malloc(struct xc_dom_image *dom, size_t size);
+int xc_dom_register_external(struct xc_dom_image *dom, void *ptr, size_t size);
  void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size);
  void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
                              const char *filename, size_t * size,
diff --git a/tools/libxc/xc_dom_bzimageloader.c 
b/tools/libxc/xc_dom_bzimageloader.c
index 2225699..964ebdc 100644
--- a/tools/libxc/xc_dom_bzimageloader.c
+++ b/tools/libxc/xc_dom_bzimageloader.c
@@ -161,6 +161,13 @@ static int xc_try_bzip2_decode(
total = (((uint64_t)stream.total_out_hi32) << 32) | stream.total_out_lo32; + if ( xc_dom_register_external(dom, out_buf, total) )
+    {
+        DOMPRINTF("BZIP2: Error registering stream output");
+        free(out_buf);
+        goto bzip2_cleanup;
+    }
+
      DOMPRINTF("%s: BZIP2 decompress OK, 0x%zx -> 0x%lx",
                __FUNCTION__, *size, (long unsigned int) total);
@@ -305,6 +312,13 @@ static int _xc_try_lzma_decode(
          }
      }
+ if ( xc_dom_register_external(dom, out_buf, stream->total_out) )
+    {
+        DOMPRINTF("%s: Error registering stream output", what);
+        free(out_buf);
+        goto lzma_cleanup;
+    }
+
      DOMPRINTF("%s: %s decompress OK, 0x%zx -> 0x%zx",
                __FUNCTION__, what, *size, (size_t)stream->total_out);
@@ -464,7 +478,13 @@ static int xc_try_lzo1x_decode( dst_len = lzo_read_32(cur);
          if ( !dst_len )
+        {
+            msg = "Error registering stream output";
+            if ( xc_dom_register_external(dom, out_buf, out_len) )
+                break;
+
              return 0;
+        }
if ( dst_len > LZOP_MAX_BLOCK_SIZE )
          {
diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
index baa62a1..ecbf981 100644
--- a/tools/libxc/xc_dom_core.c
+++ b/tools/libxc/xc_dom_core.c
@@ -132,6 +132,7 @@ void *xc_dom_malloc(struct xc_dom_image *dom, size_t size)
          return NULL;
      }
      memset(block, 0, sizeof(*block) + size);
+    block->type = XC_DOM_MEM_TYPE_MALLOC_INTERNAL;
      block->next = dom->memblocks;
      dom->memblocks = block;
      dom->alloc_malloc += sizeof(*block) + size;
@@ -151,23 +152,45 @@ void *xc_dom_malloc_page_aligned(struct xc_dom_image 
*dom, size_t size)
          return NULL;
      }
      memset(block, 0, sizeof(*block));
-    block->mmap_len = size;
-    block->mmap_ptr = mmap(NULL, block->mmap_len,
-                           PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
-                           -1, 0);
-    if ( block->mmap_ptr == MAP_FAILED )
+    block->len = size;
+    block->ptr = mmap(NULL, block->len,
+                      PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+                      -1, 0);
+    if ( block->ptr == MAP_FAILED )
      {
          DOMPRINTF("%s: mmap failed", __FUNCTION__);
          free(block);
          return NULL;
      }
+    block->type = XC_DOM_MEM_TYPE_MMAP;
      block->next = dom->memblocks;
      dom->memblocks = block;
      dom->alloc_malloc += sizeof(*block);
-    dom->alloc_mem_map += block->mmap_len;
+    dom->alloc_mem_map += block->len;
      if ( size > (100 * 1024) )
          print_mem(dom, __FUNCTION__, size);
-    return block->mmap_ptr;
+    return block->ptr;
+}
+
+int xc_dom_register_external(struct xc_dom_image *dom, void *ptr, size_t size)
+{
+    struct xc_dom_mem *block;
+
+    block = malloc(sizeof(*block));
+    if ( block == NULL )
+    {
+        DOMPRINTF("%s: allocation failed", __FUNCTION__);
+        return -1;
+    }
+    memset(block, 0, sizeof(*block));
+    block->ptr = ptr;
+    block->len = size;
+    block->type = XC_DOM_MEM_TYPE_MALLOC_EXTERNAL;
+    block->next = dom->memblocks;
+    dom->memblocks = block;
+    dom->alloc_malloc += sizeof(*block);
+    dom->alloc_mem_map += block->len;
+    return 0;
  }
void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
@@ -212,24 +235,25 @@ void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
      }
memset(block, 0, sizeof(*block));
-    block->mmap_len = *size;
-    block->mmap_ptr = mmap(NULL, block->mmap_len, PROT_READ,
+    block->len = *size;
+    block->ptr = mmap(NULL, block->len, PROT_READ,
                             MAP_SHARED, fd, 0);
-    if ( block->mmap_ptr == MAP_FAILED ) {
+    if ( block->ptr == MAP_FAILED ) {
          xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                       "failed to mmap file: %s",
                       strerror(errno));
          goto err;
      }
+ block->type = XC_DOM_MEM_TYPE_MMAP;
      block->next = dom->memblocks;
      dom->memblocks = block;
      dom->alloc_malloc += sizeof(*block);
-    dom->alloc_file_map += block->mmap_len;
+    dom->alloc_file_map += block->len;
      close(fd);
      if ( *size > (100 * 1024) )
          print_mem(dom, __FUNCTION__, *size);
-    return block->mmap_ptr;
+    return block->ptr;
err:
      if ( fd != -1 )
@@ -246,8 +270,17 @@ static void xc_dom_free_all(struct xc_dom_image *dom)
      while ( (block = dom->memblocks) != NULL )
      {
          dom->memblocks = block->next;
-        if ( block->mmap_ptr )
-            munmap(block->mmap_ptr, block->mmap_len);
+        switch ( block->type )
+        {
+        case XC_DOM_MEM_TYPE_MALLOC_INTERNAL:
+            break;
+        case XC_DOM_MEM_TYPE_MALLOC_EXTERNAL:
+            free(block->ptr);
+            break;
+        case XC_DOM_MEM_TYPE_MMAP:
+            munmap(block->ptr, block->len);
+            break;
+        }
          free(block);
      }
  }
diff --git a/tools/libxc/xc_dom_decompress_lz4.c 
b/tools/libxc/xc_dom_decompress_lz4.c
index 490ec56..b6a33f2 100644
--- a/tools/libxc/xc_dom_decompress_lz4.c
+++ b/tools/libxc/xc_dom_decompress_lz4.c
@@ -103,6 +103,11 @@ int xc_try_lz4_decode(
if (size == 0)
                {
+                       if ( xc_dom_register_external(dom, output, out_len) )
+                       {
+                               msg = "Error registering stream output";
+                               goto exit_2;
+                       }
                        *blob = output;
                        *psize = out_len;
                        return 0;
--
1.7.10.4



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.