[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] Reduce impact of saving/restoring/dumping large domains on Dom0 memory



# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1172328497 0
# Node ID 59b8d5168cc1561326f6749c79ea879093e37b0c
# Parent  0147ef7c3cd79675453525c9d61e4dd6c8d8bad5
Reduce impact of saving/restoring/dumping large domains on Dom0 memory
usage by means of fadvise64() to tell the OS to discard the cache
pages used for the save/dump file.

Signed-off-by: Simon Graham <Simon.Graham@xxxxxxxxxxx>
---
 tools/libxc/Makefile           |    3 +++
 tools/libxc/xc_core.c          |    9 +++++++++
 tools/libxc/xc_linux.c         |   33 +++++++++++++++++++++++++++++++++
 tools/libxc/xc_linux_restore.c |   18 ++++++++++++++++--
 tools/libxc/xc_linux_save.c    |   38 ++++++++++++++++++++++++++++++++------
 tools/libxc/xc_private.h       |   10 ++++++++++
 tools/libxc/xc_solaris.c       |    7 +++++++
 7 files changed, 110 insertions(+), 8 deletions(-)

diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/Makefile
--- a/tools/libxc/Makefile      Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/Makefile      Sat Feb 24 14:48:17 2007 +0000
@@ -58,6 +58,9 @@ CFLAGS   += -Werror -Wmissing-prototypes
 CFLAGS   += -Werror -Wmissing-prototypes
 CFLAGS   += -fno-strict-aliasing
 CFLAGS   += $(INCLUDES) -I.
+
+# Needed for posix_fadvise64() in xc_linux.c
+CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE
 
 # Define this to make it possible to run valgrind on code linked with these
 # libraries.
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_core.c     Sat Feb 24 14:48:17 2007 +0000
@@ -802,6 +802,12 @@ static int local_file_dump(void *args, c
         }
     }
 
+    if (length >= DUMP_INCREMENT*PAGE_SIZE) {
+        // Now dumping pages -- make sure we discard clean pages from
+        // the cache after each write
+        discard_file_cache(da->fd, 0 /* no flush */);
+    }
+
     return 0;
 }
 
@@ -821,6 +827,9 @@ xc_domain_dumpcore(int xc_handle,
 
     sts = xc_domain_dumpcore_via_callback(
         xc_handle, domid, &da, &local_file_dump);
+
+    /* flush and discard any remaining portion of the file from cache */
+    discard_file_cache(da.fd, 1/* flush first*/);
 
     close(da.fd);
 
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c    Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_linux.c    Sat Feb 24 14:48:17 2007 +0000
@@ -328,6 +328,39 @@ int xc_evtchn_unmask(int xce_handle, evt
     return dorw(xce_handle, (char *)&port, sizeof(port), 1);
 }
 
+/*
+ * Optionally fsync() fd, then drop its cached pages (whole file if flushing,
+ * else up to the last page boundary). Returns 0 or a negative errno value.
+ */
+int discard_file_cache(int fd, int flush) 
+{
+    off_t cur = 0;
+    int rc;
+
+    if ( flush && (fsync(fd) < 0) )
+    {
+        PERROR("Failed to flush file: %s", strerror(errno));
+        return -errno;
+    }
+
+    if ( !flush )
+    {
+        if ( (cur = lseek(fd, 0, SEEK_CUR)) == (off_t)-1 )
+            cur = 0; /* a zero length means "through end of file" */
+        cur &= ~((off_t)PAGE_SIZE - 1); /* off_t-wide mask keeps high bits */
+    }
+
+    /* posix_fadvise64() returns the error number; it does not set errno. */
+    rc = posix_fadvise64(fd, 0, cur, POSIX_FADV_DONTNEED);
+    if ( (rc != 0) && (rc != ESPIPE) ) /* pipe/FIFO: nothing to discard */
+    {
+        errno = rc;
+        PERROR("Failed to discard cache: %s", strerror(rc));
+        return -rc;
+    }
+    return 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_linux_restore.c    Sat Feb 24 14:48:17 2007 +0000
@@ -144,7 +144,7 @@ int xc_linux_restore(int xc_handle, int 
                      unsigned int console_evtchn, unsigned long *console_mfn)
 {
     DECLARE_DOMCTL;
-    int rc = 1, i, n, pae_extended_cr3 = 0;
+    int rc = 1, i, n, m, pae_extended_cr3 = 0;
     unsigned long mfn, pfn;
     unsigned int prev_pc, this_pc;
     int verify = 0;
@@ -331,7 +331,7 @@ int xc_linux_restore(int xc_handle, int 
      */
     prev_pc = 0;
 
-    n = 0;
+    n = m = 0;
     while (1) {
 
         int j, nr_mfns = 0; 
@@ -530,6 +530,17 @@ int xc_linux_restore(int xc_handle, int 
 
         munmap(region_base, j*PAGE_SIZE);
         n+= j; /* crude stats */
+
+        /* 
+         * Discard cache for portion of file read so far up to last
+         *  page boundary every 16MB or so.
+         */
+        m += j;
+        if ( m > MAX_PAGECACHE_USAGE )
+        {
+            discard_file_cache(io_fd, 0 /* no flush */);
+            m = 0;
+        }
     }
 
     /*
@@ -864,6 +875,9 @@ int xc_linux_restore(int xc_handle, int 
     free(p2m);
     free(pfn_type);
 
+    /* discard cache for save file  */
+    discard_file_cache(io_fd, 1 /*flush*/);
+
     DPRINTF("Restore exit with rc=%d\n", rc);
     
     return rc;
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_linux_save.c       Sat Feb 24 14:48:17 2007 +0000
@@ -172,6 +172,28 @@ static uint64_t tv_delta(struct timeval 
         (new->tv_usec - old->tv_usec);
 }
 
+/* write() wrapper: on non-live saves, periodically drop the page cache. */
+static int noncached_write(int fd, int live, void *buffer, int len) 
+{
+    static int write_count = 0;   /* bytes requested since last discard */
+    int saved_errno;
+    int rc = write(fd,buffer,len);
+
+    if (live) {
+        return rc;
+    }
+    write_count += len;
+    if (write_count < MAX_PAGECACHE_USAGE*PAGE_SIZE) {
+        return rc;
+    }
+
+    /* Time to discard cache - dont care if this fails */
+    saved_errno = errno;
+    discard_file_cache(fd, 0 /* no flush */);
+    write_count = 0;
+    errno = saved_errno;   /* callers report write()'s errno, not ours */
+    return rc;
+}
 
 #ifdef ADAPTIVE_SAVE
 
@@ -205,7 +227,7 @@ static inline void initialize_mbit_rate(
 }
 
 
-static int ratewrite(int io_fd, void *buf, int n)
+static int ratewrite(int io_fd, int live, void *buf, int n)
 {
     static int budget = 0;
     static int burst_time_us = -1;
@@ -215,7 +237,7 @@ static int ratewrite(int io_fd, void *bu
     long long delta;
 
     if (START_MBIT_RATE == 0)
-        return write(io_fd, buf, n);
+        return noncached_write(io_fd, live, buf, n);
 
     budget -= n;
     if (budget < 0) {
@@ -251,13 +273,13 @@ static int ratewrite(int io_fd, void *bu
             }
         }
     }
-    return write(io_fd, buf, n);
+    return noncached_write(io_fd, live, buf, n);
 }
 
 #else /* ! ADAPTIVE SAVE */
 
 #define RATE_IS_MAX() (0)
-#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
+#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
 #define initialize_mbit_rate()
 
 #endif
@@ -1082,7 +1104,7 @@ int xc_linux_save(int xc_handle, int io_
                     if(race && !live) 
                         goto out; 
 
-                    if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) {
+                    if (ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE) {
                         ERROR("Error when writing to state file (4)"
                               " (errno %d)", errno);
                         goto out;
@@ -1091,7 +1113,7 @@ int xc_linux_save(int xc_handle, int io_
                 }  else {
 
                     /* We have a normal page: just write it directly. */
-                    if (ratewrite(io_fd, spage, PAGE_SIZE) != PAGE_SIZE) {
+                    if (ratewrite(io_fd, live, spage, PAGE_SIZE) != PAGE_SIZE) {
                         ERROR("Error when writing to state file (5)"
                               " (errno %d)", errno);
                         goto out;
@@ -1261,6 +1283,10 @@ int xc_linux_save(int xc_handle, int io_
             DPRINTF("Warning - couldn't disable shadow mode");
         }
     }
+    else {
+        // flush last write and discard cache for file
+        discard_file_cache(io_fd, 1 /* flush */);
+    }            
 
     if (live_shinfo)
         munmap(live_shinfo, PAGE_SIZE);
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_private.h
--- a/tools/libxc/xc_private.h  Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_private.h  Sat Feb 24 14:48:17 2007 +0000
@@ -40,6 +40,13 @@
 #define DEBUG    1
 #define INFO     1
 #define PROGRESS 0
+
+/*
+** Define max dirty page cache to permit during save/restore -- need to balance
+** keeping cache usage down with CPU impact of invalidating too often.
+** (Currently 16MB)
+*/
+#define MAX_PAGECACHE_USAGE (4*1024)
 
 #if INFO
 #define IPRINTF(_f, _a...) printf(_f , ## _a)
@@ -158,4 +165,7 @@ void bitmap_64_to_byte(uint8_t *bp, cons
 void bitmap_64_to_byte(uint8_t *bp, const uint64_t *lp, int nbits);
 void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits);
 
+/* Optionally flush file to disk and discard page cache */
+int discard_file_cache(int fd, int flush);
+
 #endif /* __XC_PRIVATE_H__ */
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_solaris.c
--- a/tools/libxc/xc_solaris.c  Sat Feb 24 14:33:34 2007 +0000
+++ b/tools/libxc/xc_solaris.c  Sat Feb 24 14:48:17 2007 +0000
@@ -242,3 +242,10 @@ int xc_evtchn_unmask(int xce_handle, evt
 {
     return dorw(xce_handle, (char *)&port, sizeof(port), 1);
 }
+
+/* Solaris stub: no flush or cache discard is performed; always returns 0. */
+int discard_file_cache(int fd, int flush) 
+{
+    // TODO: Implement for Solaris!
+    return 0;
+}

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.