[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Reduce impact of saving/restoring/dumping large domains on Dom0 memory usage
# HG changeset patch # User Keir Fraser <keir@xxxxxxxxxxxxx> # Date 1172328497 0 # Node ID 59b8d5168cc1561326f6749c79ea879093e37b0c # Parent 0147ef7c3cd79675453525c9d61e4dd6c8d8bad5 Reduce impact of saving/restoring/dumping large domains on Dom0 memory usage by means of fadvise64() to tell the OS to discard the cache pages used for the save/dump file. Signed-off-by: Simon Graham <Simon.Graham@xxxxxxxxxxx> --- tools/libxc/Makefile | 3 +++ tools/libxc/xc_core.c | 9 +++++++++ tools/libxc/xc_linux.c | 33 +++++++++++++++++++++++++++++++++ tools/libxc/xc_linux_restore.c | 18 ++++++++++++++++-- tools/libxc/xc_linux_save.c | 38 ++++++++++++++++++++++++++++++++------ tools/libxc/xc_private.h | 10 ++++++++++ tools/libxc/xc_solaris.c | 7 +++++++ 7 files changed, 110 insertions(+), 8 deletions(-) diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/Makefile --- a/tools/libxc/Makefile Sat Feb 24 14:33:34 2007 +0000 +++ b/tools/libxc/Makefile Sat Feb 24 14:48:17 2007 +0000 @@ -58,6 +58,9 @@ CFLAGS += -Werror -Wmissing-prototypes CFLAGS += -Werror -Wmissing-prototypes CFLAGS += -fno-strict-aliasing CFLAGS += $(INCLUDES) -I. + +# Needed for posix_fadvise64() in xc_linux.c +CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE # Define this to make it possible to run valgrind on code linked with these # libraries. 
diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_core.c --- a/tools/libxc/xc_core.c Sat Feb 24 14:33:34 2007 +0000 +++ b/tools/libxc/xc_core.c Sat Feb 24 14:48:17 2007 +0000 @@ -802,6 +802,12 @@ static int local_file_dump(void *args, c } } + if (length >= DUMP_INCREMENT*PAGE_SIZE) { + // Now dumping pages -- make sure we discard clean pages from + // the cache after each write + discard_file_cache(da->fd, 0 /* no flush */); + } + return 0; } @@ -821,6 +827,9 @@ xc_domain_dumpcore(int xc_handle, sts = xc_domain_dumpcore_via_callback( xc_handle, domid, &da, &local_file_dump); + + /* flush and discard any remaining portion of the file from cache */ + discard_file_cache(da.fd, 1/* flush first*/); close(da.fd); diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_linux.c --- a/tools/libxc/xc_linux.c Sat Feb 24 14:33:34 2007 +0000 +++ b/tools/libxc/xc_linux.c Sat Feb 24 14:48:17 2007 +0000 @@ -328,6 +328,39 @@ int xc_evtchn_unmask(int xce_handle, evt return dorw(xce_handle, (char *)&port, sizeof(port), 1); } +/* Optionally flush file to disk and discard page cache */ +int discard_file_cache(int fd, int flush) +{ + off_t cur = 0; + + if ( flush && (fsync(fd) < 0) ) + { + PERROR("Failed to flush file: %s", strerror(errno)); + return -errno; + } + + /* + * Calculate last page boundary of amount written so far + * unless we are flushing in which case entire cache + * is discarded. + */ + if ( !flush ) + { + if ( (cur = lseek(fd, 0, SEEK_CUR)) == (off_t)-1 ) + cur = 0; + cur &= ~(PAGE_SIZE-1); + } + + /* Discard from the buffer cache. 
*/ + if ( posix_fadvise64(fd, 0, cur, POSIX_FADV_DONTNEED) < 0 ) + { + PERROR("Failed to discard cache: %s", strerror(errno)); + return -errno; + } + + return 0; +} + /* * Local variables: * mode: C diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Sat Feb 24 14:33:34 2007 +0000 +++ b/tools/libxc/xc_linux_restore.c Sat Feb 24 14:48:17 2007 +0000 @@ -144,7 +144,7 @@ int xc_linux_restore(int xc_handle, int unsigned int console_evtchn, unsigned long *console_mfn) { DECLARE_DOMCTL; - int rc = 1, i, n, pae_extended_cr3 = 0; + int rc = 1, i, n, m, pae_extended_cr3 = 0; unsigned long mfn, pfn; unsigned int prev_pc, this_pc; int verify = 0; @@ -331,7 +331,7 @@ int xc_linux_restore(int xc_handle, int */ prev_pc = 0; - n = 0; + n = m = 0; while (1) { int j, nr_mfns = 0; @@ -530,6 +530,17 @@ int xc_linux_restore(int xc_handle, int munmap(region_base, j*PAGE_SIZE); n+= j; /* crude stats */ + + /* + * Discard cache for portion of file read so far up to last + * page boundary every 16MB or so. 
+ */ + m += j; + if ( m > MAX_PAGECACHE_USAGE ) + { + discard_file_cache(io_fd, 0 /* no flush */); + m = 0; + } } /* @@ -864,6 +875,9 @@ int xc_linux_restore(int xc_handle, int free(p2m); free(pfn_type); + /* discard cache for save file */ + discard_file_cache(io_fd, 1 /*flush*/); + DPRINTF("Restore exit with rc=%d\n", rc); return rc; diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Sat Feb 24 14:33:34 2007 +0000 +++ b/tools/libxc/xc_linux_save.c Sat Feb 24 14:48:17 2007 +0000 @@ -172,6 +172,28 @@ static uint64_t tv_delta(struct timeval (new->tv_usec - old->tv_usec); } +static int noncached_write(int fd, int live, void *buffer, int len) +{ + static int write_count = 0; + + int rc = write(fd,buffer,len); + + if (!live) { + write_count += len; + + if (write_count >= MAX_PAGECACHE_USAGE*PAGE_SIZE) { + int serrno = errno; + + /* Time to discard cache - dont care if this fails */ + discard_file_cache(fd, 0 /* no flush */); + + write_count = 0; + + errno = serrno; + } + } + return rc; +} #ifdef ADAPTIVE_SAVE @@ -205,7 +227,7 @@ static inline void initialize_mbit_rate( } -static int ratewrite(int io_fd, void *buf, int n) +static int ratewrite(int io_fd, int live, void *buf, int n) { static int budget = 0; static int burst_time_us = -1; @@ -215,7 +237,7 @@ static int ratewrite(int io_fd, void *bu long long delta; if (START_MBIT_RATE == 0) - return write(io_fd, buf, n); + return noncached_write(io_fd, live, buf, n); budget -= n; if (budget < 0) { @@ -251,13 +273,13 @@ static int ratewrite(int io_fd, void *bu } } } - return write(io_fd, buf, n); + return noncached_write(io_fd, live, buf, n); } #else /* ! 
ADAPTIVE SAVE */ #define RATE_IS_MAX() (0) -#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n)) +#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n)) #define initialize_mbit_rate() #endif @@ -1082,7 +1104,7 @@ int xc_linux_save(int xc_handle, int io_ if(race && !live) goto out; - if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) { + if (ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE) { ERROR("Error when writing to state file (4)" " (errno %d)", errno); goto out; @@ -1091,7 +1113,7 @@ int xc_linux_save(int xc_handle, int io_ } else { /* We have a normal page: just write it directly. */ - if (ratewrite(io_fd, spage, PAGE_SIZE) != PAGE_SIZE) { + if (ratewrite(io_fd, live, spage, PAGE_SIZE) != PAGE_SIZE) { ERROR("Error when writing to state file (5)" " (errno %d)", errno); goto out; @@ -1261,6 +1283,10 @@ int xc_linux_save(int xc_handle, int io_ DPRINTF("Warning - couldn't disable shadow mode"); } } + else { + // flush last write and discard cache for file + discard_file_cache(io_fd, 1 /* flush */); + } if (live_shinfo) munmap(live_shinfo, PAGE_SIZE); diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_private.h --- a/tools/libxc/xc_private.h Sat Feb 24 14:33:34 2007 +0000 +++ b/tools/libxc/xc_private.h Sat Feb 24 14:48:17 2007 +0000 @@ -40,6 +40,13 @@ #define DEBUG 1 #define INFO 1 #define PROGRESS 0 + +/* +** Define max dirty page cache to permit during save/restore -- need to balance +** keeping cache usage down with CPU impact of invalidating too often. +** (Currently 16MB) +*/ +#define MAX_PAGECACHE_USAGE (4*1024) #if INFO #define IPRINTF(_f, _a...) 
printf(_f , ## _a) @@ -158,4 +165,7 @@ void bitmap_64_to_byte(uint8_t *bp, cons void bitmap_64_to_byte(uint8_t *bp, const uint64_t *lp, int nbits); void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits); +/* Optionally flush file to disk and discard page cache */ +int discard_file_cache(int fd, int flush); + #endif /* __XC_PRIVATE_H__ */ diff -r 0147ef7c3cd7 -r 59b8d5168cc1 tools/libxc/xc_solaris.c --- a/tools/libxc/xc_solaris.c Sat Feb 24 14:33:34 2007 +0000 +++ b/tools/libxc/xc_solaris.c Sat Feb 24 14:48:17 2007 +0000 @@ -242,3 +242,10 @@ int xc_evtchn_unmask(int xce_handle, evt { return dorw(xce_handle, (char *)&port, sizeof(port), 1); } + +/* Optionally flush file to disk and discard page cache */ +int discard_file_cache(int fd, int flush) +{ + // TODO: Implement for Solaris! + return 0; +} _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |