[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 7 of 8] xenalyze: Introduce more efficient read mechanism



The pread functionality was convenient for reading at arbitrary
offsets within a file, but it was incredibly inefficient; after
recent optimizations, a summary analysis was spending 30% of its
time doing system calls, and indications were that a big chunk
of the rest of the overhead was due to cache misses coming out of
that path.

This patch introduces a read using mmap.  Because the file may
be too big to map on 32-bit systems, I use a "mapcache" of 8
different 2MiB buffers.  If an offset is in what's already mapped,
I just copy it; if not, I unmap one of the buffers and map it instead.

This optimization alone took the summary processing time for my test
trace from 34s down to 10s.

Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

diff -r 108d02354403 -r f86ebfe66384 Makefile
--- a/Makefile  Thu Jan 26 17:17:53 2012 +0000
+++ b/Makefile  Thu Jan 26 17:18:13 2012 +0000
@@ -13,7 +13,7 @@ CFLAGS  += -Werror
 
 BIN      = xenalyze dump-raw
 
-HDRS = trace.h analyze.h
+HDRS = trace.h analyze.h mread.h
 
 all: $(BIN)
 
@@ -23,3 +23,6 @@ clean:
 
 %: %.c $(HDRS) Makefile
        $(CC) $(CFLAGS) -o $@ $<
+
+xenalyze: xenalyze.o mread.o
+       $(CC) $(CFLAGS) -o $@ $^
diff -r 108d02354403 -r f86ebfe66384 mread.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/mread.c   Thu Jan 26 17:18:13 2012 +0000
@@ -0,0 +1,160 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include "mread.h"
+
+mread_handle_t mread_init(int fd)
+{
+    struct stat64 s;
+    mread_handle_t h;
+    
+    h=malloc(sizeof(struct mread_ctrl));
+
+    if (!h)
+    {
+        perror("malloc");
+        exit(1);
+    }
+
+    bzero(h, sizeof(struct mread_ctrl));
+
+    h->fd = fd;
+
+    fstat64(fd, &s);
+    h->file_size = s.st_size;
+
+    return h;
+}
+
+ssize_t mread64(mread_handle_t h, void *rec, ssize_t len, loff_t offset)
+{
+    /* Idea: have a "cache" of N mmaped regions.  If the offset is
+     * in one of the regions, just copy it.  If not, evict one of the
+     * regions and map the appropriate range.
+     *
+     * Basic algorithm:
+     *  - See if the offset is in one of the regions
+     *    - If not, map it
+     *       - evict an old region
+     *       - map the new region
+     *  - Copy
+     */
+    char * b=NULL;
+    int bind=-1;
+    loff_t boffset=0;
+    ssize_t bsize;
+
+#define dprintf(x...)
+//#define dprintf fprintf
+
+    dprintf(warn, "%s: offset %llx len %d\n", __func__,
+            offset, len);
+    if ( offset > h->file_size )
+    {
+        dprintf(warn, " offset > file size %llx, returning 0\n",
+                h->file_size);
+        return 0;
+    }
+    if ( offset + len > h->file_size )
+    {
+        dprintf(warn, " offset+len > file size %llx, truncating\n",
+                h->file_size);
+        len = h->file_size - offset;
+    }
+
+    /* Try to find the offset in our range */
+    dprintf(warn, " Trying last, %d\n", last);
+    if ( h->map[h->last].buffer
+         && (offset & MREAD_BUF_MASK) == h->map[h->last].start_offset )
+    {
+        bind=h->last;
+        goto copy;
+    }
+
+    /* Scan to see if it's anywhere else */
+    dprintf(warn, " Scanning\n");
+    for(bind=0; bind<MREAD_MAPS; bind++)
+        if ( h->map[bind].buffer
+             && (offset & MREAD_BUF_MASK) == h->map[bind].start_offset )
+        {
+            dprintf(warn, "  Found, index %d\n", bind);
+            break;
+        }
+
+    /* If we didn't find it, evict someone and map it */
+    if ( bind == MREAD_MAPS )
+    {
+        dprintf(warn, " Clock\n");
+        while(1)
+        {
+            h->clock++;
+            if(h->clock >= MREAD_MAPS)
+                h->clock=0;
+            dprintf(warn, "  %d\n", h->clock);
+            if(h->map[h->clock].buffer == NULL)
+            {
+                dprintf(warn, "  Buffer null, using\n");
+                break;
+            }
+            if(!h->map[h->clock].accessed)
+            {
+                dprintf(warn, "  Not accessed, using\n");
+                break;
+            }
+            h->map[h->clock].accessed=0;
+        }
+        if(h->map[h->clock].buffer)
+        {
+            dprintf(warn, "  Unmapping\n");
+            munmap(h->map[h->clock].buffer, MREAD_BUF_SIZE);
+        }
+        /* FIXME: Try MAP_HUGETLB? */
+        /* FIXME: Make sure this works on large files... */
+        h->map[h->clock].start_offset = offset & MREAD_BUF_MASK;
+        dprintf(warn, "  Mapping %llx from offset %llx\n",
+                MREAD_BUF_SIZE, h->map[h->clock].start_offset);
+        h->map[h->clock].buffer = mmap(NULL, MREAD_BUF_SIZE, PROT_READ,
+                                  MAP_SHARED,
+                                  h->fd,
+                                  h->map[h->clock].start_offset);
+        dprintf(warn, "   mmap returned %p\n", h->map[h->clock].buffer);
+        if ( h->map[h->clock].buffer == MAP_FAILED )
+        {
+            h->map[h->clock].buffer = NULL;
+            perror("mmap");
+            exit(1);
+        }
+        bind = h->clock;
+    }
+
+    h->last=bind;
+copy:
+    h->map[bind].accessed=1;
+    b=h->map[bind].buffer;
+    boffset=offset - h->map[bind].start_offset;
+    if ( boffset + len > MREAD_BUF_SIZE )
+        bsize = MREAD_BUF_SIZE - boffset;
+    else
+        bsize = len;
+    dprintf(warn, " Using index %d, buffer at %p, buffer offset %llx len %d\n",
+            bind, b, boffset, bsize);
+
+    bcopy(b+boffset, rec, len);
+
+    /* Handle the boundary case; make sure this is after doing anything
+     * with the static variables*/
+    if ( len > bsize )
+    {
+        dprintf(warn, "  Finishing up by reading l %d o %llx\n",
+                len-bsize, offset+bsize);
+        mread64(h, rec+bsize, len-bsize, offset+bsize);
+    }
+
+    /* FIXME: ?? */
+    return len;
+#undef dprintf
+}
diff -r 108d02354403 -r f86ebfe66384 mread.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/mread.h   Thu Jan 26 17:18:13 2012 +0000
@@ -0,0 +1,18 @@
+#define MREAD_MAPS 8
+#define MREAD_BUF_SHIFT 9
+#define PAGE_SHIFT 12
+#define MREAD_BUF_SIZE (1ULL<<(PAGE_SHIFT+MREAD_BUF_SHIFT))
+#define MREAD_BUF_MASK (~(MREAD_BUF_SIZE-1))
+typedef struct mread_ctrl {
+    int fd;
+    loff_t file_size;
+    struct mread_buffer {
+        char * buffer;
+        loff_t start_offset;
+        int accessed;
+    } map[MREAD_MAPS];
+    int clock, last;
+} *mread_handle_t;
+
+mread_handle_t mread_init(int fd);
+ssize_t mread64(mread_handle_t h, void *dst, ssize_t len, loff_t offset);
diff -r 108d02354403 -r f86ebfe66384 xenalyze.c
--- a/xenalyze.c        Thu Jan 26 17:17:53 2012 +0000
+++ b/xenalyze.c        Thu Jan 26 17:18:13 2012 +0000
@@ -31,11 +31,15 @@
 #include <unistd.h>
 #include "trace.h"
 #include "analyze.h"
+#include "mread.h"
 #include <errno.h>
 #include <strings.h>
 #include <string.h>
 #include <assert.h>
 
+struct mread_ctrl;
+
+
 typedef unsigned long long tsc_t;
 
 #define DEFAULT_CPU_HZ 2400000000LL
@@ -60,6 +64,7 @@ struct array_struct {
 /* -- Global variables -- */
 struct {
     int fd;
+    struct mread_ctrl *mh;
     struct symbol_struct * symbols;
     char * symbol_file;
     char * trace_file;
@@ -257,18 +262,23 @@ struct {
 
 /* -- on-disk trace buffer definitions -- */
 struct trace_record {
-    unsigned event:28,
-        extra_words:3,
-        cycle_flag:1;
     union {
         struct {
-            uint32_t tsc_lo, tsc_hi;
-            uint32_t data[7];
-        } tsc;
-        struct {
-            uint32_t data[7];
-        } notsc;
-    } u;
+            unsigned event:28,
+                extra_words:3,
+                cycle_flag:1;
+            union {
+                struct {
+                    uint32_t tsc_lo, tsc_hi;
+                    uint32_t data[7];
+                } tsc;
+                struct {
+                    uint32_t data[7];
+                } notsc;
+            } u;
+        };
+        uint32_t raw[8];
+    };
 };
 
 FILE *warn = NULL;
@@ -1943,7 +1953,7 @@ char * pcpu_string(int pcpu);
 void pcpu_string_draw(struct pcpu_info *p);
 void process_generic(struct record_info *ri);
 void dump_generic(FILE *f, struct record_info *ri);
-ssize_t __read_record(int fd, struct trace_record *rec, loff_t offset);
+ssize_t __read_record(struct trace_record *rec, loff_t offset);
 void error(enum error_level l, struct record_info *ri);
 void update_io_address(struct io_address ** list, unsigned int pa, int dir,
                        tsc_t arc_cycles, unsigned int va);
@@ -8123,17 +8133,26 @@ void dump_raw(char * s, struct record_in
     int i;
 
     if(ri->rec.cycle_flag)
-        printf("%s %x %d %lld [",
+        printf("%s %7x %d %14lld [",
                s, ri->event, ri->extra_words, ri->tsc);
     else
-        printf("%s %x %d - [",
-               s, ri->event, ri->extra_words);
-
-    for(i=0; i<ri->extra_words; i++) {
-        printf(" %x", ri->d[i]);
+        printf("%s %7x %d %14s [",
+               s, ri->event, ri->extra_words, "-");
+
+    for(i=0; i<7; i++) {
+        if ( i < ri->extra_words )
+            printf(" %8x", ri->d[i]);
+        else
+            printf("         ");
     }
         
-    printf(" ]\n");
+    printf(" ] | ");
+
+    for (i=0; i<8; i++) {
+        printf(" %08x", ri->rec.raw[i]);
+    }
+
+    printf(" |\n");
 }
 
 void error(enum error_level l, struct record_info *ri)
@@ -8455,7 +8474,7 @@ loff_t scan_for_new_pcpu(loff_t offset) 
     struct trace_record rec;
     struct cpu_change_data *cd;
     
-    r=__read_record(G.fd, &rec, offset);
+    r=__read_record(&rec, offset);
 
     if(r==0)
         return 0;
@@ -9028,11 +9047,11 @@ void progress_finish(void) {
     }
 }
 
-ssize_t __read_record(int fd, struct trace_record *rec, loff_t offset)
+ssize_t __read_record(struct trace_record *rec, loff_t offset)
 {
     ssize_t r, rsize;
 
-    r=pread64(G.fd, rec, sizeof(*rec), offset);
+    r=mread64(G.mh, rec, sizeof(*rec), offset);
 
     if(r < 0) {
         /* Read error */
@@ -9110,14 +9129,14 @@ void __fill_in_record_info(struct pcpu_i
     ri->cpu = p->pid;
 }
 
-ssize_t read_record(int fd, struct pcpu_info * p) {
+ssize_t read_record(struct pcpu_info * p) {
     loff_t * offset;
     struct record_info *ri;
 
     offset = &p->file_offset;
     ri = &p->ri;
 
-    ri->size = __read_record(fd, &ri->rec, *offset);
+    ri->size = __read_record(&ri->rec, *offset);
     if(ri->size)
     {
         __fill_in_record_info(p);
@@ -9317,7 +9336,7 @@ void process_records(void) {
             }
         }
         else
-            read_record(G.fd, p);
+            read_record(p);
 
         /* Update this pcpu in the processing order */
         if ( p->active )
@@ -10304,6 +10323,10 @@ int main(int argc, char *argv[]) {
         fstat64(G.fd, &s);
         G.file_size = s.st_size;
     }
+
+    if ( (G.mh = mread_init(G.fd)) == NULL )
+        perror("mread");
+
     if (G.symbol_file != NULL)
         parse_symbol_file(G.symbol_file);
 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.