[Xen-changelog] [xen master] xenalyze: add to tools/xentrace/
commit fd19a9976e7ce63bf07f8af1b9b5e8bb5812d51d Author: Olaf Hering <olaf@xxxxxxxxx> AuthorDate: Thu Jun 11 16:30:38 2015 +0000 Commit: Ian Campbell <ian.campbell@xxxxxxxxxx> CommitDate: Tue Jun 16 11:34:59 2015 +0100 xenalyze: add to tools/xentrace/ This merges xenalyze.hg, changeset 150:24308507be1d, into tools/xentrace/xenalyze.c to have the tool and public/trace.h in one place. Adjust code to use public/trace.h instead of private trace.h Signed-off-by: Olaf Hering <olaf@xxxxxxxxx> Acked-by: Wei Liu <wei.liu2@xxxxxxxxxx> Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx> Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx> Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx> Cc: Ian Campbell <ian.campbell@xxxxxxxxxx> Cc: Wei Liu <wei.liu2@xxxxxxxxxx> [ ijc -- wrap $(BIN) install in a check in case it is empty (which it is on !x86, avoid BIN += since it results in BIN = ' ' on !x86 ] --- .gitignore | 1 + tools/xentrace/Makefile | 9 +- tools/xentrace/analyze.h | 107 + tools/xentrace/mread.c | 160 + tools/xentrace/mread.h | 18 + tools/xentrace/pv.h | 41 + tools/xentrace/xenalyze.c |10407 +++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 10742 insertions(+), 1 deletions(-) diff --git a/.gitignore b/.gitignore index 3bc9cd9..3f42ded 100644 --- a/.gitignore +++ b/.gitignore @@ -173,6 +173,7 @@ tools/misc/gtracestat tools/misc/xenlockprof tools/misc/lowmemd tools/misc/xencov +tools/xentrace/xenalyze tools/pygrub/build/* tools/python/build/* tools/security/secpol_tool diff --git a/tools/xentrace/Makefile b/tools/xentrace/Makefile index 5360960..7d874a3 100644 --- a/tools/xentrace/Makefile +++ b/tools/xentrace/Makefile @@ -6,7 +6,8 @@ CFLAGS += -Werror CFLAGS += $(CFLAGS_libxenctrl) LDLIBS += $(LDLIBS_libxenctrl) -BIN = +BIN-$(CONFIG_X86) = xenalyze +BIN = $(BIN-y) SBIN = xentrace xentrace_setsize LIBBIN = xenctx SCRIPTS = xentrace_format @@ -26,6 +27,9 @@ install: build [ -z "$(LIBBIN)" ] || $(INSTALL_DIR) $(DESTDIR)$(LIBEXEC_BIN) $(INSTALL_DIR) $(DESTDIR)$(MAN1DIR) $(INSTALL_DIR) $(DESTDIR)$(MAN8DIR) +ifneq ($(BIN),) + $(INSTALL_PROG) $(BIN) $(DESTDIR)$(bindir) +endif $(INSTALL_PROG) $(SBIN) $(DESTDIR)$(sbindir) $(INSTALL_PYTHON_PROG) $(SCRIPTS) $(DESTDIR)$(bindir) [ -z "$(LIBBIN)" ] || $(INSTALL_PROG) $(LIBBIN) $(DESTDIR)$(LIBEXEC_BIN) @@ -48,5 +52,8 @@ xenctx: xenctx.o xentrace_setsize: setsize.o $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) $(APPEND_LDFLAGS) +xenalyze: xenalyze.o mread.o + $(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS) $(APPEND_LDFLAGS) + -include $(DEPS) diff --git a/tools/xentrace/analyze.h b/tools/xentrace/analyze.h new file mode 100644 index 0000000..40ee551 --- /dev/null +++ b/tools/xentrace/analyze.h @@ -0,0 +1,107 @@ +#ifndef __ANALYZE_H +# define __ANALYZE_H + +#include <stdint.h> + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#define TRC_GEN_MAIN 0 +#define TRC_SCHED_MAIN 1 +#define TRC_DOM0OP_MAIN 2 +#define TRC_HVM_MAIN 3 +#define TRC_MEM_MAIN 4 +#define TRC_PV_MAIN 5 +#define TRC_SHADOW_MAIN 6 +#define TRC_HW_MAIN 7 + +#define TRC_LOST_RECORDS_END (TRC_GEN + 50) + +#define NR_CPUS 128 +#if __x86_64__ +# define BITS_PER_LONG 64 +#else +# define BITS_PER_LONG 32 +#endif + +#define BITS_TO_LONGS(bits) \ + (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] +typedef struct cpumask{ DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; + +enum { + TRCE_SFLAG_SET_AD, + TRCE_SFLAG_SET_A, + TRCE_SFLAG_SHADOW_L1_GET_REF, + TRCE_SFLAG_SHADOW_L1_PUT_REF, + TRCE_SFLAG_L2_PROPAGATE, + TRCE_SFLAG_SET_CHANGED, + 
TRCE_SFLAG_SET_FLUSH, + TRCE_SFLAG_SET_ERROR, + TRCE_SFLAG_DEMOTE, + TRCE_SFLAG_PROMOTE, + TRCE_SFLAG_WRMAP, + TRCE_SFLAG_WRMAP_GUESS_FOUND, + TRCE_SFLAG_WRMAP_BRUTE_FORCE, + TRCE_SFLAG_EARLY_UNSHADOW, + TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN, + TRCE_SFLAG_EMULATION_LAST_FAILED, + TRCE_SFLAG_EMULATE_FULL_PT, + TRCE_SFLAG_PREALLOC_UNPIN, + TRCE_SFLAG_PREALLOC_UNHOOK +}; + +#define TRC_HVM_OP_DESTROY_PROC (TRC_HVM_HANDLER + 0x100) + +typedef unsigned long long tsc_t; + +/* -- on-disk trace buffer definitions -- */ +struct trace_record { + union { + struct { + unsigned event:28, + extra_words:3, + cycle_flag:1; + union { + struct { + uint32_t tsc_lo, tsc_hi; + uint32_t data[7]; + } tsc; + struct { + uint32_t data[7]; + } notsc; + } u; + }; + uint32_t raw[8]; + }; +}; + +/* -- General info about a current record -- */ +struct time_struct { + unsigned long long time; + unsigned int s, ns; +}; + +#define DUMP_HEADER_MAX 256 + +struct record_info { + int cpu; + tsc_t tsc; + union { + unsigned event; + struct { + unsigned minor:12, + sub:4, + main:12, + unused:4; + } evt; + }; + int extra_words; + int size; + uint32_t *d; + char dump_header[DUMP_HEADER_MAX]; + struct time_struct t; + struct trace_record rec; +}; + +#endif diff --git a/tools/xentrace/mread.c b/tools/xentrace/mread.c new file mode 100644 index 0000000..a63d16c --- /dev/null +++ b/tools/xentrace/mread.c @@ -0,0 +1,160 @@ +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <errno.h> +#include "mread.h" + +mread_handle_t mread_init(int fd) +{ + struct stat64 s; + mread_handle_t h; + + h=malloc(sizeof(struct mread_ctrl)); + + if (!h) + { + perror("malloc"); + exit(1); + } + + bzero(h, sizeof(struct mread_ctrl)); + + h->fd = fd; + + fstat64(fd, &s); + h->file_size = s.st_size; + + return h; +} + +ssize_t mread64(mread_handle_t h, void *rec, ssize_t len, loff_t offset) +{ + /* Idea: have a "cache" of N mmaped regions. If the offset is + * in one of the regions, just copy it. If not, evict one of the + * regions and map the appropriate range. + * + * Basic algorithm: + * - See if the offset is in one of the regions + * - If not, map it + * - evict an old region + * - map the new region + * - Copy + */ + char * b=NULL; + int bind=-1; + loff_t boffset=0; + ssize_t bsize; + +#define dprintf(x...) 
+//#define dprintf fprintf + + dprintf(warn, "%s: offset %llx len %d\n", __func__, + offset, len); + if ( offset > h->file_size ) + { + dprintf(warn, " offset > file size %llx, returning 0\n", + h->file_size); + return 0; + } + if ( offset + len > h->file_size ) + { + dprintf(warn, " offset+len > file size %llx, truncating\n", + h->file_size); + len = h->file_size - offset; + } + + /* Try to find the offset in our range */ + dprintf(warn, " Trying last, %d\n", last); + if ( h->map[h->last].buffer + && (offset & MREAD_BUF_MASK) == h->map[h->last].start_offset ) + { + bind=h->last; + goto copy; + } + + /* Scan to see if it's anywhere else */ + dprintf(warn, " Scanning\n"); + for(bind=0; bind<MREAD_MAPS; bind++) + if ( h->map[bind].buffer + && (offset & MREAD_BUF_MASK) == h->map[bind].start_offset ) + { + dprintf(warn, " Found, index %d\n", bind); + break; + } + + /* If we didn't find it, evict someone and map it */ + if ( bind == MREAD_MAPS ) + { + dprintf(warn, " Clock\n"); + while(1) + { + h->clock++; + if(h->clock >= MREAD_MAPS) + h->clock=0; + dprintf(warn, " %d\n", h->clock); + if(h->map[h->clock].buffer == NULL) + { + dprintf(warn, " Buffer null, using\n"); + break; + } + if(!h->map[h->clock].accessed) + { + dprintf(warn, " Not accessed, using\n"); + break; + } + h->map[h->clock].accessed=0; + } + if(h->map[h->clock].buffer) + { + dprintf(warn, " Unmapping\n"); + munmap(h->map[h->clock].buffer, MREAD_BUF_SIZE); + } + /* FIXME: Try MAP_HUGETLB? */ + /* FIXME: Make sure this works on large files... */ + h->map[h->clock].start_offset = offset & MREAD_BUF_MASK; + dprintf(warn, " Mapping %llx from offset %llx\n", + MREAD_BUF_SIZE, h->map[h->clock].start_offset); + h->map[h->clock].buffer = mmap(NULL, MREAD_BUF_SIZE, PROT_READ, + MAP_SHARED, + h->fd, + h->map[h->clock].start_offset); + dprintf(warn, " mmap returned %p\n", h->map[h->clock].buffer); + if ( h->map[h->clock].buffer == MAP_FAILED ) + { + h->map[h->clock].buffer = NULL; + perror("mmap"); + exit(1); + } + bind = h->clock; + } + + h->last=bind; +copy: + h->map[bind].accessed=1; + b=h->map[bind].buffer; + boffset=offset - h->map[bind].start_offset; + if ( boffset + len > MREAD_BUF_SIZE ) + bsize = MREAD_BUF_SIZE - boffset; + else + bsize = len; + dprintf(warn, " Using index %d, buffer at %p, buffer offset %llx len %d\n", + bind, b, boffset, bsize); + + bcopy(b+boffset, rec, bsize); + + /* Handle the boundary case; make sure this is after doing anything + * with the static variables*/ + if ( len > bsize ) + { + dprintf(warn, " Finishing up by reading l %d o %llx\n", + len-bsize, offset+bsize); + mread64(h, rec+bsize, len-bsize, offset+bsize); + } + + /* FIXME: ?? */ + return len; +#undef dprintf +} diff --git a/tools/xentrace/mread.h b/tools/xentrace/mread.h new file mode 100644 index 0000000..8df41a8 --- /dev/null +++ b/tools/xentrace/mread.h @@ -0,0 +1,18 @@ +#define MREAD_MAPS 8 +#define MREAD_BUF_SHIFT 9 +#define PAGE_SHIFT 12 +#define MREAD_BUF_SIZE (1ULL<<(PAGE_SHIFT+MREAD_BUF_SHIFT)) +#define MREAD_BUF_MASK (~(MREAD_BUF_SIZE-1)) +typedef struct mread_ctrl { + int fd; + loff_t file_size; + struct mread_buffer { + char * buffer; + loff_t start_offset; + int accessed; + } map[MREAD_MAPS]; + int clock, last; +} *mread_handle_t; + +mread_handle_t mread_init(int fd); +ssize_t mread64(mread_handle_t h, void *dst, ssize_t len, loff_t offset); diff --git a/tools/xentrace/pv.h b/tools/xentrace/pv.h new file mode 100644 index 0000000..3e6ad77 --- /dev/null +++ b/tools/xentrace/pv.h @@ -0,0 +1,41 @@ +/* + * PV event decoding. 
+ * + * Copyright (C) 2012 Citrix Systems R&D Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ +#ifndef __PV_H + +#include "analyze.h" +#include <xen/trace.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define ARG_MISSING 0x0 +#define ARG_32BIT 0x1 +#define ARG_64BIT 0x2 + +#define MMU_UPDATE_PREEMPTED (~(~0U>>1)) + +static inline uint32_t pv_hypercall_op(const struct record_info *ri) +{ + return ri->d[0] & ~TRC_PV_HYPERCALL_V2_ARG_MASK; +} + +static inline int pv_hypercall_arg_present(const struct record_info *ri, int arg) +{ + return (ri->d[0] >> (20 + 2*arg)) & 0x3; +} + +void pv_hypercall_gather_args(const struct record_info *ri, uint64_t *args); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c new file mode 100644 index 0000000..a0a3ac1 --- /dev/null +++ b/tools/xentrace/xenalyze.c @@ -0,0 +1,10407 @@ +/* + * xenalyze.c: Analyzing xentrace output + * + * Written by George Dunlap. + * + * Copyright (c) 2006-2007, XenSource Inc. + * Copyright (c) 2007-2008, Citrix Systems R&D Ltd, UK + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ +#define _XOPEN_SOURCE 600 +#include <stdio.h> +#include <stdlib.h> +#include <argp.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <xen/trace.h> +#include "analyze.h" +#include "mread.h" +#include "pv.h" +#include <errno.h> +#include <strings.h> +#include <string.h> +#include <assert.h> + +struct mread_ctrl; + + +#define DEFAULT_CPU_HZ 2400000000LL +#define QHZ_FROM_HZ(_hz) (((_hz) << 10)/ 1000000000) + +#define ADDR_SPACE_BITS 48 +#define DEFAULT_SAMPLE_SIZE 10240 +#define DEFAULT_INTERVAL_LENGTH 1000 + +struct array_struct { + unsigned long long *values; + int count; +}; + +#define warn_once(_x...) 
\ + do { \ + static int _w=1; \ + if ( _w ) { \ + _w=0; \ + fprintf(warn, ##_x); \ + } \ + } while(0) \ + +/* -- Global variables -- */ +struct { + int fd; + struct mread_ctrl *mh; + struct symbol_struct * symbols; + char * symbol_file; + char * trace_file; + int output_defined; + loff_t file_size; + struct { + loff_t update_offset; + int pipe[2]; + FILE* out; + int pid; + } progress; +} G = { + .fd=-1, + .symbols = NULL, + .symbol_file = NULL, + .trace_file = NULL, + .output_defined = 0, + .file_size = 0, + .progress = { .update_offset = 0 }, +}; + +/* + Kinds of errors: + Unexpected values + - RIP with information in high bits (not all 0 or 1) + - exit reason too high + Unexpected record layout + - x64 bit set in PIO,PV_PTWR_EMULATION_PAE, + - Unknown minor type (PV_PTWR_EMULATION, RUNSTATE_CHANGE + - Wrong record size + - More than one bit set in evt.main field + Unexpected sequences + - wake tsc tracking + - TSC dependency loop + - Mismatch between non-running old event states + - Runstate continue while running on another pcpu + - lost_record_end seen in non-lost pcpu + - Unexpected non-CPU_CHANGE record during new_pcpu scan + - record tsc < interval start tsc + - lost_record tsc !> order tsc + Limited resources + - interrupt interval slots + - record cpu > MAX_CPUS + Algorithm asserts + - Duplicate CR3/domain values + - Logic holes + - domain runstates + - runstate / tsc skew + - vcpu_{prev,next}_update p->current{==,!=}null + - vcpu start conditions + - lost_cpu count higher than # of seen cpus / < 0 + - lost cpu has non-null p->current + Symbol file + -file doesn't open + -file not ordered + System + - short read + - malloc failed + Args + - Invalid cpu_hz value / suffix + - No trace file + - Can't open trace file +*/ +enum error_level { + ERR_NONE=0, + ERR_STRICT, /* Be unreasonably picky */ + ERR_WARN, /* Something midly unexpected */ + ERR_SANITY, /* Sanity checks: RIP with info in high bits */ + ERR_RECORD, /* Something that keeps you from processing the record */ + ERR_FILE, /* Probably caused by a corrupt file */ + ERR_LIMIT, /* Exceeded limits; data will be lost */ + ERR_MAX_TOLERABLE=ERR_LIMIT, + /* -- Unrecoverable past this point -- */ + ERR_ASSERT, /* Algoritm assert */ + ERR_SYSTEM, /* System error: cannot allocate memory, short read, &c */ +}; + +int verbosity = 5; + +struct { + unsigned + scatterplot_interrupt_eip:1, + scatterplot_cpi:1, + scatterplot_unpin_promote:1, + scatterplot_cr3_switch:1, + scatterplot_wake_to_halt:1, + scatterplot_io:1, + scatterplot_vmexit_eip:1, + scatterplot_runstate:1, + scatterplot_runstate_time:1, + scatterplot_pcpu:1, + scatterplot_extint_cycles:1, + scatterplot_rdtsc:1, + scatterplot_irq:1, + histogram_interrupt_eip:1, + interval_mode:1, + dump_all:1, + dump_raw_process:1, + dump_raw_reads:1, + dump_no_processing:1, + dump_ipi_latency:1, + dump_trace_volume_on_lost_record:1, + dump_show_power_states:1, + with_cr3_enumeration:1, + with_pio_enumeration:1, + with_mmio_enumeration:1, + with_interrupt_eip_enumeration:1, + show_default_domain_summary:1, + mmio_enumeration_skip_vga:1, + progress:1, + svm_mode:1, + summary:1, + report_pcpu:1, + tsc_loop_fatal:1, + summary_info; + long long cpu_qhz, cpu_hz; + int scatterplot_interrupt_vector; + int scatterplot_extint_cycles_vector; + int scatterplot_io_port; + int histogram_interrupt_vector; + unsigned long long histogram_interrupt_increment; + int interrupt_eip_enumeration_vector; + int default_guest_paging_levels; + int sample_size; + enum error_level tolerance; /* Tolerate up to this level 
of error */ + struct { + tsc_t cycles; + /* Used if interval is specified in seconds to delay calculating + * time_interval until all arguments have been processed (specifically, + * cpu_hz). */ + unsigned msec; + enum { + INTERVAL_CR3_SCHEDULE_TIME, + INTERVAL_CR3_SCHEDULE_ORDERED, + INTERVAL_CR3_SHORT_SUMMARY, + INTERVAL_DOMAIN_TOTAL_TIME, + INTERVAL_DOMAIN_SHORT_SUMMARY, + INTERVAL_DOMAIN_GUEST_INTERRUPT, + INTERVAL_DOMAIN_GRANT_MAPS + } output; + enum { + INTERVAL_MODE_CUSTOM, + INTERVAL_MODE_ARRAY, + INTERVAL_MODE_LIST + } mode; + enum { + INTERVAL_CHECK_NONE, + INTERVAL_CHECK_CR3, + INTERVAL_CHECK_DOMAIN + } check; + /* Options for specific interval output types */ + union { + struct array_struct array; + }; + int count; + } interval; +} opt = { + .scatterplot_interrupt_eip=0, + .scatterplot_cpi=0, + .scatterplot_unpin_promote=0, + .scatterplot_cr3_switch=0, + .scatterplot_wake_to_halt=0, + .scatterplot_vmexit_eip=0, + .scatterplot_runstate=0, + .scatterplot_runstate_time=0, + .scatterplot_pcpu=0, + .scatterplot_extint_cycles=0, + .scatterplot_rdtsc=0, + .scatterplot_irq=0, + .histogram_interrupt_eip=0, + .dump_all = 0, + .dump_raw_process = 0, + .dump_raw_reads = 0, + .dump_no_processing = 0, + .dump_ipi_latency = 0, + .dump_trace_volume_on_lost_record = 0, + .dump_show_power_states = 0, + .with_cr3_enumeration = 0, + .with_pio_enumeration = 1, + .with_mmio_enumeration = 0, + .with_interrupt_eip_enumeration = 0, + .show_default_domain_summary = 0, + .mmio_enumeration_skip_vga = 1, + .progress = 0, + .svm_mode = 0, + .summary = 0, + .report_pcpu = 0, + .tsc_loop_fatal = 0, + .cpu_hz = DEFAULT_CPU_HZ, + /* Pre-calculate a multiplier that makes the rest of the + * calculations easier */ + .cpu_qhz = QHZ_FROM_HZ(DEFAULT_CPU_HZ), + .default_guest_paging_levels = 2, + .sample_size = DEFAULT_SAMPLE_SIZE, + .tolerance = ERR_SANITY, + .interval = { .msec = DEFAULT_INTERVAL_LENGTH }, +}; + +FILE *warn = NULL; + +/* -- Summary data -- */ +struct cycle_framework { + tsc_t first_tsc, last_tsc, total_cycles; +}; + +struct interval_element { + int count; + long long cycles; + long long instructions; +}; + +struct event_cycle_summary { + int count, cycles_count; + long long cycles; + long long *cycles_sample; + struct interval_element interval; +}; + +struct cycle_summary { + int count; + unsigned long long cycles; + long long *sample; + struct interval_element interval; +}; + +struct weighted_cpi_summary { + int count; + unsigned long long instructions; + unsigned long long cycles; + float *cpi; + unsigned long long *cpi_weight; + struct interval_element interval; +}; + +/* -- Symbol list information -- */ +#define SYMBOL_ENTRIES_PER_STRUCT 1023 +#define SYMBOL_NAME_SIZE 124 +struct symbol_struct { + int count; + struct { + unsigned long long addr; + char name[SYMBOL_NAME_SIZE]; + } symbols[SYMBOL_ENTRIES_PER_STRUCT]; + struct symbol_struct *next; +}; + +void error(enum error_level l, struct record_info *ri); + +void parse_symbol_file(char *fn) { + unsigned long long last_addr = 0; + FILE * symbol_file; + struct symbol_struct ** p=&G.symbols; + + if((symbol_file=fopen(fn, "rb"))==NULL) { + fprintf(stderr, "Could not open symbol file %s\n", fn); + perror("open"); + error(ERR_SYSTEM, NULL); + } + while(!feof(symbol_file)) { + /* Allocate a new struct if we need it */ + if(!*p) { + *p = malloc(sizeof(**p)); + if(!*p) { + fprintf(stderr, "Malloc failed!\n"); + error(ERR_SYSTEM, NULL); + } + (*p)->count=0; + (*p)->next=NULL; + } + + /* FIXME -- use SYMBOL_NAME_SIZE */ + /* FIXME -- use regexp. 
This won't work for symbols with spaces (yes they exist) */ + (*p)->symbols[(*p)->count].addr = 0xDEADBEEF; + if ( fscanf(symbol_file, "%llx %128s", + &(*p)->symbols[(*p)->count].addr, + (*p)->symbols[(*p)->count].name) == 0 ) + break; + + + if( ((*p)->symbols[(*p)->count].addr > 0) + && ((*p)->symbols[(*p)->count].addr < last_addr) ) { + fprintf(stderr, "Symbol file not properly ordered: %llx %s < %llx!\n", + (*p)->symbols[(*p)->count].addr, + (*p)->symbols[(*p)->count].name, + last_addr); + /* Could be recovered from; just free existing strings and set symbols to NULL */ + error(ERR_ASSERT, NULL); + } else + last_addr = (*p)->symbols[(*p)->count].addr; + + (*p)->count++; + + /* If this struct is full, point to the next. It will be allocated + if needed. */ + if((*p)->count == SYMBOL_ENTRIES_PER_STRUCT) { + p=&((*p)->next); + } + } +} + +/* WARNING not thread safe */ +char * find_symbol(unsigned long long addr) { + struct symbol_struct * p=G.symbols; + int i; + char * lastname="ZERO"; + unsigned long long offset=addr; + static char name[128]; + + if(!p) { + name[0]=0; + return name; + } + + while(1) { + if(!p) + goto finish; + for(i=0; i<p->count; i++) { + if(p->symbols[i].addr > addr) + goto finish; + else { + lastname=p->symbols[i].name; + offset=addr - p->symbols[i].addr; + } + } + p=p->next; + } + finish: + snprintf(name, 128, "(%s +%llx)", + lastname, offset); + return name; +} + +/* -- Eip list data -- */ +enum { + EIP_LIST_TYPE_NONE=0, + EIP_LIST_TYPE_MAX +}; + +struct eip_list_struct { + struct eip_list_struct *next; + unsigned long long eip; + struct event_cycle_summary summary; + int type; + void * extra; +}; + +struct { + void (*update)(struct eip_list_struct *, void *); + void (*new)(struct eip_list_struct *, void *); + void (*dump)(struct eip_list_struct *); +} eip_list_type[EIP_LIST_TYPE_MAX] = { + [EIP_LIST_TYPE_NONE] = { + .update=NULL, + .new=NULL, + .dump=NULL }, +}; + + +/* --- HVM class of events --- */ + +/* + * -- Algorithms -- + * + * Interrupt Wake-to-halt detection + * + * Purpose: To correlate device interrupts to vcpu runtime. + * + * Diagram: + * ... + * blocked -> runnable <- set to waking + * ... + * runnable -> running + * inj_virq A <- Note "waking" interrupt + * vmenter <- Start tsc of "wake-to-halt" interval. + Turn off 'waking'. + * ... + * inj_virq B <- Note alternate interrupt + * vmenter <- Start tsc of "interrupt-to-halt" interval + * ... + * vmexit <- End tsc of "x-to-halt" interval + * running -> blocked <- Process + * + * The "waking" interrupts we want to sub-classify into + * "wake-only" (when interrupt was the only interrupt from wake to halt) and + * "wake-all" (whether this was the only interrupt or not). 
+ */ + +/* VMX data */ +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_INIT 3 +#define EXIT_REASON_SIPI 4 +#define EXIT_REASON_IO_SMI 5 +#define EXIT_REASON_OTHER_SMI 6 +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_PENDING_VIRT_NMI 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_GETSEC 11 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_RSM 17 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_GUEST_STATE 33 +#define EXIT_REASON_MSR_LOADING 34 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_TRAP_FLAG 37 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MACHINE_CHECK 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_ACCESS_GDTR_OR_IDTR 46 +#define EXIT_REASON_ACCESS_LDTR_OR_TR 47 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 +#define EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED 52 +#define EXIT_REASON_INVVPID 53 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 + +#define HVM_VMX_EXIT_REASON_MAX (EXIT_REASON_XSETBV+1) + +char * hvm_vmx_exit_reason_name[HVM_VMX_EXIT_REASON_MAX] = { + [0] = "NONE", + [EXIT_REASON_EXCEPTION_NMI]="EXCEPTION_NMI", + [EXIT_REASON_EXTERNAL_INTERRUPT]="EXTERNAL_INTERRUPT", + [EXIT_REASON_TRIPLE_FAULT]="TRIPLE_FAULT", + [EXIT_REASON_INIT]="INIT", + [EXIT_REASON_SIPI]="SIPI", + [EXIT_REASON_IO_SMI]="IO_SMI", + [EXIT_REASON_OTHER_SMI]="OTHER_SMI", + [EXIT_REASON_PENDING_INTERRUPT]="PENDING_INTERRUPT", + [EXIT_REASON_PENDING_VIRT_NMI]="PENDING_VIRT_NMI", + [EXIT_REASON_TASK_SWITCH]="TASK_SWITCH", + [EXIT_REASON_CPUID]="CPUID", + [EXIT_REASON_GETSEC]="GETSEC", + [EXIT_REASON_HLT]="HLT", + [EXIT_REASON_INVD]="INVD", + [EXIT_REASON_INVLPG]="INVLPG", + [EXIT_REASON_RDPMC]="RDPMC", + [EXIT_REASON_RDTSC]="RDTSC", + [EXIT_REASON_RSM]="RSM", + [EXIT_REASON_VMCALL]="VMCALL", + [EXIT_REASON_VMCLEAR]="VMCLEAR", + [EXIT_REASON_VMLAUNCH]="VMLAUNCH", + [EXIT_REASON_VMPTRLD]="VMPTRLD", + [EXIT_REASON_VMPTRST]="VMPTRST", + [EXIT_REASON_VMREAD]="VMREAD", + [EXIT_REASON_VMRESUME]="VMRESUME", + [EXIT_REASON_VMWRITE]="VMWRITE", + [EXIT_REASON_VMOFF]="VMOFF", + [EXIT_REASON_VMON]="VMON", + [EXIT_REASON_CR_ACCESS]="CR_ACCESS", + [EXIT_REASON_DR_ACCESS]="DR_ACCESS", + [EXIT_REASON_IO_INSTRUCTION]="IO_INSTRUCTION", + [EXIT_REASON_MSR_READ]="MSR_READ", + [EXIT_REASON_MSR_WRITE]="MSR_WRITE", + [EXIT_REASON_INVALID_GUEST_STATE]="INVALID_GUEST_STATE", + [EXIT_REASON_MSR_LOADING]="MSR_LOADING", + [EXIT_REASON_MWAIT_INSTRUCTION]="MWAIT_INSTRUCTION", + [EXIT_REASON_MONITOR_TRAP_FLAG]="MONITOR_TRAP_FLAG", + [EXIT_REASON_MONITOR_INSTRUCTION]="MONITOR_INSTRUCTION", + [EXIT_REASON_PAUSE_INSTRUCTION]="PAUSE_INSTRUCTION", + 
[EXIT_REASON_MACHINE_CHECK]="MACHINE_CHECK", + [EXIT_REASON_TPR_BELOW_THRESHOLD]="TPR_BELOW_THRESHOLD", + [EXIT_REASON_APIC_ACCESS]="APIC_ACCESS", + [EXIT_REASON_EPT_VIOLATION]="EPT_VIOLATION", + [EXIT_REASON_EPT_MISCONFIG]="EPT_MISCONFIG", + [EXIT_REASON_INVEPT]="INVEPT", + [EXIT_REASON_RDTSCP]="RDTSCP", + [EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED]="VMX_PREEMPTION_TIMER_EXPIRED", + [EXIT_REASON_INVVPID]="INVVPID", + [EXIT_REASON_WBINVD]="WBINVD", + [EXIT_REASON_XSETBV]="XSETBV", +}; + +/* SVM data */ +enum VMEXIT_EXITCODE +{ + /* control register read exitcodes */ + VMEXIT_CR0_READ = 0, + VMEXIT_CR1_READ = 1, + VMEXIT_CR2_READ = 2, + VMEXIT_CR3_READ = 3, + VMEXIT_CR4_READ = 4, + VMEXIT_CR5_READ = 5, + VMEXIT_CR6_READ = 6, + VMEXIT_CR7_READ = 7, + VMEXIT_CR8_READ = 8, + VMEXIT_CR9_READ = 9, + VMEXIT_CR10_READ = 10, + VMEXIT_CR11_READ = 11, + VMEXIT_CR12_READ = 12, + VMEXIT_CR13_READ = 13, + VMEXIT_CR14_READ = 14, + VMEXIT_CR15_READ = 15, + + /* control register write exitcodes */ + VMEXIT_CR0_WRITE = 16, + VMEXIT_CR1_WRITE = 17, + VMEXIT_CR2_WRITE = 18, + VMEXIT_CR3_WRITE = 19, + VMEXIT_CR4_WRITE = 20, + VMEXIT_CR5_WRITE = 21, + VMEXIT_CR6_WRITE = 22, + VMEXIT_CR7_WRITE = 23, + VMEXIT_CR8_WRITE = 24, + VMEXIT_CR9_WRITE = 25, + VMEXIT_CR10_WRITE = 26, + VMEXIT_CR11_WRITE = 27, + VMEXIT_CR12_WRITE = 28, + VMEXIT_CR13_WRITE = 29, + VMEXIT_CR14_WRITE = 30, + VMEXIT_CR15_WRITE = 31, + + /* debug register read exitcodes */ + VMEXIT_DR0_READ = 32, + VMEXIT_DR1_READ = 33, + VMEXIT_DR2_READ = 34, + VMEXIT_DR3_READ = 35, + VMEXIT_DR4_READ = 36, + VMEXIT_DR5_READ = 37, + VMEXIT_DR6_READ = 38, + VMEXIT_DR7_READ = 39, + VMEXIT_DR8_READ = 40, + VMEXIT_DR9_READ = 41, + VMEXIT_DR10_READ = 42, + VMEXIT_DR11_READ = 43, + VMEXIT_DR12_READ = 44, + VMEXIT_DR13_READ = 45, + VMEXIT_DR14_READ = 46, + VMEXIT_DR15_READ = 47, + + /* debug register write exitcodes */ + VMEXIT_DR0_WRITE = 48, + VMEXIT_DR1_WRITE = 49, + VMEXIT_DR2_WRITE = 50, + VMEXIT_DR3_WRITE = 51, + VMEXIT_DR4_WRITE = 52, + VMEXIT_DR5_WRITE = 53, + VMEXIT_DR6_WRITE = 54, + VMEXIT_DR7_WRITE = 55, + VMEXIT_DR8_WRITE = 56, + VMEXIT_DR9_WRITE = 57, + VMEXIT_DR10_WRITE = 58, + VMEXIT_DR11_WRITE = 59, + VMEXIT_DR12_WRITE = 60, + VMEXIT_DR13_WRITE = 61, + VMEXIT_DR14_WRITE = 62, + VMEXIT_DR15_WRITE = 63, + + /* processor exception exitcodes (VMEXIT_EXCP[0-31]) */ + VMEXIT_EXCEPTION_DE = 64, /* divide-by-zero-error */ + VMEXIT_EXCEPTION_DB = 65, /* debug */ + VMEXIT_EXCEPTION_NMI = 66, /* non-maskable-interrupt */ + VMEXIT_EXCEPTION_BP = 67, /* breakpoint */ + VMEXIT_EXCEPTION_OF = 68, /* overflow */ + VMEXIT_EXCEPTION_BR = 69, /* bound-range */ + VMEXIT_EXCEPTION_UD = 70, /* invalid-opcode*/ + VMEXIT_EXCEPTION_NM = 71, /* device-not-available */ + VMEXIT_EXCEPTION_DF = 72, /* double-fault */ + VMEXIT_EXCEPTION_09 = 73, /* unsupported (reserved) */ + VMEXIT_EXCEPTION_TS = 74, /* invalid-tss */ + VMEXIT_EXCEPTION_NP = 75, /* segment-not-present */ + VMEXIT_EXCEPTION_SS = 76, /* stack */ + VMEXIT_EXCEPTION_GP = 77, /* general-protection */ + VMEXIT_EXCEPTION_PF = 78, /* page-fault */ + VMEXIT_EXCEPTION_15 = 79, /* reserved */ + VMEXIT_EXCEPTION_MF = 80, /* x87 floating-point exception-pending */ + VMEXIT_EXCEPTION_AC = 81, /* alignment-check */ + VMEXIT_EXCEPTION_MC = 82, /* machine-check */ + VMEXIT_EXCEPTION_XF = 83, /* simd floating-point */ + + /* exceptions 20-31 (exitcodes 84-95) are reserved */ + + /* ...and the rest of the #VMEXITs */ + VMEXIT_INTR = 96, + VMEXIT_NMI = 97, + VMEXIT_SMI = 98, + VMEXIT_INIT = 99, + VMEXIT_VINTR = 100, + 
VMEXIT_CR0_SEL_WRITE = 101, + VMEXIT_IDTR_READ = 102, + VMEXIT_GDTR_READ = 103, + VMEXIT_LDTR_READ = 104, + VMEXIT_TR_READ = 105, + VMEXIT_IDTR_WRITE = 106, + VMEXIT_GDTR_WRITE = 107, + VMEXIT_LDTR_WRITE = 108, + VMEXIT_TR_WRITE = 109, + VMEXIT_RDTSC = 110, + VMEXIT_RDPMC = 111, + VMEXIT_PUSHF = 112, + VMEXIT_POPF = 113, + VMEXIT_CPUID = 114, + VMEXIT_RSM = 115, + VMEXIT_IRET = 116, + VMEXIT_SWINT = 117, + VMEXIT_INVD = 118, + VMEXIT_PAUSE = 119, + VMEXIT_HLT = 120, + VMEXIT_INVLPG = 121, + VMEXIT_INVLPGA = 122, + VMEXIT_IOIO = 123, + VMEXIT_MSR = 124, + VMEXIT_TASK_SWITCH = 125, + VMEXIT_FERR_FREEZE = 126, + VMEXIT_SHUTDOWN = 127, + VMEXIT_VMRUN = 128, + VMEXIT_VMMCALL = 129, + VMEXIT_VMLOAD = 130, + VMEXIT_VMSAVE = 131, + VMEXIT_STGI = 132, + VMEXIT_CLGI = 133, + VMEXIT_SKINIT = 134, + VMEXIT_RDTSCP = 135, + VMEXIT_ICEBP = 136, + VMEXIT_WBINVD = 137, + VMEXIT_MONITOR = 138, + VMEXIT_MWAIT = 139, + VMEXIT_MWAIT_CONDITIONAL= 140, + VMEXIT_NPF = 1024, /* nested paging fault */ + VMEXIT_INVALID = -1 +}; + +#define HVM_SVM_EXIT_REASON_MAX 1025 +char * hvm_svm_exit_reason_name[HVM_SVM_EXIT_REASON_MAX] = { + /* 0-15 */ + "VMEXIT_CR0_READ", + "VMEXIT_CR1_READ", + "VMEXIT_CR2_READ", + "VMEXIT_CR3_READ", + "VMEXIT_CR4_READ", + "VMEXIT_CR5_READ", + "VMEXIT_CR6_READ", + "VMEXIT_CR7_READ", + "VMEXIT_CR8_READ", + "VMEXIT_CR9_READ", + "VMEXIT_CR10_READ", + "VMEXIT_CR11_READ", + "VMEXIT_CR12_READ", + "VMEXIT_CR13_READ", + "VMEXIT_CR14_READ", + "VMEXIT_CR15_READ", + /* 16-31 */ + "VMEXIT_CR0_WRITE", + "VMEXIT_CR1_WRITE", + "VMEXIT_CR2_WRITE", + "VMEXIT_CR3_WRITE", + "VMEXIT_CR4_WRITE", + "VMEXIT_CR5_WRITE", + "VMEXIT_CR6_WRITE", + "VMEXIT_CR7_WRITE", + "VMEXIT_CR8_WRITE", + "VMEXIT_CR9_WRITE", + "VMEXIT_CR10_WRITE", + "VMEXIT_CR11_WRITE", + "VMEXIT_CR12_WRITE", + "VMEXIT_CR13_WRITE", + "VMEXIT_CR14_WRITE", + "VMEXIT_CR15_WRITE", + /* 32-47 */ + "VMEXIT_DR0_READ", + "VMEXIT_DR1_READ", + "VMEXIT_DR2_READ", + "VMEXIT_DR3_READ", + "VMEXIT_DR4_READ", + "VMEXIT_DR5_READ", + "VMEXIT_DR6_READ", + "VMEXIT_DR7_READ", + "VMEXIT_DR8_READ", + "VMEXIT_DR9_READ", + "VMEXIT_DR10_READ", + "VMEXIT_DR11_READ", + "VMEXIT_DR12_READ", + "VMEXIT_DR13_READ", + "VMEXIT_DR14_READ", + "VMEXIT_DR15_READ", + /* 48-63 */ + "VMEXIT_DR0_WRITE", + "VMEXIT_DR1_WRITE", + "VMEXIT_DR2_WRITE", + "VMEXIT_DR3_WRITE", + "VMEXIT_DR4_WRITE", + "VMEXIT_DR5_WRITE", + "VMEXIT_DR6_WRITE", + "VMEXIT_DR7_WRITE", + "VMEXIT_DR8_WRITE", + "VMEXIT_DR9_WRITE", + "VMEXIT_DR10_WRITE", + "VMEXIT_DR11_WRITE", + "VMEXIT_DR12_WRITE", + "VMEXIT_DR13_WRITE", + "VMEXIT_DR14_WRITE", + "VMEXIT_DR15_WRITE", + /* 64-83 */ + "VMEXIT_EXCEPTION_DE", + "VMEXIT_EXCEPTION_DB", + "VMEXIT_EXCEPTION_NMI", + "VMEXIT_EXCEPTION_BP", + "VMEXIT_EXCEPTION_OF", + "VMEXIT_EXCEPTION_BR", + "VMEXIT_EXCEPTION_UD", + "VMEXIT_EXCEPTION_NM", + "VMEXIT_EXCEPTION_DF", + "VMEXIT_EXCEPTION_09", + "VMEXIT_EXCEPTION_TS", + "VMEXIT_EXCEPTION_NP", + "VMEXIT_EXCEPTION_SS", + "VMEXIT_EXCEPTION_GP", + "VMEXIT_EXCEPTION_PF", + "VMEXIT_EXCEPTION_15", + "VMEXIT_EXCEPTION_MF", + "VMEXIT_EXCEPTION_AC", + "VMEXIT_EXCEPTION_MC", + "VMEXIT_EXCEPTION_XF", + /* 84-95 */ + "VMEXIT_EXCEPTION_20", + "VMEXIT_EXCEPTION_21", + "VMEXIT_EXCEPTION_22", + "VMEXIT_EXCEPTION_23", + "VMEXIT_EXCEPTION_24", + "VMEXIT_EXCEPTION_25", + "VMEXIT_EXCEPTION_26", + "VMEXIT_EXCEPTION_27", + "VMEXIT_EXCEPTION_28", + "VMEXIT_EXCEPTION_29", + "VMEXIT_EXCEPTION_30", + "VMEXIT_EXCEPTION_31", + /* 96-99 */ + "VMEXIT_INTR", + "VMEXIT_NMI", + "VMEXIT_SMI", + "VMEXIT_INIT", + /* 100-109 */ + "VMEXIT_VINTR", + "VMEXIT_CR0_SEL_WRITE", + 
"VMEXIT_IDTR_READ", + "VMEXIT_GDTR_READ", + "VMEXIT_LDTR_READ", + "VMEXIT_TR_READ", + "VMEXIT_IDTR_WRITE", + "VMEXIT_GDTR_WRITE", + "VMEXIT_LDTR_WRITE", + "VMEXIT_TR_WRITE", + /* 110-119 */ + "VMEXIT_RDTSC", + "VMEXIT_RDPMC", + "VMEXIT_PUSHF", + "VMEXIT_POPF", + "VMEXIT_CPUID", + "VMEXIT_RSM", + "VMEXIT_IRET", + "VMEXIT_SWINT", + "VMEXIT_INVD", + "VMEXIT_PAUSE", + /* 120-129 */ + "VMEXIT_HLT", + "VMEXIT_INVLPG", + "VMEXIT_INVLPGA", + "VMEXIT_IOIO", + "VMEXIT_MSR", + "VMEXIT_TASK_SWITCH", + "VMEXIT_FERR_FREEZE", + "VMEXIT_SHUTDOWN", + "VMEXIT_VMRUN", + "VMEXIT_VMMCALL", + /* 130-139 */ + "VMEXIT_VMLOAD", + "VMEXIT_VMSAVE", + "VMEXIT_STGI", + "VMEXIT_CLGI", + "VMEXIT_SKINIT", + "VMEXIT_RDTSCP", + "VMEXIT_ICEBP", + "VMEXIT_WBINVD", + "VMEXIT_MONITOR", + "VMEXIT_MWAIT", + /* 140 */ + "VMEXIT_MWAIT_CONDITIONAL", + [VMEXIT_NPF] = "VMEXIT_NPF", /* nested paging fault */ +}; + + +#if ( HVM_VMX_EXIT_REASON_MAX > HVM_SVM_EXIT_REASON_MAX ) +# define HVM_EXIT_REASON_MAX HVM_VMX_EXIT_REASON_MAX +# error - Strange! +#else +# define HVM_EXIT_REASON_MAX HVM_SVM_EXIT_REASON_MAX +#endif + +/* General hvm information */ +#define SPURIOUS_APIC_VECTOR 0xff +#define ERROR_APIC_VECTOR 0xfe +#define INVALIDATE_TLB_VECTOR 0xfd +#define EVENT_CHECK_VECTOR 0xfc +#define CALL_FUNCTION_VECTOR 0xfb +#define THERMAL_APIC_VECTOR 0xfa +#define LOCAL_TIMER_VECTOR 0xf9 + +#define EXTERNAL_INTERRUPT_MAX 256 + +/* Stringify numbers */ +char * hvm_extint_vector_name[EXTERNAL_INTERRUPT_MAX] = { + [SPURIOUS_APIC_VECTOR] = "SPURIOS_APIC", + [ERROR_APIC_VECTOR] = "ERROR_APIC", + [INVALIDATE_TLB_VECTOR]= "INVALIDATE_TLB", + [EVENT_CHECK_VECTOR]= "EVENT_CHECK", + [CALL_FUNCTION_VECTOR]= "CALL_FUNCTION", + [THERMAL_APIC_VECTOR]= "THERMAL_APIC", + [LOCAL_TIMER_VECTOR] = "LOCAL_TIMER", +}; + +#define HVM_TRAP_MAX 20 + +char * hvm_trap_name[HVM_TRAP_MAX] = { + [0] = "Divide", + [1] = "RESERVED", + [2] = "NMI", + [3] = "Breakpoint", + [4] = "Overflow", + [5] = "BOUND", + [6] = "Invalid Op", + [7] = "Coprocessor not present", + [8] = "Double Fault", + [9] = "Coprocessor segment overrun", + [10] = "TSS", + [11] = "Segment not present", + [12] = "Stack-segment fault", + [13] = "GP", + [14] = "Page fault", + [15] = "RESERVED", + [16] = "FPU", + [17] = "Alignment check", + [18] = "Machine check", + [19] = "SIMD", +}; + + +enum { + HVM_EVENT_HANDLER_NONE = 0, + HVM_EVENT_HANDLER_PF_XEN = 1, + HVM_EVENT_HANDLER_PF_INJECT, + HVM_EVENT_HANDLER_INJ_EXC, + HVM_EVENT_HANDLER_INJ_VIRQ, + HVM_EVENT_HANDLER_REINJ_VIRQ, + HVM_EVENT_HANDLER_IO_READ, + HVM_EVENT_HANDLER_IO_WRITE, + HVM_EVENT_HANDLER_CR_READ, /* 8 */ + HVM_EVENT_HANDLER_CR_WRITE, + HVM_EVENT_HANDLER_DR_READ, + HVM_EVENT_HANDLER_DR_WRITE, + HVM_EVENT_HANDLER_MSR_READ, + HVM_EVENT_HANDLER_MSR_WRITE, + HVM_EVENT_HANDLER_CPUID, + HVM_EVENT_HANDLER_INTR, + HVM_EVENT_HANDLER_NMI, /* 16 */ + HVM_EVENT_HANDLER_SMI, + HVM_EVENT_HANDLER_VMCALL, + HVM_EVENT_HANDLER_HLT, + HVM_EVENT_HANDLER_INVLPG, + HVM_EVENT_HANDLER_MCE, + HVM_EVENT_HANDLER_IO_ASSIST, + HVM_EVENT_HANDLER_MMIO_ASSIST, + HVM_EVENT_HANDLER_CLTS, + HVM_EVENT_HANDLER_LMSW, + HVM_EVENT_RDTSC, + HVM_EVENT_INTR_WINDOW=0x20, /* Oops... 
skipped 0x1b-1f */ + HVM_EVENT_NPF, + HVM_EVENT_REALMODE_EMULATE, + HVM_EVENT_TRAP, + HVM_EVENT_TRAP_DEBUG, + HVM_EVENT_VLAPIC, + HVM_EVENT_HANDLER_MAX +}; +char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = { + "(no handler)", + "pf_xen", + "pf_inject", + "inj_exc", + "inj_virq", + "reinj_virq", + "io_read", + "io_write", + "cr_read", /* 8 */ + "cr_write", + "dr_read", + "dr_write", + "msr_read", + "msr_write", + "cpuid", + "intr", + "nmi", /* 16 */ + "smi", + "vmcall", + "hlt", + "invlpg", + "mce", + "io_assist", + "mmio_assist", + "clts", /* 24 */ + "lmsw", + "rdtsc", + [HVM_EVENT_INTR_WINDOW]="intr_window", + "npf", + "realmode_emulate", + "trap", + "trap_debug", + "vlapic" +}; + +enum { + HVM_VOL_VMENTRY, + HVM_VOL_VMEXIT, + HVM_VOL_HANDLER, + HVM_VOL_MAX +}; + +enum { + GUEST_INTERRUPT_CASE_NONE, + /* This interrupt woke, no other interrupts until halt */ + GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE, + /* This interrupt woke, maybe another interrupt before halt */ + GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY, + /* Time from interrupt (running) to halt */ + GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT, + GUEST_INTERRUPT_CASE_MAX, +}; + +char *guest_interrupt_case_name[] = { + [GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE]="wake to halt alone", + /* This interrupt woke, maybe another interrupt before halt */ + [GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY] ="wake to halt any ", + /* Time from interrupt (running) to halt */ + [GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT] ="intr to halt ", +}; + +char *hvm_vol_name[HVM_VOL_MAX] = { + [HVM_VOL_VMENTRY]="vmentry", + [HVM_VOL_VMEXIT] ="vmexit", + [HVM_VOL_HANDLER]="handler", +}; + +enum { + HYPERCALL_set_trap_table = 0, + HYPERCALL_mmu_update, + HYPERCALL_set_gdt, + HYPERCALL_stack_switch, + HYPERCALL_set_callbacks, + HYPERCALL_fpu_taskswitch, + HYPERCALL_sched_op_compat, + HYPERCALL_platform_op, + HYPERCALL_set_debugreg, + HYPERCALL_get_debugreg, + HYPERCALL_update_descriptor, + HYPERCALL_memory_op=12, + HYPERCALL_multicall, + HYPERCALL_update_va_mapping, + HYPERCALL_set_timer_op, + HYPERCALL_event_channel_op_compat, + HYPERCALL_xen_version, + HYPERCALL_console_io, + HYPERCALL_physdev_op_compat, + HYPERCALL_grant_table_op, + HYPERCALL_vm_assist, + HYPERCALL_update_va_mapping_otherdomain, + HYPERCALL_iret, + HYPERCALL_vcpu_op, + HYPERCALL_set_segment_base, + HYPERCALL_mmuext_op, + HYPERCALL_acm_op, + HYPERCALL_nmi_op, + HYPERCALL_sched_op, + HYPERCALL_callback_op, + HYPERCALL_xenoprof_op, + HYPERCALL_event_channel_op, + HYPERCALL_physdev_op, + HYPERCALL_hvm_op, + HYPERCALL_sysctl, + HYPERCALL_domctl, + HYPERCALL_kexec_op, + HYPERCALL_MAX +}; + +char *hypercall_name[HYPERCALL_MAX] = { + [HYPERCALL_set_trap_table]="set_trap_table", + [HYPERCALL_mmu_update]="mmu_update", + [HYPERCALL_set_gdt]="set_gdt", + [HYPERCALL_stack_switch]="stack_switch", + [HYPERCALL_set_callbacks]="set_callbacks", + [HYPERCALL_fpu_taskswitch]="fpu_taskswitch", + [HYPERCALL_sched_op_compat]="sched_op(compat)", + [HYPERCALL_platform_op]="platform_op", + [HYPERCALL_set_debugreg]="set_debugreg", + [HYPERCALL_get_debugreg]="get_debugreg", + [HYPERCALL_update_descriptor]="update_descriptor", + [HYPERCALL_memory_op]="memory_op", + [HYPERCALL_multicall]="multicall", + [HYPERCALL_update_va_mapping]="update_va_mapping", + [HYPERCALL_set_timer_op]="set_timer_op", + [HYPERCALL_event_channel_op_compat]="evtchn_op(compat)", + [HYPERCALL_xen_version]="xen_version", + [HYPERCALL_console_io]="console_io", + [HYPERCALL_physdev_op_compat]="physdev_op(compat)", + [HYPERCALL_grant_table_op]="grant_table_op", + 
[HYPERCALL_vm_assist]="vm_assist", + [HYPERCALL_update_va_mapping_otherdomain]="update_va_mapping_otherdomain", + [HYPERCALL_iret]="iret", + [HYPERCALL_vcpu_op]="vcpu_op", + [HYPERCALL_set_segment_base]="set_segment_base", + [HYPERCALL_mmuext_op]="mmuext_op", + [HYPERCALL_acm_op]="acm_op", + [HYPERCALL_nmi_op]="nmi_op", + [HYPERCALL_sched_op]="sched_op", + [HYPERCALL_callback_op]="callback_op", + [HYPERCALL_xenoprof_op]="xenoprof_op", + [HYPERCALL_event_channel_op]="evtchn_op", + [HYPERCALL_physdev_op]="physdev_op", + [HYPERCALL_hvm_op]="hvm_op", + [HYPERCALL_sysctl]="sysctl", + [HYPERCALL_domctl]="domctl", + [HYPERCALL_kexec_op]="kexec_op" +}; + +enum { + PF_XEN_EMUL_LVL_0, + PF_XEN_EMUL_LVL_1, + PF_XEN_EMUL_LVL_2, + PF_XEN_EMUL_LVL_3, + PF_XEN_EMUL_LVL_4, + PF_XEN_EMUL_EARLY_UNSHADOW, + PF_XEN_EMUL_SET_CHANGED, + PF_XEN_EMUL_SET_UNCHANGED, + PF_XEN_EMUL_SET_FLUSH, + PF_XEN_EMUL_SET_ERROR, + PF_XEN_EMUL_PROMOTE, + PF_XEN_EMUL_DEMOTE, + PF_XEN_EMUL_PREALLOC_UNPIN, + PF_XEN_EMUL_PREALLOC_UNHOOK, + PF_XEN_EMUL_MAX, +}; + +char * pf_xen_emul_name[PF_XEN_EMUL_MAX] = { + [PF_XEN_EMUL_LVL_0]="non-linmap", + [PF_XEN_EMUL_LVL_1]="linmap l1", + [PF_XEN_EMUL_LVL_2]="linmap l2", + [PF_XEN_EMUL_LVL_3]="linmap l3", + [PF_XEN_EMUL_LVL_4]="linmap l4", + [PF_XEN_EMUL_EARLY_UNSHADOW]="early unshadow", + [PF_XEN_EMUL_SET_UNCHANGED]="set unchanged", + [PF_XEN_EMUL_SET_CHANGED]="set changed", + [PF_XEN_EMUL_SET_FLUSH]="set changed", + [PF_XEN_EMUL_SET_ERROR]="set changed", + [PF_XEN_EMUL_PROMOTE]="promote", + [PF_XEN_EMUL_DEMOTE]="demote", + [PF_XEN_EMUL_PREALLOC_UNPIN]="unpin", + [PF_XEN_EMUL_PREALLOC_UNHOOK]="unhook", +}; + +/* Rio only */ +enum { + PF_XEN_NON_EMUL_VA_USER, + PF_XEN_NON_EMUL_VA_KERNEL, + PF_XEN_NON_EMUL_EIP_USER, + PF_XEN_NON_EMUL_EIP_KERNEL, + PF_XEN_NON_EMUL_MAX, +}; + +char * pf_xen_non_emul_name[PF_XEN_NON_EMUL_MAX] = { + [PF_XEN_NON_EMUL_VA_USER]="va user", + [PF_XEN_NON_EMUL_VA_KERNEL]="va kernel", + [PF_XEN_NON_EMUL_EIP_USER]="eip user", + [PF_XEN_NON_EMUL_EIP_KERNEL]="eip kernel", +}; + +enum { + PF_XEN_FIXUP_PREALLOC_UNPIN, + PF_XEN_FIXUP_PREALLOC_UNHOOK, + PF_XEN_FIXUP_UNSYNC, + PF_XEN_FIXUP_OOS_ADD, + PF_XEN_FIXUP_OOS_EVICT, + PF_XEN_FIXUP_PROMOTE, + PF_XEN_FIXUP_UPDATE_ONLY, + PF_XEN_FIXUP_WRMAP, + PF_XEN_FIXUP_BRUTE_FORCE, + PF_XEN_FIXUP_MAX, +}; + +char * pf_xen_fixup_name[PF_XEN_FIXUP_MAX] = { + [PF_XEN_FIXUP_PREALLOC_UNPIN] = "unpin", + [PF_XEN_FIXUP_PREALLOC_UNHOOK] = "unhook", + [PF_XEN_FIXUP_UNSYNC] = "unsync", + [PF_XEN_FIXUP_OOS_ADD] = "oos-add", + [PF_XEN_FIXUP_OOS_EVICT] = "oos-evict", + [PF_XEN_FIXUP_PROMOTE] = "promote", + [PF_XEN_FIXUP_UPDATE_ONLY] = "update", + [PF_XEN_FIXUP_WRMAP] = "wrmap", + [PF_XEN_FIXUP_BRUTE_FORCE] = "wrmap-bf", +}; + +enum { + PF_XEN_NOT_SHADOW = 1, + PF_XEN_FAST_PROPAGATE, + PF_XEN_FAST_MMIO, + PF_XEN_FALSE_FAST_PATH, + PF_XEN_MMIO, + PF_XEN_FIXUP, + PF_XEN_DOMF_DYING, + PF_XEN_EMULATE, + PF_XEN_EMULATE_UNSHADOW_USER, + PF_XEN_EMULATE_UNSHADOW_EVTINJ, + PF_XEN_EMULATE_UNSHADOW_UNHANDLED, + PF_XEN_LAST_FAULT=PF_XEN_EMULATE_UNSHADOW_UNHANDLED, + PF_XEN_NON_EMULATE, + PF_XEN_NO_HANDLER, + PF_XEN_MAX, +}; + +#define SHADOW_WRMAP_BF 12 +#define SHADOW_PREALLOC_UNPIN 13 +#define SHADOW_RESYNC_FULL 14 +#define SHADOW_RESYNC_ONLY 15 + +char * pf_xen_name[PF_XEN_MAX] = { + [PF_XEN_NOT_SHADOW]="propagate", + [PF_XEN_FAST_PROPAGATE]="fast propagate", + [PF_XEN_FAST_MMIO]="fast mmio", + [PF_XEN_FALSE_FAST_PATH]="false fast path", + [PF_XEN_MMIO]="mmio", + [PF_XEN_FIXUP]="fixup", + [PF_XEN_DOMF_DYING]="dom dying", + [PF_XEN_EMULATE]="emulate", + 
[PF_XEN_EMULATE_UNSHADOW_USER]="unshadow:user-mode", + [PF_XEN_EMULATE_UNSHADOW_EVTINJ]="unshadow:evt inj", + [PF_XEN_EMULATE_UNSHADOW_UNHANDLED]="unshadow:unhandled instr", + [PF_XEN_NON_EMULATE]="fixup|mmio", + [PF_XEN_NO_HANDLER]="(no handler)", +}; + +#define CORR_VA_INVALID (0ULL-1) + +enum { + NONPF_MMIO_APIC, + NONPF_MMIO_NPF, + NONPF_MMIO_UNKNOWN, + NONPF_MMIO_MAX +}; + +struct mmio_info { + unsigned long long gpa; + unsigned long long va; /* Filled only by shadow */ + unsigned data; + unsigned data_valid:1, is_write:1; +}; + +struct pf_xen_extra { + unsigned long long va; + union { + unsigned flags; + struct { + unsigned flag_set_ad:1, + flag_set_a:1, + flag_shadow_l1_get_ref:1, + flag_shadow_l1_put_ref:1, + flag_l2_propagate:1, + flag_set_changed:1, + flag_set_flush:1, + flag_set_error:1, + flag_demote:1, + flag_promote:1, + flag_wrmap:1, + flag_wrmap_guess_found:1, + flag_wrmap_brute_force:1, + flag_early_unshadow:1, + flag_emulation_2nd_pt_written:1, + flag_emulation_last_failed:1, + flag_emulate_full_pt:1, + flag_prealloc_unhook:1, + flag_unsync:1, + flag_oos_fixup_add:1, + flag_oos_fixup_evict:1; + }; + }; /* Miami + ; fixup & emulate */ + unsigned int error_code; /* Rio only */ + + /* Calculated */ + int pf_case; /* Rio */ + + /* MMIO only */ + unsigned long long gpa; + unsigned int data; + + /* Emulate only */ + unsigned long long gl1e; /* Miami + */ + unsigned long long wval; /* Miami */ + unsigned long long corresponding_va; + unsigned int pt_index[5], pt_is_lo; + int pt_level; + + /* Other */ + unsigned long long gfn; + + /* Flags */ + unsigned corr_valid:1, + corr_is_kernel:1, + va_is_kernel:1; +}; + +struct pcpu_info; + +#define GUEST_INTERRUPT_MAX 350 +#define FAKE_VECTOR 349 +#define CR_MAX 9 +#define RESYNCS_MAX 17 +#define PF_XEN_FIXUP_UNSYNC_RESYNC_MAX 2 + +struct hvm_data; + +struct hvm_summary_handler_node { + void (*handler)(struct hvm_data *, void* data); + void *data; + struct hvm_summary_handler_node *next; +}; + +struct hvm_data { + /* Summary information */ + int init; + int vmexit_valid; + int summary_info; + struct vcpu_data *v; /* up-pointer */ + + /* SVM / VMX compatibility. FIXME - should be global */ + char ** exit_reason_name; + int exit_reason_max; + struct hvm_summary_handler_node *exit_reason_summary_handler_list[HVM_EXIT_REASON_MAX]; + + /* Information about particular exit reasons */ + struct { + struct event_cycle_summary exit_reason[HVM_EXIT_REASON_MAX]; + int extint[EXTERNAL_INTERRUPT_MAX+1]; + int *extint_histogram; + struct event_cycle_summary trap[HVM_TRAP_MAX]; + struct event_cycle_summary pf_xen[PF_XEN_MAX]; + struct event_cycle_summary pf_xen_emul[PF_XEN_EMUL_MAX]; + struct event_cycle_summary pf_xen_emul_early_unshadow[5]; + struct event_cycle_summary pf_xen_non_emul[PF_XEN_NON_EMUL_MAX]; + struct event_cycle_summary pf_xen_fixup[PF_XEN_FIXUP_MAX]; + struct event_cycle_summary pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX+1]; + struct event_cycle_summary cr_write[CR_MAX]; + struct event_cycle_summary cr3_write_resyncs[RESYNCS_MAX+1]; + struct event_cycle_summary vmcall[HYPERCALL_MAX+1]; + struct event_cycle_summary generic[HVM_EVENT_HANDLER_MAX]; + struct event_cycle_summary mmio[NONPF_MMIO_MAX]; + struct hvm_gi_struct { + int count; + struct cycle_summary runtime[GUEST_INTERRUPT_CASE_MAX]; + /* OK, not summary info, but still... 
*/ + int is_wake; + tsc_t start_tsc; + } guest_interrupt[GUEST_INTERRUPT_MAX + 1]; + /* IPI Latency */ + struct event_cycle_summary ipi_latency; + int ipi_count[256]; + struct { + struct io_address *mmio, *pio; + } io; + } summary; + + /* In-flight accumulation information */ + struct { + union { + struct { + unsigned port:31, + is_write:1; + unsigned int val; + } io; + struct pf_xen_extra pf_xen; + struct { + unsigned cr; + unsigned long long val; + int repromote; + } cr_write; + struct { + unsigned addr; + unsigned long long val; + } msr; + struct { + unsigned int event; + uint32_t d[4]; + } generic; + struct { + unsigned eax; + } vmcall; + struct { + unsigned vec; + } intr; + }; + /* MMIO gets its separate area, since many exits may use it */ + struct mmio_info mmio; + }inflight; + int resyncs; + void (*post_process)(struct hvm_data *); + tsc_t exit_tsc, arc_cycles, entry_tsc; + unsigned long long rip; + unsigned exit_reason, event_handler; + int short_summary_done:1, prealloc_unpin:1, wrmap_bf:1; + + /* Immediate processing */ + void *d; + + /* Wake-to-halt detection. See comment above. */ + struct { + unsigned waking:1; + /* Wake vector: keep track of time from vmentry until: + next halt, or next interrupt */ + int vector, interrupts, interrupts_wanting_tsc; + } w2h; + + /* Historical info */ + tsc_t last_rdtsc; +}; + +enum { + HVM_SHORT_SUMMARY_EMULATE, + HVM_SHORT_SUMMARY_UNSYNC, + HVM_SHORT_SUMMARY_FIXUP, + HVM_SHORT_SUMMARY_MMIO, + HVM_SHORT_SUMMARY_PROPAGATE, + HVM_SHORT_SUMMARY_CR3, + HVM_SHORT_SUMMARY_VMCALL, + HVM_SHORT_SUMMARY_INTERRUPT, + HVM_SHORT_SUMMARY_HLT, + HVM_SHORT_SUMMARY_OTHER, + HVM_SHORT_SUMMARY_MAX, +}; + +char *hvm_short_summary_name[HVM_SHORT_SUMMARY_MAX] = { + [HVM_SHORT_SUMMARY_EMULATE] ="emulate", + [HVM_SHORT_SUMMARY_UNSYNC] ="unsync", + [HVM_SHORT_SUMMARY_FIXUP] ="fixup", + [HVM_SHORT_SUMMARY_MMIO] ="mmio", + [HVM_SHORT_SUMMARY_PROPAGATE]="propagate", + [HVM_SHORT_SUMMARY_CR3] ="cr3", + [HVM_SHORT_SUMMARY_VMCALL] ="vmcall", + [HVM_SHORT_SUMMARY_INTERRUPT]="intr", + [HVM_SHORT_SUMMARY_HLT] ="hlt", + [HVM_SHORT_SUMMARY_OTHER] ="other", +}; + +struct hvm_short_summary_struct { + struct cycle_summary s[HVM_SHORT_SUMMARY_MAX]; +}; + +void init_hvm_data(struct hvm_data *h, struct vcpu_data *v) { + int i; + + if(h->init) + return; + + h->v = v; + + h->init = 1; + + if(opt.svm_mode) { + h->exit_reason_max = HVM_SVM_EXIT_REASON_MAX; + h->exit_reason_name = hvm_svm_exit_reason_name; + } else { + h->exit_reason_max = HVM_VMX_EXIT_REASON_MAX; + h->exit_reason_name = hvm_vmx_exit_reason_name; + } + + if(opt.histogram_interrupt_eip) { + int count = ((1ULL<<ADDR_SPACE_BITS)/opt.histogram_interrupt_increment); + size_t size = count * sizeof(int); + h->summary.extint_histogram = malloc(size); + if(h->summary.extint_histogram) + bzero(h->summary.extint_histogram, size); + else { + fprintf(stderr, "FATAL: Could not allocate %zd bytes for interrupt histogram!\n", + size); + error(ERR_SYSTEM, NULL); + } + + } + for(i=0; i<GUEST_INTERRUPT_MAX+1; i++) + h->summary.guest_interrupt[i].count=0; +} + +/* PV data */ +enum { + PV_HYPERCALL=1, + PV_TRAP=3, + PV_PAGE_FAULT, + PV_FORCED_INVALID_OP, + PV_EMULATE_PRIVOP, + PV_EMULATE_4GB, + PV_MATH_STATE_RESTORE, + PV_PAGING_FIXUP, + PV_GDT_LDT_MAPPING_FAULT, + PV_PTWR_EMULATION, + PV_PTWR_EMULATION_PAE, + PV_HYPERCALL_V2 = 13, + PV_HYPERCALL_SUBCALL = 14, + PV_MAX +}; + +char *pv_name[PV_MAX] = { + [PV_HYPERCALL]="hypercall", + [PV_TRAP]="trap", + [PV_PAGE_FAULT]="page_fault", + [PV_FORCED_INVALID_OP]="forced_invalid_op", + 
[PV_EMULATE_PRIVOP]="emulate privop", + [PV_EMULATE_4GB]="emulate 4g", + [PV_MATH_STATE_RESTORE]="math state restore", + [PV_PAGING_FIXUP]="paging fixup", + [PV_GDT_LDT_MAPPING_FAULT]="gdt/ldt mapping fault", + [PV_PTWR_EMULATION]="ptwr", + [PV_PTWR_EMULATION_PAE]="ptwr(pae)", + [PV_HYPERCALL_V2]="hypercall", + [PV_HYPERCALL_SUBCALL]="hypercall (subcall)", +}; + +#define PV_HYPERCALL_MAX 56 +#define PV_TRAP_MAX 20 + +struct pv_data { + unsigned summary_info:1; + int count[PV_MAX]; + int hypercall_count[PV_HYPERCALL_MAX]; + int trap_count[PV_TRAP_MAX]; +}; + +/* Sched data */ + +enum { + SCHED_DOM_ADD=1, + SCHED_DOM_REM, + SCHED_SLEEP, + SCHED_WAKE, + SCHED_YIELD, + SCHED_BLOCK, + SCHED_SHUTDOWN, + SCHED_CTL, + SCHED_ADJDOM, + SCHED_SWITCH, + SCHED_S_TIMER_FN, + SCHED_T_TIMER_FN, + SCHED_DOM_TIMER_FN, + SCHED_SWITCH_INFPREV, + SCHED_SWITCH_INFNEXT, + SCHED_SHUTDOWN_CODE, + SCHED_MAX +}; + +enum { + RUNSTATE_RUNNING=0, + RUNSTATE_RUNNABLE, + RUNSTATE_BLOCKED, + RUNSTATE_OFFLINE, + RUNSTATE_LOST, + RUNSTATE_QUEUED, + RUNSTATE_INIT, + RUNSTATE_MAX +}; + +int runstate_graph[RUNSTATE_MAX] = +{ + [RUNSTATE_BLOCKED]=0, + [RUNSTATE_OFFLINE]=1, + [RUNSTATE_RUNNABLE]=2, + [RUNSTATE_RUNNING]=3, + [RUNSTATE_LOST]=-1, + [RUNSTATE_QUEUED]=-2, + [RUNSTATE_INIT]=-2, +}; + +char * runstate_name[RUNSTATE_MAX]={ + [RUNSTATE_RUNNING]= "running", + [RUNSTATE_RUNNABLE]="runnable", + [RUNSTATE_BLOCKED]= "blocked", /* to be blocked */ + [RUNSTATE_OFFLINE]= "offline", + [RUNSTATE_QUEUED]= "queued", + [RUNSTATE_INIT]= "init", + [RUNSTATE_LOST]= "lost", +}; + +enum { + RUNNABLE_STATE_INVALID, + RUNNABLE_STATE_WAKE, + RUNNABLE_STATE_PREEMPT, + RUNNABLE_STATE_OTHER, + RUNNABLE_STATE_MAX +}; + +char * runnable_state_name[RUNNABLE_STATE_MAX]={ + [RUNNABLE_STATE_INVALID]="invalid", /* Should never show up */ + [RUNNABLE_STATE_WAKE]="wake", + [RUNNABLE_STATE_PREEMPT]="preempt", + [RUNNABLE_STATE_OTHER]="other", +}; + +/* Memory data */ +enum { + MEM_PAGE_GRANT_MAP = 1, + MEM_PAGE_GRANT_UNMAP, + MEM_PAGE_GRANT_TRANSFER, + MEM_SET_P2M_ENTRY, + MEM_DECREASE_RESERVATION, + MEM_POD_POPULATE = 16, + MEM_POD_ZERO_RECLAIM, + MEM_POD_SUPERPAGE_SPLINTER, + MEM_MAX +}; + +char *mem_name[MEM_MAX] = { + [MEM_PAGE_GRANT_MAP] = "grant-map", + [MEM_PAGE_GRANT_UNMAP] = "grant-unmap", + [MEM_PAGE_GRANT_TRANSFER] = "grant-transfer", + [MEM_SET_P2M_ENTRY] = "set-p2m", + [MEM_DECREASE_RESERVATION] = "decrease-reservation", + [MEM_POD_POPULATE] = "pod-populate", + [MEM_POD_ZERO_RECLAIM] = "pod-zero-reclaim", + [MEM_POD_SUPERPAGE_SPLINTER] = "pod-superpage-splinter", +}; + +/* Per-unit information. 
*/ + +struct cr3_value_struct { + struct cr3_value_struct * next; + struct cr3_value_struct * gnext; + unsigned long long gmfn; + int cr3_id; + unsigned long long first_time, last_time, run_time; + struct cycle_summary total_time, guest_time, hv_time; + int switch_count, flush_count; + + struct hvm_short_summary_struct hvm; + + struct { + int now; + int count; + } prealloc_unpin; + + struct { + unsigned callback:1; + unsigned flush_count, switch_count; + unsigned fixup_user, emulate_corr_user; + } destroy; +}; + +#ifndef MAX_CPUS +#define MAX_CPUS 256 +#endif +typedef uint32_t cpu_mask_t; + +#define IDLE_DOMAIN 32767 +#define DEFAULT_DOMAIN 32768 + +#define MAX_VLAPIC_LIST 8 +struct vlapic_struct { + struct { + struct outstanding_ipi { + tsc_t first_tsc; + int vec, count; + int injected, valid; + } list[MAX_VLAPIC_LIST]; + } outstanding; +}; + +struct vcpu_data { + int vid; + struct domain_data *d; /* up-pointer */ + unsigned activated:1; + + int guest_paging_levels; + + /* Schedule info */ + struct { + int state; + int runnable_state; /* Only valid when state==RUNSTATE_RUNNABLE */ + tsc_t tsc; + /* TSC skew detection/correction */ + struct last_oldstate_struct { + int wrong, actual, pid; + tsc_t tsc; + } last_oldstate; + /* Performance counters */ + unsigned long long p1_start, p2_start; + } runstate; + struct pcpu_info *p; + tsc_t pcpu_tsc; + + /* Hardware tracking */ + struct { + long long val; + tsc_t start_time; + struct cr3_value_struct *data; + } cr3; + + /* IPI latency tracking */ + struct vlapic_struct vlapic; + + /* Summary info */ + struct cycle_framework f; + struct cycle_summary runstates[RUNSTATE_MAX]; + struct cycle_summary runnable_states[RUNNABLE_STATE_MAX]; + struct weighted_cpi_summary cpi; + struct cycle_summary cpu_affinity_all, + cpu_affinity_pcpu[MAX_CPUS]; + enum { + VCPU_DATA_NONE=0, + VCPU_DATA_HVM, + VCPU_DATA_PV + } data_type; + union { + struct hvm_data hvm; + struct pv_data pv; + }; +}; + +enum { + DOMAIN_RUNSTATE_BLOCKED=0, + DOMAIN_RUNSTATE_PARTIAL_RUN, + DOMAIN_RUNSTATE_FULL_RUN, + DOMAIN_RUNSTATE_PARTIAL_CONTENTION, + DOMAIN_RUNSTATE_CONCURRENCY_HAZARD, + DOMAIN_RUNSTATE_FULL_CONTENTION, + DOMAIN_RUNSTATE_LOST, + DOMAIN_RUNSTATE_MAX +}; + +char * domain_runstate_name[] = { + [DOMAIN_RUNSTATE_BLOCKED]="blocked", + [DOMAIN_RUNSTATE_PARTIAL_RUN]="partial run", + [DOMAIN_RUNSTATE_FULL_RUN]="full run", + [DOMAIN_RUNSTATE_PARTIAL_CONTENTION]="partial contention", + [DOMAIN_RUNSTATE_CONCURRENCY_HAZARD]="concurrency_hazard", + [DOMAIN_RUNSTATE_FULL_CONTENTION]="full_contention", + [DOMAIN_RUNSTATE_LOST]="lost", +}; + +enum { + POD_RECLAIM_CONTEXT_UNKNOWN=0, + POD_RECLAIM_CONTEXT_FAULT, + POD_RECLAIM_CONTEXT_BALLOON, + POD_RECLAIM_CONTEXT_MAX +}; + +char * pod_reclaim_context_name[] = { + [POD_RECLAIM_CONTEXT_UNKNOWN]="unknown", + [POD_RECLAIM_CONTEXT_FAULT]="fault", + [POD_RECLAIM_CONTEXT_BALLOON]="balloon", +}; + +#define POD_ORDER_MAX 4 + +struct domain_data { + struct domain_data *next; + int did; + struct vcpu_data *vcpu[MAX_CPUS]; + + int max_vid; + + int runstate; + tsc_t runstate_tsc; + struct cycle_summary total_time; + struct cycle_summary runstates[DOMAIN_RUNSTATE_MAX]; + struct cr3_value_struct *cr3_value_head; + struct eip_list_struct *emulate_eip_list; + struct eip_list_struct *interrupt_eip_list; + + int guest_interrupt[GUEST_INTERRUPT_MAX+1]; + struct hvm_short_summary_struct hvm_short; + struct { + int done[MEM_MAX]; + int done_interval[MEM_MAX]; + + int done_for[MEM_MAX]; + int done_for_interval[MEM_MAX]; + } memops; + + struct { + int 
reclaim_order[POD_ORDER_MAX]; + int reclaim_context[POD_RECLAIM_CONTEXT_MAX]; + int reclaim_context_order[POD_RECLAIM_CONTEXT_MAX][POD_ORDER_MAX]; + /* FIXME: Do a full cycle summary */ + int populate_order[POD_ORDER_MAX]; + } pod; +}; + +struct domain_data * domain_list=NULL; + +struct domain_data default_domain; + +enum { + TOPLEVEL_GEN=0, + TOPLEVEL_SCHED, + TOPLEVEL_DOM0OP, + TOPLEVEL_HVM, + TOPLEVEL_MEM, + TOPLEVEL_PV, + TOPLEVEL_SHADOW, + TOPLEVEL_HW, + TOPLEVEL_MAX=TOPLEVEL_HW+1, +}; + +char * toplevel_name[TOPLEVEL_MAX] = { + [TOPLEVEL_GEN]="gen", + [TOPLEVEL_SCHED]="sched", + [TOPLEVEL_DOM0OP]="dom0op", + [TOPLEVEL_HVM]="hvm", + [TOPLEVEL_MEM]="mem", + [TOPLEVEL_PV]="pv", + [TOPLEVEL_SHADOW]="shadow", + [TOPLEVEL_HW]="hw", +}; + +struct trace_volume { + unsigned long long toplevel[TOPLEVEL_MAX]; + unsigned long long sched_verbose; + unsigned long long hvm[HVM_VOL_MAX]; +} volume; + +#define UPDATE_VOLUME(_p,_x,_s) \ + do { \ + (_p)->volume.total._x += _s; \ + (_p)->volume.last_buffer._x += _s; \ + } while(0) + +void volume_clear(struct trace_volume *vol) +{ + bzero(vol, sizeof(*vol)); +} + +void volume_summary(struct trace_volume *vol) +{ + int j, k; + for(j=0; j<TOPLEVEL_MAX; j++) + if(vol->toplevel[j]) { + printf(" %-6s: %10lld\n", + toplevel_name[j], vol->toplevel[j]); + switch(j) { + case TOPLEVEL_SCHED: + if(vol->sched_verbose) + printf(" +-verbose: %10lld\n", + vol->sched_verbose); + break; + case TOPLEVEL_HVM: + for(k=0; k<HVM_VOL_MAX; k++) { + if(vol->hvm[k]) + printf(" +-%-7s: %10lld\n", + hvm_vol_name[k], vol->hvm[k]); + } + + break; + } + } +} + +struct pcpu_info { + /* Information about this pcpu */ + unsigned active:1, summary:1; + int pid; + + /* Information related to scanning thru the file */ + tsc_t first_tsc, last_tsc, order_tsc; + loff_t file_offset; + loff_t next_cpu_change_offset; + struct record_info ri; + int last_cpu_change_pid; + int power_state; + + /* Information related to tsc skew detection / correction */ + struct { + tsc_t offset; + cpu_mask_t downstream; /* To detect cycles in dependencies */ + } tsc_skew; + + /* Information related to domain tracking */ + struct vcpu_data * current; + struct { + unsigned active:1, + domain_valid:1, + seen_valid_schedule:1; /* Seen an actual schedule since lost records */ + unsigned did:16,vid:16; + tsc_t tsc; + } lost_record; + + /* Record volume */ + struct { + tsc_t buffer_first_tsc, + buffer_dom0_runstate_tsc, + buffer_dom0_runstate_cycles[RUNSTATE_MAX]; + int buffer_dom0_runstate; + unsigned buffer_size; + struct trace_volume total, last_buffer; + } volume; + + /* Time report */ + struct { + tsc_t tsc; + struct cycle_summary idle, running, lost; + } time; +}; + +void __fill_in_record_info(struct pcpu_info *p); + +#define INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX 10 + +struct { + int max_active_pcpu; + loff_t last_epoch_offset; + int early_eof; + int lost_cpus; + tsc_t now; + struct cycle_framework f; + tsc_t buffer_trace_virq_tsc; + struct pcpu_info pcpu[MAX_CPUS]; + + struct { + int id; + /* Invariant: head null => tail null; head !null => tail valid */ + struct cr3_value_struct *head, **tail; + } cr3; + + struct { + tsc_t start_tsc; + /* Information about specific interval output types */ + union { + struct { + struct interval_element ** values; + int count; + } array; + struct { + struct interval_list *head, **tail; + } list; + struct cr3_value_struct *cr3; + struct { + struct domain_data *d; + int guest_vector[INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX]; + } domain; + }; + } interval; +} P = { 0 }; + +/* Function 
prototypes */ +char * pcpu_string(int pcpu); +void pcpu_string_draw(struct pcpu_info *p); +void process_generic(struct record_info *ri); +void dump_generic(FILE *f, struct record_info *ri); +ssize_t __read_record(struct trace_record *rec, loff_t offset); +void error(enum error_level l, struct record_info *ri); +void update_io_address(struct io_address ** list, unsigned int pa, int dir, + tsc_t arc_cycles, unsigned int va); +int check_extra_words(struct record_info *ri, int expected_size, const char *record); +int vcpu_set_data_type(struct vcpu_data *v, int type); + +void cpumask_init(cpu_mask_t *c) { + *c = 0UL; +} + +void cpumask_clear(cpu_mask_t *c, int cpu) { + *c &= ~(1UL << cpu); +} + +void cpumask_set(cpu_mask_t *c, int cpu) { + *c |= (1UL << cpu); +} + +int cpumask_isset(const cpu_mask_t *c, int cpu) { + if(*c & (1UL<<cpu)) + return 1; + else + return 0; +} + +void cpumask_union(cpu_mask_t *d, const cpu_mask_t *s) { + *d |= *s; +} + +/* -- Time code -- */ + +void cycles_to_time(unsigned long long c, struct time_struct *t) { + t->time = ((c - P.f.first_tsc) << 10) / opt.cpu_qhz; + t->s = t->time / 1000000000; + t->ns = t->time - (t->s * 1000000000); +} + +void abs_cycles_to_time(unsigned long long ac, struct time_struct *t) { + if(ac > P.f.first_tsc) { + /* t->time = ((ac - P.f.first_tsc) * 1000) / (opt.cpu_hz / 1000000 ); */ + /* t->s = t->time / 1000000000; */ + /* t->ns = t->time % 1000000000; */ + t->time = ((ac - P.f.first_tsc) << 10) / opt.cpu_qhz; + t->s = t->time / 1000000000; + t->ns = t->time - (t->s * 1000000000); + } else { + t->time = t->s = t->ns = 0; + } +} + +tsc_t abs_cycles_to_global(unsigned long long ac) { + if(ac > P.f.first_tsc) + return ac - P.f.first_tsc; + else + return 0; +} + +void scatterplot_vs_time(tsc_t atsc, long long y) { + struct time_struct t; + + abs_cycles_to_time(atsc, &t); + + printf("%u.%09u %lld\n", t.s, t.ns, y); +} + +/* -- Summary Code -- */ + +/* With compliments to "Numerical Recipes in C", which provided the algorithm + * and basic template for this function. */ +long long percentile(long long * A, int N, int ple) { + int I, J, L, R, K; + + long long X, W; + + /* No samples! */ + if ( N == 0 ) + return 0; + + /* Find K, the element # we want */ + K=N*ple/100; + + /* Set the left and right boundaries of the current search space */ + L=0; R=N-1; + + while(L < R) { + /* X: The value to order everything higher / lower than */ + X=A[K]; + + /* Starting at the left and the right... 
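
/* A standalone sketch (not part of the patch) of the fixed-point conversion
 * used by cycles_to_time()/abs_cycles_to_time() above.  It assumes
 * opt.cpu_qhz holds the TSC rate in GHz with 10 fractional bits, i.e.
 * (cpu_hz << 10) / 1000000000, which is what the "<< 10" scaling implies;
 * the 2.4 GHz figure and the helper names here are illustrative only. */
#include <stdio.h>
#include <stdint.h>

struct time_struct {
    unsigned long long time;    /* nanoseconds since the first TSC value */
    unsigned int s, ns;
};

static void cycles_to_time_sketch(uint64_t cycles, uint64_t cpu_qhz,
                                  struct time_struct *t)
{
    /* ns = cycles / GHz = (cycles << 10) / (GHz in 22.10 fixed point) */
    t->time = (cycles << 10) / cpu_qhz;
    t->s    = t->time / 1000000000ULL;
    t->ns   = t->time % 1000000000ULL;
}

int main(void)
{
    uint64_t cpu_hz  = 2400000000ULL;                  /* 2.4 GHz example */
    uint64_t cpu_qhz = (cpu_hz << 10) / 1000000000ULL; /* fixed-point GHz */
    struct time_struct t;

    cycles_to_time_sketch(3600000000ULL, cpu_qhz, &t); /* ~1.5s of cycles */
    printf("%u.%09u s\n", t.s, t.ns);
    return 0;
}
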
*/ + I=L; + J=R; + + do { + /* Find the first element on the left that is out-of-order w/ X */ + while(A[I]<X) + I++; + /* Find the first element on the right that is out-of-order w/ X */ + while(X<A[J]) + J--; + + /* If we found something out-of-order */ + if(I<=J) { + /* Switch the values */ + W=A[I]; + A[I]=A[J]; + A[J]=W; + + /* And move on */ + I++; J--; + } + } while (I <= J); /* Keep going until our pointers meet or pass */ + + /* Re-adjust L and R, based on which element we're looking for */ + if(J<K) + L=I; + if(K<I) + R=J; + } + + return A[K]; +} + +float weighted_percentile(float * A, /* values */ + unsigned long long * w, /* weights */ + int N, /* total */ + int ple) /* percentile */ +{ + int L, R, I, J, K; + unsigned long long L_weight, R_weight, I_weight, J_weight, + K_weight, N_weight; + + float X, t1; + unsigned long long t2; + + /* Calculate total weight */ + N_weight=0; + + for(I=0; I<N; I++) { + assert(w[I]!=0); + N_weight += w[I]; + } + + /* Find K_weight, the target weight we want */ + K_weight = N_weight * ple / 100; + + /* Set the left and right boundaries of the current search space */ + L=0; + L_weight = 0; + R=N-1; + R_weight = N_weight - w[R]; + + /* Search between L and R, narrowing down until we're done */ + while(L < R) { + /* Chose an ordering value from right in the middle */ + K = (L + R) >> 1; + /* X: The value to order everything higher / lower than */ + X=A[K]; + + /* Starting at the left and the right... */ + I=L; I_weight = L_weight; + J=R; J_weight = R_weight; + + do { + /* Find the first element on the left that is out-of-order w/ X */ + while(A[I]<X) { + I_weight += w[I]; + I++; + } + /* Find the first element on the right that is out-of-order w/ X */ + while(X<A[J]) { + J_weight -= w[J]; + J--; + } + + /* If we actually found something... */ + if(I<=J) { + /* Switch the values */ + t1=A[I]; + A[I]=A[J]; + A[J]=t1; + + t2=w[I]; + w[I]=w[J]; + w[J]=t2; + + /* And move in */ + I_weight += w[I]; + I++; + + J_weight -= w[J]; + J--; + } + } while (I <= J); /* Keep going until our pointers meet or pass */ + + /* Re-adjust L and R, based on which element we're looking for */ + if(J_weight<K_weight) + L=I; L_weight = I_weight; + if(K_weight<I_weight) + R=J; R_weight = J_weight; + } + + return A[L]; +} + +long long self_weighted_percentile(long long * A, + int N, /* total */ + int ple) /* percentile */ +{ + int L, R, I, J, K; + long long L_weight, R_weight, I_weight, J_weight, + K_weight, N_weight; + + long long X, t1; + + /* Calculate total weight */ + N_weight=0; + + for(I=0; I<N; I++) { + if(A[I] < 0) + fprintf(warn, "%s: Value %lld less than zero!\n", + __func__, A[I]); + assert(A[I]!=0); + N_weight += A[I]; + } + + /* Find K_weight, the target weight we want */ + K_weight = N_weight * ple / 100; + + /* Set the left and right boundaries of the current search space */ + L=0; + L_weight = 0; + R=N-1; + R_weight = N_weight - A[R]; + + /* Search between L and R, narrowing down until we're done */ + while(L < R) { + /* Chose an ordering value from right in the middle */ + K = (L + R) >> 1; + /* X: The value to order everything higher / lower than */ + X=A[K]; + + /* Starting at the left and the right... */ + I=L; I_weight = L_weight; + J=R; J_weight = R_weight; + + do { + /* Find the first element on the left that is out-of-order w/ X */ + while(A[I]<X) { + I_weight += A[I]; + I++; + } + /* Find the first element on the right that is out-of-order w/ X */ + while(X<A[J]) { + J_weight -= A[J]; + J--; + } + + /* If we actually found something... 
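
/* A standalone usage sketch (not part of the patch) of the Hoare-selection
 * scheme that percentile() above borrows from "Numerical Recipes in C":
 * the array is partially partitioned around the K-th element rather than
 * fully sorted, so pulling out the 5th/50th/95th percentiles stays cheap
 * even for large sample buffers.  The sample data is illustrative only. */
#include <stdio.h>

static long long select_percentile(long long *A, int N, int ple)
{
    int I, J, L = 0, R = N - 1, K = N * ple / 100;
    long long X, W;

    if (N == 0)
        return 0;

    while (L < R) {
        X = A[K];                       /* pivot on the current K-th value */
        I = L;
        J = R;
        do {
            while (A[I] < X) I++;       /* out of order on the left */
            while (X < A[J]) J--;       /* out of order on the right */
            if (I <= J) {               /* swap the pair and move inward */
                W = A[I]; A[I] = A[J]; A[J] = W;
                I++; J--;
            }
        } while (I <= J);
        /* Narrow the search to the half that still contains index K. */
        if (J < K) L = I;
        if (K < I) R = J;
    }
    return A[K];
}

int main(void)
{
    long long samples[] = { 90, 10, 70, 30, 50, 20, 80, 60, 40, 100 };
    int n = sizeof(samples) / sizeof(samples[0]);

    printf("p5=%lld p50=%lld p95=%lld\n",
           select_percentile(samples, n, 5),
           select_percentile(samples, n, 50),
           select_percentile(samples, n, 95));
    return 0;
}
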
*/ + if(I<=J) { + /* Switch the values */ + t1=A[I]; + A[I]=A[J]; + A[J]=t1; + + /* And move in */ + I_weight += A[I]; + I++; + + J_weight -= A[J]; + J--; + } + } while (I <= J); /* Keep going until our pointers meet or pass */ + + /* Re-adjust L and R, based on which element we're looking for */ + if(J_weight<K_weight) + L=I; L_weight = I_weight; + if(K_weight<I_weight) + R=J; R_weight = J_weight; + } + + return A[L]; +} + +static inline double __cycles_percent(long long cycles, long long total) { + return (double)(cycles*100) / total; +} + +static inline double __summary_percent(struct event_cycle_summary *s, + struct cycle_framework *f) { + return __cycles_percent(s->cycles, f->total_cycles); +} + +static inline double summary_percent_global(struct event_cycle_summary *s) { + return __summary_percent(s, &P.f); +} + +static inline void update_summary(struct event_cycle_summary *s, long long c) { +/* We don't know ahead of time how many samples there are, and working + * with dynamic stuff is a pain, and unnecessary. This algorithm will + * generate a sample set that approximates an even sample. We can + * then take the percentiles on this, and get an approximate value. */ + if(c) { + if(opt.sample_size) { + int lap = (s->cycles_count/opt.sample_size)+1, + index =s->cycles_count % opt.sample_size; + if((index - (lap/3))%lap == 0) { + if(!s->cycles_sample) { + s->cycles_sample = malloc(sizeof(*s->cycles_sample) * opt.sample_size); + if(!s->cycles_sample) { + fprintf(stderr, "%s: malloc failed!\n", __func__); + error(ERR_SYSTEM, NULL); + } + } + s->cycles_sample[index]=c; + } + } + s->cycles_count++; + s->cycles += c; + + s->interval.count++; + s->interval.cycles += c; + } + s->count++; +} + +static inline void clear_interval_summary(struct event_cycle_summary *s) { + s->interval.count = 0; + s->interval.cycles = 0; +} + +static inline void update_cycles(struct cycle_summary *s, long long c) { +/* We don't know ahead of time how many samples there are, and working + * with dynamic stuff is a pain, and unnecessary. This algorithm will + * generate a sample set that approximates an even sample. We can + * then take the percentiles on this, and get an approximate value. */ + int lap, index; + + if ( c == 0 ) + { + fprintf(warn, "%s: cycles 0! Not updating...\n", + __func__); + return; + } + + if ( opt.sample_size ) { + lap = (s->count/opt.sample_size)+1; + index =s->count % opt.sample_size; + + if((index - (lap/3))%lap == 0) { + if(!s->sample) { + s->sample = malloc(sizeof(*s->sample) * opt.sample_size); + if(!s->sample) { + fprintf(stderr, "%s: malloc failed!\n", __func__); + error(ERR_SYSTEM, NULL); + } + } + s->sample[index] = c; + } + } + + if(c > 0) { + s->cycles += c; + s->interval.cycles += c; + } else { + s->cycles += -c; + s->interval.cycles += -c; + } + s->count++; + s->interval.count++; +} + +static inline void clear_interval_cycles(struct interval_element *e) { + e->cycles = 0; + e->count = 0; + e->instructions = 0; +} + +static inline void update_cpi(struct weighted_cpi_summary *s, + unsigned long long i, + unsigned long long c) { +/* We don't know ahead of time how many samples there are, and working + * with dynamic stuff is a pain, and unnecessary. This algorithm will + * generate a sample set that approximates an even sample. We can + * then take the percentiles on this, and get an approximate value. 
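
/* A standalone sketch (not part of the patch) of the "approximately even"
 * sampling rule shared by update_summary(), update_cycles() and update_cpi()
 * above: since the number of events is unknown in advance, each pass (lap)
 * through the fixed-size buffer refreshes only about 1/lap of the slots, so
 * both early and late events stay represented without storing everything.
 * SAMPLE_SIZE and the event values here are illustrative only. */
#include <stdio.h>

#define SAMPLE_SIZE 10

int main(void)
{
    long long sample[SAMPLE_SIZE] = { 0 };
    int count, i;

    for (count = 0; count < 40; count++) {
        int lap   = count / SAMPLE_SIZE + 1;    /* how many passes so far */
        int index = count % SAMPLE_SIZE;        /* slot this event maps to */

        /* Same condition as the patch: thin out updates as lap grows. */
        if ((index - lap / 3) % lap == 0)
            sample[index] = count;              /* event id stands in for cycles */
    }

    /* The buffer ends up holding a mix of old and recent events,
     * not just the tail of the stream. */
    for (i = 0; i < SAMPLE_SIZE; i++)
        printf("slot %d holds event %lld\n", i, sample[i]);

    return 0;
}
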
*/ + int lap, index; + + if ( opt.sample_size ) { + lap = (s->count/opt.sample_size)+1; + index =s->count % opt.sample_size; + + if((index - (lap/3))%lap == 0) { + if(!s->cpi) { + assert(!s->cpi_weight); + + s->cpi = malloc(sizeof(*s->cpi) * opt.sample_size); + s->cpi_weight = malloc(sizeof(*s->cpi_weight) * opt.sample_size); + if(!s->cpi || !s->cpi_weight) { + fprintf(stderr, "%s: malloc failed!\n", __func__); + error(ERR_SYSTEM, NULL); + } + } + assert(s->cpi_weight); + + s->cpi[index] = (float) c / i; + s->cpi_weight[index]=c; + } + } + + s->instructions += i; + s->cycles += c; + s->count++; + + s->interval.instructions += i; + s->interval.cycles += c; + s->interval.count++; +} + +static inline void clear_interval_cpi(struct weighted_cpi_summary *s) { + s->interval.cycles = 0; + s->interval.count = 0; + s->interval.instructions = 0; +} + +static inline void print_cpu_affinity(struct cycle_summary *s, char *p) { + if(s->count) { + long long avg; + + avg = s->cycles / s->count; + + if ( opt.sample_size ) { + long long p5, p50, p95; + int data_size = s->count; + if(data_size > opt.sample_size) + data_size = opt.sample_size; + + p50 = percentile(s->sample, data_size, 50); + p5 = percentile(s->sample, data_size, 5); + p95 = percentile(s->sample, data_size, 95); + + printf("%s: %7d %6lld {%6lld|%6lld|%6lld}\n", + p, s->count, avg, p5, p50, p95); + } else { + printf("%s: %7d %6lld\n", + p, s->count, avg); + } + } +} + +static inline void print_cpi_summary(struct weighted_cpi_summary *s) { + if(s->count) { + float avg; + + avg = (float)s->cycles / s->instructions; + + if ( opt.sample_size ) { + float p5, p50, p95; + int data_size = s->count; + + if(data_size > opt.sample_size) + data_size = opt.sample_size; + + p50 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 50); + p5 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 5); + p95 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 95); + + printf(" CPI summary: %2.2f {%2.2f|%2.2f|%2.2f}\n", + avg, p5, p50, p95); + } else { + printf(" CPI summary: %2.2f\n", avg); + } + } +} + +static inline void print_cycle_percent_summary(struct cycle_summary *s, + tsc_t total, char *p) { + if(s->count) { + long long avg; + double percent, seconds; + + avg = s->cycles / s->count; + + seconds = ((double)s->cycles) / opt.cpu_hz; + + percent = ((double)(s->cycles * 100)) / total; + + if ( opt.sample_size ) { + long long p5, p50, p95; + int data_size = s->count; + + if(data_size > opt.sample_size) + data_size = opt.sample_size; + + p50 = self_weighted_percentile(s->sample, data_size, 50); + p5 = self_weighted_percentile(s->sample, data_size, 5); + p95 = self_weighted_percentile(s->sample, data_size, 95); + + printf("%s: %7d %5.2lfs %5.2lf%% %6lld {%6lld|%6lld|%6lld}\n", + p, s->count, + seconds, + percent, + avg, p5, p50, p95); + } else { + printf("%s: %7d %5.2lfs %5.2lf%% %6lld\n", + p, s->count, + seconds, + percent, + avg); + } + } +} + +static inline void print_cycle_summary(struct cycle_summary *s, char *p) { + if(s->count) { + long long avg; + + avg = s->cycles / s->count; + + if ( opt.sample_size ) { + long long p5, p50, p95; + int data_size = s->count; + + if(data_size > opt.sample_size) + data_size = opt.sample_size; + + p50 = self_weighted_percentile(s->sample, data_size, 50); + p5 = self_weighted_percentile(s->sample, data_size, 5); + p95 = self_weighted_percentile(s->sample, data_size, 95); + + printf("%s: %7d %5.2lfs %6lld {%6lld|%6lld|%6lld}\n", + p, s->count, ((double)s->cycles)/opt.cpu_hz, + avg, p5, p50, p95); + } else { + 
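
/* A standalone sketch (not part of the patch) of why print_cpi_summary()
 * above reports total cycles divided by total instructions rather than a
 * plain average of per-sample CPI values: short runs with a high CPI would
 * otherwise dominate the figure.  The two hypothetical runs here are
 * illustrative only. */
#include <stdio.h>

int main(void)
{
    /* (instructions, cycles) for two hypothetical runs: CPI 1.0 and 10.0 */
    unsigned long long instr[2]  = { 1000000, 1000 };
    unsigned long long cycles[2] = { 1000000, 10000 };

    unsigned long long tot_i = 0, tot_c = 0;
    float naive = 0;
    int k;

    for (k = 0; k < 2; k++) {
        tot_i += instr[k];
        tot_c += cycles[k];
        naive += (float)cycles[k] / instr[k];
    }
    naive /= 2;

    printf("plain mean of per-run CPI:     %.2f\n", naive);        /* 5.50 */
    printf("aggregate CPI (sum c / sum i): %.2f\n",
           (float)tot_c / tot_i);                                  /* 1.01 */
    return 0;
}
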
printf("%s: %7d %5.2lfs %6lld\n", + p, s->count, ((double)s->cycles)/opt.cpu_hz, avg); + } + } +} + +#define PRINT_SUMMARY(_s, _p...) \ + do { \ + if((_s).count) { \ + if ( opt.sample_size ) { \ + unsigned long long p5, p50, p95; \ + int data_size=(_s).cycles_count; \ + if(data_size > opt.sample_size) \ + data_size=opt.sample_size; \ + p50=percentile((_s).cycles_sample, data_size, 50); \ + p5=percentile((_s).cycles_sample, data_size, 5); \ + p95=percentile((_s).cycles_sample, data_size, 95); \ + printf(_p); \ + printf(" %7d %5.2lfs %5.2lf%% %5lld cyc {%5lld|%5lld|%5lld}\n", \ + (_s).count, \ + ((double)(_s).cycles)/opt.cpu_hz, \ + summary_percent_global(&(_s)), \ + (_s).cycles_count ? (_s).cycles / (_s).cycles_count:0, \ + p5, p50, p95); \ + } else { \ + printf(_p); \ + printf(" %7d %5.2lfs %5.2lf%% %5lld cyc\n", \ + (_s).count, \ + ((double)(_s).cycles)/opt.cpu_hz, \ + summary_percent_global(&(_s)), \ + (_s).cycles_count ? (_s).cycles / (_s).cycles_count:0); \ + } \ + } \ + } while(0) + +#define INTERVAL_DESC_MAX 31 +struct interval_list { + struct interval_element *elem; + struct interval_list *next; + char desc[INTERVAL_DESC_MAX+1]; /* +1 for the null terminator */ +}; + +void __interval_cycle_percent_output(struct interval_element *e, tsc_t cycles) { + printf(" %.02lf", + __cycles_percent(e->cycles, cycles)); + clear_interval_cycles(e); +} + +void interval_cycle_percent_output(struct interval_element *e) { + __interval_cycle_percent_output(e, opt.interval.cycles); +} + +void interval_time_output(void) { + struct time_struct t; + abs_cycles_to_time(P.interval.start_tsc, &t); + + printf("%u.%09u", t.s, t.ns); +} + +void interval_table_output(void) { + int i; + + interval_time_output(); + + if(opt.interval.mode == INTERVAL_MODE_ARRAY) { + for(i=0; i<P.interval.array.count; i++) { + struct interval_element *e = P.interval.array.values[i]; + if(e) { + interval_cycle_percent_output(e); + } else { + printf(" 0.0"); + } + } + } else if(opt.interval.mode == INTERVAL_MODE_LIST) { + struct interval_list *p; + for(p = P.interval.list.head; p; p = p->next) + interval_cycle_percent_output(p->elem); + } + printf("\n"); +} + +void interval_table_tail(void) { + struct interval_list *p; + + printf("time"); + + for(p=P.interval.list.head; p; p = p->next) + printf(" %s", p->desc); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
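
/* A standalone sketch (not part of the patch) of the head/tail-pointer
 * append idiom behind P.interval.list and P.cr3 above: keeping a pointer
 * to the last next-link makes appends O(1) and preserves insertion order
 * for the column headers that interval_table_tail() prints.  This sketch
 * keeps tail always valid (&head when empty), a slightly different
 * invariant from the one noted in the diff; the node layout and the
 * descriptions are illustrative only. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
    char desc[32];
    struct node *next;
};

static struct node *head = NULL, **tail = &head;

static void append(const char *desc)
{
    struct node *n = malloc(sizeof(*n));

    if (!n) {
        perror("malloc");
        exit(1);
    }
    strncpy(n->desc, desc, sizeof(n->desc) - 1);
    n->desc[sizeof(n->desc) - 1] = '\0';
    n->next = NULL;

    *tail = n;          /* link onto the end of the list */
    tail  = &n->next;   /* remember where the next link goes */
}

int main(void)
{
    struct node *p;

    append("vmexit");
    append("cr3-switch");
    append("runstate");

    printf("time");
    for (p = head; p; p = p->next)
        printf(" %s", p->desc);
    printf("\n");
    return 0;
}
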