[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] Calculating real cpu usage of Xen domains correctly! (PATCH)
Anthony Liguori wrote: Rob Gardner wrote: Anyway, I can send out what I've got now, which is based on xen-unstable. Is that the base most people are interested in? Absolutely! I'd personally love to see it. OK, here is a patch which provides fine grained cpu utilization reporting. Some notes: - part of the code runs in the hypervisor to collect data, and another part of it runs in dom0 userland to process and display the data - the code contains vestiges of old features, and partially implemented new features; it is a work in progress. - this is the first time I'm sending out a patch to this list so please be gentle on me ;-) Feedback appreciated! Rob Gardner diff -Naur xen-unstable-orig/tools/qos/Makefile xen-unstable/tools/qos/Makefile --- xen-unstable-orig/tools/qos/Makefile 1969-12-31 17:00:00.000000000 -0700 +++ xen-unstable/tools/qos/Makefile 2005-02-25 12:20:36.000000000 -0700 @@ -0,0 +1,39 @@ + +XEN_ROOT=../.. +include $(XEN_ROOT)/tools/Rules.mk + +CC = gcc +#CFLAGS = -Wall -Werror -O3 +CFLAGS = -Wall -Werror -g +CFLAGS += -I $(XEN_ROOT)/xen/include + +CFLAGS += -I $(XEN_HYPERVISOR_IFS) +CFLAGS += -I $(XEN_LINUX_INCLUDE) +CFLAGS += -I $(XEN_XC) +CFLAGS += -I $(XEN_LIBXC) +CFLAGS += -I $(XEN_LIBXUTIL) + + +HDRS = $(wildcard *.h) +OBJS = $(patsubst %.c,%.o,$(wildcard *.c)) + +BIN = xenqos +#SCRIPTS = xentrace_format +MAN1 = $(wildcard *.1) +MAN8 = $(wildcard *.8) + +all: $(BIN) + +install: all + mkdir -p $(prefix)/usr/bin + mkdir -p $(prefix)/usr/man/man1 + mkdir -p $(prefix)/usr/man/man8 + install -m0755 $(BIN) $(SCRIPTS) $(prefix)/usr/bin + install -m0644 $(MAN1) $(prefix)/usr/man/man1 + install -m0644 $(MAN8) $(prefix)/usr/man/man8 + +clean: + $(RM) *.a *.so *.o *.rpm $(BIN) + +%: %.c $(HDRS) Makefile + $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -lxc -lxutil diff -Naur xen-unstable-orig/tools/qos/xenqos.c xen-unstable/tools/qos/xenqos.c --- xen-unstable-orig/tools/qos/xenqos.c 1969-12-31 17:00:00.000000000 -0700 +++ 
xen-unstable/tools/qos/xenqos.c 2005-02-25 12:20:36.000000000 -0700 @@ -0,0 +1,968 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + ****************************************************************************** + * xenqos.c + * + * Tool for collecting QoS data from Xen + * + * Copyright (C) 2004 by Hewlett Packard Fort Collins + * + * Author: Rob Gardner, rob.gardner@xxxxxx + * Date: September 2004 + */ + +#include <time.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <argp.h> +#include <signal.h> +#include <getopt.h> +#include <termios.h> +#include <sys/select.h> +#include <netdb.h> +#include <netinet/in.h> +#include "xc_private.h" +#define XENQOS + +#include <public/qos.h> + +#define MAX_DOMAIN_NAME 16 + +_new_qos_data *new_qos = NULL; + +int debug_focus = -1; +#define NFDS 8 +int data_stream_fd[NFDS]; +int server_socket = -1; + +u64 ns_intervals[] = + { + 60LL * (u64) billion, + 10LL * (u64) billion, + 1LL * (u64) billion + }; + + +// +// The idea of the data stream thing is to allow semi-processed +// to be saved or routed into another program. There are two ways +// in which a stream can be set up. First, if a file descriptor has +// already been opened when we start up, a command line option can +// be used to pass in the fd number. Second, we have a socket interface +// so that arbitrary network apps can connect to us and get qos data. 
+// +void init_data_streams(void) +{ + int i; + + for (i=0; i<NFDS; i++) + data_stream_fd[i] = -1; +} + +void setup_stream(int fd) +{ + int i; + struct stat s; + + if (fstat(fd, &s) == -1) { + fprintf(stderr, "Can't set up data stream on fd#%d cause it's not open\r\n", fd); + exit(1); + } + for (i=0; i<NFDS; i++) + if (data_stream_fd[i] == -1) { + data_stream_fd[i] = fd; + fprintf(stderr, "fd#%d set up as stream#%d\r\n",fd, i); + return; + } + + fprintf(stderr, "Can't set up data stream on fd#%d cause no space exists\r\n", fd); +} + +void streamlog(char *s) +{ + int len, i; + + len = strlen(s); + for (i=0; i<NFDS; i++) + if (data_stream_fd[i] != -1) + write(data_stream_fd[i], s, len); +} + +// called via command line +void setup_socket(char *arg) +{ + int port, sock; + struct sockaddr_in myAddr; +#define MAX_HOSTNAME 120 + char hostname[MAX_HOSTNAME + 1]; + struct hostent *hp; + + port = atoi(arg); /* port to listen on */ + // open socket and listen on it for incoming connections. + // periodically select on it to check for connections. 
+ + + bzero ((char *)&myAddr, sizeof(struct sockaddr_in)); + + gethostname(hostname, MAX_HOSTNAME); + hp = gethostbyname(hostname); + if (hp == NULL) { + fprintf(stderr, "gethostbyname call failed, cannot setup socket\r\n"); + return; + } + + myAddr.sin_family = hp->h_addrtype; + myAddr.sin_port = htons(port); + myAddr.sin_addr.s_addr = htons(INADDR_ANY); + + sock = socket(AF_INET, SOCK_STREAM, 0); + if (sock < 0) { + perror("socket()"); + return; + } + + if (bind(sock, (struct sockaddr *) &myAddr, sizeof(myAddr)) == -1) { + perror("bind()"); + fprintf(stderr, "Could not bind to port %d\r\n", port); + close(sock); + return; + } + + if (listen(sock, 3) == -1) { + perror("listen"); + return; + } + + fcntl(sock, F_SETFL, O_NONBLOCK); + fprintf(stderr, "Listening for incoming connections on port %d\r\n", port); + server_socket = sock; +} + +// called when connect arrives on server socket +void handle_incoming(void) +{ + int i, sock; + struct sockaddr isa; + + i = sizeof(isa); + sock = accept(server_socket, &isa, &i); + if (sock == -1){ + perror("accept"); + return; + } + + setup_stream(sock); +} + + + +// +// this routine pretty prints "time" as minutes, seconds, +// and fractions of seconds. 
+// +char *cvttime(u64 ns) +{ + static char output[100]; + u64 intpart, fracpart; + u64 min, sec; + + // printk("cvttime: ns=%lld; ", ns); + + intpart = ns / billion; + fracpart = ns % billion; + + // printk(" intpart=%lld, fracpart=%lld; ", intpart, fracpart); + + // this is necessary since our printf doesn't support %2.3f format + // while (fracpart >= 100) + // fracpart = fracpart / 10; + fracpart = ((fracpart * 100) + 499) / billion; + + min = intpart / 60; + sec = intpart % 60; + + // printk("min=%lld, sec=%lld", min, sec); + + if (min) + snprintf(output, 100, "%3lld:%02lld.%02lld", min, sec, fracpart); + else + snprintf(output, 100, " :%02lld.%02lld", sec, fracpart); + + // printk("output=%s\r\n", output); + + return output; +} + +// macros to hide the internal data structure format +#define NEXT_DATAPOINT new_qos->next_datapoint + +#define PREV_DATAPOINT QOS_DECR(NEXT_DATAPOINT) + +#define NS_RUNNABLE_SINCE_BOOT(X) new_qos->domain_info[X].ns_runnable_since_boot + +#define NS_ONCPU_SINCE_BOOT(X) new_qos->domain_info[X].ns_oncpu_since_boot + +#define NS_PASSED(DP) new_qos->qdata[DP].ns_passed + +#define NS_GOTTEN(DIDX,DP) new_qos->qdata[DP].ns_gotten[DIDX] + +#define NS_RUNNABLE(DIDX,DP) new_qos->qdata[DP].ns_runnable[DIDX] + +#define NS_IODOMAIN(DIDX,DP) new_qos->qdata[DP].ns_iodomain[DIDX] + +#define IO_COUNT(DIDX,DP) new_qos->qdata[DP].io_count[DIDX] + +#define EVENT_COUNT(DIDX,DP) new_qos->qdata[DP].event_count[DIDX] + +#define SWITCHIN_COUNT(DIDX,DP) new_qos->qdata[DP].switchin_count[DIDX] + +#define DOMAIN_ID(DIDX) new_qos->domain_info[DIDX].id + +#define DOMAIN_NAME(DIDX) new_qos->domain_info[DIDX].name + +#define IN_USE(DOM_IDX) new_qos->domain_info[DOM_IDX].in_use + + +// for the given thread, look at its history for the last +// few ns, and figure out its instantaneous cpu +// utilization +double zation_for_recent_ns(int didx, u64 ns, + int *nsamples, int start_dp, double *iozation) +{ + u64 ns_runnable = 0, ns_passed = 0, ns_gotten = 0; + int dp, 
loops, last_dp; + + last_dp = QOS_INCR(start_dp); + + for (loops = 0, dp = start_dp; ns_passed < ns; loops++) { + u64 passed = NS_PASSED(dp); + u64 gotten = NS_GOTTEN(didx,dp); + if (dp == last_dp) + break; + ns_passed += passed; + ns_gotten += gotten; + ns_runnable += NS_RUNNABLE(didx,dp); + dp = QOS_DECR(dp); + } + *nsamples = loops; + + if (ns_passed == 0) + return 0; + + return (double) (100 * ns_gotten) / (double) ns_passed; +} + + + +// for the given thread, look at its history for the last +// few ns, and figure out how many events were processed +// during this time +int events_for_recent_ns(int didx, u64 ns, int *nsamples) +{ + u64 ns_passed = 0; + int dp, loops, event_count = 0; + + for (loops = 0, dp = PREV_DATAPOINT; ns_passed < ns; loops++) { + ns_passed += NS_PASSED(dp); + event_count += EVENT_COUNT(didx,dp); + dp = QOS_DECR(dp); + if (dp == NEXT_DATAPOINT) + break; + } + *nsamples = loops; + return event_count; +} + +int switchins_for_recent_ns(int didx, u64 ns, int *nsamples) +{ + u64 ns_passed = 0; + int dp, loops, event_count = 0; + + for (loops = 0, dp = PREV_DATAPOINT; ns_passed < ns; loops++) { + ns_passed += NS_PASSED(dp); + event_count += SWITCHIN_COUNT(didx,dp); + dp = QOS_DECR(dp); + if (dp == NEXT_DATAPOINT) + break; + } + *nsamples = loops; + return event_count; +} + + +// total up all cpu time used by anybody over a given recent interval +u64 cpu_ns_over_interval(u64 ns, u64 *error) +{ + u64 ns_passed, ns_gotten, total_cpu; + int dp, loops, i; + + total_cpu = 0; + for (i = 0; i < NDOMAINS; i++) { + if (new_qos->domain_info[i].in_use == 0) + continue; + + ns_gotten = 0; + ns_passed = 0; + for (loops = 0, dp = QOS_DECR(new_qos->next_datapoint); ns_passed < ns; loops++) { + ns_passed += new_qos->qdata[dp].ns_passed; + ns_gotten += new_qos->qdata[dp].ns_gotten[i]; + dp = QOS_DECR(dp); + if (dp == new_qos->next_datapoint) + break; + } + // printf("dom%5d: used %10lldns over %10lldns, error = %10lldns\r\n", q->id, + // ns_gotten, ns_passed, 
ns_passed - ns); + if (ns_gotten > ns) + ns_gotten = ns; + total_cpu += ns_gotten; + + *error += ns_passed - ns; + } + return total_cpu; +} + +void calculate_zation(int didx, int start_datapoint, double *cpu_zation, + double *iod_usage, + int *samples_used) +{ + int i; + + for (i=0; i<3; i++) + *cpu_zation++ = zation_for_recent_ns(didx, ns_intervals[i], samples_used++, + start_datapoint, iod_usage++); +} + +void calculate_exec_count(int didx, int *exec_count, int *samples_used) +{ + int i; + + for (i=0; i<3; i++) + *exec_count++ = switchins_for_recent_ns(didx, ns_intervals[i], samples_used++); +} + +void calculate_event_count(int didx, int *event_counters, int *samples_used) +{ + int i; + + for (i=0; i<3; i++) + *event_counters++ = events_for_recent_ns(didx, ns_intervals[i], samples_used++); +} + + +void sprint_thread_qos_info(char *output, int BSize, int didx, double cpu_totals[]) +{ + u64 cpu_percentage; + int size, start_datapoint, i; + double cpu_zation[3]; + double iod_usage[3]; + int exec_counters[3], samples_used[3]; + char *leading_space = " "; + + + *output = '\0'; + start_datapoint = PREV_DATAPOINT; + + if (NS_RUNNABLE_SINCE_BOOT(didx) == 0) + cpu_percentage = 0; + else + cpu_percentage = (100 * NS_ONCPU_SINCE_BOOT(didx) / + NS_RUNNABLE_SINCE_BOOT(didx)); + + size = snprintf(output, BSize, "%5d %s ", DOMAIN_ID(didx), + cvttime(NS_ONCPU_SINCE_BOOT(didx))); + + + // must do two calls to sprintf here because cvttime returns + // a string stored in a static array. 
+ size += snprintf(output+size, BSize-size, "%s %3ld%%", + cvttime(NS_RUNNABLE_SINCE_BOOT(didx)), + (long)cpu_percentage); + + // now find utilization for 60 sec, 10 sec, 1 sec + calculate_zation(didx, start_datapoint, &cpu_zation[0], + &iod_usage[0], + &samples_used[0]); + + // util = zation_for_recent_ns(q, seconds60, &nsamples, start_datapoint, &iod_usage[0]); + for (i=0; i<3; i++) { + cpu_totals[i] += cpu_zation[i]; + + size += snprintf(output+size, BSize-size, " %5.1f%%", cpu_zation[i]); +#if 1 + size += snprintf(output+size, BSize-size, " "); +#else + size += snprintf(output+size, BSize-size, "(%3d)", samples_used[i]); +#endif + } + + size += snprintf(output+size, BSize-size, "%s (cpu)\r\n", DOMAIN_NAME(didx)); + + + calculate_exec_count(didx, &exec_counters[0], &samples_used[0]); + + // print run counters + size += snprintf(output+size, BSize-size, "%s", leading_space); + + for (i=0; i<3; i++) + size += snprintf(output+size, BSize-size, "%7d ", exec_counters[i]); + + size += snprintf(output+size, BSize-size, " %s (exec count)\r\n", DOMAIN_NAME(didx)); + + // here write some stuff to the + // secondary data streams + // (unimplemented) +} + + + +void qos_dump(void) +{ + int i; + static char line[1024]; + double cpu_totals[3]; + + for (i=0; i<3; i++) cpu_totals[i] = 0.0; + + // printk("%s", clear_screen_string); + printf("Domain Total Total QoS QoS QoS QoS Domain\r\n"); + printf(" ID CPU Runnable 4Ever 60 sec 10 sec 1 sec Name\r\n"); + + printf("----------------------------------------------------------------------------------------\r\n"); + + for (i=0; i<NDOMAINS; i++) { + // printf("Entry %02d: ", i); + if (IN_USE(i) == 0) + continue; + + sprint_thread_qos_info(line, sizeof(line), i, &cpu_totals[0]); + printf("%s", line); + printf("----------------------------------------------------------------------------------------\r\n"); + } + // printf(" --------------------------------\r\n"); + printf(" Totals: %5.1f%% %5.1f%% %5.1f%%\r\n", + cpu_totals[0], 
cpu_totals[1], cpu_totals[2]); +} + + +void map_data(void) +{ + int ret, size, err; + dom0_op_t op; /* dom0 op we'll build */ + unsigned long paddr; + void *q; + int xc_handle = xc_interface_open(); + + op.cmd = DOM0_GETQOSBUF; + op.interface_version = DOM0_INTERFACE_VERSION; + + ret = do_dom0_op(xc_handle, &op); + if ( ret != 0 ) { + perror("Failure to get qos data pointer from Xen"); + exit(1); + } + + paddr = op.u.gettbufs.mach_addr; + size = op.u.gettbufs.size; + + printf("calling mapper(size=0x%x, paddr=0x%lx)\r\n", size, paddr); + + q = xc_map_foreign_range(xc_handle, 0, // domain 0 id + size, PROT_READ|PROT_WRITE, + paddr >> PAGE_SHIFT); + + if (q == (void *)-1) { + err = errno; + perror("xc_map qos_data"); + printf("errno was %d\r\n", err); + exit(3); + } + printf("Got back 0x%lx from xc_map()\r\n", (unsigned long) q); + new_qos = q; + + xc_interface_close(xc_handle); +} + + + +void hypervisor_overhead(void) +{ + u64 error = 0, cpu_used; + + cpu_used = cpu_ns_over_interval(billion, &error); + printf("\r\ncpu time used over the last 1s = %lld\r\n", cpu_used); + printf("error = %lld\r\n", error); +} + +void event_count_dump(void) +{ + int ecount, i, nsamples; + + for (i=0; i<NDOMAINS; i++) { + if (IN_USE(i) == 0) + continue; + ecount = events_for_recent_ns(i, billion, &nsamples); + printf("Domain %d: \t%d events over the past second\r\n", DOMAIN_ID(i), ecount); + } +} + + +#define DP_TRACKER_SIZE 5 +int dp_tracker[DP_TRACKER_SIZE]; +int dp_tracker_p = 0; +int dp_last_dp = 0; + +void dump_datapoint_rate(void) +{ + int avg_rate, i; + int last_rate, new_rate; + int this_dp; + + for (avg_rate=0, i=0; i<DP_TRACKER_SIZE; i++) + avg_rate = avg_rate + dp_tracker[i]; + avg_rate = avg_rate / DP_TRACKER_SIZE; + printf("\r\nAverage datapoint advancement rate = %d/sec (over %d seconds)\r\n", + avg_rate, DP_TRACKER_SIZE); + + last_rate = dp_tracker[dp_tracker_p]; + dp_tracker_p = (dp_tracker_p + 1) % DP_TRACKER_SIZE; + + this_dp = NEXT_DATAPOINT; + + if (this_dp < 
dp_last_dp) /* handle wraparound */ + new_rate = this_dp + (NSAMPLES - dp_last_dp); + else + new_rate = this_dp - dp_last_dp; + + dp_tracker[dp_tracker_p] = new_rate; + dp_last_dp = this_dp; + + printf("next DP = %d (advanced %d during last 1 second.)\r\n", + this_dp, new_rate); +} + + +void set_measurement_frequency(int new_freq) +{ + printf("old measurement frequency = %d\r\n", new_qos->measurement_frequency); + if (new_freq <= 0) { + printf("can't set new frequency to %d\r\n", new_freq); + return; + } + printf("setting new frequency = %d\r\n", new_freq); + new_qos->measurement_frequency = new_freq; +} + +void check_qdata_size(void) +{ + unsigned long size; + + printf("I think the size of qdata[n] is 0x%x\r\n", + sizeof(u64)*(2+5*NDOMAINS)); + printf("calculating it by subtracting consecutive qdata elements gives "); + + size = (unsigned long)(&new_qos->qdata[1]) - (unsigned long)(&new_qos->qdata[0]); + printf(" 0x%lx\r\n", size); +} + + + + +//////////////////// +// interactive commands +int dump_selection[5]; + + +void dump_selected(int didx) +{ + int i, dp; + u64 gotten = 0; + + if (dump_selection[0]) { + + printf("Gotten:\t"); + + for (i=0, dp=PREV_DATAPOINT; i<10; i++) { + gotten = new_qos->qdata[dp].ns_gotten[didx]; + printf("%lld ", gotten); + dp = QOS_DECR(dp); + } + printf("\r\n"); + } + + if (dump_selection[1]) { + + printf("Passed:\t"); + + for (i=0, dp=PREV_DATAPOINT; i<10; i++) { + gotten = new_qos->qdata[dp].ns_passed; + printf("%lld ", gotten); + dp = QOS_DECR(dp); + } + printf("\r\n"); + } + if (dump_selection[2]) { + + printf("Runnable:\t"); + + for (i=0, dp=PREV_DATAPOINT; i<10; i++) { + gotten = new_qos->qdata[dp].ns_runnable[didx]; + printf("%lld ", gotten); + dp = QOS_DECR(dp); + } + printf("\r\n"); + } +} + + +struct termios orig, new; + +void clean_exit(int signo) +{ + if (isatty(0)) + tcsetattr(0, TCSANOW, &orig); + exit(0); +} + + +void test_command(char *arg) +{ + char *p; + + printf("in test_command, with arg='%s'\r\n", arg); + + for 
(p=arg; *p != '\0'; p++) + printf("%x ", *p); + printf("\r\n"); +} + +void focus(char *arg) +{ + if (*arg == '\0') { + printf("Deleting focus.\r\n"); + debug_focus = -1; + return; + } + + debug_focus = atoi(arg); + printf("Setting focus = %d\r\n", debug_focus); +} + +void display_select(char *arg) +{ + int on = 1; + int option; + + + if (*arg == '\0') + return; + if (*arg == '!') { + on = 0; + arg++; + } + + option = atoi(arg); + if (option < 0 || option > 4) { + printf("option %d doesn't exist\r\n", option); + return; + } + + dump_selection[option] = on; +} + + + +typedef enum {K_IDLE, K_INPUT} keypress_states; + +#define K_INPUT_SIZE 32 + +int handle_keypress(void) +{ + static keypress_states state = K_IDLE; + static char input[K_INPUT_SIZE]; + static int ip = 0; + static void (*action)(char *); + static char *prompt; + char c; + + printf("\r \r"); + fflush(stdout); + + switch (state) { + case K_IDLE: + input[0] = '\0'; + ip = 0; + printf("\rcommand>>> "); + break; + case K_INPUT: + printf("\r%s: %s", prompt, input); + break; + } + + fflush(stdout); + + while (read(0, &c, 1) == 1) { + + switch (state) { + case K_IDLE: + switch (c) { + case 'd': /* display */ + prompt = "Gotten(0), Passed(1), Runnable(2), NS_IO(3), IO(4)"; + state = K_INPUT; + action = display_select; + return 1; + + case 'f': /* focus */ + prompt = "Focus"; + state = K_INPUT; + action = focus; + return 1; + + case 'q': + case 'Q': + printf("Quit\r\n"); + clean_exit(0); + break; + default: + printf("?"); + return 0; + } + break; + case K_INPUT: + switch (c) { + case '\r': + case '\n': + printf("\r\ninput was '%s'\r\n", input); + state = K_IDLE; + (*action)(input); + return 1; + case '\b': + case 0x1b: + case 0x7f: + if (ip == 0) { + state = K_IDLE; + return 1; + } + else { + ip--; + input[ip] = '\0'; + } + return 1; + default: + if (ip >= K_INPUT_SIZE) + write(1, ".", 1); + else { + write(1, &c, 1); + input[ip++] = c; + input[ip] = '\0'; + } + return 1; + // printf("\r\nip=%d, lastchar=0x%x\r\n", 
ip, c); + break; + } + } + } + return 0; +} + +void poll_all(int sec) +{ + struct timeval t; + fd_set readfds; + int maxfds = 1; + + FD_ZERO(&readfds); + if (server_socket != -1) { + FD_SET(server_socket, &readfds); + maxfds = server_socket+1; + } + + t.tv_sec = 1; + t.tv_usec = 0; + + select(maxfds, &readfds, NULL, NULL, &t); + + if (server_socket != -1 && FD_ISSET(server_socket, &readfds)) + handle_incoming(); +} + + + +int poll_stdin(int sec) +{ + struct timeval t; + fd_set readfds; + int maxfds = 1; + + FD_ZERO(&readfds); + FD_SET(0, &readfds); + if (server_socket != -1) { + FD_SET(server_socket, &readfds); + maxfds = server_socket+1; + } + + t.tv_sec = 1; + t.tv_usec = 0; + + select(maxfds, &readfds, NULL, NULL, &t); + + if (server_socket != -1 && FD_ISSET(server_socket, &readfds)) + handle_incoming(); + + if (FD_ISSET(0, &readfds)) + return 1; + + return 0; +} + + + +#define clear_screen_string "\033[H\033[J\r\n" +int main(int argc, char **argv) +{ + int freq = 0; + char *dumpfile = NULL, *simfile = NULL; + + // parse options + + init_data_streams(); + while (1) { + // int this_option_optind = optind ? 
optind : 1; + int option_index = 0; + int c; + // int digit_optind = 0; + static struct option long_options[] = { + {"old", 0, 0, 0}, + {"frequency", 1, 0, 0}, + {"dump", 1, 0, 0}, + {"simulate", 1, 0, 0}, + {"stream", 1, 0, 0}, + {"listen", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:012", + long_options, &option_index); + if (c == -1) + break; + + if (c == 0) { + // printf ("option %s", long_options[option_index].name); + // if (optarg) + // printf (" with arg %s", optarg); + // printf ("\n"); + switch (option_index) { + case 0: /* old */ + // newstuff = 0; + printf("using old stuff\n"); + break; + case 1: /* frequency */ + if (optarg) { + freq = atoi(optarg); + } + break; + case 2: /* dump */ + dumpfile = strdup(optarg); + break; + case 3: /* simulate */ + simfile = strdup(optarg); + break; + case 4: + setup_stream(atoi(optarg)); + break; + case 5: + setup_socket(optarg); + break; + } + } + else { + printf ("illegal option: '%c'\n", c); + exit(1); + } + } + + + map_data(); + + if (freq) + set_measurement_frequency(freq); + printf("measurement frequency = %d\r\n", new_qos->measurement_frequency); + + // check_qdata_size(); + + if (dumpfile != NULL) { + _new_qos_data *dump_qos; + int fd; + + fd = creat(dumpfile, 0777); + if (fd < 0) { + perror(dumpfile); + _exit(1); + } + dump_qos = malloc(sizeof(_new_qos_data)); + memcpy(dump_qos, new_qos, sizeof(_new_qos_data)); + if (write(fd, dump_qos, sizeof(_new_qos_data)) != sizeof(_new_qos_data)) + perror("write"); + close(fd); + printf("Data dumped\r\n"); + exit(0); + } + + + if (simfile != NULL) { + _new_qos_data *sim_qos; + int fd; + + fd = open(simfile, O_RDONLY); + if (fd < 0) { + perror(simfile); + _exit(1); + } + sim_qos = malloc(sizeof(_new_qos_data)); + if (read(fd, sim_qos, sizeof(_new_qos_data)) != sizeof(_new_qos_data)) { + perror("read failed on sim file"); + exit(1); + } + + close(fd); + printf("Data read into simulation buffer\r\n"); + new_qos = sim_qos; + } + + + sleep(2); + + if 
(isatty(0)) { + tcgetattr(0, &orig); + signal(SIGINT, clean_exit); + signal(SIGTERM, clean_exit); + tcgetattr(0, &new); + cfmakeraw(&new); + tcsetattr(0, TCSANOW, &new); + fcntl(0, F_SETFL, O_NONBLOCK); + } + + while (1) { + printf("%s", clear_screen_string); + qos_dump(); + if (debug_focus != -1) { + printf("Debug focus = %d\r\n", debug_focus); + dump_selected(debug_focus); + } + + if (isatty(0)) { + do { + while (handle_keypress()); + } while (poll_stdin(1)); + } + else + poll_all(1); + } + + return 0; +} diff -Naur xen-unstable-orig/xen/common/dom0_ops.c xen-unstable/xen/common/dom0_ops.c --- xen-unstable-orig/xen/common/dom0_ops.c 2005-02-13 21:20:45.000000000 -0700 +++ xen-unstable/xen/common/dom0_ops.c 2005-02-25 12:20:36.000000000 -0700 @@ -19,6 +19,7 @@ #include <xen/console.h> #include <asm/shadow.h> #include <public/sched_ctl.h> +#include <public/qos.h> #define TRC_DOM0OP_ENTER_BASE 0x00020000 #define TRC_DOM0OP_LEAVE_BASE 0x00030000 @@ -392,6 +393,13 @@ break; #endif + case DOM0_GETQOSBUF: + { + ret = (int) get_qos_info(&op->u.gettbufs); + copy_to_user(u_dom0_op, op, sizeof(*op)); + } + break; + case DOM0_READCONSOLE: { ret = read_console_ring(op->u.readconsole.str, diff -Naur xen-unstable-orig/xen/common/qos.c xen-unstable/xen/common/qos.c --- xen-unstable-orig/xen/common/qos.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-unstable/xen/common/qos.c 2005-02-25 12:20:36.000000000 -0700 @@ -0,0 +1,350 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2004 - Rob Gardner - HP Fort Collins + **************************************************************************** + * + * File: common/qos.c + * Author: qos.c + * + * Description: Quality of Service (QoS) handling routines + * + */ + +#include <xen/config.h> +#include <xen/init.h> +#include <xen/lib.h> +#include <xen/sched.h> +#include <xen/delay.h> +#include <xen/event.h> +#include <xen/time.h> +#include <xen/ac_timer.h> 
+#include <xen/perfc.h> +#include <xen/sched-if.h> +#include <xen/softirq.h> +#include <xen/trace.h> +#include <public/sched_ctl.h> +#include <public/qos.h> + +_new_qos_data *new_qos = (void *)0; + +int qos_on = 0; +int qos_defeat = 0; + + + +#define QOS_DEFEAT() if (qos_defeat) return +#define ID(X) ((X>NDOMAINS-1)?(NDOMAINS-1):X) + + + +// advance to next datapoint for all domains +void advance_next_datapoint(u64 now) +{ + int new, old, didx; + + old = new_qos->next_datapoint; + new = QOS_INCR(old); + new_qos->next_datapoint = new; + // memset(&new_qos->qdata[new], 0, sizeof(u64)*(2+5*NDOMAINS)); + for (didx = 0; didx < NDOMAINS; didx++) { + new_qos->qdata[new].ns_runnable[didx] = 0; + new_qos->qdata[new].ns_gotten[didx] = 0; + new_qos->qdata[new].event_count[didx] = 0; + new_qos->qdata[new].switchin_count[didx] = 0; + } + new_qos->qdata[new].ns_passed = 0; + new_qos->qdata[new].timestamp = now; +} + + +void qos_update_thread(struct exec_domain *ed, u64 now) +{ + int n, id; + u64 last_update_time, time_since_update; + u64 start, run_time = 0; + struct domain *d = ed->domain; + + QOS_DEFEAT(); + + id = ID(d->id); + + // printk("new stuff 0, d->id=%d, new_qos=%p\r\n", d->id, new_qos); + n = new_qos->next_datapoint; + // printk("new stuff 0.1, ID=%d\r\n", id); + last_update_time = new_qos->domain_info[id].last_update_time; + // printk("new stuff 0.2\r\n"); + // handle wraparound + if (last_update_time > now) + time_since_update = now + (~0LL - last_update_time); + else + time_since_update = now - last_update_time; + + // printk("new stuff 0.3\r\n"); + new_qos->domain_info[id].last_update_time = now; + // printk("new stuff 1\r\n"); + + if (new_qos->domain_info[id].runnable_at_last_update && (d == current->domain)) { + start = new_qos->domain_info[id].start_time; + if (start > now) { // wrapped around + run_time = now + (~0LL - start); + } + else + run_time = now - start; + new_qos->domain_info[id].ns_oncpu_since_boot += run_time; + 
new_qos->domain_info[id].time_on_cpu += run_time; + new_qos->domain_info[id].start_time = now; + new_qos->domain_info[id].ns_since_boot += time_since_update; + } + // printk("new stuff 2\r\n"); + + if (new_qos->domain_info[id].runnable_at_last_update) { + new_qos->domain_info[id].ns_runnable_since_boot += time_since_update; + } + // printk("new stuff 3\r\n"); + new_qos->qdata[n].ns_gotten[id] += run_time; + // new_qos->qdata[n].event_count[id]++; + // printk("new stuff 4\r\n"); + + if (new_qos->domain_info[id].runnable_at_last_update) + new_qos->qdata[n].ns_runnable[id] += time_since_update; + new_qos->domain_info[id].runnable_at_last_update = domain_runnable(ed); + // printk("new stuff 5\r\n"); + + // how much time passed since this datapoint was updated? + // time_since_update == time since this domain was updated + if (now > new_qos->qdata[n].timestamp) { + // all is right with the world, time is increasing + new_qos->qdata[n].ns_passed += (now - new_qos->qdata[n].timestamp); + } + else { + // time wrapped around + new_qos->qdata[n].ns_passed += (now + (~0LL - new_qos->qdata[n].timestamp)); + } + + new_qos->qdata[n].timestamp = now; +} + + + +// called by dump routines to update all structures +void qos_update_all(u64 now) +{ + struct domain *d; + struct exec_domain *ed; + + QOS_DEFEAT(); + + qos_update_thread(&idle0_exec_domain, now); + for_each_domain(d) { + for_each_exec_domain(d, ed) { + qos_update_thread(ed, now); + } + } +} + + +void qos_update_thread_stats(struct exec_domain *ed) +{ + u64 now = NOW(); + + QOS_DEFEAT(); + // is it time to create a new data point? 
+    if (new_qos->qdata[new_qos->next_datapoint].ns_passed > NS_PER_SAMPLE) {
+        qos_update_all(now);
+        advance_next_datapoint(now);
+        return;
+    }
+
+    qos_update_thread(ed, now);
+}
+
+
+// Hook for a domain being put to sleep; may also be called when the
+// thread is already asleep.
+// ed: the exec domain going to sleep.
+void qos_state_sleeping(struct exec_domain *ed)
+{
+    QOS_DEFEAT();
+    // Not runnable: skip the stats update.
+    if (unlikely(!domain_runnable(ed)))
+        return;
+    qos_update_thread_stats(ed);
+}
+
+// Hook for a thread becoming runnable; may also be called when the thread
+// is already runnable, in which case the stats update is skipped.
+void qos_state_runnable(struct exec_domain *ed)
+{
+    static int init2_done = 0;
+
+    QOS_DEFEAT();
+    // One-shot: run stage 2 the first time a domain-0 thread gets here.
+    if ( (init2_done == 0) && (ed->domain->id == 0) ) {
+        extern void qos_init_stage2(void);
+        qos_init_stage2();
+        init2_done = 1;
+    }
+    if (unlikely(domain_runnable(ed)))
+        return;
+
+    qos_update_thread_stats(ed);
+}
+
+// Hook for a new thread getting the cpu.  Records its start_time, bumps
+// its switchin_count in the current datapoint, then updates its stats.
+// Prints a warning if ed is already current.
+void qos_switch_in(struct exec_domain *ed)
+{
+    struct domain *d = ed->domain;
+
+    QOS_DEFEAT();
+    if (ed == current)
+        printk("switching in domain %d but it's already current\r\n", d->id);
+    // NOTE(review): "+= 0" is a no-op; "= 0" may have been intended -- confirm.
+    new_qos->domain_info[ID(d->id)].time_on_cpu += 0;
+    new_qos->domain_info[ID(d->id)].start_time = NOW();
+    new_qos->qdata[new_qos->next_datapoint].switchin_count[ID(d->id)]++;
+
+    qos_update_thread_stats(ed);
+}
+
+// Hook for the current thread being taken off the cpu; warns if ed != current.
+void qos_switch_out(struct exec_domain *ed)
+{
+    struct domain *d = ed->domain;
+
+    QOS_DEFEAT();
+    if (ed != current)
+        printk("switching out domain %d but it is not current.\r\n", d->id);
+    qos_update_thread_stats(ed);
+}
+
+
+// Stage 1 of initialisation: allocate contiguous xenheap pages for one
+// _new_qos_data, zero it, publish it through new_qos and set qos_on.
+// If the allocation fails, a message is printed and neither is touched.
+void qos_init_stage1(void)
+{
+    int size, nr_pages, order;
+    char *rawbuf;
+
+    QOS_DEFEAT();
+
+    // one page more than size / PAGE_SIZE, then converted to an order
+    size = sizeof(_new_qos_data);
+    nr_pages = (size / PAGE_SIZE) + 1;
+    printk("NEWQOS: init, page_size = %ld, need %d pages\r\n", PAGE_SIZE, nr_pages);
+    order = get_order(nr_pages * PAGE_SIZE);
+
+    // this alloc's contiguous pages
+    if ( (rawbuf = (char *)alloc_xenheap_pages(order)) == NULL )
+    {
+        printk("QosInit: memory allocation failed\n");
+        return;
+    }
+
+    new_qos = (_new_qos_data *) rawbuf;
+    printk("NEWQoS: in qos_init,1; new_qos_data = 0x%p\r\n", new_qos);
+    printk("NEWQoS: __pa(new_qos) = 0x%lx\r\n", __pa(new_qos));
+    printk("NEWQoS: __pa(rawbuf) = 0x%lx\r\n", __pa(rawbuf));
+    memset(new_qos, 0, sizeof(_new_qos_data));
+    qos_on = 1;
+}
+
+//
+// Stage 2 of initialisation: share the pages of the new_qos buffer with dom0.
+// Deferred until domain 0 is up and running; doing this too early results in
+// sadness.
+//
+void qos_init_stage2(void)
+{
+    int i, size, nr_pages;
+    char *rawbuf;
+
+    QOS_DEFEAT();
+
+    // Must match qos_init_stage1: the buffer holds ONE _new_qos_data.  Scaling
+    // by NDOMAINS here would share pages beyond the end of the allocation.
+    size = sizeof(_new_qos_data);
+    nr_pages = (size / PAGE_SIZE) + 1;
+
+    rawbuf = (char *) new_qos;
+
+    /* Share pages so that xenqos can map them. */
+    // NOTE(review): dom0 is not declared locally; presumably a global -- confirm.
+    for( i = 0; i < nr_pages; i++)
+        SHARE_PFN_WITH_DOMAIN(virt_to_page(rawbuf+(i*PAGE_SIZE)), dom0);
+}
+
+// Per-thread setup: runs stage 1 on the first call, then resets this thread's
+// domain_info slot and its counters in every datapoint.
+void qos_init_thread(struct exec_domain *ed)
+{
+    int i, id;
+    static int init1_done = 0;
+    u64 now = NOW();
+    struct domain *d = ed->domain;
+
+    QOS_DEFEAT();
+    id = ID(d->id);
+    if (init1_done == 0) {
+        qos_init_stage1();
+        init1_done = 1;
+    }
+    // Stage 1 may have failed to allocate; never write through an unset new_qos.
+    if (new_qos == NULL)
+        return;
+
+    printk("in qos_init_thread,1 for domain %d\r\n", d->id);
+    printk("in qos_init_thread,2\r\n");
+
+    // init new stuff: new_qos->domain_info[id] and
+    // new_qos->qdata[0..NSAMPLES-1].*[id]
+
+    memset(&new_qos->domain_info[id], 0, sizeof(_domain_info));
+    new_qos->domain_info[id].last_update_time = now;
+    new_qos->domain_info[id].ed = ed;
+    new_qos->domain_info[id].in_use = 1;
+    new_qos->domain_info[id].id = d->id;
+
+    if (d->id == IDLE_DOMAIN_ID)
+        sprintf(new_qos->domain_info[id].name, "Idle Task%d", ed->processor);
+    else
+        sprintf(new_qos->domain_info[id].name, "Domain#%d", d->id);
+
+    for (i=0; i<NSAMPLES; i++) {
+        new_qos->qdata[i].ns_runnable[id] = 0;
+        new_qos->qdata[i].ns_gotten[id] = 0;
+        new_qos->qdata[i].event_count[id] = 0;
+        new_qos->qdata[i].switchin_count[id] = 0;
+    }
+}
+
+// Marks the thread's domain_info slot as no longer in use.
+void qos_kill_thread(struct exec_domain *ed)
+{
+    QOS_DEFEAT();
+    new_qos->domain_info[ID(ed->domain->id)].in_use = 0;
+}
+
+/**
+ * Called by the %DOM0_GETQOSBUF dom0 op to fetch the machine address and
+ * size of the qos buffer; returns -ENODATA (both zeroed) while !qos_on.
+ */
+int get_qos_info(dom0_gettbufs_t *st)
+{
+    if (qos_on)
+    {
+        st->mach_addr = __pa(new_qos);
+        st->size = ((sizeof(_new_qos_data) / PAGE_SIZE) + 1) * PAGE_SIZE;
+
+        return 0;
+    }
+    else
+    {
+        st->mach_addr = 0;
+        st->size = 0;
+        return -ENODATA;
+    }
+}
+
diff -Naur xen-unstable-orig/xen/common/schedule.c xen-unstable/xen/common/schedule.c
--- xen-unstable-orig/xen/common/schedule.c 2005-02-13 21:20:49.000000000 -0700
+++ xen-unstable/xen/common/schedule.c 2005-02-25 12:20:36.000000000 -0700
@@ -27,6 +27,7 @@
 #include <xen/softirq.h>
 #include <xen/trace.h>
 #include <public/sched_ctl.h>
+#include <public/qos.h>
 
 /* opt_sched: scheduler - default to Borrowed Virtual Time */
 static char opt_sched[10] = "bvt";
@@ -190,13 +191,15 @@
     }
 
     SCHED_OP(add_task, ed);
+    qos_init_thread(ed);
+    qos_state_sleeping(ed);
     TRACE_2D(TRC_SCHED_DOM_ADD, d->id, ed);
 }
 
 
 void sched_rem_domain(struct exec_domain *ed)
 {
-
+    qos_kill_thread(ed);
     rem_ac_timer(&ed->timer);
     SCHED_OP(rem_task, ed);
     TRACE_3D(TRC_SCHED_DOM_REM, ed->domain->id, ed->eid, ed);
@@ -204,6 +207,7 @@
 
 void init_idle_task(void)
 {
+    qos_init_thread(current);
     if ( SCHED_OP(init_idle_task, current) < 0 )
         BUG();
 }
@@ -225,6 +229,7 @@
         smp_mb();
         cpu_relax();
     }
+    qos_state_sleeping(d);
 }
 
 void domain_wake(struct exec_domain *ed)
@@ -242,6 +247,7 @@
 #endif
     }
 
+    qos_state_runnable(ed);
     clear_bit(EDF_MIGRATED, &ed->ed_flags);
 
     spin_unlock_irqrestore(&schedule_data[ed->processor].schedule_lock, flags);
@@ -254,6 +260,7 @@
     current->vcpu_info->evtchn_upcall_mask = 0;
     set_bit(EDF_BLOCKED, &current->ed_flags);
     TRACE_2D(TRC_SCHED_BLOCK, current->domain->id, current);
+    qos_state_sleeping(current);
     __enter_scheduler();
     return 0;
 }
@@ -419,9 +426,11 @@
     if ( !is_idle_task(next->domain) )
         update_dom_time(next->domain);
 
-    if ( unlikely(prev == next) )
+    if ( unlikely(prev == next) ) {
+        qos_update_thread_stats(current);
         return;
-
+    }
+
     perfc_incrc(sched_ctx);
 
     if ( !is_idle_task(prev->domain) )
@@ -451,6 +460,8 @@
 
     TRACE_2D(TRC_SCHED_SWITCH, next->domain->id, next);
 
+    qos_switch_out(prev);
+    qos_switch_in(next);
     switch_to(prev, next);
 
     /*
diff -Naur xen-unstable-orig/xen/include/public/dom0_ops.h xen-unstable/xen/include/public/dom0_ops.h
--- xen-unstable-orig/xen/include/public/dom0_ops.h 2005-02-13 21:20:46.000000000 -0700
+++ xen-unstable/xen/include/public/dom0_ops.h 2005-02-25 12:20:36.000000000 -0700
@@ -414,6 +414,8 @@
     u32 _pad0;
 } PACKED dom0_microcode_t; /* 16 bytes */
 
+#define DOM0_GETQOSBUF 41
+
 typedef struct {
     u32 cmd; /* 0 */
     u32 interface_version; /* 4 */ /* DOM0_INTERFACE_VERSION */
diff -Naur xen-unstable-orig/xen/include/public/qos.h xen-unstable/xen/include/public/qos.h
--- xen-unstable-orig/xen/include/public/qos.h 1969-12-31 17:00:00.000000000 -0700
+++ xen-unstable/xen/include/public/qos.h 2005-02-25 12:20:36.000000000 -0700
@@ -0,0 +1,87 @@
+#ifndef __QOS_H__
+#define __QOS_H__
+
+///// qos stuff
+#define million 1000000LL
+#define billion (1000LL*million)
+
+#define NDOMAINS 16
+#define NSAMPLES 600
+#define NFLAGS 24
+#define NS_PER_SAMPLE (billion/10)
+// Step a sample index forward/backward, wrapping over all of 0..NSAMPLES-1.
+#define QOS_INCR(N) (((N) < (NSAMPLES-1)) ? ((N)+1) : 0)
+#define QOS_DECR(N) (((N) == 0) ? (NSAMPLES-1) : ((N)-1))
+
+#define MAX_NAME_SIZE 32
+
+//// qos defs
+#ifndef XENQOS
+extern void qos_state_sleeping(struct exec_domain *ed);
+extern void qos_state_runnable(struct exec_domain *ed);
+extern void qos_switch_in(struct exec_domain *ed);
+extern void qos_switch_out(struct exec_domain *ed);
+extern void qos_update_current(struct exec_domain *ed);
+extern void qos_init_thread(struct exec_domain *ed);
+extern void qos_kill_thread(struct exec_domain *ed);
+extern void qos_update_thread_stats(struct exec_domain *ed);
+extern int get_qos_info(dom0_gettbufs_t *);
+#endif
+
+
+// data point:
+// stuff that is recorded once for each measurement interval
+typedef struct
+{
+    u64 ns_runnable[NDOMAINS];     // ns "wanted" in the last sample period
+    u64 ns_gotten[NDOMAINS];       // ns used in the last sample period
+    u64 event_count[NDOMAINS];     // # of accounting events
+    u64 switchin_count[NDOMAINS];
+} _data_points;
+
+// per domain stuff
+typedef struct
+{
+    u64 last_update_time;
+    u64 start_time;                // when the thread started running
+    u64 time_on_cpu;               // how long the thread has been running
+    u64 ns_since_boot;             // time gone by since boot
+    u64 ns_oncpu_since_boot;       // total cpu time used by thread since boot
+    u64 ns_runnable_since_boot;
+    struct exec_domain *ed;
+    int runnable_at_last_update;   // true if the thread was runnable last time we checked.
+                                   // tells us something about what happened during the
+                                   // sample period that we are analysing right now
+    int in_use;                    // set to 1 by qos_init_thread, 0 by qos_kill_thread
+    domid_t id;
+    char name[MAX_NAME_SIZE];
+} _domain_info;
+
+
+
+typedef struct
+{
+    struct
+    {
+        _data_points;
+        u64 ns_passed;             // ns gone by on the wall clock, ie, the sample period
+        u64 timestamp;
+    } qdata[NSAMPLES];
+
+    _domain_info domain_info[NDOMAINS];
+
+    // control information
+    int next_datapoint;
+
+    // parameters
+    int measurement_frequency;     // for example
+
+    // feedback
+    int dom0_invoke_counter;       // for example
+
+
+} _new_qos_data;
+
+
+
+#endif
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |