[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH V2] Switch from select() to poll() in xenconsoled's IO loop.
In Linux, select() typically supports up to 1024 file descriptors. This can be a problem when a user tries to boot up many guests. Switching to poll() has minimal impact on existing code and has better scalability. Tracking arrays are dynamically allocated / reallocated. If the tracking arrays fail to expand, we just ignore the incoming fd. Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx> --- tools/console/daemon/io.c | 159 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 121 insertions(+), 38 deletions(-) diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c index 48fe151..830fc18 100644 --- a/tools/console/daemon/io.c +++ b/tools/console/daemon/io.c @@ -28,7 +28,7 @@ #include <stdlib.h> #include <errno.h> #include <string.h> -#include <sys/select.h> +#include <poll.h> #include <fcntl.h> #include <unistd.h> #include <termios.h> @@ -928,9 +928,98 @@ static void handle_log_reload(void) } } + +/* Should have at least max_fd + 1 elements */ +#define DEFAULT_ARRAY_SIZE 1024 +#define GROWTH_LENGTH 512 +static struct pollfd *fds; +static struct pollfd **fd_to_pollfd; +static unsigned int current_array_size; +static unsigned int nr_fds; + +static int initialize_pollfd_arrays(void) +{ + fds = (struct pollfd *) + malloc(sizeof(struct pollfd) * DEFAULT_ARRAY_SIZE); + if (!fds) + goto fail; + fd_to_pollfd = (struct pollfd **) + malloc(sizeof(struct pollfd *) * DEFAULT_ARRAY_SIZE); + if (!fd_to_pollfd) + goto fail; + memset(fds, 0, sizeof(struct pollfd) * DEFAULT_ARRAY_SIZE); + memset(fd_to_pollfd, 0, sizeof(struct pollfd *) * DEFAULT_ARRAY_SIZE); + current_array_size = DEFAULT_ARRAY_SIZE; + return 0; +fail: + free(fds); + free(fd_to_pollfd); + return -ENOMEM; +} + +static void destroy_pollfd_arrays(void) +{ + free(fds); + free(fd_to_pollfd); + current_array_size = 0; +} + +static void set_fds(int fd, short events) +{ + if (current_array_size < fd+1) { + struct pollfd *p1 = NULL; + struct pollfd **p2 = NULL; + unsigned int newsize = current_array_size; + 
do { newsize += GROWTH_LENGTH; } while (newsize < fd+1); + + p1 = realloc(fds, sizeof(struct pollfd)*newsize); + if (!p1) + goto fail; + fds = p1; + + p2 = realloc(fd_to_pollfd, sizeof(struct pollfd *)*newsize); + if (!p2) + goto fail; + fd_to_pollfd = p2; + + memset(&fds[0] + current_array_size, 0, + sizeof(struct pollfd) * (newsize-current_array_size)); + memset(&fd_to_pollfd[0] + current_array_size, 0, + sizeof(struct pollfd *) * (newsize-current_array_size)); + current_array_size = newsize; + } + + fds[nr_fds].fd = fd; + fds[nr_fds].events = events; + fd_to_pollfd[fd] = &fds[nr_fds]; + nr_fds++; + + return; +fail: + dolog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd); + return; +} + +static short fd_revents(int fd) +{ + if (fd >= current_array_size) + return 0; + if (fd_to_pollfd[fd] == NULL) + return 0; + return fd_to_pollfd[fd]->revents; +} + +static void reset_fds(void) +{ + nr_fds = 0; + memset(fds, 0, sizeof(struct pollfd) * current_array_size); + memset(fd_to_pollfd, 0, + sizeof(struct pollfd *) * current_array_size); +} + void handle_io(void) { - fd_set readfds, writefds; int ret; if (log_hv) { @@ -957,23 +1046,21 @@ void handle_io(void) } } + if (initialize_pollfd_arrays()) + goto out; + for (;;) { struct domain *d, *n; - int max_fd = -1; - struct timeval timeout; + int poll_timeout; /* timeout in milliseconds */ struct timespec ts; long long now, next_timeout = 0; - FD_ZERO(&readfds); - FD_ZERO(&writefds); + reset_fds(); - FD_SET(xs_fileno(xs), &readfds); - max_fd = MAX(xs_fileno(xs), max_fd); + set_fds(xs_fileno(xs), POLLIN); - if (log_hv) { - FD_SET(xc_evtchn_fd(xce_handle), &readfds); - max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd); - } + if (log_hv) + set_fds(xc_evtchn_fd(xce_handle), POLLIN); if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) return; @@ -982,11 +1069,7 @@ void handle_io(void) /* Re-calculate any event counter allowances & unblock domains with new allowance */ for (d = dom_head; d; d = d->next) { - /* Add 5ms of fuzz since select() 
often returns - a couple of ms sooner than requested. Without - the fuzz we typically do an extra spin in select() - with a 1/2 ms timeout every other iteration */ - if ((now+5) > d->next_period) { + if (now > d->next_period) { d->next_period = now + RATE_LIMIT_PERIOD; if (d->event_count >= RATE_LIMIT_ALLOWANCE) { (void)xc_evtchn_unmask(d->xce_handle, d->local_port); @@ -1006,74 +1089,73 @@ void handle_io(void) !d->buffer.max_capacity || d->buffer.size < d->buffer.max_capacity) { int evtchn_fd = xc_evtchn_fd(d->xce_handle); - FD_SET(evtchn_fd, &readfds); - max_fd = MAX(evtchn_fd, max_fd); + set_fds(evtchn_fd, POLLIN); } } if (d->master_fd != -1) { + short events = 0; if (!d->is_dead && ring_free_bytes(d)) - FD_SET(d->master_fd, &readfds); + events |= POLLIN; if (!buffer_empty(&d->buffer)) - FD_SET(d->master_fd, &writefds); - max_fd = MAX(d->master_fd, max_fd); + events |= POLLOUT; + + if (events) + set_fds(d->master_fd, events); } } /* If any domain has been rate limited, we need to work - out what timeout to supply to select */ + out what timeout to supply to poll */ if (next_timeout) { long long duration = (next_timeout - now); if (duration <= 0) /* sanity check */ duration = 1; - timeout.tv_sec = duration / 1000; - timeout.tv_usec = ((duration - (timeout.tv_sec * 1000)) - * 1000); + poll_timeout = (int)duration; } - ret = select(max_fd + 1, &readfds, &writefds, 0, - next_timeout ? &timeout : NULL); + ret = poll(fds, nr_fds, next_timeout ? 
poll_timeout : -1); if (log_reload) { handle_log_reload(); log_reload = 0; } - /* Abort if select failed, except for EINTR cases + /* Abort if poll failed, except for EINTR cases which indicate a possible log reload */ if (ret == -1) { if (errno == EINTR) continue; - dolog(LOG_ERR, "Failure in select: %d (%s)", + dolog(LOG_ERR, "Failure in poll: %d (%s)", errno, strerror(errno)); break; } - if (log_hv && FD_ISSET(xc_evtchn_fd(xce_handle), &readfds)) + if (log_hv && fd_revents(xc_evtchn_fd(xce_handle)) & POLLIN) handle_hv_logs(); if (ret <= 0) continue; - if (FD_ISSET(xs_fileno(xs), &readfds)) + if (fd_revents(xs_fileno(xs)) & POLLIN) handle_xs(); for (d = dom_head; d; d = n) { n = d->next; if (d->event_count < RATE_LIMIT_ALLOWANCE) { if (d->xce_handle != NULL && - FD_ISSET(xc_evtchn_fd(d->xce_handle), - &readfds)) + fd_revents(xc_evtchn_fd(d->xce_handle)) & + POLLIN) handle_ring_read(d); } - if (d->master_fd != -1 && FD_ISSET(d->master_fd, - &readfds)) + if (d->master_fd != -1 && + fd_revents(d->master_fd) & POLLIN) handle_tty_read(d); - if (d->master_fd != -1 && FD_ISSET(d->master_fd, - &writefds)) + if (d->master_fd != -1 && + fd_revents(d->master_fd) & POLLOUT) handle_tty_write(d); if (d->last_seen != enum_pass) @@ -1084,6 +1166,7 @@ void handle_io(void) } } + destroy_pollfd_arrays(); out: if (log_hv_fd != -1) { close(log_hv_fd); -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |