[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] xen: Implement domain runstates
Is this really any more useful than just pulling all the individual vcpustate records out via domctl (not supported yet except in a digested form, but easily added), and then aggregating in userspace? It doesn't look like it. Also, I would never take a patch for a new control interface without the thing that uses it being supplied at the same time; Otherwise you can just as well maintain it out of tree. -- Keir On 23/11/2010 11:26, "George Dunlap" <george.dunlap@xxxxxxxxxxxxx> wrote: > To help simplify the load configuration of servers, I developed the > idea of a "domain runstate". The "runstate" of a domain is defined > by the runstate of its constituent vcpus. The "states" are defined > as follows: > > Blocked: All vcpus blocked > Partial run: Some vcpus running, some blocked > Partial contention: Some vcpus waiting for the cpu, some blocked > Full run: All vcpus running > Concurrency hazard: Some vcpus running, some waiting for the cpu > Full contention: All vcpus waiting for the cpu > > The idea is that by looking at the amount of time in each state > over the last unit of time, an administrator (or an automated > load-balancing program) can determine whether their vcpu and/or > domain configuration needs to be tweaked. For example: > > If a VM spends the majority of its time in "full run", it may > not have enough vcpus to do its work. > > If a VM spends a large amount of time in full contention, the system > is probably too busy, and some workloads should be moved onto other > servers. > > If a VM spends a large amount of time in concurrency hazard, it means > that the VM may be wasting a lot of time in spinlocks and other > synchronization (like cache misses). Either work should be moved off > the machine, or the number of vcpus assigned to the VM should be > reduced. > > The state is protected by a lock, but to avoid tricky deadlock > issues, if the lock cannot be acquired, the state is simply not > updated. Number of lost state updates is recorded. 
> > Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx> > > diff -r c0c1f5f0745e -r 692200e7ce1e xen/common/domain.c > --- a/xen/common/domain.c Mon Nov 22 19:16:34 2010 +0000 > +++ b/xen/common/domain.c Tue Nov 23 11:25:50 2010 +0000 > @@ -238,6 +238,7 @@ > spin_lock_init_prof(d, domain_lock); > spin_lock_init_prof(d, page_alloc_lock); > spin_lock_init(&d->hypercall_deadlock_mutex); > + spin_lock_init(&d->runstate_lock); > INIT_PAGE_LIST_HEAD(&d->page_list); > INIT_PAGE_LIST_HEAD(&d->xenpage_list); > > diff -r c0c1f5f0745e -r 692200e7ce1e xen/common/domctl.c > --- a/xen/common/domctl.c Mon Nov 22 19:16:34 2010 +0000 > +++ b/xen/common/domctl.c Tue Nov 23 11:25:50 2010 +0000 > @@ -970,6 +970,24 @@ > } > break; > > + case XEN_DOMCTL_get_runstate_info: > + { > + struct domain *d; > + > + ret = -ESRCH; > + d = rcu_lock_domain_by_id(op->domain); > + if ( d != NULL ) > + { > + domain_runstate_get(d, &op->u.domain_runstate); > + ret = 0; > + > + rcu_unlock_domain(d); > + > + if ( copy_to_guest(u_domctl, op, 1) ) > + ret = -EFAULT; > + } > + break; > + } > default: > ret = arch_do_domctl(op, u_domctl); > break; > diff -r c0c1f5f0745e -r 692200e7ce1e xen/common/schedule.c > --- a/xen/common/schedule.c Mon Nov 22 19:16:34 2010 +0000 > +++ b/xen/common/schedule.c Tue Nov 23 11:25:50 2010 +0000 > @@ -135,10 +135,31 @@ > } > } > > +/* Used to quickly map the vcpu runstate mask to a domain runstate */ > +static int mask_to_state[] = { > + /* 000: Nothing in any runstate. Should never happen. 
*/ > + -1, > + /* 001: All running */ > + DOMAIN_RUNSTATE_full_run, > + /* 010: All runnable */ > + DOMAIN_RUNSTATE_full_contention, > + /* 011: Some running, some runnable */ > + DOMAIN_RUNSTATE_concurrency_hazard, > + /* 100: All blocked / offline */ > + DOMAIN_RUNSTATE_blocked, > + /* 101: Some running, some blocked / offline */ > + DOMAIN_RUNSTATE_partial_run, > + /* 110: Some blocked / offline, some runnable */ > + DOMAIN_RUNSTATE_partial_contention, > + /* 111: Some in every state. Mixed running + runnable is most important. > */ > + DOMAIN_RUNSTATE_concurrency_hazard > +}; > > static inline void vcpu_runstate_change( > struct vcpu *v, int new_state, s_time_t new_entry_time) > { > s_time_t delta; > + struct domain *d = v->domain; > > ASSERT(v->runstate.state != new_state); > > ASSERT(spin_is_locked(per_cpu(schedule_data,v->processor).schedule_lock)); > @@ -155,6 +176,37 @@ > } > > v->runstate.state = new_state; > + > + /* Update domain runstate */ > + if(spin_trylock(&d->runstate_lock)) { > + unsigned mask=0; > + struct vcpu *ov; > + > + BUG_ON(d->runstate.state > DOMAIN_RUNSTATE_partial_contention); > + > + d->runstate.time[d->runstate.state] += > + (new_entry_time - d->runstate.state_entry_time); > + d->runstate.state_entry_time = new_entry_time; > + > + /* Determine new runstate. First, see what states we have */ > + for_each_vcpu(d, ov) { > + /* Don't count vcpus that have been taken offline by the guest > */ > + if(! 
(ov->runstate.state == RUNSTATE_offline > + && test_bit(_VPF_down, &ov->pause_flags)) ) > + mask |= (1 << ov->runstate.state); > + } > + > + BUG_ON(mask == 0); > + > + /* Offline & blocked are the same */ > + mask |= ((1 << RUNSTATE_offline) & mask) >> 1; > + > + d->runstate.state = mask_to_state[mask&0x7]; > + > + spin_unlock(&d->runstate_lock); > + } else { > + atomic_inc(&d->runstate_missed_changes); > + } > } > > void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate) > @@ -173,6 +225,18 @@ > vcpu_schedule_unlock_irq(v); > } > > +void domain_runstate_get(struct domain *d, > + domain_runstate_info_t *runstate) > +{ > + spin_lock(&d->runstate_lock); > + > + memcpy(runstate, &d->runstate, sizeof(*runstate)); > + runstate->time[d->runstate.state] += NOW() - runstate->state_entry_time; > + runstate->missed_changes = atomic_read(&d->runstate_missed_changes); > + > + spin_unlock(&d->runstate_lock); > +} > + > uint64_t get_cpu_idle_time(unsigned int cpu) > { > struct vcpu_runstate_info state; > diff -r c0c1f5f0745e -r 692200e7ce1e xen/include/public/domctl.h > --- a/xen/include/public/domctl.h Mon Nov 22 19:16:34 2010 +0000 > +++ b/xen/include/public/domctl.h Tue Nov 23 11:25:50 2010 +0000 > @@ -806,6 +806,46 @@ > DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t); > #endif > > +/* > + * Return information about the state and running time of a domain. > + * The "domain runstate" is based on the runstates of all the vcpus of the > + * domain (see below). > + * @extra_arg == pointer to domain_runstate_info structure. > + */ > +struct xen_domctl_runstate_info { > + /* Domain's current state (DOMAIN_RUNSTATE_*). */ > + uint32_t state; > + /* Number of times we missed an update due to contention */ > + uint32_t missed_changes; > + /* When was current state entered (system time, ns)? */ > + uint64_t state_entry_time; > + /* > + * Time spent in each RUNSTATE_* (ns). The sum of these times is > + * NOT guaranteed not to drift from system time. 
> + */ > + uint64_t time[6]; > +}; > +typedef struct xen_domctl_runstate_info xen_domctl_runstate_info_t; > +DEFINE_XEN_GUEST_HANDLE(xen_domctl_runstate_info_t); > + > +/* All vcpus are running */ > +#define DOMAIN_RUNSTATE_full_run 0 > + > +/* All vcpus are runnable (i.e., waiting for cpu) */ > +#define DOMAIN_RUNSTATE_full_contention 1 > + > +/* Some vcpus are running, some are runnable */ > +#define DOMAIN_RUNSTATE_concurrency_hazard 2 > + > +/* All vcpus are blocked / offline */ > +#define DOMAIN_RUNSTATE_blocked 3 > + > +/* Some vcpus are running, some are blocked */ > +#define DOMAIN_RUNSTATE_partial_run 4 > + > +/* Some vcpus are runnable, some are blocked */ > +#define DOMAIN_RUNSTATE_partial_contention 5 > + > struct xen_domctl { > uint32_t cmd; > #define XEN_DOMCTL_createdomain 1 > @@ -868,6 +908,7 @@ > #define XEN_DOMCTL_getpageframeinfo3 61 > #define XEN_DOMCTL_setvcpuextstate 62 > #define XEN_DOMCTL_getvcpuextstate 63 > +#define XEN_DOMCTL_get_runstate_info 64 > #define XEN_DOMCTL_gdbsx_guestmemio 1000 > #define XEN_DOMCTL_gdbsx_pausevcpu 1001 > #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 > @@ -923,6 +964,7 @@ > struct xen_domctl_gdbsx_memio gdbsx_guest_memio; > struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; > struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; > + struct xen_domctl_runstate_info domain_runstate; > uint8_t pad[128]; > } u; > }; > diff -r c0c1f5f0745e -r 692200e7ce1e xen/include/xen/sched.h > --- a/xen/include/xen/sched.h Mon Nov 22 19:16:34 2010 +0000 > +++ b/xen/include/xen/sched.h Tue Nov 23 11:25:50 2010 +0000 > @@ -200,6 +200,8 @@ > int xen_port; > }; > > +typedef struct xen_domctl_runstate_info domain_runstate_info_t; > + > struct domain > { > domid_t domain_id; > @@ -332,6 +334,11 @@ > nodemask_t node_affinity; > unsigned int last_alloc_node; > spinlock_t node_affinity_lock; > + > + /* Domain runstate tracking */ > + spinlock_t runstate_lock; > + atomic_t runstate_missed_changes; > + domain_runstate_info_t runstate; > }; 
> > struct domain_setup_info > @@ -614,6 +621,7 @@ > int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity); > > void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate); > +void domain_runstate_get(struct domain *d, domain_runstate_info_t *runstate); > uint64_t get_cpu_idle_time(unsigned int cpu); > > /* > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxxxxxxxx > http://lists.xensource.com/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |