
Re: [Xen-devel] [PATCH] xen: Implement domain runstates


  • To: George Dunlap <george.dunlap@xxxxxxxxxxxxx>, <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • From: Keir Fraser <keir@xxxxxxx>
  • Date: Tue, 23 Nov 2010 12:51:25 +0000
  • Cc:
  • Delivery-date: Tue, 23 Nov 2010 04:53:01 -0800
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>
  • Thread-index: AcuLDSMb88CSzs7PJESCHWnyIREhsQ==
  • Thread-topic: [Xen-devel] [PATCH] xen: Implement domain runstates

Is this really any more useful than just pulling all the individual vcpu
runstate records out via domctl (not supported yet except in a digested
form, but easily added), and then aggregating in userspace? It doesn't look
like it. Also, I would never take a patch for a new control interface
without the thing that uses it being supplied at the same time; otherwise
you may as well maintain it out of tree.
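
Roughly, the userspace aggregation could be no more than something like
the sketch below (illustrative only: get_vcpu_runstate() stands in for
the per-vcpu runstate domctl that, as noted, does not exist yet, and the
state folding mirrors the mask table in the patch):

#include <stdint.h>

enum {
    RUNSTATE_running,   /* 0: on a cpu */
    RUNSTATE_runnable,  /* 1: waiting for a cpu */
    RUNSTATE_blocked,   /* 2: blocked */
    RUNSTATE_offline    /* 3: offline */
};

/* Hypothetical query: returns the RUNSTATE_* of one vcpu. */
extern int get_vcpu_runstate(uint32_t domid, uint32_t vcpu);

static const char *domain_state(uint32_t domid, unsigned int nr_vcpus)
{
    unsigned int i, mask = 0;

    for ( i = 0; i < nr_vcpus; i++ )
    {
        int s = get_vcpu_runstate(domid, i);

        /* Fold offline into blocked, much as the patch does. */
        if ( s == RUNSTATE_offline )
            s = RUNSTATE_blocked;
        mask |= 1u << s;
    }

    switch ( mask )
    {
    case 1:  return "full run";            /* all running */
    case 2:  return "full contention";     /* all runnable */
    case 3:  return "concurrency hazard";  /* running + runnable */
    case 4:  return "blocked";             /* all blocked/offline */
    case 5:  return "partial run";         /* running + blocked */
    case 6:  return "partial contention";  /* runnable + blocked */
    default: return "concurrency hazard";  /* all three present */
    }
}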

 -- Keir

On 23/11/2010 11:26, "George Dunlap" <george.dunlap@xxxxxxxxxxxxx> wrote:

> To help simplify the load configuration of servers, I developed the
> idea of a "domain runstate".  The "runstate" of a domain is defined
> by the runstate of its constituent vcpus.  The "states" are defined
> as follows:
> 
> Blocked: All vcpus blocked
> Partial run: Some vcpus running, some blocked
> Partial contention: Some vcpus waiting for the cpu, some blocked
> Full run: All vcpus running
> Concurrency hazard: Some vcpus running, some waiting for the cpu
> Full contention: All vcpus waiting for the cpu
> 
> The idea is that by looking at the amount of time in each state
> over the last unit of time, an administrator (or an automated
> load-balancing program) can determine whether their vcpu and/or
> domain configuration needs to be tweaked.  For example:
> 
> If a VM spends the majority of its time in "full run", it may
> not have enough vcpus to do its work.
> 
> If a VM spends a large amount of time in full contention, the system
> is probably too busy, and some workloads should be moved onto other
> servers.
> 
> If a VM spends a large amount of time in concurrency hazard, the VM
> may be wasting a lot of time on spinlocks and other synchronization
> overhead (and the cache misses that go with it).  Either work should
> be moved off the machine, or the number of vcpus assigned to the VM
> should be reduced.
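> 
> As a rough illustration of the kind of check a monitoring tool could do
> with this (sketch only; it assumes a hypothetical get_runstate_info()
> wrapper around the new domctl, which this patch does not provide):
> 
> #include <inttypes.h>
> #include <stdio.h>
> #include <unistd.h>
> 
> struct runstate_sample {
>     uint64_t time[6];    /* cumulative ns in each DOMAIN_RUNSTATE_* */
> };
> 
> /* Hypothetical toolstack wrapper around XEN_DOMCTL_get_runstate_info. */
> extern void get_runstate_info(uint32_t domid, struct runstate_sample *s);
> 
> static void report_runstate(uint32_t domid)
> {
>     static const char *name[6] = {
>         "full run", "full contention", "concurrency hazard",
>         "blocked", "partial run", "partial contention"
>     };
>     struct runstate_sample a, b;
>     uint64_t total = 0;
>     int i;
> 
>     /* Sample the cumulative times one second apart. */
>     get_runstate_info(domid, &a);
>     sleep(1);
>     get_runstate_info(domid, &b);
> 
>     for ( i = 0; i < 6; i++ )
>         total += b.time[i] - a.time[i];
> 
>     /* Report the share of the interval spent in each state. */
>     for ( i = 0; i < 6; i++ )
>         printf("%-20s %3" PRIu64 "%%\n", name[i],
>                total ? (b.time[i] - a.time[i]) * 100 / total : 0);
> }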
> 
> The state is protected by a lock, but to avoid tricky deadlock
> issues, if the lock cannot be acquired the state is simply not
> updated.  The number of lost state updates is recorded.
> 
> Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
> 
> diff -r c0c1f5f0745e -r 692200e7ce1e xen/common/domain.c
> --- a/xen/common/domain.c Mon Nov 22 19:16:34 2010 +0000
> +++ b/xen/common/domain.c Tue Nov 23 11:25:50 2010 +0000
> @@ -238,6 +238,7 @@
>      spin_lock_init_prof(d, domain_lock);
>      spin_lock_init_prof(d, page_alloc_lock);
>      spin_lock_init(&d->hypercall_deadlock_mutex);
> +    spin_lock_init(&d->runstate_lock);
>      INIT_PAGE_LIST_HEAD(&d->page_list);
>      INIT_PAGE_LIST_HEAD(&d->xenpage_list);
>  
> diff -r c0c1f5f0745e -r 692200e7ce1e xen/common/domctl.c
> --- a/xen/common/domctl.c Mon Nov 22 19:16:34 2010 +0000
> +++ b/xen/common/domctl.c Tue Nov 23 11:25:50 2010 +0000
> @@ -970,6 +970,24 @@
>      }
>      break;
>  
> +    case XEN_DOMCTL_get_runstate_info:
> +    {
> +        struct domain *d;
> +
> +        ret = -ESRCH;
> +        d = rcu_lock_domain_by_id(op->domain);
> +        if ( d != NULL )
> +        {
> +            domain_runstate_get(d, &op->u.domain_runstate);
> +            ret = 0;
> +
> +            rcu_unlock_domain(d);
> +
> +            if ( copy_to_guest(u_domctl, op, 1) )
> +                ret = -EFAULT;
> +        }
> +        break;
> +    }
>      default:
>          ret = arch_do_domctl(op, u_domctl);
>          break;
> diff -r c0c1f5f0745e -r 692200e7ce1e xen/common/schedule.c
> --- a/xen/common/schedule.c Mon Nov 22 19:16:34 2010 +0000
> +++ b/xen/common/schedule.c Tue Nov 23 11:25:50 2010 +0000
> @@ -135,10 +135,31 @@
>      }
>  }
>  
> +/* Used to quickly map the vcpu runstate mask to a domain runstate */
> +static int mask_to_state[] = {
> +    /* 000: Nothing in any runstate.  Should never happen. */
> +    -1,
> +    /* 001: All running */
> +    DOMAIN_RUNSTATE_full_run,
> +    /* 010: All runnable */
> +    DOMAIN_RUNSTATE_full_contention,
> +    /* 011: Some running, some runnable */
> +    DOMAIN_RUNSTATE_concurrency_hazard,
> +    /* 100: All blocked / offline */
> +    DOMAIN_RUNSTATE_blocked,
> +    /* 101: Some running, some blocked / offline */
> +    DOMAIN_RUNSTATE_partial_run,
> +    /* 110: Some blocked / offline, some runnable */
> +    DOMAIN_RUNSTATE_partial_contention,
> +    /* 111: Some in every state.  Mixed running + runnable is most important. */
> +    DOMAIN_RUNSTATE_concurrency_hazard
> +};
> +
>  static inline void vcpu_runstate_change(
>      struct vcpu *v, int new_state, s_time_t new_entry_time)
>  {
>      s_time_t delta;
> +    struct domain *d = v->domain;
>  
>      ASSERT(v->runstate.state != new_state);
>      ASSERT(spin_is_locked(per_cpu(schedule_data,v->processor).schedule_lock));
> @@ -155,6 +176,37 @@
>      }
>  
>      v->runstate.state = new_state;
> +
> +    /* Update domain runstate */
> +    if(spin_trylock(&d->runstate_lock)) {
> +        unsigned mask=0;
> +        struct vcpu *ov;
> +        
> +        BUG_ON(d->runstate.state > DOMAIN_RUNSTATE_partial_contention);
> +        
> +        d->runstate.time[d->runstate.state] +=
> +            (new_entry_time - d->runstate.state_entry_time);
> +        d->runstate.state_entry_time = new_entry_time;
> +        
> +        /* Determine new runstate.  First, see what states we have */
> +        for_each_vcpu(d, ov) {
> +            /* Don't count vcpus that have been taken offline by the guest */
> +            if(! (ov->runstate.state == RUNSTATE_offline
> +                  && test_bit(_VPF_down, &ov->pause_flags)) )
> +                mask |= (1 << ov->runstate.state);
> +        }
> +        
> +        BUG_ON(mask == 0);
> +        
> +        /* Offline & blocked are the same */
> +        mask |= ((1 << RUNSTATE_offline) & mask) >> 1;
> +        
> +        d->runstate.state = mask_to_state[mask&0x7];
> +        
> +        spin_unlock(&d->runstate_lock);
> +    } else {
> +        atomic_inc(&d->runstate_missed_changes);
> +    }
>  }
>  
>  void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
> @@ -173,6 +225,18 @@
>          vcpu_schedule_unlock_irq(v);
>  }
>  
> +void domain_runstate_get(struct domain *d,
> +                         domain_runstate_info_t *runstate)
> +{
> +    spin_lock(&d->runstate_lock);
> +
> +    memcpy(runstate, &d->runstate, sizeof(*runstate));
> +    runstate->time[d->runstate.state] += NOW() - runstate->state_entry_time;
> +    runstate->missed_changes = atomic_read(&d->runstate_missed_changes);
> +
> +    spin_unlock(&d->runstate_lock);
> +}
> +
>  uint64_t get_cpu_idle_time(unsigned int cpu)
>  {
>      struct vcpu_runstate_info state;
> diff -r c0c1f5f0745e -r 692200e7ce1e xen/include/public/domctl.h
> --- a/xen/include/public/domctl.h Mon Nov 22 19:16:34 2010 +0000
> +++ b/xen/include/public/domctl.h Tue Nov 23 11:25:50 2010 +0000
> @@ -806,6 +806,46 @@
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t);
>  #endif
>  
> +/*
> + * Return information about the state and running time of a domain.
> + * The "domain runstate" is based on the runstates of all the vcpus of the
> + * domain (see below).
> + * The result is returned in u.domain_runstate of the domctl.
> + */
> +struct xen_domctl_runstate_info {
> +    /* Domain's current state (DOMAIN_RUNSTATE_*). */
> +    uint32_t      state;
> +    /* Number of times we missed an update due to contention */
> +    uint32_t missed_changes;
> +    /* When was current state entered (system time, ns)? */
> +    uint64_t state_entry_time;
> +    /*
> +     * Time spent in each RUNSTATE_* (ns). The sum of these times is
> +     * NOT guaranteed not to drift from system time.
> +     */
> +    uint64_t time[6];
> +};
> +typedef struct xen_domctl_runstate_info xen_domctl_runstate_info_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_runstate_info_t);
> +
> +/* All vcpus are running */
> +#define DOMAIN_RUNSTATE_full_run           0
> +
> +/* All vcpus are runnable (i.e., waiting for cpu) */
> +#define DOMAIN_RUNSTATE_full_contention    1
> +
> +/* Some vcpus are running, some are runnable */
> +#define DOMAIN_RUNSTATE_concurrency_hazard 2
> +
> +/* All vcpus are blocked / offline */
> +#define DOMAIN_RUNSTATE_blocked            3
> +
> +/* Some vcpus are running, some are blocked */
> +#define DOMAIN_RUNSTATE_partial_run        4
> +
> +/* Some vcpus are runnable, some are blocked */
> +#define DOMAIN_RUNSTATE_partial_contention 5
> +
>  struct xen_domctl {
>      uint32_t cmd;
>  #define XEN_DOMCTL_createdomain                   1
> @@ -868,6 +908,7 @@
>  #define XEN_DOMCTL_getpageframeinfo3             61
>  #define XEN_DOMCTL_setvcpuextstate               62
>  #define XEN_DOMCTL_getvcpuextstate               63
> +#define XEN_DOMCTL_get_runstate_info             64
>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -923,6 +964,7 @@
>          struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
>          struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
> +        struct xen_domctl_runstate_info     domain_runstate;
>          uint8_t                             pad[128];
>      } u;
>  };
> diff -r c0c1f5f0745e -r 692200e7ce1e xen/include/xen/sched.h
> --- a/xen/include/xen/sched.h Mon Nov 22 19:16:34 2010 +0000
> +++ b/xen/include/xen/sched.h Tue Nov 23 11:25:50 2010 +0000
> @@ -200,6 +200,8 @@
>      int xen_port;
>  };
>  
> +typedef struct xen_domctl_runstate_info domain_runstate_info_t;
> +
>  struct domain
>  {
>      domid_t          domain_id;
> @@ -332,6 +334,11 @@
>      nodemask_t node_affinity;
>      unsigned int last_alloc_node;
>      spinlock_t node_affinity_lock;
> +
> +    /* Domain runstate tracking */
> +    spinlock_t runstate_lock;
> +    atomic_t runstate_missed_changes;
> +    domain_runstate_info_t runstate;
>  };
>  
>  struct domain_setup_info
> @@ -614,6 +621,7 @@
>  int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
>  
>  void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
> +void domain_runstate_get(struct domain *d, domain_runstate_info_t *runstate);
>  uint64_t get_cpu_idle_time(unsigned int cpu);
>  
>  /*
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel





 

