[Xen-devel] [PATCH 2/2] sched: credit2: consider per-vcpu soft affinity



Make the credit2 scheduler consider each vcpu's soft affinity when deciding
which run queue to assign the vcpu to.

This patch introduces two main functional changes.

First, in runq_tickle(), the scheduler now looks for idle pcpus in other run
queues, preferring pcpus the vcpu would like to run on (soft affinity) and
falling back to pcpus it is allowed to run on (hard affinity), in that order.
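
For illustration only (this sketch is not part of the patch): the two-step
idea written as standalone C, with plain 64-bit masks standing in for Xen's
cpumask_t; the names pick_idle_cpu() and first_cpu() are invented for the
example.

    /* Illustrative sketch only; uint64_t masks stand in for cpumask_t. */
    #include <stdint.h>

    #define NO_CPU (-1)

    /* Stand-in for cpumask_first(): lowest set bit, or NO_CPU if empty. */
    static int first_cpu(uint64_t mask)
    {
        for ( int cpu = 0; cpu < 64; cpu++ )
            if ( mask & (1ULL << cpu) )
                return cpu;
        return NO_CPU;
    }

    /*
     * Two-step search: prefer an idle, not-yet-tickled pcpu inside the
     * vcpu's soft affinity, then fall back to any such pcpu inside its
     * hard affinity.
     */
    static int pick_idle_cpu(uint64_t idle, uint64_t tickled,
                             uint64_t soft, uint64_t hard)
    {
        uint64_t candidates = idle & ~tickled;
        int cpu;

        cpu = first_cpu(candidates & soft & hard);   /* soft affinity step */
        if ( cpu != NO_CPU )
            return cpu;

        return first_cpu(candidates & hard);         /* hard affinity step */
    }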

Second, in balance_load(), if moving a vcpu from one run queue to another
would take it away from its soft affinity and leave it running within its
hard affinity only, that move is not considered for load balancing.
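
Again purely for illustration (not part of the patch): a standalone C sketch
of the classification used to veto such moves, with plain masks standing in
for cpumask_t; classify_move() and move_allowed() are invented names, and the
real patch additionally treats a full soft-affinity mask as having no
meaningful soft affinity.

    /* Illustrative sketch only; uint64_t masks stand in for cpumask_t. */
    #include <stdbool.h>
    #include <stdint.h>

    enum move_kind {
        MOVE_SOFT_TO_HARD,  /* away from soft affinity: vetoed by balancing */
        MOVE_HARD_TO_SOFT,  /* towards soft affinity */
        MOVE_ANY_TO_ANY,    /* soft->soft, or no meaningful soft affinity */
    };

    static enum move_kind classify_move(uint64_t soft, uint64_t hard,
                                        uint64_t src_cpus, uint64_t dst_cpus)
    {
        uint64_t eff_soft = soft & hard;   /* effective soft affinity */
        bool soft_in_src, soft_in_dst;

        if ( eff_soft == 0 )               /* nothing to prefer or protect */
            return MOVE_ANY_TO_ANY;

        soft_in_src = (eff_soft & src_cpus) != 0;
        soft_in_dst = (eff_soft & dst_cpus) != 0;

        if ( soft_in_src && !soft_in_dst )
            return MOVE_SOFT_TO_HARD;
        if ( !soft_in_src && soft_in_dst )
            return MOVE_HARD_TO_SOFT;
        return MOVE_ANY_TO_ANY;
    }

    /* balance_load-style check: skip moves that would lose soft affinity. */
    static bool move_allowed(uint64_t soft, uint64_t hard,
                             uint64_t src_cpus, uint64_t dst_cpus)
    {
        return classify_move(soft, hard, src_cpus, dst_cpus)
               != MOVE_SOFT_TO_HARD;
    }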

Signed-off-by: Justin T. Weaver <jtweaver@xxxxxxxxxx>
---
 xen/common/sched_credit2.c |  222 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 218 insertions(+), 4 deletions(-)

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 90e9cdf..ad867f2 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -127,6 +127,15 @@
 #define CSCHED2_CREDIT_RESET         0
 /* Max timer: Maximum time a guest can be run for. */
 #define CSCHED2_MAX_TIMER            MILLISECS(2)
+/* Two step balancing logic; consider soft affinity first. */
+#define CSCHED2_BALANCE_SOFT_AFFINITY 0
+#define CSCHED2_BALANCE_HARD_AFFINITY 1
+/* vcpu runq migration away from vcpu's soft affinity. */
+#define CSCHED2_MIGRATE_SOFT_TO_HARD 0
+/* vcpu runq migration to vcpu's soft affinity. */
+#define CSCHED2_MIGRATE_HARD_TO_SOFT 1
+/* vcpu runq migration soft to soft or vcpu has no soft affinity. */
+#define CSCHED2_MIGRATE_ANY_TO_ANY   2
 
 
 #define CSCHED2_IDLE_CREDIT                 (-(1<<30))
@@ -176,6 +185,8 @@ integer_param("sched_credit2_migrate_resist", opt_migrate_resist);
 #define c2r(_ops, _cpu)     (CSCHED2_PRIV(_ops)->runq_map[(_cpu)])
 /* CPU to runqueue struct macro */
 #define RQD(_ops, _cpu)     (&CSCHED2_PRIV(_ops)->rqd[c2r(_ops, _cpu)])
+#define for_each_csched2_balance_step(step) \
+    for ( (step) = 0; (step) <= CSCHED2_BALANCE_HARD_AFFINITY; (step)++ )
 
 /*
  * Shifts for load average.
@@ -268,6 +279,35 @@ struct csched2_dom {
     uint16_t nr_vcpus;
 };
 
+/*
+ * A vcpu has meaningful soft affinity if...
+ * - its soft affinity mask is not full, and
+ * - the passed in mask (usually its hard affinity mask) intersects
+ *   with its soft affinity mask
+ */
+static inline int __vcpu_has_soft_affinity(const struct vcpu *vc,
+                                           const cpumask_t *mask)
+{
+    if ( cpumask_full(vc->cpu_soft_affinity) ||
+        !cpumask_intersects(vc->cpu_soft_affinity, mask) )
+        return 0;
+
+    return 1;
+}
+
+static void
+csched2_balance_cpumask(const struct vcpu *vc, int step, cpumask_t *mask)
+{
+    if ( step == CSCHED2_BALANCE_SOFT_AFFINITY )
+    {
+        cpumask_and(mask, vc->cpu_soft_affinity, vc->cpu_hard_affinity);
+
+        if ( unlikely(cpumask_empty(mask)) )
+            cpumask_copy(mask, vc->cpu_hard_affinity);
+    }
+    else /* step == CSCHED2_BALANCE_HARD_AFFINITY */
+        cpumask_copy(mask, vc->cpu_hard_affinity);
+}
 
 /*
  * Time-to-credit, credit-to-time.
@@ -474,6 +514,9 @@ __runq_remove(struct csched2_vcpu *svc)
 }
 
 void burn_credits(struct csched2_runqueue_data *rqd, struct csched2_vcpu *, s_time_t);
+static void __runq_deassign(struct csched2_vcpu *svc);
+static void __runq_assign(struct csched2_vcpu *svc,
+                          struct csched2_runqueue_data *rqd);
 
 /* Check to see if the item on the runqueue is higher priority than what's
  * currently running; if so, wake up the processor */
@@ -485,6 +528,9 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
     struct csched2_runqueue_data *rqd = RQD(ops, cpu);
     cpumask_t mask;
     struct csched2_vcpu * cur;
+    struct csched2_private *prv = CSCHED2_PRIV(ops);
+    int balance_step = CSCHED2_BALANCE_SOFT_AFFINITY;
+    bool_t prv_lock_held = 0;
 
     d2printk("rqt %pv curr %pv\n", new->vcpu, current);
 
@@ -513,6 +559,68 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
         goto tickle;
     }
 
+    /* Look for idle cpus in other runqs; consider soft affinity first. */
+    for_each_csched2_balance_step( balance_step )
+    {
+        cpumask_t balance_mask;
+
+        /* Skip the soft affinity balance step if new doesn't have any. */
+        if ( balance_step == CSCHED2_BALANCE_SOFT_AFFINITY &&
+            !__vcpu_has_soft_affinity(
+                new->vcpu, new->vcpu->cpu_hard_affinity) )
+            continue;
+
+        /* Skip this step if we can't get the credit2 private data lock. */
+        if ( !prv_lock_held && !spin_trylock(&prv->lock) )
+            continue;
+        prv_lock_held = 1;
+
+        csched2_balance_cpumask(new->vcpu, balance_step, &balance_mask);
+
+        for_each_cpu(i, &prv->active_queues)
+        {
+            struct csched2_runqueue_data *temp_rqd;
+
+            temp_rqd = prv->rqd + i;
+
+            if ( temp_rqd == rqd || !spin_trylock(&temp_rqd->lock) )
+                continue;
+
+            /* Find idle cpus in the balance mask that are not tickled. */
+            cpumask_andnot(&mask, &temp_rqd->idle, &temp_rqd->tickled);
+            cpumask_and(&mask, &mask, &balance_mask);
+
+            if ( !cpumask_empty(&mask) )
+            {
+                /* Found an idle cpu on another run queue; move new. */
+                s_time_t now = 0;
+
+                ipid = cpumask_any(&mask);
+                new->vcpu->processor = ipid;
+                __runq_remove(new);
+                now = NOW();
+                update_load(ops, rqd, new, -1, now);
+                __runq_deassign(new);
+                __runq_assign(new, temp_rqd);
+                update_load(ops, temp_rqd, new, 1, now);
+                runq_insert(ops, ipid, new);
+                cpumask_set_cpu(ipid, &temp_rqd->tickled);
+                cpu_raise_softirq(ipid, SCHEDULE_SOFTIRQ);
+
+                spin_unlock(&temp_rqd->lock);
+                spin_unlock(&prv->lock);
+                return;
+            }
+            else
+                /* No suitable idlers found in runq temp_rqd. */
+                spin_unlock(&temp_rqd->lock);
+        }
+
+        if ( prv_lock_held && balance_step == CSCHED2_BALANCE_HARD_AFFINITY )
+            /* No suitable other-runq idlers found; unlock private data. */
+            spin_unlock(&prv->lock);
+    }
+
     /* Otherwise, look for the non-idle cpu with the lowest credit,
      * skipping cpus which have been tickled but not scheduled yet,
      * that new is allowed to run on. */
@@ -1039,12 +1147,22 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
 {
     struct csched2_private *prv = CSCHED2_PRIV(ops);
     int i, min_rqi = -1, new_cpu;
+    int max_soft_cpus = 0, max_soft_cpus_rqi = -1;
+    bool_t consider_soft_affinity = 0;
     struct csched2_vcpu *svc = CSCHED2_VCPU(vc);
     s_time_t min_avgload;
-    cpumask_t temp_mask;
+    cpumask_t temp_mask, vc_soft_affinity;
 
     BUG_ON(cpumask_empty(&prv->active_queues));
 
+    /* Consider soft affinity in the cpu decision? */
+    if ( __vcpu_has_soft_affinity(vc, vc->cpu_hard_affinity) )
+    {
+        consider_soft_affinity = 1;
+        cpumask_and(&vc_soft_affinity, vc->cpu_soft_affinity,
+            vc->cpu_hard_affinity);
+    }
+
     /* Locking:
      * - vc->processor is already locked
      * - Need to grab prv lock to make sure active runqueues don't
@@ -1075,6 +1193,13 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
             /* Can't be assigned to current runqueue; return a safe pcpu. */
             cpumask_and(&temp_mask, vc->cpu_hard_affinity,
                 cpupool_online_cpumask(vc->domain->cpupool));
+            if ( consider_soft_affinity )
+            {
+                cpumask_t safe_soft_mask;
+                cpumask_and(&safe_soft_mask, &vc_soft_affinity, &temp_mask);
+                if ( !cpumask_empty(&safe_soft_mask) )
+                    cpumask_copy(&temp_mask, &safe_soft_mask);
+            }
             return cpumask_any(&temp_mask);
         }
         else
@@ -1112,11 +1237,15 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
 
     min_avgload = MAX_LOAD;
 
-    /* Find the runqueue with the lowest instantaneous load */
+    /*
+     * Find the run queue with the most cpus in vc's soft affinity, or the
+     * one with the lowest instantaneous load if not considering soft affinity.
+     */
     for_each_cpu(i, &prv->active_queues)
     {
         struct csched2_runqueue_data *rqd;
         s_time_t rqd_avgload;
+        int rqd_soft_cpus = 0;
 
         rqd = prv->rqd + i;
 
@@ -1131,6 +1260,11 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
             cpumask_and(&temp_mask, vc->cpu_hard_affinity, &rqd->active);
             if ( cpumask_empty(&temp_mask) )
                 continue;
+            if ( consider_soft_affinity )
+            {
+                cpumask_and(&temp_mask, &vc_soft_affinity, &rqd->active);
+                rqd_soft_cpus = cpumask_weight(&temp_mask);
+            }
             rqd_avgload = rqd->b_avgload - svc->avgload;
         }
         else if ( spin_trylock(&rqd->lock) )
@@ -1141,6 +1275,11 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
                 spin_unlock(&rqd->lock);
                 continue;
             }
+            if ( consider_soft_affinity )
+            {
+                cpumask_and(&temp_mask, &vc_soft_affinity, &rqd->active);
+                rqd_soft_cpus = cpumask_weight(&temp_mask);
+            }
             rqd_avgload = rqd->b_avgload;
             spin_unlock(&rqd->lock);
         }
@@ -1152,9 +1291,14 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
             min_avgload = rqd_avgload;
             min_rqi=i;
         }
+        if ( consider_soft_affinity && rqd_soft_cpus > max_soft_cpus )
+        {
+            max_soft_cpus = rqd_soft_cpus;
+            max_soft_cpus_rqi = i;
+        }
     }
 
-    if ( min_rqi == -1 )
+    if ( min_rqi == -1 && max_soft_cpus_rqi == -1 )
     {
         /* No runqs found (most likely because of spinlock contention). */
         cpumask_and(&temp_mask, vc->cpu_hard_affinity, &svc->rqd->active);
@@ -1163,6 +1307,13 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
             /* Can't be assigned to current runqueue; return a safe pcpu. */
             cpumask_and(&temp_mask, vc->cpu_hard_affinity,
                 cpupool_online_cpumask(vc->domain->cpupool));
+            if ( consider_soft_affinity )
+            {
+                cpumask_t safe_soft_mask;
+                cpumask_and(&safe_soft_mask, &vc_soft_affinity, &temp_mask);
+                if ( !cpumask_empty(&safe_soft_mask) )
+                    cpumask_copy(&temp_mask, &safe_soft_mask);
+            }
             new_cpu = cpumask_any(&temp_mask);
         }
         else
@@ -1173,8 +1324,16 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
                 /* Same runq, different cpu; affinity must have changed. */
                 new_cpu = cpumask_any(&temp_mask);
     }
+    else if ( max_soft_cpus_rqi != -1 )
+    {
+        /* Prefer soft affinity here over minimizing run queue load. */
+        cpumask_and(&temp_mask, &vc_soft_affinity,
+            &prv->rqd[max_soft_cpus_rqi].active);
+        new_cpu = cpumask_any(&temp_mask);
+    }
     else
     {
+        /* vc does not have soft affinity; use the rq that minimizes load. */
         cpumask_and(&temp_mask, vc->cpu_hard_affinity,
             &prv->rqd[min_rqi].active);
         new_cpu = cpumask_any(&temp_mask);
@@ -1197,6 +1356,46 @@ typedef struct {
     struct csched2_runqueue_data *orqd;                  
 } balance_state_t;
 
+static int classify_vcpu_migration(const struct vcpu *vc,
+    const struct csched2_runqueue_data *src_rqd,
+    const struct csched2_runqueue_data *dst_rqd)
+{
+    cpumask_t soft_mask, temp_mask;
+
+    /*
+     * The caller must already hold the locks on src_rqd and dst_rqd.
+     * The function assumes vc has hard affinity with at least one pcpu
+     * in both the source and destination run queues.
+     */
+
+    /* Does vcpu not have soft affinity? */
+    if ( !__vcpu_has_soft_affinity(vc, vc->cpu_hard_affinity) )
+        return CSCHED2_MIGRATE_ANY_TO_ANY;
+    else
+        cpumask_and(&soft_mask, vc->cpu_soft_affinity, vc->cpu_hard_affinity);
+
+    /* Does vcpu have soft affinity with pcpu(s) in the source runq? */
+    cpumask_and(&temp_mask, &soft_mask, &src_rqd->active);
+    if ( !cpumask_empty(&temp_mask) )
+    {
+        /* ... and soft affinity with the destination runq? */
+        cpumask_and(&temp_mask, &soft_mask, &dst_rqd->active);
+        if ( !cpumask_empty(&temp_mask) )
+            return CSCHED2_MIGRATE_ANY_TO_ANY;
+        else
+            return CSCHED2_MIGRATE_SOFT_TO_HARD;
+    }
+    else
+    {
+        /* Does vcpu only have soft affinity with the destination runq? */
+        cpumask_and(&temp_mask, &soft_mask, &dst_rqd->active);
+        if ( !cpumask_empty(&temp_mask) )
+            return CSCHED2_MIGRATE_HARD_TO_SOFT;
+        else
+            return CSCHED2_MIGRATE_ANY_TO_ANY;
+    }
+}
+
 static void consider(balance_state_t *st, 
                      struct csched2_vcpu *push_svc,
                      struct csched2_vcpu *pull_svc)
@@ -1294,7 +1493,7 @@ static void balance_load(const struct scheduler *ops, int cpu, s_time_t now)
     cpumask_t temp_mask;
 
     balance_state_t st = { .best_push_svc = NULL, .best_pull_svc = NULL };
-    
+
     /*
      * Basic algorithm: Push, pull, or swap.
      * - Find the runqueue with the furthest load distance
@@ -1450,6 +1649,11 @@ retry:
         if ( cpumask_empty(&temp_mask) )
             continue;
 
+        /* Skip if this move would take vcpu away from its soft affinity. */
+        if ( classify_vcpu_migration(push_svc->vcpu, st.lrqd, st.orqd) ==
+            CSCHED2_MIGRATE_SOFT_TO_HARD )
+            continue;
+
         list_for_each( pull_iter, &st.orqd->svc )
         {
             struct csched2_vcpu * pull_svc = list_entry(pull_iter, struct csched2_vcpu, rqd_elem);
@@ -1469,6 +1673,11 @@ retry:
             if ( cpumask_empty(&temp_mask) )
                 continue;
 
+            /* Skip if this move would take vcpu away from its soft affinity. */
+            if ( classify_vcpu_migration(pull_svc->vcpu, st.orqd, st.lrqd) ==
+                CSCHED2_MIGRATE_SOFT_TO_HARD )
+                continue;
+
             consider(&st, push_svc, pull_svc);
         }
 
@@ -1492,6 +1701,11 @@ retry:
         if ( cpumask_empty(&temp_mask) )
             continue;
 
+        /* Skip if this move would take vcpu away from its soft affinity. */
+        if ( classify_vcpu_migration(pull_svc->vcpu, st.orqd, st.lrqd) ==
+            CSCHED2_MIGRATE_SOFT_TO_HARD )
+            continue;
+
         /* Consider pull only */
         consider(&st, NULL, pull_svc);
     }
-- 
1.7.10.4

