[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 3/4] xen: credit2: improve distribution of budget (for domains with caps)



Instead of letting the vCPU that for first tries to get
some budget take it all (although temporarily), allow each
vCPU to only get a specific quota of the total budget.

This improves fairness, allows for more parallelism, and
prevents vCPUs from not being able to get any budget (e.g.,
because some other vCPU always comes before and gets it all)
for one or more period, and hence starve (and couse troubles
in guest kernels, such as livelocks, triggering ofwhatchdogs,
etc.).

Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
---
Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Cc: Anshul Makkar <anshul.makkar@xxxxxxxxxx>
---
 xen/common/sched_credit2.c |   48 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 3f7b8f0..97efde8 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -506,7 +506,7 @@ struct csched2_vcpu {
 
     int credit;
 
-    s_time_t budget;
+    s_time_t budget, budget_quota;
     struct list_head parked_elem;      /* On the parked_vcpus list   */
 
     s_time_t start_time; /* When we were scheduled (used for credit) */
@@ -1627,8 +1627,16 @@ static bool vcpu_try_to_get_budget(struct csched2_vcpu 
*svc)
 
     if ( sdom->budget > 0 )
     {
-        svc->budget = sdom->budget;
-        sdom->budget = 0;
+        s_time_t budget;
+
+        /* Get our quote, if there's at least as much budget */
+        if ( likely(sdom->budget >= svc->budget_quota) )
+            budget = svc->budget_quota;
+        else
+            budget = sdom->budget;
+
+        svc->budget = budget;
+        sdom->budget -= budget;
     }
     else
     {
@@ -1841,6 +1849,7 @@ csched2_alloc_vdata(const struct scheduler *ops, struct 
vcpu *vc, void *dd)
     svc->tickled_cpu = -1;
 
     svc->budget = STIME_MAX;
+    svc->budget_quota = 0;
     INIT_LIST_HEAD(&svc->parked_elem);
 
     SCHED_STAT_CRANK(vcpu_alloc);
@@ -2548,10 +2557,33 @@ csched2_dom_cntl(
         /* Cap */
         if ( op->u.credit2.cap != 0 )
         {
+            struct csched2_vcpu *svc;
+            spinlock_t *lock;
+
             spin_lock(&sdom->budget_lock);
             sdom->tot_budget = (CSCHED2_BDGT_REPL_PERIOD / 100) * 
op->u.credit2.cap;
             spin_unlock(&sdom->budget_lock);
 
+            /*
+             * When trying to get some budget and run, each vCPU will grab
+             * from the pool 1/N (with N = nr of vCPUs of the domain) of
+             * the total budget. Roughly speaking, this means each vCPU will
+             * have at least one chance to run during every period.
+             */
+            for_each_vcpu ( d, v )
+            {
+                svc = csched2_vcpu(v);
+                lock = vcpu_schedule_lock(svc->vcpu);
+                /*
+                 * Too small quotas would in theory cause a lot of overhead,
+                 * which then won't happen because, in csched2_runtime(),
+                 * CSCHED2_MIN_TIMER is what would be used anyway.
+                 */
+                svc->budget_quota = max(sdom->tot_budget / sdom->nr_vcpus,
+                                        CSCHED2_MIN_TIMER);
+                vcpu_schedule_unlock(lock, svc->vcpu);
+            }
+
             if ( sdom->cap == 0 )
             {
                 /*
@@ -2583,9 +2615,8 @@ csched2_dom_cntl(
                  */
                 for_each_vcpu ( d, v )
                 {
-                    struct csched2_vcpu *svc = csched2_vcpu(v);
-                    spinlock_t *lock = vcpu_schedule_lock(svc->vcpu);
-
+                    svc = csched2_vcpu(v);
+                    lock = vcpu_schedule_lock(svc->vcpu);
                     if ( v->is_running )
                     {
                         unsigned int cpu = v->processor;
@@ -2619,6 +2650,7 @@ csched2_dom_cntl(
                     vcpu_schedule_unlock(lock, svc->vcpu);
                 }
             }
+
             sdom->cap = op->u.credit2.cap;
         }
         else if ( sdom->cap != 0 )
@@ -2632,6 +2664,7 @@ csched2_dom_cntl(
                 spinlock_t *lock = vcpu_schedule_lock(svc->vcpu);
 
                 svc->budget = STIME_MAX;
+                svc->budget_quota = 0;
 
                 vcpu_schedule_unlock(lock, svc->vcpu);
             }
@@ -3266,7 +3299,8 @@ csched2_dump_vcpu(struct csched2_private *prv, struct 
csched2_vcpu *svc)
     printk(" credit=%" PRIi32" [w=%u]", svc->credit, svc->weight);
 
     if ( has_cap(svc) )
-        printk(" budget=%"PRI_stime, svc->budget);
+        printk(" budget=%"PRI_stime"(%"PRI_stime")",
+               svc->budget, svc->budget_quota);
 
     printk(" load=%"PRI_stime" (~%"PRI_stime"%%)", svc->avgload,
            (svc->avgload * 100) >> prv->load_precision_shift);


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.