[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] credit2: fix credit reset happening too few times



commit dae7b62e976b28af9c8efa150618c25501bf1650
Author:     Dario Faggioli <dfaggioli@xxxxxxxx>
AuthorDate: Fri Apr 3 10:46:53 2020 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri Apr 3 10:46:53 2020 +0200

    credit2: fix credit reset happening too few times
    
    There is a bug in commit 5e4b4199667b9 ("xen: credit2: only reset
    credit on reset condition"). In fact, the aim of that commit was to
    make sure that we do not perform too many credit reset operations
    (which are not super cheap, and in an hot-path). But the check used
    to determine whether a reset is necessary was the wrong one.
    
    In fact, knowing just that some vCPUs have been skipped, while
    traversing the runqueue (in runq_candidate()), is not enough. We
    need to check explicitly whether the first vCPU in the runqueue
    has a negative amount of credit.
    
    Since a trace record is changed, this patch updates xentrace format file
    and xenalyze as well
    
    This should be backported.
    
    Signed-off-by: Dario Faggioli <dfaggioli@xxxxxxxx>
    Acked-by: George Dunlap <george.dunlap@xxxxxxxxxx>
---
 tools/xentrace/formats     |  2 +-
 tools/xentrace/xenalyze.c  |  8 +++-----
 xen/common/sched/credit2.c | 30 +++++++++++++-----------------
 3 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/tools/xentrace/formats b/tools/xentrace/formats
index 8f126f65f1..deac4d8598 100644
--- a/tools/xentrace/formats
+++ b/tools/xentrace/formats
@@ -67,7 +67,7 @@
 0x00022210  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  csched2:load_check     [ 
lrq_id[16]:orq_id[16] = 0x%(1)08x, delta = %(2)d ]
 0x00022211  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  csched2:load_balance   [ 
l_bavgload = 0x%(2)08x%(1)08x, o_bavgload = 0x%(4)08x%(3)08x, 
lrq_id[16]:orq_id[16] = 0x%(5)08x ]
 0x00022212  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  csched2:pick_cpu       [ 
b_avgload = 0x%(2)08x%(1)08x, dom:vcpu = 0x%(3)08x, rq_id[16]:new_cpu[16] = 
%(4)d ]
-0x00022213  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  csched2:runq_candidate [ 
dom:vcpu = 0x%(1)08x, credit = %(4)d, skipped_vcpus = %(3)d, tickled_cpu = 
%(2)d ]
+0x00022213  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  csched2:runq_candidate [ 
dom:vcpu = 0x%(1)08x, credit = %(3)d, tickled_cpu = %(2)d ]
 0x00022214  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  csched2:schedule       [ 
rq:cpu = 0x%(1)08x, tasklet[8]:idle[8]:smt_idle[8]:tickled[8] = %(2)08x ]
 0x00022215  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  csched2:ratelimit      [ 
dom:vcpu = 0x%(1)08x, runtime = %(2)d ]
 0x00022216  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  csched2:runq_cand_chk  [ 
dom:vcpu = 0x%(1)08x ]
diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
index d3c8368e9d..b7f4e2bea8 100644
--- a/tools/xentrace/xenalyze.c
+++ b/tools/xentrace/xenalyze.c
@@ -7855,14 +7855,12 @@ void sched_process(struct pcpu_info *p)
             if (opt.dump_all) {
                 struct {
                     unsigned vcpuid:16, domid:16;
-                    unsigned tickled_cpu, skipped;
+                    unsigned tickled_cpu;
                     int credit;
                 } *r = (typeof(r))ri->d;
 
-                printf(" %s csched2:runq_candidate d%uv%u, credit = %d, "
-                       "%u vcpus skipped, ",
-                       ri->dump_header, r->domid, r->vcpuid,
-                       r->credit, r->skipped);
+                printf(" %s csched2:runq_candidate d%uv%u, credit = %d, ",
+                       ri->dump_header, r->domid, r->vcpuid, r->credit);
                 if (r->tickled_cpu == (unsigned)-1)
                     printf("no cpu was tickled\n");
                 else
diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index cbf9ce2b97..34f05c3e2a 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -3224,8 +3224,7 @@ csched2_runtime(const struct scheduler *ops, int cpu,
 static struct csched2_unit *
 runq_candidate(struct csched2_runqueue_data *rqd,
                struct csched2_unit *scurr,
-               int cpu, s_time_t now,
-               unsigned int *skipped)
+               int cpu, s_time_t now)
 {
     struct list_head *iter, *temp;
     const struct sched_resource *sr = get_sched_res(cpu);
@@ -3233,8 +3232,6 @@ runq_candidate(struct csched2_runqueue_data *rqd,
     struct csched2_private *prv = csched2_priv(sr->scheduler);
     bool yield = false, soft_aff_preempt = false;
 
-    *skipped = 0;
-
     if ( unlikely(is_idle_unit(scurr->unit)) )
     {
         snext = scurr;
@@ -3328,12 +3325,9 @@ runq_candidate(struct csched2_runqueue_data *rqd,
                         (unsigned char *)&d);
         }
 
-        /* Only consider units that are allowed to run on this processor. */
+        /* Only consider vcpus that are allowed to run on this processor. */
         if ( !cpumask_test_cpu(cpu, svc->unit->cpu_hard_affinity) )
-        {
-            (*skipped)++;
             continue;
-        }
 
         /*
          * If an unit is meant to be picked up by another processor, and such
@@ -3342,7 +3336,6 @@ runq_candidate(struct csched2_runqueue_data *rqd,
         if ( svc->tickled_cpu != -1 && svc->tickled_cpu != cpu &&
              cpumask_test_cpu(svc->tickled_cpu, &rqd->tickled) )
         {
-            (*skipped)++;
             SCHED_STAT_CRANK(deferred_to_tickled_cpu);
             continue;
         }
@@ -3354,7 +3347,6 @@ runq_candidate(struct csched2_runqueue_data *rqd,
         if ( sched_unit_master(svc->unit) != cpu
              && snext->credit + CSCHED2_MIGRATE_RESIST > svc->credit )
         {
-            (*skipped)++;
             SCHED_STAT_CRANK(migrate_resisted);
             continue;
         }
@@ -3378,14 +3370,13 @@ runq_candidate(struct csched2_runqueue_data *rqd,
     {
         struct {
             unsigned unit:16, dom:16;
-            unsigned tickled_cpu, skipped;
+            unsigned tickled_cpu;
             int credit;
         } d;
         d.dom = snext->unit->domain->domain_id;
         d.unit = snext->unit->unit_id;
         d.credit = snext->credit;
         d.tickled_cpu = snext->tickled_cpu;
-        d.skipped = *skipped;
         __trace_var(TRC_CSCHED2_RUNQ_CANDIDATE, 1,
                     sizeof(d),
                     (unsigned char *)&d);
@@ -3417,7 +3408,6 @@ static void csched2_schedule(
     struct csched2_runqueue_data *rqd;
     struct csched2_unit * const scurr = csched2_unit(currunit);
     struct csched2_unit *snext = NULL;
-    unsigned int skipped_units = 0;
     bool tickled;
     bool migrated = false;
 
@@ -3495,7 +3485,7 @@ static void csched2_schedule(
         snext = csched2_unit(sched_idle_unit(sched_cpu));
     }
     else
-        snext = runq_candidate(rqd, scurr, sched_cpu, now, &skipped_units);
+        snext = runq_candidate(rqd, scurr, sched_cpu, now);
 
     /* If switching from a non-idle runnable unit, put it
      * back on the runqueue. */
@@ -3507,6 +3497,8 @@ static void csched2_schedule(
     /* Accounting for non-idle tasks */
     if ( !is_idle_unit(snext->unit) )
     {
+        int top_credit;
+
         /* If switching, remove this from the runqueue and mark it scheduled */
         if ( snext != scurr )
         {
@@ -3534,11 +3526,15 @@ static void csched2_schedule(
          *  2) no other unit with higher credits wants to run.
          *
          * Here, where we want to check for reset, we need to make sure the
-         * proper unit is being used. In fact, runqueue_candidate() may have
-         * not returned the first unit in the runqueue, for various reasons
+         * proper unit is being used. In fact, runq_candidate() may have not
+         * returned the first unit in the runqueue, for various reasons
          * (e.g., affinity). Only trigger a reset when it does.
          */
-        if ( skipped_units == 0 && snext->credit <= CSCHED2_CREDIT_RESET )
+        if ( list_empty(&rqd->runq) )
+            top_credit = snext->credit;
+        else
+            top_credit = max(snext->credit, runq_elem(rqd->runq.next)->credit);
+        if ( top_credit <= CSCHED2_CREDIT_RESET )
         {
             reset_credit(sched_cpu, now, snext);
             balance_load(ops, sched_cpu, now);
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.