[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 1/1] sched/cputime: do not decrease steal time after live migration on xen
>>> On 10.10.17 at 11:14, <dongli.zhang@xxxxxxxxxx> wrote: > --- a/kernel/sched/cputime.c > +++ b/kernel/sched/cputime.c > @@ -238,10 +238,17 @@ static __always_inline u64 > steal_account_process_time(u64 maxtime) > { > #ifdef CONFIG_PARAVIRT > if (static_key_false(&paravirt_steal_enabled)) { > - u64 steal; > + u64 steal, steal_time; > + s64 steal_delta; > + > + steal_time = paravirt_steal_clock(smp_processor_id()); > + steal = steal_delta = steal_time - this_rq()->prev_steal_time; > + > + if (unlikely(steal_delta < 0)) { > + this_rq()->prev_steal_time = steal_time; > + return 0; > + } > > - steal = paravirt_steal_clock(smp_processor_id()); > - steal -= this_rq()->prev_steal_time; > steal = min(steal, maxtime); > account_steal_time(steal); > this_rq()->prev_steal_time += steal; While I can see this making the issue less pronounced, I don't see how it fully addresses it: Why would only a negative delta represent a discontinuity? In our old XenoLinux derived kernel we had the change below (unlikely to be upstreamable as is, so just to give you an idea). 
Jan --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -112,6 +112,47 @@ static inline void task_group_account_fi cpuacct_account_field(p, index, tmp); } +#if !defined(CONFIG_XEN) || defined(CONFIG_VIRT_CPU_ACCOUNTING) +# define _cputime_adjust(t) (t) +#else +# include <linux/syscore_ops.h> +# define NS_PER_TICK (1000000000 / HZ) + +static DEFINE_PER_CPU(u64, steal_snapshot); +static DEFINE_PER_CPU(unsigned int, steal_residual); + +static u64 _cputime_adjust(u64 t) +{ + u64 s = this_vcpu_read(runstate.time[RUNSTATE_runnable]); + unsigned long adj = div_u64_rem(s - __this_cpu_read(steal_snapshot) + + __this_cpu_read(steal_residual), + NS_PER_TICK, + this_cpu_ptr(&steal_residual)); + + __this_cpu_write(steal_snapshot, s); + if (t < jiffies_to_nsecs(adj)) + return 0; + + return t - jiffies_to_nsecs(adj); +} + +static void steal_resume(void) +{ + _cputime_adjust((1ULL << 63) - 1); +} + +static struct syscore_ops steal_syscore_ops = { + .resume = steal_resume, +}; + +static int __init steal_register(void) +{ + register_syscore_ops(&steal_syscore_ops); + return 0; +} +core_initcall(steal_register); +#endif + /* * Account user cpu time to a process. * @p: the process that the cpu time gets accounted to @@ -128,7 +169,7 @@ void account_user_time(struct task_struc index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; /* Add user time to cpustat. */ - task_group_account_field(p, index, cputime); + task_group_account_field(p, index, _cputime_adjust(cputime)); /* Account for user time used */ acct_account_cputime(p); @@ -172,7 +213,7 @@ void account_system_index_time(struct ta account_group_system_time(p, cputime); /* Add system time to cpustat. 
*/ - task_group_account_field(p, index, cputime); + task_group_account_field(p, index, _cputime_adjust(cputime)); /* Account for system time used */ acct_account_cputime(p); @@ -224,9 +265,9 @@ void account_idle_time(u64 cputime) struct rq *rq = this_rq(); if (atomic_read(&rq->nr_iowait) > 0) - cpustat[CPUTIME_IOWAIT] += cputime; + cpustat[CPUTIME_IOWAIT] += _cputime_adjust(cputime); else - cpustat[CPUTIME_IDLE] += cputime; + cpustat[CPUTIME_IDLE] += _cputime_adjust(cputime); } /* _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |