[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH for 4.7 2/4] xen: sched: fix killing an uninitialized timer in free_pdata.



On 03/05/16 22:46, Dario Faggioli wrote:
> commit 64269d9365 "sched: implement .init_pdata in Credit,
> Credit2 and RTDS" helped fixing Credit2 runqueues, and
> the races we had in switching scheduler for pCPUs, but
> introduced another issue. In fact, if CPU bringup fails
> during __cpu_up() (and, more precisely, after CPU_UP_PREPARE,
> but before CPU_STARTING) the CPU_UP_CANCELED notifier
> would be executed, which calls the free_pdata hook.
> 
> Such hook does, right now, two things: (1) undo the
> initialization done inside the init_pdata hook and (2)
> free the memory allocated by the alloc_pdata hook.
> 
> However, in the failure path just described, it is possible
> that only alloc_pdata were called, and this is potentially
> an issue (depending on how actually free_pdata does).
> 
> In fact, for Credit1 (the only scheduler that actually
> allocates per-pCPU data), this result in calling kill_timer()
> on a timer that had not yet been initialized, which causes
> the following:
> 
> (XEN) Xen call trace:
> (XEN)    [<000000000022e304>] timer.c#active_timer+0x8/0x24 (PC)
> (XEN)    [<000000000022f624>] kill_timer+0x108/0x2e0 (LR)
> (XEN)    [<00000000002208c0>] sched_credit.c#csched_free_pdata+0xd8/0x114
> (XEN)    [<0000000000227a18>] schedule.c#cpu_schedule_callback+0xc0/0x12c
> (XEN)    [<0000000000219944>] notifier_call_chain+0x78/0x9c
> (XEN)    [<00000000002015fc>] cpu_up+0x104/0x130
> (XEN)    [<000000000028f7c0>] start_xen+0xaf8/0xce0
> (XEN)    [<00000000810021d8>] 00000000810021d8
> (XEN)
> (XEN)
> (XEN) ****************************************
> (XEN) Panic on CPU 0:
> (XEN) Assertion 'timer->status >= TIMER_STATUS_inactive' failed at timer.c:279
> (XEN) ****************************************
> 
> Solve this by making the scheduler hooks API symmetric again,
> i.e., by adding a deinit_pdata hook and making it responsible
> of undoing what init_pdata did, rather than asking to free_pdata
> to do everything.
> 
> This is cleaner and, in the case at hand, makes it possible to
> only call free_pdata (which is the right thing to do) as only
> allocation and no initialization was performed.
> 
> Reported-by: Julien Grall <julien.grall@xxxxxxx>
> Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>

Looks good, thanks!

Reviewed-by: George Dunlap <george.dunlap@xxxxxxxxxx>


> ---
> Cc: George Dunlap <george.dunlap@xxxxxxxxxx>
> Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
> Cc: Varun.Swara@xxxxxxx
> Cc: Steve Capper <Steve.Capper@xxxxxxx>
> Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
> ---
>  xen/common/sched_credit.c  |   31 +++++++++++++++++++++++++++----
>  xen/common/sched_credit2.c |   16 +++++++++++++---
>  xen/common/schedule.c      |   38 +++++++++++++++++++++++++++++++++++++-
>  xen/include/xen/sched-if.h |    1 +
>  4 files changed, 78 insertions(+), 8 deletions(-)
> 
> diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
> index bc36837..db4d42a 100644
> --- a/xen/common/sched_credit.c
> +++ b/xen/common/sched_credit.c
> @@ -485,11 +485,35 @@ static void
>  csched_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
>  {
>      struct csched_private *prv = CSCHED_PRIV(ops);
> +
> +    /*
> +     * pcpu either points to a valid struct csched_pcpu, or is NULL, if we're
> +     * beeing called from CPU_UP_CANCELLED, because bringing up a pCPU failed
> +     * very early. xfree() does not really mind, but we want to be sure that,
> +     * when we get here, either init_pdata has never been called, or
> +     * deinit_pdata has been called already.
> +     */
> +    ASSERT(!cpumask_test_cpu(cpu, prv->cpus));
> +
> +    xfree(pcpu);
> +}
> +
> +static void
> +csched_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
> +{
> +    struct csched_private *prv = CSCHED_PRIV(ops);
>      struct csched_pcpu *spc = pcpu;
>      unsigned long flags;
>  
> -    if ( spc == NULL )
> -        return;
> +    /*
> +     * Scheduler specific data for this pCPU must still be there and and be
> +     * valid. In fact, if we are here:
> +     *  1. alloc_pdata must have been called for this cpu, and free_pdata
> +     *     must not have been called on it before us,
> +     *  2. init_pdata must have been called on this cpu, and deinit_pdata
> +     *     (us!) must not have been called on it already.
> +     */
> +    ASSERT(spc && cpumask_test_cpu(cpu, prv->cpus));
>  
>      spin_lock_irqsave(&prv->lock, flags);
>  
> @@ -507,8 +531,6 @@ csched_free_pdata(const struct scheduler *ops, void 
> *pcpu, int cpu)
>          kill_timer(&prv->master_ticker);
>  
>      spin_unlock_irqrestore(&prv->lock, flags);
> -
> -    xfree(spc);
>  }
>  
>  static void *
> @@ -2091,6 +2113,7 @@ static const struct scheduler sched_credit_def = {
>      .free_vdata     = csched_free_vdata,
>      .alloc_pdata    = csched_alloc_pdata,
>      .init_pdata     = csched_init_pdata,
> +    .deinit_pdata   = csched_deinit_pdata,
>      .free_pdata     = csched_free_pdata,
>      .switch_sched   = csched_switch_sched,
>      .alloc_domdata  = csched_alloc_domdata,
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index 50c1b9d..803cc44 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -2201,6 +2201,12 @@ csched2_init_pdata(const struct scheduler *ops, void 
> *pdata, int cpu)
>      unsigned long flags;
>      unsigned rqi;
>  
> +    /*
> +     * pdata contains what alloc_pdata returned. But since we don't (need to)
> +     * implement alloc_pdata, either that's NULL, or something is very wrong!
> +     */
> +    ASSERT(!pdata);
> +
>      spin_lock_irqsave(&prv->lock, flags);
>      old_lock = pcpu_schedule_lock(cpu);
>  
> @@ -2261,7 +2267,7 @@ csched2_switch_sched(struct scheduler *new_ops, 
> unsigned int cpu,
>  }
>  
>  static void
> -csched2_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
> +csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
>  {
>      unsigned long flags;
>      struct csched2_private *prv = CSCHED2_PRIV(ops);
> @@ -2270,7 +2276,11 @@ csched2_free_pdata(const struct scheduler *ops, void 
> *pcpu, int cpu)
>  
>      spin_lock_irqsave(&prv->lock, flags);
>  
> -    ASSERT(cpumask_test_cpu(cpu, &prv->initialized));
> +    /*
> +     * alloc_pdata is not implemented, so pcpu must be NULL. On the other
> +     * hand, init_pdata must have been called for this pCPU.
> +     */
> +    ASSERT(!pcpu && cpumask_test_cpu(cpu, &prv->initialized));
>      
>      /* Find the old runqueue and remove this cpu from it */
>      rqi = prv->runq_map[cpu];
> @@ -2387,7 +2397,7 @@ static const struct scheduler sched_credit2_def = {
>      .alloc_vdata    = csched2_alloc_vdata,
>      .free_vdata     = csched2_free_vdata,
>      .init_pdata     = csched2_init_pdata,
> -    .free_pdata     = csched2_free_pdata,
> +    .deinit_pdata   = csched2_deinit_pdata,
>      .switch_sched   = csched2_switch_sched,
>      .alloc_domdata  = csched2_alloc_domdata,
>      .free_domdata   = csched2_free_domdata,
> diff --git a/xen/common/schedule.c b/xen/common/schedule.c
> index 5546999..80fea39 100644
> --- a/xen/common/schedule.c
> +++ b/xen/common/schedule.c
> @@ -1546,6 +1546,38 @@ static int cpu_schedule_callback(
>      struct schedule_data *sd = &per_cpu(schedule_data, cpu);
>      int rc = 0;
>  
> +    /*
> +     * From the scheduler perspective, bringing up a pCPU requires
> +     * allocating and initializing the per-pCPU scheduler specific data,
> +     * as well as "registering" this pCPU to the scheduler (which may
> +     * involve modifying some scheduler wide data structures).
> +     * This happens by calling the alloc_pdata and init_pdata hooks, in
> +     * this order. A scheduler that does not need to allocate any per-pCPU
> +     * data can avoid implementing alloc_pdata. init_pdata may, however, be
> +     * necessary/useful in this case too (e.g., it can contain the "register
> +     * the pCPU to the scheduler" part). alloc_pdata (if present) is called
> +     * during CPU_UP_PREPARE. init_pdata (if present) is called during
> +     * CPU_STARTING.
> +     *
> +     * On the other hand, at teardown, we need to reverse what has been done
> +     * during initialization, and then free the per-pCPU specific data. This
> +     * happens by calling the deinit_pdata and free_pdata hooks, in this
> +     * order. If no per-pCPU memory was allocated, there is no need to
> +     * provide an implementation of free_pdata. deinit_pdata may, however,
> +     * be necessary/useful in this case too (e.g., it can undo something done
> +     * on scheduler wide data structure during init_pdata). Both deinit_pdata
> +     * and free_pdata are called during CPU_DEAD.
> +     *
> +     * If someting goes wrong during bringup, we go to CPU_UP_CANCELLED
> +     * *before* having called init_pdata. In this case, as there is no
> +     * initialization needing undoing, only free_pdata should be called.
> +     * This means it is possible to call free_pdata just after alloc_pdata,
> +     * without a init_pdata/deinit_pdata "cycle" in between the two.
> +     *
> +     * So, in summary, the usage pattern should look either
> +     *  - alloc_pdata-->init_pdata-->deinit_pdata-->free_pdata, or
> +     *  - alloc_pdata-->free_pdata.
> +     */
>      switch ( action )
>      {
>      case CPU_STARTING:
> @@ -1554,8 +1586,10 @@ static int cpu_schedule_callback(
>      case CPU_UP_PREPARE:
>          rc = cpu_schedule_up(cpu);
>          break;
> -    case CPU_UP_CANCELED:
>      case CPU_DEAD:
> +        SCHED_OP(sched, deinit_pdata, sd->sched_priv, cpu);
> +        /* Fallthrough */
> +    case CPU_UP_CANCELED:
>          cpu_schedule_down(cpu);
>          break;
>      default:
> @@ -1713,6 +1747,8 @@ int schedule_cpu_switch(unsigned int cpu, struct 
> cpupool *c)
>  
>      SCHED_OP(new_ops, tick_resume, cpu);
>  
> +    SCHED_OP(old_ops, deinit_pdata, ppriv_old, cpu);
> +
>      SCHED_OP(old_ops, free_vdata, vpriv_old);
>      SCHED_OP(old_ops, free_pdata, ppriv_old, cpu);
>  
> diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
> index 1db7c8d..bc0e794 100644
> --- a/xen/include/xen/sched-if.h
> +++ b/xen/include/xen/sched-if.h
> @@ -138,6 +138,7 @@ struct scheduler {
>      void         (*free_pdata)     (const struct scheduler *, void *, int);
>      void *       (*alloc_pdata)    (const struct scheduler *, int);
>      void         (*init_pdata)     (const struct scheduler *, void *, int);
> +    void         (*deinit_pdata)   (const struct scheduler *, void *, int);
>      void         (*free_domdata)   (const struct scheduler *, void *);
>      void *       (*alloc_domdata)  (const struct scheduler *, struct domain 
> *);
>  
> 


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.