[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH RFC] xen: arm: context switch vtimer PPI state.



On Tue, 10 Feb 2015, Ian Campbell wrote:
> Stefano,
> 
> do you have any comments on the viability of the general approach here
> before I go off and start addressing the review comments?

I think that the general approach is OK


> Cheers,
> Ian.
> 
> On Mon, 2015-01-26 at 15:55 +0000, Ian Campbell wrote:
> > ... instead of artificially masking the timer interrupt in the timer
> > state and relying on the guest to unmask (which it isn't required to
> > do per the h/w spec, although Linux does)
> > 
> > To do this introduce the concept of routing a PPI to the currently
> > running VCPU. For such interrupts irq_get_domain returns NULL.
> > 
> > Then we make use of the GICD I[SC]ACTIVER registers to save and
> > restore the active state of the interrupt, which prevents the nested
> > invocations which the current masking works around.
> > 
> > For edge triggered interrupts we also need to context switch the
> > pending bit via I[SC]PENDR. Note that for level triggered interrupts
> > SPENDR sets a latch which is only cleared by ICPENDR (and not by h/w
> > state changes), therefore we do not want to context switch the pending
> > state for level PPIs -- instead we rely on the context switch of the
> > peripheral to restore the correct level.
> > 
> > RFC Only:
> >  - Not implemented for GIC v3 yet.
> >  - Lightly tested with hackbench on systems with level and edge
> >    triggered vtimer PPI.
> >  - Is irq_get_domain == NULL to indicate route-to-current-vcpu the
> >    best idea? Any alternatives?
> > 
> > Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> > ---
> >  xen/arch/arm/gic-v2.c        |   84 
> > ++++++++++++++++++++++++++++++++++++++++++
> >  xen/arch/arm/gic.c           |   32 +++++++++++++---
> >  xen/arch/arm/irq.c           |   48 ++++++++++++++++++++++--
> >  xen/arch/arm/time.c          |   26 +------------
> >  xen/arch/arm/vtimer.c        |   24 ++++++++++--
> >  xen/include/asm-arm/domain.h |   11 ++++++
> >  xen/include/asm-arm/gic.h    |   14 +++++++
> >  xen/include/asm-arm/irq.h    |    1 +
> >  8 files changed, 204 insertions(+), 36 deletions(-)
> > 
> > diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
> > index 31fb81a..9074aca 100644
> > --- a/xen/arch/arm/gic-v2.c
> > +++ b/xen/arch/arm/gic-v2.c
> > @@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
> >      writel_gich(0, GICH_HCR);
> >  }
> >  
> > +static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,

Shouldn't the argument be a physical irq? Maybe irq_desc?


> > +                                      struct hwppi_state *s)
> > +{
> > +    struct pending_irq *p = irq_to_pending(v, virq);
> > +    const unsigned int offs = virq / 32;
> > +    const unsigned int mask = (1u << (virq % 32));
> > +    const unsigned int pendingr = readl_gicd(GICD_ISPENDR + offs*4);
> > +    const unsigned int activer = readl_gicd(GICD_ISACTIVER + offs*4);
> > +    const unsigned int enabler = readl_gicd(GICD_ISENABLER + offs*4);
> > +    const bool is_edge = !!(p->desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
> > +
> > +    ASSERT(!is_idle_vcpu(v));
> > +
> > +    s->active = !!(activer & mask);
> > +    s->enabled = !!(enabler & mask);
> > +    s->pending = !!(pendingr & mask);
> > +    s->inprogress = test_and_clear_bit(_IRQ_INPROGRESS, &p->desc->status);
> > +
> > +    /* Write a 1 to IC...R to clear the corresponding bit of state */
> > +    if ( s->active )
> > +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> > +    /*
> > +     * For an edge interrupt clear the pending state, for a level interrupt
> > +     * this clears the latch there is no need since saving the peripheral 
> > state
> > +     * (and/or restoring the next VCPU) will cause the correct action.
> > +     */
> > +    if ( is_edge && s->pending )
> > +        writel_gicd(mask, GICD_ICPENDR + offs*4);
> > +
> > +    if ( s->enabled )
> > +    {
> > +        writel_gicd(mask, GICD_ICENABLER + offs*4);
> > +        set_bit(_IRQ_DISABLED, &p->desc->status);
> > +    }
> > +
> > +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> > +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> > +}
> > +
> >  static void gicv2_restore_state(const struct vcpu *v)
> >  {
> >      int i;
> > @@ -168,6 +207,49 @@ static void gicv2_restore_state(const struct vcpu *v)
> >      writel_gich(GICH_HCR_EN, GICH_HCR);
> >  }
> >  
> > +static void gicv2_restore_hwppi(struct vcpu *v, const unsigned virq,

same here


> > +                                const struct hwppi_state *s)
> > +{
> > +    struct pending_irq *p = irq_to_pending(v, virq);
> > +    const unsigned int offs = virq / 32;
> > +    const unsigned int mask = (1u << (virq % 32));
> > +    struct irq_desc *desc = irq_to_desc(virq);
> > +    const bool is_edge = !!(desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
> > +    struct pending_irq *pending = irq_to_pending(v, virq);
> > +
> > +    pending->desc = desc; /* Migrate to new physical processor */
> > +
> > +    /*
> > +     * The IRQ must always have been set inactive and masked etc by
> > +     * the saving of the previous state via save_and_mask_hwppi.
> > +     */
> > +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> > +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> > +
> > +    if ( s->active )
> > +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> > +
> > +    /*
> > +     * Restore pending state for edge triggered interrupts only. For
> > +     * level triggered interrupts the level will be restored as
> > +     * necessary by restoring the state of the relevant peripheral.
> > +     *
> > +     * For a level triggered interrupt ISPENDR acts as a *latch* which
> > +     * is only cleared by ICPENDR (i.e. the input level is no longer
> > +     * relevant). We certainly do not want that here.
> > +     */
> > +    if ( is_edge && s->pending )
> > +        writel_gicd(mask, GICD_ISPENDR + offs*4);
> > +    if ( s->enabled )
> > +    {
> > +        clear_bit(_IRQ_DISABLED, &p->desc->status);
> > +        dsb(sy);
> > +        writel_gicd(mask, GICD_ISENABLER + offs*4);
> > +    }
> > +    if ( s->inprogress )
> > +        set_bit(_IRQ_INPROGRESS, &p->desc->status);
> > +}
> > +
> >  static void gicv2_dump_state(const struct vcpu *v)
> >  {
> >      int i;
> > @@ -660,7 +742,9 @@ const static struct gic_hw_operations gicv2_ops = {
> >      .info                = &gicv2_info,
> >      .secondary_init      = gicv2_secondary_cpu_init,
> >      .save_state          = gicv2_save_state,
> > +    .save_and_mask_hwppi = gicv2_save_and_mask_hwppi,
> >      .restore_state       = gicv2_restore_state,
> > +    .restore_hwppi       = gicv2_restore_hwppi,
> >      .dump_state          = gicv2_dump_state,
> >      .gicv_setup          = gicv2v_setup,
> >      .gic_host_irq_type   = &gicv2_host_irq_type,
> > diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> > index 7d4ee19..7ea980d 100644
> > --- a/xen/arch/arm/gic.c
> > +++ b/xen/arch/arm/gic.c
> > @@ -81,6 +81,12 @@ void gic_save_state(struct vcpu *v)
> >      isb();
> >  }
> >  
> > +void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
> > +                             struct hwppi_state *s)
> > +{
> > +    gic_hw_ops->save_and_mask_hwppi(v, virq, s);
> > +}
> > +
> >  void gic_restore_state(struct vcpu *v)
> >  {
> >      ASSERT(!local_irq_is_enabled());
> > @@ -94,6 +100,12 @@ void gic_restore_state(struct vcpu *v)
> >      gic_restore_pending_irqs(v);
> >  }
> >  
> > +void gic_restore_hwppi(struct vcpu *v, const unsigned virq,
> > +                       const struct hwppi_state *s)
> > +{
> > +    gic_hw_ops->restore_hwppi(v, virq, s);
> > +}
> > +
> >  /*
> >   * needs to be called with a valid cpu_mask, ie each cpu in the mask has
> >   * already called gic_cpu_init
> > @@ -125,11 +137,15 @@ void gic_route_irq_to_xen(struct irq_desc *desc, 
> > const cpumask_t *cpu_mask,
> >  
> >  /* Program the GIC to route an interrupt to a guest
> >   *   - desc.lock must be held
> > + *   - d may be NULL, in which case interrupt *must* be a PPI and is 
> > routed to
> > + *     the vcpu currently running when that PPI fires. In this case the 
> > code
> > + *     responsible for the related hardware must save and restore the PPI 
> > with
> > + *     gic_save_and_mask_hwppi/gic_restore_hwppi.
> > + *   - if d is non-NULL then the interrupt *must* be an SPI.
> >   */
> >  void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
> >                              const cpumask_t *cpu_mask, unsigned int 
> > priority)
> >  {
> > -    struct pending_irq *p;
> >      ASSERT(spin_is_locked(&desc->lock));
> >  
> >      desc->handler = gic_hw_ops->gic_guest_irq_type;
> > @@ -137,10 +153,16 @@ void gic_route_irq_to_guest(struct domain *d, struct 
> > irq_desc *desc,
> >  
> >      gic_set_irq_properties(desc, cpumask_of(smp_processor_id()), 
> > GIC_PRI_IRQ);
> >  
> > -    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> > -     * route SPIs to guests, it doesn't make any difference. */
> > -    p = irq_to_pending(d->vcpu[0], desc->irq);
> > -    p->desc = desc;
> > +    if ( d )
> > +    {
> > +        struct pending_irq *p;
> > +
> > +        /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> > +         * route SPIs to guests, it doesn't make any difference. */
> > +        p = irq_to_pending(d->vcpu[0], desc->irq);
> > +        p->desc = desc;
> > +    }
> > +    /* else p->desc init'd on ctxt restore in gic_restore_hwppi */
> >  }
> >  
> >  int gic_irq_xlate(const u32 *intspec, unsigned int intsize,
> > diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
> > index ebdf19a..93c38ff 100644
> > --- a/xen/arch/arm/irq.c
> > +++ b/xen/arch/arm/irq.c
> > @@ -202,6 +202,19 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int 
> > irq, int is_fiq)
> >          goto out;
> >      }
> >  
> > +    if ( irq == current->arch.virt_timer.irq )
> > +    {
> > +        ASSERT(!irq_get_domain(desc));
> > +
> > +        desc->handler->end(desc);
> > +
> > +        set_bit(_IRQ_INPROGRESS, &desc->status);
> > +        desc->arch.eoi_cpu = smp_processor_id();
> > +
> > +        vgic_vcpu_inject_irq(current, irq);
> > +        goto out_no_end;
> > +    }

I don't think we should special case virt_timer.irq here. I would try to
reuse the normal _IRQ_GUEST path or make this if generic for any PPIs
routed to guests.


> >      if ( test_bit(_IRQ_GUEST, &desc->status) )
> >      {
> >          struct domain *d = irq_get_domain(desc);
> > @@ -346,8 +359,12 @@ int setup_irq(unsigned int irq, unsigned int irqflags, 
> > struct irqaction *new)
> >          struct domain *d = irq_get_domain(desc);
> >  
> >          spin_unlock_irqrestore(&desc->lock, flags);
> > -        printk(XENLOG_ERR "ERROR: IRQ %u is already in use by the domain 
> > %u\n",
> > -               irq, d->domain_id);
> > +        if ( d )
> > +            printk(XENLOG_ERR "ERROR: IRQ %u is already in use by domain 
> > %u\n",
> > +                   irq, d->domain_id);
> > +        else
> > +            printk(XENLOG_ERR
> > +                   "ERROR: IRQ %u is already in use by <current-vcpu>\n", 
> > irq);
> >          return -EBUSY;
> >      }
> >  
> > @@ -378,6 +395,10 @@ err:
> >      return rc;
> >  }
> >  
> > +/*
> > + * If d == NULL then IRQ is routed to current vcpu at time it is received 
> > and
> > + * must be a PPI.
> > + */
> >  int route_irq_to_guest(struct domain *d, unsigned int irq,
> >                         const char * devname)
> >  {
> > @@ -386,6 +407,8 @@ int route_irq_to_guest(struct domain *d, unsigned int 
> > irq,
> >      unsigned long flags;
> >      int retval = 0;
> >  
> > +    ASSERT( d || ( irq >=16 && irq < 32 ) );
> > +
> >      action = xmalloc(struct irqaction);
> >      if (!action)
> >          return -ENOMEM;
> > @@ -406,11 +429,23 @@ int route_irq_to_guest(struct domain *d, unsigned int 
> > irq,
> >          struct domain *ad = irq_get_domain(desc);
> >  
> >          if ( test_bit(_IRQ_GUEST, &desc->status) && d == ad )
> > +        {
> > +            printk(XENLOG_ERR
> > +                   "ERROR: IRQ %u is already routed to domain %p\n",
> > +                   irq, ad);
> >              goto out;
> > +        }
> >  
> >          if ( test_bit(_IRQ_GUEST, &desc->status) )
> > -            printk(XENLOG_ERR "ERROR: IRQ %u is already used by domain 
> > %u\n",
> > -                   irq, ad->domain_id);
> > +        {
> > +            if ( ad )
> > +                printk(XENLOG_ERR
> > +                       "ERROR: IRQ %u is already used by domain %u\n",
> > +                       irq, ad->domain_id);
> > +            else
> > +                printk(XENLOG_ERR
> > +                       "ERROR: IRQ %u is already used by 
> > <current-vcpu>\n", irq);
> > +        }
> >          else
> >              printk(XENLOG_ERR "ERROR: IRQ %u is already used by Xen\n", 
> > irq);
> >          retval = -EBUSY;
> > @@ -433,6 +468,11 @@ out:
> >      return retval;
> >  }
> >  
> > +int route_irq_to_current_vcpu(unsigned int irq, const char *devname)
> > +{
> > +    return route_irq_to_guest(NULL, irq, devname);
> > +}
> > +
> >  /*
> >   * pirq event channels. We don't use these on ARM, instead we use the
> >   * features of the GIC to inject virtualised normal interrupts.
> > diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
> > index 455f217..ffa5eef 100644
> > --- a/xen/arch/arm/time.c
> > +++ b/xen/arch/arm/time.c
> > @@ -169,28 +169,6 @@ static void timer_interrupt(int irq, void *dev_id, 
> > struct cpu_user_regs *regs)
> >      }
> >  }
> >  
> > -static void vtimer_interrupt(int irq, void *dev_id, struct cpu_user_regs 
> > *regs)
> > -{
> > -    /*
> > -     * Edge-triggered interrupts can be used for the virtual timer. Even
> > -     * if the timer output signal is masked in the context switch, the
> > -     * GIC will keep track that of any interrupts raised while IRQS are
> > -     * disabled. As soon as IRQs are re-enabled, the virtual interrupt
> > -     * will be injected to Xen.
> > -     *
> > -     * If an IDLE vCPU was scheduled next then we should ignore the
> > -     * interrupt.
> > -     */
> > -    if ( unlikely(is_idle_vcpu(current)) )
> > -        return;
> > -
> > -    perfc_incr(virt_timer_irqs);
> > -
> > -    current->arch.virt_timer.ctl = READ_SYSREG32(CNTV_CTL_EL0);
> > -    WRITE_SYSREG32(current->arch.virt_timer.ctl | CNTx_CTL_MASK, 
> > CNTV_CTL_EL0);
> > -    vgic_vcpu_inject_irq(current, current->arch.virt_timer.irq);
> > -}
> > -
> >  /* Set up the timer interrupt on this CPU */
> >  void __cpuinit init_timer_interrupt(void)
> >  {
> > @@ -204,8 +182,8 @@ void __cpuinit init_timer_interrupt(void)
> >  
> >      request_irq(timer_irq[TIMER_HYP_PPI], 0, timer_interrupt,
> >                  "hyptimer", NULL);
> > -    request_irq(timer_irq[TIMER_VIRT_PPI], 0, vtimer_interrupt,
> > -                   "virtimer", NULL);
> > +    route_irq_to_current_vcpu(timer_irq[TIMER_VIRT_PPI], "virtimer");
> > +
> >      request_irq(timer_irq[TIMER_PHYS_NONSECURE_PPI], 0, timer_interrupt,
> >                  "phytimer", NULL);
> >  }
> > diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> > index 848e2c6..d1f21a1 100644
> > --- a/xen/arch/arm/vtimer.c
> > +++ b/xen/arch/arm/vtimer.c
> > @@ -47,9 +47,14 @@ static void phys_timer_expired(void *data)
> >  static void virt_timer_expired(void *data)
> >  {
> >      struct vtimer *t = data;
> > -    t->ctl |= CNTx_CTL_MASK;
> > -    vgic_vcpu_inject_irq(t->v, t->irq);
> > -    perfc_incr(vtimer_virt_inject);
> > +    t->ctl |= CNTx_CTL_PENDING;
> > +    if ( !(t->ctl & CNTx_CTL_MASK) )
> > +    {
> > +        /* An edge triggered interrupt should now be pending. */
> > +        t->ppi_state.pending = true;
> > +        vcpu_unblock(t->v);

I was going to say that this is trouble because
local_events_need_delivery wouldn't recognize that we actually have an
event pending. But it doesn't matter because local_events_need_delivery
only works with the current vcpu and if this code trigger then the vcpu
that needs to receive the event cannot be current. So I think that is OK
but for clarity it might be better to add a check or a comment in
local_events_need_delivery_nomask anyway.


> > +        perfc_incr(vtimer_virt_inject);
> > +    }
> >  }
> >  
> >  int domain_vtimer_init(struct domain *d)
> > @@ -112,6 +117,15 @@ int virt_timer_save(struct vcpu *v)
> >          set_timer(&v->arch.virt_timer.timer, 
> > ticks_to_ns(v->arch.virt_timer.cval +
> >                    v->domain->arch.virt_timer_base.offset - boot_count));
> >      }
> > +
> > +    /*
> > +     * Since the vtimer irq is a PPI we don't need to worry about
> > +     * racing against it becoming active while we are saving the
> > +     * state, since that requires the guest to be reading the IAR.
> > +     */
> > +    gic_save_and_mask_hwppi(v, v->arch.virt_timer.irq,
> > +                            &v->arch.virt_timer.ppi_state);
> > +
> >      return 0;
> >  }
> >  
> > @@ -126,6 +140,10 @@ int virt_timer_restore(struct vcpu *v)
> >      WRITE_SYSREG64(v->domain->arch.virt_timer_base.offset, CNTVOFF_EL2);
> >      WRITE_SYSREG64(v->arch.virt_timer.cval, CNTV_CVAL_EL0);
> >      WRITE_SYSREG32(v->arch.virt_timer.ctl, CNTV_CTL_EL0);
> > +
> > +    gic_restore_hwppi(v, v->arch.virt_timer.irq,
> > +                      &v->arch.virt_timer.ppi_state);
> > +
> >      return 0;
> >  }
> >  
> > diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> > index 90ab9c3..dd50e2c 100644
> > --- a/xen/include/asm-arm/domain.h
> > +++ b/xen/include/asm-arm/domain.h
> > @@ -34,12 +34,23 @@ enum domain_type {
> >  extern int dom0_11_mapping;
> >  #define is_domain_direct_mapped(d) ((d) == hardware_domain && 
> > dom0_11_mapping)
> >  
> > +struct hwppi_state {
> > +    /* h/w state */
> > +    unsigned long enabled:1;
> > +    unsigned long pending:1;
> > +    unsigned long active:1;
> > +
> > +    /* Xen s/w state */
> > +    unsigned long inprogress:1;
> > +};

Using a uint32_t bitmask would be more in line the rest of the code
style, but it is just a matter of taste.


> >  struct vtimer {
> >      struct vcpu *v;
> >      int irq;
> >      struct timer timer;
> >      uint32_t ctl;
> >      uint64_t cval;
> > +    struct hwppi_state ppi_state;
> >  };
> >
> >  struct arch_domain
> > diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
> > index 187dc46..aa8cbac 100644
> > --- a/xen/include/asm-arm/gic.h
> > +++ b/xen/include/asm-arm/gic.h
> > @@ -247,6 +247,16 @@ extern int gicv_setup(struct domain *d);
> >  extern void gic_save_state(struct vcpu *v);
> >  extern void gic_restore_state(struct vcpu *v);
> >  
> > +/*
> > + * Save/restore the state of a single PPI which must be routed to
> > + * <current-vcpu> (that is, defined to be injected to the current vcpu).
> > + */
> > +struct hwppi_state;
> > +extern void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
> > +                                    struct hwppi_state *s);
> > +extern void gic_restore_hwppi(struct vcpu *v, unsigned virq,
> > +                              const struct hwppi_state *s);
> > +
> >  /* SGI (AKA IPIs) */
> >  enum gic_sgi {
> >      GIC_SGI_EVENT_CHECK = 0,
> > @@ -293,8 +303,12 @@ struct gic_hw_operations {
> >      const struct gic_info *info;
> >      /* Save GIC registers */
> >      void (*save_state)(struct vcpu *);
> > +    void (*save_and_mask_hwppi)(struct vcpu *v, const unsigned virq,
> > +                                struct hwppi_state *s);
> >      /* Restore GIC registers */
> >      void (*restore_state)(const struct vcpu *);
> > +    void (*restore_hwppi)(struct vcpu *v, const unsigned virq,
> > +                          const struct hwppi_state *s);
> >      /* Dump GIC LR register information */
> >      void (*dump_state)(const struct vcpu *);
> >      /* Map MMIO region of GIC */
> > diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
> > index 435dfcd..a08438e 100644
> > --- a/xen/include/asm-arm/irq.h
> > +++ b/xen/include/asm-arm/irq.h
> > @@ -42,6 +42,7 @@ void init_secondary_IRQ(void);
> >  
> >  int route_irq_to_guest(struct domain *d, unsigned int irq,
> >                         const char *devname);
> > +int route_irq_to_current_vcpu(unsigned int irq, const char *devname);
> >  void arch_move_irqs(struct vcpu *v);
> >  
> >  /* Set IRQ type for an SPI */
> 
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.