Xen project Mailing List

Re: [Xen-devel] [PATCH RFC] xen: arm: context switch vtimer PPI state.

On Tue, 10 Feb 2015, Ian Campbell wrote: > Stefano, > > do you have any comments on the viability of the general approach here > before I go off and start addressing the review comments? I think that the general approach is OK > Cheers, > Ian. > > On Mon, 2015-01-26 at 15:55 +0000, Ian Campbell wrote: > > ... instead of artificially masking the timer interrupt in the timer > > state and relying on the guest to unmask (which it isn't required to > > do per the h/w spec, although Linux does) > > > > To do this introduce the concept of routing a PPI to the currently > > running VCPU. For such interrupts irq_get_domain returns NULL. > > > > Then we make use of the GICD I[SC]ACTIVER registers to save and > > restore the active state of the interrupt, which prevents the nested > > invocations which the current masking works around. > > > > For edge triggered interrupts we also need to context switch the > > pending bit via I[SC]PENDR. Note that for level triggered interrupts > > SPENDR sets a latch which is only cleared by ICPENDR (and not by h/w > > state changes), therefore we do not want to context switch the pending > > state for level PPIs -- instead we rely on the context switch of the > > peripheral to restore the correct level. > > > > RFC Only: > > - Not implemented for GIC v3 yet. > > - Lightly tested with hackbench on systems with level and edge > > triggered vtimer PPI. > > - Is irq_get_domain == NULL to indicate route-to-current-vcpu the > > best idea? Any alternatives? > > > > Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx> > > --- > > xen/arch/arm/gic-v2.c | 84 > > ++++++++++++++++++++++++++++++++++++++++++ > > xen/arch/arm/gic.c | 32 +++++++++++++--- > > xen/arch/arm/irq.c | 48 ++++++++++++++++++++++-- > > xen/arch/arm/time.c | 26 +------------ > > xen/arch/arm/vtimer.c | 24 ++++++++++-- > > xen/include/asm-arm/domain.h | 11 ++++++ > > xen/include/asm-arm/gic.h | 14 +++++++ > > xen/include/asm-arm/irq.h | 1 + > > 8 files changed, 204 insertions(+), 36 deletions(-) > > > > diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c > > index 31fb81a..9074aca 100644 > > --- a/xen/arch/arm/gic-v2.c > > +++ b/xen/arch/arm/gic-v2.c > > @@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v) > > writel_gich(0, GICH_HCR); > > } > > > > +static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq, Shouldn't the argument be a physical irq? Maybe irq_desc? > > + struct hwppi_state *s) > > +{ > > + struct pending_irq *p = irq_to_pending(v, virq); > > + const unsigned int offs = virq / 32; > > + const unsigned int mask = (1u << (virq % 32)); > > + const unsigned int pendingr = readl_gicd(GICD_ISPENDR + offs*4); > > + const unsigned int activer = readl_gicd(GICD_ISACTIVER + offs*4); > > + const unsigned int enabler = readl_gicd(GICD_ISENABLER + offs*4); > > + const bool is_edge = !!(p->desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH); > > + > > + ASSERT(!is_idle_vcpu(v)); > > + > > + s->active = !!(activer & mask); > > + s->enabled = !!(enabler & mask); > > + s->pending = !!(pendingr & mask); > > + s->inprogress = test_and_clear_bit(_IRQ_INPROGRESS, &p->desc->status); > > + > > + /* Write a 1 to IC...R to clear the corresponding bit of state */ > > + if ( s->active ) > > + writel_gicd(mask, GICD_ICACTIVER + offs*4); > > + /* > > + * For an edge interrupt clear the pending state, for a level interrupt > > + * this clears the latch there is no need since saving the peripheral > > state > > + * (and/or restoring the next VCPU) will cause the correct action. > > + */ > > + if ( is_edge && s->pending ) > > + writel_gicd(mask, GICD_ICPENDR + offs*4); > > + > > + if ( s->enabled ) > > + { > > + writel_gicd(mask, GICD_ICENABLER + offs*4); > > + set_bit(_IRQ_DISABLED, &p->desc->status); > > + } > > + > > + ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask)); > > + ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask)); > > +} > > + > > static void gicv2_restore_state(const struct vcpu *v) > > { > > int i; > > @@ -168,6 +207,49 @@ static void gicv2_restore_state(const struct vcpu *v) > > writel_gich(GICH_HCR_EN, GICH_HCR); > > } > > > > +static void gicv2_restore_hwppi(struct vcpu *v, const unsigned virq, same here > > + const struct hwppi_state *s) > > +{ > > + struct pending_irq *p = irq_to_pending(v, virq); > > + const unsigned int offs = virq / 32; > > + const unsigned int mask = (1u << (virq % 32)); > > + struct irq_desc *desc = irq_to_desc(virq); > > + const bool is_edge = !!(desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH); > > + struct pending_irq *pending = irq_to_pending(v, virq); > > + > > + pending->desc = desc; /* Migrate to new physical processor */ > > + > > + /* > > + * The IRQ must always have been set inactive and masked etc by > > + * the saving of the previous state via save_and_mask_hwppi. > > + */ > > + ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask)); > > + ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask)); > > + > > + if ( s->active ) > > + writel_gicd(mask, GICD_ICACTIVER + offs*4); > > + > > + /* > > + * Restore pending state for edge triggered interrupts only. For > > + * level triggered interrupts the level will be restored as > > + * necessary by restoring the state of the relevant peripheral. > > + * > > + * For a level triggered interrupt ISPENDR acts as a *latch* which > > + * is only cleared by ICPENDR (i.e. the input level is no longer > > + * relevant). We certainly do not want that here. > > + */ > > + if ( is_edge && s->pending ) > > + writel_gicd(mask, GICD_ISPENDR + offs*4); > > + if ( s->enabled ) > > + { > > + clear_bit(_IRQ_DISABLED, &p->desc->status); > > + dsb(sy); > > + writel_gicd(mask, GICD_ISENABLER + offs*4); > > + } > > + if ( s->inprogress ) > > + set_bit(_IRQ_INPROGRESS, &p->desc->status); > > +} > > + > > static void gicv2_dump_state(const struct vcpu *v) > > { > > int i; > > @@ -660,7 +742,9 @@ const static struct gic_hw_operations gicv2_ops = { > > .info = &gicv2_info, > > .secondary_init = gicv2_secondary_cpu_init, > > .save_state = gicv2_save_state, > > + .save_and_mask_hwppi = gicv2_save_and_mask_hwppi, > > .restore_state = gicv2_restore_state, > > + .restore_hwppi = gicv2_restore_hwppi, > > .dump_state = gicv2_dump_state, > > .gicv_setup = gicv2v_setup, > > .gic_host_irq_type = &gicv2_host_irq_type, > > diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c > > index 7d4ee19..7ea980d 100644 > > --- a/xen/arch/arm/gic.c > > +++ b/xen/arch/arm/gic.c > > @@ -81,6 +81,12 @@ void gic_save_state(struct vcpu *v) > > isb(); > > } > > > > +void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq, > > + struct hwppi_state *s) > > +{ > > + gic_hw_ops->save_and_mask_hwppi(v, virq, s); > > +} > > + > > void gic_restore_state(struct vcpu *v) > > { > > ASSERT(!local_irq_is_enabled()); > > @@ -94,6 +100,12 @@ void gic_restore_state(struct vcpu *v) > > gic_restore_pending_irqs(v); > > } > > > > +void gic_restore_hwppi(struct vcpu *v, const unsigned virq, > > + const struct hwppi_state *s) > > +{ > > + gic_hw_ops->restore_hwppi(v, virq, s); > > +} > > + > > /* > > * needs to be called with a valid cpu_mask, ie each cpu in the mask has > > * already called gic_cpu_init > > @@ -125,11 +137,15 @@ void gic_route_irq_to_xen(struct irq_desc *desc, > > const cpumask_t *cpu_mask, > > > > /* Program the GIC to route an interrupt to a guest > > * - desc.lock must be held > > + * - d may be NULL, in which case interrupt *must* be a PPI and is > > routed to > > + * the vcpu currently running when that PPI fires. In this case the > > code > > + * responsible for the related hardware must save and restore the PPI > > with > > + * gic_save_and_mask_hwppi/gic_restore_hwppi. > > + * - if d is non-NULL then the interrupt *must* be an SPI. > > */ > > void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc, > > const cpumask_t *cpu_mask, unsigned int > > priority) > > { > > - struct pending_irq *p; > > ASSERT(spin_is_locked(&desc->lock)); > > > > desc->handler = gic_hw_ops->gic_guest_irq_type; > > @@ -137,10 +153,16 @@ void gic_route_irq_to_guest(struct domain *d, struct > > irq_desc *desc, > > > > gic_set_irq_properties(desc, cpumask_of(smp_processor_id()), > > GIC_PRI_IRQ); > > > > - /* Use vcpu0 to retrieve the pending_irq struct. Given that we only > > - * route SPIs to guests, it doesn't make any difference. */ > > - p = irq_to_pending(d->vcpu[0], desc->irq); > > - p->desc = desc; > > + if ( d ) > > + { > > + struct pending_irq *p; > > + > > + /* Use vcpu0 to retrieve the pending_irq struct. Given that we only > > + * route SPIs to guests, it doesn't make any difference. */ > > + p = irq_to_pending(d->vcpu[0], desc->irq); > > + p->desc = desc; > > + } > > + /* else p->desc init'd on ctxt restore in gic_restore_hwppi */ > > } > > > > int gic_irq_xlate(const u32 *intspec, unsigned int intsize, > > diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c > > index ebdf19a..93c38ff 100644 > > --- a/xen/arch/arm/irq.c > > +++ b/xen/arch/arm/irq.c > > @@ -202,6 +202,19 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int > > irq, int is_fiq) > > goto out; > > } > > > > + if ( irq == current->arch.virt_timer.irq ) > > + { > > + ASSERT(!irq_get_domain(desc)); > > + > > + desc->handler->end(desc); > > + > > + set_bit(_IRQ_INPROGRESS, &desc->status); > > + desc->arch.eoi_cpu = smp_processor_id(); > > + > > + vgic_vcpu_inject_irq(current, irq); > > + goto out_no_end; > > + } I don't think we should special case virt_timer.irq here. I would try to reuse the normal _IRQ_GUEST path or make this if generic for any PPIs routed to guests. > > if ( test_bit(_IRQ_GUEST, &desc->status) ) > > { > > struct domain *d = irq_get_domain(desc); > > @@ -346,8 +359,12 @@ int setup_irq(unsigned int irq, unsigned int irqflags, > > struct irqaction *new) > > struct domain *d = irq_get_domain(desc); > > > > spin_unlock_irqrestore(&desc->lock, flags); > > - printk(XENLOG_ERR "ERROR: IRQ %u is already in use by the domain > > %u\n", > > - irq, d->domain_id); > > + if ( d ) > > + printk(XENLOG_ERR "ERROR: IRQ %u is already in use by domain > > %u\n", > > + irq, d->domain_id); > > + else > > + printk(XENLOG_ERR > > + "ERROR: IRQ %u is already in use by <current-vcpu>\n", > > irq); > > return -EBUSY; > > } > > > > @@ -378,6 +395,10 @@ err: > > return rc; > > } > > > > +/* > > + * If d == NULL then IRQ is routed to current vcpu at time it is received > > and > > + * must be a PPI. > > + */ > > int route_irq_to_guest(struct domain *d, unsigned int irq, > > const char * devname) > > { > > @@ -386,6 +407,8 @@ int route_irq_to_guest(struct domain *d, unsigned int > > irq, > > unsigned long flags; > > int retval = 0; > > > > + ASSERT( d || ( irq >=16 && irq < 32 ) ); > > + > > action = xmalloc(struct irqaction); > > if (!action) > > return -ENOMEM; > > @@ -406,11 +429,23 @@ int route_irq_to_guest(struct domain *d, unsigned int > > irq, > > struct domain *ad = irq_get_domain(desc); > > > > if ( test_bit(_IRQ_GUEST, &desc->status) && d == ad ) > > + { > > + printk(XENLOG_ERR > > + "ERROR: IRQ %u is already routed to domain %p\n", > > + irq, ad); > > goto out; > > + } > > > > if ( test_bit(_IRQ_GUEST, &desc->status) ) > > - printk(XENLOG_ERR "ERROR: IRQ %u is already used by domain > > %u\n", > > - irq, ad->domain_id); > > + { > > + if ( ad ) > > + printk(XENLOG_ERR > > + "ERROR: IRQ %u is already used by domain %u\n", > > + irq, ad->domain_id); > > + else > > + printk(XENLOG_ERR > > + "ERROR: IRQ %u is already used by > > <current-vcpu>\n", irq); > > + } > > else > > printk(XENLOG_ERR "ERROR: IRQ %u is already used by Xen\n", > > irq); > > retval = -EBUSY; > > @@ -433,6 +468,11 @@ out: > > return retval; > > } > > > > +int route_irq_to_current_vcpu(unsigned int irq, const char *devname) > > +{ > > + return route_irq_to_guest(NULL, irq, devname); > > +} > > + > > /* > > * pirq event channels. We don't use these on ARM, instead we use the > > * features of the GIC to inject virtualised normal interrupts. > > diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c > > index 455f217..ffa5eef 100644 > > --- a/xen/arch/arm/time.c > > +++ b/xen/arch/arm/time.c > > @@ -169,28 +169,6 @@ static void timer_interrupt(int irq, void *dev_id, > > struct cpu_user_regs *regs) > > } > > } > > > > -static void vtimer_interrupt(int irq, void *dev_id, struct cpu_user_regs > > *regs) > > -{ > > - /* > > - * Edge-triggered interrupts can be used for the virtual timer. Even > > - * if the timer output signal is masked in the context switch, the > > - * GIC will keep track that of any interrupts raised while IRQS are > > - * disabled. As soon as IRQs are re-enabled, the virtual interrupt > > - * will be injected to Xen. > > - * > > - * If an IDLE vCPU was scheduled next then we should ignore the > > - * interrupt. > > - */ > > - if ( unlikely(is_idle_vcpu(current)) ) > > - return; > > - > > - perfc_incr(virt_timer_irqs); > > - > > - current->arch.virt_timer.ctl = READ_SYSREG32(CNTV_CTL_EL0); > > - WRITE_SYSREG32(current->arch.virt_timer.ctl | CNTx_CTL_MASK, > > CNTV_CTL_EL0); > > - vgic_vcpu_inject_irq(current, current->arch.virt_timer.irq); > > -} > > - > > /* Set up the timer interrupt on this CPU */ > > void __cpuinit init_timer_interrupt(void) > > { > > @@ -204,8 +182,8 @@ void __cpuinit init_timer_interrupt(void) > > > > request_irq(timer_irq[TIMER_HYP_PPI], 0, timer_interrupt, > > "hyptimer", NULL); > > - request_irq(timer_irq[TIMER_VIRT_PPI], 0, vtimer_interrupt, > > - "virtimer", NULL); > > + route_irq_to_current_vcpu(timer_irq[TIMER_VIRT_PPI], "virtimer"); > > + > > request_irq(timer_irq[TIMER_PHYS_NONSECURE_PPI], 0, timer_interrupt, > > "phytimer", NULL); > > } > > diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c > > index 848e2c6..d1f21a1 100644 > > --- a/xen/arch/arm/vtimer.c > > +++ b/xen/arch/arm/vtimer.c > > @@ -47,9 +47,14 @@ static void phys_timer_expired(void *data) > > static void virt_timer_expired(void *data) > > { > > struct vtimer *t = data; > > - t->ctl |= CNTx_CTL_MASK; > > - vgic_vcpu_inject_irq(t->v, t->irq); > > - perfc_incr(vtimer_virt_inject); > > + t->ctl |= CNTx_CTL_PENDING; > > + if ( !(t->ctl & CNTx_CTL_MASK) ) > > + { > > + /* An edge triggered interrupt should now be pending. */ > > + t->ppi_state.pending = true; > > + vcpu_unblock(t->v); I was going to say that this is trouble because local_events_need_delivery wouldn't recognize that we actually have an event pending. But it doesn't matter because local_events_need_delivery only works with the current vcpu and if this code trigger then the vcpu that needs to receive the event cannot be current. So I think that is OK but for clarity it might be better to add a check or a comment in local_events_need_delivery_nomask anyway. > > + perfc_incr(vtimer_virt_inject); > > + } > > } > > > > int domain_vtimer_init(struct domain *d) > > @@ -112,6 +117,15 @@ int virt_timer_save(struct vcpu *v) > > set_timer(&v->arch.virt_timer.timer, > > ticks_to_ns(v->arch.virt_timer.cval + > > v->domain->arch.virt_timer_base.offset - boot_count)); > > } > > + > > + /* > > + * Since the vtimer irq is a PPI we don't need to worry about > > + * racing against it becoming active while we are saving the > > + * state, since that requires the guest to be reading the IAR. > > + */ > > + gic_save_and_mask_hwppi(v, v->arch.virt_timer.irq, > > + &v->arch.virt_timer.ppi_state); > > + > > return 0; > > } > > > > @@ -126,6 +140,10 @@ int virt_timer_restore(struct vcpu *v) > > WRITE_SYSREG64(v->domain->arch.virt_timer_base.offset, CNTVOFF_EL2); > > WRITE_SYSREG64(v->arch.virt_timer.cval, CNTV_CVAL_EL0); > > WRITE_SYSREG32(v->arch.virt_timer.ctl, CNTV_CTL_EL0); > > + > > + gic_restore_hwppi(v, v->arch.virt_timer.irq, > > + &v->arch.virt_timer.ppi_state); > > + > > return 0; > > } > > > > diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h > > index 90ab9c3..dd50e2c 100644 > > --- a/xen/include/asm-arm/domain.h > > +++ b/xen/include/asm-arm/domain.h > > @@ -34,12 +34,23 @@ enum domain_type { > > extern int dom0_11_mapping; > > #define is_domain_direct_mapped(d) ((d) == hardware_domain && > > dom0_11_mapping) > > > > +struct hwppi_state { > > + /* h/w state */ > > + unsigned long enabled:1; > > + unsigned long pending:1; > > + unsigned long active:1; > > + > > + /* Xen s/w state */ > > + unsigned long inprogress:1; > > +}; Using a uint32_t bitmask would be more in line the rest of the code style, but it is just a matter of taste. > > struct vtimer { > > struct vcpu *v; > > int irq; > > struct timer timer; > > uint32_t ctl; > > uint64_t cval; > > + struct hwppi_state ppi_state; > > }; > > > > struct arch_domain > > diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h > > index 187dc46..aa8cbac 100644 > > --- a/xen/include/asm-arm/gic.h > > +++ b/xen/include/asm-arm/gic.h > > @@ -247,6 +247,16 @@ extern int gicv_setup(struct domain *d); > > extern void gic_save_state(struct vcpu *v); > > extern void gic_restore_state(struct vcpu *v); > > > > +/* > > + * Save/restore the state of a single PPI which must be routed to > > + * <current-vcpu> (that is, defined to be injected to the current vcpu). > > + */ > > +struct hwppi_state; > > +extern void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq, > > + struct hwppi_state *s); > > +extern void gic_restore_hwppi(struct vcpu *v, unsigned virq, > > + const struct hwppi_state *s); > > + > > /* SGI (AKA IPIs) */ > > enum gic_sgi { > > GIC_SGI_EVENT_CHECK = 0, > > @@ -293,8 +303,12 @@ struct gic_hw_operations { > > const struct gic_info *info; > > /* Save GIC registers */ > > void (*save_state)(struct vcpu *); > > + void (*save_and_mask_hwppi)(struct vcpu *v, const unsigned virq, > > + struct hwppi_state *s); > > /* Restore GIC registers */ > > void (*restore_state)(const struct vcpu *); > > + void (*restore_hwppi)(struct vcpu *v, const unsigned virq, > > + const struct hwppi_state *s); > > /* Dump GIC LR register information */ > > void (*dump_state)(const struct vcpu *); > > /* Map MMIO region of GIC */ > > diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h > > index 435dfcd..a08438e 100644 > > --- a/xen/include/asm-arm/irq.h > > +++ b/xen/include/asm-arm/irq.h > > @@ -42,6 +42,7 @@ void init_secondary_IRQ(void); > > > > int route_irq_to_guest(struct domain *d, unsigned int irq, > > const char *devname); > > +int route_irq_to_current_vcpu(unsigned int irq, const char *devname); > > void arch_move_irqs(struct vcpu *v); > > > > /* Set IRQ type for an SPI */ > > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.