[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: NetBSD dom0 PVH: hardware interrupts stalls
On Thu, Nov 26, 2020 at 03:26:35PM +0100, Roger Pau Monné wrote: > On Thu, Nov 26, 2020 at 03:16:08PM +0100, Manuel Bouyer wrote: > > On Thu, Nov 26, 2020 at 02:34:44PM +0100, Roger Pau Monné wrote: > > > On Tue, Nov 24, 2020 at 05:09:14PM +0100, Manuel Bouyer wrote: > > > > On Tue, Nov 24, 2020 at 04:49:17PM +0100, Roger Pau Monné wrote: > > > > > Could you also give a try with ioapic_ack=new on the Xen command line? > > > > > > > > With this I still have the interrupt issue, but Xen doesn't panic on > > > > 'i'. > > > > http://www-soc.lip6.fr/~bouyer/xen-log8.txt > > > > > > Sorry for the delay, I have yet another debug patch for you to try. > > > Can you remove the ioapic_ack=new from the command line and rebuild > > > the hypervisor with the provided patch applied and debug trace > > > enabled? (`gmake -C xen menuconfig` and go into Debugging Options to > > > find it). > > > > menuconfig doesn't build on NetBSD, I set CONFIG_DEBUG_TRACE=y in > > .config. I guess it is enough ? > > > > For the record, my boot command line is now > > menu=Boot Xen PVH:load /test console=com0 root=dk0 -vx; multiboot > > /xen-test.gz dom0_mem=1024M console=com2 com2=57600,8n1,,0 loglvl=all > > guest_loglvl=all gnttab_max_nr_frames=64 dom0=pvh iommu=debug > > dom0_vcpus_pin sync_console dom0_max_vcpus=1 watchdog=force > > iommu=no-intremap > > > > > > > > > > Then once the system stalls use the 'T' debug key to dump the buffer. > > > > Here it is. It seems to be stuck in an infinite loop, I hit the 'R' key > > after several minutes > > http://www-soc.lip6.fr/~bouyer/xen-log9.txt > > Oh, that's actually very useful. The interrupt is being constantly > injected from the hardware and received by Xen, it's just not then > injected into dom0 - that's the bit we are missing. Let me look into > adding some more debug to that path, hopefully it will tell us where > things are getting blocked. 
So I have yet one more patch for you to try, this one has more debugging and a slight change in the emulated IO-APIC behavior. Depending on the result I might have to find a way to mask the interrupt so it doesn't spam the whole buffer in order for us to see exactly what triggered this scenario you are in. Thanks, Roger. ---8<--- diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c index 38ac5fb6c7..9db3dcc957 100644 --- a/xen/arch/x86/hvm/irq.c +++ b/xen/arch/x86/hvm/irq.c @@ -187,6 +187,10 @@ void hvm_gsi_assert(struct domain *d, unsigned int gsi) * to know if the GSI is pending or not. */ spin_lock(&d->arch.hvm.irq_lock); + if ( gsi == TRACK_IRQ ) + debugtrace_printk("hvm_gsi_assert irq %u trig %u assert count %u\n", + gsi, trig, hvm_irq->gsi_assert_count[gsi]); + if ( trig == VIOAPIC_EDGE_TRIG || !hvm_irq->gsi_assert_count[gsi] ) { if ( trig == VIOAPIC_LEVEL_TRIG ) diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c index 67d4a6237f..aeff9c7687 100644 --- a/xen/arch/x86/hvm/vioapic.c +++ b/xen/arch/x86/hvm/vioapic.c @@ -257,7 +257,17 @@ static void vioapic_write_redirent( vlapic_adjust_i8259_target(d); } else if ( ent.fields.trig_mode == VIOAPIC_EDGE_TRIG ) + { + if ( gsi == TRACK_IRQ ) + debugtrace_printk("vIO-APIC set edge trigger irq %u\n", gsi); pent->fields.remote_irr = 0; + if ( is_iommu_enabled(d) ) + { + spin_unlock(&d->arch.hvm.irq_lock); + hvm_dpci_eoi(d, gsi, pent); + spin_lock(&d->arch.hvm.irq_lock); + } + } else if ( !ent.fields.mask && !ent.fields.remote_irr && hvm_irq->gsi_assert_count[idx] ) @@ -278,6 +288,10 @@ static void vioapic_write_redirent( */ int ret = vioapic_hwdom_map_gsi(gsi, ent.fields.trig_mode, ent.fields.polarity); + + if ( gsi == TRACK_IRQ ) + debugtrace_printk("vIO-APIC UNMASK irq %u\n", gsi); + if ( ret ) { gprintk(XENLOG_ERR, @@ -285,6 +299,9 @@ static void vioapic_write_redirent( unmasked = 0; } } + else if ( is_hardware_domain(d) && gsi == TRACK_IRQ ) + debugtrace_printk("vIO-APIC MASK irq %u\n", 
gsi); + if ( gsi == 0 || unmasked ) pt_may_unmask_irq(d, NULL); @@ -405,6 +422,10 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin) ASSERT(spin_is_locked(&d->arch.hvm.irq_lock)); + if ( irq == TRACK_IRQ ) + debugtrace_printk("vIO-APIC deliver irq %u vector %u\n", + irq, vector); + HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x", diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c index 49bd778484..db7167eb4b 100644 --- a/xen/arch/x86/io_apic.c +++ b/xen/arch/x86/io_apic.c @@ -1641,6 +1641,9 @@ static void mask_and_ack_level_ioapic_irq(struct irq_desc *desc) unsigned long v; int i; + if ( desc->irq == TRACK_IRQ ) + debugtrace_printk("ACK irq %u\n", desc->irq); + irq_complete_move(desc); if ( !directed_eoi_enabled ) @@ -1688,6 +1691,9 @@ static void mask_and_ack_level_ioapic_irq(struct irq_desc *desc) static void end_level_ioapic_irq_old(struct irq_desc *desc, u8 vector) { + if ( desc->irq == TRACK_IRQ ) + debugtrace_printk("END irq %u\n", desc->irq); + if ( directed_eoi_enabled ) { if ( !(desc->status & (IRQ_DISABLED|IRQ_MOVE_PENDING)) ) diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c index 8d1f9a9fc6..baef41cd37 100644 --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -1109,6 +1109,10 @@ static void irq_guest_eoi_timer_fn(void *data) unsigned int i, irq = desc - irq_desc; irq_guest_action_t *action; + if ( desc->irq == TRACK_IRQ ) + debugtrace_printk("irq_guest_eoi_timer_fn irq %u status %x\n", + desc->irq, desc->status); + spin_lock_irq(&desc->lock); if ( !(desc->status & IRQ_GUEST) ) @@ -1118,6 +1122,10 @@ static void irq_guest_eoi_timer_fn(void *data) ASSERT(action->ack_type != ACKTYPE_NONE); + if ( desc->irq == TRACK_IRQ ) + debugtrace_printk("ack_type %u in_flight %u\n", + action->ack_type, action->in_flight); + /* * Is no IRQ in flight at all, or another instance of this timer already * running? Skip everything to avoid forcing an EOI early. 
@@ -1837,6 +1845,10 @@ static void do_IRQ_guest(struct irq_desc *desc, unsigned int vector) unsigned int i; struct pending_eoi *peoi = this_cpu(pending_eoi); + if ( desc->irq == TRACK_IRQ ) + debugtrace_printk("do_IRQ_guest irq %u\n", desc->irq); + + if ( unlikely(!action->nr_guests) ) { /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. */ diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c index 6b1305a3e5..92f3670508 100644 --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -828,6 +828,9 @@ int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq) !pirq_dpci || !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) return 0; + if ( pirq->pirq == TRACK_IRQ ) + debugtrace_printk("hvm_do_IRQ_dpci irq %u\n", pirq->pirq); + pirq_dpci->masked = 1; raise_softirq_for(pirq_dpci); return 1; @@ -1010,6 +1013,9 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi, if ( !is_iommu_enabled(d) ) return; + if ( guest_gsi == TRACK_IRQ ) + debugtrace_printk("hvm_dpci_eoi irq %u\n", guest_gsi); + if ( is_hardware_domain(d) ) { spin_lock(&d->event_lock); diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h index 43d567fe44..91579c33b9 100644 --- a/xen/include/xen/irq.h +++ b/xen/include/xen/irq.h @@ -174,4 +174,6 @@ unsigned int arch_hwdom_irqs(domid_t); void arch_evtchn_bind_pirq(struct domain *, int pirq); #endif +#define TRACK_IRQ 34 + #endif /* __XEN_IRQ_H__ */
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |