[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: NetBSD dom0 PVH: hardware interrupts stalls


  • To: Manuel Bouyer <bouyer@xxxxxxxxxxxxxxx>
  • From: Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Date: Thu, 26 Nov 2020 16:09:37 +0100
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=citrix.com; dmarc=pass action=none header.from=citrix.com; dkim=pass header.d=citrix.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=Wu0uj+4INHqlDzBIKmTM1MaLYOMF9jLy6dsO+VElkyo=; b=mt0EhFiiEji+qD1WrZlW3aPQja5kD59a9KazNrtCE9fcqBWefT65NTfcygDMXMWDeh0wUWno1S1J+Mu+hA4H1pstrLdbAHgmSYwCZ6kbsRwj34xD5tn2ZniDPtik0u1vVWN+1Zdjiuzk2yBkK5nO6pfF716u7gmqWadpT74gt+3jteFDozjntspgSxI6+39UFOLfcaQbsOXDeI9zOuP0S/P4UUB4/RUkWSuSHyj7HgL8/ixe05wfTmBq6biWg3G6YJiKHxj9chutrDZ5uwz/cCYs5IKNNff9Ers+wLFMmoZfh5CRsJlBI08yZMSiWDLX+Kpce32JjUYUhEHI5VK/SA==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=emFxJG9hYcgVeUFIx4Nh9horP5kZlQDXp4qLuF2PhAFbwqh2L5F3H43CyQZsrqGxRjR3Ihb8qS+Gu7/1j+GWcs6GiYin7Cnn1MxudMHdysDpyZEMC2vRhBrDN75UIguUoj0N5wP9lfVUbNJeUfF9n6yZxQopEHMm3XbqcYZctZ/Xo2c+nyuqevPW5reF0vondFvS3iFk7kpZsr5klXTxo94GXAkg+hxF/MMgeEujC6j6xhNwWAn2Byml0Zu+C85tsuzMBeEl3x1yVzfdXmmqhc9lusdYWcfQAmqGnVSUDup+IMPlXS5J9goLsPfqlSS2Dfbm9g8fmWoMvJFNMdw0uA==
  • Authentication-results: esa3.hc3370-68.iphmx.com; dkim=pass (signature verified) header.i=@citrix.onmicrosoft.com
  • Cc: Jan Beulich <jbeulich@xxxxxxxx>, <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • Delivery-date: Thu, 26 Nov 2020 15:09:52 +0000
  • Ironport-sdr: GEClyimU1g6/KT5QycSY5EDEOjXRQRHUQ3ke+9+iSHW2REUnebOQL8pu/zknLYw7/ysg9Ifsd9 19RLeYK6AdONEq5Slk5ocpwrnM4FS909ShzG6Npu8zH3O2UsuXB0zkQvvXJPw8kq+e0sKbNX+K JNxXBx53DMZtvSJC1lnn3+kXUnq25KMs/mH7C+bnq1n7dmjdDjNOVtZDTF5ON+dGEM3HbdObPn 01tYjAcZmrGxbiZOaXPANPQY6tUb+Cweo/KGwaZKABUGDY7o20N3pZqUVT65Rltwzp/X3FkO+l vrg=
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On Thu, Nov 26, 2020 at 03:26:35PM +0100, Roger Pau Monné wrote:
> On Thu, Nov 26, 2020 at 03:16:08PM +0100, Manuel Bouyer wrote:
> > On Thu, Nov 26, 2020 at 02:34:44PM +0100, Roger Pau Monné wrote:
> > > On Tue, Nov 24, 2020 at 05:09:14PM +0100, Manuel Bouyer wrote:
> > > > On Tue, Nov 24, 2020 at 04:49:17PM +0100, Roger Pau Monné wrote:
> > > > > Could you also give a try with ioapic_ack=new on the Xen command line?
> > > > 
> > > > With this I still have the interrupt issue, but Xen doesn't panic on 
> > > > 'i'.
> > > > http://www-soc.lip6.fr/~bouyer/xen-log8.txt
> > > 
> > > Sorry for the delay, I have yet another debug patch for you to try.
> > > Can you remove the ioapic_ack=new from the command line and rebuild
> > > the hypervisor with the provided patch applied and debug trace
> > > enabled? (`gmake -C xen menuconfig` and go into Debugging Options to
> > > find it).
> > 
> > menuconfig doesn't build on NetBSD, so I set CONFIG_DEBUG_TRACE=y in
> > .config. I guess that is enough?
> > 
> > For the record, my boot command line is now
> > menu=Boot Xen PVH:load /test console=com0 root=dk0 -vx; multiboot 
> > /xen-test.gz dom0_mem=1024M console=com2 com2=57600,8n1,,0 loglvl=all 
> > guest_loglvl=all gnttab_max_nr_frames=64 dom0=pvh iommu=debug 
> > dom0_vcpus_pin sync_console dom0_max_vcpus=1 watchdog=force 
> > iommu=no-intremap
> > 
> > 
> > > 
> > > Then once the system stalls use the 'T' debug key to dump the buffer.
> > 
> > Here it is. It seems to be stuck in an infinite loop, I hit the 'R' key
> > after several minutes
> > http://www-soc.lip6.fr/~bouyer/xen-log9.txt
> 
> Oh, that's actually very useful. The interrupt is being constantly
> injected from the hardware and received by Xen, it's just not then
> injected into dom0 - that's the bit we are missing. Let me look into
> adding some more debug to that path, hopefully it will tell us where
> things are getting blocked.

So I have yet one more patch for you to try; this one has more
debugging and a slight change in the emulated IO-APIC behavior.
Depending on the result, I might have to find a way to mask the
interrupt so that it doesn't spam the whole buffer and we can see
exactly what triggered the scenario you are in.

Thanks, Roger.
---8<---
diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
index 38ac5fb6c7..9db3dcc957 100644
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -187,6 +187,10 @@ void hvm_gsi_assert(struct domain *d, unsigned int gsi)
      * to know if the GSI is pending or not.
      */
     spin_lock(&d->arch.hvm.irq_lock);
+    if ( gsi == TRACK_IRQ )
+        debugtrace_printk("hvm_gsi_assert irq %u trig %u assert count %u\n",
+                          gsi, trig, hvm_irq->gsi_assert_count[gsi]);
+
     if ( trig == VIOAPIC_EDGE_TRIG || !hvm_irq->gsi_assert_count[gsi] )
     {
         if ( trig == VIOAPIC_LEVEL_TRIG )
diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
index 67d4a6237f..aeff9c7687 100644
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -257,7 +257,17 @@ static void vioapic_write_redirent(
         vlapic_adjust_i8259_target(d);
     }
     else if ( ent.fields.trig_mode == VIOAPIC_EDGE_TRIG )
+    {
+        if ( gsi == TRACK_IRQ )
+            debugtrace_printk("vIO-APIC set edge trigger irq %u\n", gsi);
         pent->fields.remote_irr = 0;
+        if ( is_iommu_enabled(d) )
+        {
+            spin_unlock(&d->arch.hvm.irq_lock);
+            hvm_dpci_eoi(d, gsi, pent);
+            spin_lock(&d->arch.hvm.irq_lock);
+        }
+    }
     else if ( !ent.fields.mask &&
               !ent.fields.remote_irr &&
               hvm_irq->gsi_assert_count[idx] )
@@ -278,6 +288,10 @@ static void vioapic_write_redirent(
          */
         int ret = vioapic_hwdom_map_gsi(gsi, ent.fields.trig_mode,
                                         ent.fields.polarity);
+
+        if ( gsi == TRACK_IRQ )
+            debugtrace_printk("vIO-APIC UNMASK irq %u\n", gsi);
+
         if ( ret )
         {
             gprintk(XENLOG_ERR,
@@ -285,6 +299,9 @@ static void vioapic_write_redirent(
             unmasked = 0;
         }
     }
+    else if ( is_hardware_domain(d) && gsi == TRACK_IRQ )
+        debugtrace_printk("vIO-APIC MASK irq %u\n", gsi);
+
 
     if ( gsi == 0 || unmasked )
         pt_may_unmask_irq(d, NULL);
@@ -405,6 +422,10 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
 
     ASSERT(spin_is_locked(&d->arch.hvm.irq_lock));
 
+    if ( irq == TRACK_IRQ )
+            debugtrace_printk("vIO-APIC deliver irq %u vector %u\n",
+                              irq, vector);
+
     HVM_DBG_LOG(DBG_LEVEL_IOAPIC,
                 "dest=%x dest_mode=%x delivery_mode=%x "
                 "vector=%x trig_mode=%x",
diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
index 49bd778484..db7167eb4b 100644
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -1641,6 +1641,9 @@ static void mask_and_ack_level_ioapic_irq(struct irq_desc *desc)
     unsigned long v;
     int i;
 
+    if ( desc->irq == TRACK_IRQ )
+        debugtrace_printk("ACK irq %u\n", desc->irq);
+
     irq_complete_move(desc);
 
     if ( !directed_eoi_enabled )
@@ -1688,6 +1691,9 @@ static void mask_and_ack_level_ioapic_irq(struct irq_desc *desc)
 
 static void end_level_ioapic_irq_old(struct irq_desc *desc, u8 vector)
 {
+    if ( desc->irq == TRACK_IRQ )
+        debugtrace_printk("END irq %u\n", desc->irq);
+
     if ( directed_eoi_enabled )
     {
         if ( !(desc->status & (IRQ_DISABLED|IRQ_MOVE_PENDING)) )
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index 8d1f9a9fc6..baef41cd37 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1109,6 +1109,10 @@ static void irq_guest_eoi_timer_fn(void *data)
     unsigned int i, irq = desc - irq_desc;
     irq_guest_action_t *action;
 
+    if ( desc->irq == TRACK_IRQ )
+        debugtrace_printk("irq_guest_eoi_timer_fn irq %u status %x\n",
+                          desc->irq, desc->status);
+
     spin_lock_irq(&desc->lock);
     
     if ( !(desc->status & IRQ_GUEST) )
@@ -1118,6 +1122,10 @@ static void irq_guest_eoi_timer_fn(void *data)
 
     ASSERT(action->ack_type != ACKTYPE_NONE);
 
+    if ( desc->irq == TRACK_IRQ )
+        debugtrace_printk("ack_type %u in_flight %u\n",
+                          action->ack_type, action->in_flight);
+
     /*
      * Is no IRQ in flight at all, or another instance of this timer already
      * running? Skip everything to avoid forcing an EOI early.
@@ -1837,6 +1845,10 @@ static void do_IRQ_guest(struct irq_desc *desc, unsigned int vector)
     unsigned int        i;
     struct pending_eoi *peoi = this_cpu(pending_eoi);
 
+    if ( desc->irq == TRACK_IRQ )
+        debugtrace_printk("do_IRQ_guest irq %u\n", desc->irq);
+
+
     if ( unlikely(!action->nr_guests) )
     {
         /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. */
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 6b1305a3e5..92f3670508 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -828,6 +828,9 @@ int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
          !pirq_dpci || !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
         return 0;
 
+    if ( pirq->pirq == TRACK_IRQ )
+        debugtrace_printk("hvm_do_IRQ_dpci irq %u\n", pirq->pirq);
+
     pirq_dpci->masked = 1;
     raise_softirq_for(pirq_dpci);
     return 1;
@@ -1010,6 +1013,9 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
     if ( !is_iommu_enabled(d) )
         return;
 
+    if ( guest_gsi == TRACK_IRQ )
+        debugtrace_printk("hvm_dpci_eoi irq %u\n", guest_gsi);
+
     if ( is_hardware_domain(d) )
     {
         spin_lock(&d->event_lock);
diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h
index 43d567fe44..91579c33b9 100644
--- a/xen/include/xen/irq.h
+++ b/xen/include/xen/irq.h
@@ -174,4 +174,6 @@ unsigned int arch_hwdom_irqs(domid_t);
 void arch_evtchn_bind_pirq(struct domain *, int pirq);
 #endif
 
+#define TRACK_IRQ 34
+
 #endif /* __XEN_IRQ_H__ */




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.