
[Xen-devel] RE: bnx2 FTQ issues on 2.6.32 + xen 4.0.1;



Hi:
 
       I've spent more time on this.
       It is easy to reproduce: I have two physical hosts with 15 VMs in total running on top of them, and I migrate the
VMs back and forth over and over again. Since Xen live migration involves copying a large amount of memory, the network
stays saturated. After about 16 hours, the FTQ dump shows up.
 
      I've done more debugging.
      The debug code below was added to the driver; its output (shown after the code) was captured when the FTQ dump happened.
      Does anyone know what is going on?
      Thanks.

--------------
static void
bnx2_tx_timeout(struct net_device *dev)
{
        struct bnx2 *bp = netdev_priv(dev);
        struct bnx2_napi *bnapi = &bp->bnx2_napi[0];
        struct bnx2_tx_ring_info *txr = &bnapi->tx_ring;
        struct bnx2_rx_ring_info *rxr = &bnapi->rx_ring;
        int i;

        bnx2_dump_ftq(bp);
        bnx2_dump_state(bp);

        if (stop_on_tx_timeout) {
                printk(KERN_WARNING PFX
                       "%s: prevent chip reset during tx timeout\n",
                        bp->dev->name);
                smp_rmb();
               
                printk("last status idx %d \n", bnapi->last_status_idx);
                printk("hw_tx_cons %d, txr->hw_tx_conds %d txr->tx_prod %d txr->tx_cons %d\n",
                         bnx2_get_hw_tx_cons(bnapi), txr->hw_tx_cons, txr->tx_prod, txr->tx_cons);
                printk("hw_rx_cons %d, txr->hw_rx_conds %d\n", bnx2_get_hw_rx_cons(bnapi), rxr->rx_cons);
                printk("sblk->status_attn_bits %d\n",bnapi->status_blk.msi->status_attn_bits);
                printk("sblk->status_attn_bits_ack %d\n",bnapi->status_blk.msi->status_attn_bits_ack);
                printk("bnx2_tx_avail %d \n",(bnx2_tx_avail(bp, txr)));
               
                printk("sblk->status_tx_quick_consumer_index0 %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index0);
                printk("sblk->status_tx_quick_consumer_index1 %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index1);
                printk("sblk->status_tx_quick_consumer_index2 %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index2);
                printk("sblk->status_tx_quick_consumer_index3 %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index3);
                printk("sblk->status_rx_quick_consumer_index0 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index0);
                printk("sblk->status_rx_quick_consumer_index1 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index1);
                printk("sblk->status_rx_quick_consumer_index2 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index2);
                printk("sblk->status_rx_quick_consumer_index3 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index3);
                printk("sblk->status_rx_quick_consumer_index4 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index4);
                printk("sblk->status_rx_quick_consumer_index5 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index5);
                printk("sblk->status_rx_quick_consumer_index6 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index6);
                printk("sblk->status_rx_quick_consumer_index7 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index7);
                printk("sblk->status_rx_quick_consumer_index8 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index8);
                printk("sblk->status_rx_quick_consumer_index9 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index9);
                printk("sblk->status_rx_quick_consumer_index10 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index10);
                printk("sblk->status_rx_quick_consumer_index11 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index11);
                printk("sblk->status_rx_quick_consumer_index12 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index12);
                printk("sblk->status_rx_quick_consumer_index13 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index13);
                printk("sblk->status_rx_quick_consumer_index14 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index14);
                printk("sblk->status_rx_quick_consumer_index15 %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index15);
                printk("sblk->status_completion_producer_index %d\n",bnapi->status_blk.msi->status_completion_producer_index);
                printk("sblk->status_cmd_consumer_index %d\n",bnapi->status_blk.msi->status_cmd_consumer_index);
                printk("sblk->status_idx %d\n",bnapi->status_blk.msi->status_idx);
                printk("sblk->status_unused %d\n",bnapi->status_blk.msi->status_unused);
                printk("sblk->status_blk_num %d\n",bnapi->status_blk.msi->status_blk_num);

                is_timedout = 1;
                /* Poll each NAPI vector's TX ring once to see whether software
                 * can still reclaim any completed descriptors. */
                for (i = 0; i < bp->irq_nvecs; i++) {
                        bnapi = &bp->bnx2_napi[i];
                        bnx2_tx_int(bp, bnapi, 0);
                }
                return;
        }
        /* rest of bnx2_tx_timeout() (the normal reset path) is unchanged */
-----------------
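 
      Note: the snippet above relies on two module-level debug variables, stop_on_tx_timeout and is_timedout, which are
not shown. A minimal sketch of how such debug knobs could be declared near the top of bnx2.c (the variable names come from
the snippet; the module parameter itself is my assumption, not something in the stock driver):

--------------
/* Hypothetical debug knobs assumed by the bnx2_tx_timeout() snippet above;
 * they are not part of the stock bnx2 driver.  stop_on_tx_timeout keeps the
 * chip state intact for inspection instead of letting the watchdog path
 * reset the NIC; it can be changed at runtime via
 * /sys/module/bnx2/parameters/stop_on_tx_timeout. */
static int stop_on_tx_timeout = 1;
module_param(stop_on_tx_timeout, int, 0644);
MODULE_PARM_DESC(stop_on_tx_timeout,
                 "Skip the chip reset on TX timeout so ring/status-block state can be dumped");

/* Set once the first TX timeout has been observed. */
static int is_timedout;
--------------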
 
last status idx 2426
hw_tx_cons 32474, txr->hw_tx_cons 32474 txr->tx_prod 32641 txr->tx_cons 32474
hw_rx_cons 19665, rxr->rx_cons 19665
sblk->status_attn_bits 1
sblk->status_attn_bits_ack 1
bnx2_tx_avail 88
sblk->status_tx_quick_consumer_index0 32474
sblk->status_tx_quick_consumer_index1 0
sblk->status_tx_quick_consumer_index2 0
sblk->status_tx_quick_consumer_index3 0
sblk->status_rx_quick_consumer_index0 19665
sblk->status_rx_quick_consumer_index1 0
sblk->status_rx_quick_consumer_index2 0
sblk->status_rx_quick_consumer_index3 0
sblk->status_rx_quick_consumer_index4 0
sblk->status_rx_quick_consumer_index5 0
sblk->status_rx_quick_consumer_index6 0
sblk->status_rx_quick_consumer_index7 0
sblk->status_rx_quick_consumer_index8 0
sblk->status_rx_quick_consumer_index9 0
sblk->status_rx_quick_consumer_index10 0
sblk->status_rx_quick_consumer_index11 0
sblk->status_rx_quick_consumer_index12 0
sblk->status_rx_quick_consumer_index13 0
sblk->status_rx_quick_consumer_index14 0
sblk->status_rx_quick_consumer_index15 0
sblk->status_completion_producer_index 0
sblk->status_cmd_consumer_index 0
sblk->status_idx 2426
sblk->status_unused 0
sblk->status_blk_num 0
hw_cons 32474 sw_cons 32474 ffff8801d27f85c0 bnapi
return hw_cons 32474 sw_cons 32474 ffff8801d27f85c0 bnapi
hw_cons 3628 sw_cons 3625 ffff8801d27f8bc0 bnapi
return hw_cons 3628 sw_cons 3625 ffff8801d27f8bc0 bnapi
hw_cons 62094 sw_cons 62090 ffff8801d27f91c0 bnapi
return hw_cons 62094 sw_cons 62090 ffff8801d27f91c0 bnapi
hw_cons 3184 sw_cons 3173 ffff8801d27f97c0 bnapi
return hw_cons 3184 sw_cons 3173 ffff8801d27f97c0 bnapi
hw_cons 0 sw_cons 0 ffff8801d27f9dc0 bnapi
return hw_cons 0 sw_cons 0 ffff8801d27f9dc0 bnapi
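
     The dumped TX ring indices are at least self-consistent, assuming the default 255-descriptor usable bnx2 TX ring
(the ring size is my assumption, it is not printed in the dump). A small stand-alone check of that arithmetic:

--------------
/* Sanity check of the TX ring numbers in the dump above, as ordinary
 * user-space C.  The indices are 16-bit producer/consumer counters that
 * wrap, hence the u16 arithmetic.  Assumes a 255-entry usable TX ring. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint16_t tx_prod   = 32641;  /* txr->tx_prod from the dump               */
        uint16_t tx_cons   = 32474;  /* txr->tx_cons == hw_tx_cons from the dump */
        uint16_t ring_size = 255;    /* assumed default usable TX descriptors    */

        uint16_t pending = (uint16_t)(tx_prod - tx_cons);

        printf("descriptors still queued: %u\n", (unsigned)pending);               /* 167 */
        printf("expected tx_avail       : %u\n", (unsigned)(ring_size - pending)); /* 88, matches bnx2_tx_avail */
        return 0;
}
--------------

     In other words, 167 descriptors sit between tx_prod and tx_cons, while the hardware consumer (hw_tx_cons /
status_tx_quick_consumer_index0) has not advanced past the software tx_cons, and last_status_idx equals sblk->status_idx.
The trailing hw_cons/sw_cons lines appear to come from extra printks inside bnx2_tx_int() (not shown above).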

 >--------------------------------------------------------------------------------
>From: tinnycloud@xxxxxxxxxxx
>To: xen-devel@xxxxxxxxxxxxxxxxxxx
>CC: mchan@xxxxxxxxxxxx
>Subject: bnx2 FTQ issues on 2.6.32 + xen 4.0.1;
>Date: Mon, 27 Jun 2011 13:38:07 +0800
>
>
>Hi Michael:
>
>     Sorry to bother you, but I've been suffering from this issue for quite a long time.
>     My test environment is 2.6.32.36 + Xen 4.0.1 + bnx2 2.3.
>
>     Also, cpuidle is disabled via the Xen command line in GRUB.
>
>     Grub info:
>     title Red Hat Enterprise Linux Server (2.6.32.36xen)
>        kernel /xen-4.0.1.gz iommu=off x2apic=off console=com1,vga com1=115200,8n1 noreboot cpuidle=0 cpufreq=none no-xsave max_cstate=1
>        module /vmlinuz-2.6.32.36xen ro root=LABEL=/ hda=noprobe console=hvc0 console=ttyS0,115200n8
>        module /initrd-2.6.32.36xen.img
>    
>     Someone has pointed out that this might be a lost interrupt, but I don't understand the log output. If the log
>format is documented somewhere, could you let me know?
>     
>      Many thanks.
>
>root@xmao # ethtool  -i eth0
>driver: bridge
>version: 2.3
>firmware-version: N/A
>bus-info: N/A
>
>
>/var/log/message:
>
>196299 Jun 27 07:36:56 xmao kernel: ------------[ cut here ]------------
>196300 Jun 27 07:36:56 xmao kernel: WARNING: at net/sched/sch_generic.c:261 dev_watchdog+0x105/0x16a()
>196301 Jun 27 07:36:56 xmao kernel: Hardware name: Tecal RH2285
>196302 Jun 27 07:36:56 xmao kernel: NETDEV WATCHDOG: peth0 (bnx2): transmit queue 0 timed out                                                       
>196303 Jun 27 07:36:56 xmao kernel: Modules linked in: iptable_filter nfs fscache nfs_acl auth_rpcgss bridge stp llc autofs4 ipmi_devintf ipmi_si ipmi_msghandler lockd sunrpc ipv6 xenfs dm_multipath fuse nf_conntrack ip_tables xen_netback xen_blkback blktap blkback_pagemap loop nbd video output sbs sbshc parport_pc lp parport snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device serio_raw bnx2 snd_pcm_oss snd_mixer_oss snd_pcm snd_timer snd i2c_i801 iTCO_wdt soundcore iTCO_vendor_support pata_acpi snd_page_alloc i2c_core ata_generic pcspkr ata_piix shpchp mptsas mptscsih mptbase [last unloaded: freq_table]
>196304 Jun 27 07:36:56 xmao kernel: Pid: 0, comm: swapper Not tainted 2.6.32.36xen #1
>196305 Jun 27 07:36:56 xmao kernel: Call Trace:
>196306 Jun 27 07:36:56 xmao kernel:  <IRQ>  [<ffffffff813ba154>] ? dev_watchdog+0x105/0x16a
>196307 Jun 27 07:36:56 xmao kernel:  [<ffffffff81056666>] warn_slowpath_common+0x7c/0x94
>196308 Jun 27 07:36:56 xmao kernel:  [<ffffffff81056738>] warn_slowpath_fmt+0xa4/0xa6
>196309 Jun 27 07:36:56 xmao kernel:  [<ffffffff81080bfa>] ? clockevents_program_event+0x78/0x81
>196310 Jun 27 07:36:56 xmao kernel:  [<ffffffff81081fce>] ? tick_program_event+0x2a/0x2c
>196311 Jun 27 07:36:56 xmao kernel:  [<ffffffff813b951d>] ? __netif_tx_lock+0x1b/0x24
>196312 Jun 27 07:36:56 xmao kernel:  [<ffffffff813b95a8>] ? netif_tx_lock+0x46/0x6e
>196313 Jun 27 07:36:56 xmao kernel:  [<ffffffff813a3ed1>] ? netdev_drivername+0x48/0x4f
>196314 Jun 27 07:36:56 xmao kernel:  [<ffffffff813ba154>] dev_watchdog+0x105/0x16a
>196315 Jun 27 07:36:56 xmao kernel:  [<ffffffff81063d98>] run_timer_softirq+0x156/0x1f8
>196316 Jun 27 07:36:56 xmao kernel:  [<ffffffff813ba04f>] ? dev_watchdog+0x0/0x16a
>196317 Jun 27 07:36:56 xmao kernel:  [<ffffffff8105d6f0>] __do_softirq+0xd7/0x19e
>196318 Jun 27 07:36:56 xmao kernel:  [<ffffffff81013eac>] call_softirq+0x1c/0x30
>196319 Jun 27 07:36:56 xmao kernel:  [<ffffffff8101564b>] do_softirq+0x46/0x87
>196320 Jun 27 07:36:56 xmao kernel:  [<ffffffff8105d575>] irq_exit+0x3b/0x7a
>196321 Jun 27 07:36:56 xmao kernel:  [<ffffffff8128dcfe>] xen_evtchn_do_upcall+0x38/0x46
>196322 Jun 27 07:36:56 xmao kernel:  [<ffffffff81013efe>] xen_do_hypervisor_callback+0x1e/0x30
>196323 Jun 27 07:36:56 xmao kernel:  <EOI>  [<ffffffff8103f642>] ? pick_next_task_idle+0x18/0x22
>196324 Jun 27 07:36:56 xmao kernel:  [<ffffffff810093aa>] ? hypercall_page+0x3aa/0x1000
>196325 Jun 27 07:36:56 xmao kernel:  [<ffffffff810093aa>] ? hypercall_page+0x3aa/0x1000
>196326 Jun 27 07:36:56 xmao kernel:  [<ffffffff8100f1bb>] ? xen_safe_halt+0x10/0x1a
>196327 Jun 27 07:36:56 xmao kernel:  [<ffffffff81019e14>] ? default_idle+0x39/0x56
>196328 Jun 27 07:36:56 xmao kernel:  [<ffffffff81011cd0>] ? cpu_idle+0x5d/0x8c
>196329 Jun 27 07:36:56 xmao kernel:  [<ffffffff8143375d>] ? cpu_bringup_and_idle+0x13/0x15
>196330 Jun 27 07:36:56 xmao kernel: ---[ end trace 8ea3df16a1603e41 ]---
>196331 Jun 27 07:36:56 xmao kernel: bnx2: <--- start FTQ dump on peth0 --->
>196332 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_RV2P_PFTQ_CTL 10000
>196333 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_RV2P_TFTQ_CTL 20000
>196334 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_RV2P_MFTQ_CTL 4000
>196335 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_TBDR_FTQ_CTL 1004002
>196336 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_TDMA_FTQ_CTL 4010002
>196337 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_TXP_FTQ_CTL 4010002
>196338 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_TPAT_FTQ_CTL 10002
>196339 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_RXP_CFTQ_CTL 8000
>196340 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_RXP_FTQ_CTL 100000
>196341 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_COM_COMXQ_FTQ_CTL 10000
>196342 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_COM_COMTQ_FTQ_CTL 20000
>196343 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_COM_COMQ_FTQ_CTL 10000
>196344 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_CP_CPQ_FTQ_CTL 4000
>196345 Jun 27 07:36:56 xmao kernel: bnx2: peth0: TXP mode b84c state 80005000 evt_mask 500 pc 8000758 pc 8000758 instr 8fa90030
>196346 Jun 27 07:36:56 xmao kernel: bnx2: peth0: TPAT mode b84c state 80001000 evt_mask 500 pc 8000a5c pc 8000a4c instr 38420001
>196347 Jun 27 07:36:56 xmao kernel: bnx2: peth0: RXP mode b84c state 80001000 evt_mask 500 pc 8004c1c pc 8004c20 instr 32070001
>196348 Jun 27 07:36:56 xmao kernel: bnx2: peth0: COM mode b8cc state 80008000 evt_mask 500 pc 8000a94 pc 8000b28 instr 3c028000
>196349 Jun 27 07:36:56 xmao kernel: bnx2: peth0: CP mode b8cc state 80004000 evt_mask 500 pc 8000c6c pc 8000928 instr 8ce800e8
>196350 Jun 27 07:36:56 xmao kernel: bnx2: <--- end FTQ dump on peth0 --->
>196351 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: intr_sem[0]
>196352 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: intr_sem[0] PCI_CMD[20100406]
>196353 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: PCI_PM[19002008] PCI_MISC_CFG[92000088]
>196354 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: EMAC_TX_STATUS[00000008] EMAC_RX_STATUS[00000000]
>196355 Jun 27 07:36:56 xmao kernel: bnx2: peth0 RPM_MGMT_PKT_CTRL[40000088]
>196356 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: MCP_STATE_P0[0007e10e] MCP_STATE_P1[0003e00e]
>196357 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: HC_STATS_INTERRUPT_STATUS[01ff0000]
>196358 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: PBA[00000000]
>196359 Jun 27 07:36:56 xmao kernel: bnx2: peth0 NIC Copper Link is Down
>196360 Jun 27 07:36:56 xmao kernel: eth0: port 1(peth0) entering disabled state
>196361 Jun 27 07:36:59 xmao kernel: bnx2: peth0 NIC Copper Link is Up, 1000 Mbps full duplex
>196362 Jun 27 07:36:59 xmao kernel: eth0: port 1(peth0) entering forwarding state

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 

