[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Lockdep show 6.6-rc regression in Xen HVM CPU hotplug



Since commit 87797fad6cce ("xen/events: replace evtchn_rwlock with
RCU"), I can no longer hotplug CPUs under Xen with lockdep enabled.

(This is real Xen 4.17.1; annoyingly I have different failure modes
with Xen guests under qemu/kvm and I'll deal with those next.)

Offlining complains thus:

[root@localhost cpu1]# echo 0 > online 
[   52.936265] 
[   52.936271] =============================
[   52.936274] WARNING: suspicious RCU usage
[   52.936277] 6.6.0-rc5+ #1357 Not tainted
[   52.936280] -----------------------------
[   52.936282] lib/maple_tree.c:856 suspicious rcu_dereference_check() usage!
[   52.936287] 
[   52.936287] other info that might help us debug this:
[   52.936287] 
[   52.936289] 
[   52.936289] RCU used illegally from offline CPU!
[   52.936289] rcu_scheduler_active = 2, debug_locks = 1
[   52.936293] 1 lock held by swapper/1/0:
[   52.936296]  #0: ffffffff89c03820 (rcu_read_lock){....}-{1:3}, at: 
mtree_load+0x90/0x590
[   52.936321] 
[   52.936321] stack backtrace:
[   52.936324] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.0-rc5+ #1357
[   52.936329] Hardware name: Xen HVM domU, BIOS 4.17.1 09/26/2023
[   52.936332] Call Trace:
[   52.936335]  <TASK>
[   52.936340]  dump_stack_lvl+0x5b/0x90
[   52.936350]  lockdep_rcu_suspicious+0x15a/0x1c0
[   52.936366]  mtree_load+0x49e/0x590
[   52.936385]  irq_get_irq_data+0xe/0x20
[   52.936394]  xen_send_IPI_one+0xa4/0x100
[   52.936404]  __xen_send_IPI_mask+0x1b/0x50
[   52.936414]  generic_exec_single+0x35/0x1c0
[   52.936423]  smp_call_function_single+0xc2/0x140
[   52.936436]  ? cpuhp_report_idle_dead+0x42/0x60
[   52.936444]  do_idle+0xda/0xe0
[   52.936451]  cpu_startup_entry+0x2a/0x30
[   52.936456]  start_secondary+0x123/0x140
[   52.936465]  secondary_startup_64_no_verify+0x178/0x17b
[   52.936490]  </TASK>
[   52.936492] 
[   52.936494] =============================
[   52.936496] WARNING: suspicious RCU usage
[   52.936498] 6.6.0-rc5+ #1357 Not tainted
[   52.936500] -----------------------------
[   52.936502] lib/maple_tree.c:812 suspicious rcu_dereference_check() usage!
[   52.936505] 
[   52.936505] other info that might help us debug this:
[   52.936505] 
[   52.936507] 
[   52.936507] RCU used illegally from offline CPU!
[   52.936507] rcu_scheduler_active = 2, debug_locks = 1
[   52.936510] 1 lock held by swapper/1/0:
[   52.936513]  #0: ffffffff89c03820 (rcu_read_lock){....}-{1:3}, at: 
mtree_load+0x90/0x590
[   52.936530] 
[   52.936530] stack backtrace:
[   52.936532] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.0-rc5+ #1357
[   52.936536] Hardware name: Xen HVM domU, BIOS 4.17.1 09/26/2023
[   52.936538] Call Trace:
[   52.936540]  <TASK>
[   52.936543]  dump_stack_lvl+0x5b/0x90
[   52.936549]  lockdep_rcu_suspicious+0x15a/0x1c0
[   52.936562]  mtree_load+0x3b7/0x590
[   52.936580]  irq_get_irq_data+0xe/0x20
[   52.936586]  xen_send_IPI_one+0xa4/0x100
[   52.936594]  __xen_send_IPI_mask+0x1b/0x50
[   52.936601]  generic_exec_single+0x35/0x1c0
[   52.936609]  smp_call_function_single+0xc2/0x140
[   52.936621]  ? cpuhp_report_idle_dead+0x42/0x60
[   52.936626]  do_idle+0xda/0xe0
[   52.936632]  cpu_startup_entry+0x2a/0x30
[   52.936643]  start_secondary+0x123/0x140
[   52.936649]  secondary_startup_64_no_verify+0x178/0x17b
[   52.936672]  </TASK>
[   52.937164] smpboot: CPU 1 is now offline
[root@localhost cpu1]# 

Onlining triple-faults:

[root@localhost cpu1]# echo 1 > online 
[   58.049051] installing Xen timer for CPU 1
[   58.051533] smpboot: Booting Node 0 Processor 1 APIC 0x2

... and it dumps me back to the host prompt, where 'xl dmesg' says:

(XEN) *** Dumping Dom7 vcpu#1 state: ***
(XEN) ----[ Xen-4.17.1  x86_64  debug=n  Not tainted ]----
(XEN) CPU:    6
(XEN) RIP:    0010:[<ffffffff88041879>]
(XEN) RFLAGS: 0000000000010002   CONTEXT: hvm guest (d7v1)
(XEN) rax: 0000000000000001   rbx: 0000000000000000   rcx: 0000000000000008
(XEN) rdx: fffffe0000001000   rsi: ffff8f364e10b048   rdi: 0000000000000001
(XEN) rbp: 0000000000000000   rsp: ffffa6f6400aff38   r8:  00000000fffffe5b
(XEN) r9:  0000000000000000   r10: ffff8f364e106078   r11: 0000000000000000
(XEN) r12: 0000000000000000   r13: 0000000000000000   r14: 0000000000000000
(XEN) r15: 0000000000000000   cr0: 0000000080050033   cr4: 00000000001300a0
(XEN) cr3: 000000002583c000   cr2: 0000000000000000
(XEN) fsb: 0000000000000000   gsb: ffff8f364e100000   gss: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: 0010

Not 100% sure where ffffffff88041879 is but there's a 'ud' at 
0xffffffff81041879 in load_current_idt() for the
lockdep_assert_irqs_disabled().

And yes, if I comment that assertion out then onlining does succeed
without a triplefault but with a different warning:

[root@localhost cpu1]# echo 1 > online 
[   35.843897] installing Xen timer for CPU 1
[   35.846134] smpboot: Booting Node 0 Processor 1 APIC 0x2
[   35.847297] ------------[ cut here ]------------
[   35.847307] WARNING: CPU: 1 PID: 0 at arch/x86/kernel/cpu/common.c:454 
cr4_update_irqsoff+0x32/0x60
[   35.847323] Modules linked in:
[   35.847329] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.0-rc5+ #1358
[   35.847335] Hardware name: Xen HVM domU, BIOS 4.17.1 09/26/2023
[   35.847338] RIP: 0010:cr4_update_irqsoff+0x32/0x60
[   35.847345] Code: 89 f7 65 48 8b 15 86 0a fd 68 8b 0d 60 1b dc 01 85 c9 74 
18 65 8b 0d 1d f8 fb 68 85 c9 75 0d 65 8b 0d 0e f6 fb 68 85 c9 74 02 <0f> 0b 48 
f7 d7 48 21 d7 48 09 c7 48 39 fa 75 05 c3 cc cc cc cc 65
[   35.847350] RSP: 0000:ffffad98400aff18 EFLAGS: 00010002
[   35.847356] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000001
[   35.847359] RDX: 00000000001300a0 RSI: 000000000000000f RDI: 000000000000000f
[   35.847362] RBP: ffff9c7a81398000 R08: 00000000fffffe14 R09: 0000000000000000
[   35.847365] R10: ffff9c7ace106078 R11: 0000000000000000 R12: 0000000000000001
[   35.847368] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[   35.847372] FS:  0000000000000000(0000) GS:ffff9c7ace100000(0000) 
knlGS:0000000000000000
[   35.847376] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   35.847379] CR2: 0000000000000000 CR3: 000000002883c000 CR4: 00000000001300a0
[   35.847385] Call Trace:
[   35.847389]  <TASK>
[   35.847392]  ? cr4_update_irqsoff+0x32/0x60
[   35.847398]  ? __warn+0x85/0x170
[   35.847410]  ? cr4_update_irqsoff+0x32/0x60
[   35.847419]  ? report_bug+0x171/0x1a0
[   35.847432]  ? handle_bug+0x3c/0x80
[   35.847438]  ? exc_invalid_op+0x17/0x70
[   35.847443]  ? asm_exc_invalid_op+0x1a/0x20
[   35.847464]  ? cr4_update_irqsoff+0x32/0x60
[   35.847473]  cpu_init+0x58/0x1a0
[   35.847482]  start_secondary+0x31/0x140
[   35.847490]  ? __pfx_start_secondary+0x10/0x10
[   35.847496]  secondary_startup_64_no_verify+0x178/0x17b
[   35.847519]  </TASK>
[   35.847522] irq event stamp: 111393
[   35.847524] hardirqs last  enabled at (111393): [<ffffffff9773008a>] 
acpi_idle_play_dead+0x4a/0x70
[   35.847533] hardirqs last disabled at (111392): [<ffffffff97126ca2>] 
do_idle+0x92/0xe0
[   35.847540] softirqs last  enabled at (111328): [<ffffffff97d084a6>] 
__do_softirq+0x2e6/0x3a9
[   35.847549] softirqs last disabled at (111315): [<ffffffff970bbdbb>] 
__irq_exit_rcu+0xab/0xd0
[   35.847555] ---[ end trace 0000000000000000 ]---
[   35.847585] ------------[ cut here ]------------
[   35.847588] WARNING: CPU: 1 PID: 0 at kernel/rcu/tree.c:4429 
rcu_cpu_starting+0x16b/0x1d0
[   35.847602] Modules linked in:
[   35.847606] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G        W          
6.6.0-rc5+ #1358
[   35.847610] Hardware name: Xen HVM domU, BIOS 4.17.1 09/26/2023
[   35.847613] RIP: 0010:rcu_cpu_starting+0x16b/0x1d0
[   35.847620] Code: cc 65 8b 05 97 64 e9 68 85 c0 75 cd 65 8b 05 88 62 e9 68 
85 c0 74 c2 0f 0b eb be 65 8b 05 79 62 e9 68 85 c0 0f 84 bf fe ff ff <0f> 0b e9 
b8 fe ff ff 89 c6 4c 89 ef e8 04 06 b8 00 90 e9 eb fe ff
[   35.847624] RSP: 0000:ffffad98400aff10 EFLAGS: 00010002
[   35.847628] RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000000
[   35.847631] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000001
[   35.847633] RBP: 0000000000000000 R08: 000000001883c000 R09: 0000000000000000
[   35.847636] R10: ffff9c7ace106078 R11: 0000000000000000 R12: 0000000000000000
[   35.847639] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[   35.847642] FS:  0000000000000000(0000) GS:ffff9c7ace100000(0000) 
knlGS:0000000000000000
[   35.847645] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   35.847648] CR2: 0000000000000000 CR3: 000000002883c001 CR4: 00000000001706a0
[   35.847651] Call Trace:
[   35.847654]  <TASK>
[   35.847656]  ? rcu_cpu_starting+0x16b/0x1d0
[   35.847663]  ? __warn+0x85/0x170
[   35.847681]  ? rcu_cpu_starting+0x16b/0x1d0
[   35.847691]  ? report_bug+0x171/0x1a0
[   35.847701]  ? handle_bug+0x3c/0x80
[   35.847706]  ? exc_invalid_op+0x17/0x70
[   35.847711]  ? asm_exc_invalid_op+0x1a/0x20
[   35.847729]  ? rcu_cpu_starting+0x16b/0x1d0
[   35.847742]  start_secondary+0x42/0x140
[   35.847747]  ? __pfx_start_secondary+0x10/0x10
[   35.847753]  secondary_startup_64_no_verify+0x178/0x17b
[   35.847774]  </TASK>
[   35.847775] irq event stamp: 111393
[   35.847778] hardirqs last  enabled at (111393): [<ffffffff9773008a>] 
acpi_idle_play_dead+0x4a/0x70
[   35.847783] hardirqs last disabled at (111392): [<ffffffff97126ca2>] 
do_idle+0x92/0xe0
[   35.847788] softirqs last  enabled at (111328): [<ffffffff97d084a6>] 
__do_softirq+0x2e6/0x3a9
[   35.847795] softirqs last disabled at (111315): [<ffffffff970bbdbb>] 
__irq_exit_rcu+0xab/0xd0
[   35.847799] ---[ end trace 0000000000000000 ]---
[   35.847810] ------------[ cut here ]------------
[   35.847842] WARNING: CPU: 1 PID: 0 at kernel/rcu/tree.c:4462 
rcu_cpu_starting+0x158/0x1d0
[   35.847851] Modules linked in:
[   35.847855] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G        W          
6.6.0-rc5+ #1358
[   35.847859] Hardware name: Xen HVM domU, BIOS 4.17.1 09/26/2023
[   35.847862] RIP: 0010:rcu_cpu_starting+0x158/0x1d0
[   35.847869] Code: 01 f0 83 44 24 fc 00 5b 5d 41 5c 41 5d 41 5e c3 cc cc cc 
cc 65 8b 05 97 64 e9 68 85 c0 75 cd 65 8b 05 88 62 e9 68 85 c0 74 c2 <0f> 0b eb 
be 65 8b 05 79 62 e9 68 85 c0 0f 84 bf fe ff ff 0f 0b e9
[   35.847872] RSP: 0000:ffffad98400aff10 EFLAGS: 00010002
[   35.847877] RAX: 0000000000000001 RBX: ffff9c7ace130280 RCX: 4000000000001492
[   35.847880] RDX: 3fffffffffffffff RSI: ffffffff98c03d18 RDI: ffff9c7a81398d48
[   35.847883] RBP: ffffffff98c03d00 R08: 0000000000000001 R09: 000000000000003e
[   35.847886] R10: 0000000000000000 R11: ffff9c7a81398d48 R12: 0000000000000002
[   35.847889] R13: ffffffff98cb7280 R14: 0000000000000000 R15: 0000000000000000
[   35.847892] FS:  0000000000000000(0000) GS:ffff9c7ace100000(0000) 
knlGS:0000000000000000
[   35.847895] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   35.847898] CR2: 0000000000000000 CR3: 000000002883c001 CR4: 00000000001706a0
[   35.847901] Call Trace:
[   35.847903]  <TASK>
[   35.847905]  ? rcu_cpu_starting+0x158/0x1d0
[   35.847912]  ? __warn+0x85/0x170
[   35.847919]  ? rcu_cpu_starting+0x158/0x1d0
[   35.847929]  ? report_bug+0x171/0x1a0
[   35.847940]  ? handle_bug+0x3c/0x80
[   35.847958]  ? exc_invalid_op+0x17/0x70
[   35.847960]  ? asm_exc_invalid_op+0x1a/0x20
[   35.847969]  ? rcu_cpu_starting+0x158/0x1d0
[   35.847976]  start_secondary+0x42/0x140
[   35.847978]  ? __pfx_start_secondary+0x10/0x10
[   35.847981]  secondary_startup_64_no_verify+0x178/0x17b
[   35.847991]  </TASK>
[   35.847992] irq event stamp: 111393
[   35.847993] hardirqs last  enabled at (111393): [<ffffffff9773008a>] 
acpi_idle_play_dead+0x4a/0x70
[   35.847996] hardirqs last disabled at (111392): [<ffffffff97126ca2>] 
do_idle+0x92/0xe0
[   35.847998] softirqs last  enabled at (111328): [<ffffffff97d084a6>] 
__do_softirq+0x2e6/0x3a9
[   35.848001] softirqs last disabled at (111315): [<ffffffff970bbdbb>] 
__irq_exit_rcu+0xab/0xd0
[   35.848003] ---[ end trace 0000000000000000 ]---
[   35.850720] cpu 1 spinlock event irq 57


Attachment: smime.p7s
Description: S/MIME cryptographic signature


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.