From: Ian Campbell
Subject: xen: netback: take net_schedule_list_lock when removing entry from net_schedule_list

There is a race in net_tx_build_mops between checking if net_schedule_list
is empty and actually dequeuing the first entry on the list. If another
thread dequeues the only entry on the list during this window, we crash
because list_first_entry expects a non-empty list, like so:

[    0.133127] BUG: unable to handle kernel NULL pointer dereference at 00000008
[    0.133132] IP: [] net_tx_build_mops+0x91/0xa70
[    0.133142] *pdpt = 0000000000000000 *pde = 000000000000000f
[    0.133147] Oops: 0002 [#1] SMP
[    0.133150] last sysfs file:
[    0.133152] Modules linked in:
[    0.133154]
[    0.133156] Pid: 55, comm: netback/1 Not tainted (2.6.32.12-0.7.1 #1) Latitude E4310
[    0.133158] EIP: 0061:[] EFLAGS: 00010202 CPU: 1
[    0.133161] EIP is at net_tx_build_mops+0x91/0xa70
[    0.133163] EAX: 00000012 EBX: 00000008 ECX: e112b734 EDX: e112b76c
[    0.133165] ESI: ffffff30 EDI: 00000000 EBP: e112b734 ESP: dfe85d98
[    0.133167] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
[    0.133169] Process netback/1 (pid: 55, ti=dfe84000 task=dfe83340 task.ti=dfe84000)
[    0.133170] Stack:
[    0.133172]  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[    0.133177] <0> 00000000 e112b734 e112ec08 e112b7f8 e112ec08 ffffff30 00000000 00000000
[    0.133186] <0> 00000000 00000000 00000000 e112b76c dfe85df4 00000001 00000000 aaaaaaaa
[    0.133193] Call Trace:
[    0.133202]  [] net_tx_action+0x42f/0xac0
[    0.133206]  [] netbk_action_thread+0x6a/0x1b0
[    0.133212]  [] kthread+0x74/0x80
[    0.133218]  [] kernel_thread_helper+0x7/0x10
[    0.133220] Code: c4 00 00 00 89 74 24 58 39 74 24 2c 0f 84 c7 06 00 00 8b 74 24 \
                     58 8b 5c 24 58 81 ee d0 00 00 00 83 c3 08 89 74 24 34 8b 7c 24 \
                     58 ff 47 08 89 f0 e8 b4 f9 ff ff 8b 46 2c 8b 56 34 89 44 24 5c
[    0.133261] EIP: [] net_tx_build_mops+0x91/0xa70 SS:ESP 0069:dfe85d98
[    0.133265] CR2: 0000000000000008
[    0.133274] ---[ end trace e2c5c15f54bd9d93 ]---

Therefore, after the initial lock-free check for an empty list, check again
with the lock held before dequeuing the entry.

Based on a patch by Tomasz Wroblewski.

Signed-off-by: Ian Campbell
Signed-off-by: Jan Beulich
---
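As a standalone illustration of the pattern the patch adopts (emptiness
check and dequeue performed as one step under the lock), here is a minimal
userspace sketch. All names (poll_sched_list, sched_lock, struct node) are
hypothetical, and a pthread mutex stands in for the kernel's
spin_lock_irq(); it is not the netback code itself.

/*
 * Minimal userspace sketch of the locked check-then-dequeue pattern
 * introduced by poll_net_schedule_list() below.  Hypothetical names;
 * a pthread mutex stands in for spin_lock_irq(), and struct node
 * stands in for the kernel's struct list_head embedding.
 */
#include <pthread.h>
#include <stdio.h>

struct node {
        struct node *next, *prev;
        int id;
};

/* Circular doubly-linked list head; the list is empty when the head
 * points at itself, as with the kernel's list_empty(). */
static struct node sched_list = { &sched_list, &sched_list, -1 };
static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER;

static struct node *poll_sched_list(void)
{
        struct node *n = NULL;

        pthread_mutex_lock(&sched_lock);
        /* Both the emptiness check and the dequeue happen under one
         * lock, so no other thread can empty the list in between --
         * exactly the window the patch closes. */
        if (sched_list.next != &sched_list) {   /* !list_empty()      */
                n = sched_list.next;            /* list_first_entry() */
                n->prev->next = n->next;        /* list_del()         */
                n->next->prev = n->prev;
                n->next = n->prev = NULL;
        }
        pthread_mutex_unlock(&sched_lock);
        return n;       /* NULL: another thread emptied the list first */
}

int main(void)
{
        struct node a = { &sched_list, &sched_list, 1 };

        sched_list.next = sched_list.prev = &a;

        /* The second call observes an empty list and returns NULL
         * instead of dereferencing a stale "first" entry. */
        printf("first poll:  %p\n", (void *)poll_sched_list());
        printf("second poll: %p\n", (void *)poll_sched_list());
        return 0;
}

The key point, mirrored by poll_net_schedule_list() in the diff below, is
that a NULL return simply means another thread won the race; the caller
treats that as "nothing to do" and continues, rather than crashing.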
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -784,15 +784,28 @@ static int __on_net_schedule_list(netif_
 	return netif->list.next != NULL;
 }
 
+/* Must be called with net_schedule_list_lock held. */
 static void remove_from_net_schedule_list(netif_t *netif)
 {
-	spin_lock_irq(&net_schedule_list_lock);
 	if (likely(__on_net_schedule_list(netif))) {
 		list_del(&netif->list);
 		netif->list.next = NULL;
 		netif_put(netif);
 	}
+}
+
+static netif_t *poll_net_schedule_list(void)
+{
+	netif_t *netif = NULL;
+
+	spin_lock_irq(&net_schedule_list_lock);
+	if (!list_empty(&net_schedule_list)) {
+		netif = list_first_entry(&net_schedule_list, netif_t, list);
+		netif_get(netif);
+		remove_from_net_schedule_list(netif);
+	}
 	spin_unlock_irq(&net_schedule_list_lock);
+	return netif;
 }
 
 static void add_to_net_schedule_list_tail(netif_t *netif)
@@ -837,7 +850,9 @@ void netif_schedule_work(netif_t *netif)
 
 void netif_deschedule_work(netif_t *netif)
 {
+	spin_lock_irq(&net_schedule_list_lock);
 	remove_from_net_schedule_list(netif);
+	spin_unlock_irq(&net_schedule_list_lock);
 }
 
@@ -1224,7 +1239,6 @@ static int netbk_set_skb_gso(struct sk_b
 /* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
-	struct list_head *ent;
 	struct sk_buff *skb;
 	netif_t *netif;
 	netif_tx_request_t txreq;
@@ -1242,10 +1256,9 @@
 	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
 		!list_empty(&net_schedule_list)) {
 		/* Get a netif from the list with work to do. */
-		ent = net_schedule_list.next;
-		netif = list_entry(ent, netif_t, list);
-		netif_get(netif);
-		remove_from_net_schedule_list(netif);
+		netif = poll_net_schedule_list();
+		if (!netif)
+			continue;
 
 		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
 		if (!work_to_do) {