Some servers experienced fatal deadlocks because of a combination of bugs, leading to multiple CPUs calling dump_stack().

The checksumming bug was fixed in commit 34ae6a1aa0 ("ipv6: update skb->csum when CE mark is propagated").

The second problem is faulty locking in dump_stack():

CPU1 runs in process context and calls dump_stack(), grabbing dump_lock.

CPU2 receives a TCP packet under softirq, grabs the socket spinlock, and calls dump_stack() from netdev_rx_csum_fault().

On CPU2, dump_stack() spins on atomic_cmpxchg(&dump_lock, -1, 2), since dump_lock is owned by CPU1.

While dumping its stack, CPU1 is interrupted by a softirq, and happens to process a packet for the TCP socket locked by CPU2.

CPU1 spins forever in spin_lock(): deadlock.

Stack trace on CPU1 looked like:

NMI backtrace for cpu 1
RIP: _raw_spin_lock+0x25/0x30
...
Call Trace:
<IRQ>
tcp_v6_rcv+0x243/0x620
ip6_input_finish+0x11f/0x330
ip6_input+0x38/0x40
ip6_rcv_finish+0x3c/0x90
ipv6_rcv+0x2a9/0x500
process_backlog+0x461/0xaa0
net_rx_action+0x147/0x430
__do_softirq+0x167/0x2d0
call_softirq+0x1c/0x30
do_softirq+0x3f/0x80
irq_exit+0x6e/0xc0
smp_call_function_single_interrupt+0x35/0x40
call_function_single_interrupt+0x6a/0x70
<EOI>
printk+0x4d/0x4f
printk_address+0x31/0x33
print_trace_address+0x33/0x3c
print_context_stack+0x7f/0x119
dump_trace+0x26b/0x28e
show_trace_log_lvl+0x4f/0x5c
show_stack_log_lvl+0x104/0x113
show_stack+0x42/0x44
dump_stack+0x46/0x58
netdev_rx_csum_fault+0x38/0x3c
__skb_checksum_complete_head+0x6e/0x80
__skb_checksum_complete+0x11/0x20
tcp_rcv_established+0x2bd5/0x2fd0
tcp_v6_do_rcv+0x13c/0x620
sk_backlog_rcv+0x15/0x30
release_sock+0xd2/0x150
tcp_recvmsg+0x1c1/0xfc0
inet_recvmsg+0x7d/0x90
sock_recvmsg+0xaf/0xe0
___sys_recvmsg+0x111/0x3b0
SyS_recvmsg+0x5c/0xb0
system_call_fastpath+0x16/0x1b

Fixes: b58d977432 ("dump_stack: serialize the output from dump_stack()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alex Thorlton <athorlton@sgi.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
			
				
	
	
		
			65 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			65 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Provide a default dump_stack() function for architectures
 | |
|  * which don't implement their own.
 | |
|  */
 | |
| 
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/export.h>
 | |
| #include <linux/sched.h>
 | |
| #include <linux/smp.h>
 | |
| #include <linux/atomic.h>
 | |
| 
 | |
| static void __dump_stack(void)
 | |
| {
 | |
| 	dump_stack_print_info(KERN_DEFAULT);
 | |
| 	show_stack(NULL, NULL);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * dump_stack - dump the current task information and its stack trace
 | |
|  *
 | |
|  * Architectures can override this implementation by implementing their own.
 | |
|  */
 | |
| #ifdef CONFIG_SMP
 | |
| static atomic_t dump_lock = ATOMIC_INIT(-1);
 | |
| 
 | |
| asmlinkage __visible void dump_stack(void)
 | |
| {
 | |
| 	unsigned long flags;
 | |
| 	int was_locked;
 | |
| 	int old;
 | |
| 	int cpu;
 | |
| 
 | |
| 	/*
 | |
| 	 * Permit this cpu to perform nested stack dumps while serialising
 | |
| 	 * against other CPUs
 | |
| 	 */
 | |
| retry:
 | |
| 	local_irq_save(flags);
 | |
| 	cpu = smp_processor_id();
 | |
| 	old = atomic_cmpxchg(&dump_lock, -1, cpu);
 | |
| 	if (old == -1) {
 | |
| 		was_locked = 0;
 | |
| 	} else if (old == cpu) {
 | |
| 		was_locked = 1;
 | |
| 	} else {
 | |
| 		local_irq_restore(flags);
 | |
| 		cpu_relax();
 | |
| 		goto retry;
 | |
| 	}
 | |
| 
 | |
| 	__dump_stack();
 | |
| 
 | |
| 	if (!was_locked)
 | |
| 		atomic_set(&dump_lock, -1);
 | |
| 
 | |
| 	local_irq_restore(flags);
 | |
| }
 | |
| #else
 | |
| asmlinkage __visible void dump_stack(void)
 | |
| {
 | |
| 	__dump_stack();
 | |
| }
 | |
| #endif
 | |
| EXPORT_SYMBOL(dump_stack);
 |