From c17af4dd96aa99e6e58b5d715a7c66db63a15106 Mon Sep 17 00:00:00 2001
From: David Ahern <david.ahern@oracle.com>
Date: Mon, 15 Jun 2015 16:15:43 -0400
Subject: [PATCH 1/4] sparc: perf: Disable pagefaults while walking userspace
 stacks

Page faults generated walking userspace stacks can call schedule to switch
out the task. When collecting callchains for scheduler tracepoints this
causes a deadlock as the tracepoints can be hit with the runqueue lock held:

[ 8138.159054] WARNING: CPU: 758 PID: 12488 at /opt/dahern/linux.git/arch/sparc/kernel/nmi.c:80 perfctr_irq+0x1f8/0x2b4()

[ 8138.203152] Watchdog detected hard LOCKUP on cpu 758

[ 8138.410969] CPU: 758 PID: 12488 Comm: perf Not tainted 4.0.0-rc6+ #6
[ 8138.437146] Call Trace:
[ 8138.447193]  [000000000045cdd4] warn_slowpath_common+0x7c/0xa0
[ 8138.471238]  [000000000045ce90] warn_slowpath_fmt+0x30/0x40
[ 8138.494189]  [0000000000983e38] perfctr_irq+0x1f8/0x2b4
[ 8138.515716]  [00000000004209f4] tl0_irq15+0x14/0x20
[ 8138.535791]  [00000000009839ec] _raw_spin_trylock_bh+0x68/0x108
[ 8138.560180]  [0000000000980018] __schedule+0xcc/0x710
[ 8138.580981]  [00000000009806dc] preempt_schedule_common+0x10/0x3c
[ 8138.606082]  [000000000098077c] _cond_resched+0x34/0x44
[ 8138.627603]  [0000000000565990] kmem_cache_alloc_node+0x24/0x1a0
[ 8138.652345]  [0000000000450b60] tsb_grow+0xac/0x488
[ 8138.672429]  [0000000000985040] do_sparc64_fault+0x4dc/0x6e4
[ 8138.695736]  [0000000000407c2c] sparc64_realfault_common+0x10/0x20
[ 8138.721202]  [00000000006f2e24] NG4copy_from_user+0xa4/0x3c0
[ 8138.744510]  [000000000044f900] perf_callchain_user+0x5c/0x6c
[ 8138.768182]  [0000000000517b5c] perf_callchain+0x16c/0x19c
[ 8138.790774]  [0000000000515f84] perf_prepare_sample+0x68/0x218
[ 8138.814801] ---[ end trace 42ca6294b1ff7573 ]---

As with PowerPC (b59a1bfcc240, "powerpc/perf: Disable pagefaults during
callchain stack read") disable pagefaults while walking userspace stacks.

Signed-off-by: David Ahern <david.ahern@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/perf_event.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 59cf917a77b5..48387be665e9 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -21,7 +21,7 @@
 
 #include <asm/stacktrace.h>
 #include <asm/cpudata.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 #include <asm/nmi.h>
 #include <asm/pcr.h>
@@ -1803,8 +1803,13 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		return;
 
 	flushw_user();
+
+	pagefault_disable();
+
 	if (test_thread_flag(TIF_32BIT))
 		perf_callchain_user_32(entry, regs);
 	else
 		perf_callchain_user_64(entry, regs);
+
+	pagefault_enable();
 }

From 2bf7c3efc393937d1e5f92681501a914dbfbae07 Mon Sep 17 00:00:00 2001
From: David Ahern <david.ahern@oracle.com>
Date: Mon, 15 Jun 2015 16:15:44 -0400
Subject: [PATCH 2/4] sparc64: Convert BUG_ON to warning

Pagefault handling has a BUG_ON path that panics the system. Convert it to
a warning instead. There is no need to bring down the system for this kind
of failure.

The following was hit while running:
    perf sched record -g -- make -j 16

[3609412.782801] kernel BUG at /opt/dahern/linux.git/arch/sparc/mm/fault_64.c:416!
[3609412.782833]               \|/ ____ \|/
[3609412.782833]               "@'/ .. \`@"
[3609412.782833]               /_| \__/ |_\
[3609412.782833]                  \__U_/
[3609412.782870] cat(4516): Kernel bad sw trap 5 [#1]
[3609412.782889] CPU: 0 PID: 4516 Comm: cat Tainted: G            E   4.1.0-rc8+ #6
[3609412.782909] task: fff8000126e31f80 ti: fff8000110d90000 task.ti: fff8000110d90000
[3609412.782931] TSTATE: 0000004411001603 TPC: 000000000096b164 TNPC: 000000000096b168 Y: 0000004e    Tainted: G            E
[3609412.782964] TPC: <do_sparc64_fault+0x5e4/0x6a0>
[3609412.782979] g0: 000000000096abe0 g1: 0000000000d314c4 g2: 0000000000000000 g3: 0000000000000001
[3609412.783009] g4: fff8000126e31f80 g5: fff80001302d2000 g6: fff8000110d90000 g7: 00000000000000ff
[3609412.783045] o0: 0000000000aff6a8 o1: 00000000000001a0 o2: 0000000000000001 o3: 0000000000000054
[3609412.783080] o4: fff8000100026820 o5: 0000000000000001 sp: fff8000110d935f1 ret_pc: 000000000096b15c
[3609412.783117] RPC: <do_sparc64_fault+0x5dc/0x6a0>
[3609412.783137] l0: 000007feff996000 l1: 0000000000030001 l2: 0000000000000004 l3: fff8000127bd0120
[3609412.783174] l4: 0000000000000054 l5: fff8000127bd0188 l6: 0000000000000000 l7: fff8000110d9dba8
[3609412.783210] i0: fff8000110d93f60 i1: fff8000110ca5530 i2: 000000000000003f i3: 0000000000000054
[3609412.783244] i4: fff800010000081a i5: fff8000100000398 i6: fff8000110d936a1 i7: 0000000000407c6c
[3609412.783286] I7: <sparc64_realfault_common+0x10/0x20>
[3609412.783308] Call Trace:
[3609412.783329]  [0000000000407c6c] sparc64_realfault_common+0x10/0x20
[3609412.783353] Disabling lock debugging due to kernel taint
[3609412.783379] Caller[0000000000407c6c]: sparc64_realfault_common+0x10/0x20
[3609412.783449] Caller[fff80001002283e4]: 0xfff80001002283e4
[3609412.783471] Instruction DUMP: 921021a0  7feaff91  901222a8 <91d02005> 82086100  02f87f7b  808a2873  81cfe008  01000000
[3609412.783542] Kernel panic - not syncing: Fatal exception
[3609412.784605] Press Stop-A (L1-A) to return to the boot prom
[3609412.784615] ---[ end Kernel panic - not syncing: Fatal exception

With this patch rather than a panic I occasionally get something like this:
    perf sched record -g -m 1024  -- make -j N

where N is based on number of cpus (128 to 1024 for a T7-4 and 8 for an 8 cpu
VM on a T5-2).

WARNING: CPU: 211 PID: 52565 at /opt/dahern/linux.git/arch/sparc/mm/fault_64.c:417 do_sparc64_fault+0x340/0x70c()
address (7feffcd6000) != regs->tpc (fff80001004873c0)
Modules linked in: ipt_REJECT nf_reject_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_reject_ipv6 xt_tcpudp nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 cdc_ether usbnet mii ixgbe mdio igb i2c_algo_bit i2c_core ptp crc32c_sparc64 camellia_sparc64 des_sparc64 des_generic md5_sparc64 sha512_sparc64 sha1_sparc64 uio_pdrv_genirq uio usb_storage mpt3sas scsi_transport_sas raid_class aes_sparc64 sunvnet sunvdc sha256_sparc64(E) sha256_generic(E)
CPU: 211 PID: 52565 Comm: ld Tainted: G        W   E   4.1.0-rc8+ #19
Call Trace:
 [000000000045ce30] warn_slowpath_common+0x7c/0xa0
 [000000000045ceec] warn_slowpath_fmt+0x30/0x40
 [000000000098ad64] do_sparc64_fault+0x340/0x70c
 [0000000000407c2c] sparc64_realfault_common+0x10/0x20
---[ end trace 62ee02065a01a049 ]---
ld[52565]: segfault at fff80001004873c0 ip fff80001004873c0 (rpc fff8000100158868) sp 000007feffcd70e1 error 30002 in libc-2.12.so[fff8000100410000+184000]

The segfault is horrible, but better than a system panic.

An 8-cpu VM on a T5-2 also showed the above traces from time to time,
so it is a general problem and not specific to the T7 or baremetal.

Signed-off-by: David Ahern <david.ahern@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/fault_64.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index e9268ea1a68d..dbabe5713a15 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -413,8 +413,9 @@ good_area:
 	 * that here.
 	 */
 	if ((fault_code & FAULT_CODE_ITLB) && !(vma->vm_flags & VM_EXEC)) {
-		BUG_ON(address != regs->tpc);
-		BUG_ON(regs->tstate & TSTATE_PRIV);
+		WARN(address != regs->tpc,
+		     "address (%lx) != regs->tpc (%lx)\n", address, regs->tpc);
+		WARN_ON(regs->tstate & TSTATE_PRIV);
 		goto bad_area;
 	}
 

From b69fb7699c92f85991672fc144b0adb7c717fbc8 Mon Sep 17 00:00:00 2001
From: David Ahern <david.ahern@oracle.com>
Date: Mon, 15 Jun 2015 16:15:45 -0400
Subject: [PATCH 3/4] sparc64: perf: Add sanity checking on addresses in user
 stack

Processes are getting killed (sigbus or segv) while walking userspace
callchains when using perf. In some instances I have seen ufp = 0x7ff
which does not seem like a proper stack address.

This patch adds a function to run validity checks against the address
before attempting the copy_from_user. The checks are copied from the
x86 version as a start point with the addition of a 4-byte alignment
check.

Signed-off-by: David Ahern <david.ahern@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/uaccess_64.h | 22 ++++++++++++++++++++++
 arch/sparc/kernel/perf_event.c      | 13 +++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index a35194b7dba0..ea6e9a20f3ff 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -49,6 +49,28 @@ do {										\
 	__asm__ __volatile__ ("wr %%g0, %0, %%asi" : : "r" ((val).seg));	\
 } while(0)
 
+/*
+ * Test whether a block of memory is a valid user space address.
+ * Returns 0 if the range is valid, nonzero otherwise.
+ */
+static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit)
+{
+	if (__builtin_constant_p(size))
+		return addr > limit - size;
+
+	addr += size;
+	if (addr < size)
+		return true;
+
+	return addr > limit;
+}
+
+#define __range_not_ok(addr, size, limit)                               \
+({                                                                      \
+	__chk_user_ptr(addr);                                           \
+	__chk_range_not_ok((unsigned long __force)(addr), size, limit); \
+})
+
 static inline int __access_ok(const void __user * addr, unsigned long size)
 {
 	return 1;
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 48387be665e9..a665e3f8c6c6 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1741,6 +1741,16 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+	/* addresses should be at least 4-byte aligned */
+	if (((unsigned long) fp) & 3)
+		return 0;
+
+	return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
 static void perf_callchain_user_64(struct perf_callchain_entry *entry,
 				   struct pt_regs *regs)
 {
@@ -1753,6 +1763,9 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
 		unsigned long pc;
 
 		usf = (struct sparc_stackf __user *)ufp;
+		if (!valid_user_frame(usf, sizeof(sf)))
+			break;
+
 		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
 			break;
 

From 2d89cd8625c4af01a2683b18c3c8194cc3b3067c Mon Sep 17 00:00:00 2001
From: David Ahern <david.ahern@oracle.com>
Date: Mon, 15 Jun 2015 16:15:46 -0400
Subject: [PATCH 4/4] sparc64: perf: Use UREG_FP rather than UREG_I6

perf walks userspace callchains by following frame pointers. Use the
UREG_FP macro to make it clearer that the %fp is being used.

Signed-off-by: David Ahern <david.ahern@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/perf_event.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index a665e3f8c6c6..689db65f8529 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1756,7 +1756,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
 {
 	unsigned long ufp;
 
-	ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
+	ufp = regs->u_regs[UREG_FP] + STACK_BIAS;
 	do {
 		struct sparc_stackf __user *usf;
 		struct sparc_stackf sf;
@@ -1780,7 +1780,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
 {
 	unsigned long ufp;
 
-	ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
+	ufp = regs->u_regs[UREG_FP] & 0xffffffffUL;
 	do {
 		unsigned long pc;