From 67d5268908283c187e0a460048a423256c2fb288 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Sat, 27 Feb 2016 21:21:12 +0100
Subject: [PATCH 01/23] perf tools: Fix python extension build

The util/python-ext-sources file contains source files required to build
the python extension relative to $(srctree)/tools/perf,

Such a file path $(FILE).c is handed over to the python extension build
system, which builds the final object in the
$(PYTHON_EXTBUILD)/tmp/$(FILE).o path.

After the build is done all files from $(PYTHON_EXTBUILD)lib/ are
carried as the result binaries.

Above system fails when we add source file relative to ../lib, which we
do for:

  ../lib/bitmap.c
  ../lib/find_bit.c
  ../lib/hweight.c
  ../lib/rbtree.c

All above objects will be built like:

  $(PYTHON_EXTBUILD)/tmp/../lib/bitmap.c
  $(PYTHON_EXTBUILD)/tmp/../lib/find_bit.c
  $(PYTHON_EXTBUILD)/tmp/../lib/hweight.c
  $(PYTHON_EXTBUILD)/tmp/../lib/rbtree.c

which accidentally happens to be final library path:

  $(PYTHON_EXTBUILD)/lib/

Changing setup.py to pass full paths of source files to Extension build
class and thus keep all built objects under $(PYTHON_EXTBUILD)tmp
directory.

Reported-by: Jeff Bastian <jbastian@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Josh Boyer <jwboyer@fedoraproject.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@vger.kernel.org # v4.2+
Link: http://lkml.kernel.org/r/20160227201350.GB28494@krava.redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/setup.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 1833103768cb..c8680984d2d6 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -22,6 +22,7 @@ cflags = getenv('CFLAGS', '').split()
 # switch off several checks (need to be at the end of cflags list)
 cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
 
+src_perf  = getenv('srctree') + '/tools/perf'
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 libtraceevent = getenv('LIBTRACEEVENT')
@@ -30,6 +31,9 @@ libapikfs = getenv('LIBAPI')
 ext_sources = [f.strip() for f in file('util/python-ext-sources')
 				if len(f.strip()) > 0 and f[0] != '#']
 
+# use full paths with source files
+ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)
+
 perf = Extension('perf',
 		  sources = ext_sources,
 		  include_dirs = ['util/include'],

From c42de706dad3f39c1f65e473a1d165ea33f8b6e8 Mon Sep 17 00:00:00 2001
From: Taeung Song <treeze.taeung@gmail.com>
Date: Fri, 26 Feb 2016 22:14:25 +0900
Subject: [PATCH 02/23] perf trace: Check and discard not only 'nr' but also
 '__syscall_nr'

Format fields of a syscall have the first variable '__syscall_nr' or
'nr' that mean the syscall number.  But it isn't relevant here so drop
it.

'nr' among fields of syscall was renamed '__syscall_nr'.  So add
exception handling to drop '__syscall_nr' and modify the comment for
this excpetion handling.

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Taeung Song <treeze.taeung@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1456492465-5946-1-git-send-email-treeze.taeung@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 26a337f939d8..8dc98c598b1a 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1725,8 +1725,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 
 	sc->args = sc->tp_format->format.fields;
 	sc->nr_args = sc->tp_format->format.nr_fields;
-	/* drop nr field - not relevant here; does not exist on older kernels */
-	if (sc->args && strcmp(sc->args->name, "nr") == 0) {
+	/*
+	 * We need to check and discard the first variable '__syscall_nr'
+	 * or 'nr' that mean the syscall number. It is needless here.
+	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
+	 */
+	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
 		sc->args = sc->args->next;
 		--sc->nr_args;
 	}

From 026842d148b920dc28f0499ede4950dcb098d4d5 Mon Sep 17 00:00:00 2001
From: Taeung Song <treeze.taeung@gmail.com>
Date: Fri, 26 Feb 2016 13:23:01 -0500
Subject: [PATCH 03/23] tracing/syscalls: Rename "/format" tracepoint field
 name "nr" to "__syscall_nr:

Some tracepoint have multiple fields with the same name, "nr", the first
one is a unique syscall ID, the other is a syscall argument:

  # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_io_getevents/format
  name: sys_enter_io_getevents
  ID: 747
  format:
 	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
 	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
 	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
 	field:int common_pid;	offset:4;	size:4;	signed:1;

 	field:int nr;	offset:8;	size:4;	signed:1;
 	field:aio_context_t ctx_id;	offset:16;	size:8;	signed:0;
 	field:long min_nr;	offset:24;	size:8;	signed:0;
 	field:long nr;	offset:32;	size:8;	signed:0;
 	field:struct io_event * events;	offset:40;	size:8;	signed:0;
 	field:struct timespec * timeout;	offset:48;	size:8;	signed:0;

  print fmt: "ctx_id: 0x%08lx, min_nr: 0x%08lx, nr: 0x%08lx, events: 0x%08lx, timeout: 0x%08lx", ((unsigned long)(REC->ctx_id)), ((unsigned long)(REC->min_nr)), ((unsigned long)(REC->nr)), ((unsigned long)(REC->events)), ((unsigned long)(REC->timeout))
  #

Fix it by renaming the "/format" common tracepoint field "nr" to "__syscall_nr".

Signed-off-by: Taeung Song <treeze.taeung@gmail.com>
[ Do not rename the struct member, just the '/format' field name ]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20160226132301.3ae065a4@gandalf.local.home
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 kernel/trace/trace_syscalls.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 0655afbea83f..d1663083d903 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -186,11 +186,11 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
 
 extern char *__bad_type_size(void);
 
-#define SYSCALL_FIELD(type, name)					\
-	sizeof(type) != sizeof(trace.name) ?				\
+#define SYSCALL_FIELD(type, field, name)				\
+	sizeof(type) != sizeof(trace.field) ?				\
 		__bad_type_size() :					\
-		#type, #name, offsetof(typeof(trace), name),		\
-		sizeof(trace.name), is_signed_type(type)
+		#type, #name, offsetof(typeof(trace), field),		\
+		sizeof(trace.field), is_signed_type(type)
 
 static int __init
 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
@@ -261,7 +261,8 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
 	int i;
 	int offset = offsetof(typeof(trace), args);
 
-	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+				 FILTER_OTHER);
 	if (ret)
 		return ret;
 
@@ -281,11 +282,12 @@ static int __init syscall_exit_define_fields(struct trace_event_call *call)
 	struct syscall_trace_exit trace;
 	int ret;
 
-	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+				 FILTER_OTHER);
 	if (ret)
 		return ret;
 
-	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
+	ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
 				 FILTER_OTHER);
 
 	return ret;

From a6745330789f25103e67011bcfeec908fcc3b341 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 29 Feb 2016 09:01:28 -0500
Subject: [PATCH 04/23] tools lib traceevent: Split pevent_print_event() into
 specific functionality functions

Currently there's a single function that is used to display a record's
data in human readable format. That's pevent_print_event().
Unfortunately, this gives little room for adding other output within the
line without updating that function call.

I've decided to split that function into 3 parts.

 pevent_print_event_task() which prints the task comm, pid and the CPU
 pevent_print_event_time() which outputs the record's timestamp
 pevent_print_event_data() which outputs the rest of the event data.

pevent_print_event() now simply calls these three functions.

To save time from doing the search for event from the record's type, I
created a new helper function called pevent_find_event_by_record(),
which returns the record's event, and this event has to be passed to the
above functions.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/20160229090128.43a56704@gandalf.local.home
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/event-parse.c | 136 ++++++++++++++++++++++-------
 tools/lib/traceevent/event-parse.h |  13 +++
 2 files changed, 117 insertions(+), 32 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 575e75174087..9a1e48afcf89 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -5339,19 +5339,74 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock)
 	return false;
 }
 
-void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
-			struct pevent_record *record, bool use_trace_clock)
+/**
+ * pevent_find_event_by_record - return the event from a given record
+ * @pevent: a handle to the pevent
+ * @record: The record to get the event from
+ *
+ * Returns the associated event for a given record, or NULL if non is
+ * is found.
+ */
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record)
+{
+	int type;
+
+	if (record->size < 0) {
+		do_warning("ug! negative record size %d", record->size);
+		return NULL;
+	}
+
+	type = trace_parse_common_type(pevent, record->data);
+
+	return pevent_find_event(pevent, type);
+}
+
+/**
+ * pevent_print_event_task - Write the event task comm, pid and CPU
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the tasks comm, pid and CPU to @s.
+ */
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record)
+{
+	void *data = record->data;
+	const char *comm;
+	int pid;
+
+	pid = parse_common_pid(pevent, data);
+	comm = find_cmdline(pevent, pid);
+
+	if (pevent->latency_format) {
+		trace_seq_printf(s, "%8.8s-%-5d %3d",
+		       comm, pid, record->cpu);
+	} else
+		trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
+}
+
+/**
+ * pevent_print_event_time - Write the event timestamp
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ * @use_trace_clock: Set to parse according to the @pevent->trace_clock
+ *
+ * Writes the timestamp of the record into @s.
+ */
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record,
+			     bool use_trace_clock)
 {
-	static const char *spaces = "                    "; /* 20 spaces */
-	struct event_format *event;
 	unsigned long secs;
 	unsigned long usecs;
 	unsigned long nsecs;
-	const char *comm;
-	void *data = record->data;
-	int type;
-	int pid;
-	int len;
 	int p;
 	bool use_usec_format;
 
@@ -5362,28 +5417,11 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 		nsecs = record->ts - secs * NSECS_PER_SEC;
 	}
 
-	if (record->size < 0) {
-		do_warning("ug! negative record size %d", record->size);
-		return;
-	}
-
-	type = trace_parse_common_type(pevent, data);
-
-	event = pevent_find_event(pevent, type);
-	if (!event) {
-		do_warning("ug! no event found for type %d", type);
-		return;
-	}
-
-	pid = parse_common_pid(pevent, data);
-	comm = find_cmdline(pevent, pid);
-
 	if (pevent->latency_format) {
-		trace_seq_printf(s, "%8.8s-%-5d %3d",
-		       comm, pid, record->cpu);
+		trace_seq_printf(s, " %3d", record->cpu);
 		pevent_data_lat_fmt(pevent, s, record);
 	} else
-		trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
+		trace_seq_printf(s, " [%03d]", record->cpu);
 
 	if (use_usec_format) {
 		if (pevent->flags & PEVENT_NSEC_OUTPUT) {
@@ -5394,11 +5432,28 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 			p = 6;
 		}
 
-		trace_seq_printf(s, " %5lu.%0*lu: %s: ",
-					secs, p, usecs, event->name);
+		trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs);
 	} else
-		trace_seq_printf(s, " %12llu: %s: ",
-					record->ts, event->name);
+		trace_seq_printf(s, " %12llu:", record->ts);
+}
+
+/**
+ * pevent_print_event_data - Write the event data section
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the parsing of the record's data to @s.
+ */
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record)
+{
+	static const char *spaces = "                    "; /* 20 spaces */
+	int len;
+
+	trace_seq_printf(s, " %s: ", event->name);
 
 	/* Space out the event names evenly. */
 	len = strlen(event->name);
@@ -5408,6 +5463,23 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 	pevent_event_info(s, event, record);
 }
 
+void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
+			struct pevent_record *record, bool use_trace_clock)
+{
+	struct event_format *event;
+
+	event = pevent_find_event_by_record(pevent, record);
+	if (!event) {
+		do_warning("ug! no event found for type %d",
+			   trace_parse_common_type(pevent, record->data));
+		return;
+	}
+
+	pevent_print_event_task(pevent, s, event, record);
+	pevent_print_event_time(pevent, s, event, record, use_trace_clock);
+	pevent_print_event_data(pevent, s, event, record);
+}
+
 static int events_id_cmp(const void *a, const void *b)
 {
 	struct event_format * const * ea = a;
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 706d9bc24066..9ffde377e89d 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -628,6 +628,16 @@ int pevent_register_print_string(struct pevent *pevent, const char *fmt,
 				 unsigned long long addr);
 int pevent_pid_is_registered(struct pevent *pevent, int pid);
 
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record);
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record,
+			     bool use_trace_clock);
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record);
 void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 			struct pevent_record *record, bool use_trace_clock);
 
@@ -694,6 +704,9 @@ struct event_format *pevent_find_event(struct pevent *pevent, int id);
 struct event_format *
 pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name);
 
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record);
+
 void pevent_data_lat_fmt(struct pevent *pevent,
 			 struct trace_seq *s, struct pevent_record *record);
 int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);

From f9a5978ac4ede901fa73d7c28ae1c5d89bc2a46a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 3 Mar 2016 10:53:48 +0100
Subject: [PATCH 05/23] perf tools: Fix locale handling in pmu parsing

Ingo reported regression on display format of big numbers, which is
missing separators (in default perf stat output).

 triton:~/tip> perf stat -a sleep 1
         ...
         127008602      cycles                    #    0.011 GHz
         279538533      stalled-cycles-frontend   #  220.09% frontend cycles idle
         119213269      instructions              #    0.94  insn per cycle

This is caused by recent change:

  perf stat: Check existence of frontend/backed stalled cycles

that added call to pmu_have_event, that subsequently calls
perf_pmu__parse_scale, which has a bug in locale handling.

The lc string returned from setlocale, that we use to store old locale
value, may be allocated in static storage. Getting a dynamic copy to
make it survive another setlocale call.

  $ perf stat ls
         ...
         2,360,602      cycles                    #    3.080 GHz
         2,703,090      instructions              #    1.15  insn per cycle
           546,031      branches                  #  712.511 M/sec

Committer note:

Since the patch introducing the regression didn't made to perf/core,
move it to just before where the regression was introduced, so that we
don't break bisection for this feature.

Reported-by: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20160303095348.GA24511@krava.redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pmu.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index ce61f79dbaae..d8cd038baed2 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -123,6 +123,17 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
 	 */
 	lc = setlocale(LC_NUMERIC, NULL);
 
+	/*
+	 * The lc string may be allocated in static storage,
+	 * so get a dynamic copy to make it survive setlocale
+	 * call below.
+	 */
+	lc = strdup(lc);
+	if (!lc) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
 	/*
 	 * force to C locale to ensure kernel
 	 * scale string is converted correctly.
@@ -135,6 +146,8 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
 	/* restore locale */
 	setlocale(LC_NUMERIC, lc);
 
+	free((char *) lc);
+
 	ret = 0;
 error:
 	close(fd);

From 9dec4473abe7967c204fe700baf5344ade34e9c8 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 26 Feb 2016 16:27:56 -0800
Subject: [PATCH 06/23] perf stat: Check existence of frontend/backed stalled
 cycles

Only put the frontend/backend stalled cycles into the default perf stat
events when the CPU actually supports them.

This avoids empty columns with --metric-only on newer Intel CPUs.

Committer note:

Before:

  $ perf stat ls

    Performance counter stats for 'ls':

          1.080893     task-clock (msec)      #    0.619 CPUs utilized
                 0     context-switches       #    0.000 K/sec
                 0     cpu-migrations         #    0.000 K/sec
                97     page-faults            #    0.090 M/sec
         3,327,741     cycles                 #    3.079 GHz
   <not supported>     stalled-cycles-frontend
   <not supported>     stalled-cycles-backend
         1,609,544     instructions           #    0.48  insn per cycle
           319,117     branches               #  295.235 M/sec
            12,246     branch-misses          #    3.84% of all branches

       0.001746508 seconds time elapsed
  $

After:

  $ perf stat ls

    Performance counter stats for 'ls':

          0.693948     task-clock (msec)      #    0.662 CPUs utilized
                 0     context-switches       #    0.000 K/sec
                 0     cpu-migrations         #    0.000 K/sec
                95     page-faults            #    0.137 M/sec
         1,792,509     cycles                 #    2.583 GHz
         1,599,047     instructions           #    0.89  insn per cycle
           316,328     branches               #  455.838 M/sec
            12,453     branch-misses          #    3.94% of all branches

       0.001048987 seconds time elapsed
  $

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1456532881-26621-2-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8c0bc0fe5179..24f222dd2a8a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1441,7 +1441,7 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
  */
 static int add_default_attributes(void)
 {
-	struct perf_event_attr default_attrs[] = {
+	struct perf_event_attr default_attrs0[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
@@ -1449,8 +1449,14 @@ static int add_default_attributes(void)
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
 
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
+};
+	struct perf_event_attr frontend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
+};
+	struct perf_event_attr backend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
+};
+	struct perf_event_attr default_attrs1[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
@@ -1567,7 +1573,19 @@ static int add_default_attributes(void)
 	}
 
 	if (!evsel_list->nr_entries) {
-		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
+		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+			return -1;
+		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
+			if (perf_evlist__add_default_attrs(evsel_list,
+						frontend_attrs) < 0)
+				return -1;
+		}
+		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
+			if (perf_evlist__add_default_attrs(evsel_list,
+						backend_attrs) < 0)
+				return -1;
+		}
+		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
 			return -1;
 	}
 

From 6122d57e9f7c6cb0f0aa276fbd3a12e3af826ef2 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 26 Feb 2016 09:31:56 +0000
Subject: [PATCH 07/23] perf data: Support converting data from
 bpf_perf_event_output()

bpf_perf_event_output() outputs data through sample->raw_data. This
patch adds support to convert those data into CTF. A python script then
can be used to process output data from BPF programs.

Test result:

  # cat ./test_bpf_output_2.c
  /************************ BEGIN **************************/
  #include <uapi/linux/bpf.h>
  struct bpf_map_def {
 	unsigned int type;
 	unsigned int key_size;
 	unsigned int value_size;
 	unsigned int max_entries;
  };
  #define SEC(NAME) __attribute__((section(NAME), used))
  static u64 (*ktime_get_ns)(void) =
 	(void *)BPF_FUNC_ktime_get_ns;
  static int (*trace_printk)(const char *fmt, int fmt_size, ...) =
 	(void *)BPF_FUNC_trace_printk;
  static int (*get_smp_processor_id)(void) =
 	(void *)BPF_FUNC_get_smp_processor_id;
  static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) =
 	(void *)BPF_FUNC_perf_event_output;

  struct bpf_map_def SEC("maps") channel = {
 	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 	.key_size = sizeof(int),
 	.value_size = sizeof(u32),
 	.max_entries = __NR_CPUS__,
  };

  static inline int __attribute__((always_inline))
  func(void *ctx, int type)
  {
 	struct {
 		u64 ktime;
 		int type;
 	} __attribute__((packed)) output_data;
 	char error_data[] = "Error: failed to output\n";
 	int err;

 	output_data.type = type;
 	output_data.ktime = ktime_get_ns();
 	err = perf_event_output(ctx, &channel, get_smp_processor_id(),
 				&output_data, sizeof(output_data));
 	if (err)
 		trace_printk(error_data, sizeof(error_data));
 	return 0;
  }
  SEC("func_begin=sys_nanosleep")
  int func_begin(void *ctx) {return func(ctx, 1);}
  SEC("func_end=sys_nanosleep%return")
  int func_end(void *ctx) { return func(ctx, 2);}
  char _license[] SEC("license") = "GPL";
  int _version SEC("version") = LINUX_VERSION_CODE;
  /************************* END ***************************/

  # ./perf record -e bpf-output/no-inherit,name=evt/ \
                 -e ./test_bpf_output_2.c/map:channel.event=evt/ \
                 usleep 100000
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.012 MB perf.data (2 samples) ]

  # ./perf script
          usleep 14942 92503.198504: evt:  ffffffff810e0ba1 sys_nanosleep (/lib/modules/4.3.0....
          usleep 14942 92503.298562: evt:  ffffffff810585e9 kretprobe_trampoline_holder (/lib....

  # ./perf data convert --to-ctf ./out.ctf
  [ perf data convert: Converted 'perf.data' into CTF data './out.ctf' ]
  [ perf data convert: Converted and wrote 0.000 MB (2 samples) ]

  # babeltrace ./out.ctf
  [01:41:43.198504134] (+?.?????????) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810E0BA1, perf_tid = 14942, perf_pid = 14942, perf_id = 1044, raw_len = 3, raw_data = [ [0] = 0x32C0C07B, [1] = 0x5421, [2] = 0x1 ] }
  [01:41:43.298562257] (+0.100058123) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810585E9, perf_tid = 14942, perf_pid = 14942, perf_id = 1044, raw_len = 3, raw_data = [ [0] = 0x38B77FAA, [1] = 0x5421, [2] = 0x2 ] }

  # cat ./test_bpf_output_2.py
  from babeltrace import TraceCollection
  tc = TraceCollection()
  tc.add_trace('./out.ctf', 'ctf')
  d = {1:[], 2:[]}
  for event in tc.events:
     if not event.name.startswith('evt'):
         continue
     raw_data = event['raw_data']
     (time, type) = ((raw_data[0] + (raw_data[1] << 32)), raw_data[2])
     d[type].append(time)
  print(list(map(lambda i: d[2][i] - d[1][i], range(len(d[1])))));

  # python3 ./test_bpf_output_2.py
  [100056879]

Committer note:

Make sure you have python3-devel installed, not python-devel, which may
be for python2, which will lead to some "PyInstance_Type" errors. Also
make sure that you use the right libbabeltrace, because it is shipped
in Fedora, for instance, but an older version.

To build libbabeltrace's python binding one also needs to use:

 ./configure --enable-python-bindings

And then set PYTHONPATH=/usr/local/lib64/python3.4/site-packages/.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1456479154-136027-9-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/data-convert-bt.c | 112 +++++++++++++++++++++++++++++-
 1 file changed, 111 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 6729f4d9df7c..1f608a6e2c14 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -352,6 +352,84 @@ static int add_tracepoint_values(struct ctf_writer *cw,
 	return ret;
 }
 
+static int
+add_bpf_output_values(struct bt_ctf_event_class *event_class,
+		      struct bt_ctf_event *event,
+		      struct perf_sample *sample)
+{
+	struct bt_ctf_field_type *len_type, *seq_type;
+	struct bt_ctf_field *len_field, *seq_field;
+	unsigned int raw_size = sample->raw_size;
+	unsigned int nr_elements = raw_size / sizeof(u32);
+	unsigned int i;
+	int ret;
+
+	if (nr_elements * sizeof(u32) != raw_size)
+		pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n",
+			   raw_size, nr_elements * sizeof(u32) - raw_size);
+
+	len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
+	len_field = bt_ctf_field_create(len_type);
+	if (!len_field) {
+		pr_err("failed to create 'raw_len' for bpf output event\n");
+		ret = -1;
+		goto put_len_type;
+	}
+
+	ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+	if (ret) {
+		pr_err("failed to set field value for raw_len\n");
+		goto put_len_field;
+	}
+	ret = bt_ctf_event_set_payload(event, "raw_len", len_field);
+	if (ret) {
+		pr_err("failed to set payload to raw_len\n");
+		goto put_len_field;
+	}
+
+	seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data");
+	seq_field = bt_ctf_field_create(seq_type);
+	if (!seq_field) {
+		pr_err("failed to create 'raw_data' for bpf output event\n");
+		ret = -1;
+		goto put_seq_type;
+	}
+
+	ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+	if (ret) {
+		pr_err("failed to set length of 'raw_data'\n");
+		goto put_seq_field;
+	}
+
+	for (i = 0; i < nr_elements; i++) {
+		struct bt_ctf_field *elem_field =
+			bt_ctf_field_sequence_get_field(seq_field, i);
+
+		ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+				((u32 *)(sample->raw_data))[i]);
+
+		bt_ctf_field_put(elem_field);
+		if (ret) {
+			pr_err("failed to set raw_data[%d]\n", i);
+			goto put_seq_field;
+		}
+	}
+
+	ret = bt_ctf_event_set_payload(event, "raw_data", seq_field);
+	if (ret)
+		pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+	bt_ctf_field_put(seq_field);
+put_seq_type:
+	bt_ctf_field_type_put(seq_type);
+put_len_field:
+	bt_ctf_field_put(len_field);
+put_len_type:
+	bt_ctf_field_type_put(len_type);
+	return ret;
+}
+
 static int add_generic_values(struct ctf_writer *cw,
 			      struct bt_ctf_event *event,
 			      struct perf_evsel *evsel,
@@ -597,6 +675,12 @@ static int process_sample_event(struct perf_tool *tool,
 			return -1;
 	}
 
+	if (perf_evsel__is_bpf_output(evsel)) {
+		ret = add_bpf_output_values(event_class, event, sample);
+		if (ret)
+			return -1;
+	}
+
 	cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
 	if (cs) {
 		if (is_flush_needed(cs))
@@ -744,6 +828,25 @@ static int add_tracepoint_types(struct ctf_writer *cw,
 	return ret;
 }
 
+static int add_bpf_output_types(struct ctf_writer *cw,
+				struct bt_ctf_event_class *class)
+{
+	struct bt_ctf_field_type *len_type = cw->data.u32;
+	struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex;
+	struct bt_ctf_field_type *seq_type;
+	int ret;
+
+	ret = bt_ctf_event_class_add_field(class, len_type, "raw_len");
+	if (ret)
+		return ret;
+
+	seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len");
+	if (!seq_type)
+		return -1;
+
+	return bt_ctf_event_class_add_field(class, seq_type, "raw_data");
+}
+
 static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
 			     struct bt_ctf_event_class *event_class)
 {
@@ -755,7 +858,8 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
 	 *                              ctf event header
 	 *   PERF_SAMPLE_READ         - TODO
 	 *   PERF_SAMPLE_CALLCHAIN    - TODO
-	 *   PERF_SAMPLE_RAW          - tracepoint fields are handled separately
+	 *   PERF_SAMPLE_RAW          - tracepoint fields and BPF output
+	 *                              are handled separately
 	 *   PERF_SAMPLE_BRANCH_STACK - TODO
 	 *   PERF_SAMPLE_REGS_USER    - TODO
 	 *   PERF_SAMPLE_STACK_USER   - TODO
@@ -824,6 +928,12 @@ static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel)
 			goto err;
 	}
 
+	if (perf_evsel__is_bpf_output(evsel)) {
+		ret = add_bpf_output_types(cw, event_class);
+		if (ret)
+			goto err;
+	}
+
 	ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
 	if (ret) {
 		pr("Failed to add event class into stream.\n");

From f8dd2d5ff953bc498d682ae8022439c940a7d5c4 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 26 Feb 2016 09:31:57 +0000
Subject: [PATCH 08/23] perf data: Explicitly set byte order for integer types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After babeltrace commit 5cec03e402aa ("ir: copy variants and sequences
when setting a field path"), 'perf data convert' gets incorrect result
if there's bpf output data. For example:

 # perf data convert --to-ctf ./out.ctf
 # babeltrace ./out.ctf
 [10:44:31.186045346] (+?.?????????) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810E7DD1, perf_tid = 23819, perf_pid = 23819, perf_id = 518, raw_len = 3, raw_data = [ [0] = 0xC028E32F, [1] = 0x815D0100, [2] = 0x1000000 ] }
 [10:44:31.286101003] (+0.100055657) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF8105B609, perf_tid = 23819, perf_pid = 23819, perf_id = 518, raw_len = 3, raw_data = [ [0] = 0x35D9F1EB, [1] = 0x15D81, [2] = 0x2 ] }

The expected result of the first sample should be:

 raw_data = [ [0] = 0x2FE328C0, [1] = 0x15D81, [2] = 0x1 ] }

however, 'perf data convert' output big endian value to resuling CTF
file.

The reason is a internal change (or a bug?) of babeltrace.

Before this patch, at the first add_bpf_output_values(), byte order of
all integer type is uncertain (is 0, neither 1234 (le) nor 4321 (be)).
It would be fixed by:

perf_evlist__deliver_sample
 -> process_sample_event
   -> ctf_stream
      ...
      ->bt_ctf_trace_add_stream_class
        ->bt_ctf_field_type_structure_set_byte_order
          ->bt_ctf_field_type_integer_set_byte_order

during creating the stream.

However, the babeltrace commit mentioned above duplicates types in
sequence to prevent potential conflict in following call stack and link
the newly allocated type into the 'raw_data' sequence:

perf_evlist__deliver_sample
 -> process_sample_event
   -> ctf_stream
      ...
      -> bt_ctf_trace_add_stream_class
        -> bt_ctf_stream_class_resolve_types
           ...
           -> bt_ctf_field_type_sequence_copy
             ->bt_ctf_field_type_integer_copy

This happens before byte order setting, so only the newly allocated
type is initialized, the byte order of original type perf choose to
create the first raw_data is still uncertain.

Byte order in CTF output is not related to byte order in perf.data.
Setting it to anything other than BT_CTF_BYTE_ORDER_NATIVE solves this
problem (only BT_CTF_BYTE_ORDER_NATIVE needs to be fixed). To reduce
behavior changing, set byte order according to compiling options.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1456479154-136027-10-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/data-convert-bt.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 1f608a6e2c14..811af89ce0bb 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1080,6 +1080,12 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex)
 	    bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
 		goto err;
 
+#if __BYTE_ORDER == __BIG_ENDIAN
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN);
+#else
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN);
+#endif
+
 	pr2("Created type: INTEGER %d-bit %ssigned %s\n",
 	    size, sign ? "un" : "", hex ? "hex" : "");
 	return type;

From d8871ea71281ed689dc3303d1b50eb00c5d06141 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 26 Feb 2016 09:32:06 +0000
Subject: [PATCH 09/23] perf record: Use WARN_ONCE to replace 'if' condition

Commits in a BPF patchkit will extract kernel and module synthesizing
code into a separated function and call it multiple times. This patch
replace 'if (err < 0)' using WARN_ONCE, makes sure the error message
show one time.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1456479154-136027-19-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7d11162b6c41..9dec7e529832 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -33,6 +33,7 @@
 #include "util/parse-regs-options.h"
 #include "util/llvm-utils.h"
 #include "util/bpf-loader.h"
+#include "asm/bug.h"
 
 #include <unistd.h>
 #include <sched.h>
@@ -615,17 +616,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 						 machine);
-	if (err < 0)
-		pr_err("Couldn't record kernel reference relocation symbol\n"
-		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-		       "Check /proc/kallsyms permission or run as root.\n");
+	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
+			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+			   "Check /proc/kallsyms permission or run as root.\n");
 
 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
 					     machine);
-	if (err < 0)
-		pr_err("Couldn't record kernel module information.\n"
-		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-		       "Check /proc/modules permission or run as root.\n");
+	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
+			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+			   "Check /proc/modules permission or run as root.\n");
 
 	if (perf_guest) {
 		machines__process_guests(&session->machines,

From c45c86eb70964615bd13b4c1e647bf9ee60c3db9 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 26 Feb 2016 09:32:07 +0000
Subject: [PATCH 10/23] perf record: Extract synthesize code to
 record__synthesize()

Create record__synthesize(). It can be used to create tracking events
for each perf.data after perf supporting splitting into multiple
outputs.

Signed-off-by: He Kuang <hekuang@huawei.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1456479154-136027-20-git-send-email-wangnan0@huawei.com
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 125 ++++++++++++++++++++----------------
 1 file changed, 70 insertions(+), 55 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 9dec7e529832..cb583b49a175 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -486,6 +486,74 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 
 static void snapshot_sig_handler(int sig);
 
+static int record__synthesize(struct record *rec)
+{
+	struct perf_session *session = rec->session;
+	struct machine *machine = &session->machines.host;
+	struct perf_data_file *file = &rec->file;
+	struct record_opts *opts = &rec->opts;
+	struct perf_tool *tool = &rec->tool;
+	int fd = perf_data_file__fd(file);
+	int err = 0;
+
+	if (file->is_pipe) {
+		err = perf_event__synthesize_attrs(tool, session,
+						   process_synthesized_event);
+		if (err < 0) {
+			pr_err("Couldn't synthesize attrs.\n");
+			goto out;
+		}
+
+		if (have_tracepoints(&rec->evlist->entries)) {
+			/*
+			 * FIXME err <= 0 here actually means that
+			 * there were no tracepoints so its not really
+			 * an error, just that we don't need to
+			 * synthesize anything.  We really have to
+			 * return this more properly and also
+			 * propagate errors that now are calling die()
+			 */
+			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
+								  process_synthesized_event);
+			if (err <= 0) {
+				pr_err("Couldn't record tracing data.\n");
+				goto out;
+			}
+			rec->bytes_written += err;
+		}
+	}
+
+	if (rec->opts.full_auxtrace) {
+		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
+					session, process_synthesized_event);
+		if (err)
+			goto out;
+	}
+
+	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+						 machine);
+	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
+			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+			   "Check /proc/kallsyms permission or run as root.\n");
+
+	err = perf_event__synthesize_modules(tool, process_synthesized_event,
+					     machine);
+	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
+			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+			   "Check /proc/modules permission or run as root.\n");
+
+	if (perf_guest) {
+		machines__process_guests(&session->machines,
+					 perf_event__synthesize_guest_os, tool);
+	}
+
+	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
+					    process_synthesized_event, opts->sample_address,
+					    opts->proc_map_timeout);
+out:
+	return err;
+}
+
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 {
 	int err;
@@ -580,61 +648,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
 	machine = &session->machines.host;
 
-	if (file->is_pipe) {
-		err = perf_event__synthesize_attrs(tool, session,
-						   process_synthesized_event);
-		if (err < 0) {
-			pr_err("Couldn't synthesize attrs.\n");
-			goto out_child;
-		}
-
-		if (have_tracepoints(&rec->evlist->entries)) {
-			/*
-			 * FIXME err <= 0 here actually means that
-			 * there were no tracepoints so its not really
-			 * an error, just that we don't need to
-			 * synthesize anything.  We really have to
-			 * return this more properly and also
-			 * propagate errors that now are calling die()
-			 */
-			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
-								  process_synthesized_event);
-			if (err <= 0) {
-				pr_err("Couldn't record tracing data.\n");
-				goto out_child;
-			}
-			rec->bytes_written += err;
-		}
-	}
-
-	if (rec->opts.full_auxtrace) {
-		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
-					session, process_synthesized_event);
-		if (err)
-			goto out_delete_session;
-	}
-
-	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
-						 machine);
-	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
-			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-			   "Check /proc/kallsyms permission or run as root.\n");
-
-	err = perf_event__synthesize_modules(tool, process_synthesized_event,
-					     machine);
-	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
-			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-			   "Check /proc/modules permission or run as root.\n");
-
-	if (perf_guest) {
-		machines__process_guests(&session->machines,
-					 perf_event__synthesize_guest_os, tool);
-	}
-
-	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
-					    process_synthesized_event, opts->sample_address,
-					    opts->proc_map_timeout);
-	if (err != 0)
+	err = record__synthesize(rec);
+	if (err < 0)
 		goto out_child;
 
 	if (rec->realtime_prio) {

From e1ab48ba63ee6b2494a67bb60bf99692ecdaca7c Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 26 Feb 2016 09:32:10 +0000
Subject: [PATCH 11/23] perf record: Introduce record__finish_output() to
 finish a perf.data

Move code for finalizing 'perf.data' to record__finish_output(). It will
be used by following commits to split output to multiple files.

Signed-off-by: He Kuang <hekuang@huawei.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1456479154-136027-23-git-send-email-wangnan0@huawei.com
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index cb583b49a175..46e2772f838e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -468,6 +468,29 @@ static void record__init_features(struct record *rec)
 	perf_header__clear_feat(&session->header, HEADER_STAT);
 }
 
+static void
+record__finish_output(struct record *rec)
+{
+	struct perf_data_file *file = &rec->file;
+	int fd = perf_data_file__fd(file);
+
+	if (file->is_pipe)
+		return;
+
+	rec->session->header.data_size += rec->bytes_written;
+	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
+
+	if (!rec->no_buildid) {
+		process_buildids(rec);
+
+		if (rec->buildid_all)
+			dsos__hit_all(rec->session);
+	}
+	perf_session__write_header(rec->session, rec->evlist, fd, true);
+
+	return;
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -785,18 +808,8 @@ out_child:
 	/* this will be recalculated during process_buildids() */
 	rec->samples = 0;
 
-	if (!err && !file->is_pipe) {
-		rec->session->header.data_size += rec->bytes_written;
-		file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
-
-		if (!rec->no_buildid) {
-			process_buildids(rec);
-
-			if (rec->buildid_all)
-				dsos__hit_all(rec->session);
-		}
-		perf_session__write_header(rec->session, rec->evlist, fd, true);
-	}
+	if (!err)
+		record__finish_output(rec);
 
 	if (!err && !quiet) {
 		char samples[128];

From 95c365617aa37878592f2f1c6c64e1abb19f0d4a Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 26 Feb 2016 09:32:17 +0000
Subject: [PATCH 12/23] perf record: Ensure return non-zero rc when mmap fail

perf_evlist__mmap_ex() can fail without setting errno (for example, fail
in condition checking. In this case all syscall is success).

If this happen, record__open() incorrectly returns 0. Force setting rc
is a quick way to avoid this problem, or we have to follow all possible
code path in perf_evlist__mmap_ex() to make sure there's at least one
system call before returning an error.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1456479154-136027-30-git-send-email-wangnan0@huawei.com
Signed-off-by: He Kuang <hekuang@huawei.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 46e2772f838e..515510ecc76a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -324,7 +324,10 @@ try_again:
 		} else {
 			pr_err("failed to mmap with %d (%s)\n", errno,
 				strerror_r(errno, msg, sizeof(msg)));
-			rc = -errno;
+			if (errno)
+				rc = -errno;
+			else
+				rc = -EINVAL;
 		}
 		goto out;
 	}

From 92a61f6412d3a09d6462252a522fa79c9290f405 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 29 Feb 2016 14:36:21 -0800
Subject: [PATCH 13/23] perf stat: Implement CSV metrics output

Now support CSV output for metrics. With the new output callbacks this
is relatively straight forward by creating new callbacks.

This allows to easily plot metrics from CSV files.

The new line callback needs to know the number of fields to skip them
correctly

Example output before:

  % perf stat -x, true
  0.200687,,task-clock,200687,100.00
  0,,context-switches,200687,100.00
  0,,cpu-migrations,200687,100.00
  40,,page-faults,200687,100.00
  730871,,cycles,203601,100.00
  551056,,stalled-cycles-frontend,203601,100.00
  <not supported>,,stalled-cycles-backend,0,100.00
  385523,,instructions,203601,100.00
  78028,,branches,203601,100.00
  3946,,branch-misses,203601,100.00

After:

  % perf stat -x, true
  .502457,,task-clock,502457,100.00,0.485,CPUs utilized
  0,,context-switches,502457,100.00,0.000,K/sec
  0,,cpu-migrations,502457,100.00,0.000,K/sec
  45,,page-faults,502457,100.00,0.090,M/sec
  644692,,cycles,509102,100.00,1.283,GHz
  423470,,stalled-cycles-frontend,509102,100.00,65.69,frontend cycles idle
  <not supported>,,stalled-cycles-backend,0,100.00,,,,
  492701,,instructions,509102,100.00,0.76,insn per cycle
  ,,,,,0.86,stalled cycles per insn
  97767,,branches,509102,100.00,194.578,M/sec
  4788,,branch-misses,509102,100.00,4.90,of all branches

or easier readable

  $ perf stat  -x, -o x.csv true
  $ column -s, -t x.csv
  0.490635        task-clock              490635 100.00 0.489   CPUs utilized
  0               context-switches        490635 100.00 0.000   K/sec
  0               cpu-migrations          490635 100.00 0.000   K/sec
  45              page-faults             490635 100.00 0.092   M/sec
  629080          cycles                  497698 100.00 1.282   GHz
  409498          stalled-cycles-frontend 497698 100.00 65.09   frontend cycles idle
  <not supported> stalled-cycles-backend  0      100.00
  491424          instructions            497698 100.00 0.78    insn per cycle
                                                        0.83    stalled cycles per insn
  97278           branches                497698 100.00 198.270 M/sec
  4569            branch-misses           497698 100.00 4.70    of all branches

Two new fields are added: metric value and metric name.

v2: Split out function argument changes
v3: Reenable metrics for real.
v4: Fix wrong hunk from refactoring.
v5: Remove extra "noise" printing (Jiri), but add it to the not counted case.
Print empty metrics for not counted.
v6: Avoid outputting metric on empty format.
v7: Print metric at the end
v8: Remove extra run, ena fields
v9: Avoid extra new line for unsupported counters

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/1456785386-19481-3-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c     | 73 +++++++++++++++++++++++++++++++++--
 tools/perf/util/stat-shadow.c |  2 +-
 2 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 24f222dd2a8a..2ffb8221917a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -739,6 +739,7 @@ struct outstate {
 	FILE *fh;
 	bool newline;
 	const char *prefix;
+	int  nfields;
 };
 
 #define METRIC_LEN  35
@@ -789,6 +790,43 @@ static void print_metric_std(void *ctx, const char *color, const char *fmt,
 	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
 }
 
+static void new_line_csv(void *ctx)
+{
+	struct outstate *os = ctx;
+	int i;
+
+	fputc('\n', os->fh);
+	if (os->prefix)
+		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+	for (i = 0; i < os->nfields; i++)
+		fputs(csv_sep, os->fh);
+}
+
+static void print_metric_csv(void *ctx,
+			     const char *color __maybe_unused,
+			     const char *fmt, const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
+		return;
+	}
+	snprintf(buf, sizeof(buf), fmt, val);
+	vals = buf;
+	while (isspace(*vals))
+		vals++;
+	ends = vals;
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	while (isspace(*unit))
+		unit++;
+	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
+}
+
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 	FILE *output = stat_config.output;
@@ -860,6 +898,22 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 
 	nl = new_line_std;
 
+	if (csv_output) {
+		static int aggr_fields[] = {
+			[AGGR_GLOBAL] = 0,
+			[AGGR_THREAD] = 1,
+			[AGGR_NONE] = 1,
+			[AGGR_SOCKET] = 2,
+			[AGGR_CORE] = 2,
+		};
+
+		pm = print_metric_csv;
+		nl = new_line_csv;
+		os.nfields = 3;
+		os.nfields += aggr_fields[stat_config.aggr_mode];
+		if (counter->cgrp)
+			os.nfields++;
+	}
 	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
 		aggr_printout(counter, id, nr);
 
@@ -880,7 +934,12 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 			fprintf(stat_config.output, "%s%s",
 				csv_sep, counter->cgrp->name);
 
+		if (!csv_output)
+			pm(&os, NULL, NULL, "", 0);
+		print_noise(counter, noise);
 		print_running(run, ena);
+		if (csv_output)
+			pm(&os, NULL, NULL, "", 0);
 		return;
 	}
 
@@ -893,14 +952,20 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	out.new_line = nl;
 	out.ctx = &os;
 
-	if (!csv_output)
-		perf_stat__print_shadow_stats(counter, uval,
+	if (csv_output) {
+		print_noise(counter, noise);
+		print_running(run, ena);
+	}
+
+	perf_stat__print_shadow_stats(counter, uval,
 				stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
 				cpu_map__id_to_cpu(id),
 				&out);
 
-	print_noise(counter, noise);
-	print_running(run, ena);
+	if (!csv_output) {
+		print_noise(counter, noise);
+		print_running(run, ena);
+	}
 }
 
 static void print_aggr(char *prefix)
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 4d8f18581b9b..367e220e93d5 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -310,8 +310,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
 		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
 
-		out->new_line(ctxp);
 		if (total && avg) {
+			out->new_line(ctxp);
 			ratio = total / avg;
 			print_metric(ctxp, NULL, "%7.2f ",
 					"stalled cycles per insn",

From 44d49a6002595ccb95712e86ad2857cd55207602 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 29 Feb 2016 14:36:22 -0800
Subject: [PATCH 14/23] perf stat: Support metrics in --per-core/socket mode

Enable metrics printing in --per-core / --per-socket mode. We need to
save the shadow metrics in a unique place. Always use the first CPU in
the aggregation. Then use the same CPU to retrieve the shadow value
later.

Example output:

  % perf stat --per-core -a ./BC1s

   Performance counter stats for 'system wide':

  S0-C0 2   2966.020381 task-clock (msec) #   2.004 CPUs utilized  (100.00%)
  S0-C0 2            49 context-switches  #   0.017 K/sec          (100.00%)
  S0-C0 2             4 cpu-migrations    #   0.001 K/sec          (100.00%)
  S0-C0 2           467 page-faults       #   0.157 K/sec
  S0-C0 2 4,599,061,773 cycles            #   1.551 GHz            (100.00%)
  S0-C0 2 9,755,886,883 instructions      #   2.12  insn per cycle (100.00%)
  S0-C0 2 1,906,272,125 branches          # 642.704 M/sec          (100.00%)
  S0-C0 2    81,180,867 branch-misses     #   4.26% of all branches
  S0-C1 2   2965.995373 task-clock (msec) #   2.003 CPUs utilized  (100.00%)
  S0-C1 2            62 context-switches  #   0.021 K/sec          (100.00%)
  S0-C1 2             8 cpu-migrations    #   0.003 K/sec          (100.00%)
  S0-C1 2           281 page-faults       #   0.095 K/sec
  S0-C1 2     6,347,290 cycles            #   0.002 GHz            (100.00%)
  S0-C1 2     4,654,156 instructions      #   0.73  insn per cycle (100.00%)
  S0-C1 2       947,121 branches          #   0.319 M/sec          (100.00%)
  S0-C1 2        37,322 branch-misses     #   3.94% of all branches

         1.480409747 seconds time elapsed

v2: Rebase to older patches
v3: Document shadow cpus. Fix aggr_get_id argument. Fix -A shadows (Jiri)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/1456785386-19481-4-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c     | 64 ++++++++++++++++++++++++++++++-----
 tools/perf/util/stat-shadow.c |  7 ++++
 2 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2ffb8221917a..9b5089c5dffe 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -740,6 +740,8 @@ struct outstate {
 	bool newline;
 	const char *prefix;
 	int  nfields;
+	int  id, nr;
+	struct perf_evsel *evsel;
 };
 
 #define METRIC_LEN  35
@@ -755,12 +757,9 @@ static void do_new_line_std(struct outstate *os)
 {
 	fputc('\n', os->fh);
 	fputs(os->prefix, os->fh);
+	aggr_printout(os->evsel, os->id, os->nr);
 	if (stat_config.aggr_mode == AGGR_NONE)
 		fprintf(os->fh, "        ");
-	if (stat_config.aggr_mode == AGGR_CORE)
-		fprintf(os->fh, "                  ");
-	if (stat_config.aggr_mode == AGGR_SOCKET)
-		fprintf(os->fh, "            ");
 	fprintf(os->fh, "                                                 ");
 }
 
@@ -798,6 +797,7 @@ static void new_line_csv(void *ctx)
 	fputc('\n', os->fh);
 	if (os->prefix)
 		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+	aggr_printout(os->evsel, os->id, os->nr);
 	for (i = 0; i < os->nfields; i++)
 		fputs(csv_sep, os->fh);
 }
@@ -855,6 +855,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+	int i;
+
+	if (!aggr_get_id)
+		return 0;
+
+	if (stat_config.aggr_mode == AGGR_NONE)
+		return id;
+
+	if (stat_config.aggr_mode == AGGR_GLOBAL)
+		return 0;
+
+	for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+		int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+		if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+			return cpu2;
+	}
+	return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 	FILE *output = stat_config.output;
@@ -891,7 +913,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	struct perf_stat_output_ctx out;
 	struct outstate os = {
 		.fh = stat_config.output,
-		.prefix = prefix ? prefix : ""
+		.prefix = prefix ? prefix : "",
+		.id = id,
+		.nr = nr,
+		.evsel = counter,
 	};
 	print_metric_t pm = print_metric_std;
 	void (*nl)(void *);
@@ -958,16 +983,37 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	}
 
 	perf_stat__print_shadow_stats(counter, uval,
-				stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
-				cpu_map__id_to_cpu(id),
+				first_shadow_cpu(counter, id),
 				&out);
-
 	if (!csv_output) {
 		print_noise(counter, noise);
 		print_running(run, ena);
 	}
 }
 
+static void aggr_update_shadow(void)
+{
+	int cpu, s2, id, s;
+	u64 val;
+	struct perf_evsel *counter;
+
+	for (s = 0; s < aggr_map->nr; s++) {
+		id = aggr_map->map[s];
+		evlist__for_each(evsel_list, counter) {
+			val = 0;
+			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+				s2 = aggr_get_id(evsel_list->cpus, cpu);
+				if (s2 != id)
+					continue;
+				val += perf_counts(counter->counts, cpu, 0)->val;
+			}
+			val = val * counter->scale;
+			perf_stat__update_shadow_stats(counter, &val,
+						       first_shadow_cpu(counter, id));
+		}
+	}
+}
+
 static void print_aggr(char *prefix)
 {
 	FILE *output = stat_config.output;
@@ -979,6 +1025,8 @@ static void print_aggr(char *prefix)
 	if (!(aggr_map || aggr_get_id))
 		return;
 
+	aggr_update_shadow();
+
 	for (s = 0; s < aggr_map->nr; s++) {
 		id = aggr_map->map[s];
 		evlist__for_each(evsel_list, counter) {
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 367e220e93d5..5e2d2e34e1bc 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -14,6 +14,13 @@ enum {
 
 #define NUM_CTX CTX_BIT_MAX
 
+/*
+ * AGGR_GLOBAL: Use CPU 0
+ * AGGR_SOCKET: Use first CPU of socket
+ * AGGR_CORE: Use first CPU of core
+ * AGGR_NONE: Use matching CPU
+ * AGGR_THREAD: Not supported?
+ */
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];

From 676787939ef8ccfcf8039104f766ebe5ebe23924 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 1 Feb 2016 02:59:00 +0900
Subject: [PATCH 15/23] tools build: Use .s extension for preprocessed
 assembler code

The "man gcc" says .i extension represents the file is C source code
that should not be preprocessed.  Here, .s should be used.

For clarification,
  .c  ---(preprocess)--->  .i
  .S  ---(preprocess)--->  .s

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Aaro Koskinen <aaro.koskinen@nokia.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Lukas Wunner <lukas@wunner.de>
Link: http://lkml.kernel.org/r/1454263140-19670-1-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/build/Makefile.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 4a96473b180f..ee566e8bd1cf 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -85,7 +85,7 @@ $(OUTPUT)%.i: %.c FORCE
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_i_c)
 
-$(OUTPUT)%.i: %.S FORCE
+$(OUTPUT)%.s: %.S FORCE
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_i_c)
 

From 979ac257b00c53aacec3eacf86142e8c00bee889 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 1 Mar 2016 23:46:20 +0000
Subject: [PATCH 16/23] perf script: Fix double free on command_line

The 'command_line' variable is free'd twice if db_export__branch_types()
fails. To avoid this, defer the free'ing of 'command_line' to after this
call so that the error return path will just free 'command_line' once.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Javi Merino <javi.merino@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1456875980-25606-1-git-send-email-colin.king@canonical.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/scripting-engines/trace-event-python.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 309d90fa7698..fbd05242b4e5 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1094,8 +1094,6 @@ static int python_start_script(const char *script, int argc, const char **argv)
 		goto error;
 	}
 
-	free(command_line);
-
 	set_table_handlers(tables);
 
 	if (tables->db_export_mode) {
@@ -1104,6 +1102,8 @@ static int python_start_script(const char *script, int argc, const char **argv)
 			goto error;
 	}
 
+	free(command_line);
+
 	return err;
 error:
 	Py_Finalize();

From 21a30100453516004905d4d5f0806ebaffa95131 Mon Sep 17 00:00:00 2001
From: "Chaos.Chen" <rainboy1215@gmail.com>
Date: Tue, 9 Feb 2016 15:40:14 -0500
Subject: [PATCH 17/23] tools lib traceevent: Fix time stamp rounding issue

When rounding to microseconds, if the timestamp subsecond is between
.999999500 and .999999999, it is rounded to .1000000, when it should
instead increment the second counter due to the overflow.

For example, if the timestamp is 1234.999999501 instead of seeing:

  1235.000000

we see:

  1234.1000000

Signed-off-by: Chaos.Chen <rainboy1215@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20160209204236.824426460@goodmis.org
[ fixed incrementing "secs" instead of decrementing it ]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/event-parse.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 9a1e48afcf89..ce59f4891fa2 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -5429,6 +5429,11 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
 			p = 9;
 		} else {
 			usecs = (nsecs + 500) / NSECS_PER_USEC;
+			/* To avoid usecs larger than 1 sec */
+			if (usecs >= 1000000) {
+				usecs -= 1000000;
+				secs++;
+			}
 			p = 6;
 		}
 

From 9ec72eafee61f68cd57310a99db129ffb71302f3 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 9 Feb 2016 15:40:16 -0500
Subject: [PATCH 18/23] tools lib traceevent: Set int_array fields to NULL if
 freeing from error

Had a bug where on error of parsing __print_array() where the fields are
freed after they were allocated, but since they were not set to NULL,
the freeing of the arg also tried to free the already freed fields
causing a double free.

Fix process_hex() while at it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20160209204237.188327674@goodmis.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/event-parse.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index ce59f4891fa2..fb790aa757eb 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -2635,6 +2635,7 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok)
 
 free_field:
 	free_arg(arg->hex.field);
+	arg->hex.field = NULL;
 out:
 	*tok = NULL;
 	return EVENT_ERROR;
@@ -2659,8 +2660,10 @@ process_int_array(struct event_format *event, struct print_arg *arg, char **tok)
 
 free_size:
 	free_arg(arg->int_array.count);
+	arg->int_array.count = NULL;
 free_field:
 	free_arg(arg->int_array.field);
+	arg->int_array.field = NULL;
 out:
 	*tok = NULL;
 	return EVENT_ERROR;

From a66673a07e260807f570db8f08a9c1207932c665 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 9 Feb 2016 15:40:17 -0500
Subject: [PATCH 19/23] tools lib traceevent: Fix output of %llu for 64 bit
 values read on 32 bit machines

When a long value is read on 32 bit machines for 64 bit output, the
parsing needs to change "%lu" into "%llu", as the value is read
natively.

Unfortunately, if "%llu" is already there, the code will add another "l"
to it and fail to parse it properly.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20160209204237.337024613@goodmis.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/event-parse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index fb790aa757eb..865dea55454b 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -4978,7 +4978,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 						break;
 					}
 				}
-				if (pevent->long_size == 8 && ls &&
+				if (pevent->long_size == 8 && ls == 1 &&
 				    sizeof(long) != 8) {
 					char *p;
 

From 9b240637eb9b9677a9e9bc2dc568f5e0811e04d6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 2 Mar 2016 09:58:00 -0300
Subject: [PATCH 20/23] perf test: Fix hists related entries

That got broken by d3a72fd8187b ("perf report: Fix indentation of
dynamic entries in hierarchy"), by using the evlist in setup_sorting()
without checking if it is NULL, as done in some 'perf test' entries:

  $ find tools/ -name "*.c" | xargs grep 'setup_sorting(NULL);'
  tools/perf/tests/hists_output.c:      setup_sorting(NULL);
  tools/perf/tests/hists_output.c:      setup_sorting(NULL);
  tools/perf/tests/hists_output.c:      setup_sorting(NULL);
  tools/perf/tests/hists_output.c:      setup_sorting(NULL);
  tools/perf/tests/hists_output.c:      setup_sorting(NULL);
  tools/perf/tests/hists_cumulate.c:    setup_sorting(NULL);
  tools/perf/tests/hists_cumulate.c:    setup_sorting(NULL);
  tools/perf/tests/hists_cumulate.c:    setup_sorting(NULL);
  tools/perf/tests/hists_cumulate.c:    setup_sorting(NULL);
  $

Fix it.

Before:

  [root@jouet ~]# perf test
  <SNIP>
  15: Test matching and linking multiple hists                 : FAILED!
  16: Try 'import perf' in python, checking link problems      : Ok
  17: Test breakpoint overflow signal handler                  : Ok
  18: Test breakpoint overflow sampling                        : Ok
  19: Test number of exit event of a simple workload           : Ok
  20: Test software clock events have valid period values      : Ok
  21: Test object code reading                                 : Ok
  22: Test sample parsing                                      : Ok
  23: Test using a dummy software event to keep tracking       : Ok
  24: Test parsing with no sample_id_all bit set               : Ok
  25: Test filtering hist entries                              : FAILED!
  26: Test mmap thread lookup                                  : Ok
  27: Test thread mg sharing                                   : Ok
  28: Test output sorting of hist entries                      : FAILED!
  29: Test cumulation of child hist entries                    : FAILED!
  <SNIP>

After the patch the above failed tests complete successfully.

Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: d3a72fd8187b ("perf report: Fix indentation of dynamic entries in hierarchy")
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.c | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 5888bfe9a193..4380a2858802 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2635,25 +2635,14 @@ out:
 	return ret;
 }
 
-int setup_sorting(struct perf_evlist *evlist)
+static void evlist__set_hists_nr_sort_keys(struct perf_evlist *evlist)
 {
-	int err;
-	struct hists *hists;
 	struct perf_evsel *evsel;
-	struct perf_hpp_fmt *fmt;
-
-	err = __setup_sorting(evlist);
-	if (err < 0)
-		return err;
-
-	if (parent_pattern != default_parent_pattern) {
-		err = sort_dimension__add("parent", evlist);
-		if (err < 0)
-			return err;
-	}
 
 	evlist__for_each(evlist, evsel) {
-		hists = evsel__hists(evsel);
+		struct perf_hpp_fmt *fmt;
+		struct hists *hists = evsel__hists(evsel);
+
 		hists->nr_sort_keys = perf_hpp_list.nr_sort_keys;
 
 		/*
@@ -2667,6 +2656,24 @@ int setup_sorting(struct perf_evlist *evlist)
 				hists->nr_sort_keys--;
 		}
 	}
+}
+
+int setup_sorting(struct perf_evlist *evlist)
+{
+	int err;
+
+	err = __setup_sorting(evlist);
+	if (err < 0)
+		return err;
+
+	if (parent_pattern != default_parent_pattern) {
+		err = sort_dimension__add("parent", evlist);
+		if (err < 0)
+			return err;
+	}
+
+	if (evlist != NULL)
+		evlist__set_hists_nr_sort_keys(evlist);
 
 	reset_dimensions();
 

From e17a0e16ca3a63d1bafbcba313586cf137418f45 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 2 Mar 2016 12:55:22 +0000
Subject: [PATCH 21/23] perf tests: Initialize sa.sa_flags

The sa_flags field is not being initialized, so a garbage value is being
passed to sigaction.  Initialize it to zero.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1456923322-29697-1-git-send-email-colin.king@canonical.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/tests/rdpmc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c
index 7bb0d13c235f..7945462851a4 100644
--- a/tools/perf/arch/x86/tests/rdpmc.c
+++ b/tools/perf/arch/x86/tests/rdpmc.c
@@ -103,6 +103,7 @@ static int __test__rdpmc(void)
 
 	sigfillset(&sa.sa_mask);
 	sa.sa_sigaction = segfault_handler;
+	sa.sa_flags = 0;
 	sigaction(SIGSEGV, &sa, NULL);
 
 	fd = sys_perf_event_open(&attr, 0, -1, -1,

From 1b69317d2dc80bc8e1d005e1a771c4f5bff3dabe Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 2 Mar 2016 13:50:25 +0000
Subject: [PATCH 22/23] tools/power turbostat: fix various build warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When building with gcc 6 we're getting various build warnings that just
require some trivial function declaration and call fixes:

  turbostat.c: In function ‘dump_cstate_pstate_config_info’:
  turbostat.c:1973:1: warning: type of ‘family’ defaults to ‘int’
   dump_cstate_pstate_config_info(family, model)
  turbostat.c:1973:1: warning: type of ‘model’ defaults to ‘int’
  turbostat.c: In function ‘get_tdp’:
  turbostat.c:2145:8: warning: type of ‘model’ defaults to ‘int’
   double get_tdp(model)
  turbostat.c: In function ‘perf_limit_reasons_probe’:
  turbostat.c:2259:6: warning: type of ‘family’ defaults to ‘int’
   void perf_limit_reasons_probe(family, model)
  turbostat.c:2259:6: warning: type of ‘model’ defaults to ‘int’

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-wbicer8n0s9qe6ql8h9x478e@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/power/x86/turbostat/turbostat.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 0dac7e05a6ac..3fa94e291d16 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1970,7 +1970,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
 }
 
 static void
-dump_cstate_pstate_config_info(family, model)
+dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
 {
 	if (!do_nhm_platform_info)
 		return;
@@ -2142,7 +2142,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
 
-double get_tdp(model)
+double get_tdp(unsigned int model)
 {
 	unsigned long long msr;
 
@@ -2256,7 +2256,7 @@ void rapl_probe(unsigned int family, unsigned int model)
 	return;
 }
 
-void perf_limit_reasons_probe(family, model)
+void perf_limit_reasons_probe(unsigned int family, unsigned int model)
 {
 	if (!genuine_intel)
 		return;
@@ -2792,7 +2792,7 @@ void process_cpuid()
 	perf_limit_reasons_probe(family, model);
 
 	if (debug)
-		dump_cstate_pstate_config_info();
+		dump_cstate_pstate_config_info(family, model);
 
 	if (has_skl_msrs(family, model))
 		calculate_tsc_tweak();

From fb4605ba47e772ff9d62d1d54218a832ec8b3e1d Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 1 Mar 2016 10:57:52 -0800
Subject: [PATCH 23/23] perf stat: Check for frontend stalled for metrics

Add an extra check for frontend stalled in the metrics.  This avoids an
extra column for the --metric-only case when the CPU does not support
frontend stalled.

v2: Add separate init function

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/1456858672-21594-8-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c     | 1 +
 tools/perf/util/stat-shadow.c | 9 ++++++++-
 tools/perf/util/stat.h        | 1 +
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9b5089c5dffe..baa82078c148 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1966,6 +1966,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
 					(const char **) stat_usage,
 					PARSE_OPT_STOP_AT_NON_OPTION);
+	perf_stat__init_shadow_stats();
 
 	if (csv_sep) {
 		csv_output = true;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 5e2d2e34e1bc..b33ffb2af2cf 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -2,6 +2,7 @@
 #include "evsel.h"
 #include "stat.h"
 #include "color.h"
+#include "pmu.h"
 
 enum {
 	CTX_BIT_USER	= 1 << 0,
@@ -35,9 +36,15 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+static bool have_frontend_stalled;
 
 struct stats walltime_nsecs_stats;
 
+void perf_stat__init_shadow_stats(void)
+{
+	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
+}
+
 static int evsel_context(struct perf_evsel *evsel)
 {
 	int ctx = 0;
@@ -323,7 +330,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 			print_metric(ctxp, NULL, "%7.2f ",
 					"stalled cycles per insn",
 					ratio);
-		} else {
+		} else if (have_frontend_stalled) {
 			print_metric(ctxp, NULL, NULL,
 				     "stalled cycles per insn", 0);
 		}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index f02af68adc04..0150e786ccc7 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -72,6 +72,7 @@ typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit,
 			       const char *fmt, double val);
 typedef void (*new_line_t )(void *ctx);
 
+void perf_stat__init_shadow_stats(void);
 void perf_stat__reset_shadow_stats(void);
 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 				    int cpu);