From e8fcaa5c54e3b0371230e5d43a6f650c667da9c5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 7 Aug 2013 22:28:01 +0200
Subject: [PATCH 01/46] nohz: Convert a few places to use local per cpu
 accesses

A few functions use remote per CPU access APIs when they
deal with local values.

Just do the right conversion to improve performance, code
readability and debug checks.

While at it, lets extend some of these function names with *_this_cpu()
suffix in order to display their purpose more clearly.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tick.h         |  6 +++---
 kernel/softirq.c             |  4 +---
 kernel/time/tick-broadcast.c |  6 +++---
 kernel/time/tick-internal.h  |  4 ++--
 kernel/time/tick-sched.c     | 39 +++++++++++++++---------------------
 5 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 5128d33bbb39..a004f66a6cf0 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -104,7 +104,7 @@ extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 extern void tick_clock_notify(void);
 extern int tick_check_oneshot_change(int allow_nohz);
 extern struct tick_sched *tick_get_tick_sched(int cpu);
-extern void tick_check_idle(int cpu);
+extern void tick_check_idle(void);
 extern int tick_oneshot_mode_active(void);
 #  ifndef arch_needs_cpu
 #   define arch_needs_cpu(cpu) (0)
@@ -112,7 +112,7 @@ extern int tick_oneshot_mode_active(void);
 # else
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
-static inline void tick_check_idle(int cpu) { }
+static inline void tick_check_idle(void) { }
 static inline int tick_oneshot_mode_active(void) { return 0; }
 # endif
 
@@ -121,7 +121,7 @@ static inline void tick_init(void) { }
 static inline void tick_cancel_sched_timer(int cpu) { }
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
-static inline void tick_check_idle(int cpu) { }
+static inline void tick_check_idle(void) { }
 static inline int tick_oneshot_mode_active(void) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 11025ccc06dd..11348de09400 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -311,8 +311,6 @@ asmlinkage void do_softirq(void)
  */
 void irq_enter(void)
 {
-	int cpu = smp_processor_id();
-
 	rcu_irq_enter();
 	if (is_idle_task(current) && !in_interrupt()) {
 		/*
@@ -320,7 +318,7 @@ void irq_enter(void)
 		 * here, as softirq will be serviced on return from interrupt.
 		 */
 		local_bh_disable();
-		tick_check_idle(cpu);
+		tick_check_idle();
 		_local_bh_enable();
 	}
 
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 9532690daaa9..43780ab5e279 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -538,10 +538,10 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
  * Called from irq_enter() when idle was interrupted to reenable the
  * per cpu device.
  */
-void tick_check_oneshot_broadcast(int cpu)
+void tick_check_oneshot_broadcast_this_cpu(void)
 {
-	if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
-		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
+	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
+		struct tick_device *td = &__get_cpu_var(tick_cpu_device);
 
 		/*
 		 * We might be in the middle of switching over from
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 18e71f7fbc2a..e2bced59b6dd 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -51,7 +51,7 @@ extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
 extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 extern int tick_broadcast_oneshot_active(void);
-extern void tick_check_oneshot_broadcast(int cpu);
+extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
 # else /* BROADCAST */
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
@@ -62,7 +62,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
-static inline void tick_check_oneshot_broadcast(int cpu) { }
+static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
 static inline bool tick_broadcast_oneshot_available(void) { return true; }
 # endif /* !BROADCAST */
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3612fc77f834..2afd43fca93b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -391,11 +391,9 @@ __setup("nohz=", setup_tick_nohz);
  */
 static void tick_nohz_update_jiffies(ktime_t now)
 {
-	int cpu = smp_processor_id();
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 	unsigned long flags;
 
-	ts->idle_waketime = now;
+	__this_cpu_write(tick_cpu_sched.idle_waketime, now);
 
 	local_irq_save(flags);
 	tick_do_update_jiffies64(now);
@@ -426,17 +424,15 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda
 
 }
 
-static void tick_nohz_stop_idle(int cpu, ktime_t now)
+static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 {
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
-
-	update_ts_time_stats(cpu, ts, now, NULL);
+	update_ts_time_stats(smp_processor_id(), ts, now, NULL);
 	ts->idle_active = 0;
 
 	sched_clock_idle_wakeup_event(0);
 }
 
-static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
+static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
 {
 	ktime_t now = ktime_get();
 
@@ -752,7 +748,7 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)
 	ktime_t now, expires;
 	int cpu = smp_processor_id();
 
-	now = tick_nohz_start_idle(cpu, ts);
+	now = tick_nohz_start_idle(ts);
 
 	if (can_stop_idle_tick(cpu, ts)) {
 		int was_stopped = ts->tick_stopped;
@@ -914,8 +910,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
  */
 void tick_nohz_idle_exit(void)
 {
-	int cpu = smp_processor_id();
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 	ktime_t now;
 
 	local_irq_disable();
@@ -928,7 +923,7 @@ void tick_nohz_idle_exit(void)
 		now = ktime_get();
 
 	if (ts->idle_active)
-		tick_nohz_stop_idle(cpu, now);
+		tick_nohz_stop_idle(ts, now);
 
 	if (ts->tick_stopped) {
 		tick_nohz_restart_sched_tick(ts, now);
@@ -1012,12 +1007,10 @@ static void tick_nohz_switch_to_nohz(void)
  * timer and do not touch the other magic bits which need to be done
  * when idle is left.
  */
-static void tick_nohz_kick_tick(int cpu, ktime_t now)
+static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now)
 {
 #if 0
 	/* Switch back to 2.6.27 behaviour */
-
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 	ktime_t delta;
 
 	/*
@@ -1032,36 +1025,36 @@ static void tick_nohz_kick_tick(int cpu, ktime_t now)
 #endif
 }
 
-static inline void tick_check_nohz(int cpu)
+static inline void tick_check_nohz_this_cpu(void)
 {
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 	ktime_t now;
 
 	if (!ts->idle_active && !ts->tick_stopped)
 		return;
 	now = ktime_get();
 	if (ts->idle_active)
-		tick_nohz_stop_idle(cpu, now);
+		tick_nohz_stop_idle(ts, now);
 	if (ts->tick_stopped) {
 		tick_nohz_update_jiffies(now);
-		tick_nohz_kick_tick(cpu, now);
+		tick_nohz_kick_tick(ts, now);
 	}
 }
 
 #else
 
 static inline void tick_nohz_switch_to_nohz(void) { }
-static inline void tick_check_nohz(int cpu) { }
+static inline void tick_check_nohz_this_cpu(void) { }
 
 #endif /* CONFIG_NO_HZ_COMMON */
 
 /*
  * Called from irq_enter to notify about the possible interruption of idle()
  */
-void tick_check_idle(int cpu)
+void tick_check_idle(void)
 {
-	tick_check_oneshot_broadcast(cpu);
-	tick_check_nohz(cpu);
+	tick_check_oneshot_broadcast_this_cpu();
+	tick_check_nohz_this_cpu();
 }
 
 /*

From 99c8b1ea0972be82ce1842d830e0173e70907065 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Thu, 24 Oct 2013 10:07:47 -0400
Subject: [PATCH 02/46] trivial: fix spelling in CONTEXT_TRACKING_FORCE help
 text

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 init/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index 79383d3aa5dc..12d61f82e5f7 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -532,7 +532,7 @@ config CONTEXT_TRACKING_FORCE
 	  dynticks subsystem by forcing the context tracking on all
 	  CPUs in the system.
 
-	  Say Y only if you're working on the developpement of an
+	  Say Y only if you're working on the development of an
 	  architecture backend for the context tracking.
 
 	  Say N otherwise, this option brings an overhead that you

From 58135f574f1b791c926622387780ed3d090116d6 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 6 Nov 2013 14:45:57 +0100
Subject: [PATCH 03/46] context_tracking: Wrap static key check into more
 intuitive function name

Use a function with a meaningful name to check the global context
tracking state. static_key_false() is a bit confusing for reviewers.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/context_tracking.h       | 10 +++++-----
 include/linux/context_tracking_state.h |  4 ++++
 include/linux/tick.h                   |  2 +-
 include/linux/vtime.h                  |  2 +-
 kernel/context_tracking.c              |  8 ++++----
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 158158704c30..37b81bd51ec0 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -17,13 +17,13 @@ extern void __context_tracking_task_switch(struct task_struct *prev,
 
 static inline void user_enter(void)
 {
-	if (static_key_false(&context_tracking_enabled))
+	if (context_tracking_is_enabled())
 		context_tracking_user_enter();
 
 }
 static inline void user_exit(void)
 {
-	if (static_key_false(&context_tracking_enabled))
+	if (context_tracking_is_enabled())
 		context_tracking_user_exit();
 }
 
@@ -31,7 +31,7 @@ static inline enum ctx_state exception_enter(void)
 {
 	enum ctx_state prev_ctx;
 
-	if (!static_key_false(&context_tracking_enabled))
+	if (!context_tracking_is_enabled())
 		return 0;
 
 	prev_ctx = this_cpu_read(context_tracking.state);
@@ -42,7 +42,7 @@ static inline enum ctx_state exception_enter(void)
 
 static inline void exception_exit(enum ctx_state prev_ctx)
 {
-	if (static_key_false(&context_tracking_enabled)) {
+	if (context_tracking_is_enabled()) {
 		if (prev_ctx == IN_USER)
 			context_tracking_user_enter();
 	}
@@ -51,7 +51,7 @@ static inline void exception_exit(enum ctx_state prev_ctx)
 static inline void context_tracking_task_switch(struct task_struct *prev,
 						struct task_struct *next)
 {
-	if (static_key_false(&context_tracking_enabled))
+	if (context_tracking_is_enabled())
 		__context_tracking_task_switch(prev, next);
 }
 #else
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index 0f1979d0674f..0db535b79be7 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -22,6 +22,10 @@ struct context_tracking {
 extern struct static_key context_tracking_enabled;
 DECLARE_PER_CPU(struct context_tracking, context_tracking);
 
+static inline bool context_tracking_is_enabled(void)
+{
+	return static_key_false(&context_tracking_enabled);
+}
 static inline bool context_tracking_in_user(void)
 {
 	return __this_cpu_read(context_tracking.state) == IN_USER;
diff --git a/include/linux/tick.h b/include/linux/tick.h
index a004f66a6cf0..0175d8663b6c 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -165,7 +165,7 @@ extern cpumask_var_t tick_nohz_full_mask;
 
 static inline bool tick_nohz_full_enabled(void)
 {
-	if (!static_key_false(&context_tracking_enabled))
+	if (!context_tracking_is_enabled())
 		return false;
 
 	return tick_nohz_full_running;
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index f5b72b364bda..807c732cbf29 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -19,7 +19,7 @@ static inline bool vtime_accounting_enabled(void) { return true; }
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 static inline bool vtime_accounting_enabled(void)
 {
-	if (static_key_false(&context_tracking_enabled)) {
+	if (context_tracking_is_enabled()) {
 		if (context_tracking_active())
 			return true;
 	}
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index e5f3917aa05b..6cb20d2e7ee0 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -53,10 +53,10 @@ void context_tracking_user_enter(void)
 	/*
 	 * Repeat the user_enter() check here because some archs may be calling
 	 * this from asm and if no CPU needs context tracking, they shouldn't
-	 * go further. Repeat the check here until they support the static key
-	 * check.
+	 * go further. Repeat the check here until they support the inline static
+	 * key check.
 	 */
-	if (!static_key_false(&context_tracking_enabled))
+	if (!context_tracking_is_enabled())
 		return;
 
 	/*
@@ -160,7 +160,7 @@ void context_tracking_user_exit(void)
 {
 	unsigned long flags;
 
-	if (!static_key_false(&context_tracking_enabled))
+	if (!context_tracking_is_enabled())
 		return;
 
 	if (in_interrupt())

From d0df09ebfc126b23c1005f98ddecc9907f9c5d25 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 6 Nov 2013 15:11:57 +0100
Subject: [PATCH 04/46] context_tracking: Rename context_tracking_active() to
 context_tracking_cpu_is_enabled()

We currently have a confusing couple of API naming with the existing
context_tracking_active() and context_tracking_is_enabled().

Lets keep the latter one, context_tracking_is_enabled(), for global
context tracking state check and use context_tracking_cpu_is_enabled()
for local state check.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/context_tracking_state.h | 11 ++++++-----
 include/linux/vtime.h                  |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index 0db535b79be7..97a81225d037 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -26,15 +26,16 @@ static inline bool context_tracking_is_enabled(void)
 {
 	return static_key_false(&context_tracking_enabled);
 }
+
+static inline bool context_tracking_cpu_is_enabled(void)
+{
+	return __this_cpu_read(context_tracking.active);
+}
+
 static inline bool context_tracking_in_user(void)
 {
 	return __this_cpu_read(context_tracking.state) == IN_USER;
 }
-
-static inline bool context_tracking_active(void)
-{
-	return __this_cpu_read(context_tracking.active);
-}
 #else
 static inline bool context_tracking_in_user(void) { return false; }
 static inline bool context_tracking_active(void) { return false; }
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 807c732cbf29..c5165fd256f9 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -20,7 +20,7 @@ static inline bool vtime_accounting_enabled(void) { return true; }
 static inline bool vtime_accounting_enabled(void)
 {
 	if (context_tracking_is_enabled()) {
-		if (context_tracking_active())
+		if (context_tracking_cpu_is_enabled())
 			return true;
 	}
 

From d4283c654130c3d01b6842d3821dbdc3c15ceb46 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 6 Nov 2013 15:42:04 +0100
Subject: [PATCH 05/46] posix-timers: Spare workqueue if there is no full
 dynticks CPU to kick

After a posix cpu timer is set, a workqueue is scheduled in order to
kick the full dynticks CPUs and let them restart their tick if
necessary in case the task they are running is concerned by the
new timer.

This kick is implemented by way of IPIs, which require interrupts
to be enabled, hence the need for a workqueue to raise them because
the posix cpu timer set path has interrupts disabled.

Now if there is no full dynticks CPU on the system, the workqueue is
still scheduled but it simply won't send any IPI and return immediately.

So lets spare that worqueue when it is not needed.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/posix-cpu-timers.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index c7f31aa272f7..35509c5a3ffb 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -608,7 +608,8 @@ static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
  */
 static void posix_cpu_timer_kick_nohz(void)
 {
-	schedule_work(&nohz_kick_work);
+	if (context_tracking_is_enabled())
+		schedule_work(&nohz_kick_work);
 }
 
 bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)

From c925077c33fc9a546e7cf6c3be2adf4a2afe2608 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 6 Nov 2013 17:18:30 +0100
Subject: [PATCH 06/46] posix-timers: Fix full dynticks CPUs kick on timer
 rescheduling

A posix CPU timer can be rearmed while it is firing or after it is
notified with a signal. This can happen for example with timers that
were set with a non zero interval in timer_settime().

This rearming can happen in two places:

1) On timer firing time, which happens on the target's tick. If the timer
can't trigger a signal because it is ignored, it reschedules itself
to honour the timer interval.

2) On signal handling from the timer's notification target. This one
can be a different task than the timer's target itself. Once the
signal is notified, the notification target rearms the timer, again
to honour the timer interval.

When a timer is rearmed, we need to notify the full dynticks CPUs
such that they restart their tick in case they are running tasks that
may have a share in elapsing this timer.

Now the 1st case above handles full dynticks CPUs with a call to
posix_cpu_timer_kick_nohz() from the posix cpu timer firing code. But
the second case ignores the fact that some CPUs may run non-idle tasks
with their tick off. As a result, when a timer is resheduled after its signal
notification, the full dynticks CPUs may completely ignore it and not
tick on the timer as expected

This patch fixes this bug by handling both cases in one. All we need
is to move the kick to the rearming common code in posix_cpu_timer_schedule().

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Olivier Langlois <olivier@olivierlanglois.net>
---
 kernel/posix-cpu-timers.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 35509c5a3ffb..79747b7d9420 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1091,7 +1091,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 			put_task_struct(p);
 			timer->it.cpu.task = p = NULL;
 			timer->it.cpu.expires = 0;
-			goto out_unlock;
+			read_unlock(&tasklist_lock);
+			goto out;
 		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
 			/*
 			 * We've noticed that the thread is dead, but
@@ -1100,7 +1101,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 			 */
 			cpu_timer_sample_group(timer->it_clock, p, &now);
 			clear_dead_task(timer, now);
-			goto out_unlock;
+			read_unlock(&tasklist_lock);
+			goto out;
 		}
 		spin_lock(&p->sighand->siglock);
 		cpu_timer_sample_group(timer->it_clock, p, &now);
@@ -1114,10 +1116,11 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	BUG_ON(!irqs_disabled());
 	arm_timer(timer);
 	spin_unlock(&p->sighand->siglock);
-
-out_unlock:
 	read_unlock(&tasklist_lock);
 
+	/* Kick full dynticks CPUs in case they need to tick on the new timer */
+	posix_cpu_timer_kick_nohz();
+
 out:
 	timer->it_overrun_last = timer->it_overrun;
 	timer->it_overrun = -1;
@@ -1257,13 +1260,6 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 			cpu_timer_fire(timer);
 		spin_unlock(&timer->it_lock);
 	}
-
-	/*
-	 * In case some timers were rescheduled after the queue got emptied,
-	 * wake up full dynticks CPUs.
-	 */
-	if (tsk->signal->cputimer.running)
-		posix_cpu_timer_kick_nohz();
 }
 
 /*

From 724a371396e8162cc883a40cd8e525dfc7e5f3ff Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 10 Oct 2013 16:55:57 +0200
Subject: [PATCH 07/46] posix-timers: Remove dead thread posix cpu timers
 caching

When a task is exiting or has exited, its posix cpu timers
don't tick anymore and won't elapse further. It's too late
for them to expire.

So any further call to timer_gettime() on these timers will
return the same remaining expiry time.

The current code optimize this by caching the remaining delta
and storing it where we use to save the absolute expiration time.
This way, the future calls to timer_gettime() won't need to
compute the difference between the absolute expiration time and
the current time anymore.

Now this optimization doesn't seem to bring much value. Computing
the timer remaining delta is not very costly. Fetching the timer
value OTOH can be costly in two ways:

* CPUCLOCK_SCHED read requires to lock the target's rq. But some
optimizations are on the way to make task_sched_runtime() not holding
the rq lock of a non-running target.

* CPUCLOCK_VIRT/CPUCLOCK_PROF read simply consist in fetching
current->utime/current->stime except when the system uses full
dynticks cputime accounting. The latter requires a per task lock
in order to correctly compute user and system time. But once the
target is dead, this lock shouldn't be contended anyway.

All in one this caching doesn't seem to be justified.
Given that it complicates the code significantly for
few wins, let's remove it on single thread timers.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/posix-cpu-timers.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 79747b7d9420..3b7df8653913 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -788,7 +788,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 {
 	unsigned long long now;
 	struct task_struct *p = timer->it.cpu.task;
-	int clear_dead;
 
 	/*
 	 * Easy part: convert the reload time.
@@ -802,6 +801,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 	}
 
 	if (unlikely(p == NULL)) {
+		WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock));
 		/*
 		 * This task already died and the timer will never fire.
 		 * In this case, expires is actually the dead value.
@@ -817,7 +817,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 	 */
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 		cpu_clock_sample(timer->it_clock, p, &now);
-		clear_dead = p->exit_state;
 	} else {
 		read_lock(&tasklist_lock);
 		if (unlikely(p->sighand == NULL)) {
@@ -833,22 +832,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			goto dead;
 		} else {
 			cpu_timer_sample_group(timer->it_clock, p, &now);
-			clear_dead = (unlikely(p->exit_state) &&
-				      thread_group_empty(p));
+			if (unlikely(p->exit_state) && thread_group_empty(p)) {
+				read_unlock(&tasklist_lock);
+				/*
+				 * We've noticed that the thread is dead, but
+				 * not yet reaped.  Take this opportunity to
+				 * drop our task ref.
+				 */
+				clear_dead_task(timer, now);
+				goto dead;
+			}
 		}
 		read_unlock(&tasklist_lock);
 	}
 
-	if (unlikely(clear_dead)) {
-		/*
-		 * We've noticed that the thread is dead, but
-		 * not yet reaped.  Take this opportunity to
-		 * drop our task ref.
-		 */
-		clear_dead_task(timer, now);
-		goto dead;
-	}
-
 	if (now < timer->it.cpu.expires) {
 		sample_to_timespec(timer->it_clock,
 				   timer->it.cpu.expires - now,
@@ -1063,11 +1060,13 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	struct task_struct *p = timer->it.cpu.task;
 	unsigned long long now;
 
-	if (unlikely(p == NULL))
+	if (unlikely(p == NULL)) {
+		WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock));
 		/*
 		 * The task was cleaned up already, no future firings.
 		 */
 		goto out;
+	}
 
 	/*
 	 * Fetch the current sample and update the timer's expiry time.
@@ -1075,10 +1074,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 		cpu_clock_sample(timer->it_clock, p, &now);
 		bump_cpu_timer(timer, now);
-		if (unlikely(p->exit_state)) {
-			clear_dead_task(timer, now);
+		if (unlikely(p->exit_state))
 			goto out;
-		}
+
 		read_lock(&tasklist_lock); /* arm_timer needs it.  */
 		spin_lock(&p->sighand->siglock);
 	} else {

From d430b9173a9a50a83e10d1c70baead3e625b522f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 10 Oct 2013 16:55:57 +0200
Subject: [PATCH 08/46] posix-timers: Remove dead process posix cpu timers
 caching

Now that we removed dead thread posix cpu timers caching,
lets remove the dead process wide version. This caching
is similar to the per thread version but it should be even
more rare:

* If the process id dead, we are not reading its timers
status from a thread belonging to its group since they
are all dead. So this caching only concern remote process
timers reads. Now posix cpu timers using itimers or timer_settime()
can't do remote process timers anyway so it's not even clear if there
is actually a user for this caching.

* Unlike per thread timers caching, this only applies to
zombies targets. Buried targets' process wide timers return
0 values. But then again, timer_gettime() can't read remote
process timers, so if the process is dead, there can't be
any reader left anyway.

Then again this caching seem to complicate the code for
corner cases that are probably not worth it. So lets get
rid of it.

Also remove the sample snapshot on dying process timer
that is now useless, as suggested by Kosaki.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/posix-cpu-timers.c | 35 +----------------------------------
 1 file changed, 1 insertion(+), 34 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 3b7df8653913..c5d1ef530268 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -453,23 +453,6 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 }
 
-static void clear_dead_task(struct k_itimer *itimer, unsigned long long now)
-{
-	struct cpu_timer_list *timer = &itimer->it.cpu;
-
-	/*
-	 * That's all for this thread or process.
-	 * We leave our residual in expires to be reported.
-	 */
-	put_task_struct(timer->task);
-	timer->task = NULL;
-	if (timer->expires < now) {
-		timer->expires = 0;
-	} else {
-		timer->expires -= now;
-	}
-}
-
 static inline int expires_gt(cputime_t expires, cputime_t new_exp)
 {
 	return expires == 0 || expires > new_exp;
@@ -832,16 +815,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			goto dead;
 		} else {
 			cpu_timer_sample_group(timer->it_clock, p, &now);
-			if (unlikely(p->exit_state) && thread_group_empty(p)) {
-				read_unlock(&tasklist_lock);
-				/*
-				 * We've noticed that the thread is dead, but
-				 * not yet reaped.  Take this opportunity to
-				 * drop our task ref.
-				 */
-				clear_dead_task(timer, now);
-				goto dead;
-			}
 		}
 		read_unlock(&tasklist_lock);
 	}
@@ -1092,14 +1065,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 			read_unlock(&tasklist_lock);
 			goto out;
 		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
-			/*
-			 * We've noticed that the thread is dead, but
-			 * not yet reaped.  Take this opportunity to
-			 * drop our task ref.
-			 */
-			cpu_timer_sample_group(timer->it_clock, p, &now);
-			clear_dead_task(timer, now);
 			read_unlock(&tasklist_lock);
+			/* Optimizations: if the process is dying, no need to rearm */
 			goto out;
 		}
 		spin_lock(&p->sighand->siglock);

From e26d70d271ee1a68a925796b411cb0239394c7a1 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 11 Oct 2013 00:27:19 +0200
Subject: [PATCH 09/46] posix-timers: Cleanup reaped target handling

When a timer's target is seen to be buried, for example on calls
to timer_gettime(), the posix cpu timers code behaves a bit
like a garbage collector and releases early the reference to the
task.

Then again, this optimization complicates the code for no much
value: it's up to the user to release the timer and its associated
ressources by calling timer_delete() after it buries the target
tasks.

Remove this to simplify the code.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/posix-cpu-timers.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index c5d1ef530268..dc4355b967db 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -639,8 +639,6 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	 */
 	if (unlikely(p->sighand == NULL)) {
 		read_unlock(&tasklist_lock);
-		put_task_struct(p);
-		timer->it.cpu.task = NULL;
 		return -ESRCH;
 	}
 
@@ -808,8 +806,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			 * We can't even collect a sample any more.
 			 * Call the timer disarmed, nothing else to do.
 			 */
-			put_task_struct(p);
-			timer->it.cpu.task = NULL;
 			timer->it.cpu.expires = 0;
 			read_unlock(&tasklist_lock);
 			goto dead;
@@ -1059,8 +1055,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 			 * The process has been reaped.
 			 * We can't even collect a sample any more.
 			 */
-			put_task_struct(p);
-			timer->it.cpu.task = p = NULL;
 			timer->it.cpu.expires = 0;
 			read_unlock(&tasklist_lock);
 			goto out;

From a3222f88fa4f2ebec4632aef527dd2c9a41b997d Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 11 Oct 2013 00:37:39 +0200
Subject: [PATCH 10/46] posix-timers: Remove dead task special case

Now that we've removed all the optimizations that could
result in NULL timer's targets, we can remove all the
associated special case handling.

Also add some warnings on NULL targets to spot any possible
leftover.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/posix-cpu-timers.c | 70 ++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 45 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index dc4355b967db..ab9911b54faf 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -374,27 +374,27 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
 	struct task_struct *p = timer->it.cpu.task;
 	int ret = 0;
 
-	if (likely(p != NULL)) {
-		read_lock(&tasklist_lock);
-		if (unlikely(p->sighand == NULL)) {
-			/*
-			 * We raced with the reaping of the task.
-			 * The deletion should have cleared us off the list.
-			 */
-			BUG_ON(!list_empty(&timer->it.cpu.entry));
-		} else {
-			spin_lock(&p->sighand->siglock);
-			if (timer->it.cpu.firing)
-				ret = TIMER_RETRY;
-			else
-				list_del(&timer->it.cpu.entry);
-			spin_unlock(&p->sighand->siglock);
-		}
-		read_unlock(&tasklist_lock);
+	WARN_ON_ONCE(p == NULL);
 
-		if (!ret)
-			put_task_struct(p);
+	read_lock(&tasklist_lock);
+	if (unlikely(p->sighand == NULL)) {
+		/*
+		 * We raced with the reaping of the task.
+		 * The deletion should have cleared us off the list.
+		 */
+		BUG_ON(!list_empty(&timer->it.cpu.entry));
+	} else {
+		spin_lock(&p->sighand->siglock);
+		if (timer->it.cpu.firing)
+			ret = TIMER_RETRY;
+		else
+			list_del(&timer->it.cpu.entry);
+		spin_unlock(&p->sighand->siglock);
 	}
+	read_unlock(&tasklist_lock);
+
+	if (!ret)
+		put_task_struct(p);
 
 	return ret;
 }
@@ -622,12 +622,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	unsigned long long old_expires, new_expires, old_incr, val;
 	int ret;
 
-	if (unlikely(p == NULL)) {
-		/*
-		 * Timer refers to a dead task's clock.
-		 */
-		return -ESRCH;
-	}
+	WARN_ON_ONCE(p == NULL);
 
 	new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
 
@@ -770,6 +765,8 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 	unsigned long long now;
 	struct task_struct *p = timer->it.cpu.task;
 
+	WARN_ON_ONCE(p == NULL);
+
 	/*
 	 * Easy part: convert the reload time.
 	 */
@@ -781,18 +778,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 		return;
 	}
 
-	if (unlikely(p == NULL)) {
-		WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock));
-		/*
-		 * This task already died and the timer will never fire.
-		 * In this case, expires is actually the dead value.
-		 */
-	dead:
-		sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
-				   &itp->it_value);
-		return;
-	}
-
 	/*
 	 * Sample the clock to take the difference with the expiry time.
 	 */
@@ -807,8 +792,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			 * Call the timer disarmed, nothing else to do.
 			 */
 			timer->it.cpu.expires = 0;
+			sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
+					   &itp->it_value);
 			read_unlock(&tasklist_lock);
-			goto dead;
 		} else {
 			cpu_timer_sample_group(timer->it_clock, p, &now);
 		}
@@ -1029,13 +1015,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	struct task_struct *p = timer->it.cpu.task;
 	unsigned long long now;
 
-	if (unlikely(p == NULL)) {
-		WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock));
-		/*
-		 * The task was cleaned up already, no future firings.
-		 */
-		goto out;
-	}
+	WARN_ON_ONCE(p == NULL);
 
 	/*
 	 * Fetch the current sample and update the timer's expiry time.

From af82eb3c3068877a6b1989796a06b846b1e9e1c3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 11 Oct 2013 16:11:43 +0200
Subject: [PATCH 11/46] posix-timers: Remove useless clock sample on timers
 cleanup

a0b2062b0904ef07944c4a6e4d0f88ee44f1e9f2
("posix_timers: fix racy timer delta caching on task exit") forgot
to remove the arguments used for timer caching.

Fix this leftover.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/posix-cpu-timers.c | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index ab9911b54faf..e6389f915bcb 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -399,8 +399,7 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
 	return ret;
 }
 
-static void cleanup_timers_list(struct list_head *head,
-				unsigned long long curr)
+static void cleanup_timers_list(struct list_head *head)
 {
 	struct cpu_timer_list *timer, *next;
 
@@ -414,16 +413,11 @@ static void cleanup_timers_list(struct list_head *head,
  * time for later timer_gettime calls to return.
  * This must be called with the siglock held.
  */
-static void cleanup_timers(struct list_head *head,
-			   cputime_t utime, cputime_t stime,
-			   unsigned long long sum_exec_runtime)
+static void cleanup_timers(struct list_head *head)
 {
-
-	cputime_t ptime = utime + stime;
-
-	cleanup_timers_list(head, cputime_to_expires(ptime));
-	cleanup_timers_list(++head, cputime_to_expires(utime));
-	cleanup_timers_list(++head, sum_exec_runtime);
+	cleanup_timers_list(head);
+	cleanup_timers_list(++head);
+	cleanup_timers_list(++head);
 }
 
 /*
@@ -433,24 +427,14 @@ static void cleanup_timers(struct list_head *head,
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
-	cputime_t utime, stime;
-
 	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
 						sizeof(unsigned long long));
-	task_cputime(tsk, &utime, &stime);
-	cleanup_timers(tsk->cpu_timers,
-		       utime, stime, tsk->se.sum_exec_runtime);
+	cleanup_timers(tsk->cpu_timers);
 
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
-	struct signal_struct *const sig = tsk->signal;
-	cputime_t utime, stime;
-
-	task_cputime(tsk, &utime, &stime);
-	cleanup_timers(tsk->signal->cpu_timers,
-		       utime + sig->utime, stime + sig->stime,
-		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
+	cleanup_timers(tsk->signal->cpu_timers);
 }
 
 static inline int expires_gt(cputime_t expires, cputime_t new_exp)

From 33ab0fec33527e8b5ab124cff6aefd4746508e04 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 11 Oct 2013 17:41:11 +0200
Subject: [PATCH 12/46] posix-timers: Consolidate posix_cpu_clock_get()

Consolidate the clock sampling common code used for both local
and remote targets.

Note that this introduces a tiny user ABI change: if a
PID is passed to clock_gettime() along the clockid,
we used to forbid a process wide clock sample when that
PID doesn't belong to a group leader. Now after this patch
we allow process wide clock samples if that PID belongs to
the current task, even if the current task is not the
group leader.

But local process wide clock samples are allowed if PID == 0
(current task) even if the current task is not the group leader.
So in the end this should be no big deal as this actually harmonize
the behaviour when the remote sample is actually a local one.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/posix-cpu-timers.c | 64 ++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 34 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index e6389f915bcb..03c5d6c3e614 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -260,30 +260,43 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
 	return 0;
 }
 
+static int posix_cpu_clock_get_task(struct task_struct *tsk,
+				    const clockid_t which_clock,
+				    struct timespec *tp)
+{
+	int err = -EINVAL;
+	unsigned long long rtn;
+
+	if (CPUCLOCK_PERTHREAD(which_clock)) {
+		if (same_thread_group(tsk, current))
+			err = cpu_clock_sample(which_clock, tsk, &rtn);
+	} else {
+		read_lock(&tasklist_lock);
+
+		if (tsk->sighand && (tsk == current || thread_group_leader(tsk)))
+			err = cpu_clock_sample_group(which_clock, tsk, &rtn);
+
+		read_unlock(&tasklist_lock);
+	}
+
+	if (!err)
+		sample_to_timespec(which_clock, rtn, tp);
+
+	return err;
+}
+
 
 static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
 	const pid_t pid = CPUCLOCK_PID(which_clock);
-	int error = -EINVAL;
-	unsigned long long rtn;
+	int err = -EINVAL;
 
 	if (pid == 0) {
 		/*
 		 * Special case constant value for our own clocks.
 		 * We don't have to do any lookup to find ourselves.
 		 */
-		if (CPUCLOCK_PERTHREAD(which_clock)) {
-			/*
-			 * Sampling just ourselves we can do with no locking.
-			 */
-			error = cpu_clock_sample(which_clock,
-						 current, &rtn);
-		} else {
-			read_lock(&tasklist_lock);
-			error = cpu_clock_sample_group(which_clock,
-						       current, &rtn);
-			read_unlock(&tasklist_lock);
-		}
+		err = posix_cpu_clock_get_task(current, which_clock, tp);
 	} else {
 		/*
 		 * Find the given PID, and validate that the caller
@@ -292,29 +305,12 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 		struct task_struct *p;
 		rcu_read_lock();
 		p = find_task_by_vpid(pid);
-		if (p) {
-			if (CPUCLOCK_PERTHREAD(which_clock)) {
-				if (same_thread_group(p, current)) {
-					error = cpu_clock_sample(which_clock,
-								 p, &rtn);
-				}
-			} else {
-				read_lock(&tasklist_lock);
-				if (thread_group_leader(p) && p->sighand) {
-					error =
-					    cpu_clock_sample_group(which_clock,
-							           p, &rtn);
-				}
-				read_unlock(&tasklist_lock);
-			}
-		}
+		if (p)
+			err = posix_cpu_clock_get_task(p, which_clock, tp);
 		rcu_read_unlock();
 	}
 
-	if (error)
-		return error;
-	sample_to_timespec(which_clock, rtn, tp);
-	return 0;
+	return err;
 }
 
 

From 50875788a1d4a3f662a27ed13cd05282d835939a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 11 Oct 2013 17:41:11 +0200
Subject: [PATCH 13/46] posix-timers: Use sighand lock instead of tasklist_lock
 for task clock sample

There is no need for the tasklist_lock just to take a process
wide clock sample.

All we need is to get a coherent sample that doesn't race with
exit() and exec():

* exit() may be concurrently reaping a task and flushing its time

* sighand is unstable under exit() and exec(), and the latter also
  result in group leader that can change

To protect against these, locking the target's sighand is enough.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/posix-cpu-timers.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 03c5d6c3e614..71a07699a36b 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -271,12 +271,22 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk,
 		if (same_thread_group(tsk, current))
 			err = cpu_clock_sample(which_clock, tsk, &rtn);
 	} else {
-		read_lock(&tasklist_lock);
+		unsigned long flags;
+		struct sighand_struct *sighand;
 
-		if (tsk->sighand && (tsk == current || thread_group_leader(tsk)))
+		/*
+		 * while_each_thread() is not yet entirely RCU safe,
+		 * keep locking the group while sampling process
+		 * clock for now.
+		 */
+		sighand = lock_task_sighand(tsk, &flags);
+		if (!sighand)
+			return err;
+
+		if (tsk == current || thread_group_leader(tsk))
 			err = cpu_clock_sample_group(which_clock, tsk, &rtn);
 
-		read_unlock(&tasklist_lock);
+		unlock_task_sighand(tsk, &flags);
 	}
 
 	if (!err)

From 3d7a1427e4ce545e949e9bccb75d0ca8d941d93c Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 11 Oct 2013 17:41:11 +0200
Subject: [PATCH 14/46] posix-timers: Use sighand lock instead of tasklist_lock
 on timer deletion

Timer deletion doesn't need the tasklist lock.
We need to protect against:

* concurrent access to the lists p->cputime_expires and
  p->sighand->cputime_expires

* task reaping that may also delete the timer list entry

* timer firing

We already hold the timer lock which protects us against concurrent
timer firing.

The rest only need the targets sighand to be locked.
So hold it and drop the use of tasklist_lock there.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/posix-cpu-timers.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 71a07699a36b..9641958ddb3e 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -377,27 +377,32 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
  */
 static int posix_cpu_timer_del(struct k_itimer *timer)
 {
-	struct task_struct *p = timer->it.cpu.task;
 	int ret = 0;
+	unsigned long flags;
+	struct sighand_struct *sighand;
+	struct task_struct *p = timer->it.cpu.task;
 
 	WARN_ON_ONCE(p == NULL);
 
-	read_lock(&tasklist_lock);
-	if (unlikely(p->sighand == NULL)) {
+	/*
+	 * Protect against sighand release/switch in exit/exec and process/
+	 * thread timer list entry concurrent read/writes.
+	 */
+	sighand = lock_task_sighand(p, &flags);
+	if (unlikely(sighand == NULL)) {
 		/*
 		 * We raced with the reaping of the task.
 		 * The deletion should have cleared us off the list.
 		 */
 		BUG_ON(!list_empty(&timer->it.cpu.entry));
 	} else {
-		spin_lock(&p->sighand->siglock);
 		if (timer->it.cpu.firing)
 			ret = TIMER_RETRY;
 		else
 			list_del(&timer->it.cpu.entry);
-		spin_unlock(&p->sighand->siglock);
+
+		unlock_task_sighand(p, &flags);
 	}
-	read_unlock(&tasklist_lock);
 
 	if (!ret)
 		put_task_struct(p);

From e73d84e33f15c099ed1df60437700093cb14e46e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 11 Oct 2013 18:56:49 +0200
Subject: [PATCH 15/46] posix-timers: Remove remaining uses of tasklist_lock

The remaining uses of tasklist_lock were mostly about synchronizing
against sighand modifications, getting coherent and safe group samples
and also thread/process wide timers list handling.

All of this is already safely synchronizable with the target's
sighand lock. Let's use it on these places instead.

Also update the comments about locking.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/posix-cpu-timers.c | 76 ++++++++++++++++++++++-----------------
 1 file changed, 44 insertions(+), 32 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9641958ddb3e..d9dc5edc318c 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -233,7 +233,8 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 
 /*
  * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
+ * Must be called with task sighand lock held for safe while_each_thread()
+ * traversal.
  */
 static int cpu_clock_sample_group(const clockid_t which_clock,
 				  struct task_struct *p,
@@ -455,8 +456,7 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
 
 /*
  * Insert the timer on the appropriate list before any timers that
- * expire later.  This must be called with the tasklist_lock held
- * for reading, interrupts disabled and p->sighand->siglock taken.
+ * expire later.  This must be called with the sighand lock held.
  */
 static void arm_timer(struct k_itimer *timer)
 {
@@ -547,7 +547,8 @@ static void cpu_timer_fire(struct k_itimer *timer)
 
 /*
  * Sample a process (thread group) timer for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
+ * Must be called with task sighand lock held for safe while_each_thread()
+ * traversal.
  */
 static int cpu_timer_sample_group(const clockid_t which_clock,
 				  struct task_struct *p,
@@ -610,9 +611,11 @@ static inline void posix_cpu_timer_kick_nohz(void) { }
  * If we return TIMER_RETRY, it's necessary to release the timer's lock
  * and try again.  (This happens when the timer is in the middle of firing.)
  */
-static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
+static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 			       struct itimerspec *new, struct itimerspec *old)
 {
+	unsigned long flags;
+	struct sighand_struct *sighand;
 	struct task_struct *p = timer->it.cpu.task;
 	unsigned long long old_expires, new_expires, old_incr, val;
 	int ret;
@@ -621,14 +624,16 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 
 	new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
 
-	read_lock(&tasklist_lock);
 	/*
-	 * We need the tasklist_lock to protect against reaping that
-	 * clears p->sighand.  If p has just been reaped, we can no
+	 * Protect against sighand release/switch in exit/exec and p->cpu_timers
+	 * and p->signal->cpu_timers read/write in arm_timer()
+	 */
+	sighand = lock_task_sighand(p, &flags);
+	/*
+	 * If p has just been reaped, we can no
 	 * longer get any information about it at all.
 	 */
-	if (unlikely(p->sighand == NULL)) {
-		read_unlock(&tasklist_lock);
+	if (unlikely(sighand == NULL)) {
 		return -ESRCH;
 	}
 
@@ -639,7 +644,6 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 
 	ret = 0;
 	old_incr = timer->it.cpu.incr;
-	spin_lock(&p->sighand->siglock);
 	old_expires = timer->it.cpu.expires;
 	if (unlikely(timer->it.cpu.firing)) {
 		timer->it.cpu.firing = -1;
@@ -696,12 +700,11 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 		 * disable this firing since we are already reporting
 		 * it as an overrun (thanks to bump_cpu_timer above).
 		 */
-		spin_unlock(&p->sighand->siglock);
-		read_unlock(&tasklist_lock);
+		unlock_task_sighand(p, &flags);
 		goto out;
 	}
 
-	if (new_expires != 0 && !(flags & TIMER_ABSTIME)) {
+	if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
 		new_expires += val;
 	}
 
@@ -715,9 +718,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 		arm_timer(timer);
 	}
 
-	spin_unlock(&p->sighand->siglock);
-	read_unlock(&tasklist_lock);
-
+	unlock_task_sighand(p, &flags);
 	/*
 	 * Install the new reload setting, and
 	 * set up the signal and overrun bookkeeping.
@@ -779,8 +780,16 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 		cpu_clock_sample(timer->it_clock, p, &now);
 	} else {
-		read_lock(&tasklist_lock);
-		if (unlikely(p->sighand == NULL)) {
+		struct sighand_struct *sighand;
+		unsigned long flags;
+
+		/*
+		 * Protect against sighand release/switch in exit/exec and
+		 * also make timer sampling safe if it ends up calling
+		 * thread_group_cputime().
+		 */
+		sighand = lock_task_sighand(p, &flags);
+		if (unlikely(sighand == NULL)) {
 			/*
 			 * The process has been reaped.
 			 * We can't even collect a sample any more.
@@ -789,11 +798,10 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			timer->it.cpu.expires = 0;
 			sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
 					   &itp->it_value);
-			read_unlock(&tasklist_lock);
 		} else {
 			cpu_timer_sample_group(timer->it_clock, p, &now);
+			unlock_task_sighand(p, &flags);
 		}
-		read_unlock(&tasklist_lock);
 	}
 
 	if (now < timer->it.cpu.expires) {
@@ -1007,6 +1015,8 @@ static void check_process_timers(struct task_struct *tsk,
  */
 void posix_cpu_timer_schedule(struct k_itimer *timer)
 {
+	struct sighand_struct *sighand;
+	unsigned long flags;
 	struct task_struct *p = timer->it.cpu.task;
 	unsigned long long now;
 
@@ -1021,27 +1031,31 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 		if (unlikely(p->exit_state))
 			goto out;
 
-		read_lock(&tasklist_lock); /* arm_timer needs it.  */
-		spin_lock(&p->sighand->siglock);
+		/* Protect timer list r/w in arm_timer() */
+		sighand = lock_task_sighand(p, &flags);
+		if (!sighand)
+			goto out;
 	} else {
-		read_lock(&tasklist_lock);
-		if (unlikely(p->sighand == NULL)) {
+		/*
+		 * Protect arm_timer() and timer sampling in case of call to
+		 * thread_group_cputime().
+		 */
+		sighand = lock_task_sighand(p, &flags);
+		if (unlikely(sighand == NULL)) {
 			/*
 			 * The process has been reaped.
 			 * We can't even collect a sample any more.
 			 */
 			timer->it.cpu.expires = 0;
-			read_unlock(&tasklist_lock);
 			goto out;
 		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
-			read_unlock(&tasklist_lock);
+			unlock_task_sighand(p, &flags);
 			/* Optimizations: if the process is dying, no need to rearm */
 			goto out;
 		}
-		spin_lock(&p->sighand->siglock);
 		cpu_timer_sample_group(timer->it_clock, p, &now);
 		bump_cpu_timer(timer, now);
-		/* Leave the tasklist_lock locked for the call below.  */
+		/* Leave the sighand locked for the call below.  */
 	}
 
 	/*
@@ -1049,12 +1063,10 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	 */
 	BUG_ON(!irqs_disabled());
 	arm_timer(timer);
-	spin_unlock(&p->sighand->siglock);
-	read_unlock(&tasklist_lock);
+	unlock_task_sighand(p, &flags);
 
 	/* Kick full dynticks CPUs in case they need to tick on the new timer */
 	posix_cpu_timer_kick_nohz();
-
 out:
 	timer->it_overrun_last = timer->it_overrun;
 	timer->it_overrun = -1;

From 531f64fd6f46a3f2a3edb1b97ecc827c775932c5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 11 Oct 2013 17:58:08 +0200
Subject: [PATCH 16/46] posix-timers: Convert abuses of BUG_ON to WARN_ON

The posix cpu timers code makes a heavy use of BUG_ON()
but none of these concern fatal issues that require
to stop the machine. So let's just warn the user when
some internal state slips out of our hands.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/posix-cpu-timers.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index d9dc5edc318c..3b8946416a5f 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -395,7 +395,7 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
 		 * We raced with the reaping of the task.
 		 * The deletion should have cleared us off the list.
 		 */
-		BUG_ON(!list_empty(&timer->it.cpu.entry));
+		WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
 	} else {
 		if (timer->it.cpu.firing)
 			ret = TIMER_RETRY;
@@ -640,7 +640,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 	/*
 	 * Disarm any old timer after extracting its expiry time.
 	 */
-	BUG_ON(!irqs_disabled());
+	WARN_ON_ONCE(!irqs_disabled());
 
 	ret = 0;
 	old_incr = timer->it.cpu.incr;
@@ -1061,7 +1061,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	/*
 	 * Now re-arm for the new expiry time.
 	 */
-	BUG_ON(!irqs_disabled());
+	WARN_ON_ONCE(!irqs_disabled());
 	arm_timer(timer);
 	unlock_task_sighand(p, &flags);
 
@@ -1150,7 +1150,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 	struct k_itimer *timer, *next;
 	unsigned long flags;
 
-	BUG_ON(!irqs_disabled());
+	WARN_ON_ONCE(!irqs_disabled());
 
 	/*
 	 * The fast path checks that there are no expired thread or thread
@@ -1217,7 +1217,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 {
 	unsigned long long now;
 
-	BUG_ON(clock_idx == CPUCLOCK_SCHED);
+	WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
 	cpu_timer_sample_group(clock_idx, tsk, &now);
 
 	if (oldval) {

From 247f325aaddb8b6117959f70c26ba735360c4160 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Fri, 25 Oct 2013 12:16:10 +0100
Subject: [PATCH 17/46] clockevent: sun4i: Fill the irq field in the clockevent
 structure

The clock event structure irq field was not filled previously to the
interrupt we're using.

This was resulting in the timer not being used at all when using a
configuration with SMP enabled on a system with several CPUs, and with
the cpumask set to the cpu_possible_mask.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clocksource/sun4i_timer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c
index 2fb4695a28d8..a7f492e0c7fc 100644
--- a/drivers/clocksource/sun4i_timer.c
+++ b/drivers/clocksource/sun4i_timer.c
@@ -188,6 +188,7 @@ static void __init sun4i_timer_init(struct device_node *node)
 	writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG);
 
 	sun4i_clockevent.cpumask = cpumask_of(0);
+	sun4i_clockevent.irq = irq;
 
 	clockevents_config_and_register(&sun4i_clockevent, rate,
 					TIMER_SYNC_TICKS, 0xffffffff);

From 2c28f32ca4c98b41ad95f62fa27f59f3117931d4 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Fri, 25 Oct 2013 12:16:11 +0100
Subject: [PATCH 18/46] clocksource: sun4i: Change CPU mask to
 cpu_possible_mask

The interrupt for the timer is a shared processor interrupt, so any CPU
found in the system can handle it. Switch to our cpumask to
cpu_possible_mask instead of cpumask_of(0).

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clocksource/sun4i_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c
index a7f492e0c7fc..6a76b4ec3470 100644
--- a/drivers/clocksource/sun4i_timer.c
+++ b/drivers/clocksource/sun4i_timer.c
@@ -187,7 +187,7 @@ static void __init sun4i_timer_init(struct device_node *node)
 	val = readl(timer_base + TIMER_IRQ_EN_REG);
 	writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG);
 
-	sun4i_clockevent.cpumask = cpumask_of(0);
+	sun4i_clockevent.cpumask = cpu_possible_mask;
 	sun4i_clockevent.irq = irq;
 
 	clockevents_config_and_register(&sun4i_clockevent, rate,

From 5df9affb50a09e0cb571c4fa3e2d577db85c7475 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 7 Nov 2013 12:01:48 +0100
Subject: [PATCH 19/46] clocksource: sun4i: Increase a bit the clock event and
 sources rating
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We want to keep this driver as the default provider of the clock events
and source, yet some other driver might fit in the "desired" category of
ratings. Hence, we need to increase a bit the rating so that we can have
more flexibility in the ratings we choose.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Tested-by: Emilio López <emilio@elopez.com.ar>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/sun4i_timer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c
index 6a76b4ec3470..d7a1a1ad8f05 100644
--- a/drivers/clocksource/sun4i_timer.c
+++ b/drivers/clocksource/sun4i_timer.c
@@ -114,7 +114,7 @@ static int sun4i_clkevt_next_event(unsigned long evt,
 
 static struct clock_event_device sun4i_clockevent = {
 	.name = "sun4i_tick",
-	.rating = 300,
+	.rating = 350,
 	.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode = sun4i_clkevt_mode,
 	.set_next_event = sun4i_clkevt_next_event,
@@ -172,7 +172,7 @@ static void __init sun4i_timer_init(struct device_node *node)
 
 	setup_sched_clock(sun4i_timer_sched_read, 32, rate);
 	clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name,
-			      rate, 300, 32, clocksource_mmio_readl_down);
+			      rate, 350, 32, clocksource_mmio_readl_down);
 
 	ticks_per_jiffy = DIV_ROUND_UP(rate, HZ);
 

From 67905540e8b8eaf51e621cfd2ef15641d6d5b9a7 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 7 Nov 2013 12:01:48 +0100
Subject: [PATCH 20/46] clocksource: Add Allwinner SoCs HS timers driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most of the Allwinner SoCs (at this time, all but the A10) also have a
High Speed timers that are not using the 24MHz oscillator as a source
but rather the AHB clock running much faster.

The IP is slightly different between the A10s/A13 and the one used in
the A20/A31, since the latter have 4 timers available, while the former
have only 2 of them.

[dlezcano] : Fixed conflict with b788beda "Order Kconfig options
		alphabetically"

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Tested-by: Emilio López <emilio@elopez.com.ar>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 .../timer/allwinner,sun5i-a13-hstimer.txt     |  22 ++
 arch/arm/mach-sunxi/Kconfig                   |   1 +
 drivers/clocksource/Kconfig                   |   4 +
 drivers/clocksource/Makefile                  |   1 +
 drivers/clocksource/timer-sun5i.c             | 192 ++++++++++++++++++
 5 files changed, 220 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt
 create mode 100644 drivers/clocksource/timer-sun5i.c

diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt
new file mode 100644
index 000000000000..7c26154b8bbb
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt
@@ -0,0 +1,22 @@
+Allwinner SoCs High Speed Timer Controller
+
+Required properties:
+
+- compatible :	should be "allwinner,sun5i-a13-hstimer" or
+		"allwinner,sun7i-a20-hstimer"
+- reg : Specifies base physical address and size of the registers.
+- interrupts :	The interrupts of these timers (2 for the sun5i IP, 4 for the sun7i
+		one)
+- clocks: phandle to the source clock (usually the AHB clock)
+
+Example:
+
+timer@01c60000 {
+	compatible = "allwinner,sun7i-a20-hstimer";
+	reg = <0x01c60000 0x1000>;
+	interrupts = <0 51 1>,
+		     <0 52 1>,
+		     <0 53 1>,
+		     <0 54 1>;
+	clocks = <&ahb1_gates 19>;
+};
diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index c9e72c89066a..bce0d4277f71 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -12,3 +12,4 @@ config ARCH_SUNXI
 	select PINCTRL_SUNXI
 	select SPARSE_IRQ
 	select SUN4I_TIMER
+	select SUN5I_HSTIMER
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index bdb953e15d2a..884eeff8e32d 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -37,6 +37,10 @@ config SUN4I_TIMER
 	select CLKSRC_MMIO
 	bool
 
+config SUN5I_HSTIMER
+	select CLKSRC_MMIO
+	bool
+
 config VT8500_TIMER
 	bool
 
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 33621efb9148..358358d87b6d 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_ARCH_MOXART)	+= moxart_timer.o
 obj-$(CONFIG_ARCH_MXS)		+= mxs_timer.o
 obj-$(CONFIG_ARCH_PRIMA2)	+= timer-prima2.o
 obj-$(CONFIG_SUN4I_TIMER)	+= sun4i_timer.o
+obj-$(CONFIG_SUN5I_HSTIMER)	+= timer-sun5i.o
 obj-$(CONFIG_ARCH_TEGRA)	+= tegra20_timer.o
 obj-$(CONFIG_VT8500_TIMER)	+= vt8500_timer.o
 obj-$(CONFIG_ARCH_NSPIRE)	+= zevio-timer.o
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
new file mode 100644
index 000000000000..bddc52233d2a
--- /dev/null
+++ b/drivers/clocksource/timer-sun5i.c
@@ -0,0 +1,192 @@
+/*
+ * Allwinner SoCs hstimer driver.
+ *
+ * Copyright (C) 2013 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqreturn.h>
+#include <linux/sched_clock.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#define TIMER_IRQ_EN_REG		0x00
+#define TIMER_IRQ_EN(val)			BIT(val)
+#define TIMER_IRQ_ST_REG		0x04
+#define TIMER_CTL_REG(val)		(0x20 * (val) + 0x10)
+#define TIMER_CTL_ENABLE			BIT(0)
+#define TIMER_CTL_RELOAD			BIT(1)
+#define TIMER_CTL_CLK_PRES(val)			(((val) & 0x7) << 4)
+#define TIMER_CTL_ONESHOT			BIT(7)
+#define TIMER_INTVAL_LO_REG(val)	(0x20 * (val) + 0x14)
+#define TIMER_INTVAL_HI_REG(val)	(0x20 * (val) + 0x18)
+#define TIMER_CNTVAL_LO_REG(val)	(0x20 * (val) + 0x1c)
+#define TIMER_CNTVAL_HI_REG(val)	(0x20 * (val) + 0x20)
+
+#define TIMER_SYNC_TICKS	3
+
+static void __iomem *timer_base;
+static u32 ticks_per_jiffy;
+
+/*
+ * When we disable a timer, we need to wait at least for 2 cycles of
+ * the timer source clock. We will use for that the clocksource timer
+ * that is already setup and runs at the same frequency than the other
+ * timers, and we never will be disabled.
+ */
+static void sun5i_clkevt_sync(void)
+{
+	u32 old = readl(timer_base + TIMER_CNTVAL_LO_REG(1));
+
+	while ((old - readl(timer_base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS)
+		cpu_relax();
+}
+
+static void sun5i_clkevt_time_stop(u8 timer)
+{
+	u32 val = readl(timer_base + TIMER_CTL_REG(timer));
+	writel(val & ~TIMER_CTL_ENABLE, timer_base + TIMER_CTL_REG(timer));
+
+	sun5i_clkevt_sync();
+}
+
+static void sun5i_clkevt_time_setup(u8 timer, u32 delay)
+{
+	writel(delay, timer_base + TIMER_INTVAL_LO_REG(timer));
+}
+
+static void sun5i_clkevt_time_start(u8 timer, bool periodic)
+{
+	u32 val = readl(timer_base + TIMER_CTL_REG(timer));
+
+	if (periodic)
+		val &= ~TIMER_CTL_ONESHOT;
+	else
+		val |= TIMER_CTL_ONESHOT;
+
+	writel(val | TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
+	       timer_base + TIMER_CTL_REG(timer));
+}
+
+static void sun5i_clkevt_mode(enum clock_event_mode mode,
+			      struct clock_event_device *clk)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		sun5i_clkevt_time_stop(0);
+		sun5i_clkevt_time_setup(0, ticks_per_jiffy);
+		sun5i_clkevt_time_start(0, true);
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		sun5i_clkevt_time_stop(0);
+		sun5i_clkevt_time_start(0, false);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		sun5i_clkevt_time_stop(0);
+		break;
+	}
+}
+
+static int sun5i_clkevt_next_event(unsigned long evt,
+				   struct clock_event_device *unused)
+{
+	sun5i_clkevt_time_stop(0);
+	sun5i_clkevt_time_setup(0, evt - TIMER_SYNC_TICKS);
+	sun5i_clkevt_time_start(0, false);
+
+	return 0;
+}
+
+static struct clock_event_device sun5i_clockevent = {
+	.name = "sun5i_tick",
+	.rating = 340,
+	.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+	.set_mode = sun5i_clkevt_mode,
+	.set_next_event = sun5i_clkevt_next_event,
+};
+
+
+static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = (struct clock_event_device *)dev_id;
+
+	writel(0x1, timer_base + TIMER_IRQ_ST_REG);
+	evt->event_handler(evt);
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction sun5i_timer_irq = {
+	.name = "sun5i_timer0",
+	.flags = IRQF_TIMER | IRQF_IRQPOLL,
+	.handler = sun5i_timer_interrupt,
+	.dev_id = &sun5i_clockevent,
+};
+
+static u32 sun5i_timer_sched_read(void)
+{
+	return ~readl(timer_base + TIMER_CNTVAL_LO_REG(1));
+}
+
+static void __init sun5i_timer_init(struct device_node *node)
+{
+	unsigned long rate;
+	struct clk *clk;
+	int ret, irq;
+	u32 val;
+
+	timer_base = of_iomap(node, 0);
+	if (!timer_base)
+		panic("Can't map registers");
+
+	irq = irq_of_parse_and_map(node, 0);
+	if (irq <= 0)
+		panic("Can't parse IRQ");
+
+	clk = of_clk_get(node, 0);
+	if (IS_ERR(clk))
+		panic("Can't get timer clock");
+	clk_prepare_enable(clk);
+	rate = clk_get_rate(clk);
+
+	writel(~0, timer_base + TIMER_INTVAL_LO_REG(1));
+	writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
+	       timer_base + TIMER_CTL_REG(1));
+
+	setup_sched_clock(sun5i_timer_sched_read, 32, rate);
+	clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name,
+			      rate, 340, 32, clocksource_mmio_readl_down);
+
+	ticks_per_jiffy = DIV_ROUND_UP(rate, HZ);
+
+	ret = setup_irq(irq, &sun5i_timer_irq);
+	if (ret)
+		pr_warn("failed to setup irq %d\n", irq);
+
+	/* Enable timer0 interrupt */
+	val = readl(timer_base + TIMER_IRQ_EN_REG);
+	writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG);
+
+	sun5i_clockevent.cpumask = cpu_possible_mask;
+	sun5i_clockevent.irq = irq;
+
+	clockevents_config_and_register(&sun5i_clockevent, rate,
+					TIMER_SYNC_TICKS, 0xffffffff);
+}
+CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer",
+		       sun5i_timer_init);
+CLOCKSOURCE_OF_DECLARE(sun7i_a20, "allwinner,sun7i-a20-hstimer",
+		       sun5i_timer_init);

From f2b5002889cd2ca25d1dfe522755ade701f49044 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 7 Nov 2013 12:01:48 +0100
Subject: [PATCH 21/46] ARM: sun5i: a10s: Add support for the High Speed Timers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Allwinner A10s has support for two high speed timers. Now that we
have a driver to support it, we can enable them in the device tree.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Tested-by: Emilio López <emilio@elopez.com.ar>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 arch/arm/boot/dts/sun5i-a10s.dtsi | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index 52476742a104..e674c94c7206 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -332,5 +332,12 @@
 			clock-frequency = <100000>;
 			status = "disabled";
 		};
+
+		timer@01c60000 {
+			compatible = "allwinner,sun5i-a13-hstimer";
+			reg = <0x01c60000 0x1000>;
+			interrupts = <82>, <83>;
+			clocks = <&ahb_gates 28>;
+		};
 	};
 };

From 4411902a13e6b64873dc21abafeb57db335efcf1 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 7 Nov 2013 12:01:48 +0100
Subject: [PATCH 22/46] ARM: sun5i: a13: Add support for the High Speed Timers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Allwinner A13 has support for two high speed timers. Now that we
have a driver to support it, we can enable them in the device tree.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Tested-by: Emilio López <emilio@elopez.com.ar>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 arch/arm/boot/dts/sun5i-a13.dtsi | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index ce8ef2a45be0..1ccd75d37f49 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -273,5 +273,12 @@
 			clock-frequency = <100000>;
 			status = "disabled";
 		};
+
+		timer@01c60000 {
+			compatible = "allwinner,sun5i-a13-hstimer";
+			reg = <0x01c60000 0x1000>;
+			interrupts = <82>, <83>;
+			clocks = <&ahb_gates 28>;
+		};
 	};
 };

From 31f8ad387e4306ec1fb2a01c5cd0d648b5e9bff5 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 7 Nov 2013 12:01:48 +0100
Subject: [PATCH 23/46] ARM: sun7i: a20: Add support for the High Speed Timers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Allwinner A20 has support for four high speed timers. Apart for the
number of timers (4 vs 2), it's basically the same logic than the high
speed timers found in the sun5i chips.

Now that we have a driver to support it, we can enable them in the
device tree.

[dlezcano] : Fixed conflict with 428abbb8 "Enable the I2C controllers"

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Tested-by: Emilio López <emilio@elopez.com.ar>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 arch/arm/boot/dts/sun7i-a20.dtsi | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index e46cfedde74c..ee6cec7b0c90 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -395,6 +395,16 @@
 			status = "disabled";
 		};
 
+		hstimer@01c60000 {
+			compatible = "allwinner,sun7i-a20-hstimer";
+			reg = <0x01c60000 0x1000>;
+			interrupts = <0 81 1>,
+				     <0 82 1>,
+				     <0 83 1>,
+				     <0 84 1>;
+			clocks = <&ahb_gates 28>;
+		};
+
 		gic: interrupt-controller@01c81000 {
 			compatible = "arm,cortex-a7-gic", "arm,cortex-a15-gic";
 			reg = <0x01c81000 0x1000>,

From af066fce5fa6b615588732cad6909c450a9eb616 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 20 Nov 2013 00:47:32 +0100
Subject: [PATCH 24/46] clocksource: arm_global_timer: Switch to
 sched_clock_register()

The 32 bit sched_clock interface now supports 64 bits. Upgrade to
the 64 bit function to allow us to remove the 32 bit registration
interface. While we're here increase the number of bits that
sched_clock can handle to 64 to make full use of the counter.

Cc: Stuart Menefy <stuart.menefy@st.com>
Cc: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Acked-by: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Acked-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/arm_global_timer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index c639b1a9e996..0fc31d029e52 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -202,7 +202,7 @@ static struct clocksource gt_clocksource = {
 };
 
 #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
-static u32 notrace gt_sched_clock_read(void)
+static u64 notrace gt_sched_clock_read(void)
 {
 	return gt_counter_read();
 }
@@ -217,7 +217,7 @@ static void __init gt_clocksource_init(void)
 	writel(GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL);
 
 #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
-	setup_sched_clock(gt_sched_clock_read, 32, gt_clk_rate);
+	sched_clock_register(gt_sched_clock_read, 64, gt_clk_rate);
 #endif
 	clocksource_register_hz(&gt_clocksource, gt_clk_rate);
 }

From dfded00902d7437963870accbcf4b39114e85f59 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 20 Nov 2013 00:47:32 +0100
Subject: [PATCH 25/46] clocksource: cadence_ttc_timer: Switch to
 sched_clock_register()

The 32 bit sched_clock interface now supports 64 bits. Upgrade to
the 64 bit function to allow us to remove the 32 bit registration
interface.

Cc: Soren Brinkmann <soren.brinkmann@xilinx.com>
Cc: Michal Simek <monstr@monstr.eu>
Tested-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/cadence_ttc_timer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c
index b2bb3a4bc205..b865b4e3d67b 100644
--- a/drivers/clocksource/cadence_ttc_timer.c
+++ b/drivers/clocksource/cadence_ttc_timer.c
@@ -158,7 +158,7 @@ static cycle_t __ttc_clocksource_read(struct clocksource *cs)
 				TTC_COUNT_VAL_OFFSET);
 }
 
-static u32 notrace ttc_sched_clock_read(void)
+static u64 notrace ttc_sched_clock_read(void)
 {
 	return __raw_readl(ttc_sched_clock_val_reg);
 }
@@ -306,7 +306,7 @@ static void __init ttc_setup_clocksource(struct clk *clk, void __iomem *base)
 	}
 
 	ttc_sched_clock_val_reg = base + TTC_COUNT_VAL_OFFSET;
-	setup_sched_clock(ttc_sched_clock_read, 16,
+	sched_clock_register(ttc_sched_clock_read, 16,
 			clk_get_rate(ttccs->ttc.clk) / PRESCALE);
 }
 

From 662e7230ee16951e6858c01e72db87c5dc46150e Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 20 Nov 2013 00:47:32 +0100
Subject: [PATCH 26/46] clocksource: sun4i: Switch to sched_clock_register()

The 32 bit sched_clock interface now supports 64 bits. Upgrade to
the 64 bit function to allow us to remove the 32 bit registration
interface. While we're here, mark the sched_clock function as
notrace to prevent ftrace recursion crashes.

Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/sun4i_timer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c
index d7a1a1ad8f05..191187470aa6 100644
--- a/drivers/clocksource/sun4i_timer.c
+++ b/drivers/clocksource/sun4i_timer.c
@@ -138,7 +138,7 @@ static struct irqaction sun4i_timer_irq = {
 	.dev_id = &sun4i_clockevent,
 };
 
-static u32 sun4i_timer_sched_read(void)
+static u64 notrace sun4i_timer_sched_read(void)
 {
 	return ~readl(timer_base + TIMER_CNTVAL_REG(1));
 }
@@ -170,7 +170,7 @@ static void __init sun4i_timer_init(struct device_node *node)
 	       TIMER_CTL_CLK_SRC(TIMER_CTL_CLK_SRC_OSC24M),
 	       timer_base + TIMER_CTL_REG(1));
 
-	setup_sched_clock(sun4i_timer_sched_read, 32, rate);
+	sched_clock_register(sun4i_timer_sched_read, 32, rate);
 	clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name,
 			      rate, 350, 32, clocksource_mmio_readl_down);
 

From 2e8bac532f8bcd3834853e0a22b130b9fd59270d Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 20 Nov 2013 00:47:32 +0100
Subject: [PATCH 27/46] clocksource: orion: Switch to sched_clock_register()

The 32 bit sched_clock interface now supports 64 bits. Upgrade to
the 64 bit function to allow us to remove the 32 bit registration
interface.

Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Tested-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/time-orion.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/time-orion.c b/drivers/clocksource/time-orion.c
index 9c7f018a67ca..20066222f3f2 100644
--- a/drivers/clocksource/time-orion.c
+++ b/drivers/clocksource/time-orion.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL(orion_timer_ctrl_clrset);
 /*
  * Free-running clocksource handling.
  */
-static u32 notrace orion_read_sched_clock(void)
+static u64 notrace orion_read_sched_clock(void)
 {
 	return ~readl(timer_base + TIMER0_VAL);
 }
@@ -135,7 +135,7 @@ static void __init orion_timer_init(struct device_node *np)
 	clocksource_mmio_init(timer_base + TIMER0_VAL, "orion_clocksource",
 			      clk_get_rate(clk), 300, 32,
 			      clocksource_mmio_readl_down);
-	setup_sched_clock(orion_read_sched_clock, 32, clk_get_rate(clk));
+	sched_clock_register(orion_read_sched_clock, 32, clk_get_rate(clk));
 
 	/* setup timer1 as clockevent timer */
 	if (setup_irq(irq, &orion_clkevt_irq))

From fdca679d87bb4ac0fdc882dbf3deb47a1f58b813 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 20 Nov 2013 09:58:56 +0100
Subject: [PATCH 28/46] clocksource: clksrc-of: Warn if no clock sources are
 found

Many platforms rely on clocksource_of_init() being implicitly
called for registering clock sources and will get zero warnings
if no working clock source is available. Let's print a critical
error message if no clock source is found.

Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/clksrc-of.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/clocksource/clksrc-of.c b/drivers/clocksource/clksrc-of.c
index 35639cf4e5a2..a30b42c3ac3b 100644
--- a/drivers/clocksource/clksrc-of.c
+++ b/drivers/clocksource/clksrc-of.c
@@ -28,6 +28,7 @@ void __init clocksource_of_init(void)
 	struct device_node *np;
 	const struct of_device_id *match;
 	clocksource_of_init_fn init_func;
+	unsigned clocksources = 0;
 
 	for_each_matching_node_and_match(np, __clksrc_of_table, &match) {
 		if (!of_device_is_available(np))
@@ -36,5 +37,8 @@ void __init clocksource_of_init(void)
 		init_func = match->data;
 		init_func(np);
 		of_node_put(np);
+		clocksources++;
 	}
+	if (!clocksources)
+		pr_crit("%s: no matching clocksources found\n", __func__);
 }

From 08cb8e460956489fcfbfab5a7d33e62acd190b9a Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Mon, 2 Dec 2013 11:39:56 +0100
Subject: [PATCH 29/46] clocksource: armada-370-xp: Enable timer divider only
 when needed

The current code sets the timer divider bits always. However, when
the 25 MHz timer is enabled, this is not needed and has no effect.
As this causes some confusion, rework the code so the divider is
set only when needed, i.e. when the 25 MHz timer is not in use.

Acked-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/time-armada-370-xp.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c
index d8e47e502785..0450f6b69ade 100644
--- a/drivers/clocksource/time-armada-370-xp.c
+++ b/drivers/clocksource/time-armada-370-xp.c
@@ -76,6 +76,7 @@
 static void __iomem *timer_base, *local_base;
 static unsigned int timer_clk;
 static bool timer25Mhz = true;
+static u32 enable_mask;
 
 /*
  * Number of timer ticks per jiffy.
@@ -121,8 +122,7 @@ armada_370_xp_clkevt_next_event(unsigned long delta,
 	/*
 	 * Enable the timer.
 	 */
-	local_timer_ctrl_clrset(TIMER0_RELOAD_EN,
-				TIMER0_EN | TIMER0_DIV(TIMER_DIVIDER_SHIFT));
+	local_timer_ctrl_clrset(TIMER0_RELOAD_EN, enable_mask);
 	return 0;
 }
 
@@ -141,9 +141,7 @@ armada_370_xp_clkevt_mode(enum clock_event_mode mode,
 		/*
 		 * Enable timer.
 		 */
-		local_timer_ctrl_clrset(0, TIMER0_RELOAD_EN |
-					   TIMER0_EN |
-					   TIMER0_DIV(TIMER_DIVIDER_SHIFT));
+		local_timer_ctrl_clrset(0, TIMER0_RELOAD_EN | enable_mask);
 	} else {
 		/*
 		 * Disable timer.
@@ -240,10 +238,13 @@ static void __init armada_370_xp_timer_common_init(struct device_node *np)
 	WARN_ON(!timer_base);
 	local_base = of_iomap(np, 1);
 
-	if (timer25Mhz)
+	if (timer25Mhz) {
 		set = TIMER0_25MHZ;		
-	else
+		enable_mask = TIMER0_EN;
+	} else {
 		clr = TIMER0_25MHZ;
+		enable_mask = TIMER0_EN | TIMER0_DIV(TIMER_DIVIDER_SHIFT);
+	}
 	timer_ctrl_clrset(clr, set);
 	local_timer_ctrl_clrset(clr, set);
 
@@ -267,8 +268,7 @@ static void __init armada_370_xp_timer_common_init(struct device_node *np)
 	writel(0xffffffff, timer_base + TIMER0_VAL_OFF);
 	writel(0xffffffff, timer_base + TIMER0_RELOAD_OFF);
 
-	timer_ctrl_clrset(0, TIMER0_EN | TIMER0_RELOAD_EN |
-			     TIMER0_DIV(TIMER_DIVIDER_SHIFT));
+	timer_ctrl_clrset(0, TIMER0_RELOAD_EN | enable_mask);
 
 	clocksource_mmio_init(timer_base + TIMER0_VAL_OFF,
 			      "armada_370_xp_clocksource",

From 5707f18c28f4df0f993d965aff1c168b69637d63 Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Tue, 3 Dec 2013 15:50:09 +0900
Subject: [PATCH 30/46] clocksource: sh_tmu: Remove unnecessary
 platform_set_drvdata()

The driver core clears the driver data to NULL after device_release
or on probe failure. Thus, it is not needed to manually clear the
device driver data to NULL.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/sh_tmu.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index 78b8dae49628..54ab47553701 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -509,7 +509,6 @@ static int sh_tmu_probe(struct platform_device *pdev)
 	ret = sh_tmu_setup(p, pdev);
 	if (ret) {
 		kfree(p);
-		platform_set_drvdata(pdev, NULL);
 		pm_runtime_idle(&pdev->dev);
 		return ret;
 	}

From 87d4bb9fced08054afb83af2d85f5cf0ba0e21e4 Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Tue, 3 Dec 2013 15:51:06 +0900
Subject: [PATCH 31/46] clocksource: sh_mtu2: Remove unnecessary
 platform_set_drvdata()

The driver core clears the driver data to NULL after device_release
or on probe failure. Thus, it is not needed to manually clear the
device driver data to NULL.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/sh_mtu2.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
index 4aac9ee0d0c0..f02648e27f7e 100644
--- a/drivers/clocksource/sh_mtu2.c
+++ b/drivers/clocksource/sh_mtu2.c
@@ -346,7 +346,6 @@ static int sh_mtu2_probe(struct platform_device *pdev)
 	ret = sh_mtu2_setup(p, pdev);
 	if (ret) {
 		kfree(p);
-		platform_set_drvdata(pdev, NULL);
 		pm_runtime_idle(&pdev->dev);
 		return ret;
 	}

From 38c30a8421ce8b06492121deee422ba7ecfaeef2 Mon Sep 17 00:00:00 2001
From: Michael Opdenacker <michael.opdenacker@free-electrons.com>
Date: Mon, 9 Dec 2013 10:12:10 +0100
Subject: [PATCH 32/46] clocksource: misc drivers: Remove deprecated
 IRQF_DISABLED

This patch removes the use of the IRQF_DISABLED flag

It's a NOOP since 2.6.35 and it will be removed one day.

[dlezcano] : slightly changed the changelog

Signed-off-by: Michael Opdenacker <michael.opdenacker@free-electrons.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/cadence_ttc_timer.c | 3 +--
 drivers/clocksource/cs5535-clockevt.c   | 2 +-
 drivers/clocksource/dw_apb_timer.c      | 3 +--
 drivers/clocksource/nomadik-mtu.c       | 2 +-
 drivers/clocksource/samsung_pwm_timer.c | 2 +-
 drivers/clocksource/sh_cmt.c            | 3 +--
 drivers/clocksource/sh_mtu2.c           | 3 +--
 drivers/clocksource/sh_tmu.c            | 3 +--
 8 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c
index b865b4e3d67b..8c7382bf260c 100644
--- a/drivers/clocksource/cadence_ttc_timer.c
+++ b/drivers/clocksource/cadence_ttc_timer.c
@@ -388,8 +388,7 @@ static void __init ttc_setup_clockevent(struct clk *clk,
 	__raw_writel(0x1,  ttcce->ttc.base_addr + TTC_IER_OFFSET);
 
 	err = request_irq(irq, ttc_clock_event_interrupt,
-			  IRQF_DISABLED | IRQF_TIMER,
-			  ttcce->ce.name, ttcce);
+			  IRQF_TIMER, ttcce->ce.name, ttcce);
 	if (WARN_ON(err)) {
 		kfree(ttcce);
 		return;
diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c
index ea210482dd20..db2105290898 100644
--- a/drivers/clocksource/cs5535-clockevt.c
+++ b/drivers/clocksource/cs5535-clockevt.c
@@ -131,7 +131,7 @@ static irqreturn_t mfgpt_tick(int irq, void *dev_id)
 
 static struct irqaction mfgptirq  = {
 	.handler = mfgpt_tick,
-	.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER | IRQF_SHARED,
+	.flags = IRQF_NOBALANCING | IRQF_TIMER | IRQF_SHARED,
 	.name = DRV_NAME,
 };
 
diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c
index e54ca1062d8e..f3656a6b0382 100644
--- a/drivers/clocksource/dw_apb_timer.c
+++ b/drivers/clocksource/dw_apb_timer.c
@@ -243,8 +243,7 @@ dw_apb_clockevent_init(int cpu, const char *name, unsigned rating,
 	dw_ced->irqaction.dev_id	= &dw_ced->ced;
 	dw_ced->irqaction.irq		= irq;
 	dw_ced->irqaction.flags		= IRQF_TIMER | IRQF_IRQPOLL |
-					  IRQF_NOBALANCING |
-					  IRQF_DISABLED;
+					  IRQF_NOBALANCING;
 
 	dw_ced->eoi = apbt_eoi;
 	err = setup_irq(irq, &dw_ced->irqaction);
diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c
index ed7b73b508e0..152a3f3875ee 100644
--- a/drivers/clocksource/nomadik-mtu.c
+++ b/drivers/clocksource/nomadik-mtu.c
@@ -187,7 +187,7 @@ static irqreturn_t nmdk_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction nmdk_timer_irq = {
 	.name		= "Nomadik Timer Tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER,
+	.flags		= IRQF_TIMER,
 	.handler	= nmdk_timer_interrupt,
 	.dev_id		= &nmdk_clkevt,
 };
diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index 85082e8d3052..5645cfc90c41 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -264,7 +264,7 @@ static irqreturn_t samsung_clock_event_isr(int irq, void *dev_id)
 
 static struct irqaction samsung_clock_event_irq = {
 	.name		= "samsung_time_irq",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= samsung_clock_event_isr,
 	.dev_id		= &time_event_device,
 };
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 0965e9848b3d..c6186339dd52 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -726,8 +726,7 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
 	p->irqaction.name = dev_name(&p->pdev->dev);
 	p->irqaction.handler = sh_cmt_interrupt;
 	p->irqaction.dev_id = p;
-	p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | \
-			     IRQF_IRQPOLL  | IRQF_NOBALANCING;
+	p->irqaction.flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING;
 
 	/* get hold of clock */
 	p->clk = clk_get(&p->pdev->dev, "cmt_fck");
diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
index f02648e27f7e..b6a56b1c8947 100644
--- a/drivers/clocksource/sh_mtu2.c
+++ b/drivers/clocksource/sh_mtu2.c
@@ -302,8 +302,7 @@ static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev)
 	p->irqaction.handler = sh_mtu2_interrupt;
 	p->irqaction.dev_id = p;
 	p->irqaction.irq = irq;
-	p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | \
-			     IRQF_IRQPOLL  | IRQF_NOBALANCING;
+	p->irqaction.flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING;
 
 	/* get hold of clock */
 	p->clk = clk_get(&p->pdev->dev, "mtu2_fck");
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index 54ab47553701..fc752f7b2719 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -462,8 +462,7 @@ static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev)
 	p->irqaction.handler = sh_tmu_interrupt;
 	p->irqaction.dev_id = p;
 	p->irqaction.irq = irq;
-	p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | \
-			     IRQF_IRQPOLL  | IRQF_NOBALANCING;
+	p->irqaction.flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING;
 
 	/* get hold of clock */
 	p->clk = clk_get(&p->pdev->dev, "tmu_fck");

From 39304fad8f31b2114492e9a09fe0bd1ac7eb1834 Mon Sep 17 00:00:00 2001
From: Michael Opdenacker <michael.opdenacker@free-electrons.com>
Date: Mon, 9 Dec 2013 10:35:45 +0100
Subject: [PATCH 33/46] clocksource: tegra: Remove deprecated IRQF_DISABLED

This patch removes the use of the IRQF_DISABLED flag.

It's a NOOP since 2.6.35 and it will be removed one day.

[dlezcano] : slightly changed the changelog

Signed-off-by: Michael Opdenacker <michael.opdenacker@free-electrons.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/tegra20_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c
index 642849256d82..d1869f02051c 100644
--- a/drivers/clocksource/tegra20_timer.c
+++ b/drivers/clocksource/tegra20_timer.c
@@ -149,7 +149,7 @@ static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction tegra_timer_irq = {
 	.name		= "timer0",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_TRIGGER_HIGH,
+	.flags		= IRQF_TIMER | IRQF_TRIGGER_HIGH,
 	.handler	= tegra_timer_interrupt,
 	.dev_id		= &tegra_clockevent,
 };

From 39039eb31c6e5252e25ec6336d92ddef938ccafa Mon Sep 17 00:00:00 2001
From: Michael Opdenacker <michael.opdenacker@free-electrons.com>
Date: Mon, 9 Dec 2013 10:38:50 +0100
Subject: [PATCH 34/46] clocksource: vt8500: Remove deprecated IRQF_DISABLED

This patch removes the use of the IRQF_DISABLED flag.

It's a NOOP since 2.6.35 and it will be removed one day.

[dlezcano] : slightly changed the changelog

Signed-off-by: Michael Opdenacker <michael.opdenacker@free-electrons.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/vt8500_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c
index ad3c0e83a779..1098ed3b9b89 100644
--- a/drivers/clocksource/vt8500_timer.c
+++ b/drivers/clocksource/vt8500_timer.c
@@ -124,7 +124,7 @@ static irqreturn_t vt8500_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction irq = {
 	.name    = "vt8500_timer",
-	.flags   = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags   = IRQF_TIMER | IRQF_IRQPOLL,
 	.handler = vt8500_timer_interrupt,
 	.dev_id  = &clockevent,
 };

From 6d19944bd2609a1d7d48149605aa9f62ca8bf640 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 25 Nov 2013 09:54:45 +0800
Subject: [PATCH 35/46] clocksource: bcm_kona_timer: Remove unused
 bcm_timer_ids

bcm_timer_ids is no longer used after converting to CLOCKSOURCE_OF_DECLARE.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/bcm_kona_timer.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c
index 0d7d8c3ed6b2..5176e761166b 100644
--- a/drivers/clocksource/bcm_kona_timer.c
+++ b/drivers/clocksource/bcm_kona_timer.c
@@ -98,12 +98,6 @@ kona_timer_get_counter(void *timer_base, uint32_t *msw, uint32_t *lsw)
 	return;
 }
 
-static const struct of_device_id bcm_timer_ids[] __initconst = {
-	{.compatible = "brcm,kona-timer"},
-	{.compatible = "bcm,kona-timer"}, /* deprecated name */
-	{},
-};
-
 static void __init kona_timers_init(struct device_node *node)
 {
 	u32 freq;

From 57dee992df244ccce6a6a3a88a43160e285da5d8 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Sat, 14 Dec 2013 15:07:32 +0900
Subject: [PATCH 36/46] clocksource: sh_cmt: Add clk_prepare/unprepare support

Prepare the clock at probe time, as there is no other appropriate place
in the driver where we're allowed to sleep.

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/sh_cmt.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 0965e9848b3d..940341a185d7 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -634,12 +634,18 @@ static int sh_cmt_clock_event_next(unsigned long delta,
 
 static void sh_cmt_clock_event_suspend(struct clock_event_device *ced)
 {
-	pm_genpd_syscore_poweroff(&ced_to_sh_cmt(ced)->pdev->dev);
+	struct sh_cmt_priv *p = ced_to_sh_cmt(ced);
+
+	pm_genpd_syscore_poweroff(&p->pdev->dev);
+	clk_unprepare(p->clk);
 }
 
 static void sh_cmt_clock_event_resume(struct clock_event_device *ced)
 {
-	pm_genpd_syscore_poweron(&ced_to_sh_cmt(ced)->pdev->dev);
+	struct sh_cmt_priv *p = ced_to_sh_cmt(ced);
+
+	clk_prepare(p->clk);
+	pm_genpd_syscore_poweron(&p->pdev->dev);
 }
 
 static void sh_cmt_register_clockevent(struct sh_cmt_priv *p,
@@ -737,6 +743,10 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
 		goto err2;
 	}
 
+	ret = clk_prepare(p->clk);
+	if (ret < 0)
+		goto err3;
+
 	if (res2 && (resource_size(res2) == 4)) {
 		/* assume both CMSTR and CMCSR to be 32-bit */
 		p->read_control = sh_cmt_read32;
@@ -773,19 +783,21 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
 			      cfg->clocksource_rating);
 	if (ret) {
 		dev_err(&p->pdev->dev, "registration failed\n");
-		goto err3;
+		goto err4;
 	}
 	p->cs_enabled = false;
 
 	ret = setup_irq(irq, &p->irqaction);
 	if (ret) {
 		dev_err(&p->pdev->dev, "failed to request irq %d\n", irq);
-		goto err3;
+		goto err4;
 	}
 
 	platform_set_drvdata(pdev, p);
 
 	return 0;
+err4:
+	clk_unprepare(p->clk);
 err3:
 	clk_put(p->clk);
 err2:

From f55c07607a38f84b5c7e6066ee1cfe433fa5643c Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 11 Dec 2013 18:50:25 -0800
Subject: [PATCH 37/46] timekeeping: Fix lost updates to tai adjustment

Since 48cdc135d4840 (Implement a shadow timekeeper), we have to
call timekeeping_update() after any adjustment to the timekeeping
structure in order to make sure that any adjustments to the structure
persist.

Unfortunately, the updates to the tai offset via adjtimex do not
trigger this update, causing adjustments to the tai offset to be
made and then over-written by the previous value at the next
update_wall_time() call.

This patch resovles the issue by calling timekeeping_update()
right after setting the tai offset.

Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable <stable@vger.kernel.org> #3.10+
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3abf53418b67..7488f0b97dee 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -610,6 +610,7 @@ void timekeeping_set_tai_offset(s32 tai_offset)
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	write_seqcount_begin(&timekeeper_seq);
 	__timekeeping_set_tai_offset(tk, tai_offset);
+	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 	clock_was_set();
@@ -1698,7 +1699,7 @@ int do_adjtimex(struct timex *txc)
 
 	if (tai != orig_tai) {
 		__timekeeping_set_tai_offset(tk, tai);
-		update_pvclock_gtod(tk, true);
+		timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
 		clock_was_set_delayed();
 	}
 	write_seqcount_end(&timekeeper_seq);

From 5258d3f25c76f6ab86e9333abf97a55a877d3870 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 11 Dec 2013 20:07:49 -0800
Subject: [PATCH 38/46] timekeeping: Fix potential lost pv notification of time
 change

In 780427f0e11 (Indicate that clock was set in the pvclock
gtod notifier), logic was added to pass a CLOCK_WAS_SET
notification to the pvclock notifier chain.

While that patch added a action flag returned from
accumulate_nsecs_to_secs(), it only uses the returned value
in one location, and not in the logarithmic accumulation.

This means if a leap second triggered during the logarithmic
accumulation (which is most likely where it would happen),
the notification that the clock was set would not make it to
the pv notifiers.

This patch extends the logarithmic_accumulation pass down
that action flag so proper notification will occur.

This patch also changes the varialbe action -> clock_set
per Ingo's suggestion.

Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: <xen-devel@lists.xen.org>
Cc: stable <stable@vger.kernel.org> #3.11+
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 7488f0b97dee..051855fe68bc 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1256,7 +1256,7 @@ out_adjust:
 static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 {
 	u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
-	unsigned int action = 0;
+	unsigned int clock_set = 0;
 
 	while (tk->xtime_nsec >= nsecps) {
 		int leap;
@@ -1279,10 +1279,10 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
 
 			clock_was_set_delayed();
-			action = TK_CLOCK_WAS_SET;
+			clock_set = TK_CLOCK_WAS_SET;
 		}
 	}
-	return action;
+	return clock_set;
 }
 
 /**
@@ -1295,7 +1295,8 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
  * Returns the unconsumed cycles.
  */
 static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
-						u32 shift)
+						u32 shift,
+						unsigned int *clock_set)
 {
 	cycle_t interval = tk->cycle_interval << shift;
 	u64 raw_nsecs;
@@ -1309,7 +1310,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
 	tk->cycle_last += interval;
 
 	tk->xtime_nsec += tk->xtime_interval << shift;
-	accumulate_nsecs_to_secs(tk);
+	*clock_set |= accumulate_nsecs_to_secs(tk);
 
 	/* Accumulate raw time */
 	raw_nsecs = (u64)tk->raw_interval << shift;
@@ -1367,7 +1368,7 @@ static void update_wall_time(void)
 	struct timekeeper *tk = &shadow_timekeeper;
 	cycle_t offset;
 	int shift = 0, maxshift;
-	unsigned int action;
+	unsigned int clock_set = 0;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
@@ -1402,7 +1403,8 @@ static void update_wall_time(void)
 	maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
 	shift = min(shift, maxshift);
 	while (offset >= tk->cycle_interval) {
-		offset = logarithmic_accumulation(tk, offset, shift);
+		offset = logarithmic_accumulation(tk, offset, shift,
+							&clock_set);
 		if (offset < tk->cycle_interval<<shift)
 			shift--;
 	}
@@ -1420,7 +1422,7 @@ static void update_wall_time(void)
 	 * Finally, make sure that after the rounding
 	 * xtime_nsec isn't larger than NSEC_PER_SEC
 	 */
-	action = accumulate_nsecs_to_secs(tk);
+	clock_set |= accumulate_nsecs_to_secs(tk);
 
 	write_seqcount_begin(&timekeeper_seq);
 	/* Update clock->cycle_last with the new value */
@@ -1436,7 +1438,7 @@ static void update_wall_time(void)
 	 * updating.
 	 */
 	memcpy(real_tk, tk, sizeof(*tk));
-	timekeeping_update(real_tk, action);
+	timekeeping_update(real_tk, clock_set);
 	write_seqcount_end(&timekeeper_seq);
 out:
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);

From 6fdda9a9c5db367130cf32df5d6618d08b89f46a Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 10 Dec 2013 17:18:18 -0800
Subject: [PATCH 39/46] timekeeping: Avoid possible deadlock from
 clock_was_set_delayed

As part of normal operaions, the hrtimer subsystem frequently calls
into the timekeeping code, creating a locking order of
  hrtimer locks -> timekeeping locks

clock_was_set_delayed() was suppoed to allow us to avoid deadlocks
between the timekeeping the hrtimer subsystem, so that we could
notify the hrtimer subsytem the time had changed while holding
the timekeeping locks. This was done by scheduling delayed work
that would run later once we were out of the timekeeing code.

But unfortunately the lock chains are complex enoguh that in
scheduling delayed work, we end up eventually trying to grab
an hrtimer lock.

Sasha Levin noticed this in testing when the new seqlock lockdep
enablement triggered the following (somewhat abrieviated) message:

[  251.100221] ======================================================
[  251.100221] [ INFO: possible circular locking dependency detected ]
[  251.100221] 3.13.0-rc2-next-20131206-sasha-00005-g8be2375-dirty #4053 Not tainted
[  251.101967] -------------------------------------------------------
[  251.101967] kworker/10:1/4506 is trying to acquire lock:
[  251.101967]  (timekeeper_seq){----..}, at: [<ffffffff81160e96>] retrigger_next_event+0x56/0x70
[  251.101967]
[  251.101967] but task is already holding lock:
[  251.101967]  (hrtimer_bases.lock#11){-.-...}, at: [<ffffffff81160e7c>] retrigger_next_event+0x3c/0x70
[  251.101967]
[  251.101967] which lock already depends on the new lock.
[  251.101967]
[  251.101967]
[  251.101967] the existing dependency chain (in reverse order) is:
[  251.101967]
-> #5 (hrtimer_bases.lock#11){-.-...}:
[snipped]
-> #4 (&rt_b->rt_runtime_lock){-.-...}:
[snipped]
-> #3 (&rq->lock){-.-.-.}:
[snipped]
-> #2 (&p->pi_lock){-.-.-.}:
[snipped]
-> #1 (&(&pool->lock)->rlock){-.-...}:
[  251.101967]        [<ffffffff81194803>] validate_chain+0x6c3/0x7b0
[  251.101967]        [<ffffffff81194d9d>] __lock_acquire+0x4ad/0x580
[  251.101967]        [<ffffffff81194ff2>] lock_acquire+0x182/0x1d0
[  251.101967]        [<ffffffff84398500>] _raw_spin_lock+0x40/0x80
[  251.101967]        [<ffffffff81153e69>] __queue_work+0x1a9/0x3f0
[  251.101967]        [<ffffffff81154168>] queue_work_on+0x98/0x120
[  251.101967]        [<ffffffff81161351>] clock_was_set_delayed+0x21/0x30
[  251.101967]        [<ffffffff811c4bd1>] do_adjtimex+0x111/0x160
[  251.101967]        [<ffffffff811e2711>] compat_sys_adjtimex+0x41/0x70
[  251.101967]        [<ffffffff843a4b49>] ia32_sysret+0x0/0x5
[  251.101967]
-> #0 (timekeeper_seq){----..}:
[snipped]
[  251.101967] other info that might help us debug this:
[  251.101967]
[  251.101967] Chain exists of:
  timekeeper_seq --> &rt_b->rt_runtime_lock --> hrtimer_bases.lock#11

[  251.101967]  Possible unsafe locking scenario:
[  251.101967]
[  251.101967]        CPU0                    CPU1
[  251.101967]        ----                    ----
[  251.101967]   lock(hrtimer_bases.lock#11);
[  251.101967]                                lock(&rt_b->rt_runtime_lock);
[  251.101967]                                lock(hrtimer_bases.lock#11);
[  251.101967]   lock(timekeeper_seq);
[  251.101967]
[  251.101967]  *** DEADLOCK ***
[  251.101967]
[  251.101967] 3 locks held by kworker/10:1/4506:
[  251.101967]  #0:  (events){.+.+.+}, at: [<ffffffff81154960>] process_one_work+0x200/0x530
[  251.101967]  #1:  (hrtimer_work){+.+...}, at: [<ffffffff81154960>] process_one_work+0x200/0x530
[  251.101967]  #2:  (hrtimer_bases.lock#11){-.-...}, at: [<ffffffff81160e7c>] retrigger_next_event+0x3c/0x70
[  251.101967]
[  251.101967] stack backtrace:
[  251.101967] CPU: 10 PID: 4506 Comm: kworker/10:1 Not tainted 3.13.0-rc2-next-20131206-sasha-00005-g8be2375-dirty #4053
[  251.101967] Workqueue: events clock_was_set_work

So the best solution is to avoid calling clock_was_set_delayed() while
holding the timekeeping lock, and instead using a flag variable to
decide if we should call clock_was_set() once we've released the locks.

This works for the case here, where the do_adjtimex() was the deadlock
trigger point. Unfortuantely, in update_wall_time() we still hold
the jiffies lock, which would deadlock with the ipi triggered by
clock_was_set(), preventing us from calling it even after we drop the
timekeeping lock. So instead call clock_was_set_delayed() at that point.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: stable <stable@vger.kernel.org> #3.10+
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 051855fe68bc..d62682b6df4a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1278,7 +1278,6 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 
 			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
 
-			clock_was_set_delayed();
 			clock_set = TK_CLOCK_WAS_SET;
 		}
 	}
@@ -1442,6 +1441,19 @@ static void update_wall_time(void)
 	write_seqcount_end(&timekeeper_seq);
 out:
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+	if (clock_was_set) {
+		/*
+		 * XXX -  I'd rather we just call clock_was_set(), but
+		 * since we're currently holding the jiffies lock, calling
+		 * clock_was_set would trigger an ipi which would then grab
+		 * the jiffies lock and we'd deadlock. :(
+		 * The right solution should probably be droping
+		 * the jiffies lock before calling update_wall_time
+		 * but that requires some rework of the tick sched
+		 * code.
+		 */
+		clock_was_set_delayed();
+	}
 }
 
 /**
@@ -1702,11 +1714,13 @@ int do_adjtimex(struct timex *txc)
 	if (tai != orig_tai) {
 		__timekeeping_set_tai_offset(tk, tai);
 		timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
-		clock_was_set_delayed();
 	}
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
+	if (tai != orig_tai)
+		clock_was_set();
+
 	ntp_notify_cmos_timer();
 
 	return ret;

From 47a1b796306356f358e515149d86baf0cc6bf007 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 12 Dec 2013 13:10:55 -0800
Subject: [PATCH 40/46] tick/timekeeping: Call update_wall_time outside the
 jiffies lock

Since the xtime lock was split into the timekeeping lock and
the jiffies lock, we no longer need to call update_wall_time()
while holding the jiffies lock.

Thus, this patch splits update_wall_time() out from do_timer().

This allows us to get away from calling clock_was_set_delayed()
in update_wall_time() and instead use the standard clock_was_set()
call that previously would deadlock, as it causes the jiffies lock
to be acquired.

Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/tick-common.c   |  1 +
 kernel/time/tick-internal.h |  1 +
 kernel/time/tick-sched.c    |  1 +
 kernel/time/timekeeping.c   | 19 ++++---------------
 4 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 64522ecdfe0e..91c5f27e82a3 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -70,6 +70,7 @@ static void tick_periodic(int cpu)
 
 		do_timer(1);
 		write_sequnlock(&jiffies_lock);
+		update_wall_time();
 	}
 
 	update_process_times(user_mode(get_irq_regs()));
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index e2bced59b6dd..8329669b51ec 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -155,3 +155,4 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
 #endif
 
 extern void do_timer(unsigned long ticks);
+extern void update_wall_time(void);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2afd43fca93b..c58b03d89951 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -86,6 +86,7 @@ static void tick_do_update_jiffies64(ktime_t now)
 		tick_next_period = ktime_add(last_jiffies_update, tick_period);
 	}
 	write_sequnlock(&jiffies_lock);
+	update_wall_time();
 }
 
 /*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d62682b6df4a..44b7e6bb081b 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1360,7 +1360,7 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
  * update_wall_time - Uses the current clocksource to increment the wall time
  *
  */
-static void update_wall_time(void)
+void update_wall_time(void)
 {
 	struct clocksource *clock;
 	struct timekeeper *real_tk = &timekeeper;
@@ -1441,19 +1441,8 @@ static void update_wall_time(void)
 	write_seqcount_end(&timekeeper_seq);
 out:
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
-	if (clock_was_set) {
-		/*
-		 * XXX -  I'd rather we just call clock_was_set(), but
-		 * since we're currently holding the jiffies lock, calling
-		 * clock_was_set would trigger an ipi which would then grab
-		 * the jiffies lock and we'd deadlock. :(
-		 * The right solution should probably be droping
-		 * the jiffies lock before calling update_wall_time
-		 * but that requires some rework of the tick sched
-		 * code.
-		 */
-		clock_was_set_delayed();
-	}
+	if (clock_set)
+		clock_was_set();
 }
 
 /**
@@ -1598,7 +1587,6 @@ struct timespec get_monotonic_coarse(void)
 void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
-	update_wall_time();
 	calc_global_load(ticks);
 }
 
@@ -1756,4 +1744,5 @@ void xtime_update(unsigned long ticks)
 	write_seqlock(&jiffies_lock);
 	do_timer(ticks);
 	write_sequnlock(&jiffies_lock);
+	update_wall_time();
 }

From 04005f6011e3b504cd4d791d9769f7cb9a3b2eae Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 10 Dec 2013 17:13:35 -0800
Subject: [PATCH 41/46] timekeeping: Fix CLOCK_TAI timer/nanosleep delays

A think-o in the calculation of the monotonic -> tai time offset
results in CLOCK_TAI timers and nanosleeps to expire late (the
latency is ~2x the tai offset).

Fix this by adding the tai offset from the realtime offset instead
of subtracting.

Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable <stable@vger.kernel.org> #3.10+
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 44b7e6bb081b..3f6a827680fa 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -77,7 +77,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
 	tk->wall_to_monotonic = wtm;
 	set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
 	tk->offs_real = timespec_to_ktime(tmp);
-	tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tk->tai_offset, 0));
+	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
 }
 
 static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
@@ -595,7 +595,7 @@ s32 timekeeping_get_tai_offset(void)
 static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
 {
 	tk->tai_offset = tai_offset;
-	tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tai_offset, 0));
+	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));
 }
 
 /**

From 330a1617b0a6268d427aa5922c94d082b1d3e96d Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 11 Dec 2013 19:10:36 -0800
Subject: [PATCH 42/46] timekeeping: Fix missing timekeeping_update in suspend
 path

Since 48cdc135d4840 (Implement a shadow timekeeper), we have to
call timekeeping_update() after any adjustment to the timekeeping
structure in order to make sure that any adjustments to the structure
persist.

In the timekeeping suspend path, we udpate the timekeeper
structure, so we should be sure to update the shadow-timekeeper
before releasing the timekeeping locks. Currently this isn't done.

In most cases, the next time related code to run would be
timekeeping_resume, which does update the shadow-timekeeper, but
in an abundence of caution, this patch adds the call to
timekeeping_update() in the suspend path.

Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable <stable@vger.kernel.org> #3.10+
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3f6a827680fa..2793c4382529 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1024,6 +1024,8 @@ static int timekeeping_suspend(void)
 		timekeeping_suspend_time =
 			timespec_add(timekeeping_suspend_time, delta_delta);
 	}
+
+	timekeeping_update(tk, TK_MIRROR);
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 

From d26e4fe0dbe95778b9dbe80b6aa884d71fb6f459 Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Thu, 28 Nov 2013 16:28:55 +0800
Subject: [PATCH 43/46] timekeeper: fix comment typo for tk_setup_internals()

Fix trivial comment typo for tk_setup_internals().

Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2793c4382529..3ff30640fc9d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -90,8 +90,9 @@ static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
 }
 
 /**
- * timekeeper_setup_internals - Set up internals to use clocksource clock.
+ * tk_setup_internals - Set up internals to use clocksource clock.
  *
+ * @tk:		The target timekeeper to setup.
  * @clock:		Pointer to clocksource.
  *
  * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment

From d5a1c7e3fc38d9c7d629e1e47f32f863acbdec3d Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@alien8.de>
Date: Sat, 20 Jul 2013 19:00:23 +0200
Subject: [PATCH 44/46] rtc-cmos: Add an alarm disable quirk

41c7f7424259f ("rtc: Disable the alarm in the hardware (v2)") added the
functionality to disable the RTC wake alarm when shutting down the box.

However, there are at least two b0rked BIOSes we know about:

https://bugzilla.novell.com/show_bug.cgi?id=812592
https://bugzilla.novell.com/show_bug.cgi?id=805740

where, when wakeup alarm is enabled in the BIOS, the machine reboots
automatically right after shutdown, regardless of what wakeup time is
programmed.

Bisecting the issue lead to this patch so disable its functionality with
a DMI quirk only for those boxes.

Cc: Brecht Machiels <brecht@mos6581.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Rabin Vincent <rabin.vincent@stericsson.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
[jstultz: Changed variable name for clarity, added extra dmi entry]
Tested-by: Brecht Machiels <brecht@mos6581.org>
Tested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/rtc/rtc-cmos.c | 52 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index f14876256a4a..a2325bc5e497 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -34,11 +34,11 @@
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <linux/platform_device.h>
-#include <linux/mod_devicetable.h>
 #include <linux/log2.h>
 #include <linux/pm.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/dmi.h>
 
 /* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */
 #include <asm-generic/rtc.h>
@@ -377,6 +377,51 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	return 0;
 }
 
+/*
+ * Do not disable RTC alarm on shutdown - workaround for b0rked BIOSes.
+ */
+static bool alarm_disable_quirk;
+
+static int __init set_alarm_disable_quirk(const struct dmi_system_id *id)
+{
+	alarm_disable_quirk = true;
+	pr_info("rtc-cmos: BIOS has alarm-disable quirk. ");
+	pr_info("RTC alarms disabled\n");
+	return 0;
+}
+
+static const struct dmi_system_id rtc_quirks[] __initconst = {
+	/* https://bugzilla.novell.com/show_bug.cgi?id=805740 */
+	{
+		.callback = set_alarm_disable_quirk,
+		.ident    = "IBM Truman",
+		.matches  = {
+			DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "4852570"),
+		},
+	},
+	/* https://bugzilla.novell.com/show_bug.cgi?id=812592 */
+	{
+		.callback = set_alarm_disable_quirk,
+		.ident    = "Gigabyte GA-990XA-UD3",
+		.matches  = {
+			DMI_MATCH(DMI_SYS_VENDOR,
+					"Gigabyte Technology Co., Ltd."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "GA-990XA-UD3"),
+		},
+	},
+	/* http://permalink.gmane.org/gmane.linux.kernel/1604474 */
+	{
+		.callback = set_alarm_disable_quirk,
+		.ident    = "Toshiba Satellite L300",
+		.matches  = {
+			DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Satellite L300"),
+		},
+	},
+	{}
+};
+
 static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
@@ -385,6 +430,9 @@ static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled)
 	if (!is_valid_irq(cmos->irq))
 		return -EINVAL;
 
+	if (alarm_disable_quirk)
+		return 0;
+
 	spin_lock_irqsave(&rtc_lock, flags);
 
 	if (enabled)
@@ -1157,6 +1205,8 @@ static int __init cmos_init(void)
 			platform_driver_registered = true;
 	}
 
+	dmi_check_system(rtc_quirks);
+
 	if (retval == 0)
 		return 0;
 

From 38aef31ce7773624c8f09ff58c4c27b3b955faaf Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 23 Dec 2013 12:53:22 -0800
Subject: [PATCH 45/46] timekeeping: Remove comment that's mostly out of date

Prior to 92bb1fcf57a0c2e45f7e67fbf0a8ed475a749236 (Only
do nanosecond rounding on GENERIC_TIME_VSYSCALL_OLD
systems), the comment here was accuate, but now we can
mostly avoid the extra rounding which causes the unlikey
to be actually likely here.

So remove the out of date comment.

Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3ff30640fc9d..abfa4e86ac54 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1134,16 +1134,6 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
 		 * we can adjust by 1.
 		 */
 		error >>= 2;
-		/*
-		 * XXX - In update_wall_time, we round up to the next
-		 * nanosecond, and store the amount rounded up into
-		 * the error. This causes the likely below to be unlikely.
-		 *
-		 * The proper fix is to avoid rounding up by using
-		 * the high precision tk->xtime_nsec instead of
-		 * xtime.tv_nsec everywhere. Fixing this will take some
-		 * time.
-		 */
 		if (likely(error <= interval))
 			adj = 1;
 		else

From 00e2bcd6d35f59fce7fa0e76e24d08f74c6a8506 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 16 Jan 2014 17:38:06 -0800
Subject: [PATCH 46/46] clocksource: Timer-sun5i: Switch to
 sched_clock_register()

The 32-bit sched_clock() interface supports 64 bits since
3.13-rc1. Upgrade to the 64-bit function to allow us to remove
the 32-bit registration interface.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1389922686-6249-1-git-send-email-sboyd@codeaurora.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/clocksource/timer-sun5i.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index bddc52233d2a..deebcd6469fc 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -136,7 +136,7 @@ static struct irqaction sun5i_timer_irq = {
 	.dev_id = &sun5i_clockevent,
 };
 
-static u32 sun5i_timer_sched_read(void)
+static u64 sun5i_timer_sched_read(void)
 {
 	return ~readl(timer_base + TIMER_CNTVAL_LO_REG(1));
 }
@@ -166,7 +166,7 @@ static void __init sun5i_timer_init(struct device_node *node)
 	writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
 	       timer_base + TIMER_CTL_REG(1));
 
-	setup_sched_clock(sun5i_timer_sched_read, 32, rate);
+	sched_clock_register(sun5i_timer_sched_read, 32, rate);
 	clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name,
 			      rate, 340, 32, clocksource_mmio_readl_down);