From 402d9a1e02f7215628f13b7c80ff3e98c3a0cadc Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Wed, 26 Feb 2014 05:37:29 +0530 Subject: [PATCH 1/7] powerpc: Free up the slot of PPC_MSG_CALL_FUNC_SINGLE IPI message The IPI handlers for both PPC_MSG_CALL_FUNC and PPC_MSG_CALL_FUNC_SINGLE map to a common implementation - generic_smp_call_function_single_interrupt(). So, we can consolidate them and save one of the IPI message slots, (which are precious on powerpc, since only 4 of those slots are available). So, implement the functionality of PPC_MSG_CALL_FUNC_SINGLE using PPC_MSG_CALL_FUNC itself and release its IPI message slot, so that it can be used for something else in the future, if desired. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Preeti U. Murthy Acked-by: Geoff Levand [For the PS3 part] Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/smp.h | 2 +- arch/powerpc/kernel/smp.c | 12 +++++------- arch/powerpc/platforms/cell/interrupt.c | 2 +- arch/powerpc/platforms/ps3/smp.c | 2 +- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 084e0807db98..9f7356bf6482 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -120,7 +120,7 @@ extern int cpu_to_core_id(int cpu); * in /proc/interrupts will be wrong!!! --Troy */ #define PPC_MSG_CALL_FUNCTION 0 #define PPC_MSG_RESCHEDULE 1 -#define PPC_MSG_CALL_FUNC_SINGLE 2 +#define PPC_MSG_UNUSED 2 #define PPC_MSG_DEBUGGER_BREAK 3 /* for irq controllers that have dedicated ipis per message (4) */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ac2621af3154..ee7d76bfcb4c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -145,9 +145,9 @@ static irqreturn_t reschedule_action(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t call_function_single_action(int irq, void *data) +static irqreturn_t unused_action(int irq, void *data) { - generic_smp_call_function_single_interrupt(); + /* This slot is unused and hence available for use, if needed */ return IRQ_HANDLED; } @@ -168,14 +168,14 @@ static irqreturn_t debug_ipi_action(int irq, void *data) static irq_handler_t smp_ipi_action[] = { [PPC_MSG_CALL_FUNCTION] = call_function_action, [PPC_MSG_RESCHEDULE] = reschedule_action, - [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action, + [PPC_MSG_UNUSED] = unused_action, [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action, }; const char *smp_ipi_name[] = { [PPC_MSG_CALL_FUNCTION] = "ipi call function", [PPC_MSG_RESCHEDULE] = "ipi reschedule", - [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single", + [PPC_MSG_UNUSED] = "ipi unused", [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger", }; @@ -251,8 +251,6 @@ irqreturn_t smp_ipi_demux(void) generic_smp_call_function_interrupt(); if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) scheduler_ipi(); - if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNC_SINGLE)) - generic_smp_call_function_single_interrupt(); if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK)) debug_ipi_action(0, NULL); } while (info->messages); @@ -280,7 +278,7 @@ EXPORT_SYMBOL_GPL(smp_send_reschedule); void arch_send_call_function_single_ipi(int cpu) { - do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE); + do_message_pass(cpu, PPC_MSG_CALL_FUNCTION); } void arch_send_call_function_ipi_mask(const struct cpumask *mask) diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 2d42f3bb66d6..adf372606a1c 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -215,7 +215,7 @@ void iic_request_IPIs(void) { iic_request_ipi(PPC_MSG_CALL_FUNCTION); iic_request_ipi(PPC_MSG_RESCHEDULE); - iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE); + iic_request_ipi(PPC_MSG_UNUSED); iic_request_ipi(PPC_MSG_DEBUGGER_BREAK); } diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c index 4b35166229fe..00d1a7c2a5a4 100644 --- a/arch/powerpc/platforms/ps3/smp.c +++ b/arch/powerpc/platforms/ps3/smp.c @@ -76,7 +76,7 @@ static int __init ps3_smp_probe(void) BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION != 0); BUILD_BUG_ON(PPC_MSG_RESCHEDULE != 1); - BUILD_BUG_ON(PPC_MSG_CALL_FUNC_SINGLE != 2); + BUILD_BUG_ON(PPC_MSG_UNUSED != 2); BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK != 3); for (i = 0; i < MSG_COUNT; i++) { From 1b67bee129a36c22c17186cc2a9981678e9323ee Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Wed, 26 Feb 2014 05:37:43 +0530 Subject: [PATCH 2/7] powerpc: Implement tick broadcast IPI as a fixed IPI message For scalability and performance reasons, we want the tick broadcast IPIs to be handled as efficiently as possible. Fixed IPI messages are one of the most efficient mechanisms available - they are faster than the smp_call_function mechanism because the IPI handlers are fixed and hence they don't involve costly operations such as adding IPI handlers to the target CPU's function queue, acquiring locks for synchronization etc. Luckily we have an unused IPI message slot, so use that to implement tick broadcast IPIs efficiently. Signed-off-by: Srivatsa S. Bhat [Functions renamed to tick_broadcast* and Changelog modified by Preeti U. Murthy] Signed-off-by: Preeti U. Murthy Acked-by: Geoff Levand [For the PS3 part] Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/smp.h | 2 +- arch/powerpc/include/asm/time.h | 1 + arch/powerpc/kernel/smp.c | 21 +++++++++++++++++---- arch/powerpc/kernel/time.c | 5 +++++ arch/powerpc/platforms/cell/interrupt.c | 2 +- arch/powerpc/platforms/ps3/smp.c | 2 +- 6 files changed, 26 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 9f7356bf6482..ff51046b6466 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -120,7 +120,7 @@ extern int cpu_to_core_id(int cpu); * in /proc/interrupts will be wrong!!! --Troy */ #define PPC_MSG_CALL_FUNCTION 0 #define PPC_MSG_RESCHEDULE 1 -#define PPC_MSG_UNUSED 2 +#define PPC_MSG_TICK_BROADCAST 2 #define PPC_MSG_DEBUGGER_BREAK 3 /* for irq controllers that have dedicated ipis per message (4) */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index c1f267694acb..1d428e6007ca 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -28,6 +28,7 @@ extern struct clock_event_device decrementer_clockevent; struct rtc_time; extern void to_tm(int tim, struct rtc_time * tm); extern void GregorianDay(struct rtc_time *tm); +extern void tick_broadcast_ipi_handler(void); extern void generic_calibrate_decr(void); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ee7d76bfcb4c..e2a4232c5871 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -145,9 +146,9 @@ static irqreturn_t reschedule_action(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t unused_action(int irq, void *data) +static irqreturn_t tick_broadcast_ipi_action(int irq, void *data) { - /* This slot is unused and hence available for use, if needed */ + tick_broadcast_ipi_handler(); return IRQ_HANDLED; } @@ -168,14 +169,14 @@ static irqreturn_t debug_ipi_action(int irq, void *data) static irq_handler_t smp_ipi_action[] = { [PPC_MSG_CALL_FUNCTION] = call_function_action, [PPC_MSG_RESCHEDULE] = reschedule_action, - [PPC_MSG_UNUSED] = unused_action, + [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action, [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action, }; const char *smp_ipi_name[] = { [PPC_MSG_CALL_FUNCTION] = "ipi call function", [PPC_MSG_RESCHEDULE] = "ipi reschedule", - [PPC_MSG_UNUSED] = "ipi unused", + [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast", [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger", }; @@ -251,6 +252,8 @@ irqreturn_t smp_ipi_demux(void) generic_smp_call_function_interrupt(); if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) scheduler_ipi(); + if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)) + tick_broadcast_ipi_handler(); if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK)) debug_ipi_action(0, NULL); } while (info->messages); @@ -289,6 +292,16 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) do_message_pass(cpu, PPC_MSG_CALL_FUNCTION); } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ + unsigned int cpu; + + for_each_cpu(cpu, mask) + do_message_pass(cpu, PPC_MSG_TICK_BROADCAST); +} +#endif + #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) void smp_send_debugger_break(void) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index b3dab20acf34..3ff97dbb35be 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -825,6 +825,11 @@ static void decrementer_set_mode(enum clock_event_mode mode, decrementer_set_next_event(DECREMENTER_MAX, dev); } +/* Interrupt handler for the timer broadcast IPI */ +void tick_broadcast_ipi_handler(void) +{ +} + static void register_decrementer_clockevent(int cpu) { struct clock_event_device *dec = &per_cpu(decrementers, cpu); diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index adf372606a1c..8a106b4172e0 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -215,7 +215,7 @@ void iic_request_IPIs(void) { iic_request_ipi(PPC_MSG_CALL_FUNCTION); iic_request_ipi(PPC_MSG_RESCHEDULE); - iic_request_ipi(PPC_MSG_UNUSED); + iic_request_ipi(PPC_MSG_TICK_BROADCAST); iic_request_ipi(PPC_MSG_DEBUGGER_BREAK); } diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c index 00d1a7c2a5a4..b358bec6c8cb 100644 --- a/arch/powerpc/platforms/ps3/smp.c +++ b/arch/powerpc/platforms/ps3/smp.c @@ -76,7 +76,7 @@ static int __init ps3_smp_probe(void) BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION != 0); BUILD_BUG_ON(PPC_MSG_RESCHEDULE != 1); - BUILD_BUG_ON(PPC_MSG_UNUSED != 2); + BUILD_BUG_ON(PPC_MSG_TICK_BROADCAST != 2); BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK != 3); for (i = 0; i < MSG_COUNT; i++) { From 1b7839559b3f1c7a09ff94904788a732063ce2de Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Wed, 26 Feb 2014 05:38:01 +0530 Subject: [PATCH 3/7] powerpc: Split timer_interrupt() into timer handling and interrupt handling routines Split timer_interrupt(), which is the local timer interrupt handler on ppc into routines called during regular interrupt handling and __timer_interrupt(), which takes care of running local timers and collecting time related stats. This will enable callers interested only in running expired local timers to directly call into __timer_interupt(). One of the use cases of this is the tick broadcast IPI handling in which the sleeping CPUs need to handle the local timers that have expired. Signed-off-by: Preeti U Murthy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 83 +++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 3ff97dbb35be..df2989b0d4c0 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -478,47 +478,13 @@ void arch_irq_work_raise(void) #endif /* CONFIG_IRQ_WORK */ -/* - * timer_interrupt - gets called when the decrementer overflows, - * with interrupts disabled. - */ -void timer_interrupt(struct pt_regs * regs) +void __timer_interrupt(void) { - struct pt_regs *old_regs; + struct pt_regs *regs = get_irq_regs(); u64 *next_tb = &__get_cpu_var(decrementers_next_tb); struct clock_event_device *evt = &__get_cpu_var(decrementers); u64 now; - /* Ensure a positive value is written to the decrementer, or else - * some CPUs will continue to take decrementer exceptions. - */ - set_dec(DECREMENTER_MAX); - - /* Some implementations of hotplug will get timer interrupts while - * offline, just ignore these and we also need to set - * decrementers_next_tb as MAX to make sure __check_irq_replay - * don't replay timer interrupt when return, otherwise we'll trap - * here infinitely :( - */ - if (!cpu_online(smp_processor_id())) { - *next_tb = ~(u64)0; - return; - } - - /* Conditionally hard-enable interrupts now that the DEC has been - * bumped to its maximum value - */ - may_hard_irq_enable(); - - -#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) - if (atomic_read(&ppc_n_lost_interrupts) != 0) - do_IRQ(regs); -#endif - - old_regs = set_irq_regs(regs); - irq_enter(); - trace_timer_interrupt_entry(regs); if (test_irq_work_pending()) { @@ -551,7 +517,48 @@ void timer_interrupt(struct pt_regs * regs) #endif trace_timer_interrupt_exit(regs); +} +/* + * timer_interrupt - gets called when the decrementer overflows, + * with interrupts disabled. + */ +void timer_interrupt(struct pt_regs * regs) +{ + struct pt_regs *old_regs; + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + + /* Ensure a positive value is written to the decrementer, or else + * some CPUs will continue to take decrementer exceptions. + */ + set_dec(DECREMENTER_MAX); + + /* Some implementations of hotplug will get timer interrupts while + * offline, just ignore these and we also need to set + * decrementers_next_tb as MAX to make sure __check_irq_replay + * don't replay timer interrupt when return, otherwise we'll trap + * here infinitely :( + */ + if (!cpu_online(smp_processor_id())) { + *next_tb = ~(u64)0; + return; + } + + /* Conditionally hard-enable interrupts now that the DEC has been + * bumped to its maximum value + */ + may_hard_irq_enable(); + + +#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) + if (atomic_read(&ppc_n_lost_interrupts) != 0) + do_IRQ(regs); +#endif + + old_regs = set_irq_regs(regs); + irq_enter(); + + __timer_interrupt(); irq_exit(); set_irq_regs(old_regs); } @@ -828,6 +835,10 @@ static void decrementer_set_mode(enum clock_event_mode mode, /* Interrupt handler for the timer broadcast IPI */ void tick_broadcast_ipi_handler(void) { + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + + *next_tb = get_tb_or_rtc(); + __timer_interrupt(); } static void register_decrementer_clockevent(int cpu) From aca79d2b6ec2c0b955b22abb71c6dab90fa1d4d5 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Wed, 26 Feb 2014 05:38:25 +0530 Subject: [PATCH 4/7] powerpc/powernv: Add context management for Fast Sleep Before adding Fast-Sleep into the cpuidle framework, some low level support needs to be added to enable it. This includes saving and restoring of certain registers at entry and exit time of this state respectively just like we do in the NAP idle state. Signed-off-by: Vaidyanathan Srinivasan [Changelog modified by Preeti U. Murthy ] Signed-off-by: Preeti U. Murthy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/processor.h | 1 + arch/powerpc/kernel/exceptions-64s.S | 10 ++++- arch/powerpc/kernel/idle_power7.S | 63 +++++++++++++++++++--------- 3 files changed, 53 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index b62de43ae5f3..d660dc36831a 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -450,6 +450,7 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern void power7_nap(void); +extern void power7_sleep(void); extern void flush_instruction_cache(void); extern void hard_reset_now(void); extern void poweroff_now(void); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 38d507306a11..b01a9cb441e4 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -121,9 +121,10 @@ BEGIN_FTR_SECTION cmpwi cr1,r13,2 /* Total loss of HV state is fatal, we could try to use the * PIR to locate a PACA, then use an emergency stack etc... - * but for now, let's just stay stuck here + * OPAL v3 based powernv platforms have new idle states + * which fall in this catagory. */ - bgt cr1,. + bgt cr1,8f GET_PACA(r13) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -141,6 +142,11 @@ BEGIN_FTR_SECTION beq cr1,2f b .power7_wakeup_noloss 2: b .power7_wakeup_loss + + /* Fast Sleep wakeup on PowerNV */ +8: GET_PACA(r13) + b .power7_wakeup_loss + 9: END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif /* CONFIG_PPC_P7_NAP */ diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 3fdef0f0c67f..14f78bec62c4 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -20,17 +20,27 @@ #undef DEBUG +/* Idle state entry routines */ + +#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ + /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ + std r0,0(r1); \ + ptesync; \ + ld r0,0(r1); \ +1: cmp cr0,r0,r0; \ + bne 1b; \ + IDLE_INST; \ + b . + .text -_GLOBAL(power7_idle) - /* Now check if user or arch enabled NAP mode */ - LOAD_REG_ADDRBASE(r3,powersave_nap) - lwz r4,ADDROFF(powersave_nap)(r3) - cmpwi 0,r4,0 - beqlr - /* fall through */ - -_GLOBAL(power7_nap) +/* + * Pass requested state in r3: + * 0 - nap + * 1 - sleep + */ +_GLOBAL(power7_powersave_common) + /* Use r3 to pass state nap/sleep/winkle */ /* NAP is a state loss, we create a regs frame on the * stack, fill it up with the state we care about and * stick a pointer to it in PACAR1. We really only @@ -79,8 +89,8 @@ _GLOBAL(power7_nap) /* Continue saving state */ SAVE_GPR(2, r1) SAVE_NVGPRS(r1) - mfcr r3 - std r3,_CCR(r1) + mfcr r4 + std r4,_CCR(r1) std r9,_MSR(r1) std r1,PACAR1(r13) @@ -90,15 +100,30 @@ _GLOBAL(power7_enter_nap_mode) li r4,KVM_HWTHREAD_IN_NAP stb r4,HSTATE_HWTHREAD_STATE(r13) #endif + cmpwi cr0,r3,1 + beq 2f + IDLE_STATE_ENTER_SEQ(PPC_NAP) + /* No return */ +2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + /* No return */ - /* Magic NAP mode enter sequence */ - std r0,0(r1) - ptesync - ld r0,0(r1) -1: cmp cr0,r0,r0 - bne 1b - PPC_NAP - b . +_GLOBAL(power7_idle) + /* Now check if user or arch enabled NAP mode */ + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) + cmpwi 0,r4,0 + beqlr + /* fall through */ + +_GLOBAL(power7_nap) + li r3,0 + b power7_powersave_common + /* No return */ + +_GLOBAL(power7_sleep) + li r3,1 + b power7_powersave_common + /* No return */ _GLOBAL(power7_wakeup_loss) ld r1,PACAR1(r13) From 97eb001f03494758a938300908b88929163650ce Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Wed, 26 Feb 2014 05:38:43 +0530 Subject: [PATCH 5/7] powerpc/powernv: Add OPAL call to resync timebase on wakeup During "Fast-sleep" and deeper power savings state, decrementer and timebase could be stopped making it out of sync with rest of the cores in the system. Add a firmware call to request platform to resync timebase using low level platform methods. Signed-off-by: Vaidyanathan Srinivasan Signed-off-by: Preeti U. Murthy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 2 ++ arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kernel/idle_power7.S | 27 +++++++++++++++++++ .../powerpc/platforms/powernv/opal-wrappers.S | 1 + 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 40157e2ca691..c71c72e47d47 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -154,6 +154,7 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_FLASH_VALIDATE 76 #define OPAL_FLASH_MANAGE 77 #define OPAL_FLASH_UPDATE 78 +#define OPAL_RESYNC_TIMEBASE 79 #define OPAL_GET_MSG 85 #define OPAL_CHECK_ASYNC_COMPLETION 86 #define OPAL_SYNC_HOST_REBOOT 87 @@ -865,6 +866,7 @@ extern void opal_flash_init(void); extern int opal_machine_check(struct pt_regs *regs); extern void opal_shutdown(void); +extern int opal_resync_timebase(void); extern void opal_lpc_init(void); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index b01a9cb441e4..9533d7a9223c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -145,7 +145,7 @@ BEGIN_FTR_SECTION /* Fast Sleep wakeup on PowerNV */ 8: GET_PACA(r13) - b .power7_wakeup_loss + b .power7_wakeup_tb_loss 9: END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 14f78bec62c4..c3ab86975614 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -17,6 +17,7 @@ #include #include #include +#include #undef DEBUG @@ -125,6 +126,32 @@ _GLOBAL(power7_sleep) b power7_powersave_common /* No return */ +_GLOBAL(power7_wakeup_tb_loss) + ld r2,PACATOC(r13); + ld r1,PACAR1(r13) + + /* Time base re-sync */ + li r0,OPAL_RESYNC_TIMEBASE + LOAD_REG_ADDR(r11,opal); + ld r12,8(r11); + ld r2,0(r11); + mtctr r12 + bctrl + + /* TODO: Check r3 for failure */ + + REST_NVGPRS(r1) + REST_GPR(2, r1) + ld r3,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r3 + mfspr r3,SPRN_SRR1 /* Return SRR1 */ + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid + _GLOBAL(power7_wakeup_loss) ld r1,PACAR1(r13) REST_NVGPRS(r1) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 3e8829c40fbb..aab54b60334f 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -126,6 +126,7 @@ OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU); OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); +OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE); OPAL_CALL(opal_get_msg, OPAL_GET_MSG); OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION); OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT); From 0d94873011a10cea78d1bc0ba8cfc4203559d534 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Wed, 26 Feb 2014 05:39:06 +0530 Subject: [PATCH 6/7] cpuidle/powernv: Add "Fast-Sleep" CPU idle state Fast sleep is one of the deep idle states on Power8 in which local timers of CPUs stop. On PowerPC we do not have an external clock device which can handle wakeup of such CPUs. Now that we have the support in the tick broadcast framework for archs that do not sport such a device and the low level support for fast sleep, enable it in the cpuidle framework on PowerNV. Signed-off-by: Preeti U Murthy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 2 ++ arch/powerpc/kernel/time.c | 4 +++- drivers/cpuidle/cpuidle-powernv.c | 34 +++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 957bf344c0f5..b84142000a4d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -130,6 +130,8 @@ config PPC select GENERIC_CMOS_UPDATE select GENERIC_TIME_VSYSCALL_OLD select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HAVE_MOD_ARCH_SPECIFIC diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index df2989b0d4c0..122a580f7322 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -106,7 +107,7 @@ struct clock_event_device decrementer_clockevent = { .irq = 0, .set_next_event = decrementer_set_next_event, .set_mode = decrementer_set_mode, - .features = CLOCK_EVT_FEAT_ONESHOT, + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, }; EXPORT_SYMBOL(decrementer_clockevent); @@ -944,6 +945,7 @@ void __init time_init(void) clocksource_init(); init_decrementer_clockevent(); + tick_setup_hrtimer_broadcast(); } diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 78fd174c57e8..4fb97cef82fd 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,32 @@ static int nap_loop(struct cpuidle_device *dev, return index; } +static int fastsleep_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long old_lpcr = mfspr(SPRN_LPCR); + unsigned long new_lpcr; + + if (unlikely(system_state < SYSTEM_RUNNING)) + return index; + + new_lpcr = old_lpcr; + new_lpcr &= ~(LPCR_MER | LPCR_PECE); /* lpcr[mer] must be 0 */ + + /* exit powersave upon external interrupt, but not decrementer + * interrupt. + */ + new_lpcr |= LPCR_PECE0; + + mtspr(SPRN_LPCR, new_lpcr); + power7_sleep(); + + mtspr(SPRN_LPCR, old_lpcr); + + return index; +} + /* * States for dedicated partition case. */ @@ -67,6 +94,13 @@ static struct cpuidle_state powernv_states[] = { .exit_latency = 10, .target_residency = 100, .enter = &nap_loop }, + { /* Fastsleep */ + .name = "fastsleep", + .desc = "fastsleep", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 100, + .enter = &fastsleep_loop }, }; static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n, From 0888839c5b62c44a55ac9d28acc273ba663c65ea Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Wed, 26 Feb 2014 05:39:20 +0530 Subject: [PATCH 7/7] cpuidle/powernv: Parse device tree to setup idle states Add deep idle states such as nap and fast sleep to the cpuidle state table only if they are discovered from the device tree during cpuidle initialization. Signed-off-by: Preeti U Murthy Signed-off-by: Benjamin Herrenschmidt --- drivers/cpuidle/cpuidle-powernv.c | 82 ++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 17 deletions(-) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 4fb97cef82fd..fdae1c476e27 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -12,10 +12,17 @@ #include #include #include +#include #include #include +/* Flags and constants used in PowerNV platform */ + +#define MAX_POWERNV_IDLE_STATES 8 +#define IDLE_USE_INST_NAP 0x00010000 /* Use nap instruction */ +#define IDLE_USE_INST_SLEEP 0x00020000 /* Use sleep instruction */ + struct cpuidle_driver powernv_idle_driver = { .name = "powernv_idle", .owner = THIS_MODULE, @@ -79,7 +86,7 @@ static int fastsleep_loop(struct cpuidle_device *dev, /* * States for dedicated partition case. */ -static struct cpuidle_state powernv_states[] = { +static struct cpuidle_state powernv_states[MAX_POWERNV_IDLE_STATES] = { { /* Snooze */ .name = "snooze", .desc = "snooze", @@ -87,20 +94,6 @@ static struct cpuidle_state powernv_states[] = { .exit_latency = 0, .target_residency = 0, .enter = &snooze_loop }, - { /* NAP */ - .name = "NAP", - .desc = "NAP", - .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 10, - .target_residency = 100, - .enter = &nap_loop }, - { /* Fastsleep */ - .name = "fastsleep", - .desc = "fastsleep", - .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 10, - .target_residency = 100, - .enter = &fastsleep_loop }, }; static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n, @@ -161,19 +154,74 @@ static int powernv_cpuidle_driver_init(void) return 0; } +static int powernv_add_idle_states(void) +{ + struct device_node *power_mgt; + struct property *prop; + int nr_idle_states = 1; /* Snooze */ + int dt_idle_states; + u32 *flags; + int i; + + /* Currently we have snooze statically defined */ + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("opal: PowerMgmt Node not found\n"); + return nr_idle_states; + } + + prop = of_find_property(power_mgt, "ibm,cpu-idle-state-flags", NULL); + if (!prop) { + pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n"); + return nr_idle_states; + } + + dt_idle_states = prop->length / sizeof(u32); + flags = (u32 *) prop->value; + + for (i = 0; i < dt_idle_states; i++) { + + if (flags[i] & IDLE_USE_INST_NAP) { + /* Add NAP state */ + strcpy(powernv_states[nr_idle_states].name, "Nap"); + strcpy(powernv_states[nr_idle_states].desc, "Nap"); + powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIME_VALID; + powernv_states[nr_idle_states].exit_latency = 10; + powernv_states[nr_idle_states].target_residency = 100; + powernv_states[nr_idle_states].enter = &nap_loop; + nr_idle_states++; + } + + if (flags[i] & IDLE_USE_INST_SLEEP) { + /* Add FASTSLEEP state */ + strcpy(powernv_states[nr_idle_states].name, "FastSleep"); + strcpy(powernv_states[nr_idle_states].desc, "FastSleep"); + powernv_states[nr_idle_states].flags = + CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP; + powernv_states[nr_idle_states].exit_latency = 300; + powernv_states[nr_idle_states].target_residency = 1000000; + powernv_states[nr_idle_states].enter = &fastsleep_loop; + nr_idle_states++; + } + } + + return nr_idle_states; +} + /* * powernv_idle_probe() * Choose state table for shared versus dedicated partition */ static int powernv_idle_probe(void) { - if (cpuidle_disable != IDLE_NO_OVERRIDE) return -ENODEV; if (firmware_has_feature(FW_FEATURE_OPALv3)) { cpuidle_state_table = powernv_states; - max_idle_state = ARRAY_SIZE(powernv_states); + /* Device tree can indicate more idle states */ + max_idle_state = powernv_add_idle_states(); } else return -ENODEV;