/*
 *  linux/drivers/thermal/cpu_cooling.c
 *
 *  Copyright (C) 2012	Samsung Electronics Co., Ltd (http://www.samsung.com)
 *  Copyright (C) 2012  Amit Daniel <amit.kachhap@linaro.org>
 *
 *  Copyright (C) 2014  Viresh Kumar <viresh.kumar@linaro.org>
 *
 *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
#include <linux/module.h>
#include <linux/thermal.h>
#include <linux/cpufreq.h>
#include <linux/err.h>
#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpu_cooling.h>

#include <trace/events/thermal.h>
/*
 * Cooling state <-> CPUFreq frequency
 *
 * Cooling states are translated to frequencies throughout this driver and this
 * is the relation between them.
 *
 * Highest cooling state corresponds to lowest possible frequency.
 *
 * i.e.
 *	level 0 --> 1st Max Freq
 *	level 1 --> 2nd Max Freq
 *	...
 */
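
/*
 * For instance, with a hypothetical CPU exposing the frequencies
 * 1800 MHz, 1200 MHz and 600 MHz:
 *
 *	level 0 --> 1800 MHz (no throttling)
 *	level 1 --> 1200 MHz
 *	level 2 -->  600 MHz (deepest cooling state)
 */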

/**
 * struct power_table - frequency to power conversion
 * @frequency:	frequency in KHz
 * @power:	power in mW
 *
 * This structure is built when the cooling device registers and helps
 * in translating frequency to power and vice versa.
 */
struct power_table {
	u32 frequency;
	u32 power;
};

/**
 * struct cpufreq_cooling_device - data for cooling device with cpufreq
 * @id: unique integer value corresponding to each cpufreq_cooling_device
 *	registered.
 * @cool_dev: thermal_cooling_device pointer to keep track of the
 *	registered cooling device.
 * @cpufreq_state: integer value representing the current state of cpufreq
 *	cooling	devices.
 * @clipped_freq: integer value representing the absolute value of the clipped
 *	frequency.
 * @max_level: maximum cooling level. One less than total number of valid
 *	cpufreq frequencies.
 * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
 * @node: list_head to link all cpufreq_cooling_device together.
 * @last_load: load measured by the latest call to cpufreq_get_actual_power()
 * @time_in_idle: previous reading of the absolute time that this cpu was idle
 * @time_in_idle_timestamp: wall time of the last invocation of
 *	get_cpu_idle_time_us()
 * @dyn_power_table: array of struct power_table for frequency to power
 *	conversion, sorted in ascending order.
 * @dyn_power_table_entries: number of entries in the @dyn_power_table array
 * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
 * @plat_get_static_power: callback to calculate the static power
 *
 * This structure is required for keeping information of each registered
 * cpufreq_cooling_device.
 */
struct cpufreq_cooling_device {
	int id;
	struct thermal_cooling_device *cool_dev;
	unsigned int cpufreq_state;
	unsigned int clipped_freq;
	unsigned int max_level;
	unsigned int *freq_table;	/* In descending order */
	struct cpumask allowed_cpus;
	struct list_head node;
	u32 last_load;
	u64 *time_in_idle;
	u64 *time_in_idle_timestamp;
	struct power_table *dyn_power_table;
	int dyn_power_table_entries;
	struct device *cpu_dev;
	get_static_t plat_get_static_power;
};
static DEFINE_IDR(cpufreq_idr);
static DEFINE_MUTEX(cooling_cpufreq_lock);

/*
 * Locking: cooling_list_lock protects cpufreq_dev_list as a whole, while
 * cooling_cpufreq_lock orders cpufreq notifier (un)registration.  Keeping
 * them separate avoids an AB-BA deadlock between cooling_cpufreq_lock and
 * the cpufreq_policy_notifier_list rwsem: (un)registration takes the rwsem
 * while holding cooling_cpufreq_lock, whereas the notifier callback runs
 * with the rwsem already held.  cooling_list_lock is taken within
 * cooling_cpufreq_lock on (un)registration so that adding/removing a
 * device to/from the list and (un)registering the notifier stay atomic.
 */
static unsigned int cpufreq_dev_count;
static DEFINE_MUTEX(cooling_list_lock);
static LIST_HEAD(cpufreq_dev_list);

/**
 * get_idr - function to get a unique id.
 * @idr: struct idr * handle used to create a id.
 * @id: int * value generated by this function.
 *
 * This function will populate @id with a unique
 * id, using the idr API.
 *
 * Return: 0 on success, an error code on failure.
 */
static int get_idr(struct idr *idr, int *id)
{
	int ret;

	mutex_lock(&cooling_cpufreq_lock);
	ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
	mutex_unlock(&cooling_cpufreq_lock);
	if (unlikely(ret < 0))
		return ret;
	*id = ret;

	return 0;
}

/**
 * release_idr - function to free the unique id.
 * @idr: struct idr * handle used for creating the id.
 * @id: int value representing the unique id.
 */
static void release_idr(struct idr *idr, int id)
{
	mutex_lock(&cooling_cpufreq_lock);
	idr_remove(idr, id);
	mutex_unlock(&cooling_cpufreq_lock);
}

/* Below code defines functions to be used for cpufreq as cooling device */

/**
 * get_level: Find the level for a particular frequency
 * @cpufreq_dev: cpufreq_dev for which the property is required
 * @freq: Frequency
 *
 * Return: level on success, THERMAL_CSTATE_INVALID on error.
 */
static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
			       unsigned int freq)
{
	unsigned long level;

	for (level = 0; level <= cpufreq_dev->max_level; level++) {
		if (freq == cpufreq_dev->freq_table[level])
			return level;

		if (freq > cpufreq_dev->freq_table[level])
			break;
	}

	return THERMAL_CSTATE_INVALID;
}
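
/*
 * Illustrative lookup, assuming a hypothetical descending freq_table
 * (in kHz) of { 1800000, 1200000, 600000 }:
 *
 *	get_level(dev, 1200000) -> 1
 *	get_level(dev,  900000) -> THERMAL_CSTATE_INVALID (no exact match;
 *				   the walk stops once freq > freq_table[level])
 */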

/**
 * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
 * @cpu: cpu for which the level is required
 * @freq: the frequency of interest
 *
 * This function will match the cooling level corresponding to the
 * requested @freq and return it.
 *
 * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
 * otherwise.
 */
unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
{
	struct cpufreq_cooling_device *cpufreq_dev;

	mutex_lock(&cooling_list_lock);
	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
		if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
			mutex_unlock(&cooling_list_lock);
			return get_level(cpufreq_dev, freq);
		}
	}
	mutex_unlock(&cooling_list_lock);

	pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
	return THERMAL_CSTATE_INVALID;
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);

/**
 * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
 * @nb: struct notifier_block * with callback info.
 * @event: value showing cpufreq event for which this function invoked.
 * @data: callback-specific data
 *
 * Callback to hijack the notification on cpufreq policy transition.
 * Every time there is a change in policy, we will intercept and
 * update the cpufreq policy with thermal constraints.
 *
 * Return: 0 (success)
 */
static int cpufreq_thermal_notifier(struct notifier_block *nb,
				    unsigned long event, void *data)
{
	struct cpufreq_policy *policy = data;
	unsigned long clipped_freq;
	struct cpufreq_cooling_device *cpufreq_dev;

	if (event != CPUFREQ_ADJUST)
		return NOTIFY_DONE;

	mutex_lock(&cooling_list_lock);
	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
		if (!cpumask_test_cpu(policy->cpu, &cpufreq_dev->allowed_cpus))
			continue;

		clipped_freq = cpufreq_dev->clipped_freq;

		if (policy->max != clipped_freq)
			cpufreq_verify_within_limits(policy, 0, clipped_freq);

		break;
	}
	mutex_unlock(&cooling_list_lock);

	return NOTIFY_OK;
}

/**
 * build_dyn_power_table() - create a dynamic power to frequency table
 * @cpufreq_device:	the cpufreq cooling device in which to store the table
 * @capacitance: dynamic power coefficient for these cpus
 *
 * Build a dynamic power to frequency table for this cpu and store it
 * in @cpufreq_device.  This table will be used in cpu_power_to_freq() and
 * cpu_freq_to_power() to convert between power and frequency
 * efficiently.  Power is stored in mW, frequency in KHz.  The
 * resulting table is in ascending order.
 *
 * Return: 0 on success, -E* on error.
 */
static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
				 u32 capacitance)
{
	struct power_table *power_table;
	struct dev_pm_opp *opp;
	struct device *dev = NULL;
	int num_opps = 0, cpu, i, ret = 0;
	unsigned long freq;

	rcu_read_lock();

	for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
		dev = get_cpu_device(cpu);
		if (!dev) {
			dev_warn(&cpufreq_device->cool_dev->device,
				 "No cpu device for cpu %d\n", cpu);
			continue;
		}

		num_opps = dev_pm_opp_get_opp_count(dev);
		if (num_opps > 0) {
			break;
		} else if (num_opps < 0) {
			ret = num_opps;
			goto unlock;
		}
	}

	if (num_opps == 0) {
		ret = -EINVAL;
		goto unlock;
	}

	power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
	if (!power_table) {
		ret = -ENOMEM;
		goto unlock;
	}

	for (freq = 0, i = 0;
	     opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
	     freq++, i++) {
		u32 freq_mhz, voltage_mv;
		u64 power;

		freq_mhz = freq / 1000000;
		voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;

		/*
		 * Do the multiplication with MHz and millivolt so as
		 * to not overflow.
		 */
		power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
		do_div(power, 1000000000);

		/* frequency is stored in power_table in KHz */
		power_table[i].frequency = freq / 1000;

		/* power is stored in mW */
		power_table[i].power = power;
	}

	if (i == 0) {
		ret = PTR_ERR(opp);
		goto unlock;
	}

	cpufreq_device->cpu_dev = dev;
	cpufreq_device->dyn_power_table = power_table;
	cpufreq_device->dyn_power_table_entries = i;

unlock:
	rcu_read_unlock();
	return ret;
}
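
/*
 * Worked example for the table entries above, using hypothetical values:
 * with capacitance = 100, an OPP at 1000 MHz / 900 mV yields
 *
 *	power = 100 * 1000 * 900 * 900 / 10^9 = 81 mW
 *
 * stored as { .frequency = 1000000 (KHz), .power = 81 (mW) }.
 */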

static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
			     u32 freq)
{
	int i;
	struct power_table *pt = cpufreq_device->dyn_power_table;

	for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
		if (freq < pt[i].frequency)
			break;

	return pt[i - 1].power;
}

static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
			     u32 power)
{
	int i;
	struct power_table *pt = cpufreq_device->dyn_power_table;

	for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
		if (power < pt[i].power)
			break;

	return pt[i - 1].frequency;
}
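
/*
 * Both helpers above floor to the highest table entry that does not
 * exceed the input.  With the hypothetical ascending table
 * { {600000, 30}, {1200000, 81}, {1800000, 200} } (KHz, mW):
 *
 *	cpu_freq_to_power(dev, 1500000) -> 81      (entry for 1200000 KHz)
 *	cpu_power_to_freq(dev, 100)     -> 1200000 (81 mW is the floor)
 */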

/**
 * get_load() - get load for a cpu since last updated
 * @cpufreq_device:	&struct cpufreq_cooling_device for this cpu
 * @cpu:	cpu number
 *
 * Return: The average load of cpu @cpu in percentage since this
 * function was last called.
 */
static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu)
{
	u32 load;
	u64 now, now_idle, delta_time, delta_idle;

	now_idle = get_cpu_idle_time(cpu, &now, 0);
	delta_idle = now_idle - cpufreq_device->time_in_idle[cpu];
	delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu];

	if (delta_time <= delta_idle)
		load = 0;
	else
		load = div64_u64(100 * (delta_time - delta_idle), delta_time);

	cpufreq_device->time_in_idle[cpu] = now_idle;
	cpufreq_device->time_in_idle_timestamp[cpu] = now;

	return load;
}
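
/*
 * For example (hypothetical readings): if 400 ms of wall time and 100 ms
 * of idle time elapsed since the last call, the cpu was busy for 300 ms,
 * so load = 100 * (400 - 100) / 400 = 75 (percent).
 */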

/**
 * get_static_power() - calculate the static power consumed by the cpus
 * @cpufreq_device:	struct &cpufreq_cooling_device for this cpu cdev
 * @tz:		thermal zone device in which we're operating
 * @freq:	frequency in KHz
 * @power:	pointer in which to store the calculated static power
 *
 * Calculate the static power consumed by the cpus described by
 * @cpufreq_device running at frequency @freq.  This function relies on
 * a platform specific function that should have been provided when the
 * actor was registered.  If it wasn't, the static power is assumed to
 * be negligible.  The calculated static power is stored in @power.
 *
 * Return: 0 on success, -E* on failure.
 */
static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
			    struct thermal_zone_device *tz, unsigned long freq,
			    u32 *power)
{
	struct dev_pm_opp *opp;
	unsigned long voltage;
	struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
	unsigned long freq_hz = freq * 1000;

	if (!cpufreq_device->plat_get_static_power ||
	    !cpufreq_device->cpu_dev) {
		*power = 0;
		return 0;
	}

	rcu_read_lock();

	opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
					 true);
	voltage = dev_pm_opp_get_voltage(opp);

	rcu_read_unlock();

	if (voltage == 0) {
		dev_warn_ratelimited(cpufreq_device->cpu_dev,
				     "Failed to get voltage for frequency %lu: %ld\n",
				     freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0);
		return -EINVAL;
	}

	return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
						     voltage, power);
}

/**
 * get_dynamic_power() - calculate the dynamic power
 * @cpufreq_device:	&cpufreq_cooling_device for this cdev
 * @freq:	current frequency
 *
 * Return: the dynamic power consumed by the cpus described by
 * @cpufreq_device.
 */
static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
			     unsigned long freq)
{
	u32 raw_cpu_power;

	raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
	return (raw_cpu_power * cpufreq_device->last_load) / 100;
}

/* cpufreq cooling device callback functions are defined below */

/**
 * cpufreq_get_max_state - callback function to get the max cooling state.
 * @cdev: thermal cooling device pointer.
 * @state: fill this variable with the max cooling state.
 *
 * Callback for the thermal cooling device to return the cpufreq
 * max cooling state.
 *
 * Return: 0 on success, an error code otherwise.
 */
static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;

	*state = cpufreq_device->max_level;
	return 0;
}

/**
 * cpufreq_get_cur_state - callback function to get the current cooling state.
 * @cdev: thermal cooling device pointer.
 * @state: fill this variable with the current cooling state.
 *
 * Callback for the thermal cooling device to return the cpufreq
 * current cooling state.
 *
 * Return: 0 on success, an error code otherwise.
 */
static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;

	*state = cpufreq_device->cpufreq_state;

	return 0;
}

/**
 * cpufreq_set_cur_state - callback function to set the current cooling state.
 * @cdev: thermal cooling device pointer.
 * @state: set this variable to the current cooling state.
 *
 * Callback for the thermal cooling device to change the cpufreq
 * current cooling state.
 *
 * Return: 0 on success, an error code otherwise.
 */
static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long state)
{
	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
	unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
	unsigned int clip_freq;

	/* Request state should be less than max_level */
	if (WARN_ON(state > cpufreq_device->max_level))
		return -EINVAL;

	/* Check if the old cooling action is same as new cooling action */
	if (cpufreq_device->cpufreq_state == state)
		return 0;

	clip_freq = cpufreq_device->freq_table[state];
	cpufreq_device->cpufreq_state = state;
	cpufreq_device->clipped_freq = clip_freq;

	cpufreq_update_policy(cpu);

	return 0;
}
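
/*
 * Note on the flow above: setting state N stores freq_table[N] in
 * clipped_freq and kicks cpufreq_update_policy(); the resulting
 * CPUFREQ_ADJUST notification lands in cpufreq_thermal_notifier(),
 * which clamps policy->max to the new clipped frequency.
 */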

/**
 * cpufreq_get_requested_power() - get the current power
 * @cdev:	&thermal_cooling_device pointer
 * @tz:		a valid thermal zone device pointer
 * @power:	pointer in which to store the resulting power
 *
 * Calculate the current power consumption of the cpus in milliwatts
 * and store it in @power.  This function should actually calculate
 * the requested power, but it's hard to get the frequency that
 * cpufreq would have assigned if there were no thermal limits.
 * Instead, we calculate the current power on the assumption that the
 * immediate future will look like the immediate past.
 *
 * We use the current frequency and the average load since this
 * function was last called.  In reality, there could have been
 * multiple opps since this function was last called and that affects
 * the load calculation.  While it's not perfectly accurate, this
 * simplification is good enough and works.  REVISIT this, as more
 * complex code may be needed if experiments show that it's not
 * accurate enough.
 *
 * Return: 0 on success, -E* if getting the static power failed.
 */
static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
				       struct thermal_zone_device *tz,
				       u32 *power)
{
	unsigned long freq;
	int i = 0, cpu, ret;
	u32 static_power, dynamic_power, total_load = 0;
	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
	u32 *load_cpu = NULL;

	cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);

	/*
	 * All the CPUs are offline, thus the requested power by
	 * the cdev is 0
	 */
	if (cpu >= nr_cpu_ids) {
		*power = 0;
		return 0;
	}

	freq = cpufreq_quick_get(cpu);

	if (trace_thermal_power_cpu_get_power_enabled()) {
		u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);

		load_cpu = devm_kcalloc(&cdev->device, ncpus, sizeof(*load_cpu),
					GFP_KERNEL);
	}

	for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
		u32 load;

		if (cpu_online(cpu))
			load = get_load(cpufreq_device, cpu);
		else
			load = 0;

		total_load += load;
		if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
			load_cpu[i] = load;

		i++;
	}

	cpufreq_device->last_load = total_load;

	dynamic_power = get_dynamic_power(cpufreq_device, freq);
	ret = get_static_power(cpufreq_device, tz, freq, &static_power);
	if (ret) {
		if (load_cpu)
			devm_kfree(&cdev->device, load_cpu);
		return ret;
	}

	if (load_cpu) {
		trace_thermal_power_cpu_get_power(
			&cpufreq_device->allowed_cpus,
			freq, load_cpu, i, dynamic_power, static_power);

		devm_kfree(&cdev->device, load_cpu);
	}

	*power = static_power + dynamic_power;
	return 0;
}

/**
 * cpufreq_state2power() - convert a cpu cdev state to power consumed
 * @cdev:	&thermal_cooling_device pointer
 * @tz:		a valid thermal zone device pointer
 * @state:	cooling device state to be converted
 * @power:	pointer in which to store the resulting power
 *
 * Convert cooling device state @state into power consumption in
 * milliwatts assuming 100% load.  Store the calculated power in
 * @power.
 *
 * Return: 0 on success, -EINVAL if the cooling device state could not
 * be converted into a frequency or other -E* if there was an error
 * when calculating the static power.
 */
static int cpufreq_state2power(struct thermal_cooling_device *cdev,
			       struct thermal_zone_device *tz,
			       unsigned long state, u32 *power)
{
	unsigned int freq, num_cpus;
	cpumask_t cpumask;
	u32 static_power, dynamic_power;
	int ret;
	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;

	cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
	num_cpus = cpumask_weight(&cpumask);

	/* None of our cpus are online, so no power */
	if (num_cpus == 0) {
		*power = 0;
		return 0;
	}

	freq = cpufreq_device->freq_table[state];
	if (!freq)
		return -EINVAL;

	dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
	ret = get_static_power(cpufreq_device, tz, freq, &static_power);
	if (ret)
		return ret;

	*power = static_power + dynamic_power;
	return 0;
}

/**
 * cpufreq_power2state() - convert power to a cooling device state
 * @cdev:	&thermal_cooling_device pointer
 * @tz:		a valid thermal zone device pointer
 * @power:	power in milliwatts to be converted
 * @state:	pointer in which to store the resulting state
 *
 * Calculate a cooling device state for the cpus described by @cdev
 * that would allow them to consume at most @power mW and store it in
 * @state.  Note that this calculation depends on external factors
 * such as the cpu load or the current static power.  Calling this
 * function with the same power as input can yield different cooling
 * device states depending on those external factors.
 *
 * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
 * the calculated frequency could not be converted to a valid state.
 * The latter should not happen unless the frequencies available to
 * cpufreq have changed since the initialization of the cpu cooling
 * device.
 */
static int cpufreq_power2state(struct thermal_cooling_device *cdev,
			       struct thermal_zone_device *tz, u32 power,
			       unsigned long *state)
{
	unsigned int cpu, cur_freq, target_freq;
	int ret;
	s32 dyn_power;
	u32 last_load, normalised_power, static_power;
	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;

	cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);

	/* None of our cpus are online */
	if (cpu >= nr_cpu_ids)
		return -ENODEV;

	cur_freq = cpufreq_quick_get(cpu);
	ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
	if (ret)
		return ret;

	dyn_power = power - static_power;
	dyn_power = dyn_power > 0 ? dyn_power : 0;
	last_load = cpufreq_device->last_load ?: 1;
	normalised_power = (dyn_power * 100) / last_load;
	target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);

	*state = cpufreq_cooling_get_level(cpu, target_freq);
	if (*state == THERMAL_CSTATE_INVALID) {
		dev_warn_ratelimited(&cdev->device,
				     "Failed to convert %dKHz for cpu %d into a cdev state\n",
				     target_freq, cpu);
		return -EINVAL;
	}

	trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
				      target_freq, *state, power);
	return 0;
}
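
/*
 * Worked example with hypothetical numbers: a power budget of 500 mW,
 * static_power = 100 mW and last_load = 50 give
 *
 *	dyn_power        = 500 - 100 = 400 mW
 *	normalised_power = 400 * 100 / 50 = 800 mW (power at 100% load)
 *
 * and 800 mW is then mapped back through cpu_power_to_freq() and
 * cpufreq_cooling_get_level() to a cooling state.
 */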

/* Bind cpufreq callbacks to thermal cooling device ops */
static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
	.get_max_state = cpufreq_get_max_state,
	.get_cur_state = cpufreq_get_cur_state,
	.set_cur_state = cpufreq_set_cur_state,
};

/* Notifier for cpufreq policy change */
static struct notifier_block thermal_cpufreq_notifier_block = {
	.notifier_call = cpufreq_thermal_notifier,
};

static unsigned int find_next_max(struct cpufreq_frequency_table *table,
				  unsigned int prev_max)
{
	struct cpufreq_frequency_table *pos;
	unsigned int max = 0;

	cpufreq_for_each_valid_entry(pos, table) {
		if (pos->frequency > max && pos->frequency < prev_max)
			max = pos->frequency;
	}

	return max;
}
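
/*
 * Repeated calls walk the table in descending order.  With a hypothetical
 * table of { 600000, 1200000, 1800000 } (kHz) and prev_max starting at
 * UINT_MAX, successive calls return 1800000, 1200000, 600000 and finally
 * 0 once no smaller entry remains.
 */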

/**
 * __cpufreq_cooling_register - helper function to create cpufreq cooling device
 * @np: a valid struct device_node to the cooling device device tree node
 * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
 * Normally this should be same as cpufreq policy->related_cpus.
 * @capacitance: dynamic power coefficient for these cpus
 * @plat_static_func: function to calculate the static power consumed by these
 *                    cpus (optional)
 *
 * This interface function registers the cpufreq cooling device with the name
 * "thermal-cpufreq-%x".  This api can support multiple instances of cpufreq
 * cooling devices.  It also gives the opportunity to link the cooling device
 * with a device tree node, in order to bind it via the thermal DT code.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
static struct thermal_cooling_device *
__cpufreq_cooling_register(struct device_node *np,
			   const struct cpumask *clip_cpus, u32 capacitance,
			   get_static_t plat_static_func)
{
	struct thermal_cooling_device *cool_dev;
	struct cpufreq_cooling_device *cpufreq_dev;
	char dev_name[THERMAL_NAME_LENGTH];
	struct cpufreq_frequency_table *pos, *table;
	unsigned int freq, i, num_cpus;
	int ret;

	table = cpufreq_frequency_get_table(cpumask_first(clip_cpus));
	if (!table) {
		pr_debug("%s: CPUFreq table not found\n", __func__);
		return ERR_PTR(-EPROBE_DEFER);
	}
2014-12-04 07:11:43 +03:00
2014-12-04 07:11:50 +03:00
cpufreq_dev = kzalloc ( sizeof ( * cpufreq_dev ) , GFP_KERNEL ) ;
2012-08-16 15:41:40 +04:00
if ( ! cpufreq_dev )
return ERR_PTR ( - ENOMEM ) ;
2015-02-26 22:00:29 +03:00
num_cpus = cpumask_weight ( clip_cpus ) ;
cpufreq_dev - > time_in_idle = kcalloc ( num_cpus ,
sizeof ( * cpufreq_dev - > time_in_idle ) ,
GFP_KERNEL ) ;
if ( ! cpufreq_dev - > time_in_idle ) {
cool_dev = ERR_PTR ( - ENOMEM ) ;
goto free_cdev ;
}
cpufreq_dev - > time_in_idle_timestamp =
kcalloc ( num_cpus , sizeof ( * cpufreq_dev - > time_in_idle_timestamp ) ,
GFP_KERNEL ) ;
if ( ! cpufreq_dev - > time_in_idle_timestamp ) {
cool_dev = ERR_PTR ( - ENOMEM ) ;
goto free_time_in_idle ;
}

	/* Find max levels */
	cpufreq_for_each_valid_entry(pos, table)
		cpufreq_dev->max_level++;

	cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) *
					  cpufreq_dev->max_level, GFP_KERNEL);
	if (!cpufreq_dev->freq_table) {
		cool_dev = ERR_PTR(-ENOMEM);
		goto free_time_in_idle_timestamp;
	}

	/* max_level is an index, not a counter */
	cpufreq_dev->max_level--;

	cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);

	if (capacitance) {
		cpufreq_cooling_ops.get_requested_power =
			cpufreq_get_requested_power;
		cpufreq_cooling_ops.state2power = cpufreq_state2power;
		cpufreq_cooling_ops.power2state = cpufreq_power2state;
		cpufreq_dev->plat_get_static_power = plat_static_func;
		ret = build_dyn_power_table(cpufreq_dev, capacitance);
		if (ret) {
			cool_dev = ERR_PTR(ret);
			goto free_table;
		}
	}

	ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
	if (ret) {
		cool_dev = ERR_PTR(ret);
		goto free_table;
	}

	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
		 cpufreq_dev->id);

	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
						      &cpufreq_cooling_ops);
	if (IS_ERR(cool_dev))
		goto remove_idr;

	/* Fill freq-table in descending order of frequencies */
	for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
		freq = find_next_max(table, freq);
		cpufreq_dev->freq_table[i] = freq;

		/* Warn for duplicate entries */
		if (!freq)
			pr_warn("%s: table has duplicate entries\n", __func__);
		else
			pr_debug("%s: freq:%u KHz\n", __func__, freq);
	}

	cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
	cpufreq_dev->cool_dev = cool_dev;

	mutex_lock(&cooling_cpufreq_lock);
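	/*
	 * Finer-grained locking, per the lockdep fix this code carries:
	 * cooling_list_lock protects cpufreq_dev_list as a whole, while
	 * cooling_cpufreq_lock orders cpufreq notifier registration and
	 * removal. Nesting cooling_list_lock inside cooling_cpufreq_lock
	 * here keeps the list update and the notifier registration below
	 * atomic, without recreating the AB-BA deadlock against the
	 * cpufreq policy notifier rwsem.
	 */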
	mutex_lock(&cooling_list_lock);
	list_add(&cpufreq_dev->node, &cpufreq_dev_list);
	mutex_unlock(&cooling_list_lock);

	/* Register the notifier for first cpufreq cooling device */
	if (!cpufreq_dev_count++)
		cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
					  CPUFREQ_POLICY_NOTIFIER);

	mutex_unlock(&cooling_cpufreq_lock);

	return cool_dev;

remove_idr:
	release_idr(&cpufreq_idr, cpufreq_dev->id);
free_table:
	kfree(cpufreq_dev->freq_table);
free_time_in_idle_timestamp:
	kfree(cpufreq_dev->time_in_idle_timestamp);
free_time_in_idle:
	kfree(cpufreq_dev->time_in_idle);
free_cdev:
	kfree(cpufreq_dev);

	return cool_dev;
}

/**
 * cpufreq_cooling_register - function to create cpufreq cooling device.
 * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
 *
 * This interface function registers the cpufreq cooling device with the name
 * "thermal-cpufreq-%d". This API can support multiple instances of cpufreq
 * cooling devices.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
struct thermal_cooling_device *
cpufreq_cooling_register(const struct cpumask *clip_cpus)
{
	return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
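
/*
 * Example usage (a minimal sketch, not part of this driver): a caller
 * would typically register against a policy's related_cpus during probe.
 * cpufreq_cpu_get()/cpufreq_cpu_put() are the standard cpufreq helpers;
 * "example_register" is hypothetical and error handling is abbreviated.
 */
#if 0
static struct thermal_cooling_device *example_register(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct thermal_cooling_device *cdev;

	if (!policy)
		return ERR_PTR(-EPROBE_DEFER);

	/* Cover every CPU that shares this frequency domain */
	cdev = cpufreq_cooling_register(policy->related_cpus);
	cpufreq_cpu_put(policy);
	return cdev;
}
#endif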

/**
 * of_cpufreq_cooling_register - function to create cpufreq cooling device.
 * @np: a valid struct device_node to the cooling device device tree node
 * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
 *
 * This interface function registers the cpufreq cooling device with the name
 * "thermal-cpufreq-%d". This API can support multiple instances of cpufreq
 * cooling devices. Using this API, the cpufreq cooling device will be
 * linked to the device tree node provided.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
struct thermal_cooling_device *
of_cpufreq_cooling_register(struct device_node *np,
			    const struct cpumask *clip_cpus)
{
	if (!np)
		return ERR_PTR(-EINVAL);

	return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
}
EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
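
/*
 * Example usage (a hedged sketch): device-tree based registration,
 * roughly as an SoC thermal driver might do it. of_get_cpu_node() and
 * of_node_put() are the standard OF helpers and assume <linux/of.h>;
 * "example_of_register" is hypothetical.
 */
#if 0
static struct thermal_cooling_device *example_of_register(void)
{
	struct device_node *cpu_np = of_get_cpu_node(0, NULL);
	struct thermal_cooling_device *cdev;

	if (!cpu_np)
		return ERR_PTR(-ENODEV);

	cdev = of_cpufreq_cooling_register(cpu_np, cpu_online_mask);
	of_node_put(cpu_np);
	return cdev;
}
#endif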

/**
 * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
 * @clip_cpus: cpumask of cpus where the frequency constraints will happen
 * @capacitance: dynamic power coefficient for these cpus
 * @plat_static_func: function to calculate the static power consumed by these
 *                    cpus (optional)
 *
 * This interface function registers the cpufreq cooling device with
 * the name "thermal-cpufreq-%d". This API can support multiple
 * instances of cpufreq cooling devices. Using this function, the
 * cooling device will implement the power extensions by using a
 * simple cpu power model. The cpus must have registered their OPPs
 * using the OPP library.
 *
 * An optional @plat_static_func may be provided to calculate the
 * static power consumed by these cpus. If the platform's static
 * power consumption is unknown or negligible, make it NULL.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
struct thermal_cooling_device *
cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
			       get_static_t plat_static_func)
{
	return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
					  plat_static_func);
}
EXPORT_SYMBOL(cpufreq_power_cooling_register);
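
/*
 * Example usage (a hedged sketch): power-aware registration. The
 * capacitance value below is a made-up placeholder, not a real datum;
 * real callers derive it from platform characterisation. Passing NULL
 * skips the optional static power callback.
 */
#if 0
static struct thermal_cooling_device *example_power_register(void)
{
	/* 100 is an illustrative dynamic power coefficient */
	return cpufreq_power_cooling_register(cpu_online_mask, 100, NULL);
}
#endif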
/**
 * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
 * @np: a valid struct device_node to the cooling device device tree node
 * @clip_cpus: cpumask of cpus where the frequency constraints will happen
 * @capacitance: dynamic power coefficient for these cpus
 * @plat_static_func: function to calculate the static power consumed by these
 *                    cpus (optional)
 *
 * This interface function registers the cpufreq cooling device with
 * the name "thermal-cpufreq-%d". This API can support multiple
 * instances of cpufreq cooling devices. Using this API, the cpufreq
 * cooling device will be linked to the device tree node provided.
 * Using this function, the cooling device will implement the power
 * extensions by using a simple cpu power model. The cpus must have
 * registered their OPPs using the OPP library.
 *
 * An optional @plat_static_func may be provided to calculate the
 * static power consumed by these cpus. If the platform's static
 * power consumption is unknown or negligible, make it NULL.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
struct thermal_cooling_device *
of_cpufreq_power_cooling_register(struct device_node *np,
				  const struct cpumask *clip_cpus,
				  u32 capacitance,
				  get_static_t plat_static_func)
{
	if (!np)
		return ERR_PTR(-EINVAL);

	return __cpufreq_cooling_register(np, clip_cpus, capacitance,
					  plat_static_func);
}
EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
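
/*
 * Example usage (a hedged sketch): the DT flavour of the power-aware
 * registration. Reading the coefficient from a
 * "dynamic-power-coefficient" property only illustrates where
 * @capacitance might come from; the property name is an assumption,
 * not something this file defines. Assumes <linux/of.h>.
 */
#if 0
static struct thermal_cooling_device *
example_of_power_register(struct device_node *np)
{
	u32 capacitance = 0;

	of_property_read_u32(np, "dynamic-power-coefficient", &capacitance);

	return of_cpufreq_power_cooling_register(np, cpu_online_mask,
						 capacitance, NULL);
}
#endif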

/**
 * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
 * @cdev: thermal cooling device pointer.
 *
 * This interface function unregisters the "thermal-cpufreq-%d" cooling device.
 */
void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
{
	struct cpufreq_cooling_device *cpufreq_dev;

	if (!cdev)
		return;

	cpufreq_dev = cdev->devdata;

	/* Unregister the notifier for the last cpufreq cooling device */
	mutex_lock(&cooling_cpufreq_lock);
	if (!--cpufreq_dev_count)
		cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
					    CPUFREQ_POLICY_NOTIFIER);

	mutex_lock(&cooling_list_lock);
	list_del(&cpufreq_dev->node);
	mutex_unlock(&cooling_list_lock);

	mutex_unlock(&cooling_cpufreq_lock);

	thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
	release_idr(&cpufreq_idr, cpufreq_dev->id);
	kfree(cpufreq_dev->time_in_idle_timestamp);
	kfree(cpufreq_dev->time_in_idle);
	kfree(cpufreq_dev->freq_table);
	kfree(cpufreq_dev);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
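
/*
 * Example usage (a hedged sketch): the matching teardown in a caller's
 * remove path. The function tolerates a NULL @cdev, so a possibly-unset
 * pointer may be passed without an extra check; "example_teardown" is
 * hypothetical.
 */
#if 0
static void example_teardown(struct thermal_cooling_device *cdev)
{
	cpufreq_cooling_unregister(cdev);
}
#endif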