/*
 *  linux/drivers/thermal/cpu_cooling.c
 *
 *  Copyright (C) 2012	Samsung Electronics Co., Ltd(http://www.samsung.com)
 *  Copyright (C) 2012  Amit Daniel <amit.kachhap@linaro.org>
 *
 *  Copyright (C) 2014  Viresh Kumar <viresh.kumar@linaro.org>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
#include <linux/module.h>
#include <linux/thermal.h>
#include <linux/cpufreq.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpu_cooling.h>

#include <trace/events/thermal.h>

/*
 * Cooling state <-> CPUFreq frequency
 *
 * Cooling states are translated to frequencies throughout this driver and this
 * is the relation between them.
 *
 * Highest cooling state corresponds to lowest possible frequency.
 *
 * i.e.
 *	level 0 --> 1st Max Freq
 *	level 1 --> 2nd Max Freq
 *	...
 */
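
/*
 * For example (hypothetical table, for illustration only): a policy whose
 * valid cpufreq table contains 1200000, 900000 and 600000 kHz maps to three
 * cooling states:
 *
 *	level 0 --> 1200000 kHz	(no throttling)
 *	level 1 -->  900000 kHz
 *	level 2 -->  600000 kHz	(max_level, deepest throttling)
 */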

/**
 * struct freq_table - frequency table along with power entries
 * @frequency:	frequency in KHz
 * @power:	power in mW
 *
 * This structure is built when the cooling device registers and helps
 * in translating frequency to power and vice versa.
 */
struct freq_table {
	u32 frequency;
	u32 power;
};

/**
 * struct time_in_idle - idle time stats
 * @time:	previous reading of the absolute time that this cpu was idle
 * @timestamp:	wall time of the last invocation of get_cpu_idle_time_us()
 */
struct time_in_idle {
	u64 time;
	u64 timestamp;
};

/**
 * struct cpufreq_cooling_device - data for cooling device with cpufreq
 * @id: unique integer value corresponding to each cpufreq_cooling_device
 *	registered.
 * @cdev: thermal_cooling_device pointer to keep track of the
 *	registered cooling device.
 * @policy: cpufreq policy.
 * @cpufreq_state: integer value representing the current state of cpufreq
 *	cooling devices.
 * @clipped_freq: integer value representing the absolute value of the clipped
 *	frequency.
 * @max_level: maximum cooling level. One less than total number of valid
 *	cpufreq frequencies.
 * @freq_table: freq table in descending order of frequencies.
 * @node: list_head to link all cpufreq_cooling_device together.
 * @last_load: load measured by the latest call to cpufreq_get_requested_power()
 * @idle_time: idle time stats
 * @cpu_dev: the cpu_device of policy->cpu.
 * @plat_get_static_power: callback to calculate the static power
 *
 * This structure is required for keeping information of each registered
 * cpufreq_cooling_device.
 */
struct cpufreq_cooling_device {
	int id;
	struct thermal_cooling_device *cdev;
	struct cpufreq_policy *policy;
	unsigned int cpufreq_state;
	unsigned int clipped_freq;
	unsigned int max_level;
	struct freq_table *freq_table;	/* In descending order */
	struct list_head node;
	u32 last_load;
	struct time_in_idle *idle_time;
	struct device *cpu_dev;
	get_static_t plat_get_static_power;
};

static DEFINE_IDA(cpufreq_ida);

static DEFINE_MUTEX(cooling_list_lock);
static LIST_HEAD(cpufreq_cdev_list);

/* Below code defines functions to be used for cpufreq as cooling device */

/**
 * get_level: Find the level for a particular frequency
 * @cpufreq_cdev: cpufreq_cdev for which the property is required
 * @freq: Frequency
 *
 * Return: level on success, THERMAL_CSTATE_INVALID on error.
 */
static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
			       unsigned int freq)
{
	unsigned long level;

	for (level = 0; level <= cpufreq_cdev->max_level; level++) {
		if (freq == cpufreq_cdev->freq_table[level].frequency)
			return level;

		if (freq > cpufreq_cdev->freq_table[level].frequency)
			break;
	}

	return THERMAL_CSTATE_INVALID;
}

/**
 * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
 * @nb:	struct notifier_block * with callback info.
 * @event: value showing cpufreq event for which this function invoked.
 * @data: callback-specific data
 *
 * Callback to hijack the notification on cpufreq policy transition.
 * Every time there is a change in policy, we will intercept and
 * update the cpufreq policy with thermal constraints.
 *
 * Return: 0 (success)
 */
static int cpufreq_thermal_notifier(struct notifier_block *nb,
				    unsigned long event, void *data)
{
	struct cpufreq_policy *policy = data;
	unsigned long clipped_freq;
	struct cpufreq_cooling_device *cpufreq_cdev;

	if (event != CPUFREQ_ADJUST)
		return NOTIFY_DONE;

	mutex_lock(&cooling_list_lock);
	list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) {
		/*
		 * A new copy of the policy is sent to the notifier and can't
		 * compare that directly.
		 */
		if (policy->cpu != cpufreq_cdev->policy->cpu)
			continue;

		/*
		 * policy->max is the maximum allowed frequency defined by user
		 * and clipped_freq is the maximum that thermal constraints
		 * allow.
		 *
		 * If clipped_freq is lower than policy->max, then we need to
		 * readjust policy->max.
		 *
		 * But, if clipped_freq is greater than policy->max, we don't
		 * need to do anything.
		 */
		clipped_freq = cpufreq_cdev->clipped_freq;

		if (policy->max > clipped_freq)
			cpufreq_verify_within_limits(policy, 0, clipped_freq);
		break;
	}
	mutex_unlock(&cooling_list_lock);

	return NOTIFY_OK;
}

/**
 * update_freq_table() - Update the freq table with power numbers
 * @cpufreq_cdev: the cpufreq cooling device in which to update the table
 * @capacitance: dynamic power coefficient for these cpus
 *
 * Update the freq table with power numbers. This table will be used in
 * cpu_power_to_freq() and cpu_freq_to_power() to convert between power and
 * frequency efficiently. Power is stored in mW, frequency in KHz. The
 * resulting table is in descending order.
 *
 * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
 * or -ENOMEM if we run out of memory.
 */
static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev,
			     u32 capacitance)
{
	struct freq_table *freq_table = cpufreq_cdev->freq_table;
	struct dev_pm_opp *opp;
	struct device *dev = NULL;
	int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i;

	dev = get_cpu_device(cpu);
	if (unlikely(!dev)) {
		dev_warn(&cpufreq_cdev->cdev->device,
			 "No cpu device for cpu %d\n", cpu);
		return -ENODEV;
	}

	num_opps = dev_pm_opp_get_opp_count(dev);
	if (num_opps < 0)
		return num_opps;

	/*
	 * The cpufreq table is also built from the OPP table and so the count
	 * should match.
	 */
	if (num_opps != cpufreq_cdev->max_level + 1) {
		dev_warn(dev, "Number of OPPs not matching with max_levels\n");
		return -EINVAL;
	}

	for (i = 0; i <= cpufreq_cdev->max_level; i++) {
		unsigned long freq = freq_table[i].frequency * 1000;
		u32 freq_mhz = freq_table[i].frequency / 1000;
		u64 power;
		u32 voltage_mv;

		/*
		 * Find ceil frequency as 'freq' may be slightly lower than OPP
		 * freq due to truncation while converting to kHz.
		 */
		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
		if (IS_ERR(opp)) {
			dev_err(dev, "failed to get opp for %lu frequency\n",
				freq);
			return -EINVAL;
		}

		voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
		dev_pm_opp_put(opp);

		/*
		 * Do the multiplication with MHz and millivolt so as
		 * to not overflow.
		 */
		power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
		do_div(power, 1000000000);

		/* power is stored in mW */
		freq_table[i].power = power;
	}

	cpufreq_cdev->cpu_dev = dev;

	return 0;
}
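
/*
 * Worked example for the dynamic power model above, with illustrative
 * (made up) numbers: capacitance = 105, an OPP at 1000000 kHz (1000 MHz)
 * and 1100 mV gives
 *
 *	power = 105 * 1000 * 1100 * 1100 / 10^9 ~= 127 mW
 *
 * i.e. Pdyn = C * f * V^2, scaled so that the result is in milliwatts.
 */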

static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
			     u32 freq)
{
	int i;
	struct freq_table *freq_table = cpufreq_cdev->freq_table;

	for (i = 1; i <= cpufreq_cdev->max_level; i++)
		if (freq > freq_table[i].frequency)
			break;

	return freq_table[i - 1].power;
}

static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
			     u32 power)
{
	int i;
	struct freq_table *freq_table = cpufreq_cdev->freq_table;

	for (i = 1; i <= cpufreq_cdev->max_level; i++)
		if (power > freq_table[i].power)
			break;

	return freq_table[i - 1].frequency;
}

/**
 * get_load() - get load for a cpu since last updated
 * @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu
 * @cpu: cpu number
 * @cpu_idx: index of the cpu in time_in_idle*
 *
 * Return: The average load of cpu @cpu in percentage since this
 * function was last called.
 */
static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
		    int cpu_idx)
{
	u32 load;
	u64 now, now_idle, delta_time, delta_idle;
	struct time_in_idle *idle_time = &cpufreq_cdev->idle_time[cpu_idx];

	now_idle = get_cpu_idle_time(cpu, &now, 0);
	delta_idle = now_idle - idle_time->time;
	delta_time = now - idle_time->timestamp;

	if (delta_time <= delta_idle)
		load = 0;
	else
		load = div64_u64(100 * (delta_time - delta_idle), delta_time);

	idle_time->time = now_idle;
	idle_time->timestamp = now;

	return load;
}
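
/*
 * For example (made up numbers): if 100ms of wall time and 40ms of idle
 * time elapsed on this cpu since the previous call, the function returns
 * 100 * (100 - 40) / 100 = 60, i.e. the cpu was 60% busy over that window.
 */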

/**
 * get_static_power() - calculate the static power consumed by the cpus
 * @cpufreq_cdev:	struct &cpufreq_cooling_device for this cpu cdev
 * @tz:		thermal zone device in which we're operating
 * @freq:	frequency in KHz
 * @power:	pointer in which to store the calculated static power
 *
 * Calculate the static power consumed by the cpus described by
 * @cpufreq_cdev running at frequency @freq. This function relies on a
 * platform specific function that should have been provided when the
 * actor was registered. If it wasn't, the static power is assumed to
 * be negligible. The calculated static power is stored in @power.
 *
 * Return: 0 on success, -E* on failure.
 */
static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev,
			    struct thermal_zone_device *tz, unsigned long freq,
			    u32 *power)
{
	struct dev_pm_opp *opp;
	unsigned long voltage;
	struct cpumask *cpumask = cpufreq_cdev->policy->related_cpus;
	unsigned long freq_hz = freq * 1000;

	if (!cpufreq_cdev->plat_get_static_power || !cpufreq_cdev->cpu_dev) {
		*power = 0;
		return 0;
	}

	opp = dev_pm_opp_find_freq_exact(cpufreq_cdev->cpu_dev, freq_hz,
					 true);
	if (IS_ERR(opp)) {
		dev_warn_ratelimited(cpufreq_cdev->cpu_dev,
				     "Failed to find OPP for frequency %lu: %ld\n",
				     freq_hz, PTR_ERR(opp));
		return -EINVAL;
	}

	voltage = dev_pm_opp_get_voltage(opp);
	dev_pm_opp_put(opp);

	if (voltage == 0) {
		dev_err_ratelimited(cpufreq_cdev->cpu_dev,
				    "Failed to get voltage for frequency %lu\n",
				    freq_hz);
		return -EINVAL;
	}

	return cpufreq_cdev->plat_get_static_power(cpumask, tz->passive_delay,
						   voltage, power);
}

/**
 * get_dynamic_power() - calculate the dynamic power
 * @cpufreq_cdev:	&cpufreq_cooling_device for this cdev
 * @freq:	current frequency
 *
 * Return: the dynamic power consumed by the cpus described by
 * @cpufreq_cdev.
 */
static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
			     unsigned long freq)
{
	u32 raw_cpu_power;

	raw_cpu_power = cpu_freq_to_power(cpufreq_cdev, freq);
	return (raw_cpu_power * cpufreq_cdev->last_load) / 100;
}
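
/*
 * For example (illustrative numbers): with a freq_table entry of 1270 mW
 * at the current frequency and a last measured load of 60%, the estimated
 * dynamic power is 1270 * 60 / 100 = 762 mW.
 */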

/* cpufreq cooling device callback functions are defined below */

/**
 * cpufreq_get_max_state - callback function to get the max cooling state.
 * @cdev: thermal cooling device pointer.
 * @state: fill this variable with the max cooling state.
 *
 * Callback for the thermal cooling device to return the cpufreq
 * max cooling state.
 *
 * Return: 0 on success, an error code otherwise.
 */
static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;

	*state = cpufreq_cdev->max_level;

	return 0;
}

/**
 * cpufreq_get_cur_state - callback function to get the current cooling state.
 * @cdev: thermal cooling device pointer.
 * @state: fill this variable with the current cooling state.
 *
 * Callback for the thermal cooling device to return the cpufreq
 * current cooling state.
 *
 * Return: 0 on success, an error code otherwise.
 */
static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;

	*state = cpufreq_cdev->cpufreq_state;

	return 0;
}

/**
 * cpufreq_set_cur_state - callback function to set the current cooling state.
 * @cdev: thermal cooling device pointer.
 * @state: set this variable to the current cooling state.
 *
 * Callback for the thermal cooling device to change the cpufreq
 * current cooling state.
 *
 * Return: 0 on success, an error code otherwise.
 */
static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long state)
{
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
	unsigned int clip_freq;

	/* Request state should be less than max_level */
	if (WARN_ON(state > cpufreq_cdev->max_level))
		return -EINVAL;

	/* Check if the old cooling action is same as new cooling action */
	if (cpufreq_cdev->cpufreq_state == state)
		return 0;

	clip_freq = cpufreq_cdev->freq_table[state].frequency;
	cpufreq_cdev->cpufreq_state = state;
	cpufreq_cdev->clipped_freq = clip_freq;

	cpufreq_update_policy(cpufreq_cdev->policy->cpu);

	return 0;
}

/**
 * cpufreq_get_requested_power() - get the current power
 * @cdev:	&thermal_cooling_device pointer
 * @tz:		a valid thermal zone device pointer
 * @power:	pointer in which to store the resulting power
 *
 * Calculate the current power consumption of the cpus in milliwatts
 * and store it in @power. This function should actually calculate
 * the requested power, but it's hard to get the frequency that
 * cpufreq would have assigned if there were no thermal limits.
 * Instead, we calculate the current power on the assumption that the
 * immediate future will look like the immediate past.
 *
 * We use the current frequency and the average load since this
 * function was last called. In reality, there could have been
 * multiple opps since this function was last called and that affects
 * the load calculation. While it's not perfectly accurate, this
 * simplification is good enough and works. REVISIT this, as more
 * complex code may be needed if experiments show that it's not
 * accurate enough.
 *
 * Return: 0 on success, -E* if getting the static power failed.
 */
static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
				       struct thermal_zone_device *tz,
				       u32 *power)
{
	unsigned long freq;
	int i = 0, cpu, ret;
	u32 static_power, dynamic_power, total_load = 0;
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
	struct cpufreq_policy *policy = cpufreq_cdev->policy;
	u32 *load_cpu = NULL;

	freq = cpufreq_quick_get(policy->cpu);

	if (trace_thermal_power_cpu_get_power_enabled()) {
		u32 ncpus = cpumask_weight(policy->related_cpus);

		load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
	}

	for_each_cpu(cpu, policy->related_cpus) {
		u32 load;

		if (cpu_online(cpu))
			load = get_load(cpufreq_cdev, cpu, i);
		else
			load = 0;

		total_load += load;
		if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
			load_cpu[i] = load;

		i++;
	}

	cpufreq_cdev->last_load = total_load;

	dynamic_power = get_dynamic_power(cpufreq_cdev, freq);
	ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
	if (ret) {
		kfree(load_cpu);
		return ret;
	}

	if (load_cpu) {
		trace_thermal_power_cpu_get_power(policy->related_cpus, freq,
						  load_cpu, i, dynamic_power,
						  static_power);

		kfree(load_cpu);
	}

	*power = static_power + dynamic_power;
	return 0;
}
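
/*
 * Note on magnitudes (illustrative): total_load is the sum of the per-cpu
 * loads, so a fully busy 4-cpu policy yields last_load = 400. The same
 * scale is used by get_dynamic_power() and cpufreq_power2state(), where
 * dividing or multiplying by 100 converts between the per-cpu table power
 * and the aggregate estimate.
 */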

/**
 * cpufreq_state2power() - convert a cpu cdev state to power consumed
 * @cdev:	&thermal_cooling_device pointer
 * @tz:		a valid thermal zone device pointer
 * @state:	cooling device state to be converted
 * @power:	pointer in which to store the resulting power
 *
 * Convert cooling device state @state into power consumption in
 * milliwatts assuming 100% load. Store the calculated power in
 * @power.
 *
 * Return: 0 on success, -EINVAL if the cooling device state could not
 * be converted into a frequency or other -E* if there was an error
 * when calculating the static power.
 */
static int cpufreq_state2power(struct thermal_cooling_device *cdev,
			       struct thermal_zone_device *tz,
			       unsigned long state, u32 *power)
{
	unsigned int freq, num_cpus;
	u32 static_power, dynamic_power;
	int ret;
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;

	num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);

	freq = cpufreq_cdev->freq_table[state].frequency;
	if (!freq)
		return -EINVAL;

	dynamic_power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
	ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
	if (ret)
		return ret;

	*power = static_power + dynamic_power;
	return ret;
}

/**
 * cpufreq_power2state() - convert power to a cooling device state
 * @cdev:	&thermal_cooling_device pointer
 * @tz:		a valid thermal zone device pointer
 * @power:	power in milliwatts to be converted
 * @state:	pointer in which to store the resulting state
 *
 * Calculate a cooling device state for the cpus described by @cdev
 * that would allow them to consume at most @power mW and store it in
 * @state. Note that this calculation depends on external factors
 * such as the cpu load or the current static power. Calling this
 * function with the same power as input can yield different cooling
 * device states depending on those external factors.
 *
 * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
 * the calculated frequency could not be converted to a valid state.
 * The latter should not happen unless the frequencies available to
 * cpufreq have changed since the initialization of the cpu cooling
 * device.
 */
static int cpufreq_power2state(struct thermal_cooling_device *cdev,
			       struct thermal_zone_device *tz, u32 power,
			       unsigned long *state)
{
	unsigned int cur_freq, target_freq;
	int ret;
	s32 dyn_power;
	u32 last_load, normalised_power, static_power;
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
	struct cpufreq_policy *policy = cpufreq_cdev->policy;

	cur_freq = cpufreq_quick_get(policy->cpu);
	ret = get_static_power(cpufreq_cdev, tz, cur_freq, &static_power);
	if (ret)
		return ret;

	dyn_power = power - static_power;
	dyn_power = dyn_power > 0 ? dyn_power : 0;
	last_load = cpufreq_cdev->last_load ?: 1;
	normalised_power = (dyn_power * 100) / last_load;
	target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);

	*state = get_level(cpufreq_cdev, target_freq);
	if (*state == THERMAL_CSTATE_INVALID) {
		dev_err_ratelimited(&cdev->device,
				    "Failed to convert %dKHz for cpu %d into a cdev state\n",
				    target_freq, policy->cpu);
		return -EINVAL;
	}

	trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state,
				      power);
	return 0;
}
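
/*
 * Illustrative walk through cpufreq_power2state() with made up numbers:
 * given a budget of 1000 mW, a static power of 100 mW and a last measured
 * load of 50%, the dynamic budget is 900 mW and the normalised (100% load)
 * budget is 900 * 100 / 50 = 1800 mW. cpu_power_to_freq() then picks the
 * highest frequency whose table entry consumes at most 1800 mW, and
 * get_level() maps that frequency back to a cooling state.
 */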

/* Bind cpufreq callbacks to thermal cooling device ops */

static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
	.get_max_state = cpufreq_get_max_state,
	.get_cur_state = cpufreq_get_cur_state,
	.set_cur_state = cpufreq_set_cur_state,
};

static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
	.get_max_state		= cpufreq_get_max_state,
	.get_cur_state		= cpufreq_get_cur_state,
	.set_cur_state		= cpufreq_set_cur_state,
	.get_requested_power	= cpufreq_get_requested_power,
	.state2power		= cpufreq_state2power,
	.power2state		= cpufreq_power2state,
};

/* Notifier for cpufreq policy change */
static struct notifier_block thermal_cpufreq_notifier_block = {
	.notifier_call = cpufreq_thermal_notifier,
};

static unsigned int find_next_max(struct cpufreq_frequency_table *table,
				  unsigned int prev_max)
{
	struct cpufreq_frequency_table *pos;
	unsigned int max = 0;

	cpufreq_for_each_valid_entry(pos, table) {
		if (pos->frequency > max && pos->frequency < prev_max)
			max = pos->frequency;
	}

	return max;
}
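
/*
 * Example (hypothetical table): for valid entries {600000, 1200000, 900000}
 * kHz, successive calls with prev_max seeded to UINT_MAX (the 'freq = -1'
 * in the registration loop below) return 1200000, then 900000, then 600000,
 * which is how the descending freq_table is filled.
 */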

/**
 * __cpufreq_cooling_register - helper function to create cpufreq cooling device
 * @np: a valid struct device_node to the cooling device device tree node
 * @policy: cpufreq policy
 * @capacitance: dynamic power coefficient for these cpus
 * @plat_static_func: function to calculate the static power consumed by these
 *                    cpus (optional)
 *
 * This interface function registers the cpufreq cooling device with the name
 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
 * cooling devices. It also gives the opportunity to link the cooling device
 * with a device tree node, in order to bind it via the thermal DT code.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
static struct thermal_cooling_device *
__cpufreq_cooling_register(struct device_node *np,
			struct cpufreq_policy *policy, u32 capacitance,
			get_static_t plat_static_func)
{
	struct thermal_cooling_device *cdev;
	struct cpufreq_cooling_device *cpufreq_cdev;
	char dev_name[THERMAL_NAME_LENGTH];
	unsigned int freq, i, num_cpus;
	int ret;
	struct thermal_cooling_device_ops *cooling_ops;
	bool first;

	if (IS_ERR_OR_NULL(policy)) {
		pr_err("%s: cpufreq policy isn't valid: %p", __func__, policy);
		return ERR_PTR(-EINVAL);
	}

	i = cpufreq_table_count_valid_entries(policy);
	if (!i) {
		pr_debug("%s: CPUFreq table not found or has no valid entries\n",
			 __func__);
		return ERR_PTR(-ENODEV);
	}

	cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
	if (!cpufreq_cdev)
		return ERR_PTR(-ENOMEM);

	cpufreq_cdev->policy = policy;
	num_cpus = cpumask_weight(policy->related_cpus);
	cpufreq_cdev->idle_time = kcalloc(num_cpus,
					  sizeof(*cpufreq_cdev->idle_time),
					  GFP_KERNEL);
	if (!cpufreq_cdev->idle_time) {
		cdev = ERR_PTR(-ENOMEM);
		goto free_cdev;
	}

	/* max_level is an index, not a counter */
	cpufreq_cdev->max_level = i - 1;

	cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * i,
					   GFP_KERNEL);
	if (!cpufreq_cdev->freq_table) {
		cdev = ERR_PTR(-ENOMEM);
		goto free_idle_time;
	}

	ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
	if (ret < 0) {
		cdev = ERR_PTR(ret);
		goto free_table;
	}
	cpufreq_cdev->id = ret;

	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
		 cpufreq_cdev->id);

	/* Fill freq-table in descending order of frequencies */
	for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
		freq = find_next_max(policy->freq_table, freq);
		cpufreq_cdev->freq_table[i].frequency = freq;

		/* Warn for duplicate entries */
		if (!freq)
			pr_warn("%s: table has duplicate entries\n", __func__);
		else
			pr_debug("%s: freq:%u KHz\n", __func__, freq);
	}

	if (capacitance) {
		cpufreq_cdev->plat_get_static_power = plat_static_func;

		ret = update_freq_table(cpufreq_cdev, capacitance);
		if (ret) {
			cdev = ERR_PTR(ret);
			goto remove_ida;
		}

		cooling_ops = &cpufreq_power_cooling_ops;
	} else {
		cooling_ops = &cpufreq_cooling_ops;
	}

	cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
						  cooling_ops);
	if (IS_ERR(cdev))
		goto remove_ida;

	cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency;
	cpufreq_cdev->cdev = cdev;

	mutex_lock(&cooling_list_lock);
	/* Register the notifier for first cpufreq cooling device */
	first = list_empty(&cpufreq_cdev_list);
	list_add(&cpufreq_cdev->node, &cpufreq_cdev_list);
	mutex_unlock(&cooling_list_lock);

	if (first)
		cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
					  CPUFREQ_POLICY_NOTIFIER);

	return cdev;

remove_ida:
	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
free_table:
	kfree(cpufreq_cdev->freq_table);
free_idle_time:
	kfree(cpufreq_cdev->idle_time);
free_cdev:
	kfree(cpufreq_cdev);

	return cdev;
}

/**
 * cpufreq_cooling_register - function to create cpufreq cooling device.
 * @policy: cpufreq policy
 *
 * This interface function registers the cpufreq cooling device with the name
 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
 * cooling devices.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
struct thermal_cooling_device *
cpufreq_cooling_register(struct cpufreq_policy *policy)
{
	return __cpufreq_cooling_register(NULL, policy, 0, NULL);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
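
/*
 * Minimal usage sketch (hypothetical driver code, not part of this file):
 * a platform thermal driver would typically register the cooling device
 * from its probe path once a cpufreq policy exists, e.g.:
 *
 *	struct cpufreq_policy *policy = cpufreq_cpu_get(0);
 *	struct thermal_cooling_device *cdev;
 *
 *	if (policy) {
 *		cdev = cpufreq_cooling_register(policy);
 *		if (IS_ERR(cdev))
 *			pr_err("failed to register cpufreq cooling device\n");
 *		cpufreq_cpu_put(policy);
 *	}
 *
 * and call cpufreq_cooling_unregister(cdev) on removal.
 */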

/**
 * of_cpufreq_cooling_register - function to create cpufreq cooling device.
 * @np: a valid struct device_node to the cooling device device tree node
 * @policy: cpufreq policy
 *
 * This interface function registers the cpufreq cooling device with the name
 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
 * cooling devices. Using this API, the cpufreq cooling device will be
 * linked to the device tree node provided.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
struct thermal_cooling_device *
of_cpufreq_cooling_register(struct device_node *np,
			    struct cpufreq_policy *policy)
{
	if (!np)
		return ERR_PTR(-EINVAL);

	return __cpufreq_cooling_register(np, policy, 0, NULL);
}
EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
2015-02-26 22:00:29 +03:00
/**
 * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
2017-04-25 13:27:14 +03:00
 * @policy: cpufreq policy
2015-02-26 22:00:29 +03:00
 * @capacitance: dynamic power coefficient for these cpus
 * @plat_static_func: function to calculate the static power consumed by these
 *                    cpus (optional)
 *
 * This interface function registers the cpufreq cooling device with
 * the name "thermal-cpufreq-%x". This API can support multiple
 * instances of cpufreq cooling devices. Using this function, the
 * cooling device will implement the power extensions by using a
 * simple cpu power model. The cpus must have registered their OPPs
 * using the OPP library.
 *
 * An optional @plat_static_func may be provided to calculate the
 * static power consumed by these cpus. If the platform's static
 * power consumption is unknown or negligible, make it NULL.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
struct thermal_cooling_device *
2017-04-25 13:27:14 +03:00
cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance,
2015-02-26 22:00:29 +03:00
			       get_static_t plat_static_func)
{
2017-04-25 13:27:14 +03:00
	return __cpufreq_cooling_register(NULL, policy, capacitance,
2015-02-26 22:00:29 +03:00
					  plat_static_func);
}
EXPORT_SYMBOL(cpufreq_power_cooling_register);
/**
 * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
 * @np: a valid struct device_node to the cooling device device tree node
2017-04-25 13:27:14 +03:00
 * @policy: cpufreq policy
2015-02-26 22:00:29 +03:00
 * @capacitance: dynamic power coefficient for these cpus
 * @plat_static_func: function to calculate the static power consumed by these
 *                    cpus (optional)
 *
 * This interface function registers the cpufreq cooling device with
 * the name "thermal-cpufreq-%x". This API can support multiple
 * instances of cpufreq cooling devices. Using this API, the cpufreq
 * cooling device will be linked to the device tree node provided.
 * Using this function, the cooling device will implement the power
 * extensions by using a simple cpu power model. The cpus must have
 * registered their OPPs using the OPP library.
 *
 * An optional @plat_static_func may be provided to calculate the
 * static power consumed by these cpus. If the platform's static
 * power consumption is unknown or negligible, make it NULL.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
struct thermal_cooling_device *
of_cpufreq_power_cooling_register(struct device_node *np,
2017-04-25 13:27:14 +03:00
				  struct cpufreq_policy *policy,
2015-02-26 22:00:29 +03:00
				  u32 capacitance,
				  get_static_t plat_static_func)
{
	if (!np)
		return ERR_PTR(-EINVAL);
2017-04-25 13:27:14 +03:00
	return __cpufreq_cooling_register(np, policy, capacitance,
2015-02-26 22:00:29 +03:00
					  plat_static_func);
}
EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
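/*
 * The @capacitance argument is the C in the simple dynamic power model
 * Pdyn = C * V^2 * f used by the power extensions. A DT-based caller would
 * typically read it from the CPU node's "dynamic-power-coefficient"
 * property; a sketch under that assumption (hypothetical helper, no static
 * power model, capacitance left at 0 if the property is absent):
 */
static struct thermal_cooling_device *
sketch_register_power_cooling(struct device_node *np,
			      struct cpufreq_policy *policy)
{
	u32 capacitance = 0;

	of_property_read_u32(np, "dynamic-power-coefficient", &capacitance);
	return of_cpufreq_power_cooling_register(np, policy, capacitance, NULL);
}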
2012-08-16 15:41:40 +04:00
/**
 * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
 * @cdev: thermal cooling device pointer.
2013-04-17 21:12:16 +04:00
 *
 * This interface function unregisters the "thermal-cpufreq-%x" cooling device.
2012-08-16 15:41:40 +04:00
 */
void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
{
2017-04-25 13:27:10 +03:00
	struct cpufreq_cooling_device *cpufreq_cdev;
2017-03-10 21:33:28 +03:00
	bool last;
2012-08-16 15:41:40 +04:00
2013-08-15 18:54:46 +04:00
	if (!cdev)
		return;
2017-04-25 13:27:10 +03:00
	cpufreq_cdev = cdev->devdata;
2012-08-16 15:41:40 +04:00
2016-12-21 20:47:05 +03:00
	mutex_lock(&cooling_list_lock);
2017-04-25 13:27:10 +03:00
	list_del(&cpufreq_cdev->node);
2012-08-16 15:41:40 +04:00
	/* Unregister the notifier for the last cpufreq cooling device */
2017-04-25 13:27:10 +03:00
	last = list_empty(&cpufreq_cdev_list);
2017-03-10 21:33:28 +03:00
	mutex_unlock(&cooling_list_lock);
	if (last)
2012-08-16 15:41:40 +04:00
		cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
2013-04-17 21:12:11 +04:00
					    CPUFREQ_POLICY_NOTIFIER);
thermal: cpu_cooling: fix lockdep problems in cpu_cooling
2015-08-12 12:52:16 +03:00
2017-04-25 13:27:11 +03:00
	thermal_cooling_device_unregister(cpufreq_cdev->cdev);
2017-04-25 13:27:10 +03:00
	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
2017-04-25 13:27:20 +03:00
	kfree(cpufreq_cdev->idle_time);
2017-04-25 13:27:10 +03:00
	kfree(cpufreq_cdev->freq_table);
	kfree(cpufreq_cdev);
2012-08-16 15:41:40 +04:00
}
2013-04-17 21:11:57 +04:00
EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
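/*
 * Pairing with the ->ready() sketch above, teardown would come from the
 * driver's ->exit() callback. cpufreq_cooling_unregister() tolerates a NULL
 * pointer (see the !cdev check above), so a failed or skipped registration
 * needs no special casing here. my_cpufreq_exit() and the driver_data usage
 * are the same assumptions as in the earlier sketch.
 */
static int my_cpufreq_exit(struct cpufreq_policy *policy)
{
	cpufreq_cooling_unregister(policy->driver_data);
	policy->driver_data = NULL;
	return 0;
}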