2019-05-27 09:55:05 +03:00
// SPDX-License-Identifier: GPL-2.0-or-later
2009-09-18 23:41:09 +04:00
/*
* A hwmon driver for ACPI 4.0 power meters
* Copyright (C) 2009 IBM
*
2013-08-27 02:42:27 +04:00
* Author: Darrick J. Wong <darrick.wong@oracle.com>
2009-09-18 23:41:09 +04:00
*/
# include <linux/module.h>
# include <linux/hwmon.h>
# include <linux/hwmon-sysfs.h>
# include <linux/jiffies.h>
# include <linux/mutex.h>
# include <linux/dmi.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
wildly available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 11:04:11 +03:00
# include <linux/slab.h>
2009-09-18 23:41:09 +04:00
# include <linux/kdev_t.h>
# include <linux/sched.h>
# include <linux/time.h>
2012-10-10 17:25:56 +04:00
# include <linux/err.h>
2013-12-03 04:49:16 +04:00
# include <linux/acpi.h>
2009-09-18 23:41:09 +04:00
/* Driver identification strings; ACPI_POWER_METER_NAME is what show_name() reports. */
# define ACPI_POWER_METER_NAME "power_meter"
# define ACPI_POWER_METER_DEVICE_NAME "Power Meter"
2010-04-28 01:01:07 +04:00
# define ACPI_POWER_METER_CLASS "pwr_meter_resource"
2009-09-18 23:41:09 +04:00
/* Size of the sensors[] array in struct acpi_power_meter_resource. */
# define NUM_SENSORS 17
/*
 * Bit masks for the capability flags word (caps.flags).  Only
 * POWER_METER_IS_BATTERY is tested in the code visible here (show_val).
 */
# define POWER_METER_CAN_MEASURE (1 << 0)
# define POWER_METER_CAN_TRIP (1 << 1)
# define POWER_METER_CAN_CAP (1 << 2)
# define POWER_METER_CAN_NOTIFY (1 << 3)
# define POWER_METER_IS_BATTERY (1 << 8)
/* caps.hysteresis value that show_val() reports as "unknown". */
# define UNKNOWN_HYSTERESIS 0xFFFFFFFF
/*
 * NOTE(review): presumably the ACPI notification codes handled by the
 * driver's notify callback, which is not part of this excerpt - confirm.
 */
# define METER_NOTIFY_CONFIG 0x80
# define METER_NOTIFY_TRIP 0x81
# define METER_NOTIFY_CAP 0x82
# define METER_NOTIFY_CAPPING 0x83
# define METER_NOTIFY_INTERVAL 0x84
/* sysfs attribute labels referenced by the sensor template tables. */
# define POWER_AVERAGE_NAME "power1_average"
# define POWER_CAP_NAME "power1_cap"
# define POWER_AVG_INTERVAL_NAME "power1_average_interval"
# define POWER_ALARM_NAME "power1_alarm"
/*
 * Inputs to can_cap_in_hardware(): capping is reported as available when
 * either of these is nonzero.  Where they are set is outside this excerpt.
 */
static int cap_in_hardware ;
2012-01-13 03:02:20 +04:00
static bool force_cap_on ;
2009-09-18 23:41:09 +04:00
static int can_cap_in_hardware ( void )
{
return force_cap_on | | cap_in_hardware ;
}
2010-01-10 19:15:36 +03:00
/*
 * ACPI device IDs this driver matches; the list is terminated by an entry
 * with an empty ID string and exported via MODULE_DEVICE_TABLE.
 */
static const struct acpi_device_id power_meter_ids [ ] = {
2009-09-18 23:41:09 +04:00
{ " ACPI000D " , 0 } ,
{ " " , 0 } ,
} ;
MODULE_DEVICE_TABLE ( acpi , power_meter_ids ) ;
/*
 * Capability values describing one power meter.  How the structure is
 * filled in is not visible in this part of the file; the comments below
 * describe only how the visible code consumes each field.
 */
struct acpi_power_meter_capabilities {
2010-01-28 05:53:19 +03:00
/* Bit field tested against the POWER_METER_* masks. */
u64 flags ;
u64 units ;
u64 type ;
/* Shown by show_accuracy() as (accuracy / 1000).(accuracy % 1000) percent. */
u64 accuracy ;
/* Passed to msecs_to_jiffies() by update_meter() to rate-limit readings. */
u64 sampling_time ;
/* Inclusive bounds enforced by set_avg_interval(). */
u64 min_avg_interval ;
u64 max_avg_interval ;
/* UNKNOWN_HYSTERESIS is shown as "unknown"; otherwise shown times 1000. */
u64 hysteresis ;
u64 configurable_cap ;
/* Inclusive bounds enforced by set_cap(); shown times 1000 by show_val(). */
u64 min_cap ;
u64 max_cap ;
2009-09-18 23:41:09 +04:00
} ;
/*
 * Per-device driver state; reached from the sysfs handlers through
 * acpi_dev->driver_data.
 */
struct acpi_power_meter_resource {
/* Owning ACPI device; its handle is used for every method evaluation. */
struct acpi_device * acpi_dev ;
acpi_bus_id name ;
/* Held around the update_*() helpers, the ACPI setters and show_str(). */
struct mutex lock ;
struct device * hwmon_dev ;
struct acpi_power_meter_capabilities caps ;
/* Strings reported by show_str() for attribute index 0, 1 and 2. */
acpi_string model_number ;
acpi_string serial_number ;
acpi_string oem_info ;
2010-01-28 05:53:19 +03:00
/* Last values obtained from _PMM, _GHL/_SHL and _GAI/_PAI respectively. */
u64 power ;
u64 cap ;
u64 avg_interval ;
2009-09-18 23:41:09 +04:00
/* Set once update_meter() has stored a reading in 'power'. */
int sensors_valid ;
/* jiffies timestamp of the last successful update_meter() reading. */
unsigned long sensors_last_updated ;
struct sensor_device_attribute sensors [ NUM_SENSORS ] ;
int num_sensors ;
2012-06-21 17:21:05 +04:00
/* Trip points: [0] = min, [1] = max; a negative value means "not set". */
s64 trip [ 2 ] ;
2009-09-18 23:41:09 +04:00
int num_domain_devices ;
struct acpi_device * * domain_devices ;
struct kobject * holders_dir ;
} ;
2012-04-02 22:19:00 +04:00
/*
 * Description of one sysfs attribute: its file name, its read handler,
 * an optional write handler, and an index the handlers use to tell
 * attributes sharing the same handler apart.
 */
struct sensor_template {
2009-09-18 23:41:09 +04:00
/* Attribute (file) name. */
char * label ;
/* Read handler. */
ssize_t ( * show ) ( struct device * dev ,
struct device_attribute * devattr ,
char * buf ) ;
/* Write handler; left unset by RO_SENSOR_TEMPLATE entries. */
ssize_t ( * set ) ( struct device * dev ,
struct device_attribute * devattr ,
const char * buf , size_t count ) ;
/* Selector interpreted by the handler (see show_val(), show_str()). */
int index ;
} ;
/* Averaging interval */
static int update_avg_interval ( struct acpi_power_meter_resource * resource )
{
unsigned long long data ;
acpi_status status ;
status = acpi_evaluate_integer ( resource - > acpi_dev - > handle , " _GAI " ,
NULL , & data ) ;
if ( ACPI_FAILURE ( status ) ) {
2021-03-05 21:43:54 +03:00
acpi_evaluation_failure_warn ( resource - > acpi_dev - > handle , " _GAI " ,
status ) ;
2009-09-18 23:41:09 +04:00
return - ENODEV ;
}
resource - > avg_interval = data ;
return 0 ;
}
static ssize_t show_avg_interval ( struct device * dev ,
struct device_attribute * devattr ,
char * buf )
{
struct acpi_device * acpi_dev = to_acpi_device ( dev ) ;
struct acpi_power_meter_resource * resource = acpi_dev - > driver_data ;
mutex_lock ( & resource - > lock ) ;
update_avg_interval ( resource ) ;
mutex_unlock ( & resource - > lock ) ;
return sprintf ( buf , " %llu \n " , resource - > avg_interval ) ;
}
/*
 * sysfs write handler for the averaging interval.
 *
 * Parses a decimal value from @buf, rejects it with -EINVAL when it lies
 * outside [caps.min_avg_interval, caps.max_avg_interval], and passes it to
 * the _PAI method.  Returns @count on success, the kstrtoul() error for
 * unparsable input, or -EINVAL when _PAI cannot be evaluated or reports a
 * nonzero result.
 */
static ssize_t set_avg_interval(struct device *dev,
				struct device_attribute *devattr,
				const char *buf, size_t count)
{
	struct acpi_device *acpi_dev = to_acpi_device(dev);
	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
	union acpi_object arg0 = { ACPI_TYPE_INTEGER };
	struct acpi_object_list args = { 1, &arg0 };
	int res;
	unsigned long temp;
	unsigned long long data;
	acpi_status status;

	res = kstrtoul(buf, 10, &temp);
	if (res)
		return res;

	if (temp > resource->caps.max_avg_interval ||
	    temp < resource->caps.min_avg_interval)
		return -EINVAL;
	arg0.integer.value = temp;

	mutex_lock(&resource->lock);
	status = acpi_evaluate_integer(resource->acpi_dev->handle, "_PAI",
				       &args, &data);
	/*
	 * _PAI returns 0 on success, nonzero otherwise.  Cache the new
	 * interval only when the method both ran and accepted the value, so
	 * a rejected request does not leave a bogus cached interval behind.
	 */
	if (ACPI_SUCCESS(status) && !data)
		resource->avg_interval = temp;
	mutex_unlock(&resource->lock);

	if (ACPI_FAILURE(status)) {
		acpi_evaluation_failure_warn(resource->acpi_dev->handle, "_PAI",
					     status);
		return -EINVAL;
	}

	if (data)
		return -EINVAL;

	return count;
}
/* Cap functions */
static int update_cap ( struct acpi_power_meter_resource * resource )
{
unsigned long long data ;
acpi_status status ;
status = acpi_evaluate_integer ( resource - > acpi_dev - > handle , " _GHL " ,
NULL , & data ) ;
if ( ACPI_FAILURE ( status ) ) {
2021-03-05 21:43:54 +03:00
acpi_evaluation_failure_warn ( resource - > acpi_dev - > handle , " _GHL " ,
status ) ;
2009-09-18 23:41:09 +04:00
return - ENODEV ;
}
resource - > cap = data ;
return 0 ;
}
static ssize_t show_cap ( struct device * dev ,
struct device_attribute * devattr ,
char * buf )
{
struct acpi_device * acpi_dev = to_acpi_device ( dev ) ;
struct acpi_power_meter_resource * resource = acpi_dev - > driver_data ;
mutex_lock ( & resource - > lock ) ;
update_cap ( resource ) ;
mutex_unlock ( & resource - > lock ) ;
return sprintf ( buf , " %llu \n " , resource - > cap * 1000 ) ;
}
/*
 * sysfs write handler for the power cap.
 *
 * Parses a decimal value from @buf, divides it by 1000 (rounding to the
 * nearest integer), rejects it with -EINVAL when the result lies outside
 * [caps.min_cap, caps.max_cap], and passes it to the _SHL method.  Returns
 * @count on success, the kstrtoul() error for unparsable input, or -EINVAL
 * when _SHL cannot be evaluated or reports a nonzero result.
 */
static ssize_t set_cap(struct device *dev, struct device_attribute *devattr,
		       const char *buf, size_t count)
{
	struct acpi_device *acpi_dev = to_acpi_device(dev);
	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
	union acpi_object arg0 = { ACPI_TYPE_INTEGER };
	struct acpi_object_list args = { 1, &arg0 };
	int res;
	unsigned long temp;
	unsigned long long data;
	acpi_status status;

	res = kstrtoul(buf, 10, &temp);
	if (res)
		return res;

	temp = DIV_ROUND_CLOSEST(temp, 1000);
	if (temp > resource->caps.max_cap || temp < resource->caps.min_cap)
		return -EINVAL;
	arg0.integer.value = temp;

	mutex_lock(&resource->lock);
	status = acpi_evaluate_integer(resource->acpi_dev->handle, "_SHL",
				       &args, &data);
	/*
	 * _SHL returns 0 on success, nonzero otherwise.  Cache the new cap
	 * only when the method both ran and accepted the value; show_val()
	 * compares the cached cap against the power reading for the alarm
	 * attribute, so a rejected cap must not be stored.
	 */
	if (ACPI_SUCCESS(status) && !data)
		resource->cap = temp;
	mutex_unlock(&resource->lock);

	if (ACPI_FAILURE(status)) {
		acpi_evaluation_failure_warn(resource->acpi_dev->handle, "_SHL",
					     status);
		return -EINVAL;
	}

	if (data)
		return -EINVAL;

	return count;
}
/* Power meter trip points */
static int set_acpi_trip ( struct acpi_power_meter_resource * resource )
{
union acpi_object arg_objs [ ] = {
{ ACPI_TYPE_INTEGER } ,
{ ACPI_TYPE_INTEGER }
} ;
struct acpi_object_list args = { 2 , arg_objs } ;
unsigned long long data ;
acpi_status status ;
/* Both trip levels must be set */
if ( resource - > trip [ 0 ] < 0 | | resource - > trip [ 1 ] < 0 )
return 0 ;
/* This driver stores min, max; ACPI wants max, min. */
arg_objs [ 0 ] . integer . value = resource - > trip [ 1 ] ;
arg_objs [ 1 ] . integer . value = resource - > trip [ 0 ] ;
status = acpi_evaluate_integer ( resource - > acpi_dev - > handle , " _PTP " ,
& args , & data ) ;
if ( ACPI_FAILURE ( status ) ) {
2021-03-05 21:43:54 +03:00
acpi_evaluation_failure_warn ( resource - > acpi_dev - > handle , " _PTP " ,
status ) ;
2009-09-18 23:41:09 +04:00
return - EINVAL ;
}
2009-10-22 05:01:37 +04:00
/* _PTP returns 0 on success, nonzero otherwise */
if ( data )
return - EINVAL ;
return 0 ;
2009-09-18 23:41:09 +04:00
}
/*
 * sysfs write handler for the two trip-point attributes.
 *
 * Parses a decimal value from @buf, divides it by 1000 (rounding to the
 * nearest integer), stores it in trip[index - 7] and asks set_acpi_trip()
 * to program the firmware.  Returns @count on success or a negative error
 * from kstrtoul() or set_acpi_trip().
 */
static ssize_t set_trip(struct device *dev, struct device_attribute *devattr,
			const char *buf, size_t count)
{
	struct sensor_device_attribute *sattr = to_sensor_dev_attr(devattr);
	struct acpi_device *acpi_dev = to_acpi_device(dev);
	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
	unsigned long value;
	int err;

	err = kstrtoul(buf, 10, &value);
	if (err)
		return err;
	value = DIV_ROUND_CLOSEST(value, 1000);

	mutex_lock(&resource->lock);
	/* Attribute indices 7 and 8 map to trip[0] and trip[1] */
	resource->trip[sattr->index - 7] = value;
	err = set_acpi_trip(resource);
	mutex_unlock(&resource->lock);

	if (err)
		return err;

	return count;
}
/* Power meter */
static int update_meter ( struct acpi_power_meter_resource * resource )
{
unsigned long long data ;
acpi_status status ;
unsigned long local_jiffies = jiffies ;
if ( time_before ( local_jiffies , resource - > sensors_last_updated +
msecs_to_jiffies ( resource - > caps . sampling_time ) ) & &
resource - > sensors_valid )
return 0 ;
status = acpi_evaluate_integer ( resource - > acpi_dev - > handle , " _PMM " ,
NULL , & data ) ;
if ( ACPI_FAILURE ( status ) ) {
2021-03-05 21:43:54 +03:00
acpi_evaluation_failure_warn ( resource - > acpi_dev - > handle , " _PMM " ,
status ) ;
2009-09-18 23:41:09 +04:00
return - ENODEV ;
}
resource - > power = data ;
resource - > sensors_valid = 1 ;
resource - > sensors_last_updated = jiffies ;
return 0 ;
}
static ssize_t show_power ( struct device * dev ,
struct device_attribute * devattr ,
char * buf )
{
struct acpi_device * acpi_dev = to_acpi_device ( dev ) ;
struct acpi_power_meter_resource * resource = acpi_dev - > driver_data ;
mutex_lock ( & resource - > lock ) ;
update_meter ( resource ) ;
mutex_unlock ( & resource - > lock ) ;
return sprintf ( buf , " %llu \n " , resource - > power * 1000 ) ;
}
/* Miscellaneous */
static ssize_t show_str ( struct device * dev ,
struct device_attribute * devattr ,
char * buf )
{
struct sensor_device_attribute * attr = to_sensor_dev_attr ( devattr ) ;
struct acpi_device * acpi_dev = to_acpi_device ( dev ) ;
struct acpi_power_meter_resource * resource = acpi_dev - > driver_data ;
acpi_string val ;
hwmon: (acpi_power_meter) Fix lockdep splat
Damien Le Moal reports a lockdep splat with the acpi_power_meter,
observed with Linux v5.5 and later.
======================================================
WARNING: possible circular locking dependency detected
5.6.0-rc2+ #629 Not tainted
------------------------------------------------------
python/1397 is trying to acquire lock:
ffff888619080070 (&resource->lock){+.+.}, at: show_power+0x3c/0xa0 [acpi_power_meter]
but task is already holding lock:
ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (kn->count#119){++++}:
__kernfs_remove+0x626/0x7e0
kernfs_remove_by_name_ns+0x41/0x80
remove_attrs+0xcb/0x3c0 [acpi_power_meter]
acpi_power_meter_notify+0x1f7/0x310 [acpi_power_meter]
acpi_ev_notify_dispatch+0x198/0x1f3
acpi_os_execute_deferred+0x4d/0x70
process_one_work+0x7c8/0x1340
worker_thread+0x94/0xc70
kthread+0x2ed/0x3f0
ret_from_fork+0x24/0x30
-> #0 (&resource->lock){+.+.}:
__lock_acquire+0x20be/0x49b0
lock_acquire+0x127/0x340
__mutex_lock+0x15b/0x1350
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
vfs_read+0x152/0x2c0
ksys_read+0xf3/0x1d0
do_syscall_64+0x95/0x1010
entry_SYSCALL_64_after_hwframe+0x49/0xbe
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(kn->count#119);
lock(&resource->lock);
lock(kn->count#119);
lock(&resource->lock);
*** DEADLOCK ***
4 locks held by python/1397:
#0: ffff8890242d64e0 (&f->f_pos_lock){+.+.}, at: __fdget_pos+0x9b/0xb0
#1: ffff889040be74e0 (&p->lock){+.+.}, at: seq_read+0x6b/0xf90
#2: ffff8890448eb880 (&of->mutex){+.+.}, at: kernfs_seq_start+0x47/0x160
#3: ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
stack backtrace:
CPU: 10 PID: 1397 Comm: python Not tainted 5.6.0-rc2+ #629
Hardware name: Supermicro Super Server/X11DPL-i, BIOS 3.1 05/21/2019
Call Trace:
dump_stack+0x97/0xe0
check_noncircular+0x32e/0x3e0
? print_circular_bug.isra.0+0x1e0/0x1e0
? unwind_next_frame+0xb9a/0x1890
? entry_SYSCALL_64_after_hwframe+0x49/0xbe
? graph_lock+0x79/0x170
? __lockdep_reset_lock+0x3c0/0x3c0
? mark_lock+0xbc/0x1150
__lock_acquire+0x20be/0x49b0
? mark_held_locks+0xe0/0xe0
? stack_trace_save+0x91/0xc0
lock_acquire+0x127/0x340
? show_power+0x3c/0xa0 [acpi_power_meter]
? device_remove_bin_file+0x10/0x10
? device_remove_bin_file+0x10/0x10
__mutex_lock+0x15b/0x1350
? show_power+0x3c/0xa0 [acpi_power_meter]
? show_power+0x3c/0xa0 [acpi_power_meter]
? mutex_lock_io_nested+0x11f0/0x11f0
? lock_downgrade+0x6a0/0x6a0
? kernfs_seq_start+0x47/0x160
? lock_acquire+0x127/0x340
? kernfs_seq_start+0x6a/0x160
? device_remove_bin_file+0x10/0x10
? show_power+0x3c/0xa0 [acpi_power_meter]
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
? memset+0x20/0x40
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
? security_file_permission+0x16f/0x2c0
vfs_read+0x152/0x2c0
Problem is that reading an attribute takes the kernfs lock in the kernfs
code, then resource->lock in the driver. During an ACPI notification, the
opposite happens: The resource lock is taken first, followed by the kernfs
lock when sysfs attributes are removed and re-created. Presumably this is
now seen due to some locking related changes in kernfs after v5.4, but it
was likely always a problem.
Fix the problem by not blindly acquiring the lock in the notification
function. It is only needed to protect the various update functions.
However, those update functions are called anyway when sysfs attributes
are read. This means that we can just stop calling those functions from
the notifier, and the resource lock in the notifier function is no longer
needed.
That leaves two situations:
First, METER_NOTIFY_CONFIG removes and re-allocates capability strings.
While it did so under the resource lock, _displaying_ those strings was not
protected, creating a race condition. To solve this problem, selectively
protect both removal/creation and reporting of capability attributes with
the resource lock.
Second, removing and re-creating the attribute files is no longer protected
by the resource lock. That doesn't matter since access to each individual
attribute is protected by the kernfs lock. Userspace may get messed up if
attributes disappear and reappear under its nose, but that is not different
than today, and there is nothing we can do about it without major driver
restructuring.
Last but not least, when removing the driver, remove attribute functions
first, then release capability strings. This avoids yet another race
condition.
Reported-by: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: stable@vger.kernel.org # v5.5+
Tested-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2020-02-20 01:36:14 +03:00
int ret ;
2009-09-18 23:41:09 +04:00
hwmon: (acpi_power_meter) Fix lockdep splat
Damien Le Moal reports a lockdep splat with the acpi_power_meter,
observed with Linux v5.5 and later.
======================================================
WARNING: possible circular locking dependency detected
5.6.0-rc2+ #629 Not tainted
------------------------------------------------------
python/1397 is trying to acquire lock:
ffff888619080070 (&resource->lock){+.+.}, at: show_power+0x3c/0xa0 [acpi_power_meter]
but task is already holding lock:
ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (kn->count#119){++++}:
__kernfs_remove+0x626/0x7e0
kernfs_remove_by_name_ns+0x41/0x80
remove_attrs+0xcb/0x3c0 [acpi_power_meter]
acpi_power_meter_notify+0x1f7/0x310 [acpi_power_meter]
acpi_ev_notify_dispatch+0x198/0x1f3
acpi_os_execute_deferred+0x4d/0x70
process_one_work+0x7c8/0x1340
worker_thread+0x94/0xc70
kthread+0x2ed/0x3f0
ret_from_fork+0x24/0x30
-> #0 (&resource->lock){+.+.}:
__lock_acquire+0x20be/0x49b0
lock_acquire+0x127/0x340
__mutex_lock+0x15b/0x1350
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
vfs_read+0x152/0x2c0
ksys_read+0xf3/0x1d0
do_syscall_64+0x95/0x1010
entry_SYSCALL_64_after_hwframe+0x49/0xbe
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(kn->count#119);
lock(&resource->lock);
lock(kn->count#119);
lock(&resource->lock);
*** DEADLOCK ***
4 locks held by python/1397:
#0: ffff8890242d64e0 (&f->f_pos_lock){+.+.}, at: __fdget_pos+0x9b/0xb0
#1: ffff889040be74e0 (&p->lock){+.+.}, at: seq_read+0x6b/0xf90
#2: ffff8890448eb880 (&of->mutex){+.+.}, at: kernfs_seq_start+0x47/0x160
#3: ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
stack backtrace:
CPU: 10 PID: 1397 Comm: python Not tainted 5.6.0-rc2+ #629
Hardware name: Supermicro Super Server/X11DPL-i, BIOS 3.1 05/21/2019
Call Trace:
dump_stack+0x97/0xe0
check_noncircular+0x32e/0x3e0
? print_circular_bug.isra.0+0x1e0/0x1e0
? unwind_next_frame+0xb9a/0x1890
? entry_SYSCALL_64_after_hwframe+0x49/0xbe
? graph_lock+0x79/0x170
? __lockdep_reset_lock+0x3c0/0x3c0
? mark_lock+0xbc/0x1150
__lock_acquire+0x20be/0x49b0
? mark_held_locks+0xe0/0xe0
? stack_trace_save+0x91/0xc0
lock_acquire+0x127/0x340
? show_power+0x3c/0xa0 [acpi_power_meter]
? device_remove_bin_file+0x10/0x10
? device_remove_bin_file+0x10/0x10
__mutex_lock+0x15b/0x1350
? show_power+0x3c/0xa0 [acpi_power_meter]
? show_power+0x3c/0xa0 [acpi_power_meter]
? mutex_lock_io_nested+0x11f0/0x11f0
? lock_downgrade+0x6a0/0x6a0
? kernfs_seq_start+0x47/0x160
? lock_acquire+0x127/0x340
? kernfs_seq_start+0x6a/0x160
? device_remove_bin_file+0x10/0x10
? show_power+0x3c/0xa0 [acpi_power_meter]
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
? memset+0x20/0x40
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
? security_file_permission+0x16f/0x2c0
vfs_read+0x152/0x2c0
Problem is that reading an attribute takes the kernfs lock in the kernfs
code, then resource->lock in the driver. During an ACPI notification, the
opposite happens: The resource lock is taken first, followed by the kernfs
lock when sysfs attributes are removed and re-created. Presumably this is
now seen due to some locking related changes in kernfs after v5.4, but it
was likely always a problem.
Fix the problem by not blindly acquiring the lock in the notification
function. It is only needed to protect the various update functions.
However, those update functions are called anyway when sysfs attributes
are read. This means that we can just stop calling those functions from
the notifier, and the resource lock in the notifier function is no longer
needed.
That leaves two situations:
First, METER_NOTIFY_CONFIG removes and re-allocates capability strings.
While it did so under the resource lock, _displaying_ those strings was not
protected, creating a race condition. To solve this problem, selectively
protect both removal/creation and reporting of capability attributes with
the resource lock.
Second, removing and re-creating the attribute files is no longer protected
by the resource lock. That doesn't matter since access to each individual
attribute is protected by the kernfs lock. Userspace may get messed up if
attributes disappear and reappear under its nose, but that is not different
than today, and there is nothing we can do about it without major driver
restructuring.
Last but not least, when removing the driver, remove attribute functions
first, then release capability strings. This avoids yet another race
condition.
Reported-by: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: stable@vger.kernel.org # v5.5+
Tested-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2020-02-20 01:36:14 +03:00
mutex_lock ( & resource - > lock ) ;
2009-09-18 23:41:09 +04:00
switch ( attr - > index ) {
case 0 :
val = resource - > model_number ;
break ;
case 1 :
val = resource - > serial_number ;
break ;
case 2 :
val = resource - > oem_info ;
break ;
default :
2013-09-13 21:51:35 +04:00
WARN ( 1 , " Implementation error: unexpected attribute index %d \n " ,
attr - > index ) ;
2012-03-28 20:03:26 +04:00
val = " " ;
2013-09-13 21:51:35 +04:00
break ;
2009-09-18 23:41:09 +04:00
}
hwmon: (acpi_power_meter) Fix lockdep splat
Damien Le Moal reports a lockdep splat with the acpi_power_meter,
observed with Linux v5.5 and later.
======================================================
WARNING: possible circular locking dependency detected
5.6.0-rc2+ #629 Not tainted
------------------------------------------------------
python/1397 is trying to acquire lock:
ffff888619080070 (&resource->lock){+.+.}, at: show_power+0x3c/0xa0 [acpi_power_meter]
but task is already holding lock:
ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (kn->count#119){++++}:
__kernfs_remove+0x626/0x7e0
kernfs_remove_by_name_ns+0x41/0x80
remove_attrs+0xcb/0x3c0 [acpi_power_meter]
acpi_power_meter_notify+0x1f7/0x310 [acpi_power_meter]
acpi_ev_notify_dispatch+0x198/0x1f3
acpi_os_execute_deferred+0x4d/0x70
process_one_work+0x7c8/0x1340
worker_thread+0x94/0xc70
kthread+0x2ed/0x3f0
ret_from_fork+0x24/0x30
-> #0 (&resource->lock){+.+.}:
__lock_acquire+0x20be/0x49b0
lock_acquire+0x127/0x340
__mutex_lock+0x15b/0x1350
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
vfs_read+0x152/0x2c0
ksys_read+0xf3/0x1d0
do_syscall_64+0x95/0x1010
entry_SYSCALL_64_after_hwframe+0x49/0xbe
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(kn->count#119);
lock(&resource->lock);
lock(kn->count#119);
lock(&resource->lock);
*** DEADLOCK ***
4 locks held by python/1397:
#0: ffff8890242d64e0 (&f->f_pos_lock){+.+.}, at: __fdget_pos+0x9b/0xb0
#1: ffff889040be74e0 (&p->lock){+.+.}, at: seq_read+0x6b/0xf90
#2: ffff8890448eb880 (&of->mutex){+.+.}, at: kernfs_seq_start+0x47/0x160
#3: ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
stack backtrace:
CPU: 10 PID: 1397 Comm: python Not tainted 5.6.0-rc2+ #629
Hardware name: Supermicro Super Server/X11DPL-i, BIOS 3.1 05/21/2019
Call Trace:
dump_stack+0x97/0xe0
check_noncircular+0x32e/0x3e0
? print_circular_bug.isra.0+0x1e0/0x1e0
? unwind_next_frame+0xb9a/0x1890
? entry_SYSCALL_64_after_hwframe+0x49/0xbe
? graph_lock+0x79/0x170
? __lockdep_reset_lock+0x3c0/0x3c0
? mark_lock+0xbc/0x1150
__lock_acquire+0x20be/0x49b0
? mark_held_locks+0xe0/0xe0
? stack_trace_save+0x91/0xc0
lock_acquire+0x127/0x340
? show_power+0x3c/0xa0 [acpi_power_meter]
? device_remove_bin_file+0x10/0x10
? device_remove_bin_file+0x10/0x10
__mutex_lock+0x15b/0x1350
? show_power+0x3c/0xa0 [acpi_power_meter]
? show_power+0x3c/0xa0 [acpi_power_meter]
? mutex_lock_io_nested+0x11f0/0x11f0
? lock_downgrade+0x6a0/0x6a0
? kernfs_seq_start+0x47/0x160
? lock_acquire+0x127/0x340
? kernfs_seq_start+0x6a/0x160
? device_remove_bin_file+0x10/0x10
? show_power+0x3c/0xa0 [acpi_power_meter]
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
? memset+0x20/0x40
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
? security_file_permission+0x16f/0x2c0
vfs_read+0x152/0x2c0
Problem is that reading an attribute takes the kernfs lock in the kernfs
code, then resource->lock in the driver. During an ACPI notification, the
opposite happens: The resource lock is taken first, followed by the kernfs
lock when sysfs attributes are removed and re-created. Presumably this is
now seen due to some locking related changes in kernfs after v5.4, but it
was likely always a problem.
Fix the problem by not blindly acquiring the lock in the notification
function. It is only needed to protect the various update functions.
However, those update functions are called anyway when sysfs attributes
are read. This means that we can just stop calling those functions from
the notifier, and the resource lock in the notifier function is no longer
needed.
That leaves two situations:
First, METER_NOTIFY_CONFIG removes and re-allocates capability strings.
While it did so under the resource lock, _displaying_ those strings was not
protected, creating a race condition. To solve this problem, selectively
protect both removal/creation and reporting of capability attributes with
the resource lock.
Second, removing and re-creating the attribute files is no longer protected
by the resource lock. That doesn't matter since access to each individual
attribute is protected by the kernfs lock. Userspace may get messed up if
attributes disappear and reappear under its nose, but that is not different
than today, and there is nothing we can do about it without major driver
restructuring.
Last but not least, when removing the driver, remove attribute functions
first, then release capability strings. This avoids yet another race
condition.
Reported-by: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: stable@vger.kernel.org # v5.5+
Tested-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2020-02-20 01:36:14 +03:00
ret = sprintf ( buf , " %s \n " , val ) ;
mutex_unlock ( & resource - > lock ) ;
return ret ;
2009-09-18 23:41:09 +04:00
}
static ssize_t show_val ( struct device * dev ,
struct device_attribute * devattr ,
char * buf )
{
struct sensor_device_attribute * attr = to_sensor_dev_attr ( devattr ) ;
struct acpi_device * acpi_dev = to_acpi_device ( dev ) ;
struct acpi_power_meter_resource * resource = acpi_dev - > driver_data ;
2010-01-28 05:53:19 +03:00
u64 val = 0 ;
2009-09-18 23:41:09 +04:00
switch ( attr - > index ) {
case 0 :
val = resource - > caps . min_avg_interval ;
break ;
case 1 :
val = resource - > caps . max_avg_interval ;
break ;
case 2 :
val = resource - > caps . min_cap * 1000 ;
break ;
case 3 :
val = resource - > caps . max_cap * 1000 ;
break ;
case 4 :
if ( resource - > caps . hysteresis = = UNKNOWN_HYSTERESIS )
return sprintf ( buf , " unknown \n " ) ;
val = resource - > caps . hysteresis * 1000 ;
break ;
case 5 :
if ( resource - > caps . flags & POWER_METER_IS_BATTERY )
val = 1 ;
else
val = 0 ;
break ;
case 6 :
if ( resource - > power > resource - > cap )
val = 1 ;
else
val = 0 ;
break ;
case 7 :
case 8 :
if ( resource - > trip [ attr - > index - 7 ] < 0 )
return sprintf ( buf , " unknown \n " ) ;
val = resource - > trip [ attr - > index - 7 ] * 1000 ;
break ;
default :
2013-09-13 21:51:35 +04:00
WARN ( 1 , " Implementation error: unexpected attribute index %d \n " ,
attr - > index ) ;
break ;
2009-09-18 23:41:09 +04:00
}
return sprintf ( buf , " %llu \n " , val ) ;
}
static ssize_t show_accuracy ( struct device * dev ,
struct device_attribute * devattr ,
char * buf )
{
struct acpi_device * acpi_dev = to_acpi_device ( dev ) ;
struct acpi_power_meter_resource * resource = acpi_dev - > driver_data ;
unsigned int acc = resource - > caps . accuracy ;
return sprintf ( buf , " %u.%u%% \n " , acc / 1000 , acc % 1000 ) ;
}
/* Show the fixed device name, ACPI_POWER_METER_NAME, followed by a newline. */
static ssize_t show_name(struct device *dev,
			 struct device_attribute *devattr,
			 char *buf)
{
	const ssize_t written = sprintf(buf, "%s\n", ACPI_POWER_METER_NAME);

	return written;
}
2012-04-02 22:19:01 +04:00
/*
 * Brace initializer for a read-only sensor template entry.  Only .label,
 * .show and .index are named, so .set is not given a value here;
 * register_attrs() adds write permission only when .set is non-NULL.
 */
# define RO_SENSOR_TEMPLATE(_label, _show, _index) \
{ \
. label = _label , \
. show = _show , \
. index = _index , \
}
/*
 * Brace initializer for a read-write sensor template entry: same as the
 * read-only form plus a .set callback, which register_attrs() installs as
 * the attribute's store handler.
 */
# define RW_SENSOR_TEMPLATE(_label, _show, _set, _index) \
{ \
. label = _label , \
. show = _show , \
. set = _set , \
. index = _index , \
}
2009-09-18 23:41:09 +04:00
/* Sensor descriptions. If you add a sensor, update NUM_SENSORS above! */
2012-04-02 22:19:03 +04:00
/*
 * Measurement attributes; setup_attrs() registers this table when
 * POWER_METER_CAN_MEASURE is set in caps.flags.  For the show_val entries
 * the index selects the value: 0 = minimum averaging interval, 1 = maximum
 * averaging interval, 5 = is-battery flag.  The empty entry terminates the
 * table (register_attrs() stops at the first entry without a label).
 */
static struct sensor_template meter_attrs [ ] = {
2012-04-02 22:19:01 +04:00
RO_SENSOR_TEMPLATE ( POWER_AVERAGE_NAME , show_power , 0 ) ,
RO_SENSOR_TEMPLATE ( " power1_accuracy " , show_accuracy , 0 ) ,
RO_SENSOR_TEMPLATE ( " power1_average_interval_min " , show_val , 0 ) ,
RO_SENSOR_TEMPLATE ( " power1_average_interval_max " , show_val , 1 ) ,
RO_SENSOR_TEMPLATE ( " power1_is_battery " , show_val , 5 ) ,
RW_SENSOR_TEMPLATE ( POWER_AVG_INTERVAL_NAME , show_avg_interval ,
set_avg_interval , 0 ) ,
2012-04-02 22:19:00 +04:00
{ } ,
2009-09-18 23:41:09 +04:00
} ;
2012-04-02 22:19:00 +04:00
/*
 * Cap-related read-only attributes; setup_attrs() registers this table
 * after the cap attribute itself, when POWER_METER_CAN_CAP is set and
 * can_cap_in_hardware() returns true.  show_val indices: 2 = minimum cap,
 * 3 = maximum cap, 4 = hysteresis, 6 = alarm (power greater than cap).
 */
static struct sensor_template misc_cap_attrs [ ] = {
2012-04-02 22:19:01 +04:00
RO_SENSOR_TEMPLATE ( " power1_cap_min " , show_val , 2 ) ,
RO_SENSOR_TEMPLATE ( " power1_cap_max " , show_val , 3 ) ,
RO_SENSOR_TEMPLATE ( " power1_cap_hyst " , show_val , 4 ) ,
RO_SENSOR_TEMPLATE ( POWER_ALARM_NAME , show_val , 6 ) ,
2012-04-02 22:19:00 +04:00
{ } ,
2009-09-18 23:41:09 +04:00
} ;
2012-04-02 22:19:00 +04:00
/*
 * Read-only variant of the cap attribute; setup_attrs() uses this table
 * when caps.configurable_cap is zero.
 */
static struct sensor_template ro_cap_attrs [ ] = {
2012-04-02 22:19:01 +04:00
RO_SENSOR_TEMPLATE ( POWER_CAP_NAME , show_cap , 0 ) ,
2012-04-02 22:19:00 +04:00
{ } ,
2009-09-18 23:41:09 +04:00
} ;
2012-04-02 22:19:00 +04:00
/*
 * Read-write variant of the cap attribute; setup_attrs() uses this table
 * when caps.configurable_cap is non-zero.
 */
static struct sensor_template rw_cap_attrs [ ] = {
2012-04-02 22:19:01 +04:00
RW_SENSOR_TEMPLATE ( POWER_CAP_NAME , show_cap , set_cap , 0 ) ,
2012-04-02 22:19:00 +04:00
{ } ,
2009-09-18 23:41:09 +04:00
} ;
2012-04-02 22:19:00 +04:00
/*
 * Trip point attributes; setup_attrs() registers this table when
 * POWER_METER_CAN_TRIP is set in caps.flags.  show_val maps index 7 to
 * trip[0] and index 8 to trip[1].
 */
static struct sensor_template trip_attrs [ ] = {
2012-04-02 22:19:01 +04:00
RW_SENSOR_TEMPLATE ( " power1_average_min " , show_val , set_trip , 7 ) ,
RW_SENSOR_TEMPLATE ( " power1_average_max " , show_val , set_trip , 8 ) ,
2012-04-02 22:19:00 +04:00
{ } ,
2009-09-18 23:41:09 +04:00
} ;
2012-04-02 22:19:00 +04:00
/*
 * Attributes that setup_attrs() registers unconditionally, after all the
 * capability-dependent tables.
 * NOTE(review): the show_str indices presumably select model number (0),
 * serial number (1) and OEM info (2) - confirm against show_str().
 */
static struct sensor_template misc_attrs [ ] = {
2012-04-02 22:19:01 +04:00
RO_SENSOR_TEMPLATE ( " name " , show_name , 0 ) ,
RO_SENSOR_TEMPLATE ( " power1_model_number " , show_str , 0 ) ,
RO_SENSOR_TEMPLATE ( " power1_oem_info " , show_str , 2 ) ,
RO_SENSOR_TEMPLATE ( " power1_serial_number " , show_str , 1 ) ,
2012-04-02 22:19:00 +04:00
{ } ,
2009-09-18 23:41:09 +04:00
} ;
2012-04-02 22:19:01 +04:00
# undef RO_SENSOR_TEMPLATE
# undef RW_SENSOR_TEMPLATE
2009-09-18 23:41:09 +04:00
/* Read power domain data */
static void remove_domain_devices ( struct acpi_power_meter_resource * resource )
{
int i ;
if ( ! resource - > num_domain_devices )
return ;
for ( i = 0 ; i < resource - > num_domain_devices ; i + + ) {
struct acpi_device * obj = resource - > domain_devices [ i ] ;
if ( ! obj )
continue ;
sysfs_remove_link ( resource - > holders_dir ,
kobject_name ( & obj - > dev . kobj ) ) ;
put_device ( & obj - > dev ) ;
}
kfree ( resource - > domain_devices ) ;
kobject_put ( resource - > holders_dir ) ;
2010-01-12 15:37:07 +03:00
resource - > num_domain_devices = 0 ;
2009-09-18 23:41:09 +04:00
}
static int read_domain_devices ( struct acpi_power_meter_resource * resource )
{
int res = 0 ;
int i ;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER , NULL } ;
union acpi_object * pss ;
acpi_status status ;
status = acpi_evaluate_object ( resource - > acpi_dev - > handle , " _PMD " , NULL ,
& buffer ) ;
if ( ACPI_FAILURE ( status ) ) {
2021-03-05 21:43:54 +03:00
acpi_evaluation_failure_warn ( resource - > acpi_dev - > handle , " _PMD " ,
status ) ;
2009-09-18 23:41:09 +04:00
return - ENODEV ;
}
pss = buffer . pointer ;
if ( ! pss | |
pss - > type ! = ACPI_TYPE_PACKAGE ) {
dev_err ( & resource - > acpi_dev - > dev , ACPI_POWER_METER_NAME
" Invalid _PMD data \n " ) ;
res = - EFAULT ;
goto end ;
}
if ( ! pss - > package . count )
goto end ;
treewide: kzalloc() -> kcalloc()
The kzalloc() function has a 2-factor argument form, kcalloc(). This
patch replaces cases of:
kzalloc(a * b, gfp)
with:
kcalloc(a * b, gfp)
as well as handling cases of:
kzalloc(a * b * c, gfp)
with:
kzalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kzalloc_array(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kzalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kzalloc
+ kcalloc
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kzalloc(sizeof(THING) * C2, ...)
|
kzalloc(sizeof(TYPE) * C2, ...)
|
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(C1 * C2, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * E2
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 00:03:40 +03:00
resource - > domain_devices = kcalloc ( pss - > package . count ,
sizeof ( struct acpi_device * ) ,
GFP_KERNEL ) ;
2009-09-18 23:41:09 +04:00
if ( ! resource - > domain_devices ) {
res = - ENOMEM ;
goto end ;
}
resource - > holders_dir = kobject_create_and_add ( " measures " ,
& resource - > acpi_dev - > dev . kobj ) ;
if ( ! resource - > holders_dir ) {
res = - ENOMEM ;
goto exit_free ;
}
resource - > num_domain_devices = pss - > package . count ;
for ( i = 0 ; i < pss - > package . count ; i + + ) {
struct acpi_device * obj ;
union acpi_object * element = & ( pss - > package . elements [ i ] ) ;
/* Refuse non-references */
if ( element - > type ! = ACPI_TYPE_LOCAL_REFERENCE )
continue ;
/* Create a symlink to domain objects */
resource - > domain_devices [ i ] = NULL ;
2013-11-20 13:28:23 +04:00
if ( acpi_bus_get_device ( element - > reference . handle ,
& resource - > domain_devices [ i ] ) )
2009-09-18 23:41:09 +04:00
continue ;
obj = resource - > domain_devices [ i ] ;
get_device ( & obj - > dev ) ;
res = sysfs_create_link ( resource - > holders_dir , & obj - > dev . kobj ,
kobject_name ( & obj - > dev . kobj ) ) ;
if ( res ) {
put_device ( & obj - > dev ) ;
resource - > domain_devices [ i ] = NULL ;
}
}
res = 0 ;
goto end ;
exit_free :
kfree ( resource - > domain_devices ) ;
end :
kfree ( buffer . pointer ) ;
return res ;
}
/* Registration and deregistration */
2012-04-02 22:19:02 +04:00
static int register_attrs ( struct acpi_power_meter_resource * resource ,
struct sensor_template * attrs )
2009-09-18 23:41:09 +04:00
{
struct device * dev = & resource - > acpi_dev - > dev ;
struct sensor_device_attribute * sensors =
& resource - > sensors [ resource - > num_sensors ] ;
int res = 0 ;
2012-04-02 22:19:02 +04:00
while ( attrs - > label ) {
sensors - > dev_attr . attr . name = attrs - > label ;
2018-12-11 01:01:58 +03:00
sensors - > dev_attr . attr . mode = 0444 ;
2012-04-02 22:19:02 +04:00
sensors - > dev_attr . show = attrs - > show ;
sensors - > index = attrs - > index ;
2009-09-18 23:41:09 +04:00
2012-04-02 22:19:02 +04:00
if ( attrs - > set ) {
2018-12-11 01:01:58 +03:00
sensors - > dev_attr . attr . mode | = 0200 ;
2012-04-02 22:19:02 +04:00
sensors - > dev_attr . store = attrs - > set ;
2009-09-18 23:41:09 +04:00
}
2012-03-28 23:11:47 +04:00
sysfs_attr_init ( & sensors - > dev_attr . attr ) ;
2009-09-18 23:41:09 +04:00
res = device_create_file ( dev , & sensors - > dev_attr ) ;
if ( res ) {
sensors - > dev_attr . attr . name = NULL ;
goto error ;
}
sensors + + ;
resource - > num_sensors + + ;
2012-04-02 22:19:02 +04:00
attrs + + ;
2009-09-18 23:41:09 +04:00
}
error :
return res ;
}
static void remove_attrs ( struct acpi_power_meter_resource * resource )
{
int i ;
for ( i = 0 ; i < resource - > num_sensors ; i + + ) {
if ( ! resource - > sensors [ i ] . dev_attr . attr . name )
continue ;
device_remove_file ( & resource - > acpi_dev - > dev ,
& resource - > sensors [ i ] . dev_attr ) ;
}
remove_domain_devices ( resource ) ;
resource - > num_sensors = 0 ;
}
static int setup_attrs ( struct acpi_power_meter_resource * resource )
{
int res = 0 ;
res = read_domain_devices ( resource ) ;
if ( res )
return res ;
if ( resource - > caps . flags & POWER_METER_CAN_MEASURE ) {
2012-04-02 22:19:03 +04:00
res = register_attrs ( resource , meter_attrs ) ;
2009-09-18 23:41:09 +04:00
if ( res )
goto error ;
}
if ( resource - > caps . flags & POWER_METER_CAN_CAP ) {
if ( ! can_cap_in_hardware ( ) ) {
2019-07-24 11:01:10 +03:00
dev_warn ( & resource - > acpi_dev - > dev ,
" Ignoring unsafe software power cap! \n " ) ;
2009-09-18 23:41:09 +04:00
goto skip_unsafe_cap ;
}
2012-04-02 22:19:04 +04:00
if ( resource - > caps . configurable_cap )
2012-04-02 22:19:02 +04:00
res = register_attrs ( resource , rw_cap_attrs ) ;
2012-04-02 22:19:04 +04:00
else
2012-04-02 22:19:02 +04:00
res = register_attrs ( resource , ro_cap_attrs ) ;
2012-04-02 22:19:04 +04:00
if ( res )
goto error ;
2012-04-02 22:19:02 +04:00
res = register_attrs ( resource , misc_cap_attrs ) ;
2009-09-18 23:41:09 +04:00
if ( res )
goto error ;
}
2012-04-02 22:19:04 +04:00
skip_unsafe_cap :
2009-09-18 23:41:09 +04:00
if ( resource - > caps . flags & POWER_METER_CAN_TRIP ) {
2012-04-02 22:19:02 +04:00
res = register_attrs ( resource , trip_attrs ) ;
2009-09-18 23:41:09 +04:00
if ( res )
goto error ;
}
2012-04-02 22:19:02 +04:00
res = register_attrs ( resource , misc_attrs ) ;
2009-09-18 23:41:09 +04:00
if ( res )
goto error ;
return res ;
error :
remove_attrs ( resource ) ;
return res ;
}
static void free_capabilities ( struct acpi_power_meter_resource * resource )
{
acpi_string * str ;
int i ;
str = & resource - > model_number ;
2020-10-07 10:51:48 +03:00
for ( i = 0 ; i < 3 ; i + + , str + + ) {
2009-09-18 23:41:09 +04:00
kfree ( * str ) ;
2020-10-07 10:51:48 +03:00
* str = NULL ;
}
2009-09-18 23:41:09 +04:00
}
static int read_capabilities ( struct acpi_power_meter_resource * resource )
{
int res = 0 ;
int i ;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER , NULL } ;
struct acpi_buffer state = { 0 , NULL } ;
struct acpi_buffer format = { sizeof ( " NNNNNNNNNNN " ) , " NNNNNNNNNNN " } ;
union acpi_object * pss ;
acpi_string * str ;
acpi_status status ;
status = acpi_evaluate_object ( resource - > acpi_dev - > handle , " _PMC " , NULL ,
& buffer ) ;
if ( ACPI_FAILURE ( status ) ) {
2021-03-05 21:43:54 +03:00
acpi_evaluation_failure_warn ( resource - > acpi_dev - > handle , " _PMC " ,
status ) ;
2009-09-18 23:41:09 +04:00
return - ENODEV ;
}
pss = buffer . pointer ;
if ( ! pss | |
pss - > type ! = ACPI_TYPE_PACKAGE | |
pss - > package . count ! = 14 ) {
dev_err ( & resource - > acpi_dev - > dev , ACPI_POWER_METER_NAME
" Invalid _PMC data \n " ) ;
res = - EFAULT ;
goto end ;
}
/* Grab all the integer data at once */
state . length = sizeof ( struct acpi_power_meter_capabilities ) ;
state . pointer = & resource - > caps ;
status = acpi_extract_package ( pss , & format , & state ) ;
if ( ACPI_FAILURE ( status ) ) {
2021-03-05 21:43:54 +03:00
dev_err ( & resource - > acpi_dev - > dev , ACPI_POWER_METER_NAME
" _PMC package parsing failed: %s \n " ,
acpi_format_exception ( status ) ) ;
2009-09-18 23:41:09 +04:00
res = - EFAULT ;
goto end ;
}
if ( resource - > caps . units ) {
dev_err ( & resource - > acpi_dev - > dev , ACPI_POWER_METER_NAME
" Unknown units %llu. \n " ,
resource - > caps . units ) ;
res = - EINVAL ;
goto end ;
}
/* Grab the string data */
str = & resource - > model_number ;
for ( i = 11 ; i < 14 ; i + + ) {
union acpi_object * element = & ( pss - > package . elements [ i ] ) ;
if ( element - > type ! = ACPI_TYPE_STRING ) {
res = - EINVAL ;
goto error ;
}
treewide: kzalloc() -> kcalloc()
The kzalloc() function has a 2-factor argument form, kcalloc(). This
patch replaces cases of:
kzalloc(a * b, gfp)
with:
kcalloc(a * b, gfp)
as well as handling cases of:
kzalloc(a * b * c, gfp)
with:
kzalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kzalloc_array(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kzalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kzalloc
+ kcalloc
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kzalloc(sizeof(THING) * C2, ...)
|
kzalloc(sizeof(TYPE) * C2, ...)
|
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(C1 * C2, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * E2
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 00:03:40 +03:00
* str = kcalloc ( element - > string . length + 1 , sizeof ( u8 ) ,
2009-09-18 23:41:09 +04:00
GFP_KERNEL ) ;
if ( ! * str ) {
res = - ENOMEM ;
goto error ;
}
strncpy ( * str , element - > string . pointer , element - > string . length ) ;
str + + ;
}
dev_info ( & resource - > acpi_dev - > dev , " Found ACPI power meter. \n " ) ;
goto end ;
error :
2020-10-07 10:51:48 +03:00
free_capabilities ( resource ) ;
2009-09-18 23:41:09 +04:00
end :
kfree ( buffer . pointer ) ;
return res ;
}
/* Handle ACPI event notifications */
static void acpi_power_meter_notify ( struct acpi_device * device , u32 event )
{
struct acpi_power_meter_resource * resource ;
int res ;
if ( ! device | | ! acpi_driver_data ( device ) )
return ;
resource = acpi_driver_data ( device ) ;
switch ( event ) {
case METER_NOTIFY_CONFIG :
hwmon: (acpi_power_meter) Fix lockdep splat
Damien Le Moal reports a lockdep splat with the acpi_power_meter,
observed with Linux v5.5 and later.
======================================================
WARNING: possible circular locking dependency detected
5.6.0-rc2+ #629 Not tainted
------------------------------------------------------
python/1397 is trying to acquire lock:
ffff888619080070 (&resource->lock){+.+.}, at: show_power+0x3c/0xa0 [acpi_power_meter]
but task is already holding lock:
ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (kn->count#119){++++}:
__kernfs_remove+0x626/0x7e0
kernfs_remove_by_name_ns+0x41/0x80
remove_attrs+0xcb/0x3c0 [acpi_power_meter]
acpi_power_meter_notify+0x1f7/0x310 [acpi_power_meter]
acpi_ev_notify_dispatch+0x198/0x1f3
acpi_os_execute_deferred+0x4d/0x70
process_one_work+0x7c8/0x1340
worker_thread+0x94/0xc70
kthread+0x2ed/0x3f0
ret_from_fork+0x24/0x30
-> #0 (&resource->lock){+.+.}:
__lock_acquire+0x20be/0x49b0
lock_acquire+0x127/0x340
__mutex_lock+0x15b/0x1350
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
vfs_read+0x152/0x2c0
ksys_read+0xf3/0x1d0
do_syscall_64+0x95/0x1010
entry_SYSCALL_64_after_hwframe+0x49/0xbe
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(kn->count#119);
lock(&resource->lock);
lock(kn->count#119);
lock(&resource->lock);
*** DEADLOCK ***
4 locks held by python/1397:
#0: ffff8890242d64e0 (&f->f_pos_lock){+.+.}, at: __fdget_pos+0x9b/0xb0
#1: ffff889040be74e0 (&p->lock){+.+.}, at: seq_read+0x6b/0xf90
#2: ffff8890448eb880 (&of->mutex){+.+.}, at: kernfs_seq_start+0x47/0x160
#3: ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
stack backtrace:
CPU: 10 PID: 1397 Comm: python Not tainted 5.6.0-rc2+ #629
Hardware name: Supermicro Super Server/X11DPL-i, BIOS 3.1 05/21/2019
Call Trace:
dump_stack+0x97/0xe0
check_noncircular+0x32e/0x3e0
? print_circular_bug.isra.0+0x1e0/0x1e0
? unwind_next_frame+0xb9a/0x1890
? entry_SYSCALL_64_after_hwframe+0x49/0xbe
? graph_lock+0x79/0x170
? __lockdep_reset_lock+0x3c0/0x3c0
? mark_lock+0xbc/0x1150
__lock_acquire+0x20be/0x49b0
? mark_held_locks+0xe0/0xe0
? stack_trace_save+0x91/0xc0
lock_acquire+0x127/0x340
? show_power+0x3c/0xa0 [acpi_power_meter]
? device_remove_bin_file+0x10/0x10
? device_remove_bin_file+0x10/0x10
__mutex_lock+0x15b/0x1350
? show_power+0x3c/0xa0 [acpi_power_meter]
? show_power+0x3c/0xa0 [acpi_power_meter]
? mutex_lock_io_nested+0x11f0/0x11f0
? lock_downgrade+0x6a0/0x6a0
? kernfs_seq_start+0x47/0x160
? lock_acquire+0x127/0x340
? kernfs_seq_start+0x6a/0x160
? device_remove_bin_file+0x10/0x10
? show_power+0x3c/0xa0 [acpi_power_meter]
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
? memset+0x20/0x40
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
? security_file_permission+0x16f/0x2c0
vfs_read+0x152/0x2c0
Problem is that reading an attribute takes the kernfs lock in the kernfs
code, then resource->lock in the driver. During an ACPI notification, the
opposite happens: The resource lock is taken first, followed by the kernfs
lock when sysfs attributes are removed and re-created. Presumably this is
now seen due to some locking related changes in kernfs after v5.4, but it
was likely always a problem.
Fix the problem by not blindly acquiring the lock in the notification
function. It is only needed to protect the various update functions.
However, those update functions are called anyway when sysfs attributes
are read. This means that we can just stop calling those functions from
the notifier, and the resource lock in the notifier function is no longer
needed.
That leaves two situations:
First, METER_NOTIFY_CONFIG removes and re-allocates capability strings.
While it did so under the resource lock, _displaying_ those strings was not
protected, creating a race condition. To solve this problem, selectively
protect both removal/creation and reporting of capability attributes with
the resource lock.
Second, removing and re-creating the attribute files is no longer protected
by the resource lock. That doesn't matter since access to each individual
attribute is protected by the kernfs lock. Userspace may get messed up if
attributes disappear and reappear under its nose, but that is not different
than today, and there is nothing we can do about it without major driver
restructuring.
Last but not least, when removing the driver, remove attribute functions
first, then release capability strings. This avoids yet another race
condition.
Reported-by: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: stable@vger.kernel.org # v5.5+
Tested-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2020-02-20 01:36:14 +03:00
mutex_lock ( & resource - > lock ) ;
2009-09-18 23:41:09 +04:00
free_capabilities ( resource ) ;
res = read_capabilities ( resource ) ;
hwmon: (acpi_power_meter) Fix lockdep splat
Damien Le Moal reports a lockdep splat with the acpi_power_meter,
observed with Linux v5.5 and later.
======================================================
WARNING: possible circular locking dependency detected
5.6.0-rc2+ #629 Not tainted
------------------------------------------------------
python/1397 is trying to acquire lock:
ffff888619080070 (&resource->lock){+.+.}, at: show_power+0x3c/0xa0 [acpi_power_meter]
but task is already holding lock:
ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (kn->count#119){++++}:
__kernfs_remove+0x626/0x7e0
kernfs_remove_by_name_ns+0x41/0x80
remove_attrs+0xcb/0x3c0 [acpi_power_meter]
acpi_power_meter_notify+0x1f7/0x310 [acpi_power_meter]
acpi_ev_notify_dispatch+0x198/0x1f3
acpi_os_execute_deferred+0x4d/0x70
process_one_work+0x7c8/0x1340
worker_thread+0x94/0xc70
kthread+0x2ed/0x3f0
ret_from_fork+0x24/0x30
-> #0 (&resource->lock){+.+.}:
__lock_acquire+0x20be/0x49b0
lock_acquire+0x127/0x340
__mutex_lock+0x15b/0x1350
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
vfs_read+0x152/0x2c0
ksys_read+0xf3/0x1d0
do_syscall_64+0x95/0x1010
entry_SYSCALL_64_after_hwframe+0x49/0xbe
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(kn->count#119);
lock(&resource->lock);
lock(kn->count#119);
lock(&resource->lock);
*** DEADLOCK ***
4 locks held by python/1397:
#0: ffff8890242d64e0 (&f->f_pos_lock){+.+.}, at: __fdget_pos+0x9b/0xb0
#1: ffff889040be74e0 (&p->lock){+.+.}, at: seq_read+0x6b/0xf90
#2: ffff8890448eb880 (&of->mutex){+.+.}, at: kernfs_seq_start+0x47/0x160
#3: ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
stack backtrace:
CPU: 10 PID: 1397 Comm: python Not tainted 5.6.0-rc2+ #629
Hardware name: Supermicro Super Server/X11DPL-i, BIOS 3.1 05/21/2019
Call Trace:
dump_stack+0x97/0xe0
check_noncircular+0x32e/0x3e0
? print_circular_bug.isra.0+0x1e0/0x1e0
? unwind_next_frame+0xb9a/0x1890
? entry_SYSCALL_64_after_hwframe+0x49/0xbe
? graph_lock+0x79/0x170
? __lockdep_reset_lock+0x3c0/0x3c0
? mark_lock+0xbc/0x1150
__lock_acquire+0x20be/0x49b0
? mark_held_locks+0xe0/0xe0
? stack_trace_save+0x91/0xc0
lock_acquire+0x127/0x340
? show_power+0x3c/0xa0 [acpi_power_meter]
? device_remove_bin_file+0x10/0x10
? device_remove_bin_file+0x10/0x10
__mutex_lock+0x15b/0x1350
? show_power+0x3c/0xa0 [acpi_power_meter]
? show_power+0x3c/0xa0 [acpi_power_meter]
? mutex_lock_io_nested+0x11f0/0x11f0
? lock_downgrade+0x6a0/0x6a0
? kernfs_seq_start+0x47/0x160
? lock_acquire+0x127/0x340
? kernfs_seq_start+0x6a/0x160
? device_remove_bin_file+0x10/0x10
? show_power+0x3c/0xa0 [acpi_power_meter]
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
? memset+0x20/0x40
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
? security_file_permission+0x16f/0x2c0
vfs_read+0x152/0x2c0
Problem is that reading an attribute takes the kernfs lock in the kernfs
code, then resource->lock in the driver. During an ACPI notification, the
opposite happens: The resource lock is taken first, followed by the kernfs
lock when sysfs attributes are removed and re-created. Presumably this is
now seen due to some locking related changes in kernfs after v5.4, but it
was likely always a problem.
Fix the problem by not blindly acquiring the lock in the notification
function. It is only needed to protect the various update functions.
However, those update functions are called anyway when sysfs attributes
are read. This means that we can just stop calling those functions from
the notifier, and the resource lock in the notifier function is no longer
needed.
That leaves two situations:
First, METER_NOTIFY_CONFIG removes and re-allocates capability strings.
While it did so under the resource lock, _displaying_ those strings was not
protected, creating a race condition. To solve this problem, selectively
protect both removal/creation and reporting of capability attributes with
the resource lock.
Second, removing and re-creating the attribute files is no longer protected
by the resource lock. That doesn't matter since access to each individual
attribute is protected by the kernfs lock. Userspace may get messed up if
attributes disappear and reappear under its nose, but that is not different
than today, and there is nothing we can do about it without major driver
restructuring.
Last but not least, when removing the driver, remove attribute functions
first, then release capability strings. This avoids yet another race
condition.
Reported-by: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: stable@vger.kernel.org # v5.5+
Tested-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2020-02-20 01:36:14 +03:00
mutex_unlock ( & resource - > lock ) ;
2009-09-18 23:41:09 +04:00
if ( res )
break ;
remove_attrs ( resource ) ;
setup_attrs ( resource ) ;
break ;
case METER_NOTIFY_TRIP :
sysfs_notify ( & device - > dev . kobj , NULL , POWER_AVERAGE_NAME ) ;
break ;
case METER_NOTIFY_CAP :
sysfs_notify ( & device - > dev . kobj , NULL , POWER_CAP_NAME ) ;
break ;
case METER_NOTIFY_INTERVAL :
sysfs_notify ( & device - > dev . kobj , NULL , POWER_AVG_INTERVAL_NAME ) ;
break ;
case METER_NOTIFY_CAPPING :
sysfs_notify ( & device - > dev . kobj , NULL , POWER_ALARM_NAME ) ;
dev_info ( & device - > dev , " Capping in progress. \n " ) ;
break ;
default :
2013-09-13 21:51:35 +04:00
WARN ( 1 , " Unexpected event %d \n " , event ) ;
break ;
2009-09-18 23:41:09 +04:00
}
acpi_bus_generate_netlink_event ( ACPI_POWER_METER_CLASS ,
dev_name ( & device - > dev ) , event , 0 ) ;
}
/*
 * ACPI .add callback (see acpi_power_meter_driver.ops): allocate the
 * per-device state, read the meter capabilities, create the sysfs attributes
 * and register the hwmon device.
 *
 * Returns 0 on success, -EINVAL when device is NULL, -ENOMEM when the state
 * cannot be allocated, or the error reported by read_capabilities(),
 * setup_attrs() or hwmon_device_register().
 *
 * On failure everything acquired so far is released in reverse order and
 * device->driver_data is reset to NULL so it never refers to freed memory.
 */
static int acpi_power_meter_add ( struct acpi_device * device )
{
int res ;
struct acpi_power_meter_resource * resource ;
if ( ! device )
return - EINVAL ;
resource = kzalloc ( sizeof ( struct acpi_power_meter_resource ) ,
GFP_KERNEL ) ;
if ( ! resource )
return - ENOMEM ;
resource - > sensors_valid = 0 ;
resource - > acpi_dev = device ;
mutex_init ( & resource - > lock ) ;
strcpy ( acpi_device_name ( device ) , ACPI_POWER_METER_DEVICE_NAME ) ;
strcpy ( acpi_device_class ( device ) , ACPI_POWER_METER_CLASS ) ;
/* Published before the calls below; undone at exit_free on failure. */
device - > driver_data = resource ;
res = read_capabilities ( resource ) ;
if ( res )
goto exit_free ;
/* NOTE(review): -1 presumably marks both trip points as "not set" - confirm. */
resource - > trip [ 0 ] = resource - > trip [ 1 ] = - 1 ;
res = setup_attrs ( resource ) ;
if ( res )
2020-06-25 07:32:42 +03:00
goto exit_free_capability ;
2009-09-18 23:41:09 +04:00
resource - > hwmon_dev = hwmon_device_register ( & device - > dev ) ;
if ( IS_ERR ( resource - > hwmon_dev ) ) {
res = PTR_ERR ( resource - > hwmon_dev ) ;
goto exit_remove ;
}
/* Success: skip the unwind labels below. */
res = 0 ;
goto exit ;
exit_remove :
remove_attrs ( resource ) ;
2020-06-25 07:32:42 +03:00
exit_free_capability :
free_capabilities ( resource ) ;
2009-09-18 23:41:09 +04:00
exit_free :
/* Do not leave driver_data pointing at the resource freed just below. */
device - > driver_data = NULL ;
kfree ( resource ) ;
exit :
return res ;
}
2013-01-24 03:24:48 +04:00
/*
 * ACPI .remove callback (see acpi_power_meter_driver.ops): unregister the
 * hwmon device, remove the sysfs attributes, release the capability strings
 * and free the per-device state.
 *
 * Returns 0 on success, or -EINVAL when device is NULL or carries no driver
 * data.
 */
static int acpi_power_meter_remove ( struct acpi_device * device )
2009-09-18 23:41:09 +04:00
{
struct acpi_power_meter_resource * resource ;
if ( ! device | | ! acpi_driver_data ( device ) )
return - EINVAL ;
resource = acpi_driver_data ( device ) ;
hwmon_device_unregister ( resource - > hwmon_dev ) ;
/*
 * Order matters: the attributes are removed first and only then are the
 * capability strings released, so no attribute can still report a string
 * that is being freed.
 */
remove_attrs ( resource ) ;
hwmon: (acpi_power_meter) Fix lockdep splat
Damien Le Moal reports a lockdep splat with the acpi_power_meter,
observed with Linux v5.5 and later.
======================================================
WARNING: possible circular locking dependency detected
5.6.0-rc2+ #629 Not tainted
------------------------------------------------------
python/1397 is trying to acquire lock:
ffff888619080070 (&resource->lock){+.+.}, at: show_power+0x3c/0xa0 [acpi_power_meter]
but task is already holding lock:
ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (kn->count#119){++++}:
__kernfs_remove+0x626/0x7e0
kernfs_remove_by_name_ns+0x41/0x80
remove_attrs+0xcb/0x3c0 [acpi_power_meter]
acpi_power_meter_notify+0x1f7/0x310 [acpi_power_meter]
acpi_ev_notify_dispatch+0x198/0x1f3
acpi_os_execute_deferred+0x4d/0x70
process_one_work+0x7c8/0x1340
worker_thread+0x94/0xc70
kthread+0x2ed/0x3f0
ret_from_fork+0x24/0x30
-> #0 (&resource->lock){+.+.}:
__lock_acquire+0x20be/0x49b0
lock_acquire+0x127/0x340
__mutex_lock+0x15b/0x1350
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
vfs_read+0x152/0x2c0
ksys_read+0xf3/0x1d0
do_syscall_64+0x95/0x1010
entry_SYSCALL_64_after_hwframe+0x49/0xbe
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(kn->count#119);
lock(&resource->lock);
lock(kn->count#119);
lock(&resource->lock);
*** DEADLOCK ***
4 locks held by python/1397:
#0: ffff8890242d64e0 (&f->f_pos_lock){+.+.}, at: __fdget_pos+0x9b/0xb0
#1: ffff889040be74e0 (&p->lock){+.+.}, at: seq_read+0x6b/0xf90
#2: ffff8890448eb880 (&of->mutex){+.+.}, at: kernfs_seq_start+0x47/0x160
#3: ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160
stack backtrace:
CPU: 10 PID: 1397 Comm: python Not tainted 5.6.0-rc2+ #629
Hardware name: Supermicro Super Server/X11DPL-i, BIOS 3.1 05/21/2019
Call Trace:
dump_stack+0x97/0xe0
check_noncircular+0x32e/0x3e0
? print_circular_bug.isra.0+0x1e0/0x1e0
? unwind_next_frame+0xb9a/0x1890
? entry_SYSCALL_64_after_hwframe+0x49/0xbe
? graph_lock+0x79/0x170
? __lockdep_reset_lock+0x3c0/0x3c0
? mark_lock+0xbc/0x1150
__lock_acquire+0x20be/0x49b0
? mark_held_locks+0xe0/0xe0
? stack_trace_save+0x91/0xc0
lock_acquire+0x127/0x340
? show_power+0x3c/0xa0 [acpi_power_meter]
? device_remove_bin_file+0x10/0x10
? device_remove_bin_file+0x10/0x10
__mutex_lock+0x15b/0x1350
? show_power+0x3c/0xa0 [acpi_power_meter]
? show_power+0x3c/0xa0 [acpi_power_meter]
? mutex_lock_io_nested+0x11f0/0x11f0
? lock_downgrade+0x6a0/0x6a0
? kernfs_seq_start+0x47/0x160
? lock_acquire+0x127/0x340
? kernfs_seq_start+0x6a/0x160
? device_remove_bin_file+0x10/0x10
? show_power+0x3c/0xa0 [acpi_power_meter]
show_power+0x3c/0xa0 [acpi_power_meter]
dev_attr_show+0x3f/0x80
? memset+0x20/0x40
sysfs_kf_seq_show+0x216/0x410
seq_read+0x407/0xf90
? security_file_permission+0x16f/0x2c0
vfs_read+0x152/0x2c0
Problem is that reading an attribute takes the kernfs lock in the kernfs
code, then resource->lock in the driver. During an ACPI notification, the
opposite happens: The resource lock is taken first, followed by the kernfs
lock when sysfs attributes are removed and re-created. Presumably this is
now seen due to some locking related changes in kernfs after v5.4, but it
was likely always a problem.
Fix the problem by not blindly acquiring the lock in the notification
function. It is only needed to protect the various update functions.
However, those update functions are called anyway when sysfs attributes
are read. This means that we can just stop calling those functions from
the notifier, and the resource lock in the notifier function is no longer
needed.
That leaves two situations:
First, METER_NOTIFY_CONFIG removes and re-allocates capability strings.
While it did so under the resource lock, _displaying_ those strings was not
protected, creating a race condition. To solve this problem, selectively
protect both removal/creation and reporting of capability attributes with
the resource lock.
Second, removing and re-creating the attribute files is no longer protected
by the resource lock. That doesn't matter since access to each individual
attribute is protected by the kernfs lock. Userspace may get messed up if
attributes disappear and reappear under its nose, but that is not different
than today, and there is nothing we can do about it without major driver
restructuring.
Last but not least, when removing the driver, remove attribute functions
first, then release capability strings. This avoids yet another race
condition.
Reported-by: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: Damien Le Moal <Damien.LeMoal@wdc.com>
Cc: stable@vger.kernel.org # v5.5+
Tested-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2020-02-20 01:36:14 +03:00
free_capabilities ( resource ) ;
2009-09-18 23:41:09 +04:00
kfree ( resource ) ;
return 0 ;
}
2012-07-27 18:11:32 +04:00
# ifdef CONFIG_PM_SLEEP
2012-06-30 01:40:05 +04:00
/*
 * PM resume callback (see acpi_power_meter_pm): discard the cached
 * capability data and read it again from the device.
 *
 * Returns 0, or -EINVAL when dev is NULL or the ACPI device carries no
 * driver data. A failure of read_capabilities() is not reported to the
 * caller (unchanged from the previous behaviour).
 */
static int acpi_power_meter_resume ( struct device * dev )
2009-09-18 23:41:09 +04:00
{
struct acpi_power_meter_resource * resource ;
2012-06-30 01:40:05 +04:00
if ( ! dev )
return - EINVAL ;
resource = acpi_driver_data ( to_acpi_device ( dev ) ) ;
if ( ! resource )
2009-09-18 23:41:09 +04:00
return - EINVAL ;
/*
 * Hold resource->lock while the capability strings are freed and
 * re-allocated, exactly as the notify handler does for the same
 * free/read sequence, so a concurrent reader of those strings cannot see
 * them while they are being replaced. No sysfs attribute is removed or
 * created here, so this does not reintroduce the kernfs lock inversion.
 */
mutex_lock ( & resource - > lock ) ;
free_capabilities ( resource ) ;
read_capabilities ( resource ) ;
mutex_unlock ( & resource - > lock ) ;
return 0 ;
}
2012-07-27 18:11:32 +04:00
# endif /* CONFIG_PM_SLEEP */
2012-06-30 01:40:05 +04:00
/*
 * Power-management operations for the driver: only a resume handler
 * (acpi_power_meter_resume) is supplied; the suspend slot is NULL.
 */
static SIMPLE_DEV_PM_OPS ( acpi_power_meter_pm , NULL , acpi_power_meter_resume ) ;
2009-09-18 23:41:09 +04:00
/*
 * ACPI driver description: binds the add/remove/notify callbacks defined in
 * this file to the device IDs listed in power_meter_ids and attaches the
 * PM operations in acpi_power_meter_pm.
 */
static struct acpi_driver acpi_power_meter_driver = {
. name = " power_meter " ,
. class = ACPI_POWER_METER_CLASS ,
. ids = power_meter_ids ,
. ops = {
. add = acpi_power_meter_add ,
. remove = acpi_power_meter_remove ,
. notify = acpi_power_meter_notify ,
} ,
2012-06-30 01:40:05 +04:00
. drv . pm = & acpi_power_meter_pm ,
2009-09-18 23:41:09 +04:00
} ;
/* Module init/exit routines */
/*
 * Callback of the pm_dmi_table entry: records that the running system
 * matched the table by setting the file-level flag cap_in_hardware.
 *
 * @d: the matching table entry (not used).
 *
 * Always returns 0.
 */
static int __init enable_cap_knobs(const struct dmi_system_id *d)
{
	cap_in_hardware = 1;

	return 0;
}
2017-09-14 12:59:30 +03:00
/*
 * DMI match table consulted once by acpi_power_meter_init(). Its single
 * entry matches systems whose DMI system vendor is "IBM" and runs
 * enable_cap_knobs() for them. The empty entry terminates the table.
 */
static const struct dmi_system_id pm_dmi_table [ ] __initconst = {
2009-09-18 23:41:09 +04:00
{
enable_cap_knobs , " IBM Active Energy Manager " ,
{
DMI_MATCH ( DMI_SYS_VENDOR , " IBM " )
} ,
} ,
{ }
} ;
/*
 * Module entry point: run the DMI table check and register the ACPI driver.
 *
 * Returns 0 on success, -ENODEV when ACPI is disabled, or the negative
 * value returned by acpi_bus_register_driver().
 */
static int __init acpi_power_meter_init ( void )
{
int result ;
if ( acpi_disabled )
return - ENODEV ;
/* May set cap_in_hardware through enable_cap_knobs() on a table match. */
dmi_check_system ( pm_dmi_table ) ;
result = acpi_bus_register_driver ( & acpi_power_meter_driver ) ;
if ( result < 0 )
2013-09-13 21:56:11 +04:00
return result ;
2009-09-18 23:41:09 +04:00
return 0 ;
}
/*
 * Module exit point: unregister the ACPI driver that
 * acpi_power_meter_init() registered.
 */
static void __exit acpi_power_meter_exit(void)
{
	acpi_bus_unregister_driver(&acpi_power_meter_driver);
}
2013-08-27 02:42:27 +04:00
/* Module metadata, the force_cap_on parameter and the init/exit hooks. */
MODULE_AUTHOR ( " Darrick J. Wong <darrick.wong@oracle.com> " ) ;
2009-09-18 23:41:09 +04:00
MODULE_DESCRIPTION ( " ACPI 4.0 power meter driver " ) ;
MODULE_LICENSE ( " GPL " ) ;
/* force_cap_on is exposed as a bool module parameter with mode 0644. */
module_param ( force_cap_on , bool , 0644 ) ;
MODULE_PARM_DESC ( force_cap_on , " Enable power cap even if it is unsafe to do so. " ) ;
module_init ( acpi_power_meter_init ) ;
module_exit ( acpi_power_meter_exit ) ;