1636 lines
41 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Generic OPP OF helpers
*
* Copyright (C) 2009-2010 Texas Instruments Incorporated.
* Nishanth Menon
* Romit Dasgupta
* Kevin Hilman
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/pm_domain.h>
#include <linux/slab.h>
#include <linux/export.h>
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
#include <linux/energy_model.h>
#include "opp.h"
/* OPP tables with uninitialized required OPPs, protected by opp_table_lock */
static LIST_HEAD(lazy_opp_tables);
/*
* Returns opp descriptor node for a device node, caller must
* do of_node_put().
*/
static struct device_node *_opp_of_get_opp_desc_node(struct device_node *np,
int index)
{
/* "operating-points-v2" can be an array for power domain providers */
return of_parse_phandle(np, "operating-points-v2", index);
}
/* Returns opp descriptor node for a device, caller must do of_node_put() */
struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev)
{
return _opp_of_get_opp_desc_node(dev->of_node, 0);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node);
struct opp_table *_managed_opp(struct device *dev, int index)
{
struct opp_table *opp_table, *managed_table = NULL;
struct device_node *np;
np = _opp_of_get_opp_desc_node(dev->of_node, index);
if (!np)
return NULL;
list_for_each_entry(opp_table, &opp_tables, node) {
if (opp_table->np == np) {
/*
* Multiple devices can point to the same OPP table and
* so will have same node-pointer, np.
*
* But the OPPs will be considered as shared only if the
* OPP table contains a "opp-shared" property.
*/
if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) {
_get_opp_table_kref(opp_table);
managed_table = opp_table;
}
break;
}
}
of_node_put(np);
return managed_table;
}
/* The caller must call dev_pm_opp_put() after the OPP is used */
static struct dev_pm_opp *_find_opp_of_np(struct opp_table *opp_table,
struct device_node *opp_np)
{
struct dev_pm_opp *opp;
mutex_lock(&opp_table->lock);
list_for_each_entry(opp, &opp_table->opp_list, node) {
if (opp->np == opp_np) {
dev_pm_opp_get(opp);
mutex_unlock(&opp_table->lock);
return opp;
}
}
mutex_unlock(&opp_table->lock);
return NULL;
}
static struct device_node *of_parse_required_opp(struct device_node *np,
int index)
{
return of_parse_phandle(np, "required-opps", index);
}
/* The caller must call dev_pm_opp_put_opp_table() after the table is used */
static struct opp_table *_find_table_of_opp_np(struct device_node *opp_np)
{
struct opp_table *opp_table;
struct device_node *opp_table_np;
opp_table_np = of_get_parent(opp_np);
if (!opp_table_np)
goto err;
/* It is safe to put the node now as all we need now is its address */
of_node_put(opp_table_np);
opp: Allocate the OPP table outside of opp_table_lock There is no critical section which needs protection with locks while allocating an OPP table, except while adding it to the opp_tables list. And taking the opp_table_lock for the entire duration causes circular dependency issues like the one mentioned below. This patch takes another approach to reduce the size of the critical section to avoid such issues, the details of that are present within the patch. ====================================================== WARNING: possible circular locking dependency detected 5.4.72 #14 Not tainted ------------------------------------------------------ chrome/1865 is trying to acquire lock: ffffffdd34921750 (opp_table_lock){+.+.}, at: _find_opp_table+0x34/0x74 but task is already holding lock: ffffff81f0fc71a8 (reservation_ww_class_mutex){+.+.}, at: submit_lock_objects+0x70/0x1ec fscrypt: AES-256-CTS-CBC using implementation "cts-cbc-aes-ce" which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (reservation_ww_class_mutex){+.+.}: __mutex_lock_common+0xec/0xc0c ww_mutex_lock_interruptible+0x5c/0xc4 msm_gem_fault+0x2c/0x124 __do_fault+0x40/0x16c handle_mm_fault+0x7cc/0xd98 do_page_fault+0x230/0x3b4 do_translation_fault+0x5c/0x78 do_mem_abort+0x4c/0xb4 el0_da+0x1c/0x20 -> #3 (&mm->mmap_sem){++++}: __might_fault+0x70/0x98 compat_filldir+0xf8/0x48c dcache_readdir+0x70/0x1dc iterate_dir+0xd4/0x180 __arm64_compat_sys_getdents+0xa0/0x19c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 -> #2 (&sb->s_type->i_mutex_key#3){++++}: down_write+0x54/0x16c start_creating+0x68/0x128 debugfs_create_dir+0x28/0x114 opp_debug_register+0x8c/0xc0 _add_opp_dev_unlocked+0x5c/0x70 _add_opp_dev+0x38/0x58 _opp_get_opp_table+0xdc/0x1ac dev_pm_opp_get_opp_table_indexed+0x24/0x30 dev_pm_opp_of_add_table_indexed+0x48/0x84 of_genpd_add_provider_onecell+0xc0/0x1b8 rpmhpd_probe+0x240/0x268 platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_bus_create+0x230/0x368 of_platform_populate+0x70/0xbc of_platform_default_populate_init+0xa8/0xc0 do_one_initcall+0x1c8/0x3fc do_initcall_level+0xb4/0x10c do_basic_setup+0x30/0x48 kernel_init_freeable+0x124/0x1a4 kernel_init+0x14/0x104 ret_from_fork+0x10/0x18 -> #1 (&opp_table->lock){+.+.}: __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _add_opp_dev+0x2c/0x58 _opp_get_opp_table+0xdc/0x1ac dev_pm_opp_get_opp_table_indexed+0x24/0x30 dev_pm_opp_of_add_table_indexed+0x48/0x84 of_genpd_add_provider_onecell+0xc0/0x1b8 rpmhpd_probe+0x240/0x268 platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_bus_create+0x230/0x368 of_platform_populate+0x70/0xbc of_platform_default_populate_init+0xa8/0xc0 do_one_initcall+0x1c8/0x3fc do_initcall_level+0xb4/0x10c do_basic_setup+0x30/0x48 kernel_init_freeable+0x124/0x1a4 kernel_init+0x14/0x104 ret_from_fork+0x10/0x18 -> #0 (opp_table_lock){+.+.}: __lock_acquire+0xee4/0x2450 lock_acquire+0x1cc/0x210 __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _find_opp_table+0x34/0x74 dev_pm_opp_find_freq_exact+0x2c/0xdc a6xx_gmu_resume+0xc8/0xecc a6xx_pm_resume+0x148/0x200 adreno_resume+0x28/0x34 pm_generic_runtime_resume+0x34/0x48 __rpm_callback+0x70/0x10c rpm_callback+0x34/0x8c rpm_resume+0x414/0x550 __pm_runtime_resume+0x7c/0xa0 msm_gpu_submit+0x60/0x1c0 msm_ioctl_gem_submit+0xadc/0xb60 drm_ioctl_kernel+0x9c/0x118 drm_ioctl+0x27c/0x408 drm_compat_ioctl+0xcc/0xdc __se_compat_sys_ioctl+0x100/0x206c __arm64_compat_sys_ioctl+0x20/0x2c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 other info that might help us debug this: Chain exists of: opp_table_lock --> &mm->mmap_sem --> reservation_ww_class_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(reservation_ww_class_mutex); lock(&mm->mmap_sem); lock(reservation_ww_class_mutex); lock(opp_table_lock); *** DEADLOCK *** 3 locks held by chrome/1865: #0: ffffff81edecc0d8 (&dev->struct_mutex){+.+.}, at: msm_ioctl_gem_submit+0x264/0xb60 #1: ffffff81d0000870 (reservation_ww_class_acquire){+.+.}, at: msm_ioctl_gem_submit+0x8e8/0xb60 #2: ffffff81f0fc71a8 (reservation_ww_class_mutex){+.+.}, at: submit_lock_objects+0x70/0x1ec stack backtrace: CPU: 0 PID: 1865 Comm: chrome Not tainted 5.4.72 #14 Hardware name: Google Lazor (rev1+) with LTE (DT) Call trace: dump_backtrace+0x0/0x158 show_stack+0x20/0x2c dump_stack+0xc8/0x160 print_circular_bug+0x2c4/0x2c8 check_noncircular+0x1a8/0x1b0 __lock_acquire+0xee4/0x2450 lock_acquire+0x1cc/0x210 __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _find_opp_table+0x34/0x74 dev_pm_opp_find_freq_exact+0x2c/0xdc a6xx_gmu_resume+0xc8/0xecc a6xx_pm_resume+0x148/0x200 adreno_resume+0x28/0x34 pm_generic_runtime_resume+0x34/0x48 __rpm_callback+0x70/0x10c rpm_callback+0x34/0x8c rpm_resume+0x414/0x550 __pm_runtime_resume+0x7c/0xa0 msm_gpu_submit+0x60/0x1c0 msm_ioctl_gem_submit+0xadc/0xb60 drm_ioctl_kernel+0x9c/0x118 drm_ioctl+0x27c/0x408 drm_compat_ioctl+0xcc/0xdc __se_compat_sys_ioctl+0x100/0x206c __arm64_compat_sys_ioctl+0x20/0x2c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 Reported-by: Rob Clark <robdclark@gmail.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2020-10-27 16:53:21 +05:30
mutex_lock(&opp_table_lock);
list_for_each_entry(opp_table, &opp_tables, node) {
if (opp_table_np == opp_table->np) {
_get_opp_table_kref(opp_table);
opp: Allocate the OPP table outside of opp_table_lock There is no critical section which needs protection with locks while allocating an OPP table, except while adding it to the opp_tables list. And taking the opp_table_lock for the entire duration causes circular dependency issues like the one mentioned below. This patch takes another approach to reduce the size of the critical section to avoid such issues, the details of that are present within the patch. ====================================================== WARNING: possible circular locking dependency detected 5.4.72 #14 Not tainted ------------------------------------------------------ chrome/1865 is trying to acquire lock: ffffffdd34921750 (opp_table_lock){+.+.}, at: _find_opp_table+0x34/0x74 but task is already holding lock: ffffff81f0fc71a8 (reservation_ww_class_mutex){+.+.}, at: submit_lock_objects+0x70/0x1ec fscrypt: AES-256-CTS-CBC using implementation "cts-cbc-aes-ce" which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (reservation_ww_class_mutex){+.+.}: __mutex_lock_common+0xec/0xc0c ww_mutex_lock_interruptible+0x5c/0xc4 msm_gem_fault+0x2c/0x124 __do_fault+0x40/0x16c handle_mm_fault+0x7cc/0xd98 do_page_fault+0x230/0x3b4 do_translation_fault+0x5c/0x78 do_mem_abort+0x4c/0xb4 el0_da+0x1c/0x20 -> #3 (&mm->mmap_sem){++++}: __might_fault+0x70/0x98 compat_filldir+0xf8/0x48c dcache_readdir+0x70/0x1dc iterate_dir+0xd4/0x180 __arm64_compat_sys_getdents+0xa0/0x19c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 -> #2 (&sb->s_type->i_mutex_key#3){++++}: down_write+0x54/0x16c start_creating+0x68/0x128 debugfs_create_dir+0x28/0x114 opp_debug_register+0x8c/0xc0 _add_opp_dev_unlocked+0x5c/0x70 _add_opp_dev+0x38/0x58 _opp_get_opp_table+0xdc/0x1ac dev_pm_opp_get_opp_table_indexed+0x24/0x30 dev_pm_opp_of_add_table_indexed+0x48/0x84 of_genpd_add_provider_onecell+0xc0/0x1b8 rpmhpd_probe+0x240/0x268 platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_bus_create+0x230/0x368 of_platform_populate+0x70/0xbc of_platform_default_populate_init+0xa8/0xc0 do_one_initcall+0x1c8/0x3fc do_initcall_level+0xb4/0x10c do_basic_setup+0x30/0x48 kernel_init_freeable+0x124/0x1a4 kernel_init+0x14/0x104 ret_from_fork+0x10/0x18 -> #1 (&opp_table->lock){+.+.}: __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _add_opp_dev+0x2c/0x58 _opp_get_opp_table+0xdc/0x1ac dev_pm_opp_get_opp_table_indexed+0x24/0x30 dev_pm_opp_of_add_table_indexed+0x48/0x84 of_genpd_add_provider_onecell+0xc0/0x1b8 rpmhpd_probe+0x240/0x268 platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_bus_create+0x230/0x368 of_platform_populate+0x70/0xbc of_platform_default_populate_init+0xa8/0xc0 do_one_initcall+0x1c8/0x3fc do_initcall_level+0xb4/0x10c do_basic_setup+0x30/0x48 kernel_init_freeable+0x124/0x1a4 kernel_init+0x14/0x104 ret_from_fork+0x10/0x18 -> #0 (opp_table_lock){+.+.}: __lock_acquire+0xee4/0x2450 lock_acquire+0x1cc/0x210 __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _find_opp_table+0x34/0x74 dev_pm_opp_find_freq_exact+0x2c/0xdc a6xx_gmu_resume+0xc8/0xecc a6xx_pm_resume+0x148/0x200 adreno_resume+0x28/0x34 pm_generic_runtime_resume+0x34/0x48 __rpm_callback+0x70/0x10c rpm_callback+0x34/0x8c rpm_resume+0x414/0x550 __pm_runtime_resume+0x7c/0xa0 msm_gpu_submit+0x60/0x1c0 msm_ioctl_gem_submit+0xadc/0xb60 drm_ioctl_kernel+0x9c/0x118 drm_ioctl+0x27c/0x408 drm_compat_ioctl+0xcc/0xdc __se_compat_sys_ioctl+0x100/0x206c __arm64_compat_sys_ioctl+0x20/0x2c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 other info that might help us debug this: Chain exists of: opp_table_lock --> &mm->mmap_sem --> reservation_ww_class_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(reservation_ww_class_mutex); lock(&mm->mmap_sem); lock(reservation_ww_class_mutex); lock(opp_table_lock); *** DEADLOCK *** 3 locks held by chrome/1865: #0: ffffff81edecc0d8 (&dev->struct_mutex){+.+.}, at: msm_ioctl_gem_submit+0x264/0xb60 #1: ffffff81d0000870 (reservation_ww_class_acquire){+.+.}, at: msm_ioctl_gem_submit+0x8e8/0xb60 #2: ffffff81f0fc71a8 (reservation_ww_class_mutex){+.+.}, at: submit_lock_objects+0x70/0x1ec stack backtrace: CPU: 0 PID: 1865 Comm: chrome Not tainted 5.4.72 #14 Hardware name: Google Lazor (rev1+) with LTE (DT) Call trace: dump_backtrace+0x0/0x158 show_stack+0x20/0x2c dump_stack+0xc8/0x160 print_circular_bug+0x2c4/0x2c8 check_noncircular+0x1a8/0x1b0 __lock_acquire+0xee4/0x2450 lock_acquire+0x1cc/0x210 __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _find_opp_table+0x34/0x74 dev_pm_opp_find_freq_exact+0x2c/0xdc a6xx_gmu_resume+0xc8/0xecc a6xx_pm_resume+0x148/0x200 adreno_resume+0x28/0x34 pm_generic_runtime_resume+0x34/0x48 __rpm_callback+0x70/0x10c rpm_callback+0x34/0x8c rpm_resume+0x414/0x550 __pm_runtime_resume+0x7c/0xa0 msm_gpu_submit+0x60/0x1c0 msm_ioctl_gem_submit+0xadc/0xb60 drm_ioctl_kernel+0x9c/0x118 drm_ioctl+0x27c/0x408 drm_compat_ioctl+0xcc/0xdc __se_compat_sys_ioctl+0x100/0x206c __arm64_compat_sys_ioctl+0x20/0x2c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 Reported-by: Rob Clark <robdclark@gmail.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2020-10-27 16:53:21 +05:30
mutex_unlock(&opp_table_lock);
return opp_table;
}
}
opp: Allocate the OPP table outside of opp_table_lock There is no critical section which needs protection with locks while allocating an OPP table, except while adding it to the opp_tables list. And taking the opp_table_lock for the entire duration causes circular dependency issues like the one mentioned below. This patch takes another approach to reduce the size of the critical section to avoid such issues, the details of that are present within the patch. ====================================================== WARNING: possible circular locking dependency detected 5.4.72 #14 Not tainted ------------------------------------------------------ chrome/1865 is trying to acquire lock: ffffffdd34921750 (opp_table_lock){+.+.}, at: _find_opp_table+0x34/0x74 but task is already holding lock: ffffff81f0fc71a8 (reservation_ww_class_mutex){+.+.}, at: submit_lock_objects+0x70/0x1ec fscrypt: AES-256-CTS-CBC using implementation "cts-cbc-aes-ce" which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (reservation_ww_class_mutex){+.+.}: __mutex_lock_common+0xec/0xc0c ww_mutex_lock_interruptible+0x5c/0xc4 msm_gem_fault+0x2c/0x124 __do_fault+0x40/0x16c handle_mm_fault+0x7cc/0xd98 do_page_fault+0x230/0x3b4 do_translation_fault+0x5c/0x78 do_mem_abort+0x4c/0xb4 el0_da+0x1c/0x20 -> #3 (&mm->mmap_sem){++++}: __might_fault+0x70/0x98 compat_filldir+0xf8/0x48c dcache_readdir+0x70/0x1dc iterate_dir+0xd4/0x180 __arm64_compat_sys_getdents+0xa0/0x19c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 -> #2 (&sb->s_type->i_mutex_key#3){++++}: down_write+0x54/0x16c start_creating+0x68/0x128 debugfs_create_dir+0x28/0x114 opp_debug_register+0x8c/0xc0 _add_opp_dev_unlocked+0x5c/0x70 _add_opp_dev+0x38/0x58 _opp_get_opp_table+0xdc/0x1ac dev_pm_opp_get_opp_table_indexed+0x24/0x30 dev_pm_opp_of_add_table_indexed+0x48/0x84 of_genpd_add_provider_onecell+0xc0/0x1b8 rpmhpd_probe+0x240/0x268 platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_bus_create+0x230/0x368 of_platform_populate+0x70/0xbc of_platform_default_populate_init+0xa8/0xc0 do_one_initcall+0x1c8/0x3fc do_initcall_level+0xb4/0x10c do_basic_setup+0x30/0x48 kernel_init_freeable+0x124/0x1a4 kernel_init+0x14/0x104 ret_from_fork+0x10/0x18 -> #1 (&opp_table->lock){+.+.}: __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _add_opp_dev+0x2c/0x58 _opp_get_opp_table+0xdc/0x1ac dev_pm_opp_get_opp_table_indexed+0x24/0x30 dev_pm_opp_of_add_table_indexed+0x48/0x84 of_genpd_add_provider_onecell+0xc0/0x1b8 rpmhpd_probe+0x240/0x268 platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_bus_create+0x230/0x368 of_platform_populate+0x70/0xbc of_platform_default_populate_init+0xa8/0xc0 do_one_initcall+0x1c8/0x3fc do_initcall_level+0xb4/0x10c do_basic_setup+0x30/0x48 kernel_init_freeable+0x124/0x1a4 kernel_init+0x14/0x104 ret_from_fork+0x10/0x18 -> #0 (opp_table_lock){+.+.}: __lock_acquire+0xee4/0x2450 lock_acquire+0x1cc/0x210 __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _find_opp_table+0x34/0x74 dev_pm_opp_find_freq_exact+0x2c/0xdc a6xx_gmu_resume+0xc8/0xecc a6xx_pm_resume+0x148/0x200 adreno_resume+0x28/0x34 pm_generic_runtime_resume+0x34/0x48 __rpm_callback+0x70/0x10c rpm_callback+0x34/0x8c rpm_resume+0x414/0x550 __pm_runtime_resume+0x7c/0xa0 msm_gpu_submit+0x60/0x1c0 msm_ioctl_gem_submit+0xadc/0xb60 drm_ioctl_kernel+0x9c/0x118 drm_ioctl+0x27c/0x408 drm_compat_ioctl+0xcc/0xdc __se_compat_sys_ioctl+0x100/0x206c __arm64_compat_sys_ioctl+0x20/0x2c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 other info that might help us debug this: Chain exists of: opp_table_lock --> &mm->mmap_sem --> reservation_ww_class_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(reservation_ww_class_mutex); lock(&mm->mmap_sem); lock(reservation_ww_class_mutex); lock(opp_table_lock); *** DEADLOCK *** 3 locks held by chrome/1865: #0: ffffff81edecc0d8 (&dev->struct_mutex){+.+.}, at: msm_ioctl_gem_submit+0x264/0xb60 #1: ffffff81d0000870 (reservation_ww_class_acquire){+.+.}, at: msm_ioctl_gem_submit+0x8e8/0xb60 #2: ffffff81f0fc71a8 (reservation_ww_class_mutex){+.+.}, at: submit_lock_objects+0x70/0x1ec stack backtrace: CPU: 0 PID: 1865 Comm: chrome Not tainted 5.4.72 #14 Hardware name: Google Lazor (rev1+) with LTE (DT) Call trace: dump_backtrace+0x0/0x158 show_stack+0x20/0x2c dump_stack+0xc8/0x160 print_circular_bug+0x2c4/0x2c8 check_noncircular+0x1a8/0x1b0 __lock_acquire+0xee4/0x2450 lock_acquire+0x1cc/0x210 __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _find_opp_table+0x34/0x74 dev_pm_opp_find_freq_exact+0x2c/0xdc a6xx_gmu_resume+0xc8/0xecc a6xx_pm_resume+0x148/0x200 adreno_resume+0x28/0x34 pm_generic_runtime_resume+0x34/0x48 __rpm_callback+0x70/0x10c rpm_callback+0x34/0x8c rpm_resume+0x414/0x550 __pm_runtime_resume+0x7c/0xa0 msm_gpu_submit+0x60/0x1c0 msm_ioctl_gem_submit+0xadc/0xb60 drm_ioctl_kernel+0x9c/0x118 drm_ioctl+0x27c/0x408 drm_compat_ioctl+0xcc/0xdc __se_compat_sys_ioctl+0x100/0x206c __arm64_compat_sys_ioctl+0x20/0x2c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 Reported-by: Rob Clark <robdclark@gmail.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2020-10-27 16:53:21 +05:30
mutex_unlock(&opp_table_lock);
err:
return ERR_PTR(-ENODEV);
}
/* Free resources previously acquired by _opp_table_alloc_required_tables() */
static void _opp_table_free_required_tables(struct opp_table *opp_table)
{
struct opp_table **required_opp_tables = opp_table->required_opp_tables;
int i;
if (!required_opp_tables)
return;
for (i = 0; i < opp_table->required_opp_count; i++) {
if (IS_ERR_OR_NULL(required_opp_tables[i]))
continue;
dev_pm_opp_put_opp_table(required_opp_tables[i]);
}
kfree(required_opp_tables);
opp_table->required_opp_count = 0;
opp_table->required_opp_tables = NULL;
mutex_lock(&opp_table_lock);
list_del(&opp_table->lazy);
mutex_unlock(&opp_table_lock);
}
/*
* Populate all devices and opp tables which are part of "required-opps" list.
* Checking only the first OPP node should be enough.
*/
static void _opp_table_alloc_required_tables(struct opp_table *opp_table,
struct device *dev,
struct device_node *opp_np)
{
struct opp_table **required_opp_tables;
struct device_node *required_np, *np;
bool lazy = false;
int count, i, size;
/* Traversing the first OPP node is all we need */
np = of_get_next_available_child(opp_np, NULL);
if (!np) {
dev_warn(dev, "Empty OPP table\n");
return;
}
count = of_count_phandle_with_args(np, "required-opps", NULL);
if (count <= 0)
goto put_np;
size = sizeof(*required_opp_tables) + sizeof(*opp_table->required_devs);
required_opp_tables = kcalloc(count, size, GFP_KERNEL);
if (!required_opp_tables)
goto put_np;
opp_table->required_opp_tables = required_opp_tables;
opp_table->required_devs = (void *)(required_opp_tables + count);
opp_table->required_opp_count = count;
for (i = 0; i < count; i++) {
required_np = of_parse_required_opp(np, i);
if (!required_np)
goto free_required_tables;
required_opp_tables[i] = _find_table_of_opp_np(required_np);
of_node_put(required_np);
if (IS_ERR(required_opp_tables[i]))
lazy = true;
}
/* Let's do the linking later on */
if (lazy) {
/*
* The OPP table is not held while allocating the table, take it
* now to avoid corruption to the lazy_opp_tables list.
*/
mutex_lock(&opp_table_lock);
list_add(&opp_table->lazy, &lazy_opp_tables);
mutex_unlock(&opp_table_lock);
}
goto put_np;
free_required_tables:
_opp_table_free_required_tables(opp_table);
put_np:
of_node_put(np);
}
void _of_init_opp_table(struct opp_table *opp_table, struct device *dev,
int index)
{
struct device_node *np, *opp_np;
u32 val;
/*
* Only required for backward compatibility with v1 bindings, but isn't
* harmful for other cases. And so we do it unconditionally.
*/
np = of_node_get(dev->of_node);
if (!np)
return;
if (!of_property_read_u32(np, "clock-latency", &val))
opp_table->clock_latency_ns_max = val;
of_property_read_u32(np, "voltage-tolerance",
&opp_table->voltage_tolerance_v1);
if (of_property_present(np, "#power-domain-cells"))
opp_table->is_genpd = true;
/* Get OPP table node */
opp_np = _opp_of_get_opp_desc_node(np, index);
of_node_put(np);
if (!opp_np)
return;
if (of_property_read_bool(opp_np, "opp-shared"))
opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED;
else
opp_table->shared_opp = OPP_TABLE_ACCESS_EXCLUSIVE;
opp_table->np = opp_np;
_opp_table_alloc_required_tables(opp_table, dev, opp_np);
}
void _of_clear_opp_table(struct opp_table *opp_table)
{
_opp_table_free_required_tables(opp_table);
of_node_put(opp_table->np);
}
/*
* Release all resources previously acquired with a call to
* _of_opp_alloc_required_opps().
*/
static void _of_opp_free_required_opps(struct opp_table *opp_table,
struct dev_pm_opp *opp)
{
struct dev_pm_opp **required_opps = opp->required_opps;
int i;
if (!required_opps)
return;
for (i = 0; i < opp_table->required_opp_count; i++) {
if (!required_opps[i])
continue;
/* Put the reference back */
dev_pm_opp_put(required_opps[i]);
}
opp->required_opps = NULL;
kfree(required_opps);
}
void _of_clear_opp(struct opp_table *opp_table, struct dev_pm_opp *opp)
{
_of_opp_free_required_opps(opp_table, opp);
of_node_put(opp->np);
}
static int _link_required_opps(struct dev_pm_opp *opp, struct opp_table *opp_table,
struct opp_table *required_table, int index)
{
struct device_node *np;
np = of_parse_required_opp(opp->np, index);
if (unlikely(!np))
return -ENODEV;
opp->required_opps[index] = _find_opp_of_np(required_table, np);
of_node_put(np);
if (!opp->required_opps[index]) {
pr_err("%s: Unable to find required OPP node: %pOF (%d)\n",
__func__, opp->np, index);
return -ENODEV;
}
/*
* There are two genpd (as required-opp) cases that we need to handle,
* devices with a single genpd and ones with multiple genpds.
*
* The single genpd case requires special handling as we need to use the
* same `dev` structure (instead of a virtual one provided by genpd
* core) for setting the performance state.
*
* It doesn't make sense for a device's DT entry to have both
* "opp-level" and single "required-opps" entry pointing to a genpd's
* OPP, as that would make the OPP core call
* dev_pm_domain_set_performance_state() for two different values for
* the same device structure. Lets treat single genpd configuration as a
* case where the OPP's level is directly available without required-opp
* link in the DT.
*
* Just update the `level` with the right value, which
* dev_pm_opp_set_opp() will take care of in the normal path itself.
*
* There is another case though, where a genpd's OPP table has
* required-opps set to a parent genpd. The OPP core expects the user to
* set the respective required `struct device` pointer via
* dev_pm_opp_set_config().
*/
if (required_table->is_genpd && opp_table->required_opp_count == 1 &&
!opp_table->required_devs[0]) {
/* Genpd core takes care of propagation to parent genpd */
if (!opp_table->is_genpd) {
if (!WARN_ON(opp->level != OPP_LEVEL_UNSET))
opp->level = opp->required_opps[0]->level;
}
}
return 0;
}
/* Populate all required OPPs which are part of "required-opps" list */
static int _of_opp_alloc_required_opps(struct opp_table *opp_table,
struct dev_pm_opp *opp)
{
struct opp_table *required_table;
int i, ret, count = opp_table->required_opp_count;
if (!count)
return 0;
opp->required_opps = kcalloc(count, sizeof(*opp->required_opps), GFP_KERNEL);
if (!opp->required_opps)
return -ENOMEM;
for (i = 0; i < count; i++) {
required_table = opp_table->required_opp_tables[i];
/* Required table not added yet, we will link later */
if (IS_ERR_OR_NULL(required_table))
continue;
ret = _link_required_opps(opp, opp_table, required_table, i);
if (ret)
goto free_required_opps;
}
return 0;
free_required_opps:
_of_opp_free_required_opps(opp_table, opp);
return ret;
}
/* Link required OPPs for an individual OPP */
static int lazy_link_required_opps(struct opp_table *opp_table,
struct opp_table *new_table, int index)
{
struct dev_pm_opp *opp;
int ret;
list_for_each_entry(opp, &opp_table->opp_list, node) {
ret = _link_required_opps(opp, opp_table, new_table, index);
if (ret)
return ret;
}
return 0;
}
/* Link required OPPs for all OPPs of the newly added OPP table */
static void lazy_link_required_opp_table(struct opp_table *new_table)
{
struct opp_table *opp_table, *temp, **required_opp_tables;
struct device_node *required_np, *opp_np, *required_table_np;
struct dev_pm_opp *opp;
int i, ret;
mutex_lock(&opp_table_lock);
list_for_each_entry_safe(opp_table, temp, &lazy_opp_tables, lazy) {
bool lazy = false;
/* opp_np can't be invalid here */
opp_np = of_get_next_available_child(opp_table->np, NULL);
for (i = 0; i < opp_table->required_opp_count; i++) {
required_opp_tables = opp_table->required_opp_tables;
/* Required opp-table is already parsed */
if (!IS_ERR(required_opp_tables[i]))
continue;
/* required_np can't be invalid here */
required_np = of_parse_required_opp(opp_np, i);
required_table_np = of_get_parent(required_np);
of_node_put(required_table_np);
of_node_put(required_np);
/*
* Newly added table isn't the required opp-table for
* opp_table.
*/
if (required_table_np != new_table->np) {
lazy = true;
continue;
}
required_opp_tables[i] = new_table;
_get_opp_table_kref(new_table);
/* Link OPPs now */
ret = lazy_link_required_opps(opp_table, new_table, i);
if (ret) {
/* The OPPs will be marked unusable */
lazy = false;
break;
}
}
of_node_put(opp_np);
/* All required opp-tables found, remove from lazy list */
if (!lazy) {
list_del_init(&opp_table->lazy);
list_for_each_entry(opp, &opp_table->opp_list, node)
_required_opps_available(opp, opp_table->required_opp_count);
}
}
mutex_unlock(&opp_table_lock);
}
static int _bandwidth_supported(struct device *dev, struct opp_table *opp_table)
{
struct device_node *np, *opp_np;
struct property *prop;
if (!opp_table) {
np = of_node_get(dev->of_node);
if (!np)
return -ENODEV;
opp_np = _opp_of_get_opp_desc_node(np, 0);
of_node_put(np);
} else {
opp_np = of_node_get(opp_table->np);
}
/* Lets not fail in case we are parsing opp-v1 bindings */
if (!opp_np)
return 0;
/* Checking only first OPP is sufficient */
np = of_get_next_available_child(opp_np, NULL);
of_node_put(opp_np);
if (!np) {
dev_err(dev, "OPP table empty\n");
return -EINVAL;
}
prop = of_find_property(np, "opp-peak-kBps", NULL);
of_node_put(np);
if (!prop || !prop->length)
return 0;
return 1;
}
int dev_pm_opp_of_find_icc_paths(struct device *dev,
struct opp_table *opp_table)
{
struct device_node *np;
int ret, i, count, num_paths;
struct icc_path **paths;
ret = _bandwidth_supported(dev, opp_table);
if (ret == -EINVAL)
return 0; /* Empty OPP table is a valid corner-case, let's not fail */
else if (ret <= 0)
return ret;
ret = 0;
np = of_node_get(dev->of_node);
if (!np)
return 0;
count = of_count_phandle_with_args(np, "interconnects",
"#interconnect-cells");
of_node_put(np);
if (count < 0)
return 0;
/* two phandles when #interconnect-cells = <1> */
if (count % 2) {
dev_err(dev, "%s: Invalid interconnects values\n", __func__);
return -EINVAL;
}
num_paths = count / 2;
paths = kcalloc(num_paths, sizeof(*paths), GFP_KERNEL);
if (!paths)
return -ENOMEM;
for (i = 0; i < num_paths; i++) {
paths[i] = of_icc_get_by_index(dev, i);
if (IS_ERR(paths[i])) {
ret = dev_err_probe(dev, PTR_ERR(paths[i]), "%s: Unable to get path%d\n", __func__, i);
goto err;
}
}
if (opp_table) {
opp_table->paths = paths;
opp_table->path_count = num_paths;
return 0;
}
err:
while (i--)
icc_put(paths[i]);
kfree(paths);
return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_of_find_icc_paths);
static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
struct device_node *np)
{
unsigned int levels = opp_table->supported_hw_count;
int count, versions, ret, i, j;
u32 val;
if (!opp_table->supported_hw) {
/*
* In the case that no supported_hw has been set by the
* platform but there is an opp-supported-hw value set for
* an OPP then the OPP should not be enabled as there is
* no way to see if the hardware supports it.
*/
if (of_property_present(np, "opp-supported-hw"))
return false;
else
return true;
}
count = of_property_count_u32_elems(np, "opp-supported-hw");
if (count <= 0 || count % levels) {
dev_err(dev, "%s: Invalid opp-supported-hw property (%d)\n",
__func__, count);
return false;
}
versions = count / levels;
/* All levels in at least one of the versions should match */
for (i = 0; i < versions; i++) {
bool supported = true;
for (j = 0; j < levels; j++) {
ret = of_property_read_u32_index(np, "opp-supported-hw",
i * levels + j, &val);
if (ret) {
dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n",
__func__, i * levels + j, ret);
return false;
}
/* Check if the level is supported */
if (!(val & opp_table->supported_hw[j])) {
supported = false;
break;
}
}
if (supported)
return true;
}
return false;
}
static u32 *_parse_named_prop(struct dev_pm_opp *opp, struct device *dev,
struct opp_table *opp_table,
const char *prop_type, bool *triplet)
{
struct property *prop = NULL;
char name[NAME_MAX];
int count, ret;
u32 *out;
/* Search for "opp-<prop_type>-<name>" */
if (opp_table->prop_name) {
snprintf(name, sizeof(name), "opp-%s-%s", prop_type,
opp_table->prop_name);
prop = of_find_property(opp->np, name, NULL);
}
if (!prop) {
/* Search for "opp-<prop_type>" */
snprintf(name, sizeof(name), "opp-%s", prop_type);
prop = of_find_property(opp->np, name, NULL);
if (!prop)
return NULL;
}
count = of_property_count_u32_elems(opp->np, name);
if (count < 0) {
dev_err(dev, "%s: Invalid %s property (%d)\n", __func__, name,
count);
return ERR_PTR(count);
}
/*
* Initialize regulator_count, if regulator information isn't provided
* by the platform. Now that one of the properties is available, fix the
* regulator_count to 1.
*/
if (unlikely(opp_table->regulator_count == -1))
opp_table->regulator_count = 1;
if (count != opp_table->regulator_count &&
(!triplet || count != opp_table->regulator_count * 3)) {
dev_err(dev, "%s: Invalid number of elements in %s property (%u) with supplies (%d)\n",
__func__, prop_type, count, opp_table->regulator_count);
return ERR_PTR(-EINVAL);
}
out = kmalloc_array(count, sizeof(*out), GFP_KERNEL);
if (!out)
return ERR_PTR(-EINVAL);
ret = of_property_read_u32_array(opp->np, name, out, count);
if (ret) {
dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
kfree(out);
return ERR_PTR(-EINVAL);
}
if (triplet)
*triplet = count != opp_table->regulator_count;
return out;
}
static u32 *opp_parse_microvolt(struct dev_pm_opp *opp, struct device *dev,
struct opp_table *opp_table, bool *triplet)
{
u32 *microvolt;
microvolt = _parse_named_prop(opp, dev, opp_table, "microvolt", triplet);
if (IS_ERR(microvolt))
return microvolt;
if (!microvolt) {
/*
* Missing property isn't a problem, but an invalid
* entry is. This property isn't optional if regulator
* information is provided. Check only for the first OPP, as
* regulator_count may get initialized after that to a valid
* value.
*/
if (list_empty(&opp_table->opp_list) &&
opp_table->regulator_count > 0) {
dev_err(dev, "%s: opp-microvolt missing although OPP managing regulators\n",
__func__);
return ERR_PTR(-EINVAL);
}
}
return microvolt;
}
static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
struct opp_table *opp_table)
{
u32 *microvolt, *microamp, *microwatt;
int ret = 0, i, j;
bool triplet;
microvolt = opp_parse_microvolt(opp, dev, opp_table, &triplet);
if (IS_ERR(microvolt))
return PTR_ERR(microvolt);
microamp = _parse_named_prop(opp, dev, opp_table, "microamp", NULL);
if (IS_ERR(microamp)) {
ret = PTR_ERR(microamp);
goto free_microvolt;
}
microwatt = _parse_named_prop(opp, dev, opp_table, "microwatt", NULL);
if (IS_ERR(microwatt)) {
ret = PTR_ERR(microwatt);
goto free_microamp;
}
/*
* Initialize regulator_count if it is uninitialized and no properties
* are found.
*/
if (unlikely(opp_table->regulator_count == -1)) {
opp_table->regulator_count = 0;
return 0;
}
for (i = 0, j = 0; i < opp_table->regulator_count; i++) {
if (microvolt) {
opp->supplies[i].u_volt = microvolt[j++];
if (triplet) {
opp->supplies[i].u_volt_min = microvolt[j++];
opp->supplies[i].u_volt_max = microvolt[j++];
} else {
opp->supplies[i].u_volt_min = opp->supplies[i].u_volt;
opp->supplies[i].u_volt_max = opp->supplies[i].u_volt;
}
}
if (microamp)
opp->supplies[i].u_amp = microamp[i];
if (microwatt)
opp->supplies[i].u_watt = microwatt[i];
}
kfree(microwatt);
free_microamp:
kfree(microamp);
free_microvolt:
kfree(microvolt);
return ret;
}
/**
* dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
* entries
* @dev: device pointer used to lookup OPP table.
*
* Free OPPs created using static entries present in DT.
*/
void dev_pm_opp_of_remove_table(struct device *dev)
{
dev_pm_opp_remove_table(dev);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
static int _read_rate(struct dev_pm_opp *new_opp, struct opp_table *opp_table,
struct device_node *np)
{
struct property *prop;
int i, count, ret;
u64 *rates;
prop = of_find_property(np, "opp-hz", NULL);
if (!prop)
return -ENODEV;
count = prop->length / sizeof(u64);
if (opp_table->clk_count != count) {
pr_err("%s: Count mismatch between opp-hz and clk_count (%d %d)\n",
__func__, count, opp_table->clk_count);
return -EINVAL;
}
rates = kmalloc_array(count, sizeof(*rates), GFP_KERNEL);
if (!rates)
return -ENOMEM;
ret = of_property_read_u64_array(np, "opp-hz", rates, count);
if (ret) {
pr_err("%s: Error parsing opp-hz: %d\n", __func__, ret);
} else {
/*
* Rate is defined as an unsigned long in clk API, and so
* casting explicitly to its type. Must be fixed once rate is 64
* bit guaranteed in clk API.
*/
for (i = 0; i < count; i++) {
new_opp->rates[i] = (unsigned long)rates[i];
/* This will happen for frequencies > 4.29 GHz */
WARN_ON(new_opp->rates[i] != rates[i]);
}
}
kfree(rates);
return ret;
}
static int _read_bw(struct dev_pm_opp *new_opp, struct opp_table *opp_table,
struct device_node *np, bool peak)
{
const char *name = peak ? "opp-peak-kBps" : "opp-avg-kBps";
struct property *prop;
int i, count, ret;
u32 *bw;
prop = of_find_property(np, name, NULL);
if (!prop)
return -ENODEV;
count = prop->length / sizeof(u32);
if (opp_table->path_count != count) {
pr_err("%s: Mismatch between %s and paths (%d %d)\n",
__func__, name, count, opp_table->path_count);
return -EINVAL;
}
bw = kmalloc_array(count, sizeof(*bw), GFP_KERNEL);
if (!bw)
return -ENOMEM;
ret = of_property_read_u32_array(np, name, bw, count);
if (ret) {
pr_err("%s: Error parsing %s: %d\n", __func__, name, ret);
goto out;
}
for (i = 0; i < count; i++) {
if (peak)
new_opp->bandwidth[i].peak = kBps_to_icc(bw[i]);
else
new_opp->bandwidth[i].avg = kBps_to_icc(bw[i]);
}
out:
kfree(bw);
return ret;
}
static int _read_opp_key(struct dev_pm_opp *new_opp,
struct opp_table *opp_table, struct device_node *np)
{
bool found = false;
int ret;
ret = _read_rate(new_opp, opp_table, np);
if (!ret)
found = true;
else if (ret != -ENODEV)
return ret;
/*
* Bandwidth consists of peak and average (optional) values:
* opp-peak-kBps = <path1_value path2_value>;
* opp-avg-kBps = <path1_value path2_value>;
*/
ret = _read_bw(new_opp, opp_table, np, true);
if (!ret) {
found = true;
ret = _read_bw(new_opp, opp_table, np, false);
}
/* The properties were found but we failed to parse them */
if (ret && ret != -ENODEV)
return ret;
if (!of_property_read_u32(np, "opp-level", &new_opp->level))
found = true;
if (found)
return 0;
return ret;
}
/**
* _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
* @opp_table: OPP table
* @dev: device for which we do this operation
* @np: device node
*
* This function adds an opp definition to the opp table and returns status. The
* opp can be controlled using dev_pm_opp_enable/disable functions and may be
* removed by dev_pm_opp_remove.
*
* Return:
PM / OPP: _of_add_opp_table_v2(): increment count only if OPP is added Currently the _of_add_opp_table_v2 call loops through the OPP nodes in the operating-points-v2 table in the device tree and calls _opp_add_static_v2 for each to add them to the table. It counts each iteration through this loop as an added OPP, however there are cases where _opp_add_static_v2() returns 0 but no new OPP is added to the list. This can happen while adding duplicate OPP or if the OPP isn't supported by hardware. Because of this the count variable will contain the number of OPP nodes in the table in device tree but not necessarily the ones that are actually added. As this count value is what is checked to determine if there are any valid OPPs, if a platform has an operating-points-v2 table with all OPP nodes containing opp-supported-hw values that are not currently supported, then _of_add_opp_table_v2 will fail to abort as it should due to an empty table. Additionally, since commit 3ba98324e81a ("PM / OPP: Get performance state using genpd helper"), the same count variable is compared against the number of OPPs containing performance states and requires that either all or none have pstates set, however in the case of any opp table that has any entries that do not get added by _opp_add_static_v2 due to incompatible opp-supported-hw fields, these numbers will not match and _of_add_opp_table_v2 will incorrectly fail. We need to clearly identify all the three cases (success, failure, unsupported/duplicate OPPs) and then increment count only on success case. Change return type of _opp_add_static_v2() to return the pointer to the newly added OPP instead of an integer. This routine now returns a valid pointer if the OPP is really added, NULL for unsupported or duplicate OPPs, and error value cased as a pointer on errors. Ideally the fixes tag in this commit should point back to the commit that introduced OPP v2 initially, as that's where we started incorrectly accounting for duplicate OPPs: commit 274659029c9d ("PM / OPP: Add support to parse "operating-points-v2" bindings") But it wasn't a real problem until recently as the count was only used to check if any OPPs are added or not. And so this commit points to a rather recent commit where we added more code that depends on the value of "count". Fixes: 3ba98324e81a ("PM / OPP: Get performance state using genpd helper") Reported-by: Dave Gerlach <d-gerlach@ti.com> Reported-by: Niklas Cassel <niklas.cassel@linaro.org> Tested-by: Niklas Cassel <niklas.cassel@linaro.org> Signed-off-by: Dave Gerlach <d-gerlach@ti.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2018-10-03 16:13:15 +05:30
* Valid OPP pointer:
* On success
* NULL:
* Duplicate OPPs (both freq and volt are same) and opp->available
PM / OPP: _of_add_opp_table_v2(): increment count only if OPP is added Currently the _of_add_opp_table_v2 call loops through the OPP nodes in the operating-points-v2 table in the device tree and calls _opp_add_static_v2 for each to add them to the table. It counts each iteration through this loop as an added OPP, however there are cases where _opp_add_static_v2() returns 0 but no new OPP is added to the list. This can happen while adding duplicate OPP or if the OPP isn't supported by hardware. Because of this the count variable will contain the number of OPP nodes in the table in device tree but not necessarily the ones that are actually added. As this count value is what is checked to determine if there are any valid OPPs, if a platform has an operating-points-v2 table with all OPP nodes containing opp-supported-hw values that are not currently supported, then _of_add_opp_table_v2 will fail to abort as it should due to an empty table. Additionally, since commit 3ba98324e81a ("PM / OPP: Get performance state using genpd helper"), the same count variable is compared against the number of OPPs containing performance states and requires that either all or none have pstates set, however in the case of any opp table that has any entries that do not get added by _opp_add_static_v2 due to incompatible opp-supported-hw fields, these numbers will not match and _of_add_opp_table_v2 will incorrectly fail. We need to clearly identify all the three cases (success, failure, unsupported/duplicate OPPs) and then increment count only on success case. Change return type of _opp_add_static_v2() to return the pointer to the newly added OPP instead of an integer. This routine now returns a valid pointer if the OPP is really added, NULL for unsupported or duplicate OPPs, and error value cased as a pointer on errors. Ideally the fixes tag in this commit should point back to the commit that introduced OPP v2 initially, as that's where we started incorrectly accounting for duplicate OPPs: commit 274659029c9d ("PM / OPP: Add support to parse "operating-points-v2" bindings") But it wasn't a real problem until recently as the count was only used to check if any OPPs are added or not. And so this commit points to a rather recent commit where we added more code that depends on the value of "count". Fixes: 3ba98324e81a ("PM / OPP: Get performance state using genpd helper") Reported-by: Dave Gerlach <d-gerlach@ti.com> Reported-by: Niklas Cassel <niklas.cassel@linaro.org> Tested-by: Niklas Cassel <niklas.cassel@linaro.org> Signed-off-by: Dave Gerlach <d-gerlach@ti.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2018-10-03 16:13:15 +05:30
* OR if the OPP is not supported by hardware.
* ERR_PTR(-EEXIST):
* Freq are same and volt are different OR
* Duplicate OPPs (both freq and volt are same) and !opp->available
PM / OPP: _of_add_opp_table_v2(): increment count only if OPP is added Currently the _of_add_opp_table_v2 call loops through the OPP nodes in the operating-points-v2 table in the device tree and calls _opp_add_static_v2 for each to add them to the table. It counts each iteration through this loop as an added OPP, however there are cases where _opp_add_static_v2() returns 0 but no new OPP is added to the list. This can happen while adding duplicate OPP or if the OPP isn't supported by hardware. Because of this the count variable will contain the number of OPP nodes in the table in device tree but not necessarily the ones that are actually added. As this count value is what is checked to determine if there are any valid OPPs, if a platform has an operating-points-v2 table with all OPP nodes containing opp-supported-hw values that are not currently supported, then _of_add_opp_table_v2 will fail to abort as it should due to an empty table. Additionally, since commit 3ba98324e81a ("PM / OPP: Get performance state using genpd helper"), the same count variable is compared against the number of OPPs containing performance states and requires that either all or none have pstates set, however in the case of any opp table that has any entries that do not get added by _opp_add_static_v2 due to incompatible opp-supported-hw fields, these numbers will not match and _of_add_opp_table_v2 will incorrectly fail. We need to clearly identify all the three cases (success, failure, unsupported/duplicate OPPs) and then increment count only on success case. Change return type of _opp_add_static_v2() to return the pointer to the newly added OPP instead of an integer. This routine now returns a valid pointer if the OPP is really added, NULL for unsupported or duplicate OPPs, and error value cased as a pointer on errors. Ideally the fixes tag in this commit should point back to the commit that introduced OPP v2 initially, as that's where we started incorrectly accounting for duplicate OPPs: commit 274659029c9d ("PM / OPP: Add support to parse "operating-points-v2" bindings") But it wasn't a real problem until recently as the count was only used to check if any OPPs are added or not. And so this commit points to a rather recent commit where we added more code that depends on the value of "count". Fixes: 3ba98324e81a ("PM / OPP: Get performance state using genpd helper") Reported-by: Dave Gerlach <d-gerlach@ti.com> Reported-by: Niklas Cassel <niklas.cassel@linaro.org> Tested-by: Niklas Cassel <niklas.cassel@linaro.org> Signed-off-by: Dave Gerlach <d-gerlach@ti.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2018-10-03 16:13:15 +05:30
* ERR_PTR(-ENOMEM):
* Memory allocation failure
* ERR_PTR(-EINVAL):
* Failed parsing the OPP node
*/
PM / OPP: _of_add_opp_table_v2(): increment count only if OPP is added Currently the _of_add_opp_table_v2 call loops through the OPP nodes in the operating-points-v2 table in the device tree and calls _opp_add_static_v2 for each to add them to the table. It counts each iteration through this loop as an added OPP, however there are cases where _opp_add_static_v2() returns 0 but no new OPP is added to the list. This can happen while adding duplicate OPP or if the OPP isn't supported by hardware. Because of this the count variable will contain the number of OPP nodes in the table in device tree but not necessarily the ones that are actually added. As this count value is what is checked to determine if there are any valid OPPs, if a platform has an operating-points-v2 table with all OPP nodes containing opp-supported-hw values that are not currently supported, then _of_add_opp_table_v2 will fail to abort as it should due to an empty table. Additionally, since commit 3ba98324e81a ("PM / OPP: Get performance state using genpd helper"), the same count variable is compared against the number of OPPs containing performance states and requires that either all or none have pstates set, however in the case of any opp table that has any entries that do not get added by _opp_add_static_v2 due to incompatible opp-supported-hw fields, these numbers will not match and _of_add_opp_table_v2 will incorrectly fail. We need to clearly identify all the three cases (success, failure, unsupported/duplicate OPPs) and then increment count only on success case. Change return type of _opp_add_static_v2() to return the pointer to the newly added OPP instead of an integer. This routine now returns a valid pointer if the OPP is really added, NULL for unsupported or duplicate OPPs, and error value cased as a pointer on errors. Ideally the fixes tag in this commit should point back to the commit that introduced OPP v2 initially, as that's where we started incorrectly accounting for duplicate OPPs: commit 274659029c9d ("PM / OPP: Add support to parse "operating-points-v2" bindings") But it wasn't a real problem until recently as the count was only used to check if any OPPs are added or not. And so this commit points to a rather recent commit where we added more code that depends on the value of "count". Fixes: 3ba98324e81a ("PM / OPP: Get performance state using genpd helper") Reported-by: Dave Gerlach <d-gerlach@ti.com> Reported-by: Niklas Cassel <niklas.cassel@linaro.org> Tested-by: Niklas Cassel <niklas.cassel@linaro.org> Signed-off-by: Dave Gerlach <d-gerlach@ti.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2018-10-03 16:13:15 +05:30
static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table,
struct device *dev, struct device_node *np)
{
struct dev_pm_opp *new_opp;
u32 val;
int ret;
new_opp = _opp_allocate(opp_table);
if (!new_opp)
PM / OPP: _of_add_opp_table_v2(): increment count only if OPP is added Currently the _of_add_opp_table_v2 call loops through the OPP nodes in the operating-points-v2 table in the device tree and calls _opp_add_static_v2 for each to add them to the table. It counts each iteration through this loop as an added OPP, however there are cases where _opp_add_static_v2() returns 0 but no new OPP is added to the list. This can happen while adding duplicate OPP or if the OPP isn't supported by hardware. Because of this the count variable will contain the number of OPP nodes in the table in device tree but not necessarily the ones that are actually added. As this count value is what is checked to determine if there are any valid OPPs, if a platform has an operating-points-v2 table with all OPP nodes containing opp-supported-hw values that are not currently supported, then _of_add_opp_table_v2 will fail to abort as it should due to an empty table. Additionally, since commit 3ba98324e81a ("PM / OPP: Get performance state using genpd helper"), the same count variable is compared against the number of OPPs containing performance states and requires that either all or none have pstates set, however in the case of any opp table that has any entries that do not get added by _opp_add_static_v2 due to incompatible opp-supported-hw fields, these numbers will not match and _of_add_opp_table_v2 will incorrectly fail. We need to clearly identify all the three cases (success, failure, unsupported/duplicate OPPs) and then increment count only on success case. Change return type of _opp_add_static_v2() to return the pointer to the newly added OPP instead of an integer. This routine now returns a valid pointer if the OPP is really added, NULL for unsupported or duplicate OPPs, and error value cased as a pointer on errors. Ideally the fixes tag in this commit should point back to the commit that introduced OPP v2 initially, as that's where we started incorrectly accounting for duplicate OPPs: commit 274659029c9d ("PM / OPP: Add support to parse "operating-points-v2" bindings") But it wasn't a real problem until recently as the count was only used to check if any OPPs are added or not. And so this commit points to a rather recent commit where we added more code that depends on the value of "count". Fixes: 3ba98324e81a ("PM / OPP: Get performance state using genpd helper") Reported-by: Dave Gerlach <d-gerlach@ti.com> Reported-by: Niklas Cassel <niklas.cassel@linaro.org> Tested-by: Niklas Cassel <niklas.cassel@linaro.org> Signed-off-by: Dave Gerlach <d-gerlach@ti.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2018-10-03 16:13:15 +05:30
return ERR_PTR(-ENOMEM);
ret = _read_opp_key(new_opp, opp_table, np);
if (ret < 0) {
dev_err(dev, "%s: opp key field not found\n", __func__);
goto free_opp;
}
/* Check if the OPP supports hardware's hierarchy of versions or not */
if (!_opp_is_supported(dev, opp_table, np)) {
dev_dbg(dev, "OPP not supported by hardware: %s\n",
of_node_full_name(np));
goto free_opp;
}
new_opp->turbo = of_property_read_bool(np, "turbo-mode");
new_opp->np = of_node_get(np);
new_opp->dynamic = false;
new_opp->available = true;
ret = _of_opp_alloc_required_opps(opp_table, new_opp);
if (ret)
goto free_opp;
if (!of_property_read_u32(np, "clock-latency-ns", &val))
new_opp->clock_latency_ns = val;
ret = opp_parse_supplies(new_opp, dev, opp_table);
if (ret)
goto free_required_opps;
ret = _opp_add(dev, new_opp, opp_table);
if (ret) {
/* Don't return error for duplicate OPPs */
if (ret == -EBUSY)
ret = 0;
goto free_required_opps;
}
/* OPP to select on device suspend */
if (of_property_read_bool(np, "opp-suspend")) {
if (opp_table->suspend_opp) {
/* Pick the OPP with higher rate/bw/level as suspend OPP */
if (_opp_compare_key(opp_table, new_opp, opp_table->suspend_opp) == 1) {
opp_table->suspend_opp->suspend = false;
new_opp->suspend = true;
opp_table->suspend_opp = new_opp;
}
} else {
new_opp->suspend = true;
opp_table->suspend_opp = new_opp;
}
}
if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max)
opp_table->clock_latency_ns_max = new_opp->clock_latency_ns;
pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu level:%u\n",
__func__, new_opp->turbo, new_opp->rates[0],
new_opp->supplies[0].u_volt, new_opp->supplies[0].u_volt_min,
new_opp->supplies[0].u_volt_max, new_opp->clock_latency_ns,
new_opp->level);
/*
* Notify the changes in the availability of the operable
* frequency/voltage list.
*/
blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ADD, new_opp);
PM / OPP: _of_add_opp_table_v2(): increment count only if OPP is added Currently the _of_add_opp_table_v2 call loops through the OPP nodes in the operating-points-v2 table in the device tree and calls _opp_add_static_v2 for each to add them to the table. It counts each iteration through this loop as an added OPP, however there are cases where _opp_add_static_v2() returns 0 but no new OPP is added to the list. This can happen while adding duplicate OPP or if the OPP isn't supported by hardware. Because of this the count variable will contain the number of OPP nodes in the table in device tree but not necessarily the ones that are actually added. As this count value is what is checked to determine if there are any valid OPPs, if a platform has an operating-points-v2 table with all OPP nodes containing opp-supported-hw values that are not currently supported, then _of_add_opp_table_v2 will fail to abort as it should due to an empty table. Additionally, since commit 3ba98324e81a ("PM / OPP: Get performance state using genpd helper"), the same count variable is compared against the number of OPPs containing performance states and requires that either all or none have pstates set, however in the case of any opp table that has any entries that do not get added by _opp_add_static_v2 due to incompatible opp-supported-hw fields, these numbers will not match and _of_add_opp_table_v2 will incorrectly fail. We need to clearly identify all the three cases (success, failure, unsupported/duplicate OPPs) and then increment count only on success case. Change return type of _opp_add_static_v2() to return the pointer to the newly added OPP instead of an integer. This routine now returns a valid pointer if the OPP is really added, NULL for unsupported or duplicate OPPs, and error value cased as a pointer on errors. Ideally the fixes tag in this commit should point back to the commit that introduced OPP v2 initially, as that's where we started incorrectly accounting for duplicate OPPs: commit 274659029c9d ("PM / OPP: Add support to parse "operating-points-v2" bindings") But it wasn't a real problem until recently as the count was only used to check if any OPPs are added or not. And so this commit points to a rather recent commit where we added more code that depends on the value of "count". Fixes: 3ba98324e81a ("PM / OPP: Get performance state using genpd helper") Reported-by: Dave Gerlach <d-gerlach@ti.com> Reported-by: Niklas Cassel <niklas.cassel@linaro.org> Tested-by: Niklas Cassel <niklas.cassel@linaro.org> Signed-off-by: Dave Gerlach <d-gerlach@ti.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2018-10-03 16:13:15 +05:30
return new_opp;
free_required_opps:
_of_opp_free_required_opps(opp_table, new_opp);
free_opp:
_opp_free(new_opp);
return ret ? ERR_PTR(ret) : NULL;
}
/* Initializes OPP tables based on new bindings */
static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table)
{
struct device_node *np;
int ret, count = 0;
struct dev_pm_opp *opp;
/* OPP table is already initialized for the device */
mutex_lock(&opp_table->lock);
if (opp_table->parsed_static_opps) {
opp_table->parsed_static_opps++;
mutex_unlock(&opp_table->lock);
return 0;
}
opp_table->parsed_static_opps = 1;
mutex_unlock(&opp_table->lock);
/* We have opp-table node now, iterate over it and add OPPs */
for_each_available_child_of_node(opp_table->np, np) {
PM / OPP: _of_add_opp_table_v2(): increment count only if OPP is added Currently the _of_add_opp_table_v2 call loops through the OPP nodes in the operating-points-v2 table in the device tree and calls _opp_add_static_v2 for each to add them to the table. It counts each iteration through this loop as an added OPP, however there are cases where _opp_add_static_v2() returns 0 but no new OPP is added to the list. This can happen while adding duplicate OPP or if the OPP isn't supported by hardware. Because of this the count variable will contain the number of OPP nodes in the table in device tree but not necessarily the ones that are actually added. As this count value is what is checked to determine if there are any valid OPPs, if a platform has an operating-points-v2 table with all OPP nodes containing opp-supported-hw values that are not currently supported, then _of_add_opp_table_v2 will fail to abort as it should due to an empty table. Additionally, since commit 3ba98324e81a ("PM / OPP: Get performance state using genpd helper"), the same count variable is compared against the number of OPPs containing performance states and requires that either all or none have pstates set, however in the case of any opp table that has any entries that do not get added by _opp_add_static_v2 due to incompatible opp-supported-hw fields, these numbers will not match and _of_add_opp_table_v2 will incorrectly fail. We need to clearly identify all the three cases (success, failure, unsupported/duplicate OPPs) and then increment count only on success case. Change return type of _opp_add_static_v2() to return the pointer to the newly added OPP instead of an integer. This routine now returns a valid pointer if the OPP is really added, NULL for unsupported or duplicate OPPs, and error value cased as a pointer on errors. Ideally the fixes tag in this commit should point back to the commit that introduced OPP v2 initially, as that's where we started incorrectly accounting for duplicate OPPs: commit 274659029c9d ("PM / OPP: Add support to parse "operating-points-v2" bindings") But it wasn't a real problem until recently as the count was only used to check if any OPPs are added or not. And so this commit points to a rather recent commit where we added more code that depends on the value of "count". Fixes: 3ba98324e81a ("PM / OPP: Get performance state using genpd helper") Reported-by: Dave Gerlach <d-gerlach@ti.com> Reported-by: Niklas Cassel <niklas.cassel@linaro.org> Tested-by: Niklas Cassel <niklas.cassel@linaro.org> Signed-off-by: Dave Gerlach <d-gerlach@ti.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2018-10-03 16:13:15 +05:30
opp = _opp_add_static_v2(opp_table, dev, np);
if (IS_ERR(opp)) {
ret = PTR_ERR(opp);
dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
ret);
of_node_put(np);
goto remove_static_opp;
PM / OPP: _of_add_opp_table_v2(): increment count only if OPP is added Currently the _of_add_opp_table_v2 call loops through the OPP nodes in the operating-points-v2 table in the device tree and calls _opp_add_static_v2 for each to add them to the table. It counts each iteration through this loop as an added OPP, however there are cases where _opp_add_static_v2() returns 0 but no new OPP is added to the list. This can happen while adding duplicate OPP or if the OPP isn't supported by hardware. Because of this the count variable will contain the number of OPP nodes in the table in device tree but not necessarily the ones that are actually added. As this count value is what is checked to determine if there are any valid OPPs, if a platform has an operating-points-v2 table with all OPP nodes containing opp-supported-hw values that are not currently supported, then _of_add_opp_table_v2 will fail to abort as it should due to an empty table. Additionally, since commit 3ba98324e81a ("PM / OPP: Get performance state using genpd helper"), the same count variable is compared against the number of OPPs containing performance states and requires that either all or none have pstates set, however in the case of any opp table that has any entries that do not get added by _opp_add_static_v2 due to incompatible opp-supported-hw fields, these numbers will not match and _of_add_opp_table_v2 will incorrectly fail. We need to clearly identify all the three cases (success, failure, unsupported/duplicate OPPs) and then increment count only on success case. Change return type of _opp_add_static_v2() to return the pointer to the newly added OPP instead of an integer. This routine now returns a valid pointer if the OPP is really added, NULL for unsupported or duplicate OPPs, and error value cased as a pointer on errors. Ideally the fixes tag in this commit should point back to the commit that introduced OPP v2 initially, as that's where we started incorrectly accounting for duplicate OPPs: commit 274659029c9d ("PM / OPP: Add support to parse "operating-points-v2" bindings") But it wasn't a real problem until recently as the count was only used to check if any OPPs are added or not. And so this commit points to a rather recent commit where we added more code that depends on the value of "count". Fixes: 3ba98324e81a ("PM / OPP: Get performance state using genpd helper") Reported-by: Dave Gerlach <d-gerlach@ti.com> Reported-by: Niklas Cassel <niklas.cassel@linaro.org> Tested-by: Niklas Cassel <niklas.cassel@linaro.org> Signed-off-by: Dave Gerlach <d-gerlach@ti.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2018-10-03 16:13:15 +05:30
} else if (opp) {
count++;
}
}
/* There should be one or more OPPs defined */
if (!count) {
dev_err(dev, "%s: no supported OPPs", __func__);
ret = -ENOENT;
goto remove_static_opp;
}
lazy_link_required_opp_table(opp_table);
return 0;
remove_static_opp:
_opp_remove_all_static(opp_table);
return ret;
}
/* Initializes OPP tables based on old-deprecated bindings */
static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table)
{
const struct property *prop;
const __be32 *val;
int nr, ret = 0;
mutex_lock(&opp_table->lock);
if (opp_table->parsed_static_opps) {
opp_table->parsed_static_opps++;
mutex_unlock(&opp_table->lock);
return 0;
}
opp_table->parsed_static_opps = 1;
mutex_unlock(&opp_table->lock);
prop = of_find_property(dev->of_node, "operating-points", NULL);
if (!prop) {
ret = -ENODEV;
goto remove_static_opp;
}
if (!prop->value) {
ret = -ENODATA;
goto remove_static_opp;
}
/*
* Each OPP is a set of tuples consisting of frequency and
* voltage like <freq-kHz vol-uV>.
*/
nr = prop->length / sizeof(u32);
if (nr % 2) {
dev_err(dev, "%s: Invalid OPP table\n", __func__);
ret = -EINVAL;
goto remove_static_opp;
}
val = prop->value;
while (nr) {
unsigned long freq = be32_to_cpup(val++) * 1000;
unsigned long volt = be32_to_cpup(val++);
struct dev_pm_opp_data data = {
.freq = freq,
.u_volt = volt,
};
ret = _opp_add_v1(opp_table, dev, &data, false);
if (ret) {
dev_err(dev, "%s: Failed to add OPP %ld (%d)\n",
__func__, data.freq, ret);
goto remove_static_opp;
}
nr -= 2;
}
return 0;
remove_static_opp:
_opp_remove_all_static(opp_table);
return ret;
}
static int _of_add_table_indexed(struct device *dev, int index)
{
struct opp_table *opp_table;
int ret, count;
if (index) {
/*
* If only one phandle is present, then the same OPP table
* applies for all index requests.
*/
count = of_count_phandle_with_args(dev->of_node,
"operating-points-v2", NULL);
if (count == 1)
index = 0;
}
opp_table = _add_opp_table_indexed(dev, index, true);
opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER The OPP core manages various resources, e.g. clocks or interconnect paths. These resources are looked up when the OPP table is allocated once dev_pm_opp_get_opp_table() is called the first time (either directly or indirectly through one of the many helper functions). At this point, the resources may not be available yet, i.e. looking them up will result in -EPROBE_DEFER. Unfortunately, dev_pm_opp_get_opp_table() is currently unable to propagate this error code since it only returns the allocated OPP table or NULL. This means that all consumers of the OPP core are required to make sure that all necessary resources are available. Usually this happens by requesting them, checking the result and releasing them immediately after. For example, we have added "dev_pm_opp_of_find_icc_paths(dev, NULL)" to several drivers now just to make sure the interconnect providers are ready before the OPP table is allocated. If this call is missing, the OPP core will only warn about this and then attempt to continue without interconnect. This will eventually fail horribly, e.g.: cpu cpu0: _allocate_opp_table: Error finding interconnect paths: -517 ... later ... of: _read_bw: Mismatch between opp-peak-kBps and paths (1 0) cpu cpu0: _opp_add_static_v2: opp key field not found cpu cpu0: _of_add_opp_table_v2: Failed to add OPP, -22 This example happens when trying to use interconnects for a CPU OPP table together with qcom-cpufreq-nvmem.c. qcom-cpufreq-nvmem calls dev_pm_opp_set_supported_hw(), which ends up allocating the OPP table early. To fix the problem with the current approach we would need to add yet another call to dev_pm_opp_of_find_icc_paths(dev, NULL). But actually qcom-cpufreq-nvmem.c has nothing to do with interconnects... This commit attempts to make this more robust by allowing dev_pm_opp_get_opp_table() to return an error pointer. Fixing all the usages is trivial because the function is usually used indirectly through another helper (e.g. dev_pm_opp_set_supported_hw() above). These other helpers already return an error pointer. The example above then works correctly because set_supported_hw() will return -EPROBE_DEFER, and qcom-cpufreq-nvmem.c already propagates that error. It should also be possible to remove the remaining usages of "dev_pm_opp_of_find_icc_paths(dev, NULL)" from other drivers as well. Note that this commit currently only handles -EPROBE_DEFER for the clock/interconnects within _allocate_opp_table(). Other errors are just ignored as before. Eventually those should be propagated as well. Signed-off-by: Stephan Gerhold <stephan@gerhold.net> Acked-by: Krzysztof Kozlowski <krzk@kernel.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> [ Viresh: skip checking return value of dev_pm_opp_get_opp_table() for EPROBE_DEFER in domain.c, fix NULL return value and reorder code a bit in core.c, and update exynos-asv.c ] Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2020-07-27 11:30:46 +02:00
if (IS_ERR(opp_table))
return PTR_ERR(opp_table);
/*
* OPPs have two version of bindings now. Also try the old (v1)
* bindings for backward compatibility with older dtbs.
*/
if (opp_table->np)
ret = _of_add_opp_table_v2(dev, opp_table);
else
ret = _of_add_opp_table_v1(dev, opp_table);
if (ret)
dev_pm_opp_put_opp_table(opp_table);
return ret;
}
static void devm_pm_opp_of_table_release(void *data)
{
dev_pm_opp_of_remove_table(data);
}
static int _devm_of_add_table_indexed(struct device *dev, int index)
{
int ret;
ret = _of_add_table_indexed(dev, index);
if (ret)
return ret;
return devm_add_action_or_reset(dev, devm_pm_opp_of_table_release, dev);
}
/**
* devm_pm_opp_of_add_table() - Initialize opp table from device tree
* @dev: device pointer used to lookup OPP table.
*
* Register the initial OPP table with the OPP library for given device.
*
* The opp_table structure will be freed after the device is destroyed.
*
* Return:
* 0 On success OR
* Duplicate OPPs (both freq and volt are same) and opp->available
* -EEXIST Freq are same and volt are different OR
* Duplicate OPPs (both freq and volt are same) and !opp->available
* -ENOMEM Memory allocation failure
* -ENODEV when 'operating-points' property is not found or is invalid data
* in device node.
* -ENODATA when empty 'operating-points' property is found
* -EINVAL when invalid entries are found in opp-v2 table
*/
int devm_pm_opp_of_add_table(struct device *dev)
{
return _devm_of_add_table_indexed(dev, 0);
}
EXPORT_SYMBOL_GPL(devm_pm_opp_of_add_table);
/**
* dev_pm_opp_of_add_table() - Initialize opp table from device tree
* @dev: device pointer used to lookup OPP table.
*
* Register the initial OPP table with the OPP library for given device.
*
* Return:
* 0 On success OR
* Duplicate OPPs (both freq and volt are same) and opp->available
* -EEXIST Freq are same and volt are different OR
* Duplicate OPPs (both freq and volt are same) and !opp->available
* -ENOMEM Memory allocation failure
* -ENODEV when 'operating-points' property is not found or is invalid data
* in device node.
* -ENODATA when empty 'operating-points' property is found
* -EINVAL when invalid entries are found in opp-v2 table
*/
int dev_pm_opp_of_add_table(struct device *dev)
{
return _of_add_table_indexed(dev, 0);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table);
/**
* dev_pm_opp_of_add_table_indexed() - Initialize indexed opp table from device tree
* @dev: device pointer used to lookup OPP table.
* @index: Index number.
*
* Register the initial OPP table with the OPP library for given device only
* using the "operating-points-v2" property.
*
* Return: Refer to dev_pm_opp_of_add_table() for return values.
*/
int dev_pm_opp_of_add_table_indexed(struct device *dev, int index)
{
return _of_add_table_indexed(dev, index);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_indexed);
/**
* devm_pm_opp_of_add_table_indexed() - Initialize indexed opp table from device tree
* @dev: device pointer used to lookup OPP table.
* @index: Index number.
*
* This is a resource-managed variant of dev_pm_opp_of_add_table_indexed().
*/
int devm_pm_opp_of_add_table_indexed(struct device *dev, int index)
{
return _devm_of_add_table_indexed(dev, index);
}
EXPORT_SYMBOL_GPL(devm_pm_opp_of_add_table_indexed);
/* CPU device specific helpers */
/**
* dev_pm_opp_of_cpumask_remove_table() - Removes OPP table for @cpumask
* @cpumask: cpumask for which OPP table needs to be removed
*
* This removes the OPP tables for CPUs present in the @cpumask.
* This should be used only to remove static entries created from DT.
*/
void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
{
_dev_pm_opp_cpumask_remove_table(cpumask, -1);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
/**
* dev_pm_opp_of_cpumask_add_table() - Adds OPP table for @cpumask
* @cpumask: cpumask for which OPP table needs to be added.
*
* This adds the OPP tables for CPUs present in the @cpumask.
*/
int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
{
struct device *cpu_dev;
int cpu, ret;
if (WARN_ON(cpumask_empty(cpumask)))
return -ENODEV;
for_each_cpu(cpu, cpumask) {
cpu_dev = get_cpu_device(cpu);
if (!cpu_dev) {
pr_err("%s: failed to get cpu%d device\n", __func__,
cpu);
ret = -ENODEV;
goto remove_table;
}
ret = dev_pm_opp_of_add_table(cpu_dev);
if (ret) {
/*
* OPP may get registered dynamically, don't print error
* message here.
*/
pr_debug("%s: couldn't find opp table for cpu:%d, %d\n",
__func__, cpu, ret);
goto remove_table;
}
}
return 0;
remove_table:
/* Free all other OPPs */
_dev_pm_opp_cpumask_remove_table(cpumask, cpu);
return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
/*
* Works only for OPP v2 bindings.
*
* Returns -ENOENT if operating-points-v2 bindings aren't supported.
*/
/**
* dev_pm_opp_of_get_sharing_cpus() - Get cpumask of CPUs sharing OPPs with
* @cpu_dev using operating-points-v2
* bindings.
*
* @cpu_dev: CPU device for which we do this operation
* @cpumask: cpumask to update with information of sharing CPUs
*
* This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
*
* Returns -ENOENT if operating-points-v2 isn't present for @cpu_dev.
*/
int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
struct cpumask *cpumask)
{
struct device_node *np, *tmp_np, *cpu_np;
int cpu, ret = 0;
/* Get OPP descriptor node */
np = dev_pm_opp_of_get_opp_desc_node(cpu_dev);
if (!np) {
dev_dbg(cpu_dev, "%s: Couldn't find opp node.\n", __func__);
return -ENOENT;
}
cpumask_set_cpu(cpu_dev->id, cpumask);
/* OPPs are shared ? */
if (!of_property_read_bool(np, "opp-shared"))
goto put_cpu_node;
for_each_possible_cpu(cpu) {
if (cpu == cpu_dev->id)
continue;
cpu_np = of_cpu_device_node_get(cpu);
if (!cpu_np) {
dev_err(cpu_dev, "%s: failed to get cpu%d node\n",
__func__, cpu);
ret = -ENOENT;
goto put_cpu_node;
}
/* Get OPP descriptor node */
tmp_np = _opp_of_get_opp_desc_node(cpu_np, 0);
of_node_put(cpu_np);
if (!tmp_np) {
pr_err("%pOF: Couldn't find opp node\n", cpu_np);
ret = -ENOENT;
goto put_cpu_node;
}
/* CPUs are sharing opp node */
if (np == tmp_np)
cpumask_set_cpu(cpu, cpumask);
of_node_put(tmp_np);
}
put_cpu_node:
of_node_put(np);
return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
/**
* of_get_required_opp_performance_state() - Search for required OPP and return its performance state.
* @np: Node that contains the "required-opps" property.
* @index: Index of the phandle to parse.
*
* Returns the performance state of the OPP pointed out by the "required-opps"
* property at @index in @np.
*
* Return: Zero or positive performance state on success, otherwise negative
* value on errors.
*/
int of_get_required_opp_performance_state(struct device_node *np, int index)
{
struct dev_pm_opp *opp;
struct device_node *required_np;
struct opp_table *opp_table;
int pstate = -EINVAL;
required_np = of_parse_required_opp(np, index);
if (!required_np)
return -ENODEV;
opp_table = _find_table_of_opp_np(required_np);
if (IS_ERR(opp_table)) {
pr_err("%s: Failed to find required OPP table %pOF: %ld\n",
__func__, np, PTR_ERR(opp_table));
goto put_required_np;
}
/* The OPP tables must belong to a genpd */
if (unlikely(!opp_table->is_genpd)) {
pr_err("%s: Performance state is only valid for genpds.\n", __func__);
goto put_required_np;
}
opp = _find_opp_of_np(opp_table, required_np);
if (opp) {
if (opp->level == OPP_LEVEL_UNSET) {
pr_err("%s: OPP levels aren't available for %pOF\n",
__func__, np);
} else {
pstate = opp->level;
}
dev_pm_opp_put(opp);
}
dev_pm_opp_put_opp_table(opp_table);
put_required_np:
of_node_put(required_np);
return pstate;
}
EXPORT_SYMBOL_GPL(of_get_required_opp_performance_state);
/**
* dev_pm_opp_get_of_node() - Gets the DT node corresponding to an opp
* @opp: opp for which DT node has to be returned for
*
* Return: DT node corresponding to the opp, else 0 on success.
*
* The caller needs to put the node with of_node_put() after using it.
*/
struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp)
{
if (IS_ERR_OR_NULL(opp)) {
pr_err("%s: Invalid parameters\n", __func__);
return NULL;
}
return of_node_get(opp->np);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
/*
* Callback function provided to the Energy Model framework upon registration.
* It provides the power used by @dev at @kHz if it is the frequency of an
* existing OPP, or at the frequency of the first OPP above @kHz otherwise
* (see dev_pm_opp_find_freq_ceil()). This function updates @kHz to the ceiled
PM: EM: convert power field to micro-Watts precision and align drivers The milli-Watts precision causes rounding errors while calculating efficiency cost for each OPP. This is especially visible in the 'simple' Energy Model (EM), where the power for each OPP is provided from OPP framework. This can cause some OPPs to be marked inefficient, while using micro-Watts precision that might not happen. Update all EM users which access 'power' field and assume the value is in milli-Watts. Solve also an issue with potential overflow in calculation of energy estimation on 32bit machine. It's needed now since the power value (thus the 'cost' as well) are higher. Example calculation which shows the rounding error and impact: power = 'dyn-power-coeff' * volt_mV * volt_mV * freq_MHz power_a_uW = (100 * 600mW * 600mW * 500MHz) / 10^6 = 18000 power_a_mW = (100 * 600mW * 600mW * 500MHz) / 10^9 = 18 power_b_uW = (100 * 605mW * 605mW * 600MHz) / 10^6 = 21961 power_b_mW = (100 * 605mW * 605mW * 600MHz) / 10^9 = 21 max_freq = 2000MHz cost_a_mW = 18 * 2000MHz/500MHz = 72 cost_a_uW = 18000 * 2000MHz/500MHz = 72000 cost_b_mW = 21 * 2000MHz/600MHz = 70 // <- artificially better cost_b_uW = 21961 * 2000MHz/600MHz = 73203 The 'cost_b_mW' (which is based on old milli-Watts) is misleadingly better that the 'cost_b_uW' (this patch uses micro-Watts) and such would have impact on the 'inefficient OPPs' information in the Cpufreq framework. This patch set removes the rounding issue. Signed-off-by: Lukasz Luba <lukasz.luba@arm.com> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-07-07 08:15:52 +01:00
* frequency and @uW to the associated power.
*
* Returns 0 on success or a proper -EINVAL value in case of error.
*/
static int __maybe_unused
PM: EM: convert power field to micro-Watts precision and align drivers The milli-Watts precision causes rounding errors while calculating efficiency cost for each OPP. This is especially visible in the 'simple' Energy Model (EM), where the power for each OPP is provided from OPP framework. This can cause some OPPs to be marked inefficient, while using micro-Watts precision that might not happen. Update all EM users which access 'power' field and assume the value is in milli-Watts. Solve also an issue with potential overflow in calculation of energy estimation on 32bit machine. It's needed now since the power value (thus the 'cost' as well) are higher. Example calculation which shows the rounding error and impact: power = 'dyn-power-coeff' * volt_mV * volt_mV * freq_MHz power_a_uW = (100 * 600mW * 600mW * 500MHz) / 10^6 = 18000 power_a_mW = (100 * 600mW * 600mW * 500MHz) / 10^9 = 18 power_b_uW = (100 * 605mW * 605mW * 600MHz) / 10^6 = 21961 power_b_mW = (100 * 605mW * 605mW * 600MHz) / 10^9 = 21 max_freq = 2000MHz cost_a_mW = 18 * 2000MHz/500MHz = 72 cost_a_uW = 18000 * 2000MHz/500MHz = 72000 cost_b_mW = 21 * 2000MHz/600MHz = 70 // <- artificially better cost_b_uW = 21961 * 2000MHz/600MHz = 73203 The 'cost_b_mW' (which is based on old milli-Watts) is misleadingly better that the 'cost_b_uW' (this patch uses micro-Watts) and such would have impact on the 'inefficient OPPs' information in the Cpufreq framework. This patch set removes the rounding issue. Signed-off-by: Lukasz Luba <lukasz.luba@arm.com> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-07-07 08:15:52 +01:00
_get_dt_power(struct device *dev, unsigned long *uW, unsigned long *kHz)
{
struct dev_pm_opp *opp;
unsigned long opp_freq, opp_power;
/* Find the right frequency and related OPP */
opp_freq = *kHz * 1000;
opp = dev_pm_opp_find_freq_ceil(dev, &opp_freq);
if (IS_ERR(opp))
return -EINVAL;
opp_power = dev_pm_opp_get_power(opp);
dev_pm_opp_put(opp);
if (!opp_power)
return -EINVAL;
*kHz = opp_freq / 1000;
PM: EM: convert power field to micro-Watts precision and align drivers The milli-Watts precision causes rounding errors while calculating efficiency cost for each OPP. This is especially visible in the 'simple' Energy Model (EM), where the power for each OPP is provided from OPP framework. This can cause some OPPs to be marked inefficient, while using micro-Watts precision that might not happen. Update all EM users which access 'power' field and assume the value is in milli-Watts. Solve also an issue with potential overflow in calculation of energy estimation on 32bit machine. It's needed now since the power value (thus the 'cost' as well) are higher. Example calculation which shows the rounding error and impact: power = 'dyn-power-coeff' * volt_mV * volt_mV * freq_MHz power_a_uW = (100 * 600mW * 600mW * 500MHz) / 10^6 = 18000 power_a_mW = (100 * 600mW * 600mW * 500MHz) / 10^9 = 18 power_b_uW = (100 * 605mW * 605mW * 600MHz) / 10^6 = 21961 power_b_mW = (100 * 605mW * 605mW * 600MHz) / 10^9 = 21 max_freq = 2000MHz cost_a_mW = 18 * 2000MHz/500MHz = 72 cost_a_uW = 18000 * 2000MHz/500MHz = 72000 cost_b_mW = 21 * 2000MHz/600MHz = 70 // <- artificially better cost_b_uW = 21961 * 2000MHz/600MHz = 73203 The 'cost_b_mW' (which is based on old milli-Watts) is misleadingly better that the 'cost_b_uW' (this patch uses micro-Watts) and such would have impact on the 'inefficient OPPs' information in the Cpufreq framework. This patch set removes the rounding issue. Signed-off-by: Lukasz Luba <lukasz.luba@arm.com> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-07-07 08:15:52 +01:00
*uW = opp_power;
return 0;
}
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
/*
* Callback function provided to the Energy Model framework upon registration.
* This computes the power estimated by @dev at @kHz if it is the frequency
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
* of an existing OPP, or at the frequency of the first OPP above @kHz otherwise
* (see dev_pm_opp_find_freq_ceil()). This function updates @kHz to the ceiled
PM: EM: convert power field to micro-Watts precision and align drivers The milli-Watts precision causes rounding errors while calculating efficiency cost for each OPP. This is especially visible in the 'simple' Energy Model (EM), where the power for each OPP is provided from OPP framework. This can cause some OPPs to be marked inefficient, while using micro-Watts precision that might not happen. Update all EM users which access 'power' field and assume the value is in milli-Watts. Solve also an issue with potential overflow in calculation of energy estimation on 32bit machine. It's needed now since the power value (thus the 'cost' as well) are higher. Example calculation which shows the rounding error and impact: power = 'dyn-power-coeff' * volt_mV * volt_mV * freq_MHz power_a_uW = (100 * 600mW * 600mW * 500MHz) / 10^6 = 18000 power_a_mW = (100 * 600mW * 600mW * 500MHz) / 10^9 = 18 power_b_uW = (100 * 605mW * 605mW * 600MHz) / 10^6 = 21961 power_b_mW = (100 * 605mW * 605mW * 600MHz) / 10^9 = 21 max_freq = 2000MHz cost_a_mW = 18 * 2000MHz/500MHz = 72 cost_a_uW = 18000 * 2000MHz/500MHz = 72000 cost_b_mW = 21 * 2000MHz/600MHz = 70 // <- artificially better cost_b_uW = 21961 * 2000MHz/600MHz = 73203 The 'cost_b_mW' (which is based on old milli-Watts) is misleadingly better that the 'cost_b_uW' (this patch uses micro-Watts) and such would have impact on the 'inefficient OPPs' information in the Cpufreq framework. This patch set removes the rounding issue. Signed-off-by: Lukasz Luba <lukasz.luba@arm.com> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-07-07 08:15:52 +01:00
* frequency and @uW to the associated power. The power is estimated as
* P = C * V^2 * f with C being the device's capacitance and V and f
* respectively the voltage and frequency of the OPP.
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
*
* Returns -EINVAL if the power calculation failed because of missing
* parameters, 0 otherwise.
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
*/
PM: EM: convert power field to micro-Watts precision and align drivers The milli-Watts precision causes rounding errors while calculating efficiency cost for each OPP. This is especially visible in the 'simple' Energy Model (EM), where the power for each OPP is provided from OPP framework. This can cause some OPPs to be marked inefficient, while using micro-Watts precision that might not happen. Update all EM users which access 'power' field and assume the value is in milli-Watts. Solve also an issue with potential overflow in calculation of energy estimation on 32bit machine. It's needed now since the power value (thus the 'cost' as well) are higher. Example calculation which shows the rounding error and impact: power = 'dyn-power-coeff' * volt_mV * volt_mV * freq_MHz power_a_uW = (100 * 600mW * 600mW * 500MHz) / 10^6 = 18000 power_a_mW = (100 * 600mW * 600mW * 500MHz) / 10^9 = 18 power_b_uW = (100 * 605mW * 605mW * 600MHz) / 10^6 = 21961 power_b_mW = (100 * 605mW * 605mW * 600MHz) / 10^9 = 21 max_freq = 2000MHz cost_a_mW = 18 * 2000MHz/500MHz = 72 cost_a_uW = 18000 * 2000MHz/500MHz = 72000 cost_b_mW = 21 * 2000MHz/600MHz = 70 // <- artificially better cost_b_uW = 21961 * 2000MHz/600MHz = 73203 The 'cost_b_mW' (which is based on old milli-Watts) is misleadingly better that the 'cost_b_uW' (this patch uses micro-Watts) and such would have impact on the 'inefficient OPPs' information in the Cpufreq framework. This patch set removes the rounding issue. Signed-off-by: Lukasz Luba <lukasz.luba@arm.com> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-07-07 08:15:52 +01:00
static int __maybe_unused _get_power(struct device *dev, unsigned long *uW,
unsigned long *kHz)
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
{
struct dev_pm_opp *opp;
struct device_node *np;
unsigned long mV, Hz;
u32 cap;
u64 tmp;
int ret;
np = of_node_get(dev->of_node);
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
if (!np)
return -EINVAL;
ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap);
of_node_put(np);
if (ret)
return -EINVAL;
Hz = *kHz * 1000;
opp = dev_pm_opp_find_freq_ceil(dev, &Hz);
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
if (IS_ERR(opp))
return -EINVAL;
mV = dev_pm_opp_get_voltage(opp) / 1000;
dev_pm_opp_put(opp);
if (!mV)
return -EINVAL;
tmp = (u64)cap * mV * mV * (Hz / 1000000);
PM: EM: convert power field to micro-Watts precision and align drivers The milli-Watts precision causes rounding errors while calculating efficiency cost for each OPP. This is especially visible in the 'simple' Energy Model (EM), where the power for each OPP is provided from OPP framework. This can cause some OPPs to be marked inefficient, while using micro-Watts precision that might not happen. Update all EM users which access 'power' field and assume the value is in milli-Watts. Solve also an issue with potential overflow in calculation of energy estimation on 32bit machine. It's needed now since the power value (thus the 'cost' as well) are higher. Example calculation which shows the rounding error and impact: power = 'dyn-power-coeff' * volt_mV * volt_mV * freq_MHz power_a_uW = (100 * 600mW * 600mW * 500MHz) / 10^6 = 18000 power_a_mW = (100 * 600mW * 600mW * 500MHz) / 10^9 = 18 power_b_uW = (100 * 605mW * 605mW * 600MHz) / 10^6 = 21961 power_b_mW = (100 * 605mW * 605mW * 600MHz) / 10^9 = 21 max_freq = 2000MHz cost_a_mW = 18 * 2000MHz/500MHz = 72 cost_a_uW = 18000 * 2000MHz/500MHz = 72000 cost_b_mW = 21 * 2000MHz/600MHz = 70 // <- artificially better cost_b_uW = 21961 * 2000MHz/600MHz = 73203 The 'cost_b_mW' (which is based on old milli-Watts) is misleadingly better that the 'cost_b_uW' (this patch uses micro-Watts) and such would have impact on the 'inefficient OPPs' information in the Cpufreq framework. This patch set removes the rounding issue. Signed-off-by: Lukasz Luba <lukasz.luba@arm.com> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-07-07 08:15:52 +01:00
/* Provide power in micro-Watts */
do_div(tmp, 1000000);
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
PM: EM: convert power field to micro-Watts precision and align drivers The milli-Watts precision causes rounding errors while calculating efficiency cost for each OPP. This is especially visible in the 'simple' Energy Model (EM), where the power for each OPP is provided from OPP framework. This can cause some OPPs to be marked inefficient, while using micro-Watts precision that might not happen. Update all EM users which access 'power' field and assume the value is in milli-Watts. Solve also an issue with potential overflow in calculation of energy estimation on 32bit machine. It's needed now since the power value (thus the 'cost' as well) are higher. Example calculation which shows the rounding error and impact: power = 'dyn-power-coeff' * volt_mV * volt_mV * freq_MHz power_a_uW = (100 * 600mW * 600mW * 500MHz) / 10^6 = 18000 power_a_mW = (100 * 600mW * 600mW * 500MHz) / 10^9 = 18 power_b_uW = (100 * 605mW * 605mW * 600MHz) / 10^6 = 21961 power_b_mW = (100 * 605mW * 605mW * 600MHz) / 10^9 = 21 max_freq = 2000MHz cost_a_mW = 18 * 2000MHz/500MHz = 72 cost_a_uW = 18000 * 2000MHz/500MHz = 72000 cost_b_mW = 21 * 2000MHz/600MHz = 70 // <- artificially better cost_b_uW = 21961 * 2000MHz/600MHz = 73203 The 'cost_b_mW' (which is based on old milli-Watts) is misleadingly better that the 'cost_b_uW' (this patch uses micro-Watts) and such would have impact on the 'inefficient OPPs' information in the Cpufreq framework. This patch set removes the rounding issue. Signed-off-by: Lukasz Luba <lukasz.luba@arm.com> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-07-07 08:15:52 +01:00
*uW = (unsigned long)tmp;
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
*kHz = Hz / 1000;
return 0;
}
static bool _of_has_opp_microwatt_property(struct device *dev)
{
unsigned long power, freq = 0;
struct dev_pm_opp *opp;
/* Check if at least one OPP has needed property */
opp = dev_pm_opp_find_freq_ceil(dev, &freq);
if (IS_ERR(opp))
return false;
power = dev_pm_opp_get_power(opp);
dev_pm_opp_put(opp);
if (!power)
return false;
return true;
}
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
/**
* dev_pm_opp_of_register_em() - Attempt to register an Energy Model
* @dev : Device for which an Energy Model has to be registered
* @cpus : CPUs for which an Energy Model has to be registered. For
* other type of devices it should be set to NULL.
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
*
* This checks whether the "dynamic-power-coefficient" devicetree property has
* been specified, and tries to register an Energy Model with it if it has.
* Having this property means the voltages are known for OPPs and the EM
* might be calculated.
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
*/
int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus)
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
{
struct em_data_callback em_cb;
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
struct device_node *np;
int ret, nr_opp;
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
u32 cap;
if (IS_ERR_OR_NULL(dev)) {
ret = -EINVAL;
goto failed;
}
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
nr_opp = dev_pm_opp_get_opp_count(dev);
if (nr_opp <= 0) {
ret = -EINVAL;
goto failed;
}
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
/* First, try to find more precised Energy Model in DT */
if (_of_has_opp_microwatt_property(dev)) {
EM_SET_ACTIVE_POWER_CB(em_cb, _get_dt_power);
goto register_em;
}
np = of_node_get(dev->of_node);
if (!np) {
ret = -EINVAL;
goto failed;
}
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
/*
* Register an EM only if the 'dynamic-power-coefficient' property is
* set in devicetree. It is assumed the voltage values are known if that
* property is set since it is useless otherwise. If voltages are not
* known, just let the EM registration fail with an error to alert the
* user about the inconsistent configuration.
*/
ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap);
of_node_put(np);
if (ret || !cap) {
dev_dbg(dev, "Couldn't find proper 'dynamic-power-coefficient' in DT\n");
ret = -EINVAL;
goto failed;
}
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
EM_SET_ACTIVE_POWER_CB(em_cb, _get_power);
register_em:
ret = em_dev_register_perf_domain(dev, nr_opp, &em_cb, cpus, true);
if (ret)
goto failed;
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
return 0;
failed:
dev_dbg(dev, "Couldn't register Energy Model %d\n", ret);
return ret;
PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret <quentin.perret@arm.com> Tested-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-by: Matthias Kaehlcke <mka@chromium.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
2019-02-04 11:09:48 +00:00
}
EXPORT_SYMBOL_GPL(dev_pm_opp_of_register_em);