2017-11-07 17:30:07 +01:00
// SPDX-License-Identifier: GPL-2.0
2014-09-30 14:48:25 +01:00
/*
* cacheinfo support - processor cache information via sysfs
*
* Based on arch / x86 / kernel / cpu / intel_cacheinfo . c
* Author : Sudeep Holla < sudeep . holla @ arm . com >
*/
2016-10-28 09:45:30 +01:00
# define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2016-10-28 09:45:29 +01:00
# include <linux/acpi.h>
2014-09-30 14:48:25 +01:00
# include <linux/bitops.h>
# include <linux/cacheinfo.h>
# include <linux/compiler.h>
# include <linux/cpu.h>
# include <linux/device.h>
# include <linux/init.h>
2023-03-29 10:52:07 -05:00
# include <linux/of.h>
2014-09-30 14:48:25 +01:00
# include <linux/sched.h>
# include <linux/slab.h>
# include <linux/smp.h>
# include <linux/sysfs.h>
/*
 * Per-CPU cacheinfo descriptor (a struct cpu_cacheinfo defined with
 * DEFINE_PER_CPU), plus the accessor macros used throughout this file.
 */
static DEFINE_PER_CPU ( struct cpu_cacheinfo , ci_cpu_cacheinfo ) ;
/* Address of the struct cpu_cacheinfo that belongs to @cpu. */
# define ci_cacheinfo(cpu) (&per_cpu(ci_cpu_cacheinfo, cpu))
/* The num_leaves field of @cpu's descriptor. */
# define cache_leaves(cpu) (ci_cacheinfo(cpu)->num_leaves)
/* The info_list field of @cpu's descriptor; may be NULL (callers check). */
# define per_cpu_cacheinfo(cpu) (ci_cacheinfo(cpu)->info_list)
2022-07-04 11:15:47 +01:00
/*
 * Pointer to the leaf at position @idx in @cpu's info_list. Performs plain
 * pointer arithmetic only: no NULL or bounds check is done here.
 */
# define per_cpu_cacheinfo_idx(cpu, idx) \
( per_cpu_cacheinfo ( cpu ) + ( idx ) )
2014-09-30 14:48:25 +01:00
2023-04-14 10:14:52 +02:00
/*
 * Set if no cache information is found in DT/ACPI.
 *
 * Written by cache_setup_properties() when the DT/ACPI setup fails and
 * use_arch_cache_info() is true; read by cache_leaves_are_shared() and
 * cache_shared_cpu_map_setup().
 */
static bool use_arch_info ;
2014-09-30 14:48:25 +01:00
/* Return the per-CPU cacheinfo descriptor that belongs to @cpu. */
struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
{
	struct cpu_cacheinfo *ci = ci_cacheinfo(cpu);

	return ci;
}
static inline bool cache_leaves_are_shared ( struct cacheinfo * this_leaf ,
struct cacheinfo * sib_leaf )
{
2022-07-04 11:15:48 +01:00
/*
* For non DT / ACPI systems , assume unique level 1 caches ,
2023-04-14 10:14:49 +02:00
* system - wide shared caches for all other levels .
2022-07-04 11:15:48 +01:00
*/
2023-04-14 10:14:52 +02:00
if ( ! ( IS_ENABLED ( CONFIG_OF ) | | IS_ENABLED ( CONFIG_ACPI ) ) | |
use_arch_info )
2023-04-14 10:14:49 +02:00
return ( this_leaf - > level ! = 1 ) & & ( sib_leaf - > level ! = 1 ) ;
2022-07-04 11:15:48 +01:00
2022-07-04 11:15:51 +01:00
if ( ( sib_leaf - > attributes & CACHE_ID ) & &
( this_leaf - > attributes & CACHE_ID ) )
return sib_leaf - > id = = this_leaf - > id ;
2018-05-11 18:57:58 -05:00
return sib_leaf - > fw_token = = this_leaf - > fw_token ;
2014-09-30 14:48:25 +01:00
}
2016-10-28 09:45:31 +01:00
2022-07-04 11:15:49 +01:00
bool last_level_cache_is_valid ( unsigned int cpu )
{
struct cacheinfo * llc ;
if ( ! cache_leaves ( cpu ) )
return false ;
llc = per_cpu_cacheinfo_idx ( cpu , cache_leaves ( cpu ) - 1 ) ;
2022-07-04 11:15:51 +01:00
return ( llc - > attributes & CACHE_ID ) | | ! ! llc - > fw_token ;
2022-07-04 11:15:49 +01:00
}
/*
 * Report whether @cpu_x and @cpu_y share their last-level cache.
 *
 * Returns false if either CPU lacks a valid last-level cache leaf;
 * otherwise the result of comparing the two last leaves.
 */
bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y)
{
	struct cacheinfo *last_x;
	struct cacheinfo *last_y;

	if (!last_level_cache_is_valid(cpu_x))
		return false;
	if (!last_level_cache_is_valid(cpu_y))
		return false;

	last_x = per_cpu_cacheinfo_idx(cpu_x, cache_leaves(cpu_x) - 1);
	last_y = per_cpu_cacheinfo_idx(cpu_y, cache_leaves(cpu_y) - 1);

	return cache_leaves_are_shared(last_x, last_y);
}
2022-07-04 11:15:48 +01:00
# ifdef CONFIG_OF
2023-04-14 10:14:50 +02:00
/* Forward declaration: defined further down, used by cache_setup_of_node(). */
static bool of_check_cache_nodes ( struct device_node * np ) ;
2016-10-28 09:45:31 +01:00
/*
 * OF properties to query for a given cache type.
 *
 * @size_prop:       name of the property holding the cache size
 * @line_size_props: candidate property names for the line size, tried in
 *                   order until one can be read
 * @nr_sets_prop:    name of the property holding the number of sets
 */
struct cache_type_info {
	const char *size_prop;
	const char *line_size_props[2];
	const char *nr_sets_prop;
};

/*
 * Property-name table indexed by the value of get_cacheinfo_idx():
 * slot 0 holds the unprefixed names, slot 1 the "i-" names and slot 2 the
 * "d-" names. The names must match the device tree property names exactly,
 * so they carry no surrounding whitespace.
 */
static const struct cache_type_info cache_type_info[] = {
	{
		.size_prop       = "cache-size",
		.line_size_props = { "cache-line-size",
				     "cache-block-size", },
		.nr_sets_prop    = "cache-sets",
	}, {
		.size_prop       = "i-cache-size",
		.line_size_props = { "i-cache-line-size",
				     "i-cache-block-size", },
		.nr_sets_prop    = "i-cache-sets",
	}, {
		.size_prop       = "d-cache-size",
		.line_size_props = { "d-cache-line-size",
				     "d-cache-block-size", },
		.nr_sets_prop    = "d-cache-sets",
	},
};
/*
 * Map a cache type to its slot in cache_type_info[].
 *
 * CACHE_TYPE_UNIFIED maps to slot 0; every other type is returned as its
 * own numeric value.
 * NOTE(review): assumes those remaining values are valid indices into
 * cache_type_info[] - confirm against the enum cache_type definition.
 */
static inline int get_cacheinfo_idx(enum cache_type type)
{
	return (type == CACHE_TYPE_UNIFIED) ? 0 : type;
}
2018-05-11 18:57:57 -05:00
static void cache_size ( struct cacheinfo * this_leaf , struct device_node * np )
2016-10-28 09:45:31 +01:00
{
const char * propname ;
int ct_idx ;
ct_idx = get_cacheinfo_idx ( this_leaf - > type ) ;
propname = cache_type_info [ ct_idx ] . size_prop ;
2018-12-19 16:16:03 +08:00
of_property_read_u32 ( np , propname , & this_leaf - > size ) ;
2016-10-28 09:45:31 +01:00
}
/* not cache_line_size() because that's a macro in include/linux/cache.h */
2018-05-11 18:57:57 -05:00
static void cache_get_line_size ( struct cacheinfo * this_leaf ,
struct device_node * np )
2016-10-28 09:45:31 +01:00
{
int i , lim , ct_idx ;
ct_idx = get_cacheinfo_idx ( this_leaf - > type ) ;
lim = ARRAY_SIZE ( cache_type_info [ ct_idx ] . line_size_props ) ;
for ( i = 0 ; i < lim ; i + + ) {
2018-07-06 13:50:31 +01:00
int ret ;
u32 line_size ;
2016-10-28 09:45:31 +01:00
const char * propname ;
propname = cache_type_info [ ct_idx ] . line_size_props [ i ] ;
2018-07-06 13:50:31 +01:00
ret = of_property_read_u32 ( np , propname , & line_size ) ;
if ( ! ret ) {
this_leaf - > coherency_line_size = line_size ;
2016-10-28 09:45:31 +01:00
break ;
2018-07-06 13:50:31 +01:00
}
2016-10-28 09:45:31 +01:00
}
}
2018-05-11 18:57:57 -05:00
static void cache_nr_sets ( struct cacheinfo * this_leaf , struct device_node * np )
2016-10-28 09:45:31 +01:00
{
const char * propname ;
int ct_idx ;
ct_idx = get_cacheinfo_idx ( this_leaf - > type ) ;
propname = cache_type_info [ ct_idx ] . nr_sets_prop ;
2018-12-19 16:16:03 +08:00
of_property_read_u32 ( np , propname , & this_leaf - > number_of_sets ) ;
2016-10-28 09:45:31 +01:00
}
static void cache_associativity ( struct cacheinfo * this_leaf )
{
unsigned int line_size = this_leaf - > coherency_line_size ;
unsigned int nr_sets = this_leaf - > number_of_sets ;
unsigned int size = this_leaf - > size ;
/*
* If the cache is fully associative , there is no need to
* check the other properties .
*/
if ( ! ( nr_sets = = 1 ) & & ( nr_sets > 0 & & size > 0 & & line_size > 0 ) )
this_leaf - > ways_of_associativity = ( size / nr_sets ) / line_size ;
}
2018-05-11 18:57:57 -05:00
/*
 * Report whether device node @np carries the "cache-unified" property.
 *
 * @this_leaf is not used by this function. The property name is passed
 * without surrounding whitespace so it matches the device tree property.
 */
static bool cache_node_is_unified(struct cacheinfo *this_leaf,
				  struct device_node *np)
{
	return of_property_read_bool(np, "cache-unified");
}
2018-05-11 18:57:57 -05:00
static void cache_of_set_props ( struct cacheinfo * this_leaf ,
struct device_node * np )
2016-10-28 09:45:31 +01:00
{
2018-05-11 18:57:57 -05:00
/*
* init_cache_level must setup the cache level correctly
* overriding the architecturally specified levels , so
* if type is NONE at this stage , it should be unified
*/
if ( this_leaf - > type = = CACHE_TYPE_NOCACHE & &
cache_node_is_unified ( this_leaf , np ) )
this_leaf - > type = CACHE_TYPE_UNIFIED ;
cache_size ( this_leaf , np ) ;
cache_get_line_size ( this_leaf , np ) ;
cache_nr_sets ( this_leaf , np ) ;
cache_associativity ( this_leaf ) ;
2016-10-28 09:45:31 +01:00
}
2018-05-11 18:57:56 -05:00
static int cache_setup_of_node ( unsigned int cpu )
{
2022-10-26 20:59:54 +02:00
struct device_node * np , * prev ;
2018-05-11 18:57:56 -05:00
struct cacheinfo * this_leaf ;
unsigned int index = 0 ;
2022-07-04 11:15:46 +01:00
np = of_cpu_device_node_get ( cpu ) ;
2018-05-11 18:57:56 -05:00
if ( ! np ) {
pr_err ( " Failed to find cpu%d device node \n " , cpu ) ;
return - ENOENT ;
}
2023-04-14 10:14:50 +02:00
if ( ! of_check_cache_nodes ( np ) ) {
of_node_put ( np ) ;
return - ENOENT ;
}
2022-10-26 20:59:54 +02:00
prev = np ;
2018-05-11 18:57:56 -05:00
while ( index < cache_leaves ( cpu ) ) {
2022-07-04 11:15:47 +01:00
this_leaf = per_cpu_cacheinfo_idx ( cpu , index ) ;
2022-10-26 20:59:54 +02:00
if ( this_leaf - > level ! = 1 ) {
2018-05-11 18:57:56 -05:00
np = of_find_next_cache_node ( np ) ;
2022-10-26 20:59:54 +02:00
of_node_put ( prev ) ;
prev = np ;
if ( ! np )
break ;
}
2018-05-11 18:57:57 -05:00
cache_of_set_props ( this_leaf , np ) ;
2018-05-11 18:57:58 -05:00
this_leaf - > fw_token = np ;
2018-05-11 18:57:56 -05:00
index + + ;
}
2022-10-26 20:59:54 +02:00
of_node_put ( np ) ;
2018-05-11 18:57:56 -05:00
if ( index ! = cache_leaves ( cpu ) ) /* not all OF nodes populated */
return - ENOENT ;
return 0 ;
}
2023-01-04 19:30:24 +01:00
2023-04-14 10:14:50 +02:00
/*
 * Report whether @np describes any cache at all.
 *
 * Returns true when @np carries one of the "cache-size", "i-cache-size",
 * "d-cache-size" or "cache-unified" properties, or when a next cache node
 * can be found from it; false otherwise. Property names are given without
 * surrounding whitespace so they match the device tree properties.
 */
static bool of_check_cache_nodes(struct device_node *np)
{
	struct device_node *next;

	if (of_property_present(np, "cache-size") ||
	    of_property_present(np, "i-cache-size") ||
	    of_property_present(np, "d-cache-size") ||
	    of_property_present(np, "cache-unified"))
		return true;

	next = of_find_next_cache_node(np);
	if (next) {
		/* only existence matters; drop the reference right away */
		of_node_put(next);
		return true;
	}

	return false;
}
2023-01-04 19:30:26 +01:00
/*
 * Count the cache leaves described by device node @np.
 *
 * One leaf is counted for each of the "cache-size", "i-cache-size" and
 * "d-cache-size" properties present. When none is present the count falls
 * back to 1 if "cache-unified" is set and to 2 otherwise.
 *
 * The size properties carry a value, so their presence is tested with
 * of_property_present(), as of_check_cache_nodes() does; "cache-unified"
 * is a flag and stays with of_property_read_bool().
 */
static int of_count_cache_leaves(struct device_node *np)
{
	unsigned int leaves = 0;

	if (of_property_present(np, "cache-size"))
		++leaves;
	if (of_property_present(np, "i-cache-size"))
		++leaves;
	if (of_property_present(np, "d-cache-size"))
		++leaves;

	if (!leaves) {
		/*
		 * The '[i-|d-|]cache-size' property is required, but
		 * if absent, fall back on the 'cache-unified' property.
		 */
		if (of_property_read_bool(np, "cache-unified"))
			return 1;
		else
			return 2;
	}

	return leaves;
}
int init_of_cache_level ( unsigned int cpu )
{
struct cpu_cacheinfo * this_cpu_ci = get_cpu_cacheinfo ( cpu ) ;
struct device_node * np = of_cpu_device_node_get ( cpu ) ;
struct device_node * prev = NULL ;
unsigned int levels = 0 , leaves , level ;
2023-04-14 10:14:50 +02:00
if ( ! of_check_cache_nodes ( np ) ) {
of_node_put ( np ) ;
return - ENOENT ;
}
2023-01-04 19:30:26 +01:00
leaves = of_count_cache_leaves ( np ) ;
2023-01-04 19:30:24 +01:00
if ( leaves > 0 )
levels = 1 ;
prev = np ;
while ( ( np = of_find_next_cache_node ( np ) ) ) {
of_node_put ( prev ) ;
prev = np ;
if ( ! of_device_is_compatible ( np , " cache " ) )
2023-01-04 19:30:25 +01:00
goto err_out ;
2023-01-04 19:30:24 +01:00
if ( of_property_read_u32 ( np , " cache-level " , & level ) )
2023-01-04 19:30:25 +01:00
goto err_out ;
2023-01-04 19:30:24 +01:00
if ( level < = levels )
2023-01-04 19:30:25 +01:00
goto err_out ;
2023-01-04 19:30:26 +01:00
leaves + = of_count_cache_leaves ( np ) ;
2023-01-04 19:30:24 +01:00
levels = level ;
}
of_node_put ( np ) ;
this_cpu_ci - > num_levels = levels ;
this_cpu_ci - > num_leaves = leaves ;
return 0 ;
2023-01-04 19:30:25 +01:00
err_out :
of_node_put ( np ) ;
return - EINVAL ;
2023-01-04 19:30:24 +01:00
}
2014-09-30 14:48:25 +01:00
# else
/* Stub used when CONFIG_OF is not set: does nothing and returns 0. */
static inline int cache_setup_of_node ( unsigned int cpu ) { return 0 ; }
2023-01-04 19:30:24 +01:00
/* Stub used when CONFIG_OF is not set: does nothing and returns 0. */
int init_of_cache_level ( unsigned int cpu ) { return 0 ; }
2014-09-30 14:48:25 +01:00
# endif
2018-05-11 18:58:02 -05:00
/*
 * Weak default for the ACPI cache setup hook, called from
 * cache_setup_properties(). This version always returns -ENOTSUPP.
 */
int __weak cache_setup_acpi ( unsigned int cpu )
{
return - ENOTSUPP ;
}
2019-05-28 10:16:53 +08:00
/*
 * Largest coherency_line_size seen among the leaves processed so far by
 * cache_shared_cpu_map_setup(); it is only ever raised there.
 */
unsigned int coherency_max_size ;
2022-07-04 11:15:50 +01:00
static int cache_setup_properties ( unsigned int cpu )
{
int ret = 0 ;
if ( of_have_populated_dt ( ) )
ret = cache_setup_of_node ( cpu ) ;
else if ( ! acpi_disabled )
ret = cache_setup_acpi ( cpu ) ;
2023-04-14 10:14:52 +02:00
// Assume there is no cache information available in DT/ACPI from now.
if ( ret & & use_arch_cache_info ( ) )
use_arch_info = true ;
2022-07-04 11:15:50 +01:00
return ret ;
}
2014-09-30 14:48:25 +01:00
static int cache_shared_cpu_map_setup ( unsigned int cpu )
{
struct cpu_cacheinfo * this_cpu_ci = get_cpu_cacheinfo ( cpu ) ;
struct cacheinfo * this_leaf , * sib_leaf ;
2023-01-17 10:51:33 +00:00
unsigned int index , sib_index ;
2016-10-28 09:45:29 +01:00
int ret = 0 ;
2014-09-30 14:48:25 +01:00
2016-10-28 09:45:28 +01:00
if ( this_cpu_ci - > cpu_map_populated )
return 0 ;
2022-07-04 11:15:50 +01:00
/*
* skip setting up cache properties if LLC is valid , just need
* to update the shared cpu_map if the cache attributes were
* populated early before all the cpus are brought online
*/
2023-04-14 10:14:52 +02:00
if ( ! last_level_cache_is_valid ( cpu ) & & ! use_arch_info ) {
2022-07-04 11:15:50 +01:00
ret = cache_setup_properties ( cpu ) ;
if ( ret )
return ret ;
}
2014-09-30 14:48:25 +01:00
for ( index = 0 ; index < cache_leaves ( cpu ) ; index + + ) {
unsigned int i ;
2022-07-04 11:15:47 +01:00
this_leaf = per_cpu_cacheinfo_idx ( cpu , index ) ;
2014-09-30 14:48:25 +01:00
cpumask_set_cpu ( cpu , & this_leaf - > shared_cpu_map ) ;
for_each_online_cpu ( i ) {
struct cpu_cacheinfo * sib_cpu_ci = get_cpu_cacheinfo ( i ) ;
if ( i = = cpu | | ! sib_cpu_ci - > info_list )
continue ; /* skip if itself or no cacheinfo */
2023-01-17 10:51:33 +00:00
for ( sib_index = 0 ; sib_index < cache_leaves ( i ) ; sib_index + + ) {
sib_leaf = per_cpu_cacheinfo_idx ( i , sib_index ) ;
drivers: base: cacheinfo: Fix shared_cpu_map changes in event of CPU hotplug
While building the shared_cpu_map, check if the cache level and cache
type matches. On certain systems that build the cache topology based on
the instance ID, there are cases where the same ID may repeat across
multiple cache levels, leading inaccurate topology.
In event of CPU offlining, the cache_shared_cpu_map_remove() does not
consider if IDs at same level are being compared. As a result, when same
IDs repeat across different cache levels, the CPU going offline is not
removed from all the shared_cpu_map.
Below is the output of cache topology of CPU8 and it's SMT sibling after
CPU8 is offlined on a dual socket 3rd Generation AMD EPYC processor
(2 x 64C/128T) running kernel release v6.3:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu136/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu136/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list: 9-15,136-143
CPU8 is removed from index0 (L1i) but remains in the shared_cpu_list of
index1 (L1d) and index2 (L2). Since L1i, L1d, and L2 are shared by the
SMT siblings, and they have the same cache instance ID, CPU 2 is only
removed from the first index with matching ID which is index1 (L1i) in
this case. With this fix, the results are as expected when performing
the same experiment on the same system:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu136/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index1/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index2/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list: 9-15,136-143
When rebuilding topology, the same problem appears as
cache_shared_cpu_map_setup() implements a similar logic. Consider the
same 3rd Generation EPYC processor: CPUs in Core 1, that share the L1
and L2 caches, have L1 and L2 instance ID as 1. For all the CPUs on
the second chiplet, the L3 ID is also 1 leading to grouping on CPUs from
Core 1 (1, 17) and the entire second chiplet (8-15, 24-31) as CPUs
sharing one cache domain. This went undetected since x86 processors
depended on arch specific populate_cache_leaves() method to repopulate
the shared_cpus_map when CPU came back online until kernel release
v6.3-rc5.
Fixes: 198102c9103f ("cacheinfo: Fix shared_cpu_map to handle shared caches at different levels")
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20230508084115.1157-2-kprateek.nayak@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2023-05-08 14:11:14 +05:30
/*
* Comparing cache IDs only makes sense if the leaves
* belong to the same cache level of same type . Skip
* the check if level and type do not match .
*/
if ( sib_leaf - > level ! = this_leaf - > level | |
sib_leaf - > type ! = this_leaf - > type )
continue ;
2023-01-17 10:51:33 +00:00
if ( cache_leaves_are_shared ( this_leaf , sib_leaf ) ) {
cpumask_set_cpu ( cpu , & sib_leaf - > shared_cpu_map ) ;
cpumask_set_cpu ( i , & this_leaf - > shared_cpu_map ) ;
break ;
}
2014-09-30 14:48:25 +01:00
}
}
2019-05-28 10:16:53 +08:00
/* record the maximum cache line size */
if ( this_leaf - > coherency_line_size > coherency_max_size )
coherency_max_size = this_leaf - > coherency_line_size ;
2014-09-30 14:48:25 +01:00
}
drivers: base: cacheinfo: Update cpu_map_populated during CPU Hotplug
Until commit 5c2712387d48 ("cacheinfo: Fix LLC is not exported through
sysfs"), cacheinfo called populate_cache_leaves() for CPU coming online
which let the arch specific functions handle (at least on x86)
populating the shared_cpu_map. However, with the changes in the
aforementioned commit, populate_cache_leaves() is not called when a CPU
comes online as a result of hotplug since last_level_cache_is_valid()
returns true as the cacheinfo data is not discarded. The CPU coming
online is not present in shared_cpu_map, however, it will not be added
since the cpu_cacheinfo->cpu_map_populated flag is set (it is set in
populate_cache_leaves() when cacheinfo is first populated for x86)
This can lead to inconsistencies in the shared_cpu_map when an offlined
CPU comes online again. Example below depicts the inconsistency in the
shared_cpu_list in cacheinfo when CPU8 is offlined and onlined again on
a 3rd Generation EPYC processor:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
9-15,136-143
Clear the flag when the CPU is removed from shared_cpu_map when
cache_shared_cpu_map_remove() is called during CPU hotplug. This will
allow cache_shared_cpu_map_setup() to add the CPU coming back online in
the shared_cpu_map. Set the flag again when the shared_cpu_map is setup.
Following are results of performing the same test as described above with
the changes:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
8,136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
8-15,136-143
Fixes: 5c2712387d48 ("cacheinfo: Fix LLC is not exported through sysfs")
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Yicong Yang <yangyicong@hisilicon.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20230508084115.1157-3-kprateek.nayak@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2023-05-08 14:11:15 +05:30
/* shared_cpu_map is now populated for the cpu */
this_cpu_ci - > cpu_map_populated = true ;
2014-09-30 14:48:25 +01:00
return 0 ;
}
static void cache_shared_cpu_map_remove ( unsigned int cpu )
{
drivers: base: cacheinfo: Update cpu_map_populated during CPU Hotplug
Until commit 5c2712387d48 ("cacheinfo: Fix LLC is not exported through
sysfs"), cacheinfo called populate_cache_leaves() for CPU coming online
which let the arch specific functions handle (at least on x86)
populating the shared_cpu_map. However, with the changes in the
aforementioned commit, populate_cache_leaves() is not called when a CPU
comes online as a result of hotplug since last_level_cache_is_valid()
returns true as the cacheinfo data is not discarded. The CPU coming
online is not present in shared_cpu_map, however, it will not be added
since the cpu_cacheinfo->cpu_map_populated flag is set (it is set in
populate_cache_leaves() when cacheinfo is first populated for x86)
This can lead to inconsistencies in the shared_cpu_map when an offlined
CPU comes online again. Example below depicts the inconsistency in the
shared_cpu_list in cacheinfo when CPU8 is offlined and onlined again on
a 3rd Generation EPYC processor:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
9-15,136-143
Clear the flag when the CPU is removed from shared_cpu_map when
cache_shared_cpu_map_remove() is called during CPU hotplug. This will
allow cache_shared_cpu_map_setup() to add the CPU coming back online in
the shared_cpu_map. Set the flag again when the shared_cpu_map is setup.
Following are results of performing the same test as described above with
the changes:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
8,136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
8-15,136-143
Fixes: 5c2712387d48 ("cacheinfo: Fix LLC is not exported through sysfs")
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Yicong Yang <yangyicong@hisilicon.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20230508084115.1157-3-kprateek.nayak@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2023-05-08 14:11:15 +05:30
struct cpu_cacheinfo * this_cpu_ci = get_cpu_cacheinfo ( cpu ) ;
2014-09-30 14:48:25 +01:00
struct cacheinfo * this_leaf , * sib_leaf ;
2023-01-17 10:51:33 +00:00
unsigned int sibling , index , sib_index ;
2014-09-30 14:48:25 +01:00
for ( index = 0 ; index < cache_leaves ( cpu ) ; index + + ) {
2022-07-04 11:15:47 +01:00
this_leaf = per_cpu_cacheinfo_idx ( cpu , index ) ;
2014-09-30 14:48:25 +01:00
for_each_cpu ( sibling , & this_leaf - > shared_cpu_map ) {
2022-07-04 11:15:52 +01:00
struct cpu_cacheinfo * sib_cpu_ci =
get_cpu_cacheinfo ( sibling ) ;
2015-08-08 10:46:02 +02:00
2022-07-04 11:15:52 +01:00
if ( sibling = = cpu | | ! sib_cpu_ci - > info_list )
continue ; /* skip if itself or no cacheinfo */
2015-08-08 10:46:02 +02:00
2023-01-17 10:51:33 +00:00
for ( sib_index = 0 ; sib_index < cache_leaves ( sibling ) ; sib_index + + ) {
sib_leaf = per_cpu_cacheinfo_idx ( sibling , sib_index ) ;
drivers: base: cacheinfo: Fix shared_cpu_map changes in event of CPU hotplug
While building the shared_cpu_map, check if the cache level and cache
type matches. On certain systems that build the cache topology based on
the instance ID, there are cases where the same ID may repeat across
multiple cache levels, leading inaccurate topology.
In event of CPU offlining, the cache_shared_cpu_map_remove() does not
consider if IDs at same level are being compared. As a result, when same
IDs repeat across different cache levels, the CPU going offline is not
removed from all the shared_cpu_map.
Below is the output of cache topology of CPU8 and it's SMT sibling after
CPU8 is offlined on a dual socket 3rd Generation AMD EPYC processor
(2 x 64C/128T) running kernel release v6.3:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu136/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu136/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list: 9-15,136-143
CPU8 is removed from index0 (L1i) but remains in the shared_cpu_list of
index1 (L1d) and index2 (L2). Since L1i, L1d, and L2 are shared by the
SMT siblings, and they have the same cache instance ID, CPU 2 is only
removed from the first index with matching ID which is index1 (L1i) in
this case. With this fix, the results are as expected when performing
the same experiment on the same system:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu136/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index1/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index2/shared_cpu_list: 136
/sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list: 9-15,136-143
When rebuilding topology, the same problem appears as
cache_shared_cpu_map_setup() implements a similar logic. Consider the
same 3rd Generation EPYC processor: CPUs in Core 1, that share the L1
and L2 caches, have L1 and L2 instance ID as 1. For all the CPUs on
the second chiplet, the L3 ID is also 1 leading to grouping on CPUs from
Core 1 (1, 17) and the entire second chiplet (8-15, 24-31) as CPUs
sharing one cache domain. This went undetected since x86 processors
depended on arch specific populate_cache_leaves() method to repopulate
the shared_cpus_map when CPU came back online until kernel release
v6.3-rc5.
Fixes: 198102c9103f ("cacheinfo: Fix shared_cpu_map to handle shared caches at different levels")
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20230508084115.1157-2-kprateek.nayak@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2023-05-08 14:11:14 +05:30
/*
* Comparing cache IDs only makes sense if the leaves
* belong to the same cache level of same type . Skip
* the check if level and type do not match .
*/
if ( sib_leaf - > level ! = this_leaf - > level | |
sib_leaf - > type ! = this_leaf - > type )
continue ;
2023-01-17 10:51:33 +00:00
if ( cache_leaves_are_shared ( this_leaf , sib_leaf ) ) {
cpumask_clear_cpu ( cpu , & sib_leaf - > shared_cpu_map ) ;
cpumask_clear_cpu ( sibling , & this_leaf - > shared_cpu_map ) ;
break ;
}
}
2014-09-30 14:48:25 +01:00
}
}
drivers: base: cacheinfo: Update cpu_map_populated during CPU Hotplug
Until commit 5c2712387d48 ("cacheinfo: Fix LLC is not exported through
sysfs"), cacheinfo called populate_cache_leaves() for CPU coming online
which let the arch specific functions handle (at least on x86)
populating the shared_cpu_map. However, with the changes in the
aforementioned commit, populate_cache_leaves() is not called when a CPU
comes online as a result of hotplug since last_level_cache_is_valid()
returns true as the cacheinfo data is not discarded. The CPU coming
online is not present in shared_cpu_map, however, it will not be added
since the cpu_cacheinfo->cpu_map_populated flag is set (it is set in
populate_cache_leaves() when cacheinfo is first populated for x86)
This can lead to inconsistencies in the shared_cpu_map when an offlined
CPU comes online again. Example below depicts the inconsistency in the
shared_cpu_list in cacheinfo when CPU8 is offlined and onlined again on
a 3rd Generation EPYC processor:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
9-15,136-143
Clear the flag when the CPU is removed from shared_cpu_map when
cache_shared_cpu_map_remove() is called during CPU hotplug. This will
allow cache_shared_cpu_map_setup() to add the CPU coming back online in
the shared_cpu_map. Set the flag again when the shared_cpu_map is setup.
Following are results of performing the same test as described above with
the changes:
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# echo 0 > /sys/devices/system/cpu/cpu8/online
# echo 1 > /sys/devices/system/cpu/cpu8/online
# for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done
/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136
/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143
# cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list
8,136
# cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list
8-15,136-143
Fixes: 5c2712387d48 ("cacheinfo: Fix LLC is not exported through sysfs")
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Yicong Yang <yangyicong@hisilicon.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20230508084115.1157-3-kprateek.nayak@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2023-05-08 14:11:15 +05:30
/* cpu is no longer populated in the shared map */
this_cpu_ci - > cpu_map_populated = false ;
2014-09-30 14:48:25 +01:00
}
/*
 * Detach @cpu from the shared-CPU maps of its cache leaves by calling
 * cache_shared_cpu_map_remove(). Does nothing if @cpu has no cacheinfo
 * array. Despite the name, no memory is released here.
 */
static void free_cache_attributes ( unsigned int cpu )
{
2015-08-08 10:46:02 +02:00
if ( ! per_cpu_cacheinfo ( cpu ) )
return ;
2014-09-30 14:48:25 +01:00
cache_shared_cpu_map_remove ( cpu ) ;
}
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
/*
 * Generic fallback for the arch-specific early cache level hook.
 *
 * Declared __weak so that an architecture can supply its own definition;
 * this default always reports -ENOENT for @cpu.
 */
int __weak early_cache_level(unsigned int cpu)
{
	return -ENOENT;
}
2014-09-30 14:48:25 +01:00
/*
 * Generic fallback for the arch-specific cache level initializer.
 *
 * Declared __weak so that an architecture can supply its own definition;
 * this default always reports -ENOENT for @cpu.
 */
int __weak init_cache_level(unsigned int cpu)
{
	return -ENOENT;
}
/*
 * Generic fallback for the arch-specific cache leaf populator.
 *
 * Declared __weak so that an architecture can supply its own definition;
 * this default always reports -ENOENT for @cpu.
 */
int __weak populate_cache_leaves(unsigned int cpu)
{
	return -ENOENT;
}
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior secondary CPUs boot, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fallback to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
/*
 * Allocate the cacheinfo array of @cpu, one zeroed entry per cache leaf
 * currently recorded for that CPU, and store it as the CPU's info list.
 *
 * Returns 0 on success. If the allocation fails, the leaf count of @cpu is
 * reset to 0 and -ENOMEM is returned.
 */
static inline int allocate_cache_info(int cpu)
{
	struct cacheinfo *leaves;

	leaves = kcalloc(cache_leaves(cpu), sizeof(*leaves), GFP_ATOMIC);
	per_cpu_cacheinfo(cpu) = leaves;
	if (leaves)
		return 0;

	/* No array behind the count: make sure no leaf is advertised. */
	cache_leaves(cpu) = 0;
	return -ENOMEM;
}
/*
 * fetch_cache_info() - early setup of the cache level/leaf counts and of the
 * cacheinfo array of @cpu.
 *
 * The counts come from init_of_cache_level() when ACPI is disabled, otherwise
 * from acpi_get_cache_info(). If that fails or yields no leaves,
 * early_cache_level() is tried instead and the result is flagged through
 * early_ci_levels so that init_level_allocate_ci() re-checks it later.
 *
 * Returns the result of allocate_cache_info() (0 or -ENOMEM), the error
 * reported by early_cache_level(), or -ENOENT if there are still no leaves.
 */
int fetch_cache_info ( unsigned int cpu )
{
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
struct cpu_cacheinfo * this_cpu_ci = get_cpu_cacheinfo ( cpu ) ;
2023-01-24 16:40:46 +01:00
unsigned int levels = 0 , split_levels = 0 ;
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior secondary CPUs boot, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fallback to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
int ret ;
/* Firmware description first: init_of_cache_level() when ACPI is off, ACPI otherwise. */
if ( acpi_disabled ) {
ret = init_of_cache_level ( cpu ) ;
} else {
ret = acpi_get_cache_info ( cpu , & levels , & split_levels ) ;
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
if ( ! ret ) {
this_cpu_ci - > num_levels = levels ;
/*
* This assumes that :
* - there cannot be any split caches ( data / instruction )
* above a unified cache
* - data / instruction caches come by pair
*/
this_cpu_ci - > num_leaves = levels + split_levels ;
}
}
/* Nothing usable from the firmware description: try the arch-specific early hook. */
if ( ret | | ! cache_leaves ( cpu ) ) {
ret = early_cache_level ( cpu ) ;
if ( ret )
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior secondary CPUs boot, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fallback to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
return ret ;
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
if ( ! cache_leaves ( cpu ) )
return - ENOENT ;
/* Counts from the early hook are provisional; init_level_allocate_ci() re-runs init_cache_level() while this flag is set. */
this_cpu_ci - > early_ci_levels = true ;
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior secondary CPUs boot, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fallback to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
}
return allocate_cache_info ( cpu ) ;
}
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
/*
 * Make sure @cpu has a cache leaf count and a cacheinfo array to go with it.
 *
 * Returns 0 when an array is already present and its counts are not flagged
 * as early (provisional), or when the early array is large enough for the
 * count set by init_cache_level(). Returns -ENOENT when init_cache_level()
 * fails or leaves the leaf count at zero. Otherwise returns the result of
 * allocate_cache_info().
 */
static inline int init_level_allocate_ci ( unsigned int cpu )
2014-09-30 14:48:25 +01:00
{
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
unsigned int early_leaves = cache_leaves ( cpu ) ;
2014-09-30 14:48:25 +01:00
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior secondary CPUs boot, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fallback to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
/* Since early initialization/allocation of the cacheinfo is allowed
* via fetch_cache_info ( ) and this also gets called as CPU hotplug
* callbacks via cacheinfo_cpu_online , the init / alloc can be skipped
* as it will happen only once ( the cacheinfo memory is never freed ) .
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
* Just populate the cacheinfo . However , if the cacheinfo has been
* allocated early through the arch - specific early_cache_level ( ) call ,
* there is a chance the info is wrong ( this can happen on arm64 ) . In
* that case , call init_cache_level ( ) anyway to give the arch - specific
* code a chance to make things right .
2022-07-04 11:15:50 +01:00
*/
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
if ( per_cpu_cacheinfo ( cpu ) & & ! ci_cacheinfo ( cpu ) - > early_ci_levels )
return 0 ;
2022-07-04 11:15:50 +01:00
2015-05-27 11:26:13 +01:00
if ( init_cache_level ( cpu ) | | ! cache_leaves ( cpu ) )
2014-09-30 14:48:25 +01:00
return - ENOENT ;
cacheinfo: Add arch specific early level initializer
This patch gives architecture specific code the ability to initialize
the cache level and allocate cacheinfo memory early, when cache level
initialization runs on the primary CPU for all possible CPUs.
This is part of a patch series that attempts to further the work in
commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU").
Previously, in the absence of any DT/ACPI cache info, architecture
specific cache detection and info allocation for secondary CPUs would
happen in non-preemptible context during early CPU initialization and
trigger a "BUG: sleeping function called from invalid context" splat on
an RT kernel.
More specifically, this patch adds the early_cache_level() function,
which is called by fetch_cache_info() as a fallback when the number of
cache leaves cannot be extracted from DT/ACPI. In the default generic
(weak) implementation, this new function returns -ENOENT, which
preserves the original behavior for architectures that do not implement
the function.
Since early detection can get the number of cache leaves wrong in some
cases*, additional logic is added to still call init_cache_level() later
on the secondary CPU, therefore giving the architecture specific code an
opportunity to go back and fix the initial guess. Again, the original
behavior is preserved for architectures that do not implement the new
function.
* For example, on arm64, CLIDR_EL1 detection works only when it runs on
the current CPU. In other words, a CPU cannot detect the cache depth
for any other CPU than itself.
Signed-off-by: Radu Rendec <rrendec@redhat.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20230412185759.755408-2-rrendec@redhat.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-04-12 14:57:57 -04:00
/*
* Now that we have properly initialized the cache level info , make
* sure we don ' t try to do that again the next time we are called
* ( e . g . as CPU hotplug callbacks ) .
*/
ci_cacheinfo ( cpu ) - > early_ci_levels = false ;
/* The array sized from the earlier leaf count already has room for the new count: keep it. */
if ( cache_leaves ( cpu ) < = early_leaves )
return 0 ;
/* More leaves than were allocated for: drop the undersized array and allocate a new one. */
kfree ( per_cpu_cacheinfo ( cpu ) ) ;
return allocate_cache_info ( cpu ) ;
}
/*
 * detect_cache_attributes() - make sure the cacheinfo of @cpu is allocated
 * and populated
 * @cpu: logical CPU number
 *
 * Runs init_level_allocate_ci(), then populate_cache_leaves() unless
 * last_level_cache_is_valid() already holds for @cpu, and finally
 * cache_shared_cpu_map_setup().
 *
 * Return: 0 on success, otherwise the error code of the step that failed.
 * When populate_cache_leaves() or cache_shared_cpu_map_setup() fails,
 * free_cache_attributes() is called for @cpu before returning.
 */
int detect_cache_attributes ( unsigned int cpu )
{
int ret ;
ret = init_level_allocate_ci ( cpu ) ;
arch_topology: Build cacheinfo from primary CPU
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior secondary CPUs boot, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fallback to the current state
for the cacheinfo allocation. [1] will be triggered in such case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
2023-01-04 19:30:29 +01:00
if ( ret )
return ret ;
2014-09-30 14:48:25 +01:00
2018-05-11 18:57:57 -05:00
/*
2023-03-28 19:49:15 +08:00
* If LLC is valid the cache leaves were already populated so just go to
* update the cpu map .
2018-05-11 18:57:57 -05:00
*/
2023-03-28 19:49:15 +08:00
if ( ! last_level_cache_is_valid ( cpu ) ) {
/*
* populate_cache_leaves ( ) may completely setup the cache leaves and
* shared_cpu_map or it may leave it partially setup .
*/
ret = populate_cache_leaves ( cpu ) ;
if ( ret )
goto free_ci ;
}
2022-07-04 11:15:50 +01:00
2014-09-30 14:48:25 +01:00
/*
2018-05-11 18:57:58 -05:00
* For systems using DT for cache hierarchy , fw_token
* and shared_cpu_map will be set up here only if they are
* not populated already
2014-09-30 14:48:25 +01:00
*/
ret = cache_shared_cpu_map_setup ( cpu ) ;
2015-03-17 17:28:46 +00:00
if ( ret ) {
2016-10-28 09:45:29 +01:00
pr_warn ( " Unable to detect cache hierarchy for CPU %d \n " , cpu ) ;
2014-09-30 14:48:25 +01:00
goto free_ci ;
2015-03-17 17:28:46 +00:00
}
2016-10-28 09:45:31 +01:00
2014-09-30 14:48:25 +01:00
return 0 ;
/* Common failure exit: hand the cacheinfo of @cpu to free_cache_attributes(). */
free_ci :
free_cache_attributes ( cpu ) ;
return ret ;
}
/* Per-CPU pointer to the cpuX/cache device. */
static DEFINE_PER_CPU(struct device *, ci_cache_dev);
#define per_cpu_cache_dev(cpu)		(per_cpu(ci_cache_dev, cpu))

/*
 * CPUs whose cache devices were registered by cache_add_dev(); the bit is
 * tested and cleared again in cacheinfo_cpu_pre_down().
 */
static cpumask_t cache_dev_map;

/* Per-CPU pointer to the array of cpuX/cache/indexY devices. */
static DEFINE_PER_CPU(struct device **, ci_index_dev);
#define per_cpu_index_dev(cpu)		(per_cpu(ci_index_dev, cpu))
#define per_cache_index_dev(cpu, idx)	((per_cpu_index_dev(cpu))[idx])
/*
 * show_one() - generate the sysfs show callback file_name##_show()
 *
 * The generated function fetches the struct cacheinfo stored as driver data
 * of the device and emits its "object" member as an unsigned decimal number
 * followed by a newline, returning the result of sysfs_emit().
 */
# define show_one(file_name, object) \
static ssize_t file_name # # _show ( struct device * dev , \
struct device_attribute * attr , char * buf ) \
{ \
struct cacheinfo * this_leaf = dev_get_drvdata ( dev ) ; \
2020-09-16 13:40:42 -07:00
return sysfs_emit ( buf , " %u \n " , this_leaf - > object ) ; \
2014-09-30 14:48:25 +01:00
}
2016-10-22 06:19:49 -07:00
/*
 * Instantiate id_show(), level_show(), coherency_line_size_show(),
 * number_of_sets_show(), physical_line_partition_show() and
 * ways_of_associativity_show(); each one prints the struct cacheinfo member
 * of the same name through show_one().
 */
show_one ( id , id ) ;
2014-09-30 14:48:25 +01:00
show_one ( level , level ) ;
show_one ( coherency_line_size , coherency_line_size ) ;
show_one ( number_of_sets , number_of_sets ) ;
show_one ( physical_line_partition , physical_line_partition ) ;
show_one ( ways_of_associativity , ways_of_associativity ) ;
/*
 * size_show() - sysfs show callback for the "size" attribute
 * @dev: cache index device; its driver data is the struct cacheinfo leaf
 * @attr: attribute being read (not used)
 * @buf: sysfs output buffer
 *
 * Emits the leaf's size shifted right by 10 bits, followed by a 'K' suffix
 * and a newline.
 *
 * Return: the value returned by sysfs_emit().
 */
static ssize_t size_show ( struct device * dev ,
struct device_attribute * attr , char * buf )
{
struct cacheinfo * this_leaf = dev_get_drvdata ( dev ) ;
drivers core: Use sysfs_emit and sysfs_emit_at for show(device *...) functions
Convert the various sprintf fmaily calls in sysfs device show functions
to sysfs_emit and sysfs_emit_at for PAGE_SIZE buffer safety.
Done with:
$ spatch -sp-file sysfs_emit_dev.cocci --in-place --max-width=80 .
And cocci script:
$ cat sysfs_emit_dev.cocci
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- strcpy(buf, chr);
+ sysfs_emit(buf, chr);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
- len += scnprintf(buf + len, PAGE_SIZE - len,
+ len += sysfs_emit_at(buf, len,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
...
- strcpy(buf, chr);
- return strlen(buf);
+ return sysfs_emit(buf, chr);
}
Signed-off-by: Joe Perches <joe@perches.com>
Link: https://lore.kernel.org/r/3d033c33056d88bbe34d4ddb62afd05ee166ab9a.1600285923.git.joe@perches.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-09-16 13:40:39 -07:00
return sysfs_emit ( buf , " %uK \n " , this_leaf - > size > > 10 ) ;
2014-09-30 14:48:25 +01:00
}
2020-09-16 13:40:44 -07:00
/*
 * shared_cpu_map_show() - sysfs show callback for "shared_cpu_map"
 * @dev: cache index device; its driver data is the struct cacheinfo leaf
 * @attr: attribute being read (not used)
 * @buf: sysfs output buffer
 *
 * Emits the leaf's shared_cpu_map with the %*pb bitmap format, using
 * nr_cpu_ids as the number of bits.
 *
 * Return: the value returned by sysfs_emit().
 */
static ssize_t shared_cpu_map_show ( struct device * dev ,
struct device_attribute * attr , char * buf )
2014-09-30 14:48:25 +01:00
{
struct cacheinfo * this_leaf = dev_get_drvdata ( dev ) ;
const struct cpumask * mask = & this_leaf - > shared_cpu_map ;
2020-09-16 13:40:44 -07:00
return sysfs_emit ( buf , " %*pb \n " , nr_cpu_ids , mask ) ;
2014-09-30 14:48:25 +01:00
}
/*
 * shared_cpu_list_show() - sysfs show callback for "shared_cpu_list"
 * @dev: cache index device; its driver data is the struct cacheinfo leaf
 * @attr: attribute being read (not used)
 * @buf: sysfs output buffer
 *
 * Emits the leaf's shared_cpu_map with the %*pbl (list) bitmap format, using
 * nr_cpu_ids as the number of bits.
 *
 * Return: the value returned by sysfs_emit().
 */
static ssize_t shared_cpu_list_show ( struct device * dev ,
struct device_attribute * attr , char * buf )
{
2020-09-16 13:40:44 -07:00
struct cacheinfo * this_leaf = dev_get_drvdata ( dev ) ;
const struct cpumask * mask = & this_leaf - > shared_cpu_map ;
return sysfs_emit ( buf , " %*pbl \n " , nr_cpu_ids , mask ) ;
2014-09-30 14:48:25 +01:00
}
/*
 * type_show() - sysfs show callback for the "type" attribute
 * @dev: cache index device; its driver data is the struct cacheinfo leaf
 * @attr: attribute being read (not used)
 * @buf: sysfs output buffer
 *
 * Translates the leaf's type into a name: CACHE_TYPE_DATA,
 * CACHE_TYPE_INST and CACHE_TYPE_UNIFIED each map to their own string.
 *
 * Return: the value returned by sysfs_emit(), or -EINVAL for any other type.
 */
static ssize_t type_show ( struct device * dev ,
struct device_attribute * attr , char * buf )
{
struct cacheinfo * this_leaf = dev_get_drvdata ( dev ) ;
2020-09-16 13:40:40 -07:00
const char * output ;
2014-09-30 14:48:25 +01:00
switch ( this_leaf - > type ) {
case CACHE_TYPE_DATA :
2020-09-16 13:40:40 -07:00
output = " Data " ;
break ;
2014-09-30 14:48:25 +01:00
case CACHE_TYPE_INST :
2020-09-16 13:40:40 -07:00
output = " Instruction " ;
break ;
2014-09-30 14:48:25 +01:00
case CACHE_TYPE_UNIFIED :
2020-09-16 13:40:40 -07:00
output = " Unified " ;
break ;
2014-09-30 14:48:25 +01:00
default :
return - EINVAL ;
}
2020-09-16 13:40:40 -07:00
return sysfs_emit ( buf , " %s \n " , output ) ;
2014-09-30 14:48:25 +01:00
}
/*
 * allocation_policy_show() - sysfs show callback for "allocation_policy"
 * @dev: cache index device; its driver data is the struct cacheinfo leaf
 * @attr: attribute being read (not used)
 * @buf: sysfs output buffer
 *
 * Picks the policy name from the CACHE_READ_ALLOCATE and
 * CACHE_WRITE_ALLOCATE bits of the leaf's attributes; the combined case is
 * tested first so that it wins over the single-bit cases.
 *
 * Return: the value returned by sysfs_emit(), or 0 without writing anything
 * when neither bit is set.
 */
static ssize_t allocation_policy_show ( struct device * dev ,
struct device_attribute * attr , char * buf )
{
struct cacheinfo * this_leaf = dev_get_drvdata ( dev ) ;
unsigned int ci_attr = this_leaf - > attributes ;
2020-09-16 13:40:40 -07:00
const char * output ;
2014-09-30 14:48:25 +01:00
if ( ( ci_attr & CACHE_READ_ALLOCATE ) & & ( ci_attr & CACHE_WRITE_ALLOCATE ) )
2020-09-16 13:40:40 -07:00
output = " ReadWriteAllocate " ;
2014-09-30 14:48:25 +01:00
else if ( ci_attr & CACHE_READ_ALLOCATE )
2020-09-16 13:40:40 -07:00
output = " ReadAllocate " ;
2014-09-30 14:48:25 +01:00
else if ( ci_attr & CACHE_WRITE_ALLOCATE )
2020-09-16 13:40:40 -07:00
output = " WriteAllocate " ;
else
return 0 ;
return sysfs_emit ( buf , " %s \n " , output ) ;
2014-09-30 14:48:25 +01:00
}
/*
 * write_policy_show() - sysfs show callback for "write_policy"
 * @dev: cache index device; its driver data is the struct cacheinfo leaf
 * @attr: attribute being read (not used)
 * @buf: sysfs output buffer
 *
 * Emits the write-through name when CACHE_WRITE_THROUGH is set in the leaf's
 * attributes, otherwise the write-back name when CACHE_WRITE_BACK is set.
 *
 * Return: the value returned by sysfs_emit(), or 0 without writing anything
 * when neither bit is set.
 */
static ssize_t write_policy_show ( struct device * dev ,
struct device_attribute * attr , char * buf )
{
struct cacheinfo * this_leaf = dev_get_drvdata ( dev ) ;
unsigned int ci_attr = this_leaf - > attributes ;
int n = 0 ;
if ( ci_attr & CACHE_WRITE_THROUGH )
drivers core: Use sysfs_emit and sysfs_emit_at for show(device *...) functions
Convert the various sprintf fmaily calls in sysfs device show functions
to sysfs_emit and sysfs_emit_at for PAGE_SIZE buffer safety.
Done with:
$ spatch -sp-file sysfs_emit_dev.cocci --in-place --max-width=80 .
And cocci script:
$ cat sysfs_emit_dev.cocci
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- strcpy(buf, chr);
+ sysfs_emit(buf, chr);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
- len += scnprintf(buf + len, PAGE_SIZE - len,
+ len += sysfs_emit_at(buf, len,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
...
- strcpy(buf, chr);
- return strlen(buf);
+ return sysfs_emit(buf, chr);
}
Signed-off-by: Joe Perches <joe@perches.com>
Link: https://lore.kernel.org/r/3d033c33056d88bbe34d4ddb62afd05ee166ab9a.1600285923.git.joe@perches.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-09-16 13:40:39 -07:00
n = sysfs_emit ( buf , " WriteThrough \n " ) ;
2014-09-30 14:48:25 +01:00
else if ( ci_attr & CACHE_WRITE_BACK )
drivers core: Use sysfs_emit and sysfs_emit_at for show(device *...) functions
Convert the various sprintf fmaily calls in sysfs device show functions
to sysfs_emit and sysfs_emit_at for PAGE_SIZE buffer safety.
Done with:
$ spatch -sp-file sysfs_emit_dev.cocci --in-place --max-width=80 .
And cocci script:
$ cat sysfs_emit_dev.cocci
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- strcpy(buf, chr);
+ sysfs_emit(buf, chr);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
- len += scnprintf(buf + len, PAGE_SIZE - len,
+ len += sysfs_emit_at(buf, len,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
...
- strcpy(buf, chr);
- return strlen(buf);
+ return sysfs_emit(buf, chr);
}
Signed-off-by: Joe Perches <joe@perches.com>
Link: https://lore.kernel.org/r/3d033c33056d88bbe34d4ddb62afd05ee166ab9a.1600285923.git.joe@perches.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-09-16 13:40:39 -07:00
n = sysfs_emit ( buf , " WriteBack \n " ) ;
2014-09-30 14:48:25 +01:00
return n ;
}
2016-10-22 06:19:49 -07:00
/*
 * Read-only device attributes, one per cache property; each dev_attr_<name>
 * declared here is listed in cache_default_attrs[].
 */
static DEVICE_ATTR_RO ( id ) ;
2014-09-30 14:48:25 +01:00
static DEVICE_ATTR_RO ( level ) ;
static DEVICE_ATTR_RO ( type ) ;
static DEVICE_ATTR_RO ( coherency_line_size ) ;
static DEVICE_ATTR_RO ( ways_of_associativity ) ;
static DEVICE_ATTR_RO ( number_of_sets ) ;
static DEVICE_ATTR_RO ( size ) ;
static DEVICE_ATTR_RO ( allocation_policy ) ;
static DEVICE_ATTR_RO ( write_policy ) ;
static DEVICE_ATTR_RO ( shared_cpu_map ) ;
static DEVICE_ATTR_RO ( shared_cpu_list ) ;
static DEVICE_ATTR_RO ( physical_line_partition ) ;
/*
 * NULL-terminated list of the default cache attributes. Which of them are
 * exposed for a given leaf is decided by cache_default_attrs_is_visible().
 */
static struct attribute * cache_default_attrs [ ] = {
2016-10-22 06:19:49 -07:00
& dev_attr_id . attr ,
2014-09-30 14:48:25 +01:00
& dev_attr_type . attr ,
& dev_attr_level . attr ,
& dev_attr_shared_cpu_map . attr ,
& dev_attr_shared_cpu_list . attr ,
& dev_attr_coherency_line_size . attr ,
& dev_attr_ways_of_associativity . attr ,
& dev_attr_number_of_sets . attr ,
& dev_attr_size . attr ,
& dev_attr_allocation_policy . attr ,
& dev_attr_write_policy . attr ,
& dev_attr_physical_line_partition . attr ,
NULL
} ;
/*
 * cache_default_attrs_is_visible() - decide whether a default attribute is
 * exposed for a cache leaf
 * @kobj: kobject of the cache index device
 * @attr: attribute being queried
 * @unused: not used
 *
 * An attribute is shown only when the struct cacheinfo field backing it is
 * populated: non-zero for the plain fields, a non-empty shared_cpu_map for
 * the two CPU map attributes, CACHE_ID set for "id", and a bit of the
 * matching policy mask set for the two policy attributes.
 *
 * Return: attr->mode if the attribute should be visible, 0 otherwise.
 */
static umode_t
cache_default_attrs_is_visible ( struct kobject * kobj ,
struct attribute * attr , int unused )
{
struct device * dev = kobj_to_dev ( kobj ) ;
struct cacheinfo * this_leaf = dev_get_drvdata ( dev ) ;
const struct cpumask * mask = & this_leaf - > shared_cpu_map ;
umode_t mode = attr - > mode ;
2016-10-22 06:19:49 -07:00
if ( ( attr = = & dev_attr_id . attr ) & & ( this_leaf - > attributes & CACHE_ID ) )
return mode ;
2014-09-30 14:48:25 +01:00
if ( ( attr = = & dev_attr_type . attr ) & & this_leaf - > type )
return mode ;
if ( ( attr = = & dev_attr_level . attr ) & & this_leaf - > level )
return mode ;
if ( ( attr = = & dev_attr_shared_cpu_map . attr ) & & ! cpumask_empty ( mask ) )
return mode ;
if ( ( attr = = & dev_attr_shared_cpu_list . attr ) & & ! cpumask_empty ( mask ) )
return mode ;
if ( ( attr = = & dev_attr_coherency_line_size . attr ) & &
this_leaf - > coherency_line_size )
return mode ;
/* Keyed on size, not on ways_of_associativity, so that a value of 0 ways is still shown. */
if ( ( attr = = & dev_attr_ways_of_associativity . attr ) & &
this_leaf - > size ) /* allow 0 = full associativity */
return mode ;
if ( ( attr = = & dev_attr_number_of_sets . attr ) & &
this_leaf - > number_of_sets )
return mode ;
if ( ( attr = = & dev_attr_size . attr ) & & this_leaf - > size )
return mode ;
if ( ( attr = = & dev_attr_write_policy . attr ) & &
( this_leaf - > attributes & CACHE_WRITE_POLICY_MASK ) )
return mode ;
if ( ( attr = = & dev_attr_allocation_policy . attr ) & &
( this_leaf - > attributes & CACHE_ALLOCATE_POLICY_MASK ) )
return mode ;
if ( ( attr = = & dev_attr_physical_line_partition . attr ) & &
this_leaf - > physical_line_partition )
return mode ;
return 0 ;
}
/* Attribute group holding the default attributes of a cache index device. */
static const struct attribute_group cache_default_group = {
	.attrs		= cache_default_attrs,
	.is_visible	= cache_default_attrs_is_visible,
};

/* NULL-terminated group list returned for leaves without a private group. */
static const struct attribute_group *cache_default_groups[] = {
	&cache_default_group,
	NULL,
};

/*
 * NULL-terminated group list returned for leaves with a private group;
 * entry 1 is filled in on first use by cache_get_attribute_groups().
 */
static const struct attribute_group *cache_private_groups[] = {
	&cache_default_group,
	NULL, /* Place holder for private group */
	NULL,
};
/*
 * cache_get_priv_group() - default hook for a leaf's private attribute group
 * @this_leaf: cache leaf being queried (not used by this default)
 *
 * This __weak fallback reports that there is no private group.
 *
 * Return: always NULL.
 */
const struct attribute_group *
__weak cache_get_priv_group(struct cacheinfo *this_leaf)
{
	return NULL;
}
static const struct attribute_group * *
cache_get_attribute_groups ( struct cacheinfo * this_leaf )
{
const struct attribute_group * priv_group =
cache_get_priv_group ( this_leaf ) ;
if ( ! priv_group )
return cache_default_groups ;
if ( ! cache_private_groups [ 1 ] )
cache_private_groups [ 1 ] = priv_group ;
return cache_private_groups ;
}
/* Add/Remove cache interface for CPU device */

/*
 * cpu_cache_sysfs_exit() - remove the sysfs cache devices of @cpu
 * @cpu: logical CPU number
 *
 * If an index table exists, unregisters every non-NULL indexY device in it
 * and frees the table. The cpuX/cache device is then unregistered
 * unconditionally. Both per-CPU pointers are reset to NULL.
 */
static void cpu_cache_sysfs_exit(unsigned int cpu)
{
	struct device **index_devs = per_cpu_index_dev(cpu);
	unsigned int leaf;

	if (index_devs != NULL) {
		for (leaf = 0; leaf < cache_leaves(cpu); leaf++) {
			struct device *index_dev = index_devs[leaf];

			/* Slots of leaves that never got a device stay NULL. */
			if (index_dev)
				device_unregister(index_dev);
		}
		kfree(index_devs);
		per_cpu_index_dev(cpu) = NULL;
	}

	device_unregister(per_cpu_cache_dev(cpu));
	per_cpu_cache_dev(cpu) = NULL;
}
static int cpu_cache_sysfs_init ( unsigned int cpu )
{
struct device * dev = get_cpu_device ( cpu ) ;
if ( per_cpu_cacheinfo ( cpu ) = = NULL )
return - ENOENT ;
per_cpu_cache_dev ( cpu ) = cpu_device_create ( dev , NULL , NULL , " cache " ) ;
if ( IS_ERR ( per_cpu_cache_dev ( cpu ) ) )
return PTR_ERR ( per_cpu_cache_dev ( cpu ) ) ;
/* Allocate all required memory */
per_cpu_index_dev ( cpu ) = kcalloc ( cache_leaves ( cpu ) ,
sizeof ( struct device * ) , GFP_KERNEL ) ;
if ( unlikely ( per_cpu_index_dev ( cpu ) = = NULL ) )
goto err_out ;
return 0 ;
err_out :
cpu_cache_sysfs_exit ( cpu ) ;
return - ENOMEM ;
}
/*
 * cache_add_dev() - register the sysfs cache devices of @cpu
 * @cpu: logical CPU number
 *
 * After cpu_cache_sysfs_init() succeeded, creates one index device per cache
 * leaf below the cpuX/cache device, with the leaf as driver data and the
 * groups chosen by cache_get_attribute_groups(). Leaves with disable_sysfs
 * set are skipped, and the loop stops at the first leaf whose type is
 * CACHE_TYPE_NOCACHE. On success @cpu is set in cache_dev_map.
 *
 * Return: 0 on success, the error of cpu_cache_sysfs_init(), or the
 * cpu_device_create() error, in which case cpu_cache_sysfs_exit() is called
 * before returning.
 */
static int cache_add_dev ( unsigned int cpu )
{
unsigned int i ;
int rc ;
struct device * ci_dev , * parent ;
struct cacheinfo * this_leaf ;
const struct attribute_group * * cache_groups ;
rc = cpu_cache_sysfs_init ( cpu ) ;
if ( unlikely ( rc < 0 ) )
return rc ;
parent = per_cpu_cache_dev ( cpu ) ;
for ( i = 0 ; i < cache_leaves ( cpu ) ; i + + ) {
2022-07-04 11:15:47 +01:00
this_leaf = per_cpu_cacheinfo_idx ( cpu , i ) ;
2014-09-30 14:48:25 +01:00
if ( this_leaf - > disable_sysfs )
continue ;
2018-10-04 09:20:05 -06:00
if ( this_leaf - > type = = CACHE_TYPE_NOCACHE )
break ;
2014-09-30 14:48:25 +01:00
cache_groups = cache_get_attribute_groups ( this_leaf ) ;
ci_dev = cpu_device_create ( parent , this_leaf , cache_groups ,
" index%1u " , i ) ;
if ( IS_ERR ( ci_dev ) ) {
rc = PTR_ERR ( ci_dev ) ;
goto err ;
}
per_cache_index_dev ( cpu , i ) = ci_dev ;
}
/* Record that the devices exist so that cacheinfo_cpu_pre_down() removes them. */
cpumask_set_cpu ( cpu , & cache_dev_map ) ;
return 0 ;
err :
cpu_cache_sysfs_exit ( cpu ) ;
return rc ;
}
2023-10-16 13:29:55 +08:00
/*
* Calculate the size of the per - CPU data cache slice . This can be
* used to estimate the size of the data cache slice that can be used
* by one CPU under ideal circumstances . UNIFIED caches are counted
* in addition to DATA caches . So , please consider code cache usage
* when use the result .
*
* Because the cache inclusive / non - inclusive information isn ' t
* available , we just use the size of the per - CPU slice of LLC to make
* the result more predictable across architectures .
*/
static void update_per_cpu_data_slice_size_cpu ( unsigned int cpu )
{
struct cpu_cacheinfo * ci ;
struct cacheinfo * llc ;
unsigned int nr_shared ;
if ( ! last_level_cache_is_valid ( cpu ) )
return ;
ci = ci_cacheinfo ( cpu ) ;
llc = per_cpu_cacheinfo_idx ( cpu , cache_leaves ( cpu ) - 1 ) ;
if ( llc - > type ! = CACHE_TYPE_DATA & & llc - > type ! = CACHE_TYPE_UNIFIED )
return ;
nr_shared = cpumask_weight ( & llc - > shared_cpu_map ) ;
if ( nr_shared )
ci - > per_cpu_data_slice_size = llc - > size / nr_shared ;
}
/*
 * update_per_cpu_data_slice_size() - refresh the slice size of online CPUs
 * @cpu_online: true if @cpu itself should be refreshed as well
 * @cpu: CPU whose hotplug state is changing
 *
 * Recomputes the per-CPU data slice size of every online CPU. When
 * @cpu_online is false, @cpu is left out of the update.
 */
static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu)
{
	unsigned int other;

	for_each_online_cpu(other) {
		if (cpu_online || other != cpu)
			update_per_cpu_data_slice_size_cpu(other);
	}
}
2016-11-03 15:50:08 +01:00
/*
 * cacheinfo_cpu_online() - CPU hotplug startup callback
 * @cpu: logical CPU number
 *
 * Detects the cache attributes of @cpu, registers its sysfs cache devices,
 * refreshes the per-CPU data slice size of the online CPUs (including @cpu)
 * and finally calls setup_pcp_cacheinfo().
 *
 * Return: 0 on success, the error of detect_cache_attributes(), or the error
 * of cache_add_dev(); in the latter case free_cache_attributes() is called
 * before returning.
 */
static int cacheinfo_cpu_online ( unsigned int cpu )
2014-09-30 14:48:25 +01:00
{
2016-11-03 15:50:08 +01:00
int rc = detect_cache_attributes ( cpu ) ;
2014-09-30 14:48:25 +01:00
2016-11-03 15:50:08 +01:00
if ( rc )
return rc ;
rc = cache_add_dev ( cpu ) ;
if ( rc )
2023-10-16 13:29:55 +08:00
goto err ;
update_per_cpu_data_slice_size ( true , cpu ) ;
mm, pcp: reduce lock contention for draining high-order pages
In commit f26b3fa04611 ("mm/page_alloc: limit number of high-order pages
on PCP during bulk free"), the PCP (Per-CPU Pageset) will be drained when
PCP is mostly used for high-order pages freeing to improve the cache-hot
pages reusing between page allocating and freeing CPUs.
On system with small per-CPU data cache slice, pages shouldn't be cached
before draining to guarantee cache-hot. But on a system with large
per-CPU data cache slice, some pages can be cached before draining to
reduce zone lock contention.
So, in this patch, instead of draining without any caching, "pcp->batch"
pages will be cached in PCP before draining if the size of the per-CPU
data cache slice is more than "3 * batch".
In theory, if the size of per-CPU data cache slice is more than "2 *
batch", we can reuse cache-hot pages between CPUs. But considering the
other usage of cache (code, other data accessing, etc.), "3 * batch" is
used.
Note: "3 * batch" is chosen to make sure the optimization works on recent
x86_64 server CPUs. If you want to increase it, please check whether it
breaks the optimization.
On a 2-socket Intel server with 128 logical CPU, with the patch, the
network bandwidth of the UNIX (AF_UNIX) test case of lmbench test suite
with 16-pair processes increase 70.5%. The cycles% of the spinlock
contention (mostly for zone lock) decreases from 46.1% to 21.3%. The
number of PCP draining for high order pages freeing (free_high) decreases
89.9%. The cache miss rate keeps 0.2%.
Link: https://lkml.kernel.org/r/20231016053002.756205-4-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Hildenbrand <david@redhat.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-10-16 13:29:56 +08:00
setup_pcp_cacheinfo ( ) ;
2023-10-16 13:29:55 +08:00
return 0 ;
err :
free_cache_attributes ( cpu ) ;
2016-11-03 15:50:08 +01:00
return rc ;
2014-09-30 14:48:25 +01:00
}
2016-11-03 15:50:08 +01:00
/*
 * cacheinfo_cpu_pre_down() - CPU hotplug teardown callback
 * @cpu: logical CPU number
 *
 * Removes the sysfs cache devices of @cpu if cache_add_dev() had registered
 * them (tracked in cache_dev_map), calls free_cache_attributes(), refreshes
 * the per-CPU data slice size of the online CPUs other than @cpu and finally
 * calls setup_pcp_cacheinfo().
 *
 * Return: always 0.
 */
static int cacheinfo_cpu_pre_down ( unsigned int cpu )
2014-09-30 14:48:25 +01:00
{
2016-11-03 15:50:08 +01:00
if ( cpumask_test_and_clear_cpu ( cpu , & cache_dev_map ) )
cpu_cache_sysfs_exit ( cpu ) ;
free_cache_attributes ( cpu ) ;
2023-10-16 13:29:55 +08:00
update_per_cpu_data_slice_size ( false , cpu ) ;
mm, pcp: reduce lock contention for draining high-order pages
In commit f26b3fa04611 ("mm/page_alloc: limit number of high-order pages
on PCP during bulk free"), the PCP (Per-CPU Pageset) will be drained when
PCP is mostly used for high-order pages freeing to improve the cache-hot
pages reusing between page allocating and freeing CPUs.
On system with small per-CPU data cache slice, pages shouldn't be cached
before draining to guarantee cache-hot. But on a system with large
per-CPU data cache slice, some pages can be cached before draining to
reduce zone lock contention.
So, in this patch, instead of draining without any caching, "pcp->batch"
pages will be cached in PCP before draining if the size of the per-CPU
data cache slice is more than "3 * batch".
In theory, if the size of per-CPU data cache slice is more than "2 *
batch", we can reuse cache-hot pages between CPUs. But considering the
other usage of cache (code, other data accessing, etc.), "3 * batch" is
used.
Note: "3 * batch" is chosen to make sure the optimization works on recent
x86_64 server CPUs. If you want to increase it, please check whether it
breaks the optimization.
On a 2-socket Intel server with 128 logical CPU, with the patch, the
network bandwidth of the UNIX (AF_UNIX) test case of lmbench test suite
with 16-pair processes increase 70.5%. The cycles% of the spinlock
contention (mostly for zone lock) decreases from 46.1% to 21.3%. The
number of PCP draining for high order pages freeing (free_high) decreases
89.9%. The cache miss rate keeps 0.2%.
Link: https://lkml.kernel.org/r/20231016053002.756205-4-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Hildenbrand <david@redhat.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-10-16 13:29:56 +08:00
setup_pcp_cacheinfo ( ) ;
2016-11-03 15:50:08 +01:00
return 0 ;
2014-09-30 14:48:25 +01:00
}
/*
 * cacheinfo_sysfs_init() - hook cacheinfo into CPU hotplug
 *
 * Registers cacheinfo_cpu_online() and cacheinfo_cpu_pre_down() as the
 * startup and teardown callbacks of the CPUHP_AP_BASE_CACHEINFO_ONLINE
 * hotplug state. Invoked through device_initcall().
 *
 * Return: the value returned by cpuhp_setup_state().
 */
static int __init cacheinfo_sysfs_init ( void )
{
2019-06-24 18:36:56 +01:00
return cpuhp_setup_state ( CPUHP_AP_BASE_CACHEINFO_ONLINE ,
" base/cacheinfo:online " ,
2016-11-03 15:50:08 +01:00
cacheinfo_cpu_online , cacheinfo_cpu_pre_down ) ;
2014-09-30 14:48:25 +01:00
}
device_initcall ( cacheinfo_sysfs_init ) ;