2019-05-19 15:08:55 +03:00
// SPDX-License-Identifier: GPL-2.0-only
2017-10-27 05:42:28 +03:00
/*
 * Housekeeping management. Manage the targets for routine code that can run on
 * any CPU: unbound workqueues, timers, kthreads and any offloadable work.
 *
 * Copyright (C) 2017 Red Hat, Inc., Frederic Weisbecker
 * Copyright (C) 2017-2018 SUSE, Frederic Weisbecker
 *
 */
2022-02-07 18:59:06 +03:00
/*
 * Bit-mask form of the housekeeping types: each HK_FLAG_* is the single bit
 * whose position is the matching HK_TYPE_* value, so several types can be
 * combined in one unsigned long.
 */
enum hk_flags {
	HK_FLAG_TIMER		= BIT(HK_TYPE_TIMER),
	HK_FLAG_RCU		= BIT(HK_TYPE_RCU),
	HK_FLAG_MISC		= BIT(HK_TYPE_MISC),
	HK_FLAG_SCHED		= BIT(HK_TYPE_SCHED),
	HK_FLAG_TICK		= BIT(HK_TYPE_TICK),
	HK_FLAG_DOMAIN		= BIT(HK_TYPE_DOMAIN),
	HK_FLAG_WQ		= BIT(HK_TYPE_WQ),
	HK_FLAG_MANAGED_IRQ	= BIT(HK_TYPE_MANAGED_IRQ),
	HK_FLAG_KTHREAD		= BIT(HK_TYPE_KTHREAD),
};
2018-12-03 12:05:56 +03:00
/*
 * Static key that starts out false. housekeeping_init() enables it once at
 * least one housekeeping flag has been recorded; the accessors in this file
 * test it before looking at the per-type masks.
 */
DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
EXPORT_SYMBOL_GPL(housekeeping_overridden);
2022-02-07 18:59:10 +03:00
/* Housekeeping state recorded from the boot parameters. */
struct housekeeping {
	/* One CPU mask per housekeeping type, indexed by enum hk_type. */
	cpumask_var_t	cpumasks[HK_TYPE_MAX];
	/* HK_FLAG_* bits of the types whose mask has been set up. */
	unsigned long	flags;
};

static struct housekeeping housekeeping;
2017-10-27 05:42:31 +03:00
2022-02-07 18:59:06 +03:00
bool housekeeping_enabled ( enum hk_type type )
2019-07-06 04:26:51 +03:00
{
2022-02-07 18:59:10 +03:00
return ! ! ( housekeeping . flags & BIT ( type ) ) ;
2019-07-06 04:26:51 +03:00
}
EXPORT_SYMBOL_GPL ( housekeeping_enabled ) ;
2022-02-07 18:59:06 +03:00
int housekeeping_any_cpu ( enum hk_type type )
2017-10-27 05:42:31 +03:00
{
2019-06-28 11:51:41 +03:00
int cpu ;
if ( static_branch_unlikely ( & housekeeping_overridden ) ) {
2022-02-07 18:59:10 +03:00
if ( housekeeping . flags & BIT ( type ) ) {
cpu = sched_numa_find_closest ( housekeeping . cpumasks [ type ] , smp_processor_id ( ) ) ;
2019-06-28 11:51:41 +03:00
if ( cpu < nr_cpu_ids )
return cpu ;
2024-04-11 17:39:05 +03:00
cpu = cpumask_any_and ( housekeeping . cpumasks [ type ] , cpu_online_mask ) ;
if ( likely ( cpu < nr_cpu_ids ) )
return cpu ;
/*
* Unless we have another problem this can only happen
* at boot time before start_secondary ( ) brings the 1 st
* housekeeping CPU up .
*/
WARN_ON_ONCE ( system_state = = SYSTEM_RUNNING | |
type ! = HK_TYPE_TIMER ) ;
2019-06-28 11:51:41 +03:00
}
}
2017-10-27 05:42:31 +03:00
return smp_processor_id ( ) ;
}
EXPORT_SYMBOL_GPL ( housekeeping_any_cpu ) ;
2022-02-07 18:59:06 +03:00
const struct cpumask * housekeeping_cpumask ( enum hk_type type )
2017-10-27 05:42:31 +03:00
{
2018-12-03 12:05:56 +03:00
if ( static_branch_unlikely ( & housekeeping_overridden ) )
2022-02-07 18:59:10 +03:00
if ( housekeeping . flags & BIT ( type ) )
return housekeeping . cpumasks [ type ] ;
2017-10-27 05:42:31 +03:00
return cpu_possible_mask ;
}
EXPORT_SYMBOL_GPL ( housekeeping_cpumask ) ;
2022-02-07 18:59:06 +03:00
void housekeeping_affine ( struct task_struct * t , enum hk_type type )
2017-10-27 05:42:31 +03:00
{
2018-12-03 12:05:56 +03:00
if ( static_branch_unlikely ( & housekeeping_overridden ) )
2022-02-07 18:59:10 +03:00
if ( housekeeping . flags & BIT ( type ) )
set_cpus_allowed_ptr ( t , housekeeping . cpumasks [ type ] ) ;
2017-10-27 05:42:31 +03:00
}
EXPORT_SYMBOL_GPL ( housekeeping_affine ) ;
2022-02-07 18:59:06 +03:00
bool housekeeping_test_cpu ( int cpu , enum hk_type type )
2017-10-27 05:42:31 +03:00
{
2018-12-03 12:05:56 +03:00
if ( static_branch_unlikely ( & housekeeping_overridden ) )
2022-02-07 18:59:10 +03:00
if ( housekeeping . flags & BIT ( type ) )
return cpumask_test_cpu ( cpu , housekeeping . cpumasks [ type ] ) ;
2017-10-27 05:42:31 +03:00
return true ;
}
EXPORT_SYMBOL_GPL ( housekeeping_test_cpu ) ;
2017-10-27 05:42:28 +03:00
/*
 * Activate the housekeeping overrides collected from the boot parameters.
 * Nothing happens when no housekeeping flag was recorded.
 */
void __init housekeeping_init(void)
{
	enum hk_type hk;

	if (housekeeping.flags == 0)
		return;

	static_branch_enable(&housekeeping_overridden);

	if (housekeeping.flags & HK_FLAG_TICK)
		sched_tick_offload_init();

	/* We need at least one CPU to handle housekeeping work */
	for_each_set_bit(hk, &housekeeping.flags, HK_TYPE_MAX)
		WARN_ON_ONCE(cpumask_empty(housekeeping.cpumasks[hk]));
}
/*
 * Allocate the mask slot for housekeeping @type and fill it with a copy of
 * @housekeeping_staging.
 */
static void __init housekeeping_setup_type(enum hk_type type,
					   cpumask_var_t housekeeping_staging)
{
	cpumask_var_t *slot = &housekeeping.cpumasks[type];

	alloc_bootmem_cpumask_var(slot);
	cpumask_copy(*slot, housekeeping_staging);
}
2022-02-07 18:59:10 +03:00
/*
 * housekeeping_setup - record the housekeeping masks requested at boot
 * @str:   CPU list from "nohz_full=" or "isolcpus="; it names the CPUs to be
 *         kept OUT of housekeeping
 * @flags: HK_FLAG_* bits of the housekeeping types the list applies to
 *
 * The housekeeping mask is every possible CPU not named in @str. On the first
 * call the mask is stored for each type in @flags. On a later call, types that
 * already have a mask must match the new one exactly, and types not yet set up
 * get the new mask.
 *
 * Returns 1 when the masks were recorded, 0 otherwise (unsupported nohz,
 * unparsable list, nothing to isolate, or a mismatch with an earlier call).
 */
static int __init housekeeping_setup(char *str, unsigned long flags)
{
	cpumask_var_t non_housekeeping_mask, housekeeping_staging;
	unsigned int first_cpu;
	int err = 0;

	/* A first request for tick isolation needs CONFIG_NO_HZ_FULL. */
	if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
		if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) {
			pr_warn("Housekeeping: nohz unsupported."
				" Build with CONFIG_NO_HZ_FULL\n");
			return 0;
		}
	}

	alloc_bootmem_cpumask_var(&non_housekeeping_mask);

	/*
	 * Deliberately no range check against nr_cpu_ids: this is an exclusion
	 * list, so entries naming CPUs that do not exist are harmless, and
	 * rejecting them would make nohz_full= stricter than rcu_nocbs= for the
	 * same list (including the "N" placeholder). The present-CPU check
	 * below is what guarantees a usable housekeeping CPU.
	 */
	if (cpulist_parse(str, non_housekeeping_mask) < 0) {
		pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");
		goto free_non_housekeeping_mask;
	}

	alloc_bootmem_cpumask_var(&housekeeping_staging);
	cpumask_andnot(housekeeping_staging,
		       cpu_possible_mask, non_housekeeping_mask);

	/*
	 * If no present CPU below setup_max_cpus is left for housekeeping,
	 * fall back to the CPU we are running on.
	 */
	first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging);
	if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) {
		__cpumask_set_cpu(smp_processor_id(), housekeeping_staging);
		__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
		if (!housekeeping.flags) {
			pr_warn("Housekeeping: must include one present CPU, "
				"using boot CPU:%d\n", smp_processor_id());
		}
	}

	/* Nothing left to isolate: leave the housekeeping state untouched. */
	if (cpumask_empty(non_housekeeping_mask))
		goto free_housekeeping_staging;

	if (!housekeeping.flags) {
		/* First setup call ("nohz_full=" or "isolcpus=") */
		enum hk_type type;

		for_each_set_bit(type, &flags, HK_TYPE_MAX)
			housekeeping_setup_type(type, housekeeping_staging);
	} else {
		/* Second setup call ("nohz_full=" after "isolcpus=" or the reverse) */
		enum hk_type type;
		unsigned long iter_flags = flags & housekeeping.flags;

		/* Types that were already set up must agree with the new mask. */
		for_each_set_bit(type, &iter_flags, HK_TYPE_MAX) {
			if (!cpumask_equal(housekeeping_staging,
					   housekeeping.cpumasks[type])) {
				pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
				goto free_housekeeping_staging;
			}
		}

		/* Types requested for the first time get the new mask. */
		iter_flags = flags & ~housekeeping.flags;

		for_each_set_bit(type, &iter_flags, HK_TYPE_MAX)
			housekeeping_setup_type(type, housekeeping_staging);
	}

	/* Must run before HK_FLAG_TICK is merged into housekeeping.flags. */
	if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK))
		tick_nohz_full_setup(non_housekeeping_mask);

	housekeeping.flags |= flags;
	err = 1;

free_housekeeping_staging:
	free_bootmem_cpumask_var(housekeeping_staging);
free_non_housekeeping_mask:
	free_bootmem_cpumask_var(non_housekeeping_mask);

	return err;
}
2017-10-27 05:42:37 +03:00
static int __init housekeeping_nohz_full_setup ( char * str )
{
2022-02-07 18:59:10 +03:00
unsigned long flags ;
2017-10-27 05:42:37 +03:00
2020-05-27 17:29:09 +03:00
flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU |
HK_FLAG_MISC | HK_FLAG_KTHREAD ;
2017-10-27 05:42:37 +03:00
return housekeeping_setup ( str , flags ) ;
}
2017-10-27 05:42:36 +03:00
__setup ( " nohz_full= " , housekeeping_nohz_full_setup ) ;
2017-10-27 05:42:37 +03:00
/*
 * Handler for the "isolcpus=" boot parameter.
 *
 * @str is an optional comma-separated list of flags followed by a CPU list.
 * Recognised flags are "nohz" (HK_FLAG_TICK), "domain" (HK_FLAG_DOMAIN) and
 * "managed_irq" (HK_FLAG_MANAGED_IRQ). An unknown flag made only of letters
 * and underscores is skipped with a notice; any other unknown flag rejects
 * the whole parameter. With no recognised flag, HK_FLAG_DOMAIN is used.
 *
 * Returns 0 on an invalid flag, otherwise the result of housekeeping_setup().
 */
static int __init housekeeping_isolcpus_setup(char *str)
{
	unsigned long flags = 0;
	bool illegal = false;
	char *par;
	int len;

	while (isalpha(*str)) {
		if (!strncmp(str, "nohz,", 5)) {
			str += 5;
			flags |= HK_FLAG_TICK;
			continue;
		}

		if (!strncmp(str, "domain,", 7)) {
			str += 7;
			flags |= HK_FLAG_DOMAIN;
			continue;
		}

		if (!strncmp(str, "managed_irq,", 12)) {
			str += 12;
			flags |= HK_FLAG_MANAGED_IRQ;
			continue;
		}

		/*
		 * Skip unknown sub-parameter and validate that it is not
		 * containing an invalid character.
		 */
		for (par = str, len = 0; *str && *str != ','; str++, len++) {
			if (!isalpha(*str) && *str != '_')
				illegal = true;
		}

		if (illegal) {
			pr_warn("isolcpus: Invalid flag %.*s\n", len, par);
			return 0;
		}

		pr_info("isolcpus: Skipped unknown flag %.*s\n", len, par);

		/*
		 * Step over the separator only. If the unknown flag was the
		 * last thing in the string we are on the terminating NUL and
		 * must not advance past it.
		 */
		if (*str == ',')
			str++;
	}

	/* Default behaviour for isolcpus without flags */
	if (!flags)
		flags |= HK_FLAG_DOMAIN;

	return housekeeping_setup(str, flags);
}
__setup("isolcpus=", housekeeping_isolcpus_setup);