Merge branches 'doc.2021.11.30c', 'exp.2021.12.07a', 'fastnohz.2021.11.30c', 'fixes.2021.11.30c', 'nocb.2021.12.09a', 'nolibc.2021.11.30c', 'tasks.2021.12.09a', 'torture.2021.12.07a' and 'torturescript.2021.11.30c' into HEAD

doc.2021.11.30c: Documentation updates.
exp.2021.12.07a: Expedited-grace-period fixes.
fastnohz.2021.11.30c: Remove CONFIG_RCU_FAST_NO_HZ.
fixes.2021.11.30c: Miscellaneous fixes.
nocb.2021.12.09a: No-CB CPU updates.
nolibc.2021.11.30c: Tiny in-kernel library updates.
tasks.2021.12.09a: RCU-tasks updates, including update-side scalability.
torture.2021.12.07a: Torture-test in-kernel module updates.
torturescript.2021.11.30c: Torture-test scripting updates.
This commit is contained in:
Paul E. McKenney
2021-12-09 11:38:09 -08:00
51 changed files with 1088 additions and 718 deletions

View File

@ -16,7 +16,7 @@
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
{
/*
* In order to read the offloaded state of an rdp is a safe
* In order to read the offloaded state of an rdp in a safe
* and stable way and prevent from its value to be changed
* under us, we must either hold the barrier mutex, the cpu
* hotplug lock (read or write) or the nocb lock. Local
@ -51,12 +51,10 @@ static void __init rcu_bootup_announce_oddness(void)
RCU_FANOUT);
if (rcu_fanout_exact)
pr_info("\tHierarchical RCU autobalancing is disabled.\n");
if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
if (IS_ENABLED(CONFIG_PROVE_RCU))
pr_info("\tRCU lockdep checking is enabled.\n");
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
pr_info("\tRCU strict (and thus non-scalable) grace periods are enabled.\n");
if (RCU_NUM_LVLS >= 4)
pr_info("\tFour(or more)-level hierarchy is enabled.\n");
if (RCU_FANOUT_LEAF != 16)
@ -88,13 +86,13 @@ static void __init rcu_bootup_announce_oddness(void)
if (rcu_kick_kthreads)
pr_info("\tKick kthreads if too-long grace period.\n");
if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
pr_info("\tRCU callback double-/use-after-free debug enabled.\n");
pr_info("\tRCU callback double-/use-after-free debug is enabled.\n");
if (gp_preinit_delay)
pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
if (gp_init_delay)
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
if (gp_cleanup_delay)
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
if (!use_softirq)
pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
@ -260,10 +258,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
* no need to check for a subsequent expedited GP. (Though we are
* still in a quiescent state in any case.)
*/
if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
rcu_report_exp_rdp(rdp);
else
WARN_ON_ONCE(rdp->exp_deferred_qs);
WARN_ON_ONCE(rdp->cpu_no_qs.b.exp);
}
/*
@ -277,12 +275,16 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
* current task, there might be any number of other tasks blocked while
* in an RCU read-side critical section.
*
* Unlike non-preemptible-RCU, quiescent state reports for expedited
* grace periods are handled separately via deferred quiescent states
* and context switch events.
*
* Callers to this function must disable preemption.
*/
static void rcu_qs(void)
{
RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
if (__this_cpu_read(rcu_data.cpu_no_qs.s)) {
if (__this_cpu_read(rcu_data.cpu_no_qs.b.norm)) {
trace_rcu_grace_period(TPS("rcu_preempt"),
__this_cpu_read(rcu_data.gp_seq),
TPS("cpuqs"));
@ -350,7 +352,7 @@ void rcu_note_context_switch(bool preempt)
* means that we continue to block the current grace period.
*/
rcu_qs();
if (rdp->exp_deferred_qs)
if (rdp->cpu_no_qs.b.exp)
rcu_report_exp_rdp(rdp);
rcu_tasks_qs(current, preempt);
trace_rcu_utilization(TPS("End context switch"));
@ -477,7 +479,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/
special = t->rcu_read_unlock_special;
rdp = this_cpu_ptr(&rcu_data);
if (!special.s && !rdp->exp_deferred_qs) {
if (!special.s && !rdp->cpu_no_qs.b.exp) {
local_irq_restore(flags);
return;
}
@ -497,7 +499,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
* tasks are handled when removing the task from the
* blocked-tasks list below.
*/
if (rdp->exp_deferred_qs)
if (rdp->cpu_no_qs.b.exp)
rcu_report_exp_rdp(rdp);
/* Clean up if blocked during RCU read-side critical section. */
@ -580,7 +582,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/
static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
return (__this_cpu_read(rcu_data.cpu_no_qs.b.exp) ||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
rcu_preempt_depth() == 0;
}
@ -642,7 +644,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
(IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled &&
t->rcu_blocked_node);
// Need to defer quiescent state until everything is enabled.
if (use_softirq && (in_irq() || (expboost && !irqs_were_disabled))) {
if (use_softirq && (in_hardirq() || (expboost && !irqs_were_disabled))) {
// Using softirq, safe to awaken, and either the
// wakeup is free or there is either an expedited
// GP in flight or a potential need to deboost.
@ -845,10 +847,8 @@ static void rcu_qs(void)
trace_rcu_grace_period(TPS("rcu_sched"),
__this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
return;
__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
if (__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
}
/*
@ -925,7 +925,18 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return false;
}
static void rcu_preempt_deferred_qs(struct task_struct *t) { }
// Except that we do need to respond to a request by an expedited grace
// period for a quiescent state from this CPU. Note that requests from
// tasks are handled when removing the task from the blocked-tasks list
// below.
static void rcu_preempt_deferred_qs(struct task_struct *t)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
if (rdp->cpu_no_qs.b.exp)
rcu_report_exp_rdp(rdp);
}
/*
* Because there is no preemptible RCU, there can be no readers blocked,
@ -1153,7 +1164,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
/*
* Create an RCU-boost kthread for the specified node if one does not
* already exist. We only create this kthread for preemptible RCU.
* Returns zero if all is well, a negated errno otherwise.
*/
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
{
@ -1204,8 +1214,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
cpu != outgoingcpu)
cpumask_set_cpu(cpu, cm);
cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU));
if (cpumask_weight(cm) == 0)
cpumask_setall(cm);
cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU));
set_cpus_allowed_ptr(t, cm);
free_cpumask_var(cm);
}
@ -1253,201 +1264,6 @@ static void __init rcu_spawn_boost_kthreads(void)
#endif /* #else #ifdef CONFIG_RCU_BOOST */
#if !defined(CONFIG_RCU_FAST_NO_HZ)
/*
* Check to see if any future non-offloaded RCU-related work will need
* to be done by the current CPU, even if none need be done immediately,
* returning 1 if so. This function is part of the RCU implementation;
* it is -not- an exported member of the RCU API.
*
* Because we not have RCU_FAST_NO_HZ, just check whether or not this
* CPU has RCU callbacks queued.
*/
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
*nextevt = KTIME_MAX;
return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
}
/*
* Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
* after it.
*/
static void rcu_cleanup_after_idle(void)
{
}
/*
* Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
* is nothing.
*/
static void rcu_prepare_for_idle(void)
{
}
#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
/*
* This code is invoked when a CPU goes idle, at which point we want
* to have the CPU do everything required for RCU so that it can enter
* the energy-efficient dyntick-idle mode.
*
* The following preprocessor symbol controls this:
*
* RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
* to sleep in dyntick-idle mode with RCU callbacks pending. This
* is sized to be roughly one RCU grace period. Those energy-efficiency
* benchmarkers who might otherwise be tempted to set this to a large
* number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
* system. And if you are -that- concerned about energy efficiency,
* just power the system down and be done with it!
*
* The value below works well in practice. If future workloads require
* adjustment, they can be converted into kernel config parameters, though
* making the state machine smarter might be a better option.
*/
#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
module_param(rcu_idle_gp_delay, int, 0644);
/*
* Try to advance callbacks on the current CPU, but only if it has been
* awhile since the last time we did so. Afterwards, if there are any
* callbacks ready for immediate invocation, return true.
*/
static bool __maybe_unused rcu_try_advance_all_cbs(void)
{
bool cbs_ready = false;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp;
/* Exit early if we advanced recently. */
if (jiffies == rdp->last_advance_all)
return false;
rdp->last_advance_all = jiffies;
rnp = rdp->mynode;
/*
* Don't bother checking unless a grace period has
* completed since we last checked and there are
* callbacks not yet ready to invoke.
*/
if ((rcu_seq_completed_gp(rdp->gp_seq,
rcu_seq_current(&rnp->gp_seq)) ||
unlikely(READ_ONCE(rdp->gpwrap))) &&
rcu_segcblist_pend_cbs(&rdp->cblist))
note_gp_changes(rdp);
if (rcu_segcblist_ready_cbs(&rdp->cblist))
cbs_ready = true;
return cbs_ready;
}
/*
* Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
* to invoke. If the CPU has callbacks, try to advance them. Tell the
* caller about what to set the timeout.
*
* The caller must have disabled interrupts.
*/
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
unsigned long dj;
lockdep_assert_irqs_disabled();
/* If no non-offloaded callbacks, RCU doesn't need the CPU. */
if (rcu_segcblist_empty(&rdp->cblist) ||
rcu_rdp_is_offloaded(rdp)) {
*nextevt = KTIME_MAX;
return 0;
}
/* Attempt to advance callbacks. */
if (rcu_try_advance_all_cbs()) {
/* Some ready to invoke, so initiate later invocation. */
invoke_rcu_core();
return 1;
}
rdp->last_accelerate = jiffies;
/* Request timer and round. */
dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
*nextevt = basemono + dj * TICK_NSEC;
return 0;
}
/*
* Prepare a CPU for idle from an RCU perspective. The first major task is to
* sense whether nohz mode has been enabled or disabled via sysfs. The second
* major task is to accelerate (that is, assign grace-period numbers to) any
* recently arrived callbacks.
*
* The caller must have disabled interrupts.
*/
static void rcu_prepare_for_idle(void)
{
bool needwake;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp;
int tne;
lockdep_assert_irqs_disabled();
if (rcu_rdp_is_offloaded(rdp))
return;
/* Handle nohz enablement switches conservatively. */
tne = READ_ONCE(tick_nohz_active);
if (tne != rdp->tick_nohz_enabled_snap) {
if (!rcu_segcblist_empty(&rdp->cblist))
invoke_rcu_core(); /* force nohz to see update. */
rdp->tick_nohz_enabled_snap = tne;
return;
}
if (!tne)
return;
/*
* If we have not yet accelerated this jiffy, accelerate all
* callbacks on this CPU.
*/
if (rdp->last_accelerate == jiffies)
return;
rdp->last_accelerate = jiffies;
if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
rnp = rdp->mynode;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
needwake = rcu_accelerate_cbs(rnp, rdp);
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
if (needwake)
rcu_gp_kthread_wake();
}
}
/*
* Clean up for exit from idle. Attempt to advance callbacks based on
* any grace periods that elapsed while the CPU was idle, and if any
* callbacks are now ready to invoke, initiate invocation.
*/
static void rcu_cleanup_after_idle(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
lockdep_assert_irqs_disabled();
if (rcu_rdp_is_offloaded(rdp))
return;
if (rcu_try_advance_all_cbs())
invoke_rcu_core();
}
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
/*
* Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
* grace-period kthread will do force_quiescent_state() processing?
@ -1455,7 +1271,7 @@ static void rcu_cleanup_after_idle(void)
* CPU unless the grace period has extended for too long.
*
* This code relies on the fact that all NO_HZ_FULL CPUs are also
* CONFIG_RCU_NOCB_CPU CPUs.
* RCU_NOCB_CPU CPUs.
*/
static bool rcu_nohz_full_cpu(void)
{