2019-01-17 21:13:19 +03:00
/* SPDX-License-Identifier: GPL-2.0+ */
2017-05-02 11:31:18 +03:00
/*
2017-05-02 16:30:12 +03:00
* RCU segmented callback lists, internal-to-rcu header file
2017-05-02 11:31:18 +03:00
*
* Copyright IBM Corporation, 2017
*
2019-01-17 21:13:19 +03:00
* Authors: Paul E. McKenney <paulmck@linux.ibm.com>
2017-05-02 11:31:18 +03:00
*/
# include <linux/rcu_segcblist.h>
2019-07-02 03:36:53 +03:00
/* Return number of callbacks in the specified callback list. */
static inline long rcu_cblist_n_cbs ( struct rcu_cblist * rclp )
{
return READ_ONCE ( rclp - > len ) ;
}
2020-11-18 19:15:41 +03:00
/* Return number of callbacks in segmented callback list by summing seglen. */
long rcu_segcblist_n_segment_cbs ( struct rcu_segcblist * rsclp ) ;
2017-05-02 16:30:12 +03:00
void rcu_cblist_init ( struct rcu_cblist * rclp ) ;
rcu/nocb: Add bypass callback queueing
Use of the rcu_data structure's segmented ->cblist for no-CBs CPUs
takes advantage of unrelated grace periods, thus reducing the memory
footprint in the face of floods of call_rcu() invocations. However,
the ->cblist field is a more-complex rcu_segcblist structure which must
be protected via locking. Even though there are only three entities
which can acquire this lock (the CPU invoking call_rcu(), the no-CBs
grace-period kthread, and the no-CBs callbacks kthread), the contention
on this lock is excessive under heavy stress.
This commit therefore greatly reduces contention by provisioning
an rcu_cblist structure field named ->nocb_bypass within the
rcu_data structure. Each no-CBs CPU is permitted only a limited
number of enqueues onto the ->cblist per jiffy, controlled by a new
nocb_nobypass_lim_per_jiffy kernel boot parameter that defaults to
about 16 enqueues per millisecond (16 * 1000 / HZ). When that limit is
exceeded, the CPU instead enqueues onto the new ->nocb_bypass.
The ->nocb_bypass is flushed into the ->cblist every jiffy or when
the number of callbacks on ->nocb_bypass exceeds qhimark, whichever
happens first. During call_rcu() floods, this flushing is carried out
by the CPU during the course of its call_rcu() invocations. However,
a CPU could simply stop invoking call_rcu() at any time. The no-CBs
grace-period kthread therefore carries out less-aggressive flushing
(every few jiffies or when the number of callbacks on ->nocb_bypass
exceeds (2 * qhimark), whichever comes first). This means that the
no-CBs grace-period kthread cannot be permitted to do unbounded waits
while there are callbacks on ->nocb_bypass. A ->nocb_bypass_timer is
used to provide the needed wakeups.
[ paulmck: Apply Coverity feedback reported by Colin Ian King. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
2019-07-03 02:03:33 +03:00
void rcu_cblist_enqueue ( struct rcu_cblist * rclp , struct rcu_head * rhp ) ;
void rcu_cblist_flush_enqueue ( struct rcu_cblist * drclp ,
struct rcu_cblist * srclp ,
struct rcu_head * rhp ) ;
2017-05-02 16:30:12 +03:00
struct rcu_head * rcu_cblist_dequeue ( struct rcu_cblist * rclp ) ;
2017-05-02 11:31:18 +03:00
/*
* Is the specified rcu_segcblist structure empty ?
*
* But careful ! The fact that the - > head field is NULL does not
* necessarily imply that there are no callbacks associated with
* this structure . When callbacks are being invoked , they are
* removed as a group . If callback invocation must be preempted ,
* the remaining callbacks will be added back to the list . Either
* way , the counts are updated later .
*
* So it is often the case that rcu_segcblist_n_cbs ( ) should be used
* instead .
*/
static inline bool rcu_segcblist_empty ( struct rcu_segcblist * rsclp )
{
2019-05-14 01:57:50 +03:00
return ! READ_ONCE ( rsclp - > head ) ;
2017-05-02 11:31:18 +03:00
}
/* Return number of callbacks in segmented callback list. */
static inline long rcu_segcblist_n_cbs ( struct rcu_segcblist * rsclp )
{
2019-07-02 03:36:53 +03:00
# ifdef CONFIG_RCU_NOCB_CPU
return atomic_long_read ( & rsclp - > len ) ;
# else
2017-05-02 11:31:18 +03:00
return READ_ONCE ( rsclp - > len ) ;
2019-07-02 03:36:53 +03:00
# endif
2017-05-02 11:31:18 +03:00
}
2020-11-13 15:13:16 +03:00
static inline void rcu_segcblist_set_flags ( struct rcu_segcblist * rsclp ,
int flags )
{
rsclp - > flags | = flags ;
}
static inline void rcu_segcblist_clear_flags ( struct rcu_segcblist * rsclp ,
int flags )
{
rsclp - > flags & = ~ flags ;
}
static inline bool rcu_segcblist_test_flags ( struct rcu_segcblist * rsclp ,
int flags )
{
return READ_ONCE ( rsclp - > flags ) & flags ;
}
2017-05-02 11:31:18 +03:00
/*
* Is the specified rcu_segcblist enabled , for example , not corresponding
2019-05-14 19:50:49 +03:00
* to an offline CPU ?
2017-05-02 11:31:18 +03:00
*/
static inline bool rcu_segcblist_is_enabled ( struct rcu_segcblist * rsclp )
{
2020-11-13 15:13:16 +03:00
return rcu_segcblist_test_flags ( rsclp , SEGCBLIST_ENABLED ) ;
2017-05-02 11:31:18 +03:00
}
2019-04-13 01:58:34 +03:00
/* Is the specified rcu_segcblist offloaded? */
static inline bool rcu_segcblist_is_offloaded ( struct rcu_segcblist * rsclp )
{
2020-11-13 15:13:17 +03:00
if ( IS_ENABLED ( CONFIG_RCU_NOCB_CPU ) ) {
/*
* Complete de - offloading happens only when SEGCBLIST_SOFTIRQ_ONLY
* is set .
*/
if ( ! rcu_segcblist_test_flags ( rsclp , SEGCBLIST_SOFTIRQ_ONLY ) )
return true ;
}
return false ;
2019-04-13 01:58:34 +03:00
}
2020-11-13 15:13:27 +03:00
static inline bool rcu_segcblist_completely_offloaded ( struct rcu_segcblist * rsclp )
{
int flags = SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | SEGCBLIST_OFFLOADED ;
if ( IS_ENABLED ( CONFIG_RCU_NOCB_CPU ) ) {
if ( ( rsclp - > flags & flags ) = = flags )
return true ;
}
return false ;
}
2017-05-02 11:31:18 +03:00
/*
* Are all segments following the specified segment of the specified
* rcu_segcblist structure empty of callbacks ? ( The specified
* segment might well contain callbacks . )
*/
static inline bool rcu_segcblist_restempty ( struct rcu_segcblist * rsclp , int seg )
{
2019-05-14 00:36:11 +03:00
return ! READ_ONCE ( * READ_ONCE ( rsclp - > tails [ seg ] ) ) ;
2017-05-02 11:31:18 +03:00
}
2020-12-18 21:20:34 +03:00
/*
* Is the specified segment of the specified rcu_segcblist structure
* empty of callbacks ?
*/
static inline bool rcu_segcblist_segempty ( struct rcu_segcblist * rsclp , int seg )
{
if ( seg = = RCU_DONE_TAIL )
return & rsclp - > head = = rsclp - > tails [ RCU_DONE_TAIL ] ;
return rsclp - > tails [ seg - 1 ] = = rsclp - > tails [ seg ] ;
}
rcu/nocb: Add bypass callback queueing
Use of the rcu_data structure's segmented ->cblist for no-CBs CPUs
takes advantage of unrelated grace periods, thus reducing the memory
footprint in the face of floods of call_rcu() invocations. However,
the ->cblist field is a more-complex rcu_segcblist structure which must
be protected via locking. Even though there are only three entities
which can acquire this lock (the CPU invoking call_rcu(), the no-CBs
grace-period kthread, and the no-CBs callbacks kthread), the contention
on this lock is excessive under heavy stress.
This commit therefore greatly reduces contention by provisioning
an rcu_cblist structure field named ->nocb_bypass within the
rcu_data structure. Each no-CBs CPU is permitted only a limited
number of enqueues onto the ->cblist per jiffy, controlled by a new
nocb_nobypass_lim_per_jiffy kernel boot parameter that defaults to
about 16 enqueues per millisecond (16 * 1000 / HZ). When that limit is
exceeded, the CPU instead enqueues onto the new ->nocb_bypass.
The ->nocb_bypass is flushed into the ->cblist every jiffy or when
the number of callbacks on ->nocb_bypass exceeds qhimark, whichever
happens first. During call_rcu() floods, this flushing is carried out
by the CPU during the course of its call_rcu() invocations. However,
a CPU could simply stop invoking call_rcu() at any time. The no-CBs
grace-period kthread therefore carries out less-aggressive flushing
(every few jiffies or when the number of callbacks on ->nocb_bypass
exceeds (2 * qhimark), whichever comes first). This means that the
no-CBs grace-period kthread cannot be permitted to do unbounded waits
while there are callbacks on ->nocb_bypass. A ->nocb_bypass_timer is
used to provide the needed wakeups.
[ paulmck: Apply Coverity feedback reported by Colin Ian King. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
2019-07-03 02:03:33 +03:00
void rcu_segcblist_inc_len ( struct rcu_segcblist * rsclp ) ;
rcu/tree: Make rcu_do_batch count how many callbacks were executed
The rcu_do_batch() function extracts the ready-to-invoke callbacks
from the rcu_segcblist located in the ->cblist field of the current
CPU's rcu_data structure. These callbacks are first moved to a local
(unsegmented) rcu_cblist. The rcu_do_batch() function then uses this
rcu_cblist's ->len field to count how many CBs it has invoked, but it
does so by counting that field down from zero. Finally, this function
negates the value in this ->len field (resulting in a positive number)
and subtracts the result from the ->len field of the current CPU's
->cblist field.
Except that it is sometimes necessary for rcu_do_batch() to stop invoking
callbacks mid-stream, despite there being more ready to invoke, for
example, if a high-priority task wakes up. In this case the remaining
not-yet-invoked callbacks are requeued back onto the CPU's ->cblist,
but remain in the ready-to-invoke segment of that list. As above, the
negative of the local rcu_cblist's ->len field is still subtracted from
the ->len field of the current CPU's ->cblist field.
The design of counting down from 0 is confusing and error-prone, plus
use of a positive count will make it easier to provide a uniform and
consistent API to deal with the per-segment counts that are added
later in this series. For example, rcu_segcblist_extract_done_cbs()
can unconditionally populate the resulting unsegmented list's ->len
field during extraction.
This commit therefore explicitly counts how many callbacks were executed
in rcu_do_batch() itself, counting up from zero, and then uses that
to update the per-CPU segcb list's ->len field, without relying on the
downcounting of rcl->len from zero.
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Neeraj Upadhyay <neeraju@codeaurora.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2020-11-03 17:25:57 +03:00
void rcu_segcblist_add_len ( struct rcu_segcblist * rsclp , long v ) ;
2017-05-02 16:30:12 +03:00
void rcu_segcblist_init ( struct rcu_segcblist * rsclp ) ;
void rcu_segcblist_disable ( struct rcu_segcblist * rsclp ) ;
2020-11-13 15:13:19 +03:00
void rcu_segcblist_offload ( struct rcu_segcblist * rsclp , bool offload ) ;
2017-05-02 16:30:12 +03:00
bool rcu_segcblist_ready_cbs ( struct rcu_segcblist * rsclp ) ;
bool rcu_segcblist_pend_cbs ( struct rcu_segcblist * rsclp ) ;
struct rcu_head * rcu_segcblist_first_cb ( struct rcu_segcblist * rsclp ) ;
struct rcu_head * rcu_segcblist_first_pend_cb ( struct rcu_segcblist * rsclp ) ;
2019-05-15 19:56:40 +03:00
bool rcu_segcblist_nextgp ( struct rcu_segcblist * rsclp , unsigned long * lp ) ;
2017-05-02 16:30:12 +03:00
/*
 * NOTE(review): declarations only.  The bodies are not visible in this
 * header, so consult the definitions for the exact semantics.
 */
void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, struct rcu_head *rhp);
bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, struct rcu_head *rhp);
2017-05-02 16:30:12 +03:00
void rcu_segcblist_extract_done_cbs ( struct rcu_segcblist * rsclp ,
struct rcu_cblist * rclp ) ;
void rcu_segcblist_extract_pend_cbs ( struct rcu_segcblist * rsclp ,
struct rcu_cblist * rclp ) ;
void rcu_segcblist_insert_count ( struct rcu_segcblist * rsclp ,
struct rcu_cblist * rclp ) ;
void rcu_segcblist_insert_done_cbs ( struct rcu_segcblist * rsclp ,
struct rcu_cblist * rclp ) ;
void rcu_segcblist_insert_pend_cbs ( struct rcu_segcblist * rsclp ,
struct rcu_cblist * rclp ) ;
void rcu_segcblist_advance ( struct rcu_segcblist * rsclp , unsigned long seq ) ;
bool rcu_segcblist_accelerate ( struct rcu_segcblist * rsclp , unsigned long seq ) ;
2017-06-27 17:44:06 +03:00
void rcu_segcblist_merge ( struct rcu_segcblist * dst_rsclp ,
struct rcu_segcblist * src_rsclp ) ;