Merge branches 'doc.2023.01.05a', 'fixes.2023.01.23a', 'kvfree.2023.01.03a', 'srcu.2023.01.03a', 'srcu-always.2023.02.02a', 'tasks.2023.01.03a', 'torture.2023.01.05a' and 'torturescript.2023.01.03a' into HEAD
doc.2023.01.05a: Documentation update.
fixes.2023.01.23a: Miscellaneous fixes.
kvfree.2023.01.03a: kvfree_rcu() updates.
srcu.2023.01.03a: SRCU updates.
srcu-always.2023.02.02a: Finish making SRCU be unconditionally available.
tasks.2023.01.03a: Tasks-RCU updates.
torture.2023.01.05a: Torture-test updates.
torturescript.2023.01.03a: Torture-test scripting updates.
commit 8e1704b6a8
@@ -5113,6 +5113,11 @@
rcupdate.rcu_cpu_stall_timeout to be used (after
conversion from seconds to milliseconds).

rcupdate.rcu_exp_stall_task_details= [KNL]
Print stack dumps of any tasks blocking the
current expedited RCU grace period during an
expedited RCU CPU stall warning.

rcupdate.rcu_expedited= [KNL]
Use expedited grace-period primitives, for
example, synchronize_rcu_expedited() instead
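As a usage sketch (assuming the new parameter is a simple boolean flag, which the description above suggests but does not state), it would be enabled on the kernel command line along the lines of:

rcupdate.rcu_exp_stall_task_details=1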
@@ -181,7 +181,6 @@ void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers);

#ifdef CONFIG_SRCU
static DEFINE_MUTEX(device_links_lock);
DEFINE_STATIC_SRCU(device_links_srcu);

@@ -220,47 +219,6 @@ static void device_link_remove_from_lists(struct device_link *link)
list_del_rcu(&link->s_node);
list_del_rcu(&link->c_node);
}
#else /* !CONFIG_SRCU */
static DECLARE_RWSEM(device_links_lock);

static inline void device_links_write_lock(void)
{
down_write(&device_links_lock);
}

static inline void device_links_write_unlock(void)
{
up_write(&device_links_lock);
}

int device_links_read_lock(void)
{
down_read(&device_links_lock);
return 0;
}

void device_links_read_unlock(int not_used)
{
up_read(&device_links_lock);
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
int device_links_read_lock_held(void)
{
return lockdep_is_held(&device_links_lock);
}
#endif

static inline void device_link_synchronize_removal(void)
{
}

static void device_link_remove_from_lists(struct device_link *link)
{
list_del(&link->s_node);
list_del(&link->c_node);
}
#endif /* !CONFIG_SRCU */

static bool device_is_ancestor(struct device *dev, struct device *target)
{
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
menuconfig DAX
tristate "DAX: direct access to differentiated memory"
select SRCU
default m if NVDIMM_DAX

if DAX

@@ -2,7 +2,6 @@
config STM
tristate "System Trace Module devices"
select CONFIGFS_FS
select SRCU
help
A System Trace Module (STM) is a device exporting data in System
Trace Protocol (STP) format as defined by MIPI STP standards.

@@ -6,7 +6,6 @@
menuconfig MD
bool "Multiple devices driver support (RAID and LVM)"
depends on BLOCK
select SRCU
help
Support multiple physical spindles through a single logical device.
Required for RAID and logical volume management.

@@ -334,7 +334,6 @@ config NETCONSOLE_DYNAMIC

config NETPOLL
def_bool NETCONSOLE
select SRCU

config NET_POLL_CONTROLLER
def_bool NETPOLL

@@ -258,7 +258,7 @@ config PCIE_MEDIATEK_GEN3
MediaTek SoCs.

config VMD
depends on PCI_MSI && X86_64 && SRCU && !UML
depends on PCI_MSI && X86_64 && !UML
tristate "Intel Volume Management Device Driver"
help
Adds support for the Intel Volume Management Device (VMD). VMD is a

@@ -17,7 +17,6 @@ config BTRFS_FS
select FS_IOMAP
select RAID6_PQ
select XOR_BLOCKS
select SRCU
depends on PAGE_SIZE_LESS_THAN_256KB

help
fs/locks.c (25 changed lines)
@@ -1889,7 +1889,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
}
EXPORT_SYMBOL(generic_setlease);

#if IS_ENABLED(CONFIG_SRCU)
/*
* Kernel subsystems can register to be notified on any attempt to set
* a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd

@@ -1923,30 +1922,6 @@ void lease_unregister_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(lease_unregister_notifier);

#else /* !IS_ENABLED(CONFIG_SRCU) */
static inline void
lease_notifier_chain_init(void)
{
}

static inline void
setlease_notifier(long arg, struct file_lock *lease)
{
}

int lease_register_notifier(struct notifier_block *nb)
{
return 0;
}
EXPORT_SYMBOL_GPL(lease_register_notifier);

void lease_unregister_notifier(struct notifier_block *nb)
{
}
EXPORT_SYMBOL_GPL(lease_unregister_notifier);

#endif /* IS_ENABLED(CONFIG_SRCU) */

/**
* vfs_setlease - sets a lease on an open file
* @filp: file pointer
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config FSNOTIFY
def_bool n
select SRCU

source "fs/notify/dnotify/Kconfig"
source "fs/notify/inotify/Kconfig"

@@ -6,7 +6,6 @@
config QUOTA
bool "Quota support"
select QUOTACTL
select SRCU
help
If you say Y here, you will be able to set per user limits for disk
usage (also called disk quotas). Currently, it works for the
@@ -139,7 +139,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
if (last) {
n->next = last->next;
n->pprev = &last->next;
rcu_assign_pointer(hlist_next_rcu(last), n);
rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
} else {
hlist_nulls_add_head_rcu(n, h);
}
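For orientation, a minimal sketch of the nulls-list usage that this fix touches, assuming a hypothetical structure, bucket, and locking discipline; only hlist_nulls_add_tail_rcu() and the standard iteration helper come from the kernel API shown above:

struct item {
	int key;
	struct hlist_nulls_node node;
};

static struct hlist_nulls_head bucket;	/* INIT_HLIST_NULLS_HEAD(&bucket, 0) at setup time */

/* Writer side: caller holds the bucket's update-side lock. */
static void publish_item(struct item *it)
{
	hlist_nulls_add_tail_rcu(&it->node, &bucket);
}

/* Reader side: caller holds rcu_read_lock(). */
static struct item *lookup_item(int key)
{
	struct item *it;
	struct hlist_nulls_node *pos;

	hlist_nulls_for_each_entry_rcu(it, pos, &bucket, node)
		if (it->key == key)
			return it;
	return NULL;
}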
@@ -238,6 +238,7 @@ void synchronize_rcu_tasks_rude(void);

#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
void exit_tasks_rcu_start(void);
void exit_tasks_rcu_stop(void);
void exit_tasks_rcu_finish(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
#define rcu_tasks_classic_qs(t, preempt) do { } while (0)

@@ -246,6 +247,7 @@ void exit_tasks_rcu_finish(void);
#define call_rcu_tasks call_rcu
#define synchronize_rcu_tasks synchronize_rcu
static inline void exit_tasks_rcu_start(void) { }
static inline void exit_tasks_rcu_stop(void) { }
static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */

@@ -374,11 +376,18 @@ static inline int debug_lockdep_rcu_enabled(void)
* RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met
* @c: condition to check
* @s: informative message
*
* This checks debug_lockdep_rcu_enabled() before checking (c) to
* prevent early boot splats due to lockdep not yet being initialized,
* and rechecks it after checking (c) to prevent false-positive splats
* due to races with lockdep being disabled. See commit 3066820034b5dd
* ("rcu: Reject RCU_LOCKDEP_WARN() false positives") for more detail.
*/
#define RCU_LOCKDEP_WARN(c, s) \
do { \
static bool __section(".data.unlikely") __warned; \
if ((c) && debug_lockdep_rcu_enabled() && !__warned) { \
if (debug_lockdep_rcu_enabled() && (c) && \
debug_lockdep_rcu_enabled() && !__warned) { \
__warned = true; \
lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
} \
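A typical caller of this macro looks like the hedged sketch below; the function and pointer names are hypothetical, the condition can be any lockdep expression, and the string is free-form:

static struct foo __rcu *global_foo;	/* hypothetical RCU-protected pointer */

static struct foo *get_foo_checked(void)
{
	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
			 "get_foo_checked() called outside of an RCU read-side critical section");
	return rcu_dereference(global_foo);
}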
@@ -1004,6 +1013,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
#define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \
kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__)

#define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
#define kfree_rcu_mightsleep(ptr) kvfree_rcu_mightsleep(ptr)

#define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME
#define kvfree_rcu_arg_2(ptr, rhf) \
do { \

@@ -1011,8 +1023,7 @@ do { \
\
if (___p) { \
BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \
kvfree_call_rcu(&((___p)->rhf), (rcu_callback_t)(unsigned long) \
(offsetof(typeof(*(ptr)), rhf))); \
kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \
} \
} while (0)

@@ -1021,7 +1032,7 @@ do { \
typeof(ptr) ___p = (ptr); \
\
if (___p) \
kvfree_call_rcu(NULL, (rcu_callback_t) (___p)); \
kvfree_call_rcu(NULL, (void *) (___p)); \
} while (0)

/*
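As a brief usage sketch with a hypothetical structure and field names (not taken from this diff): the two-argument form requires an rcu_head embedded in the object and does not block, while the newly named single-argument form may sleep and needs no rcu_head:

struct blob {
	int data;
	struct rcu_head rh;
};

static void blob_release(struct blob *b)
{
	kvfree_rcu(b, rh);		/* two-argument form: uses b->rh, safe in atomic context */
}

static void blob_release_sleepable(struct blob *b)
{
	kvfree_rcu_mightsleep(b);	/* single-argument form: may block, no rcu_head needed */
}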
@@ -98,25 +98,25 @@ static inline void synchronize_rcu_expedited(void)
*/
extern void kvfree(const void *addr);

static inline void __kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
if (head) {
call_rcu(head, func);
call_rcu(head, (rcu_callback_t) ((void *) head - ptr));
return;
}

// kvfree_rcu(one_arg) call.
might_sleep();
synchronize_rcu();
kvfree((void *) func);
kvfree(ptr);
}

#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
#else
static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
static inline void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
__kvfree_call_rcu(head, func);
__kvfree_call_rcu(head, ptr);
}
#endif

@@ -33,7 +33,7 @@ static inline void rcu_virt_note_context_switch(void)
}

void synchronize_rcu_expedited(void);
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
void kvfree_call_rcu(struct rcu_head *head, void *ptr);

void rcu_barrier(void);
bool rcu_eqs_special_set(int cpu);
@@ -214,6 +214,34 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
return retval;
}

/**
* srcu_down_read - register a new reader for an SRCU-protected structure.
* @ssp: srcu_struct in which to register the new reader.
*
* Enter a semaphore-like SRCU read-side critical section. Note that
* SRCU read-side critical sections may be nested. However, it is
* illegal to call anything that waits on an SRCU grace period for the
* same srcu_struct, whether directly or indirectly. Please note that
* one way to indirectly wait on an SRCU grace period is to acquire
* a mutex that is held elsewhere while calling synchronize_srcu() or
* synchronize_srcu_expedited(). But if you want lockdep to help you
* keep this stuff straight, you should instead use srcu_read_lock().
*
* The semaphore-like nature of srcu_down_read() means that the matching
* srcu_up_read() can be invoked from some other context, for example,
* from some other task or from an irq handler. However, neither
* srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler.
*
* Calls to srcu_down_read() may be nested, similar to the manner in
* which calls to down_read() may be nested.
*/
static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp)
{
WARN_ON_ONCE(in_nmi());
srcu_check_nmi_safety(ssp, false);
return __srcu_read_lock(ssp);
}

/**
* srcu_read_unlock - unregister an old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.

@@ -254,6 +282,23 @@ srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
__srcu_read_unlock(ssp, idx);
}

/**
* srcu_up_read - unregister an old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
* @idx: return value from corresponding srcu_read_lock().
*
* Exit an SRCU read-side critical section, but not necessarily from
* the same context as the matching srcu_down_read().
*/
static inline void srcu_up_read(struct srcu_struct *ssp, int idx)
__releases(ssp)
{
WARN_ON_ONCE(idx & ~0x1);
WARN_ON_ONCE(in_nmi());
srcu_check_nmi_safety(ssp, false);
__srcu_read_unlock(ssp, idx);
}

/**
* smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
*
@@ -49,7 +49,7 @@ struct srcu_data {
struct srcu_node {
spinlock_t __private lock;
unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */
/* if greater than ->srcu_gq_seq. */
/* if greater than ->srcu_gp_seq. */
unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */
unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
struct srcu_node *srcu_parent; /* Next up in tree. */
@@ -1865,7 +1865,6 @@ config PERF_EVENTS
default y if PROFILING
depends on HAVE_PERF_EVENTS
select IRQ_WORK
select SRCU
help
Enable kernel support for various performance events provided
by software and hardware.
@@ -46,6 +46,9 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
torture_param(int, stat_interval, 60,
"Number of seconds between stats printk()s");
torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
torture_param(int, rt_boost, 2,
"Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens.");
torture_param(int, verbose, 1,
"Enable verbose debugging printk()s");

@@ -127,15 +130,50 @@ static void torture_lock_busted_write_unlock(int tid __maybe_unused)
/* BUGGY, do not use in real life!!! */
}

static void torture_boost_dummy(struct torture_random_state *trsp)
static void __torture_rt_boost(struct torture_random_state *trsp)
{
/* Only rtmutexes care about priority */
const unsigned int factor = rt_boost_factor;

if (!rt_task(current)) {
/*
* Boost priority once every rt_boost_factor operations. When
* the task tries to take the lock, the rtmutex it will account
* for the new priority, and do any corresponding pi-dance.
*/
if (trsp && !(torture_random(trsp) %
(cxt.nrealwriters_stress * factor))) {
sched_set_fifo(current);
} else /* common case, do nothing */
return;
} else {
/*
* The task will remain boosted for another 10 * rt_boost_factor
* operations, then restored back to its original prio, and so
* forth.
*
* When @trsp is nil, we want to force-reset the task for
* stopping the kthread.
*/
if (!trsp || !(torture_random(trsp) %
(cxt.nrealwriters_stress * factor * 2))) {
sched_set_normal(current, 0);
} else /* common case, do nothing */
return;
}
}

static void torture_rt_boost(struct torture_random_state *trsp)
{
if (rt_boost != 2)
return;

__torture_rt_boost(trsp);
}

static struct lock_torture_ops lock_busted_ops = {
.writelock = torture_lock_busted_write_lock,
.write_delay = torture_lock_busted_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_lock_busted_write_unlock,
.readlock = NULL,
.read_delay = NULL,

@@ -179,7 +217,7 @@ __releases(torture_spinlock)
static struct lock_torture_ops spin_lock_ops = {
.writelock = torture_spin_lock_write_lock,
.write_delay = torture_spin_lock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_spin_lock_write_unlock,
.readlock = NULL,
.read_delay = NULL,

@@ -206,7 +244,7 @@ __releases(torture_spinlock)
static struct lock_torture_ops spin_lock_irq_ops = {
.writelock = torture_spin_lock_write_lock_irq,
.write_delay = torture_spin_lock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_lock_spin_write_unlock_irq,
.readlock = NULL,
.read_delay = NULL,

@@ -275,7 +313,7 @@ __releases(torture_rwlock)
static struct lock_torture_ops rw_lock_ops = {
.writelock = torture_rwlock_write_lock,
.write_delay = torture_rwlock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_rwlock_write_unlock,
.readlock = torture_rwlock_read_lock,
.read_delay = torture_rwlock_read_delay,

@@ -318,7 +356,7 @@ __releases(torture_rwlock)
static struct lock_torture_ops rw_lock_irq_ops = {
.writelock = torture_rwlock_write_lock_irq,
.write_delay = torture_rwlock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_rwlock_write_unlock_irq,
.readlock = torture_rwlock_read_lock_irq,
.read_delay = torture_rwlock_read_delay,

@@ -358,7 +396,7 @@ __releases(torture_mutex)
static struct lock_torture_ops mutex_lock_ops = {
.writelock = torture_mutex_lock,
.write_delay = torture_mutex_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_mutex_unlock,
.readlock = NULL,
.read_delay = NULL,

@@ -456,7 +494,7 @@ static struct lock_torture_ops ww_mutex_lock_ops = {
.exit = torture_ww_mutex_exit,
.writelock = torture_ww_mutex_lock,
.write_delay = torture_mutex_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_ww_mutex_unlock,
.readlock = NULL,
.read_delay = NULL,

@@ -474,37 +512,6 @@ __acquires(torture_rtmutex)
return 0;
}

static void torture_rtmutex_boost(struct torture_random_state *trsp)
{
const unsigned int factor = 50000; /* yes, quite arbitrary */

if (!rt_task(current)) {
/*
* Boost priority once every ~50k operations. When the
* task tries to take the lock, the rtmutex it will account
* for the new priority, and do any corresponding pi-dance.
*/
if (trsp && !(torture_random(trsp) %
(cxt.nrealwriters_stress * factor))) {
sched_set_fifo(current);
} else /* common case, do nothing */
return;
} else {
/*
* The task will remain boosted for another ~500k operations,
* then restored back to its original prio, and so forth.
*
* When @trsp is nil, we want to force-reset the task for
* stopping the kthread.
*/
if (!trsp || !(torture_random(trsp) %
(cxt.nrealwriters_stress * factor * 2))) {
sched_set_normal(current, 0);
} else /* common case, do nothing */
return;
}
}

static void torture_rtmutex_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;

@@ -530,10 +537,18 @@ __releases(torture_rtmutex)
rt_mutex_unlock(&torture_rtmutex);
}

static void torture_rt_boost_rtmutex(struct torture_random_state *trsp)
{
if (!rt_boost)
return;

__torture_rt_boost(trsp);
}

static struct lock_torture_ops rtmutex_lock_ops = {
.writelock = torture_rtmutex_lock,
.write_delay = torture_rtmutex_delay,
.task_boost = torture_rtmutex_boost,
.task_boost = torture_rt_boost_rtmutex,
.writeunlock = torture_rtmutex_unlock,
.readlock = NULL,
.read_delay = NULL,

@@ -600,7 +615,7 @@ __releases(torture_rwsem)
static struct lock_torture_ops rwsem_lock_ops = {
.writelock = torture_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_rwsem_up_write,
.readlock = torture_rwsem_down_read,
.read_delay = torture_rwsem_read_delay,

@@ -652,7 +667,7 @@ static struct lock_torture_ops percpu_rwsem_lock_ops = {
.exit = torture_percpu_rwsem_exit,
.writelock = torture_percpu_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_percpu_rwsem_up_write,
.readlock = torture_percpu_rwsem_down_read,
.read_delay = torture_rwsem_read_delay,
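A hedged usage note: the new behavior is steered entirely by the rt_boost and rt_boost_factor module parameters added at the top of this file, so a run that exercises priority boosting on a non-rtmutex lock type might be invoked along the lines of the following (the torture_type name is an assumption based on the usual locktorture naming, not something shown in this diff):

modprobe locktorture torture_type=mutex_lock rt_boost=2 rt_boost_factor=50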
@@ -456,7 +456,6 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh,
}
EXPORT_SYMBOL_GPL(raw_notifier_call_chain);

#ifdef CONFIG_SRCU
/*
* SRCU notifier chain routines. Registration and unregistration
* use a mutex, and call_chain is synchronized by SRCU (no locks).

@@ -573,8 +572,6 @@ void srcu_init_notifier_head(struct srcu_notifier_head *nh)
}
EXPORT_SYMBOL_GPL(srcu_init_notifier_head);

#endif /* CONFIG_SRCU */

static ATOMIC_NOTIFIER_HEAD(die_chain);

int notrace notify_die(enum die_val val, const char *str,
@@ -244,7 +244,24 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
set_current_state(TASK_INTERRUPTIBLE);
if (pid_ns->pid_allocated == init_pids)
break;
/*
* Release tasks_rcu_exit_srcu to avoid following deadlock:
*
* 1) TASK A unshare(CLONE_NEWPID)
* 2) TASK A fork() twice -> TASK B (child reaper for new ns)
* and TASK C
* 3) TASK B exits, kills TASK C, waits for TASK A to reap it
* 4) TASK A calls synchronize_rcu_tasks()
* -> synchronize_srcu(tasks_rcu_exit_srcu)
* 5) *DEADLOCK*
*
* It is considered safe to release tasks_rcu_exit_srcu here
* because we assume the current task can not be concurrently
* reaped at this point.
*/
exit_tasks_rcu_stop();
schedule();
exit_tasks_rcu_start();
}
__set_current_state(TASK_RUNNING);
@@ -224,6 +224,7 @@ extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
extern int rcu_exp_cpu_stall_timeout;
extern bool rcu_exp_stall_task_details __read_mostly;
int rcu_jiffies_till_stall_check(void);
int rcu_exp_jiffies_till_stall_check(void);

@@ -447,14 +448,20 @@ do { \
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
static inline bool rcu_gp_is_normal(void) { return true; }
static inline bool rcu_gp_is_expedited(void) { return false; }
static inline bool rcu_async_should_hurry(void) { return false; }
static inline void rcu_expedite_gp(void) { }
static inline void rcu_unexpedite_gp(void) { }
static inline void rcu_async_hurry(void) { }
static inline void rcu_async_relax(void) { }
static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
#else /* #ifdef CONFIG_TINY_RCU */
bool rcu_gp_is_normal(void); /* Internal RCU use. */
bool rcu_gp_is_expedited(void); /* Internal RCU use. */
bool rcu_async_should_hurry(void); /* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
void rcu_async_hurry(void);
void rcu_async_relax(void);
void rcupdate_announce_bootup_oddness(void);
#ifdef CONFIG_TASKS_RCU_GENERIC
void show_rcu_tasks_gp_kthreads(void);
@@ -89,7 +89,7 @@ static void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
}

/* Get the length of a segment of the rcu_segcblist structure. */
static long rcu_segcblist_get_seglen(struct rcu_segcblist *rsclp, int seg)
long rcu_segcblist_get_seglen(struct rcu_segcblist *rsclp, int seg)
{
return READ_ONCE(rsclp->seglen[seg]);
}

@@ -15,6 +15,8 @@ static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
return READ_ONCE(rclp->len);
}

long rcu_segcblist_get_seglen(struct rcu_segcblist *rsclp, int seg);

/* Return number of callbacks in segmented callback list by summing seglen. */
long rcu_segcblist_n_segment_cbs(struct rcu_segcblist *rsclp);

@@ -399,7 +399,7 @@ static int torture_readlock_not_held(void)
return rcu_read_lock_bh_held() || rcu_read_lock_sched_held();
}

static int rcu_torture_read_lock(void) __acquires(RCU)
static int rcu_torture_read_lock(void)
{
rcu_read_lock();
return 0;

@@ -441,7 +441,7 @@ rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
}
}

static void rcu_torture_read_unlock(int idx) __releases(RCU)
static void rcu_torture_read_unlock(int idx)
{
rcu_read_unlock();
}

@@ -625,7 +625,7 @@ static struct srcu_struct srcu_ctld;
static struct srcu_struct *srcu_ctlp = &srcu_ctl;
static struct rcu_torture_ops srcud_ops;

static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
static int srcu_torture_read_lock(void)
{
if (cur_ops == &srcud_ops)
return srcu_read_lock_nmisafe(srcu_ctlp);

@@ -652,7 +652,7 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
}
}

static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
static void srcu_torture_read_unlock(int idx)
{
if (cur_ops == &srcud_ops)
srcu_read_unlock_nmisafe(srcu_ctlp, idx);

@@ -814,13 +814,13 @@ static void synchronize_rcu_trivial(void)
}
}

static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
static int rcu_torture_read_lock_trivial(void)
{
preempt_disable();
return 0;
}

static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
static void rcu_torture_read_unlock_trivial(int idx)
{
preempt_enable();
}
@@ -76,6 +76,8 @@ torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s");
// Wait until there are multiple CPUs before starting test.
torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
"Holdoff time before test start (s)");
// Number of typesafe_lookup structures, that is, the degree of concurrency.
torture_param(long, lookup_instances, 0, "Number of typesafe_lookup structures.");
// Number of loops per experiment, all readers execute operations concurrently.
torture_param(long, loops, 10000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.

@@ -124,7 +126,7 @@ static int exp_idx;

// Operations vector for selecting different types of tests.
struct ref_scale_ops {
void (*init)(void);
bool (*init)(void);
void (*cleanup)(void);
void (*readsection)(const int nloops);
void (*delaysection)(const int nloops, const int udl, const int ndl);

@@ -162,8 +164,9 @@ static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl
}
}

static void rcu_sync_scale_init(void)
static bool rcu_sync_scale_init(void)
{
return true;
}

static struct ref_scale_ops rcu_ops = {

@@ -315,9 +318,10 @@ static struct ref_scale_ops refcnt_ops = {
// Definitions for rwlock
static rwlock_t test_rwlock;

static void ref_rwlock_init(void)
static bool ref_rwlock_init(void)
{
rwlock_init(&test_rwlock);
return true;
}

static void ref_rwlock_section(const int nloops)

@@ -351,9 +355,10 @@ static struct ref_scale_ops rwlock_ops = {
// Definitions for rwsem
static struct rw_semaphore test_rwsem;

static void ref_rwsem_init(void)
static bool ref_rwsem_init(void)
{
init_rwsem(&test_rwsem);
return true;
}

static void ref_rwsem_section(const int nloops)

@@ -523,6 +528,237 @@ static struct ref_scale_ops clock_ops = {
.name = "clock"
};

////////////////////////////////////////////////////////////////////////
//
// Methods leveraging SLAB_TYPESAFE_BY_RCU.
//

// Item to look up in a typesafe manner. Array of pointers to these.
struct refscale_typesafe {
atomic_t rts_refctr; // Used by all flavors
spinlock_t rts_lock;
seqlock_t rts_seqlock;
unsigned int a;
unsigned int b;
};

static struct kmem_cache *typesafe_kmem_cachep;
static struct refscale_typesafe **rtsarray;
static long rtsarray_size;
static DEFINE_TORTURE_RANDOM_PERCPU(refscale_rand);
static bool (*rts_acquire)(struct refscale_typesafe *rtsp, unsigned int *start);
static bool (*rts_release)(struct refscale_typesafe *rtsp, unsigned int start);

// Conditionally acquire an explicit in-structure reference count.
static bool typesafe_ref_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
return atomic_inc_not_zero(&rtsp->rts_refctr);
}

// Unconditionally release an explicit in-structure reference count.
static bool typesafe_ref_release(struct refscale_typesafe *rtsp, unsigned int start)
{
if (!atomic_dec_return(&rtsp->rts_refctr)) {
WRITE_ONCE(rtsp->a, rtsp->a + 1);
kmem_cache_free(typesafe_kmem_cachep, rtsp);
}
return true;
}

// Unconditionally acquire an explicit in-structure spinlock.
static bool typesafe_lock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
spin_lock(&rtsp->rts_lock);
return true;
}

// Unconditionally release an explicit in-structure spinlock.
static bool typesafe_lock_release(struct refscale_typesafe *rtsp, unsigned int start)
{
spin_unlock(&rtsp->rts_lock);
return true;
}

// Unconditionally acquire an explicit in-structure sequence lock.
static bool typesafe_seqlock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
*start = read_seqbegin(&rtsp->rts_seqlock);
return true;
}

// Conditionally release an explicit in-structure sequence lock. Return
// true if this release was successful, that is, if no retry is required.
static bool typesafe_seqlock_release(struct refscale_typesafe *rtsp, unsigned int start)
{
return !read_seqretry(&rtsp->rts_seqlock, start);
}

// Do a read-side critical section with the specified delay in
// microseconds and nanoseconds inserted so as to increase probability
// of failure.
static void typesafe_delay_section(const int nloops, const int udl, const int ndl)
{
unsigned int a;
unsigned int b;
int i;
long idx;
struct refscale_typesafe *rtsp;
unsigned int start;

for (i = nloops; i >= 0; i--) {
preempt_disable();
idx = torture_random(this_cpu_ptr(&refscale_rand)) % rtsarray_size;
preempt_enable();
retry:
rcu_read_lock();
rtsp = rcu_dereference(rtsarray[idx]);
a = READ_ONCE(rtsp->a);
if (!rts_acquire(rtsp, &start)) {
rcu_read_unlock();
goto retry;
}
if (a != READ_ONCE(rtsp->a)) {
(void)rts_release(rtsp, start);
rcu_read_unlock();
goto retry;
}
un_delay(udl, ndl);
// Remember, seqlock read-side release can fail.
if (!rts_release(rtsp, start)) {
rcu_read_unlock();
goto retry;
}
b = READ_ONCE(rtsp->a);
WARN_ONCE(a != b, "Re-read of ->a changed from %u to %u.\n", a, b);
b = rtsp->b;
rcu_read_unlock();
WARN_ON_ONCE(a * a != b);
}
}

// Because the acquisition and release methods are expensive, there
// is no point in optimizing away the un_delay() function's two checks.
// Thus simply define typesafe_read_section() as a simple wrapper around
// typesafe_delay_section().
static void typesafe_read_section(const int nloops)
{
typesafe_delay_section(nloops, 0, 0);
}

// Allocate and initialize one refscale_typesafe structure.
static struct refscale_typesafe *typesafe_alloc_one(void)
{
struct refscale_typesafe *rtsp;

rtsp = kmem_cache_alloc(typesafe_kmem_cachep, GFP_KERNEL);
if (!rtsp)
return NULL;
atomic_set(&rtsp->rts_refctr, 1);
WRITE_ONCE(rtsp->a, rtsp->a + 1);
WRITE_ONCE(rtsp->b, rtsp->a * rtsp->a);
return rtsp;
}

// Slab-allocator constructor for refscale_typesafe structures created
// out of a new slab of system memory.
static void refscale_typesafe_ctor(void *rtsp_in)
{
struct refscale_typesafe *rtsp = rtsp_in;

spin_lock_init(&rtsp->rts_lock);
seqlock_init(&rtsp->rts_seqlock);
preempt_disable();
rtsp->a = torture_random(this_cpu_ptr(&refscale_rand));
preempt_enable();
}

static struct ref_scale_ops typesafe_ref_ops;
static struct ref_scale_ops typesafe_lock_ops;
static struct ref_scale_ops typesafe_seqlock_ops;

// Initialize for a typesafe test.
static bool typesafe_init(void)
{
long idx;
long si = lookup_instances;

typesafe_kmem_cachep = kmem_cache_create("refscale_typesafe",
sizeof(struct refscale_typesafe), sizeof(void *),
SLAB_TYPESAFE_BY_RCU, refscale_typesafe_ctor);
if (!typesafe_kmem_cachep)
return false;
if (si < 0)
si = -si * nr_cpu_ids;
else if (si == 0)
si = nr_cpu_ids;
rtsarray_size = si;
rtsarray = kcalloc(si, sizeof(*rtsarray), GFP_KERNEL);
if (!rtsarray)
return false;
for (idx = 0; idx < rtsarray_size; idx++) {
rtsarray[idx] = typesafe_alloc_one();
if (!rtsarray[idx])
return false;
}
if (cur_ops == &typesafe_ref_ops) {
rts_acquire = typesafe_ref_acquire;
rts_release = typesafe_ref_release;
} else if (cur_ops == &typesafe_lock_ops) {
rts_acquire = typesafe_lock_acquire;
rts_release = typesafe_lock_release;
} else if (cur_ops == &typesafe_seqlock_ops) {
rts_acquire = typesafe_seqlock_acquire;
rts_release = typesafe_seqlock_release;
} else {
WARN_ON_ONCE(1);
return false;
}
return true;
}

// Clean up after a typesafe test.
static void typesafe_cleanup(void)
{
long idx;

if (rtsarray) {
for (idx = 0; idx < rtsarray_size; idx++)
kmem_cache_free(typesafe_kmem_cachep, rtsarray[idx]);
kfree(rtsarray);
rtsarray = NULL;
rtsarray_size = 0;
}
kmem_cache_destroy(typesafe_kmem_cachep);
typesafe_kmem_cachep = NULL;
rts_acquire = NULL;
rts_release = NULL;
}

// The typesafe_init() function distinguishes these structures by address.
static struct ref_scale_ops typesafe_ref_ops = {
.init = typesafe_init,
.cleanup = typesafe_cleanup,
.readsection = typesafe_read_section,
.delaysection = typesafe_delay_section,
.name = "typesafe_ref"
};

static struct ref_scale_ops typesafe_lock_ops = {
.init = typesafe_init,
.cleanup = typesafe_cleanup,
.readsection = typesafe_read_section,
.delaysection = typesafe_delay_section,
.name = "typesafe_lock"
};

static struct ref_scale_ops typesafe_seqlock_ops = {
.init = typesafe_init,
.cleanup = typesafe_cleanup,
.readsection = typesafe_read_section,
.delaysection = typesafe_delay_section,
.name = "typesafe_seqlock"
};

static void rcu_scale_one_reader(void)
{
if (readdelay <= 0)

@@ -812,6 +1048,7 @@ ref_scale_init(void)
static struct ref_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
};

if (!torture_init_begin(scale_type, verbose))

@@ -833,7 +1070,10 @@ ref_scale_init(void)
goto unwind;
}
if (cur_ops->init)
cur_ops->init();
if (!cur_ops->init()) {
firsterr = -EUCLEAN;
goto unwind;
}

ref_scale_print_module_parms(cur_ops, "Start of test");

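A hedged usage note: the new typesafe variants are selected through the existing scale_type module parameter together with the lookup_instances and loops parameters added above, along the lines of the following illustrative invocation (exact flags are an assumption, not shown in this diff):

modprobe refscale scale_type=typesafe_seqlock lookup_instances=-1 loops=10000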
@@ -154,7 +154,7 @@ static void init_srcu_struct_data(struct srcu_struct *ssp)
*/
static inline bool srcu_invl_snp_seq(unsigned long s)
{
return rcu_seq_state(s) == SRCU_SNP_INIT_SEQ;
return s == SRCU_SNP_INIT_SEQ;
}

/*

@@ -469,24 +469,59 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)

/*
* If the locks are the same as the unlocks, then there must have
* been no readers on this index at some time in between. This does
* not mean that there are no more readers, as one could have read
* the current index but not have incremented the lock counter yet.
* been no readers on this index at some point in this function.
* But there might be more readers, as a task might have read
* the current ->srcu_idx but not yet have incremented its CPU's
* ->srcu_lock_count[idx] counter. In fact, it is possible
* that most of the tasks have been preempted between fetching
* ->srcu_idx and incrementing ->srcu_lock_count[idx]. And there
* could be almost (ULONG_MAX / sizeof(struct task_struct)) tasks
* in a system whose address space was fully populated with memory.
* Call this quantity Nt.
*
* So suppose that the updater is preempted here for so long
* that more than ULONG_MAX non-nested readers come and go in
* the meantime. It turns out that this cannot result in overflow
* because if a reader modifies its unlock count after we read it
* above, then that reader's next load of ->srcu_idx is guaranteed
* to get the new value, which will cause it to operate on the
* other bank of counters, where it cannot contribute to the
* overflow of these counters. This means that there is a maximum
* of 2*NR_CPUS increments, which cannot overflow given current
* systems, especially not on 64-bit systems.
* So suppose that the updater is preempted at this point in the
* code for a long time. That now-preempted updater has already
* flipped ->srcu_idx (possibly during the preceding grace period),
* done an smp_mb() (again, possibly during the preceding grace
* period), and summed up the ->srcu_unlock_count[idx] counters.
* How many times can a given one of the aforementioned Nt tasks
* increment the old ->srcu_idx value's ->srcu_lock_count[idx]
* counter, in the absence of nesting?
*
* OK, how about nesting? This does impose a limit on nesting
* of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
* especially on 64-bit systems.
* It can clearly do so once, given that it has already fetched
* the old value of ->srcu_idx and is just about to use that value
* to index its increment of ->srcu_lock_count[idx]. But as soon as
* it leaves that SRCU read-side critical section, it will increment
* ->srcu_unlock_count[idx], which must follow the updater's above
* read from that same value. Thus, as soon the reading task does
* an smp_mb() and a later fetch from ->srcu_idx, that task will be
* guaranteed to get the new index. Except that the increment of
* ->srcu_unlock_count[idx] in __srcu_read_unlock() is after the
* smp_mb(), and the fetch from ->srcu_idx in __srcu_read_lock()
* is before the smp_mb(). Thus, that task might not see the new
* value of ->srcu_idx until the -second- __srcu_read_lock(),
* which in turn means that this task might well increment
* ->srcu_lock_count[idx] for the old value of ->srcu_idx twice,
* not just once.
*
* However, it is important to note that a given smp_mb() takes
* effect not just for the task executing it, but also for any
* later task running on that same CPU.
*
* That is, there can be almost Nt + Nc further increments of
* ->srcu_lock_count[idx] for the old index, where Nc is the number
* of CPUs. But this is OK because the size of the task_struct
* structure limits the value of Nt and current systems limit Nc
* to a few thousand.
*
* OK, but what about nesting? This does impose a limit on
* nesting of half of the size of the task_struct structure
* (measured in bytes), which should be sufficient. A late 2022
* TREE01 rcutorture run reported this size to be no less than
* 9408 bytes, allowing up to 4704 levels of nesting, which is
* comfortably beyond excessive. Especially on 64-bit systems,
* which are unlikely to be configured with an address space fully
* populated with memory, at least not anytime soon.
*/
return srcu_readers_lock_idx(ssp, idx) == unlocks;
}
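To make the closing bound concrete, restating only the numbers already quoted in the comment above: with sizeof(struct task_struct) reported as 9408 bytes, the nesting limit of half that size is 9408 / 2 = 4704 levels, and on a 64-bit system Nt = ULONG_MAX / 9408 is roughly 2 x 10^15 tasks, so neither limit is approachable in practice.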
@@ -726,7 +761,7 @@ static void srcu_gp_start(struct srcu_struct *ssp)
int state;

if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
sdp = per_cpu_ptr(ssp->sda, 0);
sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
else
sdp = this_cpu_ptr(ssp->sda);
lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));

@@ -837,7 +872,8 @@ static void srcu_gp_end(struct srcu_struct *ssp)
/* Initiate callback invocation as needed. */
ss_state = smp_load_acquire(&ssp->srcu_size_state);
if (ss_state < SRCU_SIZE_WAIT_BARRIER) {
srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, 0), cbdelay);
srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, get_boot_cpu_id()),
cbdelay);
} else {
idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
srcu_for_each_node_breadth_first(ssp, snp) {

@@ -914,7 +950,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
if (snp)
for (; snp != NULL; snp = snp->srcu_parent) {
sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp);
if (rcu_seq_done(&ssp->srcu_gp_seq, s) ||
if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) ||
(!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
return;
spin_lock_irqsave_rcu_node(snp, flags);

@@ -941,6 +977,9 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
*
* Note that this function also does the work of srcu_funnel_exp_start(),
* in some cases by directly invoking it.
*
* The srcu read lock should be hold around this function. And s is a seq snap
* after holding that lock.
*/
static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
unsigned long s, bool do_norm)

@@ -961,7 +1000,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
if (snp_leaf)
/* Each pass through the loop does one level of the srcu_node tree. */
for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != snp_leaf)
if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) && snp != snp_leaf)
return; /* GP already done and CBs recorded. */
spin_lock_irqsave_rcu_node(snp, flags);
snp_seq = snp->srcu_have_cbs[idx];

@@ -998,8 +1037,8 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);

/* If grace period not already done and none in progress, start it. */
if (!rcu_seq_done(&ssp->srcu_gp_seq, s) &&
/* If grace period not already in progress, start it. */
if (!WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) &&
rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) {
WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
srcu_gp_start(ssp);

@@ -1059,10 +1098,11 @@ static void srcu_flip(struct srcu_struct *ssp)

/*
* Ensure that if the updater misses an __srcu_read_unlock()
* increment, that task's next __srcu_read_lock() will see the
* above counter update. Note that both this memory barrier
* and the one in srcu_readers_active_idx_check() provide the
* guarantee for __srcu_read_lock().
* increment, that task's __srcu_read_lock() following its next
* __srcu_read_lock() or __srcu_read_unlock() will see the above
* counter update. Note that both this memory barrier and the
* one in srcu_readers_active_idx_check() provide the guarantee
* for __srcu_read_lock().
*/
smp_mb(); /* D */ /* Pairs with C. */
}

@@ -1161,7 +1201,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
idx = __srcu_read_lock_nmisafe(ssp);
ss_state = smp_load_acquire(&ssp->srcu_size_state);
if (ss_state < SRCU_SIZE_WAIT_CALL)
sdp = per_cpu_ptr(ssp->sda, 0);
sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
else
sdp = raw_cpu_ptr(ssp->sda);
spin_lock_irqsave_sdp_contention(sdp, &flags);

@@ -1497,7 +1537,7 @@ void srcu_barrier(struct srcu_struct *ssp)

idx = __srcu_read_lock_nmisafe(ssp);
if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, get_boot_cpu_id()));
else
for_each_possible_cpu(cpu)
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
@@ -384,6 +384,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
{
int cpu;
unsigned long flags;
bool gpdone = poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq);
long n;
long ncbs = 0;
long ncbsnz = 0;

@@ -425,21 +426,23 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids));
smp_store_release(&rtp->percpu_enqueue_lim, 1);
rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
gpdone = false;
pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
}
if (rcu_task_cb_adjust && !ncbsnz &&
poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq)) {
if (rcu_task_cb_adjust && !ncbsnz && gpdone) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim < rtp->percpu_dequeue_lim) {
WRITE_ONCE(rtp->percpu_dequeue_lim, 1);
pr_info("Completing switch %s to CPU-0 callback queuing.\n", rtp->name);
}
for (cpu = rtp->percpu_dequeue_lim; cpu < nr_cpu_ids; cpu++) {
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
if (rtp->percpu_dequeue_lim == 1) {
for (cpu = rtp->percpu_dequeue_lim; cpu < nr_cpu_ids; cpu++) {
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);

WARN_ON_ONCE(rcu_segcblist_n_cbs(&rtpcp->cblist));
WARN_ON_ONCE(rcu_segcblist_n_cbs(&rtpcp->cblist));
}
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
}

@@ -560,8 +563,9 @@ static int __noreturn rcu_tasks_kthread(void *arg)
static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
{
/* Complain if the scheduler has not started. */
WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
"synchronize_rcu_tasks called too soon");
if (WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
"synchronize_%s() called too soon", rtp->name))
return;

// If the grace-period kthread is running, use it.
if (READ_ONCE(rtp->kthread_ptr)) {

@@ -827,11 +831,21 @@ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
static void rcu_tasks_postscan(struct list_head *hop)
{
/*
* Wait for tasks that are in the process of exiting. This
* does only part of the job, ensuring that all tasks that were
* previously exiting reach the point where they have disabled
* preemption, allowing the later synchronize_rcu() to finish
* the job.
* Exiting tasks may escape the tasklist scan. Those are vulnerable
* until their final schedule() with TASK_DEAD state. To cope with
* this, divide the fragile exit path part in two intersecting
* read side critical sections:
*
* 1) An _SRCU_ read side starting before calling exit_notify(),
* which may remove the task from the tasklist, and ending after
* the final preempt_disable() call in do_exit().
*
* 2) An _RCU_ read side starting with the final preempt_disable()
* call in do_exit() and ending with the final call to schedule()
* with TASK_DEAD state.
*
* This handles the part 1). And postgp will handle part 2) with a
* call to synchronize_rcu().
*/
synchronize_srcu(&tasks_rcu_exit_srcu);
}

@@ -898,7 +912,10 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp)
*
* In addition, this synchronize_rcu() waits for exiting tasks
* to complete their final preempt_disable() region of execution,
* cleaning up after the synchronize_srcu() above.
* cleaning up after synchronize_srcu(&tasks_rcu_exit_srcu),
* enforcing the whole region before tasklist removal until
* the final schedule() with TASK_DEAD state to be an RCU TASKS
* read side critical section.
*/
synchronize_rcu();
}

@@ -988,27 +1005,42 @@ void show_rcu_tasks_classic_gp_kthread(void)
EXPORT_SYMBOL_GPL(show_rcu_tasks_classic_gp_kthread);
#endif // !defined(CONFIG_TINY_RCU)

/* Do the srcu_read_lock() for the above synchronize_srcu(). */
/*
* Contribute to protect against tasklist scan blind spot while the
* task is exiting and may be removed from the tasklist. See
* corresponding synchronize_srcu() for further details.
*/
void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
{
preempt_disable();
current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
preempt_enable();
}

/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
/*
* Contribute to protect against tasklist scan blind spot while the
* task is exiting and may be removed from the tasklist. See
* corresponding synchronize_srcu() for further details.
*/
void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu)
{
struct task_struct *t = current;

preempt_disable();
__srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
preempt_enable();
exit_tasks_rcu_finish_trace(t);
}

/*
* Contribute to protect against tasklist scan blind spot while the
* task is exiting and may be removed from the tasklist. See
* corresponding synchronize_srcu() for further details.
*/
void exit_tasks_rcu_finish(void)
{
exit_tasks_rcu_stop();
exit_tasks_rcu_finish_trace(current);
}

#else /* #ifdef CONFIG_TASKS_RCU */
void exit_tasks_rcu_start(void) { }
void exit_tasks_rcu_stop(void) { }
void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
#endif /* #else #ifdef CONFIG_TASKS_RCU */

@@ -1036,9 +1068,6 @@ static void rcu_tasks_be_rude(struct work_struct *work)
// Wait for one rude RCU-tasks grace period.
static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
{
if (num_online_cpus() <= 1)
return; // Fastpath for only one CPU.

rtp->n_ipis += cpumask_weight(cpu_online_mask);
schedule_on_each_cpu(rcu_tasks_be_rude);
}

@@ -1815,23 +1844,21 @@ static void test_rcu_tasks_callback(struct rcu_head *rhp)

static void rcu_tasks_initiate_self_tests(void)
{
unsigned long j = jiffies;

pr_info("Running RCU-tasks wait API self tests\n");
#ifdef CONFIG_TASKS_RCU
tests[0].runstart = j;
tests[0].runstart = jiffies;
synchronize_rcu_tasks();
call_rcu_tasks(&tests[0].rh, test_rcu_tasks_callback);
#endif

#ifdef CONFIG_TASKS_RUDE_RCU
tests[1].runstart = j;
tests[1].runstart = jiffies;
synchronize_rcu_tasks_rude();
call_rcu_tasks_rude(&tests[1].rh, test_rcu_tasks_callback);
#endif

#ifdef CONFIG_TASKS_TRACE_RCU
tests[2].runstart = j;
tests[2].runstart = jiffies;
synchronize_rcu_tasks_trace();
call_rcu_tasks_trace(&tests[2].rh, test_rcu_tasks_callback);
#endif

@@ -246,15 +246,12 @@ bool poll_state_synchronize_rcu(unsigned long oldstate)
EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);

#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
if (head) {
void *ptr = (void *) head - (unsigned long) func;

if (head)
kasan_record_aux_stack_noalloc(ptr);
}

__kvfree_call_rcu(head, func);
__kvfree_call_rcu(head, ptr);
}
EXPORT_SYMBOL_GPL(kvfree_call_rcu);
#endif
@@ -144,14 +144,16 @@ static int rcu_scheduler_fully_active __read_mostly;

static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
unsigned long gps, unsigned long flags);
static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);
static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp);
static bool rcu_rdp_cpu_online(struct rcu_data *rdp);
static bool rcu_init_invoked(void);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);

/*
* rcuc/rcub/rcuop kthread realtime priority. The "rcuop"

@@ -214,27 +216,6 @@ EXPORT_SYMBOL_GPL(rcu_get_gp_kthreads_prio);
*/
#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays for debugging. */

/*
* Compute the mask of online CPUs for the specified rcu_node structure.
* This will not be stable unless the rcu_node structure's ->lock is
* held, but the bit corresponding to the current CPU will be stable
* in most contexts.
*/
static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
{
return READ_ONCE(rnp->qsmaskinitnext);
}

/*
* Is the CPU corresponding to the specified rcu_data structure online
* from RCU's perspective? This perspective is given by that structure's
* ->qsmaskinitnext field rather than by the global cpu_online_mask.
*/
static bool rcu_rdp_cpu_online(struct rcu_data *rdp)
{
return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode));
}

/*
* Return true if an RCU grace period is in progress. The READ_ONCE()s
* permit this function to be invoked without holding the root rcu_node

@@ -734,46 +715,6 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
}

#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)

/*
* Is the current CPU online as far as RCU is concerned?
*
* Disable preemption to avoid false positives that could otherwise
* happen due to the current CPU number being sampled, this task being
* preempted, its old CPU being taken offline, resuming on some other CPU,
* then determining that its old CPU is now offline.
*
* Disable checking if in an NMI handler because we cannot safely
* report errors from NMI handlers anyway. In addition, it is OK to use
* RCU on an offline processor during initial boot, hence the check for
* rcu_scheduler_fully_active.
*/
bool rcu_lockdep_current_cpu_online(void)
{
struct rcu_data *rdp;
bool ret = false;

if (in_nmi() || !rcu_scheduler_fully_active)
return true;
preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
/*
* Strictly, we care here about the case where the current CPU is
* in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
* not being up to date. So arch_spin_is_locked() might have a
* false positive if it's held by some *other* CPU, but that's
* OK because that just means a false *negative* on the warning.
*/
if (rcu_rdp_cpu_online(rdp) || arch_spin_is_locked(&rcu_state.ofl_lock))
ret = true;
preempt_enable_notrace();
return ret;
}
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);

#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */

/*
* When trying to report a quiescent state on behalf of some other CPU,
* it is our responsibility to check for and handle potential overflow

@@ -1350,13 +1291,6 @@ static void rcu_strict_gp_boundary(void *unused)
invoke_rcu_core();
}

// Has rcu_init() been invoked? This is used (for example) to determine
|
||||
// whether spinlocks may be acquired safely.
|
||||
static bool rcu_init_invoked(void)
|
||||
{
|
||||
return !!rcu_state.n_online_cpus;
|
||||
}
|
||||
|
||||
// Make the polled API aware of the beginning of a grace period.
|
||||
static void rcu_poll_gp_seq_start(unsigned long *snap)
|
||||
{
|
||||
@ -2091,92 +2025,6 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
|
||||
rcu_report_qs_rdp(rdp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Near the end of the offline process. Trace the fact that this CPU
|
||||
* is going offline.
|
||||
*/
|
||||
int rcutree_dying_cpu(unsigned int cpu)
|
||||
{
|
||||
bool blkd;
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
|
||||
return 0;
|
||||
|
||||
blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
|
||||
trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
|
||||
blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* All CPUs for the specified rcu_node structure have gone offline,
|
||||
* and all tasks that were preempted within an RCU read-side critical
|
||||
* section while running on one of those CPUs have since exited their RCU
|
||||
* read-side critical section. Some other CPU is reporting this fact with
|
||||
* the specified rcu_node structure's ->lock held and interrupts disabled.
|
||||
* This function therefore goes up the tree of rcu_node structures,
|
||||
* clearing the corresponding bits in the ->qsmaskinit fields. Note that
|
||||
* the leaf rcu_node structure's ->qsmaskinit field has already been
|
||||
* updated.
|
||||
*
|
||||
* This function does check that the specified rcu_node structure has
|
||||
* all CPUs offline and no blocked tasks, so it is OK to invoke it
|
||||
* prematurely. That said, invoking it after the fact will cost you
|
||||
* a needless lock acquisition. So once it has done its work, don't
|
||||
* invoke it again.
|
||||
*/
|
||||
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
|
||||
{
|
||||
long mask;
|
||||
struct rcu_node *rnp = rnp_leaf;
|
||||
|
||||
raw_lockdep_assert_held_rcu_node(rnp_leaf);
|
||||
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
|
||||
WARN_ON_ONCE(rnp_leaf->qsmaskinit) ||
|
||||
WARN_ON_ONCE(rcu_preempt_has_tasks(rnp_leaf)))
|
||||
return;
|
||||
for (;;) {
|
||||
mask = rnp->grpmask;
|
||||
rnp = rnp->parent;
|
||||
if (!rnp)
|
||||
break;
|
||||
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
|
||||
rnp->qsmaskinit &= ~mask;
|
||||
/* Between grace periods, so better already be zero! */
|
||||
WARN_ON_ONCE(rnp->qsmask);
|
||||
if (rnp->qsmaskinit) {
|
||||
raw_spin_unlock_rcu_node(rnp);
|
||||
/* irqs remain disabled. */
|
||||
return;
|
||||
}
|
||||
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The CPU has been completely removed, and some other CPU is reporting
|
||||
* this fact from process context. Do the remainder of the cleanup.
|
||||
* There can only be one CPU hotplug operation at a time, so no need for
|
||||
* explicit locking.
|
||||
*/
|
||||
int rcutree_dead_cpu(unsigned int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
|
||||
|
||||
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
|
||||
return 0;
|
||||
|
||||
WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
|
||||
/* Adjust any no-longer-needed kthreads. */
|
||||
rcu_boost_kthread_setaffinity(rnp, -1);
|
||||
// Stop-machine done, so allow nohz_full to disable tick.
|
||||
tick_dep_clear(TICK_DEP_BIT_RCU);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoke any RCU callbacks that have made it to the end of their grace
|
||||
* period. Throttle as specified by rdp->blimit.
|
||||
@ -2209,7 +2057,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
||||
*/
|
||||
rcu_nocb_lock_irqsave(rdp, flags);
|
||||
WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
|
||||
pending = rcu_segcblist_n_cbs(&rdp->cblist);
|
||||
pending = rcu_segcblist_get_seglen(&rdp->cblist, RCU_DONE_TAIL);
|
||||
div = READ_ONCE(rcu_divisor);
|
||||
div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
|
||||
bl = max(rdp->blimit, pending >> div);
|
||||
@ -2727,10 +2575,11 @@ static void check_cb_ovld(struct rcu_data *rdp)
|
||||
}
|
||||
|
||||
static void
|
||||
__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy)
|
||||
__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in)
|
||||
{
|
||||
static atomic_t doublefrees;
|
||||
unsigned long flags;
|
||||
bool lazy;
|
||||
struct rcu_data *rdp;
|
||||
bool was_alldone;
|
||||
|
||||
@ -2755,6 +2604,7 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy)
|
||||
kasan_record_aux_stack_noalloc(head);
|
||||
local_irq_save(flags);
|
||||
rdp = this_cpu_ptr(&rcu_data);
|
||||
lazy = lazy_in && !rcu_async_should_hurry();
|
||||
|
||||
/* Add the callback to our list. */
|
||||
if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) {
|
||||
@ -2876,13 +2726,15 @@ EXPORT_SYMBOL_GPL(call_rcu);
|
||||
|
||||
/**
|
||||
* struct kvfree_rcu_bulk_data - single block to store kvfree_rcu() pointers
|
||||
* @list: List node. All blocks are linked between each other
|
||||
* @gp_snap: Snapshot of RCU state for objects placed to this bulk
|
||||
* @nr_records: Number of active pointers in the array
|
||||
* @next: Next bulk object in the block chain
|
||||
* @records: Array of the kvfree_rcu() pointers
|
||||
*/
|
||||
struct kvfree_rcu_bulk_data {
|
||||
struct list_head list;
|
||||
unsigned long gp_snap;
|
||||
unsigned long nr_records;
|
||||
struct kvfree_rcu_bulk_data *next;
|
||||
void *records[];
|
||||
};
|
||||
|
||||
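The @gp_snap field documented above is central to the reworked batching: each block records the grace-period state that was current when it was queued, so the drain path can later ask whether that grace period has already elapsed instead of unconditionally scheduling new RCU work. A hedged sketch of that snapshot/poll pairing:

#include <linux/rcupdate.h>

struct block_sketch {
	unsigned long gp_snap;	/* grace-period cookie for this block */
};

static void block_queued(struct block_sketch *b)
{
	/* Record "now" in grace-period terms when the block is queued. */
	b->gp_snap = get_state_synchronize_rcu();
}

static bool block_is_reclaimable(struct block_sketch *b)
{
	/* True once a full grace period has elapsed since block_queued(). */
	return poll_state_synchronize_rcu(b->gp_snap);
}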
@ -2898,26 +2750,28 @@ struct kvfree_rcu_bulk_data {
|
||||
* struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
|
||||
* @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
|
||||
* @head_free: List of kfree_rcu() objects waiting for a grace period
|
||||
* @bkvhead_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
|
||||
* @bulk_head_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
|
||||
* @krcp: Pointer to @kfree_rcu_cpu structure
|
||||
*/
|
||||
|
||||
struct kfree_rcu_cpu_work {
|
||||
struct rcu_work rcu_work;
|
||||
struct rcu_head *head_free;
|
||||
struct kvfree_rcu_bulk_data *bkvhead_free[FREE_N_CHANNELS];
|
||||
struct list_head bulk_head_free[FREE_N_CHANNELS];
|
||||
struct kfree_rcu_cpu *krcp;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
|
||||
* @head: List of kfree_rcu() objects not yet waiting for a grace period
|
||||
* @bkvhead: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period
|
||||
* @head_gp_snap: Snapshot of RCU state for objects placed to "@head"
|
||||
* @bulk_head: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period
|
||||
* @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
|
||||
* @lock: Synchronize access to this structure
|
||||
* @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
|
||||
* @initialized: The @rcu_work fields have been initialized
|
||||
* @count: Number of objects for which GP not started
|
||||
* @head_count: Number of objects in rcu_head singular list
|
||||
* @bulk_count: Number of objects in bulk-list
|
||||
* @bkvcache:
|
||||
* A simple cache list that contains objects for reuse purpose.
|
||||
* In order to save some per-cpu space the list is singular.
|
||||
@ -2935,13 +2789,20 @@ struct kfree_rcu_cpu_work {
|
||||
* the interactions with the slab allocators.
|
||||
*/
|
||||
struct kfree_rcu_cpu {
|
||||
// Objects queued on a linked list
|
||||
// through their rcu_head structures.
|
||||
struct rcu_head *head;
|
||||
struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS];
|
||||
unsigned long head_gp_snap;
|
||||
atomic_t head_count;
|
||||
|
||||
// Objects queued on a bulk-list.
|
||||
struct list_head bulk_head[FREE_N_CHANNELS];
|
||||
atomic_t bulk_count[FREE_N_CHANNELS];
|
||||
|
||||
struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
|
||||
raw_spinlock_t lock;
|
||||
struct delayed_work monitor_work;
|
||||
bool initialized;
|
||||
int count;
|
||||
|
||||
struct delayed_work page_cache_work;
|
||||
atomic_t backoff_page_cache_fill;
|
||||
@ -3029,82 +2890,51 @@ drain_page_cache(struct kfree_rcu_cpu *krcp)
|
||||
return freed;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is invoked in workqueue context after a grace period.
|
||||
* It frees all the objects queued on ->bkvhead_free or ->head_free.
|
||||
*/
|
||||
static void kfree_rcu_work(struct work_struct *work)
|
||||
static void
|
||||
kvfree_rcu_bulk(struct kfree_rcu_cpu *krcp,
|
||||
struct kvfree_rcu_bulk_data *bnode, int idx)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS], *bnext;
|
||||
struct rcu_head *head, *next;
|
||||
struct kfree_rcu_cpu *krcp;
|
||||
struct kfree_rcu_cpu_work *krwp;
|
||||
int i, j;
|
||||
int i;
|
||||
|
||||
krwp = container_of(to_rcu_work(work),
|
||||
struct kfree_rcu_cpu_work, rcu_work);
|
||||
krcp = krwp->krcp;
|
||||
debug_rcu_bhead_unqueue(bnode);
|
||||
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
// Channels 1 and 2.
|
||||
for (i = 0; i < FREE_N_CHANNELS; i++) {
|
||||
bkvhead[i] = krwp->bkvhead_free[i];
|
||||
krwp->bkvhead_free[i] = NULL;
|
||||
}
|
||||
rcu_lock_acquire(&rcu_callback_map);
|
||||
if (idx == 0) { // kmalloc() / kfree().
|
||||
trace_rcu_invoke_kfree_bulk_callback(
|
||||
rcu_state.name, bnode->nr_records,
|
||||
bnode->records);
|
||||
|
||||
// Channel 3.
|
||||
head = krwp->head_free;
|
||||
krwp->head_free = NULL;
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
kfree_bulk(bnode->nr_records, bnode->records);
|
||||
} else { // vmalloc() / vfree().
|
||||
for (i = 0; i < bnode->nr_records; i++) {
|
||||
trace_rcu_invoke_kvfree_callback(
|
||||
rcu_state.name, bnode->records[i], 0);
|
||||
|
||||
// Handle the first two channels.
|
||||
for (i = 0; i < FREE_N_CHANNELS; i++) {
|
||||
for (; bkvhead[i]; bkvhead[i] = bnext) {
|
||||
bnext = bkvhead[i]->next;
|
||||
debug_rcu_bhead_unqueue(bkvhead[i]);
|
||||
|
||||
rcu_lock_acquire(&rcu_callback_map);
|
||||
if (i == 0) { // kmalloc() / kfree().
|
||||
trace_rcu_invoke_kfree_bulk_callback(
|
||||
rcu_state.name, bkvhead[i]->nr_records,
|
||||
bkvhead[i]->records);
|
||||
|
||||
kfree_bulk(bkvhead[i]->nr_records,
|
||||
bkvhead[i]->records);
|
||||
} else { // vmalloc() / vfree().
|
||||
for (j = 0; j < bkvhead[i]->nr_records; j++) {
|
||||
trace_rcu_invoke_kvfree_callback(
|
||||
rcu_state.name,
|
||||
bkvhead[i]->records[j], 0);
|
||||
|
||||
vfree(bkvhead[i]->records[j]);
|
||||
}
|
||||
}
|
||||
rcu_lock_release(&rcu_callback_map);
|
||||
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
if (put_cached_bnode(krcp, bkvhead[i]))
|
||||
bkvhead[i] = NULL;
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
|
||||
if (bkvhead[i])
|
||||
free_page((unsigned long) bkvhead[i]);
|
||||
|
||||
cond_resched_tasks_rcu_qs();
|
||||
vfree(bnode->records[i]);
|
||||
}
|
||||
}
|
||||
rcu_lock_release(&rcu_callback_map);
|
||||
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
if (put_cached_bnode(krcp, bnode))
|
||||
bnode = NULL;
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
|
||||
if (bnode)
|
||||
free_page((unsigned long) bnode);
|
||||
|
||||
cond_resched_tasks_rcu_qs();
|
||||
}
|
||||
|
||||
static void
|
||||
kvfree_rcu_list(struct rcu_head *head)
|
||||
{
|
||||
struct rcu_head *next;
|
||||
|
||||
/*
|
||||
* This is used when the "bulk" path can not be used for the
|
||||
* double-argument of kvfree_rcu(). This happens when the
|
||||
* page-cache is empty, which means that objects are instead
|
||||
* queued on a linked list through their rcu_head structures.
|
||||
* This list is named "Channel 3".
|
||||
*/
|
||||
for (; head; head = next) {
|
||||
unsigned long offset = (unsigned long)head->func;
|
||||
void *ptr = (void *)head - offset;
|
||||
void *ptr = (void *) head->func;
|
||||
unsigned long offset = (void *) head - ptr;
|
||||
|
||||
next = head->next;
|
||||
debug_rcu_head_unqueue((struct rcu_head *)ptr);
|
||||
@ -3119,16 +2949,72 @@ static void kfree_rcu_work(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is invoked in workqueue context after a grace period.
|
||||
* It frees all the objects queued on ->bulk_head_free or ->head_free.
|
||||
*/
|
||||
static void kfree_rcu_work(struct work_struct *work)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kvfree_rcu_bulk_data *bnode, *n;
|
||||
struct list_head bulk_head[FREE_N_CHANNELS];
|
||||
struct rcu_head *head;
|
||||
struct kfree_rcu_cpu *krcp;
|
||||
struct kfree_rcu_cpu_work *krwp;
|
||||
int i;
|
||||
|
||||
krwp = container_of(to_rcu_work(work),
|
||||
struct kfree_rcu_cpu_work, rcu_work);
|
||||
krcp = krwp->krcp;
|
||||
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
// Channels 1 and 2.
|
||||
for (i = 0; i < FREE_N_CHANNELS; i++)
|
||||
list_replace_init(&krwp->bulk_head_free[i], &bulk_head[i]);
|
||||
|
||||
// Channel 3.
|
||||
head = krwp->head_free;
|
||||
krwp->head_free = NULL;
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
|
||||
// Handle the first two channels.
|
||||
for (i = 0; i < FREE_N_CHANNELS; i++) {
|
||||
// Start from the tail page, so a GP is likely passed for it.
|
||||
list_for_each_entry_safe(bnode, n, &bulk_head[i], list)
|
||||
kvfree_rcu_bulk(krcp, bnode, i);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is used when the "bulk" path can not be used for the
|
||||
* double-argument of kvfree_rcu(). This happens when the
|
||||
* page-cache is empty, which means that objects are instead
|
||||
* queued on a linked list through their rcu_head structures.
|
||||
* This list is named "Channel 3".
|
||||
*/
|
||||
kvfree_rcu_list(head);
|
||||
}
|
||||
|
||||
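For orientation, the two bulk channels freed above differ only in how their pointers go back to the allocator: channel 0 holds kmalloc()ed objects that can be released with one kfree_bulk() call, while channel 1 holds vmalloc()ed objects that must be vfree()d one at a time. A simplified, hedged sketch of that split:

#include <linux/slab.h>
#include <linux/vmalloc.h>

static void free_block_sketch(int idx, unsigned long nr, void **records)
{
	unsigned long i;

	if (idx == 0) {
		/* SLAB channel: one batched call frees every record. */
		kfree_bulk(nr, records);
	} else {
		/* vmalloc channel: no bulk API, so free records one by one. */
		for (i = 0; i < nr; i++)
			vfree(records[i]);
	}
}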
static bool
|
||||
need_offload_krc(struct kfree_rcu_cpu *krcp)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < FREE_N_CHANNELS; i++)
|
||||
if (krcp->bkvhead[i])
|
||||
if (!list_empty(&krcp->bulk_head[i]))
|
||||
return true;
|
||||
|
||||
return !!krcp->head;
|
||||
return !!READ_ONCE(krcp->head);
|
||||
}
|
||||
|
||||
static int krc_count(struct kfree_rcu_cpu *krcp)
|
||||
{
|
||||
int sum = atomic_read(&krcp->head_count);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < FREE_N_CHANNELS; i++)
|
||||
sum += atomic_read(&krcp->bulk_count[i]);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -3136,7 +3022,7 @@ schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
|
||||
{
|
||||
long delay, delay_left;
|
||||
|
||||
delay = READ_ONCE(krcp->count) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES;
|
||||
delay = krc_count(krcp) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES;
|
||||
if (delayed_work_pending(&krcp->monitor_work)) {
|
||||
delay_left = krcp->monitor_work.timer.expires - jiffies;
|
||||
if (delay < delay_left)
|
||||
@ -3146,6 +3032,44 @@ schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
|
||||
queue_delayed_work(system_wq, &krcp->monitor_work, delay);
|
||||
}
|
||||
|
||||
static void
|
||||
kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
|
||||
{
|
||||
struct list_head bulk_ready[FREE_N_CHANNELS];
|
||||
struct kvfree_rcu_bulk_data *bnode, *n;
|
||||
struct rcu_head *head_ready = NULL;
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
for (i = 0; i < FREE_N_CHANNELS; i++) {
|
||||
INIT_LIST_HEAD(&bulk_ready[i]);
|
||||
|
||||
list_for_each_entry_safe_reverse(bnode, n, &krcp->bulk_head[i], list) {
|
||||
if (!poll_state_synchronize_rcu(bnode->gp_snap))
|
||||
break;
|
||||
|
||||
atomic_sub(bnode->nr_records, &krcp->bulk_count[i]);
|
||||
list_move(&bnode->list, &bulk_ready[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (krcp->head && poll_state_synchronize_rcu(krcp->head_gp_snap)) {
|
||||
head_ready = krcp->head;
|
||||
atomic_set(&krcp->head_count, 0);
|
||||
WRITE_ONCE(krcp->head, NULL);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
|
||||
for (i = 0; i < FREE_N_CHANNELS; i++) {
|
||||
list_for_each_entry_safe(bnode, n, &bulk_ready[i], list)
|
||||
kvfree_rcu_bulk(krcp, bnode, i);
|
||||
}
|
||||
|
||||
if (head_ready)
|
||||
kvfree_rcu_list(head_ready);
|
||||
}
|
||||
|
||||
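The drain pass above walks each channel from the tail because new blocks are added at the head: the oldest block has been waiting longest, so the scan can stop at the first block whose grace period has not yet completed. A hedged sketch of that early-exit walk over a hypothetical block list:

#include <linux/list.h>
#include <linux/rcupdate.h>

struct blk_sketch {
	struct list_head list;
	unsigned long gp_snap;
};

static void collect_ready_blocks(struct list_head *pending, struct list_head *ready)
{
	struct blk_sketch *b, *n;

	/* Oldest entries live at the tail, so scan in reverse... */
	list_for_each_entry_safe_reverse(b, n, pending, list) {
		/* ...and stop at the first block still waiting for its GP. */
		if (!poll_state_synchronize_rcu(b->gp_snap))
			break;

		list_move(&b->list, ready);
	}
}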
/*
|
||||
* This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
|
||||
*/
|
||||
@ -3156,26 +3080,31 @@ static void kfree_rcu_monitor(struct work_struct *work)
|
||||
unsigned long flags;
|
||||
int i, j;
|
||||
|
||||
// Drain ready for reclaim.
|
||||
kvfree_rcu_drain_ready(krcp);
|
||||
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
|
||||
// Attempt to start a new batch.
|
||||
for (i = 0; i < KFREE_N_BATCHES; i++) {
|
||||
struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]);
|
||||
|
||||
// Try to detach bkvhead or head and attach it over any
|
||||
// Try to detach bulk_head or head and attach it over any
|
||||
		// available corresponding free channel. It can be that
		// a previous RCU batch is still in progress, which means
		// that another one cannot be queued immediately, so in
		// that case the monitor work is rearmed.
|
||||
if ((krcp->bkvhead[0] && !krwp->bkvhead_free[0]) ||
|
||||
(krcp->bkvhead[1] && !krwp->bkvhead_free[1]) ||
|
||||
(krcp->head && !krwp->head_free)) {
|
||||
if ((!list_empty(&krcp->bulk_head[0]) && list_empty(&krwp->bulk_head_free[0])) ||
|
||||
(!list_empty(&krcp->bulk_head[1]) && list_empty(&krwp->bulk_head_free[1])) ||
|
||||
(READ_ONCE(krcp->head) && !krwp->head_free)) {
|
||||
|
||||
// Channel 1 corresponds to the SLAB-pointer bulk path.
|
||||
// Channel 2 corresponds to vmalloc-pointer bulk path.
|
||||
for (j = 0; j < FREE_N_CHANNELS; j++) {
|
||||
if (!krwp->bkvhead_free[j]) {
|
||||
krwp->bkvhead_free[j] = krcp->bkvhead[j];
|
||||
krcp->bkvhead[j] = NULL;
|
||||
if (list_empty(&krwp->bulk_head_free[j])) {
|
||||
atomic_set(&krcp->bulk_count[j], 0);
|
||||
list_replace_init(&krcp->bulk_head[j],
|
||||
&krwp->bulk_head_free[j]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3183,11 +3112,10 @@ static void kfree_rcu_monitor(struct work_struct *work)
|
||||
// objects queued on the linked list.
|
||||
if (!krwp->head_free) {
|
||||
krwp->head_free = krcp->head;
|
||||
krcp->head = NULL;
|
||||
atomic_set(&krcp->head_count, 0);
|
||||
WRITE_ONCE(krcp->head, NULL);
|
||||
}
|
||||
|
||||
WRITE_ONCE(krcp->count, 0);
|
||||
|
||||
// One work is per one batch, so there are three
|
||||
// "free channels", the batch can handle. It can
|
||||
// be that the work is in the pending state when
|
||||
@ -3197,6 +3125,8 @@ static void kfree_rcu_monitor(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
|
||||
// If there is nothing to detach, it means that our job is
|
||||
// successfully done here. In case of having at least one
|
||||
// of the channels that is still busy we should rearm the
|
||||
@ -3204,8 +3134,6 @@ static void kfree_rcu_monitor(struct work_struct *work)
|
||||
// still in progress.
|
||||
if (need_offload_krc(krcp))
|
||||
schedule_delayed_monitor_work(krcp);
|
||||
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
}
|
||||
|
||||
static enum hrtimer_restart
|
||||
@ -3288,10 +3216,11 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
|
||||
return false;
|
||||
|
||||
idx = !!is_vmalloc_addr(ptr);
|
||||
bnode = list_first_entry_or_null(&(*krcp)->bulk_head[idx],
|
||||
struct kvfree_rcu_bulk_data, list);
|
||||
|
||||
/* Check if a new block is required. */
|
||||
if (!(*krcp)->bkvhead[idx] ||
|
||||
(*krcp)->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
|
||||
if (!bnode || bnode->nr_records == KVFREE_BULK_MAX_ENTR) {
|
||||
bnode = get_cached_bnode(*krcp);
|
||||
if (!bnode && can_alloc) {
|
||||
krc_this_cpu_unlock(*krcp, *flags);
|
||||
@ -3315,17 +3244,15 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
|
||||
if (!bnode)
|
||||
return false;
|
||||
|
||||
/* Initialize the new block. */
|
||||
// Initialize the new block and attach it.
|
||||
bnode->nr_records = 0;
|
||||
bnode->next = (*krcp)->bkvhead[idx];
|
||||
|
||||
/* Attach it to the head. */
|
||||
(*krcp)->bkvhead[idx] = bnode;
|
||||
list_add(&bnode->list, &(*krcp)->bulk_head[idx]);
|
||||
}
|
||||
|
||||
/* Finally insert. */
|
||||
(*krcp)->bkvhead[idx]->records
|
||||
[(*krcp)->bkvhead[idx]->nr_records++] = ptr;
|
||||
// Finally insert and update the GP for this page.
|
||||
bnode->records[bnode->nr_records++] = ptr;
|
||||
bnode->gp_snap = get_state_synchronize_rcu();
|
||||
atomic_inc(&(*krcp)->bulk_count[idx]);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -3342,26 +3269,21 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
|
||||
 * be freed in workqueue context. This allows us to batch requests together to
 * reduce the number of grace periods during heavy kfree_rcu()/kvfree_rcu() load.
|
||||
*/
|
||||
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
|
||||
void kvfree_call_rcu(struct rcu_head *head, void *ptr)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kfree_rcu_cpu *krcp;
|
||||
bool success;
|
||||
void *ptr;
|
||||
|
||||
if (head) {
|
||||
ptr = (void *) head - (unsigned long) func;
|
||||
} else {
|
||||
/*
|
||||
* Please note there is a limitation for the head-less
|
||||
* variant, that is why there is a clear rule for such
|
||||
* objects: it can be used from might_sleep() context
|
||||
* only. For other places please embed an rcu_head to
|
||||
* your data.
|
||||
*/
|
||||
/*
|
||||
* Please note there is a limitation for the head-less
|
||||
* variant, that is why there is a clear rule for such
|
||||
* objects: it can be used from might_sleep() context
|
||||
* only. For other places please embed an rcu_head to
|
||||
* your data.
|
||||
*/
|
||||
if (!head)
|
||||
might_sleep();
|
||||
ptr = (unsigned long *) func;
|
||||
}
|
||||
|
||||
// Queue the object but don't yet schedule the batch.
|
||||
if (debug_rcu_head_queue(ptr)) {
|
||||
@ -3382,14 +3304,16 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
|
||||
// Inline if kvfree_rcu(one_arg) call.
|
||||
goto unlock_return;
|
||||
|
||||
head->func = func;
|
||||
head->func = ptr;
|
||||
head->next = krcp->head;
|
||||
krcp->head = head;
|
||||
WRITE_ONCE(krcp->head, head);
|
||||
atomic_inc(&krcp->head_count);
|
||||
|
||||
// Take a snapshot for this krcp.
|
||||
krcp->head_gp_snap = get_state_synchronize_rcu();
|
||||
success = true;
|
||||
}
|
||||
|
||||
WRITE_ONCE(krcp->count, krcp->count + 1);
|
||||
|
||||
// Set timer to drain after KFREE_DRAIN_JIFFIES.
|
||||
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING)
|
||||
schedule_delayed_monitor_work(krcp);
|
||||
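Since the hunks above rework kvfree_call_rcu() itself, a brief caller-side reminder: objects reach this path either through the double-argument kvfree_rcu(), which never sleeps and names the embedded rcu_head, or through the head-less single-argument form, which may block and is therefore restricted to sleepable context. A hedged sketch with invented names:

#include <linux/rcupdate.h>

struct item {
	int data;
	struct rcu_head rh;
};

static void release_item(struct item *p)
{
	/* Double-argument form: never sleeps; the rcu_head is the fallback
	 * when no bulk page is available. */
	kvfree_rcu(p, rh);
}

static void release_buffer(void *buf)
{
	/* Head-less form: no rcu_head needed, but it may block, so it is
	 * only legal where sleeping is allowed. */
	kvfree_rcu(buf);
}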
@ -3420,7 +3344,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
|
||||
|
||||
count += READ_ONCE(krcp->count);
|
||||
count += krc_count(krcp);
|
||||
count += READ_ONCE(krcp->nr_bkv_objs);
|
||||
atomic_set(&krcp->backoff_page_cache_fill, 1);
|
||||
}
|
||||
@ -3437,7 +3361,7 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
|
||||
int count;
|
||||
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
|
||||
|
||||
count = krcp->count;
|
||||
count = krc_count(krcp);
|
||||
count += drain_page_cache(krcp);
|
||||
kfree_rcu_monitor(&krcp->monitor_work.work);
|
||||
|
||||
@ -3461,15 +3385,12 @@ static struct shrinker kfree_rcu_shrinker = {
|
||||
void __init kfree_rcu_scheduler_running(void)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
|
||||
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
if (need_offload_krc(krcp))
|
||||
schedule_delayed_monitor_work(krcp);
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3485,9 +3406,10 @@ void __init kfree_rcu_scheduler_running(void)
|
||||
*/
|
||||
static int rcu_blocking_is_gp(void)
|
||||
{
|
||||
if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
|
||||
if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE) {
|
||||
might_sleep();
|
||||
return false;
|
||||
might_sleep(); /* Check for RCU read-side critical section. */
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -3711,7 +3633,9 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_full);
|
||||
* If @false is returned, it is the caller's responsibility to invoke this
|
||||
* function later on until it does return @true. Alternatively, the caller
|
||||
* can explicitly wait for a grace period, for example, by passing @oldstate
|
||||
* to cond_synchronize_rcu() or by directly invoking synchronize_rcu().
|
||||
* to either cond_synchronize_rcu() or cond_synchronize_rcu_expedited()
|
||||
* on the one hand or by directly invoking either synchronize_rcu() or
|
||||
* synchronize_rcu_expedited() on the other.
|
||||
*
|
||||
* Yes, this function does not take counter wrap into account.
|
||||
* But counter wrap is harmless. If the counter wraps, we have waited for
|
||||
@ -3722,6 +3646,12 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_full);
|
||||
* completed. Alternatively, they can use get_completed_synchronize_rcu()
|
||||
* to get a guaranteed-completed grace-period state.
|
||||
*
|
||||
* In addition, because oldstate compresses the grace-period state for
|
||||
* both normal and expedited grace periods into a single unsigned long,
|
||||
* it can miss a grace period when synchronize_rcu() runs concurrently
|
||||
* with synchronize_rcu_expedited(). If this is unacceptable, please
|
||||
* instead use the _full() variant of these polling APIs.
|
||||
*
|
||||
* This function provides the same memory-ordering guarantees that
|
||||
* would be provided by a synchronize_rcu() that was invoked at the call
|
||||
* to the function that provided @oldstate, and that returned at the end
|
||||
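The kernel-doc updates above concern the polled grace-period API, so a compact usage sketch may help; it mirrors the documented pattern rather than any particular caller:

#include <linux/rcupdate.h>

static void polled_gp_sketch(void)
{
	/* Start a grace period (if needed) and capture a cookie for it. */
	unsigned long cookie = start_poll_synchronize_rcu();

	/* ... do other work while the grace period proceeds ... */

	if (poll_state_synchronize_rcu(cookie))
		return;		/* That grace period has already elapsed. */

	/* Otherwise wait it out; cond_synchronize_rcu_expedited() or a direct
	 * synchronize_rcu() would also be acceptable per the comment above. */
	cond_synchronize_rcu(cookie);
}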
@ -4079,6 +4009,155 @@ retry:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier);
|
||||
|
||||
/*
|
||||
* Compute the mask of online CPUs for the specified rcu_node structure.
|
||||
* This will not be stable unless the rcu_node structure's ->lock is
|
||||
* held, but the bit corresponding to the current CPU will be stable
|
||||
* in most contexts.
|
||||
*/
|
||||
static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
|
||||
{
|
||||
return READ_ONCE(rnp->qsmaskinitnext);
|
||||
}
|
||||
|
||||
/*
|
||||
* Is the CPU corresponding to the specified rcu_data structure online
|
||||
* from RCU's perspective? This perspective is given by that structure's
|
||||
* ->qsmaskinitnext field rather than by the global cpu_online_mask.
|
||||
*/
|
||||
static bool rcu_rdp_cpu_online(struct rcu_data *rdp)
|
||||
{
|
||||
return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode));
|
||||
}
|
||||
|
||||
#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
|
||||
|
||||
/*
|
||||
* Is the current CPU online as far as RCU is concerned?
|
||||
*
|
||||
* Disable preemption to avoid false positives that could otherwise
|
||||
* happen due to the current CPU number being sampled, this task being
|
||||
* preempted, its old CPU being taken offline, resuming on some other CPU,
|
||||
* then determining that its old CPU is now offline.
|
||||
*
|
||||
* Disable checking if in an NMI handler because we cannot safely
|
||||
* report errors from NMI handlers anyway. In addition, it is OK to use
|
||||
* RCU on an offline processor during initial boot, hence the check for
|
||||
* rcu_scheduler_fully_active.
|
||||
*/
|
||||
bool rcu_lockdep_current_cpu_online(void)
|
||||
{
|
||||
struct rcu_data *rdp;
|
||||
bool ret = false;
|
||||
|
||||
if (in_nmi() || !rcu_scheduler_fully_active)
|
||||
return true;
|
||||
preempt_disable_notrace();
|
||||
rdp = this_cpu_ptr(&rcu_data);
|
||||
/*
|
||||
* Strictly, we care here about the case where the current CPU is
|
||||
* in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
|
||||
* not being up to date. So arch_spin_is_locked() might have a
|
||||
* false positive if it's held by some *other* CPU, but that's
|
||||
* OK because that just means a false *negative* on the warning.
|
||||
*/
|
||||
if (rcu_rdp_cpu_online(rdp) || arch_spin_is_locked(&rcu_state.ofl_lock))
|
||||
ret = true;
|
||||
preempt_enable_notrace();
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
|
||||
|
||||
#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
|
||||
|
||||
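As a usage note on the relocated helper above: rcu_lockdep_current_cpu_online() exists for lockdep-style sanity checks rather than for general control flow. A minimal, hedged example of the kind of debug assertion it supports:

#include <linux/rcupdate.h>

static void rcu_offline_use_check_sketch(void)
{
	/* Complains (under CONFIG_PROVE_RCU) if RCU is used from a CPU that
	 * RCU considers offline; compiles away in non-debug kernels. */
	RCU_LOCKDEP_WARN(!rcu_lockdep_current_cpu_online(),
			 "RCU read-side use from an offline CPU");
}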
// Has rcu_init() been invoked? This is used (for example) to determine
|
||||
// whether spinlocks may be acquired safely.
|
||||
static bool rcu_init_invoked(void)
|
||||
{
|
||||
return !!rcu_state.n_online_cpus;
|
||||
}
|
||||
|
||||
/*
|
||||
* Near the end of the offline process. Trace the fact that this CPU
|
||||
* is going offline.
|
||||
*/
|
||||
int rcutree_dying_cpu(unsigned int cpu)
|
||||
{
|
||||
bool blkd;
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
|
||||
return 0;
|
||||
|
||||
blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
|
||||
trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
|
||||
blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* All CPUs for the specified rcu_node structure have gone offline,
|
||||
* and all tasks that were preempted within an RCU read-side critical
|
||||
* section while running on one of those CPUs have since exited their RCU
|
||||
* read-side critical section. Some other CPU is reporting this fact with
|
||||
* the specified rcu_node structure's ->lock held and interrupts disabled.
|
||||
* This function therefore goes up the tree of rcu_node structures,
|
||||
* clearing the corresponding bits in the ->qsmaskinit fields. Note that
|
||||
* the leaf rcu_node structure's ->qsmaskinit field has already been
|
||||
* updated.
|
||||
*
|
||||
* This function does check that the specified rcu_node structure has
|
||||
* all CPUs offline and no blocked tasks, so it is OK to invoke it
|
||||
* prematurely. That said, invoking it after the fact will cost you
|
||||
* a needless lock acquisition. So once it has done its work, don't
|
||||
* invoke it again.
|
||||
*/
|
||||
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
|
||||
{
|
||||
long mask;
|
||||
struct rcu_node *rnp = rnp_leaf;
|
||||
|
||||
raw_lockdep_assert_held_rcu_node(rnp_leaf);
|
||||
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
|
||||
WARN_ON_ONCE(rnp_leaf->qsmaskinit) ||
|
||||
WARN_ON_ONCE(rcu_preempt_has_tasks(rnp_leaf)))
|
||||
return;
|
||||
for (;;) {
|
||||
mask = rnp->grpmask;
|
||||
rnp = rnp->parent;
|
||||
if (!rnp)
|
||||
break;
|
||||
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
|
||||
rnp->qsmaskinit &= ~mask;
|
||||
/* Between grace periods, so better already be zero! */
|
||||
WARN_ON_ONCE(rnp->qsmask);
|
||||
if (rnp->qsmaskinit) {
|
||||
raw_spin_unlock_rcu_node(rnp);
|
||||
/* irqs remain disabled. */
|
||||
return;
|
||||
}
|
||||
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The CPU has been completely removed, and some other CPU is reporting
|
||||
* this fact from process context. Do the remainder of the cleanup.
|
||||
* There can only be one CPU hotplug operation at a time, so no need for
|
||||
* explicit locking.
|
||||
*/
|
||||
int rcutree_dead_cpu(unsigned int cpu)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
|
||||
return 0;
|
||||
|
||||
WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
|
||||
// Stop-machine done, so allow nohz_full to disable tick.
|
||||
tick_dep_clear(TICK_DEP_BIT_RCU);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Propagate ->qsinitmask bits up the rcu_node tree to account for the
|
||||
* first CPU in a given leaf rcu_node structure coming online. The caller
|
||||
@ -4408,11 +4487,13 @@ static int rcu_pm_notify(struct notifier_block *self,
|
||||
switch (action) {
|
||||
case PM_HIBERNATION_PREPARE:
|
||||
case PM_SUSPEND_PREPARE:
|
||||
rcu_async_hurry();
|
||||
rcu_expedite_gp();
|
||||
break;
|
||||
case PM_POST_HIBERNATION:
|
||||
case PM_POST_SUSPEND:
|
||||
rcu_unexpedite_gp();
|
||||
rcu_async_relax();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@ -4766,7 +4847,7 @@ struct workqueue_struct *rcu_gp_wq;
|
||||
static void __init kfree_rcu_batch_init(void)
|
||||
{
|
||||
int cpu;
|
||||
int i;
|
||||
int i, j;
|
||||
|
||||
/* Clamp it to [0:100] seconds interval. */
|
||||
if (rcu_delay_page_cache_fill_msec < 0 ||
|
||||
@ -4786,8 +4867,14 @@ static void __init kfree_rcu_batch_init(void)
|
||||
for (i = 0; i < KFREE_N_BATCHES; i++) {
|
||||
INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
|
||||
krcp->krw_arr[i].krcp = krcp;
|
||||
|
||||
for (j = 0; j < FREE_N_CHANNELS; j++)
|
||||
INIT_LIST_HEAD(&krcp->krw_arr[i].bulk_head_free[j]);
|
||||
}
|
||||
|
||||
for (i = 0; i < FREE_N_CHANNELS; i++)
|
||||
INIT_LIST_HEAD(&krcp->bulk_head[i]);
|
||||
|
||||
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
|
||||
INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func);
|
||||
krcp->initialized = true;
|
||||
@ -4838,6 +4925,8 @@ void __init rcu_init(void)
|
||||
// Kick-start any polled grace periods that started early.
|
||||
if (!(per_cpu_ptr(&rcu_data, cpu)->mynode->exp_seq_poll_rq & 0x1))
|
||||
(void)start_poll_synchronize_rcu_expedited();
|
||||
|
||||
rcu_test_sync_prims();
|
||||
}
|
||||
|
||||
#include "tree_stall.h"
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
static void rcu_exp_handler(void *unused);
|
||||
static int rcu_print_task_exp_stall(struct rcu_node *rnp);
|
||||
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp);
|
||||
|
||||
/*
|
||||
* Record the start of an expedited grace period.
|
||||
@ -667,8 +668,11 @@ static void synchronize_rcu_expedited_wait(void)
|
||||
mask = leaf_node_cpu_bit(rnp, cpu);
|
||||
if (!(READ_ONCE(rnp->expmask) & mask))
|
||||
continue;
|
||||
preempt_disable(); // For smp_processor_id() in dump_cpu_task().
|
||||
dump_cpu_task(cpu);
|
||||
preempt_enable();
|
||||
}
|
||||
rcu_exp_print_detail_task_stall_rnp(rnp);
|
||||
}
|
||||
jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
|
||||
panic_on_rcu_stall();
|
||||
@ -811,6 +815,36 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
|
||||
return ndetected;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scan the current list of tasks blocked within RCU read-side critical
|
||||
* sections, dumping the stack of each that is blocking the current
|
||||
* expedited grace period.
|
||||
*/
|
||||
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct task_struct *t;
|
||||
|
||||
if (!rcu_exp_stall_task_details)
|
||||
return;
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
if (!READ_ONCE(rnp->exp_tasks)) {
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return;
|
||||
}
|
||||
t = list_entry(rnp->exp_tasks->prev,
|
||||
struct task_struct, rcu_node_entry);
|
||||
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
|
||||
/*
|
||||
* We could be printing a lot while holding a spinlock.
|
||||
* Avoid triggering hard lockup.
|
||||
*/
|
||||
touch_nmi_watchdog();
|
||||
sched_show_task(t);
|
||||
}
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
#else /* #ifdef CONFIG_PREEMPT_RCU */
|
||||
|
||||
/* Request an expedited quiescent state. */
|
||||
@ -883,6 +917,15 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Because preemptible RCU does not exist, we never have to print out
|
||||
* tasks blocked within RCU read-side critical sections that are blocking
|
||||
* the current expedited grace period.
|
||||
*/
|
||||
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
|
||||
|
||||
/**
|
||||
|
@ -144,8 +144,45 @@ bool rcu_gp_is_normal(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
|
||||
|
||||
static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
|
||||
static atomic_t rcu_async_hurry_nesting = ATOMIC_INIT(1);
|
||||
/*
|
||||
* Should call_rcu() callbacks be processed with urgency or are
|
||||
* they OK being executed with arbitrary delays?
|
||||
*/
|
||||
bool rcu_async_should_hurry(void)
|
||||
{
|
||||
return !IS_ENABLED(CONFIG_RCU_LAZY) ||
|
||||
atomic_read(&rcu_async_hurry_nesting);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_async_should_hurry);
|
||||
|
||||
/**
|
||||
* rcu_async_hurry - Make future async RCU callbacks not lazy.
|
||||
*
|
||||
* After a call to this function, future calls to call_rcu()
|
||||
* will be processed in a timely fashion.
|
||||
*/
|
||||
void rcu_async_hurry(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_RCU_LAZY))
|
||||
atomic_inc(&rcu_async_hurry_nesting);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_async_hurry);
|
||||
|
||||
/**
|
||||
* rcu_async_relax - Make future async RCU callbacks lazy.
|
||||
*
|
||||
* After a call to this function, future calls to call_rcu()
|
||||
* will be processed in a lazy fashion.
|
||||
*/
|
||||
void rcu_async_relax(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_RCU_LAZY))
|
||||
atomic_dec(&rcu_async_hurry_nesting);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_async_relax);
|
||||
|
||||
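The new hurry/relax pair above is a nesting counter, so a caller brackets a window during which lazy call_rcu() batching must not add latency; the PM notifier elsewhere in this series does exactly that around suspend and hibernation. A hedged sketch of such a bracket (these declarations live in RCU-internal headers, so this is illustrative rather than something an arbitrary module can compile as-is):

/* rcu_async_hurry()/rcu_async_relax() are declared in RCU's internal headers. */
#include <linux/rcupdate.h>

static void flush_callbacks_promptly(struct rcu_head *rhp, rcu_callback_t cb)
{
	/* Nested "hurry" request: lazy callbacks stop being deferred. */
	rcu_async_hurry();

	call_rcu(rhp, cb);	/* Now processed in a timely fashion. */
	rcu_barrier();		/* Wait for it and any other pending callbacks. */

	/* Drop the request; laziness resumes once the count reaches zero. */
	rcu_async_relax();
}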
static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
|
||||
/*
|
||||
* Should normal grace-period primitives be expedited? Intended for
|
||||
* use within RCU. Note that this function takes the rcu_expedited
|
||||
@ -195,6 +232,7 @@ static bool rcu_boot_ended __read_mostly;
|
||||
void rcu_end_inkernel_boot(void)
|
||||
{
|
||||
rcu_unexpedite_gp();
|
||||
rcu_async_relax();
|
||||
if (rcu_normal_after_boot)
|
||||
WRITE_ONCE(rcu_normal, 1);
|
||||
rcu_boot_ended = true;
|
||||
@ -220,6 +258,7 @@ void rcu_test_sync_prims(void)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_PROVE_RCU))
|
||||
return;
|
||||
pr_info("Running RCU synchronous self tests\n");
|
||||
synchronize_rcu();
|
||||
synchronize_rcu_expedited();
|
||||
}
|
||||
@ -508,6 +547,8 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
|
||||
module_param(rcu_cpu_stall_timeout, int, 0644);
|
||||
int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
|
||||
module_param(rcu_exp_cpu_stall_timeout, int, 0644);
|
||||
bool rcu_exp_stall_task_details __read_mostly;
|
||||
module_param(rcu_exp_stall_task_details, bool, 0644);
|
||||
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
|
||||
|
||||
// Suppress boot-time RCU CPU stall warnings and rcutorture writer stall
|
||||
@ -555,9 +596,12 @@ struct early_boot_kfree_rcu {
|
||||
static void early_boot_test_call_rcu(void)
|
||||
{
|
||||
static struct rcu_head head;
|
||||
int idx;
|
||||
static struct rcu_head shead;
|
||||
struct early_boot_kfree_rcu *rhp;
|
||||
|
||||
idx = srcu_down_read(&early_srcu);
|
||||
srcu_up_read(&early_srcu, idx);
|
||||
call_rcu(&head, test_callback);
|
||||
early_srcu_cookie = start_poll_synchronize_srcu(&early_srcu);
|
||||
call_srcu(&early_srcu, &shead, test_callback);
|
||||
@ -586,6 +630,7 @@ static int rcu_verify_early_boot_tests(void)
|
||||
early_boot_test_counter++;
|
||||
srcu_barrier(&early_srcu);
|
||||
WARN_ON_ONCE(!poll_state_synchronize_srcu(&early_srcu, early_srcu_cookie));
|
||||
cleanup_srcu_struct(&early_srcu);
|
||||
}
|
||||
if (rcu_self_test_counter != early_boot_test_counter) {
|
||||
WARN_ON(1);
|
||||
|
@ -450,7 +450,7 @@ unsigned long
|
||||
torture_random(struct torture_random_state *trsp)
|
||||
{
|
||||
if (--trsp->trs_count < 0) {
|
||||
trsp->trs_state += (unsigned long)local_clock();
|
||||
trsp->trs_state += (unsigned long)local_clock() + raw_smp_processor_id();
|
||||
trsp->trs_count = TORTURE_RANDOM_REFRESH;
|
||||
}
|
||||
trsp->trs_state = trsp->trs_state * TORTURE_RANDOM_MULT +
|
||||
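The one-line change above matters because torture_random() is a simple linear-congruential generator that reseeds itself from local_clock() only every TORTURE_RANDOM_REFRESH draws; mixing in raw_smp_processor_id() keeps per-CPU generators from shadowing one another when they happen to reseed at the same instant. A user-space-flavored sketch of the scheme, with invented constants standing in for the real ones:

#include <stdio.h>

#define REFRESH_INTERVAL 10000UL		/* stand-in for TORTURE_RANDOM_REFRESH */
#define LCG_MULT 6364136223846793005UL		/* stand-in for TORTURE_RANDOM_MULT */
#define LCG_ADD 1442695040888963407UL		/* stand-in for TORTURE_RANDOM_ADD */

struct prng {
	unsigned long state;
	long count;
};

static unsigned long prng_next(struct prng *p, unsigned long clock_ns, int cpu)
{
	if (--p->count < 0) {
		/* Periodic reseed: fold in both the clock and the CPU number
		 * so two CPUs reseeding simultaneously still diverge. */
		p->state += clock_ns + cpu;
		p->count = REFRESH_INTERVAL;
	}
	p->state = p->state * LCG_MULT + LCG_ADD;
	return p->state;
}

int main(void)
{
	struct prng p = { .state = 1, .count = 0 };
	int i;

	for (i = 0; i < 3; i++)
		printf("%lu\n", prng_next(&p, 123456789UL + i, 0));
	return 0;
}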
@ -915,7 +915,7 @@ void torture_kthread_stopping(char *title)
|
||||
VERBOSE_TOROUT_STRING(buf);
|
||||
while (!kthread_should_stop()) {
|
||||
torture_shutdown_absorb(title);
|
||||
schedule_timeout_uninterruptible(1);
|
||||
schedule_timeout_uninterruptible(HZ / 20);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(torture_kthread_stopping);
|
||||
|
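The change above swaps a 1-jiffy sleep for HZ / 20 (roughly 50 ms): a kthread that is merely waiting to be told to stop gains nothing from waking up every jiffy. A hedged sketch of the resulting wait loop in a generic kthread:

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/jiffies.h>

static int parked_thread_fn(void *arg)
{
	/* Main work is done; now just wait for kthread_stop(). */
	while (!kthread_should_stop()) {
		/* 50 ms polls are plenty; 1-jiffy polls just burn CPU. */
		schedule_timeout_uninterruptible(HZ / 20);
	}
	return 0;
}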
@ -10,10 +10,9 @@
|
||||
T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`"
|
||||
trap 'rm -rf $T' 0
|
||||
|
||||
cat $1 > $T/.config
|
||||
sed -e 's/"//g' < $1 > $T/.config
|
||||
|
||||
cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' |
|
||||
grep -v '^CONFIG_INITRAMFS_SOURCE' |
|
||||
sed -e 's/"//g' -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' < $2 |
|
||||
awk '
|
||||
{
|
||||
print "if grep -q \"" $0 "\" < '"$T/.config"'";
|
||||
|
@ -10,7 +10,7 @@
|
||||
#
|
||||
# Authors: Paul E. McKenney <paulmck@kernel.org>
|
||||
|
||||
egrep 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
|
||||
grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
|
||||
grep -v 'ODEBUG: ' |
|
||||
grep -v 'This means that this is a DEBUG kernel and it is' |
|
||||
grep -v 'Warning: unable to open an initial console' |
|
||||
|
@ -44,10 +44,10 @@ fi
|
||||
ncpus="`getconf _NPROCESSORS_ONLN`"
|
||||
make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
|
||||
retval=$?
|
||||
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
|
||||
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|Error|error:|warning:" || grep -E -q "Stop|Error|error:" < $resdir/Make.out
|
||||
then
|
||||
echo Kernel build error
|
||||
egrep "Stop|Error|error:|warning:" < $resdir/Make.out
|
||||
grep -E "Stop|Error|error:|warning:" < $resdir/Make.out
|
||||
echo Run aborted.
|
||||
exit 3
|
||||
fi
|
||||
|
@ -32,11 +32,11 @@ for i in ${rundir}/*/Make.out
|
||||
do
|
||||
scenariodir="`dirname $i`"
|
||||
scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`"
|
||||
if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i
|
||||
if grep -E -q "error:|warning:|^ld: .*undefined reference to" < $i
|
||||
then
|
||||
egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
|
||||
grep -E "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
|
||||
files="$files $i.diags $i"
|
||||
elif ! test -f ${scenariobasedir}/vmlinux && ! test -f "${rundir}/re-run"
|
||||
elif ! test -f ${scenariobasedir}/vmlinux && ! test -f ${scenariobasedir}/vmlinux.xz && ! test -f "${rundir}/re-run"
|
||||
then
|
||||
echo No ${scenariobasedir}/vmlinux file > $i.diags
|
||||
files="$files $i.diags $i"
|
||||
|
@ -186,7 +186,7 @@ do
|
||||
fi
|
||||
;;
|
||||
--kconfig|--kconfigs)
|
||||
checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
|
||||
checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$'
|
||||
TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
|
||||
shift
|
||||
;;
|
||||
@ -585,7 +585,7 @@ awk < $T/cfgcpu.pack \
|
||||
echo kvm-end-run-stats.sh "$resdir/$ds" "$starttime" >> $T/script
|
||||
|
||||
# Extract the tests and their batches from the script.
|
||||
egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
|
||||
grep -E 'Start batch|Starting build\.' $T/script | grep -v ">>" |
|
||||
sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' |
|
||||
awk '
|
||||
/^----Start/ {
|
||||
@ -622,7 +622,7 @@ then
|
||||
elif test "$dryrun" = sched
|
||||
then
|
||||
# Extract the test run schedule from the script.
|
||||
egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
|
||||
grep -E 'Start batch|Starting build\.' $T/script | grep -v ">>" |
|
||||
sed -e 's/:.*$//' -e 's/^echo //'
|
||||
nbuilds="`grep 'Starting build\.' $T/script |
|
||||
grep -v ">>" | sed -e 's/:.*$//' -e 's/^echo //' |
|
||||
|
@ -65,7 +65,7 @@ then
|
||||
fi
|
||||
|
||||
grep --binary-files=text 'torture:.*ver:' $file |
|
||||
egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
|
||||
grep -E --binary-files=text -v '\(null\)|rtc: 000000000* ' |
|
||||
sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
|
||||
sed -e 's/^.*ver: //' |
|
||||
awk '
|
||||
@ -128,17 +128,17 @@ then
|
||||
then
|
||||
summary="$summary Badness: $n_badness"
|
||||
fi
|
||||
n_warn=`grep -v 'Warning: unable to open an initial console' $file | grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process' | egrep -c 'WARNING:|Warn'`
|
||||
n_warn=`grep -v 'Warning: unable to open an initial console' $file | grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process' | grep -E -c 'WARNING:|Warn'`
|
||||
if test "$n_warn" -ne 0
|
||||
then
|
||||
summary="$summary Warnings: $n_warn"
|
||||
fi
|
||||
n_bugs=`egrep -c '\bBUG|Oops:' $file`
|
||||
n_bugs=`grep -E -c '\bBUG|Oops:' $file`
|
||||
if test "$n_bugs" -ne 0
|
||||
then
|
||||
summary="$summary Bugs: $n_bugs"
|
||||
fi
|
||||
n_kcsan=`egrep -c 'BUG: KCSAN: ' $file`
|
||||
n_kcsan=`grep -E -c 'BUG: KCSAN: ' $file`
|
||||
if test "$n_kcsan" -ne 0
|
||||
then
|
||||
if test "$n_bugs" = "$n_kcsan"
|
||||
@ -158,7 +158,7 @@ then
|
||||
then
|
||||
summary="$summary lockdep: $n_badness"
|
||||
fi
|
||||
n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
|
||||
n_stalls=`grep -E -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
|
||||
if test "$n_stalls" -ne 0
|
||||
then
|
||||
summary="$summary Stalls: $n_stalls"
|
||||