diff --git a/arch/Kconfig b/arch/Kconfig index 8f138e580d1a..6b95244c3057 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -468,6 +468,9 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM config ARCH_HAVE_NMI_SAFE_CMPXCHG bool +config ARCH_HAS_NMI_SAFE_THIS_CPU_OPS + bool + config HAVE_ALIGNED_STRUCT_PAGE bool help diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 505c8a1ccbe0..099ee812f3f1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -31,6 +31,7 @@ config ARM64 select ARCH_HAS_KCOV select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 903096bd87f8..386adde2feff 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -10,6 +10,7 @@ config LOONGARCH select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_INLINE_READ_LOCK if !PREEMPTION diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 318fce77601d..0acdfda33290 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -73,6 +73,7 @@ config S390 select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MEM_ENCRYPT + select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_MEMORY diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6d1879ef933a..bcb3190eaa26 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -81,6 +81,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 6995c8979230..ac5ff0783b4f 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -312,7 +312,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) * Ensure that all tasks observe the host state change before the * host_failed change. 
*/ - call_rcu(&scmd->rcu, scsi_eh_inc_host_failed); + call_rcu_hurry(&scmd->rcu, scsi_eh_inc_host_failed); } /** diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 32f259fa5801..381b92d146c7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -416,7 +416,7 @@ static __always_inline void guest_context_enter_irqoff(void) */ if (!context_tracking_guest_enter()) { instrumentation_begin(); - rcu_virt_note_context_switch(smp_processor_id()); + rcu_virt_note_context_switch(); instrumentation_end(); } } diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 08605ce7379d..4da98ca6273e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -108,6 +108,15 @@ static inline int rcu_preempt_depth(void) #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_RCU_LAZY +void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func); +#else +static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) +{ + call_rcu(head, func); +} +#endif + /* Internal to kernel */ void rcu_init(void); extern int rcu_scheduler_active; @@ -340,6 +349,11 @@ static inline int rcu_read_lock_any_held(void) return !preemptible(); } +static inline int debug_lockdep_rcu_enabled(void) +{ + return 0; +} + #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #ifdef CONFIG_PROVE_RCU diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 768196a5f39d..5c271bf3a1e7 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -142,12 +142,10 @@ static inline int rcu_needs_cpu(void) * Take advantage of the fact that there is only one CPU, which * allows us to ignore virtualization-based context switches. */ -static inline void rcu_virt_note_context_switch(int cpu) { } +static inline void rcu_virt_note_context_switch(void) { } static inline void rcu_cpu_stall_reset(void) { } static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; } static inline void rcu_irq_exit_check_preempt(void) { } -#define rcu_is_idle_cpu(cpu) \ - (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5efb51486e8a..4003bf6cfa1c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -27,7 +27,7 @@ void rcu_cpu_stall_reset(void); * wrapper around rcu_note_context_switch(), which allows TINY_RCU * to save a few bytes. The caller must have disabled interrupts. */ -static inline void rcu_virt_note_context_switch(int cpu) +static inline void rcu_virt_note_context_switch(void) { rcu_note_context_switch(false); } @@ -87,8 +87,6 @@ bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu(unsigned long oldstate); void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); -bool rcu_is_idle_cpu(int cpu); - #ifdef CONFIG_PROVE_RCU void rcu_irq_exit_check_preempt(void); #else diff --git a/include/linux/slab.h b/include/linux/slab.h index 90877fcde70b..487418c7ea8c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -76,6 +76,17 @@ * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. * + * Note that it is not possible to acquire a lock within a structure + * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference + * as described above. 
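As a minimal sketch of the reference-first pattern just described (hypothetical struct foo, foo_table[], and foo_put(); not taken from this patch — SLAB_TYPESAFE_BY_RCU only guarantees the memory stays *a* struct foo, not that it stays the *same* struct foo):

        struct foo {
                refcount_t ref;
                spinlock_t lock;
                int key;
        };

        static struct foo __rcu *foo_table[128];        /* hypothetical */

        static struct foo *foo_lookup(int i, int key)
        {
                struct foo *f;

                rcu_read_lock();
                f = rcu_dereference(foo_table[i]);
                /* Reference first; only then may ->lock be acquired. */
                if (f && !refcount_inc_not_zero(&f->ref))
                        f = NULL;
                rcu_read_unlock();

                if (f) {
                        spin_lock(&f->lock);
                        /* Recheck identity: the slab may have recycled the object. */
                        if (f->key != key) {
                                spin_unlock(&f->lock);
                                foo_put(f);     /* hypothetical release helper */
                                return NULL;
                        }
                        spin_unlock(&f->lock);
                }
                return f;
        }
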
The reason is that SLAB_TYPESAFE_BY_RCU pages + * are not zeroed before being given to the slab, which means that any + * locks must be initialized after each and every kmem_cache_alloc(). + * Alternatively, make the ctor passed to kmem_cache_create() initialize + * the locks at page-allocation time, as is done in __i915_request_ctor(), + * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers + * to safely acquire those ctor-initialized locks under rcu_read_lock() + * protection. + * * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ /* Defer freeing slabs to RCU */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 01226e4d960a..f0814ffca34b 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -64,6 +64,20 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); +#ifdef CONFIG_NEED_SRCU_NMI_SAFE +int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); +#else +static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) +{ + return __srcu_read_lock(ssp); +} +static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) +{ + __srcu_read_unlock(ssp, idx); +} +#endif /* CONFIG_NEED_SRCU_NMI_SAFE */ + #ifdef CONFIG_SRCU void srcu_init(void); #else /* #ifdef CONFIG_SRCU */ @@ -104,6 +118,18 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#define SRCU_NMI_UNKNOWN 0x0 +#define SRCU_NMI_UNSAFE 0x1 +#define SRCU_NMI_SAFE 0x2 + +#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU) +void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe); +#else +static inline void srcu_check_nmi_safety(struct srcu_struct *ssp, + bool nmi_safe) { } +#endif + + /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing @@ -161,17 +187,36 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; + srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); rcu_lock_acquire(&(ssp)->dep_map); return retval; } +/** + * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter an SRCU read-side critical section, but in an NMI-safe manner. + * See srcu_read_lock() for more information. + */ +static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp) +{ + int retval; + + srcu_check_nmi_safety(ssp, true); + retval = __srcu_read_lock_nmisafe(ssp); + rcu_lock_acquire(&(ssp)->dep_map); + return retval; +} + /* Used by tracing, cannot be traced and cannot invoke lockdep. */ static inline notrace int srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; + srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); return retval; } @@ -187,14 +232,32 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); + srcu_check_nmi_safety(ssp, false); rcu_lock_release(&(ssp)->dep_map); __srcu_read_unlock(ssp, idx); } +/** + * srcu_read_unlock_nmisafe - unregister an old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock_nmisafe(). + * + * Exit an SRCU read-side critical section, but in an NMI-safe manner. + */ +static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) + __releases(ssp) +{ + WARN_ON_ONCE(idx & ~0x1); + srcu_check_nmi_safety(ssp, true); + rcu_lock_release(&(ssp)->dep_map); + __srcu_read_unlock_nmisafe(ssp, idx); +} + /* Used by tracing, cannot be traced and cannot call lockdep. */ static inline notrace void srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { + srcu_check_nmi_safety(ssp, false); __srcu_read_unlock(ssp, idx); } diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index e3014319d1ad..c689a81752c9 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -23,8 +23,9 @@ struct srcu_struct; */ struct srcu_data { /* Read-side state. */ - unsigned long srcu_lock_count[2]; /* Locks per CPU. */ - unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ + atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ + atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ + int srcu_nmi_safety; /* NMI-safe srcu_struct structure? */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index d471d22a5e21..05106e6fbd0e 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -72,6 +72,9 @@ config TREE_SRCU help This option selects the full-fledged version of SRCU. +config NEED_SRCU_NMI_SAFE + def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU + config TASKS_RCU_GENERIC def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU select SRCU @@ -311,4 +314,12 @@ config TASKS_TRACE_RCU_READ_MB Say N here if you hate read-side memory barriers. Take the default if you are unsure. +config RCU_LAZY + bool "RCU callback lazy invocation functionality" + depends on RCU_NOCB_CPU + default n + help + To save power, batch RCU callbacks and flush them after a delay, + under memory pressure, or when the callback list grows too big.
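As a usage sketch of the split that RCU_LAZY introduces (hypothetical struct obj and obj_free_cb(), not from this patch): under CONFIG_RCU_LAZY a plain call_rcu() may be batched for up to the lazy flush timeout, so a path whose forward progress waits on the callback should use call_rcu_hurry():

        static void obj_release(struct obj *p, bool waiter_blocked)
        {
                if (waiter_blocked)
                        /* A waiter blocks until the callback runs: must not be lazy. */
                        call_rcu_hurry(&p->rhp, obj_free_cb);
                else
                        /* Ordinary deferred free: batching it saves power. */
                        call_rcu(&p->rhp, obj_free_cb);
        }

This is the reasoning behind the call_rcu_hurry() conversions elsewhere in this patch (scsi_eh_scmd_add(), rcu_sync_call(), queue_rcu_work(), dst_release(), and friends), where delaying the callback would stall waiters or let work pile up.
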
+ endmenu # "RCU Subsystem" diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index be5979da07f5..65704cbc9df7 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -474,6 +474,14 @@ enum rcutorture_type { INVALID_RCU_FLAVOR }; +#if defined(CONFIG_RCU_LAZY) +unsigned long rcu_lazy_get_jiffies_till_flush(void); +void rcu_lazy_set_jiffies_till_flush(unsigned long j); +#else +static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; } +static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { } +#endif + #if defined(CONFIG_TREE_RCU) void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, unsigned long *gp_seq); diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 3ef02d4a8108..91fb5905a008 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -95,6 +95,7 @@ torture_param(int, verbose, 1, "Enable verbose debugging printk()s"); torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable"); torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?"); torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate."); +torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?"); static char *scale_type = "rcu"; module_param(scale_type, charp, 0444); @@ -175,7 +176,7 @@ static struct rcu_scale_ops rcu_ops = { .get_gp_seq = rcu_get_gp_seq, .gp_diff = rcu_seq_diff, .exp_completed = rcu_exp_batches_completed, - .async = call_rcu, + .async = call_rcu_hurry, .gp_barrier = rcu_barrier, .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, @@ -659,6 +660,14 @@ struct kfree_obj { struct rcu_head rh; }; +/* Used if doing RCU-kfree'ing via call_rcu(). */ +static void kfree_call_rcu(struct rcu_head *rh) +{ + struct kfree_obj *obj = container_of(rh, struct kfree_obj, rh); + + kfree(obj); +} + static int kfree_scale_thread(void *arg) { @@ -696,6 +705,11 @@ kfree_scale_thread(void *arg) if (!alloc_ptr) return -ENOMEM; + if (kfree_by_call_rcu) { + call_rcu(&(alloc_ptr->rh), kfree_call_rcu); + continue; + } + // By default kfree_rcu_test_single and kfree_rcu_test_double are // initialized to false. If both have the same value (false or true) // both are randomly tested, otherwise only the one with value true @@ -767,11 +781,58 @@ kfree_scale_shutdown(void *arg) return -EINVAL; } +// Used if doing RCU-kfree'ing via call_rcu(). +static unsigned long jiffies_at_lazy_cb; +static struct rcu_head lazy_test1_rh; +static int rcu_lazy_test1_cb_called; +static void call_rcu_lazy_test1(struct rcu_head *rh) +{ + jiffies_at_lazy_cb = jiffies; + WRITE_ONCE(rcu_lazy_test1_cb_called, 1); +} + static int __init kfree_scale_init(void) { - long i; int firsterr = 0; + long i; + unsigned long jif_start; + unsigned long orig_jif; + + // Also, do a quick self-test to ensure laziness is as much as + // expected. + if (kfree_by_call_rcu && !IS_ENABLED(CONFIG_RCU_LAZY)) { + pr_alert("CONFIG_RCU_LAZY is disabled, falling back to kfree_rcu() for delayed RCU kfree'ing\n"); + kfree_by_call_rcu = 0; + } + + if (kfree_by_call_rcu) { + /* do a test to check the timeout. 
*/ + orig_jif = rcu_lazy_get_jiffies_till_flush(); + + rcu_lazy_set_jiffies_till_flush(2 * HZ); + rcu_barrier(); + + jif_start = jiffies; + jiffies_at_lazy_cb = 0; + call_rcu(&lazy_test1_rh, call_rcu_lazy_test1); + + smp_cond_load_relaxed(&rcu_lazy_test1_cb_called, VAL == 1); + + rcu_lazy_set_jiffies_till_flush(orig_jif); + + if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) { + pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n"); + WARN_ON_ONCE(1); + return -1; + } + + if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start > 3 * HZ)) { + pr_alert("ERROR: call_rcu() CBs are being too lazy!\n"); + WARN_ON_ONCE(1); + return -1; + } + } kfree_nrealthreads = compute_real(kfree_nthreads); /* Start up the kthreads. */ @@ -784,7 +845,9 @@ kfree_scale_init(void) schedule_timeout_uninterruptible(1); } - pr_alert("kfree object size=%zu\n", kfree_mult * sizeof(struct kfree_obj)); + pr_alert("kfree object size=%zu, kfree_by_call_rcu=%d\n", + kfree_mult * sizeof(struct kfree_obj), + kfree_by_call_rcu); kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]), GFP_KERNEL); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 503c2aa845a4..634df26a2c27 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -357,6 +357,10 @@ struct rcu_torture_ops { bool (*poll_gp_state_exp)(unsigned long oldstate); void (*cond_sync_exp)(unsigned long oldstate); void (*cond_sync_exp_full)(struct rcu_gp_oldstate *rgosp); + unsigned long (*get_comp_state)(void); + void (*get_comp_state_full)(struct rcu_gp_oldstate *rgosp); + bool (*same_gp_state)(unsigned long oldstate1, unsigned long oldstate2); + bool (*same_gp_state_full)(struct rcu_gp_oldstate *rgosp1, struct rcu_gp_oldstate *rgosp2); unsigned long (*get_gp_state)(void); void (*get_gp_state_full)(struct rcu_gp_oldstate *rgosp); unsigned long (*get_gp_completed)(void); @@ -510,7 +514,7 @@ static unsigned long rcu_no_completed(void) static void rcu_torture_deferred_free(struct rcu_torture *p) { - call_rcu(&p->rtort_rcu, rcu_torture_cb); + call_rcu_hurry(&p->rtort_rcu, rcu_torture_cb); } static void rcu_sync_torture_init(void) @@ -535,6 +539,10 @@ static struct rcu_torture_ops rcu_ops = { .deferred_free = rcu_torture_deferred_free, .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, + .same_gp_state = same_state_synchronize_rcu, + .same_gp_state_full = same_state_synchronize_rcu_full, + .get_comp_state = get_completed_synchronize_rcu, + .get_comp_state_full = get_completed_synchronize_rcu_full, .get_gp_state = get_state_synchronize_rcu, .get_gp_state_full = get_state_synchronize_rcu_full, .get_gp_completed = get_completed_synchronize_rcu, @@ -551,7 +559,7 @@ static struct rcu_torture_ops rcu_ops = { .start_gp_poll_exp_full = start_poll_synchronize_rcu_expedited_full, .poll_gp_state_exp = poll_state_synchronize_rcu, .cond_sync_exp = cond_synchronize_rcu_expedited, - .call = call_rcu, + .call = call_rcu_hurry, .cb_barrier = rcu_barrier, .fqs = rcu_force_quiescent_state, .stats = NULL, @@ -615,10 +623,14 @@ static struct rcu_torture_ops rcu_busted_ops = { DEFINE_STATIC_SRCU(srcu_ctl); static struct srcu_struct srcu_ctld; static struct srcu_struct *srcu_ctlp = &srcu_ctl; +static struct rcu_torture_ops srcud_ops; static int srcu_torture_read_lock(void) __acquires(srcu_ctlp) { - return srcu_read_lock(srcu_ctlp); + if (cur_ops == &srcud_ops) + return srcu_read_lock_nmisafe(srcu_ctlp); + else + return srcu_read_lock(srcu_ctlp); } static void @@ -642,7 +654,10 @@ srcu_read_delay(struct 
torture_random_state *rrsp, struct rt_read_seg *rtrsp) static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp) { - srcu_read_unlock(srcu_ctlp, idx); + if (cur_ops == &srcud_ops) + srcu_read_unlock_nmisafe(srcu_ctlp, idx); + else + srcu_read_unlock(srcu_ctlp, idx); } static int torture_srcu_read_lock_held(void) @@ -848,7 +863,7 @@ static void rcu_tasks_torture_deferred_free(struct rcu_torture *p) static void synchronize_rcu_mult_test(void) { - synchronize_rcu_mult(call_rcu_tasks, call_rcu); + synchronize_rcu_mult(call_rcu_tasks, call_rcu_hurry); } static struct rcu_torture_ops tasks_ops = { @@ -1258,13 +1273,15 @@ static void rcu_torture_write_types(void) } else if (gp_normal && !cur_ops->deferred_free) { pr_alert("%s: gp_normal without primitives.\n", __func__); } - if (gp_poll1 && cur_ops->start_gp_poll && cur_ops->poll_gp_state) { + if (gp_poll1 && cur_ops->get_comp_state && cur_ops->same_gp_state && + cur_ops->start_gp_poll && cur_ops->poll_gp_state) { synctype[nsynctypes++] = RTWS_POLL_GET; pr_info("%s: Testing polling GPs.\n", __func__); } else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) { pr_alert("%s: gp_poll without primitives.\n", __func__); } - if (gp_poll_full1 && cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full) { + if (gp_poll_full1 && cur_ops->get_comp_state_full && cur_ops->same_gp_state_full + && cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full) { synctype[nsynctypes++] = RTWS_POLL_GET_FULL; pr_info("%s: Testing polling full-state GPs.\n", __func__); } else if (gp_poll_full && (!cur_ops->start_gp_poll_full || !cur_ops->poll_gp_state_full)) { @@ -1339,14 +1356,18 @@ rcu_torture_writer(void *arg) struct rcu_gp_oldstate cookie_full; int expediting = 0; unsigned long gp_snap; + unsigned long gp_snap1; struct rcu_gp_oldstate gp_snap_full; + struct rcu_gp_oldstate gp_snap1_full; int i; int idx; int oldnice = task_nice(current); + struct rcu_gp_oldstate rgo[NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE]; struct rcu_torture *rp; struct rcu_torture *old_rp; static DEFINE_TORTURE_RANDOM(rand); bool stutter_waited; + unsigned long ulo[NUM_ACTIVE_RCU_POLL_OLDSTATE]; VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); if (!can_expedite) @@ -1463,20 +1484,43 @@ rcu_torture_writer(void *arg) break; case RTWS_POLL_GET: rcu_torture_writer_state = RTWS_POLL_GET; + for (i = 0; i < ARRAY_SIZE(ulo); i++) + ulo[i] = cur_ops->get_comp_state(); gp_snap = cur_ops->start_gp_poll(); rcu_torture_writer_state = RTWS_POLL_WAIT; - while (!cur_ops->poll_gp_state(gp_snap)) + while (!cur_ops->poll_gp_state(gp_snap)) { + gp_snap1 = cur_ops->get_gp_state(); + for (i = 0; i < ARRAY_SIZE(ulo); i++) + if (cur_ops->poll_gp_state(ulo[i]) || + cur_ops->same_gp_state(ulo[i], gp_snap1)) { + ulo[i] = gp_snap1; + break; + } + WARN_ON_ONCE(i >= ARRAY_SIZE(ulo)); torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + } rcu_torture_pipe_update(old_rp); break; case RTWS_POLL_GET_FULL: rcu_torture_writer_state = RTWS_POLL_GET_FULL; + for (i = 0; i < ARRAY_SIZE(rgo); i++) + cur_ops->get_comp_state_full(&rgo[i]); cur_ops->start_gp_poll_full(&gp_snap_full); rcu_torture_writer_state = RTWS_POLL_WAIT_FULL; - while (!cur_ops->poll_gp_state_full(&gp_snap_full)) + while (!cur_ops->poll_gp_state_full(&gp_snap_full)) { + cur_ops->get_gp_state_full(&gp_snap1_full); + for (i = 0; i < ARRAY_SIZE(rgo); i++) + if (cur_ops->poll_gp_state_full(&rgo[i]) || + cur_ops->same_gp_state_full(&rgo[i], + &gp_snap1_full)) { + rgo[i] = gp_snap1_full; + break; + } + WARN_ON_ONCE(i >= 
ARRAY_SIZE(rgo)); torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + } rcu_torture_pipe_update(old_rp); break; case RTWS_POLL_GET_EXP: @@ -3388,13 +3432,13 @@ static void rcu_test_debug_objects(void) /* Try to queue the rh2 pair of callbacks for the same grace period. */ preempt_disable(); /* Prevent preemption from interrupting test. */ rcu_read_lock(); /* Make it impossible to finish a grace period. */ - call_rcu(&rh1, rcu_torture_leak_cb); /* Start grace period. */ + call_rcu_hurry(&rh1, rcu_torture_leak_cb); /* Start grace period. */ local_irq_disable(); /* Make it harder to start a new grace period. */ - call_rcu(&rh2, rcu_torture_leak_cb); - call_rcu(&rh2, rcu_torture_err_cb); /* Duplicate callback. */ + call_rcu_hurry(&rh2, rcu_torture_leak_cb); + call_rcu_hurry(&rh2, rcu_torture_err_cb); /* Duplicate callback. */ if (rhp) { - call_rcu(rhp, rcu_torture_leak_cb); - call_rcu(rhp, rcu_torture_err_cb); /* Another duplicate callback. */ + call_rcu_hurry(rhp, rcu_torture_leak_cb); + call_rcu_hurry(rhp, rcu_torture_err_cb); /* Another duplicate callback. */ } local_irq_enable(); rcu_read_unlock(); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 1c304fec89c0..ca4b5dcec675 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -417,7 +417,7 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx) for_each_possible_cpu(cpu) { struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); - sum += READ_ONCE(cpuc->srcu_lock_count[idx]); + sum += atomic_long_read(&cpuc->srcu_lock_count[idx]); } return sum; } @@ -429,13 +429,18 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx) static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx) { int cpu; + unsigned long mask = 0; unsigned long sum = 0; for_each_possible_cpu(cpu) { struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); - sum += READ_ONCE(cpuc->srcu_unlock_count[idx]); + sum += atomic_long_read(&cpuc->srcu_unlock_count[idx]); + if (IS_ENABLED(CONFIG_PROVE_RCU)) + mask = mask | READ_ONCE(cpuc->srcu_nmi_safety); } + WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask >> 1)), + "Mixed NMI-safe readers for srcu_struct at %ps.\n", ssp); return sum; } @@ -503,10 +508,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp) for_each_possible_cpu(cpu) { struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); - sum += READ_ONCE(cpuc->srcu_lock_count[0]); - sum += READ_ONCE(cpuc->srcu_lock_count[1]); - sum -= READ_ONCE(cpuc->srcu_unlock_count[0]); - sum -= READ_ONCE(cpuc->srcu_unlock_count[1]); + sum += atomic_long_read(&cpuc->srcu_lock_count[0]); + sum += atomic_long_read(&cpuc->srcu_lock_count[1]); + sum -= atomic_long_read(&cpuc->srcu_unlock_count[0]); + sum -= atomic_long_read(&cpuc->srcu_unlock_count[1]); } return sum; } @@ -626,6 +631,29 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) } EXPORT_SYMBOL_GPL(cleanup_srcu_struct); +#ifdef CONFIG_PROVE_RCU +/* + * Check for consistent NMI safety. 
+ */ +void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe) +{ + int nmi_safe_mask = 1 << nmi_safe; + int old_nmi_safe_mask; + struct srcu_data *sdp; + + /* NMI-unsafe use in NMI is a bad sign */ + WARN_ON_ONCE(!nmi_safe && in_nmi()); + sdp = raw_cpu_ptr(ssp->sda); + old_nmi_safe_mask = READ_ONCE(sdp->srcu_nmi_safety); + if (!old_nmi_safe_mask) { + WRITE_ONCE(sdp->srcu_nmi_safety, nmi_safe_mask); + return; + } + WARN_ONCE(old_nmi_safe_mask != nmi_safe_mask, "CPU %d old state %d new state %d\n", sdp->cpu, old_nmi_safe_mask, nmi_safe_mask); +} +EXPORT_SYMBOL_GPL(srcu_check_nmi_safety); +#endif /* CONFIG_PROVE_RCU */ + /* * Counts the new reader in the appropriate per-CPU element of the * srcu_struct. @@ -636,7 +664,7 @@ int __srcu_read_lock(struct srcu_struct *ssp) int idx; idx = READ_ONCE(ssp->srcu_idx) & 0x1; - this_cpu_inc(ssp->sda->srcu_lock_count[idx]); + this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -650,10 +678,45 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { smp_mb(); /* C */ /* Avoid leaking the critical section. */ - this_cpu_inc(ssp->sda->srcu_unlock_count[idx]); + this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); +#ifdef CONFIG_NEED_SRCU_NMI_SAFE + +/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct, but in an NMI-safe manner using RMW atomics. + * Returns an index that must be passed to the matching srcu_read_unlock(). + */ +int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) +{ + int idx; + struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); + + idx = READ_ONCE(ssp->srcu_idx) & 0x1; + atomic_long_inc(&sdp->srcu_lock_count[idx]); + smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */ + return idx; +} +EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe); + +/* + * Removes the count for the old reader from the appropriate per-CPU + * element of the srcu_struct. Note that this may well be a different + * CPU than that which was incremented by the corresponding srcu_read_lock(). + */ +void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) +{ + struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); + + smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */ + atomic_long_inc(&sdp->srcu_unlock_count[idx]); +} +EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe); + +#endif // CONFIG_NEED_SRCU_NMI_SAFE + /* * Start an SRCU grace period. */ @@ -1090,7 +1153,12 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, int ss_state; check_init_srcu_struct(ssp); - idx = srcu_read_lock(ssp); + /* + * While starting a new grace period, make sure we are in an + * SRCU read-side critical section so that the grace-period + * sequence number cannot wrap around in the meantime. + */ + idx = __srcu_read_lock_nmisafe(ssp); ss_state = smp_load_acquire(&ssp->srcu_size_state); if (ss_state < SRCU_SIZE_WAIT_CALL) sdp = per_cpu_ptr(ssp->sda, 0); @@ -1123,7 +1191,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, srcu_funnel_gp_start(ssp, sdp, s, do_norm); else if (needexp) srcu_funnel_exp_start(ssp, sdp_mynode, s); - srcu_read_unlock(ssp, idx); + __srcu_read_unlock_nmisafe(ssp, idx); return s; } @@ -1427,13 +1495,13 @@ void srcu_barrier(struct srcu_struct *ssp) /* Initial count prevents reaching zero until all CBs are posted. 
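A read-side usage sketch for the NMI-safe API above (hypothetical my_srcu and a hook invoked in NMI context; note that a given srcu_struct must use either the NMI-safe or the ordinary readers consistently, which srcu_check_nmi_safety() enforces under CONFIG_PROVE_RCU):

        DEFINE_STATIC_SRCU(my_srcu);

        /* Hypothetical hook that can be called from NMI context. */
        static void my_nmi_hook(void)
        {
                int idx;

                idx = srcu_read_lock_nmisafe(&my_srcu);
                /* ... read state protected by my_srcu ... */
                srcu_read_unlock_nmisafe(&my_srcu, idx);
        }
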
*/ atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); - idx = srcu_read_lock(ssp); + idx = __srcu_read_lock_nmisafe(ssp); if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0)); else for_each_possible_cpu(cpu) srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu)); - srcu_read_unlock(ssp, idx); + __srcu_read_unlock_nmisafe(ssp, idx); /* Remove the initial count, at which point reaching zero can happen. */ if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) @@ -1687,8 +1755,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) struct srcu_data *sdp; sdp = per_cpu_ptr(ssp->sda, cpu); - u0 = data_race(sdp->srcu_unlock_count[!idx]); - u1 = data_race(sdp->srcu_unlock_count[idx]); + u0 = data_race(atomic_long_read(&sdp->srcu_unlock_count[!idx])); + u1 = data_race(atomic_long_read(&sdp->srcu_unlock_count[idx])); /* * Make sure that a lock is always counted if the corresponding @@ -1696,8 +1764,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) */ smp_rmb(); - l0 = data_race(sdp->srcu_lock_count[!idx]); - l1 = data_race(sdp->srcu_lock_count[idx]); + l0 = data_race(atomic_long_read(&sdp->srcu_lock_count[!idx])); + l1 = data_race(atomic_long_read(&sdp->srcu_lock_count[idx])); c0 = l0 - u0; c1 = l1 - u1; diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 5cefc702158f..e550f97779b8 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -44,7 +44,7 @@ static void rcu_sync_func(struct rcu_head *rhp); static void rcu_sync_call(struct rcu_sync *rsp) { - call_rcu(&rsp->cb_head, rcu_sync_func); + call_rcu_hurry(&rsp->cb_head, rcu_sync_func); } /** diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index f5bf6fb430da..b0b885e071fa 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -728,7 +728,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) if (rtsi > 0 && !reported && time_after(j, lastinfo + rtsi)) { lastinfo = j; rtsi = rtsi * rcu_task_stall_info_mult; - pr_info("%s: %s grace period %lu is %lu jiffies old.\n", + pr_info("%s: %s grace period number %lu (since boot) is %lu jiffies old.\n", __func__, rtp->kname, rtp->tasks_gp_seq, j - rtp->gp_start); } } diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index a33a8d4942c3..72913ce21258 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -44,7 +44,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = { void rcu_barrier(void) { - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); } EXPORT_SYMBOL(rcu_barrier); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 6bb8e72bc815..d7764ffda6e5 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -301,12 +301,6 @@ static bool rcu_dynticks_in_eqs(int snap) return !(snap & RCU_DYNTICKS_IDX); } -/* Return true if the specified CPU is currently idle from an RCU viewpoint. */ -bool rcu_is_idle_cpu(int cpu) -{ - return rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu)); -} - /* * Return true if the CPU corresponding to the specified rcu_data * structure has spent some time in an extended quiescent state since @@ -2106,7 +2100,7 @@ int rcutree_dying_cpu(unsigned int cpu) if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) return 0; - blkd = !!(rnp->qsmask & rdp->grpmask); + blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask); trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq), blkd ? TPS("cpuofl-bgp") : TPS("cpuofl")); return 0; @@ -2416,7 +2410,7 @@ void rcu_force_quiescent_state(void) struct rcu_node *rnp_old = NULL; /* Funnel through hierarchy to reduce memory contention. 
*/ - rnp = __this_cpu_read(rcu_data.mynode); + rnp = raw_cpu_read(rcu_data.mynode); for (; rnp != NULL; rnp = rnp->parent) { ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) || !raw_spin_trylock(&rnp->fqslock); @@ -2728,47 +2722,8 @@ static void check_cb_ovld(struct rcu_data *rdp) raw_spin_unlock_rcu_node(rnp); } -/** - * call_rcu() - Queue an RCU callback for invocation after a grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all pre-existing RCU read-side - * critical sections have completed. However, the callback function - * might well execute concurrently with RCU read-side critical sections - * that started after call_rcu() was invoked. - * - * RCU read-side critical sections are delimited by rcu_read_lock() - * and rcu_read_unlock(), and may be nested. In addition, but only in - * v5.0 and later, regions of code across which interrupts, preemption, - * or softirqs have been disabled also serve as RCU read-side critical - * sections. This includes hardware interrupt handlers, softirq handlers, - * and NMI handlers. - * - * Note that all CPUs must agree that the grace period extended beyond - * all pre-existing RCU read-side critical section. On systems with more - * than one CPU, this means that when "func()" is invoked, each CPU is - * guaranteed to have executed a full memory barrier since the end of its - * last RCU read-side critical section whose beginning preceded the call - * to call_rcu(). It also means that each CPU executing an RCU read-side - * critical section that continues beyond the start of "func()" must have - * executed a memory barrier after the call_rcu() but before the beginning - * of that RCU read-side critical section. Note that these guarantees - * include CPUs that are offline, idle, or executing in user mode, as - * well as CPUs that are executing in the kernel. - * - * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the - * resulting RCU callback function "func()", then both CPU A and CPU B are - * guaranteed to execute a full memory barrier during the time interval - * between the call to call_rcu() and the invocation of "func()" -- even - * if CPU A and CPU B are the same CPU (but again only if the system has - * more than one CPU). - * - * Implementation of these memory-ordering guarantees is described here: - * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst. - */ -void call_rcu(struct rcu_head *head, rcu_callback_t func) +static void +__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy) { static atomic_t doublefrees; unsigned long flags; @@ -2809,7 +2764,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func) } check_cb_ovld(rdp); - if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags)) + if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) return; // Enqueued onto ->nocb_bypass, so just leave. // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock. rcu_segcblist_enqueue(&rdp->cblist, head); @@ -2831,8 +2786,84 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func) local_irq_restore(flags); } } -EXPORT_SYMBOL_GPL(call_rcu); +#ifdef CONFIG_RCU_LAZY +/** + * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and + * flush all lazy callbacks (including the new one) to the main ->cblist while + * doing so. 
+ * + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. + * + * Use this API instead of call_rcu() if you don't want the callback to be + * invoked after very long periods of time, which can happen on systems without + * memory pressure and on systems which are lightly loaded or mostly idle. + * This function will cause callbacks to be invoked sooner rather than later at the + * expense of extra power. Other than that, this function is identical to, and + * reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory + * ordering and other functionality. + */ +void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) +{ + return __call_rcu_common(head, func, false); +} +EXPORT_SYMBOL_GPL(call_rcu_hurry); +#endif + +/** + * call_rcu() - Queue an RCU callback for invocation after a grace period. + * By default the callbacks are 'lazy' and are kept hidden from the main + * ->cblist to prevent starting of grace periods too soon. + * If you desire grace periods to start very soon, use call_rcu_hurry(). + * + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. + * + * RCU read-side critical sections are delimited by rcu_read_lock() + * and rcu_read_unlock(), and may be nested. In addition, but only in + * v5.0 and later, regions of code across which interrupts, preemption, + * or softirqs have been disabled also serve as RCU read-side critical + * sections. This includes hardware interrupt handlers, softirq handlers, + * and NMI handlers. + * + * Note that all CPUs must agree that the grace period extended beyond + * all pre-existing RCU read-side critical sections. On systems with more + * than one CPU, this means that when "func()" is invoked, each CPU is + * guaranteed to have executed a full memory barrier since the end of its + * last RCU read-side critical section whose beginning preceded the call + * to call_rcu(). It also means that each CPU executing an RCU read-side + * critical section that continues beyond the start of "func()" must have + * executed a memory barrier after the call_rcu() but before the beginning + * of that RCU read-side critical section. Note that these guarantees + * include CPUs that are offline, idle, or executing in user mode, as + * well as CPUs that are executing in the kernel. + * + * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the + * resulting RCU callback function "func()", then both CPU A and CPU B are + * guaranteed to execute a full memory barrier during the time interval + * between the call to call_rcu() and the invocation of "func()" -- even + * if CPU A and CPU B are the same CPU (but again only if the system has + * more than one CPU). + * + * Implementation of these memory-ordering guarantees is described here: + * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+ */ +void call_rcu(struct rcu_head *head, rcu_callback_t func) +{ + return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); +} +EXPORT_SYMBOL_GPL(call_rcu); /* Maximum number of jiffies to wait before draining a batch. */ #define KFREE_DRAIN_JIFFIES (5 * HZ) @@ -3507,7 +3538,7 @@ void synchronize_rcu(void) if (rcu_gp_is_expedited()) synchronize_rcu_expedited(); else - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); return; } @@ -3894,6 +3925,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp) { unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence); unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap); + bool wake_nocb = false; + bool was_alldone = false; lockdep_assert_held(&rcu_state.barrier_lock); if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq)) @@ -3902,7 +3935,14 @@ static void rcu_barrier_entrain(struct rcu_data *rdp) rdp->barrier_head.func = rcu_barrier_callback; debug_rcu_head_queue(&rdp->barrier_head); rcu_nocb_lock(rdp); - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); + /* + * Flush bypass and wakeup rcuog if we add callbacks to an empty regular + * queue. This way we don't wait for bypass timer that can reach seconds + * if it's fully lazy. + */ + was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); + wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist); if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { atomic_inc(&rcu_state.barrier_cpu_count); } else { @@ -3910,6 +3950,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp) rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence); } rcu_nocb_unlock(rdp); + if (wake_nocb) + wake_nocb_gp(rdp, false); smp_store_release(&rdp->barrier_seq_snap, gseq); } @@ -4276,8 +4318,6 @@ void rcu_report_dead(unsigned int cpu) // Do any dangling deferred wakeups. do_nocb_deferred_wakeup(rdp); - /* QS for any half-done expedited grace period. */ - rcu_report_exp_rdp(rdp); rcu_preempt_deferred_qs(current); /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ @@ -4325,7 +4365,7 @@ void rcutree_migrate_callbacks(int cpu) my_rdp = this_cpu_ptr(&rcu_data); my_rnp = my_rdp->mynode; rcu_nocb_lock(my_rdp); /* irqs already disabled. */ - WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies)); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false)); raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */ /* Leverage recent GPs and set GP for new callbacks. */ needwake = rcu_advance_cbs(my_rnp, rdp) || diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index d4a97e40ea9c..fcb5d696eb17 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -263,14 +263,16 @@ struct rcu_data { unsigned long last_fqs_resched; /* Time of last rcu_resched(). */ unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */ + long lazy_len; /* Length of buffered lazy callbacks. */ int cpu; }; /* Values for nocb_defer_wakeup field in struct rcu_data. 
*/ #define RCU_NOCB_WAKE_NOT 0 #define RCU_NOCB_WAKE_BYPASS 1 -#define RCU_NOCB_WAKE 2 -#define RCU_NOCB_WAKE_FORCE 3 +#define RCU_NOCB_WAKE_LAZY 2 +#define RCU_NOCB_WAKE 3 +#define RCU_NOCB_WAKE_FORCE 4 #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) /* For jiffies_till_first_fqs and */ @@ -439,10 +441,12 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp); static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); static void rcu_init_one_nocb(struct rcu_node *rnp); +static bool wake_nocb_gp(struct rcu_data *rdp, bool force); static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j); + unsigned long j, bool lazy); static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags); + bool *was_alldone, unsigned long flags, + bool lazy); static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, unsigned long flags); static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level); diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 18e9b4cd78ef..ed6c3cce28f2 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -937,7 +937,7 @@ void synchronize_rcu_expedited(void) /* If expedited grace periods are prohibited, fall back to normal. */ if (rcu_gp_is_normal()) { - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); return; } diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 0a5f0ef41484..9e1c8caec5ce 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -256,6 +256,31 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) return __wake_nocb_gp(rdp_gp, rdp, force, flags); } +/* + * LAZY_FLUSH_JIFFIES decides the maximum amount of time that + * can elapse before lazy callbacks are flushed. Lazy callbacks + * could be flushed much earlier for a number of other reasons + * however, LAZY_FLUSH_JIFFIES will ensure no lazy callbacks are + * left unsubmitted to RCU after those many jiffies. + */ +#define LAZY_FLUSH_JIFFIES (10 * HZ) +static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES; + +#ifdef CONFIG_RCU_LAZY +// To be called only from test code. +void rcu_lazy_set_jiffies_till_flush(unsigned long jif) +{ + jiffies_till_flush = jif; +} +EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush); + +unsigned long rcu_lazy_get_jiffies_till_flush(void) +{ + return jiffies_till_flush; +} +EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush); +#endif + /* * Arrange to wake the GP kthread for this NOCB group at some future * time when it is safe to do so. @@ -269,10 +294,14 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); /* - * Bypass wakeup overrides previous deferments. In case - * of callback storm, no need to wake up too early. + * Bypass wakeup overrides previous deferments. In case of + * callback storms, no need to wake up too early. 
*/ - if (waketype == RCU_NOCB_WAKE_BYPASS) { + if (waketype == RCU_NOCB_WAKE_LAZY && + rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) { + mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush); + WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); + } else if (waketype == RCU_NOCB_WAKE_BYPASS) { mod_timer(&rdp_gp->nocb_timer, jiffies + 2); WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); } else { @@ -293,12 +322,16 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, * proves to be initially empty, just return false because the no-CB GP * kthread may need to be awakened in this case. * + * Return true if there was something to be flushed and it succeeded, otherwise + * false. + * * Note that this function always returns true if rhp is NULL. */ -static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) +static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp_in, + unsigned long j, bool lazy) { struct rcu_cblist rcl; + struct rcu_head *rhp = rhp_in; WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)); rcu_lockdep_assert_cblist_protected(rdp); @@ -310,7 +343,20 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */ if (rhp) rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */ + + /* + * If the new CB requested was a lazy one, queue it onto the main + * ->cblist so that we can take advantage of the grace-period that will + * happen regardless. But queue it onto the bypass list first so that + * the lazy CB is ordered with the existing CBs in the bypass list. + */ + if (lazy && rhp) { + rcu_cblist_enqueue(&rdp->nocb_bypass, rhp); + rhp = NULL; + } rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); + WRITE_ONCE(rdp->lazy_len, 0); + rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl); WRITE_ONCE(rdp->nocb_bypass_first, j); rcu_nocb_bypass_unlock(rdp); @@ -326,13 +372,13 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, * Note that this function always returns true if rhp is NULL. */ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) + unsigned long j, bool lazy) { if (!rcu_rdp_is_offloaded(rdp)) return true; rcu_lockdep_assert_cblist_protected(rdp); rcu_nocb_bypass_lock(rdp); - return rcu_nocb_do_flush_bypass(rdp, rhp, j); + return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy); } /* @@ -345,7 +391,7 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) if (!rcu_rdp_is_offloaded(rdp) || !rcu_nocb_bypass_trylock(rdp)) return; - WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j)); + WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false)); } /* @@ -367,12 +413,14 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) * there is only one CPU in operation. */ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags) + bool *was_alldone, unsigned long flags, + bool lazy) { unsigned long c; unsigned long cur_gp_seq; unsigned long j = jiffies; long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len)); lockdep_assert_irqs_disabled(); @@ -417,24 +465,29 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, // If there hasn't yet been all that many ->cblist enqueues // this jiffy, tell the caller to enqueue onto ->cblist. But flush // ->nocb_bypass first. 
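        /*
         * Illustrative aside, not part of the patch: the new "&& !lazy"
         * condition below keeps lazy callbacks off this fast path, so a
         * lazy CB always lands in ->nocb_bypass, where it can be batched
         * with other lazy CBs instead of prodding a grace period promptly.
         */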
- if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) { + // Lazy CBs throttle this back and do immediate bypass queuing. + if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) { rcu_nocb_lock(rdp); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); if (*was_alldone) trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstQ")); - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j)); + + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false)); WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); return false; // Caller must enqueue the callback. } // If ->nocb_bypass has been used too long or is too full, // flush ->nocb_bypass to ->cblist. - if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) || + if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || + (ncbs && bypass_is_lazy && + (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) || ncbs >= qhimark) { rcu_nocb_lock(rdp); - if (!rcu_nocb_flush_bypass(rdp, rhp, j)) { - *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); + *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); + + if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) { if (*was_alldone) trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstQ")); @@ -447,7 +500,12 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, rcu_advance_cbs_nowake(rdp->mynode, rdp); rdp->nocb_gp_adv_time = j; } - rcu_nocb_unlock_irqrestore(rdp, flags); + + // The flush succeeded and we moved CBs into the regular list. + // Don't wait for the wake up timer as it may be too far ahead. + // Wake up the GP thread now instead, if the cblist was empty. + __call_rcu_nocb_wake(rdp, *was_alldone, flags); + return true; // Callback already enqueued. } @@ -457,13 +515,24 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */ rcu_cblist_enqueue(&rdp->nocb_bypass, rhp); + + if (lazy) + WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1); + if (!ncbs) { WRITE_ONCE(rdp->nocb_bypass_first, j); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ")); } rcu_nocb_bypass_unlock(rdp); smp_mb(); /* Order enqueue before wake. */ - if (ncbs) { + // A wake up of the grace period kthread or timer adjustment + // needs to be done only if: + // 1. Bypass list was fully empty before (this is the first + // bypass list entry), or: + // 2. Both of these conditions are met: + // a. The bypass list previously had only lazy CBs, and: + // b. The new CB is non-lazy. + if (ncbs && (!bypass_is_lazy || lazy)) { local_irq_restore(flags); } else { // No-CBs GP kthread might be indefinitely asleep, if so, wake. @@ -491,8 +560,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, unsigned long flags) __releases(rdp->nocb_lock) { + long bypass_len; unsigned long cur_gp_seq; unsigned long j; + long lazy_len; long len; struct task_struct *t; @@ -506,9 +577,16 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, } // Need to actually to a wakeup. 
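        /*
         * Illustrative aside, not part of the patch: just below, lazy_len ==
         * bypass_len means every bypass callback is lazy, in which case a
         * deferred RCU_NOCB_WAKE_LAZY timer (jiffies_till_flush, default
         * LAZY_FLUSH_JIFFIES = 10 * HZ) is armed rather than waking the
         * rcuog kthread immediately.
         */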
len = rcu_segcblist_n_cbs(&rdp->cblist); + bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass); + lazy_len = READ_ONCE(rdp->lazy_len); if (was_alldone) { rdp->qlen_last_fqs_check = len; - if (!irqs_disabled_flags(flags)) { + // Only lazy CBs in bypass list + if (lazy_len && bypass_len == lazy_len) { + rcu_nocb_unlock_irqrestore(rdp, flags); + wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, + TPS("WakeLazy")); + } else if (!irqs_disabled_flags(flags)) { /* ... if queue was empty ... */ rcu_nocb_unlock_irqrestore(rdp, flags); wake_nocb_gp(rdp, false); @@ -599,12 +677,12 @@ static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu) static void nocb_gp_wait(struct rcu_data *my_rdp) { bool bypass = false; - long bypass_ncbs; int __maybe_unused cpu = my_rdp->cpu; unsigned long cur_gp_seq; unsigned long flags; bool gotcbs = false; unsigned long j = jiffies; + bool lazy = false; bool needwait_gp = false; // This prevents actual uninitialized use. bool needwake; bool needwake_gp; @@ -634,24 +712,43 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) * won't be ignored for long. */ list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) { + long bypass_ncbs; + bool flush_bypass = false; + long lazy_ncbs; + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check")); rcu_nocb_lock_irqsave(rdp, flags); lockdep_assert_held(&rdp->nocb_lock); bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + lazy_ncbs = READ_ONCE(rdp->lazy_len); + + if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && + (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) || + bypass_ncbs > 2 * qhimark)) { + flush_bypass = true; + } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) || bypass_ncbs > 2 * qhimark)) { - // Bypass full or old, so flush it. - (void)rcu_nocb_try_flush_bypass(rdp, j); - bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + flush_bypass = true; } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { rcu_nocb_unlock_irqrestore(rdp, flags); continue; /* No callbacks here, try next. */ } + + if (flush_bypass) { + // Bypass full or old, so flush it. + (void)rcu_nocb_try_flush_bypass(rdp, j); + bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + lazy_ncbs = READ_ONCE(rdp->lazy_len); + } + if (bypass_ncbs) { trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, - TPS("Bypass")); - bypass = true; + bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass")); + if (bypass_ncbs == lazy_ncbs) + lazy = true; + else + bypass = true; } rnp = rdp->mynode; @@ -699,12 +796,20 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) my_rdp->nocb_gp_gp = needwait_gp; my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0; - if (bypass && !rcu_nocb_poll) { - // At least one child with non-empty ->nocb_bypass, so set - // timer in order to avoid stranding its callbacks. - wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS, - TPS("WakeBypassIsDeferred")); + // At least one child with non-empty ->nocb_bypass, so set + // timer in order to avoid stranding its callbacks. + if (!rcu_nocb_poll) { + // If the bypass list only has lazy CBs, add a deferred lazy wake up. + if (lazy && !bypass) { + wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY, + TPS("WakeLazyIsDeferred")); + // Otherwise add a deferred bypass wake up. + } else if (bypass) { + wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS, + TPS("WakeBypassIsDeferred")); + } } + if (rcu_nocb_poll) { /* Polling, so trace if first poll in the series.
*/ if (gotcbs) @@ -1030,7 +1135,7 @@ static long rcu_nocb_rdp_deoffload(void *arg) * return false, which means that future calls to rcu_nocb_try_bypass() * will refuse to put anything into the bypass. */ - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); /* * Start with invoking rcu_core() early. This way if the current thread * happens to preempt an ongoing call to rcu_core() in the middle, @@ -1207,47 +1312,87 @@ int rcu_nocb_cpu_offload(int cpu) } EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload); +static unsigned long +lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + int cpu; + unsigned long count = 0; + + /* Snapshot count of all CPUs */ + for_each_possible_cpu(cpu) { + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + + count += READ_ONCE(rdp->lazy_len); + } + + return count ? count : SHRINK_EMPTY; +} + +static unsigned long +lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + int cpu; + unsigned long flags; + unsigned long count = 0; + + /* Snapshot count of all CPUs */ + for_each_possible_cpu(cpu) { + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + int _count = READ_ONCE(rdp->lazy_len); + + if (_count == 0) + continue; + rcu_nocb_lock_irqsave(rdp, flags); + WRITE_ONCE(rdp->lazy_len, 0); + rcu_nocb_unlock_irqrestore(rdp, flags); + wake_nocb_gp(rdp, false); + sc->nr_to_scan -= _count; + count += _count; + if (sc->nr_to_scan <= 0) + break; + } + return count ? count : SHRINK_STOP; +} + +static struct shrinker lazy_rcu_shrinker = { + .count_objects = lazy_rcu_shrink_count, + .scan_objects = lazy_rcu_shrink_scan, + .batch = 0, + .seeks = DEFAULT_SEEKS, +}; + void __init rcu_init_nohz(void) { int cpu; - bool need_rcu_nocb_mask = false; - bool offload_all = false; struct rcu_data *rdp; - -#if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) - if (!rcu_state.nocb_is_setup) { - need_rcu_nocb_mask = true; - offload_all = true; - } -#endif /* #if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) */ + const struct cpumask *cpumask = NULL; #if defined(CONFIG_NO_HZ_FULL) - if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) { - need_rcu_nocb_mask = true; - offload_all = false; /* NO_HZ_FULL has its own mask. 
*/ - } -#endif /* #if defined(CONFIG_NO_HZ_FULL) */ + if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) + cpumask = tick_nohz_full_mask; +#endif - if (need_rcu_nocb_mask) { + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) && + !rcu_state.nocb_is_setup && !cpumask) + cpumask = cpu_possible_mask; + + if (cpumask) { if (!cpumask_available(rcu_nocb_mask)) { if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) { pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n"); return; } } + + cpumask_or(rcu_nocb_mask, rcu_nocb_mask, cpumask); rcu_state.nocb_is_setup = true; } if (!rcu_state.nocb_is_setup) return; -#if defined(CONFIG_NO_HZ_FULL) - if (tick_nohz_full_running) - cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask); -#endif /* #if defined(CONFIG_NO_HZ_FULL) */ - - if (offload_all) - cpumask_setall(rcu_nocb_mask); + if (register_shrinker(&lazy_rcu_shrinker, "rcu-lazy")) + pr_err("Failed to register lazy_rcu shrinker!\n"); if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n"); @@ -1284,6 +1429,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) raw_spin_lock_init(&rdp->nocb_gp_lock); timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); rcu_cblist_init(&rdp->nocb_bypass); + WRITE_ONCE(rdp->lazy_len, 0); mutex_init(&rdp->nocb_gp_kthread_mutex); } @@ -1564,14 +1710,19 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) { } +static bool wake_nocb_gp(struct rcu_data *rdp, bool force) +{ + return false; +} + static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) + unsigned long j, bool lazy) { return true; } static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags) + bool *was_alldone, unsigned long flags, bool lazy) { return false; } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e3142ee35fc6..7b0fe741a088 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1221,11 +1221,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) * We don't include outgoingcpu in the affinity set, use -1 if there is * no outgoing CPU. If there are no CPUs left in the affinity set, * this function allows the kthread to execute on any CPU. + * + * Any future concurrent calls are serialized via ->boost_kthread_mutex. 
*/ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) { struct task_struct *t = rnp->boost_kthread_task; - unsigned long mask = rcu_rnp_online_cpus(rnp); + unsigned long mask; cpumask_var_t cm; int cpu; @@ -1234,6 +1236,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) return; mutex_lock(&rnp->boost_kthread_mutex); + mask = rcu_rnp_online_cpus(rnp); for_each_leaf_node_possible_cpu(rnp, cpu) if ((mask & leaf_node_cpu_bit(rnp, cpu)) && cpu != outgoingcpu) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7cd5f5e7e0a1..07895deca271 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1771,7 +1771,7 @@ bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork) if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { rwork->wq = wq; - call_rcu(&rwork->rcu, rcu_work_rcufn); + call_rcu_hurry(&rwork->rcu, rcu_work_rcufn); return true; } diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index e5c5315da274..668f6aa6a75d 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -230,7 +230,8 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_noop_confirm_switch; percpu_ref_get(ref); /* put after confirmation */ - call_rcu(&ref->data->rcu, percpu_ref_switch_to_atomic_rcu); + call_rcu_hurry(&ref->data->rcu, + percpu_ref_switch_to_atomic_rcu); } static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) diff --git a/net/core/dst.c b/net/core/dst.c index bc9c9be4e080..a4e738d321ba 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -174,7 +174,7 @@ void dst_release(struct dst_entry *dst) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); if (!newrefcnt) - call_rcu(&dst->rcu_head, dst_destroy_rcu); + call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); } } EXPORT_SYMBOL(dst_release); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e8b9a9202fec..b0acf6e19aed 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -234,13 +234,20 @@ static void inet_free_ifa(struct in_ifaddr *ifa) call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); } +static void in_dev_free_rcu(struct rcu_head *head) +{ + struct in_device *idev = container_of(head, struct in_device, rcu_head); + + kfree(rcu_dereference_protected(idev->mc_hash, 1)); + kfree(idev); +} + void in_dev_finish_destroy(struct in_device *idev) { struct net_device *dev = idev->dev; WARN_ON(idev->ifa_list); WARN_ON(idev->mc_list); - kfree(rcu_dereference_protected(idev->mc_hash, 1)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? 
dev->name : "NIL"); #endif @@ -248,7 +255,7 @@ void in_dev_finish_destroy(struct in_device *idev) if (!idev->dead) pr_err("Freeing alive in_device %p\n", idev); else - kfree(idev); + call_rcu(&idev->rcu_head, in_dev_free_rcu); } EXPORT_SYMBOL(in_dev_finish_destroy); @@ -298,12 +305,6 @@ out_kfree: goto out; } -static void in_dev_rcu_put(struct rcu_head *head) -{ - struct in_device *idev = container_of(head, struct in_device, rcu_head); - in_dev_put(idev); -} - static void inetdev_destroy(struct in_device *in_dev) { struct net_device *dev; @@ -328,7 +329,7 @@ static void inetdev_destroy(struct in_device *in_dev) neigh_parms_release(&arp_tbl, in_dev->arp_parms); arp_ifdown(dev); - call_rcu(&in_dev->rcu_head, in_dev_rcu_put); + in_dev_put(in_dev); } int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) diff --git a/tools/testing/selftests/rcutorture/bin/config2csv.sh b/tools/testing/selftests/rcutorture/bin/config2csv.sh index d5a16631b16e..0cf55f1bf654 100755 --- a/tools/testing/selftests/rcutorture/bin/config2csv.sh +++ b/tools/testing/selftests/rcutorture/bin/config2csv.sh @@ -30,9 +30,8 @@ else fi scenarios="`echo $scenariosarg | sed -e "s/\/$defaultconfigs/g"`" -T=/tmp/config2latex.sh.$$ +T=`mktemp -d /tmp/config2latex.sh.XXXXXX` trap 'rm -rf $T' 0 -mkdir $T cat << '---EOF---' >> $T/p.awk END { diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh index 90016c359e83..b3d2e7efa40c 100755 --- a/tools/testing/selftests/rcutorture/bin/config_override.sh +++ b/tools/testing/selftests/rcutorture/bin/config_override.sh @@ -29,9 +29,8 @@ else exit 1 fi -T=${TMPDIR-/tmp}/config_override.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/config_override.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' | awk ' diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh index 31584cee84d7..83fac1852ab2 100755 --- a/tools/testing/selftests/rcutorture/bin/configcheck.sh +++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh @@ -7,9 +7,8 @@ # # Authors: Paul E. McKenney -T=${TMPDIR-/tmp}/abat-chk-config.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T cat $1 > $T/.config diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh index d6e5ce084b1c..28bdb3ac7ba6 100755 --- a/tools/testing/selftests/rcutorture/bin/configinit.sh +++ b/tools/testing/selftests/rcutorture/bin/configinit.sh @@ -15,9 +15,8 @@ # # Authors: Paul E. McKenney -T=${TMPDIR-/tmp}/configinit.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/configinit.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T # Capture config spec file. diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh index 0941f1ddab65..8a968fbda02c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh @@ -12,9 +12,8 @@ scriptname=$0 args="$*" -T=${TMPDIR-/tmp}/kvm-again.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-again.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T if ! test -d tools/testing/selftests/rcutorture/bin then @@ -51,27 +50,56 @@ RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE PATH=${RCUTORTURE}/bin:$PATH; export PATH . 
functions.sh +bootargs= dryrun= dur= default_link="cp -R" -rundir="`pwd`/tools/testing/selftests/rcutorture/res/`date +%Y.%m.%d-%H.%M.%S-again`" +resdir="`pwd`/tools/testing/selftests/rcutorture/res" +rundir="$resdir/`date +%Y.%m.%d-%H.%M.%S-again`" +got_datestamp= +got_rundir= startdate="`date`" starttime="`get_starttime`" usage () { echo "Usage: $scriptname $oldrun [ arguments ]:" + echo " --bootargs kernel-boot-arguments" + echo " --datestamp string" echo " --dryrun" echo " --duration minutes | <seconds>s | <hours>h | <days>d" echo " --link hard|soft|copy" echo " --remote" echo " --rundir /new/res/path" + echo "Command line: $scriptname $args" exit 1 } while test $# -gt 0 do case "$1" in + --bootargs|--bootarg) + checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--' + bootargs="$bootargs $2" + shift + ;; + --datestamp) + checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--' + if test -n "$got_rundir" || test -n "$got_datestamp" + then + echo Only one of --datestamp or --rundir may be specified + usage + fi + got_datestamp=y + ds=$2 + rundir="$resdir/$ds" + if test -e "$rundir" + then + echo "--datestamp $2: Already exists." + usage + fi + shift + ;; --dryrun) dryrun=1 ;; @@ -113,6 +141,12 @@ do ;; --rundir) checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error' + if test -n "$got_rundir" || test -n "$got_datestamp" + then + echo Only one of --datestamp or --rundir may be specified + usage + fi + got_rundir=y rundir=$2 if test -e "$rundir" then @@ -122,8 +156,11 @@ do shift ;; *) - echo Unknown argument $1 - usage + if test -n "$1" + then + echo Unknown argument $1 + usage + fi ;; esac shift @@ -156,7 +193,7 @@ do qemu_cmd_dir="`dirname "$i"`" kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`" jitter_dir="`dirname "$kernel_dir"`" - kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur < $T/qemu-cmd > $i + kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur "$bootargs" < $T/qemu-cmd > $i if test -n "$arg_remote" then echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i diff --git a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh index f99b2c146f83..46b08cd16ba5 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh @@ -7,9 +7,8 @@ # # Usage: kvm-assign-cpus.sh /path/to/sysfs -T=/tmp/kvm-assign-cpus.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-assign-cpus.sh.XXXXXX`" trap 'rm -rf $T' 0 2 -mkdir $T sysfsdir=${1-/sys/devices/system/node} if ! 
cd "$sysfsdir" > $T/msg 2>&1 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index 5ad973dca820..e28a82851f7c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -23,9 +23,8 @@ then fi resdir=${2} -T=${TMPDIR-/tmp}/test-linux.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-build.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T cp ${config_template} $T/config cat << ___EOF___ >> $T/config diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh index ee886b40a5d2..2b56baceb05d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh @@ -18,9 +18,8 @@ then exit 1 fi -T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-end-run-stats.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE PATH=${RCUTORTURE}/bin:$PATH; export PATH diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 0789c5606d2a..1df7e695edf7 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -30,7 +30,7 @@ do resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'` head -1 $resdir/log fi - TORTURE_SUITE="`cat $i/../torture_suite`" + TORTURE_SUITE="`cat $i/../torture_suite`" ; export TORTURE_SUITE configfile=`echo $i | sed -e 's,^.*/,,'` rm -f $i/console.log.*.diags case "${TORTURE_SUITE}" in diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 9f0a5d5ff2dd..a2328163eba1 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -34,19 +34,18 @@ fi shift # Pathnames: -# T: /tmp/kvm-remote.sh.$$ -# resdir: /tmp/kvm-remote.sh.$$/res -# rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix) +# T: /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp +# resdir: /tmp/kvm-remote.sh.NNNNNN/res +# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix) # oldrun: `pwd`/tools/testing/.../res/$otherds # # Pathname segments: -# TD: kvm-remote.sh.$$ +# TD: kvm-remote.sh.NNNNNN # ds: yyyy.mm.dd-hh.mm.ss-remote -TD=kvm-remote.sh.$$ -T=${TMPDIR-/tmp}/$TD +T="`mktemp -d ${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T +TD="`basename "$T"`" resdir="$T/res" ds=`date +%Y.%m.%d-%H.%M.%S`-remote diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh index 1e29d656501b..c3808c490d92 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh @@ -13,9 +13,8 @@ # # Authors: Paul E. 
McKenney -T=${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T echo ---- Running batch $* # Check arguments diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh index 44280582c594..76f24cd5825b 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh @@ -17,9 +17,8 @@ # # Authors: Paul E. McKenney -T=${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T resdir="$1" if ! test -d "$resdir" @@ -109,7 +108,7 @@ do if test $kruntime -lt $seconds then echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1 - grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1 + grep "^(qemu) qemu:" $resdir/kvm-test-1-run*.sh.out >> $resdir/Warnings 2>&1 killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`" if test -n "$killpid" then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index f4c8055dbf7a..d2a3710a5f2a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -25,9 +25,8 @@ # # Authors: Paul E. McKenney -T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T . functions.sh . $CONFIGFRAG/ver_functions.sh diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh index d40b4e60a50c..75a2610a27f3 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh @@ -3,10 +3,14 @@ # # Transform a qemu-cmd file to allow reuse. # -# Usage: kvm-transform.sh bzImage console.log jitter_dir [ seconds ] < qemu-cmd-in > qemu-cmd-out +# Usage: kvm-transform.sh bzImage console.log jitter_dir seconds [ bootargs ] < qemu-cmd-in > qemu-cmd-out # # bzImage: Kernel and initrd from the same prior kvm.sh run. # console.log: File into which to place console output. +# jitter_dir: Jitter directory for TORTURE_JITTER_START and +# TORTURE_JITTER_STOP environment variables. +# seconds: Run duration for *.shutdown_secs module parameter. +# bootargs: New kernel boot parameters. Beware of Robert Tables. # # The original qemu-cmd file is provided on standard input. # The transformed qemu-cmd file is on standard output. @@ -17,6 +21,9 @@ # # Authors: Paul E. McKenney +T=`mktemp -d /tmp/kvm-transform.sh.XXXXXXXXXX` +trap 'rm -rf $T' 0 2 + image="$1" if test -z "$image" then @@ -41,9 +48,17 @@ then echo "Invalid duration, should be numeric in seconds: '$seconds'" exit 1 fi +bootargs="$5"
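The awk program built just below loads each new boot argument into a bootarg[] array and each argument's name (the text before "=") into a bootpar[] array, so that any occurrence of the same parameter in the old qemu-cmd -append string can be dropped before the new values are appended. A standalone sketch of that override technique, assuming only POSIX shell and awk (the old/new_args values are illustrative, not taken from the patch):

#!/bin/sh
# Sketch: override kernel boot parameters in an existing command line.
old="console=ttyS0 rcutree.gp_init_delay=3 rcutorture.shutdown_secs=60"
new_args="rcutree.gp_init_delay=12 rcutorture.onoff_interval=200"

echo "$old" | tr -s ' ' '\012' | awk -v new="$new_args" '
BEGIN {
	# Record the new arguments and the parameter names they set.
	n = split(new, a, " ");
	for (i = 1; i <= n; i++) {
		bootarg[i] = a[i];
		par = a[i];
		sub(/=.*$/, "", par);
		bootpar[i] = par;
	}
}
{
	# Drop any old argument whose parameter name is being overridden.
	par = $1;
	sub(/=.*$/, "", par);
	for (i = 1; i <= n; i++)
		if (bootpar[i] == par)
			next;
	line = line " " $1;
}
END {
	# Append the new arguments after the surviving old ones.
	for (i = 1; i <= n; i++)
		line = line " " bootarg[i];
	print substr(line, 2);
}'

This would print "console=ttyS0 rcutorture.shutdown_secs=60 rcutree.gp_init_delay=12 rcutorture.onoff_interval=200"; the patch's version additionally handles the surrounding quotes and .shutdown_secs rewriting seen in the hunk that follows.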
+echo "BEGIN {" > $T/bootarg.awk +echo $bootargs | tr -s ' ' '\012' | + awk -v dq='"' '/./ { print "\tbootarg[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk +echo $bootargs | tr -s ' ' '\012' | sed -e 's/=.*$//' | + awk -v dq='"' '/./ { print "\tbootpar[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk +cat >> $T/bootarg.awk << '___EOF___' +} -awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ - -v seconds="$seconds" ' /^# seconds=/ { if (seconds == "") print $0; @@ -70,13 +85,7 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ { line = ""; for (i = 1; i <= NF; i++) { - if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) { - sub(/[0-9]*$/, seconds, $i); - if (line == "") - line = $i; - else - line = line " " $i; - } else if (line == "") { + if (line == "") { line = $i; } else { line = line " " $i; @@ -87,7 +96,44 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ } else if ($i == "-kernel") { i++; line = line " " image; + } else if ($i == "-append") { + for (i++; i <= NF; i++) { + arg = $i; + lq = ""; + rq = ""; + if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) + sub(/[0-9]*$/, seconds, arg); + if (arg ~ /^"/) { + lq = substr(arg, 1, 1); + arg = substr(arg, 2); + } + if (arg ~ /"$/) { + rq = substr(arg, length($i), 1); + arg = substr(arg, 1, length($i) - 1); + } + par = arg; + gsub(/=.*$/, "", par); + j = 1; + while (bootpar[j] != "") { + if (bootpar[j] == par) { + arg = ""; + break; + } + j++; + } + if (line == "") + line = lq arg; + else + line = line " " lq arg; + } + for (j in bootarg) + line = line " " bootarg[j]; + line = line rq; } } print line; -}' +} +___EOF___ + +awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ + -v seconds="$seconds" -f $T/bootarg.awk diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 6c734818a875..7710b1e1cdda 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -14,9 +14,8 @@ scriptname=$0 args="$*" -T=${TMPDIR-/tmp}/kvm.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T cd `dirname $scriptname`/../../../../../ diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh index 2dbfca3589b1..5a0b7ffcf047 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-build.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh @@ -15,9 +15,8 @@ F=$1 title=$2 -T=${TMPDIR-/tmp}/parse-build.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/parse-build.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T . 
functions.sh diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index d477618e7261..130d0de4c3bb 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -206,9 +206,8 @@ ds="`date +%Y.%m.%d-%H.%M.%S`-torture" startdate="`date`" starttime="`get_starttime`" -T=/tmp/torture.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/torture.sh.XXXXXX`" trap 'rm -rf $T' 0 2 -mkdir $T echo " --- " $scriptname $args | tee -a $T/log echo " --- Results directory: " $ds | tee -a $T/log @@ -278,6 +277,8 @@ function torture_one { then cat $T/$curflavor.out | tee -a $T/log echo retcode=$retcode | tee -a $T/log + else + echo $resdir > $T/last-resdir fi if test "$retcode" == 0 then @@ -303,10 +304,12 @@ function torture_set { shift curflavor=$flavor torture_one "$@" + mv $T/last-resdir $T/last-resdir-nodebug || : if test "$do_kasan" = "yes" then curflavor=${flavor}-kasan torture_one "$@" --kasan + mv $T/last-resdir $T/last-resdir-kasan || : fi if test "$do_kcsan" = "yes" then @@ -317,6 +320,7 @@ function torture_set { cur_kcsan_kmake_args="$kcsan_kmake_args" fi torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan + mv $T/last-resdir $T/last-resdir-kcsan || : fi } @@ -326,20 +330,34 @@ then echo " --- allmodconfig:" Start `date` | tee -a $T/log amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig" mkdir -p "$amcdir" - echo " --- make clean" > "$amcdir/Make.out" 2>&1 + echo " --- make clean" | tee $amcdir/log > "$amcdir/Make.out" 2>&1 make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1 - echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1 - cp .config $amcdir - make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1 - echo " --- make " >> "$amcdir/Make.out" 2>&1 - make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 - retcode="$?" - echo $retcode > "$amcdir/Make.exitcode" - if test "$retcode" == 0 + retcode=$? + buildphase='"make clean"' + if test "$retcode" -eq 0 + then + echo " --- make allmodconfig" | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1 + cp .config $amcdir + make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1 + retcode=$? + buildphase='"make allmodconfig"' + fi + if test "$retcode" -eq 0 + then + echo " --- make " | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1 + make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 + retcode="$?" 
+ echo $retcode > "$amcdir/Make.exitcode" + buildphase='"make"' + fi + if test "$retcode" -eq 0 then echo "allmodconfig($retcode)" $amcdir >> $T/successes + echo Success >> $amcdir/log else echo "allmodconfig($retcode)" $amcdir >> $T/failures + echo " --- allmodconfig Test summary:" >> $amcdir/log + echo " --- Summary: Exit code $retcode from $buildphase, see Make.out" >> $amcdir/log fi fi @@ -379,11 +397,48 @@ then else primlist= fi +firsttime=1 +do_kasan_save="$do_kasan" +do_kcsan_save="$do_kcsan" for prim in $primlist do - torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" - torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make + if test -n "$firsttime" + then + torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" + torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make + mv $T/last-resdir-nodebug $T/first-resdir-nodebug || : + if test -f "$T/last-resdir-kasan" + then + mv $T/last-resdir-kasan $T/first-resdir-kasan || : + fi + if test -f "$T/last-resdir-kcsan" + then + mv $T/last-resdir-kcsan $T/first-resdir-kcsan || : + fi + firsttime= + do_kasan= + do_kcsan= + else + torture_bootargs= + for i in $T/first-resdir-* + do + case "$i" in + *-nodebug) + torture_suffix= + ;; + *-kasan) + torture_suffix="-kasan" + ;; + *-kcsan) + torture_suffix="-kcsan" + ;; + esac + torture_set "refscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "refscale.scale_type=$prim" + done + fi done +do_kasan="$do_kasan_save" +do_kcsan="$do_kcsan_save" if test "$do_rcuscale" = yes then @@ -391,11 +446,48 @@ then else primlist= fi +firsttime=1 +do_kasan_save="$do_kasan" +do_kcsan_save="$do_kcsan" for prim in $primlist do - torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" - torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make + if test -n "$firsttime" + then + torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" + torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make + mv $T/last-resdir-nodebug $T/first-resdir-nodebug || : + if test -f "$T/last-resdir-kasan" + then + mv $T/last-resdir-kasan $T/first-resdir-kasan || : + fi + if test -f "$T/last-resdir-kcsan" + then + mv $T/last-resdir-kcsan $T/first-resdir-kcsan || : + fi + firsttime= + do_kasan= + do_kcsan= + else + torture_bootargs= + for i in $T/first-resdir-* + do + case "$i" in + *-nodebug) + 
torture_suffix= + ;; + *-kasan) + torture_suffix="-kasan" + ;; + *-kcsan) + torture_suffix="-kcsan" + ;; + esac + torture_set "rcuscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "rcuscale.scale_type=$prim" + done + fi done +do_kasan="$do_kasan_save" +do_kcsan="$do_kcsan_save" if test "$do_kvfree" = "yes" then @@ -458,7 +550,10 @@ if test -n "$tdir" && test $compress_concurrency -gt 0 then # KASAN vmlinux files can approach 1GB in size, so compress them. echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1 - find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo + find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo-all + find "$tdir" -type f -name 're-run' -print | sed -e 's,/re-run,,' | + grep -e '-k[ac]san$' > $T/xz-todo-copy + sort $T/xz-todo-all $T/xz-todo-copy | uniq -u > $T/xz-todo ncompresses=0 batchno=1 if test -s $T/xz-todo @@ -490,6 +585,24 @@ then echo Waiting for final batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log fi wait + if test -s $T/xz-todo-copy + then + # The trick here is that we need corresponding + # vmlinux files from corresponding scenarios. + echo Linking vmlinux.xz files to re-use scenarios `date` | tee -a "$tdir/log-xz" | tee -a $T/log + dirstash="`pwd`" + for i in `cat $T/xz-todo-copy` + do + cd $i + find . -name vmlinux -print > $T/xz-todo-copy-vmlinux + for v in `cat $T/xz-todo-copy-vmlinux` + do + rm -f "$v" + cp -l `cat $i/re-run`/"$i/$v".xz "`dirname "$v"`" + done + cd "$dirstash" + done + fi echo Size after compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log echo Total duration `get_starttime_duration $starttime`. | tee -a $T/log else
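The change repeated throughout the scripts above replaces predictable /tmp/scriptname.$$ temporary directories with mktemp, which both guarantees a unique name and avoids symlink races on a shared /tmp, and drops the now-redundant mkdir. A minimal sketch of the idiom as these scripts use it (the example.sh name and work-file contents are illustrative, not part of the patch):

#!/bin/sh
# Create a private scratch directory; mktemp generates an unpredictable name.
T="`mktemp -d ${TMPDIR-/tmp}/example.sh.XXXXXX`" || exit 1
# Remove it on normal exit (0) and, where the scripts want it, on SIGINT (2).
trap 'rm -rf $T' 0 2

echo "scratch work" > "$T/work-file"   # hypothetical use of the directory
cat "$T/work-file"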