dd5e073381
syzbot report reminded us that very big ewma_log were supported in the past,
even if they made little sense.
tc qdisc replace dev xxx root est 1sec 131072sec ...
While fixing the bug, also add boundary checks for ewma_log, in line
with range supported by iproute2.
UBSAN: shift-out-of-bounds in net/core/gen_estimator.c:83:38
shift exponent -1 is negative
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.10.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
<IRQ>
__dump_stack lib/dump_stack.c:79 [inline]
dump_stack+0x107/0x163 lib/dump_stack.c:120
ubsan_epilogue+0xb/0x5a lib/ubsan.c:148
__ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395
est_timer.cold+0xbb/0x12d net/core/gen_estimator.c:83
call_timer_fn+0x1a5/0x710 kernel/time/timer.c:1417
expire_timers kernel/time/timer.c:1462 [inline]
__run_timers.part.0+0x692/0xa80 kernel/time/timer.c:1731
__run_timers kernel/time/timer.c:1712 [inline]
run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1744
__do_softirq+0x2bc/0xa77 kernel/softirq.c:343
asm_call_irq_on_stack+0xf/0x20
</IRQ>
__run_on_irqstack arch/x86/include/asm/irq_stack.h:26 [inline]
run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:77 [inline]
do_softirq_own_stack+0xaa/0xd0 arch/x86/kernel/irq_64.c:77
invoke_softirq kernel/softirq.c:226 [inline]
__irq_exit_rcu+0x17f/0x200 kernel/softirq.c:420
irq_exit_rcu+0x5/0x20 kernel/softirq.c:432
sysvec_apic_timer_interrupt+0x4d/0x100 arch/x86/kernel/apic/apic.c:1096
asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:628
RIP: 0010:native_save_fl arch/x86/include/asm/irqflags.h:29 [inline]
RIP: 0010:arch_local_save_flags arch/x86/include/asm/irqflags.h:79 [inline]
RIP: 0010:arch_irqs_disabled arch/x86/include/asm/irqflags.h:169 [inline]
RIP: 0010:acpi_safe_halt drivers/acpi/processor_idle.c:111 [inline]
RIP: 0010:acpi_idle_do_entry+0x1c9/0x250 drivers/acpi/processor_idle.c:516
Fixes: 1c0d32fde5 ("net_sched: gen_estimator: complete rewrite of rate estimators")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20210114181929.1717985-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
271 lines
7.0 KiB
C
271 lines
7.0 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* net/sched/gen_estimator.c Simple rate estimator.
|
|
*
|
|
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
|
|
* Eric Dumazet <edumazet@google.com>
|
|
*
|
|
* Changes:
|
|
* Jamal Hadi Salim - moved it to net/core and reshuffled
|
|
* names to make it usable in general net subsystem.
|
|
*/
|
|
|
|
#include <linux/uaccess.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/module.h>
|
|
#include <linux/types.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/jiffies.h>
|
|
#include <linux/string.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/socket.h>
|
|
#include <linux/sockios.h>
|
|
#include <linux/in.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/rtnetlink.h>
|
|
#include <linux/init.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/seqlock.h>
|
|
#include <net/sock.h>
|
|
#include <net/gen_stats.h>
|
|
|
|
/* This code is NOT intended to be used for statistics collection,
|
|
* its purpose is to provide a base for statistical multiplexing
|
|
* for controlled load service.
|
|
* If you need only statistics, run a user level daemon which
|
|
* periodically reads byte counters.
|
|
*/
|
|
|
|
struct net_rate_estimator {
|
|
struct gnet_stats_basic_packed *bstats;
|
|
spinlock_t *stats_lock;
|
|
seqcount_t *running;
|
|
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
|
|
u8 ewma_log;
|
|
u8 intvl_log; /* period : (250ms << intvl_log) */
|
|
|
|
seqcount_t seq;
|
|
u64 last_packets;
|
|
u64 last_bytes;
|
|
|
|
u64 avpps;
|
|
u64 avbps;
|
|
|
|
unsigned long next_jiffies;
|
|
struct timer_list timer;
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
static void est_fetch_counters(struct net_rate_estimator *e,
|
|
struct gnet_stats_basic_packed *b)
|
|
{
|
|
memset(b, 0, sizeof(*b));
|
|
if (e->stats_lock)
|
|
spin_lock(e->stats_lock);
|
|
|
|
__gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats);
|
|
|
|
if (e->stats_lock)
|
|
spin_unlock(e->stats_lock);
|
|
|
|
}
|
|
|
|
static void est_timer(struct timer_list *t)
|
|
{
|
|
struct net_rate_estimator *est = from_timer(est, t, timer);
|
|
struct gnet_stats_basic_packed b;
|
|
u64 rate, brate;
|
|
|
|
est_fetch_counters(est, &b);
|
|
brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
|
|
brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
|
|
|
|
rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
|
|
rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
|
|
|
|
write_seqcount_begin(&est->seq);
|
|
est->avbps += brate;
|
|
est->avpps += rate;
|
|
write_seqcount_end(&est->seq);
|
|
|
|
est->last_bytes = b.bytes;
|
|
est->last_packets = b.packets;
|
|
|
|
est->next_jiffies += ((HZ/4) << est->intvl_log);
|
|
|
|
if (unlikely(time_after_eq(jiffies, est->next_jiffies))) {
|
|
/* Ouch... timer was delayed. */
|
|
est->next_jiffies = jiffies + 1;
|
|
}
|
|
mod_timer(&est->timer, est->next_jiffies);
|
|
}
|
|
|
|
/**
|
|
* gen_new_estimator - create a new rate estimator
|
|
* @bstats: basic statistics
|
|
* @cpu_bstats: bstats per cpu
|
|
* @rate_est: rate estimator statistics
|
|
* @lock: lock for statistics and control path
|
|
* @running: qdisc running seqcount
|
|
* @opt: rate estimator configuration TLV
|
|
*
|
|
* Creates a new rate estimator with &bstats as source and &rate_est
|
|
* as destination. A new timer with the interval specified in the
|
|
* configuration TLV is created. Upon each interval, the latest statistics
|
|
* will be read from &bstats and the estimated rate will be stored in
|
|
* &rate_est with the statistics lock grabbed during this period.
|
|
*
|
|
* Returns 0 on success or a negative error code.
|
|
*
|
|
*/
|
|
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
|
|
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
|
|
struct net_rate_estimator __rcu **rate_est,
|
|
spinlock_t *lock,
|
|
seqcount_t *running,
|
|
struct nlattr *opt)
|
|
{
|
|
struct gnet_estimator *parm = nla_data(opt);
|
|
struct net_rate_estimator *old, *est;
|
|
struct gnet_stats_basic_packed b;
|
|
int intvl_log;
|
|
|
|
if (nla_len(opt) < sizeof(*parm))
|
|
return -EINVAL;
|
|
|
|
/* allowed timer periods are :
|
|
* -2 : 250ms, -1 : 500ms, 0 : 1 sec
|
|
* 1 : 2 sec, 2 : 4 sec, 3 : 8 sec
|
|
*/
|
|
if (parm->interval < -2 || parm->interval > 3)
|
|
return -EINVAL;
|
|
|
|
if (parm->ewma_log == 0 || parm->ewma_log >= 31)
|
|
return -EINVAL;
|
|
|
|
est = kzalloc(sizeof(*est), GFP_KERNEL);
|
|
if (!est)
|
|
return -ENOBUFS;
|
|
|
|
seqcount_init(&est->seq);
|
|
intvl_log = parm->interval + 2;
|
|
est->bstats = bstats;
|
|
est->stats_lock = lock;
|
|
est->running = running;
|
|
est->ewma_log = parm->ewma_log;
|
|
est->intvl_log = intvl_log;
|
|
est->cpu_bstats = cpu_bstats;
|
|
|
|
if (lock)
|
|
local_bh_disable();
|
|
est_fetch_counters(est, &b);
|
|
if (lock)
|
|
local_bh_enable();
|
|
est->last_bytes = b.bytes;
|
|
est->last_packets = b.packets;
|
|
|
|
if (lock)
|
|
spin_lock_bh(lock);
|
|
old = rcu_dereference_protected(*rate_est, 1);
|
|
if (old) {
|
|
del_timer_sync(&old->timer);
|
|
est->avbps = old->avbps;
|
|
est->avpps = old->avpps;
|
|
}
|
|
|
|
est->next_jiffies = jiffies + ((HZ/4) << intvl_log);
|
|
timer_setup(&est->timer, est_timer, 0);
|
|
mod_timer(&est->timer, est->next_jiffies);
|
|
|
|
rcu_assign_pointer(*rate_est, est);
|
|
if (lock)
|
|
spin_unlock_bh(lock);
|
|
if (old)
|
|
kfree_rcu(old, rcu);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(gen_new_estimator);
|
|
|
|
/**
|
|
* gen_kill_estimator - remove a rate estimator
|
|
* @rate_est: rate estimator
|
|
*
|
|
* Removes the rate estimator.
|
|
*
|
|
*/
|
|
void gen_kill_estimator(struct net_rate_estimator __rcu **rate_est)
|
|
{
|
|
struct net_rate_estimator *est;
|
|
|
|
est = xchg((__force struct net_rate_estimator **)rate_est, NULL);
|
|
if (est) {
|
|
del_timer_sync(&est->timer);
|
|
kfree_rcu(est, rcu);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(gen_kill_estimator);
|
|
|
|
/**
|
|
* gen_replace_estimator - replace rate estimator configuration
|
|
* @bstats: basic statistics
|
|
* @cpu_bstats: bstats per cpu
|
|
* @rate_est: rate estimator statistics
|
|
* @lock: lock for statistics and control path
|
|
* @running: qdisc running seqcount (might be NULL)
|
|
* @opt: rate estimator configuration TLV
|
|
*
|
|
* Replaces the configuration of a rate estimator by calling
|
|
* gen_kill_estimator() and gen_new_estimator().
|
|
*
|
|
* Returns 0 on success or a negative error code.
|
|
*/
|
|
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
|
|
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
|
|
struct net_rate_estimator __rcu **rate_est,
|
|
spinlock_t *lock,
|
|
seqcount_t *running, struct nlattr *opt)
|
|
{
|
|
return gen_new_estimator(bstats, cpu_bstats, rate_est,
|
|
lock, running, opt);
|
|
}
|
|
EXPORT_SYMBOL(gen_replace_estimator);
|
|
|
|
/**
|
|
* gen_estimator_active - test if estimator is currently in use
|
|
* @rate_est: rate estimator
|
|
*
|
|
* Returns true if estimator is active, and false if not.
|
|
*/
|
|
bool gen_estimator_active(struct net_rate_estimator __rcu **rate_est)
|
|
{
|
|
return !!rcu_access_pointer(*rate_est);
|
|
}
|
|
EXPORT_SYMBOL(gen_estimator_active);
|
|
|
|
/**
 * gen_estimator_read - read the current rate estimates
 * @rate_est: rate estimator
 * @sample: filled with the byte and packet rates on success
 *
 * Looks up the estimator under rcu_read_lock() and copies both averages,
 * shifted right by 8, into @sample. The copy is retried until the
 * estimator's seqcount shows that no update ran concurrently, so the two
 * values come from the same update.
 *
 * Returns true if @sample was filled, false if no estimator is installed
 * (in which case @sample is left untouched).
 */
bool gen_estimator_read(struct net_rate_estimator __rcu **rate_est,
			struct gnet_stats_rate_est64 *sample)
{
	struct net_rate_estimator *est;
	bool found = false;
	unsigned start;

	rcu_read_lock();
	est = rcu_dereference(*rate_est);
	if (est) {
		do {
			start = read_seqcount_begin(&est->seq);
			sample->bps = est->avbps >> 8;
			sample->pps = est->avpps >> 8;
		} while (read_seqcount_retry(&est->seq, start));
		found = true;
	}
	rcu_read_unlock();

	return found;
}
EXPORT_SYMBOL(gen_estimator_read);
|