Merge branch 'nf-next' of git://1984.lsi.us.es/net-next

This commit is contained in:
David S. Miller
2011-12-25 02:21:45 -05:00
59 changed files with 1168 additions and 378 deletions

View File

@ -4,6 +4,14 @@ menu "Core Netfilter Configuration"
config NETFILTER_NETLINK
tristate
config NETFILTER_NETLINK_ACCT
tristate "Netfilter NFACCT over NFNETLINK interface"
depends on NETFILTER_ADVANCED
select NETFILTER_NETLINK
help
If this option is enabled, the kernel will include support
for extended accounting via NFNETLINK.
config NETFILTER_NETLINK_QUEUE
tristate "Netfilter NFQUEUE over NFNETLINK interface"
depends on NETFILTER_ADVANCED
@ -879,6 +887,16 @@ config NETFILTER_XT_MATCH_MULTIPORT
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XT_MATCH_NFACCT
tristate '"nfacct" match support'
default m if NETFILTER_ADVANCED=n
select NETFILTER_NETLINK_ACCT
help
This option allows you to use the extended accounting through
nfnetlink_acct.
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XT_MATCH_OSF
tristate '"osf" Passive OS fingerprint match'
depends on NETFILTER_ADVANCED && NETFILTER_NETLINK

View File

@ -7,6 +7,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
obj-$(CONFIG_NETFILTER) = netfilter.o
obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
@ -90,6 +91,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o

View File

@ -232,6 +232,21 @@ config IP_VS_NQ
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
comment 'IPVS SH scheduler'
config IP_VS_SH_TAB_BITS
int "IPVS source hashing table size (the Nth power of 2)"
range 4 20
default 8
---help---
The source hashing scheduler maps source IPs to destinations
stored in a hash table. This table is tiled by each destination
until all slots in the table are filled. When using weights to
allow destinations to receive more connections, the table is
tiled an amount proportional to the weights specified. The table
needs to be large enough to effectively fit all the destinations
multiplied by their respective weights.
comment 'IPVS application helper'
config IP_VS_FTP

View File

@ -30,6 +30,11 @@
* server is dead or overloaded, the load balancer can bypass the cache
* server and send requests to the original server directly.
*
* The weight destination attribute can be used to control the
* distribution of connections to the destinations in servernode. The
* greater the weight, the more connections the destination
* will receive.
*
*/
#define KMSG_COMPONENT "IPVS"
@ -99,9 +104,11 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
struct ip_vs_sh_bucket *b;
struct list_head *p;
struct ip_vs_dest *dest;
int d_count;
b = tbl;
p = &svc->destinations;
d_count = 0;
for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
if (list_empty(p)) {
b->dest = NULL;
@ -113,7 +120,16 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
atomic_inc(&dest->refcnt);
b->dest = dest;
p = p->next;
IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
atomic_read(&dest->weight));
/* Don't move to next dest until filling weight */
if (++d_count >= atomic_read(&dest->weight)) {
p = p->next;
d_count = 0;
}
}
b++;
}

View File

@ -46,8 +46,8 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
return 0;
return seq_printf(s, "packets=%llu bytes=%llu ",
(unsigned long long)acct[dir].packets,
(unsigned long long)acct[dir].bytes);
(unsigned long long)atomic64_read(&acct[dir].packets),
(unsigned long long)atomic64_read(&acct[dir].bytes));
};
EXPORT_SYMBOL_GPL(seq_print_acct);

View File

@ -67,6 +67,7 @@ DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
unsigned int nf_conntrack_hash_rnd __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
{
@ -1044,10 +1045,8 @@ acct:
acct = nf_conn_acct_find(ct);
if (acct) {
spin_lock_bh(&ct->lock);
acct[CTINFO2DIR(ctinfo)].packets++;
acct[CTINFO2DIR(ctinfo)].bytes += skb->len;
spin_unlock_bh(&ct->lock);
atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes);
}
}
}
@ -1063,11 +1062,9 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
acct = nf_conn_acct_find(ct);
if (acct) {
spin_lock_bh(&ct->lock);
acct[CTINFO2DIR(ctinfo)].packets++;
acct[CTINFO2DIR(ctinfo)].bytes +=
skb->len - skb_network_offset(skb);
spin_unlock_bh(&ct->lock);
atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
atomic64_add(skb->len - skb_network_offset(skb),
&acct[CTINFO2DIR(ctinfo)].bytes);
}
}

View File

@ -38,8 +38,6 @@ unsigned int nf_ct_expect_max __read_mostly;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
static HLIST_HEAD(nf_ct_userspace_expect_list);
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
u32 pid, int report)
@ -47,14 +45,14 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct nf_conn_help *master_help = nfct_help(exp->master);
struct net *net = nf_ct_exp_net(exp);
NF_CT_ASSERT(master_help);
NF_CT_ASSERT(!timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
net->ct.expect_count--;
hlist_del(&exp->lnode);
if (!(exp->flags & NF_CT_EXPECT_USERSPACE))
master_help->expecting[exp->class]--;
master_help->expecting[exp->class]--;
nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
nf_ct_expect_put(exp);
@ -314,37 +312,34 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp)
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(exp);
const struct nf_conntrack_expect_policy *p;
unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
/* two references : one for hash insert, one for the timer */
atomic_add(2, &exp->use);
if (master_help) {
hlist_add_head(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
} else if (exp->flags & NF_CT_EXPECT_USERSPACE)
hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list);
hlist_add_head(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
net->ct.expect_count++;
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
if (master_help) {
p = &rcu_dereference_protected(
master_help->helper,
lockdep_is_held(&nf_conntrack_lock)
)->expect_policy[exp->class];
exp->timeout.expires = jiffies + p->timeout * HZ;
helper = rcu_dereference_protected(master_help->helper,
lockdep_is_held(&nf_conntrack_lock));
if (helper) {
exp->timeout.expires = jiffies +
helper->expect_policy[exp->class].timeout * HZ;
}
add_timer(&exp->timeout);
NF_CT_STAT_INC(net, expect_create);
return 0;
}
/* Race with expectations being used means we could have none to find; OK. */
@ -389,14 +384,13 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
struct nf_conntrack_expect *i;
struct nf_conn *master = expect->master;
struct nf_conn_help *master_help = nfct_help(master);
struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(expect);
struct hlist_node *n;
unsigned int h;
int ret = 1;
/* Don't allow expectations created from kernel-space with no helper */
if (!(expect->flags & NF_CT_EXPECT_USERSPACE) &&
(!master_help || (master_help && !master_help->helper))) {
if (!master_help) {
ret = -ESHUTDOWN;
goto out;
}
@ -414,11 +408,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
}
}
/* Will be over limit? */
if (master_help) {
p = &rcu_dereference_protected(
master_help->helper,
lockdep_is_held(&nf_conntrack_lock)
)->expect_policy[expect->class];
helper = rcu_dereference_protected(master_help->helper,
lockdep_is_held(&nf_conntrack_lock));
if (helper) {
p = &helper->expect_policy[expect->class];
if (p->max_expected &&
master_help->expecting[expect->class] >= p->max_expected) {
evict_oldest_expect(master, expect);
@ -450,8 +443,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
if (ret <= 0)
goto out;
ret = 0;
nf_ct_expect_insert(expect);
ret = nf_ct_expect_insert(expect);
if (ret < 0)
goto out;
spin_unlock_bh(&nf_conntrack_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
return ret;
@ -461,21 +455,6 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
void nf_ct_remove_userspace_expectations(void)
{
struct nf_conntrack_expect *exp;
struct hlist_node *n, *next;
hlist_for_each_entry_safe(exp, n, next,
&nf_ct_userspace_expect_list, lnode) {
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
}
}
}
EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations);
#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
struct seq_net_private p;

View File

@ -121,6 +121,18 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
int ret = 0;
if (tmpl != NULL) {
/* we've got a userspace helper. */
if (tmpl->status & IPS_USERSPACE_HELPER) {
help = nf_ct_helper_ext_add(ct, flags);
if (help == NULL) {
ret = -ENOMEM;
goto out;
}
rcu_assign_pointer(help->helper, NULL);
__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
ret = 0;
goto out;
}
help = nfct_help(tmpl);
if (help != NULL)
helper = help->helper;

View File

@ -203,25 +203,18 @@ nla_put_failure:
}
static int
ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
enum ip_conntrack_dir dir)
dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes,
enum ip_conntrack_dir dir)
{
enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
struct nlattr *nest_count;
const struct nf_conn_counter *acct;
acct = nf_conn_acct_find(ct);
if (!acct)
return 0;
nest_count = nla_nest_start(skb, type | NLA_F_NESTED);
if (!nest_count)
goto nla_put_failure;
NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS,
cpu_to_be64(acct[dir].packets));
NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES,
cpu_to_be64(acct[dir].bytes));
NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts));
NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes));
nla_nest_end(skb, nest_count);
@ -231,6 +224,27 @@ nla_put_failure:
return -1;
}
static int
ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
enum ip_conntrack_dir dir, int type)
{
struct nf_conn_counter *acct;
u64 pkts, bytes;
acct = nf_conn_acct_find(ct);
if (!acct)
return 0;
if (type == IPCTNL_MSG_CT_GET_CTRZERO) {
pkts = atomic64_xchg(&acct[dir].packets, 0);
bytes = atomic64_xchg(&acct[dir].bytes, 0);
} else {
pkts = atomic64_read(&acct[dir].packets);
bytes = atomic64_read(&acct[dir].bytes);
}
return dump_counters(skb, pkts, bytes, dir);
}
static int
ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
{
@ -393,15 +407,15 @@ nla_put_failure:
}
static int
ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
int event, struct nf_conn *ct)
ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
struct nf_conn *ct)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
unsigned int flags = pid ? NLM_F_MULTI : 0;
unsigned int flags = pid ? NLM_F_MULTI : 0, event;
event |= NFNL_SUBSYS_CTNETLINK << 8;
event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@ -430,8 +444,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
if (ctnetlink_dump_status(skb, ct) < 0 ||
ctnetlink_dump_timeout(skb, ct) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0 ||
ctnetlink_dump_protoinfo(skb, ct) < 0 ||
ctnetlink_dump_helpinfo(skb, ct) < 0 ||
@ -612,8 +626,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
goto nla_put_failure;
if (events & (1 << IPCT_DESTROY)) {
if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
if (ctnetlink_dump_counters(skb, ct,
IP_CT_DIR_ORIGINAL, type) < 0 ||
ctnetlink_dump_counters(skb, ct,
IP_CT_DIR_REPLY, type) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0)
goto nla_put_failure;
} else {
@ -709,20 +725,13 @@ restart:
}
if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
IPCTNL_MSG_CT_NEW, ct) < 0) {
NFNL_MSG_TYPE(
cb->nlh->nlmsg_type),
ct) < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
goto out;
}
if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
IPCTNL_MSG_CT_GET_CTRZERO) {
struct nf_conn_counter *acct;
acct = nf_conn_acct_find(ct);
if (acct)
memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]));
}
}
if (cb->args[1]) {
cb->args[1] = 0;
@ -1001,7 +1010,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
rcu_read_lock();
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
IPCTNL_MSG_CT_NEW, ct);
NFNL_MSG_TYPE(nlh->nlmsg_type), ct);
rcu_read_unlock();
nf_ct_put(ct);
if (err <= 0)
@ -1087,14 +1096,14 @@ ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[])
if (cda[CTA_NAT_DST]) {
ret = ctnetlink_parse_nat_setup(ct,
IP_NAT_MANIP_DST,
NF_NAT_MANIP_DST,
cda[CTA_NAT_DST]);
if (ret < 0)
return ret;
}
if (cda[CTA_NAT_SRC]) {
ret = ctnetlink_parse_nat_setup(ct,
IP_NAT_MANIP_SRC,
NF_NAT_MANIP_SRC,
cda[CTA_NAT_SRC]);
if (ret < 0)
return ret;
@ -1847,7 +1856,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
if (cda[CTA_EXPECT_MASTER])
if (cda[CTA_EXPECT_TUPLE])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
else if (cda[CTA_EXPECT_MASTER])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
else
return -EINVAL;
@ -2023,6 +2034,10 @@ ctnetlink_create_expect(struct net *net, u16 zone,
}
help = nfct_help(ct);
if (!help) {
err = -EOPNOTSUPP;
goto out;
}
if (test_bit(IPS_USERSPACE_HELPER_BIT, &ct->status)) {
if (!cda[CTA_EXPECT_TIMEOUT]) {
err = -EINVAL;
goto out;
@ -2247,7 +2262,6 @@ static void __exit ctnetlink_exit(void)
{
pr_info("ctnetlink: unregistering from nfnetlink.\n");
nf_ct_remove_userspace_expectations();
unregister_pernet_subsys(&ctnetlink_net_ops);
nfnetlink_subsys_unregister(&ctnl_exp_subsys);
nfnetlink_subsys_unregister(&ctnl_subsys);

View File

@ -0,0 +1,352 @@
/*
* (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
* (C) 2011 Intra2net AG <http://www.intra2net.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation (or any later at your option).
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <net/netlink.h>
#include <net/sock.h>
#include <asm/atomic.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_acct.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
static LIST_HEAD(nfnl_acct_list);
struct nf_acct {
atomic64_t pkts;
atomic64_t bytes;
struct list_head head;
atomic_t refcnt;
char name[NFACCT_NAME_MAX];
struct rcu_head rcu_head;
};
static int
nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
struct nf_acct *nfacct, *matching = NULL;
char *acct_name;
if (!tb[NFACCT_NAME])
return -EINVAL;
acct_name = nla_data(tb[NFACCT_NAME]);
list_for_each_entry(nfacct, &nfnl_acct_list, head) {
if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
continue;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
matching = nfacct;
break;
}
if (matching) {
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
/* reset counters if you request a replacement. */
atomic64_set(&matching->pkts, 0);
atomic64_set(&matching->bytes, 0);
return 0;
}
return -EBUSY;
}
nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
if (nfacct == NULL)
return -ENOMEM;
strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
if (tb[NFACCT_BYTES]) {
atomic64_set(&nfacct->bytes,
be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES])));
}
if (tb[NFACCT_PKTS]) {
atomic64_set(&nfacct->pkts,
be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS])));
}
atomic_set(&nfacct->refcnt, 1);
list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
return 0;
}
static int
nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
int event, struct nf_acct *acct)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
unsigned int flags = pid ? NLM_F_MULTI : 0;
u64 pkts, bytes;
event |= NFNL_SUBSYS_ACCT << 8;
nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = AF_UNSPEC;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
NLA_PUT_STRING(skb, NFACCT_NAME, acct->name);
if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
pkts = atomic64_xchg(&acct->pkts, 0);
bytes = atomic64_xchg(&acct->bytes, 0);
} else {
pkts = atomic64_read(&acct->pkts);
bytes = atomic64_read(&acct->bytes);
}
NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(pkts));
NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(bytes));
NLA_PUT_BE32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)));
nlmsg_end(skb, nlh);
return skb->len;
nlmsg_failure:
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -1;
}
static int
nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nf_acct *cur, *last;
if (cb->args[2])
return 0;
last = (struct nf_acct *)cb->args[1];
if (cb->args[1])
cb->args[1] = 0;
rcu_read_lock();
list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
if (last && cur != last)
continue;
if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
NFNL_MSG_ACCT_NEW, cur) < 0) {
cb->args[1] = (unsigned long)cur;
break;
}
}
if (!cb->args[1])
cb->args[2] = 1;
rcu_read_unlock();
return skb->len;
}
static int
nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
int ret = 0;
struct nf_acct *cur;
char *acct_name;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump,
NULL, 0);
}
if (!tb[NFACCT_NAME])
return -EINVAL;
acct_name = nla_data(tb[NFACCT_NAME]);
list_for_each_entry(cur, &nfnl_acct_list, head) {
struct sk_buff *skb2;
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
continue;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (skb2 == NULL)
break;
ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid,
nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type),
NFNL_MSG_ACCT_NEW, cur);
if (ret <= 0)
kfree_skb(skb2);
break;
}
return ret;
}
/* try to delete object, fail if it is still in use. */
static int nfnl_acct_try_del(struct nf_acct *cur)
{
int ret = 0;
/* we want to avoid races with nfnl_acct_find_get. */
if (atomic_dec_and_test(&cur->refcnt)) {
/* We are protected by nfnl mutex. */
list_del_rcu(&cur->head);
kfree_rcu(cur, rcu_head);
} else {
/* still in use, restore reference counter. */
atomic_inc(&cur->refcnt);
ret = -EBUSY;
}
return ret;
}
static int
nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
char *acct_name;
struct nf_acct *cur;
int ret = -ENOENT;
if (!tb[NFACCT_NAME]) {
list_for_each_entry(cur, &nfnl_acct_list, head)
nfnl_acct_try_del(cur);
return 0;
}
acct_name = nla_data(tb[NFACCT_NAME]);
list_for_each_entry(cur, &nfnl_acct_list, head) {
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
continue;
ret = nfnl_acct_try_del(cur);
if (ret < 0)
return ret;
break;
}
return ret;
}
static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
[NFACCT_BYTES] = { .type = NLA_U64 },
[NFACCT_PKTS] = { .type = NLA_U64 },
};
static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
[NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new,
.attr_count = NFACCT_MAX,
.policy = nfnl_acct_policy },
[NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get,
.attr_count = NFACCT_MAX,
.policy = nfnl_acct_policy },
[NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get,
.attr_count = NFACCT_MAX,
.policy = nfnl_acct_policy },
[NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del,
.attr_count = NFACCT_MAX,
.policy = nfnl_acct_policy },
};
static const struct nfnetlink_subsystem nfnl_acct_subsys = {
.name = "acct",
.subsys_id = NFNL_SUBSYS_ACCT,
.cb_count = NFNL_MSG_ACCT_MAX,
.cb = nfnl_acct_cb,
};
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
struct nf_acct *nfnl_acct_find_get(const char *acct_name)
{
struct nf_acct *cur, *acct = NULL;
rcu_read_lock();
list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
continue;
if (!try_module_get(THIS_MODULE))
goto err;
if (!atomic_inc_not_zero(&cur->refcnt)) {
module_put(THIS_MODULE);
goto err;
}
acct = cur;
break;
}
err:
rcu_read_unlock();
return acct;
}
EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
void nfnl_acct_put(struct nf_acct *acct)
{
atomic_dec(&acct->refcnt);
module_put(THIS_MODULE);
}
EXPORT_SYMBOL_GPL(nfnl_acct_put);
void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
{
atomic64_inc(&nfacct->pkts);
atomic64_add(skb->len, &nfacct->bytes);
}
EXPORT_SYMBOL_GPL(nfnl_acct_update);
static int __init nfnl_acct_init(void)
{
int ret;
pr_info("nfnl_acct: registering with nfnetlink.\n");
ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
if (ret < 0) {
pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
goto err_out;
}
return 0;
err_out:
return ret;
}
static void __exit nfnl_acct_exit(void)
{
struct nf_acct *cur, *tmp;
pr_info("nfnl_acct: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&nfnl_acct_subsys);
list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) {
list_del_rcu(&cur->head);
/* We are sure that our objects have no clients at this point,
* it's safe to release them all without checking refcnt. */
kfree_rcu(cur, rcu_head);
}
}
module_init(nfnl_acct_init);
module_exit(nfnl_acct_exit);

View File

@ -62,8 +62,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
int ret = 0;
u8 proto;
if (info->flags & ~XT_CT_NOTRACK)
return -EINVAL;
if (info->flags & ~(XT_CT_NOTRACK | XT_CT_USERSPACE_HELPER))
return -EOPNOTSUPP;
if (info->flags & XT_CT_NOTRACK) {
ct = nf_ct_untracked_get();
@ -92,7 +92,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
GFP_KERNEL))
goto err3;
if (info->helper[0]) {
if (info->flags & XT_CT_USERSPACE_HELPER) {
__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
} else if (info->helper[0]) {
ret = -ENOENT;
proto = xt_ct_find_proto(par);
if (!proto) {

View File

@ -40,46 +40,46 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
case XT_CONNBYTES_PKTS:
switch (sinfo->direction) {
case XT_CONNBYTES_DIR_ORIGINAL:
what = counters[IP_CT_DIR_ORIGINAL].packets;
what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
break;
case XT_CONNBYTES_DIR_REPLY:
what = counters[IP_CT_DIR_REPLY].packets;
what = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
case XT_CONNBYTES_DIR_BOTH:
what = counters[IP_CT_DIR_ORIGINAL].packets;
what += counters[IP_CT_DIR_REPLY].packets;
what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
what += atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
}
break;
case XT_CONNBYTES_BYTES:
switch (sinfo->direction) {
case XT_CONNBYTES_DIR_ORIGINAL:
what = counters[IP_CT_DIR_ORIGINAL].bytes;
what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
break;
case XT_CONNBYTES_DIR_REPLY:
what = counters[IP_CT_DIR_REPLY].bytes;
what = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
break;
case XT_CONNBYTES_DIR_BOTH:
what = counters[IP_CT_DIR_ORIGINAL].bytes;
what += counters[IP_CT_DIR_REPLY].bytes;
what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
what += atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
break;
}
break;
case XT_CONNBYTES_AVGPKT:
switch (sinfo->direction) {
case XT_CONNBYTES_DIR_ORIGINAL:
bytes = counters[IP_CT_DIR_ORIGINAL].bytes;
pkts = counters[IP_CT_DIR_ORIGINAL].packets;
bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
break;
case XT_CONNBYTES_DIR_REPLY:
bytes = counters[IP_CT_DIR_REPLY].bytes;
pkts = counters[IP_CT_DIR_REPLY].packets;
bytes = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
pkts = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
case XT_CONNBYTES_DIR_BOTH:
bytes = counters[IP_CT_DIR_ORIGINAL].bytes +
counters[IP_CT_DIR_REPLY].bytes;
pkts = counters[IP_CT_DIR_ORIGINAL].packets +
counters[IP_CT_DIR_REPLY].packets;
bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes) +
atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets) +
atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
}
if (pkts != 0)

76
net/netfilter/xt_nfacct.c Normal file
View File

@ -0,0 +1,76 @@
/*
* (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
* (C) 2011 Intra2net AG <http://www.intra2net.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 (or any
* later at your option) as published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nfnetlink_acct.h>
#include <linux/netfilter/xt_nfacct.h>
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_nfacct");
MODULE_ALIAS("ip6t_nfacct");
static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_nfacct_match_info *info = par->targinfo;
nfnl_acct_update(skb, info->nfacct);
return true;
}
static int
nfacct_mt_checkentry(const struct xt_mtchk_param *par)
{
struct xt_nfacct_match_info *info = par->matchinfo;
struct nf_acct *nfacct;
nfacct = nfnl_acct_find_get(info->name);
if (nfacct == NULL) {
pr_info("xt_nfacct: accounting object with name `%s' "
"does not exists\n", info->name);
return -ENOENT;
}
info->nfacct = nfacct;
return 0;
}
static void
nfacct_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_nfacct_match_info *info = par->matchinfo;
nfnl_acct_put(info->nfacct);
}
static struct xt_match nfacct_mt_reg __read_mostly = {
.name = "nfacct",
.family = NFPROTO_UNSPEC,
.checkentry = nfacct_mt_checkentry,
.match = nfacct_mt,
.destroy = nfacct_mt_destroy,
.matchsize = sizeof(struct xt_nfacct_match_info),
.me = THIS_MODULE,
};
static int __init nfacct_mt_init(void)
{
return xt_register_match(&nfacct_mt_reg);
}
static void __exit nfacct_mt_exit(void)
{
xt_unregister_match(&nfacct_mt_reg);
}
module_init(nfacct_mt_init);
module_exit(nfacct_mt_exit);