Merge branch 'skb_expand_head'
Vasily Averin says: ==================== skbuff: introduce skb_expand_head() Currently, if an skb does not have enough headroom, skb_realloc_headroom is called. It is not optimal because it creates a new skb. This patch set introduces a new helper, skb_expand_head(). Unlike skb_realloc_headroom, it does not allocate a new skb if possible; it copies skb->sk to the new skb as needed and frees the original skb in case of failure. This helps to simplify ip[6]_finish_output2(), ip6_xmit() and a few other functions in vrf, ax25 and bpf. There are a few other cases where this helper could be used, but they require additional investigation. v3 changes: - ax25 compilation warning fixed - v5.14-rc4 rebase - now it does not depend on non-committed patches v2 changes: - helper's name was changed to skb_expand_head - fixed a few mistakes inside skb_expand_head(): skb_set_owner_w should set sk on nskb; kfree was replaced by kfree_skb(); improved warning message - added minor refactoring in changed functions in vrf and bpf patches - removed kfree_skb() in ax25_rt_build_path caller ax25_ip_xmit ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
07e1d6b3e0
@ -857,30 +857,24 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
|
||||
unsigned int hh_len = LL_RESERVED_SPACE(dev);
|
||||
struct neighbour *neigh;
|
||||
bool is_v6gw = false;
|
||||
int ret = -EINVAL;
|
||||
|
||||
nf_reset_ct(skb);
|
||||
|
||||
/* Be paranoid, rather than too clever. */
|
||||
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
|
||||
struct sk_buff *skb2;
|
||||
|
||||
skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
|
||||
if (!skb2) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
skb = skb_expand_head(skb, hh_len);
|
||||
if (!skb) {
|
||||
skb->dev->stats.tx_errors++;
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (skb->sk)
|
||||
skb_set_owner_w(skb2, skb->sk);
|
||||
|
||||
consume_skb(skb);
|
||||
skb = skb2;
|
||||
}
|
||||
|
||||
rcu_read_lock_bh();
|
||||
|
||||
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
|
||||
if (!IS_ERR(neigh)) {
|
||||
int ret;
|
||||
|
||||
sock_confirm_neigh(skb, neigh);
|
||||
/* if crossing protocols, can not use the cached header */
|
||||
ret = neigh_output(neigh, skb, is_v6gw);
|
||||
@ -889,9 +883,8 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
|
||||
}
|
||||
|
||||
rcu_read_unlock_bh();
|
||||
err:
|
||||
vrf_tx_error(skb->dev, skb);
|
||||
return ret;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
|
@ -1183,6 +1183,7 @@ static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom,
|
||||
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
|
||||
struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
|
||||
unsigned int headroom);
|
||||
struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom);
|
||||
struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
|
||||
int newtailroom, gfp_t priority);
|
||||
int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
|
||||
|
@ -193,10 +193,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
|
||||
skb_pull(skb, AX25_KISS_HEADER_LEN);
|
||||
|
||||
if (digipeat != NULL) {
|
||||
if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
|
||||
kfree_skb(skb);
|
||||
if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL)
|
||||
goto put;
|
||||
}
|
||||
|
||||
skb = ourskb;
|
||||
}
|
||||
|
@ -325,7 +325,6 @@ void ax25_kick(ax25_cb *ax25)
|
||||
|
||||
void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
|
||||
{
|
||||
struct sk_buff *skbn;
|
||||
unsigned char *ptr;
|
||||
int headroom;
|
||||
|
||||
@ -336,18 +335,12 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
|
||||
|
||||
headroom = ax25_addr_size(ax25->digipeat);
|
||||
|
||||
if (skb_headroom(skb) < headroom) {
|
||||
if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) {
|
||||
if (unlikely(skb_headroom(skb) < headroom)) {
|
||||
skb = skb_expand_head(skb, headroom);
|
||||
if (!skb) {
|
||||
printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n");
|
||||
kfree_skb(skb);
|
||||
return;
|
||||
}
|
||||
|
||||
if (skb->sk != NULL)
|
||||
skb_set_owner_w(skbn, skb->sk);
|
||||
|
||||
consume_skb(skb);
|
||||
skb = skbn;
|
||||
}
|
||||
|
||||
ptr = skb_push(skb, headroom);
|
||||
|
@ -441,24 +441,17 @@ put:
|
||||
struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
|
||||
ax25_address *dest, ax25_digi *digi)
|
||||
{
|
||||
struct sk_buff *skbn;
|
||||
unsigned char *bp;
|
||||
int len;
|
||||
|
||||
len = digi->ndigi * AX25_ADDR_LEN;
|
||||
|
||||
if (skb_headroom(skb) < len) {
|
||||
if ((skbn = skb_realloc_headroom(skb, len)) == NULL) {
|
||||
if (unlikely(skb_headroom(skb) < len)) {
|
||||
skb = skb_expand_head(skb, len);
|
||||
if (!skb) {
|
||||
printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (skb->sk != NULL)
|
||||
skb_set_owner_w(skbn, skb->sk);
|
||||
|
||||
consume_skb(skb);
|
||||
|
||||
skb = skbn;
|
||||
}
|
||||
|
||||
bp = skb_push(skb, len);
|
||||
|
@ -2180,17 +2180,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
|
||||
skb->tstamp = 0;
|
||||
|
||||
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
|
||||
struct sk_buff *skb2;
|
||||
|
||||
skb2 = skb_realloc_headroom(skb, hh_len);
|
||||
if (unlikely(!skb2)) {
|
||||
kfree_skb(skb);
|
||||
skb = skb_expand_head(skb, hh_len);
|
||||
if (!skb)
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (skb->sk)
|
||||
skb_set_owner_w(skb2, skb->sk);
|
||||
consume_skb(skb);
|
||||
skb = skb2;
|
||||
}
|
||||
|
||||
rcu_read_lock_bh();
|
||||
@ -2214,8 +2206,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
|
||||
}
|
||||
rcu_read_unlock_bh();
|
||||
if (dst)
|
||||
IP6_INC_STATS(dev_net(dst->dev),
|
||||
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
|
||||
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
|
||||
out_drop:
|
||||
kfree_skb(skb);
|
||||
return -ENETDOWN;
|
||||
@ -2287,17 +2278,9 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
|
||||
skb->tstamp = 0;
|
||||
|
||||
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
|
||||
struct sk_buff *skb2;
|
||||
|
||||
skb2 = skb_realloc_headroom(skb, hh_len);
|
||||
if (unlikely(!skb2)) {
|
||||
kfree_skb(skb);
|
||||
skb = skb_expand_head(skb, hh_len);
|
||||
if (!skb)
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (skb->sk)
|
||||
skb_set_owner_w(skb2, skb->sk);
|
||||
consume_skb(skb);
|
||||
skb = skb2;
|
||||
}
|
||||
|
||||
rcu_read_lock_bh();
|
||||
|
@ -1789,6 +1789,48 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
|
||||
}
|
||||
EXPORT_SYMBOL(skb_realloc_headroom);
|
||||
|
||||
/**
|
||||
* skb_expand_head - reallocate header of &sk_buff
|
||||
* @skb: buffer to reallocate
|
||||
* @headroom: needed headroom
|
||||
*
|
||||
* Unlike skb_realloc_headroom, this one does not allocate a new skb
|
||||
* if possible; copies skb->sk to new skb as needed
|
||||
* and frees original skb in case of failures.
|
||||
*
|
||||
* It expect increased headroom and generates warning otherwise.
|
||||
*/
|
||||
|
||||
struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
|
||||
{
|
||||
int delta = headroom - skb_headroom(skb);
|
||||
|
||||
if (WARN_ONCE(delta <= 0,
|
||||
"%s is expecting an increase in the headroom", __func__))
|
||||
return skb;
|
||||
|
||||
/* pskb_expand_head() might crash, if skb is shared */
|
||||
if (skb_shared(skb)) {
|
||||
struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
|
||||
|
||||
if (likely(nskb)) {
|
||||
if (skb->sk)
|
||||
skb_set_owner_w(nskb, skb->sk);
|
||||
consume_skb(skb);
|
||||
} else {
|
||||
kfree_skb(skb);
|
||||
}
|
||||
skb = nskb;
|
||||
}
|
||||
if (skb &&
|
||||
pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
|
||||
kfree_skb(skb);
|
||||
skb = NULL;
|
||||
}
|
||||
return skb;
|
||||
}
|
||||
EXPORT_SYMBOL(skb_expand_head);
|
||||
|
||||
/**
|
||||
* skb_copy_expand - copy and expand sk_buff
|
||||
* @skb: buffer to copy
|
||||
|
@ -198,19 +198,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
|
||||
} else if (rt->rt_type == RTN_BROADCAST)
|
||||
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
|
||||
|
||||
/* Be paranoid, rather than too clever. */
|
||||
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
|
||||
struct sk_buff *skb2;
|
||||
|
||||
skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
|
||||
if (!skb2) {
|
||||
kfree_skb(skb);
|
||||
skb = skb_expand_head(skb, hh_len);
|
||||
if (!skb)
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (skb->sk)
|
||||
skb_set_owner_w(skb2, skb->sk);
|
||||
consume_skb(skb);
|
||||
skb = skb2;
|
||||
}
|
||||
|
||||
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
|
||||
|
@ -60,46 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
|
||||
{
|
||||
struct dst_entry *dst = skb_dst(skb);
|
||||
struct net_device *dev = dst->dev;
|
||||
struct inet6_dev *idev = ip6_dst_idev(dst);
|
||||
unsigned int hh_len = LL_RESERVED_SPACE(dev);
|
||||
int delta = hh_len - skb_headroom(skb);
|
||||
const struct in6_addr *nexthop;
|
||||
const struct in6_addr *daddr, *nexthop;
|
||||
struct ipv6hdr *hdr;
|
||||
struct neighbour *neigh;
|
||||
int ret;
|
||||
|
||||
/* Be paranoid, rather than too clever. */
|
||||
if (unlikely(delta > 0) && dev->header_ops) {
|
||||
/* pskb_expand_head() might crash, if skb is shared */
|
||||
if (skb_shared(skb)) {
|
||||
struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
|
||||
|
||||
if (likely(nskb)) {
|
||||
if (skb->sk)
|
||||
skb_set_owner_w(nskb, skb->sk);
|
||||
consume_skb(skb);
|
||||
} else {
|
||||
kfree_skb(skb);
|
||||
}
|
||||
skb = nskb;
|
||||
}
|
||||
if (skb &&
|
||||
pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
|
||||
kfree_skb(skb);
|
||||
skb = NULL;
|
||||
}
|
||||
if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
|
||||
skb = skb_expand_head(skb, hh_len);
|
||||
if (!skb) {
|
||||
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
|
||||
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
|
||||
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
|
||||
|
||||
hdr = ipv6_hdr(skb);
|
||||
daddr = &hdr->daddr;
|
||||
if (ipv6_addr_is_multicast(daddr)) {
|
||||
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
|
||||
((mroute6_is_socket(net, skb) &&
|
||||
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
|
||||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
|
||||
&ipv6_hdr(skb)->saddr))) {
|
||||
ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
|
||||
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
|
||||
|
||||
/* Do not check for IFF_ALLMULTI; multicast routing
|
||||
@ -110,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
|
||||
net, sk, newskb, NULL, newskb->dev,
|
||||
dev_loopback_xmit);
|
||||
|
||||
if (ipv6_hdr(skb)->hop_limit == 0) {
|
||||
if (hdr->hop_limit == 0) {
|
||||
IP6_INC_STATS(net, idev,
|
||||
IPSTATS_MIB_OUTDISCARDS);
|
||||
kfree_skb(skb);
|
||||
@ -119,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
|
||||
}
|
||||
|
||||
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
|
||||
|
||||
if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
|
||||
IPV6_ADDR_SCOPE_NODELOCAL &&
|
||||
if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
|
||||
!(dev->flags & IFF_LOOPBACK)) {
|
||||
kfree_skb(skb);
|
||||
return 0;
|
||||
@ -136,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
|
||||
}
|
||||
|
||||
rcu_read_lock_bh();
|
||||
nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
|
||||
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
|
||||
nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
|
||||
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
|
||||
if (unlikely(!neigh))
|
||||
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
|
||||
neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
|
||||
if (!IS_ERR(neigh)) {
|
||||
sock_confirm_neigh(skb, neigh);
|
||||
ret = neigh_output(neigh, skb, false);
|
||||
@ -148,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
|
||||
}
|
||||
rcu_read_unlock_bh();
|
||||
|
||||
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
|
||||
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
|
||||
kfree_skb(skb);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -268,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
|
||||
const struct ipv6_pinfo *np = inet6_sk(sk);
|
||||
struct in6_addr *first_hop = &fl6->daddr;
|
||||
struct dst_entry *dst = skb_dst(skb);
|
||||
struct net_device *dev = dst->dev;
|
||||
struct inet6_dev *idev = ip6_dst_idev(dst);
|
||||
unsigned int head_room;
|
||||
struct ipv6hdr *hdr;
|
||||
u8 proto = fl6->flowi6_proto;
|
||||
@ -275,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
|
||||
int hlimit = -1;
|
||||
u32 mtu;
|
||||
|
||||
head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
|
||||
head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
|
||||
if (opt)
|
||||
head_room += opt->opt_nflen + opt->opt_flen;
|
||||
|
||||
if (unlikely(skb_headroom(skb) < head_room)) {
|
||||
struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
|
||||
if (!skb2) {
|
||||
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
|
||||
IPSTATS_MIB_OUTDISCARDS);
|
||||
kfree_skb(skb);
|
||||
if (unlikely(head_room > skb_headroom(skb))) {
|
||||
skb = skb_expand_head(skb, head_room);
|
||||
if (!skb) {
|
||||
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
|
||||
return -ENOBUFS;
|
||||
}
|
||||
if (skb->sk)
|
||||
skb_set_owner_w(skb2, skb->sk);
|
||||
consume_skb(skb);
|
||||
skb = skb2;
|
||||
}
|
||||
|
||||
if (opt) {
|
||||
@ -332,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
|
||||
|
||||
mtu = dst_mtu(dst);
|
||||
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
|
||||
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
|
||||
IPSTATS_MIB_OUT, skb->len);
|
||||
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
|
||||
|
||||
/* if egress device is enslaved to an L3 master device pass the
|
||||
* skb to its handler for processing
|
||||
@ -346,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
|
||||
* we promote our socket to non const
|
||||
*/
|
||||
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
|
||||
net, (struct sock *)sk, skb, NULL, dst->dev,
|
||||
net, (struct sock *)sk, skb, NULL, dev,
|
||||
dst_output);
|
||||
}
|
||||
|
||||
skb->dev = dst->dev;
|
||||
skb->dev = dev;
|
||||
/* ipv6_local_error() does not require socket lock,
|
||||
* we promote our socket to non const
|
||||
*/
|
||||
ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
|
||||
|
||||
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
|
||||
IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
|
||||
kfree_skb(skb);
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user