f87c10a8aa
While forwarding, we should not use the protocol path MTU to calculate the MTU for a forwarded packet but instead use the interface MTU. We mark forwarded skbs in ip_forward with IPSKB_FORWARDED, which was introduced for multicast forwarding. But as it does not conflict with our usage in the unicast code path, it is perfect for reuse. I moved the functions ip_sk_accept_pmtu, ip_sk_use_pmtu and ip_skb_dst_mtu along with the new ip_dst_mtu_maybe_forward to net/ip.h to fix circular dependencies because of IPSKB_FORWARDED. Because someone might have written software which probes destinations manually and expects the kernel to honour those path MTUs, I introduced a new per-namespace "ip_forward_use_pmtu" knob so someone can disable this new behaviour. We also still use MTUs which are locked on a route for forwarding. The reason for this change is that path MTU information can be injected into the kernel via e.g. the icmp_err protocol handler without verification of local sockets. As such, this could cause the IPv4 forwarding path to wrongfully emit fragmentation needed notifications or start to fragment packets along a path. Tunnel and ipsec output paths clear IPCB again, thus IPSKB_FORWARDED won't be set and further fragmentation logic will use the path MTU to determine the fragmentation size. They also recheck the packet size with the help of path MTU discovery and report appropriate errors. Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: David Miller <davem@davemloft.net> Cc: John Heffner <johnwheffner@gmail.com> Cc: Steffen Klassert <steffen.klassert@secunet.com> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: David S. Miller <davem@davemloft.net>
90 lines
2.0 KiB
C
90 lines
2.0 KiB
C
/*
 * ipv4 in net namespaces
 */
|
|
|
|
#ifndef __NETNS_IPV4_H__
|
|
#define __NETNS_IPV4_H__
|
|
|
|
#include <linux/uidgid.h>
|
|
#include <net/inet_frag.h>
|
|
|
|
/*
 * Forward declarations: the structures below only hold pointers to these
 * types, so incomplete declarations are enough here.
 */
struct tcpm_hash_bucket;
struct ctl_table_header;
struct ipv4_devconf;
struct fib_rules_ops;
struct hlist_head;
struct fib_table;
struct sock;
|
|
struct local_ports {
|
|
seqlock_t lock;
|
|
int range[2];
|
|
};
|
|
|
|
struct netns_ipv4 {
|
|
#ifdef CONFIG_SYSCTL
|
|
struct ctl_table_header *forw_hdr;
|
|
struct ctl_table_header *frags_hdr;
|
|
struct ctl_table_header *ipv4_hdr;
|
|
struct ctl_table_header *route_hdr;
|
|
struct ctl_table_header *xfrm4_hdr;
|
|
#endif
|
|
struct ipv4_devconf *devconf_all;
|
|
struct ipv4_devconf *devconf_dflt;
|
|
#ifdef CONFIG_IP_MULTIPLE_TABLES
|
|
struct fib_rules_ops *rules_ops;
|
|
bool fib_has_custom_rules;
|
|
struct fib_table *fib_local;
|
|
struct fib_table *fib_main;
|
|
struct fib_table *fib_default;
|
|
#endif
|
|
#ifdef CONFIG_IP_ROUTE_CLASSID
|
|
int fib_num_tclassid_users;
|
|
#endif
|
|
struct hlist_head *fib_table_hash;
|
|
struct sock *fibnl;
|
|
|
|
struct sock **icmp_sk;
|
|
struct inet_peer_base *peers;
|
|
struct tcpm_hash_bucket *tcp_metrics_hash;
|
|
unsigned int tcp_metrics_hash_log;
|
|
struct netns_frags frags;
|
|
#ifdef CONFIG_NETFILTER
|
|
struct xt_table *iptable_filter;
|
|
struct xt_table *iptable_mangle;
|
|
struct xt_table *iptable_raw;
|
|
struct xt_table *arptable_filter;
|
|
#ifdef CONFIG_SECURITY
|
|
struct xt_table *iptable_security;
|
|
#endif
|
|
struct xt_table *nat_table;
|
|
#endif
|
|
|
|
int sysctl_icmp_echo_ignore_all;
|
|
int sysctl_icmp_echo_ignore_broadcasts;
|
|
int sysctl_icmp_ignore_bogus_error_responses;
|
|
int sysctl_icmp_ratelimit;
|
|
int sysctl_icmp_ratemask;
|
|
int sysctl_icmp_errors_use_inbound_ifaddr;
|
|
|
|
struct local_ports sysctl_local_ports;
|
|
|
|
int sysctl_tcp_ecn;
|
|
int sysctl_ip_no_pmtu_disc;
|
|
int sysctl_ip_fwd_use_pmtu;
|
|
|
|
kgid_t sysctl_ping_group_range[2];
|
|
|
|
atomic_t dev_addr_genid;
|
|
|
|
#ifdef CONFIG_IP_MROUTE
|
|
#ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES
|
|
struct mr_table *mrt;
|
|
#else
|
|
struct list_head mr_tables;
|
|
struct fib_rules_ops *mr_rules_ops;
|
|
#endif
|
|
#endif
|
|
atomic_t rt_genid;
|
|
};
|
|
#endif
|