diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 516b3ac9a9b5..a43f3488fecf 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -109,9 +109,6 @@ static void appldata_get_net_sum_data(void *data) read_lock(&dev_base_lock); for (dev = dev_base; dev != NULL; dev = dev->next) { stats = dev->get_stats(dev); - if (stats == NULL) { - continue; - } rx_packets += stats->rx_packets; tx_packets += stats->tx_packets; rx_bytes += stats->rx_bytes; diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index eb4b96c4d388..dcdad217df51 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2927,11 +2927,6 @@ endif #NETDEVICES config NETPOLL def_bool NETCONSOLE -config NETPOLL_RX - bool "Netpoll support for trapping incoming packets" - default n - depends on NETPOLL - config NETPOLL_TRAP bool "Netpoll traffic trapping" default n diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index cea3783c92c5..724bce51f936 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1360,13 +1360,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_undo_flags; } - if (slave_dev->get_stats == NULL) { - printk(KERN_NOTICE DRV_NAME - ": %s: the driver for slave device %s does not provide " - "get_stats function, network statistics will be " - "inaccurate.\n", bond_dev->name, slave_dev->name); - } - new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); if (!new_slave) { res = -ENOMEM; @@ -3641,33 +3634,31 @@ static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) bond_for_each_slave(bond, slave, i) { sstats = slave->dev->get_stats(slave->dev); - if (sstats) { - stats->rx_packets += sstats->rx_packets; - stats->rx_bytes += sstats->rx_bytes; - stats->rx_errors += sstats->rx_errors; - stats->rx_dropped += sstats->rx_dropped; + stats->rx_packets += sstats->rx_packets; + stats->rx_bytes += sstats->rx_bytes; + stats->rx_errors += sstats->rx_errors; + stats->rx_dropped += sstats->rx_dropped; - stats->tx_packets += sstats->tx_packets; - stats->tx_bytes += sstats->tx_bytes; - stats->tx_errors += sstats->tx_errors; - stats->tx_dropped += sstats->tx_dropped; + stats->tx_packets += sstats->tx_packets; + stats->tx_bytes += sstats->tx_bytes; + stats->tx_errors += sstats->tx_errors; + stats->tx_dropped += sstats->tx_dropped; - stats->multicast += sstats->multicast; - stats->collisions += sstats->collisions; + stats->multicast += sstats->multicast; + stats->collisions += sstats->collisions; - stats->rx_length_errors += sstats->rx_length_errors; - stats->rx_over_errors += sstats->rx_over_errors; - stats->rx_crc_errors += sstats->rx_crc_errors; - stats->rx_frame_errors += sstats->rx_frame_errors; - stats->rx_fifo_errors += sstats->rx_fifo_errors; - stats->rx_missed_errors += sstats->rx_missed_errors; + stats->rx_length_errors += sstats->rx_length_errors; + stats->rx_over_errors += sstats->rx_over_errors; + stats->rx_crc_errors += sstats->rx_crc_errors; + stats->rx_frame_errors += sstats->rx_frame_errors; + stats->rx_fifo_errors += sstats->rx_fifo_errors; + stats->rx_missed_errors += sstats->rx_missed_errors; - stats->tx_aborted_errors += sstats->tx_aborted_errors; - stats->tx_carrier_errors += sstats->tx_carrier_errors; - stats->tx_fifo_errors += sstats->tx_fifo_errors; - stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; - stats->tx_window_errors += sstats->tx_window_errors; - } + stats->tx_aborted_errors += sstats->tx_aborted_errors; + stats->tx_carrier_errors += sstats->tx_carrier_errors; + stats->tx_fifo_errors += sstats->tx_fifo_errors; + stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; + stats->tx_window_errors += sstats->tx_window_errors; } read_unlock_bh(&bond->lock); diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c index 3f8115db4d54..f3e47d0c2b3c 100644 --- a/drivers/net/pppox.c +++ b/drivers/net/pppox.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -114,6 +115,13 @@ static int pppox_create(struct socket *sock, int protocol) goto out; rc = -EPROTONOSUPPORT; +#ifdef CONFIG_KMOD + if (!pppox_protos[protocol]) { + char buffer[32]; + sprintf(buffer, "pppox-proto-%d", protocol); + request_module(buffer); + } +#endif if (!pppox_protos[protocol] || !try_module_get(pppox_protos[protocol]->owner)) goto out; diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index 453e6829756c..3df82fe9ce8c 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -373,8 +373,6 @@ static __inline__ int led_get_net_activity(void) if (LOOPBACK(in_dev->ifa_list->ifa_local)) continue; stats = dev->get_stats(dev); - if (!stats) - continue; rx_total += stats->rx_packets; tx_total += stats->tx_packets; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e027a3750a77..ac0c92b1e002 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -325,7 +325,6 @@ struct net_device #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_GSO 2048 /* Enable software GSO. */ #define NETIF_F_LLTX 4096 /* LockLess TX */ -#define NETIF_F_INTERNAL_STATS 8192 /* Use stats structure in net_device */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -654,8 +653,10 @@ static inline void netif_start_queue(struct net_device *dev) static inline void netif_wake_queue(struct net_device *dev) { #ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) + if (netpoll_trap()) { + clear_bit(__LINK_STATE_XOFF, &dev->state); return; + } #endif if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) __netif_schedule(dev); @@ -663,10 +664,6 @@ static inline void netif_wake_queue(struct net_device *dev) static inline void netif_stop_queue(struct net_device *dev) { -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) - return; -#endif set_bit(__LINK_STATE_XOFF, &dev->state); } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2694cb3ca763..253a2b9be9d6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1471,6 +1471,11 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ skb = skb->next) +#define skb_queue_walk_safe(queue, skb, tmp) \ + for (skb = (queue)->next, tmp = skb->next; \ + skb != (struct sk_buff *)(queue); \ + skb = tmp, tmp = skb->next) + #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 854aa6b543f1..802b3a38b041 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -40,6 +40,8 @@ enum IPSTATS_MIB_FRAGCREATES, /* FragCreates */ IPSTATS_MIB_INMCASTPKTS, /* InMcastPkts */ IPSTATS_MIB_OUTMCASTPKTS, /* OutMcastPkts */ + IPSTATS_MIB_INBCASTPKTS, /* InBcastPkts */ + IPSTATS_MIB_OUTBCASTPKTS, /* OutBcastPkts */ __IPSTATS_MIB_MAX }; diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 9c656a5cf842..a5d53e0fe152 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -185,6 +185,11 @@ enum { #define XFRM_MSG_NEWSADINFO XFRM_MSG_NEWSADINFO XFRM_MSG_GETSADINFO, #define XFRM_MSG_GETSADINFO XFRM_MSG_GETSADINFO + + XFRM_MSG_NEWSPDINFO, +#define XFRM_MSG_NEWSPDINFO XFRM_MSG_NEWSPDINFO + XFRM_MSG_GETSPDINFO, +#define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) @@ -290,6 +295,36 @@ enum xfrm_sadattr_type_t { #define XFRMA_SAD_MAX (__XFRMA_SAD_MAX - 1) }; +/* SPD Table filter flags */ +enum xfrm_spd_ftype_t { + XFRM_SPD_UNSPEC, + XFRM_SPD_HMASK=1, + XFRM_SPD_HMAX=2, + XFRM_SPD_ICNT=4, + XFRM_SPD_OCNT=8, + XFRM_SPD_FCNT=16, + XFRM_SPD_ISCNT=32, + XFRM_SPD_OSCNT=64, + XFRM_SPD_FSCNT=128, + __XFRM_SPD_MAX + +#define XFRM_SPD_MAX (__XFRM_SPD_MAX - 1) +}; +enum xfrm_spdattr_type_t { + XFRMA_SPD_UNSPEC, + XFRMA_SPDHMASK, + XFRMA_SPDHMAX, + XFRMA_SPDICNT, + XFRMA_SPDOCNT, + XFRMA_SPDFCNT, + XFRMA_SPDISCNT, + XFRMA_SPDOSCNT, + XFRMA_SPDFSCNT, + __XFRMA_SPD_MAX + +#define XFRMA_SPD_MAX (__XFRMA_SPD_MAX - 1) +}; + struct xfrm_usersa_info { struct xfrm_selector sel; struct xfrm_id id; diff --git a/include/net/flow.h b/include/net/flow.h index ce4b10d8b412..f3cc1f812619 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -97,4 +97,10 @@ extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; +static inline int flow_cache_uli_match(struct flowi *fl1, struct flowi *fl2) +{ + return (fl1->proto == fl2->proto && + !memcmp(&fl1->uli_u, &fl2->uli_u, sizeof(fl1->uli_u))); +} + #endif diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h index 746e7416261e..fd70adbb3566 100644 --- a/include/net/iucv/iucv.h +++ b/include/net/iucv/iucv.h @@ -16,7 +16,7 @@ * completed a register, it can exploit the other functions. * For furthur reference on all IUCV functionality, refer to the * CP Programming Services book, also available on the web thru - * www.ibm.com/s390/vm/pubs, manual # SC24-5760 + * www.vm.ibm.com/pubs, manual # SC24-6084 * * Definition of Return Codes * - All positive return codes including zero are reflected back diff --git a/include/net/tcp.h b/include/net/tcp.h index a385797f160a..ef8f9d4dae85 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -736,9 +736,7 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) static inline void tcp_sync_left_out(struct tcp_sock *tp) { - if (tp->rx_opt.sack_ok && - (tp->sacked_out >= tp->packets_out - tp->lost_out)) - tp->sacked_out = tp->packets_out - tp->lost_out; + BUG_ON(tp->sacked_out + tp->lost_out > tp->packets_out); tp->left_out = tp->sacked_out + tp->lost_out; } @@ -1201,9 +1199,14 @@ static inline struct sk_buff *tcp_send_head(struct sock *sk) static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + sk->sk_send_head = skb->next; if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) sk->sk_send_head = NULL; + /* Don't override Nagle indefinately with F-RTO */ + if (tp->frto_counter == 2) + tp->frto_counter = 3; } static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 8287081d77f2..66c2d3eec03c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -423,6 +423,18 @@ struct xfrm_sadinfo u32 sadhmcnt; /* max allowed hash bkts */ u32 sadcnt; /* current running count */ }; + +struct xfrm_spdinfo +{ + u32 incnt; + u32 outcnt; + u32 fwdcnt; + u32 inscnt; + u32 outscnt; + u32 fwdscnt; + u32 spdhcnt; + u32 spdhmcnt; +}; #ifdef CONFIG_AUDITSYSCALL extern void xfrm_audit_log(uid_t auid, u32 secid, int type, int result, struct xfrm_policy *xp, struct xfrm_state *x); @@ -591,6 +603,10 @@ struct xfrm_dst struct rt6_info rt6; } u; struct dst_entry *route; +#ifdef CONFIG_XFRM_SUB_POLICY + struct flowi *origin; + struct xfrm_selector *partner; +#endif u32 genid; u32 route_mtu_cached; u32 child_mtu_cached; @@ -603,6 +619,12 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) dst_release(xdst->route); if (likely(xdst->u.dst.xfrm)) xfrm_state_put(xdst->u.dst.xfrm); +#ifdef CONFIG_XFRM_SUB_POLICY + kfree(xdst->origin); + xdst->origin = NULL; + kfree(xdst->partner); + xdst->partner = NULL; +#endif } extern void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); @@ -946,6 +968,7 @@ extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info); extern void xfrm_sad_getinfo(struct xfrm_sadinfo *si); +extern void xfrm_spd_getinfo(struct xfrm_spdinfo *si); extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq); extern void xfrm_replay_notify(struct xfrm_state *x, int event); diff --git a/net/core/dev.c b/net/core/dev.c index d5e42d13bd67..eb999003bbb7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2101,26 +2101,23 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { struct net_device_stats *stats = dev->get_stats(dev); - if (stats) { - seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " - "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", - dev->name, stats->rx_bytes, stats->rx_packets, - stats->rx_errors, - stats->rx_dropped + stats->rx_missed_errors, - stats->rx_fifo_errors, - stats->rx_length_errors + stats->rx_over_errors + - stats->rx_crc_errors + stats->rx_frame_errors, - stats->rx_compressed, stats->multicast, - stats->tx_bytes, stats->tx_packets, - stats->tx_errors, stats->tx_dropped, - stats->tx_fifo_errors, stats->collisions, - stats->tx_carrier_errors + - stats->tx_aborted_errors + - stats->tx_window_errors + - stats->tx_heartbeat_errors, - stats->tx_compressed); - } else - seq_printf(seq, "%6s: No statistics available.\n", dev->name); + seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " + "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", + dev->name, stats->rx_bytes, stats->rx_packets, + stats->rx_errors, + stats->rx_dropped + stats->rx_missed_errors, + stats->rx_fifo_errors, + stats->rx_length_errors + stats->rx_over_errors + + stats->rx_crc_errors + stats->rx_frame_errors, + stats->rx_compressed, stats->multicast, + stats->tx_bytes, stats->tx_packets, + stats->tx_errors, stats->tx_dropped, + stats->tx_fifo_errors, stats->collisions, + stats->tx_carrier_errors + + stats->tx_aborted_errors + + stats->tx_window_errors + + stats->tx_heartbeat_errors, + stats->tx_compressed); } /* @@ -3257,11 +3254,9 @@ out: mutex_unlock(&net_todo_run_mutex); } -static struct net_device_stats *maybe_internal_stats(struct net_device *dev) +static struct net_device_stats *internal_stats(struct net_device *dev) { - if (dev->features & NETIF_F_INTERNAL_STATS) - return &dev->stats; - return NULL; + return &dev->stats; } /** @@ -3299,7 +3294,7 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, if (sizeof_priv) dev->priv = netdev_priv(dev); - dev->get_stats = maybe_internal_stats; + dev->get_stats = internal_stats; setup(dev); strcpy(dev->name, name); return dev; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 324e7e0fdb2a..97069399d864 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -329,6 +329,7 @@ drop: static inline int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); + struct rtable *rt; /* * Initialise the virtual path cache for the packet. It describes @@ -340,6 +341,8 @@ static inline int ip_rcv_finish(struct sk_buff *skb) if (unlikely(err)) { if (err == -EHOSTUNREACH) IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + else if (err == -ENETUNREACH) + IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES); goto drop; } } @@ -358,6 +361,12 @@ static inline int ip_rcv_finish(struct sk_buff *skb) if (iph->ihl > 5 && ip_rcv_options(skb)) goto drop; + rt = (struct rtable*)skb->dst; + if (rt->rt_type == RTN_MULTICAST) + IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); + else if (rt->rt_type == RTN_BROADCAST) + IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS); + return dst_input(skb); drop: @@ -414,7 +423,10 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto inhdr_error; len = ntohs(iph->tot_len); - if (skb->len < len || len < (iph->ihl*4)) + if (skb->len < len) { + IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; + } else if (len < (iph->ihl*4)) goto inhdr_error; /* Our transport medium may have padded the buffer out. Now we know it diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 534650cad3a8..d6427d918512 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -160,9 +160,15 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); static inline int ip_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; + struct rtable *rt = (struct rtable *)dst; struct net_device *dev = dst->dev; int hh_len = LL_RESERVED_SPACE(dev); + if (rt->rt_type == RTN_MULTICAST) + IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); + else if (rt->rt_type == RTN_BROADCAST) + IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS); + /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) { struct sk_buff *skb2; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2cf9a898ce50..d6e488668171 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1573,14 +1573,12 @@ void tcp_close(struct sock *sk, long timeout) sk_stream_mem_reclaim(sk); - /* As outlined in draft-ietf-tcpimpl-prob-03.txt, section - * 3.10, we send a RST here because data was lost. To - * witness the awful effects of the old behavior of always - * doing a FIN, run an older 2.1.x kernel or 2.0.x, start - * a bulk GET in an FTP client, suspend the process, wait - * for the client to advertise a zero window, then kill -9 - * the FTP client, wheee... Note: timeout is always zero - * in such a case. + /* As outlined in RFC 2525, section 2.17, we send a RST here because + * data was lost. To witness the awful effects of the old behavior of + * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk + * GET in an FTP client, suspend the process, wait for the client to + * advertise a zero window, then kill -9 the FTP client, wheee... + * Note: timeout is always zero in such a case. */ if (data_was_unread) { /* Unread data was tossed, zap the connection. */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 051f0f815f17..7641b2761a14 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1265,20 +1265,15 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ return flag; } -/* F-RTO can only be used if these conditions are satisfied: - * - there must be some unsent new data - * - the advertised window should allow sending it - * - TCP has never retransmitted anything other than head (SACK enhanced - * variant from Appendix B of RFC4138 is more robust here) +/* F-RTO can only be used if TCP has never retransmitted anything other than + * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) */ int tcp_use_frto(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - if (!sysctl_tcp_frto || !tcp_send_head(sk) || - after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, - tp->snd_una + tp->snd_wnd)) + if (!sysctl_tcp_frto) return 0; if (IsSackFrto()) @@ -2642,7 +2637,9 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag) * algorithm is not part of the F-RTO detection algorithm * given in RFC4138 but can be selected separately). * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss - * and TCP falls back to conventional RTO recovery. + * and TCP falls back to conventional RTO recovery. F-RTO allows overriding + * of Nagle, this is done using frto_counter states 2 and 3, when a new data + * segment of any size sent during F-RTO, state 2 is upgraded to 3. * * Rationale: if the RTO was spurious, new ACKs should arrive from the * original window even after we transmit two new data segments. @@ -2671,7 +2668,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) inet_csk(sk)->icsk_retransmits = 0; if (!before(tp->snd_una, tp->frto_highmark)) { - tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag); + tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); return 1; } @@ -2697,7 +2694,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) return 1; } - if ((tp->frto_counter == 2) && + if ((tp->frto_counter >= 2) && (!(flag&FLAG_FORWARD_PROGRESS) || ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) { /* RFC4138 shortcoming (see comment above) */ @@ -2710,10 +2707,19 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) } if (tp->frto_counter == 1) { + /* Sending of the next skb must be allowed or no FRTO */ + if (!tcp_send_head(sk) || + after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, + tp->snd_una + tp->snd_wnd)) { + tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), + flag); + return 1; + } + tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; tp->frto_counter = 2; return 1; - } else /* frto_counter == 2 */ { + } else { switch (sysctl_tcp_frto_response) { case 2: tcp_undo_spur_to_response(sk, flag); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e70a6840cb64..0faacf9c419d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1035,8 +1035,10 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, if (nonagle & TCP_NAGLE_PUSH) return 1; - /* Don't use the nagle rule for urgent data (or for the final FIN). */ - if (tp->urg_mode || + /* Don't use the nagle rule for urgent data (or for the final FIN). + * Nagle can be ignored during F-RTO too (see RFC4138). + */ + if (tp->urg_mode || (tp->frto_counter == 2) || (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) return 1; @@ -2035,7 +2037,7 @@ void tcp_send_fin(struct sock *sk) /* We get here when a process closes a file descriptor (either due to * an explicit close() or as a byproduct of exit()'ing) and there * was unread data in the receive queue. This behavior is recommended - * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM + * by RFC 2525, section 2.17. -DaveM */ void tcp_send_active_reset(struct sock *sk, gfp_t priority) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cec0f2cc49b7..144970704c2c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -114,14 +114,33 @@ DEFINE_RWLOCK(udp_hash_lock); static int udp_port_rover; -static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) +/* + * Note about this hash function : + * Typical use is probably daddr = 0, only dport is going to vary hash + */ +static inline unsigned int hash_port_and_addr(__u16 port, __be32 addr) +{ + addr ^= addr >> 16; + addr ^= addr >> 8; + return port ^ addr; +} + +static inline int __udp_lib_port_inuse(unsigned int hash, int port, + __be32 daddr, struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; + struct inet_sock *inet; - sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) - if (sk->sk_hash == num) + sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) { + if (sk->sk_hash != hash) + continue; + inet = inet_sk(sk); + if (inet->num != port) + continue; + if (inet->rcv_saddr == daddr) return 1; + } return 0; } @@ -142,6 +161,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, struct hlist_node *node; struct hlist_head *head; struct sock *sk2; + unsigned int hash; int error = 1; write_lock_bh(&udp_hash_lock); @@ -156,7 +176,9 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { int size; - head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; + hash = hash_port_and_addr(result, + inet_sk(sk)->rcv_saddr); + head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; if (hlist_empty(head)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + @@ -181,7 +203,10 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); - if (! __udp_lib_lport_inuse(result, udptable)) + hash = hash_port_and_addr(result, + inet_sk(sk)->rcv_saddr); + if (! __udp_lib_port_inuse(hash, result, + inet_sk(sk)->rcv_saddr, udptable)) break; } if (i >= (1 << 16) / UDP_HTABLE_SIZE) @@ -189,11 +214,13 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, gotit: *port_rover = snum = result; } else { - head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; + hash = hash_port_and_addr(snum, inet_sk(sk)->rcv_saddr); + head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; sk_for_each(sk2, node, head) - if (sk2->sk_hash == snum && + if (sk2->sk_hash == hash && sk2 != sk && + inet_sk(sk2)->num == snum && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && @@ -201,9 +228,9 @@ gotit: goto fail; } inet_sk(sk)->num = snum; - sk->sk_hash = snum; + sk->sk_hash = hash; if (sk_unhashed(sk)) { - head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; sk_add_node(sk, head); sock_prot_inc_use(sk->sk_prot); } @@ -242,63 +269,78 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, { struct sock *sk, *result = NULL; struct hlist_node *node; - unsigned short hnum = ntohs(dport); - int badness = -1; + unsigned int hash, hashwild; + int score, best = -1; + + hash = hash_port_and_addr(ntohs(dport), daddr); + hashwild = hash_port_and_addr(ntohs(dport), 0); read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + +lookup: + + sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); - if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { - int score = (sk->sk_family == PF_INET ? 1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) - continue; - score+=2; - } - if (inet->daddr) { - if (inet->daddr != saddr) - continue; - score+=2; - } - if (inet->dport) { - if (inet->dport != sport) - continue; - score+=2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score+=2; - } - if (score == 9) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } + if (sk->sk_hash != hash || ipv6_only_sock(sk) || + inet->num != dport) + continue; + + score = (sk->sk_family == PF_INET ? 1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) + continue; + score+=2; + } + if (inet->daddr) { + if (inet->daddr != saddr) + continue; + score+=2; + } + if (inet->dport) { + if (inet->dport != sport) + continue; + score+=2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + continue; + score+=2; + } + if (score == 9) { + result = sk; + goto found; + } else if (score > best) { + result = sk; + best = score; } } + + if (hash != hashwild) { + hash = hashwild; + goto lookup; + } +found: if (result) sock_hold(result); read_unlock(&udp_hash_lock); return result; } -static inline struct sock *udp_v4_mcast_next(struct sock *sk, - __be16 loc_port, __be32 loc_addr, - __be16 rmt_port, __be32 rmt_addr, - int dif) +static inline struct sock *udp_v4_mcast_next( + struct sock *sk, + unsigned int hnum, __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, + int dif) { struct hlist_node *node; struct sock *s = sk; - unsigned short hnum = ntohs(loc_port); sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); if (s->sk_hash != hnum || + inet->num != loc_port || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || @@ -1129,29 +1171,44 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, __be32 saddr, __be32 daddr, struct hlist_head udptable[]) { - struct sock *sk; + struct sock *sk, *skw, *sknext; int dif; + unsigned int hash = hash_port_and_addr(ntohs(uh->dest), daddr); + unsigned int hashwild = hash_port_and_addr(ntohs(uh->dest), 0); + + dif = skb->dev->ifindex; read_lock(&udp_hash_lock); - sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); - dif = skb->dev->ifindex; - sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); - if (sk) { - struct sock *sknext = NULL; + sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]); + skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]); + + sk = udp_v4_mcast_next(sk, hash, uh->dest, daddr, uh->source, saddr, dif); + if (!sk) { + hash = hashwild; + sk = udp_v4_mcast_next(skw, hash, uh->dest, daddr, uh->source, + saddr, dif); + } + if (sk) { do { struct sk_buff *skb1 = skb; - - sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, - uh->source, saddr, dif); + sknext = udp_v4_mcast_next(sk_next(sk), hash, uh->dest, + daddr, uh->source, saddr, dif); + if (!sknext && hash != hashwild) { + hash = hashwild; + sknext = udp_v4_mcast_next(skw, hash, uh->dest, + daddr, uh->source, saddr, dif); + } if (sknext) skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { int ret = udp_queue_rcv_skb(sk, skb1); if (ret > 0) - /* we should probably re-process instead - * of dropping packets here. */ + /* + * we should probably re-process + * instead of dropping packets here. + */ kfree_skb(skb1); } sk = sknext; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e04e49373505..3452433cbc96 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2359,8 +2359,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; case NETDEV_CHANGENAME: -#ifdef CONFIG_SYSCTL if (idev) { + snmp6_unregister_dev(idev); +#ifdef CONFIG_SYSCTL addrconf_sysctl_unregister(&idev->cnf); neigh_sysctl_unregister(idev->nd_parms); neigh_sysctl_register(dev, idev->nd_parms, @@ -2368,8 +2369,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, &ndisc_ifinfo_sysctl_change, NULL); addrconf_sysctl_register(idev, &idev->cnf); - } #endif + snmp6_register_dev(idev); + } break; } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index acb306a5dd56..920dc9cf6a84 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -223,6 +223,7 @@ int snmp6_unregister_dev(struct inet6_dev *idev) return -EINVAL; remove_proc_entry(idev->stats.proc_dir_entry->name, proc_net_devsnmp6); + idev->stats.proc_dir_entry = NULL; return 0; } diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 538499a89975..5502cc948dfb 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -261,7 +261,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) __be32 spi; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(skb, spi); + return xfrm6_rcv_spi(skb, spi) > 0 ? : 0; } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 60f293842a39..903bdb6eaaa1 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -90,20 +90,43 @@ struct iucv_irq_data { u32 res2[8]; }; -struct iucv_work { +struct iucv_irq_list { struct list_head list; struct iucv_irq_data data; }; -static LIST_HEAD(iucv_work_queue); -static DEFINE_SPINLOCK(iucv_work_lock); - static struct iucv_irq_data *iucv_irq_data; static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE; static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE; -static void iucv_tasklet_handler(unsigned long); -static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_handler,0); +/* + * Queue of interrupt buffers lock for delivery via the tasklet + * (fast but can't call smp_call_function). + */ +static LIST_HEAD(iucv_task_queue); + +/* + * The tasklet for fast delivery of iucv interrupts. + */ +static void iucv_tasklet_fn(unsigned long); +static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_fn,0); + +/* + * Queue of interrupt buffers for delivery via a work queue + * (slower but can call smp_call_function). + */ +static LIST_HEAD(iucv_work_queue); + +/* + * The work element to deliver path pending interrupts. + */ +static void iucv_work_fn(struct work_struct *work); +static DECLARE_WORK(iucv_work, iucv_work_fn); + +/* + * Spinlock protecting task and work queue. + */ +static DEFINE_SPINLOCK(iucv_queue_lock); enum iucv_command_codes { IUCV_QUERY = 0, @@ -147,10 +170,10 @@ static unsigned long iucv_max_pathid; static DEFINE_SPINLOCK(iucv_table_lock); /* - * iucv_tasklet_cpu: contains the number of the cpu executing the tasklet. - * Needed for iucv_path_sever called from tasklet. + * iucv_active_cpu: contains the number of the cpu executing the tasklet + * or the work handler. Needed for iucv_path_sever called from tasklet. */ -static int iucv_tasklet_cpu = -1; +static int iucv_active_cpu = -1; /* * Mutex and wait queue for iucv_register/iucv_unregister. @@ -449,17 +472,19 @@ static void iucv_setmask_mp(void) { int cpu; + preempt_disable(); for_each_online_cpu(cpu) /* Enable all cpus with a declared buffer. */ if (cpu_isset(cpu, iucv_buffer_cpumask) && !cpu_isset(cpu, iucv_irq_cpumask)) smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, cpu); + preempt_enable(); } /** * iucv_setmask_up * - * Allow iucv interrupts on a single cpus. + * Allow iucv interrupts on a single cpu. */ static void iucv_setmask_up(void) { @@ -493,8 +518,10 @@ static int iucv_enable(void) goto out; /* Declare per cpu buffers. */ rc = -EIO; + preempt_disable(); for_each_online_cpu(cpu) smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); + preempt_enable(); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ goto out_path; @@ -584,48 +611,49 @@ static int iucv_sever_pathid(u16 pathid, u8 userdata[16]) return iucv_call_b2f0(IUCV_SEVER, parm); } +#ifdef CONFIG_SMP /** - * __iucv_cleanup_pathid + * __iucv_cleanup_queue * @dummy: unused dummy argument * * Nop function called via smp_call_function to force work items from * pending external iucv interrupts to the work queue. */ -static void __iucv_cleanup_pathid(void *dummy) +static void __iucv_cleanup_queue(void *dummy) { } +#endif /** - * iucv_cleanup_pathid - * @pathid: 16 bit pathid + * iucv_cleanup_queue * * Function called after a path has been severed to find all remaining * work items for the now stale pathid. The caller needs to hold the * iucv_table_lock. */ -static void iucv_cleanup_pathid(u16 pathid) +static void iucv_cleanup_queue(void) { - struct iucv_work *p, *n; + struct iucv_irq_list *p, *n; /* - * Path is severed, the pathid can be reused immediatly on - * a iucv connect or a connection pending interrupt. - * iucv_path_connect and connection pending interrupt will - * wait until the iucv_table_lock is released before the - * recycled pathid enters the system. - * Force remaining interrupts to the work queue, then - * scan the work queue for items of this path. + * When a path is severed, the pathid can be reused immediatly + * on a iucv connect or a connection pending interrupt. Remove + * all entries from the task queue that refer to a stale pathid + * (iucv_path_table[ix] == NULL). Only then do the iucv connect + * or deliver the connection pending interrupt. To get all the + * pending interrupts force them to the work queue by calling + * an empty function on all cpus. */ - smp_call_function(__iucv_cleanup_pathid, NULL, 0, 1); - spin_lock_irq(&iucv_work_lock); - list_for_each_entry_safe(p, n, &iucv_work_queue, list) { - /* Remove work items for pathid except connection pending */ - if (p->data.ippathid == pathid && p->data.iptype != 0x01) { + smp_call_function(__iucv_cleanup_queue, NULL, 0, 1); + spin_lock_irq(&iucv_queue_lock); + list_for_each_entry_safe(p, n, &iucv_task_queue, list) { + /* Remove stale work items from the task queue. */ + if (iucv_path_table[p->data.ippathid] == NULL) { list_del(&p->list); kfree(p); } } - spin_unlock_irq(&iucv_work_lock); + spin_unlock_irq(&iucv_queue_lock); } /** @@ -684,7 +712,6 @@ void iucv_unregister(struct iucv_handler *handler, int smp) iucv_sever_pathid(p->pathid, NULL); iucv_path_table[p->pathid] = NULL; list_del(&p->list); - iucv_cleanup_pathid(p->pathid); iucv_path_free(p); } spin_unlock_bh(&iucv_table_lock); @@ -757,9 +784,9 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, union iucv_param *parm; int rc; - preempt_disable(); - if (iucv_tasklet_cpu != smp_processor_id()) - spin_lock_bh(&iucv_table_lock); + BUG_ON(in_atomic()); + spin_lock_bh(&iucv_table_lock); + iucv_cleanup_queue(); parm = percpu_ptr(iucv_param, smp_processor_id()); memset(parm, 0, sizeof(union iucv_param)); parm->ctrl.ipmsglim = path->msglim; @@ -794,9 +821,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, rc = -EIO; } } - if (iucv_tasklet_cpu != smp_processor_id()) - spin_unlock_bh(&iucv_table_lock); - preempt_enable(); + spin_unlock_bh(&iucv_table_lock); return rc; } @@ -867,15 +892,14 @@ int iucv_path_sever(struct iucv_path *path, u8 userdata[16]) preempt_disable(); - if (iucv_tasklet_cpu != smp_processor_id()) + if (iucv_active_cpu != smp_processor_id()) spin_lock_bh(&iucv_table_lock); rc = iucv_sever_pathid(path->pathid, userdata); if (!rc) { iucv_path_table[path->pathid] = NULL; list_del_init(&path->list); - iucv_cleanup_pathid(path->pathid); } - if (iucv_tasklet_cpu != smp_processor_id()) + if (iucv_active_cpu != smp_processor_id()) spin_unlock_bh(&iucv_table_lock); preempt_enable(); return rc; @@ -1244,8 +1268,7 @@ static void iucv_path_complete(struct iucv_irq_data *data) struct iucv_path_complete *ipc = (void *) data; struct iucv_path *path = iucv_path_table[ipc->ippathid]; - BUG_ON(!path || !path->handler); - if (path->handler->path_complete) + if (path && path->handler && path->handler->path_complete) path->handler->path_complete(path, ipc->ipuser); } @@ -1273,14 +1296,14 @@ static void iucv_path_severed(struct iucv_irq_data *data) struct iucv_path_severed *ips = (void *) data; struct iucv_path *path = iucv_path_table[ips->ippathid]; - BUG_ON(!path || !path->handler); + if (!path || !path->handler) /* Already severed */ + return; if (path->handler->path_severed) path->handler->path_severed(path, ips->ipuser); else { iucv_sever_pathid(path->pathid, NULL); iucv_path_table[path->pathid] = NULL; list_del_init(&path->list); - iucv_cleanup_pathid(path->pathid); iucv_path_free(path); } } @@ -1309,8 +1332,7 @@ static void iucv_path_quiesced(struct iucv_irq_data *data) struct iucv_path_quiesced *ipq = (void *) data; struct iucv_path *path = iucv_path_table[ipq->ippathid]; - BUG_ON(!path || !path->handler); - if (path->handler->path_quiesced) + if (path && path->handler && path->handler->path_quiesced) path->handler->path_quiesced(path, ipq->ipuser); } @@ -1338,8 +1360,7 @@ static void iucv_path_resumed(struct iucv_irq_data *data) struct iucv_path_resumed *ipr = (void *) data; struct iucv_path *path = iucv_path_table[ipr->ippathid]; - BUG_ON(!path || !path->handler); - if (path->handler->path_resumed) + if (path && path->handler && path->handler->path_resumed) path->handler->path_resumed(path, ipr->ipuser); } @@ -1371,8 +1392,7 @@ static void iucv_message_complete(struct iucv_irq_data *data) struct iucv_path *path = iucv_path_table[imc->ippathid]; struct iucv_message msg; - BUG_ON(!path || !path->handler); - if (path->handler->message_complete) { + if (path && path->handler && path->handler->message_complete) { msg.flags = imc->ipflags1; msg.id = imc->ipmsgid; msg.audit = imc->ipaudit; @@ -1417,8 +1437,7 @@ static void iucv_message_pending(struct iucv_irq_data *data) struct iucv_path *path = iucv_path_table[imp->ippathid]; struct iucv_message msg; - BUG_ON(!path || !path->handler); - if (path->handler->message_pending) { + if (path && path->handler && path->handler->message_pending) { msg.flags = imp->ipflags1; msg.id = imp->ipmsgid; msg.class = imp->iptrgcls; @@ -1433,17 +1452,16 @@ static void iucv_message_pending(struct iucv_irq_data *data) } /** - * iucv_tasklet_handler: + * iucv_tasklet_fn: * * This tasklet loops over the queue of irq buffers created by * iucv_external_interrupt, calls the appropriate action handler * and then frees the buffer. */ -static void iucv_tasklet_handler(unsigned long ignored) +static void iucv_tasklet_fn(unsigned long ignored) { typedef void iucv_irq_fn(struct iucv_irq_data *); static iucv_irq_fn *irq_fn[] = { - [0x01] = iucv_path_pending, [0x02] = iucv_path_complete, [0x03] = iucv_path_severed, [0x04] = iucv_path_quiesced, @@ -1453,38 +1471,70 @@ static void iucv_tasklet_handler(unsigned long ignored) [0x08] = iucv_message_pending, [0x09] = iucv_message_pending, }; - struct iucv_work *p; + struct list_head task_queue = LIST_HEAD_INIT(task_queue); + struct iucv_irq_list *p, *n; /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */ spin_lock(&iucv_table_lock); - iucv_tasklet_cpu = smp_processor_id(); + iucv_active_cpu = smp_processor_id(); - spin_lock_irq(&iucv_work_lock); - while (!list_empty(&iucv_work_queue)) { - p = list_entry(iucv_work_queue.next, struct iucv_work, list); + spin_lock_irq(&iucv_queue_lock); + list_splice_init(&iucv_task_queue, &task_queue); + spin_unlock_irq(&iucv_queue_lock); + + list_for_each_entry_safe(p, n, &task_queue, list) { list_del_init(&p->list); - spin_unlock_irq(&iucv_work_lock); irq_fn[p->data.iptype](&p->data); kfree(p); - spin_lock_irq(&iucv_work_lock); } - spin_unlock_irq(&iucv_work_lock); - iucv_tasklet_cpu = -1; + iucv_active_cpu = -1; spin_unlock(&iucv_table_lock); } +/** + * iucv_work_fn: + * + * This work function loops over the queue of path pending irq blocks + * created by iucv_external_interrupt, calls the appropriate action + * handler and then frees the buffer. + */ +static void iucv_work_fn(struct work_struct *work) +{ + typedef void iucv_irq_fn(struct iucv_irq_data *); + struct list_head work_queue = LIST_HEAD_INIT(work_queue); + struct iucv_irq_list *p, *n; + + /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */ + spin_lock_bh(&iucv_table_lock); + iucv_active_cpu = smp_processor_id(); + + spin_lock_irq(&iucv_queue_lock); + list_splice_init(&iucv_work_queue, &work_queue); + spin_unlock_irq(&iucv_queue_lock); + + iucv_cleanup_queue(); + list_for_each_entry_safe(p, n, &work_queue, list) { + list_del_init(&p->list); + iucv_path_pending(&p->data); + kfree(p); + } + + iucv_active_cpu = -1; + spin_unlock_bh(&iucv_table_lock); +} + /** * iucv_external_interrupt * @code: irq code * * Handles external interrupts coming in from CP. - * Places the interrupt buffer on a queue and schedules iucv_tasklet_handler(). + * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn(). */ static void iucv_external_interrupt(u16 code) { struct iucv_irq_data *p; - struct iucv_work *work; + struct iucv_irq_list *work; p = percpu_ptr(iucv_irq_data, smp_processor_id()); if (p->ippathid >= iucv_max_pathid) { @@ -1498,16 +1548,23 @@ static void iucv_external_interrupt(u16 code) printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n"); return; } - work = kmalloc(sizeof(struct iucv_work), GFP_ATOMIC); + work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC); if (!work) { printk(KERN_WARNING "iucv_external_interrupt: out of memory\n"); return; } memcpy(&work->data, p, sizeof(work->data)); - spin_lock(&iucv_work_lock); - list_add_tail(&work->list, &iucv_work_queue); - spin_unlock(&iucv_work_lock); - tasklet_schedule(&iucv_tasklet); + spin_lock(&iucv_queue_lock); + if (p->iptype == 0x01) { + /* Path pending interrupt. */ + list_add_tail(&work->list, &iucv_work_queue); + schedule_work(&iucv_work); + } else { + /* The other interrupts. */ + list_add_tail(&work->list, &iucv_task_queue); + tasklet_schedule(&iucv_tasklet); + } + spin_unlock(&iucv_queue_lock); } /** @@ -1577,12 +1634,14 @@ out: */ static void iucv_exit(void) { - struct iucv_work *p, *n; + struct iucv_irq_list *p, *n; - spin_lock_irq(&iucv_work_lock); + spin_lock_irq(&iucv_queue_lock); + list_for_each_entry_safe(p, n, &iucv_task_queue, list) + kfree(p); list_for_each_entry_safe(p, n, &iucv_work_queue, list) kfree(p); - spin_unlock_irq(&iucv_work_lock); + spin_unlock_irq(&iucv_queue_lock); unregister_hotcpu_notifier(&iucv_cpu_notifier); percpu_free(iucv_param); percpu_free(iucv_irq_data); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 11938fb20395..2fc0a92caa78 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3987,7 +3987,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, memcpy(&temp, &from->ipaddr, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; - if(space_left < addrlen) + if (space_left < addrlen) return -ENOMEM; if (copy_to_user(to, &temp, addrlen)) return -EFAULT; @@ -4076,8 +4076,9 @@ done: /* Helper function that copies local addresses to user and returns the number * of addresses copied. */ -static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_addrs, - void __user *to) +static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, + int max_addrs, void *to, + int *bytes_copied) { struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; @@ -4094,10 +4095,10 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - if (copy_to_user(to, &temp, addrlen)) - return -EFAULT; + memcpy(to, &temp, addrlen); to += addrlen; + *bytes_copied += addrlen; cnt ++; if (cnt >= max_addrs) break; } @@ -4105,8 +4106,8 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add return cnt; } -static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, - void __user **to, size_t space_left) +static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, + size_t space_left, int *bytes_copied) { struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; @@ -4123,14 +4124,14 @@ static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - if(space_leftaddress_list.next, struct sctp_sockaddr_entry, list); if (sctp_is_any(&addr->a)) { - cnt = sctp_copy_laddrs_to_user_old(sk, bp->port, - getaddrs.addr_num, - to); - if (cnt < 0) { - err = cnt; - goto unlock; - } + cnt = sctp_copy_laddrs_old(sk, bp->port, + getaddrs.addr_num, + addrs, &bytes_copied); goto copy_getaddrs; } } @@ -4206,22 +4214,29 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, memcpy(&temp, &addr->a, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - if (copy_to_user(to, &temp, addrlen)) { - err = -EFAULT; - goto unlock; - } + memcpy(addrs, &temp, addrlen); to += addrlen; + bytes_copied += addrlen; cnt ++; if (cnt >= getaddrs.addr_num) break; } copy_getaddrs: + sctp_read_unlock(addr_lock); + + /* copy the entire address list into the user provided space */ + if (copy_to_user(to, addrs, bytes_copied)) { + err = -EFAULT; + goto error; + } + + /* copy the leading structure back to user */ getaddrs.addr_num = cnt; if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old))) err = -EFAULT; -unlock: - sctp_read_unlock(addr_lock); +error: + kfree(addrs); return err; } @@ -4241,7 +4256,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, rwlock_t *addr_lock; int err = 0; size_t space_left; - int bytes_copied; + int bytes_copied = 0; + void *addrs; if (len <= sizeof(struct sctp_getaddrs)) return -EINVAL; @@ -4269,6 +4285,9 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, to = optval + offsetof(struct sctp_getaddrs,addrs); space_left = len - sizeof(struct sctp_getaddrs) - offsetof(struct sctp_getaddrs,addrs); + addrs = kmalloc(space_left, GFP_KERNEL); + if (!addrs) + return -ENOMEM; sctp_read_lock(addr_lock); @@ -4279,11 +4298,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); if (sctp_is_any(&addr->a)) { - cnt = sctp_copy_laddrs_to_user(sk, bp->port, - &to, space_left); + cnt = sctp_copy_laddrs(sk, bp->port, addrs, + space_left, &bytes_copied); if (cnt < 0) { err = cnt; - goto unlock; + goto error; } goto copy_getaddrs; } @@ -4294,26 +4313,31 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, memcpy(&temp, &addr->a, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - if(space_left < addrlen) - return -ENOMEM; /*fixme: right error?*/ - if (copy_to_user(to, &temp, addrlen)) { - err = -EFAULT; - goto unlock; + if (space_left < addrlen) { + err = -ENOMEM; /*fixme: right error?*/ + goto error; } + memcpy(addrs, &temp, addrlen); to += addrlen; + bytes_copied += addrlen; cnt ++; space_left -= addrlen; } copy_getaddrs: + sctp_read_unlock(addr_lock); + + if (copy_to_user(to, addrs, bytes_copied)) { + err = -EFAULT; + goto error; + } if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) return -EFAULT; - bytes_copied = ((char __user *)to) - optval; if (put_user(bytes_copied, optlen)) return -EFAULT; -unlock: - sctp_read_unlock(addr_lock); +error: + kfree(addrs); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 762926009c04..263e34e45265 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -579,8 +579,22 @@ static inline int xfrm_byidx_should_resize(int total) return 0; } -static DEFINE_MUTEX(hash_resize_mutex); +void xfrm_spd_getinfo(struct xfrm_spdinfo *si) +{ + read_lock_bh(&xfrm_policy_lock); + si->incnt = xfrm_policy_count[XFRM_POLICY_IN]; + si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD]; + si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->spdhcnt = xfrm_idx_hmask; + si->spdhmcnt = xfrm_policy_hashmax; + read_unlock_bh(&xfrm_policy_lock); +} +EXPORT_SYMBOL(xfrm_spd_getinfo); +static DEFINE_MUTEX(hash_resize_mutex); static void xfrm_hash_resize(struct work_struct *__unused) { int dir, total; @@ -1330,6 +1344,40 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, return err; } +static int inline +xfrm_dst_alloc_copy(void **target, void *src, int size) +{ + if (!*target) { + *target = kmalloc(size, GFP_ATOMIC); + if (!*target) + return -ENOMEM; + } + memcpy(*target, src, size); + return 0; +} + +static int inline +xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel) +{ +#ifdef CONFIG_XFRM_SUB_POLICY + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + return xfrm_dst_alloc_copy((void **)&(xdst->partner), + sel, sizeof(*sel)); +#else + return 0; +#endif +} + +static int inline +xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) +{ +#ifdef CONFIG_XFRM_SUB_POLICY + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl)); +#else + return 0; +#endif +} static int stale_bundle(struct dst_entry *dst); @@ -1518,6 +1566,18 @@ restart: err = -EHOSTUNREACH; goto error; } + + if (npols > 1) + err = xfrm_dst_update_parent(dst, &pols[1]->selector); + else + err = xfrm_dst_update_origin(dst, fl); + if (unlikely(err)) { + write_unlock_bh(&policy->lock); + if (dst) + dst_free(dst); + goto error; + } + dst->next = policy->bundles; policy->bundles = dst; dst_hold(dst); @@ -1933,6 +1993,15 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || (dst->dev && !netif_running(dst->dev))) return 0; +#ifdef CONFIG_XFRM_SUB_POLICY + if (fl) { + if (first->origin && !flow_cache_uli_match(first->origin, fl)) + return 0; + if (first->partner && + !xfrm_selector_match(first->partner, fl, family)) + return 0; + } +#endif last = NULL; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 69110fed64b6..4210d91624cd 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -672,6 +672,81 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, return skb; } +static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) +{ + struct xfrm_spdinfo si; + struct nlmsghdr *nlh; + u32 *f; + + nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); + if (nlh == NULL) /* shouldnt really happen ... */ + return -EMSGSIZE; + + f = nlmsg_data(nlh); + *f = flags; + xfrm_spd_getinfo(&si); + + if (flags & XFRM_SPD_HMASK) + NLA_PUT_U32(skb, XFRMA_SPDHMASK, si.spdhcnt); + if (flags & XFRM_SPD_HMAX) + NLA_PUT_U32(skb, XFRMA_SPDHMAX, si.spdhmcnt); + if (flags & XFRM_SPD_ICNT) + NLA_PUT_U32(skb, XFRMA_SPDICNT, si.incnt); + if (flags & XFRM_SPD_OCNT) + NLA_PUT_U32(skb, XFRMA_SPDOCNT, si.outcnt); + if (flags & XFRM_SPD_FCNT) + NLA_PUT_U32(skb, XFRMA_SPDFCNT, si.fwdcnt); + if (flags & XFRM_SPD_ISCNT) + NLA_PUT_U32(skb, XFRMA_SPDISCNT, si.inscnt); + if (flags & XFRM_SPD_OSCNT) + NLA_PUT_U32(skb, XFRMA_SPDOSCNT, si.inscnt); + if (flags & XFRM_SPD_FSCNT) + NLA_PUT_U32(skb, XFRMA_SPDFSCNT, si.inscnt); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) +{ + struct sk_buff *r_skb; + u32 *flags = NLMSG_DATA(nlh); + u32 spid = NETLINK_CB(skb).pid; + u32 seq = nlh->nlmsg_seq; + int len = NLMSG_LENGTH(sizeof(u32)); + + + if (*flags & XFRM_SPD_HMASK) + len += RTA_SPACE(sizeof(u32)); + if (*flags & XFRM_SPD_HMAX) + len += RTA_SPACE(sizeof(u32)); + if (*flags & XFRM_SPD_ICNT) + len += RTA_SPACE(sizeof(u32)); + if (*flags & XFRM_SPD_OCNT) + len += RTA_SPACE(sizeof(u32)); + if (*flags & XFRM_SPD_FCNT) + len += RTA_SPACE(sizeof(u32)); + if (*flags & XFRM_SPD_ISCNT) + len += RTA_SPACE(sizeof(u32)); + if (*flags & XFRM_SPD_OSCNT) + len += RTA_SPACE(sizeof(u32)); + if (*flags & XFRM_SPD_FSCNT) + len += RTA_SPACE(sizeof(u32)); + + r_skb = alloc_skb(len, GFP_ATOMIC); + if (r_skb == NULL) + return -ENOMEM; + + if (build_spdinfo(r_skb, spid, seq, *flags) < 0) + BUG(); + + return nlmsg_unicast(xfrm_nl, r_skb, spid); +} + static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) { struct xfrm_sadinfo si; @@ -1879,6 +1954,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)), + [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)), }; #undef XMSGSIZE @@ -1907,6 +1983,7 @@ static struct xfrm_link { [XFRM_MSG_GETAE - XFRM_MSG_BASE] = { .doit = xfrm_get_ae }, [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate }, [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo }, + [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo }, }; static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)