2005-04-16 15:20:36 -07:00
/*
* INET An implementation of the TCP / IP protocol suite for the LINUX
* operating system . INET is implemented using the BSD Socket
* interface as the means of communication with the user level .
*
* This file implements the various access functions for the
* PROC file system . It is mainly used for debugging and
* statistics .
*
* Authors : Fred N . van Kempen , < waltje @ uWalt . NL . Mugnet . ORG >
* Gerald J . Heim , < heim @ peanuts . informatik . uni - tuebingen . de >
* Fred Baumgarten , < dc6iq @ insu1 . etec . uni - karlsruhe . de >
* Erik Schoenfelder , < schoenfr @ ibr . cs . tu - bs . de >
*
* Fixes :
* Alan Cox : UDP sockets show the rxqueue / txqueue
* using hint flag for the netinfo .
* Pauline Middelink : identd support
* Alan Cox : Make / proc safer .
* Erik Schoenfelder : / proc / net / snmp
* Alan Cox : Handle dead sockets properly .
* Gerhard Koerting : Show both timers
* Alan Cox : Allow inode to be NULL ( kernel socket )
* Andi Kleen : Add support for open_requests and
* split functions for more readability .
* Andi Kleen : Add support for / proc / net / netstat
* Arnaldo C . Melo : Convert to seq_file
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*/
# include <linux/types.h>
2007-09-12 12:01:34 +02:00
# include <net/net_namespace.h>
2005-04-16 15:20:36 -07:00
# include <net/icmp.h>
# include <net/protocol.h>
# include <net/tcp.h>
# include <net/udp.h>
2006-11-27 11:10:57 -08:00
# include <net/udplite.h>
2008-12-29 23:04:08 -08:00
# include <linux/bottom_half.h>
2005-12-27 02:43:12 -02:00
# include <linux/inetdevice.h>
2005-04-16 15:20:36 -07:00
# include <linux/proc_fs.h>
# include <linux/seq_file.h>
2011-07-15 11:47:34 -04:00
# include <linux/export.h>
2005-04-16 15:20:36 -07:00
# include <net/sock.h>
# include <net/raw.h>
/*
* Report socket allocation statistics [ mea @ utu . fi ]
*
* seq_file show callback bound by sockstat_seq_open (the "sockstat" proc
* entry). The network namespace is read from seq->private.
* Emits whatever socket_seq_show() prints, followed by one line each for
* TCP, UDP, UDPLITE, RAW and FRAG. Always returns 0.
*/
static int sockstat_seq_show ( struct seq_file * seq , void * v )
{
2008-03-31 19:43:18 -07:00
struct net * net = seq - > private ;
2008-12-29 23:04:08 -08:00
int orphans , sockets ;
/* Both TCP sums below are taken between local_bh_disable/enable. */
local_bh_disable ( ) ;
2009-02-16 00:08:56 -08:00
orphans = percpu_counter_sum_positive ( & tcp_orphan_count ) ;
2011-12-11 21:47:02 +00:00
sockets = proto_sockets_allocated_sum_positive ( & tcp_prot ) ;
2008-12-29 23:04:08 -08:00
local_bh_enable ( ) ;
2008-03-31 19:43:18 -07:00
2005-04-16 15:20:36 -07:00
socket_seq_show ( seq ) ;
2010-11-09 23:24:26 +00:00
/* TCP line: in-use, orphan, time-wait, allocated sockets and memory. */
seq_printf ( seq , " TCP: inuse %d orphan %d tw %d alloc %d mem %ld \n " ,
2008-12-29 23:04:08 -08:00
sock_prot_inuse_get ( net , & tcp_prot ) , orphans ,
tcp_death_row . tw_count , sockets ,
2011-12-11 21:47:02 +00:00
proto_memory_allocated ( & tcp_prot ) ) ;
2010-11-09 23:24:26 +00:00
seq_printf ( seq , " UDP: inuse %d mem %ld \n " ,
2008-03-31 19:43:18 -07:00
sock_prot_inuse_get ( net , & udp_prot ) ,
2011-12-11 21:47:02 +00:00
proto_memory_allocated ( & udp_prot ) ) ;
2008-03-31 19:41:46 -07:00
seq_printf ( seq , " UDPLITE: inuse %d \n " ,
2008-03-31 19:43:18 -07:00
sock_prot_inuse_get ( net , & udplite_prot ) ) ;
2008-03-31 19:41:46 -07:00
seq_printf ( seq , " RAW: inuse %d \n " ,
2008-03-31 19:43:18 -07:00
sock_prot_inuse_get ( net , & raw_prot ) ) ;
2007-10-15 02:31:52 -07:00
/* FRAG line: values come from ip_frag_nqueues() and ip_frag_mem(). */
seq_printf ( seq , " FRAG: inuse %d memory %d \n " ,
2008-03-31 19:43:18 -07:00
ip_frag_nqueues ( net ) , ip_frag_mem ( net ) ) ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
/*
* open handler for the "sockstat" entry: hands sockstat_seq_show to
* single_open_net() and returns that call's result unchanged.
*/
static int sockstat_seq_open ( struct inode * inode , struct file * file )
{
2008-07-18 04:07:21 -07:00
return single_open_net ( inode , file , sockstat_seq_show ) ;
2008-03-31 19:43:18 -07:00
}
2007-02-12 00:55:35 -08:00
/*
* File operations registered for the "sockstat" entry by ip_proc_init_net:
* opened via sockstat_seq_open, read/seek through the seq_file helpers,
* released with single_release_net.
*/
static const struct file_operations sockstat_seq_fops = {
2005-04-16 15:20:36 -07:00
. owner = THIS_MODULE ,
. open = sockstat_seq_open ,
. read = seq_read ,
. llseek = seq_lseek ,
2008-07-18 04:07:44 -07:00
. release = single_release_net ,
2005-04-16 15:20:36 -07:00
} ;
/* snmp items */
2005-11-29 16:21:38 -08:00
/*
* Columns of the "Ip:" section printed by snmp_seq_show, in output order
* (they follow the fixed Forwarding and DefaultTTL columns). Each item
* pairs the column name with its IPSTATS_MIB_* counter index. Readers
* stop at the first entry whose name is NULL, presumably the one
* SNMP_MIB_SENTINEL provides.
*/
static const struct snmp_mib snmp4_ipstats_list [ ] = {
2009-04-27 02:45:02 -07:00
SNMP_MIB_ITEM ( " InReceives " , IPSTATS_MIB_INPKTS ) ,
2005-04-16 15:20:36 -07:00
SNMP_MIB_ITEM ( " InHdrErrors " , IPSTATS_MIB_INHDRERRORS ) ,
SNMP_MIB_ITEM ( " InAddrErrors " , IPSTATS_MIB_INADDRERRORS ) ,
SNMP_MIB_ITEM ( " ForwDatagrams " , IPSTATS_MIB_OUTFORWDATAGRAMS ) ,
SNMP_MIB_ITEM ( " InUnknownProtos " , IPSTATS_MIB_INUNKNOWNPROTOS ) ,
SNMP_MIB_ITEM ( " InDiscards " , IPSTATS_MIB_INDISCARDS ) ,
SNMP_MIB_ITEM ( " InDelivers " , IPSTATS_MIB_INDELIVERS ) ,
2009-04-27 02:45:02 -07:00
SNMP_MIB_ITEM ( " OutRequests " , IPSTATS_MIB_OUTPKTS ) ,
2005-04-16 15:20:36 -07:00
SNMP_MIB_ITEM ( " OutDiscards " , IPSTATS_MIB_OUTDISCARDS ) ,
SNMP_MIB_ITEM ( " OutNoRoutes " , IPSTATS_MIB_OUTNOROUTES ) ,
SNMP_MIB_ITEM ( " ReasmTimeout " , IPSTATS_MIB_REASMTIMEOUT ) ,
SNMP_MIB_ITEM ( " ReasmReqds " , IPSTATS_MIB_REASMREQDS ) ,
SNMP_MIB_ITEM ( " ReasmOKs " , IPSTATS_MIB_REASMOKS ) ,
SNMP_MIB_ITEM ( " ReasmFails " , IPSTATS_MIB_REASMFAILS ) ,
SNMP_MIB_ITEM ( " FragOKs " , IPSTATS_MIB_FRAGOKS ) ,
SNMP_MIB_ITEM ( " FragFails " , IPSTATS_MIB_FRAGFAILS ) ,
SNMP_MIB_ITEM ( " FragCreates " , IPSTATS_MIB_FRAGCREATES ) ,
SNMP_MIB_SENTINEL
} ;
2013-08-06 03:32:11 -07:00
/*
* Following items are displayed in /proc/net/netstat
*
* These are the columns of the "IpExt:" section printed by
* netstat_seq_show, in output order. Each item pairs the column name
* with its IPSTATS_MIB_* counter index; the values are read from the
* same net->mib.ip_statistics block as the "Ip:" section.
*/
2007-05-14 03:07:30 -07:00
static const struct snmp_mib snmp4_ipextstats_list [ ] = {
SNMP_MIB_ITEM ( " InNoRoutes " , IPSTATS_MIB_INNOROUTES ) ,
SNMP_MIB_ITEM ( " InTruncatedPkts " , IPSTATS_MIB_INTRUNCATEDPKTS ) ,
SNMP_MIB_ITEM ( " InMcastPkts " , IPSTATS_MIB_INMCASTPKTS ) ,
SNMP_MIB_ITEM ( " OutMcastPkts " , IPSTATS_MIB_OUTMCASTPKTS ) ,
SNMP_MIB_ITEM ( " InBcastPkts " , IPSTATS_MIB_INBCASTPKTS ) ,
SNMP_MIB_ITEM ( " OutBcastPkts " , IPSTATS_MIB_OUTBCASTPKTS ) ,
2009-04-27 02:45:02 -07:00
SNMP_MIB_ITEM ( " InOctets " , IPSTATS_MIB_INOCTETS ) ,
SNMP_MIB_ITEM ( " OutOctets " , IPSTATS_MIB_OUTOCTETS ) ,
SNMP_MIB_ITEM ( " InMcastOctets " , IPSTATS_MIB_INMCASTOCTETS ) ,
SNMP_MIB_ITEM ( " OutMcastOctets " , IPSTATS_MIB_OUTMCASTOCTETS ) ,
SNMP_MIB_ITEM ( " InBcastOctets " , IPSTATS_MIB_INBCASTOCTETS ) ,
SNMP_MIB_ITEM ( " OutBcastOctets " , IPSTATS_MIB_OUTBCASTOCTETS ) ,
2013-08-06 03:32:11 -07:00
/* Non RFC4293 fields */
2013-04-29 08:39:56 +00:00
SNMP_MIB_ITEM ( " InCsumErrors " , IPSTATS_MIB_CSUMERRORS ) ,
2013-08-06 03:32:11 -07:00
SNMP_MIB_ITEM ( " InNoECTPkts " , IPSTATS_MIB_NOECTPKTS ) ,
SNMP_MIB_ITEM ( " InECT1Pkts " , IPSTATS_MIB_ECT1PKTS ) ,
SNMP_MIB_ITEM ( " InECT0Pkts " , IPSTATS_MIB_ECT0PKTS ) ,
SNMP_MIB_ITEM ( " InCEPkts " , IPSTATS_MIB_CEPKTS ) ,
2007-05-14 03:07:30 -07:00
SNMP_MIB_SENTINEL
} ;
2010-01-22 10:17:26 +00:00
/*
* Per-type ICMP columns used by icmp_put. `name` is the column suffix
* (icmp_put prefixes it with "In" or "Out"); `index` is the ICMP type
* constant used as the offset into icmpmsg_statistics->mibs (icmp_put
* ORs in 0x100 for the "Out" counters). The list ends at the entry
* whose name is NULL.
*/
static const struct {
const char * name ;
2007-09-17 09:57:33 -07:00
int index ;
} icmpmibmap [ ] = {
{ " DestUnreachs " , ICMP_DEST_UNREACH } ,
{ " TimeExcds " , ICMP_TIME_EXCEEDED } ,
{ " ParmProbs " , ICMP_PARAMETERPROB } ,
{ " SrcQuenchs " , ICMP_SOURCE_QUENCH } ,
{ " Redirects " , ICMP_REDIRECT } ,
{ " Echos " , ICMP_ECHO } ,
{ " EchoReps " , ICMP_ECHOREPLY } ,
{ " Timestamps " , ICMP_TIMESTAMP } ,
{ " TimestampReps " , ICMP_TIMESTAMPREPLY } ,
{ " AddrMasks " , ICMP_ADDRESS } ,
{ " AddrMaskReps " , ICMP_ADDRESSREPLY } ,
2007-10-09 01:59:42 -07:00
{ NULL , 0 }
2007-09-17 09:57:33 -07:00
} ;
2005-11-29 16:21:38 -08:00
/*
* Columns of the "Tcp:" section printed by snmp_seq_show, in output
* order. Each item pairs the column name with its TCP_MIB_* counter
* index. snmp_seq_show prints the TCP_MIB_MAXCONN entry with a signed
* format and every other entry unsigned.
*/
static const struct snmp_mib snmp4_tcp_list [ ] = {
2005-04-16 15:20:36 -07:00
SNMP_MIB_ITEM ( " RtoAlgorithm " , TCP_MIB_RTOALGORITHM ) ,
SNMP_MIB_ITEM ( " RtoMin " , TCP_MIB_RTOMIN ) ,
SNMP_MIB_ITEM ( " RtoMax " , TCP_MIB_RTOMAX ) ,
SNMP_MIB_ITEM ( " MaxConn " , TCP_MIB_MAXCONN ) ,
SNMP_MIB_ITEM ( " ActiveOpens " , TCP_MIB_ACTIVEOPENS ) ,
SNMP_MIB_ITEM ( " PassiveOpens " , TCP_MIB_PASSIVEOPENS ) ,
SNMP_MIB_ITEM ( " AttemptFails " , TCP_MIB_ATTEMPTFAILS ) ,
SNMP_MIB_ITEM ( " EstabResets " , TCP_MIB_ESTABRESETS ) ,
SNMP_MIB_ITEM ( " CurrEstab " , TCP_MIB_CURRESTAB ) ,
SNMP_MIB_ITEM ( " InSegs " , TCP_MIB_INSEGS ) ,
SNMP_MIB_ITEM ( " OutSegs " , TCP_MIB_OUTSEGS ) ,
SNMP_MIB_ITEM ( " RetransSegs " , TCP_MIB_RETRANSSEGS ) ,
SNMP_MIB_ITEM ( " InErrs " , TCP_MIB_INERRS ) ,
SNMP_MIB_ITEM ( " OutRsts " , TCP_MIB_OUTRSTS ) ,
2013-04-29 08:39:56 +00:00
SNMP_MIB_ITEM ( " InCsumErrors " , TCP_MIB_CSUMERRORS ) ,
2005-04-16 15:20:36 -07:00
SNMP_MIB_SENTINEL
} ;
2005-11-29 16:21:38 -08:00
/*
* Columns shared by the "Udp:" and "UdpLite:" sections printed by
* snmp_seq_show, in output order. Each item pairs the column name with
* its UDP_MIB_* counter index; the two sections differ only in which
* statistics block (udp_statistics vs udplite_statistics) is read.
*/
static const struct snmp_mib snmp4_udp_list [ ] = {
2005-04-16 15:20:36 -07:00
SNMP_MIB_ITEM ( " InDatagrams " , UDP_MIB_INDATAGRAMS ) ,
SNMP_MIB_ITEM ( " NoPorts " , UDP_MIB_NOPORTS ) ,
SNMP_MIB_ITEM ( " InErrors " , UDP_MIB_INERRORS ) ,
SNMP_MIB_ITEM ( " OutDatagrams " , UDP_MIB_OUTDATAGRAMS ) ,
2006-08-14 23:57:10 -07:00
SNMP_MIB_ITEM ( " RcvbufErrors " , UDP_MIB_RCVBUFERRORS ) ,
SNMP_MIB_ITEM ( " SndbufErrors " , UDP_MIB_SNDBUFERRORS ) ,
2013-04-29 08:39:56 +00:00
SNMP_MIB_ITEM ( " InCsumErrors " , UDP_MIB_CSUMERRORS ) ,
2005-04-16 15:20:36 -07:00
SNMP_MIB_SENTINEL
} ;
2005-11-29 16:21:38 -08:00
/*
* Columns of the "TcpExt:" section printed by netstat_seq_show, in
* output order. Each item pairs the column name with its LINUX_MIB_*
* counter index; values are read from net->mib.net_statistics.
*
* NOTE(review): the free-text runs between some entries (before
* TCPLossProbes and before TCPAutoCorking) look like commit-message
* residue from the annotated dump, not C - confirm and strip before
* building.
*/
static const struct snmp_mib snmp4_net_list [ ] = {
2005-04-16 15:20:36 -07:00
SNMP_MIB_ITEM ( " SyncookiesSent " , LINUX_MIB_SYNCOOKIESSENT ) ,
SNMP_MIB_ITEM ( " SyncookiesRecv " , LINUX_MIB_SYNCOOKIESRECV ) ,
SNMP_MIB_ITEM ( " SyncookiesFailed " , LINUX_MIB_SYNCOOKIESFAILED ) ,
SNMP_MIB_ITEM ( " EmbryonicRsts " , LINUX_MIB_EMBRYONICRSTS ) ,
SNMP_MIB_ITEM ( " PruneCalled " , LINUX_MIB_PRUNECALLED ) ,
SNMP_MIB_ITEM ( " RcvPruned " , LINUX_MIB_RCVPRUNED ) ,
SNMP_MIB_ITEM ( " OfoPruned " , LINUX_MIB_OFOPRUNED ) ,
SNMP_MIB_ITEM ( " OutOfWindowIcmps " , LINUX_MIB_OUTOFWINDOWICMPS ) ,
SNMP_MIB_ITEM ( " LockDroppedIcmps " , LINUX_MIB_LOCKDROPPEDICMPS ) ,
SNMP_MIB_ITEM ( " ArpFilter " , LINUX_MIB_ARPFILTER ) ,
SNMP_MIB_ITEM ( " TW " , LINUX_MIB_TIMEWAITED ) ,
SNMP_MIB_ITEM ( " TWRecycled " , LINUX_MIB_TIMEWAITRECYCLED ) ,
SNMP_MIB_ITEM ( " TWKilled " , LINUX_MIB_TIMEWAITKILLED ) ,
SNMP_MIB_ITEM ( " PAWSPassive " , LINUX_MIB_PAWSPASSIVEREJECTED ) ,
SNMP_MIB_ITEM ( " PAWSActive " , LINUX_MIB_PAWSACTIVEREJECTED ) ,
SNMP_MIB_ITEM ( " PAWSEstab " , LINUX_MIB_PAWSESTABREJECTED ) ,
SNMP_MIB_ITEM ( " DelayedACKs " , LINUX_MIB_DELAYEDACKS ) ,
SNMP_MIB_ITEM ( " DelayedACKLocked " , LINUX_MIB_DELAYEDACKLOCKED ) ,
SNMP_MIB_ITEM ( " DelayedACKLost " , LINUX_MIB_DELAYEDACKLOST ) ,
SNMP_MIB_ITEM ( " ListenOverflows " , LINUX_MIB_LISTENOVERFLOWS ) ,
SNMP_MIB_ITEM ( " ListenDrops " , LINUX_MIB_LISTENDROPS ) ,
SNMP_MIB_ITEM ( " TCPPrequeued " , LINUX_MIB_TCPPREQUEUED ) ,
SNMP_MIB_ITEM ( " TCPDirectCopyFromBacklog " , LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG ) ,
SNMP_MIB_ITEM ( " TCPDirectCopyFromPrequeue " , LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE ) ,
SNMP_MIB_ITEM ( " TCPPrequeueDropped " , LINUX_MIB_TCPPREQUEUEDROPPED ) ,
SNMP_MIB_ITEM ( " TCPHPHits " , LINUX_MIB_TCPHPHITS ) ,
SNMP_MIB_ITEM ( " TCPHPHitsToUser " , LINUX_MIB_TCPHPHITSTOUSER ) ,
SNMP_MIB_ITEM ( " TCPPureAcks " , LINUX_MIB_TCPPUREACKS ) ,
SNMP_MIB_ITEM ( " TCPHPAcks " , LINUX_MIB_TCPHPACKS ) ,
SNMP_MIB_ITEM ( " TCPRenoRecovery " , LINUX_MIB_TCPRENORECOVERY ) ,
SNMP_MIB_ITEM ( " TCPSackRecovery " , LINUX_MIB_TCPSACKRECOVERY ) ,
SNMP_MIB_ITEM ( " TCPSACKReneging " , LINUX_MIB_TCPSACKRENEGING ) ,
SNMP_MIB_ITEM ( " TCPFACKReorder " , LINUX_MIB_TCPFACKREORDER ) ,
SNMP_MIB_ITEM ( " TCPSACKReorder " , LINUX_MIB_TCPSACKREORDER ) ,
SNMP_MIB_ITEM ( " TCPRenoReorder " , LINUX_MIB_TCPRENOREORDER ) ,
SNMP_MIB_ITEM ( " TCPTSReorder " , LINUX_MIB_TCPTSREORDER ) ,
SNMP_MIB_ITEM ( " TCPFullUndo " , LINUX_MIB_TCPFULLUNDO ) ,
SNMP_MIB_ITEM ( " TCPPartialUndo " , LINUX_MIB_TCPPARTIALUNDO ) ,
SNMP_MIB_ITEM ( " TCPDSACKUndo " , LINUX_MIB_TCPDSACKUNDO ) ,
SNMP_MIB_ITEM ( " TCPLossUndo " , LINUX_MIB_TCPLOSSUNDO ) ,
SNMP_MIB_ITEM ( " TCPLostRetransmit " , LINUX_MIB_TCPLOSTRETRANSMIT ) ,
SNMP_MIB_ITEM ( " TCPRenoFailures " , LINUX_MIB_TCPRENOFAILURES ) ,
SNMP_MIB_ITEM ( " TCPSackFailures " , LINUX_MIB_TCPSACKFAILURES ) ,
SNMP_MIB_ITEM ( " TCPLossFailures " , LINUX_MIB_TCPLOSSFAILURES ) ,
SNMP_MIB_ITEM ( " TCPFastRetrans " , LINUX_MIB_TCPFASTRETRANS ) ,
SNMP_MIB_ITEM ( " TCPForwardRetrans " , LINUX_MIB_TCPFORWARDRETRANS ) ,
SNMP_MIB_ITEM ( " TCPSlowStartRetrans " , LINUX_MIB_TCPSLOWSTARTRETRANS ) ,
SNMP_MIB_ITEM ( " TCPTimeouts " , LINUX_MIB_TCPTIMEOUTS ) ,
tcp: Tail loss probe (TLP)
This patch series implement the Tail loss probe (TLP) algorithm described
in http://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01. The
first patch implements the basic algorithm.
TLP's goal is to reduce tail latency of short transactions. It achieves
this by converting retransmission timeouts (RTOs) occuring due
to tail losses (losses at end of transactions) into fast recovery.
TLP transmits one packet in two round-trips when a connection is in
Open state and isn't receiving any ACKs. The transmitted packet, aka
loss probe, can be either new or a retransmission. When there is tail
loss, the ACK from a loss probe triggers FACK/early-retransmit based
fast recovery, thus avoiding a costly RTO. In the absence of loss,
there is no change in the connection state.
PTO stands for probe timeout. It is a timer event indicating
that an ACK is overdue and triggers a loss probe packet. The PTO value
is set to max(2*SRTT, 10ms) and is adjusted to account for delayed
ACK timer when there is only one oustanding packet.
TLP Algorithm
On transmission of new data in Open state:
-> packets_out > 1: schedule PTO in max(2*SRTT, 10ms).
-> packets_out == 1: schedule PTO in max(2*RTT, 1.5*RTT + 200ms)
-> PTO = min(PTO, RTO)
Conditions for scheduling PTO:
-> Connection is in Open state.
-> Connection is either cwnd limited or no new data to send.
-> Number of probes per tail loss episode is limited to one.
-> Connection is SACK enabled.
When PTO fires:
new_segment_exists:
-> transmit new segment.
-> packets_out++. cwnd remains same.
no_new_packet:
-> retransmit the last segment.
Its ACK triggers FACK or early retransmit based recovery.
ACK path:
-> rearm RTO at start of ACK processing.
-> reschedule PTO if need be.
In addition, the patch includes a small variation to the Early Retransmit
(ER) algorithm, such that ER and TLP together can in principle recover any
N-degree of tail loss through fast recovery. TLP is controlled by the same
sysctl as ER, tcp_early_retrans sysctl.
tcp_early_retrans==0; disables TLP and ER.
==1; enables RFC5827 ER.
==2; delayed ER.
==3; TLP and delayed ER. [DEFAULT]
==4; TLP only.
The TLP patch series have been extensively tested on Google Web servers.
It is most effective for short Web trasactions, where it reduced RTOs by 15%
and improved HTTP response time (average by 6%, 99th percentile by 10%).
The transmitted probes account for <0.5% of the overall transmissions.
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-03-11 10:00:43 +00:00
SNMP_MIB_ITEM ( " TCPLossProbes " , LINUX_MIB_TCPLOSSPROBES ) ,
2013-03-11 10:00:44 +00:00
SNMP_MIB_ITEM ( " TCPLossProbeRecovery " , LINUX_MIB_TCPLOSSPROBERECOVERY ) ,
2005-04-16 15:20:36 -07:00
SNMP_MIB_ITEM ( " TCPRenoRecoveryFail " , LINUX_MIB_TCPRENORECOVERYFAIL ) ,
SNMP_MIB_ITEM ( " TCPSackRecoveryFail " , LINUX_MIB_TCPSACKRECOVERYFAIL ) ,
SNMP_MIB_ITEM ( " TCPSchedulerFailed " , LINUX_MIB_TCPSCHEDULERFAILED ) ,
SNMP_MIB_ITEM ( " TCPRcvCollapsed " , LINUX_MIB_TCPRCVCOLLAPSED ) ,
SNMP_MIB_ITEM ( " TCPDSACKOldSent " , LINUX_MIB_TCPDSACKOLDSENT ) ,
SNMP_MIB_ITEM ( " TCPDSACKOfoSent " , LINUX_MIB_TCPDSACKOFOSENT ) ,
SNMP_MIB_ITEM ( " TCPDSACKRecv " , LINUX_MIB_TCPDSACKRECV ) ,
SNMP_MIB_ITEM ( " TCPDSACKOfoRecv " , LINUX_MIB_TCPDSACKOFORECV ) ,
SNMP_MIB_ITEM ( " TCPAbortOnData " , LINUX_MIB_TCPABORTONDATA ) ,
SNMP_MIB_ITEM ( " TCPAbortOnClose " , LINUX_MIB_TCPABORTONCLOSE ) ,
SNMP_MIB_ITEM ( " TCPAbortOnMemory " , LINUX_MIB_TCPABORTONMEMORY ) ,
SNMP_MIB_ITEM ( " TCPAbortOnTimeout " , LINUX_MIB_TCPABORTONTIMEOUT ) ,
SNMP_MIB_ITEM ( " TCPAbortOnLinger " , LINUX_MIB_TCPABORTONLINGER ) ,
SNMP_MIB_ITEM ( " TCPAbortFailed " , LINUX_MIB_TCPABORTFAILED ) ,
SNMP_MIB_ITEM ( " TCPMemoryPressures " , LINUX_MIB_TCPMEMORYPRESSURES ) ,
2007-08-24 22:55:52 -07:00
SNMP_MIB_ITEM ( " TCPSACKDiscard " , LINUX_MIB_TCPSACKDISCARD ) ,
SNMP_MIB_ITEM ( " TCPDSACKIgnoredOld " , LINUX_MIB_TCPDSACKIGNOREDOLD ) ,
SNMP_MIB_ITEM ( " TCPDSACKIgnoredNoUndo " , LINUX_MIB_TCPDSACKIGNOREDNOUNDO ) ,
2007-09-25 22:47:31 -07:00
SNMP_MIB_ITEM ( " TCPSpuriousRTOs " , LINUX_MIB_TCPSPURIOUSRTOS ) ,
2008-07-30 03:03:15 -07:00
SNMP_MIB_ITEM ( " TCPMD5NotFound " , LINUX_MIB_TCPMD5NOTFOUND ) ,
SNMP_MIB_ITEM ( " TCPMD5Unexpected " , LINUX_MIB_TCPMD5UNEXPECTED ) ,
2008-11-24 21:27:22 -08:00
SNMP_MIB_ITEM ( " TCPSackShifted " , LINUX_MIB_SACKSHIFTED ) ,
SNMP_MIB_ITEM ( " TCPSackMerged " , LINUX_MIB_SACKMERGED ) ,
SNMP_MIB_ITEM ( " TCPSackShiftFallback " , LINUX_MIB_SACKSHIFTFALLBACK ) ,
2010-03-07 23:21:57 +00:00
SNMP_MIB_ITEM ( " TCPBacklogDrop " , LINUX_MIB_TCPBACKLOGDROP ) ,
SNMP_MIB_ITEM ( " TCPMinTTLDrop " , LINUX_MIB_TCPMINTTLDROP ) ,
2010-03-19 05:37:18 +00:00
SNMP_MIB_ITEM ( " TCPDeferAcceptDrop " , LINUX_MIB_TCPDEFERACCEPTDROP ) ,
2010-06-02 12:05:27 +00:00
SNMP_MIB_ITEM ( " IPReversePathFilter " , LINUX_MIB_IPRPFILTER ) ,
2010-12-08 12:16:33 -08:00
SNMP_MIB_ITEM ( " TCPTimeWaitOverflow " , LINUX_MIB_TCPTIMEWAITOVERFLOW ) ,
2011-08-30 03:21:44 +00:00
SNMP_MIB_ITEM ( " TCPReqQFullDoCookies " , LINUX_MIB_TCPREQQFULLDOCOOKIES ) ,
SNMP_MIB_ITEM ( " TCPReqQFullDrop " , LINUX_MIB_TCPREQQFULLDROP ) ,
2012-01-25 04:44:20 +00:00
SNMP_MIB_ITEM ( " TCPRetransFail " , LINUX_MIB_TCPRETRANSFAIL ) ,
2012-03-18 11:07:47 +00:00
SNMP_MIB_ITEM ( " TCPRcvCoalesce " , LINUX_MIB_TCPRCVCOALESCE ) ,
2012-07-16 01:41:36 +00:00
SNMP_MIB_ITEM ( " TCPOFOQueue " , LINUX_MIB_TCPOFOQUEUE ) ,
SNMP_MIB_ITEM ( " TCPOFODrop " , LINUX_MIB_TCPOFODROP ) ,
SNMP_MIB_ITEM ( " TCPOFOMerge " , LINUX_MIB_TCPOFOMERGE ) ,
2012-07-17 10:13:05 +02:00
SNMP_MIB_ITEM ( " TCPChallengeACK " , LINUX_MIB_TCPCHALLENGEACK ) ,
2012-07-17 01:41:30 +00:00
SNMP_MIB_ITEM ( " TCPSYNChallenge " , LINUX_MIB_TCPSYNCHALLENGE ) ,
2012-07-19 06:43:07 +00:00
SNMP_MIB_ITEM ( " TCPFastOpenActive " , LINUX_MIB_TCPFASTOPENACTIVE ) ,
2014-03-03 12:31:36 -08:00
SNMP_MIB_ITEM ( " TCPFastOpenActiveFail " , LINUX_MIB_TCPFASTOPENACTIVEFAIL ) ,
2012-08-31 12:29:11 +00:00
SNMP_MIB_ITEM ( " TCPFastOpenPassive " , LINUX_MIB_TCPFASTOPENPASSIVE ) ,
SNMP_MIB_ITEM ( " TCPFastOpenPassiveFail " , LINUX_MIB_TCPFASTOPENPASSIVEFAIL ) ,
SNMP_MIB_ITEM ( " TCPFastOpenListenOverflow " , LINUX_MIB_TCPFASTOPENLISTENOVERFLOW ) ,
SNMP_MIB_ITEM ( " TCPFastOpenCookieReqd " , LINUX_MIB_TCPFASTOPENCOOKIEREQD ) ,
2013-04-18 06:52:51 +00:00
SNMP_MIB_ITEM ( " TCPSpuriousRtxHostQueues " , LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES ) ,
2013-08-07 11:33:25 +03:00
SNMP_MIB_ITEM ( " BusyPollRxPackets " , LINUX_MIB_BUSYPOLLRXPACKETS ) ,
tcp: auto corking
With the introduction of TCP Small Queues, TSO auto sizing, and TCP
pacing, we can implement Automatic Corking in the kernel, to help
applications doing small write()/sendmsg() to TCP sockets.
Idea is to change tcp_push() to check if the current skb payload is
under skb optimal size (a multiple of MSS bytes)
If under 'size_goal', and at least one packet is still in Qdisc or
NIC TX queues, set the TCP Small Queue Throttled bit, so that the push
will be delayed up to TX completion time.
This delay might allow the application to coalesce more bytes
in the skb in following write()/sendmsg()/sendfile() system calls.
The exact duration of the delay is depending on the dynamics
of the system, and might be zero if no packet for this flow
is actually held in Qdisc or NIC TX ring.
Using FQ/pacing is a way to increase the probability of
autocorking being triggered.
Add a new sysctl (/proc/sys/net/ipv4/tcp_autocorking) to control
this feature and default it to 1 (enabled)
Add a new SNMP counter : nstat -a | grep TcpExtTCPAutoCorking
This counter is incremented every time we detected skb was under used
and its flush was deferred.
Tested:
Interesting effects when using line buffered commands under ssh.
Excellent performance results in term of cpu usage and total throughput.
lpq83:~# echo 1 >/proc/sys/net/ipv4/tcp_autocorking
lpq83:~# perf stat ./super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128
9410.39
Performance counter stats for './super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128':
35209.439626 task-clock # 2.901 CPUs utilized
2,294 context-switches # 0.065 K/sec
101 CPU-migrations # 0.003 K/sec
4,079 page-faults # 0.116 K/sec
97,923,241,298 cycles # 2.781 GHz [83.31%]
51,832,908,236 stalled-cycles-frontend # 52.93% frontend cycles idle [83.30%]
25,697,986,603 stalled-cycles-backend # 26.24% backend cycles idle [66.70%]
102,225,978,536 instructions # 1.04 insns per cycle
# 0.51 stalled cycles per insn [83.38%]
18,657,696,819 branches # 529.906 M/sec [83.29%]
91,679,646 branch-misses # 0.49% of all branches [83.40%]
12.136204899 seconds time elapsed
lpq83:~# echo 0 >/proc/sys/net/ipv4/tcp_autocorking
lpq83:~# perf stat ./super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128
6624.89
Performance counter stats for './super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128':
40045.864494 task-clock # 3.301 CPUs utilized
171 context-switches # 0.004 K/sec
53 CPU-migrations # 0.001 K/sec
4,080 page-faults # 0.102 K/sec
111,340,458,645 cycles # 2.780 GHz [83.34%]
61,778,039,277 stalled-cycles-frontend # 55.49% frontend cycles idle [83.31%]
29,295,522,759 stalled-cycles-backend # 26.31% backend cycles idle [66.67%]
108,654,349,355 instructions # 0.98 insns per cycle
# 0.57 stalled cycles per insn [83.34%]
19,552,170,748 branches # 488.244 M/sec [83.34%]
157,875,417 branch-misses # 0.81% of all branches [83.34%]
12.130267788 seconds time elapsed
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-05 22:36:05 -08:00
SNMP_MIB_ITEM ( " TCPAutoCorking " , LINUX_MIB_TCPAUTOCORKING ) ,
2014-02-25 14:34:32 +01:00
SNMP_MIB_ITEM ( " TCPFromZeroWindowAdv " , LINUX_MIB_TCPFROMZEROWINDOWADV ) ,
SNMP_MIB_ITEM ( " TCPToZeroWindowAdv " , LINUX_MIB_TCPTOZEROWINDOWADV ) ,
SNMP_MIB_ITEM ( " TCPWantZeroWindowAdv " , LINUX_MIB_TCPWANTZEROWINDOWADV ) ,
2014-03-03 12:31:36 -08:00
SNMP_MIB_ITEM ( " TCPSynRetrans " , LINUX_MIB_TCPSYNRETRANS ) ,
SNMP_MIB_ITEM ( " TCPOrigDataSent " , LINUX_MIB_TCPORIGDATASENT ) ,
2005-04-16 15:20:36 -07:00
SNMP_MIB_SENTINEL
} ;
2008-11-10 21:43:08 -08:00
/*
* Print one "IcmpMsg:" header line and one "IcmpMsg:" value line.
*
* seq:   output seq_file
* vals:  counter values, `count` entries
* type:  matching counter indices; bit 0x100 selects the "Out" label
*        (otherwise "In") and the low 8 bits are printed as the type
* count: number of valid entries; nothing is printed when it is 0
*/
static void icmpmsg_put_line ( struct seq_file * seq , unsigned long * vals ,
unsigned short * type , int count )
{
int j ;
if ( count ) {
seq_printf ( seq , " \n IcmpMsg: " ) ;
/* Header row: one In/Out label per entry. */
for ( j = 0 ; j < count ; + + j )
seq_printf ( seq , " %sType%u " ,
type [ j ] & 0x100 ? " Out " : " In " ,
type [ j ] & 0xff ) ;
seq_printf ( seq , " \n IcmpMsg: " ) ;
/* Value row, same order as the header row. */
for ( j = 0 ; j < count ; + + j )
seq_printf ( seq , " %lu " , vals [ j ] ) ;
}
}
2007-09-17 09:57:33 -07:00
static void icmpmsg_put ( struct seq_file * seq )
{
# define PERLINE 16
2008-11-10 21:43:08 -08:00
int i , count ;
unsigned short type [ PERLINE ] ;
unsigned long vals [ PERLINE ] , val ;
2008-07-18 04:06:04 -07:00
struct net * net = seq - > private ;
2007-09-17 09:57:33 -07:00
count = 0 ;
for ( i = 0 ; i < ICMPMSG_MIB_MAX ; i + + ) {
2011-11-08 13:04:43 +00:00
val = atomic_long_read ( & net - > mib . icmpmsg_statistics - > mibs [ i ] ) ;
2008-11-10 21:43:08 -08:00
if ( val ) {
type [ count ] = i ;
vals [ count + + ] = val ;
}
if ( count = = PERLINE ) {
icmpmsg_put_line ( seq , vals , type , count ) ;
count = 0 ;
}
2007-09-17 09:57:33 -07:00
}
2008-11-10 21:43:08 -08:00
icmpmsg_put_line ( seq , vals , type , count ) ;
2007-09-17 09:57:33 -07:00
# undef PERLINE
}
/*
* Print the "Icmp:" header line and the matching "Icmp:" value line for
* the namespace held in seq->private.
*
* Column order: InMsgs InErrors InCsumErrors, one In<name> per
* icmpmibmap entry, OutMsgs OutErrors, one Out<name> per icmpmibmap
* entry. The aggregate columns are folded from icmp_statistics; the
* per-type columns are read from icmpmsg_statistics->mibs.
*/
static void icmp_put ( struct seq_file * seq )
{
int i ;
2008-07-18 04:06:04 -07:00
struct net * net = seq - > private ;
2011-11-08 13:04:43 +00:00
atomic_long_t * ptr = net - > mib . icmpmsg_statistics - > mibs ;
2007-09-17 09:57:33 -07:00
2013-04-29 08:39:56 +00:00
/* Header line. */
seq_puts ( seq , " \n Icmp: InMsgs InErrors InCsumErrors " ) ;
2013-12-31 15:11:27 +08:00
for ( i = 0 ; icmpmibmap [ i ] . name ! = NULL ; i + + )
2007-09-17 09:57:33 -07:00
seq_printf ( seq , " In%s " , icmpmibmap [ i ] . name ) ;
seq_printf ( seq , " OutMsgs OutErrors " ) ;
2013-12-31 15:11:27 +08:00
for ( i = 0 ; icmpmibmap [ i ] . name ! = NULL ; i + + )
2007-09-17 09:57:33 -07:00
seq_printf ( seq , " Out%s " , icmpmibmap [ i ] . name ) ;
2013-04-29 08:39:56 +00:00
/* Value line, same column order as the header. */
seq_printf ( seq , " \n Icmp: %lu %lu %lu " ,
2014-05-05 15:55:55 -07:00
snmp_fold_field ( net - > mib . icmp_statistics , ICMP_MIB_INMSGS ) ,
snmp_fold_field ( net - > mib . icmp_statistics , ICMP_MIB_INERRORS ) ,
snmp_fold_field ( net - > mib . icmp_statistics , ICMP_MIB_CSUMERRORS ) ) ;
2013-12-31 15:11:27 +08:00
/* Inbound per-type counters sit at mibs[index]. */
for ( i = 0 ; icmpmibmap [ i ] . name ! = NULL ; i + + )
2007-09-17 09:57:33 -07:00
seq_printf ( seq , " %lu " ,
2011-11-08 13:04:43 +00:00
atomic_long_read ( ptr + icmpmibmap [ i ] . index ) ) ;
2007-09-17 09:57:33 -07:00
seq_printf ( seq , " %lu %lu " ,
2014-05-05 15:55:55 -07:00
snmp_fold_field ( net - > mib . icmp_statistics , ICMP_MIB_OUTMSGS ) ,
snmp_fold_field ( net - > mib . icmp_statistics , ICMP_MIB_OUTERRORS ) ) ;
2013-12-31 15:11:27 +08:00
/* Outbound per-type counters sit at mibs[index | 0x100]. */
for ( i = 0 ; icmpmibmap [ i ] . name ! = NULL ; i + + )
2007-09-17 09:57:33 -07:00
seq_printf ( seq , " %lu " ,
2011-11-08 13:04:43 +00:00
atomic_long_read ( ptr + ( icmpmibmap [ i ] . index | 0x100 ) ) ) ;
2007-09-17 09:57:33 -07:00
}
2005-04-16 15:20:36 -07:00
/*
* Called from the PROCfs module . This outputs / proc / net / snmp .
*
* seq_file show callback bound by snmp_seq_open. For the namespace held
* in seq->private it prints, in this order, a header line and a value
* line for each of: Ip, Icmp (icmp_put), IcmpMsg (icmpmsg_put), Tcp,
* Udp and UdpLite. Always returns 0.
*/
static int snmp_seq_show ( struct seq_file * seq , void * v )
{
int i ;
2008-07-18 04:06:04 -07:00
struct net * net = seq - > private ;
2005-04-16 15:20:36 -07:00
seq_puts ( seq , " Ip: Forwarding DefaultTTL " ) ;
for ( i = 0 ; snmp4_ipstats_list [ i ] . name ! = NULL ; i + + )
seq_printf ( seq , " %s " , snmp4_ipstats_list [ i ] . name ) ;
/* Forwarding is reported as 1 when the FORWARDING devconf is set, else 2. */
seq_printf ( seq , " \n Ip: %d %d " ,
2008-07-18 04:06:26 -07:00
IPV4_DEVCONF_ALL ( net , FORWARDING ) ? 1 : 2 ,
2007-12-16 13:32:48 -08:00
sysctl_ip_default_ttl ) ;
2005-04-16 15:20:36 -07:00
2010-06-30 13:31:19 -07:00
/* Build-time check that mibs is at offset 0 of struct ipstats_mib. */
BUILD_BUG_ON ( offsetof ( struct ipstats_mib , mibs ) ! = 0 ) ;
2005-04-16 15:20:36 -07:00
for ( i = 0 ; snmp4_ipstats_list [ i ] . name ! = NULL ; i + + )
2010-06-30 13:31:19 -07:00
seq_printf ( seq , " %llu " ,
2014-05-05 15:55:55 -07:00
snmp_fold_field64 ( net - > mib . ip_statistics ,
2010-06-30 13:31:19 -07:00
snmp4_ipstats_list [ i ] . entry ,
offsetof ( struct ipstats_mib , syncp ) ) ) ;
2005-04-16 15:20:36 -07:00
2007-09-17 09:57:33 -07:00
icmp_put ( seq ) ; /* RFC 2011 compatibility */
icmpmsg_put ( seq ) ;
2005-04-16 15:20:36 -07:00
seq_puts ( seq , " \n Tcp: " ) ;
for ( i = 0 ; snmp4_tcp_list [ i ] . name ! = NULL ; i + + )
seq_printf ( seq , " %s " , snmp4_tcp_list [ i ] . name ) ;
seq_puts ( seq , " \n Tcp: " ) ;
for ( i = 0 ; snmp4_tcp_list [ i ] . name ! = NULL ; i + + ) {
/* MaxConn field is signed, RFC 2012 */
if ( snmp4_tcp_list [ i ] . entry = = TCP_MIB_MAXCONN )
seq_printf ( seq , " %ld " ,
2014-05-05 15:55:55 -07:00
snmp_fold_field ( net - > mib . tcp_statistics ,
2007-04-24 21:53:35 -07:00
snmp4_tcp_list [ i ] . entry ) ) ;
2005-04-16 15:20:36 -07:00
else
seq_printf ( seq , " %lu " ,
2014-05-05 15:55:55 -07:00
snmp_fold_field ( net - > mib . tcp_statistics ,
2007-04-24 21:53:35 -07:00
snmp4_tcp_list [ i ] . entry ) ) ;
2005-04-16 15:20:36 -07:00
}
seq_puts ( seq , " \n Udp: " ) ;
for ( i = 0 ; snmp4_udp_list [ i ] . name ! = NULL ; i + + )
seq_printf ( seq , " %s " , snmp4_udp_list [ i ] . name ) ;
seq_puts ( seq , " \n Udp: " ) ;
for ( i = 0 ; snmp4_udp_list [ i ] . name ! = NULL ; i + + )
seq_printf ( seq , " %lu " ,
2014-05-05 15:55:55 -07:00
snmp_fold_field ( net - > mib . udp_statistics ,
2007-04-24 21:53:35 -07:00
snmp4_udp_list [ i ] . entry ) ) ;
2005-04-16 15:20:36 -07:00
2006-11-27 11:10:57 -08:00
/* the UDP and UDP-Lite MIBs are the same */
seq_puts ( seq , " \n UdpLite: " ) ;
for ( i = 0 ; snmp4_udp_list [ i ] . name ! = NULL ; i + + )
seq_printf ( seq , " %s " , snmp4_udp_list [ i ] . name ) ;
seq_puts ( seq , " \n UdpLite: " ) ;
for ( i = 0 ; snmp4_udp_list [ i ] . name ! = NULL ; i + + )
seq_printf ( seq , " %lu " ,
2014-05-05 15:55:55 -07:00
snmp_fold_field ( net - > mib . udplite_statistics ,
2007-04-24 21:53:35 -07:00
snmp4_udp_list [ i ] . entry ) ) ;
2008-03-06 16:22:02 -08:00
2005-04-16 15:20:36 -07:00
seq_putc ( seq , ' \n ' ) ;
return 0 ;
}
/*
* open handler for the "snmp" entry: hands snmp_seq_show to
* single_open_net() and returns that call's result unchanged.
*/
static int snmp_seq_open ( struct inode * inode , struct file * file )
{
2008-07-18 04:07:21 -07:00
return single_open_net ( inode , file , snmp_seq_show ) ;
2008-07-18 04:06:04 -07:00
}
2007-02-12 00:55:35 -08:00
/*
* File operations registered for the "snmp" entry by ip_proc_init_net:
* opened via snmp_seq_open, read/seek through the seq_file helpers,
* released with single_release_net.
*/
static const struct file_operations snmp_seq_fops = {
2005-04-16 15:20:36 -07:00
. owner = THIS_MODULE ,
. open = snmp_seq_open ,
. read = seq_read ,
. llseek = seq_lseek ,
2008-07-18 04:07:44 -07:00
. release = single_release_net ,
2005-04-16 15:20:36 -07:00
} ;
2007-09-17 09:57:33 -07:00
2005-04-16 15:20:36 -07:00
/*
* Output / proc / net / netstat
*
* seq_file show callback bound by netstat_seq_open. For the namespace
* held in seq->private it prints a header line and a value line for
* "TcpExt:" (snmp4_net_list, read from net_statistics) and then for
* "IpExt:" (snmp4_ipextstats_list, read from ip_statistics).
* Always returns 0.
*/
static int netstat_seq_show ( struct seq_file * seq , void * v )
{
int i ;
2008-07-18 04:05:17 -07:00
struct net * net = seq - > private ;
2005-04-16 15:20:36 -07:00
seq_puts ( seq , " TcpExt: " ) ;
for ( i = 0 ; snmp4_net_list [ i ] . name ! = NULL ; i + + )
seq_printf ( seq , " %s " , snmp4_net_list [ i ] . name ) ;
seq_puts ( seq , " \n TcpExt: " ) ;
for ( i = 0 ; snmp4_net_list [ i ] . name ! = NULL ; i + + )
seq_printf ( seq , " %lu " ,
2014-05-05 15:55:55 -07:00
snmp_fold_field ( net - > mib . net_statistics ,
2007-04-24 21:53:35 -07:00
snmp4_net_list [ i ] . entry ) ) ;
2005-04-16 15:20:36 -07:00
2007-05-14 03:07:30 -07:00
seq_puts ( seq , " \n IpExt: " ) ;
for ( i = 0 ; snmp4_ipextstats_list [ i ] . name ! = NULL ; i + + )
seq_printf ( seq , " %s " , snmp4_ipextstats_list [ i ] . name ) ;
seq_puts ( seq , " \n IpExt: " ) ;
/* IpExt values use the 64-bit fold, like the "Ip:" values in snmp_seq_show. */
for ( i = 0 ; snmp4_ipextstats_list [ i ] . name ! = NULL ; i + + )
2010-06-30 13:31:19 -07:00
seq_printf ( seq , " %llu " ,
2014-05-05 15:55:55 -07:00
snmp_fold_field64 ( net - > mib . ip_statistics ,
2010-06-30 13:31:19 -07:00
snmp4_ipextstats_list [ i ] . entry ,
offsetof ( struct ipstats_mib , syncp ) ) ) ;
2007-05-14 03:07:30 -07:00
2005-04-16 15:20:36 -07:00
seq_putc ( seq , ' \n ' ) ;
return 0 ;
}
/*
* open handler for the "netstat" entry: hands netstat_seq_show to
* single_open_net() and returns that call's result unchanged.
*/
static int netstat_seq_open ( struct inode * inode , struct file * file )
{
2008-07-18 04:07:21 -07:00
return single_open_net ( inode , file , netstat_seq_show ) ;
2008-07-18 04:05:17 -07:00
}
2007-02-12 00:55:35 -08:00
/*
* File operations registered for the "netstat" entry by ip_proc_init_net:
* opened via netstat_seq_open, read/seek through the seq_file helpers,
* released with single_release_net.
*/
static const struct file_operations netstat_seq_fops = {
2005-04-16 15:20:36 -07:00
. owner = THIS_MODULE ,
. open = netstat_seq_open ,
. read = seq_read ,
. llseek = seq_lseek ,
2008-07-18 04:07:44 -07:00
. release = single_release_net ,
2005-04-16 15:20:36 -07:00
} ;
2008-03-31 19:42:37 -07:00
/*
* Per-namespace init hook (ip_proc_ops.init): create the "sockstat",
* "netstat" and "snmp" entries under net->proc_net with mode S_IRUGO.
*
* Returns 0 when all three were created. If a creation fails, the
* entries created before it are removed again (in reverse order) and
* -ENOMEM is returned.
*/
static __net_init int ip_proc_init_net ( struct net * net )
{
2013-02-18 01:34:54 +00:00
if ( ! proc_create ( " sockstat " , S_IRUGO , net - > proc_net ,
& sockstat_seq_fops ) )
2008-07-18 04:06:50 -07:00
goto out_sockstat ;
2013-02-18 01:34:54 +00:00
if ( ! proc_create ( " netstat " , S_IRUGO , net - > proc_net , & netstat_seq_fops ) )
2008-07-18 04:05:17 -07:00
goto out_netstat ;
2013-02-18 01:34:54 +00:00
if ( ! proc_create ( " snmp " , S_IRUGO , net - > proc_net , & snmp_seq_fops ) )
2008-07-18 04:06:04 -07:00
goto out_snmp ;
2008-07-18 04:05:17 -07:00
2008-03-31 19:42:37 -07:00
return 0 ;
2008-07-18 04:05:17 -07:00
2008-07-18 04:06:04 -07:00
/* Each label undoes the step that succeeded before the failing one. */
out_snmp :
2013-02-18 01:34:56 +00:00
remove_proc_entry ( " netstat " , net - > proc_net ) ;
2008-07-18 04:05:17 -07:00
out_netstat :
2013-02-18 01:34:56 +00:00
remove_proc_entry ( " sockstat " , net - > proc_net ) ;
2008-07-18 04:06:50 -07:00
out_sockstat :
2008-07-18 04:05:17 -07:00
return - ENOMEM ;
2008-03-31 19:42:37 -07:00
}
/*
* Per-namespace exit hook (ip_proc_ops.exit): remove the three entries
* created by ip_proc_init_net, in reverse creation order.
*/
static __net_exit void ip_proc_exit_net ( struct net * net )
{
2013-02-18 01:34:56 +00:00
remove_proc_entry ( " snmp " , net - > proc_net ) ;
remove_proc_entry ( " netstat " , net - > proc_net ) ;
remove_proc_entry ( " sockstat " , net - > proc_net ) ;
2008-03-31 19:42:37 -07:00
}
/*
* Per-namespace operations pairing ip_proc_init_net with
* ip_proc_exit_net; registered by ip_misc_proc_init.
*/
static __net_initdata struct pernet_operations ip_proc_ops = {
. init = ip_proc_init_net ,
. exit = ip_proc_exit_net ,
} ;
2005-04-16 15:20:36 -07:00
/*
* Boot-time entry point: register ip_proc_ops as a pernet subsystem and
* return the result of register_pernet_subsys() unchanged.
*/
int __init ip_misc_proc_init ( void )
{
2008-07-18 04:06:50 -07:00
return register_pernet_subsys ( & ip_proc_ops ) ;
2005-04-16 15:20:36 -07:00
}
2007-04-20 15:57:15 -07:00