2006-06-26 02:48:02 +02:00
/*
include/types/server.h
This file defines everything related to servers.
2008-02-14 20:25:24 +01:00
Copyright (C) 2000-2008 Willy Tarreau - w@1wt.eu
2006-06-26 02:48:02 +02:00
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation, version 2.1
exclusively.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
# ifndef _TYPES_SERVER_H
# define _TYPES_SERVER_H
# include <netinet/in.h>
# include <arpa/inet.h>
2006-06-29 18:54:54 +02:00
# include <common/config.h>
2007-11-26 01:15:43 +01:00
# include <common/eb32tree.h>
2006-06-29 17:53:05 +02:00
# include <common/mini-clist.h>
2006-06-26 02:48:02 +02:00
# include <types/buffers.h>
# include <types/proxy.h>
# include <types/queue.h>
# include <types/task.h>
/*
 * Server flags. Each flag below 0x0100 is a single bit so that several can
 * be OR-ed together (presumably into the <state> member of struct server,
 * which is documented as "SRV_*" - confirm against the callers).
 * The SRV_TPROXY_* values share the two bits covered by SRV_TPROXY_MASK.
 */
2006-11-12 23:57:19 +01:00
# define SRV_RUNNING 0x0001 /* the server is UP */
# define SRV_BACKUP 0x0002 /* this server is a backup server */
# define SRV_MAPPORTS 0x0004 /* this server uses mapped ports */
# define SRV_BIND_SRC 0x0008 /* this server uses a specific source address */
# define SRV_CHECKED 0x0010 /* this server needs to be checked */
2007-11-30 10:41:39 +01:00
# define SRV_GOINGDOWN 0x0020 /* this server says that it's going down (404) */
2007-11-30 17:42:05 +01:00
# define SRV_WARMINGUP 0x0040 /* this server is warming up after a failure */
2008-01-13 18:12:24 +01:00
/* unused: 0x0080 */
# define SRV_TPROXY_ADDR 0x0100 /* bind to this non-local address to reach this server */
# define SRV_TPROXY_CIP 0x0200 /* bind to the client's IP address to reach this server */
# define SRV_TPROXY_CLI 0x0300 /* bind to the client's IP+port to reach this server (both TPROXY bits set) */
# define SRV_TPROXY_MASK 0x0300 /* mask of the TPROXY bits: non-zero when binding to a non-local address */
2006-06-26 02:48:02 +02:00
/*
 * Status codes returned by the functions which act on servers.
 */
#define SRV_STATUS_OK       0   /* success: everything is OK */
#define SRV_STATUS_INTERNAL 1   /* any other unrecoverable error */
#define SRV_STATUS_NOSRV    2   /* no server is available */
#define SRV_STATUS_FULL     3   /* the server (or all servers) is saturated */
#define SRV_STATUS_QUEUED   4   /* saturated as well, but the connection was queued */
2007-11-30 08:33:21 +01:00
/*
 * Health-check outcome bits, as stored in s->result.
 */
#define SRV_CHK_UNKNOWN 0x0000  /* default value the result is initialized to */
#define SRV_CHK_ERROR   0x0001  /* the check hit an error; takes precedence over the others */
#define SRV_CHK_RUNNING 0x0002  /* the server was seen as running */
#define SRV_CHK_DISABLE 0x0004  /* the server answered with a "disable" code */
2006-06-26 02:48:02 +02:00
2007-12-02 11:01:23 +01:00
/*
 * Weight limits. The SRV_UWGHT_* pair bounds the user-specified weight;
 * the SRV_EWGHT_* pair is the same pair multiplied by BE_WEIGHT_SCALE
 * (defined outside this file).
 */
#define SRV_UWGHT_RANGE 256
#define SRV_UWGHT_MAX   (SRV_UWGHT_RANGE - 1)
#define SRV_EWGHT_RANGE (SRV_UWGHT_RANGE * BE_WEIGHT_SCALE)
#define SRV_EWGHT_MAX   (SRV_UWGHT_MAX * BE_WEIGHT_SCALE)
2006-06-26 02:48:02 +02:00
/*
 * struct server - description and run-time state of one server.
 *
 * The members cover, in order: list linkage and state, cookie and
 * redirection settings, the owning proxy, session and queue accounting,
 * the health-check task, connect/bind/check addresses, tracking links,
 * health-check parameters and result, load-balancing weights and tree
 * position, and finally the statistics counters.
 *
 * NOTE(review): the dated lines and commit-message paragraphs interleaved
 * between the members are version-control annotation text, not C; they are
 * left untouched here and must be removed before this header can compile.
 */
struct server {
struct server * next ; /* next server in the chain; NOTE(review): presumably the proxy's server list - confirm */
int state ; /* server state (SRV_* flags) */
2007-11-26 01:15:43 +01:00
int prev_state ; /* server state before last change (SRV_* flags) */
2006-06-26 02:48:02 +02:00
int cklen ; /* the length of the cookie, to speed up checks */
2008-02-14 20:25:24 +01:00
int rdr_len ; /* the length of the redirection prefix */
2007-03-25 21:03:01 +02:00
char * cookie ; /* the id set in the cookie */
2008-02-14 20:25:24 +01:00
char * rdr_pfx ; /* the redirection prefix */
2007-03-25 21:03:01 +02:00
struct proxy * proxy ; /* the proxy this server belongs to */
[BUG] fix the dequeuing logic to ensure that all requests get served
The dequeuing logic was completely wrong. First, a task was assigned
to all servers to process the queue, but this task was never scheduled
and was only woken up on session free. Second, there was no reservation
of server entries when a task was assigned a server. This means that
as long as the task was not connected to the server, its presence was
not accounted for. This was causing trouble when detecting whether or
not a server had reached maxconn. Third, during a redispatch, a session
could lose its place at the server's and get blocked because another
session at the same moment would have stolen the entry. Fourth, the
redispatch option did not work when maxqueue was reached for a server,
and it was not possible to do so without indefinitely hanging a session.
The root cause of all those problems was the lack of pre-reservation of
connections at the server's, and the lack of tracking of servers during
a redispatch. Everything relied on combinations of flags which could
appear similarly in quite distinct situations.
This patch is a major rework but there was no other solution, as the
internal logic was deeply flawed. The resulting code is cleaner, more
understandable, uses less magics and is overall more robust.
As an added bonus, "option redispatch" now works when maxqueue has
been reached on a server.
2008-06-20 15:04:11 +02:00
int served ; /* # of active sessions currently being served (ie not pending) */
2007-03-25 21:03:01 +02:00
int cur_sess , cur_sess_max ; /* number of currently active sessions (including syn_sent); NOTE(review): *_max is presumably the highest value seen - confirm */
unsigned maxconn , minconn ; /* max # of active sessions (0 = unlimited), min# for dynamic limit. */
int nbpend , nbpend_max ; /* number of pending connections; NOTE(review): *_max is presumably the highest value seen - confirm */
2007-10-25 20:15:38 +02:00
int maxqueue ; /* maximum number of pending connections allowed */
2007-03-25 21:03:01 +02:00
struct list pendconns ; /* pending connections */
[MEDIUM] Fix memory freeing at exit
New functions implemented:
- deinit_pollers: called at the end of deinit())
- prune_acl: called via list_for_each_entry_safe
Add missing pool_destroy2 calls:
- p->hdr_idx_pool
- pool2_tree64
Implement all task stopping:
- health-check: needs new "struct task" in the struct server
- queue processing: queue_mgt
- appsess_refresh: appsession_refresh
before (idle system):
==6079== LEAK SUMMARY:
==6079== definitely lost: 1,112 bytes in 75 blocks.
==6079== indirectly lost: 53,356 bytes in 2,090 blocks.
==6079== possibly lost: 52 bytes in 1 blocks.
==6079== still reachable: 150,996 bytes in 504 blocks.
==6079== suppressed: 0 bytes in 0 blocks.
after (idle system):
==6945== LEAK SUMMARY:
==6945== definitely lost: 7,644 bytes in 137 blocks.
==6945== indirectly lost: 9,913 bytes in 587 blocks.
==6945== possibly lost: 0 bytes in 0 blocks.
==6945== still reachable: 0 bytes in 0 blocks.
==6945== suppressed: 0 bytes in 0 blocks.
before (running system for ~2m):
==9343== LEAK SUMMARY:
==9343== definitely lost: 1,112 bytes in 75 blocks.
==9343== indirectly lost: 54,199 bytes in 2,122 blocks.
==9343== possibly lost: 52 bytes in 1 blocks.
==9343== still reachable: 151,128 bytes in 509 blocks.
==9343== suppressed: 0 bytes in 0 blocks.
after (running system for ~2m):
==11616== LEAK SUMMARY:
==11616== definitely lost: 7,644 bytes in 137 blocks.
==11616== indirectly lost: 9,981 bytes in 591 blocks.
==11616== possibly lost: 0 bytes in 0 blocks.
==11616== still reachable: 4 bytes in 1 blocks.
==11616== suppressed: 0 bytes in 0 blocks.
Still not perfect but significant improvement.
2008-05-29 23:53:44 +02:00
struct task * check ; /* the task associated to the health check processing */
2007-03-25 21:03:01 +02:00
2006-06-26 02:48:02 +02:00
struct sockaddr_in addr ; /* the address to connect to */
2007-03-25 21:03:01 +02:00
struct sockaddr_in source_addr ; /* the address to which we want to bind for connect() */
2008-01-14 16:36:15 +01:00
# if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
2007-03-25 21:03:01 +02:00
struct sockaddr_in tproxy_addr ; /* non-local address we want to bind to for connect() */
2006-11-12 23:57:19 +01:00
# endif
2007-03-25 21:03:01 +02:00
2008-02-18 01:26:35 +01:00
struct server * tracknext , * tracked ; /* next server in a tracking list, tracked server */
char * trackit ; /* temporary variable to make assignment deferrable */
2007-03-25 20:46:19 +02:00
struct sockaddr_in check_addr ; /* the address to check, if different from <addr> */
2006-06-26 02:48:02 +02:00
short check_port ; /* the port to use for the health checks; NOTE(review): signed short, so where short is 16 bits a port above 32767 is held as a negative value - confirm users convert it */
int health ; /* 0->rise-1 = bad; rise->rise+fall-1 = good */
2007-03-25 21:03:01 +02:00
int rise , fall ; /* time in iterations */
2008-01-21 01:54:06 +01:00
int inter , fastinter , downinter ; /* checks: time in milliseconds */
2007-11-30 17:42:05 +01:00
int slowstart ; /* slowstart time in seconds (ms in the conf) */
2007-11-30 08:33:21 +01:00
int result ; /* health-check result : SRV_CHK_* bits */
2006-06-26 02:48:02 +02:00
int curfd ; /* file desc used for current test, or -1 if not in test */
2007-03-25 21:03:01 +02:00
char * id ; /* just for identification */
2007-03-25 21:16:40 +02:00
unsigned uweight , eweight ; /* user-specified weight, and effective weight */
unsigned wscore ; /* weight score, used during srv map computation */
2007-11-26 01:15:43 +01:00
unsigned prev_eweight ; /* eweight before last change */
unsigned rweight ; /* remainder of weight in the current LB tree */
unsigned npos , lpos ; /* next and last positions in the LB tree */
struct eb32_node lb_node ; /* node used for tree-based load balancing */
struct eb_root * lb_tree ; /* we want to know in what tree the server is */
struct server * next_full ; /* next server in the temporary full list */
2007-03-25 21:03:01 +02:00
2006-06-26 02:48:02 +02:00
unsigned failed_checks , down_trans ; /* failed checks and up-down transitions */
[MEDIUM] stats: report server and backend cumulated downtime
Hello,
This patch implements new statistics for SLA calculation by adding new
field 'Dwntime' with total down time since restart (both HTTP/CSV) and
extending status field (HTTP) or inserting a new one (CSV) with time
showing how long each server/backend is in a current state. Additionaly,
down transations are also calculated and displayed for backends, so it is
possible to know how many times selected backend was down, generating "No
server is available to handle this request." error.
New information are presentetd in two different ways:
- for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or
"59m 59s"
- for CSV: seconds
I believe that seconds resolution is enough.
As there are more columns in the status page I decided to shrink some
names to make more space:
- Weight -> Wght
- Check -> Chk
- Down -> Dwn
Making described changes I also made some improvements and fixed some
small bugs:
- don't increment s->health above 's->rise + s->fall - 1'. Previously it
was incremented an then (re)set to 's->rise + s->fall - 1'.
- do not set server down if it is down already
- do not set server up if it is up already
- fix colspan in multiple places (mostly introduced by my previous patch)
- add missing "status" header to CSV
- fix order of retries/redispatches in server (CSV)
- s/Tthen/Then/
- s/server/backend/ in DATA_ST_PX_BE (dumpstats.c)
Changes from previous version:
- deal with negative time intervales
- don't relay on s->state (SRV_RUNNING)
- little reworked human_time + compacted format (no spaces). If needed it
can be used in the future for other purposes by optionally making "cnt"
as an argument
- leave set_server_down mostly unchanged
- only little reworked "process_chk: 9"
- additional fields in CSV are appended to the rigth
- fix "SEC" macro
- named arguments (human_time, be_downtime, srv_downtime)
Hope it is OK. If there are only cosmetic changes needed please fill free
to correct it, however if there are some bigger changes required I would
like to discuss it first or at last to know what exactly was changed
especially since I already put this patch into my production server. :)
Thank you,
Best regards,
Krzysztof Oledzki
2007-10-22 16:21:10 +02:00
unsigned down_time ; /* total time the server was down; NOTE(review): unit presumably seconds - confirm */
time_t last_change ; /* last time, when the state was changed */
2006-06-26 02:48:02 +02:00
unsigned failed_conns , failed_resp ; /* failed connect() and responses */
2008-01-06 16:36:16 +01:00
unsigned retries , redispatches ; /* retried and redispatched connections */
2007-03-25 21:03:01 +02:00
unsigned failed_secu ; /* blocked responses because of security concerns */
unsigned cum_sess ; /* cumulated number of sessions really sent to this server */
2007-12-05 10:34:49 +01:00
unsigned cum_lbconn ; /* cumulated number of sessions directed by load balancing */
[MEDIUM] stats: report server and backend cumulated downtime
Hello,
This patch implements new statistics for SLA calculation by adding new
field 'Dwntime' with total down time since restart (both HTTP/CSV) and
extending status field (HTTP) or inserting a new one (CSV) with time
showing how long each server/backend is in a current state. Additionaly,
down transations are also calculated and displayed for backends, so it is
possible to know how many times selected backend was down, generating "No
server is available to handle this request." error.
New information are presentetd in two different ways:
- for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or
"59m 59s"
- for CSV: seconds
I believe that seconds resolution is enough.
As there are more columns in the status page I decided to shrink some
names to make more space:
- Weight -> Wght
- Check -> Chk
- Down -> Dwn
Making described changes I also made some improvements and fixed some
small bugs:
- don't increment s->health above 's->rise + s->fall - 1'. Previously it
was incremented an then (re)set to 's->rise + s->fall - 1'.
- do not set server down if it is down already
- do not set server up if it is up already
- fix colspan in multiple places (mostly introduced by my previous patch)
- add missing "status" header to CSV
- fix order of retries/redispatches in server (CSV)
- s/Tthen/Then/
- s/server/backend/ in DATA_ST_PX_BE (dumpstats.c)
Changes from previous version:
- deal with negative time intervales
- don't relay on s->state (SRV_RUNNING)
- little reworked human_time + compacted format (no spaces). If needed it
can be used in the future for other purposes by optionally making "cnt"
as an argument
- leave set_server_down mostly unchanged
- only little reworked "process_chk: 9"
- additional fields in CSV are appended to the rigth
- fix "SEC" macro
- named arguments (human_time, be_downtime, srv_downtime)
Hope it is OK. If there are only cosmetic changes needed please fill free
to correct it, however if there are some bigger changes required I would
like to discuss it first or at last to know what exactly was changed
especially since I already put this patch into my production server. :)
Thank you,
Best regards,
Krzysztof Oledzki
2007-10-22 16:21:10 +02:00
2007-03-25 21:03:01 +02:00
long long bytes_in ; /* number of bytes transferred from the client to the server */
long long bytes_out ; /* number of bytes transferred from the server to the client */
2007-11-04 23:35:08 +01:00
int puid ; /* proxy-unique server ID, used for SNMP */
2006-06-26 02:48:02 +02:00
} ;
# endif /* _TYPES_SERVER_H */
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */