2016-11-26 19:48:49 +01:00
package healthcheck
import (
2016-11-30 22:49:57 +01:00
"context"
2021-06-25 21:08:11 +02:00
"errors"
2017-03-24 09:36:33 +01:00
"fmt"
2017-05-10 14:28:57 -04:00
"net"
2016-11-26 19:48:49 +01:00
"net/http"
"net/url"
2017-05-10 14:28:57 -04:00
"strconv"
2016-11-26 19:48:49 +01:00
"time"
2017-01-31 22:55:02 +01:00
2020-09-26 13:30:03 +02:00
gokitmetrics "github.com/go-kit/kit/metrics"
2022-11-21 18:36:05 +01:00
"github.com/rs/zerolog/log"
2023-02-03 15:24:05 +01:00
"github.com/traefik/traefik/v3/pkg/config/dynamic"
"github.com/traefik/traefik/v3/pkg/config/runtime"
2022-09-20 16:54:08 +02:00
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
2022-10-14 17:16:08 +02:00
"google.golang.org/grpc/credentials/insecure"
2022-09-20 16:54:08 +02:00
healthpb "google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/status"
2016-11-26 19:48:49 +01:00
)
2022-11-16 11:38:07 +01:00
// modeGRPC is the health check mode value that makes executeHealthCheck run
// the gRPC health check instead of the HTTP one.
const modeGRPC = "grpc"
2016-11-26 19:48:49 +01:00
2022-11-16 11:38:07 +01:00
// StatusSetter should be implemented by a service that needs to be notified
// when the status of one of its registered targets changes.
type StatusSetter interface {
	// SetStatus reports that the child identified by childName is now up
	// (up == true) or down (up == false).
	SetStatus(ctx context.Context, childName string, up bool)
}
2022-11-16 11:38:07 +01:00
// StatusUpdater should be implemented by a service that, when its own status
// changes (e.g. all of its children are down), needs to propagate that change
// upwards to its parent(s).
type StatusUpdater interface {
	// RegisterStatusUpdater registers fn as a callback taking the new status
	// of the service (up == true when it is up).
	RegisterStatusUpdater(fn func(up bool)) error
}
2022-11-16 11:38:07 +01:00
type metricsHealthCheck interface {
ServiceServerUpGauge ( ) gokitmetrics . Gauge
2016-11-26 19:48:49 +01:00
}
2022-11-16 11:38:07 +01:00
type ServiceHealthChecker struct {
balancer StatusSetter
info * runtime . ServiceInfo
2017-03-15 19:16:06 +01:00
2022-11-16 11:38:07 +01:00
config * dynamic . ServerHealthCheck
interval time . Duration
timeout time . Duration
2017-03-24 09:36:33 +01:00
2022-11-16 11:38:07 +01:00
metrics metricsHealthCheck
2019-08-07 08:14:04 -07:00
2022-11-16 11:38:07 +01:00
client * http . Client
targets map [ string ] * url . URL
2016-11-26 19:48:49 +01:00
}
2022-11-16 11:38:07 +01:00
func NewServiceHealthChecker ( ctx context . Context , metrics metricsHealthCheck , config * dynamic . ServerHealthCheck , service StatusSetter , info * runtime . ServiceInfo , transport http . RoundTripper , targets map [ string ] * url . URL ) * ServiceHealthChecker {
2022-11-21 18:36:05 +01:00
logger := log . Ctx ( ctx )
2016-11-26 19:48:49 +01:00
2022-11-16 11:38:07 +01:00
interval := time . Duration ( config . Interval )
if interval <= 0 {
2022-11-21 18:36:05 +01:00
logger . Error ( ) . Msg ( "Health check interval smaller than zero" )
2022-11-16 11:38:07 +01:00
interval = time . Duration ( dynamic . DefaultHealthCheckInterval )
2018-06-11 11:36:03 +02:00
}
2016-11-26 19:48:49 +01:00
2022-11-16 11:38:07 +01:00
timeout := time . Duration ( config . Timeout )
if timeout <= 0 {
2022-11-21 18:36:05 +01:00
logger . Error ( ) . Msg ( "Health check timeout smaller than zero" )
2022-11-16 11:38:07 +01:00
timeout = time . Duration ( dynamic . DefaultHealthCheckTimeout )
2017-03-09 16:27:31 +01:00
}
2016-11-26 19:48:49 +01:00
2022-11-16 11:38:07 +01:00
client := & http . Client {
Transport : transport ,
2017-03-09 16:27:31 +01:00
}
2022-08-08 10:22:07 -03:00
2022-11-16 11:38:07 +01:00
if config . FollowRedirects != nil && ! * config . FollowRedirects {
client . CheckRedirect = func ( req * http . Request , via [ ] * http . Request ) error {
return http . ErrUseLastResponse
}
2022-08-08 10:22:07 -03:00
}
2022-11-16 11:38:07 +01:00
return & ServiceHealthChecker {
balancer : service ,
info : info ,
config : config ,
interval : interval ,
timeout : timeout ,
targets : targets ,
client : client ,
metrics : metrics ,
2016-11-30 22:49:57 +01:00
}
2016-11-26 19:48:49 +01:00
}
2022-11-16 11:38:07 +01:00
func ( shc * ServiceHealthChecker ) Launch ( ctx context . Context ) {
ticker := time . NewTicker ( shc . interval )
2017-03-09 16:27:31 +01:00
defer ticker . Stop ( )
2022-11-16 11:38:07 +01:00
2017-04-11 17:10:46 +02:00
for {
select {
case <- ctx . Done ( ) :
return
2019-09-13 19:28:04 +02:00
2022-11-16 11:38:07 +01:00
case <- ticker . C :
for proxyName , target := range shc . targets {
select {
case <- ctx . Done ( ) :
return
default :
}
2020-09-26 13:30:03 +02:00
2022-11-16 11:38:07 +01:00
up := true
serverUpMetricValue := float64 ( 1 )
2020-09-26 13:30:03 +02:00
2022-11-16 11:38:07 +01:00
if err := shc . executeHealthCheck ( ctx , shc . config , target ) ; err != nil {
// The context is canceled when the dynamic configuration is refreshed.
if errors . Is ( err , context . Canceled ) {
return
}
2020-09-26 13:30:03 +02:00
2022-11-21 18:36:05 +01:00
log . Ctx ( ctx ) . Warn ( ) .
Str ( "targetURL" , target . String ( ) ) .
Err ( err ) .
Msg ( "Health check failed." )
2020-09-26 13:30:03 +02:00
2022-11-16 11:38:07 +01:00
up = false
serverUpMetricValue = float64 ( 0 )
}
2017-03-09 16:27:31 +01:00
2022-11-16 11:38:07 +01:00
shc . balancer . SetStatus ( ctx , proxyName , up )
2020-09-26 13:30:03 +02:00
2022-11-16 11:38:07 +01:00
statusStr := runtime . StatusDown
if up {
statusStr = runtime . StatusUp
2019-08-07 08:14:04 -07:00
}
2020-09-26 13:30:03 +02:00
2022-11-16 11:38:07 +01:00
shc . info . UpdateServerStatus ( target . String ( ) , statusStr )
2020-09-26 13:30:03 +02:00
2022-11-16 11:38:07 +01:00
shc . metrics . ServiceServerUpGauge ( ) .
2022-11-23 16:04:05 +01:00
With ( "service" , proxyName , "url" , target . String ( ) ) .
2022-11-16 11:38:07 +01:00
Set ( serverUpMetricValue )
}
2017-04-11 17:10:46 +02:00
}
}
2017-03-09 16:27:31 +01:00
}
2022-11-16 11:38:07 +01:00
func ( shc * ServiceHealthChecker ) executeHealthCheck ( ctx context . Context , config * dynamic . ServerHealthCheck , target * url . URL ) error {
ctx , cancel := context . WithDeadline ( ctx , time . Now ( ) . Add ( shc . timeout ) )
defer cancel ( )
2017-05-10 14:28:57 -04:00
2022-11-16 11:38:07 +01:00
if config . Mode == modeGRPC {
return shc . checkHealthGRPC ( ctx , target )
2018-04-16 11:40:03 +02:00
}
2022-11-16 11:38:07 +01:00
return shc . checkHealthHTTP ( ctx , target )
2018-06-11 11:36:03 +02:00
}
2018-05-22 09:22:03 +02:00
2022-11-16 11:38:07 +01:00
// checkHealthHTTP returns an error with a meaningful description if the health check failed.
// Dedicated to HTTP servers.
func ( shc * ServiceHealthChecker ) checkHealthHTTP ( ctx context . Context , target * url . URL ) error {
req , err := shc . newRequest ( ctx , target )
if err != nil {
return fmt . Errorf ( "create HTTP request: %w" , err )
2018-04-16 11:40:03 +02:00
}
2022-11-16 11:38:07 +01:00
resp , err := shc . client . Do ( req )
if err != nil {
return fmt . Errorf ( "HTTP request failed: %w" , err )
2022-09-20 16:54:08 +02:00
}
2022-11-16 11:38:07 +01:00
defer resp . Body . Close ( )
2022-11-24 14:10:05 +03:30
if shc . config . Status == 0 && ( resp . StatusCode < http . StatusOK || resp . StatusCode >= http . StatusBadRequest ) {
2022-11-16 11:38:07 +01:00
return fmt . Errorf ( "received error status code: %v" , resp . StatusCode )
}
2022-11-24 14:10:05 +03:30
if shc . config . Status != 0 && shc . config . Status != resp . StatusCode {
return fmt . Errorf ( "received error status code: %v expected status code: %v" , resp . StatusCode , shc . config . Status )
}
2022-11-16 11:38:07 +01:00
return nil
2022-09-20 16:54:08 +02:00
}
2022-11-16 11:38:07 +01:00
func ( shc * ServiceHealthChecker ) newRequest ( ctx context . Context , target * url . URL ) ( * http . Request , error ) {
u , err := target . Parse ( shc . config . Path )
2017-05-10 14:28:57 -04:00
if err != nil {
2022-11-16 11:38:07 +01:00
return nil , err
2017-05-10 14:28:57 -04:00
}
2018-05-22 09:22:03 +02:00
2022-11-16 11:38:07 +01:00
if len ( shc . config . Scheme ) > 0 {
u . Scheme = shc . config . Scheme
2016-11-26 19:48:49 +01:00
}
2018-01-03 12:32:03 +01:00
2022-11-16 11:38:07 +01:00
if shc . config . Port != 0 {
u . Host = net . JoinHostPort ( u . Hostname ( ) , strconv . Itoa ( shc . config . Port ) )
2020-02-26 17:28:04 +01:00
}
2022-11-16 11:38:07 +01:00
req , err := http . NewRequestWithContext ( ctx , shc . config . Method , u . String ( ) , http . NoBody )
2018-05-22 09:22:03 +02:00
if err != nil {
2022-11-16 11:38:07 +01:00
return nil , fmt . Errorf ( "failed to create HTTP request: %w" , err )
2018-01-03 12:32:03 +01:00
}
2018-05-22 09:22:03 +02:00
2022-11-16 11:38:07 +01:00
if shc . config . Hostname != "" {
req . Host = shc . config . Hostname
}
2018-05-22 09:22:03 +02:00
2022-11-16 11:38:07 +01:00
for k , v := range shc . config . Headers {
req . Header . Set ( k , v )
2018-05-22 09:22:03 +02:00
}
2022-11-16 11:38:07 +01:00
return req , nil
2016-11-26 19:48:49 +01:00
}
2019-05-16 10:58:06 +02:00
2022-09-20 16:54:08 +02:00
// checkHealthGRPC returns an error with a meaningful description if the health check failed.
// Dedicated to gRPC servers implementing gRPC Health Checking Protocol v1.
2022-11-16 11:38:07 +01:00
func ( shc * ServiceHealthChecker ) checkHealthGRPC ( ctx context . Context , serverURL * url . URL ) error {
u , err := serverURL . Parse ( shc . config . Path )
2022-09-20 16:54:08 +02:00
if err != nil {
return fmt . Errorf ( "failed to parse server URL: %w" , err )
}
port := u . Port ( )
2022-11-16 11:38:07 +01:00
if shc . config . Port != 0 {
port = strconv . Itoa ( shc . config . Port )
2022-09-20 16:54:08 +02:00
}
serverAddr := net . JoinHostPort ( u . Hostname ( ) , port )
var opts [ ] grpc . DialOption
2022-11-16 11:38:07 +01:00
switch shc . config . Scheme {
2022-09-20 16:54:08 +02:00
case "http" , "h2c" , "" :
2022-10-14 17:16:08 +02:00
opts = append ( opts , grpc . WithTransportCredentials ( insecure . NewCredentials ( ) ) )
2022-09-20 16:54:08 +02:00
}
conn , err := grpc . DialContext ( ctx , serverAddr , opts ... )
if err != nil {
if errors . Is ( err , context . DeadlineExceeded ) {
2022-11-16 11:38:07 +01:00
return fmt . Errorf ( "fail to connect to %s within %s: %w" , serverAddr , shc . config . Timeout , err )
2022-09-20 16:54:08 +02:00
}
return fmt . Errorf ( "fail to connect to %s: %w" , serverAddr , err )
}
defer func ( ) { _ = conn . Close ( ) } ( )
resp , err := healthpb . NewHealthClient ( conn ) . Check ( ctx , & healthpb . HealthCheckRequest { } )
if err != nil {
if stat , ok := status . FromError ( err ) ; ok {
switch stat . Code ( ) {
case codes . Unimplemented :
return fmt . Errorf ( "gRPC server does not implement the health protocol: %w" , err )
case codes . DeadlineExceeded :
return fmt . Errorf ( "gRPC health check timeout: %w" , err )
2022-11-16 11:38:07 +01:00
case codes . Canceled :
return context . Canceled
2022-09-20 16:54:08 +02:00
}
}
return fmt . Errorf ( "gRPC health check failed: %w" , err )
}
2023-11-29 12:20:57 +01:00
if resp . GetStatus ( ) != healthpb . HealthCheckResponse_SERVING {
return fmt . Errorf ( "received gRPC status code: %v" , resp . GetStatus ( ) )
2022-09-20 16:54:08 +02:00
}
return nil
}