diff --git a/docs/querying/api.md b/docs/querying/api.md index 139eb58dd..ef9455188 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -363,6 +363,103 @@ $ curl http://localhost:9090/api/v1/targets } ``` + +## Rules + +The `/rules` API endpoint returns a list of alerting and recording rules that +are currently loaded. In addition, it returns the currently active alerts fired +by the Prometheus instance for each alerting rule. + +As the `/rules` endpoint is fairly new, it does not have the same stability +guarantees as the overarching API v1. + +``` +GET /api/v1/rules +``` + +```json +$ curl http://localhost:9090/api/v1/rules + +{ + "data": { + "groups": [ + { + "rules": [ + { + "alerts": [ + { + "activeAt": "2018-07-04T20:27:12.60602144+02:00", + "annotations": { + "summary": "High request latency" + }, + "labels": { + "alertname": "HighRequestLatency", + "severity": "page" + }, + "state": "firing", + "value": 1 + } + ], + "annotations": { + "summary": "High request latency" + }, + "duration": 600, + "labels": { + "severity": "page" + }, + "name": "HighRequestLatency", + "query": "job:request_latency_seconds:mean5m{job=\"myjob\"} > 0.5", + "type": "alerting" + }, + { + "name": "job:http_inprogress_requests:sum", + "query": "sum(http_inprogress_requests) by (job)", + "type": "recording" + } + ], + "file": "/rules.yaml", + "interval": 60, + "name": "example" + } + ] + }, + "status": "success" +} +``` + + +## Alerts + +The `/alerts` endpoint returns a list of all active alerts. + +As the `/alerts` endpoint is fairly new, it does not have the same stability +guarantees as the overarching API v1. 
+ +``` +GET /api/v1/alerts +``` + +```json +$ curl http://localhost:9090/api/v1/alerts + +{ + "data": { + "alerts": [ + { + "activeAt": "2018-07-04T20:27:12.60602144+02:00", + "annotations": {}, + "labels": { + "alertname": "my-alert" + }, + "state": "firing", + "value": 1 + } + ] + }, + "status": "success" +} +``` + ## Querying target metadata The following endpoint returns metadata about metrics currently scraped by targets. diff --git a/rules/alerting.go b/rules/alerting.go index 2a94e2b0a..015ae3a91 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -126,46 +126,31 @@ func NewAlertingRule(name string, vec promql.Expr, hold time.Duration, lbls, ann } } -// Name returns the name of the alert. +// Name returns the name of the alerting rule. func (r *AlertingRule) Name() string { return r.name } -// Query returns the query expression of the alert. +// Query returns the query expression of the alerting rule. func (r *AlertingRule) Query() promql.Expr { return r.vector } -// Duration returns the hold duration of the alert. +// Duration returns the hold duration of the alerting rule. func (r *AlertingRule) Duration() time.Duration { return r.holdDuration } -// Labels returns the labels of the alert. +// Labels returns the labels of the alerting rule. func (r *AlertingRule) Labels() labels.Labels { return r.labels } -// Annotations returns the annotations of the alert. +// Annotations returns the annotations of the alerting rule. 
func (r *AlertingRule) Annotations() labels.Labels { return r.annotations } -// Alertinfo return an array of alerts -func (r *AlertingRule) Alertinfo() []*Alert { - activealerts := &r.active - alertsarr := make([]*Alert, 0) - if len(*activealerts) > 0 { - for _, a := range *activealerts { - if a.ResolvedAt.IsZero() { - alertsarr = append(alertsarr, a) - } - } - return alertsarr - } - return nil -} - func (r *AlertingRule) equal(o *AlertingRule) bool { return r.name == o.name && labels.Equal(r.labels, o.labels) } diff --git a/rules/manager.go b/rules/manager.go index 4477bd6a6..7ddca7432 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -188,6 +188,9 @@ func (g *Group) File() string { return g.file } // Rules returns the group's rules. func (g *Group) Rules() []Rule { return g.rules } +// Interval returns the group's interval. +func (g *Group) Interval() time.Duration { return g.interval } + func (g *Group) run(ctx context.Context) { defer close(g.terminated) diff --git a/rules/recording.go b/rules/recording.go index 26e7cc408..69fdfa03e 100644 --- a/rules/recording.go +++ b/rules/recording.go @@ -52,6 +52,16 @@ func (rule *RecordingRule) Name() string { return rule.name } +// Query returns the rule query expression. +func (rule *RecordingRule) Query() promql.Expr { + return rule.vector +} + +// Labels returns the rule labels. +func (rule *RecordingRule) Labels() labels.Labels { + return rule.labels +} + // Eval evaluates the rule and then overrides the metric names and labels accordingly. 
func (rule *RecordingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, _ *url.URL) (promql.Vector, error) { vector, err := query(ctx, rule.vector.String(), ts) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 5cd3ce0c6..f9be522d8 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -96,17 +96,9 @@ type alertmanagerRetriever interface { DroppedAlertmanagers() []*url.URL } -type alertRetreiver interface { - AlertingRules() []*rules.AlertingRule -} - -type rulesRetreiver interface { +type rulesRetriever interface { RuleGroups() []*rules.Group -} - -type alertsrulesRetreiver interface { - alertRetreiver - rulesRetreiver + AlertingRules() []*rules.AlertingRule } type response struct { @@ -133,7 +125,7 @@ type API struct { targetRetriever targetRetriever alertmanagerRetriever alertmanagerRetriever - alertsrulesRetreiver alertsrulesRetreiver + rulesRetriever rulesRetriever now func() time.Time config func() config.Config flagsMap map[string]string @@ -156,20 +148,20 @@ func NewAPI( db func() *tsdb.DB, enableAdmin bool, logger log.Logger, - al alertsrulesRetreiver, + rr rulesRetriever, ) *API { return &API{ QueryEngine: qe, Queryable: q, targetRetriever: tr, alertmanagerRetriever: ar, - now: time.Now, - config: configFunc, - flagsMap: flagsMap, - ready: readyFunc, - db: db, - enableAdmin: enableAdmin, - alertsrulesRetreiver: al, + now: time.Now, + config: configFunc, + flagsMap: flagsMap, + ready: readyFunc, + db: db, + enableAdmin: enableAdmin, + rulesRetriever: rr, } } @@ -597,92 +589,130 @@ func (api *API) alertmanagers(r *http.Request) (interface{}, *apiError, func()) return ams, nil, nil } -// AlertDiscovery has info for all alerts +// AlertDiscovery has info for all active alerts. type AlertDiscovery struct { - Alertgrps []*Alertgrp `json:"alertgrp"` + Alerts []*Alert `json:"alerts"` } -// Alert has info for a alert +// Alert has info for an alert. 
type Alert struct { Labels labels.Labels `json:"labels"` - Status string `json:"status"` - Activesince *time.Time `json:"activesince,omitempty"` + Annotations labels.Labels `json:"annotations"` + State string `json:"state"` + ActiveAt *time.Time `json:"activeAt,omitempty"` + Value float64 `json:"value"` } -// Alertgrp has info for alerts part of a group -type Alertgrp struct { +func (api *API) alerts(r *http.Request) (interface{}, *apiError, func()) { + alertingRules := api.rulesRetriever.AlertingRules() + alerts := []*Alert{} + + for _, alertingRule := range alertingRules { + alerts = append( + alerts, + rulesAlertsToAPIAlerts(alertingRule.ActiveAlerts())..., + ) + } + + res := &AlertDiscovery{Alerts: alerts} + + return res, nil, nil +} + +func rulesAlertsToAPIAlerts(rulesAlerts []*rules.Alert) []*Alert { + apiAlerts := make([]*Alert, len(rulesAlerts)) + for i, ruleAlert := range rulesAlerts { + apiAlerts[i] = &Alert{ + Labels: ruleAlert.Labels, + Annotations: ruleAlert.Annotations, + State: ruleAlert.State.String(), + ActiveAt: &ruleAlert.ActiveAt, + Value: ruleAlert.Value, + } + } + + return apiAlerts +} + +// RuleDiscovery has info for all rules +type RuleDiscovery struct { + RuleGroups []*RuleGroup `json:"groups"` +} + +// RuleGroup has info for rules which are part of a group +type RuleGroup struct { + Name string `json:"name"` + File string `json:"file"` + // In order to preserve rule ordering, while exposing type (alerting or recording) + // specific properties, both alerting and recording rules are exposed in the + // same array. 
+ Rules []rule `json:"rules"` + Interval float64 `json:"interval"` +} + +type rule interface{} + +type alertingRule struct { Name string `json:"name"` Query string `json:"query"` - Duration string `json:"duration"` - Annotations labels.Labels `json:"annotations,omitempty"` + Duration float64 `json:"duration"` + Labels labels.Labels `json:"labels"` + Annotations labels.Labels `json:"annotations"` Alerts []*Alert `json:"alerts"` + // Type of an alertingRule is always "alerting". + Type string `json:"type"` } -func (api *API) alerts(r *http.Request) (interface{}, *apiError) { - alertingrules := api.alertsrulesRetreiver.AlertingRules() - var alertgrps []*Alertgrp - res := &AlertDiscovery{Alertgrps: alertgrps} - for _, activerule := range alertingrules { - t := &Alertgrp{ - Name: activerule.Name(), - Query: fmt.Sprintf("%v", activerule.Query()), - Duration: activerule.Duration().String(), - Annotations: activerule.Annotations(), +type recordingRule struct { + Name string `json:"name"` + Query string `json:"query"` + Labels labels.Labels `json:"labels,omitempty"` + // Type of a recordingRule is always "recording". 
+ Type string `json:"type"` +} + +func (api *API) rules(r *http.Request) (interface{}, *apiError, func()) { + ruleGroups := api.rulesRetriever.RuleGroups() + res := &RuleDiscovery{RuleGroups: make([]*RuleGroup, len(ruleGroups))} + for i, grp := range ruleGroups { + apiRuleGroup := &RuleGroup{ + Name: grp.Name(), + File: grp.File(), + Interval: grp.Interval().Seconds(), + Rules: []rule{}, } - alerts := activerule.Alertinfo() - var activealerts []*Alert - for _, alert := range alerts { - q := &Alert{ - Labels: alert.Labels, - Status: alert.State.String(), - Activesince: &alert.ActiveAt, + + for _, r := range grp.Rules() { + var enrichedRule rule + + switch rule := r.(type) { + case *rules.AlertingRule: + enrichedRule = alertingRule{ + Name: rule.Name(), + Query: rule.Query().String(), + Duration: rule.Duration().Seconds(), + Labels: rule.Labels(), + Annotations: rule.Annotations(), + Alerts: rulesAlertsToAPIAlerts(rule.ActiveAlerts()), + Type: "alerting", + } + case *rules.RecordingRule: + enrichedRule = recordingRule{ + Name: rule.Name(), + Query: rule.Query().String(), + Labels: rule.Labels(), + Type: "recording", + } + default: + err := fmt.Errorf("failed to assert type of rule '%v'", rule.Name()) + return nil, &apiError{errorInternal, err}, nil } - activealerts = append(activealerts, q) + apiRuleGroup.Rules = append(apiRuleGroup.Rules, enrichedRule) } - t.Alerts = activealerts - res.Alertgrps = append(res.Alertgrps, t) + res.RuleGroups[i] = apiRuleGroup } - - return res, nil -} - -// GroupDiscovery has info for all rules -type GroupDiscovery struct { - Rulegrps []*Rulegrp `json:"groups"` -} - -// Rulegrp has info for rules which are part of a group -type Rulegrp struct { - Name string `json:"name"` - File string `json:"file"` - Rules []*Ruleinfo `json:"rules"` -} - -// Ruleinfo has rule in human readable format using \n as line separators -type Ruleinfo struct { - Rule string `json:"rule"` -} - -func (api *API) rules(r *http.Request) (interface{}, *apiError) { - 
grps := api.alertsrulesRetreiver.RuleGroups() - res := &GroupDiscovery{Rulegrps: make([]*Rulegrp, len(grps))} - for i, grp := range grps { - t := &Rulegrp{ - Name: grp.Name(), - File: grp.File(), - } - var rulearr []*Ruleinfo - for _, rule := range grp.Rules() { - q := &Ruleinfo{ - Rule: rule.String(), - } - rulearr = append(rulearr, q) - } - t.Rules = rulearr - res.Rulegrps[i] = t - } - return res, nil + return res, nil, nil } type prometheusConfig struct { diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 5082b03bc..e0d0f23c5 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -21,7 +21,6 @@ import ( "fmt" "github.com/go-kit/kit/log" "io/ioutil" - stdlog "log" "math" "net/http" "net/http/httptest" @@ -102,18 +101,18 @@ func (t testAlertmanagerRetriever) DroppedAlertmanagers() []*url.URL { } } -type testalertsrulesfunc struct { - test *testing.T +type rulesRetrieverMock struct { + testing *testing.T } -func (t testalertsrulesfunc) AlertingRules() []*rules.AlertingRule { +func (m rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { expr1, err := promql.ParseExpr(`absent(test_metric3) != 1`) if err != nil { - stdlog.Fatalf("Unable to parse alert expression: %s", err) + m.testing.Fatalf("unable to parse alert expression: %s", err) } expr2, err := promql.ParseExpr(`up == 1`) if err != nil { - stdlog.Fatalf("Unable to parse alert expression: %s", err) + m.testing.Fatalf("Unable to parse alert expression: %s", err) } rule1 := rules.NewAlertingRule( @@ -138,10 +137,10 @@ func (t testalertsrulesfunc) AlertingRules() []*rules.AlertingRule { return r } -func (t testalertsrulesfunc) RuleGroups() []*rules.Group { - var ar testalertsrulesfunc +func (m rulesRetrieverMock) RuleGroups() []*rules.Group { + var ar rulesRetrieverMock arules := ar.AlertingRules() - storage := testutil.NewStorage(t.test) + storage := testutil.NewStorage(m.testing) defer storage.Close() engine := promql.NewEngine(nil, nil, 10, 10*time.Second) @@ -158,10 +157,15 
@@ func (t testalertsrulesfunc) RuleGroups() []*rules.Group { r = append(r, alertrule) } - group := rules.NewGroup("grp", "/path/to/file", time.Second, r, opts) - fmt.Println(group) - return []*rules.Group{group} + recordingExpr, err := promql.ParseExpr(`vector(1)`) + if err != nil { + m.testing.Fatalf("unable to parse alert expression: %s", err) + } + recordingRule := rules.NewRecordingRule("recording-rule-1", recordingExpr, labels.Labels{}) + r = append(r, recordingRule) + group := rules.NewGroup("grp", "/path/to/file", time.Second, r, opts) + return []*rules.Group{group} } var samplePrometheusCfg = config.Config{ @@ -196,10 +200,14 @@ func TestEndpoints(t *testing.T) { now := time.Now() - t.Run("local", func(t *testing.T) { + var algr rulesRetrieverMock + algr.testing = t + algr.AlertingRules() + algr.RuleGroups() - var algr testalertsrulesfunc - algr.test = t + t.Run("local", func(t *testing.T) { + var algr rulesRetrieverMock + algr.testing = t algr.AlertingRules() @@ -210,11 +218,11 @@ func TestEndpoints(t *testing.T) { QueryEngine: suite.QueryEngine(), targetRetriever: testTargetRetriever{}, alertmanagerRetriever: testAlertmanagerRetriever{}, - now: func() time.Time { return now }, - config: func() config.Config { return samplePrometheusCfg }, - flagsMap: sampleFlagMap, - ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, - alertsrulesRetreiver: algr, + now: func() time.Time { return now }, + config: func() config.Config { return samplePrometheusCfg }, + flagsMap: sampleFlagMap, + ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, + rulesRetriever: algr, } testEndpoints(t, api, true) @@ -251,8 +259,8 @@ func TestEndpoints(t *testing.T) { t.Fatal(err) } - var algr testalertsrulesfunc - algr.test = t + var algr rulesRetrieverMock + algr.testing = t algr.AlertingRules() @@ -263,11 +271,11 @@ func TestEndpoints(t *testing.T) { QueryEngine: suite.QueryEngine(), targetRetriever: testTargetRetriever{}, alertmanagerRetriever: 
testAlertmanagerRetriever{}, - now: func() time.Time { return now }, - config: func() config.Config { return samplePrometheusCfg }, - flagsMap: sampleFlagMap, - ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, - alertsrulesRetreiver: algr, + now: func() time.Time { return now }, + config: func() config.Config { return samplePrometheusCfg }, + flagsMap: sampleFlagMap, + ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, + rulesRetriever: algr, } testEndpoints(t, api, false) @@ -652,37 +660,41 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { { endpoint: api.alerts, response: &AlertDiscovery{ - Alertgrps: []*Alertgrp{ - { - Name: "test_metric3", - Query: "absent(test_metric3) != 1", - Duration: "1s", - Alerts: nil, - Annotations: labels.Labels{}, - }, - { - Name: "test_metric4", - Query: "up == 1", - Duration: "1s", - Alerts: nil, - Annotations: labels.Labels{}, - }, - }, + Alerts: []*Alert{}, }, }, { endpoint: api.rules, - response: &GroupDiscovery{ - Rulegrps: []*Rulegrp{ + response: &RuleDiscovery{ + RuleGroups: []*RuleGroup{ { - Name: "grp", - File: "/path/to/file", - Rules: []*Ruleinfo{ - { - Rule: "alert: test_metric3\nexpr: absent(test_metric3) != 1\nfor: 1s\n", + Name: "grp", + File: "/path/to/file", + Interval: 1, + Rules: []rule{ + alertingRule{ + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Type: "alerting", }, - { - Rule: "alert: test_metric4\nexpr: up == 1\nfor: 1s\n", + alertingRule{ + Name: "test_metric4", + Query: "up == 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Type: "alerting", + }, + recordingRule{ + Name: "recording-rule-1", + Query: "vector(1)", + Labels: labels.Labels{}, + Type: "recording", }, }, }, @@ -768,7 +780,21 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { t.Fatalf("Expected error of type %q but got none", 
test.errType) } if !reflect.DeepEqual(resp, test.response) { - t.Fatalf("Response does not match, expected:\n%+v\ngot:\n%+v", test.response, resp) + respJSON, err := json.Marshal(resp) + if err != nil { + t.Fatalf("failed to marshal response as JSON: %v", err.Error()) + } + + expectedRespJSON, err := json.Marshal(test.response) + if err != nil { + t.Fatalf("failed to marshal expected response as JSON: %v", err.Error()) + } + + t.Fatalf( + "Response does not match, expected:\n%+v\ngot:\n%+v", + string(expectedRespJSON), + string(respJSON), + ) } } }