Commit 4acbcd70 by Sofia Papagiannaki Committed by GitHub

AlertingNG: POC of evaluator under feature flag. (#27922)

* New feature toggle for enabling alerting NG

* Initial commit

* Modify evaluate alert API request

* Check for unique labels in alert execution result dataframes

* Remove print statement

* Additional minor fixes/comments

* Fix lint issues

* Add API endpoint for evaluating panel queries

* Push missing renaming

* add refId for condition to API

* add refId for condition to API

* switch dashboard based eval to get method

* add from/to params to dashboard based eval

* add from/to params to eval endpoint

Co-authored-by: kyle <kyle@grafana.com>
parent 3928d0c5
......@@ -1417,7 +1417,7 @@ For more information about Grafana Enterprise, refer to [Grafana Enterprise]({{<
### enable
Keys of alpha features to enable, separated by space. Available alpha features are: `transformations`
Keys of alpha features to enable, separated by space. Available alpha features are: `transformations`, `ngalert`
## [date_formats]
......
......@@ -34,6 +34,7 @@ export interface BuildInfo {
export interface FeatureToggles {
live: boolean;
expressions: boolean;
ngalert: boolean;
/**
* @remarks
......
......@@ -57,6 +57,7 @@ export class GrafanaBootConfig implements GrafanaConfig {
meta: false,
datasourceInsights: false,
reportGrid: false,
ngalert: false,
};
licenseInfo: LicenseInfo = {} as LicenseInfo;
rendererAvailable = false;
......
......@@ -351,6 +351,13 @@ func (hs *HTTPServer) registerRoutes() {
alertsRoute.Get("/states-for-dashboard", Wrap(GetAlertStatesForDashboard))
})
if hs.Cfg.IsNgAlertEnabled() {
apiRoute.Group("/alert-definitions", func(alertDefinitions routing.RouteRegister) {
alertDefinitions.Get("/eval/:dashboardID/:panelID/:refID", reqEditorRole, Wrap(hs.AlertDefinitionEval))
alertDefinitions.Post("/eval", reqEditorRole, bind(dtos.EvalAlertConditionsCommand{}), Wrap(hs.ConditionsEval))
})
}
apiRoute.Get("/alert-notifiers", reqEditorRole, Wrap(GetAlertNotifiers))
apiRoute.Group("/alert-notifications", func(alertNotifications routing.RouteRegister) {
......
package dtos
import (
"time"
eval "github.com/grafana/grafana/pkg/services/ngalert"
)
// EvalAlertConditionsCommand is the JSON request body bound to the
// POST /api/alert-definitions/eval route.
type EvalAlertConditionsCommand struct {
// Conditions holds the queries/expressions to run and the refID of the
// result that is used as the alert condition.
Conditions eval.Conditions `json:"conditions"`
// Now is decoded from the "now" key of the request body.
// NOTE(review): the eval handler visible in this change does not read this
// field - confirm whether it is meant to anchor the evaluation time.
Now time.Time `json:"now"`
}
......@@ -29,6 +29,7 @@ import (
"github.com/grafana/grafana/pkg/services/datasources"
"github.com/grafana/grafana/pkg/services/hooks"
"github.com/grafana/grafana/pkg/services/login"
eval "github.com/grafana/grafana/pkg/services/ngalert"
"github.com/grafana/grafana/pkg/services/provisioning"
"github.com/grafana/grafana/pkg/services/quota"
"github.com/grafana/grafana/pkg/services/rendering"
......@@ -70,6 +71,7 @@ type HTTPServer struct {
BackendPluginManager backendplugin.Manager `inject:""`
PluginManager *plugins.PluginManager `inject:""`
SearchService *search.SearchService `inject:""`
AlertNG *eval.AlertNG `inject:""`
Live *live.GrafanaLive
Listener net.Listener
}
......
package api
import (
"context"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/api/dtos"
"github.com/grafana/grafana/pkg/models"
eval "github.com/grafana/grafana/pkg/services/ngalert"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/tsdb"
"github.com/grafana/grafana/pkg/util"
)
// ConditionsEval handles POST /api/alert-definitions/eval.
//
// It executes the conditions supplied in the request body over the time range
// given by the optional "from" and "to" query parameters (defaulting to
// "now-3h" and "now"), and responds with the evaluated instances encoded as
// data frames under the "instances" key.
func (hs *HTTPServer) ConditionsEval(c *models.ReqContext, dto dtos.EvalAlertConditionsCommand) Response {
	fromStr, toStr := ngAlertEvalTimeRange(c)
	return ngAlertEvalResponse(c, &dto.Conditions, fromStr, toStr)
}

// AlertDefinitionEval handles GET /api/alert-definitions/eval/:dashboardID/:panelID/:refID.
//
// It loads the conditions from the targets of the given dashboard panel, using
// :refID as the condition, and then evaluates them exactly like ConditionsEval.
func (hs *HTTPServer) AlertDefinitionEval(c *models.ReqContext) Response {
	dashboardID := c.ParamsInt64(":dashboardID")
	panelID := c.ParamsInt64(":panelID")
	conditionRefID := c.Params(":refID")
	fromStr, toStr := ngAlertEvalTimeRange(c)

	conditions, err := hs.AlertNG.LoadAlertConditions(dashboardID, panelID, conditionRefID, c.SignedInUser, c.SkipCache)
	if err != nil {
		return Error(400, "Failed to load conditions", err)
	}

	return ngAlertEvalResponse(c, conditions, fromStr, toStr)
}

// ngAlertEvalTimeRange reads the "from" and "to" query parameters of the
// request, falling back to "now-3h" and "now" when they are empty.
func ngAlertEvalTimeRange(c *models.ReqContext) (fromStr, toStr string) {
	fromStr = c.Query("from")
	if fromStr == "" {
		fromStr = "now-3h"
	}

	toStr = c.Query("to")
	if toStr == "" {
		toStr = "now"
	}

	return fromStr, toStr
}

// ngAlertEvalResponse executes the conditions, evaluates the execution result
// and encodes the resulting frame into the JSON response shared by both eval
// endpoints.
func ngAlertEvalResponse(c *models.ReqContext, conditions *eval.Conditions, fromStr, toStr string) Response {
	// Derive the evaluation context from the request so that the evaluation is
	// cancelled when the client goes away, while still being bounded by the
	// configured evaluation timeout.
	alertCtx, cancelFn := context.WithTimeout(c.Req.Context(), setting.AlertingEvaluationTimeout)
	defer cancelFn()

	alertExecCtx := eval.AlertExecCtx{Ctx: alertCtx, SignedInUser: c.SignedInUser}

	execResult, err := conditions.Execute(alertExecCtx, fromStr, toStr)
	if err != nil {
		return Error(400, "Failed to execute conditions", err)
	}

	evalResults, err := eval.EvaluateExecutionResult(execResult)
	if err != nil {
		return Error(400, "Failed to evaluate results", err)
	}

	frame := evalResults.AsDataFrame()
	df := tsdb.NewDecodedDataFrames([]*data.Frame{&frame})
	instances, err := df.Encoded()
	if err != nil {
		return Error(400, "Failed to encode result dataframes", err)
	}

	return JSON(200, util.DynMap{
		"instances": instances,
	})
}
......@@ -42,29 +42,29 @@ var (
type DsAccess string
type DataSource struct {
Id int64
OrgId int64
Version int
Id int64 `json:"id"`
OrgId int64 `json:"orgId"`
Version int `json:"version"`
Name string
Type string
Access DsAccess
Url string
Password string
User string
Database string
BasicAuth bool
BasicAuthUser string
BasicAuthPassword string
WithCredentials bool
IsDefault bool
JsonData *simplejson.Json
SecureJsonData securejsondata.SecureJsonData
ReadOnly bool
Uid string
Created time.Time
Updated time.Time
Name string `json:"name"`
Type string `json:"type"`
Access DsAccess `json:"access"`
Url string `json:"url"`
Password string `json:"password"`
User string `json:"user"`
Database string `json:"database"`
BasicAuth bool `json:"basicAuth"`
BasicAuthUser string `json:"basicAuthUser"`
BasicAuthPassword string `json:"basicAuthPassword"`
WithCredentials bool `json:"withCredentials"`
IsDefault bool `json:"isDefault"`
JsonData *simplejson.Json `json:"jsonData"`
SecureJsonData securejsondata.SecureJsonData `json:"secureJsonData"`
ReadOnly bool `json:"readOnly"`
Uid string `json:"uid"`
Created time.Time `json:"created"`
Updated time.Time `json:"updated"`
}
// DecryptedBasicAuthPassword returns data source basic auth password in plain text. It uses either deprecated
......
package eval
import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/bus"
"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana/pkg/models"
"github.com/grafana/grafana/pkg/plugins"
"github.com/grafana/grafana/pkg/registry"
"github.com/grafana/grafana/pkg/services/datasources"
"github.com/grafana/grafana/pkg/tsdb"
)
// minimalDashboard is the subset of the dashboard JSON that
// LoadAlertConditions decodes: for every panel its id, its datasource name
// and its raw query targets.
type minimalDashboard struct {
Panels []struct {
ID int64 `json:"id"`
Datasource string `json:"datasource"`
// Targets are kept as raw JSON because they are passed on as the query model.
Targets []*simplejson.Json `json:"targets"`
} `json:"panels"`
}
// AlertNG is the service type of this package. It is registered with the
// service registry in init.
type AlertNG struct {
// DatasourceCache is used by LoadAlertConditions to fetch the datasource of
// a panel query; the `inject` tag marks it for dependency injection.
DatasourceCache datasources.CacheService `inject:""`
}
// init registers an empty AlertNG instance with the service registry.
func init() {
registry.RegisterService(&AlertNG{})
}
// Init is currently a no-op: it performs no setup and always returns nil.
func (e *AlertNG) Init() error {
return nil
}
// AlertExecCtx carries the per-evaluation inputs passed to Conditions.Execute.
type AlertExecCtx struct {
// NOTE(review): the field name looks like a misspelling of
// "AlertDefinitionID"; it is not read by the code visible here.
AlertDefitionID int64
// SignedInUser is set as the user of the query request built by Execute.
SignedInUser *models.SignedInUser
// Ctx is the context handed to the transform plugin call in Execute.
Ctx context.Context
}
// Conditions describes what to evaluate: a set of queries and expressions and
// the refID of the one whose result is used as the alert condition.
type Conditions struct {
// Condition is the refID used by Execute to pick the result to return.
Condition string `json:"condition"`
// QueriesAndExpressions are all sent in a single request by Execute;
// IsValid requires at least one entry.
QueriesAndExpressions []tsdb.Query `json:"queriesAndExpressions"`
}
// ExecutionResult is the outcome of Conditions.Execute.
type ExecutionResult struct {
// NOTE(review): AlertDefinitionId is not populated by the code visible here.
AlertDefinitionId int64
// Error is set by Execute to the same error it returns when execution fails.
Error error
// Results holds the decoded data frames of the condition's query result.
Results data.Frames
}
// EvalResults is the list of per-instance results produced by
// EvaluateExecutionResult.
type EvalResults []EvalResult
// EvalResult is the evaluated state of a single alert instance.
type EvalResult struct {
// Instance holds the labels of the frame field the state was derived from.
Instance data.Labels
// State is one of the State constants (Normal, Warning, Critical, Error).
State State // Enum
}
// State is the evaluated state of an alert instance.
type State int

// The known alert instance states, in increasing numeric order starting at 0.
const (
	Normal State = iota
	Warning
	Critical
	Error
)

// String returns the name of the state. Values outside the declared constants
// are rendered as "State(<n>)" instead of panicking on an out-of-range index.
func (s State) String() string {
	switch s {
	case Normal:
		return "Normal"
	case Warning:
		return "Warning"
	case Critical:
		return "Critical"
	case Error:
		return "Error"
	default:
		return fmt.Sprintf("State(%d)", int(s))
	}
}
// IsValid reports whether the conditions can be executed, which currently
// only requires at least one query or expression to be present.
func (c Conditions) IsValid() bool {
	// TODO search for refIDs in QueriesAndExpressions
	hasQueries := len(c.QueriesAndExpressions) > 0
	return hasQueries
}
// LoadAlertConditions builds a Conditions object from the query targets of the
// panel panelID of dashboard dashboardID, using conditionRefID as the refID of
// the condition.
//
// The datasource is resolved once, from the first target (falling back to the
// panel datasource when the target names none), and reused for all targets of
// the panel. Targets that lack "datasource", "datasourceId", "orgId",
// "maxDataPoints" or "intervalMs" get defaults filled in because GEL requires
// them inside the query JSON; values already present in the target are kept.
//
// It returns an error when the dashboard or datasource cannot be loaded, when
// the dashboard JSON cannot be decoded, or when no datasource could be
// resolved for a target. If no panel matches panelID the returned Conditions
// has no queries.
func (ng *AlertNG) LoadAlertConditions(dashboardID int64, panelID int64, conditionRefID string, signedInUser *models.SignedInUser, skipCache bool) (*Conditions, error) {
	// get queries from the dashboard (because GEL expressions cannot be stored in alerts so far)
	getDashboardQuery := models.GetDashboardQuery{Id: dashboardID}
	if err := bus.Dispatch(&getDashboardQuery); err != nil {
		return nil, err
	}

	blob, err := getDashboardQuery.Result.Data.MarshalJSON()
	if err != nil {
		return nil, fmt.Errorf("Failed to marshal dashboard JSON: %w", err)
	}

	var dash minimalDashboard
	if err := json.Unmarshal(blob, &dash); err != nil {
		return nil, fmt.Errorf("Failed to unmarshal dashboard JSON: %w", err)
	}

	conditions := Conditions{}
	for _, p := range dash.Panels {
		if p.ID != panelID {
			continue
		}

		panelDatasource := p.Datasource
		var ds *models.DataSource
		for i, query := range p.Targets {
			refID := query.Get("refId").MustString("A")
			queryDatasource := query.Get("datasource").MustString()

			// Only the first target is used to resolve the datasource.
			if i == 0 && queryDatasource != "__expr__" {
				dsName := panelDatasource
				if queryDatasource != "" {
					dsName = queryDatasource
				}

				getDataSourceByNameQuery := models.GetDataSourceByNameQuery{Name: dsName, OrgId: getDashboardQuery.Result.OrgId}
				if err := bus.Dispatch(&getDataSourceByNameQuery); err != nil {
					return nil, err
				}

				ds, err = ng.DatasourceCache.GetDatasource(getDataSourceByNameQuery.Result.Id, signedInUser, skipCache)
				if err != nil {
					return nil, err
				}
			}

			if ds == nil {
				return nil, errors.New("No datasource reference found")
			}

			if queryDatasource == "" {
				query.Set("datasource", ds.Name)
			}

			// The fields below are numeric in the dashboard JSON, so their
			// presence must not be tested with MustString() (which yields ""
			// for any non-string value and would clobber stored values).
			if queryFieldUnset(query, "datasourceId") {
				query.Set("datasourceId", ds.Id)
			}

			if queryFieldUnset(query, "orgId") { // GEL requires orgID inside the query JSON
				// need to decide which organisation id is expected there
				// in grafana queries is passed the signed in user organisation id:
				// https://github.com/grafana/grafana/blob/34a355fe542b511ed02976523aa6716aeb00bde6/packages/grafana-runtime/src/utils/DataSourceWithBackend.ts#L60
				// but I think that it should be datasource org id instead
				query.Set("orgId", 0)
			}

			if queryFieldUnset(query, "maxDataPoints") { // GEL requires maxDataPoints inside the query JSON
				query.Set("maxDataPoints", 100)
			}

			// intervalMS is calculated by the frontend
			// should we do something similar?
			if queryFieldUnset(query, "intervalMs") { // GEL requires intervalMs inside the query JSON
				query.Set("intervalMs", 1000)
			}

			conditions.QueriesAndExpressions = append(conditions.QueriesAndExpressions, tsdb.Query{
				RefId:         refID,
				MaxDataPoints: query.Get("maxDataPoints").MustInt64(100),
				IntervalMs:    query.Get("intervalMs").MustInt64(1000),
				QueryType:     query.Get("queryType").MustString(""),
				Model:         query,
				DataSource:    ds,
			})
		}
	}

	conditions.Condition = conditionRefID
	return &conditions, nil
}

// queryFieldUnset reports whether key is missing from the query JSON, is null,
// or is an empty string. Any other value (including numbers) counts as set.
func queryFieldUnset(query *simplejson.Json, key string) bool {
	value := query.Get(key).Interface()
	if value == nil {
		return true
	}
	s, isString := value.(string)
	return isString && s == ""
}
// Execute sends all queries and expressions of the conditions to the transform
// plugin in a single request for the given time range, and returns the decoded
// data frames of the result whose refID equals conditions.Condition.
//
// It returns (nil, error) when the conditions are invalid. On any later
// failure it returns a non-nil ExecutionResult whose Error field holds the
// same error that is returned.
func (conditions *Conditions) Execute(ctx AlertExecCtx, fromStr, toStr string) (*ExecutionResult, error) {
	result := ExecutionResult{}
	if !conditions.IsValid() {
		return nil, fmt.Errorf("Invalid conditions")
	}

	// fail records err on the result and returns both, matching the contract
	// described above.
	fail := func(err error) (*ExecutionResult, error) {
		result.Error = err
		return &result, err
	}

	// Calling through a nil transform plugin would panic; report it instead.
	if plugins.Transform == nil {
		return fail(fmt.Errorf("Transform plugin is not loaded"))
	}

	request := &tsdb.TsdbQuery{
		TimeRange: tsdb.NewTimeRange(fromStr, toStr),
		Debug:     true,
		User:      ctx.SignedInUser,
		Queries:   make([]*tsdb.Query, 0, len(conditions.QueriesAndExpressions)),
	}
	for i := range conditions.QueriesAndExpressions {
		// Take the address of the slice element, not of a loop copy.
		request.Queries = append(request.Queries, &conditions.QueriesAndExpressions[i])
	}

	resp, err := plugins.Transform.Transform(ctx.Ctx, request)
	if err != nil {
		return fail(err)
	}
	if resp == nil {
		return fail(fmt.Errorf("No GEL results"))
	}

	conditionResult := resp.Results[conditions.Condition]
	if conditionResult == nil {
		return fail(fmt.Errorf("No GEL results"))
	}

	result.Results, err = conditionResult.Dataframes.Decoded()
	if err != nil {
		return fail(err)
	}

	return &result, nil
}
// EvaluateExecutionResult converts an ExecutionResult into one EvalResult per
// frame.
//
// Every frame must contain exactly one nullable float64 field with exactly one
// row, and the labels of that field must be unique across all frames. The
// state is Normal when the value is 0 and Critical otherwise (including when
// the value cannot be read as a float).
//
// It returns an error, rather than panicking, for a nil result, a nil frame,
// or a frame that has no fields or no rows.
func EvaluateExecutionResult(results *ExecutionResult) (EvalResults, error) {
	if results == nil {
		return nil, fmt.Errorf("Invalid execution result: nil")
	}

	evalResults := make([]EvalResult, 0, len(results.Results))
	labels := make(map[string]bool, len(results.Results))
	for i, f := range results.Results {
		if f == nil {
			return nil, fmt.Errorf("Invalid frame at index %v: nil", i)
		}

		rowLen, err := f.RowLen()
		if err != nil {
			return nil, fmt.Errorf("Unable to get frame row length: %w", err)
		}
		// Exactly one row is required: reading row 0 of an empty field panics.
		if rowLen != 1 {
			return nil, fmt.Errorf("Invalid frame %v: row length %v", f.Name, rowLen)
		}

		// Exactly one field is required: Fields[0] is accessed below.
		if len(f.Fields) != 1 {
			return nil, fmt.Errorf("Invalid frame %v: field length %v", f.Name, len(f.Fields))
		}

		field := f.Fields[0]
		if field.Type() != data.FieldTypeNullableFloat64 {
			return nil, fmt.Errorf("Invalid frame %v: field type %v", f.Name, field.Type())
		}

		labelsStr := field.Labels.String()
		if labels[labelsStr] {
			return nil, fmt.Errorf("Invalid frame %v: frames cannot uniquely be identified by its labels: %q", f.Name, labelsStr)
		}
		labels[labelsStr] = true

		state := Normal
		val, err := field.FloatAt(0)
		if err != nil || val != 0 {
			state = Critical
		}

		evalResults = append(evalResults, EvalResult{
			Instance: field.Labels,
			State:    state,
		})
	}
	return evalResults, nil
}
// AsDataFrame renders the EvalResults as an unnamed data.Frame intended for
// the table panel of the front end: one unnamed boolean field per instance,
// labelled with the instance labels and holding a single value that is true
// when the instance state is anything other than Normal.
// This may be temporary, as there might be a fair amount we want to display in
// the frontend, and it might not make sense to store that in data.Frame.
func (evalResults EvalResults) AsDataFrame() data.Frame {
	columns := make([]*data.Field, 0)
	for i := range evalResults {
		firing := evalResults[i].State != Normal
		column := data.NewField("", evalResults[i].Instance, []bool{firing})
		columns = append(columns, column)
	}
	return *data.NewFrame("", columns...)
}
......@@ -327,6 +327,11 @@ func (c Cfg) IsLiveEnabled() bool {
return c.FeatureToggles["live"]
}
// IsNgAlertEnabled reports whether the "ngalert" feature toggle is switched
// on. A toggle that is absent from FeatureToggles counts as disabled.
func (c Cfg) IsNgAlertEnabled() bool {
	enabled := c.FeatureToggles["ngalert"]
	return enabled
}
type CommandLineArgs struct {
Config string
HomePath string
......
......@@ -19,12 +19,12 @@ type TsdbQuery struct {
}
type Query struct {
RefId string
Model *simplejson.Json
DataSource *models.DataSource
MaxDataPoints int64
IntervalMs int64
QueryType string
RefId string `json:"refID"`
Model *simplejson.Json `json:"model,omitempty"`
DataSource *models.DataSource `json:"datasource"`
MaxDataPoints int64 `json:"maxDataPoints"`
IntervalMs int64 `json:"intervalMs"`
QueryType string `json:"queryType"`
}
type Response struct {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment