Commit f6a160b2 by Torkel Ödegaard

feat(alerting): testing alert is starting to work

parent 9e91aacd
......@@ -83,6 +83,9 @@ func AlertTest(c *middleware.Context, dto dtos.AlertTestCommand) Response {
}
if err := bus.Dispatch(&backendCmd); err != nil {
if validationErr, ok := err.(alerting.AlertValidationError); ok {
return ApiError(422, validationErr.Error(), nil)
}
return ApiError(500, "Failed to test rule", err)
}
......@@ -96,6 +99,10 @@ func AlertTest(c *middleware.Context, dto dtos.AlertTestCommand) Response {
dtoRes.Error = res.Error.Error()
}
for _, log := range res.Logs {
dtoRes.Logs = append(dtoRes.Logs, &dtos.AlertTestResultLog{Message: log.Message, Data: log.Data})
}
dtoRes.Timing = fmt.Sprintf("%1.3fs", res.GetDurationSeconds())
return Json(200, dtoRes)
......
......@@ -40,7 +40,13 @@ type AlertTestCommand struct {
}
type AlertTestResult struct {
Triggered bool `json:"triggerd"`
Timing string `json:"timing"`
Error string `json:"error"`
Triggered bool `json:"triggerd"`
Timing string `json:"timing"`
Error string `json:"error"`
Logs []*AlertTestResultLog `json:"logs"`
}
// AlertTestResultLog is a single log entry included in the response of an
// alert test request. AlertTest builds one entry for every log record found
// on the test result and returns them in AlertTestResult.Logs.
type AlertTestResultLog struct {
// Message is the human-readable description of the log entry.
Message string `json:"message"`
// Data is an arbitrary payload attached to the entry; it is serialized
// to JSON as-is.
Data interface{} `json:"data"`
}
......@@ -23,6 +23,14 @@ type AlertRule struct {
Notifications []int64
}
// AlertValidationError is an error type that carries a reason describing why
// an alert definition was rejected. The AlertTest API handler checks for this
// type and answers with HTTP 422 instead of a generic 500.
type AlertValidationError struct {
// Reason is the message returned by Error.
Reason string
}
// Error implements the error interface by returning Reason unchanged.
func (e AlertValidationError) Error() string {
return e.Reason
}
var (
ValueFormatRegex = regexp.MustCompile("^\\d+")
UnitFormatRegex = regexp.MustCompile("\\w{1}$")
......
......@@ -2,7 +2,6 @@ package alerting
import (
"encoding/json"
"errors"
"fmt"
"github.com/grafana/grafana/pkg/bus"
......@@ -50,15 +49,22 @@ func (c *QueryCondition) executeQuery(context *AlertResultContext) (tsdb.TimeSer
resp, err := c.HandleRequest(req)
if err != nil {
return nil, fmt.Errorf("Alerting: GetSeries() tsdb.HandleRequest() error %v", err)
return nil, fmt.Errorf("tsdb.HandleRequest() error %v", err)
}
for _, v := range resp.Results {
if v.Error != nil {
return nil, fmt.Errorf("Alerting: GetSeries() tsdb.HandleRequest() response error %v", v)
return nil, fmt.Errorf("tsdb.HandleRequest() response error %v", v)
}
result = append(result, v.Series...)
if context.IsTestRun {
context.Logs = append(context.Logs, &AlertResultLogEntry{
Message: "Query Condition Query Result",
Data: v.Series,
})
}
}
return result, nil
......@@ -154,17 +160,17 @@ func NewDefaultAlertEvaluator(model *simplejson.Json) (*DefaultAlertEvaluator, e
evaluator.Type = model.Get("type").MustString()
if evaluator.Type == "" {
return nil, errors.New("Alert evaluator missing type property")
return nil, AlertValidationError{Reason: "Evaluator missing type property"}
}
params := model.Get("params").MustArray()
if len(params) == 0 {
return nil, errors.New("Alert evaluator missing threshold parameter")
return nil, AlertValidationError{Reason: "Evaluator missing threshold parameter"}
}
threshold, ok := params[0].(json.Number)
if !ok {
return nil, errors.New("Alert evaluator has invalid threshold parameter")
return nil, AlertValidationError{Reason: "Evaluator has invalid threshold parameter"}
}
evaluator.Threshold, _ = threshold.Float64()
......
package alerting
import (
"fmt"
"time"
"github.com/benbjohnson/clock"
......@@ -18,7 +17,6 @@ type Engine struct {
ruleReader RuleReader
log log.Logger
responseHandler ResultHandler
alertJobTimeout time.Duration
}
func NewEngine() *Engine {
......@@ -31,7 +29,6 @@ func NewEngine() *Engine {
ruleReader: NewRuleReader(),
log: log.New("alerting.engine"),
responseHandler: NewResultHandler(),
alertJobTimeout: time.Second * 5,
}
return e
......@@ -82,32 +79,14 @@ func (e *Engine) execDispatch() {
for job := range e.execQueue {
log.Trace("Alerting: engine:execDispatch() starting job %s", job.Rule.Name)
job.Running = true
e.executeJob(job)
}
}
func (e *Engine) executeJob(job *AlertJob) {
startTime := time.Now()
resultChan := make(chan *AlertResultContext, 1)
go e.handler.Execute(job.Rule, resultChan)
select {
case <-time.After(e.alertJobTimeout):
e.resultQueue <- &AlertResultContext{
Error: fmt.Errorf("Timeout"),
Rule: job.Rule,
StartTime: startTime,
EndTime: time.Now(),
}
close(resultChan)
e.log.Debug("Job Execution timeout", "alertRuleId", job.Rule.Id)
case result := <-resultChan:
e.log.Debug("Job Execution done", "timing", result.GetDurationSeconds(), "ruleId", job.Rule.Id)
e.resultQueue <- result
}
job.Running = true
context := NewAlertResultContext(job.Rule)
e.handler.Execute(context)
job.Running = false
}
......
......@@ -129,7 +129,7 @@ func (e *DashAlertExtractor) GetAlerts() ([]*m.Alert, error) {
alerts = append(alerts, alert)
} else {
e.log.Error("Failed to extract alerts from dashboard", "error", err)
return nil, errors.New("Failed to extract alerts from dashboard")
return nil, err
}
}
}
......
package alerting
import (
"fmt"
"time"
"github.com/grafana/grafana/pkg/log"
......@@ -11,41 +12,50 @@ var (
)
type HandlerImpl struct {
log log.Logger
log log.Logger
alertJobTimeout time.Duration
}
func NewHandler() *HandlerImpl {
return &HandlerImpl{
log: log.New("alerting.executor"),
log: log.New("alerting.executor"),
alertJobTimeout: time.Second * 5,
}
}
func (e *HandlerImpl) Execute(rule *AlertRule, resultQueue chan *AlertResultContext) {
resultQueue <- e.eval(rule)
}
func (e *HandlerImpl) Execute(context *AlertResultContext) {
go e.eval(context)
func (e *HandlerImpl) eval(rule *AlertRule) *AlertResultContext {
result := &AlertResultContext{
StartTime: time.Now(),
Rule: rule,
select {
case <-time.After(e.alertJobTimeout):
context.Error = fmt.Errorf("Timeout")
context.EndTime = time.Now()
e.log.Debug("Job Execution timeout", "alertId", context.Rule.Id)
case <-context.DoneChan:
e.log.Debug("Job Execution done", "timing", context.GetDurationSeconds(), "alertId", context.Rule.Id)
}
for _, condition := range rule.Conditions {
condition.Eval(result)
}
func (e *HandlerImpl) eval(context *AlertResultContext) {
for _, condition := range context.Rule.Conditions {
condition.Eval(context)
// break if condition could not be evaluated
if result.Error != nil {
if context.Error != nil {
break
}
// break if result has not triggered yet
if result.Triggered == false {
if context.Triggered == false {
break
}
}
result.EndTime = time.Now()
return result
context.EndTime = time.Now()
context.DoneChan <- true
}
// func (e *HandlerImpl) executeQuery(job *AlertJob) (tsdb.TimeSeriesSlice, error) {
......
......@@ -7,7 +7,7 @@ import (
)
type AlertHandler interface {
Execute(rule *AlertRule, resultChan chan *AlertResultContext)
Execute(context *AlertResultContext)
}
type Scheduler interface {
......
......@@ -28,18 +28,37 @@ func (aj *AlertJob) IncRetry() {
// AlertResultContext carries the input (Rule) and the accumulated outcome of
// one alert rule evaluation. It is passed to every condition's Eval and to
// the handler's Execute.
type AlertResultContext struct {
// Triggered is checked after each condition; the handler stops evaluating
// further conditions as soon as it is false.
Triggered bool
// IsTestRun is set by testAlertRule. When true, the query condition
// appends its query results to Logs.
IsTestRun bool
Details []*AlertResultDetail
// Logs collects diagnostic entries produced during a test run.
Logs []*AlertResultLogEntry
// Error holds an evaluation failure; the handler stops evaluating
// conditions once it is non-nil and sets it to "Timeout" on timeout.
Error error
Description string
// StartTime is set when the context is created by NewAlertResultContext.
StartTime time.Time
// EndTime is set when evaluation finishes or times out.
EndTime time.Time
// Rule is the alert rule being evaluated.
Rule *AlertRule
// DoneChan receives a value from the handler's eval once all conditions
// have been processed; Execute waits on it.
DoneChan chan bool
// CancelChan is allocated by NewAlertResultContext.
// NOTE(review): no sender or receiver is visible in this change — confirm
// intended use.
CancelChan chan bool
}
// GetDurationSeconds returns the elapsed time between StartTime and EndTime,
// expressed in seconds.
//
// The difference is computed with Time.Sub so that whole seconds are part of
// the result. Time.Nanosecond only yields the sub-second offset of an instant
// (0..999999999); subtracting two such offsets drops the seconds component
// and produces wrong, possibly negative, values whenever the evaluation
// crosses a second boundary.
func (a *AlertResultContext) GetDurationSeconds() float64 {
	return a.EndTime.Sub(a.StartTime).Seconds()
}
// NewAlertResultContext builds the evaluation context for rule. The start
// time is stamped immediately, Logs starts out as an empty (non-nil) slice,
// and both signalling channels are created with a buffer of one.
func NewAlertResultContext(rule *AlertRule) *AlertResultContext {
	ctx := new(AlertResultContext)
	ctx.StartTime = time.Now()
	ctx.Rule = rule
	ctx.Logs = []*AlertResultLogEntry{}
	ctx.DoneChan = make(chan bool, 1)
	ctx.CancelChan = make(chan bool, 1)
	return ctx
}
// AlertResultLogEntry is one diagnostic record stored in
// AlertResultContext.Logs. The query condition appends an entry holding the
// returned series for each query result when the context is a test run.
type AlertResultLogEntry struct {
// Message is a short description of what Data contains.
Message string
// Data is an arbitrary payload associated with the message.
Data interface{}
}
type AlertResultDetail struct {
Value float64
Metric string
......
......@@ -2,7 +2,6 @@ package alerting
import (
"fmt"
"time"
"github.com/grafana/grafana/pkg/bus"
"github.com/grafana/grafana/pkg/components/simplejson"
......@@ -38,28 +37,21 @@ func handleAlertTestCommand(cmd *AlertTestCommand) error {
return err
}
if res, err := testAlertRule(rule); err != nil {
return err
} else {
cmd.Result = res
return nil
}
cmd.Result = testAlertRule(rule)
return nil
}
}
return fmt.Errorf("Could not find alert with panel id %d", cmd.PanelId)
}
func testAlertRule(rule *AlertRule) (*AlertResultContext, error) {
func testAlertRule(rule *AlertRule) *AlertResultContext {
handler := NewHandler()
resultChan := make(chan *AlertResultContext, 1)
handler.Execute(rule, resultChan)
context := NewAlertResultContext(rule)
context.IsTestRun = true
select {
case <-time.After(time.Second * 10):
return &AlertResultContext{Error: fmt.Errorf("Timeout")}, nil
case result := <-resultChan:
return result, nil
}
handler.Execute(context)
return context
}
......@@ -19,6 +19,7 @@ func addAlertMigrations(mg *Migrator) {
{Name: "settings", Type: DB_Text, Nullable: false},
{Name: "frequency", Type: DB_BigInt, Nullable: false},
{Name: "handler", Type: DB_BigInt, Nullable: false},
{Name: "severity", Type: DB_Text, Nullable: false},
{Name: "enabled", Type: DB_Bool, Nullable: false},
{Name: "created", Type: DB_DateTime, Nullable: false},
{Name: "updated", Type: DB_DateTime, Nullable: false},
......
......@@ -16,7 +16,7 @@ export class AlertSrv {
init() {
this.$rootScope.onAppEvent('alert-error', (e, alert) => {
this.set(alert[0], alert[1], 'error', 0);
this.set(alert[0], alert[1], 'error', 7000);
}, this.$rootScope);
this.$rootScope.onAppEvent('alert-warning', (e, alert) => {
......
......@@ -69,7 +69,7 @@ export class AlertTabCtrl {
initModel() {
var alert = this.alert = this.panel.alert = this.panel.alert || {};
alert.conditions = [];
alert.conditions = alert.conditions || [];
if (alert.conditions.length === 0) {
alert.conditions.push(this.buildDefaultCondition());
}
......@@ -149,8 +149,8 @@ export class AlertTabCtrl {
panelId: this.panelCtrl.panel.id,
};
this.backendSrv.post('/api/alerts/test', payload).then(res => {
this.testResult = res;
return this.backendSrv.post('/api/alerts/test', payload).then(res => {
this.testResult = angular.toJson(res, true);
this.testing = false;
});
}
......
......@@ -131,6 +131,12 @@
Evaluating rule <i class="fa fa-spinner fa-spin"></i>
</div>
<div class="gf-form-group" ng-if="ctrl.testResult">
<pre>
{{ctrl.testResult}}
</pre>
</div>
<div class="gf-form-group" ng-if="!ctrl.alert.enabled">
<div class="gf-form-button-row">
<button class="btn btn-inverse" ng-click="ctrl.enable()">
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment