Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmd/bosun: serving temporary configs from redis. #1593

Merged
merged 2 commits into from
Feb 12, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions cmd/bosun/database/config_data.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package database

import (
"crypto/md5"
"encoding/base64"

"bosun.org/_third_party/github.com/garyburd/redigo/redis"

"bosun.org/collect"
"bosun.org/opentsdb"
)

// ConfigDataAccess stores and retrieves temporary config text, addressed by a
// hash string.
type ConfigDataAccess interface {
// SaveTempConfig stores text and returns the hash it can later be fetched with.
SaveTempConfig(text string) (hash string, err error)
// GetTempConfig returns the config text previously stored under hash.
GetTempConfig(hash string) (text string, err error)
}

// Configs returns the ConfigDataAccess view of d. The receiver itself is
// returned, since *dataAccess provides the interface's methods.
func (d *dataAccess) Configs() ConfigDataAccess {
return d
}

// configLifetime is the time-to-live, in seconds, applied to temporary configs.
// It is passed to the Redis "SET ... EX" option and the "EXPIRE" command, both
// of which interpret their argument as seconds.
const configLifetime = 60 * 60 * 24 * 14 // 2 weeks

// SaveTempConfig stores text in Redis under the key "tempConfig:<hash>" with an
// expiry of configLifetime, and returns the hash. The hash is the standard
// base64 encoding of the first 8 bytes of the MD5 digest of text, so identical
// text always maps to the same key. The hash is returned even when the write
// fails, alongside the error.
func (d *dataAccess) SaveTempConfig(text string) (string, error) {
	defer collect.StartTimer("redis", opentsdb.TagSet{"op": "SaveTempConfig"})()
	conn := d.GetConnection()
	defer conn.Close()

	digest := md5.Sum([]byte(text))
	hash := base64.StdEncoding.EncodeToString(digest[:8])
	key := "tempConfig:" + hash
	if _, err := conn.Do("SET", key, text, "EX", configLifetime); err != nil {
		return hash, err
	}
	return hash, nil
}

// GetTempConfig fetches the config text stored under "tempConfig:<hash>".
// A successful read also resets the key's expiry to configLifetime, so configs
// that keep being viewed stay alive. If the read fails, an empty string and the
// error are returned; if only the expiry refresh fails, the text is returned
// together with that error.
func (d *dataAccess) GetTempConfig(hash string) (string, error) {
	defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetTempConfig"})()
	conn := d.GetConnection()
	defer conn.Close()

	redisKey := "tempConfig:" + hash
	text, err := redis.String(conn.Do("GET", redisKey))
	if err != nil {
		return "", err
	}
	// Touch the key so its lifetime restarts from this access.
	if _, err = conn.Do("EXPIRE", redisKey, configLifetime); err != nil {
		return text, err
	}
	return text, nil
}
2 changes: 2 additions & 0 deletions cmd/bosun/database/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ import (
// DataAccess is the core data access interface for everything sched needs.
// Each method returns the accessor for one area of stored data.
type DataAccess interface {
Metadata() MetadataDataAccess
Configs() ConfigDataAccess
Search() SearchDataAccess
Errors() ErrorDataAccess
State() StateDataAccess
Silence() SilenceDataAccess
Notifications() NotificationDataAccess
}

type MetadataDataAccess interface {
Expand Down
133 changes: 133 additions & 0 deletions cmd/bosun/database/notification_data.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
package database

import (
"fmt"
"strings"
"time"

"bosun.org/_third_party/github.com/garyburd/redigo/redis"

"bosun.org/collect"
"bosun.org/models"
"bosun.org/opentsdb"
"bosun.org/slog"
)

/*

pendingNotifications: ZSET scored by due timestamp (unix seconds); each member is "ak:notification"

notsByAlert:alert SET of notification names possible per alert. Used to clear pending notifications by alert key.

*/

const (
// pendingNotificationsKey is the Redis key of the sorted set holding pending
// notifications. Members are "alertkey:notification" strings scored by their
// due time in unix seconds.
pendingNotificationsKey = "pendingNotifications"
)

// notsByAlertKeyKey builds the Redis key of the set that records which
// notifications have been queued for an alert. The key is derived from
// ak.Name(), not from the full alert key.
func notsByAlertKeyKey(ak models.AlertKey) string {
	alertName := ak.Name()
	return fmt.Sprintf("notsByAlert:%s", alertName)
}

// NotificationDataAccess manages the queue of pending notifications.
type NotificationDataAccess interface {
// InsertNotification queues notification for alert key ak, due at dueAt.
InsertNotification(ak models.AlertKey, notification string, dueAt time.Time) error

// GetDueNotifications returns the notifications that are currently due or
// past due, grouped by alert key and then by notification name, with each
// one's due time. It does not delete them.
GetDueNotifications() (map[models.AlertKey]map[string]time.Time, error)

// ClearNotificationsBefore clears all notifications due on or before the
// given timestamp. The intended use is to pass the latest due time returned
// from GetDueNotifications once those notifications have been processed.
ClearNotificationsBefore(time.Time) error

// ClearNotifications clears the pending notifications for alert key ak.
ClearNotifications(ak models.AlertKey) error

// GetNextNotificationTime returns the due time of the earliest pending
// notification.
GetNextNotificationTime() (time.Time, error)
}

// Notifications returns the NotificationDataAccess view of d. The receiver
// itself is returned, since *dataAccess provides the interface's methods.
func (d *dataAccess) Notifications() NotificationDataAccess {
return d
}

// InsertNotification queues notification for alert key ak to fire at dueAt.
//
// Two Redis writes are made, in order:
//   - the member "ak:notification" is added to the pending sorted set, scored
//     by dueAt as UTC unix seconds;
//   - the notification name is added to the per-alert set so that the pending
//     entry can later be found and removed by alert key.
//
// The writes are separate commands; if the first fails the second is skipped.
func (d *dataAccess) InsertNotification(ak models.AlertKey, notification string, dueAt time.Time) error {
	defer collect.StartTimer("redis", opentsdb.TagSet{"op": "InsertNotification"})()
	conn := d.GetConnection()
	defer conn.Close()

	if _, zaddErr := conn.Do("ZADD", pendingNotificationsKey, dueAt.UTC().Unix(), fmt.Sprintf("%s:%s", ak, notification)); zaddErr != nil {
		return slog.Wrap(zaddErr)
	}

	_, saddErr := conn.Do("SADD", notsByAlertKeyKey(ak), notification)
	return slog.Wrap(saddErr)
}

// GetDueNotifications returns every pending notification whose due time is at
// or before now, grouped by alert key and then by notification name, mapped to
// its due time in UTC. Nothing is removed from Redis.
//
// Stored members have the form "alertkey:notification"; they are split at the
// last ':' so that colons inside the alert key are preserved. Members with no
// ':' at all are silently skipped.
func (d *dataAccess) GetDueNotifications() (map[models.AlertKey]map[string]time.Time, error) {
	defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetDueNotifications"})()
	conn := d.GetConnection()
	defer conn.Close()

	scores, err := redis.Int64Map(conn.Do("ZRANGEBYSCORE", pendingNotificationsKey, 0, time.Now().UTC().Unix(), "WITHSCORES"))
	if err != nil {
		return nil, slog.Wrap(err)
	}

	due := make(map[models.AlertKey]map[string]time.Time)
	for member, unixSecs := range scores {
		sep := strings.LastIndex(member, ":")
		if sep < 0 {
			continue
		}
		alertKey := models.AlertKey(member[:sep])
		name := member[sep+1:]

		byName, ok := due[alertKey]
		if !ok {
			byName = make(map[string]time.Time)
			due[alertKey] = byName
		}
		byName[name] = time.Unix(unixSecs, 0).UTC()
	}
	return due, nil
}

// ClearNotificationsBefore removes every pending notification whose due time,
// as UTC unix seconds, lies between 0 and t inclusive.
func (d *dataAccess) ClearNotificationsBefore(t time.Time) error {
	defer collect.StartTimer("redis", opentsdb.TagSet{"op": "ClearNotificationsBefore"})()
	conn := d.GetConnection()
	defer conn.Close()

	cutoff := t.UTC().Unix()
	_, removeErr := conn.Do("ZREMRANGEBYSCORE", pendingNotificationsKey, 0, cutoff)
	return slog.Wrap(removeErr)
}

// ClearNotifications removes all pending notifications for alert key ak.
//
// The notification names recorded for the alert are read from its per-alert
// set, turned back into "ak:notification" members, and removed from the
// pending sorted set in a single ZREM. The per-alert set itself is left
// untouched. If no names are recorded, nothing is removed and nil is returned.
func (d *dataAccess) ClearNotifications(ak models.AlertKey) error {
	defer collect.StartTimer("redis", opentsdb.TagSet{"op": "ClearNotifications"})()
	conn := d.GetConnection()
	defer conn.Close()

	names, err := redis.Strings(conn.Do("SMEMBERS", notsByAlertKeyKey(ak)))
	if err != nil {
		return slog.Wrap(err)
	}
	if len(names) == 0 {
		return nil
	}

	// First argument is the sorted-set key, followed by one member per name.
	zremArgs := make([]interface{}, 0, len(names)+1)
	zremArgs = append(zremArgs, pendingNotificationsKey)
	for i := range names {
		zremArgs = append(zremArgs, fmt.Sprintf("%s:%s", ak, names[i]))
	}

	_, err = conn.Do("ZREM", zremArgs...)
	return slog.Wrap(err)
}

// GetNextNotificationTime returns the due time (UTC) of the earliest pending
// notification, read as the lowest-scored member of the pending sorted set.
// When nothing is pending it returns one hour from now, truncated to the
// second.
func (d *dataAccess) GetNextNotificationTime() (time.Time, error) {
	defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetNextNotificationTime"})()
	conn := d.GetConnection()
	defer conn.Close()

	earliest, err := redis.Int64Map(conn.Do("ZRANGE", pendingNotificationsKey, 0, 0, "WITHSCORES"))
	if err != nil {
		return time.Time{}, slog.Wrap(err)
	}

	if len(earliest) == 0 {
		// default time is one hour from now if no pending notifications exist
		return time.Now().UTC().Add(time.Hour).Truncate(time.Second), nil
	}

	// The range 0..0 asks for a single member, so this loop simply extracts
	// its score.
	var next time.Time
	for _, unixSecs := range earliest {
		next = time.Unix(unixSecs, 0).UTC()
	}
	return next, nil
}
68 changes: 68 additions & 0 deletions cmd/bosun/database/test/notifications_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package dbtest

import (
"testing"
"time"

"bosun.org/models"
)

// TestNotifications_RoundTrip exercises the notification queue end to end:
// it checks the default next-notification time on an empty queue, inserts three
// notifications (one past due, one due in a minute, one due in an hour), and
// then verifies the next time and due set as entries are cleared by timestamp
// and by alert key.
//
// NOTE(review): the expected times are captured up front and compared with !=
// against values derived from a later time.Now() call inside
// GetNextNotificationTime, so the empty-queue checks look like they could fail
// if a second boundary is crossed in between — confirm.
func TestNotifications_RoundTrip(t *testing.T) {

nd := testData.Notifications()
// All reference times are truncated to whole seconds, matching the
// second-granularity timestamps the data layer returns.
notTime := time.Now().UTC().Add(-10 * time.Hour).Truncate(time.Second)
future := time.Now().UTC().Add(time.Hour).Truncate(time.Second)
oneMin := time.Now().UTC().Add(time.Minute).Truncate(time.Second)

// with nothing pending, next time should be an hour from now
next, err := nd.GetNextNotificationTime()
check(t, err)
if next != future {
t.Fatalf("wrong next time. %s != %s", next, future)
}

// add notifications: a is already past due, b is due in a minute, c in an hour
err = nd.InsertNotification(models.AlertKey("notak{foo=a}"), "chat", notTime)
check(t, err)
err = nd.InsertNotification(models.AlertKey("notak{foo=b}"), "chat", oneMin)
check(t, err)
err = nd.InsertNotification(models.AlertKey("notak{foo=c}"), "chat", future)
check(t, err)

// next time should be the earliest inserted due time
next, err = nd.GetNextNotificationTime()
check(t, err)
if next != notTime {
t.Fatalf("wrong next time. %s != %s", next, notTime)
}

// make sure only one is due (the past-due one)
due, err := nd.GetDueNotifications()
check(t, err)
if len(due) != 1 {
t.Fatalf("Wrong number of due notifications. %d != %d", len(due), 1)
}

// next time should still be correct: reading due notifications must not delete them
next, err = nd.GetNextNotificationTime()
check(t, err)
if next != notTime {
t.Fatalf("wrong next time. %s != %s", next, notTime)
}

// clearing up to notTime removes the past-due entry
check(t, nd.ClearNotificationsBefore(notTime))
// next time should be 1 minute
next, err = nd.GetNextNotificationTime()
check(t, err)
if next != oneMin {
t.Fatalf("wrong next time. %s != %s", next, oneMin)
}

// clearing by alert key removes the one-minute entry
check(t, nd.ClearNotifications(models.AlertKey("notak{foo=b}")))
// next time should be 1 hour
next, err = nd.GetNextNotificationTime()
check(t, err)
if next != future {
t.Fatalf("wrong next time. %s != %s", next, future)
}
}
7 changes: 3 additions & 4 deletions cmd/bosun/sched/alertRunner.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ func (s *Schedule) Run() error {
go s.PingHosts()
}
go s.dispatchNotifications()
go s.performSave()
go s.updateCheckContext()
for _, a := range s.Conf.Alerts {
go s.RunAlert(a)
Expand All @@ -28,7 +27,7 @@ func (s *Schedule) Run() error {
}
func (s *Schedule) updateCheckContext() {
for {
ctx := &checkContext{time.Now(), cache.New(0)}
ctx := &checkContext{utcNow(), cache.New(0)}
s.ctx = ctx
time.Sleep(s.Conf.CheckFrequency)
s.Lock("CollectStates")
Expand All @@ -40,7 +39,7 @@ func (s *Schedule) RunAlert(a *conf.Alert) {
for {
wait := time.After(s.Conf.CheckFrequency * time.Duration(a.RunEvery))
s.checkAlert(a)
s.LastCheck = time.Now()
s.LastCheck = utcNow()
<-wait
}
}
Expand All @@ -51,7 +50,7 @@ func (s *Schedule) checkAlert(a *conf.Alert) {
rh := s.NewRunHistory(checkTime, checkCache)
s.CheckAlert(nil, rh, a)

start := time.Now()
start := utcNow()
s.RunHistory(rh)
slog.Infof("runHistory on %s took %v\n", a.Name, time.Since(start))
}
Loading