Skip to content

Commit

Permalink
Pull out common calculation into a function
Browse files Browse the repository at this point in the history
  • Loading branch information
kentquirk committed Mar 19, 2023
1 parent cb1cc4b commit 84028f7
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 201 deletions.
63 changes: 9 additions & 54 deletions avgsamplerate.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"encoding/json"
"errors"
"math"
"sort"
"sync"
"time"
)
Expand Down Expand Up @@ -34,7 +33,7 @@ type AvgSampleRate struct {
MaxKeys int

savedSampleRates map[string]int
currentCounts map[string]int
currentCounts map[string]float64

// haveData indicates that we have gotten a sample of traffic. Before we've
// gotten any samples of traffic, we should we should use the default goal
Expand Down Expand Up @@ -62,7 +61,7 @@ func (a *AvgSampleRate) Start() error {
if a.savedSampleRates == nil {
a.savedSampleRates = make(map[string]int)
}
a.currentCounts = make(map[string]int)
a.currentCounts = make(map[string]float64)
a.done = make(chan struct{})

// spin up calculator
Expand Down Expand Up @@ -92,7 +91,7 @@ func (a *AvgSampleRate) updateMaps() {
// make a local copy of the sample counters for calculation
a.lock.Lock()
tmpCounts := a.currentCounts
a.currentCounts = make(map[string]int)
a.currentCounts = make(map[string]float64)
a.lock.Unlock()
// short circuit if no traffic
numKeys := len(tmpCounts)
Expand All @@ -106,64 +105,20 @@ func (a *AvgSampleRate) updateMaps() {

// Goal events to send this interval is the total count of received events
// divided by the desired average sample rate
var sumEvents int
var sumEvents float64
for _, count := range tmpCounts {
sumEvents += count
}
goalCount := float64(sumEvents) / float64(a.GoalSampleRate)
goalCount := sumEvents / float64(a.GoalSampleRate)
// goalRatio is the goalCount divided by the sum of all the log values - it
// determines what percentage of the total event space belongs to each key
var logSum float64
for _, count := range tmpCounts {
logSum += math.Log10(float64(count))
logSum += math.Log10(count)
}
goalRatio := goalCount / logSum

// must go through the keys in a fixed order to prevent rounding from changing
// results
keys := make([]string, len(tmpCounts))
var i int
for k := range tmpCounts {
keys[i] = k
i++
}
sort.Strings(keys)

// goal number of events per key is goalRatio * key count, but never less than
// one. If a key falls below its goal, it gets a sample rate of 1 and the
// extra available events get passed on down the line.
newSavedSampleRates := make(map[string]int)
keysRemaining := len(tmpCounts)
var extra float64
for _, key := range keys {
count := float64(tmpCounts[key])
// take the max of 1 or my log10 share of the total
goalForKey := math.Max(1, math.Log10(count)*goalRatio)
// take this key's share of the extra and pass the rest along
extraForKey := extra / float64(keysRemaining)
goalForKey += extraForKey
extra -= extraForKey
keysRemaining--
if count <= goalForKey {
// there are fewer samples than the allotted number for this key. set
// sample rate to 1 and redistribute the unused slots for future keys
newSavedSampleRates[key] = 1
extra += goalForKey - count
} else {
// there are more samples than the allotted number. Sample this key enough
// to knock it under the limit (aka round up)
rate := math.Ceil(count / goalForKey)
// if counts are <= 1 we can get values for goalForKey that are +Inf
// and subsequent division ends up with NaN. If that's the case,
// fall back to 1
if math.IsNaN(rate) {
newSavedSampleRates[key] = 1
} else {
newSavedSampleRates[key] = int(rate)
}
extra += goalForKey - (count / float64(newSavedSampleRates[key]))
}
}
newSavedSampleRates := calculateSampleRates(goalRatio, tmpCounts)
a.lock.Lock()
defer a.lock.Unlock()
a.savedSampleRates = newSavedSampleRates
Expand All @@ -186,10 +141,10 @@ func (a *AvgSampleRate) GetSampleRateMulti(key string, count int) int {
if a.MaxKeys > 0 {
// If a key already exists, increment it. If not, but we're under the limit, store a new key
if _, found := a.currentCounts[key]; found || len(a.currentCounts) < a.MaxKeys {
a.currentCounts[key] += count
a.currentCounts[key] += float64(count)
}
} else {
a.currentCounts[key] += count
a.currentCounts[key] += float64(count)
}
if !a.haveData {
return a.GoalSampleRate
Expand Down
40 changes: 20 additions & 20 deletions avgsamplerate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
GoalSampleRate: 20,
}
tsts := []struct {
inputSampleCount map[string]int
inputSampleCount map[string]float64
expectedSavedSampleRates map[string]int
}{
{
map[string]int{
map[string]float64{
"one": 1,
"two": 1,
"three": 2,
Expand All @@ -48,7 +48,7 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
},
},
{
map[string]int{
map[string]float64{
"one": 1,
"two": 1,
"three": 2,
Expand All @@ -72,7 +72,7 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
},
},
{
map[string]int{
map[string]float64{
"one": 1,
"two": 1,
"three": 2,
Expand All @@ -88,7 +88,7 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
},
},
{
map[string]int{
map[string]float64{
"one": 1000,
"two": 1000,
"three": 2000,
Expand All @@ -104,7 +104,7 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
},
},
{
map[string]int{
map[string]float64{
"one": 6000,
"two": 6000,
"three": 6000,
Expand All @@ -120,19 +120,19 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
},
},
{
map[string]int{
map[string]float64{
"one": 12000,
},
map[string]int{
"one": 20,
},
},
{
map[string]int{},
map[string]float64{},
map[string]int{},
},
{
map[string]int{
map[string]float64{
"one": 10,
"two": 1,
"three": 1,
Expand Down Expand Up @@ -189,18 +189,18 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
func TestAvgSampleGetSampleRateStartup(t *testing.T) {
a := &AvgSampleRate{
GoalSampleRate: 10,
currentCounts: map[string]int{},
currentCounts: map[string]float64{},
}
rate := a.GetSampleRate("key")
assert.Equal(t, rate, 10)
// and the counters still get bumped
assert.Equal(t, a.currentCounts["key"], 1)
assert.Equal(t, a.currentCounts["key"], 1.0)
}

func TestAvgSampleRace(t *testing.T) {
a := &AvgSampleRate{
GoalSampleRate: 2,
currentCounts: map[string]int{},
currentCounts: map[string]float64{},
savedSampleRates: map[string]int{},
haveData: true,
}
Expand Down Expand Up @@ -234,7 +234,7 @@ func TestAvgSampleRateGetSampleRate(t *testing.T) {
a := &AvgSampleRate{
haveData: true,
}
a.currentCounts = map[string]int{
a.currentCounts = map[string]float64{
"one": 5,
"two": 8,
}
Expand All @@ -246,7 +246,7 @@ func TestAvgSampleRateGetSampleRate(t *testing.T) {
tsts := []struct {
inputKey string
expectedSampleRate int
expectedCurrentCountForKey int
expectedCurrentCountForKey float64
}{
{"one", 10, 6},
{"two", 1, 9},
Expand All @@ -267,7 +267,7 @@ func TestAvgSampleRateMaxKeys(t *testing.T) {
a := &AvgSampleRate{
MaxKeys: 3,
}
a.currentCounts = map[string]int{
a.currentCounts = map[string]float64{
"one": 1,
"two": 1,
}
Expand All @@ -276,7 +276,7 @@ func TestAvgSampleRateMaxKeys(t *testing.T) {
// with MaxKeys 3, we are under the key limit, so three should get added
a.GetSampleRate("three")
assert.Equal(t, 3, len(a.currentCounts))
assert.Equal(t, 1, a.currentCounts["three"])
assert.Equal(t, 1., a.currentCounts["three"])
// Now we're at 3 keys - four should not be added
a.GetSampleRate("four")
assert.Equal(t, 3, len(a.currentCounts))
Expand All @@ -285,7 +285,7 @@ func TestAvgSampleRateMaxKeys(t *testing.T) {
// We should still support bumping counts for existing keys
a.GetSampleRate("one")
assert.Equal(t, 3, len(a.currentCounts))
assert.Equal(t, 2, a.currentCounts["one"])
assert.Equal(t, 2., a.currentCounts["one"])
}

func TestAvgSampleRateSaveState(t *testing.T) {
Expand Down Expand Up @@ -334,7 +334,7 @@ func TestAvgSampleRateHitsTargetRate(t *testing.T) {
toleranceLower := float64(rate) - tolerance

for _, keyCount := range testKeyCount {
sampler := &AvgSampleRate{GoalSampleRate: rate, currentCounts: make(map[string]int)}
sampler := &AvgSampleRate{GoalSampleRate: rate, currentCounts: make(map[string]float64)}

// build a consistent set of keys to use
keys := make([]string, keyCount)
Expand All @@ -347,7 +347,7 @@ func TestAvgSampleRateHitsTargetRate(t *testing.T) {
// so that count ranges are reasonable (i.e. they don't go from 1 to 10000 back to 100)
base := math.Pow10(i%3 + 1)
count := float64(((i%10)+1))*base + float64(mrand.Intn(int(base)))
sampler.currentCounts[key] = int(count)
sampler.currentCounts[key] = count
}

// build an initial set of sample rates so we don't just return the target rate
Expand Down Expand Up @@ -390,7 +390,7 @@ func TestAvgSampleUpdateMapsSparseCounts(t *testing.T) {
a.savedSampleRates = make(map[string]int)

for i := 0; i <= 100; i++ {
input := make(map[string]int)
input := make(map[string]float64)
// simulate steady stream of input from one key
input["largest_count"] = 20
// sporadic keys with single counts that come and go with each interval
Expand Down
Loading

0 comments on commit 84028f7

Please sign in to comment.