Pull out common calculation into a function

honeycombio · Mar 19, 2023 · 84028f7 · 84028f7
1 parent cb1cc4b
commit 84028f7
Show file tree

Hide file tree

Showing 7 changed files with 118 additions and 201 deletions.
diff --git a/avgsamplerate.go b/avgsamplerate.go
@@ -4,7 +4,6 @@ import (
  "encoding/json"
  "errors"
  "math"
- "sort"
  "sync"
  "time"
 )
@@ -34,7 +33,7 @@ type AvgSampleRate struct {
  MaxKeys int
 
  savedSampleRates map[string]int
- currentCounts map[string]int
+ currentCounts map[string]float64
 
  // haveData indicates that we have gotten a sample of traffic. Before we've
  // gotten any samples of traffic, we should we should use the default goal
@@ -62,7 +61,7 @@ func (a *AvgSampleRate) Start() error {
  if a.savedSampleRates == nil {
  a.savedSampleRates = make(map[string]int)
  }
- a.currentCounts = make(map[string]int)
+ a.currentCounts = make(map[string]float64)
  a.done = make(chan struct{})
 
  // spin up calculator
@@ -92,7 +91,7 @@ func (a *AvgSampleRate) updateMaps() {
  // make a local copy of the sample counters for calculation
  a.lock.Lock()
  tmpCounts := a.currentCounts
- a.currentCounts = make(map[string]int)
+ a.currentCounts = make(map[string]float64)
  a.lock.Unlock()
  // short circuit if no traffic
  numKeys := len(tmpCounts)
@@ -106,64 +105,20 @@ func (a *AvgSampleRate) updateMaps() {
 
  // Goal events to send this interval is the total count of received events
  // divided by the desired average sample rate
- var sumEvents int
+ var sumEvents float64
  for _, count := range tmpCounts {
  sumEvents += count
  }
- goalCount := float64(sumEvents) / float64(a.GoalSampleRate)
+ goalCount := sumEvents / float64(a.GoalSampleRate)
  // goalRatio is the goalCount divided by the sum of all the log values - it
  // determines what percentage of the total event space belongs to each key
  var logSum float64
  for _, count := range tmpCounts {
- logSum += math.Log10(float64(count))
+ logSum += math.Log10(count)
  }
  goalRatio := goalCount / logSum
 
- // must go through the keys in a fixed order to prevent rounding from changing
- // results
- keys := make([]string, len(tmpCounts))
- var i int
- for k := range tmpCounts {
- keys[i] = k
- i++
- }
- sort.Strings(keys)
-
- // goal number of events per key is goalRatio * key count, but never less than
- // one. If a key falls below its goal, it gets a sample rate of 1 and the
- // extra available events get passed on down the line.
- newSavedSampleRates := make(map[string]int)
- keysRemaining := len(tmpCounts)
- var extra float64
- for _, key := range keys {
- count := float64(tmpCounts[key])
- // take the max of 1 or my log10 share of the total
- goalForKey := math.Max(1, math.Log10(count)*goalRatio)
- // take this key's share of the extra and pass the rest along
- extraForKey := extra / float64(keysRemaining)
- goalForKey += extraForKey
- extra -= extraForKey
- keysRemaining--
- if count <= goalForKey {
- // there are fewer samples than the allotted number for this key. set
- // sample rate to 1 and redistribute the unused slots for future keys
- newSavedSampleRates[key] = 1
- extra += goalForKey - count
- } else {
- // there are more samples than the allotted number. Sample this key enough
- // to knock it under the limit (aka round up)
- rate := math.Ceil(count / goalForKey)
- // if counts are <= 1 we can get values for goalForKey that are +Inf
- // and subsequent division ends up with NaN. If that's the case,
- // fall back to 1
- if math.IsNaN(rate) {
- newSavedSampleRates[key] = 1
- } else {
- newSavedSampleRates[key] = int(rate)
- }
- extra += goalForKey - (count / float64(newSavedSampleRates[key]))
- }
- }
+ newSavedSampleRates := calculateSampleRates(goalRatio, tmpCounts)
  a.lock.Lock()
  defer a.lock.Unlock()
  a.savedSampleRates = newSavedSampleRates
@@ -186,10 +141,10 @@ func (a *AvgSampleRate) GetSampleRateMulti(key string, count int) int {
  if a.MaxKeys > 0 {
  // If a key already exists, increment it. If not, but we're under the limit, store a new key
  if _, found := a.currentCounts[key]; found || len(a.currentCounts) < a.MaxKeys {
- a.currentCounts[key] += count
+ a.currentCounts[key] += float64(count)
  }
  } else {
- a.currentCounts[key] += count
+ a.currentCounts[key] += float64(count)
  }
  if !a.haveData {
  return a.GoalSampleRate

diff --git a/avgsamplerate_test.go b/avgsamplerate_test.go
@@ -18,11 +18,11 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
  GoalSampleRate: 20,
  }
  tsts := []struct {
- inputSampleCount map[string]int
+ inputSampleCount map[string]float64
  expectedSavedSampleRates map[string]int
  }{
  {
- map[string]int{
+ map[string]float64{
  "one": 1,
  "two": 1,
  "three": 2,
@@ -48,7 +48,7 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
  },
  },
  {
- map[string]int{
+ map[string]float64{
  "one": 1,
  "two": 1,
  "three": 2,
@@ -72,7 +72,7 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
  },
  },
  {
- map[string]int{
+ map[string]float64{
  "one": 1,
  "two": 1,
  "three": 2,
@@ -88,7 +88,7 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
  },
  },
  {
- map[string]int{
+ map[string]float64{
  "one": 1000,
  "two": 1000,
  "three": 2000,
@@ -104,7 +104,7 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
  },
  },
  {
- map[string]int{
+ map[string]float64{
  "one": 6000,
  "two": 6000,
  "three": 6000,
@@ -120,19 +120,19 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
  },
  },
  {
- map[string]int{
+ map[string]float64{
  "one": 12000,
  },
  map[string]int{
  "one": 20,
  },
  },
  {
- map[string]int{},
+ map[string]float64{},
  map[string]int{},
  },
  {
- map[string]int{
+ map[string]float64{
  "one": 10,
  "two": 1,
  "three": 1,
@@ -189,18 +189,18 @@ func TestAvgSampleUpdateMaps(t *testing.T) {
 func TestAvgSampleGetSampleRateStartup(t *testing.T) {
  a := &AvgSampleRate{
  GoalSampleRate: 10,
- currentCounts: map[string]int{},
+ currentCounts: map[string]float64{},
  }
  rate := a.GetSampleRate("key")
  assert.Equal(t, rate, 10)
  // and the counters still get bumped
- assert.Equal(t, a.currentCounts["key"], 1)
+ assert.Equal(t, a.currentCounts["key"], 1.0)
 }
 
 func TestAvgSampleRace(t *testing.T) {
  a := &AvgSampleRate{
  GoalSampleRate: 2,
- currentCounts: map[string]int{},
+ currentCounts: map[string]float64{},
  savedSampleRates: map[string]int{},
  haveData: true,
  }
@@ -234,7 +234,7 @@ func TestAvgSampleRateGetSampleRate(t *testing.T) {
  a := &AvgSampleRate{
  haveData: true,
  }
- a.currentCounts = map[string]int{
+ a.currentCounts = map[string]float64{
  "one": 5,
  "two": 8,
  }
@@ -246,7 +246,7 @@ func TestAvgSampleRateGetSampleRate(t *testing.T) {
  tsts := []struct {
  inputKey string
  expectedSampleRate int
- expectedCurrentCountForKey int
+ expectedCurrentCountForKey float64
  }{
  {"one", 10, 6},
  {"two", 1, 9},
@@ -267,7 +267,7 @@ func TestAvgSampleRateMaxKeys(t *testing.T) {
  a := &AvgSampleRate{
  MaxKeys: 3,
  }
- a.currentCounts = map[string]int{
+ a.currentCounts = map[string]float64{
  "one": 1,
  "two": 1,
  }
@@ -276,7 +276,7 @@ func TestAvgSampleRateMaxKeys(t *testing.T) {
  // with MaxKeys 3, we are under the key limit, so three should get added
  a.GetSampleRate("three")
  assert.Equal(t, 3, len(a.currentCounts))
- assert.Equal(t, 1, a.currentCounts["three"])
+ assert.Equal(t, 1., a.currentCounts["three"])
  // Now we're at 3 keys - four should not be added
  a.GetSampleRate("four")
  assert.Equal(t, 3, len(a.currentCounts))
@@ -285,7 +285,7 @@ func TestAvgSampleRateMaxKeys(t *testing.T) {
  // We should still support bumping counts for existing keys
  a.GetSampleRate("one")
  assert.Equal(t, 3, len(a.currentCounts))
- assert.Equal(t, 2, a.currentCounts["one"])
+ assert.Equal(t, 2., a.currentCounts["one"])
 }
 
 func TestAvgSampleRateSaveState(t *testing.T) {
@@ -334,7 +334,7 @@ func TestAvgSampleRateHitsTargetRate(t *testing.T) {
  toleranceLower := float64(rate) - tolerance
 
  for _, keyCount := range testKeyCount {
- sampler := &AvgSampleRate{GoalSampleRate: rate, currentCounts: make(map[string]int)}
+ sampler := &AvgSampleRate{GoalSampleRate: rate, currentCounts: make(map[string]float64)}
 
  // build a consistent set of keys to use
  keys := make([]string, keyCount)
@@ -347,7 +347,7 @@ func TestAvgSampleRateHitsTargetRate(t *testing.T) {
  // so that count ranges are reasonable (i.e. they don't go from 1 to 10000 back to 100)
  base := math.Pow10(i%3 + 1)
  count := float64(((i%10)+1))*base + float64(mrand.Intn(int(base)))
- sampler.currentCounts[key] = int(count)
+ sampler.currentCounts[key] = count
  }
 
  // build an initial set of sample rates so we don't just return the target rate
@@ -390,7 +390,7 @@ func TestAvgSampleUpdateMapsSparseCounts(t *testing.T) {
  a.savedSampleRates = make(map[string]int)
 
  for i := 0; i <= 100; i++ {
- input := make(map[string]int)
+ input := make(map[string]float64)
  // simulate steady stream of input from one key
  input["largest_count"] = 20
  // sporadic keys with single counts that come and go with each interval