2015-10-07 22:11:52 +00:00
|
|
|
package statsd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"math"
|
|
|
|
"math/rand"
|
|
|
|
"sort"
|
|
|
|
)
|
|
|
|
|
|
|
|
// defaultPercentileLimit is the capacity AddValue gives the percentile sample
// buffer when RunningStats.PercLimit has not been set.
const defaultPercentileLimit = 1000
|
|
|
|
|
|
|
|
// RunningStats accumulates values one at a time and reports their mean,
// variance, standard deviation, sum, lower bound, upper bound and count. It
// can also report estimated percentiles from a bounded buffer of samples.
//
// Mean and variance follow the shifted-data incremental algorithm from
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
type RunningStats struct {
	k   float64 // shift constant; AddValue sets it to the first value seen
	n   int64   // number of values added
	ex  float64 // running sum of (v - k)
	ex2 float64 // running sum of (v - k)^2

	// perc is the sample buffer behind Percentile. It holds at most PercLimit
	// values; once it is full, each new value overwrites a randomly chosen
	// slot, which is why the reported percentiles are only estimates.
	perc      []float64
	PercLimit int

	sum float64 // running sum of all values added

	lower float64 // smallest value added so far
	upper float64 // largest value added so far

	// sorted records whether perc is currently in sorted order, so that an
	// already-sorted buffer is never sorted again (re-sorting sorted data can
	// perform very badly).
	sorted bool
}
|
|
|
|
|
|
|
|
func (rs *RunningStats) AddValue(v float64) {
|
|
|
|
// Whenever a value is added, the list is no longer sorted.
|
|
|
|
rs.sorted = false
|
|
|
|
|
|
|
|
if rs.n == 0 {
|
|
|
|
rs.k = v
|
|
|
|
rs.upper = v
|
|
|
|
rs.lower = v
|
|
|
|
if rs.PercLimit == 0 {
|
|
|
|
rs.PercLimit = defaultPercentileLimit
|
|
|
|
}
|
|
|
|
rs.perc = make([]float64, 0, rs.PercLimit)
|
|
|
|
}
|
|
|
|
|
|
|
|
// These are used for the running mean and variance
|
2019-05-14 23:20:35 +00:00
|
|
|
rs.n++
|
2015-10-07 22:11:52 +00:00
|
|
|
rs.ex += v - rs.k
|
|
|
|
rs.ex2 += (v - rs.k) * (v - rs.k)
|
|
|
|
|
2017-09-14 22:21:54 +00:00
|
|
|
// add to running sum
|
|
|
|
rs.sum += v
|
|
|
|
|
2015-10-07 22:11:52 +00:00
|
|
|
// track upper and lower bounds
|
|
|
|
if v > rs.upper {
|
|
|
|
rs.upper = v
|
|
|
|
} else if v < rs.lower {
|
|
|
|
rs.lower = v
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(rs.perc) < rs.PercLimit {
|
|
|
|
rs.perc = append(rs.perc, v)
|
|
|
|
} else {
|
|
|
|
// Reached limit, choose random index to overwrite in the percentile array
|
|
|
|
rs.perc[rand.Intn(len(rs.perc))] = v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (rs *RunningStats) Mean() float64 {
|
|
|
|
return rs.k + rs.ex/float64(rs.n)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (rs *RunningStats) Variance() float64 {
|
|
|
|
return (rs.ex2 - (rs.ex*rs.ex)/float64(rs.n)) / float64(rs.n)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (rs *RunningStats) Stddev() float64 {
|
|
|
|
return math.Sqrt(rs.Variance())
|
|
|
|
}
|
|
|
|
|
2017-09-14 22:21:54 +00:00
|
|
|
func (rs *RunningStats) Sum() float64 {
|
|
|
|
return rs.sum
|
|
|
|
}
|
|
|
|
|
2015-10-07 22:11:52 +00:00
|
|
|
func (rs *RunningStats) Upper() float64 {
|
|
|
|
return rs.upper
|
|
|
|
}
|
|
|
|
|
|
|
|
func (rs *RunningStats) Lower() float64 {
|
|
|
|
return rs.lower
|
|
|
|
}
|
|
|
|
|
|
|
|
func (rs *RunningStats) Count() int64 {
|
|
|
|
return rs.n
|
|
|
|
}
|
|
|
|
|
2019-07-09 23:50:20 +00:00
|
|
|
func (rs *RunningStats) Percentile(n float64) float64 {
|
2015-10-07 22:11:52 +00:00
|
|
|
if n > 100 {
|
|
|
|
n = 100
|
|
|
|
}
|
|
|
|
|
|
|
|
if !rs.sorted {
|
|
|
|
sort.Float64s(rs.perc)
|
|
|
|
rs.sorted = true
|
|
|
|
}
|
|
|
|
|
2019-07-09 23:50:20 +00:00
|
|
|
i := float64(len(rs.perc)) * n / float64(100)
|
2017-09-14 22:27:42 +00:00
|
|
|
return rs.perc[clamp(i, 0, len(rs.perc)-1)]
|
|
|
|
}
|
|
|
|
|
2019-07-09 23:50:20 +00:00
|
|
|
// clamp converts i to an int, truncating toward zero, and confines the result
// to the range [lo, hi]. Values below lo yield lo and values above hi yield
// hi. NaN yields lo: NaN fails every comparison, and converting it with
// int(i) has an implementation-dependent result that must not be used as an
// index.
//
// The bounds are named lo and hi rather than min and max so they do not
// shadow the built-in functions of the same name.
func clamp(i float64, lo int, hi int) int {
	if math.IsNaN(i) || i < float64(lo) {
		return lo
	}
	if i > float64(hi) {
		return hi
	}
	return int(i)
}
|