Refactor collection_jitter and flush_jitter
use a common function between collection_jitter and flush_jitter. which creates the same behavior between the two options. going forward, both jitters will be random sleeps that get re-evaluated at runtime for every interval (previously only collection_jitter did this) also fixes behavior so that both jitters will exit in the event of a process exit. closes #1296
This commit is contained in:
parent
e809c4e445
commit
892abec025
|
@ -2,6 +2,10 @@
|
|||
|
||||
### Release Notes
|
||||
|
||||
- `flush_jitter` behavior has been changed. The random jitter will now be
|
||||
evaluated at every flush interval, rather than once at startup. This makes it
|
||||
consistent with the behavior of `collection_jitter`.
|
||||
|
||||
- All AWS plugins now utilize a standard mechanism for evaluating credentials.
|
||||
This allows all AWS plugins to support environment variables, shared credential
|
||||
files & profiles, and role assumptions. See the specific plugin README for
|
||||
|
@ -31,6 +35,7 @@ time before a new metric is included by the plugin.
|
|||
- [#1278](https://github.com/influxdata/telegraf/pull/1278): RabbitMQ input: made url parameter optional by using DefaultURL (http://localhost:15672) if not specified
|
||||
- [#1197](https://github.com/influxdata/telegraf/pull/1197): Limit AWS GetMetricStatistics requests to 10 per second.
|
||||
- [#1278](https://github.com/influxdata/telegraf/pull/1278) & [#1288](https://github.com/influxdata/telegraf/pull/1288) & [#1295](https://github.com/influxdata/telegraf/pull/1295): RabbitMQ/Apache/InfluxDB inputs: made url(s) parameter optional by using reasonable input defaults if not specified
|
||||
- [#1296](https://github.com/influxdata/telegraf/issues/1296): Refactor of flush_jitter argument.
|
||||
|
||||
### Bugfixes
|
||||
|
||||
|
|
|
@ -1,17 +1,15 @@
|
|||
package agent
|
||||
|
||||
import (
|
||||
cryptorand "crypto/rand"
|
||||
"fmt"
|
||||
"log"
|
||||
"math/big"
|
||||
"math/rand"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/internal"
|
||||
"github.com/influxdata/telegraf/internal/config"
|
||||
"github.com/influxdata/telegraf/internal/models"
|
||||
)
|
||||
|
@ -115,27 +113,16 @@ func (a *Agent) gatherer(
|
|||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
jitter := a.Config.Agent.CollectionJitter.Duration.Nanoseconds()
|
||||
|
||||
for {
|
||||
var outerr error
|
||||
start := time.Now()
|
||||
|
||||
acc := NewAccumulator(input.Config, metricC)
|
||||
acc.SetDebug(a.Config.Agent.Debug)
|
||||
acc.setDefaultTags(a.Config.Tags)
|
||||
|
||||
if jitter != 0 {
|
||||
nanoSleep := rand.Int63n(jitter)
|
||||
d, err := time.ParseDuration(fmt.Sprintf("%dns", nanoSleep))
|
||||
if err != nil {
|
||||
log.Printf("Jittering collection interval failed for plugin %s",
|
||||
input.Name)
|
||||
} else {
|
||||
time.Sleep(d)
|
||||
}
|
||||
}
|
||||
internal.RandomSleep(a.Config.Agent.CollectionJitter.Duration, shutdown)
|
||||
|
||||
start := time.Now()
|
||||
gatherWithTimeout(shutdown, input, acc, interval)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
|
@ -274,6 +261,7 @@ func (a *Agent) flusher(shutdown chan struct{}, metricC chan telegraf.Metric) er
|
|||
a.flush()
|
||||
return nil
|
||||
case <-ticker.C:
|
||||
internal.RandomSleep(a.Config.Agent.FlushJitter.Duration, shutdown)
|
||||
a.flush()
|
||||
case m := <-metricC:
|
||||
for _, o := range a.Config.Outputs {
|
||||
|
@ -283,35 +271,10 @@ func (a *Agent) flusher(shutdown chan struct{}, metricC chan telegraf.Metric) er
|
|||
}
|
||||
}
|
||||
|
||||
// jitterInterval applies the the interval jitter to the flush interval using
|
||||
// crypto/rand number generator
|
||||
func jitterInterval(ininterval, injitter time.Duration) time.Duration {
|
||||
var jitter int64
|
||||
outinterval := ininterval
|
||||
if injitter.Nanoseconds() != 0 {
|
||||
maxjitter := big.NewInt(injitter.Nanoseconds())
|
||||
if j, err := cryptorand.Int(cryptorand.Reader, maxjitter); err == nil {
|
||||
jitter = j.Int64()
|
||||
}
|
||||
outinterval = time.Duration(jitter + ininterval.Nanoseconds())
|
||||
}
|
||||
|
||||
if outinterval.Nanoseconds() < time.Duration(500*time.Millisecond).Nanoseconds() {
|
||||
log.Printf("Flush interval %s too low, setting to 500ms\n", outinterval)
|
||||
outinterval = time.Duration(500 * time.Millisecond)
|
||||
}
|
||||
|
||||
return outinterval
|
||||
}
|
||||
|
||||
// Run runs the agent daemon, gathering every Interval
|
||||
func (a *Agent) Run(shutdown chan struct{}) error {
|
||||
var wg sync.WaitGroup
|
||||
|
||||
a.Config.Agent.FlushInterval.Duration = jitterInterval(
|
||||
a.Config.Agent.FlushInterval.Duration,
|
||||
a.Config.Agent.FlushJitter.Duration)
|
||||
|
||||
log.Printf("Agent Config: Interval:%s, Debug:%#v, Quiet:%#v, Hostname:%#v, "+
|
||||
"Flush Interval:%s \n",
|
||||
a.Config.Agent.Interval.Duration, a.Config.Agent.Debug, a.Config.Agent.Quiet,
|
||||
|
|
|
@ -2,7 +2,6 @@ package agent
|
|||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf/internal/config"
|
||||
|
||||
|
@ -110,75 +109,3 @@ func TestAgent_LoadOutput(t *testing.T) {
|
|||
a, _ = NewAgent(c)
|
||||
assert.Equal(t, 3, len(a.Config.Outputs))
|
||||
}
|
||||
|
||||
func TestAgent_ZeroJitter(t *testing.T) {
|
||||
flushinterval := jitterInterval(time.Duration(10*time.Second),
|
||||
time.Duration(0*time.Second))
|
||||
|
||||
actual := flushinterval.Nanoseconds()
|
||||
exp := time.Duration(10 * time.Second).Nanoseconds()
|
||||
|
||||
if actual != exp {
|
||||
t.Errorf("Actual %v, expected %v", actual, exp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgent_ZeroInterval(t *testing.T) {
|
||||
min := time.Duration(500 * time.Millisecond).Nanoseconds()
|
||||
max := time.Duration(5 * time.Second).Nanoseconds()
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
flushinterval := jitterInterval(time.Duration(0*time.Second),
|
||||
time.Duration(5*time.Second))
|
||||
actual := flushinterval.Nanoseconds()
|
||||
|
||||
if actual > max {
|
||||
t.Errorf("Didn't expect interval %d to be > %d", actual, max)
|
||||
break
|
||||
}
|
||||
if actual < min {
|
||||
t.Errorf("Didn't expect interval %d to be < %d", actual, min)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgent_ZeroBoth(t *testing.T) {
|
||||
flushinterval := jitterInterval(time.Duration(0*time.Second),
|
||||
time.Duration(0*time.Second))
|
||||
|
||||
actual := flushinterval
|
||||
exp := time.Duration(500 * time.Millisecond)
|
||||
|
||||
if actual != exp {
|
||||
t.Errorf("Actual %v, expected %v", actual, exp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgent_JitterMax(t *testing.T) {
|
||||
max := time.Duration(32 * time.Second).Nanoseconds()
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
flushinterval := jitterInterval(time.Duration(30*time.Second),
|
||||
time.Duration(2*time.Second))
|
||||
actual := flushinterval.Nanoseconds()
|
||||
if actual > max {
|
||||
t.Errorf("Didn't expect interval %d to be > %d", actual, max)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgent_JitterMin(t *testing.T) {
|
||||
min := time.Duration(30 * time.Second).Nanoseconds()
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
flushinterval := jitterInterval(time.Duration(30*time.Second),
|
||||
time.Duration(2*time.Second))
|
||||
actual := flushinterval.Nanoseconds()
|
||||
if actual < min {
|
||||
t.Errorf("Didn't expect interval %d to be < %d", actual, min)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,7 +58,6 @@ func NewConfig() *Config {
|
|||
Interval: internal.Duration{Duration: 10 * time.Second},
|
||||
RoundInterval: true,
|
||||
FlushInterval: internal.Duration{Duration: 10 * time.Second},
|
||||
FlushJitter: internal.Duration{Duration: 5 * time.Second},
|
||||
},
|
||||
|
||||
Tags: make(map[string]string),
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"math/big"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
|
@ -228,3 +229,27 @@ func CompileFilter(filters []string) (glob.Glob, error) {
|
|||
}
|
||||
return out, err
|
||||
}
|
||||
|
||||
// RandomSleep will sleep for a random amount of time up to max.
|
||||
// If the shutdown channel is closed, it will return before it has finished
|
||||
// sleeping.
|
||||
func RandomSleep(max time.Duration, shutdown chan struct{}) {
|
||||
if max == 0 {
|
||||
return
|
||||
}
|
||||
maxSleep := big.NewInt(max.Nanoseconds())
|
||||
|
||||
var sleepns int64
|
||||
if j, err := rand.Int(rand.Reader, maxSleep); err == nil {
|
||||
sleepns = j.Int64()
|
||||
}
|
||||
|
||||
t := time.NewTimer(time.Nanosecond * time.Duration(sleepns))
|
||||
select {
|
||||
case <-t.C:
|
||||
return
|
||||
case <-shutdown:
|
||||
t.Stop()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
|
|
@ -137,3 +137,28 @@ func TestCompileFilter(t *testing.T) {
|
|||
assert.True(t, f.Match("mem"))
|
||||
assert.True(t, f.Match("network"))
|
||||
}
|
||||
|
||||
func TestRandomSleep(t *testing.T) {
|
||||
// test that zero max returns immediately
|
||||
s := time.Now()
|
||||
RandomSleep(time.Duration(0), make(chan struct{}))
|
||||
elapsed := time.Since(s)
|
||||
assert.True(t, elapsed < time.Millisecond)
|
||||
|
||||
// test that max sleep is respected
|
||||
s = time.Now()
|
||||
RandomSleep(time.Millisecond*50, make(chan struct{}))
|
||||
elapsed = time.Since(s)
|
||||
assert.True(t, elapsed < time.Millisecond*50)
|
||||
|
||||
// test that shutdown is respected
|
||||
s = time.Now()
|
||||
shutdown := make(chan struct{})
|
||||
go func() {
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
close(shutdown)
|
||||
}()
|
||||
RandomSleep(time.Second, shutdown)
|
||||
elapsed = time.Since(s)
|
||||
assert.True(t, elapsed < time.Millisecond*150)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue