add support for SIGUSR1 to trigger flush (#7366)
This commit is contained in:
parent
6c72c645a2
commit
819481b195
|
@ -4,6 +4,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
|
"os"
|
||||||
"runtime"
|
"runtime"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
@ -516,16 +517,7 @@ func (a *Agent) runOutputs(
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func(output *models.RunningOutput) {
|
go func(output *models.RunningOutput) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
a.flushLoop(ctx, startTime, output, interval, jitter)
|
||||||
if a.Config.Agent.RoundInterval {
|
|
||||||
err := internal.SleepContext(
|
|
||||||
ctx, internal.AlignDuration(startTime, interval))
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
a.flush(ctx, output, interval, jitter)
|
|
||||||
}(output)
|
}(output)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -546,25 +538,39 @@ func (a *Agent) runOutputs(
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// flush runs an output's flush function periodically until the context is
|
// flushLoop runs an output's flush function periodically until the context is
|
||||||
// done.
|
// done.
|
||||||
func (a *Agent) flush(
|
func (a *Agent) flushLoop(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
|
startTime time.Time,
|
||||||
output *models.RunningOutput,
|
output *models.RunningOutput,
|
||||||
interval time.Duration,
|
interval time.Duration,
|
||||||
jitter time.Duration,
|
jitter time.Duration,
|
||||||
) {
|
) {
|
||||||
// since we are watching two channels we need a ticker with the jitter
|
|
||||||
// integrated.
|
|
||||||
ticker := NewTicker(interval, jitter)
|
|
||||||
defer ticker.Stop()
|
|
||||||
|
|
||||||
logError := func(err error) {
|
logError := func(err error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("E! [agent] Error writing to %s: %v", output.LogName(), err)
|
log.Printf("E! [agent] Error writing to %s: %v", output.LogName(), err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// watch for flush requests
|
||||||
|
flushRequested := make(chan os.Signal, 1)
|
||||||
|
watchForFlushSignal(flushRequested)
|
||||||
|
|
||||||
|
// align to round interval
|
||||||
|
if a.Config.Agent.RoundInterval {
|
||||||
|
err := internal.SleepContext(
|
||||||
|
ctx, internal.AlignDuration(startTime, interval))
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// since we are watching two channels we need a ticker with the jitter
|
||||||
|
// integrated.
|
||||||
|
ticker := NewTicker(interval, jitter)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
// Favor shutdown over other methods.
|
// Favor shutdown over other methods.
|
||||||
select {
|
select {
|
||||||
|
@ -575,8 +581,13 @@ func (a *Agent) flush(
|
||||||
}
|
}
|
||||||
|
|
||||||
select {
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
logError(a.flushOnce(output, interval, output.Write))
|
||||||
|
return
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
logError(a.flushOnce(output, interval, output.Write))
|
logError(a.flushOnce(output, interval, output.Write))
|
||||||
|
case <-flushRequested:
|
||||||
|
logError(a.flushOnce(output, interval, output.Write))
|
||||||
case <-output.BatchReady:
|
case <-output.BatchReady:
|
||||||
// Favor the ticker over batch ready
|
// Favor the ticker over batch ready
|
||||||
select {
|
select {
|
||||||
|
@ -585,9 +596,6 @@ func (a *Agent) flush(
|
||||||
default:
|
default:
|
||||||
logError(a.flushOnce(output, interval, output.WriteBatch))
|
logError(a.flushOnce(output, interval, output.WriteBatch))
|
||||||
}
|
}
|
||||||
case <-ctx.Done():
|
|
||||||
logError(a.flushOnce(output, interval, output.Write))
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
// +build !windows
|
||||||
|
|
||||||
|
package agent
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"os/signal"
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
const flushSignal = syscall.SIGUSR1
|
||||||
|
|
||||||
|
func watchForFlushSignal(flushRequested chan os.Signal) {
|
||||||
|
signal.Notify(flushRequested, flushSignal)
|
||||||
|
defer signal.Stop(flushRequested)
|
||||||
|
}
|
|
@ -0,0 +1,9 @@
|
||||||
|
// +build windows
|
||||||
|
|
||||||
|
package agent
|
||||||
|
|
||||||
|
import "os"
|
||||||
|
|
||||||
|
func watchForFlushSignal(flushRequested chan os.Signal) {
|
||||||
|
// not implemented
|
||||||
|
}
|
|
@ -10,7 +10,6 @@ import (
|
||||||
_ "net/http/pprof" // Comment this line to disable pprof endpoint.
|
_ "net/http/pprof" // Comment this line to disable pprof endpoint.
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"runtime"
|
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
@ -27,16 +26,16 @@ import (
|
||||||
"github.com/influxdata/telegraf/plugins/outputs"
|
"github.com/influxdata/telegraf/plugins/outputs"
|
||||||
_ "github.com/influxdata/telegraf/plugins/outputs/all"
|
_ "github.com/influxdata/telegraf/plugins/outputs/all"
|
||||||
_ "github.com/influxdata/telegraf/plugins/processors/all"
|
_ "github.com/influxdata/telegraf/plugins/processors/all"
|
||||||
"github.com/kardianos/service"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// If you update these, update usage.go and usage_windows.go
|
||||||
var fDebug = flag.Bool("debug", false,
|
var fDebug = flag.Bool("debug", false,
|
||||||
"turn on debug logging")
|
"turn on debug logging")
|
||||||
var pprofAddr = flag.String("pprof-addr", "",
|
var pprofAddr = flag.String("pprof-addr", "",
|
||||||
"pprof address to listen on, not activate pprof if empty")
|
"pprof address to listen on, not activate pprof if empty")
|
||||||
var fQuiet = flag.Bool("quiet", false,
|
var fQuiet = flag.Bool("quiet", false,
|
||||||
"run in quiet mode")
|
"run in quiet mode")
|
||||||
var fTest = flag.Bool("test", false, "enable test mode: gather metrics, print them out, and exit")
|
var fTest = flag.Bool("test", false, "enable test mode: gather metrics, print them out, and exit. Note: Test mode only runs inputs, not processors, aggregators, or outputs")
|
||||||
var fTestWait = flag.Int("test-wait", 0, "wait up to this many seconds for service inputs to complete in test mode")
|
var fTestWait = flag.Int("test-wait", 0, "wait up to this many seconds for service inputs to complete in test mode")
|
||||||
var fConfig = flag.String("config", "", "configuration file to load")
|
var fConfig = flag.String("config", "", "configuration file to load")
|
||||||
var fConfigDirectory = flag.String("config-directory", "",
|
var fConfigDirectory = flag.String("config-directory", "",
|
||||||
|
@ -78,7 +77,6 @@ var (
|
||||||
var stop chan struct{}
|
var stop chan struct{}
|
||||||
|
|
||||||
func reloadLoop(
|
func reloadLoop(
|
||||||
stop chan struct{},
|
|
||||||
inputFilters []string,
|
inputFilters []string,
|
||||||
outputFilters []string,
|
outputFilters []string,
|
||||||
aggregatorFilters []string,
|
aggregatorFilters []string,
|
||||||
|
@ -91,7 +89,7 @@ func reloadLoop(
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
signals := make(chan os.Signal)
|
signals := make(chan os.Signal, 1)
|
||||||
signal.Notify(signals, os.Interrupt, syscall.SIGHUP,
|
signal.Notify(signals, os.Interrupt, syscall.SIGHUP,
|
||||||
syscall.SIGTERM, syscall.SIGINT)
|
syscall.SIGTERM, syscall.SIGINT)
|
||||||
go func() {
|
go func() {
|
||||||
|
@ -208,32 +206,6 @@ func usageExit(rc int) {
|
||||||
os.Exit(rc)
|
os.Exit(rc)
|
||||||
}
|
}
|
||||||
|
|
||||||
type program struct {
|
|
||||||
inputFilters []string
|
|
||||||
outputFilters []string
|
|
||||||
aggregatorFilters []string
|
|
||||||
processorFilters []string
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *program) Start(s service.Service) error {
|
|
||||||
go p.run()
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
func (p *program) run() {
|
|
||||||
stop = make(chan struct{})
|
|
||||||
reloadLoop(
|
|
||||||
stop,
|
|
||||||
p.inputFilters,
|
|
||||||
p.outputFilters,
|
|
||||||
p.aggregatorFilters,
|
|
||||||
p.processorFilters,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
func (p *program) Stop(s service.Service) error {
|
|
||||||
close(stop)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func formatFullVersion() string {
|
func formatFullVersion() string {
|
||||||
var parts = []string{"Telegraf"}
|
var parts = []string{"Telegraf"}
|
||||||
|
|
||||||
|
@ -380,80 +352,10 @@ func main() {
|
||||||
log.Println("Telegraf version already configured to: " + internal.Version())
|
log.Println("Telegraf version already configured to: " + internal.Version())
|
||||||
}
|
}
|
||||||
|
|
||||||
if runtime.GOOS == "windows" && windowsRunAsService() {
|
run(
|
||||||
programFiles := os.Getenv("ProgramFiles")
|
|
||||||
if programFiles == "" { // Should never happen
|
|
||||||
programFiles = "C:\\Program Files"
|
|
||||||
}
|
|
||||||
svcConfig := &service.Config{
|
|
||||||
Name: *fServiceName,
|
|
||||||
DisplayName: *fServiceDisplayName,
|
|
||||||
Description: "Collects data using a series of plugins and publishes it to" +
|
|
||||||
"another series of plugins.",
|
|
||||||
Arguments: []string{"--config", programFiles + "\\Telegraf\\telegraf.conf"},
|
|
||||||
}
|
|
||||||
|
|
||||||
prg := &program{
|
|
||||||
inputFilters: inputFilters,
|
|
||||||
outputFilters: outputFilters,
|
|
||||||
aggregatorFilters: aggregatorFilters,
|
|
||||||
processorFilters: processorFilters,
|
|
||||||
}
|
|
||||||
s, err := service.New(prg, svcConfig)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal("E! " + err.Error())
|
|
||||||
}
|
|
||||||
// Handle the --service flag here to prevent any issues with tooling that
|
|
||||||
// may not have an interactive session, e.g. installing from Ansible.
|
|
||||||
if *fService != "" {
|
|
||||||
if *fConfig != "" {
|
|
||||||
svcConfig.Arguments = []string{"--config", *fConfig}
|
|
||||||
}
|
|
||||||
if *fConfigDirectory != "" {
|
|
||||||
svcConfig.Arguments = append(svcConfig.Arguments, "--config-directory", *fConfigDirectory)
|
|
||||||
}
|
|
||||||
//set servicename to service cmd line, to have a custom name after relaunch as a service
|
|
||||||
svcConfig.Arguments = append(svcConfig.Arguments, "--service-name", *fServiceName)
|
|
||||||
|
|
||||||
err := service.Control(s, *fService)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal("E! " + err.Error())
|
|
||||||
}
|
|
||||||
os.Exit(0)
|
|
||||||
} else {
|
|
||||||
winlogger, err := s.Logger(nil)
|
|
||||||
if err == nil {
|
|
||||||
//When in service mode, register eventlog target andd setup default logging to eventlog
|
|
||||||
logger.RegisterEventLogger(winlogger)
|
|
||||||
logger.SetupLogging(logger.LogConfig{LogTarget: logger.LogTargetEventlog})
|
|
||||||
}
|
|
||||||
err = s.Run()
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Println("E! " + err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
stop = make(chan struct{})
|
|
||||||
reloadLoop(
|
|
||||||
stop,
|
|
||||||
inputFilters,
|
inputFilters,
|
||||||
outputFilters,
|
outputFilters,
|
||||||
aggregatorFilters,
|
aggregatorFilters,
|
||||||
processorFilters,
|
processorFilters,
|
||||||
)
|
)
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if Telegraf should create a Windows service.
|
|
||||||
func windowsRunAsService() bool {
|
|
||||||
if *fService != "" {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
if *fRunAsConsole {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return !service.Interactive()
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
// +build !windows
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
func run(inputFilters, outputFilters, aggregatorFilters, processorFilters []string) {
|
||||||
|
stop = make(chan struct{})
|
||||||
|
reloadLoop(
|
||||||
|
inputFilters,
|
||||||
|
outputFilters,
|
||||||
|
aggregatorFilters,
|
||||||
|
processorFilters,
|
||||||
|
)
|
||||||
|
}
|
|
@ -0,0 +1,124 @@
|
||||||
|
// +build windows
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
|
"github.com/influxdata/telegraf/logger"
|
||||||
|
"github.com/kardianos/service"
|
||||||
|
)
|
||||||
|
|
||||||
|
func run(inputFilters, outputFilters, aggregatorFilters, processorFilters []string) {
|
||||||
|
if runtime.GOOS == "windows" && windowsRunAsService() {
|
||||||
|
runAsWindowsService(
|
||||||
|
inputFilters,
|
||||||
|
outputFilters,
|
||||||
|
aggregatorFilters,
|
||||||
|
processorFilters,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
stop = make(chan struct{})
|
||||||
|
reloadLoop(
|
||||||
|
inputFilters,
|
||||||
|
outputFilters,
|
||||||
|
aggregatorFilters,
|
||||||
|
processorFilters,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type program struct {
|
||||||
|
inputFilters []string
|
||||||
|
outputFilters []string
|
||||||
|
aggregatorFilters []string
|
||||||
|
processorFilters []string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *program) Start(s service.Service) error {
|
||||||
|
go p.run()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
func (p *program) run() {
|
||||||
|
stop = make(chan struct{})
|
||||||
|
reloadLoop(
|
||||||
|
p.inputFilters,
|
||||||
|
p.outputFilters,
|
||||||
|
p.aggregatorFilters,
|
||||||
|
p.processorFilters,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
func (p *program) Stop(s service.Service) error {
|
||||||
|
close(stop)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runAsWindowsService(inputFilters, outputFilters, aggregatorFilters, processorFilters []string) {
|
||||||
|
programFiles := os.Getenv("ProgramFiles")
|
||||||
|
if programFiles == "" { // Should never happen
|
||||||
|
programFiles = "C:\\Program Files"
|
||||||
|
}
|
||||||
|
svcConfig := &service.Config{
|
||||||
|
Name: *fServiceName,
|
||||||
|
DisplayName: *fServiceDisplayName,
|
||||||
|
Description: "Collects data using a series of plugins and publishes it to" +
|
||||||
|
"another series of plugins.",
|
||||||
|
Arguments: []string{"--config", programFiles + "\\Telegraf\\telegraf.conf"},
|
||||||
|
}
|
||||||
|
|
||||||
|
prg := &program{
|
||||||
|
inputFilters: inputFilters,
|
||||||
|
outputFilters: outputFilters,
|
||||||
|
aggregatorFilters: aggregatorFilters,
|
||||||
|
processorFilters: processorFilters,
|
||||||
|
}
|
||||||
|
s, err := service.New(prg, svcConfig)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("E! " + err.Error())
|
||||||
|
}
|
||||||
|
// Handle the --service flag here to prevent any issues with tooling that
|
||||||
|
// may not have an interactive session, e.g. installing from Ansible.
|
||||||
|
if *fService != "" {
|
||||||
|
if *fConfig != "" {
|
||||||
|
svcConfig.Arguments = []string{"--config", *fConfig}
|
||||||
|
}
|
||||||
|
if *fConfigDirectory != "" {
|
||||||
|
svcConfig.Arguments = append(svcConfig.Arguments, "--config-directory", *fConfigDirectory)
|
||||||
|
}
|
||||||
|
//set servicename to service cmd line, to have a custom name after relaunch as a service
|
||||||
|
svcConfig.Arguments = append(svcConfig.Arguments, "--service-name", *fServiceName)
|
||||||
|
|
||||||
|
err := service.Control(s, *fService)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("E! " + err.Error())
|
||||||
|
}
|
||||||
|
os.Exit(0)
|
||||||
|
} else {
|
||||||
|
winlogger, err := s.Logger(nil)
|
||||||
|
if err == nil {
|
||||||
|
//When in service mode, register eventlog target andd setup default logging to eventlog
|
||||||
|
logger.RegisterEventLogger(winlogger)
|
||||||
|
logger.SetupLogging(logger.LogConfig{LogTarget: logger.LogTargetEventlog})
|
||||||
|
}
|
||||||
|
err = s.Run()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Println("E! " + err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return true if Telegraf should create a Windows service.
|
||||||
|
func windowsRunAsService() bool {
|
||||||
|
if *fService != "" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
if *fRunAsConsole {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return !service.Interactive()
|
||||||
|
}
|
|
@ -94,6 +94,19 @@ You should also add the following to your `SampleConfig()`:
|
||||||
data_format = "influx"
|
data_format = "influx"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Flushing Metrics to Outputs
|
||||||
|
|
||||||
|
Metrics are flushed to outputs when any of the following events happen:
|
||||||
|
- `flush_interval + rand(flush_jitter)` has elapsed since start or the last flush interval
|
||||||
|
- At least `metric_batch_size` count of metrics are waiting in the buffer
|
||||||
|
- The telegraf process has received a SIGUSR1 signal
|
||||||
|
|
||||||
|
Note that if the flush takes longer than the `agent.interval` to write the metrics
|
||||||
|
to the output, you'll see a message saying the output `did not complete within its
|
||||||
|
flush interval`. This may mean your output is not keeping up with the flow of metrics,
|
||||||
|
and you may want to look into enabling compression, reducing the size of your metrics,
|
||||||
|
or investigate other reasons why the writes might be taking longer than expected.
|
||||||
|
|
||||||
[file]: https://github.com/influxdata/telegraf/tree/master/plugins/inputs/file
|
[file]: https://github.com/influxdata/telegraf/tree/master/plugins/inputs/file
|
||||||
[output data formats]: https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
|
[output data formats]: https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
|
||||||
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
|
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
|
||||||
|
|
|
@ -32,8 +32,9 @@ The commands & flags are:
|
||||||
Valid values are 'agent', 'global_tags', 'outputs',
|
Valid values are 'agent', 'global_tags', 'outputs',
|
||||||
'processors', 'aggregators' and 'inputs'
|
'processors', 'aggregators' and 'inputs'
|
||||||
--sample-config print out full sample configuration
|
--sample-config print out full sample configuration
|
||||||
--test gather metrics, print them out, and exit;
|
--test enable test mode: gather metrics, print them out,
|
||||||
processors, aggregators, and outputs are not run
|
and exit. Note: Test mode only runs inputs, not
|
||||||
|
processors, aggregators, or outputs
|
||||||
--test-wait wait up to this many seconds for service
|
--test-wait wait up to this many seconds for service
|
||||||
inputs to complete in test mode
|
inputs to complete in test mode
|
||||||
--usage <plugin> print usage for a plugin, ie, 'telegraf --usage mysql'
|
--usage <plugin> print usage for a plugin, ie, 'telegraf --usage mysql'
|
||||||
|
|
|
@ -29,8 +29,9 @@ The commands & flags are:
|
||||||
--section-filter filter config sections to output, separator is :
|
--section-filter filter config sections to output, separator is :
|
||||||
Valid values are 'agent', 'global_tags', 'outputs',
|
Valid values are 'agent', 'global_tags', 'outputs',
|
||||||
'processors', 'aggregators' and 'inputs'
|
'processors', 'aggregators' and 'inputs'
|
||||||
--test gather metrics, print them out, and exit;
|
--test enable test mode: gather metrics, print them out,
|
||||||
processors, aggregators, and outputs are not run
|
and exit. Note: Test mode only runs inputs, not
|
||||||
|
processors, aggregators, or outputs
|
||||||
--test-wait wait up to this many seconds for service
|
--test-wait wait up to this many seconds for service
|
||||||
inputs to complete in test mode
|
inputs to complete in test mode
|
||||||
--usage <plugin> print usage for a plugin, ie, 'telegraf --usage mysql'
|
--usage <plugin> print usage for a plugin, ie, 'telegraf --usage mysql'
|
||||||
|
|
Loading…
Reference in New Issue