Add an exec daemon plugin (#4424)

This commit is contained in:
Jan Graichen 2020-02-28 19:46:03 +01:00 committed by GitHub
parent 0103691eb6
commit a20e6953d2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 430 additions and 0 deletions

View File

@ -185,6 +185,7 @@ For documentation on the latest development code see the [documentation index][d
* [elasticsearch](./plugins/inputs/elasticsearch) * [elasticsearch](./plugins/inputs/elasticsearch)
* [ethtool](./plugins/inputs/ethtool) * [ethtool](./plugins/inputs/ethtool)
* [exec](./plugins/inputs/exec) (generic executable plugin, support JSON, influx, graphite and nagios) * [exec](./plugins/inputs/exec) (generic executable plugin, support JSON, influx, graphite and nagios)
* [execd](./plugins/inputs/execd)
* [fail2ban](./plugins/inputs/fail2ban) * [fail2ban](./plugins/inputs/fail2ban)
* [fibaro](./plugins/inputs/fibaro) * [fibaro](./plugins/inputs/fibaro)
* [file](./plugins/inputs/file) * [file](./plugins/inputs/file)

View File

@ -40,6 +40,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/elasticsearch" _ "github.com/influxdata/telegraf/plugins/inputs/elasticsearch"
_ "github.com/influxdata/telegraf/plugins/inputs/ethtool" _ "github.com/influxdata/telegraf/plugins/inputs/ethtool"
_ "github.com/influxdata/telegraf/plugins/inputs/exec" _ "github.com/influxdata/telegraf/plugins/inputs/exec"
_ "github.com/influxdata/telegraf/plugins/inputs/execd"
_ "github.com/influxdata/telegraf/plugins/inputs/fail2ban" _ "github.com/influxdata/telegraf/plugins/inputs/fail2ban"
_ "github.com/influxdata/telegraf/plugins/inputs/fibaro" _ "github.com/influxdata/telegraf/plugins/inputs/fibaro"
_ "github.com/influxdata/telegraf/plugins/inputs/file" _ "github.com/influxdata/telegraf/plugins/inputs/file"

View File

@ -0,0 +1,111 @@
# Execd Input Plugin
The `execd` plugin runs an external program as a daemon. The programs must output metrics in any one of the accepted [Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md) on its standard output.
The `signal` can be configured to send a signal the running daemon on each collection interval.
Program output on standard error is mirrored to the telegraf log.
### Configuration:
```toml
## Program to run as daemon
command = ["telegraf-smartctl", "-d", "/dev/sda"]
## Define how the process is signaled on each collection interval.
## Valid values are:
## "none" : Do not signal anything.
## The process must output metrics by itself.
## "STDIN" : Send a newline on STDIN.
## "SIGHUP" : Send a HUP signal. Not available on Windows.
## "SIGUSR1" : Send a USR1 signal. Not available on Windows.
## "SIGUSR2" : Send a USR2 signal. Not available on Windows.
signal = "none"
## Delay before the process is restarted after an unexpected termination
restart_delay = "10s"
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
```
### Example
##### Daemon written in bash using STDIN signaling
```bash
#!/bin/bash
counter=0
while IFS= read -r LINE; do
echo "counter_bash count=${counter}"
let counter=counter+1
done
```
```toml
[[inputs.execd]]
command = ["plugins/inputs/execd/examples/count.sh"]
signal = "STDIN"
```
##### Go daemon using SIGHUP
```go
package main
import (
"fmt"
"os"
"os/signal"
"syscall"
)
func main() {
c := make(chan os.Signal, 1)
signal.Notify(c, syscall.SIGHUP)
counter := 0
for {
<-c
fmt.Printf("counter_go count=%d\n", counter)
counter++
}
}
```
```toml
[[inputs.execd]]
command = ["plugins/inputs/execd/examples/count.go.exe"]
signal = "SIGHUP"
```
##### Ruby daemon running standalone
```ruby
#!/usr/bin/env ruby
counter = 0
loop do
puts "counter_ruby count=#{counter}"
STDOUT.flush
counter += 1
sleep 1
end
```
```toml
[[inputs.execd]]
command = ["plugins/inputs/execd/examples/count.rb"]
signal = "none"
```

View File

@ -0,0 +1,24 @@
package main
// Example using HUP signaling
import (
"fmt"
"os"
"os/signal"
"syscall"
)
func main() {
c := make(chan os.Signal, 1)
signal.Notify(c, syscall.SIGHUP)
counter := 0
for {
<-c
fmt.Printf("counter_go count=%d\n", counter)
counter++
}
}

View File

@ -0,0 +1,13 @@
#!/usr/bin/env ruby
## Example in Ruby not using any signaling
counter = 0
loop do
puts "counter_ruby count=#{counter}"
STDOUT.flush
counter += 1
sleep 1
end

View File

@ -0,0 +1,12 @@
#!/bin/bash
## Example in bash using STDIN signaling
counter=0
while read; do
echo "counter_bash count=${counter}"
let counter=counter+1
done
(>&2 echo "terminate")

View File

@ -0,0 +1,209 @@
package execd
import (
"bufio"
"context"
"fmt"
"io"
"log"
"os/exec"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
)
const sampleConfig = `
## Program to run as daemon
command = ["telegraf-smartctl", "-d", "/dev/sda"]
## Define how the process is signaled on each collection interval.
## Valid values are:
## "none" : Do not signal anything.
## The process must output metrics by itself.
## "STDIN" : Send a newline on STDIN.
## "SIGHUP" : Send a HUP signal. Not available on Windows.
signal = "none"
## Delay before the process is restarted after an unexpected termination
restart_delay = "10s"
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
`
type Execd struct {
Command []string
Signal string
RestartDelay internal.Duration
acc telegraf.Accumulator
cmd *exec.Cmd
parser parsers.Parser
stdin io.WriteCloser
cancel context.CancelFunc
wg sync.WaitGroup
}
func (e *Execd) SampleConfig() string {
return sampleConfig
}
func (e *Execd) Description() string {
return "Run executable as long-running input plugin"
}
func (e *Execd) SetParser(parser parsers.Parser) {
e.parser = parser
}
func (e *Execd) Start(acc telegraf.Accumulator) error {
e.acc = acc
if len(e.Command) == 0 {
return fmt.Errorf("E! [inputs.execd] FATAL no command specified")
}
e.wg.Add(1)
var ctx context.Context
ctx, e.cancel = context.WithCancel(context.Background())
go func() {
e.cmdLoop(ctx)
e.wg.Done()
}()
return nil
}
func (e *Execd) Stop() {
e.cancel()
e.wg.Wait()
}
func (e *Execd) cmdLoop(ctx context.Context) {
for {
// Use a buffered channel to ensure goroutine below can exit
// if `ctx.Done` is selected and nothing reads on `done` anymore
done := make(chan error, 1)
go func() {
done <- e.cmdRun()
}()
select {
case <-ctx.Done():
// Immediately exit process but with a graceful shutdown
// period before killing
internal.WaitTimeout(e.cmd, 0)
return
case err := <-done:
log.Printf("E! [inputs.execd] Process %s terminated: %s", e.Command, err)
}
log.Printf("E! [inputs.execd] Restarting in %s...", e.RestartDelay.Duration)
select {
case <-ctx.Done():
return
case <-time.After(e.RestartDelay.Duration):
// Continue the loop and restart the process
}
}
}
func (e *Execd) cmdRun() error {
var wg sync.WaitGroup
if len(e.Command) > 1 {
e.cmd = exec.Command(e.Command[0], e.Command[1:]...)
} else {
e.cmd = exec.Command(e.Command[0])
}
stdin, err := e.cmd.StdinPipe()
if err != nil {
return fmt.Errorf("E! [inputs.execd] Error opening stdin pipe: %s", err)
}
e.stdin = stdin
stdout, err := e.cmd.StdoutPipe()
if err != nil {
return fmt.Errorf("E! [inputs.execd] Error opening stdout pipe: %s", err)
}
stderr, err := e.cmd.StderrPipe()
if err != nil {
return fmt.Errorf("E! [inputs.execd] Error opening stderr pipe: %s", err)
}
log.Printf("D! [inputs.execd] Starting process: %s", e.Command)
err = e.cmd.Start()
if err != nil {
return fmt.Errorf("E! [inputs.execd] Error starting process: %s", err)
}
wg.Add(2)
go func() {
e.cmdReadOut(stdout)
wg.Done()
}()
go func() {
e.cmdReadErr(stderr)
wg.Done()
}()
wg.Wait()
return e.cmd.Wait()
}
func (e *Execd) cmdReadOut(out io.Reader) {
scanner := bufio.NewScanner(out)
for scanner.Scan() {
metrics, err := e.parser.Parse(scanner.Bytes())
if err != nil {
e.acc.AddError(fmt.Errorf("E! [inputs.execd] Parse error: %s", err))
}
for _, metric := range metrics {
e.acc.AddMetric(metric)
}
}
if err := scanner.Err(); err != nil {
e.acc.AddError(fmt.Errorf("E! [inputs.execd] Error reading stdout: %s", err))
}
}
func (e *Execd) cmdReadErr(out io.Reader) {
scanner := bufio.NewScanner(out)
for scanner.Scan() {
log.Printf("E! [inputs.execd] stderr: %q", scanner.Text())
}
if err := scanner.Err(); err != nil {
e.acc.AddError(fmt.Errorf("E! [inputs.execd] Error reading stderr: %s", err))
}
}
func init() {
inputs.Add("execd", func() telegraf.Input {
return &Execd{
Signal: "none",
RestartDelay: internal.Duration{Duration: 10 * time.Second},
}
})
}

View File

@ -0,0 +1,33 @@
// +build !windows
package execd
import (
"fmt"
"io"
"syscall"
"github.com/influxdata/telegraf"
)
func (e *Execd) Gather(acc telegraf.Accumulator) error {
if e.cmd == nil || e.cmd.Process == nil {
return nil
}
switch e.Signal {
case "SIGHUP":
e.cmd.Process.Signal(syscall.SIGHUP)
case "SIGUSR1":
e.cmd.Process.Signal(syscall.SIGUSR1)
case "SIGUSR2":
e.cmd.Process.Signal(syscall.SIGUSR2)
case "STDIN":
io.WriteString(e.stdin, "\n")
case "none":
default:
return fmt.Errorf("invalid signal: %s", e.Signal)
}
return nil
}

View File

@ -0,0 +1,26 @@
// +build windows
package execd
import (
"fmt"
"io"
"github.com/influxdata/telegraf"
)
func (e *Execd) Gather(acc telegraf.Accumulator) error {
if e.cmd == nil || e.cmd.Process == nil {
return nil
}
switch e.Signal {
case "STDIN":
io.WriteString(e.stdin, "\n")
case "none":
default:
return fmt.Errorf("invalid signal: %s", e.Signal)
}
return nil
}