2020-02-28 18:46:03 +00:00
|
|
|
package execd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"log"
|
|
|
|
"os/exec"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/influxdata/telegraf"
|
2020-05-04 18:09:10 +00:00
|
|
|
"github.com/influxdata/telegraf/config"
|
2020-02-28 18:46:03 +00:00
|
|
|
"github.com/influxdata/telegraf/internal"
|
|
|
|
"github.com/influxdata/telegraf/plugins/inputs"
|
|
|
|
"github.com/influxdata/telegraf/plugins/parsers"
|
2020-05-05 21:43:45 +00:00
|
|
|
"github.com/influxdata/telegraf/plugins/parsers/influx"
|
2020-02-28 18:46:03 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// sampleConfig is the example configuration text handed out by SampleConfig.
const sampleConfig = `
|
|
|
|
## Program to run as daemon
|
|
|
|
command = ["telegraf-smartctl", "-d", "/dev/sda"]
|
|
|
|
|
|
|
|
## Define how the process is signaled on each collection interval.
|
|
|
|
## Valid values are:
|
|
|
|
## "none" : Do not signal anything.
|
|
|
|
## The process must output metrics by itself.
|
2020-02-28 18:58:56 +00:00
|
|
|
## "STDIN" : Send a newline on STDIN.
|
|
|
|
## "SIGHUP" : Send a HUP signal. Not available on Windows.
|
|
|
|
## "SIGUSR1" : Send a USR1 signal. Not available on Windows.
|
|
|
|
## "SIGUSR2" : Send a USR2 signal. Not available on Windows.
|
2020-02-28 18:46:03 +00:00
|
|
|
signal = "none"
|
|
|
|
|
|
|
|
## Delay before the process is restarted after an unexpected termination
|
|
|
|
restart_delay = "10s"
|
|
|
|
|
|
|
|
## Data format to consume.
|
|
|
|
## Each data format has its own unique set of configuration options, read
|
|
|
|
## more about them here:
|
|
|
|
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
|
|
|
|
data_format = "influx"
|
|
|
|
`
|
|
|
|
|
|
|
|
type Execd struct {
|
|
|
|
Command []string
|
|
|
|
Signal string
|
2020-05-04 18:09:10 +00:00
|
|
|
RestartDelay config.Duration
|
2020-02-28 18:46:03 +00:00
|
|
|
|
|
|
|
acc telegraf.Accumulator
|
|
|
|
cmd *exec.Cmd
|
|
|
|
parser parsers.Parser
|
|
|
|
stdin io.WriteCloser
|
2020-05-04 18:09:10 +00:00
|
|
|
stdout io.ReadCloser
|
|
|
|
stderr io.ReadCloser
|
2020-02-28 18:46:03 +00:00
|
|
|
cancel context.CancelFunc
|
|
|
|
wg sync.WaitGroup
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Execd) SampleConfig() string {
|
|
|
|
return sampleConfig
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Execd) Description() string {
|
|
|
|
return "Run executable as long-running input plugin"
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Execd) SetParser(parser parsers.Parser) {
|
|
|
|
e.parser = parser
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Execd) Start(acc telegraf.Accumulator) error {
|
|
|
|
e.acc = acc
|
|
|
|
|
|
|
|
if len(e.Command) == 0 {
|
2020-05-04 18:09:10 +00:00
|
|
|
return fmt.Errorf("FATAL no command specified")
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
|
2020-05-05 14:14:57 +00:00
|
|
|
e.wg.Add(1) // for the main loop
|
2020-02-28 18:46:03 +00:00
|
|
|
|
2020-05-04 18:09:10 +00:00
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
e.cancel = cancel
|
|
|
|
|
|
|
|
if err := e.cmdStart(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-02-28 18:46:03 +00:00
|
|
|
|
|
|
|
go func() {
|
|
|
|
e.cmdLoop(ctx)
|
|
|
|
e.wg.Done()
|
|
|
|
}()
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Execd) Stop() {
|
|
|
|
e.cancel()
|
|
|
|
e.wg.Wait()
|
|
|
|
}
|
|
|
|
|
2020-05-04 18:09:10 +00:00
|
|
|
// cmdLoop watches an already running process, restarting it when appropriate.
|
|
|
|
func (e *Execd) cmdLoop(ctx context.Context) error {
|
2020-02-28 18:46:03 +00:00
|
|
|
for {
|
|
|
|
// Use a buffered channel to ensure goroutine below can exit
|
|
|
|
// if `ctx.Done` is selected and nothing reads on `done` anymore
|
|
|
|
done := make(chan error, 1)
|
|
|
|
go func() {
|
2020-05-04 18:09:10 +00:00
|
|
|
done <- e.cmdWait()
|
2020-02-28 18:46:03 +00:00
|
|
|
}()
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
2020-05-04 18:09:10 +00:00
|
|
|
if e.stdin != nil {
|
|
|
|
e.stdin.Close()
|
|
|
|
// Immediately exit process but with a graceful shutdown
|
|
|
|
// period before killing
|
|
|
|
internal.WaitTimeout(e.cmd, 200*time.Millisecond)
|
|
|
|
}
|
|
|
|
return nil
|
2020-02-28 18:46:03 +00:00
|
|
|
case err := <-done:
|
2020-05-04 18:09:10 +00:00
|
|
|
log.Printf("Process %s terminated: %s", e.Command, err)
|
|
|
|
if isQuitting(ctx) {
|
|
|
|
return err
|
|
|
|
}
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
|
2020-05-04 18:09:10 +00:00
|
|
|
log.Printf("Restarting in %s...", time.Duration(e.RestartDelay))
|
2020-02-28 18:46:03 +00:00
|
|
|
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
2020-05-04 18:09:10 +00:00
|
|
|
return nil
|
|
|
|
case <-time.After(time.Duration(e.RestartDelay)):
|
2020-02-28 18:46:03 +00:00
|
|
|
// Continue the loop and restart the process
|
2020-05-04 18:09:10 +00:00
|
|
|
if err := e.cmdStart(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-04 18:09:10 +00:00
|
|
|
// isQuitting reports whether ctx has already been cancelled. It never blocks.
func isQuitting(ctx context.Context) bool {
	// Err is non-nil exactly when the Done channel has been closed.
	return ctx.Err() != nil
}
|
2020-02-28 18:46:03 +00:00
|
|
|
|
2020-05-04 18:09:10 +00:00
|
|
|
func (e *Execd) cmdStart() (err error) {
|
2020-02-28 18:46:03 +00:00
|
|
|
if len(e.Command) > 1 {
|
|
|
|
e.cmd = exec.Command(e.Command[0], e.Command[1:]...)
|
|
|
|
} else {
|
|
|
|
e.cmd = exec.Command(e.Command[0])
|
|
|
|
}
|
|
|
|
|
2020-05-04 18:09:10 +00:00
|
|
|
e.stdin, err = e.cmd.StdinPipe()
|
2020-02-28 18:46:03 +00:00
|
|
|
if err != nil {
|
2020-05-04 18:09:10 +00:00
|
|
|
return fmt.Errorf("Error opening stdin pipe: %s", err)
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
|
2020-05-04 18:09:10 +00:00
|
|
|
e.stdout, err = e.cmd.StdoutPipe()
|
2020-02-28 18:46:03 +00:00
|
|
|
if err != nil {
|
2020-05-04 18:09:10 +00:00
|
|
|
return fmt.Errorf("Error opening stdout pipe: %s", err)
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
|
2020-05-04 18:09:10 +00:00
|
|
|
e.stderr, err = e.cmd.StderrPipe()
|
2020-02-28 18:46:03 +00:00
|
|
|
if err != nil {
|
2020-05-04 18:09:10 +00:00
|
|
|
return fmt.Errorf("Error opening stderr pipe: %s", err)
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
|
2020-05-04 18:09:10 +00:00
|
|
|
log.Printf("Starting process: %s", e.Command)
|
2020-02-28 18:46:03 +00:00
|
|
|
|
|
|
|
err = e.cmd.Start()
|
|
|
|
if err != nil {
|
2020-05-04 18:09:10 +00:00
|
|
|
return fmt.Errorf("Error starting process: %s", err)
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
|
2020-05-04 18:09:10 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Execd) cmdWait() error {
|
|
|
|
var wg sync.WaitGroup
|
2020-02-28 18:46:03 +00:00
|
|
|
wg.Add(2)
|
|
|
|
|
|
|
|
go func() {
|
2020-05-04 18:09:10 +00:00
|
|
|
e.cmdReadOut(e.stdout)
|
2020-02-28 18:46:03 +00:00
|
|
|
wg.Done()
|
|
|
|
}()
|
|
|
|
|
|
|
|
go func() {
|
2020-05-04 18:09:10 +00:00
|
|
|
e.cmdReadErr(e.stderr)
|
2020-02-28 18:46:03 +00:00
|
|
|
wg.Done()
|
|
|
|
}()
|
|
|
|
|
|
|
|
wg.Wait()
|
|
|
|
return e.cmd.Wait()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *Execd) cmdReadOut(out io.Reader) {
|
2020-05-05 21:43:45 +00:00
|
|
|
if _, isInfluxParser := e.parser.(*influx.Parser); isInfluxParser {
|
|
|
|
// work around the lack of built-in streaming parser. :(
|
|
|
|
e.cmdReadOutStream(out)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2020-02-28 18:46:03 +00:00
|
|
|
scanner := bufio.NewScanner(out)
|
|
|
|
|
|
|
|
for scanner.Scan() {
|
|
|
|
metrics, err := e.parser.Parse(scanner.Bytes())
|
|
|
|
if err != nil {
|
2020-05-04 18:09:10 +00:00
|
|
|
e.acc.AddError(fmt.Errorf("Parse error: %s", err))
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, metric := range metrics {
|
|
|
|
e.acc.AddMetric(metric)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := scanner.Err(); err != nil {
|
2020-05-04 18:09:10 +00:00
|
|
|
e.acc.AddError(fmt.Errorf("Error reading stdout: %s", err))
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-05 21:43:45 +00:00
|
|
|
func (e *Execd) cmdReadOutStream(out io.Reader) {
|
|
|
|
parser := influx.NewStreamParser(out)
|
|
|
|
|
|
|
|
for {
|
|
|
|
metric, err := parser.Next()
|
|
|
|
if err != nil {
|
|
|
|
if err == influx.EOF {
|
|
|
|
break // stream ended
|
|
|
|
}
|
|
|
|
if parseErr, isParseError := err.(*influx.ParseError); isParseError {
|
|
|
|
// parse error.
|
|
|
|
e.acc.AddError(parseErr)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// some non-recoverable error?
|
|
|
|
e.acc.AddError(err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
e.acc.AddMetric(metric)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-28 18:46:03 +00:00
|
|
|
func (e *Execd) cmdReadErr(out io.Reader) {
|
|
|
|
scanner := bufio.NewScanner(out)
|
|
|
|
|
|
|
|
for scanner.Scan() {
|
2020-05-04 18:09:10 +00:00
|
|
|
log.Printf("stderr: %q", scanner.Text())
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if err := scanner.Err(); err != nil {
|
2020-05-04 18:09:10 +00:00
|
|
|
e.acc.AddError(fmt.Errorf("Error reading stderr: %s", err))
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
inputs.Add("execd", func() telegraf.Input {
|
|
|
|
return &Execd{
|
|
|
|
Signal: "none",
|
2020-05-04 18:09:10 +00:00
|
|
|
RestartDelay: config.Duration(10 * time.Second),
|
2020-02-28 18:46:03 +00:00
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|