2016-04-02 08:40:20 +00:00
|
|
|
// +build linux
|
2016-03-29 19:38:07 +00:00
|
|
|
|
|
|
|
package sysstat
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"encoding/csv"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
2016-09-21 09:19:59 +00:00
|
|
|
"log"
|
2016-03-29 19:38:07 +00:00
|
|
|
"os"
|
|
|
|
"os/exec"
|
|
|
|
"path"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/influxdata/telegraf"
|
2016-04-29 01:23:45 +00:00
|
|
|
"github.com/influxdata/telegraf/internal"
|
2016-03-29 19:38:07 +00:00
|
|
|
"github.com/influxdata/telegraf/plugins/inputs"
|
|
|
|
)
|
|
|
|
|
|
|
|
var (
|
2016-04-02 07:43:29 +00:00
|
|
|
firstTimestamp time.Time
|
2016-03-29 19:38:07 +00:00
|
|
|
execCommand = exec.Command // execCommand is used to mock commands in tests.
|
|
|
|
dfltActivities = []string{"DISK"}
|
|
|
|
)
|
|
|
|
|
2016-04-02 07:43:29 +00:00
|
|
|
const parseInterval = 1 // parseInterval is the interval (in seconds) where the parsing of the binary file takes place.
|
2016-03-29 19:38:07 +00:00
|
|
|
|
|
|
|
type Sysstat struct {
|
|
|
|
// Sadc represents the path to the sadc collector utility.
|
|
|
|
Sadc string `toml:"sadc_path"`
|
|
|
|
|
|
|
|
// Sadf represents the path to the sadf cmd.
|
|
|
|
Sadf string `toml:"sadf_path"`
|
|
|
|
|
|
|
|
// Activities is a list of activities that are passed as argument to the
|
|
|
|
// collector utility (e.g: DISK, SNMP etc...)
|
|
|
|
// The more activities that are added, the more data is collected.
|
|
|
|
Activities []string
|
|
|
|
|
|
|
|
// Options is a map of options.
|
|
|
|
//
|
|
|
|
// The key represents the actual option that the Sadf command is called with and
|
|
|
|
// the value represents the description for that option.
|
|
|
|
//
|
|
|
|
// For example, if you have the following options map:
|
|
|
|
// map[string]string{"-C": "cpu", "-d": "disk"}
|
|
|
|
// The Sadf command is run with the options -C and -d to extract cpu and
|
|
|
|
// disk metrics from the collected binary file.
|
|
|
|
//
|
|
|
|
// If Group is false (see below), each metric will be prefixed with the corresponding description
|
|
|
|
// and represents itself a measurement.
|
|
|
|
//
|
|
|
|
// If Group is true, metrics are grouped to a single measurement with the corresponding description as name.
|
|
|
|
Options map[string]string
|
|
|
|
|
|
|
|
// Group determines if metrics are grouped or not.
|
|
|
|
Group bool
|
|
|
|
|
|
|
|
// DeviceTags adds the possibility to add additional tags for devices.
|
|
|
|
DeviceTags map[string][]map[string]string `toml:"device_tags"`
|
|
|
|
tmpFile string
|
2016-04-02 07:43:29 +00:00
|
|
|
interval int
|
2016-03-29 19:38:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (*Sysstat) Description() string {
|
|
|
|
return "Sysstat metrics collector"
|
|
|
|
}
|
|
|
|
|
|
|
|
var sampleConfig = `
|
|
|
|
## Path to the sadc command.
|
2016-04-07 07:11:17 +00:00
|
|
|
#
|
2016-04-07 17:05:20 +00:00
|
|
|
## Common Defaults:
|
|
|
|
## Debian/Ubuntu: /usr/lib/sysstat/sadc
|
|
|
|
## Arch: /usr/lib/sa/sadc
|
|
|
|
## RHEL/CentOS: /usr/lib64/sa/sadc
|
2016-03-29 19:38:07 +00:00
|
|
|
sadc_path = "/usr/lib/sa/sadc" # required
|
|
|
|
#
|
|
|
|
#
|
|
|
|
## Path to the sadf command, if it is not in PATH
|
|
|
|
# sadf_path = "/usr/bin/sadf"
|
|
|
|
#
|
|
|
|
#
|
|
|
|
## Activities is a list of activities, that are passed as argument to the
|
|
|
|
## sadc collector utility (e.g: DISK, SNMP etc...)
|
|
|
|
## The more activities that are added, the more data is collected.
|
|
|
|
# activities = ["DISK"]
|
|
|
|
#
|
|
|
|
#
|
|
|
|
## Group metrics to measurements.
|
|
|
|
##
|
|
|
|
## If group is false each metric will be prefixed with a description
|
|
|
|
## and represents itself a measurement.
|
|
|
|
##
|
|
|
|
## If Group is true, corresponding metrics are grouped to a single measurement.
|
2016-04-07 09:01:07 +00:00
|
|
|
# group = true
|
2016-03-29 19:38:07 +00:00
|
|
|
#
|
|
|
|
#
|
2016-04-29 01:23:45 +00:00
|
|
|
## Options for the sadf command. The values on the left represent the sadf
|
|
|
|
## options and the values on the right their description (wich are used for
|
|
|
|
## grouping and prefixing metrics).
|
2016-04-07 09:01:07 +00:00
|
|
|
##
|
2016-04-29 01:23:45 +00:00
|
|
|
## Run 'sar -h' or 'man sar' to find out the supported options for your
|
|
|
|
## sysstat version.
|
2016-03-29 19:38:07 +00:00
|
|
|
[inputs.sysstat.options]
|
2016-04-29 01:23:45 +00:00
|
|
|
-C = "cpu"
|
|
|
|
-B = "paging"
|
|
|
|
-b = "io"
|
|
|
|
-d = "disk" # requires DISK activity
|
|
|
|
"-n ALL" = "network"
|
|
|
|
"-P ALL" = "per_cpu"
|
|
|
|
-q = "queue"
|
|
|
|
-R = "mem"
|
|
|
|
-r = "mem_util"
|
|
|
|
-S = "swap_util"
|
|
|
|
-u = "cpu_util"
|
|
|
|
-v = "inode"
|
|
|
|
-W = "swap"
|
|
|
|
-w = "task"
|
|
|
|
# -H = "hugepages" # only available for newer linux distributions
|
|
|
|
# "-I ALL" = "interrupts" # requires INT activity
|
2016-03-29 19:38:07 +00:00
|
|
|
#
|
|
|
|
#
|
2016-04-29 01:23:45 +00:00
|
|
|
## Device tags can be used to add additional tags for devices.
|
|
|
|
## For example the configuration below adds a tag vg with value rootvg for
|
|
|
|
## all metrics with sda devices.
|
2016-03-29 19:38:07 +00:00
|
|
|
# [[inputs.sysstat.device_tags.sda]]
|
|
|
|
# vg = "rootvg"
|
|
|
|
`
|
|
|
|
|
|
|
|
func (*Sysstat) SampleConfig() string {
|
|
|
|
return sampleConfig
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Sysstat) Gather(acc telegraf.Accumulator) error {
|
2016-04-02 07:43:29 +00:00
|
|
|
if s.interval == 0 {
|
|
|
|
if firstTimestamp.IsZero() {
|
|
|
|
firstTimestamp = time.Now()
|
|
|
|
} else {
|
2016-05-12 14:20:50 +00:00
|
|
|
s.interval = int(time.Since(firstTimestamp).Seconds() + 0.5)
|
2016-04-02 07:43:29 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
ts := time.Now().Add(time.Duration(s.interval) * time.Second)
|
2016-03-29 19:38:07 +00:00
|
|
|
if err := s.collect(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
errorChannel := make(chan error, len(s.Options)*2)
|
|
|
|
for option := range s.Options {
|
|
|
|
wg.Add(1)
|
|
|
|
go func(acc telegraf.Accumulator, option string) {
|
|
|
|
defer wg.Done()
|
|
|
|
if err := s.parse(acc, option, ts); err != nil {
|
|
|
|
errorChannel <- err
|
|
|
|
}
|
|
|
|
}(acc, option)
|
|
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
close(errorChannel)
|
|
|
|
|
|
|
|
errorStrings := []string{}
|
|
|
|
for err := range errorChannel {
|
|
|
|
errorStrings = append(errorStrings, err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
if _, err := os.Stat(s.tmpFile); err == nil {
|
|
|
|
if err := os.Remove(s.tmpFile); err != nil {
|
|
|
|
errorStrings = append(errorStrings, err.Error())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(errorStrings) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return errors.New(strings.Join(errorStrings, "\n"))
|
|
|
|
}
|
|
|
|
|
2016-04-29 01:23:45 +00:00
|
|
|
// collect collects sysstat data with the collector utility sadc.
|
|
|
|
// It runs the following command:
|
2016-03-29 19:38:07 +00:00
|
|
|
// Sadc -S <Activity1> -S <Activity2> ... <collectInterval> 2 tmpFile
|
2016-04-29 01:23:45 +00:00
|
|
|
// The above command collects system metrics during <collectInterval> and
|
|
|
|
// saves it in binary form to tmpFile.
|
2016-03-29 19:38:07 +00:00
|
|
|
func (s *Sysstat) collect() error {
|
|
|
|
options := []string{}
|
|
|
|
for _, act := range s.Activities {
|
|
|
|
options = append(options, "-S", act)
|
|
|
|
}
|
|
|
|
s.tmpFile = path.Join("/tmp", fmt.Sprintf("sysstat-%d", time.Now().Unix()))
|
2016-04-29 01:23:45 +00:00
|
|
|
// collectInterval has to be smaller than the telegraf data collection interval
|
|
|
|
collectInterval := s.interval - parseInterval
|
2016-04-02 07:43:29 +00:00
|
|
|
|
2016-04-29 01:23:45 +00:00
|
|
|
// If true, interval is not defined yet and Gather is run for the first time.
|
|
|
|
if collectInterval < 0 {
|
2016-04-02 07:43:29 +00:00
|
|
|
collectInterval = 1 // In that case we only collect for 1 second.
|
|
|
|
}
|
|
|
|
|
2016-03-29 19:38:07 +00:00
|
|
|
options = append(options, strconv.Itoa(collectInterval), "2", s.tmpFile)
|
|
|
|
cmd := execCommand(s.Sadc, options...)
|
2016-05-01 12:05:03 +00:00
|
|
|
out, err := internal.CombinedOutputTimeout(cmd, time.Second*time.Duration(collectInterval+parseInterval))
|
2016-03-29 19:38:07 +00:00
|
|
|
if err != nil {
|
2016-09-21 09:19:59 +00:00
|
|
|
if err := os.Remove(s.tmpFile); err != nil {
|
2016-09-30 21:37:56 +00:00
|
|
|
log.Printf("E! failed to remove tmp file after %s command: %s", strings.Join(cmd.Args, " "), err)
|
2016-09-21 09:19:59 +00:00
|
|
|
}
|
2016-05-01 12:05:03 +00:00
|
|
|
return fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out))
|
2016-03-29 19:38:07 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// parse runs Sadf on the previously saved tmpFile:
|
|
|
|
// Sadf -p -- -p <option> tmpFile
|
|
|
|
// and parses the output to add it to the telegraf.Accumulator acc.
|
|
|
|
func (s *Sysstat) parse(acc telegraf.Accumulator, option string, ts time.Time) error {
|
|
|
|
cmd := execCommand(s.Sadf, s.sadfOptions(option)...)
|
|
|
|
stdout, err := cmd.StdoutPipe()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := cmd.Start(); err != nil {
|
|
|
|
return fmt.Errorf("running command '%s' failed: %s", strings.Join(cmd.Args, " "), err)
|
|
|
|
}
|
|
|
|
|
|
|
|
r := bufio.NewReader(stdout)
|
|
|
|
csv := csv.NewReader(r)
|
|
|
|
csv.Comma = '\t'
|
|
|
|
csv.FieldsPerRecord = 6
|
|
|
|
var measurement string
|
|
|
|
// groupData to accumulate data when Group=true
|
|
|
|
type groupData struct {
|
|
|
|
tags map[string]string
|
|
|
|
fields map[string]interface{}
|
|
|
|
}
|
|
|
|
m := make(map[string]groupData)
|
|
|
|
for {
|
|
|
|
record, err := csv.Read()
|
|
|
|
if err == io.EOF {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
device := record[3]
|
|
|
|
value, err := strconv.ParseFloat(record[5], 64)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
tags := map[string]string{}
|
|
|
|
if device != "-" {
|
|
|
|
tags["device"] = device
|
|
|
|
if addTags, ok := s.DeviceTags[device]; ok {
|
|
|
|
for _, tag := range addTags {
|
|
|
|
for k, v := range tag {
|
|
|
|
tags[k] = v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if s.Group {
|
|
|
|
measurement = s.Options[option]
|
|
|
|
if _, ok := m[device]; !ok {
|
|
|
|
m[device] = groupData{
|
|
|
|
fields: make(map[string]interface{}),
|
|
|
|
tags: make(map[string]string),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
g, _ := m[device]
|
|
|
|
if len(g.tags) == 0 {
|
|
|
|
for k, v := range tags {
|
|
|
|
g.tags[k] = v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
g.fields[escape(record[4])] = value
|
|
|
|
} else {
|
|
|
|
measurement = s.Options[option] + "_" + escape(record[4])
|
|
|
|
fields := map[string]interface{}{
|
|
|
|
"value": value,
|
|
|
|
}
|
|
|
|
acc.AddFields(measurement, fields, tags, ts)
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
if s.Group {
|
|
|
|
for _, v := range m {
|
|
|
|
acc.AddFields(measurement, v.fields, v.tags, ts)
|
|
|
|
}
|
|
|
|
}
|
2016-04-29 01:23:45 +00:00
|
|
|
if err := internal.WaitTimeout(cmd, time.Second*5); err != nil {
|
|
|
|
return fmt.Errorf("command %s failed with %s",
|
|
|
|
strings.Join(cmd.Args, " "), err)
|
2016-03-29 19:38:07 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// sadfOptions creates the correct options for the sadf utility.
|
|
|
|
func (s *Sysstat) sadfOptions(activityOption string) []string {
|
|
|
|
options := []string{
|
|
|
|
"-p",
|
|
|
|
"--",
|
|
|
|
"-p",
|
|
|
|
}
|
|
|
|
|
|
|
|
opts := strings.Split(activityOption, " ")
|
|
|
|
options = append(options, opts...)
|
|
|
|
options = append(options, s.tmpFile)
|
|
|
|
|
|
|
|
return options
|
|
|
|
}
|
|
|
|
|
|
|
|
// escape removes % and / chars in field names
|
|
|
|
func escape(dirty string) string {
|
|
|
|
var fieldEscaper = strings.NewReplacer(
|
|
|
|
`%`, "pct_",
|
|
|
|
`/`, "_per_",
|
|
|
|
)
|
|
|
|
return fieldEscaper.Replace(dirty)
|
|
|
|
}
|
|
|
|
|
|
|
|
func init() {
|
2016-04-07 09:18:09 +00:00
|
|
|
s := Sysstat{
|
|
|
|
Group: true,
|
|
|
|
Activities: dfltActivities,
|
|
|
|
}
|
|
|
|
sadf, _ := exec.LookPath("sadf")
|
|
|
|
if len(sadf) > 0 {
|
|
|
|
s.Sadf = sadf
|
|
|
|
}
|
2016-03-29 19:38:07 +00:00
|
|
|
inputs.Add("sysstat", func() telegraf.Input {
|
2016-04-07 09:18:09 +00:00
|
|
|
return &s
|
2016-03-29 19:38:07 +00:00
|
|
|
})
|
|
|
|
}
|