filestat input plugin

closes #929
This commit is contained in:
Cameron Sparr 2016-04-20 19:51:25 -06:00
parent f818f44693
commit 88def9b71b
9 changed files with 436 additions and 0 deletions

View File

@ -39,6 +39,7 @@ based on _prefix_ in addition to globs. This means that a filter like
- [#1056](https://github.com/influxdata/telegraf/pull/1056): Don't allow inputs to overwrite host tags.
- [#1035](https://github.com/influxdata/telegraf/issues/1035): Add `user`, `exe`, `pidfile` tags to procstat plugin.
- [#1041](https://github.com/influxdata/telegraf/issues/1041): Add `n_cpus` field to the system plugin.
- [#1072](https://github.com/influxdata/telegraf/pull/1072): New Input Plugin: filestat.
### Bugfixes

View File

@ -580,6 +580,16 @@
# data_format = "influx"
# # Read stats about given file(s)
# [[inputs.filestat]]
# ## Files to gather stats about.
# ## These accept standard unix glob matching rules, but with the addition of
# ## ** as a "super asterisk". See https://github.com/gobwas/glob.
# files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"]
# ## If true, read the entire file and calculate an md5 checksum.
# md5 = false
# # Read metrics of haproxy, via socket or csv stats page
# [[inputs.haproxy]]
# ## An array of addresses to gather stats about. Specify an ip or hostname

View File

@ -14,6 +14,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/dovecot"
_ "github.com/influxdata/telegraf/plugins/inputs/elasticsearch"
_ "github.com/influxdata/telegraf/plugins/inputs/exec"
_ "github.com/influxdata/telegraf/plugins/inputs/filestat"
_ "github.com/influxdata/telegraf/plugins/inputs/github_webhooks"
_ "github.com/influxdata/telegraf/plugins/inputs/haproxy"
_ "github.com/influxdata/telegraf/plugins/inputs/http_response"

View File

@ -0,0 +1,37 @@
# filestat Input Plugin
The filestat plugin gathers metrics about file existence, size, and other stats.
### Configuration:
```toml
# Read stats about given file(s)
[[inputs.filestat]]
## Files to gather stats about.
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". See https://github.com/gobwas/glob.
files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"]
## If true, read the entire file and calculate an md5 checksum.
md5 = false
```
### Measurements & Fields:
- filestat
- exists (int, 0 | 1)
- size_bytes (int, bytes)
- md5_sum (optional, string)
### Tags:
- All measurements have the following tags:
- file (the path to the file: as specified in the config, or as matched by a glob)
### Example Output:
```
$ telegraf -config /etc/telegraf/telegraf.conf -input-filter filestat -test
* Plugin: filestat, Collection 1
> filestat,file=/tmp/foo/bar,host=tyrion exists=0i 1461203374493128216
> filestat,file=/Users/sparrc/ws/telegraf.conf,host=tyrion exists=1i,size_bytes=47894i 1461203374493199335
```

View File

@ -0,0 +1,185 @@
package filestat
import (
"crypto/md5"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/gobwas/glob"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
// sepStr is the OS-specific path separator as a string; findRootDir uses it
// to split paths into components and to join them back together.
var sepStr = string(os.PathSeparator)
// sampleConfig is the example TOML configuration returned by SampleConfig.
// The "files" key maps onto FileStat.Files and "md5" onto FileStat.Md5.
const sampleConfig = `
## Files to gather stats about.
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". See https://github.com/gobwas/glob.
files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"]
## If true, read the entire file and calculate an md5 checksum.
md5 = false
`
// FileStat is the filestat input plugin: it reports existence, size and
// optionally an md5 checksum for a configured list of files.
type FileStat struct {
	// Md5 enables reading each existing file in full to compute its md5
	// checksum.
	Md5 bool
	// Files holds the paths, or glob patterns, of the files to report on.
	Files []string

	// globs caches the compiled glob for each entry of Files, keyed by the
	// entry exactly as configured.
	globs map[string]glob.Glob
	// roots caches, per entry of Files, the directory findRootDir derived
	// for it; a glob walk for that entry starts there.
	roots map[string]string
}
// NewFileStat returns a FileStat whose glob and root-directory caches are
// allocated and empty; Md5 and Files are left at their zero values.
func NewFileStat() *FileStat {
	plugin := new(FileStat)
	plugin.globs = map[string]glob.Glob{}
	plugin.roots = map[string]string{}
	return plugin
}
// Description returns a one-line, human-readable summary of the plugin.
func (*FileStat) Description() string {
	const summary = "Read stats about given file(s)"
	return summary
}
// SampleConfig returns the example configuration held in sampleConfig.
func (*FileStat) SampleConfig() string {
	return sampleConfig
}
// Gather adds one "filestat" point to acc for every configured file, or for
// every path matched by a configured glob pattern.
//
// Each point is tagged with "file" and carries an "exists" field (0 or 1).
// For paths that exist, "size_bytes" is added and, when Md5 is enabled,
// "md5_sum" as well. A problem with one pattern or file does not stop the
// run: the messages are collected and returned together, space-separated, as
// a single error once all files have been processed.
func (f *FileStat) Gather(acc telegraf.Accumulator) error {
	// Tolerate a FileStat that was not created through NewFileStat: writing
	// to a nil map would panic.
	if f.globs == nil {
		f.globs = make(map[string]glob.Glob)
	}
	if f.roots == nil {
		f.roots = make(map[string]string)
	}

	var errs []string
	for _, pattern := range f.Files {
		// Compile each pattern once and reuse it on later calls.
		g, ok := f.globs[pattern]
		if !ok {
			var err error
			if g, err = glob.Compile(pattern, os.PathSeparator); err != nil {
				errs = append(errs, err.Error())
				continue
			}
			f.globs[pattern] = g
		}

		// Likewise cache the directory a walk for this pattern starts from.
		root, ok := f.roots[pattern]
		if !ok {
			root = findRootDir(pattern)
			f.roots[pattern] = root
		}

		// Do not walk the file tree if we don't have to: a pattern without
		// glob characters names exactly one path.
		matches := []string{pattern}
		if hasMeta(pattern) {
			matches = walkFilePath(root, g)
		}

		for _, file := range matches {
			tags := map[string]string{
				"file": file,
			}
			fields := map[string]interface{}{
				"exists": int64(0),
			}

			fileInfo, err := os.Stat(file)
			if os.IsNotExist(err) {
				// A missing file is a result, not an error: report exists=0.
				acc.AddFields("filestat", fields, tags)
				continue
			}
			if err != nil {
				errs = append(errs, err.Error())
				continue
			}

			fields["exists"] = int64(1)
			fields["size_bytes"] = fileInfo.Size()

			// The walk also yields directories; reading one always fails, so
			// only attempt a checksum on non-directories.
			if f.Md5 && !fileInfo.IsDir() {
				sum, err := getMd5(file)
				if err != nil {
					errs = append(errs, err.Error())
				} else {
					fields["md5_sum"] = sum
				}
			}
			acc.AddFields("filestat", fields, tags)
		}
	}

	if len(errs) > 0 {
		// Use an explicit verb so a '%' inside a path or message is not
		// interpreted as a formatting directive.
		return fmt.Errorf("%s", strings.Join(errs, " "))
	}
	return nil
}
// walkFilePath walks the tree rooted at root and returns every visited path
// that g matches, in the order the walk visits them. Errors reported during
// the walk are ignored; the result is an empty, non-nil slice when nothing
// matches.
func walkFilePath(root string, g glob.Glob) []string {
	hits := make([]string, 0)
	visit := func(candidate string, _ os.FileInfo, _ error) error {
		if !g.Match(candidate) {
			return nil
		}
		hits = append(hits, candidate)
		return nil
	}
	// Best effort: the walk's own error is deliberately discarded.
	_ = filepath.Walk(root, visit)
	return hits
}
// getMd5 streams the named file through an md5 hash and returns the digest
// as a lowercase hexadecimal string. If the file cannot be opened or read,
// the empty string and the underlying error are returned.
func getMd5(file string) (string, error) {
	fh, openErr := os.Open(file)
	if openErr != nil {
		return "", openErr
	}
	defer fh.Close()

	digest := md5.New()
	if _, copyErr := io.Copy(digest, fh); copyErr != nil {
		// A partially read file has no meaningful checksum.
		return "", copyErr
	}
	return fmt.Sprintf("%x", digest.Sum(nil)), nil
}
// findRootDir returns the longest leading run of directories in path that
// contains no glob characters, always terminated by a path separator. It is
// the directory from which a walk for the pattern can start. The last path
// element is never included, since it is the file name or file pattern.
//
// Absolute paths stay absolute and relative paths stay relative; a relative
// path with no usable directory prefix yields the current directory.
// ie:
//   /var/log/telegraf.conf -> /var/log/
//   /home/**               -> /home/
//   /home/*/**             -> /home/
//   /lib/share/*/*/**.txt  -> /lib/share/
//   logs/app/*.log         -> logs/app/
//   *.log                  -> ./
func findRootDir(path string) string {
	pathItems := strings.Split(path, sepStr)
	// strings.Split always returns at least one element, so this is safe.
	dirs := pathItems[:len(pathItems)-1]

	// Only start at the filesystem root when the path itself does;
	// otherwise a relative pattern would be walked from the wrong place.
	var root string
	if strings.HasPrefix(path, sepStr) {
		root = sepStr
	}
	for _, item := range dirs {
		if item == "" {
			// Produced by a leading or doubled separator.
			continue
		}
		if hasMeta(item) {
			break
		}
		root += item + sepStr
	}
	if root == "" {
		return "." + sepStr
	}
	return root
}
// hasMeta reports whether path contains at least one of the glob
// metacharacters '*', '?' or '['.
func hasMeta(path string) bool {
	const globChars = "*?["
	return strings.ContainsAny(path, globChars)
}
func init() {
inputs.Add("filestat", func() telegraf.Input {
return NewFileStat()
})
}

View File

@ -0,0 +1,197 @@
package filestat
import (
"runtime"
"strings"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
// TestGatherNoMd5 checks that, with md5 left disabled, Gather reports
// existence and size for explicitly listed files and only exists=0 for a
// file that is missing.
func TestGatherNoMd5(t *testing.T) {
	dir := getTestdataDir()
	fs := NewFileStat()
	fs.Files = []string{
		dir + "log1.log",
		dir + "log2.log",
		"/non/existant/file",
	}

	acc := testutil.Accumulator{}
	// A missing file is reported through exists=0, not through an error, so
	// the gather itself must succeed.
	assert.NoError(t, fs.Gather(&acc))

	for _, name := range []string{"log1.log", "log2.log"} {
		tags := map[string]string{
			"file": dir + name,
		}
		fields := map[string]interface{}{
			"size_bytes": int64(0),
			"exists":     int64(1),
		}
		acc.AssertContainsTaggedFields(t, "filestat", fields, tags)
	}

	missingTags := map[string]string{
		"file": "/non/existant/file",
	}
	missingFields := map[string]interface{}{
		"exists": int64(0),
	}
	acc.AssertContainsTaggedFields(t, "filestat", missingFields, missingTags)
}
// TestGatherExplicitFiles checks that, with md5 enabled, explicitly listed
// files are reported with size and checksum, while a missing file is
// reported with exists=0 only.
func TestGatherExplicitFiles(t *testing.T) {
	dir := getTestdataDir()
	fs := NewFileStat()
	fs.Md5 = true
	fs.Files = []string{
		dir + "log1.log",
		dir + "log2.log",
		"/non/existant/file",
	}

	acc := testutil.Accumulator{}
	// A missing file is reported through exists=0, not through an error, so
	// the gather itself must succeed.
	assert.NoError(t, fs.Gather(&acc))

	for _, name := range []string{"log1.log", "log2.log"} {
		tags := map[string]string{
			"file": dir + name,
		}
		fields := map[string]interface{}{
			"size_bytes": int64(0),
			"exists":     int64(1),
			// md5 of empty input
			"md5_sum": "d41d8cd98f00b204e9800998ecf8427e",
		}
		acc.AssertContainsTaggedFields(t, "filestat", fields, tags)
	}

	missingTags := map[string]string{
		"file": "/non/existant/file",
	}
	missingFields := map[string]interface{}{
		"exists": int64(0),
	}
	acc.AssertContainsTaggedFields(t, "filestat", missingFields, missingTags)
}
// TestGatherGlob checks that a single-asterisk pattern is expanded to the
// matching log files in the testdata directory and that each is reported
// with size and checksum.
func TestGatherGlob(t *testing.T) {
	dir := getTestdataDir()
	fs := NewFileStat()
	fs.Md5 = true
	fs.Files = []string{
		dir + "*.log",
	}

	acc := testutil.Accumulator{}
	// The pattern only matches the two readable log fixtures, so gathering
	// must not report an error.
	assert.NoError(t, fs.Gather(&acc))

	for _, name := range []string{"log1.log", "log2.log"} {
		tags := map[string]string{
			"file": dir + name,
		}
		fields := map[string]interface{}{
			"size_bytes": int64(0),
			"exists":     int64(1),
			// md5 of empty input
			"md5_sum": "d41d8cd98f00b204e9800998ecf8427e",
		}
		acc.AssertContainsTaggedFields(t, "filestat", fields, tags)
	}
}
// TestGatherSuperAsterisk checks that the "**" pattern picks up every
// fixture in the testdata directory, each with its size and checksum.
func TestGatherSuperAsterisk(t *testing.T) {
	dir := getTestdataDir()
	plugin := NewFileStat()
	plugin.Md5 = true
	plugin.Files = []string{dir + "**"}

	acc := testutil.Accumulator{}
	plugin.Gather(&acc)

	expected := []struct {
		name string
		size int64
		sum  string
	}{
		{"log1.log", 0, "d41d8cd98f00b204e9800998ecf8427e"},
		{"log2.log", 0, "d41d8cd98f00b204e9800998ecf8427e"},
		{"test.conf", 104, "5a7e9b77fa25e7bb411dbd17cf403c1f"},
	}
	for _, want := range expected {
		tags := map[string]string{
			"file": dir + want.name,
		}
		fields := map[string]interface{}{
			"size_bytes": want.size,
			"exists":     int64(1),
			"md5_sum":    want.sum,
		}
		acc.AssertContainsTaggedFields(t, "filestat", fields, tags)
	}
}
// TestFindRootDir checks the glob-free directory prefix that findRootDir
// derives for plain paths and for several glob patterns.
func TestFindRootDir(t *testing.T) {
	type rootCase struct {
		input  string
		output string
	}
	cases := []rootCase{
		{input: "/var/log/telegraf.conf", output: "/var/log/"},
		{input: "/home/**", output: "/home/"},
		{input: "/home/*/**", output: "/home/"},
		{input: "/lib/share/*/*/**.txt", output: "/lib/share/"},
	}

	for i := range cases {
		got := findRootDir(cases[i].input)
		assert.Equal(t, cases[i].output, got)
	}
}
// TestGetMd5 checks the checksum of the test.conf fixture and that a path
// that cannot be opened yields an error.
func TestGetMd5(t *testing.T) {
	fixture := getTestdataDir() + "test.conf"

	sum, err := getMd5(fixture)
	assert.NoError(t, err)
	assert.Equal(t, "5a7e9b77fa25e7bb411dbd17cf403c1f", sum)

	// Only the error matters for a missing file.
	_, err = getMd5("/tmp/foo/bar/fooooo")
	assert.Error(t, err)
}
// getTestdataDir returns the source file path of its caller with the first
// occurrence of "filestat_test.go" replaced by "testdata/". When called from
// this test file, that is the path of the adjacent testdata directory,
// including the trailing slash.
func getTestdataDir() string {
	// Skip 1 frame so the reported file is the caller's, not this helper's.
	_, callerFile, _, _ := runtime.Caller(1)
	const testFile = "filestat_test.go"
	return strings.Replace(callerFile, testFile, "testdata/", 1)
}

View File

View File

View File

@ -0,0 +1,5 @@
# this is a fake testing config file
# for testing the filestat plugin
option1 = "foo"
option2 = "bar"