From 88def9b71b2cbf4886f13031644489f143da7ee5 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 20 Apr 2016 19:51:25 -0600 Subject: [PATCH] filestat input plugin closes #929 --- CHANGELOG.md | 1 + etc/telegraf.conf | 10 ++ plugins/inputs/all/all.go | 1 + plugins/inputs/filestat/README.md | 37 ++++ plugins/inputs/filestat/filestat.go | 185 +++++++++++++++++++ plugins/inputs/filestat/filestat_test.go | 197 +++++++++++++++++++++ plugins/inputs/filestat/testdata/log1.log | 0 plugins/inputs/filestat/testdata/log2.log | 0 plugins/inputs/filestat/testdata/test.conf | 5 + 9 files changed, 436 insertions(+) create mode 100644 plugins/inputs/filestat/README.md create mode 100644 plugins/inputs/filestat/filestat.go create mode 100644 plugins/inputs/filestat/filestat_test.go create mode 100644 plugins/inputs/filestat/testdata/log1.log create mode 100644 plugins/inputs/filestat/testdata/log2.log create mode 100644 plugins/inputs/filestat/testdata/test.conf diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a1cdcd43..6b09616ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1056](https://github.com/influxdata/telegraf/pull/1056): Don't allow inputs to overwrite host tags. - [#1035](https://github.com/influxdata/telegraf/issues/1035): Add `user`, `exe`, `pidfile` tags to procstat plugin. - [#1041](https://github.com/influxdata/telegraf/issues/1041): Add `n_cpus` field to the system plugin. +- [#1072](https://github.com/influxdata/telegraf/pull/1072): New Input Plugin: filestat. ### Bugfixes diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 1623e5636..627f6d64c 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -580,6 +580,16 @@ # data_format = "influx" +# # Read stats about given file(s) +# [[inputs.filestat]] +# ## Files to gather stats about. +# ## These accept standard unix glob matching rules, but with the addition of +# ## ** as a "super asterisk". 
See https://github.com/gobwas/glob. +# files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] +# ## If true, read the entire file and calculate an md5 checksum. +# md5 = false + + # # Read metrics of haproxy, via socket or csv stats page # [[inputs.haproxy]] # ## An array of address to gather stats about. Specify an ip on hostname diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 3f56ee541..36526f4d1 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -14,6 +14,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/dovecot" _ "github.com/influxdata/telegraf/plugins/inputs/elasticsearch" _ "github.com/influxdata/telegraf/plugins/inputs/exec" + _ "github.com/influxdata/telegraf/plugins/inputs/filestat" _ "github.com/influxdata/telegraf/plugins/inputs/github_webhooks" _ "github.com/influxdata/telegraf/plugins/inputs/haproxy" _ "github.com/influxdata/telegraf/plugins/inputs/http_response" diff --git a/plugins/inputs/filestat/README.md b/plugins/inputs/filestat/README.md new file mode 100644 index 000000000..bfa51011c --- /dev/null +++ b/plugins/inputs/filestat/README.md @@ -0,0 +1,37 @@ +# filestat Input Plugin + +The filestat plugin gathers metrics about file existence, size, and other stats. + +### Configuration: + +```toml +# Read stats about given file(s) +[[inputs.filestat]] + ## Files to gather stats about. + ## These accept standard unix glob matching rules, but with the addition of + ## ** as a "super asterisk". See https://github.com/gobwas/glob. + files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] + ## If true, read the entire file and calculate an md5 checksum. 
+ md5 = false +``` + +### Measurements & Fields: + +- filestat + - exists (int, 0 | 1) + - size_bytes (int, bytes) + - md5_sum (optional, string) + +### Tags: + +- All measurements have the following tags: + - file (the path to the file, as specified in the config) + +### Example Output: + +``` +$ telegraf -config /etc/telegraf/telegraf.conf -input-filter filestat -test +* Plugin: filestat, Collection 1 +> filestat,file=/tmp/foo/bar,host=tyrion exists=0i 1461203374493128216 +> filestat,file=/Users/sparrc/ws/telegraf.conf,host=tyrion exists=1i,size_bytes=47894i 1461203374493199335 +``` diff --git a/plugins/inputs/filestat/filestat.go b/plugins/inputs/filestat/filestat.go new file mode 100644 index 000000000..c0b70c878 --- /dev/null +++ b/plugins/inputs/filestat/filestat.go @@ -0,0 +1,185 @@ +package filestat + +import ( + "crypto/md5" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/gobwas/glob" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +var sepStr = fmt.Sprintf("%v", string(os.PathSeparator)) + +const sampleConfig = ` + ## Files to gather stats about. + ## These accept standard unix glob matching rules, but with the addition of + ## ** as a "super asterisk". See https://github.com/gobwas/glob. + ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] + ## If true, read the entire file and calculate an md5 checksum. 
+ md5 = false +` + +type FileStat struct { + Md5 bool + Files []string + + // maps full file paths to glob obj + globs map[string]glob.Glob + // maps full file paths to their root dir + roots map[string]string +} + +func NewFileStat() *FileStat { + return &FileStat{ + globs: make(map[string]glob.Glob), + roots: make(map[string]string), + } +} + +func (_ *FileStat) Description() string { + return "Read stats about given file(s)" +} + +func (_ *FileStat) SampleConfig() string { return sampleConfig } + +func (f *FileStat) Gather(acc telegraf.Accumulator) error { + var errS string + var err error + + for _, filepath := range f.Files { + // Get the compiled glob object for this filepath + g, ok := f.globs[filepath] + if !ok { + if g, err = glob.Compile(filepath, os.PathSeparator); err != nil { + errS += err.Error() + " " + continue + } + f.globs[filepath] = g + } + // Get the root directory for this filepath + root, ok := f.roots[filepath] + if !ok { + root = findRootDir(filepath) + f.roots[filepath] = root + } + + var matches []string + // Do not walk file tree if we don't have to. 
+ if !hasMeta(filepath) { + matches = []string{filepath} + } else { + matches = walkFilePath(f.roots[filepath], f.globs[filepath]) + } + for _, file := range matches { + tags := map[string]string{ + "file": file, + } + fields := map[string]interface{}{ + "exists": int64(0), + } + // Get file stats + fileInfo, err := os.Stat(file) + if os.IsNotExist(err) { + // file doesn't exist, so move on to the next + acc.AddFields("filestat", fields, tags) + continue + } + if err != nil { + errS += err.Error() + " " + continue + } + + // file exists and no errors encountered + fields["exists"] = int64(1) + fields["size_bytes"] = fileInfo.Size() + + if f.Md5 { + md5, err := getMd5(file) + if err != nil { + errS += err.Error() + " " + } else { + fields["md5_sum"] = md5 + } + } + + acc.AddFields("filestat", fields, tags) + } + } + + if errS != "" { + return fmt.Errorf(errS) + } + return nil +} + +// walk the filepath from the given root and return a list of files that match +// the given glob. +func walkFilePath(root string, g glob.Glob) []string { + matchedFiles := []string{} + walkfn := func(path string, _ os.FileInfo, _ error) error { + if g.Match(path) { + matchedFiles = append(matchedFiles, path) + } + return nil + } + filepath.Walk(root, walkfn) + return matchedFiles +} + +// Read given file and calculate an md5 hash. +func getMd5(file string) (string, error) { + of, err := os.Open(file) + if err != nil { + return "", err + } + defer of.Close() + + hash := md5.New() + _, err = io.Copy(hash, of) + if err != nil { + // fatal error + return "", err + } + return fmt.Sprintf("%x", hash.Sum(nil)), nil +} + +// find the root dir of the given path (could include globs). 
+// ie: +// /var/log/telegraf.conf -> /var/log/ +// /home/** -> /home/ +// /home/*/** -> /home/ +// /lib/share/*/*/**.txt -> /lib/share/ +func findRootDir(path string) string { + pathItems := strings.Split(path, sepStr) + outpath := sepStr + for i, item := range pathItems { + if i == len(pathItems)-1 { + break + } + if item == "" { + continue + } + if hasMeta(item) { + break + } + outpath += item + sepStr + } + return outpath +} + +// hasMeta reports whether path contains any magic glob characters. +func hasMeta(path string) bool { + return strings.IndexAny(path, "*?[") >= 0 +} + +func init() { + inputs.Add("filestat", func() telegraf.Input { + return NewFileStat() + }) +} diff --git a/plugins/inputs/filestat/filestat_test.go b/plugins/inputs/filestat/filestat_test.go new file mode 100644 index 000000000..f8977c920 --- /dev/null +++ b/plugins/inputs/filestat/filestat_test.go @@ -0,0 +1,197 @@ +package filestat + +import ( + "runtime" + "strings" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" +) + +func TestGatherNoMd5(t *testing.T) { + dir := getTestdataDir() + fs := NewFileStat() + fs.Files = []string{ + dir + "log1.log", + dir + "log2.log", + "/non/existant/file", + } + + acc := testutil.Accumulator{} + fs.Gather(&acc) + + tags1 := map[string]string{ + "file": dir + "log1.log", + } + fields1 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + } + acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1) + + tags2 := map[string]string{ + "file": dir + "log2.log", + } + fields2 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + } + acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2) + + tags3 := map[string]string{ + "file": "/non/existant/file", + } + fields3 := map[string]interface{}{ + "exists": int64(0), + } + acc.AssertContainsTaggedFields(t, "filestat", fields3, tags3) +} + +func TestGatherExplicitFiles(t *testing.T) { + dir := getTestdataDir() + 
fs := NewFileStat() + fs.Md5 = true + fs.Files = []string{ + dir + "log1.log", + dir + "log2.log", + "/non/existant/file", + } + + acc := testutil.Accumulator{} + fs.Gather(&acc) + + tags1 := map[string]string{ + "file": dir + "log1.log", + } + fields1 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + "md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1) + + tags2 := map[string]string{ + "file": dir + "log2.log", + } + fields2 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + "md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2) + + tags3 := map[string]string{ + "file": "/non/existant/file", + } + fields3 := map[string]interface{}{ + "exists": int64(0), + } + acc.AssertContainsTaggedFields(t, "filestat", fields3, tags3) +} + +func TestGatherGlob(t *testing.T) { + dir := getTestdataDir() + fs := NewFileStat() + fs.Md5 = true + fs.Files = []string{ + dir + "*.log", + } + + acc := testutil.Accumulator{} + fs.Gather(&acc) + + tags1 := map[string]string{ + "file": dir + "log1.log", + } + fields1 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + "md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1) + + tags2 := map[string]string{ + "file": dir + "log2.log", + } + fields2 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + "md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2) +} + +func TestGatherSuperAsterisk(t *testing.T) { + dir := getTestdataDir() + fs := NewFileStat() + fs.Md5 = true + fs.Files = []string{ + dir + "**", + } + + acc := testutil.Accumulator{} + fs.Gather(&acc) + + tags1 := map[string]string{ + "file": dir + "log1.log", + } + fields1 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + 
"md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1) + + tags2 := map[string]string{ + "file": dir + "log2.log", + } + fields2 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + "md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2) + + tags3 := map[string]string{ + "file": dir + "test.conf", + } + fields3 := map[string]interface{}{ + "size_bytes": int64(104), + "exists": int64(1), + "md5_sum": "5a7e9b77fa25e7bb411dbd17cf403c1f", + } + acc.AssertContainsTaggedFields(t, "filestat", fields3, tags3) +} + +func TestFindRootDir(t *testing.T) { + tests := []struct { + input string + output string + }{ + {"/var/log/telegraf.conf", "/var/log/"}, + {"/home/**", "/home/"}, + {"/home/*/**", "/home/"}, + {"/lib/share/*/*/**.txt", "/lib/share/"}, + } + + for _, test := range tests { + actual := findRootDir(test.input) + assert.Equal(t, test.output, actual) + } +} + +func TestGetMd5(t *testing.T) { + dir := getTestdataDir() + md5, err := getMd5(dir + "test.conf") + assert.NoError(t, err) + assert.Equal(t, "5a7e9b77fa25e7bb411dbd17cf403c1f", md5) + + md5, err = getMd5("/tmp/foo/bar/fooooo") + assert.Error(t, err) +} + +func getTestdataDir() string { + _, filename, _, _ := runtime.Caller(1) + return strings.Replace(filename, "filestat_test.go", "testdata/", 1) +} diff --git a/plugins/inputs/filestat/testdata/log1.log b/plugins/inputs/filestat/testdata/log1.log new file mode 100644 index 000000000..e69de29bb diff --git a/plugins/inputs/filestat/testdata/log2.log b/plugins/inputs/filestat/testdata/log2.log new file mode 100644 index 000000000..e69de29bb diff --git a/plugins/inputs/filestat/testdata/test.conf b/plugins/inputs/filestat/testdata/test.conf new file mode 100644 index 000000000..a06111991 --- /dev/null +++ b/plugins/inputs/filestat/testdata/test.conf @@ -0,0 +1,5 @@ +# this is a fake testing config file +# for testing the 
filestat plugin + +option1 = "foo" +option2 = "bar"