diff --git a/Gopkg.lock b/Gopkg.lock index a2df3c81d..d043bccd0 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -665,6 +665,14 @@ pruneopts = "" revision = "615a14ed75099c9eaac6949e22ac2341bf9d3197" +[[projects]] + digest = "1:a12b6f20a7e5eb7412d2e5cd15e1262a021f735fa958d664d9e7ba2160eefd0a" + name = "github.com/karrick/godirwalk" + packages = ["."] + pruneopts = "" + revision = "2de2192f9e35ce981c152a873ed943b93b79ced4" + version = "v1.7.5" + [[projects]] branch = "master" digest = "1:63e7368fcf6b54804076eaec26fd9cf0c4466166b272393db4b93102e1e962df" @@ -1480,6 +1488,7 @@ "github.com/jackc/pgx/pgtype", "github.com/jackc/pgx/stdlib", "github.com/kardianos/service", + "github.com/karrick/godirwalk", "github.com/kballard/go-shellquote", "github.com/matttproud/golang_protobuf_extensions/pbutil", "github.com/miekg/dns", diff --git a/Gopkg.toml b/Gopkg.toml index 791e265e8..80df324dc 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -245,3 +245,8 @@ [[override]] name = "github.com/vishvananda/netlink" revision = "b2de5d10e38ecce8607e6b438b6d174f389a004e" + +[[constraint]] + name = "github.com/karrick/godirwalk" + version = "1.7.5" + diff --git a/internal/globpath/globpath.go b/internal/globpath/globpath.go index a08731ad9..fc6a43618 100644 --- a/internal/globpath/globpath.go +++ b/internal/globpath/globpath.go @@ -1,7 +1,6 @@ package globpath import ( - "fmt" "os" "path/filepath" "strings" @@ -9,12 +8,10 @@ import ( "github.com/gobwas/glob" ) -var sepStr = fmt.Sprintf("%v", string(os.PathSeparator)) - type GlobPath struct { path string hasMeta bool - hasSuperMeta bool + HasSuperMeta bool rootGlob string g glob.Glob } @@ -22,13 +19,13 @@ type GlobPath struct { func Compile(path string) (*GlobPath, error) { out := GlobPath{ hasMeta: hasMeta(path), - hasSuperMeta: hasSuperMeta(path), + HasSuperMeta: hasSuperMeta(path), path: path, } // if there are no glob meta characters in the path, don't bother compiling // a glob object - if !out.hasMeta || !out.hasSuperMeta { + if !out.hasMeta 
|| !out.HasSuperMeta { return &out, nil } @@ -43,6 +40,7 @@ func Compile(path string) (*GlobPath, error) { return &out, nil } +// Match returns all files matching the expression func (g *GlobPath) Match() map[string]os.FileInfo { out := make(map[string]os.FileInfo) if !g.hasMeta { @@ -52,7 +50,7 @@ func (g *GlobPath) Match() map[string]os.FileInfo { } return out } - if !g.hasSuperMeta { + if !g.HasSuperMeta { files, _ := filepath.Glob(g.path) for _, file := range files { info, err := os.Stat(file) @@ -79,6 +77,32 @@ func (g *GlobPath) Match() map[string]os.FileInfo { return out } +// MatchString test a string against the glob +func (g *GlobPath) MatchString(path string) bool { + if !g.HasSuperMeta { + res, _ := filepath.Match(g.path, path) + return res + } + return g.g.Match(path) +} + +// GetRoots returns a list of files and directories which should be optimal +// prefixes of matching files when you have a super-meta in your expression : +// - any directory under these roots may contain a matching file +// - no file outside of these roots can match the pattern +// Note that it returns both files and directories. +func (g *GlobPath) GetRoots() []string { + if !g.hasMeta { + return []string{g.path} + } + if !g.HasSuperMeta { + matches, _ := filepath.Glob(g.path) + return matches + } + roots, _ := filepath.Glob(g.rootGlob) + return roots +} + // hasMeta reports whether path contains any magic glob characters. func hasMeta(path string) bool { return strings.IndexAny(path, "*?[") >= 0 diff --git a/plugins/inputs/filecount/README.md b/plugins/inputs/filecount/README.md index cf11b7d90..260d18413 100644 --- a/plugins/inputs/filecount/README.md +++ b/plugins/inputs/filecount/README.md @@ -8,8 +8,17 @@ Counts files in directories that match certain criteria. # Count files in a directory [[inputs.filecount]] ## Directory to gather stats about. + ## deprecated in 1.9; use the directories option directory = "/var/cache/apt/archives" + ## Directories to gather stats about. 
+ ## This accepts standard unix glob matching rules, but with the addition of + ## ** as a "super asterisk". e.g.: + ## /var/log/** -> recursively find all directories in /var/log and count files in each directory + ## /var/log/*/* -> find all directories with a parent dir in /var/log and count files in each directory + ## /var/log -> count all files in /var/log and all of its subdirectories + directories = ["/var/cache/apt/archives"] + ## Only count files that match the name pattern. Defaults to "*". name = "*.deb" @@ -35,16 +44,17 @@ Counts files in directories that match certain criteria. - filecount - count (int) + - size_bytes (int) ### Tags: - All measurements have the following tags: - - directory (the directory path, as specified in the config) + - directory (the directory path) ### Example Output: ``` $ telegraf --config /etc/telegraf/telegraf.conf --input-filter filecount --test -> filecount,directory=/var/cache/apt,host=czernobog count=7i 1530034445000000000 -> filecount,directory=/tmp,host=czernobog count=17i 1530034445000000000 +> filecount,directory=/var/cache/apt,host=czernobog count=7i,size_bytes=7438336i 1530034445000000000 +> filecount,directory=/tmp,host=czernobog count=17i,size_bytes=28934786i 1530034445000000000 ``` diff --git a/plugins/inputs/filecount/filecount.go b/plugins/inputs/filecount/filecount.go index d613f3b77..f8840721b 100644 --- a/plugins/inputs/filecount/filecount.go +++ b/plugins/inputs/filecount/filecount.go @@ -1,15 +1,16 @@ package filecount import ( - "fmt" "os" "path/filepath" + "strings" "time" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/internal/globpath" "github.com/influxdata/telegraf/plugins/inputs" + "github.com/karrick/godirwalk" ) const sampleConfig = ` @@ -55,16 +56,17 @@ type FileCount struct { Size internal.Size MTime internal.Duration `toml:"mtime"` fileFilters []fileFilterFunc + globPaths []globpath.GlobPath } -type fileFilterFunc func(os.FileInfo) (bool, 
error) - func (_ *FileCount) Description() string { return "Count files in a directory" } func (_ *FileCount) SampleConfig() string { return sampleConfig } +type fileFilterFunc func(os.FileInfo) (bool, error) + func rejectNilFilters(filters []fileFilterFunc) []fileFilterFunc { filtered := make([]fileFilterFunc, 0, len(filters)) for _, f := range filters { @@ -137,48 +139,6 @@ func absDuration(x time.Duration) time.Duration { return x } -func (fc *FileCount) count(acc telegraf.Accumulator, basedir string, recursive bool) { - numFiles := int64(0) - walkFn := func(path string, file os.FileInfo, err error) error { - if err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - if path == basedir { - return nil - } - match, err := fc.filter(file) - if err != nil { - acc.AddError(err) - return nil - } - if match { - numFiles++ - } - if !recursive && file.IsDir() { - return filepath.SkipDir - } - return nil - } - - err := filepath.Walk(basedir, walkFn) - if err != nil { - acc.AddError(err) - return - } - - acc.AddFields("filecount", - map[string]interface{}{ - "count": numFiles, - }, - map[string]string{ - "directory": basedir, - }, - ) -} - func (fc *FileCount) initFileFilters() { filters := []fileFilterFunc{ fc.nameFilter(), @@ -189,6 +149,66 @@ func (fc *FileCount) initFileFilters() { fc.fileFilters = rejectNilFilters(filters) } +func (fc *FileCount) count(acc telegraf.Accumulator, basedir string, glob globpath.GlobPath) { + childCount := make(map[string]int64) + childSize := make(map[string]int64) + walkFn := func(path string, de *godirwalk.Dirent) error { + if path == basedir { + return nil + } + file, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + match, err := fc.filter(file) + if err != nil { + acc.AddError(err) + return nil + } + if match { + parent := path[:strings.LastIndex(path, "/")] + childCount[parent]++ + childSize[parent] += file.Size() + } + if file.IsDir() && !fc.Recursive && 
!glob.HasSuperMeta { + return filepath.SkipDir + } + return nil + } + postChildrenFn := func(path string, de *godirwalk.Dirent) error { + if glob.MatchString(path) { + gauge := map[string]interface{}{ + "count": childCount[path], + "size_bytes": childSize[path], + } + acc.AddGauge("filecount", gauge, + map[string]string{ + "directory": path, + }) + } + parent := path[:strings.LastIndex(path, "/")] + if fc.Recursive { + childCount[parent] += childCount[path] + childSize[parent] += childSize[path] + } + delete(childCount, path) + delete(childSize, path) + return nil + } + + err := godirwalk.Walk(basedir, &godirwalk.Options{ + Callback: walkFn, + PostChildrenCallback: postChildrenFn, + Unsorted: true, + }) + if err != nil { + acc.AddError(err) + } +} + func (fc *FileCount) filter(file os.FileInfo) (bool, error) { if fc.fileFilters == nil { fc.initFileFilters() @@ -208,19 +228,30 @@ func (fc *FileCount) filter(file os.FileInfo) (bool, error) { } func (fc *FileCount) Gather(acc telegraf.Accumulator) error { - globDirs := fc.getDirs() - dirs, err := getCompiledDirs(globDirs) - if err != nil { - return err + if fc.globPaths == nil { + fc.initGlobPaths(acc) } - for _, dir := range dirs { - fc.count(acc, dir, fc.Recursive) + for _, glob := range fc.globPaths { + for _, dir := range onlyDirectories(glob.GetRoots()) { + fc.count(acc, dir, glob) + } } return nil } +func onlyDirectories(directories []string) []string { + out := make([]string, 0) + for _, path := range directories { + info, err := os.Stat(path) + if err == nil && info.IsDir() { + out = append(out, path) + } + } + return out +} + func (fc *FileCount) getDirs() []string { dirs := make([]string, len(fc.Directories)) for i, dir := range fc.Directories { @@ -234,21 +265,16 @@ func (fc *FileCount) getDirs() []string { return dirs } -func getCompiledDirs(dirs []string) ([]string, error) { - compiledDirs := []string{} - for _, dir := range dirs { - g, err := globpath.Compile(dir) +func (fc *FileCount) initGlobPaths(acc 
telegraf.Accumulator) { + fc.globPaths = []globpath.GlobPath{} + for _, directory := range fc.getDirs() { + glob, err := globpath.Compile(directory) if err != nil { - return nil, fmt.Errorf("could not compile glob %v: %v", dir, err) - } - - for path, file := range g.Match() { - if file.IsDir() { - compiledDirs = append(compiledDirs, path) - } + acc.AddError(err) + } else { + fc.globPaths = append(fc.globPaths, *glob) } } - return compiledDirs, nil } func NewFileCount() *FileCount { diff --git a/plugins/inputs/filecount/filecount_test.go b/plugins/inputs/filecount/filecount_test.go index 7a48c2166..2294e8ce6 100644 --- a/plugins/inputs/filecount/filecount_test.go +++ b/plugins/inputs/filecount/filecount_test.go @@ -14,108 +14,112 @@ import ( ) func TestNoFilters(t *testing.T) { - fc := getNoFilterFileCount("*") - matches := []string{"foo", "bar", "baz", "qux", "subdir/", "subdir/quux", "subdir/quuz"} - - acc := testutil.Accumulator{} - acc.GatherError(fc.Gather) - - require.True(t, assertFileCount(&acc, "testdata", len(matches))) + fc := getNoFilterFileCount() + matches := []string{"foo", "bar", "baz", "qux", + "subdir/", "subdir/quux", "subdir/quuz", + "subdir/nested2", "subdir/nested2/qux"} + fileCountEquals(t, fc, len(matches), 9084) } func TestNoFiltersOnChildDir(t *testing.T) { - fc := getNoFilterFileCount("testdata/*") - matches := []string{"subdir/quux", "subdir/quuz"} + fc := getNoFilterFileCount() + fc.Directories = []string{getTestdataDir() + "/*"} + matches := []string{"subdir/quux", "subdir/quuz", + "subdir/nested2/qux", "subdir/nested2"} + tags := map[string]string{"directory": getTestdataDir() + "/subdir"} acc := testutil.Accumulator{} acc.GatherError(fc.Gather) - require.True(t, assertFileCount(&acc, "testdata/subdir", len(matches))) + require.True(t, acc.HasPoint("filecount", tags, "count", int64(len(matches)))) + require.True(t, acc.HasPoint("filecount", tags, "size_bytes", int64(4542))) +} + +func TestNoRecursiveButSuperMeta(t *testing.T) { + fc := 
getNoFilterFileCount() + fc.Recursive = false + fc.Directories = []string{getTestdataDir() + "/**"} + matches := []string{"subdir/quux", "subdir/quuz", "subdir/nested2"} + + tags := map[string]string{"directory": getTestdataDir() + "/subdir"} + acc := testutil.Accumulator{} + acc.GatherError(fc.Gather) + + require.True(t, acc.HasPoint("filecount", tags, "count", int64(len(matches)))) + require.True(t, acc.HasPoint("filecount", tags, "size_bytes", int64(4096))) } func TestNameFilter(t *testing.T) { - fc := getNoFilterFileCount("testdata") + fc := getNoFilterFileCount() fc.Name = "ba*" matches := []string{"bar", "baz"} - - acc := testutil.Accumulator{} - acc.GatherError(fc.Gather) - - require.True(t, assertFileCount(&acc, "testdata", len(matches))) + fileCountEquals(t, fc, len(matches), 0) } func TestNonRecursive(t *testing.T) { - fc := getNoFilterFileCount("testdata") + fc := getNoFilterFileCount() fc.Recursive = false matches := []string{"foo", "bar", "baz", "qux", "subdir"} + fileCountEquals(t, fc, len(matches), 4542) +} + +func TestDoubleAndSimpleStar(t *testing.T) { + fc := getNoFilterFileCount() + fc.Directories = []string{getTestdataDir() + "/**/*"} + matches := []string{"qux"} + tags := map[string]string{"directory": getTestdataDir() + "/subdir/nested2"} acc := testutil.Accumulator{} acc.GatherError(fc.Gather) - require.True(t, assertFileCount(&acc, "testdata", len(matches))) + require.True(t, acc.HasPoint("filecount", tags, "count", int64(len(matches)))) + require.True(t, acc.HasPoint("filecount", tags, "size_bytes", int64(446))) } func TestRegularOnlyFilter(t *testing.T) { - fc := getNoFilterFileCount("testdata") + fc := getNoFilterFileCount() fc.RegularOnly = true matches := []string{ "foo", "bar", "baz", "qux", "subdir/quux", "subdir/quuz", - } - - acc := testutil.Accumulator{} - acc.GatherError(fc.Gather) - - require.True(t, assertFileCount(&acc, "testdata", len(matches))) + "subdir/nested2/qux"} + fileCountEquals(t, fc, len(matches), 892) } func 
TestSizeFilter(t *testing.T) { - fc := getNoFilterFileCount("testdata") + fc := getNoFilterFileCount() fc.Size = internal.Size{Size: -100} - matches := []string{"foo", "bar", "baz", "subdir/quux", "subdir/quuz"} - - acc := testutil.Accumulator{} - acc.GatherError(fc.Gather) - - require.True(t, assertFileCount(&acc, "testdata", len(matches))) + matches := []string{"foo", "bar", "baz", + "subdir/quux", "subdir/quuz"} + fileCountEquals(t, fc, len(matches), 0) fc.Size = internal.Size{Size: 100} - matches = []string{"qux"} - - acc = testutil.Accumulator{} - acc.GatherError(fc.Gather) - - require.True(t, assertFileCount(&acc, "testdata", len(matches))) + matches = []string{"qux", "subdir/nested2//qux"} + fileCountEquals(t, fc, len(matches), 892) } func TestMTimeFilter(t *testing.T) { - oldFile := filepath.Join(getTestdataDir("testdata"), "baz") + oldFile := filepath.Join(getTestdataDir(), "baz") mtime := time.Date(1979, time.December, 14, 18, 25, 5, 0, time.UTC) if err := os.Chtimes(oldFile, mtime, mtime); err != nil { t.Skip("skipping mtime filter test.") } fileAge := time.Since(mtime) - (60 * time.Second) - fc := getNoFilterFileCount("testdata") + fc := getNoFilterFileCount() fc.MTime = internal.Duration{Duration: -fileAge} - matches := []string{"foo", "bar", "qux", "subdir/", "subdir/quux", "subdir/quuz"} - - acc := testutil.Accumulator{} - acc.GatherError(fc.Gather) - - require.True(t, assertFileCount(&acc, "testdata", len(matches))) + matches := []string{"foo", "bar", "qux", + "subdir/", "subdir/quux", "subdir/quuz", + "sbudir/nested2", "subdir/nested2/qux"} + fileCountEquals(t, fc, len(matches), 9084) fc.MTime = internal.Duration{Duration: fileAge} matches = []string{"baz"} - - acc = testutil.Accumulator{} - acc.GatherError(fc.Gather) - - require.True(t, assertFileCount(&acc, "testdata", len(matches))) + fileCountEquals(t, fc, len(matches), 0) } -func getNoFilterFileCount(dir string) FileCount { +func getNoFilterFileCount() FileCount { return FileCount{ - 
Directories: []string{getTestdataDir(dir)}, + Directories: []string{getTestdataDir()}, Name: "*", Recursive: true, RegularOnly: false, @@ -125,12 +129,15 @@ func getNoFilterFileCount(dir string) FileCount { } } -func getTestdataDir(dir string) string { +func getTestdataDir() string { _, filename, _, _ := runtime.Caller(1) - return strings.Replace(filename, "filecount_test.go", dir, 1) + return strings.Replace(filename, "filecount_test.go", "testdata", 1) } -func assertFileCount(acc *testutil.Accumulator, expectedDir string, expectedCount int) bool { - tags := map[string]string{"directory": getTestdataDir(expectedDir)} - return acc.HasPoint("filecount", tags, "count", int64(expectedCount)) +func fileCountEquals(t *testing.T, fc FileCount, expectedCount int, expectedSize int) { + tags := map[string]string{"directory": getTestdataDir()} + acc := testutil.Accumulator{} + acc.GatherError(fc.Gather) + require.True(t, acc.HasPoint("filecount", tags, "count", int64(expectedCount))) + require.True(t, acc.HasPoint("filecount", tags, "size_bytes", int64(expectedSize))) } diff --git a/plugins/inputs/filecount/testdata/subdir/nested2/qux b/plugins/inputs/filecount/testdata/subdir/nested2/qux new file mode 100644 index 000000000..c7288f23d --- /dev/null +++ b/plugins/inputs/filecount/testdata/subdir/nested2/qux @@ -0,0 +1,7 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do +eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad +minim veniam, quis nostrud exercitation ullamco laboris nisi ut +aliquip ex ea commodo consequat. Duis aute irure dolor in +reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla +pariatur. Excepteur sint occaecat cupidatat non proident, sunt in +culpa qui officia deserunt mollit anim id est laborum.