add ZFS plugin

This commit is contained in:
Roman Statsevich 2015-11-03 18:53:09 +03:00
parent 422d240afb
commit 85c41a3a69
4 changed files with 919 additions and 0 deletions

View File

@ -27,5 +27,6 @@ import (
_ "github.com/influxdb/telegraf/plugins/rethinkdb" _ "github.com/influxdb/telegraf/plugins/rethinkdb"
_ "github.com/influxdb/telegraf/plugins/statsd" _ "github.com/influxdb/telegraf/plugins/statsd"
_ "github.com/influxdb/telegraf/plugins/system" _ "github.com/influxdb/telegraf/plugins/system"
_ "github.com/influxdb/telegraf/plugins/zfs"
_ "github.com/influxdb/telegraf/plugins/zookeeper" _ "github.com/influxdb/telegraf/plugins/zookeeper"
) )

227
plugins/zfs/README.md Normal file
View File

@ -0,0 +1,227 @@
# Telegraf plugin: zfs
Gathers ZFS statistics from the kstat files under /proc/spl/kstat/zfs.
# Measurements
Meta:
- tags: `pools=POOL1::POOL2`
Measurement names:
- arcstats_hits
- arcstats_misses
- arcstats_demand_data_hits
- arcstats_demand_data_misses
- arcstats_demand_metadata_hits
- arcstats_demand_metadata_misses
- arcstats_prefetch_data_hits
- arcstats_prefetch_data_misses
- arcstats_prefetch_metadata_hits
- arcstats_prefetch_metadata_misses
- arcstats_mru_hits
- arcstats_mru_ghost_hits
- arcstats_mfu_hits
- arcstats_mfu_ghost_hits
- arcstats_deleted
- arcstats_recycle_miss
- arcstats_mutex_miss
- arcstats_evict_skip
- arcstats_evict_l2_cached
- arcstats_evict_l2_eligible
- arcstats_evict_l2_ineligible
- arcstats_hash_elements
- arcstats_hash_elements_max
- arcstats_hash_collisions
- arcstats_hash_chains
- arcstats_hash_chain_max
- arcstats_p
- arcstats_c
- arcstats_c_min
- arcstats_c_max
- arcstats_size
- arcstats_hdr_size
- arcstats_data_size
- arcstats_meta_size
- arcstats_other_size
- arcstats_anon_size
- arcstats_anon_evict_data
- arcstats_anon_evict_metadata
- arcstats_mru_size
- arcstats_mru_evict_data
- arcstats_mru_evict_metadata
- arcstats_mru_ghost_size
- arcstats_mru_ghost_evict_data
- arcstats_mru_ghost_evict_metadata
- arcstats_mfu_size
- arcstats_mfu_evict_data
- arcstats_mfu_evict_metadata
- arcstats_mfu_ghost_size
- arcstats_mfu_ghost_evict_data
- arcstats_mfu_ghost_evict_metadata
- arcstats_l2_hits
- arcstats_l2_misses
- arcstats_l2_feeds
- arcstats_l2_rw_clash
- arcstats_l2_read_bytes
- arcstats_l2_write_bytes
- arcstats_l2_writes_sent
- arcstats_l2_writes_done
- arcstats_l2_writes_error
- arcstats_l2_writes_hdr_miss
- arcstats_l2_evict_lock_retry
- arcstats_l2_evict_reading
- arcstats_l2_free_on_write
- arcstats_l2_cdata_free_on_write
- arcstats_l2_abort_lowmem
- arcstats_l2_cksum_bad
- arcstats_l2_io_error
- arcstats_l2_size
- arcstats_l2_asize
- arcstats_l2_hdr_size
- arcstats_l2_compress_successes
- arcstats_l2_compress_zeros
- arcstats_l2_compress_failures
- arcstats_memory_throttle_count
- arcstats_duplicate_buffers
- arcstats_duplicate_buffers_size
- arcstats_duplicate_reads
- arcstats_memory_direct_count
- arcstats_memory_indirect_count
- arcstats_arc_no_grow
- arcstats_arc_tempreserve
- arcstats_arc_loaned_bytes
- arcstats_arc_prune
- arcstats_arc_meta_used
- arcstats_arc_meta_limit
- arcstats_arc_meta_max
- zfetchstats_hits
- zfetchstats_misses
- zfetchstats_colinear_hits
- zfetchstats_colinear_misses
- zfetchstats_stride_hits
- zfetchstats_stride_misses
- zfetchstats_reclaim_successes
- zfetchstats_reclaim_failures
- zfetchstats_streams_resets
- zfetchstats_streams_noresets
- zfetchstats_bogus_streams
- vdev_cache_stats_delegations
- vdev_cache_stats_hits
- vdev_cache_stats_misses
### Description
```
arcstats_hits
Total amount of cache hits in the arc.
arcstats_misses
Total amount of cache misses in the arc.
arcstats_demand_data_hits
Amount of cache hits for demand data, this is what matters (is good) for your application/share.
arcstats_demand_data_misses
Amount of cache misses for demand data, this is what matters (is bad) for your application/share.
arcstats_demand_metadata_hits
Amount of cache hits for demand metadata, this matters (is good) for getting filesystem data (ls,find,…)
arcstats_demand_metadata_misses
Amount of cache misses for demand metadata, this matters (is bad) for getting filesystem data (ls,find,…)
arcstats_prefetch_data_hits
The zfs prefetcher tried to prefetch something, but it was already cached (boring)
arcstats_prefetch_data_misses
The zfs prefetcher prefetched something which was not in the cache (good job, could become a demand hit in the future)
arcstats_prefetch_metadata_hits
Same as above, but for metadata
arcstats_prefetch_metadata_misses
Same as above, but for metadata
arcstats_mru_hits
Cache hit in the “most recently used cache”, we move this to the mfu cache.
arcstats_mru_ghost_hits
Cache hit in the “most recently used ghost list” we had this item in the cache, but evicted it, maybe we should increase the mru cache size.
arcstats_mfu_hits
Cache hit in the “most frequently used cache” we move this to the beginning of the mfu cache.
arcstats_mfu_ghost_hits
Cache hit in the “most frequently used ghost list” we had this item in the cache, but evicted it, maybe we should increase the mfu cache size.
arcstats_allocated
New data is written to the cache.
arcstats_deleted
Old data is evicted (deleted) from the cache.
arcstats_evict_l2_cached
We evicted something from the arc, but it's still cached in the l2 if we need it.
arcstats_evict_l2_eligible
We evicted something from the arc, and it's not in the l2; this is sad. (Maybe we hadn't had enough time to store it there.)
arcstats_evict_l2_ineligible
We evicted something which cannot be stored in the l2.
Reasons could be:
We have multiple pools, we evicted something from a pool without an l2 device.
The zfs property secondarycache.
arcstats_c
Arc target size, this is the size the system thinks the arc should have.
arcstats_size
Total size of the arc.
arcstats_l2_hits
Hits to the L2 cache. (It was not in the arc, but in the l2 cache)
arcstats_l2_misses
Miss to the L2 cache. (It was not in the arc, and not in the l2 cache)
arcstats_l2_size
Size of the l2 cache.
arcstats_l2_hdr_size
Size of the metadata in the arc (ram) used to manage (lookup if something is in the l2) the l2 cache.
zfetchstats_hits
Counts the number of cache hits, to items which are in the cache because of the prefetcher.
zfetchstats_colinear_hits
Counts the number of cache hits, to items which are in the cache because of the prefetcher (prefetched linear reads)
zfetchstats_stride_hits
Counts the number of cache hits, to items which are in the cache because of the prefetcher (prefetched stride reads)
vdev_cache_stats_hits
Hits to the vdev (device level) cache.
vdev_cache_stats_misses
Misses to the vdev (device level) cache.
```
# Default config
```
[zfs]
# ZFS kstat path
# If not specified, then default is:
# kstatPath = "/proc/spl/kstat/zfs"
#
# By default, telegraf gather all zfs stats
# If not specified, then default is:
# kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"]
```

88
plugins/zfs/zfs.go Normal file
View File

@ -0,0 +1,88 @@
package zfs
import (
"path/filepath"
"strconv"
"strings"
"github.com/influxdb/telegraf/plugins"
"github.com/shirou/gopsutil/common"
)
// Zfs is the plugin type that gathers ZFS statistics from kstat files.
type Zfs struct {
// KstatPath is the directory containing the kstat files. When empty,
// Gather uses "/proc/spl/kstat/zfs".
KstatPath string
// KstatMetrics lists the kstat file names to read. When empty, Gather
// reads "arcstats", "zfetchstats" and "vdev_cache_stats".
KstatMetrics []string
}
// sampleConfig is the commented configuration snippet returned by
// SampleConfig; it shows the two optional settings and their defaults.
var sampleConfig = `
# ZFS kstat path
# If not specified, then default is:
# kstatPath = "/proc/spl/kstat/zfs"
#
# By default, telegraf gather all zfs stats
# If not specified, then default is:
# kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"]
`
// SampleConfig returns the sample configuration text held in sampleConfig.
func (z *Zfs) SampleConfig() string {
return sampleConfig
}
// Description returns a one-line summary of what the plugin reads.
func (z *Zfs) Description() string {
return "Read metrics of ZFS from arcstats, zfetchstats and vdev_cache_stats"
}
// getTags builds the tag set attached to every measurement. It looks for
// "<kstatPath>/<pool>/io" entries, takes the directory name of each match as
// a pool name, and returns them joined with "::" under the "pools" key. When
// nothing matches, the "pools" tag is the empty string.
func getTags(kstatPath string) map[string]string {
	// The Glob error is discarded, as before; with no matches the loop is
	// simply skipped.
	ioFiles, _ := filepath.Glob(kstatPath + "/*/io")

	names := make([]string, 0, len(ioFiles))
	for _, ioFile := range ioFiles {
		segments := strings.Split(ioFile, "/")
		// The pool name is the directory that contains the "io" file.
		names = append(names, segments[len(segments)-2])
	}

	return map[string]string{"pools": strings.Join(names, "::")}
}
// Gather reads each configured kstat file from the kstat directory and adds
// one measurement per statistic row to acc. A measurement is named
// "<file>_<stat>", carries the last column of the row parsed as a base-10
// int64, and is tagged with the result of getTags.
//
// When KstatMetrics is empty the files arcstats, zfetchstats and
// vdev_cache_stats are read; when KstatPath is empty /proc/spl/kstat/zfs is
// used.
//
// It returns an error, instead of panicking, when a file cannot be read, and
// returns the parse error when a row's value is not an integer rather than
// reporting that row as zero. Measurements added before the failure remain in
// acc.
func (z *Zfs) Gather(acc plugins.Accumulator) error {
	kstatMetrics := z.KstatMetrics
	if len(kstatMetrics) == 0 {
		kstatMetrics = []string{"arcstats", "zfetchstats", "vdev_cache_stats"}
	}

	kstatPath := z.KstatPath
	if len(kstatPath) == 0 {
		kstatPath = "/proc/spl/kstat/zfs"
	}

	tags := getTags(kstatPath)

	for _, metric := range kstatMetrics {
		lines, err := common.ReadLines(filepath.Join(kstatPath, metric))
		if err != nil {
			return err
		}

		for i, line := range lines {
			// The first two lines of a kstat file are a header and the
			// column titles, not statistics.
			if i < 2 {
				continue
			}

			// Fields (rather than splitting on a single space) tolerates
			// padded columns and trailing whitespace, and yields nothing for
			// blank or whitespace-only lines.
			fields := strings.Fields(line)
			if len(fields) == 0 {
				continue
			}

			value, err := strconv.ParseInt(fields[len(fields)-1], 10, 64)
			if err != nil {
				return err
			}

			acc.Add(metric+"_"+fields[0], value, tags)
		}
	}

	return nil
}
// init calls plugins.Add with the name "zfs" and a factory that returns a
// new, zero-valued *Zfs.
func init() {
plugins.Add("zfs", func() plugins.Plugin {
return &Zfs{}
})
}

603
plugins/zfs/zfs_test.go Normal file
View File

@ -0,0 +1,603 @@
package zfs
import (
"fmt"
"io/ioutil"
"os"
"testing"
"github.com/influxdb/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// arcstatsContents is the fixture written to the "arcstats" kstat file by the
// test: a header line, a column-title line, then one "name type data" row per
// statistic.
const arcstatsContents = `5 1 0x01 86 4128 23617128247 12081618582809582
name type data
hits 4 5968846374
misses 4 1659178751
demand_data_hits 4 4860247322
demand_data_misses 4 501499535
demand_metadata_hits 4 708608325
demand_metadata_misses 4 156591375
prefetch_data_hits 4 367047144
prefetch_data_misses 4 974529898
prefetch_metadata_hits 4 32943583
prefetch_metadata_misses 4 26557943
mru_hits 4 301176811
mru_ghost_hits 4 47066067
mfu_hits 4 5520612438
mfu_ghost_hits 4 45784009
deleted 4 1718937704
recycle_miss 4 481222994
mutex_miss 4 20575623
evict_skip 4 14655903906543
evict_l2_cached 4 145310202998272
evict_l2_eligible 4 16345402777088
evict_l2_ineligible 4 7437226893312
hash_elements 4 36617980
hash_elements_max 4 36618318
hash_collisions 4 554145157
hash_chains 4 4187651
hash_chain_max 4 26
p 4 13963222064
c 4 16381258376
c_min 4 4194304
c_max 4 16884125696
size 4 16319887096
hdr_size 4 42567864
data_size 4 60066304
meta_size 4 1701534208
other_size 4 1661543168
anon_size 4 94720
anon_evict_data 4 0
anon_evict_metadata 4 0
mru_size 4 973099008
mru_evict_data 4 9175040
mru_evict_metadata 4 32768
mru_ghost_size 4 32768
mru_ghost_evict_data 4 0
mru_ghost_evict_metadata 4 32768
mfu_size 4 788406784
mfu_evict_data 4 50881024
mfu_evict_metadata 4 81920
mfu_ghost_size 4 0
mfu_ghost_evict_data 4 0
mfu_ghost_evict_metadata 4 0
l2_hits 4 573868618
l2_misses 4 1085309718
l2_feeds 4 12182087
l2_rw_clash 4 9610
l2_read_bytes 4 32695938336768
l2_write_bytes 4 2826774778880
l2_writes_sent 4 4267687
l2_writes_done 4 4267687
l2_writes_error 4 0
l2_writes_hdr_miss 4 164
l2_evict_lock_retry 4 5
l2_evict_reading 4 0
l2_free_on_write 4 1606914
l2_cdata_free_on_write 4 1775
l2_abort_lowmem 4 83462
l2_cksum_bad 4 393860640
l2_io_error 4 53881460
l2_size 4 2471466648576
l2_asize 4 2461690072064
l2_hdr_size 4 12854175552
l2_compress_successes 4 12184849
l2_compress_zeros 4 0
l2_compress_failures 4 0
memory_throttle_count 4 0
duplicate_buffers 4 0
duplicate_buffers_size 4 0
duplicate_reads 4 0
memory_direct_count 4 5159942
memory_indirect_count 4 3034640
arc_no_grow 4 0
arc_tempreserve 4 0
arc_loaned_bytes 4 0
arc_prune 4 114554259559
arc_meta_used 4 16259820792
arc_meta_limit 4 12663094272
arc_meta_max 4 18327165696
`
// zfetchstatsContents is the fixture written to the "zfetchstats" kstat file
// by the test, in the same header / column-title / rows layout.
const zfetchstatsContents = `3 1 0x01 11 528 23607270446 12081656848148208
name type data
hits 4 7812959060
misses 4 4154484207
colinear_hits 4 1366368
colinear_misses 4 4153117839
stride_hits 4 7309776732
stride_misses 4 222766182
reclaim_successes 4 107788388
reclaim_failures 4 4045329451
streams_resets 4 20989756
streams_noresets 4 503182328
bogus_streams 4 0
`
// vdev_cache_statsContents is the fixture written to the "vdev_cache_stats"
// kstat file by the test, in the same header / column-title / rows layout.
// NOTE(review): the name uses underscores rather than MixedCaps; renaming it
// means updating its use in TestZfsGeneratesMetrics at the same time.
const vdev_cache_statsContents = `7 1 0x01 3 144 23617323692 12081684236238879
name type data
delegations 4 0
hits 4 0
misses 4 0
`
// testKstatPath is the kstat directory, located under the OS temporary
// directory, that the test fills with fixture files and passes to the plugin
// as KstatPath.
var testKstatPath = os.TempDir() + "/telegraf/proc/spl/kstat/zfs"
// metrics pairs a measurement name with the integer value the test expects
// the plugin to report for it.
type metrics struct {
name string
value int64
}
func TestZfsGeneratesMetrics(t *testing.T) {
err := os.MkdirAll(testKstatPath, 0755)
require.NoError(t, err)
err = os.MkdirAll(testKstatPath+"/HOME", 0755)
require.NoError(t, err)
err = ioutil.WriteFile(testKstatPath+"/HOME/io", []byte(""), 0644)
require.NoError(t, err)
err = ioutil.WriteFile(testKstatPath+"/arcstats", []byte(arcstatsContents), 0644)
require.NoError(t, err)
err = ioutil.WriteFile(testKstatPath+"/zfetchstats", []byte(zfetchstatsContents), 0644)
require.NoError(t, err)
err = ioutil.WriteFile(testKstatPath+"/vdev_cache_stats", []byte(vdev_cache_statsContents), 0644)
require.NoError(t, err)
intMetrics := []*metrics{
{
name: "arcstats_hits",
value: 5968846374,
},
{
name: "arcstats_misses",
value: 1659178751,
},
{
name: "arcstats_demand_data_hits",
value: 4860247322,
},
{
name: "arcstats_demand_data_misses",
value: 501499535,
},
{
name: "arcstats_demand_metadata_hits",
value: 708608325,
},
{
name: "arcstats_demand_metadata_misses",
value: 156591375,
},
{
name: "arcstats_prefetch_data_hits",
value: 367047144,
},
{
name: "arcstats_prefetch_data_misses",
value: 974529898,
},
{
name: "arcstats_prefetch_metadata_hits",
value: 32943583,
},
{
name: "arcstats_prefetch_metadata_misses",
value: 26557943,
},
{
name: "arcstats_mru_hits",
value: 301176811,
},
{
name: "arcstats_mru_ghost_hits",
value: 47066067,
},
{
name: "arcstats_mfu_hits",
value: 5520612438,
},
{
name: "arcstats_mfu_ghost_hits",
value: 45784009,
},
{
name: "arcstats_deleted",
value: 1718937704,
},
{
name: "arcstats_recycle_miss",
value: 481222994,
},
{
name: "arcstats_mutex_miss",
value: 20575623,
},
{
name: "arcstats_evict_skip",
value: 14655903906543,
},
{
name: "arcstats_evict_l2_cached",
value: 145310202998272,
},
{
name: "arcstats_evict_l2_eligible",
value: 16345402777088,
},
{
name: "arcstats_evict_l2_ineligible",
value: 7437226893312,
},
{
name: "arcstats_hash_elements",
value: 36617980,
},
{
name: "arcstats_hash_elements_max",
value: 36618318,
},
{
name: "arcstats_hash_collisions",
value: 554145157,
},
{
name: "arcstats_hash_chains",
value: 4187651,
},
{
name: "arcstats_hash_chain_max",
value: 26,
},
{
name: "arcstats_p",
value: 13963222064,
},
{
name: "arcstats_c",
value: 16381258376,
},
{
name: "arcstats_c_min",
value: 4194304,
},
{
name: "arcstats_c_max",
value: 16884125696,
},
{
name: "arcstats_size",
value: 16319887096,
},
{
name: "arcstats_hdr_size",
value: 42567864,
},
{
name: "arcstats_data_size",
value: 60066304,
},
{
name: "arcstats_meta_size",
value: 1701534208,
},
{
name: "arcstats_other_size",
value: 1661543168,
},
{
name: "arcstats_anon_size",
value: 94720,
},
{
name: "arcstats_anon_evict_data",
value: 0,
},
{
name: "arcstats_anon_evict_metadata",
value: 0,
},
{
name: "arcstats_mru_size",
value: 973099008,
},
{
name: "arcstats_mru_evict_data",
value: 9175040,
},
{
name: "arcstats_mru_evict_metadata",
value: 32768,
},
{
name: "arcstats_mru_ghost_size",
value: 32768,
},
{
name: "arcstats_mru_ghost_evict_data",
value: 0,
},
{
name: "arcstats_mru_ghost_evict_metadata",
value: 32768,
},
{
name: "arcstats_mfu_size",
value: 788406784,
},
{
name: "arcstats_mfu_evict_data",
value: 50881024,
},
{
name: "arcstats_mfu_evict_metadata",
value: 81920,
},
{
name: "arcstats_mfu_ghost_size",
value: 0,
},
{
name: "arcstats_mfu_ghost_evict_data",
value: 0,
},
{
name: "arcstats_mfu_ghost_evict_metadata",
value: 0,
},
{
name: "arcstats_l2_hits",
value: 573868618,
},
{
name: "arcstats_l2_misses",
value: 1085309718,
},
{
name: "arcstats_l2_feeds",
value: 12182087,
},
{
name: "arcstats_l2_rw_clash",
value: 9610,
},
{
name: "arcstats_l2_read_bytes",
value: 32695938336768,
},
{
name: "arcstats_l2_write_bytes",
value: 2826774778880,
},
{
name: "arcstats_l2_writes_sent",
value: 4267687,
},
{
name: "arcstats_l2_writes_done",
value: 4267687,
},
{
name: "arcstats_l2_writes_error",
value: 0,
},
{
name: "arcstats_l2_writes_hdr_miss",
value: 164,
},
{
name: "arcstats_l2_evict_lock_retry",
value: 5,
},
{
name: "arcstats_l2_evict_reading",
value: 0,
},
{
name: "arcstats_l2_free_on_write",
value: 1606914,
},
{
name: "arcstats_l2_cdata_free_on_write",
value: 1775,
},
{
name: "arcstats_l2_abort_lowmem",
value: 83462,
},
{
name: "arcstats_l2_cksum_bad",
value: 393860640,
},
{
name: "arcstats_l2_io_error",
value: 53881460,
},
{
name: "arcstats_l2_size",
value: 2471466648576,
},
{
name: "arcstats_l2_asize",
value: 2461690072064,
},
{
name: "arcstats_l2_hdr_size",
value: 12854175552,
},
{
name: "arcstats_l2_compress_successes",
value: 12184849,
},
{
name: "arcstats_l2_compress_zeros",
value: 0,
},
{
name: "arcstats_l2_compress_failures",
value: 0,
},
{
name: "arcstats_memory_throttle_count",
value: 0,
},
{
name: "arcstats_duplicate_buffers",
value: 0,
},
{
name: "arcstats_duplicate_buffers_size",
value: 0,
},
{
name: "arcstats_duplicate_reads",
value: 0,
},
{
name: "arcstats_memory_direct_count",
value: 5159942,
},
{
name: "arcstats_memory_indirect_count",
value: 3034640,
},
{
name: "arcstats_arc_no_grow",
value: 0,
},
{
name: "arcstats_arc_tempreserve",
value: 0,
},
{
name: "arcstats_arc_loaned_bytes",
value: 0,
},
{
name: "arcstats_arc_prune",
value: 114554259559,
},
{
name: "arcstats_arc_meta_used",
value: 16259820792,
},
{
name: "arcstats_arc_meta_limit",
value: 12663094272,
},
{
name: "arcstats_arc_meta_max",
value: 18327165696,
},
{
name: "zfetchstats_hits",
value: 7812959060,
},
{
name: "zfetchstats_misses",
value: 4154484207,
},
{
name: "zfetchstats_colinear_hits",
value: 1366368,
},
{
name: "zfetchstats_colinear_misses",
value: 4153117839,
},
{
name: "zfetchstats_stride_hits",
value: 7309776732,
},
{
name: "zfetchstats_stride_misses",
value: 222766182,
},
{
name: "zfetchstats_reclaim_successes",
value: 107788388,
},
{
name: "zfetchstats_reclaim_failures",
value: 4045329451,
},
{
name: "zfetchstats_streams_resets",
value: 20989756,
},
{
name: "zfetchstats_streams_noresets",
value: 503182328,
},
{
name: "zfetchstats_bogus_streams",
value: 0,
},
{
name: "vdev_cache_stats_delegations",
value: 0,
},
{
name: "vdev_cache_stats_hits",
value: 0,
},
{
name: "vdev_cache_stats_misses",
value: 0,
},
}
var acc testutil.Accumulator
//one pool, all metrics
tags := map[string]string{
"pools": "HOME",
}
z := &Zfs{KstatPath: testKstatPath}
err = z.Gather(&acc)
require.NoError(t, err)
for _, metric := range intMetrics {
fmt.Println(metric.name)
assert.True(t, acc.HasIntValue(metric.name), metric.name)
assert.True(t, acc.CheckTaggedValue(metric.name, metric.value, tags))
}
//two pools, all metrics
err = os.MkdirAll(testKstatPath+"/STORAGE", 0755)
require.NoError(t, err)
err = ioutil.WriteFile(testKstatPath+"/STORAGE/io", []byte(""), 0644)
require.NoError(t, err)
tags = map[string]string{
"pools": "HOME::STORAGE",
}
z = &Zfs{KstatPath: testKstatPath}
err = z.Gather(&acc)
require.NoError(t, err)
for _, metric := range intMetrics {
assert.True(t, acc.HasIntValue(metric.name), metric.name)
assert.True(t, acc.CheckTaggedValue(metric.name, metric.value, tags))
}
//two pools, one metric
z = &Zfs{KstatPath: testKstatPath, KstatMetrics: []string{"arcstats"}}
err = z.Gather(&acc)
require.NoError(t, err)
for _, metric := range intMetrics {
assert.True(t, acc.HasIntValue(metric.name), metric.name)
assert.True(t, acc.CheckTaggedValue(metric.name, metric.value, tags))
}
err = os.RemoveAll(os.TempDir() + "/telegraf")
require.NoError(t, err)
}