diff --git a/plugins/inputs/zfs/README.md b/plugins/inputs/zfs/README.md index 72510d45b..c6abc0600 100644 --- a/plugins/inputs/zfs/README.md +++ b/plugins/inputs/zfs/README.md @@ -1,227 +1,294 @@ -# Telegraf plugin: zfs +# ZFS plugin -Get ZFS stat from /proc/spl/kstat/zfs +This ZFS plugin provides metrics from your ZFS filesystems. It supports ZFS on +Linux and FreeBSD. It gets ZFS stats from `/proc/spl/kstat/zfs` on Linux and +from `sysctl` and `zpool` on FreeBSD. -# Measurements +### Configuration: -Meta: +```toml +[[inputs.zfs]] + ## ZFS kstat path. Ignored on FreeBSD + ## If not specified, then default is: + # kstatPath = "/proc/spl/kstat/zfs" -- tags: `pools=POOL1::POOL2` + ## By default, telegraf gather all zfs stats + ## If not specified, then default is: + # kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] -Measurement names: + ## By default, don't gather zpool stats + # poolMetrics = false +``` -- arcstats_hits -- arcstats_misses +### Measurements & Fields: + +By default this plugin collects metrics about **Arc**, **Zfetch**, and +**Vdev cache**. All these metrics are either counters or measure sizes +in bytes. These metrics will be in the `zfs` measurement with the field +names listed below. + +If `poolMetrics` is enabled then additional metrics will be gathered for +each pool. + +- zfs + With fields listed below. 
+ +#### Arc Stats + +- arcstats_allocated (FreeBSD only) +- arcstats_anon_evict_data (Linux only) +- arcstats_anon_evict_metadata (Linux only) +- arcstats_anon_evictable_data (FreeBSD only) +- arcstats_anon_evictable_metadata (FreeBSD only) +- arcstats_anon_size +- arcstats_arc_loaned_bytes (Linux only) +- arcstats_arc_meta_limit +- arcstats_arc_meta_max +- arcstats_arc_meta_min (FreeBSD only) +- arcstats_arc_meta_used +- arcstats_arc_no_grow (Linux only) +- arcstats_arc_prune (Linux only) +- arcstats_arc_tempreserve (Linux only) +- arcstats_c +- arcstats_c_max +- arcstats_c_min +- arcstats_data_size +- arcstats_deleted - arcstats_demand_data_hits - arcstats_demand_data_misses +- arcstats_demand_hit_predictive_prefetch (FreeBSD only) - arcstats_demand_metadata_hits - arcstats_demand_metadata_misses +- arcstats_duplicate_buffers +- arcstats_duplicate_buffers_size +- arcstats_duplicate_reads +- arcstats_evict_l2_cached +- arcstats_evict_l2_eligible +- arcstats_evict_l2_ineligible +- arcstats_evict_l2_skip (FreeBSD only) +- arcstats_evict_not_enough (FreeBSD only) +- arcstats_evict_skip +- arcstats_hash_chain_max +- arcstats_hash_chains +- arcstats_hash_collisions +- arcstats_hash_elements +- arcstats_hash_elements_max +- arcstats_hdr_size +- arcstats_hits +- arcstats_l2_abort_lowmem +- arcstats_l2_asize +- arcstats_l2_cdata_free_on_write +- arcstats_l2_cksum_bad +- arcstats_l2_compress_failures +- arcstats_l2_compress_successes +- arcstats_l2_compress_zeros +- arcstats_l2_evict_l1cached (FreeBSD only) +- arcstats_l2_evict_lock_retry +- arcstats_l2_evict_reading +- arcstats_l2_feeds +- arcstats_l2_free_on_write +- arcstats_l2_hdr_size +- arcstats_l2_hits +- arcstats_l2_io_error +- arcstats_l2_misses +- arcstats_l2_read_bytes +- arcstats_l2_rw_clash +- arcstats_l2_size +- arcstats_l2_write_buffer_bytes_scanned (FreeBSD only) +- arcstats_l2_write_buffer_iter (FreeBSD only) +- arcstats_l2_write_buffer_list_iter (FreeBSD only) +- arcstats_l2_write_buffer_list_null_iter 
(FreeBSD only) +- arcstats_l2_write_bytes +- arcstats_l2_write_full (FreeBSD only) +- arcstats_l2_write_in_l2 (FreeBSD only) +- arcstats_l2_write_io_in_progress (FreeBSD only) +- arcstats_l2_write_not_cacheable (FreeBSD only) +- arcstats_l2_write_passed_headroom (FreeBSD only) +- arcstats_l2_write_pios (FreeBSD only) +- arcstats_l2_write_spa_mismatch (FreeBSD only) +- arcstats_l2_write_trylock_fail (FreeBSD only) +- arcstats_l2_writes_done +- arcstats_l2_writes_error +- arcstats_l2_writes_hdr_miss (Linux only) +- arcstats_l2_writes_lock_retry (FreeBSD only) +- arcstats_l2_writes_sent +- arcstats_memory_direct_count (Linux only) +- arcstats_memory_indirect_count (Linux only) +- arcstats_memory_throttle_count +- arcstats_meta_size (Linux only) +- arcstats_mfu_evict_data (Linux only) +- arcstats_mfu_evict_metadata (Linux only) +- arcstats_mfu_ghost_evict_data (Linux only) +- arcstats_mfu_ghost_evict_metadata (Linux only) +- arcstats_metadata_size (FreeBSD only) +- arcstats_mfu_evictable_data (FreeBSD only) +- arcstats_mfu_evictable_metadata (FreeBSD only) +- arcstats_mfu_ghost_evictable_data (FreeBSD only) +- arcstats_mfu_ghost_evictable_metadata (FreeBSD only) +- arcstats_mfu_ghost_hits +- arcstats_mfu_ghost_size +- arcstats_mfu_hits +- arcstats_mfu_size +- arcstats_misses +- arcstats_mru_evict_data (Linux only) +- arcstats_mru_evict_metadata (Linux only) +- arcstats_mru_ghost_evict_data (Linux only) +- arcstats_mru_ghost_evict_metadata (Linux only) +- arcstats_mru_evictable_data (FreeBSD only) +- arcstats_mru_evictable_metadata (FreeBSD only) +- arcstats_mru_ghost_evictable_data (FreeBSD only) +- arcstats_mru_ghost_evictable_metadata (FreeBSD only) +- arcstats_mru_ghost_hits +- arcstats_mru_ghost_size +- arcstats_mru_hits +- arcstats_mru_size +- arcstats_mutex_miss +- arcstats_other_size +- arcstats_p - arcstats_prefetch_data_hits - arcstats_prefetch_data_misses - arcstats_prefetch_metadata_hits - arcstats_prefetch_metadata_misses -- arcstats_mru_hits -- 
arcstats_mru_ghost_hits -- arcstats_mfu_hits -- arcstats_mfu_ghost_hits -- arcstats_deleted -- arcstats_recycle_miss -- arcstats_mutex_miss -- arcstats_evict_skip -- arcstats_evict_l2_cached -- arcstats_evict_l2_eligible -- arcstats_evict_l2_ineligible -- arcstats_hash_elements -- arcstats_hash_elements_max -- arcstats_hash_collisions -- arcstats_hash_chains -- arcstats_hash_chain_max -- arcstats_p -- arcstats_c -- arcstats_c_min -- arcstats_c_max +- arcstats_recycle_miss (Linux only) - arcstats_size -- arcstats_hdr_size -- arcstats_data_size -- arcstats_meta_size -- arcstats_other_size -- arcstats_anon_size -- arcstats_anon_evict_data -- arcstats_anon_evict_metadata -- arcstats_mru_size -- arcstats_mru_evict_data -- arcstats_mru_evict_metadata -- arcstats_mru_ghost_size -- arcstats_mru_ghost_evict_data -- arcstats_mru_ghost_evict_metadata -- arcstats_mfu_size -- arcstats_mfu_evict_data -- arcstats_mfu_evict_metadata -- arcstats_mfu_ghost_size -- arcstats_mfu_ghost_evict_data -- arcstats_mfu_ghost_evict_metadata -- arcstats_l2_hits -- arcstats_l2_misses -- arcstats_l2_feeds -- arcstats_l2_rw_clash -- arcstats_l2_read_bytes -- arcstats_l2_write_bytes -- arcstats_l2_writes_sent -- arcstats_l2_writes_done -- arcstats_l2_writes_error -- arcstats_l2_writes_hdr_miss -- arcstats_l2_evict_lock_retry -- arcstats_l2_evict_reading -- arcstats_l2_free_on_write -- arcstats_l2_cdata_free_on_write -- arcstats_l2_abort_lowmem -- arcstats_l2_cksum_bad -- arcstats_l2_io_error -- arcstats_l2_size -- arcstats_l2_asize -- arcstats_l2_hdr_size -- arcstats_l2_compress_successes -- arcstats_l2_compress_zeros -- arcstats_l2_compress_failures -- arcstats_memory_throttle_count -- arcstats_duplicate_buffers -- arcstats_duplicate_buffers_size -- arcstats_duplicate_reads -- arcstats_memory_direct_count -- arcstats_memory_indirect_count -- arcstats_arc_no_grow -- arcstats_arc_tempreserve -- arcstats_arc_loaned_bytes -- arcstats_arc_prune -- arcstats_arc_meta_used -- arcstats_arc_meta_limit -- 
arcstats_arc_meta_max +- arcstats_sync_wait_for_async (FreeBSD only) + +#### Zfetch Stats + +- zfetchstats_bogus_streams (Linux only) +- zfetchstats_colinear_hits (Linux only) +- zfetchstats_colinear_misses (Linux only) - zfetchstats_hits +- zfetchstats_max_streams (FreeBSD only) - zfetchstats_misses -- zfetchstats_colinear_hits -- zfetchstats_colinear_misses -- zfetchstats_stride_hits -- zfetchstats_stride_misses -- zfetchstats_reclaim_successes -- zfetchstats_reclaim_failures -- zfetchstats_streams_resets -- zfetchstats_streams_noresets -- zfetchstats_bogus_streams +- zfetchstats_reclaim_failures (Linux only) +- zfetchstats_reclaim_successes (Linux only) +- zfetchstats_streams_noresets (Linux only) +- zfetchstats_streams_resets (Linux only) +- zfetchstats_stride_hits (Linux only) +- zfetchstats_stride_misses (Linux only) + +#### Vdev Cache Stats + - vdev_cache_stats_delegations - vdev_cache_stats_hits - vdev_cache_stats_misses +#### Pool Metrics (optional) + +On Linux: + +- zfs_pool + - nread (integer, ) + - nwritten (integer, ) + - reads (integer, ) + - writes (integer, ) + - wtime (integer, ) + - wlentime (integer, ) + - wupdate (integer, ) + - rtime (integer, ) + - rlentime (integer, ) + - rupdate (integer, ) + - wcnt (integer, ) + - rcnt (integer, ) + +On FreeBSD: + +- zfs_pool + - allocated (integer, bytes) + - capacity (integer, percent) + - dedupratio (float, ratio) + - free (integer, bytes) + - size (integer, bytes) + - fragmentation (integer, percent) + +### Tags: + +- ZFS stats (`zfs`) will have the following tag: + - pools - A `::` concatenated list of all ZFS pools on the machine. + +- Pool metrics (`zfs_pool`) will have the following tags: + - pool - with the name of the pool which the metrics are for. + - health - the health status of the pool. 
(FreeBSD only) + +### Example Output: + +``` +$ ./telegraf -config telegraf.conf -input-filter zfs -test +* Plugin: zfs, Collection 1 +> zfs_pool,health=ONLINE,pool=zroot allocated=1578590208i,capacity=2i,dedupratio=1,fragmentation=1i,free=64456531968i,size=66035122176i 1464473103625653908 +> zfs,pools=zroot arcstats_allocated=4167764i,arcstats_anon_evictable_data=0i,arcstats_anon_evictable_metadata=0i,arcstats_anon_size=16896i,arcstats_arc_meta_limit=10485760i,arcstats_arc_meta_max=115269568i,arcstats_arc_meta_min=8388608i,arcstats_arc_meta_used=51977456i,arcstats_c=16777216i,arcstats_c_max=41943040i,arcstats_c_min=16777216i,arcstats_data_size=0i,arcstats_deleted=1699340i,arcstats_demand_data_hits=14836131i,arcstats_demand_data_misses=2842945i,arcstats_demand_hit_predictive_prefetch=0i,arcstats_demand_metadata_hits=1655006i,arcstats_demand_metadata_misses=830074i,arcstats_duplicate_buffers=0i,arcstats_duplicate_buffers_size=0i,arcstats_duplicate_reads=123i,arcstats_evict_l2_cached=0i,arcstats_evict_l2_eligible=332172623872i,arcstats_evict_l2_ineligible=6168576i,arcstats_evict_l2_skip=0i,arcstats_evict_not_enough=12189444i,arcstats_evict_skip=195190764i,arcstats_hash_chain_max=2i,arcstats_hash_chains=10i,arcstats_hash_collisions=43134i,arcstats_hash_elements=2268i,arcstats_hash_elements_max=6136i,arcstats_hdr_size=565632i,arcstats_hits=16515778i,arcstats_l2_abort_lowmem=0i,arcstats_l2_asize=0i,arcstats_l2_cdata_free_on_write=0i,arcstats_l2_cksum_bad=0i,arcstats_l2_compress_failures=0i,arcstats_l2_compress_successes=0i,arcstats_l2_compress_zeros=0i,arcstats_l2_evict_l1cached=0i,arcstats_l2_evict_lock_retry=0i,arcstats_l2_evict_reading=0i,arcstats_l2_feeds=0i,arcstats_l2_free_on_write=0i,arcstats_l2_hdr_size=0i,arcstats_l2_hits=0i,arcstats_l2_io_error=0i,arcstats_l2_misses=0i,arcstats_l2_read_bytes=0i,arcstats_l2_rw_clash=0i,arcstats_l2_size=0i,arcstats_l2_write_buffer_bytes_scanned=0i,arcstats_l2_write_buffer_iter=0i,arcstats_l2_write_buffer_list_iter=0i,arcstats_l2_
write_buffer_list_null_iter=0i,arcstats_l2_write_bytes=0i,arcstats_l2_write_full=0i,arcstats_l2_write_in_l2=0i,arcstats_l2_write_io_in_progress=0i,arcstats_l2_write_not_cacheable=380i,arcstats_l2_write_passed_headroom=0i,arcstats_l2_write_pios=0i,arcstats_l2_write_spa_mismatch=0i,arcstats_l2_write_trylock_fail=0i,arcstats_l2_writes_done=0i,arcstats_l2_writes_error=0i,arcstats_l2_writes_lock_retry=0i,arcstats_l2_writes_sent=0i,arcstats_memory_throttle_count=0i,arcstats_metadata_size=17014784i,arcstats_mfu_evictable_data=0i,arcstats_mfu_evictable_metadata=16384i,arcstats_mfu_ghost_evictable_data=5723648i,arcstats_mfu_ghost_evictable_metadata=10709504i,arcstats_mfu_ghost_hits=1315619i,arcstats_mfu_ghost_size=16433152i,arcstats_mfu_hits=7646611i,arcstats_mfu_size=305152i,arcstats_misses=3676993i,arcstats_mru_evictable_data=0i,arcstats_mru_evictable_metadata=0i,arcstats_mru_ghost_evictable_data=0i,arcstats_mru_ghost_evictable_metadata=80896i,arcstats_mru_ghost_hits=324250i,arcstats_mru_ghost_size=80896i,arcstats_mru_hits=8844526i,arcstats_mru_size=16693248i,arcstats_mutex_miss=354023i,arcstats_other_size=34397040i,arcstats_p=4172800i,arcstats_prefetch_data_hits=0i,arcstats_prefetch_data_misses=0i,arcstats_prefetch_metadata_hits=24641i,arcstats_prefetch_metadata_misses=3974i,arcstats_size=51977456i,arcstats_sync_wait_for_async=0i,vdev_cache_stats_delegations=779i,vdev_cache_stats_hits=323123i,vdev_cache_stats_misses=59929i,zfetchstats_hits=0i,zfetchstats_max_streams=0i,zfetchstats_misses=0i 1464473103634124908 +``` + ### Description -``` -arcstats_hits - Total amount of cache hits in the arc. +A short description for some of the metrics. -arcstats_misses - Total amount of cache misses in the arc. +#### Arc Stats -arcstats_demand_data_hits - Amount of cache hits for demand data, this is what matters (is good) for your application/share. +`arcstats_hits` Total amount of cache hits in the arc. 
-arcstats_demand_data_misses - Amount of cache misses for demand data, this is what matters (is bad) for your application/share. +`arcstats_misses` Total amount of cache misses in the arc. -arcstats_demand_metadata_hits - Ammount of cache hits for demand metadata, this matters (is good) for getting filesystem data (ls,find,…) +`arcstats_demand_data_hits` Amount of cache hits for demand data, this is what matters (is good) for your application/share. -arcstats_demand_metadata_misses - Ammount of cache misses for demand metadata, this matters (is bad) for getting filesystem data (ls,find,…) +`arcstats_demand_data_misses` Amount of cache misses for demand data, this is what matters (is bad) for your application/share. -arcstats_prefetch_data_hits - The zfs prefetcher tried to prefetch somethin, but it was allready cached (boring) +`arcstats_demand_metadata_hits` Amount of cache hits for demand metadata, this matters (is good) for getting filesystem data (ls,find,…) -arcstats_prefetch_data_misses - The zfs prefetcher prefetched something which was not in the cache (good job, could become a demand hit in the future) +`arcstats_demand_metadata_misses` Amount of cache misses for demand metadata, this matters (is bad) for getting filesystem data (ls,find,…) -arcstats_prefetch_metadata_hits - Same as above, but for metadata +`arcstats_prefetch_data_hits` The zfs prefetcher tried to prefetch something, but it was already cached (boring) -arcstats_prefetch_metadata_misses - Same as above, but for metadata +`arcstats_prefetch_data_misses` The zfs prefetcher prefetched something which was not in the cache (good job, could become a demand hit in the future) -arcstats_mru_hits - Cache hit in the “most recently used cache”, we move this to the mfu cache. 
+`arcstats_prefetch_metadata_hits` Same as above, but for metadata -arcstats_mru_ghost_hits - Cache hit in the “most recently used ghost list” we had this item in the cache, but evicted it, maybe we should increase the mru cache size. +`arcstats_prefetch_metadata_misses` Same as above, but for metadata -arcstats_mfu_hits - Cache hit in the “most freqently used cache” we move this to the begining of the mfu cache. +`arcstats_mru_hits` Cache hit in the “most recently used cache”, we move this to the mfu cache. -arcstats_mfu_ghost_hits - Cache hit in the “most frequently used ghost list” we had this item in the cache, but evicted it, maybe we should increase the mfu cache size. +`arcstats_mru_ghost_hits` Cache hit in the “most recently used ghost list” we had this item in the cache, but evicted it, maybe we should increase the mru cache size. -arcstats_allocated - New data is written to the cache. +`arcstats_mfu_hits` Cache hit in the “most frequently used cache” we move this to the beginning of the mfu cache. -arcstats_deleted - Old data is evicted (deleted) from the cache. +`arcstats_mfu_ghost_hits` Cache hit in the “most frequently used ghost list” we had this item in the cache, but evicted it, maybe we should increase the mfu cache size. -arcstats_evict_l2_cached - We evicted something from the arc, but its still cached in the l2 if we need it. +`arcstats_allocated` New data is written to the cache. -arcstats_evict_l2_eligible - We evicted something from the arc, and it’s not in the l2 this is sad. (maybe we hadn’t had enough time to store it there) +`arcstats_deleted` Old data is evicted (deleted) from the cache. -arcstats_evict_l2_ineligible - We evicted something which cannot be stored in the l2. - Reasons could be: - We have multiple pools, we evicted something from a pool whithot an l2 device. - The zfs property secondarycache. +`arcstats_evict_l2_cached` We evicted something from the arc, but it’s still cached in the l2 if we need it. 
-arcstats_c - Arc target size, this is the size the system thinks the arc should have. +`arcstats_evict_l2_eligible` We evicted something from the arc, and it’s not in the l2 this is sad. (maybe we hadn’t had enough time to store it there) -arcstats_size - Total size of the arc. +`arcstats_evict_l2_ineligible` We evicted something which cannot be stored in the l2. + Reasons could be: + - We have multiple pools, we evicted something from a pool without an l2 device. + - The zfs property `secondarycache`. -arcstats_l2_hits - Hits to the L2 cache. (It was not in the arc, but in the l2 cache) +`arcstats_c` Arc target size, this is the size the system thinks the arc should have. -arcstats_l2_misses - Miss to the L2 cache. (It was not in the arc, and not in the l2 cache) +`arcstats_size` Total size of the arc. -arcstats_l2_size - Size of the l2 cache. +`arcstats_l2_hits` Hits to the L2 cache. (It was not in the arc, but in the l2 cache) -arcstats_l2_hdr_size - Size of the metadata in the arc (ram) used to manage (lookup if someting is in the l2) the l2 cache. +`arcstats_l2_misses` Miss to the L2 cache. (It was not in the arc, and not in the l2 cache) +`arcstats_l2_size` Size of the l2 cache. +`arcstats_l2_hdr_size` Size of the metadata in the arc (ram) used to manage (lookup if something is in the l2) the l2 cache. -zfetchstats_hits - Counts the number of cache hits, to items wich are in the cache because of the prefetcher. +#### Zfetch Stats -zfetchstats_colinear_hits - Counts the number of cache hits, to items wich are in the cache because of the prefetcher (prefetched linear reads) +`zfetchstats_hits` Counts the number of cache hits, to items which are in the cache because of the prefetcher. 
-zfetchstats_stride_hits - Counts the number of cache hits, to items wich are in the cache because of the prefetcher (prefetched stride reads) +`zfetchstats_colinear_hits` Counts the number of cache hits, to items which are in the cache because of the prefetcher (prefetched linear reads) +`zfetchstats_stride_hits` Counts the number of cache hits, to items which are in the cache because of the prefetcher (prefetched stride reads) +#### Vdev Cache Stats -vdev_cache_stats_hits - Hits to the vdev (device level) cache. - -vdev_cache_stats_misses - Misses to the vdev (device level) cache. -``` - -# Default config - -``` -[zfs] - # ZFS kstat path - # If not specified, then default is: - # kstatPath = "/proc/spl/kstat/zfs" - # - # By default, telegraf gather all zfs stats - # If not specified, then default is: - # kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] -``` +`vdev_cache_stats_hits` Hits to the vdev (device level) cache. +`vdev_cache_stats_misses` Misses to the vdev (device level) cache. diff --git a/plugins/inputs/zfs/zfs.go b/plugins/inputs/zfs/zfs.go index bcbe03e95..05ca346b0 100644 --- a/plugins/inputs/zfs/zfs.go +++ b/plugins/inputs/zfs/zfs.go @@ -1,38 +1,27 @@ package zfs -import ( - "fmt" - "path/filepath" - "strconv" - "strings" - - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/internal" - "github.com/influxdata/telegraf/plugins/inputs" -) +type Sysctl func(metric string) ([]string, error) +type Zpool func() ([]string, error) type Zfs struct { KstatPath string KstatMetrics []string PoolMetrics bool -} - -type poolInfo struct { - name string - ioFilename string + sysctl Sysctl + zpool Zpool } var sampleConfig = ` - ## ZFS kstat path + ## ZFS kstat path. 
Ignored on FreeBSD ## If not specified, then default is: - kstatPath = "/proc/spl/kstat/zfs" + # kstatPath = "/proc/spl/kstat/zfs" ## By default, telegraf gather all zfs stats ## If not specified, then default is: - kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] + # kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] ## By default, don't gather zpool stats - poolMetrics = false + # poolMetrics = false ` func (z *Zfs) SampleConfig() string { @@ -40,117 +29,5 @@ func (z *Zfs) SampleConfig() string { } func (z *Zfs) Description() string { - return "Read metrics of ZFS from arcstats, zfetchstats and vdev_cache_stats" -} - -func getPools(kstatPath string) []poolInfo { - pools := make([]poolInfo, 0) - poolsDirs, _ := filepath.Glob(kstatPath + "/*/io") - - for _, poolDir := range poolsDirs { - poolDirSplit := strings.Split(poolDir, "/") - pool := poolDirSplit[len(poolDirSplit)-2] - pools = append(pools, poolInfo{name: pool, ioFilename: poolDir}) - } - - return pools -} - -func getTags(pools []poolInfo) map[string]string { - var poolNames string - - for _, pool := range pools { - if len(poolNames) != 0 { - poolNames += "::" - } - poolNames += pool.name - } - - return map[string]string{"pools": poolNames} -} - -func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error { - lines, err := internal.ReadLines(pool.ioFilename) - if err != nil { - return err - } - - if len(lines) != 3 { - return err - } - - keys := strings.Fields(lines[1]) - values := strings.Fields(lines[2]) - - keyCount := len(keys) - - if keyCount != len(values) { - return fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values) - } - - tag := map[string]string{"pool": pool.name} - fields := make(map[string]interface{}) - for i := 0; i < keyCount; i++ { - value, err := strconv.ParseInt(values[i], 10, 64) - if err != nil { - return err - } - fields[keys[i]] = value - } - acc.AddFields("zfs_pool", fields, tag) - - return nil -} - -func (z *Zfs) Gather(acc 
telegraf.Accumulator) error { - kstatMetrics := z.KstatMetrics - if len(kstatMetrics) == 0 { - kstatMetrics = []string{"arcstats", "zfetchstats", "vdev_cache_stats"} - } - - kstatPath := z.KstatPath - if len(kstatPath) == 0 { - kstatPath = "/proc/spl/kstat/zfs" - } - - pools := getPools(kstatPath) - tags := getTags(pools) - - if z.PoolMetrics { - for _, pool := range pools { - err := gatherPoolStats(pool, acc) - if err != nil { - return err - } - } - } - - fields := make(map[string]interface{}) - for _, metric := range kstatMetrics { - lines, err := internal.ReadLines(kstatPath + "/" + metric) - if err != nil { - return err - } - for i, line := range lines { - if i == 0 || i == 1 { - continue - } - if len(line) < 1 { - continue - } - rawData := strings.Split(line, " ") - key := metric + "_" + rawData[0] - rawValue := rawData[len(rawData)-1] - value, _ := strconv.ParseInt(rawValue, 10, 64) - fields[key] = value - } - } - acc.AddFields("zfs", fields, tags) - return nil -} - -func init() { - inputs.Add("zfs", func() telegraf.Input { - return &Zfs{} - }) + return "Read metrics of ZFS from arcstats, zfetchstats, vdev_cache_stats, and pools" } diff --git a/plugins/inputs/zfs/zfs_freebsd.go b/plugins/inputs/zfs/zfs_freebsd.go new file mode 100644 index 000000000..7ee72a140 --- /dev/null +++ b/plugins/inputs/zfs/zfs_freebsd.go @@ -0,0 +1,140 @@ +// +build freebsd + +package zfs + +import ( + "bytes" + "fmt" + "os/exec" + "strconv" + "strings" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +func (z *Zfs) gatherPoolStats(acc telegraf.Accumulator) (string, error) { + + lines, err := z.zpool() + if err != nil { + return "", err + } + + pools := []string{} + for _, line := range lines { + col := strings.Split(line, "\t") + + pools = append(pools, col[0]) + } + + if z.PoolMetrics { + for _, line := range lines { + col := strings.Split(line, "\t") + tags := map[string]string{"pool": col[0], "health": col[8]} + fields := 
map[string]interface{}{} + + size, err := strconv.ParseInt(col[1], 10, 64) + if err != nil { + return "", fmt.Errorf("Error parsing size: %s", err) + } + fields["size"] = size + + alloc, err := strconv.ParseInt(col[2], 10, 64) + if err != nil { + return "", fmt.Errorf("Error parsing allocation: %s", err) + } + fields["allocated"] = alloc + + free, err := strconv.ParseInt(col[3], 10, 64) + if err != nil { + return "", fmt.Errorf("Error parsing free: %s", err) + } + fields["free"] = free + + frag, err := strconv.ParseInt(strings.TrimSuffix(col[5], "%"), 10, 0) + if err != nil { // This might be - for RO devs + frag = 0 + } + fields["fragmentation"] = frag + + capval, err := strconv.ParseInt(col[6], 10, 0) + if err != nil { + return "", fmt.Errorf("Error parsing capacity: %s", err) + } + fields["capacity"] = capval + + dedup, err := strconv.ParseFloat(strings.TrimSuffix(col[7], "x"), 32) + if err != nil { + return "", fmt.Errorf("Error parsing dedupratio: %s", err) + } + fields["dedupratio"] = dedup + + acc.AddFields("zfs_pool", fields, tags) + } + } + + return strings.Join(pools, "::"), nil +} + +func (z *Zfs) Gather(acc telegraf.Accumulator) error { + kstatMetrics := z.KstatMetrics + if len(kstatMetrics) == 0 { + kstatMetrics = []string{"arcstats", "zfetchstats", "vdev_cache_stats"} + } + + tags := map[string]string{} + poolNames, err := z.gatherPoolStats(acc) + if err != nil { + return err + } + tags["pools"] = poolNames + + fields := make(map[string]interface{}) + for _, metric := range kstatMetrics { + stdout, err := z.sysctl(metric) + if err != nil { + return err + } + for _, line := range stdout { + rawData := strings.Split(line, ": ") + key := metric + "_" + strings.Split(rawData[0], ".")[4] + value, _ := strconv.ParseInt(rawData[1], 10, 64) + fields[key] = value + } + } + acc.AddFields("zfs", fields, tags) + return nil +} + +func run(command string, args ...string) ([]string, error) { + cmd := exec.Command(command, args...) 
+ var outbuf, errbuf bytes.Buffer + cmd.Stdout = &outbuf + cmd.Stderr = &errbuf + err := cmd.Run() + + stdout := strings.TrimSpace(outbuf.String()) + stderr := strings.TrimSpace(errbuf.String()) + + if _, ok := err.(*exec.ExitError); ok { + return nil, fmt.Errorf("%s error: %s", command, stderr) + } + return strings.Split(stdout, "\n"), nil +} + +func zpool() ([]string, error) { + return run("zpool", []string{"list", "-Hp"}...) +} + +func sysctl(metric string) ([]string, error) { + return run("sysctl", []string{"-q", fmt.Sprintf("kstat.zfs.misc.%s", metric)}...) +} + +func init() { + inputs.Add("zfs", func() telegraf.Input { + return &Zfs{ + sysctl: sysctl, + zpool: zpool, + } + }) +} diff --git a/plugins/inputs/zfs/zfs_freebsd_test.go b/plugins/inputs/zfs/zfs_freebsd_test.go new file mode 100644 index 000000000..193c2816f --- /dev/null +++ b/plugins/inputs/zfs/zfs_freebsd_test.go @@ -0,0 +1,148 @@ +// +build freebsd + +package zfs + +import ( + "fmt" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/require" +) + +// $ zpool list -Hp +var zpool_output = []string{ + "freenas-boot 30601641984 2022177280 28579464704 - - 6 1.00x ONLINE -", + "red1 8933531975680 1126164848640 7807367127040 - 8% 12 1.83x ONLINE /mnt", + "temp1 2989297238016 1626309320704 1362987917312 - 38% 54 1.28x ONLINE /mnt", + "temp2 2989297238016 626958278656 2362338959360 - 12% 20 1.00x ONLINE /mnt", +} + +func mock_zpool() ([]string, error) { + return zpool_output, nil +} + +// sysctl -q kstat.zfs.misc.arcstats + +// sysctl -q kstat.zfs.misc.vdev_cache_stats +var kstat_vdev_cache_stats_output = []string{ + "kstat.zfs.misc.vdev_cache_stats.misses: 87789", + "kstat.zfs.misc.vdev_cache_stats.hits: 465583", + "kstat.zfs.misc.vdev_cache_stats.delegations: 6952", +} + +// sysctl -q kstat.zfs.misc.zfetchstats +var kstat_zfetchstats_output = []string{ + "kstat.zfs.misc.zfetchstats.max_streams: 0", + "kstat.zfs.misc.zfetchstats.misses: 0", + 
"kstat.zfs.misc.zfetchstats.hits: 0", +} + +func mock_sysctl(metric string) ([]string, error) { + if metric == "vdev_cache_stats" { + return kstat_vdev_cache_stats_output, nil + } + if metric == "zfetchstats" { + return kstat_zfetchstats_output, nil + } + return []string{}, fmt.Errorf("Invalid arg") +} + +func TestZfsPoolMetrics(t *testing.T) { + var acc testutil.Accumulator + + z := &Zfs{ + KstatMetrics: []string{"vdev_cache_stats"}, + sysctl: mock_sysctl, + zpool: mock_zpool, + } + err := z.Gather(&acc) + require.NoError(t, err) + + require.False(t, acc.HasMeasurement("zfs_pool")) + acc.Metrics = nil + + z = &Zfs{ + KstatMetrics: []string{"vdev_cache_stats"}, + PoolMetrics: true, + sysctl: mock_sysctl, + zpool: mock_zpool, + } + err = z.Gather(&acc) + require.NoError(t, err) + + //one pool, all metrics + tags := map[string]string{ + "pool": "freenas-boot", + "health": "ONLINE", + } + + poolMetrics := getFreeNasBootPoolMetrics() + + acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags) +} + +func TestZfsGeneratesMetrics(t *testing.T) { + var acc testutil.Accumulator + + z := &Zfs{ + KstatMetrics: []string{"vdev_cache_stats"}, + sysctl: mock_sysctl, + zpool: mock_zpool, + } + err := z.Gather(&acc) + require.NoError(t, err) + + //four pool, vdev_cache_stats metrics + tags := map[string]string{ + "pools": "freenas-boot::red1::temp1::temp2", + } + intMetrics := getKstatMetricsVdevOnly() + + acc.AssertContainsTaggedFields(t, "zfs", intMetrics, tags) + + acc.Metrics = nil + + z = &Zfs{ + KstatMetrics: []string{"zfetchstats", "vdev_cache_stats"}, + sysctl: mock_sysctl, + zpool: mock_zpool, + } + err = z.Gather(&acc) + require.NoError(t, err) + + //four pool, vdev_cache_stats and zfetchstatus metrics + intMetrics = getKstatMetricsVdevAndZfetch() + + acc.AssertContainsTaggedFields(t, "zfs", intMetrics, tags) +} + +func getFreeNasBootPoolMetrics() map[string]interface{} { + return map[string]interface{}{ + "allocated": int64(2022177280), + "capacity": int64(6), + 
"dedupratio": float64(1), + "free": int64(28579464704), + "size": int64(30601641984), + "fragmentation": int64(0), + } +} + +func getKstatMetricsVdevOnly() map[string]interface{} { + return map[string]interface{}{ + "vdev_cache_stats_misses": int64(87789), + "vdev_cache_stats_hits": int64(465583), + "vdev_cache_stats_delegations": int64(6952), + } +} + +func getKstatMetricsVdevAndZfetch() map[string]interface{} { + return map[string]interface{}{ + "vdev_cache_stats_misses": int64(87789), + "vdev_cache_stats_hits": int64(465583), + "vdev_cache_stats_delegations": int64(6952), + "zfetchstats_max_streams": int64(0), + "zfetchstats_misses": int64(0), + "zfetchstats_hits": int64(0), + } +} diff --git a/plugins/inputs/zfs/zfs_linux.go b/plugins/inputs/zfs/zfs_linux.go new file mode 100644 index 000000000..71ec7e5dc --- /dev/null +++ b/plugins/inputs/zfs/zfs_linux.go @@ -0,0 +1,131 @@ +// +build linux + +package zfs + +import ( + "fmt" + "path/filepath" + "strconv" + "strings" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/plugins/inputs" +) + +type poolInfo struct { + name string + ioFilename string +} + +func getPools(kstatPath string) []poolInfo { + pools := make([]poolInfo, 0) + poolsDirs, _ := filepath.Glob(kstatPath + "/*/io") + + for _, poolDir := range poolsDirs { + poolDirSplit := strings.Split(poolDir, "/") + pool := poolDirSplit[len(poolDirSplit)-2] + pools = append(pools, poolInfo{name: pool, ioFilename: poolDir}) + } + + return pools +} + +func getTags(pools []poolInfo) map[string]string { + var poolNames string + + for _, pool := range pools { + if len(poolNames) != 0 { + poolNames += "::" + } + poolNames += pool.name + } + + return map[string]string{"pools": poolNames} +} + +func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error { + lines, err := internal.ReadLines(pool.ioFilename) + if err != nil { + return err + } + + if len(lines) != 3 { + return err + } + + keys := 
strings.Fields(lines[1]) + values := strings.Fields(lines[2]) + + keyCount := len(keys) + + if keyCount != len(values) { + return fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values) + } + + tag := map[string]string{"pool": pool.name} + fields := make(map[string]interface{}) + for i := 0; i < keyCount; i++ { + value, err := strconv.ParseInt(values[i], 10, 64) + if err != nil { + return err + } + fields[keys[i]] = value + } + acc.AddFields("zfs_pool", fields, tag) + + return nil +} + +func (z *Zfs) Gather(acc telegraf.Accumulator) error { + kstatMetrics := z.KstatMetrics + if len(kstatMetrics) == 0 { + kstatMetrics = []string{"arcstats", "zfetchstats", "vdev_cache_stats"} + } + + kstatPath := z.KstatPath + if len(kstatPath) == 0 { + kstatPath = "/proc/spl/kstat/zfs" + } + + pools := getPools(kstatPath) + tags := getTags(pools) + + if z.PoolMetrics { + for _, pool := range pools { + err := gatherPoolStats(pool, acc) + if err != nil { + return err + } + } + } + + fields := make(map[string]interface{}) + for _, metric := range kstatMetrics { + lines, err := internal.ReadLines(kstatPath + "/" + metric) + if err != nil { + return err + } + for i, line := range lines { + if i == 0 || i == 1 { + continue + } + if len(line) < 1 { + continue + } + rawData := strings.Split(line, " ") + key := metric + "_" + rawData[0] + rawValue := rawData[len(rawData)-1] + value, _ := strconv.ParseInt(rawValue, 10, 64) + fields[key] = value + } + } + acc.AddFields("zfs", fields, tags) + return nil +} + +func init() { + inputs.Add("zfs", func() telegraf.Input { + return &Zfs{} + }) +} diff --git a/plugins/inputs/zfs/zfs_test.go b/plugins/inputs/zfs/zfs_linux_test.go similarity index 99% rename from plugins/inputs/zfs/zfs_test.go rename to plugins/inputs/zfs/zfs_linux_test.go index 03179ba59..c4db75ff5 100644 --- a/plugins/inputs/zfs/zfs_test.go +++ b/plugins/inputs/zfs/zfs_linux_test.go @@ -1,3 +1,5 @@ +// +build linux + package zfs import (