Merge latest version from github

dom
2020-06-06 17:21:53 +02:00
1098 changed files with 159832 additions and 39770 deletions

View File

@@ -2,7 +2,9 @@ package all
import (
_ "github.com/influxdata/telegraf/plugins/aggregators/basicstats"
_ "github.com/influxdata/telegraf/plugins/aggregators/final"
_ "github.com/influxdata/telegraf/plugins/aggregators/histogram"
_ "github.com/influxdata/telegraf/plugins/aggregators/merge"
_ "github.com/influxdata/telegraf/plugins/aggregators/minmax"
_ "github.com/influxdata/telegraf/plugins/aggregators/valuecounter"
)

View File

@@ -1,6 +1,6 @@
# BasicStats Aggregator Plugin
- The BasicStats aggregator plugin give us count,max,min,mean,sum,s2(variance), stdev for a set of values,
+ The BasicStats aggregator plugin give us count,diff,max,min,mean,non_negative_diff,sum,s2(variance), stdev for a set of values,
emitting the aggregate every `period` seconds.
### Configuration:
@@ -8,9 +8,6 @@ emitting the aggregate every `period` seconds.
```toml
# Keep the aggregate basicstats of each metric passing through.
[[aggregators.basicstats]]
## General Aggregator Arguments:
## The period on which to flush & clear the aggregator.
period = "30s"
@@ -18,23 +15,23 @@ emitting the aggregate every `period` seconds.
## aggregator and will not get sent to the output plugins.
drop_original = false
## BasicStats Arguments:
## Configures which basic stats to push as fields
- stats = ["count","min","max","mean","stdev","s2","sum"]
+ # stats = ["count","diff","min","max","mean","non_negative_diff","stdev","s2","sum"]
```
- stats
- - If not specified, then `count`, `min`, `max`, `mean`, `stdev`, and `s2` are aggregated and pushed as fields. `sum` is not aggregated by default to maintain backwards compatibility.
+ - If not specified, then `count`, `min`, `max`, `mean`, `stdev`, and `s2` are aggregated and pushed as fields. `sum`, `diff` and `non_negative_diff` are not aggregated by default to maintain backwards compatibility.
- If empty array, no stats are aggregated
### Measurements & Fields:
- measurement1
- field1_count
- field1_diff (difference)
- field1_max
- field1_min
- field1_mean
- field1_non_negative_diff (non-negative difference)
- field1_sum
- field1_s2 (variance)
- field1_stdev (standard deviation)
@@ -49,8 +46,8 @@ No tags are applied by this aggregator.
$ telegraf --config telegraf.conf --quiet
system,host=tars load1=1 1475583980000000000
system,host=tars load1=1 1475583990000000000
- system,host=tars load1_count=2,load1_max=1,load1_min=1,load1_mean=1,load1_sum=2,load1_s2=0,load1_stdev=0 1475584010000000000
+ system,host=tars load1_count=2,load1_diff=0,load1_max=1,load1_min=1,load1_mean=1,load1_sum=2,load1_s2=0,load1_stdev=0 1475584010000000000
system,host=tars load1=1 1475584020000000000
system,host=tars load1=3 1475584030000000000
- system,host=tars load1_count=2,load1_max=3,load1_min=1,load1_mean=2,load1_sum=4,load1_s2=2,load1_stdev=1.414162 1475584010000000000
+ system,host=tars load1_count=2,load1_diff=2,load1_max=3,load1_min=1,load1_mean=2,load1_sum=4,load1_s2=2,load1_stdev=1.414162 1475584010000000000
```

View File

@@ -1,7 +1,6 @@
package basicstats
import (
"log"
"math"
"github.com/influxdata/telegraf"
@@ -10,25 +9,28 @@ import (
type BasicStats struct {
Stats []string `toml:"stats"`
Log telegraf.Logger
cache map[uint64]aggregate
statsConfig *configuredStats
}
type configuredStats struct {
- count bool
- min bool
- max bool
- mean bool
- variance bool
- stdev bool
- sum bool
+ count bool
+ min bool
+ max bool
+ mean bool
+ variance bool
+ stdev bool
+ sum bool
+ diff bool
+ non_negative_diff bool
}
func NewBasicStats() *BasicStats {
- mm := &BasicStats{}
- mm.Reset()
- return mm
+ return &BasicStats{
+ cache: make(map[uint64]aggregate),
+ }
}
type aggregate struct {
@@ -43,65 +45,74 @@ type basicstats struct {
max float64
sum float64
mean float64
- M2 float64 //intermedia value for variance/stdev
+ diff float64
+ M2 float64 //intermediate value for variance/stdev
+ LAST float64 //intermediate value for diff
}
var sampleConfig = `
## General Aggregator Arguments:
## The period on which to flush & clear the aggregator.
period = "30s"
## If true, the original metric will be dropped by the
## aggregator and will not get sent to the output plugins.
drop_original = false
## Configures which basic stats to push as fields
# stats = ["count", "min", "max", "mean", "stdev", "s2", "sum"]
`
- func (m *BasicStats) SampleConfig() string {
+ func (*BasicStats) SampleConfig() string {
return sampleConfig
}
- func (m *BasicStats) Description() string {
+ func (*BasicStats) Description() string {
return "Keep the aggregate basicstats of each metric passing through."
}
- func (m *BasicStats) Add(in telegraf.Metric) {
+ func (b *BasicStats) Add(in telegraf.Metric) {
id := in.HashID()
- if _, ok := m.cache[id]; !ok {
+ if _, ok := b.cache[id]; !ok {
// hit an uncached metric, create caches for first time:
a := aggregate{
name: in.Name(),
tags: in.Tags(),
fields: make(map[string]basicstats),
}
- for k, v := range in.Fields() {
- if fv, ok := convert(v); ok {
- a.fields[k] = basicstats{
+ for _, field := range in.FieldList() {
+ if fv, ok := convert(field.Value); ok {
+ a.fields[field.Key] = basicstats{
count: 1,
min: fv,
max: fv,
mean: fv,
sum: fv,
diff: 0.0,
M2: 0.0,
LAST: fv,
}
}
}
- m.cache[id] = a
+ b.cache[id] = a
} else {
- for k, v := range in.Fields() {
- if fv, ok := convert(v); ok {
- if _, ok := m.cache[id].fields[k]; !ok {
+ for _, field := range in.FieldList() {
+ if fv, ok := convert(field.Value); ok {
+ if _, ok := b.cache[id].fields[field.Key]; !ok {
// hit an uncached field of a cached metric
- m.cache[id].fields[k] = basicstats{
+ b.cache[id].fields[field.Key] = basicstats{
count: 1,
min: fv,
max: fv,
mean: fv,
sum: fv,
diff: 0.0,
M2: 0.0,
LAST: fv,
}
continue
}
- tmp := m.cache[id].fields[k]
+ tmp := b.cache[id].fields[field.Key]
//https://en.m.wikipedia.org/wiki/Algorithms_for_calculating_variance
//variable initialization
x := fv
@@ -125,34 +136,33 @@ func (m *BasicStats) Add(in telegraf.Metric) {
}
//sum compute
tmp.sum += fv
//diff compute
tmp.diff = fv - tmp.LAST
//store final data
- m.cache[id].fields[k] = tmp
+ b.cache[id].fields[field.Key] = tmp
}
}
}
}
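The hunk above elides the mean/variance update between the variable initialization and the sum/diff computation. For orientation, a minimal sketch of the Welford online update that the linked Wikipedia article describes — the function name and exact statements are reconstructions under that assumption, not the plugin's literal lines:

```go
package main

import (
	"fmt"
	"math"
)

// welfordUpdate folds one sample x into the running count, mean and M2;
// Push later derives variance = M2/(count-1) and stdev = sqrt(variance).
func welfordUpdate(count, mean, m2, x float64) (float64, float64, float64) {
	count++
	delta := x - mean        // distance from the old mean
	mean += delta / count    // shift the mean toward x
	m2 += delta * (x - mean) // accumulate squared distance incrementally
	return count, mean, m2
}

func main() {
	var count, mean, m2 float64
	for _, x := range []float64{1, 3} {
		count, mean, m2 = welfordUpdate(count, mean, m2, x)
	}
	variance := m2 / (count - 1)
	fmt.Println(mean, variance, math.Sqrt(variance)) // 2 2 1.4142135...
}
```

With the inputs 1 and 3 this reproduces the README example above (`load1_s2=2`, `load1_stdev=1.414162`).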
- func (m *BasicStats) Push(acc telegraf.Accumulator) {
- config := getConfiguredStats(m)
- for _, aggregate := range m.cache {
+ func (b *BasicStats) Push(acc telegraf.Accumulator) {
+ for _, aggregate := range b.cache {
fields := map[string]interface{}{}
for k, v := range aggregate.fields {
- if config.count {
+ if b.statsConfig.count {
fields[k+"_count"] = v.count
}
- if config.min {
+ if b.statsConfig.min {
fields[k+"_min"] = v.min
}
- if config.max {
+ if b.statsConfig.max {
fields[k+"_max"] = v.max
}
- if config.mean {
+ if b.statsConfig.mean {
fields[k+"_mean"] = v.mean
}
- if config.sum {
+ if b.statsConfig.sum {
fields[k+"_sum"] = v.sum
}
@@ -160,12 +170,19 @@ func (m *BasicStats) Push(acc telegraf.Accumulator) {
if v.count > 1 {
variance := v.M2 / (v.count - 1)
- if config.variance {
+ if b.statsConfig.variance {
fields[k+"_s2"] = variance
}
- if config.stdev {
+ if b.statsConfig.stdev {
fields[k+"_stdev"] = math.Sqrt(variance)
}
if b.statsConfig.diff {
fields[k+"_diff"] = v.diff
}
if b.statsConfig.non_negative_diff && v.diff >= 0 {
fields[k+"_non_negative_diff"] = v.diff
}
}
//if count == 1 StdDev = infinite => so I won't send data
}
@@ -176,14 +193,12 @@ func (m *BasicStats) Push(acc telegraf.Accumulator) {
}
}
- func parseStats(names []string) *configuredStats {
+ // member function for logging.
+ func (b *BasicStats) parseStats() *configuredStats {
parsed := &configuredStats{}
- for _, name := range names {
+ for _, name := range b.Stats {
switch name {
case "count":
parsed.count = true
case "min":
@@ -198,46 +213,38 @@ func parseStats(names []string) *configuredStats {
parsed.stdev = true
case "sum":
parsed.sum = true
case "diff":
parsed.diff = true
case "non_negative_diff":
parsed.non_negative_diff = true
default:
- log.Printf("W! Unrecognized basic stat '%s', ignoring", name)
+ b.Log.Warnf("Unrecognized basic stat %q, ignoring", name)
}
}
return parsed
}
- func defaultStats() *configuredStats {
- defaults := &configuredStats{}
- defaults.count = true
- defaults.min = true
- defaults.max = true
- defaults.mean = true
- defaults.variance = true
- defaults.stdev = true
- defaults.sum = false
- return defaults
- }
- func getConfiguredStats(m *BasicStats) *configuredStats {
- if m.statsConfig == nil {
- if m.Stats == nil {
- m.statsConfig = defaultStats()
- } else {
- m.statsConfig = parseStats(m.Stats)
+ func (b *BasicStats) getConfiguredStats() {
+ if b.Stats == nil {
+ b.statsConfig = &configuredStats{
+ count: true,
+ min: true,
+ max: true,
+ mean: true,
+ variance: true,
+ stdev: true,
+ sum: false,
+ non_negative_diff: false,
+ }
+ } else {
+ b.statsConfig = b.parseStats()
+ }
- return m.statsConfig
}
- func (m *BasicStats) Reset() {
- m.cache = make(map[uint64]aggregate)
+ func (b *BasicStats) Reset() {
+ b.cache = make(map[uint64]aggregate)
}
func convert(in interface{}) (float64, bool) {
@@ -253,6 +260,12 @@ func convert(in interface{}) (float64, bool) {
}
}
func (b *BasicStats) Init() error {
b.getConfiguredStats()
return nil
}
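With the new `Init()` hook the stats configuration is resolved once at startup instead of lazily inside `Push`. A hedged usage sketch — the wiring is illustrative; only the import paths and exported names come from this repository:

```go
package main

import (
	"log"

	"github.com/influxdata/telegraf/plugins/aggregators/basicstats"
	"github.com/influxdata/telegraf/testutil"
)

func main() {
	agg := basicstats.NewBasicStats()
	agg.Stats = []string{"count", "diff", "non_negative_diff"}
	agg.Log = testutil.Logger{}
	// Init parses Stats into statsConfig up front; unknown names are
	// logged via agg.Log instead of the removed package-level log.Printf.
	if err := agg.Init(); err != nil {
		log.Fatal(err)
	}
}
```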
func init() {
aggregators.Add("basicstats", func() telegraf.Aggregator {
return NewBasicStats()

View File

@@ -17,6 +17,7 @@ var m1, _ = metric.New("m1",
"b": int64(1),
"c": float64(2),
"d": float64(2),
"g": int64(3),
},
time.Now(),
)
@@ -31,12 +32,15 @@ var m2, _ = metric.New("m1",
"f": uint64(200),
"ignoreme": "string",
"andme": true,
"g": int64(1),
},
time.Now(),
)
func BenchmarkApply(b *testing.B) {
minmax := NewBasicStats()
minmax.Log = testutil.Logger{}
minmax.getConfiguredStats()
for n := 0; n < b.N; n++ {
minmax.Add(m1)
@@ -48,6 +52,8 @@ func BenchmarkApply(b *testing.B) {
func TestBasicStatsWithPeriod(t *testing.T) {
acc := testutil.Accumulator{}
minmax := NewBasicStats()
minmax.Log = testutil.Logger{}
minmax.getConfiguredStats()
minmax.Add(m1)
minmax.Add(m2)
@@ -86,6 +92,12 @@ func TestBasicStatsWithPeriod(t *testing.T) {
"f_max": float64(200),
"f_min": float64(200),
"f_mean": float64(200),
"g_count": float64(2), //g
"g_max": float64(3),
"g_min": float64(1),
"g_mean": float64(2),
"g_s2": float64(2),
"g_stdev": math.Sqrt(2),
}
expectedTags := map[string]string{
"foo": "bar",
@@ -98,6 +110,8 @@ func TestBasicStatsWithPeriod(t *testing.T) {
func TestBasicStatsDifferentPeriods(t *testing.T) {
acc := testutil.Accumulator{}
minmax := NewBasicStats()
minmax.Log = testutil.Logger{}
minmax.getConfiguredStats()
minmax.Add(m1)
minmax.Push(&acc)
@@ -118,6 +132,10 @@ func TestBasicStatsDifferentPeriods(t *testing.T) {
"d_max": float64(2),
"d_min": float64(2),
"d_mean": float64(2),
"g_count": float64(1), //g
"g_max": float64(3),
"g_min": float64(3),
"g_mean": float64(3),
}
expectedTags := map[string]string{
"foo": "bar",
@@ -153,6 +171,10 @@ func TestBasicStatsDifferentPeriods(t *testing.T) {
"f_max": float64(200),
"f_min": float64(200),
"f_mean": float64(200),
"g_count": float64(1), //g
"g_max": float64(1),
"g_min": float64(1),
"g_mean": float64(1),
}
expectedTags = map[string]string{
"foo": "bar",
@@ -165,6 +187,8 @@ func TestBasicStatsWithOnlyCount(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"count"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
@@ -179,6 +203,7 @@ func TestBasicStatsWithOnlyCount(t *testing.T) {
"d_count": float64(2),
"e_count": float64(1),
"f_count": float64(1),
"g_count": float64(2),
}
expectedTags := map[string]string{
"foo": "bar",
@@ -191,6 +216,8 @@ func TestBasicStatsWithOnlyMin(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"min"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
@@ -205,6 +232,7 @@ func TestBasicStatsWithOnlyMin(t *testing.T) {
"d_min": float64(2),
"e_min": float64(200),
"f_min": float64(200),
"g_min": float64(1),
}
expectedTags := map[string]string{
"foo": "bar",
@@ -217,6 +245,8 @@ func TestBasicStatsWithOnlyMax(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"max"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
@@ -231,6 +261,7 @@ func TestBasicStatsWithOnlyMax(t *testing.T) {
"d_max": float64(6),
"e_max": float64(200),
"f_max": float64(200),
"g_max": float64(3),
}
expectedTags := map[string]string{
"foo": "bar",
@@ -243,6 +274,8 @@ func TestBasicStatsWithOnlyMean(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"mean"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
@@ -257,6 +290,7 @@ func TestBasicStatsWithOnlyMean(t *testing.T) {
"d_mean": float64(4),
"e_mean": float64(200),
"f_mean": float64(200),
"g_mean": float64(2),
}
expectedTags := map[string]string{
"foo": "bar",
@@ -269,6 +303,8 @@ func TestBasicStatsWithOnlySum(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"sum"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
@@ -283,6 +319,7 @@ func TestBasicStatsWithOnlySum(t *testing.T) {
"d_sum": float64(8),
"e_sum": float64(200),
"f_sum": float64(200),
"g_sum": float64(4),
}
expectedTags := map[string]string{
"foo": "bar",
@@ -291,7 +328,7 @@ func TestBasicStatsWithOnlySum(t *testing.T) {
}
// Verify that sum doesn't suffer from floating point errors. Early
- // implementations of sum were calulated from mean and count, which
+ // implementations of sum were calculated from mean and count, which
// e.g. summed "1, 1, 5, 1" as "7.999999..." instead of 8.
func TestBasicStatsWithOnlySumFloatingPointErrata(t *testing.T) {
@@ -326,6 +363,8 @@ func TestBasicStatsWithOnlySumFloatingPointErrata(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"sum"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(sum1)
aggregator.Add(sum2)
@@ -347,6 +386,8 @@ func TestBasicStatsWithOnlyVariance(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"s2"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
@@ -359,6 +400,7 @@ func TestBasicStatsWithOnlyVariance(t *testing.T) {
"b_s2": float64(2),
"c_s2": float64(2),
"d_s2": float64(8),
"g_s2": float64(2),
}
expectedTags := map[string]string{
"foo": "bar",
@@ -371,6 +413,8 @@ func TestBasicStatsWithOnlyStandardDeviation(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"stdev"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
@@ -383,6 +427,7 @@ func TestBasicStatsWithOnlyStandardDeviation(t *testing.T) {
"b_stdev": math.Sqrt(2),
"c_stdev": math.Sqrt(2),
"d_stdev": math.Sqrt(8),
"g_stdev": math.Sqrt(2),
}
expectedTags := map[string]string{
"foo": "bar",
@@ -395,6 +440,8 @@ func TestBasicStatsWithMinAndMax(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"min", "max"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
@@ -415,6 +462,61 @@ func TestBasicStatsWithMinAndMax(t *testing.T) {
"e_min": float64(200),
"f_max": float64(200), //f
"f_min": float64(200),
"g_max": float64(3), //g
"g_min": float64(1),
}
expectedTags := map[string]string{
"foo": "bar",
}
acc.AssertContainsTaggedFields(t, "m1", expectedFields, expectedTags)
}
// Test only aggregating diff
func TestBasicStatsWithDiff(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"diff"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
acc := testutil.Accumulator{}
aggregator.Push(&acc)
expectedFields := map[string]interface{}{
"a_diff": float64(0),
"b_diff": float64(2),
"c_diff": float64(2),
"d_diff": float64(4),
"g_diff": float64(-2),
}
expectedTags := map[string]string{
"foo": "bar",
}
acc.AssertContainsTaggedFields(t, "m1", expectedFields, expectedTags)
}
// Test only aggregating non_negative_diff
func TestBasicStatsWithNonNegativeDiff(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"non_negative_diff"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
acc := testutil.Accumulator{}
aggregator.Push(&acc)
expectedFields := map[string]interface{}{
"a_non_negative_diff": float64(0),
"b_non_negative_diff": float64(2),
"c_non_negative_diff": float64(2),
"d_non_negative_diff": float64(4),
}
expectedTags := map[string]string{
"foo": "bar",
@@ -426,7 +528,9 @@ func TestBasicStatsWithMinAndMax(t *testing.T) {
func TestBasicStatsWithAllStats(t *testing.T) {
acc := testutil.Accumulator{}
minmax := NewBasicStats()
minmax.Log = testutil.Logger{}
minmax.Stats = []string{"count", "min", "max", "mean", "stdev", "s2", "sum"}
minmax.getConfiguredStats()
minmax.Add(m1)
minmax.Add(m2)
@@ -471,6 +575,13 @@ func TestBasicStatsWithAllStats(t *testing.T) {
"f_min": float64(200),
"f_mean": float64(200),
"f_sum": float64(200),
"g_count": float64(2), //g
"g_max": float64(3),
"g_min": float64(1),
"g_mean": float64(2),
"g_s2": float64(2),
"g_stdev": math.Sqrt(2),
"g_sum": float64(4),
}
expectedTags := map[string]string{
"foo": "bar",
@@ -483,6 +594,8 @@ func TestBasicStatsWithNoStats(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
@@ -498,6 +611,8 @@ func TestBasicStatsWithUnknownStat(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Stats = []string{"crazy"}
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)
@@ -509,12 +624,14 @@ func TestBasicStatsWithUnknownStat(t *testing.T) {
}
// Test that if Stats isn't supplied, then we only do count, min, max, mean,
- // stdev, and s2. We purposely exclude sum for backwards compatability,
+ // stdev, and s2. We purposely exclude sum for backwards compatibility,
// otherwise user's working systems will suddenly (and surprisingly) start
// capturing sum without their input.
func TestBasicStatsWithDefaultStats(t *testing.T) {
aggregator := NewBasicStats()
aggregator.Log = testutil.Logger{}
aggregator.getConfiguredStats()
aggregator.Add(m1)
aggregator.Add(m2)

View File

@@ -0,0 +1,48 @@
# Final Aggregator Plugin
The final aggregator emits the last metric of a contiguous series. A
contiguous series is defined as a series which receives updates within the
time period in `series_timeout`. The contiguous series may be longer than the
time interval defined by `period`.
This is useful for getting the final value for data sources that produce
discrete time series such as procstat, cgroup, kubernetes etc.
When a series has not been updated within the time defined in
`series_timeout`, the last metric is emitted with `_final` appended to each field name.
### Configuration
```toml
[[aggregators.final]]
## The period on which to flush & clear the aggregator.
period = "30s"
## If true, the original metric will be dropped by the
## aggregator and will not get sent to the output plugins.
drop_original = false
## The time that a series is not updated until considering it final.
series_timeout = "5m"
```
### Metrics
Measurement and tags are unchanged, fields are emitted with the suffix
`_final`.
### Example Output
```
counter,host=bar i_final=3,j_final=6 1554281635115090133
counter,host=foo i_final=3,j_final=6 1554281635112992012
```
Original input:
```
counter,host=bar i=1,j=4 1554281633101153300
counter,host=foo i=1,j=4 1554281633099323601
counter,host=bar i=2,j=5 1554281634107980073
counter,host=foo i=2,j=5 1554281634105931116
counter,host=bar i=3,j=6 1554281635115090133
counter,host=foo i=3,j=6 1554281635112992012
```

View File

@@ -0,0 +1,72 @@
package final
import (
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/aggregators"
)
var sampleConfig = `
## The period on which to flush & clear the aggregator.
period = "30s"
## If true, the original metric will be dropped by the
## aggregator and will not get sent to the output plugins.
drop_original = false
## The time that a series is not updated until considering it final.
series_timeout = "5m"
`
type Final struct {
SeriesTimeout internal.Duration `toml:"series_timeout"`
// The last metric for all series which are active
metricCache map[uint64]telegraf.Metric
}
func NewFinal() *Final {
return &Final{
SeriesTimeout: internal.Duration{Duration: 5 * time.Minute},
metricCache: make(map[uint64]telegraf.Metric),
}
}
func (m *Final) SampleConfig() string {
return sampleConfig
}
func (m *Final) Description() string {
return "Report the final metric of a series"
}
func (m *Final) Add(in telegraf.Metric) {
id := in.HashID()
m.metricCache[id] = in
}
func (m *Final) Push(acc telegraf.Accumulator) {
// Preserve timestamp of original metric
acc.SetPrecision(time.Nanosecond)
for id, metric := range m.metricCache {
if time.Since(metric.Time()) > m.SeriesTimeout.Duration {
fields := map[string]interface{}{}
for _, field := range metric.FieldList() {
fields[field.Key+"_final"] = field.Value
}
acc.AddFields(metric.Name(), fields, metric.Tags(), metric.Time())
delete(m.metricCache, id)
}
}
}
func (m *Final) Reset() {
}
func init() {
aggregators.Add("final", func() telegraf.Aggregator {
return NewFinal()
})
}

View File

@@ -0,0 +1,144 @@
package final
import (
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
)
func TestSimple(t *testing.T) {
acc := testutil.Accumulator{}
final := NewFinal()
tags := map[string]string{"foo": "bar"}
m1, _ := metric.New("m1",
tags,
map[string]interface{}{"a": int64(1)},
time.Unix(1530939936, 0))
m2, _ := metric.New("m1",
tags,
map[string]interface{}{"a": int64(2)},
time.Unix(1530939937, 0))
m3, _ := metric.New("m1",
tags,
map[string]interface{}{"a": int64(3)},
time.Unix(1530939938, 0))
final.Add(m1)
final.Add(m2)
final.Add(m3)
final.Push(&acc)
expected := []telegraf.Metric{
testutil.MustMetric(
"m1",
tags,
map[string]interface{}{
"a_final": 3,
},
time.Unix(1530939938, 0),
),
}
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics())
}
func TestTwoTags(t *testing.T) {
acc := testutil.Accumulator{}
final := NewFinal()
tags1 := map[string]string{"foo": "bar"}
tags2 := map[string]string{"foo": "baz"}
m1, _ := metric.New("m1",
tags1,
map[string]interface{}{"a": int64(1)},
time.Unix(1530939936, 0))
m2, _ := metric.New("m1",
tags2,
map[string]interface{}{"a": int64(2)},
time.Unix(1530939937, 0))
m3, _ := metric.New("m1",
tags1,
map[string]interface{}{"a": int64(3)},
time.Unix(1530939938, 0))
final.Add(m1)
final.Add(m2)
final.Add(m3)
final.Push(&acc)
expected := []telegraf.Metric{
testutil.MustMetric(
"m1",
tags2,
map[string]interface{}{
"a_final": 2,
},
time.Unix(1530939937, 0),
),
testutil.MustMetric(
"m1",
tags1,
map[string]interface{}{
"a_final": 3,
},
time.Unix(1530939938, 0),
),
}
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.SortMetrics())
}
func TestLongDifference(t *testing.T) {
acc := testutil.Accumulator{}
final := NewFinal()
final.SeriesTimeout = internal.Duration{Duration: 30 * time.Second}
tags := map[string]string{"foo": "bar"}
now := time.Now()
m1, _ := metric.New("m",
tags,
map[string]interface{}{"a": int64(1)},
now.Add(time.Second*-290))
m2, _ := metric.New("m",
tags,
map[string]interface{}{"a": int64(2)},
now.Add(time.Second*-275))
m3, _ := metric.New("m",
tags,
map[string]interface{}{"a": int64(3)},
now.Add(time.Second*-100))
m4, _ := metric.New("m",
tags,
map[string]interface{}{"a": int64(4)},
now.Add(time.Second*-20))
final.Add(m1)
final.Add(m2)
final.Push(&acc)
final.Add(m3)
final.Push(&acc)
final.Add(m4)
final.Push(&acc)
expected := []telegraf.Metric{
testutil.MustMetric(
"m",
tags,
map[string]interface{}{
"a_final": 2,
},
now.Add(time.Second*-275),
),
testutil.MustMetric(
"m",
tags,
map[string]interface{}{
"a_final": 3,
},
now.Add(time.Second*-100),
),
}
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.SortMetrics())
}

View File

@@ -3,19 +3,21 @@
The histogram aggregator plugin creates histograms containing the counts of
field values within a range.
- Values added to a bucket are also added to the larger buckets in the
- distribution. This creates a [cumulative histogram](https://en.wikipedia.org/wiki/Histogram#/media/File:Cumulative_vs_normal_histogram.svg).
+ If `cumulative` is set to true, values added to a bucket are also added to the
+ larger buckets in the distribution. This creates a [cumulative histogram](https://en.wikipedia.org/wiki/Histogram#/media/File:Cumulative_vs_normal_histogram.svg).
+ Otherwise, values are added to only one bucket, which creates an [ordinary histogram](https://en.wikipedia.org/wiki/Histogram#/media/File:Cumulative_vs_normal_histogram.svg)
Like other Telegraf aggregators, the metric is emitted every `period` seconds.
- Bucket counts however are not reset between periods and will be non-strictly
- increasing while Telegraf is running.
+ By default bucket counts are not reset between periods and will be non-strictly
+ increasing while Telegraf is running. This behavior can be changed by setting the
+ `reset` parameter to true.
#### Design
Each metric is passed to the aggregator and this aggregator searches
histogram buckets for those fields, which have been specified in the
config. If buckets are found, the aggregator will increment +1 to the appropriate
- bucket otherwise it will be added to the `+Inf` bucket. Every `period`
+ bucket. Otherwise, it will be added to the `+Inf` bucket. Every `period`
seconds this data will be forwarded to the outputs.
The algorithm of hit counting to buckets was implemented on the base
@@ -34,16 +36,24 @@ of the algorithm which is implemented in the Prometheus
## aggregator and will not get sent to the output plugins.
drop_original = false
## If true, the histogram will be reset on flush instead
## of accumulating the results.
reset = false
## Whether bucket values should be accumulated. If set to false, "gt" tag will be added.
## Defaults to true.
cumulative = true
## Example config that aggregates all fields of the metric.
# [[aggregators.histogram.config]]
# ## The set of buckets.
# ## Right borders of buckets (with +Inf implicitly added).
# buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0]
# ## The name of metric.
# measurement_name = "cpu"
## Example config that aggregates only specific fields of the metric.
# [[aggregators.histogram.config]]
# ## The set of buckets.
# ## Right borders of buckets (with +Inf implicitly added).
# buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
# ## The name of metric.
# measurement_name = "diskio"
@@ -59,8 +69,9 @@ option. Optionally, if `fields` is set only the fields listed will be
aggregated. If `fields` is not set all fields are aggregated.
The `buckets` option contains a list of floats which specify the bucket
- boundaries. Each float value defines the inclusive upper bound of the bucket.
+ boundaries. Each float value defines the inclusive upper (right) bound of the bucket.
The `+Inf` bucket is added automatically and does not need to be defined.
(For left boundaries, these specified bucket borders and `-Inf` will be used).
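A minimal sketch of the border rule just described — inclusive right borders with an implicit `+Inf` bucket. The helper is illustrative, not the plugin's code; the value 10 against buckets [5, 10, 30, 70, 100] is the same example the previous revision of this README used:

```go
package main

import "fmt"

// bucketIndex returns the index of the first bucket whose right border
// is >= v; len(buckets) stands for the implicit +Inf bucket.
func bucketIndex(buckets []float64, v float64) int {
	for i, border := range buckets {
		if v <= border { // right borders are inclusive
			return i
		}
	}
	return len(buckets) // larger than every border -> +Inf
}

func main() {
	buckets := []float64{5, 10, 30, 70, 100}
	fmt.Println(bucketIndex(buckets, 10))  // 1: lands in the le=10 bucket
	fmt.Println(bucketIndex(buckets, 101)) // 5: the +Inf bucket
}
```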
### Measurements & Fields:
@@ -72,26 +83,43 @@ The postfix `bucket` will be added to each field key.
### Tags:
- All measurements are given the tag `le`. This tag has the border value of
- bucket. It means that the metric value is less than or equal to the value of
- this tag. For example, let assume that we have the metric value 10 and the
- following buckets: [5, 10, 30, 70, 100]. Then the tag `le` will have the value
- 10, because the metrics value is passed into bucket with right border value
- `10`.
+ * `cumulative = true` (default):
+ * `le`: Right bucket border. It means that the metric value is less than or
+ equal to the value of this tag. If a metric value is sorted into a bucket,
+ it is also sorted into all larger buckets. As a result, the value of
+ `<field>_bucket` is rising with rising `le` value. When `le` is `+Inf`,
+ the bucket value is the count of all metrics, because all metric values are
+ less than or equal to positive infinity.
+ * `cumulative = false`:
+ * `gt`: Left bucket border. It means that the metric value is greater than
+ (and not equal to) the value of this tag.
+ * `le`: Right bucket border. It means that the metric value is less than or
+ equal to the value of this tag.
+ * As both `gt` and `le` are present, each metric is sorted in only exactly
+ one bucket.
### Example Output:
Let's assume we have the buckets [0, 10, 50, 100] and the following field values
for `usage_idle`: [50, 7, 99, 12]
With `cumulative = true`:
```
- cpu,cpu=cpu1,host=localhost,le=0.0 usage_idle_bucket=0i 1486998330000000000
- cpu,cpu=cpu1,host=localhost,le=10.0 usage_idle_bucket=0i 1486998330000000000
- cpu,cpu=cpu1,host=localhost,le=20.0 usage_idle_bucket=1i 1486998330000000000
- cpu,cpu=cpu1,host=localhost,le=30.0 usage_idle_bucket=2i 1486998330000000000
- cpu,cpu=cpu1,host=localhost,le=40.0 usage_idle_bucket=2i 1486998330000000000
- cpu,cpu=cpu1,host=localhost,le=50.0 usage_idle_bucket=2i 1486998330000000000
- cpu,cpu=cpu1,host=localhost,le=60.0 usage_idle_bucket=2i 1486998330000000000
- cpu,cpu=cpu1,host=localhost,le=70.0 usage_idle_bucket=2i 1486998330000000000
- cpu,cpu=cpu1,host=localhost,le=80.0 usage_idle_bucket=2i 1486998330000000000
- cpu,cpu=cpu1,host=localhost,le=90.0 usage_idle_bucket=2i 1486998330000000000
- cpu,cpu=cpu1,host=localhost,le=100.0 usage_idle_bucket=2i 1486998330000000000
- cpu,cpu=cpu1,host=localhost,le=+Inf usage_idle_bucket=2i 1486998330000000000
+ cpu,cpu=cpu1,host=localhost,le=0.0 usage_idle_bucket=0i 1486998330000000000 # none
+ cpu,cpu=cpu1,host=localhost,le=10.0 usage_idle_bucket=1i 1486998330000000000 # 7
+ cpu,cpu=cpu1,host=localhost,le=50.0 usage_idle_bucket=2i 1486998330000000000 # 7, 12
+ cpu,cpu=cpu1,host=localhost,le=100.0 usage_idle_bucket=4i 1486998330000000000 # 7, 12, 50, 99
+ cpu,cpu=cpu1,host=localhost,le=+Inf usage_idle_bucket=4i 1486998330000000000 # 7, 12, 50, 99
```
With `cumulative = false`:
```
cpu,cpu=cpu1,host=localhost,gt=-Inf,le=0.0 usage_idle_bucket=0i 1486998330000000000 # none
cpu,cpu=cpu1,host=localhost,gt=0.0,le=10.0 usage_idle_bucket=1i 1486998330000000000 # 7
cpu,cpu=cpu1,host=localhost,gt=10.0,le=50.0 usage_idle_bucket=1i 1486998330000000000 # 12
cpu,cpu=cpu1,host=localhost,gt=50.0,le=100.0 usage_idle_bucket=2i 1486998330000000000 # 50, 99
cpu,cpu=cpu1,host=localhost,gt=100.0,le=+Inf usage_idle_bucket=0i 1486998330000000000 # none
```

View File

@@ -8,15 +8,23 @@ import (
"github.com/influxdata/telegraf/plugins/aggregators"
)
- // bucketTag is the tag, which contains right bucket border
- const bucketTag = "le"
+ // bucketRightTag is the tag, which contains right bucket border
+ const bucketRightTag = "le"
- // bucketInf is the right bucket border for infinite values
- const bucketInf = "+Inf"
+ // bucketPosInf is the right bucket border for infinite values
+ const bucketPosInf = "+Inf"
+ // bucketLeftTag is the tag, which contains left bucket border (exclusive)
+ const bucketLeftTag = "gt"
+ // bucketNegInf is the left bucket border for infinite values
+ const bucketNegInf = "-Inf"
// HistogramAggregator is aggregator with histogram configs and particular histograms for defined metrics
type HistogramAggregator struct {
- Configs []config `toml:"config"`
+ Configs []config `toml:"config"`
+ ResetBuckets bool `toml:"reset"`
+ Cumulative bool `toml:"cumulative"`
buckets bucketsByMetrics
cache map[uint64]metricHistogramCollection
@@ -56,8 +64,10 @@ type groupedByCountFields struct {
}
// NewHistogramAggregator creates new histogram aggregator
- func NewHistogramAggregator() telegraf.Aggregator {
- h := &HistogramAggregator{}
+ func NewHistogramAggregator() *HistogramAggregator {
+ h := &HistogramAggregator{
+ Cumulative: true,
+ }
h.buckets = make(bucketsByMetrics)
h.resetCache()
@@ -72,16 +82,24 @@ var sampleConfig = `
## aggregator and will not get sent to the output plugins.
drop_original = false
## If true, the histogram will be reset on flush instead
## of accumulating the results.
reset = false
## Whether bucket values should be accumulated. If set to false, "gt" tag will be added.
## Defaults to true.
cumulative = true
## Example config that aggregates all fields of the metric.
# [[aggregators.histogram.config]]
# ## The set of buckets.
# ## Right borders of buckets (with +Inf implicitly added).
# buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0]
# ## The name of metric.
# measurement_name = "cpu"
## Example config that aggregates only specific fields of the metric.
# [[aggregators.histogram.config]]
# ## The set of buckets.
# ## Right borders of buckets (with +Inf implicitly added).
# buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
# ## The name of metric.
# measurement_name = "diskio"
@@ -162,18 +180,27 @@ func (h *HistogramAggregator) groupFieldsByBuckets(
tags map[string]string,
counts []int64,
) {
- count := int64(0)
- for index, bucket := range h.getBuckets(name, field) {
- count += counts[index]
+ sum := int64(0)
+ buckets := h.getBuckets(name, field) // note that len(buckets) + 1 == len(counts)
- tags[bucketTag] = strconv.FormatFloat(bucket, 'f', -1, 64)
- h.groupField(metricsWithGroupedFields, name, field, count, copyTags(tags))
+ for index, count := range counts {
+ if !h.Cumulative {
+ sum = 0 // reset sum -> don't store cumulative counts
+ tags[bucketLeftTag] = bucketNegInf
+ if index > 0 {
+ tags[bucketLeftTag] = strconv.FormatFloat(buckets[index-1], 'f', -1, 64)
+ }
+ }
+ tags[bucketRightTag] = bucketPosInf
+ if index < len(buckets) {
+ tags[bucketRightTag] = strconv.FormatFloat(buckets[index], 'f', -1, 64)
+ }
+ sum += count
+ h.groupField(metricsWithGroupedFields, name, field, sum, copyTags(tags))
}
- count += counts[len(counts)-1]
- tags[bucketTag] = bucketInf
- h.groupField(metricsWithGroupedFields, name, field, count, tags)
}
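A small worked illustration of the rewritten loop: with hypothetical per-bucket hit counts, cumulative mode emits the running sum, while `cumulative = false` resets `sum` so each bucket reports only its own hits. This mirrors the `sum = 0` branch above; the counts are invented, not test data from this commit:

```go
package main

import "fmt"

// emit mirrors the sum handling in groupFieldsByBuckets for per-bucket
// hit counts; the last entry is the implicit +Inf bucket.
func emit(counts []int64, cumulative bool) []int64 {
	out := make([]int64, 0, len(counts))
	sum := int64(0)
	for _, count := range counts {
		if !cumulative {
			sum = 0 // report only this bucket's own hits
		}
		sum += count
		out = append(out, sum)
	}
	return out
}

func main() {
	fmt.Println(emit([]int64{0, 1, 1, 2}, true))  // [0 1 2 4]
	fmt.Println(emit([]int64{0, 1, 1, 2}, false)) // [0 1 1 2]
}
```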
// groupField groups field by count value
@@ -201,9 +228,15 @@ func (h *HistogramAggregator) groupField(
)
}
- // Reset does nothing, because we need to collect counts for a long time, otherwise if config parameter 'reset' has
- // small value, we will get a histogram with a small amount of the distribution.
- func (h *HistogramAggregator) Reset() {}
+ // Reset does nothing by default, because we typically need to collect counts for a long time.
+ // Otherwise if config parameter 'reset' has 'true' value, we will get a histogram
+ // with a small amount of the distribution. However in some use cases a reset is useful.
+ func (h *HistogramAggregator) Reset() {
+ if h.ResetBuckets {
+ h.resetCache()
+ h.buckets = make(bucketsByMetrics)
+ }
+ }
// resetCache resets cached counts(hits) in the buckets
func (h *HistogramAggregator) resetCache() {

View File

@@ -11,11 +11,15 @@ import (
"github.com/stretchr/testify/assert"
)
type fields map[string]interface{}
type tags map[string]string
// NewTestHistogram creates new test histogram aggregation with specified config
- func NewTestHistogram(cfg []config) telegraf.Aggregator {
- htm := &HistogramAggregator{Configs: cfg}
- htm.buckets = make(bucketsByMetrics)
- htm.resetCache()
+ func NewTestHistogram(cfg []config, reset bool, cumulative bool) telegraf.Aggregator {
+ htm := NewHistogramAggregator()
+ htm.Configs = cfg
+ htm.ResetBuckets = reset
+ htm.Cumulative = cumulative
return htm
}
@@ -23,8 +27,8 @@ func NewTestHistogram(cfg []config) telegraf.Aggregator {
// firstMetric1 is the first test metric
var firstMetric1, _ = metric.New(
"first_metric_name",
map[string]string{"tag_name": "tag_value"},
map[string]interface{}{
tags{},
fields{
"a": float64(15.3),
"b": float64(40),
},
@@ -34,8 +38,8 @@ var firstMetric1, _ = metric.New(
// firstMetric1 is the first test metric with other value
var firstMetric2, _ = metric.New(
"first_metric_name",
map[string]string{"tag_name": "tag_value"},
map[string]interface{}{
tags{},
fields{
"a": float64(15.9),
"c": float64(40),
},
@@ -45,8 +49,8 @@ var firstMetric2, _ = metric.New(
// secondMetric is the second metric
var secondMetric, _ = metric.New(
"second_metric_name",
map[string]string{"tag_name": "tag_value"},
map[string]interface{}{
tags{},
fields{
"a": float64(105),
"ignoreme": "string",
"andme": true,
@@ -65,35 +69,84 @@ func BenchmarkApply(b *testing.B) {
}
}
- // TestHistogramWithPeriodAndOneField tests metrics for one period and for one field
- func TestHistogramWithPeriodAndOneField(t *testing.T) {
+ // TestHistogram tests metrics for one period and for one field
+ func TestHistogram(t *testing.T) {
var cfg []config
cfg = append(cfg, config{Metric: "first_metric_name", Fields: []string{"a"}, Buckets: []float64{0.0, 10.0, 20.0, 30.0, 40.0}})
- histogram := NewTestHistogram(cfg)
+ histogram := NewTestHistogram(cfg, false, true)
acc := &testutil.Accumulator{}
histogram.Add(firstMetric1)
histogram.Reset()
histogram.Add(firstMetric2)
histogram.Push(acc)
if len(acc.Metrics) != 6 {
assert.Fail(t, "Incorrect number of metrics")
}
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(0)}, "0")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(0)}, "10")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2)}, "20")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2)}, "30")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2)}, "40")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2)}, bucketInf)
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0)}, tags{bucketRightTag: "0"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0)}, tags{bucketRightTag: "10"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2)}, tags{bucketRightTag: "20"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2)}, tags{bucketRightTag: "30"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2)}, tags{bucketRightTag: "40"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2)}, tags{bucketRightTag: bucketPosInf})
}
- // TestHistogramWithPeriodAndAllFields tests two metrics for one period and for all fields
- func TestHistogramWithPeriodAndAllFields(t *testing.T) {
+ // TestHistogramNonCumulative tests metrics for one period and for one field
+ func TestHistogramNonCumulative(t *testing.T) {
var cfg []config
cfg = append(cfg, config{Metric: "first_metric_name", Fields: []string{"a"}, Buckets: []float64{0.0, 10.0, 20.0, 30.0, 40.0}})
histogram := NewTestHistogram(cfg, false, false)
acc := &testutil.Accumulator{}
histogram.Add(firstMetric1)
histogram.Reset()
histogram.Add(firstMetric2)
histogram.Push(acc)
if len(acc.Metrics) != 6 {
assert.Fail(t, "Incorrect number of metrics")
}
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0)}, tags{bucketLeftTag: bucketNegInf, bucketRightTag: "0"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0)}, tags{bucketLeftTag: "0", bucketRightTag: "10"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2)}, tags{bucketLeftTag: "10", bucketRightTag: "20"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0)}, tags{bucketLeftTag: "20", bucketRightTag: "30"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0)}, tags{bucketLeftTag: "30", bucketRightTag: "40"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0)}, tags{bucketLeftTag: "40", bucketRightTag: bucketPosInf})
}
// TestHistogramWithReset tests metrics for one period and for one field, with reset between metrics adding
func TestHistogramWithReset(t *testing.T) {
var cfg []config
cfg = append(cfg, config{Metric: "first_metric_name", Fields: []string{"a"}, Buckets: []float64{0.0, 10.0, 20.0, 30.0, 40.0}})
histogram := NewTestHistogram(cfg, true, true)
acc := &testutil.Accumulator{}
histogram.Add(firstMetric1)
histogram.Reset()
histogram.Add(firstMetric2)
histogram.Push(acc)
if len(acc.Metrics) != 6 {
assert.Fail(t, "Incorrect number of metrics")
}
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0)}, tags{bucketRightTag: "0"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0)}, tags{bucketRightTag: "10"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(1)}, tags{bucketRightTag: "20"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(1)}, tags{bucketRightTag: "30"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(1)}, tags{bucketRightTag: "40"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(1)}, tags{bucketRightTag: bucketPosInf})
}
// TestHistogramWithAllFields tests two metrics for one period and for all fields
func TestHistogramWithAllFields(t *testing.T) {
var cfg []config
cfg = append(cfg, config{Metric: "first_metric_name", Buckets: []float64{0.0, 15.5, 20.0, 30.0, 40.0}})
cfg = append(cfg, config{Metric: "second_metric_name", Buckets: []float64{0.0, 4.0, 10.0, 23.0, 30.0}})
- histogram := NewTestHistogram(cfg)
+ histogram := NewTestHistogram(cfg, false, true)
acc := &testutil.Accumulator{}
@@ -106,50 +159,83 @@ func TestHistogramWithPeriodAndAllFields(t *testing.T) {
assert.Fail(t, "Incorrect number of metrics")
}
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(0), "b_bucket": int64(0), "c_bucket": int64(0)}, "0")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(1), "b_bucket": int64(0), "c_bucket": int64(0)}, "15.5")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2), "b_bucket": int64(0), "c_bucket": int64(0)}, "20")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2), "b_bucket": int64(0), "c_bucket": int64(0)}, "30")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2), "b_bucket": int64(1), "c_bucket": int64(1)}, "40")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2), "b_bucket": int64(1), "c_bucket": int64(1)}, bucketInf)
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketRightTag: "0"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(1), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketRightTag: "15.5"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketRightTag: "20"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketRightTag: "30"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2), "b_bucket": int64(1), "c_bucket": int64(1)}, tags{bucketRightTag: "40"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2), "b_bucket": int64(1), "c_bucket": int64(1)}, tags{bucketRightTag: bucketPosInf})
assertContainsTaggedField(t, acc, "second_metric_name", map[string]interface{}{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, "0")
assertContainsTaggedField(t, acc, "second_metric_name", map[string]interface{}{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, "4")
assertContainsTaggedField(t, acc, "second_metric_name", map[string]interface{}{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, "10")
assertContainsTaggedField(t, acc, "second_metric_name", map[string]interface{}{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, "23")
assertContainsTaggedField(t, acc, "second_metric_name", map[string]interface{}{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, "30")
assertContainsTaggedField(t, acc, "second_metric_name", map[string]interface{}{"a_bucket": int64(1), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, bucketInf)
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketRightTag: "0"})
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketRightTag: "4"})
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketRightTag: "10"})
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketRightTag: "23"})
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketRightTag: "30"})
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(1), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketRightTag: bucketPosInf})
}
- // TestHistogramDifferentPeriodsAndAllFields tests two metrics getting added with a push/reset in between (simulates
+ // TestHistogramWithAllFieldsNonCumulative tests two metrics for one period and for all fields
+ func TestHistogramWithAllFieldsNonCumulative(t *testing.T) {
var cfg []config
cfg = append(cfg, config{Metric: "first_metric_name", Buckets: []float64{0.0, 15.5, 20.0, 30.0, 40.0}})
cfg = append(cfg, config{Metric: "second_metric_name", Buckets: []float64{0.0, 4.0, 10.0, 23.0, 30.0}})
histogram := NewTestHistogram(cfg, false, false)
acc := &testutil.Accumulator{}
histogram.Add(firstMetric1)
histogram.Add(firstMetric2)
histogram.Add(secondMetric)
histogram.Push(acc)
if len(acc.Metrics) != 12 {
assert.Fail(t, "Incorrect number of metrics")
}
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketLeftTag: bucketNegInf, bucketRightTag: "0"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(1), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketLeftTag: "0", bucketRightTag: "15.5"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(1), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketLeftTag: "15.5", bucketRightTag: "20"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketLeftTag: "20", bucketRightTag: "30"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0), "b_bucket": int64(1), "c_bucket": int64(1)}, tags{bucketLeftTag: "30", bucketRightTag: "40"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketLeftTag: "40", bucketRightTag: bucketPosInf})
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketLeftTag: bucketNegInf, bucketRightTag: "0"})
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketLeftTag: "0", bucketRightTag: "4"})
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketLeftTag: "4", bucketRightTag: "10"})
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketLeftTag: "10", bucketRightTag: "23"})
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(0), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketLeftTag: "23", bucketRightTag: "30"})
assertContainsTaggedField(t, acc, "second_metric_name", fields{"a_bucket": int64(1), "ignoreme_bucket": int64(0), "andme_bucket": int64(0)}, tags{bucketLeftTag: "30", bucketRightTag: bucketPosInf})
}
+ // TestHistogramWithTwoPeriodsAndAllFields tests two metrics getting added with a push/reset in between (simulates
// getting added in different periods) for all fields
- func TestHistogramDifferentPeriodsAndAllFields(t *testing.T) {
+ func TestHistogramWithTwoPeriodsAndAllFields(t *testing.T) {
var cfg []config
cfg = append(cfg, config{Metric: "first_metric_name", Buckets: []float64{0.0, 10.0, 20.0, 30.0, 40.0}})
- histogram := NewTestHistogram(cfg)
+ histogram := NewTestHistogram(cfg, false, true)
acc := &testutil.Accumulator{}
histogram.Add(firstMetric1)
histogram.Push(acc)
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(0), "b_bucket": int64(0)}, "0")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(0), "b_bucket": int64(0)}, "10")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(1), "b_bucket": int64(0)}, "20")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(1), "b_bucket": int64(0)}, "30")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(1), "b_bucket": int64(1)}, "40")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(1), "b_bucket": int64(1)}, bucketInf)
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0), "b_bucket": int64(0)}, tags{bucketRightTag: "0"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0), "b_bucket": int64(0)}, tags{bucketRightTag: "10"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(1), "b_bucket": int64(0)}, tags{bucketRightTag: "20"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(1), "b_bucket": int64(0)}, tags{bucketRightTag: "30"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(1), "b_bucket": int64(1)}, tags{bucketRightTag: "40"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(1), "b_bucket": int64(1)}, tags{bucketRightTag: bucketPosInf})
acc.ClearMetrics()
histogram.Add(firstMetric2)
histogram.Push(acc)
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(0), "b_bucket": int64(0), "c_bucket": int64(0)}, "0")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(0), "b_bucket": int64(0), "c_bucket": int64(0)}, "10")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2), "b_bucket": int64(0), "c_bucket": int64(0)}, "20")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2), "b_bucket": int64(0), "c_bucket": int64(0)}, "30")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2), "b_bucket": int64(1), "c_bucket": int64(1)}, "40")
assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2), "b_bucket": int64(1), "c_bucket": int64(1)}, bucketInf)
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketRightTag: "0"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(0), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketRightTag: "10"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketRightTag: "20"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2), "b_bucket": int64(0), "c_bucket": int64(0)}, tags{bucketRightTag: "30"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2), "b_bucket": int64(1), "c_bucket": int64(1)}, tags{bucketRightTag: "40"})
assertContainsTaggedField(t, acc, "first_metric_name", fields{"a_bucket": int64(2), "b_bucket": int64(1), "c_bucket": int64(1)}, tags{bucketRightTag: bucketPosInf})
}
// TestWrongBucketsOrder tests the calling panic with incorrect order of buckets
@@ -166,35 +252,42 @@ func TestWrongBucketsOrder(t *testing.T) {
var cfg []config
cfg = append(cfg, config{Metric: "first_metric_name", Buckets: []float64{0.0, 90.0, 20.0, 30.0, 40.0}})
- histogram := NewTestHistogram(cfg)
+ histogram := NewTestHistogram(cfg, false, true)
histogram.Add(firstMetric2)
}
// assertContainsTaggedField is a helper function to test histogram data
- func assertContainsTaggedField(t *testing.T, acc *testutil.Accumulator, metricName string, fields map[string]interface{}, le string) {
+ func assertContainsTaggedField(t *testing.T, acc *testutil.Accumulator, metricName string, fields map[string]interface{}, tags map[string]string) {
acc.Lock()
defer acc.Unlock()
for _, checkedMetric := range acc.Metrics {
- // check metric name
+ // filter by metric name
if checkedMetric.Measurement != metricName {
continue
}
// check "le" tag
if checkedMetric.Tags[bucketTag] != le {
continue
}
// check fields
isFieldsIdentical := true
for field := range fields {
if _, ok := checkedMetric.Fields[field]; !ok {
isFieldsIdentical = false
// filter by tags
isTagsIdentical := true
for tag := range tags {
if val, ok := checkedMetric.Tags[tag]; !ok || val != tags[tag] {
isTagsIdentical = false
break
}
}
- if !isFieldsIdentical {
+ if !isTagsIdentical {
continue
}
// filter by field keys
isFieldKeysIdentical := true
for field := range fields {
if _, ok := checkedMetric.Fields[field]; !ok {
isFieldKeysIdentical = false
break
}
}
if !isFieldKeysIdentical {
continue
}
@@ -203,8 +296,8 @@ func assertContainsTaggedField(t *testing.T, acc *testutil.Accumulator, metricNa
return
}
assert.Fail(t, fmt.Sprintf("incorrect fields %v of metric %s", fields, metricName))
assert.Fail(t, fmt.Sprintf("incorrect fields %v of metric %s", checkedMetric.Fields, metricName))
}
assert.Fail(t, fmt.Sprintf("unknown measurement '%s' with tags: %v, fields: %v", metricName, map[string]string{"le": le}, fields))
assert.Fail(t, fmt.Sprintf("unknown measurement '%s' with tags: %v, fields: %v", metricName, tags, fields))
}

View File

@@ -0,0 +1,25 @@
# Merge Aggregator
Merge metrics together into metrics with multiple fields, which is the most
memory- and network-transfer-efficient form.
Use this plugin when fields are split over multiple metrics, with the same
measurement, tag set and timestamp. By merging into a single metric they can
be handled more efficiently by the output.
### Configuration
```toml
[[aggregators.merge]]
## If true, the original metric will be dropped by the
## aggregator and will not get sent to the output plugins.
drop_original = true
```
### Example
```diff
- cpu,host=localhost usage_time=42 1567562620000000000
- cpu,host=localhost idle_time=42 1567562620000000000
+ cpu,host=localhost idle_time=42,usage_time=42 1567562620000000000
```
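The merge happens per series key: measurement name plus tag set plus timestamp. A minimal sketch of that grouping idea — the key format and the inline grouping loop are illustrative assumptions, not the `metric.SeriesGrouper` implementation used below:

```go
package main

import (
	"fmt"
	"sort"
	"time"
)

// seriesKey builds a key from name, sorted tags and timestamp; metrics
// sharing a key get their fields merged into a single field map.
func seriesKey(name string, tags map[string]string, t time.Time) string {
	keys := make([]string, 0, len(tags))
	for k := range tags {
		keys = append(keys, k)
	}
	sort.Strings(keys) // tag order must not affect the key
	key := name
	for _, k := range keys {
		key += "," + k + "=" + tags[k]
	}
	return fmt.Sprintf("%s %d", key, t.UnixNano())
}

func main() {
	groups := map[string]map[string]interface{}{}
	ts := time.Unix(0, 1567562620000000000)
	for _, f := range []struct {
		name  string
		value interface{}
	}{{"usage_time", 42}, {"idle_time", 42}} {
		k := seriesKey("cpu", map[string]string{"host": "localhost"}, ts)
		if groups[k] == nil {
			groups[k] = map[string]interface{}{}
		}
		groups[k][f.name] = f.value
	}
	fmt.Println(groups) // one series holding both fields, as in the diff above
}
```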

View File

@@ -0,0 +1,66 @@
package seriesgrouper
import (
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/plugins/aggregators"
)
const (
description = "Merge metrics into multifield metrics by series key"
sampleConfig = `
## If true, the original metric will be dropped by the
## aggregator and will not get sent to the output plugins.
drop_original = true
`
)
type Merge struct {
grouper *metric.SeriesGrouper
log telegraf.Logger
}
func (a *Merge) Init() error {
a.grouper = metric.NewSeriesGrouper()
return nil
}
func (a *Merge) Description() string {
return description
}
func (a *Merge) SampleConfig() string {
return sampleConfig
}
func (a *Merge) Add(m telegraf.Metric) {
tags := m.Tags()
for _, field := range m.FieldList() {
err := a.grouper.Add(m.Name(), tags, m.Time(), field.Key, field.Value)
if err != nil {
a.log.Errorf("Error adding metric: %v", err)
}
}
}
func (a *Merge) Push(acc telegraf.Accumulator) {
// Always use nanosecond precision to avoid rounding metrics that were
// produced at a precision higher than the agent default.
acc.SetPrecision(time.Nanosecond)
for _, m := range a.grouper.Metrics() {
acc.AddMetric(m)
}
}
func (a *Merge) Reset() {
a.grouper = metric.NewSeriesGrouper()
}
func init() {
aggregators.Add("merge", func() telegraf.Aggregator {
return &Merge{}
})
}

View File

@@ -0,0 +1,186 @@
package seriesgrouper
import (
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
func TestSimple(t *testing.T) {
plugin := &Merge{}
err := plugin.Init()
require.NoError(t, err)
plugin.Add(
testutil.MustMetric(
"cpu",
map[string]string{
"cpu": "cpu0",
},
map[string]interface{}{
"time_idle": 42,
},
time.Unix(0, 0),
),
)
require.NoError(t, err)
plugin.Add(
testutil.MustMetric(
"cpu",
map[string]string{
"cpu": "cpu0",
},
map[string]interface{}{
"time_guest": 42,
},
time.Unix(0, 0),
),
)
require.NoError(t, err)
var acc testutil.Accumulator
plugin.Push(&acc)
expected := []telegraf.Metric{
testutil.MustMetric(
"cpu",
map[string]string{
"cpu": "cpu0",
},
map[string]interface{}{
"time_idle": 42,
"time_guest": 42,
},
time.Unix(0, 0),
),
}
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics())
}
func TestNanosecondPrecision(t *testing.T) {
plugin := &Merge{}
err := plugin.Init()
require.NoError(t, err)
plugin.Add(
testutil.MustMetric(
"cpu",
map[string]string{
"cpu": "cpu0",
},
map[string]interface{}{
"time_idle": 42,
},
time.Unix(0, 1),
),
)
require.NoError(t, err)
plugin.Add(
testutil.MustMetric(
"cpu",
map[string]string{
"cpu": "cpu0",
},
map[string]interface{}{
"time_guest": 42,
},
time.Unix(0, 1),
),
)
require.NoError(t, err)
var acc testutil.Accumulator
acc.SetPrecision(time.Second)
plugin.Push(&acc)
expected := []telegraf.Metric{
testutil.MustMetric(
"cpu",
map[string]string{
"cpu": "cpu0",
},
map[string]interface{}{
"time_idle": 42,
"time_guest": 42,
},
time.Unix(0, 1),
),
}
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics())
}
func TestReset(t *testing.T) {
plugin := &Merge{}
err := plugin.Init()
require.NoError(t, err)
plugin.Add(
testutil.MustMetric(
"cpu",
map[string]string{
"cpu": "cpu0",
},
map[string]interface{}{
"time_idle": 42,
},
time.Unix(0, 0),
),
)
require.NoError(t, err)
var acc testutil.Accumulator
plugin.Push(&acc)
plugin.Reset()
plugin.Add(
testutil.MustMetric(
"cpu",
map[string]string{
"cpu": "cpu0",
},
map[string]interface{}{
"time_guest": 42,
},
time.Unix(0, 0),
),
)
require.NoError(t, err)
plugin.Push(&acc)
expected := []telegraf.Metric{
testutil.MustMetric(
"cpu",
map[string]string{
"cpu": "cpu0",
},
map[string]interface{}{
"time_idle": 42,
},
time.Unix(0, 0),
),
testutil.MustMetric(
"cpu",
map[string]string{
"cpu": "cpu0",
},
map[string]interface{}{
"time_guest": 42,
},
time.Unix(0, 0),
),
}
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics())
}

View File

@@ -11,8 +11,9 @@ configuration directive. When no `fields` is provided the plugin will not count
any fields. The results are emitted in fields in the format:
`originalfieldname_fieldvalue = count`.
Valuecounter only works on fields of the type int, bool or string. Float fields
are being dropped to prevent the creating of too many fields.
Counting fields with a high number of potential values may produce a significant
number of new fields and significant memory usage; take care to only count
fields with a limited set of values.
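For example (illustrative metric names and values), counting the `status` field
of these metrics:

```
http,host=a status=200i
http,host=a status=200i
http,host=a status=404i
```

would emit the following once the period elapses:

```
http,host=a status_200=2i,status_404=1i
```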
### Configuration:

View File

@@ -2,7 +2,6 @@ package valuecounter
import (
"fmt"
"log"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/aggregators"
@@ -20,7 +19,7 @@ type ValueCounter struct {
Fields []string
}
// NewValueCounter create a new aggregation plugin which counts the occurances
// NewValueCounter create a new aggregation plugin which counts the occurrences
// of fields and emits the count.
func NewValueCounter() telegraf.Aggregator {
vc := &ValueCounter{}
@@ -46,7 +45,7 @@ func (vc *ValueCounter) SampleConfig() string {
// Description returns the description of the ValueCounter plugin
func (vc *ValueCounter) Description() string {
return "Count the occurance of values in fields."
return "Count the occurrence of values in fields."
}
// Add is run on every metric which passes the plugin
@@ -68,14 +67,6 @@ func (vc *ValueCounter) Add(in telegraf.Metric) {
for fk, fv := range in.Fields() {
for _, cf := range vc.Fields {
if fk == cf {
// Do not process float types to prevent memory from blowing up
switch fv.(type) {
default:
log.Printf("I! Valuecounter: Unsupported field type. " +
"Must be an int, string or bool. Ignoring.")
continue
case uint64, int64, string, bool:
}
fn := fmt.Sprintf("%v_%v", fk, fv)
vc.cache[id].fieldCount[fn]++
}

View File

@@ -22,9 +22,8 @@ func NewTestValueCounter(fields []string) telegraf.Aggregator {
var m1, _ = metric.New("m1",
map[string]string{"foo": "bar"},
map[string]interface{}{
"status": 200,
"somefield": 20.1,
"foobar": "bar",
"status": 200,
"foobar": "bar",
},
time.Now(),
)

View File

@@ -0,0 +1,25 @@
package kafka
import (
"errors"
"github.com/Shopify/sarama"
)
func SASLVersion(kafkaVersion sarama.KafkaVersion, saslVersion *int) (int16, error) {
if saslVersion == nil {
if kafkaVersion.IsAtLeast(sarama.V1_0_0_0) {
return sarama.SASLHandshakeV1, nil
}
return sarama.SASLHandshakeV0, nil
}
switch *saslVersion {
case 0:
return sarama.SASLHandshakeV0, nil
case 1:
return sarama.SASLHandshakeV1, nil
default:
return 0, errors.New("invalid SASL version")
}
}
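// Illustrative wiring (a sketch, not part of this file): applying the
// negotiated handshake version to a sarama configuration. The sarama fields
// used here exist, but the surrounding setup is assumed.
//
//	cfg := sarama.NewConfig()
//	cfg.Version = sarama.V1_0_0_0
//	saslVersion, err := SASLVersion(cfg.Version, nil) // nil: derive from broker version
//	if err != nil {
//		return err
//	}
//	cfg.Net.SASL.Enable = true
//	cfg.Net.SASL.Version = saslVersion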

View File

@@ -0,0 +1,35 @@
package logrus
import (
"io/ioutil"
"log"
"strings"
"sync"
"github.com/sirupsen/logrus"
)
var once sync.Once
type LogHook struct {
}
// Install a logging hook into the logrus standard logger, diverting all logs
// through the Telegraf logger at debug level. This is useful for libraries
// that directly log to the logrus system without providing an override method.
func InstallHook() {
once.Do(func() {
logrus.SetOutput(ioutil.Discard)
logrus.AddHook(&LogHook{})
})
}
func (h *LogHook) Fire(entry *logrus.Entry) error {
msg := strings.ReplaceAll(entry.Message, "\n", " ")
log.Print("D! [logrus] ", msg)
return nil
}
func (h *LogHook) Levels() []logrus.Level {
return logrus.AllLevels
}
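// Illustrative use (a sketch, not part of this file): a caller imports this
// package and installs the hook once during startup:
//
//	logrus.InstallHook()
//
// After that, a library call such as sirupsen/logrus's
// logrus.Warn("reconnecting") surfaces through Telegraf's logger as:
//
//	D! [logrus] reconnecting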

View File

@@ -1,21 +1,28 @@
# Example Input Plugin
The example plugin gathers metrics about example things. This description
The `example` plugin gathers metrics about example things. This description
explains at a high level what the plugin does and provides links to where
additional information can be found.
### Configuration:
Telegraf minimum version: Telegraf x.x
Plugin minimum tested version: x.x
### Configuration
This section contains the default TOML to configure the plugin. You can
generate it using `telegraf --usage <plugin-name>`.
```toml
# Description
[[inputs.example]]
example_option = "example_value"
```
### Metrics:
#### example_option
A more in depth description of an option can be provided here, but only do so
if the option cannot be fully described in the sample config.
### Metrics
Here you should add an optional description and links to where the user can
get more information about the measurements.
@@ -32,16 +39,20 @@ mapped to the output.
- field1 (type, unit)
- field2 (float, percent)
- measurement2
+ measurement2
- tags:
- tag3
- fields:
- field3 (integer, bytes)
- field4 (integer, green=1 yellow=2 red=3)
- field5 (string)
- field6 (float)
- field7 (boolean)
### Sample Queries:
### Sample Queries
This section should contain some useful InfluxDB queries that can be used to
get started with the plugin or to generate dashboards. For each query listed,
This section can contain some useful InfluxDB queries that can be used to get
started with the plugin or to generate dashboards. For each query listed,
describe at a high level what data is returned.
Get the max, mean, and min for the measurement in the last hour:
@@ -49,7 +60,12 @@ Get the max, mean, and min for the measurement in the last hour:
SELECT max(field1), mean(field1), min(field1) FROM measurement1 WHERE tag1=bar AND time > now() - 1h GROUP BY tag
```
### Example Output:
### Troubleshooting
This optional section can provide basic troubleshooting steps that a user can
perform.
### Example Output
This section shows example output in Line Protocol format. You can often use
`telegraf --input-filter <plugin-name> --test` or use the `file` output to get

View File

@@ -0,0 +1,88 @@
# ActiveMQ Input Plugin
This plugin gathers queues, topics and subscribers metrics using the ActiveMQ Console API.
### Configuration:
```toml
# Description
[[inputs.activemq]]
## ActiveMQ WebConsole URL
url = "http://127.0.0.1:8161"
## Required ActiveMQ Endpoint
## deprecated in 1.11; use the url option
# server = "192.168.50.10"
# port = 8161
## Credentials for basic HTTP authentication
# username = "admin"
# password = "admin"
## Required ActiveMQ webadmin root path
# webadmin = "admin"
## Maximum time to receive response.
# response_timeout = "5s"
## Optional TLS Config
# tls_ca = "/etc/telegraf/ca.pem"
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
```
### Metrics
Every effort was made to preserve the names based on the XML response from the ActiveMQ Console API.
- activemq_queues
- tags:
- name
- source
- port
- fields:
- size
- consumer_count
- enqueue_count
- dequeue_count
+ activemq_topics
- tags:
- name
- source
- port
- fields:
- size
- consumer_count
- enqueue_count
- dequeue_count
- activemq_subscribers
- tags:
- client_id
- subscription_name
- connection_id
- destination_name
- selector
- active
- source
- port
- fields:
- pending_queue_size
- dispatched_queue_size
- dispatched_counter
- enqueue_counter
- dequeue_counter
### Example Output
```
activemq_queues,name=sandra,host=88284b2fe51b,source=localhost,port=8161 consumer_count=0i,enqueue_count=0i,dequeue_count=0i,size=0i 1492610703000000000
activemq_queues,name=Test,host=88284b2fe51b,source=localhost,port=8161 dequeue_count=0i,size=0i,consumer_count=0i,enqueue_count=0i 1492610703000000000
activemq_topics,name=ActiveMQ.Advisory.MasterBroker\ ,host=88284b2fe51b,source=localhost,port=8161 size=0i,consumer_count=0i,enqueue_count=1i,dequeue_count=0i 1492610703000000000
activemq_topics,host=88284b2fe51b,name=AAA\,source=localhost,port=8161 size=0i,consumer_count=1i,enqueue_count=0i,dequeue_count=0i 1492610703000000000
activemq_topics,name=ActiveMQ.Advisory.Topic\,source=localhost,port=8161 ,host=88284b2fe51b enqueue_count=1i,dequeue_count=0i,size=0i,consumer_count=0i 1492610703000000000
activemq_topics,name=ActiveMQ.Advisory.Queue\,source=localhost,port=8161 ,host=88284b2fe51b size=0i,consumer_count=0i,enqueue_count=2i,dequeue_count=0i 1492610703000000000
activemq_topics,name=AAAA\ ,host=88284b2fe51b,source=localhost,port=8161 consumer_count=0i,enqueue_count=0i,dequeue_count=0i,size=0i 1492610703000000000
activemq_subscribers,connection_id=NOTSET,destination_name=AAA,,source=localhost,port=8161,selector=AA,active=no,host=88284b2fe51b,client_id=AAA,subscription_name=AAA pending_queue_size=0i,dispatched_queue_size=0i,dispatched_counter=0i,enqueue_counter=0i,dequeue_counter=0i 1492610703000000000
```

View File

@@ -0,0 +1,311 @@
package activemq
import (
"encoding/xml"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"path"
"strconv"
"strings"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs"
)
type ActiveMQ struct {
Server string `toml:"server"`
Port int `toml:"port"`
URL string `toml:"url"`
Username string `toml:"username"`
Password string `toml:"password"`
Webadmin string `toml:"webadmin"`
ResponseTimeout internal.Duration `toml:"response_timeout"`
tls.ClientConfig
client *http.Client
baseURL *url.URL
}
type Topics struct {
XMLName xml.Name `xml:"topics"`
TopicItems []Topic `xml:"topic"`
}
type Topic struct {
XMLName xml.Name `xml:"topic"`
Name string `xml:"name,attr"`
Stats Stats `xml:"stats"`
}
type Subscribers struct {
XMLName xml.Name `xml:"subscribers"`
SubscriberItems []Subscriber `xml:"subscriber"`
}
type Subscriber struct {
XMLName xml.Name `xml:"subscriber"`
ClientId string `xml:"clientId,attr"`
SubscriptionName string `xml:"subscriptionName,attr"`
ConnectionId string `xml:"connectionId,attr"`
DestinationName string `xml:"destinationName,attr"`
Selector string `xml:"selector,attr"`
Active string `xml:"active,attr"`
Stats Stats `xml:"stats"`
}
type Queues struct {
XMLName xml.Name `xml:"queues"`
QueueItems []Queue `xml:"queue"`
}
type Queue struct {
XMLName xml.Name `xml:"queue"`
Name string `xml:"name,attr"`
Stats Stats `xml:"stats"`
}
type Stats struct {
XMLName xml.Name `xml:"stats"`
Size int `xml:"size,attr"`
ConsumerCount int `xml:"consumerCount,attr"`
EnqueueCount int `xml:"enqueueCount,attr"`
DequeueCount int `xml:"dequeueCount,attr"`
PendingQueueSize int `xml:"pendingQueueSize,attr"`
DispatchedQueueSize int `xml:"dispatchedQueueSize,attr"`
DispatchedCounter int `xml:"dispatchedCounter,attr"`
EnqueueCounter int `xml:"enqueueCounter,attr"`
DequeueCounter int `xml:"dequeueCounter,attr"`
}
var sampleConfig = `
## ActiveMQ WebConsole URL
url = "http://127.0.0.1:8161"
## Required ActiveMQ Endpoint
## deprecated in 1.11; use the url option
# server = "127.0.0.1"
# port = 8161
## Credentials for basic HTTP authentication
# username = "admin"
# password = "admin"
## Required ActiveMQ webadmin root path
# webadmin = "admin"
## Maximum time to receive response.
# response_timeout = "5s"
## Optional TLS Config
# tls_ca = "/etc/telegraf/ca.pem"
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
`
func (a *ActiveMQ) Description() string {
return "Gather ActiveMQ metrics"
}
func (a *ActiveMQ) SampleConfig() string {
return sampleConfig
}
func (a *ActiveMQ) createHttpClient() (*http.Client, error) {
tlsCfg, err := a.ClientConfig.TLSConfig()
if err != nil {
return nil, err
}
client := &http.Client{
Transport: &http.Transport{
TLSClientConfig: tlsCfg,
},
Timeout: a.ResponseTimeout.Duration,
}
return client, nil
}
func (a *ActiveMQ) Init() error {
if a.ResponseTimeout.Duration < time.Second {
a.ResponseTimeout.Duration = time.Second * 5
}
var err error
u := &url.URL{Scheme: "http", Host: a.Server + ":" + strconv.Itoa(a.Port)}
if a.URL != "" {
u, err = url.Parse(a.URL)
if err != nil {
return err
}
}
if !strings.HasPrefix(u.Scheme, "http") {
return fmt.Errorf("invalid scheme %q", u.Scheme)
}
if u.Hostname() == "" {
return fmt.Errorf("invalid hostname %q", u.Hostname())
}
a.baseURL = u
a.client, err = a.createHttpClient()
if err != nil {
return err
}
return nil
}
func (a *ActiveMQ) GetMetrics(u string) ([]byte, error) {
req, err := http.NewRequest("GET", u, nil)
if err != nil {
return nil, err
}
if a.Username != "" || a.Password != "" {
req.SetBasicAuth(a.Username, a.Password)
}
resp, err := a.client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("GET %s returned status %q", u, resp.Status)
}
return ioutil.ReadAll(resp.Body)
}
func (a *ActiveMQ) GatherQueuesMetrics(acc telegraf.Accumulator, queues Queues) {
for _, queue := range queues.QueueItems {
records := make(map[string]interface{})
tags := make(map[string]string)
tags["name"] = strings.TrimSpace(queue.Name)
tags["source"] = a.baseURL.Hostname()
tags["port"] = a.baseURL.Port()
records["size"] = queue.Stats.Size
records["consumer_count"] = queue.Stats.ConsumerCount
records["enqueue_count"] = queue.Stats.EnqueueCount
records["dequeue_count"] = queue.Stats.DequeueCount
acc.AddFields("activemq_queues", records, tags)
}
}
func (a *ActiveMQ) GatherTopicsMetrics(acc telegraf.Accumulator, topics Topics) {
for _, topic := range topics.TopicItems {
records := make(map[string]interface{})
tags := make(map[string]string)
tags["name"] = topic.Name
tags["source"] = a.baseURL.Hostname()
tags["port"] = a.baseURL.Port()
records["size"] = topic.Stats.Size
records["consumer_count"] = topic.Stats.ConsumerCount
records["enqueue_count"] = topic.Stats.EnqueueCount
records["dequeue_count"] = topic.Stats.DequeueCount
acc.AddFields("activemq_topics", records, tags)
}
}
func (a *ActiveMQ) GatherSubscribersMetrics(acc telegraf.Accumulator, subscribers Subscribers) {
for _, subscriber := range subscribers.SubscriberItems {
records := make(map[string]interface{})
tags := make(map[string]string)
tags["client_id"] = subscriber.ClientId
tags["subscription_name"] = subscriber.SubscriptionName
tags["connection_id"] = subscriber.ConnectionId
tags["destination_name"] = subscriber.DestinationName
tags["selector"] = subscriber.Selector
tags["active"] = subscriber.Active
tags["source"] = a.baseURL.Hostname()
tags["port"] = a.baseURL.Port()
records["pending_queue_size"] = subscriber.Stats.PendingQueueSize
records["dispatched_queue_size"] = subscriber.Stats.DispatchedQueueSize
records["dispatched_counter"] = subscriber.Stats.DispatchedCounter
records["enqueue_counter"] = subscriber.Stats.EnqueueCounter
records["dequeue_counter"] = subscriber.Stats.DequeueCounter
acc.AddFields("activemq_subscribers", records, tags)
}
}
func (a *ActiveMQ) Gather(acc telegraf.Accumulator) error {
dataQueues, err := a.GetMetrics(a.QueuesURL())
if err != nil {
return err
}
queues := Queues{}
err = xml.Unmarshal(dataQueues, &queues)
if err != nil {
return fmt.Errorf("queues XML unmarshal error: %v", err)
}
dataTopics, err := a.GetMetrics(a.TopicsURL())
if err != nil {
return err
}
topics := Topics{}
err = xml.Unmarshal(dataTopics, &topics)
if err != nil {
return fmt.Errorf("topics XML unmarshal error: %v", err)
}
dataSubscribers, err := a.GetMetrics(a.SubscribersURL())
if err != nil {
return err
}
subscribers := Subscribers{}
err = xml.Unmarshal(dataSubscribers, &subscribers)
if err != nil {
return fmt.Errorf("subscribers XML unmarshal error: %v", err)
}
a.GatherQueuesMetrics(acc, queues)
a.GatherTopicsMetrics(acc, topics)
a.GatherSubscribersMetrics(acc, subscribers)
return nil
}
func (a *ActiveMQ) QueuesURL() string {
ref := url.URL{Path: path.Join("/", a.Webadmin, "/xml/queues.jsp")}
return a.baseURL.ResolveReference(&ref).String()
}
func (a *ActiveMQ) TopicsURL() string {
ref := url.URL{Path: path.Join("/", a.Webadmin, "/xml/topics.jsp")}
return a.baseURL.ResolveReference(&ref).String()
}
func (a *ActiveMQ) SubscribersURL() string {
ref := url.URL{Path: path.Join("/", a.Webadmin, "/xml/subscribers.jsp")}
return a.baseURL.ResolveReference(&ref).String()
}
func init() {
inputs.Add("activemq", func() telegraf.Input {
return &ActiveMQ{
Server: "localhost",
Port: 8161,
Webadmin: "admin",
}
})
}

View File

@@ -0,0 +1,180 @@
package activemq
import (
"encoding/xml"
"net/http"
"net/http/httptest"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
func TestGatherQueuesMetrics(t *testing.T) {
s := `<queues>
<queue name="sandra">
<stats size="0" consumerCount="0" enqueueCount="0" dequeueCount="0"/>
<feed>
<atom>queueBrowse/sandra?view=rss&amp;feedType=atom_1.0</atom>
<rss>queueBrowse/sandra?view=rss&amp;feedType=rss_2.0</rss>
</feed>
</queue>
<queue name="Test">
<stats size="0" consumerCount="0" enqueueCount="0" dequeueCount="0"/>
<feed>
<atom>queueBrowse/Test?view=rss&amp;feedType=atom_1.0</atom>
<rss>queueBrowse/Test?view=rss&amp;feedType=rss_2.0</rss>
</feed>
</queue>
</queues>`
queues := Queues{}
xml.Unmarshal([]byte(s), &queues)
records := make(map[string]interface{})
tags := make(map[string]string)
tags["name"] = "Test"
tags["source"] = "localhost"
tags["port"] = "8161"
records["size"] = 0
records["consumer_count"] = 0
records["enqueue_count"] = 0
records["dequeue_count"] = 0
var acc testutil.Accumulator
activeMQ := new(ActiveMQ)
activeMQ.Server = "localhost"
activeMQ.Port = 8161
require.NoError(t, activeMQ.Init())
activeMQ.GatherQueuesMetrics(&acc, queues)
acc.AssertContainsTaggedFields(t, "activemq_queues", records, tags)
}
func TestGatherTopicsMetrics(t *testing.T) {
s := `<topics>
<topic name="ActiveMQ.Advisory.MasterBroker ">
<stats size="0" consumerCount="0" enqueueCount="1" dequeueCount="0"/>
</topic>
<topic name="AAA ">
<stats size="0" consumerCount="1" enqueueCount="0" dequeueCount="0"/>
</topic>
<topic name="ActiveMQ.Advisory.Topic ">
<stats size="0" consumerCount="0" enqueueCount="1" dequeueCount="0"/>
</topic>
<topic name="ActiveMQ.Advisory.Queue ">
<stats size="0" consumerCount="0" enqueueCount="2" dequeueCount="0"/>
</topic>
<topic name="AAAA ">
<stats size="0" consumerCount="0" enqueueCount="0" dequeueCount="0"/>
</topic>
</topics>`
topics := Topics{}
xml.Unmarshal([]byte(s), &topics)
records := make(map[string]interface{})
tags := make(map[string]string)
tags["name"] = "ActiveMQ.Advisory.MasterBroker "
tags["source"] = "localhost"
tags["port"] = "8161"
records["size"] = 0
records["consumer_count"] = 0
records["enqueue_count"] = 1
records["dequeue_count"] = 0
var acc testutil.Accumulator
activeMQ := new(ActiveMQ)
activeMQ.Server = "localhost"
activeMQ.Port = 8161
require.NoError(t, activeMQ.Init())
activeMQ.GatherTopicsMetrics(&acc, topics)
acc.AssertContainsTaggedFields(t, "activemq_topics", records, tags)
}
func TestGatherSubscribersMetrics(t *testing.T) {
s := `<subscribers>
<subscriber clientId="AAA" subscriptionName="AAA" connectionId="NOTSET" destinationName="AAA" selector="AA" active="no">
<stats pendingQueueSize="0" dispatchedQueueSize="0" dispatchedCounter="0" enqueueCounter="0" dequeueCounter="0"/>
</subscriber>
</subscribers>`
subscribers := Subscribers{}
xml.Unmarshal([]byte(s), &subscribers)
records := make(map[string]interface{})
tags := make(map[string]string)
tags["client_id"] = "AAA"
tags["subscription_name"] = "AAA"
tags["connection_id"] = "NOTSET"
tags["destination_name"] = "AAA"
tags["selector"] = "AA"
tags["active"] = "no"
tags["source"] = "localhost"
tags["port"] = "8161"
records["pending_queue_size"] = 0
records["dispatched_queue_size"] = 0
records["dispatched_counter"] = 0
records["enqueue_counter"] = 0
records["dequeue_counter"] = 0
var acc testutil.Accumulator
activeMQ := new(ActiveMQ)
activeMQ.Server = "localhost"
activeMQ.Port = 8161
require.NoError(t, activeMQ.Init())
activeMQ.GatherSubscribersMetrics(&acc, subscribers)
acc.AssertContainsTaggedFields(t, "activemq_subscribers", records, tags)
}
func TestURLs(t *testing.T) {
ts := httptest.NewServer(http.NotFoundHandler())
defer ts.Close()
ts.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/admin/xml/queues.jsp":
w.WriteHeader(http.StatusOK)
w.Write([]byte("<queues></queues>"))
case "/admin/xml/topics.jsp":
w.WriteHeader(http.StatusOK)
w.Write([]byte("<topics></topics>"))
case "/admin/xml/subscribers.jsp":
w.WriteHeader(http.StatusOK)
w.Write([]byte("<subscribers></subscribers>"))
default:
w.WriteHeader(http.StatusNotFound)
t.Fatalf("unexpected path: %s", r.URL.Path)
}
})
plugin := ActiveMQ{
URL: "http://" + ts.Listener.Addr().String(),
Webadmin: "admin",
}
err := plugin.Init()
require.NoError(t, err)
var acc testutil.Accumulator
err = plugin.Gather(&acc)
require.NoError(t, err)
require.Len(t, acc.GetTelegrafMetrics(), 0)
}

View File

@@ -2,8 +2,6 @@ package aerospike
import (
"crypto/tls"
"errors"
"log"
"net"
"strconv"
"strings"
@@ -120,12 +118,8 @@ func (a *Aerospike) gatherServer(hostport string, acc telegraf.Accumulator) erro
return err
}
for k, v := range stats {
val, err := parseValue(v)
if err == nil {
fields[strings.Replace(k, "-", "_", -1)] = val
} else {
log.Printf("I! skipping aerospike field %v with int64 overflow: %q", k, v)
}
val := parseValue(v)
fields[strings.Replace(k, "-", "_", -1)] = val
}
acc.AddFields("aerospike_node", fields, tags, time.Now())
@@ -152,12 +146,8 @@ func (a *Aerospike) gatherServer(hostport string, acc telegraf.Accumulator) erro
if len(parts) < 2 {
continue
}
val, err := parseValue(parts[1])
if err == nil {
nFields[strings.Replace(parts[0], "-", "_", -1)] = val
} else {
log.Printf("I! skipping aerospike field %v with int64 overflow: %q", parts[0], parts[1])
}
val := parseValue(parts[1])
nFields[strings.Replace(parts[0], "-", "_", -1)] = val
}
acc.AddFields("aerospike_namespace", nFields, nTags, time.Now())
}
@@ -165,16 +155,16 @@ func (a *Aerospike) gatherServer(hostport string, acc telegraf.Accumulator) erro
return nil
}
func parseValue(v string) (interface{}, error) {
func parseValue(v string) interface{} {
if parsed, err := strconv.ParseInt(v, 10, 64); err == nil {
return parsed, nil
} else if _, err := strconv.ParseUint(v, 10, 64); err == nil {
// int64 overflow, yet valid uint64
return nil, errors.New("Number is too large")
return parsed
} else if parsed, err := strconv.ParseUint(v, 10, 64); err == nil {
return parsed
} else if parsed, err := strconv.ParseBool(v); err == nil {
return parsed, nil
return parsed
} else {
return v, nil
// leave as string
return v
}
}

View File

@@ -52,17 +52,14 @@ func TestAerospikeStatisticsPartialErr(t *testing.T) {
func TestAerospikeParseValue(t *testing.T) {
// uint64 with value bigger than int64 max
val, err := parseValue("18446744041841121751")
assert.Nil(t, val)
assert.Error(t, err)
val := parseValue("18446744041841121751")
require.Equal(t, uint64(18446744041841121751), val)
// int values
val, err = parseValue("42")
assert.NoError(t, err)
assert.Equal(t, val, int64(42), "must be parsed as int")
val = parseValue("42")
require.Equal(t, val, int64(42), "must be parsed as int")
// string values
val, err = parseValue("BB977942A2CA502")
assert.NoError(t, err)
assert.Equal(t, val, `BB977942A2CA502`, "must be left as string")
val = parseValue("BB977942A2CA502")
require.Equal(t, val, `BB977942A2CA502`, "must be left as string")
}

View File

@@ -3,85 +3,135 @@ package all
import (
_ "gitea.statsd.de/dom/telegraf/plugins/inputs/rss"
_ "gitea.statsd.de/dom/telegraf/plugins/inputs/twitter"
_ "github.com/influxdata/telegraf/plugins/inputs/activemq"
_ "github.com/influxdata/telegraf/plugins/inputs/aerospike"
_ "github.com/influxdata/telegraf/plugins/inputs/amqp_consumer"
_ "github.com/influxdata/telegraf/plugins/inputs/apache"
_ "github.com/influxdata/telegraf/plugins/inputs/apcupsd"
_ "github.com/influxdata/telegraf/plugins/inputs/aurora"
_ "github.com/influxdata/telegraf/plugins/inputs/azure_storage_queue"
_ "github.com/influxdata/telegraf/plugins/inputs/bcache"
_ "github.com/influxdata/telegraf/plugins/inputs/beanstalkd"
_ "github.com/influxdata/telegraf/plugins/inputs/bind"
_ "github.com/influxdata/telegraf/plugins/inputs/bond"
_ "github.com/influxdata/telegraf/plugins/inputs/burrow"
_ "github.com/influxdata/telegraf/plugins/inputs/cassandra"
_ "github.com/influxdata/telegraf/plugins/inputs/ceph"
_ "github.com/influxdata/telegraf/plugins/inputs/cgroup"
_ "github.com/influxdata/telegraf/plugins/inputs/chrony"
_ "github.com/influxdata/telegraf/plugins/inputs/cisco_telemetry_gnmi"
_ "github.com/influxdata/telegraf/plugins/inputs/cisco_telemetry_mdt"
_ "github.com/influxdata/telegraf/plugins/inputs/clickhouse"
_ "github.com/influxdata/telegraf/plugins/inputs/cloud_pubsub"
_ "github.com/influxdata/telegraf/plugins/inputs/cloud_pubsub_push"
_ "github.com/influxdata/telegraf/plugins/inputs/cloudwatch"
_ "github.com/influxdata/telegraf/plugins/inputs/conntrack"
_ "github.com/influxdata/telegraf/plugins/inputs/consul"
_ "github.com/influxdata/telegraf/plugins/inputs/couchbase"
_ "github.com/influxdata/telegraf/plugins/inputs/couchdb"
_ "github.com/influxdata/telegraf/plugins/inputs/cpu"
_ "github.com/influxdata/telegraf/plugins/inputs/dcos"
_ "github.com/influxdata/telegraf/plugins/inputs/disk"
_ "github.com/influxdata/telegraf/plugins/inputs/diskio"
_ "github.com/influxdata/telegraf/plugins/inputs/disque"
_ "github.com/influxdata/telegraf/plugins/inputs/dmcache"
_ "github.com/influxdata/telegraf/plugins/inputs/dns_query"
_ "github.com/influxdata/telegraf/plugins/inputs/docker"
_ "github.com/influxdata/telegraf/plugins/inputs/docker_log"
_ "github.com/influxdata/telegraf/plugins/inputs/dovecot"
_ "github.com/influxdata/telegraf/plugins/inputs/ecs"
_ "github.com/influxdata/telegraf/plugins/inputs/elasticsearch"
_ "github.com/influxdata/telegraf/plugins/inputs/ethtool"
_ "github.com/influxdata/telegraf/plugins/inputs/eventhub_consumer"
_ "github.com/influxdata/telegraf/plugins/inputs/exec"
_ "github.com/influxdata/telegraf/plugins/inputs/execd"
_ "github.com/influxdata/telegraf/plugins/inputs/fail2ban"
_ "github.com/influxdata/telegraf/plugins/inputs/fibaro"
_ "github.com/influxdata/telegraf/plugins/inputs/file"
_ "github.com/influxdata/telegraf/plugins/inputs/filecount"
_ "github.com/influxdata/telegraf/plugins/inputs/filestat"
_ "github.com/influxdata/telegraf/plugins/inputs/fireboard"
_ "github.com/influxdata/telegraf/plugins/inputs/fluentd"
_ "github.com/influxdata/telegraf/plugins/inputs/github"
_ "github.com/influxdata/telegraf/plugins/inputs/graylog"
_ "github.com/influxdata/telegraf/plugins/inputs/haproxy"
_ "github.com/influxdata/telegraf/plugins/inputs/hddtemp"
_ "github.com/influxdata/telegraf/plugins/inputs/http"
_ "github.com/influxdata/telegraf/plugins/inputs/http_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/http_listener_v2"
_ "github.com/influxdata/telegraf/plugins/inputs/http_response"
_ "github.com/influxdata/telegraf/plugins/inputs/httpjson"
_ "github.com/influxdata/telegraf/plugins/inputs/icinga2"
_ "github.com/influxdata/telegraf/plugins/inputs/infiniband"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/internal"
_ "github.com/influxdata/telegraf/plugins/inputs/interrupts"
_ "github.com/influxdata/telegraf/plugins/inputs/ipmi_sensor"
_ "github.com/influxdata/telegraf/plugins/inputs/ipset"
_ "github.com/influxdata/telegraf/plugins/inputs/iptables"
_ "github.com/influxdata/telegraf/plugins/inputs/ipvs"
_ "github.com/influxdata/telegraf/plugins/inputs/jenkins"
_ "github.com/influxdata/telegraf/plugins/inputs/jolokia"
_ "github.com/influxdata/telegraf/plugins/inputs/jolokia2"
_ "github.com/influxdata/telegraf/plugins/inputs/jti_openconfig_telemetry"
_ "github.com/influxdata/telegraf/plugins/inputs/kafka_consumer"
_ "github.com/influxdata/telegraf/plugins/inputs/kafka_consumer_legacy"
_ "github.com/influxdata/telegraf/plugins/inputs/kapacitor"
_ "github.com/influxdata/telegraf/plugins/inputs/kernel"
_ "github.com/influxdata/telegraf/plugins/inputs/kernel_vmstat"
_ "github.com/influxdata/telegraf/plugins/inputs/kibana"
_ "github.com/influxdata/telegraf/plugins/inputs/kinesis_consumer"
_ "github.com/influxdata/telegraf/plugins/inputs/kube_inventory"
_ "github.com/influxdata/telegraf/plugins/inputs/kubernetes"
_ "github.com/influxdata/telegraf/plugins/inputs/lanz"
_ "github.com/influxdata/telegraf/plugins/inputs/leofs"
_ "github.com/influxdata/telegraf/plugins/inputs/linux_sysctl_fs"
_ "github.com/influxdata/telegraf/plugins/inputs/logparser"
_ "github.com/influxdata/telegraf/plugins/inputs/logstash"
_ "github.com/influxdata/telegraf/plugins/inputs/lustre2"
_ "github.com/influxdata/telegraf/plugins/inputs/mailchimp"
_ "github.com/influxdata/telegraf/plugins/inputs/marklogic"
_ "github.com/influxdata/telegraf/plugins/inputs/mcrouter"
_ "github.com/influxdata/telegraf/plugins/inputs/mem"
_ "github.com/influxdata/telegraf/plugins/inputs/memcached"
_ "github.com/influxdata/telegraf/plugins/inputs/mesos"
_ "github.com/influxdata/telegraf/plugins/inputs/minecraft"
_ "github.com/influxdata/telegraf/plugins/inputs/modbus"
_ "github.com/influxdata/telegraf/plugins/inputs/mongodb"
_ "github.com/influxdata/telegraf/plugins/inputs/monit"
_ "github.com/influxdata/telegraf/plugins/inputs/mqtt_consumer"
_ "github.com/influxdata/telegraf/plugins/inputs/multifile"
_ "github.com/influxdata/telegraf/plugins/inputs/mysql"
_ "github.com/influxdata/telegraf/plugins/inputs/nats"
_ "github.com/influxdata/telegraf/plugins/inputs/nats_consumer"
_ "github.com/influxdata/telegraf/plugins/inputs/neptune_apex"
_ "github.com/influxdata/telegraf/plugins/inputs/net"
_ "github.com/influxdata/telegraf/plugins/inputs/net_response"
_ "github.com/influxdata/telegraf/plugins/inputs/nginx"
_ "github.com/influxdata/telegraf/plugins/inputs/nginx_plus"
_ "github.com/influxdata/telegraf/plugins/inputs/nginx_plus_api"
_ "github.com/influxdata/telegraf/plugins/inputs/nginx_upstream_check"
_ "github.com/influxdata/telegraf/plugins/inputs/nginx_vts"
_ "github.com/influxdata/telegraf/plugins/inputs/nsq"
_ "github.com/influxdata/telegraf/plugins/inputs/nsq_consumer"
_ "github.com/influxdata/telegraf/plugins/inputs/nstat"
_ "github.com/influxdata/telegraf/plugins/inputs/ntpq"
_ "github.com/influxdata/telegraf/plugins/inputs/nvidia_smi"
_ "github.com/influxdata/telegraf/plugins/inputs/openldap"
_ "github.com/influxdata/telegraf/plugins/inputs/openntpd"
_ "github.com/influxdata/telegraf/plugins/inputs/opensmtpd"
_ "github.com/influxdata/telegraf/plugins/inputs/openweathermap"
_ "github.com/influxdata/telegraf/plugins/inputs/passenger"
_ "github.com/influxdata/telegraf/plugins/inputs/pf"
_ "github.com/influxdata/telegraf/plugins/inputs/pgbouncer"
_ "github.com/influxdata/telegraf/plugins/inputs/phpfpm"
_ "github.com/influxdata/telegraf/plugins/inputs/ping"
_ "github.com/influxdata/telegraf/plugins/inputs/postfix"
_ "github.com/influxdata/telegraf/plugins/inputs/postgresql"
_ "github.com/influxdata/telegraf/plugins/inputs/postgresql_extensible"
_ "github.com/influxdata/telegraf/plugins/inputs/powerdns"
_ "github.com/influxdata/telegraf/plugins/inputs/powerdns_recursor"
_ "github.com/influxdata/telegraf/plugins/inputs/processes"
_ "github.com/influxdata/telegraf/plugins/inputs/procstat"
_ "github.com/influxdata/telegraf/plugins/inputs/prometheus"
_ "github.com/influxdata/telegraf/plugins/inputs/puppetagent"
@@ -92,29 +142,42 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/riak"
_ "github.com/influxdata/telegraf/plugins/inputs/salesforce"
_ "github.com/influxdata/telegraf/plugins/inputs/sensors"
_ "github.com/influxdata/telegraf/plugins/inputs/sflow"
_ "github.com/influxdata/telegraf/plugins/inputs/smart"
_ "github.com/influxdata/telegraf/plugins/inputs/snmp"
_ "github.com/influxdata/telegraf/plugins/inputs/snmp_legacy"
_ "github.com/influxdata/telegraf/plugins/inputs/snmp_trap"
_ "github.com/influxdata/telegraf/plugins/inputs/socket_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/solr"
_ "github.com/influxdata/telegraf/plugins/inputs/sqlserver"
_ "github.com/influxdata/telegraf/plugins/inputs/stackdriver"
_ "github.com/influxdata/telegraf/plugins/inputs/statsd"
_ "github.com/influxdata/telegraf/plugins/inputs/suricata"
_ "github.com/influxdata/telegraf/plugins/inputs/swap"
_ "github.com/influxdata/telegraf/plugins/inputs/synproxy"
_ "github.com/influxdata/telegraf/plugins/inputs/syslog"
_ "github.com/influxdata/telegraf/plugins/inputs/sysstat"
_ "github.com/influxdata/telegraf/plugins/inputs/system"
_ "github.com/influxdata/telegraf/plugins/inputs/systemd_units"
_ "github.com/influxdata/telegraf/plugins/inputs/tail"
_ "github.com/influxdata/telegraf/plugins/inputs/tcp_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/teamspeak"
_ "github.com/influxdata/telegraf/plugins/inputs/temp"
_ "github.com/influxdata/telegraf/plugins/inputs/tengine"
_ "github.com/influxdata/telegraf/plugins/inputs/tomcat"
_ "github.com/influxdata/telegraf/plugins/inputs/trig"
_ "github.com/influxdata/telegraf/plugins/inputs/twemproxy"
_ "github.com/influxdata/telegraf/plugins/inputs/udp_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/unbound"
_ "github.com/influxdata/telegraf/plugins/inputs/uwsgi"
_ "github.com/influxdata/telegraf/plugins/inputs/varnish"
_ "github.com/influxdata/telegraf/plugins/inputs/vsphere"
_ "github.com/influxdata/telegraf/plugins/inputs/webhooks"
_ "github.com/influxdata/telegraf/plugins/inputs/win_perf_counters"
_ "github.com/influxdata/telegraf/plugins/inputs/win_services"
_ "github.com/influxdata/telegraf/plugins/inputs/wireguard"
_ "github.com/influxdata/telegraf/plugins/inputs/wireless"
_ "github.com/influxdata/telegraf/plugins/inputs/x509_cert"
_ "github.com/influxdata/telegraf/plugins/inputs/zfs"
_ "github.com/influxdata/telegraf/plugins/inputs/zipkin"
_ "github.com/influxdata/telegraf/plugins/inputs/zookeeper"

View File

@@ -1,6 +1,6 @@
# AMQP Consumer Input Plugin
This plugin provides a consumer for use with AMQP 0-9-1, a promenent implementation of this protocol being [RabbitMQ](https://www.rabbitmq.com/).
This plugin provides a consumer for use with AMQP 0-9-1, a prominent implementation of this protocol being [RabbitMQ](https://www.rabbitmq.com/).
Metrics are read from a topic exchange using the configured queue and binding_key.
@@ -13,7 +13,6 @@ For an introduction to AMQP see:
The following defaults are known to work with RabbitMQ:
```toml
# AMQP consumer plugin
[[inputs.amqp_consumer]]
## Broker to consume from.
## deprecated in 1.7; use the brokers option
@@ -28,7 +27,7 @@ The following defaults are known to work with RabbitMQ:
# username = ""
# password = ""
## Exchange to declare and consume from.
## Name of the exchange to declare. If unset, no exchange will be declared.
exchange = "telegraf"
## Exchange type; common types are "direct", "fanout", "topic", "header", "x-consistent-hash".
@@ -42,16 +41,34 @@ The following defaults are known to work with RabbitMQ:
## Additional exchange arguments.
# exchange_arguments = { }
# exchange_arguments = {"hash_propery" = "timestamp"}
# exchange_arguments = {"hash_property" = "timestamp"}
## AMQP queue name
queue = "telegraf"
## Binding Key
## AMQP queue durability can be "transient" or "durable".
queue_durability = "durable"
## If true, queue will be passively declared.
# queue_passive = false
## A binding between the exchange and queue using this binding key is
## created. If unset, no binding is created.
binding_key = "#"
## Maximum number of messages server should give to the worker.
# prefetch_count = 50
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Auth method. PLAIN and EXTERNAL are supported
## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
## described here: https://www.rabbitmq.com/plugins.html
@@ -64,6 +81,10 @@ The following defaults are known to work with RabbitMQ:
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
## Content encoding for message payloads, can be set to "gzip" to or
## "identity" to apply no encoding.
# content_encoding = "identity"
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:

View File

@@ -1,36 +1,47 @@
package amqp_consumer
import (
"context"
"errors"
"fmt"
"log"
"math/rand"
"strings"
"sync"
"time"
"github.com/streadway/amqp"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/streadway/amqp"
)
const (
defaultMaxUndeliveredMessages = 1000
)
type empty struct{}
type semaphore chan empty
// AMQPConsumer is the top level struct for this plugin
type AMQPConsumer struct {
URL string `toml:"url"` // deprecated in 1.7; use brokers
Brokers []string `toml:"brokers"`
Username string `toml:"username"`
Password string `toml:"password"`
Exchange string `toml:"exchange"`
ExchangeType string `toml:"exchange_type"`
ExchangeDurability string `toml:"exchange_durability"`
ExchangePassive bool `toml:"exchange_passive"`
ExchangeArguments map[string]string `toml:"exchange_arguments"`
URL string `toml:"url"` // deprecated in 1.7; use brokers
Brokers []string `toml:"brokers"`
Username string `toml:"username"`
Password string `toml:"password"`
Exchange string `toml:"exchange"`
ExchangeType string `toml:"exchange_type"`
ExchangeDurability string `toml:"exchange_durability"`
ExchangePassive bool `toml:"exchange_passive"`
ExchangeArguments map[string]string `toml:"exchange_arguments"`
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
// Queue Name
Queue string
Queue string `toml:"queue"`
QueueDurability string `toml:"queue_durability"`
QueuePassive bool `toml:"queue_passive"`
// Binding Key
BindingKey string `toml:"binding_key"`
@@ -42,9 +53,16 @@ type AMQPConsumer struct {
AuthMethod string
tls.ClientConfig
parser parsers.Parser
conn *amqp.Connection
wg *sync.WaitGroup
ContentEncoding string `toml:"content_encoding"`
Log telegraf.Logger
deliveries map[telegraf.TrackingID]amqp.Delivery
parser parsers.Parser
conn *amqp.Connection
wg *sync.WaitGroup
cancel context.CancelFunc
decoder internal.ContentDecoder
}
type externalAuth struct{}
@@ -64,6 +82,8 @@ const (
DefaultExchangeType = "topic"
DefaultExchangeDurability = "durable"
DefaultQueueDurability = "durable"
DefaultPrefetchCount = 50
)
@@ -82,7 +102,7 @@ func (a *AMQPConsumer) SampleConfig() string {
# username = ""
# password = ""
## Exchange to declare and consume from.
## Name of the exchange to declare. If unset, no exchange will be declared.
exchange = "telegraf"
## Exchange type; common types are "direct", "fanout", "topic", "header", "x-consistent-hash".
@@ -96,17 +116,34 @@ func (a *AMQPConsumer) SampleConfig() string {
## Additional exchange arguments.
# exchange_arguments = { }
# exchange_arguments = {"hash_propery" = "timestamp"}
# exchange_arguments = {"hash_property" = "timestamp"}
## AMQP queue name
## AMQP queue name.
queue = "telegraf"
## Binding Key
## AMQP queue durability can be "transient" or "durable".
queue_durability = "durable"
## If true, queue will be passively declared.
# queue_passive = false
## A binding between the exchange and queue using this binding key is
## created. If unset, no binding is created.
binding_key = "#"
## Maximum number of messages server should give to the worker.
# prefetch_count = 50
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Auth method. PLAIN and EXTERNAL are supported
## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
## described here: https://www.rabbitmq.com/plugins.html
@@ -119,6 +156,10 @@ func (a *AMQPConsumer) SampleConfig() string {
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
## Content encoding for message payloads, can be set to "gzip" to or
## "identity" to apply no encoding.
# content_encoding = "identity"
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
@@ -173,14 +214,25 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
return err
}
a.decoder, err = internal.NewContentDecoder(a.ContentEncoding)
if err != nil {
return err
}
msgs, err := a.connect(amqpConf)
if err != nil {
return err
}
ctx, cancel := context.WithCancel(context.Background())
a.cancel = cancel
a.wg = &sync.WaitGroup{}
a.wg.Add(1)
go a.process(msgs, acc)
go func() {
defer a.wg.Done()
a.process(ctx, msgs, acc)
}()
go func() {
for {
@@ -189,17 +241,20 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
break
}
log.Printf("I! AMQP consumer connection closed: %s; trying to reconnect", err)
a.Log.Infof("Connection closed: %s; trying to reconnect", err)
for {
msgs, err := a.connect(amqpConf)
if err != nil {
log.Printf("E! AMQP connection failed: %s", err)
a.Log.Errorf("AMQP connection failed: %s", err)
time.Sleep(10 * time.Second)
continue
}
a.wg.Add(1)
go a.process(msgs, acc)
go func() {
defer a.wg.Done()
a.process(ctx, msgs, acc)
}()
break
}
}
@@ -217,14 +272,14 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
p := rand.Perm(len(brokers))
for _, n := range p {
broker := brokers[n]
log.Printf("D! [amqp_consumer] connecting to %q", broker)
a.Log.Debugf("Connecting to %q", broker)
conn, err := amqp.DialConfig(broker, *amqpConf)
if err == nil {
a.conn = conn
log.Printf("D! [amqp_consumer] connected to %q", broker)
a.Log.Debugf("Connected to %q", broker)
break
}
log.Printf("D! [amqp_consumer] error connecting to %q", broker)
a.Log.Debugf("Error connecting to %q", broker)
}
if a.conn == nil {
@@ -233,54 +288,55 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
ch, err := a.conn.Channel()
if err != nil {
return nil, fmt.Errorf("Failed to open a channel: %s", err)
return nil, fmt.Errorf("Failed to open a channel: %s", err.Error())
}
var exchangeDurable = true
switch a.ExchangeDurability {
case "transient":
exchangeDurable = false
default:
exchangeDurable = true
if a.Exchange != "" {
var exchangeDurable = true
switch a.ExchangeDurability {
case "transient":
exchangeDurable = false
default:
exchangeDurable = true
}
exchangeArgs := make(amqp.Table, len(a.ExchangeArguments))
for k, v := range a.ExchangeArguments {
exchangeArgs[k] = v
}
err = declareExchange(
ch,
a.Exchange,
a.ExchangeType,
a.ExchangePassive,
exchangeDurable,
exchangeArgs)
if err != nil {
return nil, err
}
}
exchangeArgs := make(amqp.Table, len(a.ExchangeArguments))
for k, v := range a.ExchangeArguments {
exchangeArgs[k] = v
}
err = declareExchange(
q, err := declareQueue(
ch,
a.Exchange,
a.ExchangeType,
a.ExchangePassive,
exchangeDurable,
exchangeArgs)
a.Queue,
a.QueueDurability,
a.QueuePassive)
if err != nil {
return nil, err
}
q, err := ch.QueueDeclare(
a.Queue, // queue
true, // durable
false, // delete when unused
false, // exclusive
false, // no-wait
nil, // arguments
)
if err != nil {
return nil, fmt.Errorf("Failed to declare a queue: %s", err)
}
err = ch.QueueBind(
q.Name, // queue
a.BindingKey, // binding-key
a.Exchange, // exchange
false,
nil,
)
if err != nil {
return nil, fmt.Errorf("Failed to bind a queue: %s", err)
if a.BindingKey != "" {
err = ch.QueueBind(
q.Name, // queue
a.BindingKey, // binding-key
a.Exchange, // exchange
false,
nil,
)
if err != nil {
return nil, fmt.Errorf("Failed to bind a queue: %s", err)
}
}
err = ch.Qos(
@@ -305,7 +361,6 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
return nil, fmt.Errorf("Failed establishing connection to queue: %s", err)
}
log.Println("I! Started AMQP consumer")
return msgs, err
}
@@ -340,47 +395,164 @@ func declareExchange(
)
}
if err != nil {
return fmt.Errorf("error declaring exchange: %v", err)
return fmt.Errorf("Error declaring exchange: %v", err)
}
return nil
}
func declareQueue(
channel *amqp.Channel,
queueName string,
queueDurability string,
queuePassive bool,
) (*amqp.Queue, error) {
var queue amqp.Queue
var err error
var queueDurable = true
switch queueDurability {
case "transient":
queueDurable = false
default:
queueDurable = true
}
if queuePassive {
queue, err = channel.QueueDeclarePassive(
queueName, // queue
queueDurable, // durable
false, // delete when unused
false, // exclusive
false, // no-wait
nil, // arguments
)
} else {
queue, err = channel.QueueDeclare(
queueName, // queue
queueDurable, // durable
false, // delete when unused
false, // exclusive
false, // no-wait
nil, // arguments
)
}
if err != nil {
return nil, fmt.Errorf("Error declaring queue: %v", err)
}
return &queue, nil
}
// Read messages from queue and add them to the Accumulator
func (a *AMQPConsumer) process(msgs <-chan amqp.Delivery, acc telegraf.Accumulator) {
defer a.wg.Done()
for d := range msgs {
metrics, err := a.parser.Parse(d.Body)
if err != nil {
log.Printf("E! %v: error parsing metric - %v", err, string(d.Body))
} else {
for _, m := range metrics {
acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
func (a *AMQPConsumer) process(ctx context.Context, msgs <-chan amqp.Delivery, ac telegraf.Accumulator) {
a.deliveries = make(map[telegraf.TrackingID]amqp.Delivery)
acc := ac.WithTracking(a.MaxUndeliveredMessages)
sem := make(semaphore, a.MaxUndeliveredMessages)
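	// The semaphore caps the number of message groups in flight (read from
	// the broker but not yet written by an output): a slot is taken before
	// reading a message and released once its delivery result arrives on
	// acc.Delivered().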
for {
select {
case <-ctx.Done():
return
case track := <-acc.Delivered():
if a.onDelivery(track) {
<-sem
}
case sem <- empty{}:
select {
case <-ctx.Done():
return
case track := <-acc.Delivered():
if a.onDelivery(track) {
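					// Release the slot acquired in the outer select as well
					// as the slot held by the delivered message group.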
<-sem
<-sem
}
case d, ok := <-msgs:
if !ok {
return
}
err := a.onMessage(acc, d)
if err != nil {
acc.AddError(err)
<-sem
}
}
}
d.Ack(false)
}
log.Printf("I! AMQP consumer queue closed")
}
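// onMessage decodes and parses a single AMQP delivery and hands the resulting
// metrics to the tracking accumulator. Payloads that cannot be decoded or
// parsed are acked anyway, so the broker does not redeliver them indefinitely.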
func (a *AMQPConsumer) onMessage(acc telegraf.TrackingAccumulator, d amqp.Delivery) error {
onError := func() {
// Discard the message from the queue; will never be able to process
// this message.
rejErr := d.Ack(false)
if rejErr != nil {
a.Log.Errorf("Unable to reject message: %d: %v", d.DeliveryTag, rejErr)
a.conn.Close()
}
}
body, err := a.decoder.Decode(d.Body)
if err != nil {
onError()
return err
}
metrics, err := a.parser.Parse(body)
if err != nil {
onError()
return err
}
id := acc.AddTrackingMetricGroup(metrics)
a.deliveries[id] = d
return nil
}
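// onDelivery acks or rejects the delivery that produced the tracked metric
// group and reports whether the delivery was registered by this connection
// (and therefore whether a semaphore slot should be released).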
func (a *AMQPConsumer) onDelivery(track telegraf.DeliveryInfo) bool {
delivery, ok := a.deliveries[track.ID()]
if !ok {
// Added by a previous connection
return false
}
if track.Delivered() {
err := delivery.Ack(false)
if err != nil {
a.Log.Errorf("Unable to ack written delivery: %d: %v", delivery.DeliveryTag, err)
a.conn.Close()
}
} else {
err := delivery.Reject(false)
if err != nil {
a.Log.Errorf("Unable to reject failed delivery: %d: %v", delivery.DeliveryTag, err)
a.conn.Close()
}
}
delete(a.deliveries, track.ID())
return true
}
func (a *AMQPConsumer) Stop() {
a.cancel()
a.wg.Wait()
err := a.conn.Close()
if err != nil && err != amqp.ErrClosed {
log.Printf("E! Error closing AMQP connection: %s", err)
a.Log.Errorf("Error closing AMQP connection: %s", err)
return
}
a.wg.Wait()
log.Println("I! Stopped AMQP service")
}
func init() {
inputs.Add("amqp_consumer", func() telegraf.Input {
return &AMQPConsumer{
URL: DefaultBroker,
AuthMethod: DefaultAuthMethod,
ExchangeType: DefaultExchangeType,
ExchangeDurability: DefaultExchangeDurability,
PrefetchCount: DefaultPrefetchCount,
URL: DefaultBroker,
AuthMethod: DefaultAuthMethod,
ExchangeType: DefaultExchangeType,
ExchangeDurability: DefaultExchangeDurability,
QueueDurability: DefaultQueueDurability,
PrefetchCount: DefaultPrefetchCount,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
})
}

View File

@@ -0,0 +1,54 @@
# APCUPSD Input Plugin
This plugin reads data from an apcupsd daemon over its NIS network protocol.
### Requirements
apcupsd should be installed and its daemon should be running.
### Configuration
```toml
[[inputs.apcupsd]]
# A list of running apcupsd servers to connect to.
# If not provided, will default to tcp://127.0.0.1:3551
servers = ["tcp://127.0.0.1:3551"]
## Timeout for dialing server.
timeout = "5s"
```
### Metrics
- apcupsd
- tags:
- serial
- status (string representing the set status_flags)
- ups_name
- model
- fields:
- status_flags ([status-bits][])
- input_voltage
- load_percent
- battery_charge_percent
- time_left_ns
- output_voltage
- internal_temp
- battery_voltage
- input_frequency
- time_on_battery_ns
- battery_date
- nominal_input_voltage
- nominal_battery_voltage
- nominal_power
- firmware
### Example output
```
apcupsd,serial=AS1231515,status=ONLINE,ups_name=name1 time_on_battery_ns=0i,load_percent=9.7,time_left_ns=5880000000000i,output_voltage=230.4,internal_temp=32.4,battery_voltage=27.4,input_frequency=50.2,input_voltage=230.4,battery_charge_percent=100,status_flags=8i 1490035922000000000
```
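`status_flags` is a bit mask. Below is a minimal sketch decoding the commonly
used bits, assuming the bit assignments documented on the [status-bits][]
manual page (worth verifying against your apcupsd version):

```go
package main

import "fmt"

// Bit assignments assumed from the apcupsd status-bits documentation.
const (
	statusCalibration    = 1 << 0
	statusSmartTrim      = 1 << 1
	statusSmartBoost     = 1 << 2
	statusOnline         = 1 << 3
	statusOnBattery      = 1 << 4
	statusOverloaded     = 1 << 5
	statusBatteryLow     = 1 << 6
	statusReplaceBattery = 1 << 7
)

func main() {
	flags := uint64(8) // status_flags from the example output above
	if flags&statusOnline != 0 {
		fmt.Println("ONLINE") // agrees with the status tag above
	}
}
```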
[status-bits]: http://www.apcupsd.org/manual/manual.html#status-bits

View File

@@ -0,0 +1,114 @@
package apcupsd
import (
"context"
"net/url"
"strconv"
"strings"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/mdlayher/apcupsd"
)
const defaultAddress = "tcp://127.0.0.1:3551"
var defaultTimeout = internal.Duration{Duration: 5 * time.Second}
type ApcUpsd struct {
Servers []string
Timeout internal.Duration
}
func (*ApcUpsd) Description() string {
return "Monitor APC UPSes connected to apcupsd"
}
var sampleConfig = `
# A list of running apcupsd servers to connect to.
# If not provided, will default to tcp://127.0.0.1:3551
servers = ["tcp://127.0.0.1:3551"]
## Timeout for dialing server.
timeout = "5s"
`
func (*ApcUpsd) SampleConfig() string {
return sampleConfig
}
func (h *ApcUpsd) Gather(acc telegraf.Accumulator) error {
ctx := context.Background()
for _, addr := range h.Servers {
addrBits, err := url.Parse(addr)
if err != nil {
return err
}
if addrBits.Scheme == "" {
addrBits.Scheme = "tcp"
}
ctx, cancel := context.WithTimeout(ctx, h.Timeout.Duration)
defer cancel()
status, err := fetchStatus(ctx, addrBits)
if err != nil {
return err
}
tags := map[string]string{
"serial": status.SerialNumber,
"ups_name": status.UPSName,
"status": status.Status,
"model": status.Model,
}
flags, err := strconv.ParseUint(strings.Fields(status.StatusFlags)[0], 0, 64)
if err != nil {
return err
}
fields := map[string]interface{}{
"status_flags": flags,
"input_voltage": status.LineVoltage,
"load_percent": status.LoadPercent,
"battery_charge_percent": status.BatteryChargePercent,
"time_left_ns": status.TimeLeft.Nanoseconds(),
"output_voltage": status.OutputVoltage,
"internal_temp": status.InternalTemp,
"battery_voltage": status.BatteryVoltage,
"input_frequency": status.LineFrequency,
"time_on_battery_ns": status.TimeOnBattery.Nanoseconds(),
"nominal_input_voltage": status.NominalInputVoltage,
"nominal_battery_voltage": status.NominalBatteryVoltage,
"nominal_power": status.NominalPower,
"firmware": status.Firmware,
"battery_date": status.BatteryDate,
}
acc.AddFields("apcupsd", fields, tags)
}
return nil
}
func fetchStatus(ctx context.Context, addr *url.URL) (*apcupsd.Status, error) {
client, err := apcupsd.DialContext(ctx, addr.Scheme, addr.Host)
if err != nil {
return nil, err
}
defer client.Close()
return client.Status()
}
func init() {
inputs.Add("apcupsd", func() telegraf.Input {
return &ApcUpsd{
Servers: []string{defaultAddress},
Timeout: defaultTimeout,
}
})
}

View File

@@ -0,0 +1,235 @@
package apcupsd
import (
"context"
"encoding/binary"
"net"
"testing"
"time"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
func TestApcupsdDocs(t *testing.T) {
apc := &ApcUpsd{}
apc.Description()
apc.SampleConfig()
}
func TestApcupsdInit(t *testing.T) {
input, ok := inputs.Inputs["apcupsd"]
if !ok {
t.Fatal("Input not defined")
}
_ = input().(*ApcUpsd)
}
func listen(ctx context.Context, t *testing.T, out [][]byte) (string, error) {
lc := net.ListenConfig{}
ln, err := lc.Listen(ctx, "tcp4", "127.0.0.1:0")
if err != nil {
return "", err
}
go func() {
		defer ln.Close()
		for ctx.Err() == nil {
conn, err := ln.Accept()
if err != nil {
continue
}
defer conn.Close()
conn.SetReadDeadline(time.Now().Add(time.Second))
in := make([]byte, 128)
n, err := conn.Read(in)
require.NoError(t, err, "failed to read from connection")
status := []byte{0, 6, 's', 't', 'a', 't', 'u', 's'}
want, got := status, in[:n]
require.Equal(t, want, got)
// Run against test function and append EOF to end of output bytes
out = append(out, []byte{0, 0})
for _, o := range out {
_, err := conn.Write(o)
require.NoError(t, err, "failed to write to connection")
}
}
}()
return ln.Addr().String(), nil
}
func TestConfig(t *testing.T) {
apc := &ApcUpsd{Timeout: defaultTimeout}
var (
tests = []struct {
name string
servers []string
err bool
}{
{
name: "test listen address no scheme",
servers: []string{"127.0.0.1:1234"},
err: true,
},
{
name: "test no port",
servers: []string{"127.0.0.3"},
err: true,
},
}
acc testutil.Accumulator
)
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
apc.Servers = tt.servers
err := apc.Gather(&acc)
if tt.err {
require.Error(t, err)
} else {
require.NoError(t, err)
}
})
}
}
func TestApcupsdGather(t *testing.T) {
apc := &ApcUpsd{Timeout: defaultTimeout}
var (
tests = []struct {
name string
err bool
tags map[string]string
fields map[string]interface{}
out func() [][]byte
}{
{
name: "test listening server with output",
err: false,
tags: map[string]string{
"serial": "ABC123",
"status": "ONLINE",
"ups_name": "BERTHA",
"model": "Model 12345",
},
fields: map[string]interface{}{
"status_flags": uint64(8),
"battery_charge_percent": float64(0),
"battery_voltage": float64(0),
"input_frequency": float64(0),
"input_voltage": float64(0),
"internal_temp": float64(0),
"load_percent": float64(13),
"output_voltage": float64(0),
"time_left_ns": int64(2790000000000),
"time_on_battery_ns": int64(0),
"nominal_input_voltage": float64(230),
"nominal_battery_voltage": float64(12),
"nominal_power": int(865),
"firmware": string("857.L3 .I USB FW:L3"),
"battery_date": time.Date(2016, time.September, 06, 0, 0, 0, 0, time.UTC),
},
out: genOutput,
},
{
name: "test with bad output",
err: true,
out: genBadOutput,
},
}
acc testutil.Accumulator
)
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
lAddr, err := listen(ctx, t, tt.out())
if err != nil {
t.Fatal(err)
}
apc.Servers = []string{"tcp://" + lAddr}
err = apc.Gather(&acc)
if tt.err {
require.Error(t, err)
} else {
require.NoError(t, err)
acc.AssertContainsTaggedFields(t, "apcupsd", tt.fields, tt.tags)
}
cancel()
})
}
}
// The following functionality is straight from apcupsd tests.
// kvBytes is a helper to generate length and key/value byte buffers.
func kvBytes(kv string) ([]byte, []byte) {
lenb := make([]byte, 2)
binary.BigEndian.PutUint16(lenb, uint16(len(kv)))
return lenb, []byte(kv)
}
func genOutput() [][]byte {
kvs := []string{
"SERIALNO : ABC123",
"STATUS : ONLINE",
"STATFLAG : 0x08 Status Flag",
"UPSNAME : BERTHA",
"MODEL : Model 12345",
"DATE : 2016-09-06 22:13:28 -0400",
"HOSTNAME : example",
"LOADPCT : 13.0 Percent Load Capacity",
"BATTDATE : 2016-09-06",
"TIMELEFT : 46.5 Minutes",
"TONBATT : 0 seconds",
"NUMXFERS : 0",
"SELFTEST : NO",
"NOMINV : 230 Volts",
"NOMBATTV : 12.0 Volts",
"NOMPOWER : 865 Watts",
"FIRMWARE : 857.L3 .I USB FW:L3",
}
var out [][]byte
for _, kv := range kvs {
lenb, kvb := kvBytes(kv)
out = append(out, lenb)
out = append(out, kvb)
}
return out
}
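// genBadOutput returns a malformed STATFLAG entry, used to exercise the error path.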
func genBadOutput() [][]byte {
kvs := []string{
"STATFLAG : 0x08Status Flag",
}
var out [][]byte
for _, kv := range kvs {
lenb, kvb := kvBytes(kv)
out = append(out, lenb)
out = append(out, kvb)
}
return out
}

View File

@@ -0,0 +1,35 @@
# Telegraf Input Plugin: Azure Storage Queue
This plugin gathers sizes of Azure Storage Queues.
### Configuration:
```toml
# Description
[[inputs.azure_storage_queue]]
## Required Azure Storage Account name
account_name = "mystorageaccount"
## Required Azure Storage Account access key
account_key = "storageaccountaccesskey"
## Set to false to disable peeking the age of the oldest message (executes faster)
# peek_oldest_message_age = true
```
### Metrics
- azure_storage_queues
- tags:
- queue
- account
- fields:
- size (integer, count)
- oldest_message_age_ns (integer, nanoseconds) Age of message at the head of the queue.
Requires `peek_oldest_message_age` to be configured to `true`.
### Example Output
```
azure_storage_queues,queue=myqueue,account=mystorageaccount oldest_message_age_ns=799714900i,size=7i 1565970503000000000
azure_storage_queues,queue=myemptyqueue,account=mystorageaccount size=0i 1565970502000000000
```

View File

@@ -0,0 +1,134 @@
package azure_storage_queue
import (
"context"
"errors"
"net/url"
"strings"
"time"
"github.com/Azure/azure-storage-queue-go/azqueue"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
type AzureStorageQueue struct {
StorageAccountName string `toml:"account_name"`
StorageAccountKey string `toml:"account_key"`
PeekOldestMessageAge bool `toml:"peek_oldest_message_age"`
Log telegraf.Logger
serviceURL *azqueue.ServiceURL
}
var sampleConfig = `
## Required Azure Storage Account name
account_name = "mystorageaccount"
## Required Azure Storage Account access key
account_key = "storageaccountaccesskey"
## Set to false to disable peeking the age of the oldest message (executes faster)
# peek_oldest_message_age = true
`
func (a *AzureStorageQueue) Description() string {
return "Gather Azure Storage Queue metrics"
}
func (a *AzureStorageQueue) SampleConfig() string {
return sampleConfig
}
func (a *AzureStorageQueue) Init() error {
if a.StorageAccountName == "" {
return errors.New("account_name must be configured")
}
if a.StorageAccountKey == "" {
return errors.New("account_key must be configured")
}
return nil
}
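// GetServiceURL lazily builds and caches the queue service URL for the configured account.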
func (a *AzureStorageQueue) GetServiceURL() (azqueue.ServiceURL, error) {
if a.serviceURL == nil {
_url, err := url.Parse("https://" + a.StorageAccountName + ".queue.core.windows.net")
if err != nil {
return azqueue.ServiceURL{}, err
}
credential, err := azqueue.NewSharedKeyCredential(a.StorageAccountName, a.StorageAccountKey)
if err != nil {
return azqueue.ServiceURL{}, err
}
pipeline := azqueue.NewPipeline(credential, azqueue.PipelineOptions{})
serviceURL := azqueue.NewServiceURL(*_url, pipeline)
a.serviceURL = &serviceURL
}
return *a.serviceURL, nil
}
func (a *AzureStorageQueue) GatherQueueMetrics(acc telegraf.Accumulator, queueItem azqueue.QueueItem, properties *azqueue.QueueGetPropertiesResponse, peekedMessage *azqueue.PeekedMessage) {
fields := make(map[string]interface{})
tags := make(map[string]string)
tags["queue"] = strings.TrimSpace(queueItem.Name)
tags["account"] = a.StorageAccountName
fields["size"] = properties.ApproximateMessagesCount()
if peekedMessage != nil {
fields["oldest_message_age_ns"] = time.Now().UnixNano() - peekedMessage.InsertionTime.UnixNano()
}
acc.AddFields("azure_storage_queues", fields, tags)
}
func (a *AzureStorageQueue) Gather(acc telegraf.Accumulator) error {
serviceURL, err := a.GetServiceURL()
if err != nil {
return err
}
ctx := context.TODO()
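// Page through the queue listing using the continuation marker until all queues are seen.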
for marker := (azqueue.Marker{}); marker.NotDone(); {
a.Log.Debugf("Listing queues of storage account '%s'", a.StorageAccountName)
queuesSegment, err := serviceURL.ListQueuesSegment(ctx, marker,
azqueue.ListQueuesSegmentOptions{
Detail: azqueue.ListQueuesSegmentDetails{Metadata: false},
})
if err != nil {
return err
}
marker = queuesSegment.NextMarker
for _, queueItem := range queuesSegment.QueueItems {
a.Log.Debugf("Processing queue '%s' of storage account '%s'", queueItem.Name, a.StorageAccountName)
queueURL := serviceURL.NewQueueURL(queueItem.Name)
properties, err := queueURL.GetProperties(ctx)
if err != nil {
a.Log.Errorf("Error getting properties for queue %s: %s", queueItem.Name, err.Error())
continue
}
var peekedMessage *azqueue.PeekedMessage
if a.PeekOldestMessageAge {
messagesURL := queueURL.NewMessagesURL()
messagesResponse, err := messagesURL.Peek(ctx, 1)
if err != nil {
a.Log.Errorf("Error peeking queue %s: %s", queueItem.Name, err.Error())
} else if messagesResponse.NumMessages() > 0 {
peekedMessage = messagesResponse.Message(0)
}
}
a.GatherQueueMetrics(acc, queueItem, properties, peekedMessage)
}
}
return nil
}
func init() {
inputs.Add("azure_storage_queue", func() telegraf.Input {
return &AzureStorageQueue{PeekOldestMessageAge: true}
})
}

View File

@@ -59,7 +59,7 @@ func prettyToBytes(v string) uint64 {
}
var factor uint64
factor = 1
prefix := v[len(v)-1 : len(v)]
prefix := v[len(v)-1:]
if factors[prefix] != 0 {
v = v[:len(v)-1]
factor = factors[prefix]

View File

@@ -0,0 +1,98 @@
# Beanstalkd Input Plugin
The `beanstalkd` plugin collects server stats as well as tube stats (reported by `stats` and `stats-tube` commands respectively).
### Configuration:
```toml
[[inputs.beanstalkd]]
## Server to collect data from
server = "localhost:11300"
## List of tubes to gather stats about.
## If no tubes are specified, data is gathered from every tube reported by the list-tubes command
tubes = ["notifications"]
```
### Metrics:
Please see the [Beanstalk Protocol doc](https://raw.githubusercontent.com/kr/beanstalkd/master/doc/protocol.txt) for a detailed explanation of the output of the `stats` and `stats-tube` commands.
`beanstalkd_overview`: statistical information about the system as a whole
- fields
- cmd_delete
- cmd_pause_tube
- current_jobs_buried
- current_jobs_delayed
- current_jobs_ready
- current_jobs_reserved
- current_jobs_urgent
- current_using
- current_waiting
- current_watching
- pause
- pause_time_left
- total_jobs
- tags
- name
- server (address taken from config)
`beanstalkd_tube`: statistical information about the specified tube
- fields
- binlog_current_index
- binlog_max_size
- binlog_oldest_index
- binlog_records_migrated
- binlog_records_written
- cmd_bury
- cmd_delete
- cmd_ignore
- cmd_kick
- cmd_list_tube_used
- cmd_list_tubes
- cmd_list_tubes_watched
- cmd_pause_tube
- cmd_peek
- cmd_peek_buried
- cmd_peek_delayed
- cmd_peek_ready
- cmd_put
- cmd_release
- cmd_reserve
- cmd_reserve_with_timeout
- cmd_stats
- cmd_stats_job
- cmd_stats_tube
- cmd_touch
- cmd_use
- cmd_watch
- current_connections
- current_jobs_buried
- current_jobs_delayed
- current_jobs_ready
- current_jobs_reserved
- current_jobs_urgent
- current_producers
- current_tubes
- current_waiting
- current_workers
- job_timeouts
- max_job_size
- pid
- rusage_stime
- rusage_utime
- total_connections
- total_jobs
- uptime
- tags
- hostname
- id
- server (address taken from config)
- version
### Example Output:
```
beanstalkd_overview,host=server.local,hostname=a2ab22ed12e0,id=232485800aa11b24,server=localhost:11300,version=1.10 cmd_stats_tube=29482i,current_jobs_delayed=0i,current_jobs_urgent=6i,cmd_kick=0i,cmd_stats=7378i,cmd_stats_job=0i,current_waiting=0i,max_job_size=65535i,pid=6i,cmd_bury=0i,cmd_reserve_with_timeout=0i,cmd_touch=0i,current_connections=1i,current_jobs_ready=6i,current_producers=0i,cmd_delete=0i,cmd_list_tubes=7369i,cmd_peek_ready=0i,cmd_put=6i,cmd_use=3i,cmd_watch=0i,current_jobs_reserved=0i,rusage_stime=6.07,cmd_list_tubes_watched=0i,cmd_pause_tube=0i,total_jobs=6i,binlog_records_migrated=0i,cmd_list_tube_used=0i,cmd_peek_delayed=0i,cmd_release=0i,current_jobs_buried=0i,job_timeouts=0i,binlog_current_index=0i,binlog_max_size=10485760i,total_connections=7378i,cmd_peek_buried=0i,cmd_reserve=0i,current_tubes=4i,binlog_records_written=0i,cmd_peek=0i,rusage_utime=1.13,uptime=7099i,binlog_oldest_index=0i,current_workers=0i,cmd_ignore=0i 1528801650000000000
beanstalkd_tube,host=server.local,name=notifications,server=localhost:11300 pause_time_left=0i,current_jobs_buried=0i,current_jobs_delayed=0i,current_jobs_reserved=0i,current_using=0i,current_waiting=0i,pause=0i,total_jobs=3i,cmd_delete=0i,cmd_pause_tube=0i,current_jobs_ready=3i,current_jobs_urgent=3i,current_watching=0i 1528801650000000000
```

View File

@@ -0,0 +1,270 @@
package beanstalkd
import (
"fmt"
"io"
"net/textproto"
"sync"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"gopkg.in/yaml.v2"
)
const sampleConfig = `
## Server to collect data from
server = "localhost:11300"
## List of tubes to gather stats about.
## If no tubes are specified, data is gathered from every tube reported by the list-tubes command
tubes = ["notifications"]
`
type Beanstalkd struct {
Server string `toml:"server"`
Tubes []string `toml:"tubes"`
}
func (b *Beanstalkd) Description() string {
return "Collects Beanstalkd server and tubes stats"
}
func (b *Beanstalkd) SampleConfig() string {
return sampleConfig
}
func (b *Beanstalkd) Gather(acc telegraf.Accumulator) error {
connection, err := textproto.Dial("tcp", b.Server)
if err != nil {
return err
}
defer connection.Close()
tubes := b.Tubes
if len(tubes) == 0 {
err = runQuery(connection, "list-tubes", &tubes)
if err != nil {
acc.AddError(err)
}
}
var wg sync.WaitGroup
wg.Add(1)
go func() {
err := b.gatherServerStats(connection, acc)
if err != nil {
acc.AddError(err)
}
wg.Done()
}()
for _, tube := range tubes {
wg.Add(1)
go func(tube string) {
if err := b.gatherTubeStats(connection, tube, acc); err != nil {
acc.AddError(err)
}
wg.Done()
}(tube)
}
wg.Wait()
return nil
}
func (b *Beanstalkd) gatherServerStats(connection *textproto.Conn, acc telegraf.Accumulator) error {
stats := new(statsResponse)
if err := runQuery(connection, "stats", stats); err != nil {
return err
}
acc.AddFields("beanstalkd_overview",
map[string]interface{}{
"binlog_current_index": stats.BinlogCurrentIndex,
"binlog_max_size": stats.BinlogMaxSize,
"binlog_oldest_index": stats.BinlogOldestIndex,
"binlog_records_migrated": stats.BinlogRecordsMigrated,
"binlog_records_written": stats.BinlogRecordsWritten,
"cmd_bury": stats.CmdBury,
"cmd_delete": stats.CmdDelete,
"cmd_ignore": stats.CmdIgnore,
"cmd_kick": stats.CmdKick,
"cmd_list_tube_used": stats.CmdListTubeUsed,
"cmd_list_tubes": stats.CmdListTubes,
"cmd_list_tubes_watched": stats.CmdListTubesWatched,
"cmd_pause_tube": stats.CmdPauseTube,
"cmd_peek": stats.CmdPeek,
"cmd_peek_buried": stats.CmdPeekBuried,
"cmd_peek_delayed": stats.CmdPeekDelayed,
"cmd_peek_ready": stats.CmdPeekReady,
"cmd_put": stats.CmdPut,
"cmd_release": stats.CmdRelease,
"cmd_reserve": stats.CmdReserve,
"cmd_reserve_with_timeout": stats.CmdReserveWithTimeout,
"cmd_stats": stats.CmdStats,
"cmd_stats_job": stats.CmdStatsJob,
"cmd_stats_tube": stats.CmdStatsTube,
"cmd_touch": stats.CmdTouch,
"cmd_use": stats.CmdUse,
"cmd_watch": stats.CmdWatch,
"current_connections": stats.CurrentConnections,
"current_jobs_buried": stats.CurrentJobsBuried,
"current_jobs_delayed": stats.CurrentJobsDelayed,
"current_jobs_ready": stats.CurrentJobsReady,
"current_jobs_reserved": stats.CurrentJobsReserved,
"current_jobs_urgent": stats.CurrentJobsUrgent,
"current_producers": stats.CurrentProducers,
"current_tubes": stats.CurrentTubes,
"current_waiting": stats.CurrentWaiting,
"current_workers": stats.CurrentWorkers,
"job_timeouts": stats.JobTimeouts,
"max_job_size": stats.MaxJobSize,
"pid": stats.Pid,
"rusage_stime": stats.RusageStime,
"rusage_utime": stats.RusageUtime,
"total_connections": stats.TotalConnections,
"total_jobs": stats.TotalJobs,
"uptime": stats.Uptime,
},
map[string]string{
"hostname": stats.Hostname,
"id": stats.Id,
"server": b.Server,
"version": stats.Version,
},
)
return nil
}
func (b *Beanstalkd) gatherTubeStats(connection *textproto.Conn, tube string, acc telegraf.Accumulator) error {
stats := new(statsTubeResponse)
if err := runQuery(connection, "stats-tube "+tube, stats); err != nil {
return err
}
acc.AddFields("beanstalkd_tube",
map[string]interface{}{
"cmd_delete": stats.CmdDelete,
"cmd_pause_tube": stats.CmdPauseTube,
"current_jobs_buried": stats.CurrentJobsBuried,
"current_jobs_delayed": stats.CurrentJobsDelayed,
"current_jobs_ready": stats.CurrentJobsReady,
"current_jobs_reserved": stats.CurrentJobsReserved,
"current_jobs_urgent": stats.CurrentJobsUrgent,
"current_using": stats.CurrentUsing,
"current_waiting": stats.CurrentWaiting,
"current_watching": stats.CurrentWatching,
"pause": stats.Pause,
"pause_time_left": stats.PauseTimeLeft,
"total_jobs": stats.TotalJobs,
},
map[string]string{
"name": stats.Name,
"server": b.Server,
},
)
return nil
}
func runQuery(connection *textproto.Conn, cmd string, result interface{}) error {
requestId, err := connection.Cmd(cmd)
if err != nil {
return err
}
connection.StartResponse(requestId)
defer connection.EndResponse(requestId)
status, err := connection.ReadLine()
if err != nil {
return err
}
size := 0
if _, err = fmt.Sscanf(status, "OK %d", &size); err != nil {
return err
}
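// The YAML body is followed by a trailing CRLF, hence the two extra bytes.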
body := make([]byte, size+2)
if _, err = io.ReadFull(connection.R, body); err != nil {
return err
}
return yaml.Unmarshal(body, result)
}
func init() {
inputs.Add("beanstalkd", func() telegraf.Input {
return &Beanstalkd{}
})
}
type statsResponse struct {
BinlogCurrentIndex int `yaml:"binlog-current-index"`
BinlogMaxSize int `yaml:"binlog-max-size"`
BinlogOldestIndex int `yaml:"binlog-oldest-index"`
BinlogRecordsMigrated int `yaml:"binlog-records-migrated"`
BinlogRecordsWritten int `yaml:"binlog-records-written"`
CmdBury int `yaml:"cmd-bury"`
CmdDelete int `yaml:"cmd-delete"`
CmdIgnore int `yaml:"cmd-ignore"`
CmdKick int `yaml:"cmd-kick"`
CmdListTubeUsed int `yaml:"cmd-list-tube-used"`
CmdListTubes int `yaml:"cmd-list-tubes"`
CmdListTubesWatched int `yaml:"cmd-list-tubes-watched"`
CmdPauseTube int `yaml:"cmd-pause-tube"`
CmdPeek int `yaml:"cmd-peek"`
CmdPeekBuried int `yaml:"cmd-peek-buried"`
CmdPeekDelayed int `yaml:"cmd-peek-delayed"`
CmdPeekReady int `yaml:"cmd-peek-ready"`
CmdPut int `yaml:"cmd-put"`
CmdRelease int `yaml:"cmd-release"`
CmdReserve int `yaml:"cmd-reserve"`
CmdReserveWithTimeout int `yaml:"cmd-reserve-with-timeout"`
CmdStats int `yaml:"cmd-stats"`
CmdStatsJob int `yaml:"cmd-stats-job"`
CmdStatsTube int `yaml:"cmd-stats-tube"`
CmdTouch int `yaml:"cmd-touch"`
CmdUse int `yaml:"cmd-use"`
CmdWatch int `yaml:"cmd-watch"`
CurrentConnections int `yaml:"current-connections"`
CurrentJobsBuried int `yaml:"current-jobs-buried"`
CurrentJobsDelayed int `yaml:"current-jobs-delayed"`
CurrentJobsReady int `yaml:"current-jobs-ready"`
CurrentJobsReserved int `yaml:"current-jobs-reserved"`
CurrentJobsUrgent int `yaml:"current-jobs-urgent"`
CurrentProducers int `yaml:"current-producers"`
CurrentTubes int `yaml:"current-tubes"`
CurrentWaiting int `yaml:"current-waiting"`
CurrentWorkers int `yaml:"current-workers"`
Hostname string `yaml:"hostname"`
Id string `yaml:"id"`
JobTimeouts int `yaml:"job-timeouts"`
MaxJobSize int `yaml:"max-job-size"`
Pid int `yaml:"pid"`
RusageStime float64 `yaml:"rusage-stime"`
RusageUtime float64 `yaml:"rusage-utime"`
TotalConnections int `yaml:"total-connections"`
TotalJobs int `yaml:"total-jobs"`
Uptime int `yaml:"uptime"`
Version string `yaml:"version"`
}
type statsTubeResponse struct {
CmdDelete int `yaml:"cmd-delete"`
CmdPauseTube int `yaml:"cmd-pause-tube"`
CurrentJobsBuried int `yaml:"current-jobs-buried"`
CurrentJobsDelayed int `yaml:"current-jobs-delayed"`
CurrentJobsReady int `yaml:"current-jobs-ready"`
CurrentJobsReserved int `yaml:"current-jobs-reserved"`
CurrentJobsUrgent int `yaml:"current-jobs-urgent"`
CurrentUsing int `yaml:"current-using"`
CurrentWaiting int `yaml:"current-waiting"`
CurrentWatching int `yaml:"current-watching"`
Name string `yaml:"name"`
Pause int `yaml:"pause"`
PauseTimeLeft int `yaml:"pause-time-left"`
TotalJobs int `yaml:"total-jobs"`
}

View File

@@ -0,0 +1,332 @@
package beanstalkd_test
import (
"io"
"net"
"net/textproto"
"testing"
"github.com/influxdata/telegraf/plugins/inputs/beanstalkd"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
func TestBeanstalkd(t *testing.T) {
type tubeStats struct {
name string
fields map[string]interface{}
}
tests := []struct {
name string
tubesConfig []string
expectedTubes []tubeStats
notExpectedTubes []tubeStats
}{
{
name: "All tubes stats",
tubesConfig: []string{},
expectedTubes: []tubeStats{
{name: "default", fields: defaultTubeFields},
{name: "test", fields: testTubeFields},
},
notExpectedTubes: []tubeStats{},
},
{
name: "Specified tubes stats",
tubesConfig: []string{"test"},
expectedTubes: []tubeStats{
{name: "test", fields: testTubeFields},
},
notExpectedTubes: []tubeStats{
{name: "default", fields: defaultTubeFields},
},
},
{
name: "Unknown tube stats",
tubesConfig: []string{"unknown"},
expectedTubes: []tubeStats{},
notExpectedTubes: []tubeStats{
{name: "default", fields: defaultTubeFields},
{name: "test", fields: testTubeFields},
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
server, err := startTestServer(t)
if err != nil {
t.Fatalf("Unable to create test server")
}
defer server.Close()
serverAddress := server.Addr().String()
plugin := beanstalkd.Beanstalkd{
Server: serverAddress,
Tubes: test.tubesConfig,
}
var acc testutil.Accumulator
require.NoError(t, acc.GatherError(plugin.Gather))
acc.AssertContainsTaggedFields(t, "beanstalkd_overview",
overviewFields,
getOverviewTags(serverAddress),
)
for _, expectedTube := range test.expectedTubes {
acc.AssertContainsTaggedFields(t, "beanstalkd_tube",
expectedTube.fields,
getTubeTags(serverAddress, expectedTube.name),
)
}
for _, notExpectedTube := range test.notExpectedTubes {
acc.AssertDoesNotContainsTaggedFields(t, "beanstalkd_tube",
notExpectedTube.fields,
getTubeTags(serverAddress, notExpectedTube.name),
)
}
})
}
}
func startTestServer(t *testing.T) (net.Listener, error) {
server, err := net.Listen("tcp", "localhost:0")
if err != nil {
return nil, err
}
go func() {
defer server.Close()
connection, err := server.Accept()
if err != nil {
t.Log("Test server: failed to accept connection. Error: ", err)
return
}
tp := textproto.NewConn(connection)
defer tp.Close()
sendSuccessResponse := func(body string) {
tp.PrintfLine("OK %d\r\n%s", len(body), body)
}
for {
cmd, err := tp.ReadLine()
if err == io.EOF {
return
} else if err != nil {
t.Log("Test server: failed read command. Error: ", err)
return
}
switch cmd {
case "list-tubes":
sendSuccessResponse(listTubesResponse)
case "stats":
sendSuccessResponse(statsResponse)
case "stats-tube default":
sendSuccessResponse(statsTubeDefaultResponse)
case "stats-tube test":
sendSuccessResponse(statsTubeTestResponse)
case "stats-tube unknown":
tp.PrintfLine("NOT_FOUND")
default:
t.Log("Test server: unknown command")
}
}
}()
return server, nil
}
const (
listTubesResponse = `---
- default
- test
`
statsResponse = `---
current-jobs-urgent: 5
current-jobs-ready: 5
current-jobs-reserved: 0
current-jobs-delayed: 1
current-jobs-buried: 0
cmd-put: 6
cmd-peek: 0
cmd-peek-ready: 1
cmd-peek-delayed: 0
cmd-peek-buried: 0
cmd-reserve: 0
cmd-reserve-with-timeout: 1
cmd-delete: 1
cmd-release: 0
cmd-use: 2
cmd-watch: 0
cmd-ignore: 0
cmd-bury: 1
cmd-kick: 1
cmd-touch: 0
cmd-stats: 1
cmd-stats-job: 0
cmd-stats-tube: 2
cmd-list-tubes: 1
cmd-list-tube-used: 0
cmd-list-tubes-watched: 0
cmd-pause-tube: 0
job-timeouts: 0
total-jobs: 6
max-job-size: 65535
current-tubes: 2
current-connections: 2
current-producers: 1
current-workers: 1
current-waiting: 0
total-connections: 2
pid: 6
version: 1.10
rusage-utime: 0.000000
rusage-stime: 0.000000
uptime: 20
binlog-oldest-index: 0
binlog-current-index: 0
binlog-records-migrated: 0
binlog-records-written: 0
binlog-max-size: 10485760
id: bba7546657efdd4c
hostname: 2873efd3e88c
`
statsTubeDefaultResponse = `---
name: default
current-jobs-urgent: 0
current-jobs-ready: 0
current-jobs-reserved: 0
current-jobs-delayed: 0
current-jobs-buried: 0
total-jobs: 0
current-using: 2
current-watching: 2
current-waiting: 0
cmd-delete: 0
cmd-pause-tube: 0
pause: 0
pause-time-left: 0
`
statsTubeTestResponse = `---
name: test
current-jobs-urgent: 5
current-jobs-ready: 5
current-jobs-reserved: 0
current-jobs-delayed: 1
current-jobs-buried: 0
total-jobs: 6
current-using: 0
current-watching: 0
current-waiting: 0
cmd-delete: 0
cmd-pause-tube: 0
pause: 0
pause-time-left: 0
`
)
var (
// Default tube without stats
defaultTubeFields = map[string]interface{}{
"cmd_delete": 0,
"cmd_pause_tube": 0,
"current_jobs_buried": 0,
"current_jobs_delayed": 0,
"current_jobs_ready": 0,
"current_jobs_reserved": 0,
"current_jobs_urgent": 0,
"current_using": 2,
"current_waiting": 0,
"current_watching": 2,
"pause": 0,
"pause_time_left": 0,
"total_jobs": 0,
}
// Test tube with stats
testTubeFields = map[string]interface{}{
"cmd_delete": 0,
"cmd_pause_tube": 0,
"current_jobs_buried": 0,
"current_jobs_delayed": 1,
"current_jobs_ready": 5,
"current_jobs_reserved": 0,
"current_jobs_urgent": 5,
"current_using": 0,
"current_waiting": 0,
"current_watching": 0,
"pause": 0,
"pause_time_left": 0,
"total_jobs": 6,
}
// Server stats
overviewFields = map[string]interface{}{
"binlog_current_index": 0,
"binlog_max_size": 10485760,
"binlog_oldest_index": 0,
"binlog_records_migrated": 0,
"binlog_records_written": 0,
"cmd_bury": 1,
"cmd_delete": 1,
"cmd_ignore": 0,
"cmd_kick": 1,
"cmd_list_tube_used": 0,
"cmd_list_tubes": 1,
"cmd_list_tubes_watched": 0,
"cmd_pause_tube": 0,
"cmd_peek": 0,
"cmd_peek_buried": 0,
"cmd_peek_delayed": 0,
"cmd_peek_ready": 1,
"cmd_put": 6,
"cmd_release": 0,
"cmd_reserve": 0,
"cmd_reserve_with_timeout": 1,
"cmd_stats": 1,
"cmd_stats_job": 0,
"cmd_stats_tube": 2,
"cmd_touch": 0,
"cmd_use": 2,
"cmd_watch": 0,
"current_connections": 2,
"current_jobs_buried": 0,
"current_jobs_delayed": 1,
"current_jobs_ready": 5,
"current_jobs_reserved": 0,
"current_jobs_urgent": 5,
"current_producers": 1,
"current_tubes": 2,
"current_waiting": 0,
"current_workers": 1,
"job_timeouts": 0,
"max_job_size": 65535,
"pid": 6,
"rusage_stime": 0.0,
"rusage_utime": 0.0,
"total_connections": 2,
"total_jobs": 6,
"uptime": 20,
}
)
func getOverviewTags(server string) map[string]string {
return map[string]string{
"hostname": "2873efd3e88c",
"id": "bba7546657efdd4c",
"server": server,
"version": "1.10",
}
}
func getTubeTags(server string, tube string) map[string]string {
return map[string]string{
"name": tube,
"server": server,
}
}

View File

@@ -0,0 +1,118 @@
# BIND 9 Nameserver Statistics Input Plugin
This plugin decodes the JSON or XML statistics provided by BIND 9 nameservers.
### XML Statistics Channel
Version 2 statistics (BIND 9.6 - 9.9) and version 3 statistics (BIND 9.9+) are supported. Note that
for BIND 9.9 to support version 3 statistics, it must be built with the `--enable-newstats` compile
flag, and it must be specifically requested via the correct URL. Version 3 statistics are the
default (and only) XML format in BIND 9.10+.
### JSON Statistics Channel
JSON statistics schema version 1 (BIND 9.10+) is supported. As of writing, some distros still do
not enable support for JSON statistics in their BIND packages.
### Configuration:
- **urls** []string: List of BIND statistics channel URLs to collect from. Do not include a
trailing slash in the URL. Default is "http://localhost:8053/xml/v3".
- **gather_memory_contexts** bool: Report per-context memory statistics.
- **gather_views** bool: Report per-view query statistics.
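A minimal TOML block, mirroring the plugin's sample configuration:
```toml
[[inputs.bind]]
## An array of BIND XML statistics URI to gather stats.
## Default is "http://localhost:8053/xml/v3".
# urls = ["http://localhost:8053/xml/v3"]
# gather_memory_contexts = false
# gather_views = false
```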
The following table summarizes the URL formats which should be used, depending on your BIND
version and configured statistics channel.
| BIND Version | Statistics Format | Example URL |
| ------------ | ----------------- | ----------------------------- |
| 9.6 - 9.8 | XML v2 | http://localhost:8053 |
| 9.9 | XML v2 | http://localhost:8053/xml/v2 |
| 9.9+ | XML v3 | http://localhost:8053/xml/v3 |
| 9.10+ | JSON v1 | http://localhost:8053/json/v1 |
#### Configuration of BIND Daemon
Add the following to your named.conf if running Telegraf on the same host as the BIND daemon:
```
statistics-channels {
inet 127.0.0.1 port 8053;
};
```
Alternatively, specify a wildcard address (e.g., 0.0.0.0) or specific IP address of an interface to
configure the BIND daemon to listen on that address. Note that you should secure the statistics
channel with an ACL if it is publicly reachable. Consult the BIND Administrator Reference Manual
for more information.
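For example, to listen on all interfaces while restricting access (a sketch; the allow list below is illustrative and should be adapted to your network):
```
statistics-channels {
inet 0.0.0.0 port 8053 allow { 127.0.0.1; 192.168.0.0/24; };
};
```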
### Measurements & Fields:
- bind_counter
- name=value (multiple)
- bind_memory
- total_use
- in_use
- block_size
- context_size
- lost
- bind_memory_context
- total
- in_use
### Tags:
- All measurements
- url
- source
- port
- bind_counter
- type
- view (optional)
- bind_memory_context
- id
- name
### Sample Queries:
These are some useful queries (to generate dashboards or other) to run against data from this
plugin:
```
SELECT non_negative_derivative(mean(/^A$|^PTR$/), 5m) FROM bind_counter \
WHERE "url" = 'localhost:8053' AND "type" = 'qtype' AND time > now() - 1h \
GROUP BY time(5m), "type"
```
```
name: bind_counter
tags: type=qtype
time non_negative_derivative_A non_negative_derivative_PTR
---- ------------------------- ---------------------------
1553862000000000000 254.99444444430992 1388.311111111194
1553862300000000000 354 2135.716666666791
1553862600000000000 316.8666666666977 2130.133333333768
1553862900000000000 309.05000000004657 2126.75
1553863200000000000 315.64999999990687 2128.483333332464
1553863500000000000 308.9166666667443 2132.350000000559
1553863800000000000 302.64999999990687 2131.1833333335817
1553864100000000000 310.85000000009313 2132.449999999255
1553864400000000000 314.3666666666977 2136.216666666791
1553864700000000000 303.2333333331626 2133.8166666673496
1553865000000000000 304.93333333334886 2127.333333333023
1553865300000000000 317.93333333334886 2130.3166666664183
1553865600000000000 280.6666666667443 1807.9071428570896
```
### Example Output
Here is example output of this plugin:
```
bind_memory,host=LAP,port=8053,source=localhost,url=localhost:8053 block_size=12058624i,context_size=4575056i,in_use=4113717i,lost=0i,total_use=16663252i 1554276619000000000
bind_counter,host=LAP,port=8053,source=localhost,type=opcode,url=localhost:8053 IQUERY=0i,NOTIFY=0i,QUERY=9i,STATUS=0i,UPDATE=0i 1554276619000000000
bind_counter,host=LAP,port=8053,source=localhost,type=rcode,url=localhost:8053 17=0i,18=0i,19=0i,20=0i,21=0i,22=0i,BADCOOKIE=0i,BADVERS=0i,FORMERR=0i,NOERROR=7i,NOTAUTH=0i,NOTIMP=0i,NOTZONE=0i,NXDOMAIN=0i,NXRRSET=0i,REFUSED=0i,RESERVED11=0i,RESERVED12=0i,RESERVED13=0i,RESERVED14=0i,RESERVED15=0i,SERVFAIL=2i,YXDOMAIN=0i,YXRRSET=0i 1554276619000000000
bind_counter,host=LAP,port=8053,source=localhost,type=qtype,url=localhost:8053 A=1i,ANY=1i,NS=1i,PTR=5i,SOA=1i 1554276619000000000
bind_counter,host=LAP,port=8053,source=localhost,type=nsstat,url=localhost:8053 AuthQryRej=0i,CookieBadSize=0i,CookieBadTime=0i,CookieIn=9i,CookieMatch=0i,CookieNew=9i,CookieNoMatch=0i,DNS64=0i,ECSOpt=0i,ExpireOpt=0i,KeyTagOpt=0i,NSIDOpt=0i,OtherOpt=0i,QryAuthAns=7i,QryBADCOOKIE=0i,QryDropped=0i,QryDuplicate=0i,QryFORMERR=0i,QryFailure=0i,QryNXDOMAIN=0i,QryNXRedir=0i,QryNXRedirRLookup=0i,QryNoauthAns=0i,QryNxrrset=1i,QryRecursion=2i,QryReferral=0i,QrySERVFAIL=2i,QrySuccess=6i,QryTCP=1i,QryUDP=8i,RPZRewrites=0i,RateDropped=0i,RateSlipped=0i,RecQryRej=0i,RecursClients=0i,ReqBadEDNSVer=0i,ReqBadSIG=0i,ReqEdns0=9i,ReqSIG0=0i,ReqTCP=1i,ReqTSIG=0i,Requestv4=9i,Requestv6=0i,RespEDNS0=9i,RespSIG0=0i,RespTSIG=0i,Response=9i,TruncatedResp=0i,UpdateBadPrereq=0i,UpdateDone=0i,UpdateFail=0i,UpdateFwdFail=0i,UpdateRej=0i,UpdateReqFwd=0i,UpdateRespFwd=0i,XfrRej=0i,XfrReqDone=0i 1554276619000000000
bind_counter,host=LAP,port=8053,source=localhost,type=zonestat,url=localhost:8053 AXFRReqv4=0i,AXFRReqv6=0i,IXFRReqv4=0i,IXFRReqv6=0i,NotifyInv4=0i,NotifyInv6=0i,NotifyOutv4=0i,NotifyOutv6=0i,NotifyRej=0i,SOAOutv4=0i,SOAOutv6=0i,XfrFail=0i,XfrSuccess=0i 1554276619000000000
bind_counter,host=LAP,port=8053,source=localhost,type=sockstat,url=localhost:8053 FDWatchClose=0i,FDwatchConn=0i,FDwatchConnFail=0i,FDwatchRecvErr=0i,FDwatchSendErr=0i,FdwatchBindFail=0i,RawActive=1i,RawClose=0i,RawOpen=1i,RawOpenFail=0i,RawRecvErr=0i,TCP4Accept=6i,TCP4AcceptFail=0i,TCP4Active=9i,TCP4BindFail=0i,TCP4Close=5i,TCP4Conn=0i,TCP4ConnFail=0i,TCP4Open=8i,TCP4OpenFail=0i,TCP4RecvErr=0i,TCP4SendErr=0i,TCP6Accept=0i,TCP6AcceptFail=0i,TCP6Active=2i,TCP6BindFail=0i,TCP6Close=0i,TCP6Conn=0i,TCP6ConnFail=0i,TCP6Open=2i,TCP6OpenFail=0i,TCP6RecvErr=0i,TCP6SendErr=0i,UDP4Active=18i,UDP4BindFail=14i,UDP4Close=14i,UDP4Conn=0i,UDP4ConnFail=0i,UDP4Open=32i,UDP4OpenFail=0i,UDP4RecvErr=0i,UDP4SendErr=0i,UDP6Active=3i,UDP6BindFail=0i,UDP6Close=6i,UDP6Conn=0i,UDP6ConnFail=6i,UDP6Open=9i,UDP6OpenFail=0i,UDP6RecvErr=0i,UDP6SendErr=0i,UnixAccept=0i,UnixAcceptFail=0i,UnixActive=0i,UnixBindFail=0i,UnixClose=0i,UnixConn=0i,UnixConnFail=0i,UnixOpen=0i,UnixOpenFail=0i,UnixRecvErr=0i,UnixSendErr=0i 1554276619000000000
```

View File

@@ -0,0 +1,87 @@
package bind
import (
"fmt"
"net/http"
"net/url"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
type Bind struct {
Urls []string
GatherMemoryContexts bool
GatherViews bool
}
var sampleConfig = `
## An array of BIND XML statistics URI to gather stats.
## Default is "http://localhost:8053/xml/v3".
# urls = ["http://localhost:8053/xml/v3"]
# gather_memory_contexts = false
# gather_views = false
`
var client = &http.Client{
Timeout: 4 * time.Second,
}
func (b *Bind) Description() string {
return "Read BIND nameserver XML statistics"
}
func (b *Bind) SampleConfig() string {
return sampleConfig
}
func (b *Bind) Gather(acc telegraf.Accumulator) error {
var wg sync.WaitGroup
if len(b.Urls) == 0 {
b.Urls = []string{"http://localhost:8053/xml/v3"}
}
for _, u := range b.Urls {
addr, err := url.Parse(u)
if err != nil {
acc.AddError(fmt.Errorf("Unable to parse address '%s': %s", u, err))
continue
}
wg.Add(1)
go func(addr *url.URL) {
defer wg.Done()
acc.AddError(b.gatherUrl(addr, acc))
}(addr)
}
wg.Wait()
return nil
}
func (b *Bind) gatherUrl(addr *url.URL, acc telegraf.Accumulator) error {
switch addr.Path {
case "":
// BIND 9.6 - 9.8
return b.readStatsXMLv2(addr, acc)
case "/json/v1":
// BIND 9.10+
return b.readStatsJSON(addr, acc)
case "/xml/v2":
// BIND 9.9
return b.readStatsXMLv2(addr, acc)
case "/xml/v3":
// BIND 9.9+
return b.readStatsXMLv3(addr, acc)
default:
return fmt.Errorf("URL %s is ambiguous. Please check plugin documentation for supported URL formats.",
addr)
}
}
func init() {
inputs.Add("bind", func() telegraf.Input { return &Bind{} })
}

View File

@@ -0,0 +1,617 @@
package bind
import (
"net"
"net/http"
"net/http/httptest"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
func TestBindJsonStats(t *testing.T) {
ts := httptest.NewServer(http.FileServer(http.Dir("testdata")))
url := ts.Listener.Addr().String()
host, port, _ := net.SplitHostPort(url)
defer ts.Close()
b := Bind{
Urls: []string{ts.URL + "/json/v1"},
GatherMemoryContexts: true,
GatherViews: true,
}
var acc testutil.Accumulator
err := acc.GatherError(b.Gather)
assert.Nil(t, err)
// Use subtests for counters, since they are similar structure
type fieldSet struct {
fieldKey string
fieldValue int64
}
testCases := []struct {
counterType string
values []fieldSet
}{
{
"opcode",
[]fieldSet{
{"NOTIFY", 0},
{"UPDATE", 0},
{"IQUERY", 0},
{"QUERY", 13},
{"STATUS", 0},
},
},
{
"rcode",
[]fieldSet{
{"NOERROR", 1732},
{"FORMERR", 0},
{"SERVFAIL", 6},
{"NXDOMAIN", 200},
{"NOTIMP", 0},
{"REFUSED", 6},
{"REFUSED", 0},
{"YXDOMAIN", 0},
{"YXRRSET", 0},
{"NXRRSET", 0},
{"NOTAUTH", 0},
{"NOTZONE", 0},
{"RESERVED11", 0},
{"RESERVED12", 0},
{"RESERVED13", 0},
{"RESERVED14", 0},
{"RESERVED15", 0},
{"BADVERS", 0},
{"17", 0},
{"18", 0},
{"19", 0},
{"20", 0},
{"21", 0},
{"22", 0},
{"BADCOOKIE", 0},
},
},
{
"qtype",
[]fieldSet{
{"A", 2},
{"AAAA", 2},
{"PTR", 7},
{"SRV", 2},
},
},
{
"nsstat",
[]fieldSet{
{"QrySuccess", 6},
{"QryRecursion", 12},
{"Requestv4", 13},
{"QryNXDOMAIN", 4},
{"QryAuthAns", 1},
{"QryNxrrset", 1},
{"QryNoauthAns", 10},
{"QryUDP", 13},
{"QryDuplicate", 1},
{"QrySERVFAIL", 1},
{"Response", 12},
},
},
{
"sockstat",
[]fieldSet{
{"TCP4Open", 118},
{"UDP6Close", 112},
{"UDP4Close", 333},
{"TCP4Close", 119},
{"TCP6Active", 2},
{"UDP4Active", 2},
{"UDP4RecvErr", 1},
{"UDP4Open", 335},
{"TCP4Active", 10},
{"RawActive", 1},
{"UDP6ConnFail", 112},
{"TCP4Conn", 114},
{"UDP6Active", 1},
{"UDP6Open", 113},
{"UDP4Conn", 333},
{"UDP6SendErr", 112},
{"RawOpen", 1},
{"TCP4Accept", 6},
{"TCP6Open", 2},
},
},
{
"zonestat",
[]fieldSet{
{"NotifyOutv4", 8},
{"NotifyInv4", 5},
{"SOAOutv4", 5},
},
},
}
for _, tc := range testCases {
t.Run(tc.counterType, func(t *testing.T) {
tags := map[string]string{
"url": url,
"type": tc.counterType,
"source": host,
"port": port,
}
fields := map[string]interface{}{}
for _, val := range tc.values {
fields[val.fieldKey] = val.fieldValue
}
acc.AssertContainsTaggedFields(t, "bind_counter", fields, tags)
})
}
// Subtest for memory stats
t.Run("memory", func(t *testing.T) {
tags := map[string]string{
"url": url,
"source": host,
"port": port,
}
fields := map[string]interface{}{
"block_size": int64(13893632),
"context_size": int64(3685480),
"in_use": int64(3064368),
"lost": int64(0),
"total_use": int64(18206566),
}
acc.AssertContainsTaggedFields(t, "bind_memory", fields, tags)
})
// Subtest for per-context memory stats
t.Run("memory_context", func(t *testing.T) {
assert.True(t, acc.HasInt64Field("bind_memory_context", "total"))
assert.True(t, acc.HasInt64Field("bind_memory_context", "in_use"))
})
}
func TestBindXmlStatsV2(t *testing.T) {
ts := httptest.NewServer(http.FileServer(http.Dir("testdata")))
url := ts.Listener.Addr().String()
host, port, _ := net.SplitHostPort(url)
defer ts.Close()
b := Bind{
Urls: []string{ts.URL + "/xml/v2"},
GatherMemoryContexts: true,
GatherViews: true,
}
var acc testutil.Accumulator
err := acc.GatherError(b.Gather)
assert.Nil(t, err)
// Use subtests for counters, since they are similar structure
type fieldSet struct {
fieldKey string
fieldValue int64
}
testCases := []struct {
counterType string
values []fieldSet
}{
{
"opcode",
[]fieldSet{
{"UPDATE", 238},
{"QUERY", 102312374},
},
},
{
"qtype",
[]fieldSet{
{"ANY", 7},
{"DNSKEY", 452},
{"SSHFP", 2987},
{"SOA", 100415},
{"AAAA", 37786321},
{"MX", 441155},
{"IXFR", 157},
{"CNAME", 531},
{"NS", 1999},
{"TXT", 34628},
{"A", 58951432},
{"SRV", 741082},
{"PTR", 4211487},
{"NAPTR", 39137},
{"DS", 584},
},
},
{
"nsstat",
[]fieldSet{
{"XfrReqDone", 157},
{"ReqEdns0", 441758},
{"ReqTSIG", 0},
{"UpdateRespFwd", 0},
{"RespEDNS0", 441748},
{"QryDropped", 16},
{"RPZRewrites", 0},
{"XfrRej", 0},
{"RecQryRej", 0},
{"QryNxrrset", 24423133},
{"QryFORMERR", 0},
{"ReqTCP", 1548156},
{"UpdateDone", 0},
{"QrySERVFAIL", 14422},
{"QryRecursion", 2104239},
{"Requestv4", 102312611},
{"UpdateFwdFail", 0},
{"QryReferral", 3},
{"Response", 102301560},
{"RespTSIG", 0},
{"QrySuccess", 63811668},
{"QryFailure", 0},
{"RespSIG0", 0},
{"ReqSIG0", 0},
{"UpdateRej", 238},
{"QryAuthAns", 72180718},
{"UpdateFail", 0},
{"QryDuplicate", 10879},
{"RateDropped", 0},
{"QryNoauthAns", 30106182},
{"QryNXDOMAIN", 14052096},
{"ReqBadSIG", 0},
{"UpdateReqFwd", 0},
{"RateSlipped", 0},
{"TruncatedResp", 3787},
{"Requestv6", 1},
{"UpdateBadPrereq", 0},
{"AuthQryRej", 0},
{"ReqBadEDNSVer", 0},
},
},
{
"sockstat",
[]fieldSet{
{"FdwatchBindFail", 0},
{"UDP6Open", 238269},
{"UDP6SendErr", 238250},
{"TCP4ConnFail", 0},
{"TCP4Conn", 590},
{"TCP6AcceptFail", 0},
{"UDP4SendErr", 0},
{"FDwatchConn", 0},
{"TCP4RecvErr", 1},
{"TCP4OpenFail", 0},
{"UDP4OpenFail", 0},
{"UDP6OpenFail", 0},
{"TCP4Close", 1548268},
{"TCP6BindFail", 0},
{"TCP4AcceptFail", 0},
{"UnixConn", 0},
{"UDP4Open", 3765532},
{"TCP6Close", 0},
{"FDwatchRecvErr", 0},
{"UDP4Conn", 3764828},
{"UnixConnFail", 0},
{"TCP6Conn", 0},
{"TCP6OpenFail", 0},
{"TCP6SendErr", 0},
{"TCP6RecvErr", 0},
{"FDwatchSendErr", 0},
{"UDP4RecvErr", 1650},
{"UDP4ConnFail", 0},
{"UDP6Close", 238267},
{"FDWatchClose", 0},
{"TCP4Accept", 1547672},
{"UnixAccept", 0},
{"TCP4Open", 602},
{"UDP4BindFail", 219},
{"UDP6ConnFail", 238250},
{"UnixClose", 0},
{"TCP4BindFail", 0},
{"UnixOpenFail", 0},
{"UDP6BindFail", 16},
{"UnixOpen", 0},
{"UnixAcceptFail", 0},
{"UnixRecvErr", 0},
{"UDP6RecvErr", 0},
{"TCP6ConnFail", 0},
{"FDwatchConnFail", 0},
{"TCP4SendErr", 0},
{"UDP4Close", 3765528},
{"UnixSendErr", 0},
{"TCP6Open", 2},
{"UDP6Conn", 1},
{"TCP6Accept", 0},
{"UnixBindFail", 0},
},
},
}
for _, tc := range testCases {
t.Run(tc.counterType, func(t *testing.T) {
tags := map[string]string{
"url": url,
"type": tc.counterType,
"source": host,
"port": port,
}
fields := map[string]interface{}{}
for _, val := range tc.values {
fields[val.fieldKey] = val.fieldValue
}
acc.AssertContainsTaggedFields(t, "bind_counter", fields, tags)
})
}
// Subtest for memory stats
t.Run("memory", func(t *testing.T) {
tags := map[string]string{
"url": url,
"source": host,
"port": port,
}
fields := map[string]interface{}{
"block_size": int64(77070336),
"context_size": int64(6663840),
"in_use": int64(20772579),
"lost": int64(0),
"total_use": int64(81804609),
}
acc.AssertContainsTaggedFields(t, "bind_memory", fields, tags)
})
// Subtest for per-context memory stats
t.Run("memory_context", func(t *testing.T) {
assert.True(t, acc.HasInt64Field("bind_memory_context", "total"))
assert.True(t, acc.HasInt64Field("bind_memory_context", "in_use"))
})
}
func TestBindXmlStatsV3(t *testing.T) {
ts := httptest.NewServer(http.FileServer(http.Dir("testdata")))
url := ts.Listener.Addr().String()
host, port, _ := net.SplitHostPort(url)
defer ts.Close()
b := Bind{
Urls: []string{ts.URL + "/xml/v3"},
GatherMemoryContexts: true,
GatherViews: true,
}
var acc testutil.Accumulator
err := acc.GatherError(b.Gather)
assert.Nil(t, err)
// Use subtests for counters, since they are similar structure
type fieldSet struct {
fieldKey string
fieldValue int64
}
testCases := []struct {
counterType string
values []fieldSet
}{
{
"opcode",
[]fieldSet{
{"NOTIFY", 0},
{"UPDATE", 0},
{"IQUERY", 0},
{"QUERY", 74941},
{"STATUS", 0},
},
},
{
"qtype",
[]fieldSet{
{"ANY", 22},
{"SOA", 18},
{"AAAA", 5735},
{"MX", 618},
{"NS", 373},
{"TXT", 970},
{"A", 63672},
{"SRV", 139},
{"PTR", 3393},
{"RRSIG", 1},
},
},
{
"nsstat",
[]fieldSet{
{"DNS64", 0},
{"ExpireOpt", 0},
{"NSIDOpt", 0},
{"OtherOpt", 59},
{"XfrReqDone", 0},
{"ReqEdns0", 9250},
{"ReqTSIG", 0},
{"UpdateRespFwd", 0},
{"RespEDNS0", 9250},
{"QryDropped", 11},
{"RPZRewrites", 0},
{"XfrRej", 0},
{"RecQryRej", 35},
{"QryNxrrset", 2452},
{"QryFORMERR", 0},
{"ReqTCP", 260},
{"QryTCP", 258},
{"QryUDP", 74648},
{"UpdateDone", 0},
{"QrySERVFAIL", 122},
{"QryRecursion", 53750},
{"RecursClients", 0},
{"Requestv4", 74942},
{"UpdateFwdFail", 0},
{"QryReferral", 0},
{"Response", 63264},
{"RespTSIG", 0},
{"QrySuccess", 49044},
{"QryFailure", 35},
{"RespSIG0", 0},
{"ReqSIG0", 0},
{"UpdateRej", 0},
{"QryAuthAns", 2752},
{"UpdateFail", 0},
{"QryDuplicate", 11667},
{"RateDropped", 0},
{"QryNoauthAns", 60354},
{"QryNXDOMAIN", 11610},
{"ReqBadSIG", 0},
{"UpdateReqFwd", 0},
{"RateSlipped", 0},
{"TruncatedResp", 365},
{"Requestv6", 0},
{"UpdateBadPrereq", 0},
{"AuthQryRej", 0},
{"ReqBadEDNSVer", 0},
{"SitBadSize", 0},
{"SitBadTime", 0},
{"SitMatch", 0},
{"SitNew", 0},
{"SitNoMatch", 0},
{"SitOpt", 0},
{"TruncatedResp", 365},
},
},
{
"sockstat",
[]fieldSet{
{"FDwatchConnFail", 0},
{"UnixClose", 0},
{"TCP6OpenFail", 0},
{"TCP6Active", 0},
{"UDP4RecvErr", 14},
{"TCP6Conn", 0},
{"FDWatchClose", 0},
{"TCP4ConnFail", 0},
{"UnixConn", 0},
{"UnixSendErr", 0},
{"UDP6Close", 0},
{"UnixOpen", 0},
{"UDP4Conn", 92535},
{"TCP4Close", 336},
{"UnixAcceptFail", 0},
{"UnixAccept", 0},
{"TCP6AcceptFail", 0},
{"UDP6Open", 0},
{"UDP6BindFail", 0},
{"UDP6RecvErr", 0},
{"RawOpenFail", 0},
{"TCP4Accept", 293},
{"UDP6SendErr", 0},
{"UDP6Conn", 0},
{"TCP4SendErr", 0},
{"UDP4BindFail", 1},
{"UDP4Active", 4},
{"TCP4Active", 297},
{"UnixConnFail", 0},
{"UnixOpenFail", 0},
{"UDP6ConnFail", 0},
{"TCP6Accept", 0},
{"UnixRecvErr", 0},
{"RawActive", 1},
{"UDP6OpenFail", 0},
{"RawClose", 0},
{"UnixBindFail", 0},
{"UnixActive", 0},
{"FdwatchBindFail", 0},
{"UDP4SendErr", 0},
{"RawRecvErr", 0},
{"TCP6Close", 0},
{"FDwatchRecvErr", 0},
{"TCP4BindFail", 0},
{"TCP4AcceptFail", 0},
{"TCP4OpenFail", 0},
{"UDP4Open", 92542},
{"UDP4ConnFail", 0},
{"TCP4Conn", 44},
{"TCP6ConnFail", 0},
{"FDwatchConn", 0},
{"UDP6Active", 0},
{"RawOpen", 1},
{"TCP6BindFail", 0},
{"UDP4Close", 92538},
{"TCP6Open", 0},
{"TCP6SendErr", 0},
{"TCP4Open", 48},
{"FDwatchSendErr", 0},
{"TCP6RecvErr", 0},
{"UDP4OpenFail", 0},
{"TCP4RecvErr", 0},
},
},
}
for _, tc := range testCases {
t.Run(tc.counterType, func(t *testing.T) {
tags := map[string]string{
"url": url,
"type": tc.counterType,
"source": host,
"port": port,
}
fields := map[string]interface{}{}
for _, val := range tc.values {
fields[val.fieldKey] = val.fieldValue
}
acc.AssertContainsTaggedFields(t, "bind_counter", fields, tags)
})
}
// Subtest for memory stats
t.Run("memory", func(t *testing.T) {
tags := map[string]string{
"url": url,
"source": host,
"port": port,
}
fields := map[string]interface{}{
"block_size": int64(45875200),
"context_size": int64(10037400),
"in_use": int64(6000232),
"lost": int64(0),
"total_use": int64(777821909),
}
acc.AssertContainsTaggedFields(t, "bind_memory", fields, tags)
})
// Subtest for per-context memory stats
t.Run("memory_context", func(t *testing.T) {
assert.True(t, acc.HasInt64Field("bind_memory_context", "total"))
assert.True(t, acc.HasInt64Field("bind_memory_context", "in_use"))
})
}
func TestBindUnparseableURL(t *testing.T) {
b := Bind{
Urls: []string{"://example.com"},
}
var acc testutil.Accumulator
err := acc.GatherError(b.Gather)
assert.Contains(t, err.Error(), "Unable to parse address")
}

View File

@@ -0,0 +1,176 @@
package bind
import (
"encoding/json"
"fmt"
"net"
"net/http"
"net/url"
"strings"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
)
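// jsonStats is populated incrementally from the /server, /net and /mem JSON endpoints.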
type jsonStats struct {
OpCodes map[string]int
QTypes map[string]int
RCodes map[string]int
ZoneStats map[string]int
NSStats map[string]int
SockStats map[string]int
Views map[string]jsonView
Memory jsonMemory
}
type jsonMemory struct {
TotalUse int64
InUse int64
BlockSize int64
ContextSize int64
Lost int64
Contexts []struct {
Id string
Name string
Total int64
InUse int64
}
}
type jsonView struct {
Resolver map[string]map[string]int
}
// addJSONCounter adds a map of counter values to the accumulator, applying the specified common tags.
func addJSONCounter(acc telegraf.Accumulator, commonTags map[string]string, stats map[string]int) {
grouper := metric.NewSeriesGrouper()
ts := time.Now()
for name, value := range stats {
if commonTags["type"] == "opcode" && strings.HasPrefix(name, "RESERVED") {
continue
}
tags := make(map[string]string)
// Create local copy of tags since maps are reference types
for k, v := range commonTags {
tags[k] = v
}
grouper.Add("bind_counter", tags, ts, name, value)
}
// Add grouped metrics
for _, metric := range grouper.Metrics() {
acc.AddMetric(metric)
}
}
// addStatsJSON walks a jsonStats struct and adds the values to the telegraf.Accumulator.
func (b *Bind) addStatsJSON(stats jsonStats, acc telegraf.Accumulator, urlTag string) {
grouper := metric.NewSeriesGrouper()
ts := time.Now()
tags := map[string]string{"url": urlTag}
host, port, _ := net.SplitHostPort(urlTag)
tags["source"] = host
tags["port"] = port
// Opcodes
tags["type"] = "opcode"
addJSONCounter(acc, tags, stats.OpCodes)
// RCodes stats
tags["type"] = "rcode"
addJSONCounter(acc, tags, stats.RCodes)
// Query RDATA types
tags["type"] = "qtype"
addJSONCounter(acc, tags, stats.QTypes)
// Nameserver stats
tags["type"] = "nsstat"
addJSONCounter(acc, tags, stats.NSStats)
// Socket statistics
tags["type"] = "sockstat"
addJSONCounter(acc, tags, stats.SockStats)
// Zonestats
tags["type"] = "zonestat"
addJSONCounter(acc, tags, stats.ZoneStats)
// Memory stats
fields := map[string]interface{}{
"total_use": stats.Memory.TotalUse,
"in_use": stats.Memory.InUse,
"block_size": stats.Memory.BlockSize,
"context_size": stats.Memory.ContextSize,
"lost": stats.Memory.Lost,
}
acc.AddGauge("bind_memory", fields, map[string]string{"url": urlTag, "source": host, "port": port})
// Detailed, per-context memory stats
if b.GatherMemoryContexts {
for _, c := range stats.Memory.Contexts {
tags := map[string]string{"url": urlTag, "id": c.Id, "name": c.Name, "source": host, "port": port}
fields := map[string]interface{}{"total": c.Total, "in_use": c.InUse}
acc.AddGauge("bind_memory_context", fields, tags)
}
}
// Detailed, per-view stats
if b.GatherViews {
for vName, view := range stats.Views {
for cntrType, counters := range view.Resolver {
for cntrName, value := range counters {
tags := map[string]string{
"url": urlTag,
"source": host,
"port": port,
"view": vName,
"type": cntrType,
}
grouper.Add("bind_counter", tags, ts, cntrName, value)
}
}
}
}
// Add grouped metrics
for _, metric := range grouper.Metrics() {
acc.AddMetric(metric)
}
}
// readStatsJSON takes a base URL to probe, and requests the individual statistics blobs that we
// are interested in. These individual blobs have a combined size which is significantly smaller
// than if we requested everything at once (e.g. taskmgr and socketmgr can be omitted).
func (b *Bind) readStatsJSON(addr *url.URL, acc telegraf.Accumulator) error {
var stats jsonStats
// Progressively build up full jsonStats struct by parsing the individual HTTP responses
for _, suffix := range [...]string{"/server", "/net", "/mem"} {
scrapeUrl := addr.String() + suffix
resp, err := client.Get(scrapeUrl)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("%s returned HTTP status: %s", scrapeUrl, resp.Status)
}
if err := json.NewDecoder(resp.Body).Decode(&stats); err != nil {
return fmt.Errorf("Unable to decode JSON blob: %s", err)
}
}
b.addStatsJSON(stats, acc, addr.Host)
return nil
}

plugins/inputs/bind/testdata/json/v1/mem
View File

@@ -0,0 +1,133 @@
{
"json-stats-version":"1.2",
"boot-time":"2017-07-28T13:24:53Z",
"config-time":"2017-07-28T13:24:53Z",
"current-time":"2017-07-28T15:33:07Z",
"memory":{
"TotalUse":18206566,
"InUse":3064368,
"BlockSize":13893632,
"ContextSize":3685480,
"Lost":0,
"contexts":[
{
"id":"0x55fb2e042de0",
"name":"main",
"references":202,
"total":2693003,
"inuse":1454904,
"maxinuse":1508072,
"blocksize":786432,
"pools":40,
"hiwater":0,
"lowater":0
},
{
"id":"0x55fb2e0507e0",
"name":"dst",
"references":1,
"total":387478,
"inuse":91776,
"maxinuse":97208,
"pools":0,
"hiwater":0,
"lowater":0
},
{
"id":"0x55fb2e0938e0",
"name":"zonemgr-pool",
"references":113,
"total":742986,
"inuse":143776,
"maxinuse":313961,
"blocksize":262144,
"pools":0,
"hiwater":0,
"lowater":0
},
{
"id":"0x7f19d00017d0",
"name":"threadkey",
"references":1,
"total":0,
"inuse":0,
"maxinuse":0,
"pools":0,
"hiwater":0,
"lowater":0
},
{
"id":"0x7f19d00475f0",
"name":"client",
"references":3,
"total":267800,
"inuse":8760,
"maxinuse":8760,
"blocksize":262144,
"pools":2,
"hiwater":0,
"lowater":0
},
{
"id":"0x7f19d00dfca0",
"name":"cache",
"references":8,
"total":288938,
"inuse":83650,
"maxinuse":83842,
"blocksize":262144,
"pools":0,
"hiwater":0,
"lowater":0
},
{
"id":"0x7f19d00eaa30",
"name":"cache_heap",
"references":18,
"total":393216,
"inuse":132096,
"maxinuse":132096,
"blocksize":262144,
"pools":0,
"hiwater":0,
"lowater":0
},
{
"id":"0x7f19d01094e0",
"name":"res0",
"references":1,
"total":262144,
"inuse":0,
"maxinuse":22048,
"blocksize":262144,
"pools":0,
"hiwater":0,
"lowater":0
},
{
"id":"0x7f19d0114270",
"name":"res1",
"references":1,
"total":0,
"inuse":0,
"maxinuse":0,
"blocksize":0,
"pools":0,
"hiwater":0,
"lowater":0
},
{
"id":"0x7f19d011f000",
"name":"res2",
"references":1,
"total":0,
"inuse":0,
"maxinuse":0,
"blocksize":0,
"pools":0,
"hiwater":0,
"lowater":0
}
]
}
}

plugins/inputs/bind/testdata/json/v1/net
View File

@@ -0,0 +1,241 @@
{
"json-stats-version":"1.2",
"boot-time":"2017-07-28T13:24:53Z",
"config-time":"2017-07-28T13:24:53Z",
"current-time":"2017-07-28T15:33:07Z",
"sockstats":{
"UDP4Open":335,
"UDP6Open":113,
"TCP4Open":118,
"TCP6Open":2,
"RawOpen":1,
"UDP4Close":333,
"UDP6Close":112,
"TCP4Close":119,
"UDP6ConnFail":112,
"UDP4Conn":333,
"TCP4Conn":114,
"TCP4Accept":6,
"UDP6SendErr":112,
"UDP4RecvErr":1,
"UDP4Active":2,
"UDP6Active":1,
"TCP4Active":10,
"TCP6Active":2,
"RawActive":1
},
"socketmgr":{
"sockets":[
{
"id":"0x7f19dd849010",
"references":1,
"type":"not-initialized",
"local-address":"<unknown address, family 16>",
"states":[
"bound"
]
},
{
"id":"0x7f19dd849268",
"references":1,
"type":"tcp",
"local-address":"0.0.0.0#8053",
"states":[
"listener",
"bound"
]
},
{
"id":"0x7f19dd849718",
"references":2,
"type":"udp",
"local-address":"::#53",
"states":[
"bound"
]
},
{
"id":"0x7f19dd849970",
"references":2,
"type":"tcp",
"local-address":"::#53",
"states":[
"listener",
"bound"
]
},
{
"id":"0x7f19dd849bc8",
"references":2,
"type":"udp",
"local-address":"127.0.0.1#53",
"states":[
"bound"
]
},
{
"id":"0x7f19dd6f4010",
"references":2,
"type":"tcp",
"local-address":"127.0.0.1#53",
"states":[
"listener",
"bound"
]
},
{
"id":"0x7f19dd6f4718",
"references":1,
"type":"tcp",
"local-address":"127.0.0.1#953",
"states":[
"listener",
"bound"
]
},
{
"id":"0x7f19dd6f4bc8",
"references":1,
"type":"tcp",
"local-address":"::1#953",
"states":[
"listener",
"bound"
]
},
{
"id":"0x7f19d4fb7970",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fb7bc8",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fc7010",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fc74c0",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fc7718",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fc7bc8",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fd1010",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fd1268",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fd14c0",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fd1718",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fd1970",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fd1bc8",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fd9010",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fda4c0",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fd9bc8",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fda268",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fd9970",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fda010",
"references":1,
"type":"udp",
"states":[
]
},
{
"id":"0x7f19d4fd9718",
"references":1,
"type":"udp",
"states":[
]
}
]
}
}

View File

@@ -0,0 +1,172 @@
{
"json-stats-version":"1.2",
"boot-time":"2017-07-28T13:24:53Z",
"config-time":"2017-07-28T13:24:53Z",
"current-time":"2017-07-28T15:33:07Z",
"opcodes":{
"QUERY":13,
"IQUERY":0,
"STATUS":0,
"RESERVED3":0,
"NOTIFY":0,
"UPDATE":0,
"RESERVED6":0,
"RESERVED7":0,
"RESERVED8":0,
"RESERVED9":0,
"RESERVED10":0,
"RESERVED11":0,
"RESERVED12":0,
"RESERVED13":0,
"RESERVED14":0,
"RESERVED15":0
},
"rcodes":{
"NOERROR":1732,
"FORMERR":0,
"SERVFAIL":6,
"NXDOMAIN":200,
"NOTIMP":0,
"REFUSED":0,
"YXDOMAIN":0,
"YXRRSET":0,
"NXRRSET":0,
"NOTAUTH":0,
"NOTZONE":0,
"RESERVED11":0,
"RESERVED12":0,
"RESERVED13":0,
"RESERVED14":0,
"RESERVED15":0,
"BADVERS":0,
"17":0,
"18":0,
"19":0,
"20":0,
"21":0,
"22":0,
"BADCOOKIE":0
},
"qtypes":{
"A":2,
"PTR":7,
"AAAA":2,
"SRV":2
},
"nsstats":{
"Requestv4":13,
"Response":12,
"QrySuccess":6,
"QryAuthAns":1,
"QryNoauthAns":10,
"QryNxrrset":1,
"QrySERVFAIL":1,
"QryNXDOMAIN":4,
"QryRecursion":12,
"QryDuplicate":1,
"QryUDP":13
},
"zonestats":{
"NotifyOutv4":8,
"NotifyInv4":5,
"SOAOutv4":5
},
"views":{
"_default":{
"resolver":{
"stats":{
"Queryv4":447,
"Queryv6":112,
"Responsev4":444,
"NXDOMAIN":3,
"Truncated":114,
"Retry":242,
"QueryTimeout":3,
"GlueFetchv4":61,
"GlueFetchv6":68,
"GlueFetchv6Fail":24,
"ValAttempt":36,
"ValOk":27,
"ValNegOk":9,
"QryRTT100":287,
"QryRTT500":152,
"QryRTT800":4,
"BucketSize":31
},
"qtypes":{
"A":220,
"NS":19,
"PTR":22,
"AAAA":233,
"SRV":14,
"DS":27,
"DNSKEY":24
},
"cache":{
"A":150,
"NS":44,
"PTR":3,
"AAAA":104,
"DS":23,
"RRSIG":94,
"NSEC":8,
"DNSKEY":7,
"!AAAA":23,
"!DS":5,
"NXDOMAIN":1
},
"cachestats":{
"CacheHits":1675,
"CacheMisses":44,
"QueryHits":17,
"QueryMisses":12,
"DeleteLRU":0,
"DeleteTTL":16,
"CacheNodes":219,
"CacheBuckets":129,
"TreeMemTotal":551082,
"TreeMemInUse":150704,
"HeapMemMax":132096,
"HeapMemTotal":393216,
"HeapMemInUse":132096
},
"adb":{
"nentries":1021,
"entriescnt":254,
"nnames":1021,
"namescnt":195
}
}
},
"_bind":{
"resolver":{
"stats":{
"BucketSize":31
},
"qtypes":{
},
"cache":{
},
"cachestats":{
"CacheHits":0,
"CacheMisses":0,
"QueryHits":0,
"QueryMisses":0,
"DeleteLRU":0,
"DeleteTTL":0,
"CacheNodes":0,
"CacheBuckets":64,
"TreeMemTotal":287392,
"TreeMemInUse":29608,
"HeapMemMax":1024,
"HeapMemTotal":262144,
"HeapMemInUse":1024
},
"adb":{
"nentries":1021,
"nnames":1021
}
}
}
}
}

plugins/inputs/bind/testdata/xml/v2
View File

@@ -0,0 +1,926 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="/bind9.xsl"?>
<isc version="1.0">
<bind>
<statistics version="2.2">
<views>
<view>
<name>_default</name>
<rdtype>
<name>A</name>
<counter>2936881</counter>
</rdtype>
<rdtype>
<name>NS</name>
<counter>28994</counter>
</rdtype>
<rdtype>
<name>CNAME</name>
<counter>26</counter>
</rdtype>
<rdtype>
<name>SOA</name>
<counter>15131</counter>
</rdtype>
<rdtype>
<name>PTR</name>
<counter>47924</counter>
</rdtype>
<rdtype>
<name>MX</name>
<counter>1884</counter>
</rdtype>
<rdtype>
<name>TXT</name>
<counter>6486</counter>
</rdtype>
<rdtype>
<name>AAAA</name>
<counter>949781</counter>
</rdtype>
<rdtype>
<name>SRV</name>
<counter>14740</counter>
</rdtype>
<rdtype>
<name>NAPTR</name>
<counter>1606</counter>
</rdtype>
<rdtype>
<name>DS</name>
<counter>25</counter>
</rdtype>
<rdtype>
<name>SSHFP</name>
<counter>185</counter>
</rdtype>
<rdtype>
<name>DNSKEY</name>
<counter>13</counter>
</rdtype>
<rdtype>
<name>ANY</name>
<counter>1</counter>
</rdtype>
<resstat>
<name>Queryv4</name>
<counter>3765426</counter>
</resstat>
<resstat>
<name>Queryv6</name>
<counter>238251</counter>
</resstat>
<resstat>
<name>Responsev4</name>
<counter>3716142</counter>
</resstat>
<resstat>
<name>Responsev6</name>
<counter>1</counter>
</resstat>
<resstat>
<name>NXDOMAIN</name>
<counter>100052</counter>
</resstat>
<resstat>
<name>SERVFAIL</name>
<counter>5894</counter>
</resstat>
<resstat>
<name>FORMERR</name>
<counter>2041</counter>
</resstat>
<resstat>
<name>OtherError</name>
<counter>14801</counter>
</resstat>
<resstat>
<name>EDNS0Fail</name>
<counter>2615</counter>
</resstat>
<resstat>
<name>Mismatch</name>
<counter>0</counter>
</resstat>
<resstat>
<name>Truncated</name>
<counter>598</counter>
</resstat>
<resstat>
<name>Lame</name>
<counter>117</counter>
</resstat>
<resstat>
<name>Retry</name>
<counter>383343</counter>
</resstat>
<resstat>
<name>QueryAbort</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QuerySockFail</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QueryTimeout</name>
<counter>50874</counter>
</resstat>
<resstat>
<name>GlueFetchv4</name>
<counter>260749</counter>
</resstat>
<resstat>
<name>GlueFetchv6</name>
<counter>225310</counter>
</resstat>
<resstat>
<name>GlueFetchv4Fail</name>
<counter>5756</counter>
</resstat>
<resstat>
<name>GlueFetchv6Fail</name>
<counter>141500</counter>
</resstat>
<resstat>
<name>ValAttempt</name>
<counter>0</counter>
</resstat>
<resstat>
<name>ValOk</name>
<counter>0</counter>
</resstat>
<resstat>
<name>ValNegOk</name>
<counter>0</counter>
</resstat>
<resstat>
<name>ValFail</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QryRTT10</name>
<counter>458176</counter>
</resstat>
<resstat>
<name>QryRTT100</name>
<counter>3010133</counter>
</resstat>
<resstat>
<name>QryRTT500</name>
<counter>244312</counter>
</resstat>
<resstat>
<name>QryRTT800</name>
<counter>1275</counter>
</resstat>
<resstat>
<name>QryRTT1600</name>
<counter>361</counter>
</resstat>
<resstat>
<name>QryRTT1600+</name>
<counter>236</counter>
</resstat>
<cache name="_default">
<rrset>
<name>A</name>
<counter>2700</counter>
</rrset>
<rrset>
<name>NS</name>
<counter>759</counter>
</rrset>
<rrset>
<name>CNAME</name>
<counter>486</counter>
</rrset>
<rrset>
<name>SOA</name>
<counter>2</counter>
</rrset>
<rrset>
<name>PTR</name>
<counter>6</counter>
</rrset>
<rrset>
<name>TXT</name>
<counter>2</counter>
</rrset>
<rrset>
<name>AAAA</name>
<counter>629</counter>
</rrset>
<rrset>
<name>SRV</name>
<counter>1</counter>
</rrset>
<rrset>
<name>DS</name>
<counter>48</counter>
</rrset>
<rrset>
<name>RRSIG</name>
<counter>203</counter>
</rrset>
<rrset>
<name>NSEC</name>
<counter>22</counter>
</rrset>
<rrset>
<name>DNSKEY</name>
<counter>1</counter>
</rrset>
<rrset>
<name>!A</name>
<counter>6</counter>
</rrset>
<rrset>
<name>!SOA</name>
<counter>26</counter>
</rrset>
<rrset>
<name>!AAAA</name>
<counter>84</counter>
</rrset>
<rrset>
<name>!NAPTR</name>
<counter>3</counter>
</rrset>
<rrset>
<name>NXDOMAIN</name>
<counter>143</counter>
</rrset>
</cache>
</view>
<view>
<name>_bind</name>
<resstat>
<name>Queryv4</name>
<counter>0</counter>
</resstat>
<resstat>
<name>Queryv6</name>
<counter>0</counter>
</resstat>
<resstat>
<name>Responsev4</name>
<counter>0</counter>
</resstat>
<resstat>
<name>Responsev6</name>
<counter>0</counter>
</resstat>
<resstat>
<name>NXDOMAIN</name>
<counter>0</counter>
</resstat>
<resstat>
<name>SERVFAIL</name>
<counter>0</counter>
</resstat>
<resstat>
<name>FORMERR</name>
<counter>0</counter>
</resstat>
<resstat>
<name>OtherError</name>
<counter>0</counter>
</resstat>
<resstat>
<name>EDNS0Fail</name>
<counter>0</counter>
</resstat>
<resstat>
<name>Mismatch</name>
<counter>0</counter>
</resstat>
<resstat>
<name>Truncated</name>
<counter>0</counter>
</resstat>
<resstat>
<name>Lame</name>
<counter>0</counter>
</resstat>
<resstat>
<name>Retry</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QueryAbort</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QuerySockFail</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QueryTimeout</name>
<counter>0</counter>
</resstat>
<resstat>
<name>GlueFetchv4</name>
<counter>0</counter>
</resstat>
<resstat>
<name>GlueFetchv6</name>
<counter>0</counter>
</resstat>
<resstat>
<name>GlueFetchv4Fail</name>
<counter>0</counter>
</resstat>
<resstat>
<name>GlueFetchv6Fail</name>
<counter>0</counter>
</resstat>
<resstat>
<name>ValAttempt</name>
<counter>0</counter>
</resstat>
<resstat>
<name>ValOk</name>
<counter>0</counter>
</resstat>
<resstat>
<name>ValNegOk</name>
<counter>0</counter>
</resstat>
<resstat>
<name>ValFail</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QryRTT10</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QryRTT100</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QryRTT500</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QryRTT800</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QryRTT1600</name>
<counter>0</counter>
</resstat>
<resstat>
<name>QryRTT1600+</name>
<counter>0</counter>
</resstat>
<cache name="_bind"/>
</view>
</views>
<server>
<boot-time>2016-10-02T18:45:00Z</boot-time>
<current-time>2016-10-23T19:27:48Z</current-time>
<requests>
<opcode>
<name>QUERY</name>
<counter>102312374</counter>
</opcode>
<opcode>
<name>UPDATE</name>
<counter>238</counter>
</opcode>
</requests>
<queries-in>
<rdtype>
<name>A</name>
<counter>58951432</counter>
</rdtype>
<rdtype>
<name>NS</name>
<counter>1999</counter>
</rdtype>
<rdtype>
<name>CNAME</name>
<counter>531</counter>
</rdtype>
<rdtype>
<name>SOA</name>
<counter>100415</counter>
</rdtype>
<rdtype>
<name>PTR</name>
<counter>4211487</counter>
</rdtype>
<rdtype>
<name>MX</name>
<counter>441155</counter>
</rdtype>
<rdtype>
<name>TXT</name>
<counter>34628</counter>
</rdtype>
<rdtype>
<name>AAAA</name>
<counter>37786321</counter>
</rdtype>
<rdtype>
<name>SRV</name>
<counter>741082</counter>
</rdtype>
<rdtype>
<name>NAPTR</name>
<counter>39137</counter>
</rdtype>
<rdtype>
<name>DS</name>
<counter>584</counter>
</rdtype>
<rdtype>
<name>SSHFP</name>
<counter>2987</counter>
</rdtype>
<rdtype>
<name>DNSKEY</name>
<counter>452</counter>
</rdtype>
<rdtype>
<name>IXFR</name>
<counter>157</counter>
</rdtype>
<rdtype>
<name>ANY</name>
<counter>7</counter>
</rdtype>
</queries-in>
<nsstat>
<name>Requestv4</name>
<counter>102312611</counter>
</nsstat>
<nsstat>
<name>Requestv6</name>
<counter>1</counter>
</nsstat>
<nsstat>
<name>ReqEdns0</name>
<counter>441758</counter>
</nsstat>
<nsstat>
<name>ReqBadEDNSVer</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>ReqTSIG</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>ReqSIG0</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>ReqBadSIG</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>ReqTCP</name>
<counter>1548156</counter>
</nsstat>
<nsstat>
<name>AuthQryRej</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>RecQryRej</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>XfrRej</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>UpdateRej</name>
<counter>238</counter>
</nsstat>
<nsstat>
<name>Response</name>
<counter>102301560</counter>
</nsstat>
<nsstat>
<name>TruncatedResp</name>
<counter>3787</counter>
</nsstat>
<nsstat>
<name>RespEDNS0</name>
<counter>441748</counter>
</nsstat>
<nsstat>
<name>RespTSIG</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>RespSIG0</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>QrySuccess</name>
<counter>63811668</counter>
</nsstat>
<nsstat>
<name>QryAuthAns</name>
<counter>72180718</counter>
</nsstat>
<nsstat>
<name>QryNoauthAns</name>
<counter>30106182</counter>
</nsstat>
<nsstat>
<name>QryReferral</name>
<counter>3</counter>
</nsstat>
<nsstat>
<name>QryNxrrset</name>
<counter>24423133</counter>
</nsstat>
<nsstat>
<name>QrySERVFAIL</name>
<counter>14422</counter>
</nsstat>
<nsstat>
<name>QryFORMERR</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>QryNXDOMAIN</name>
<counter>14052096</counter>
</nsstat>
<nsstat>
<name>QryRecursion</name>
<counter>2104239</counter>
</nsstat>
<nsstat>
<name>QryDuplicate</name>
<counter>10879</counter>
</nsstat>
<nsstat>
<name>QryDropped</name>
<counter>16</counter>
</nsstat>
<nsstat>
<name>QryFailure</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>XfrReqDone</name>
<counter>157</counter>
</nsstat>
<nsstat>
<name>UpdateReqFwd</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>UpdateRespFwd</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>UpdateFwdFail</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>UpdateDone</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>UpdateFail</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>UpdateBadPrereq</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>RPZRewrites</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>RateDropped</name>
<counter>0</counter>
</nsstat>
<nsstat>
<name>RateSlipped</name>
<counter>0</counter>
</nsstat>
<zonestat>
<name>NotifyOutv4</name>
<counter>663</counter>
</zonestat>
<zonestat>
<name>NotifyOutv6</name>
<counter>0</counter>
</zonestat>
<zonestat>
<name>NotifyInv4</name>
<counter>0</counter>
</zonestat>
<zonestat>
<name>NotifyInv6</name>
<counter>0</counter>
</zonestat>
<zonestat>
<name>NotifyRej</name>
<counter>0</counter>
</zonestat>
<zonestat>
<name>SOAOutv4</name>
<counter>386</counter>
</zonestat>
<zonestat>
<name>SOAOutv6</name>
<counter>0</counter>
</zonestat>
<zonestat>
<name>AXFRReqv4</name>
<counter>0</counter>
</zonestat>
<zonestat>
<name>AXFRReqv6</name>
<counter>0</counter>
</zonestat>
<zonestat>
<name>IXFRReqv4</name>
<counter>0</counter>
</zonestat>
<zonestat>
<name>IXFRReqv6</name>
<counter>0</counter>
</zonestat>
<zonestat>
<name>XfrSuccess</name>
<counter>0</counter>
</zonestat>
<zonestat>
<name>XfrFail</name>
<counter>0</counter>
</zonestat>
<resstat>
<name>Mismatch</name>
<counter>2</counter>
</resstat>
<sockstat>
<name>UDP4Open</name>
<counter>3765532</counter>
</sockstat>
<sockstat>
<name>UDP6Open</name>
<counter>238269</counter>
</sockstat>
<sockstat>
<name>TCP4Open</name>
<counter>602</counter>
</sockstat>
<sockstat>
<name>TCP6Open</name>
<counter>2</counter>
</sockstat>
<sockstat>
<name>UnixOpen</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UDP4OpenFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UDP6OpenFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>TCP4OpenFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>TCP6OpenFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UnixOpenFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UDP4Close</name>
<counter>3765528</counter>
</sockstat>
<sockstat>
<name>UDP6Close</name>
<counter>238267</counter>
</sockstat>
<sockstat>
<name>TCP4Close</name>
<counter>1548268</counter>
</sockstat>
<sockstat>
<name>TCP6Close</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UnixClose</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>FDWatchClose</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UDP4BindFail</name>
<counter>219</counter>
</sockstat>
<sockstat>
<name>UDP6BindFail</name>
<counter>16</counter>
</sockstat>
<sockstat>
<name>TCP4BindFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>TCP6BindFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UnixBindFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>FdwatchBindFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UDP4ConnFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UDP6ConnFail</name>
<counter>238250</counter>
</sockstat>
<sockstat>
<name>TCP4ConnFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>TCP6ConnFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UnixConnFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>FDwatchConnFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UDP4Conn</name>
<counter>3764828</counter>
</sockstat>
<sockstat>
<name>UDP6Conn</name>
<counter>1</counter>
</sockstat>
<sockstat>
<name>TCP4Conn</name>
<counter>590</counter>
</sockstat>
<sockstat>
<name>TCP6Conn</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UnixConn</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>FDwatchConn</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>TCP4AcceptFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>TCP6AcceptFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UnixAcceptFail</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>TCP4Accept</name>
<counter>1547672</counter>
</sockstat>
<sockstat>
<name>TCP6Accept</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UnixAccept</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UDP4SendErr</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UDP6SendErr</name>
<counter>238250</counter>
</sockstat>
<sockstat>
<name>TCP4SendErr</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>TCP6SendErr</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UnixSendErr</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>FDwatchSendErr</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UDP4RecvErr</name>
<counter>1650</counter>
</sockstat>
<sockstat>
<name>UDP6RecvErr</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>TCP4RecvErr</name>
<counter>1</counter>
</sockstat>
<sockstat>
<name>TCP6RecvErr</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>UnixRecvErr</name>
<counter>0</counter>
</sockstat>
<sockstat>
<name>FDwatchRecvErr</name>
<counter>0</counter>
</sockstat>
</server>
<memory>
<contexts>
<context>
<id>0x7f8a94e061d0</id>
<name>main</name>
<references>229</references>
<total>5002528</total>
<inuse>3662792</inuse>
<maxinuse>4848264</maxinuse>
<blocksize>2359296</blocksize>
<pools>75</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
<context>
<id>0x7f8a94e13830</id>
<name>dst</name>
<references>1</references>
<total>133486</total>
<inuse>96456</inuse>
<maxinuse>102346</maxinuse>
<blocksize>-</blocksize>
<pools>0</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
<context>
<id>0x7f8a94e401c0</id>
<name>zonemgr-pool</name>
<references>501</references>
<total>6339848</total>
<inuse>4384240</inuse>
<maxinuse>5734049</maxinuse>
<blocksize>6029312</blocksize>
<pools>0</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
</contexts>
<summary>
<TotalUse>81804609</TotalUse>
<InUse>20772579</InUse>
<BlockSize>77070336</BlockSize>
<ContextSize>6663840</ContextSize>
<Lost>0</Lost>
</summary>
</memory>
</statistics>
</bind>
</isc>

142
plugins/inputs/bind/testdata/xml/v3/mem vendored Normal file
View File

@@ -0,0 +1,142 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="/bind9.xsl"?>
<statistics version="3.6">
<server>
<boot-time>2017-07-21T11:53:28Z</boot-time>
<config-time>2017-07-21T11:53:28Z</config-time>
<current-time>2017-07-25T23:47:08Z</current-time>
</server>
<views>
</views>
<memory>
<contexts>
<context>
<id>0x55fb2e042de0</id>
<name>main</name>
<references>202</references>
<total>2706043</total>
<inuse>1454904</inuse>
<maxinuse>1508072</maxinuse>
<blocksize>786432</blocksize>
<pools>40</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
<context>
<id>0x55fb2e0507e0</id>
<name>dst</name>
<references>1</references>
<total>387478</total>
<inuse>91776</inuse>
<maxinuse>97208</maxinuse>
<blocksize>-</blocksize>
<pools>0</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
<context>
<id>0x55fb2e0938e0</id>
<name>zonemgr-pool</name>
<references>113</references>
<total>742986</total>
<inuse>143776</inuse>
<maxinuse>313961</maxinuse>
<blocksize>262144</blocksize>
<pools>0</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
<context>
<id>0x7f19d00017d0</id>
<name>threadkey</name>
<references>1</references>
<total>0</total>
<inuse>0</inuse>
<maxinuse>0</maxinuse>
<blocksize>-</blocksize>
<pools>0</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
<context>
<id>0x7f19d00475f0</id>
<name>client</name>
<references>3</references>
<total>267800</total>
<inuse>8760</inuse>
<maxinuse>8760</maxinuse>
<blocksize>262144</blocksize>
<pools>2</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
<context>
<id>0x7f19d00dfca0</id>
<name>cache</name>
<references>8</references>
<total>288938</total>
<inuse>83650</inuse>
<maxinuse>83842</maxinuse>
<blocksize>262144</blocksize>
<pools>0</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
<context>
<id>0x7f19d00eaa30</id>
<name>cache_heap</name>
<references>18</references>
<total>393216</total>
<inuse>132096</inuse>
<maxinuse>132096</maxinuse>
<blocksize>262144</blocksize>
<pools>0</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
<context>
<id>0x7f19d01094e0</id>
<name>res0</name>
<references>1</references>
<total>262144</total>
<inuse>0</inuse>
<maxinuse>22048</maxinuse>
<blocksize>262144</blocksize>
<pools>0</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
<context>
<id>0x7f19d0114270</id>
<name>res1</name>
<references>1</references>
<total>0</total>
<inuse>0</inuse>
<maxinuse>0</maxinuse>
<blocksize>0</blocksize>
<pools>0</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
<context>
<id>0x7f19d011f000</id>
<name>res2</name>
<references>1</references>
<total>0</total>
<inuse>0</inuse>
<maxinuse>0</maxinuse>
<blocksize>0</blocksize>
<pools>0</pools>
<hiwater>0</hiwater>
<lowater>0</lowater>
</context>
</contexts>
<summary>
<TotalUse>777821909</TotalUse>
<InUse>6000232</InUse>
<BlockSize>45875200</BlockSize>
<ContextSize>10037400</ContextSize>
<Lost>0</Lost>
</summary>
</memory>
</statistics>

156
plugins/inputs/bind/testdata/xml/v3/net vendored Normal file
View File

@@ -0,0 +1,156 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="/bind9.xsl"?>
<statistics version="3.6">
<server>
<boot-time>2017-07-21T11:53:28Z</boot-time>
<config-time>2017-07-21T11:53:28Z</config-time>
<current-time>2017-07-25T23:47:08Z</current-time>
<counters type="sockstat">
<counter name="UDP4Open">92542</counter>
<counter name="UDP6Open">0</counter>
<counter name="TCP4Open">48</counter>
<counter name="TCP6Open">0</counter>
<counter name="UnixOpen">0</counter>
<counter name="RawOpen">1</counter>
<counter name="UDP4OpenFail">0</counter>
<counter name="UDP6OpenFail">0</counter>
<counter name="TCP4OpenFail">0</counter>
<counter name="TCP6OpenFail">0</counter>
<counter name="UnixOpenFail">0</counter>
<counter name="RawOpenFail">0</counter>
<counter name="UDP4Close">92538</counter>
<counter name="UDP6Close">0</counter>
<counter name="TCP4Close">336</counter>
<counter name="TCP6Close">0</counter>
<counter name="UnixClose">0</counter>
<counter name="FDWatchClose">0</counter>
<counter name="RawClose">0</counter>
<counter name="UDP4BindFail">1</counter>
<counter name="UDP6BindFail">0</counter>
<counter name="TCP4BindFail">0</counter>
<counter name="TCP6BindFail">0</counter>
<counter name="UnixBindFail">0</counter>
<counter name="FdwatchBindFail">0</counter>
<counter name="UDP4ConnFail">0</counter>
<counter name="UDP6ConnFail">0</counter>
<counter name="TCP4ConnFail">0</counter>
<counter name="TCP6ConnFail">0</counter>
<counter name="UnixConnFail">0</counter>
<counter name="FDwatchConnFail">0</counter>
<counter name="UDP4Conn">92535</counter>
<counter name="UDP6Conn">0</counter>
<counter name="TCP4Conn">44</counter>
<counter name="TCP6Conn">0</counter>
<counter name="UnixConn">0</counter>
<counter name="FDwatchConn">0</counter>
<counter name="TCP4AcceptFail">0</counter>
<counter name="TCP6AcceptFail">0</counter>
<counter name="UnixAcceptFail">0</counter>
<counter name="TCP4Accept">293</counter>
<counter name="TCP6Accept">0</counter>
<counter name="UnixAccept">0</counter>
<counter name="UDP4SendErr">0</counter>
<counter name="UDP6SendErr">0</counter>
<counter name="TCP4SendErr">0</counter>
<counter name="TCP6SendErr">0</counter>
<counter name="UnixSendErr">0</counter>
<counter name="FDwatchSendErr">0</counter>
<counter name="UDP4RecvErr">14</counter>
<counter name="UDP6RecvErr">0</counter>
<counter name="TCP4RecvErr">0</counter>
<counter name="TCP6RecvErr">0</counter>
<counter name="UnixRecvErr">0</counter>
<counter name="FDwatchRecvErr">0</counter>
<counter name="RawRecvErr">0</counter>
<counter name="UDP4Active">4</counter>
<counter name="UDP6Active">0</counter>
<counter name="TCP4Active">297</counter>
<counter name="TCP6Active">0</counter>
<counter name="UnixActive">0</counter>
<counter name="RawActive">1</counter>
</counters>
</server>
<views>
</views>
<socketmgr>
<sockets>
<socket>
<id>0x7f19dd849010</id>
<references>1</references>
<type>not-initialized</type>
<local-address>&lt;unknown address, family 16&gt;</local-address>
<states>
<state>bound</state>
</states>
</socket>
<socket>
<id>0x7f19dd849268</id>
<references>1</references>
<type>tcp</type>
<local-address>0.0.0.0#8053</local-address>
<states>
<state>listener</state>
<state>bound</state>
</states>
</socket>
<socket>
<id>0x7f19dd849718</id>
<references>2</references>
<type>udp</type>
<local-address>::#53</local-address>
<states>
<state>bound</state>
</states>
</socket>
<socket>
<id>0x7f19dd849970</id>
<references>2</references>
<type>tcp</type>
<local-address>::#53</local-address>
<states>
<state>listener</state>
<state>bound</state>
</states>
</socket>
<socket>
<id>0x7f19dd849bc8</id>
<references>2</references>
<type>udp</type>
<local-address>127.0.0.1#53</local-address>
<states>
<state>bound</state>
</states>
</socket>
<socket>
<id>0x7f19dd6f4010</id>
<references>2</references>
<type>tcp</type>
<local-address>127.0.0.1#53</local-address>
<states>
<state>listener</state>
<state>bound</state>
</states>
</socket>
<socket>
<id>0x7f19dd6f4718</id>
<references>1</references>
<type>tcp</type>
<local-address>127.0.0.1#953</local-address>
<states>
<state>listener</state>
<state>bound</state>
</states>
</socket>
<socket>
<id>0x7f19dd6f4bc8</id>
<references>1</references>
<type>tcp</type>
<local-address>::1#953</local-address>
<states>
<state>listener</state>
<state>bound</state>
</states>
</socket>
</sockets>
</socketmgr>
</statistics>

328
plugins/inputs/bind/testdata/xml/v3/server vendored Normal file
View File

@@ -0,0 +1,328 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="/bind9.xsl"?>
<statistics version="3.6">
<server>
<boot-time>2017-07-21T11:53:28Z</boot-time>
<config-time>2017-07-21T11:53:28Z</config-time>
<current-time>2017-07-25T23:47:08Z</current-time>
<counters type="opcode">
<counter name="QUERY">74941</counter>
<counter name="IQUERY">0</counter>
<counter name="STATUS">0</counter>
<counter name="RESERVED3">0</counter>
<counter name="NOTIFY">0</counter>
<counter name="UPDATE">0</counter>
<counter name="RESERVED6">0</counter>
<counter name="RESERVED7">0</counter>
<counter name="RESERVED8">0</counter>
<counter name="RESERVED9">0</counter>
<counter name="RESERVED10">0</counter>
<counter name="RESERVED11">0</counter>
<counter name="RESERVED12">0</counter>
<counter name="RESERVED13">0</counter>
<counter name="RESERVED14">0</counter>
<counter name="RESERVED15">0</counter>
</counters>
<counters type="qtype">
<counter name="A">63672</counter>
<counter name="NS">373</counter>
<counter name="SOA">18</counter>
<counter name="PTR">3393</counter>
<counter name="MX">618</counter>
<counter name="TXT">970</counter>
<counter name="AAAA">5735</counter>
<counter name="SRV">139</counter>
<counter name="RRSIG">1</counter>
<counter name="ANY">22</counter>
</counters>
<counters type="nsstat">
<counter name="Requestv4">74942</counter>
<counter name="Requestv6">0</counter>
<counter name="ReqEdns0">9250</counter>
<counter name="ReqBadEDNSVer">0</counter>
<counter name="ReqTSIG">0</counter>
<counter name="ReqSIG0">0</counter>
<counter name="ReqBadSIG">0</counter>
<counter name="ReqTCP">260</counter>
<counter name="AuthQryRej">0</counter>
<counter name="RecQryRej">35</counter>
<counter name="XfrRej">0</counter>
<counter name="UpdateRej">0</counter>
<counter name="Response">63264</counter>
<counter name="TruncatedResp">365</counter>
<counter name="RespEDNS0">9250</counter>
<counter name="RespTSIG">0</counter>
<counter name="RespSIG0">0</counter>
<counter name="QrySuccess">49044</counter>
<counter name="QryAuthAns">2752</counter>
<counter name="QryNoauthAns">60354</counter>
<counter name="QryReferral">0</counter>
<counter name="QryNxrrset">2452</counter>
<counter name="QrySERVFAIL">122</counter>
<counter name="QryFORMERR">0</counter>
<counter name="QryNXDOMAIN">11610</counter>
<counter name="QryRecursion">53750</counter>
<counter name="QryDuplicate">11667</counter>
<counter name="QryDropped">11</counter>
<counter name="QryFailure">35</counter>
<counter name="XfrReqDone">0</counter>
<counter name="UpdateReqFwd">0</counter>
<counter name="UpdateRespFwd">0</counter>
<counter name="UpdateFwdFail">0</counter>
<counter name="UpdateDone">0</counter>
<counter name="UpdateFail">0</counter>
<counter name="UpdateBadPrereq">0</counter>
<counter name="RecursClients">0</counter>
<counter name="DNS64">0</counter>
<counter name="RateDropped">0</counter>
<counter name="RateSlipped">0</counter>
<counter name="RPZRewrites">0</counter>
<counter name="QryUDP">74648</counter>
<counter name="QryTCP">258</counter>
<counter name="NSIDOpt">0</counter>
<counter name="ExpireOpt">0</counter>
<counter name="OtherOpt">59</counter>
<counter name="SitOpt">0</counter>
<counter name="SitNew">0</counter>
<counter name="SitBadSize">0</counter>
<counter name="SitBadTime">0</counter>
<counter name="SitNoMatch">0</counter>
<counter name="SitMatch">0</counter>
</counters>
<counters type="zonestat">
<counter name="NotifyOutv4">2</counter>
<counter name="NotifyOutv6">0</counter>
<counter name="NotifyInv4">0</counter>
<counter name="NotifyInv6">0</counter>
<counter name="NotifyRej">0</counter>
<counter name="SOAOutv4">0</counter>
<counter name="SOAOutv6">0</counter>
<counter name="AXFRReqv4">0</counter>
<counter name="AXFRReqv6">0</counter>
<counter name="IXFRReqv4">0</counter>
<counter name="IXFRReqv6">0</counter>
<counter name="XfrSuccess">0</counter>
<counter name="XfrFail">0</counter>
</counters>
<counters type="resstat"/>
</server>
<views>
<view name="_default">
<counters type="resqtype">
<counter name="A">61568</counter>
<counter name="NS">9126</counter>
<counter name="PTR">1249</counter>
<counter name="MX">286</counter>
<counter name="TXT">942</counter>
<counter name="AAAA">3933</counter>
<counter name="SRV">21</counter>
<counter name="DS">13749</counter>
<counter name="DNSKEY">1699</counter>
</counters>
<counters type="resstats">
<counter name="Queryv4">92573</counter>
<counter name="Queryv6">0</counter>
<counter name="Responsev4">92135</counter>
<counter name="Responsev6">0</counter>
<counter name="NXDOMAIN">8182</counter>
<counter name="SERVFAIL">318</counter>
<counter name="FORMERR">0</counter>
<counter name="OtherError">0</counter>
<counter name="EDNS0Fail">0</counter>
<counter name="Mismatch">0</counter>
<counter name="Truncated">42</counter>
<counter name="Lame">12</counter>
<counter name="Retry">800</counter>
<counter name="QueryAbort">0</counter>
<counter name="QuerySockFail">0</counter>
<counter name="QueryCurUDP">0</counter>
<counter name="QueryCurTCP">0</counter>
<counter name="QueryTimeout">490</counter>
<counter name="GlueFetchv4">1398</counter>
<counter name="GlueFetchv6">0</counter>
<counter name="GlueFetchv4Fail">3</counter>
<counter name="GlueFetchv6Fail">0</counter>
<counter name="ValAttempt">90256</counter>
<counter name="ValOk">67322</counter>
<counter name="ValNegOk">22850</counter>
<counter name="ValFail">6</counter>
<counter name="QryRTT10">0</counter>
<counter name="QryRTT100">45760</counter>
<counter name="QryRTT500">45543</counter>
<counter name="QryRTT800">743</counter>
<counter name="QryRTT1600">75</counter>
<counter name="QryRTT1600+">0</counter>
<counter name="NumFetch">0</counter>
<counter name="BucketSize">31</counter>
<counter name="REFUSED">34</counter>
<counter name="SitClientOut">0</counter>
<counter name="SitOut">0</counter>
<counter name="SitIn">0</counter>
<counter name="SitClientOk">0</counter>
<counter name="BadEDNSVersion">0</counter>
<counter name="ZoneQuota">0</counter>
<counter name="ServerQuota">0</counter>
</counters>
<cache name="internal">
<rrset>
<name>A</name>
<counter>195</counter>
</rrset>
<rrset>
<name>NS</name>
<counter>42</counter>
</rrset>
<rrset>
<name>CNAME</name>
<counter>7</counter>
</rrset>
<rrset>
<name>PTR</name>
<counter>48</counter>
</rrset>
<rrset>
<name>MX</name>
<counter>7</counter>
</rrset>
<rrset>
<name>TXT</name>
<counter>6</counter>
</rrset>
<rrset>
<name>AAAA</name>
<counter>4</counter>
</rrset>
<rrset>
<name>DS</name>
<counter>97</counter>
</rrset>
<rrset>
<name>RRSIG</name>
<counter>258</counter>
</rrset>
<rrset>
<name>NSEC</name>
<counter>89</counter>
</rrset>
<rrset>
<name>DNSKEY</name>
<counter>60</counter>
</rrset>
<rrset>
<name>!DS</name>
<counter>29</counter>
</rrset>
<rrset>
<name>NXDOMAIN</name>
<counter>25</counter>
</rrset>
</cache>
<counters type="adbstat">
<counter name="nentries">1021</counter>
<counter name="entriescnt">314</counter>
<counter name="nnames">1021</counter>
<counter name="namescnt">316</counter>
</counters>
<counters type="cachestats">
<counter name="CacheHits">1904593</counter>
<counter name="CacheMisses">96</counter>
<counter name="QueryHits">336094</counter>
<counter name="QueryMisses">369336</counter>
<counter name="DeleteLRU">0</counter>
<counter name="DeleteTTL">47518</counter>
<counter name="CacheNodes">769</counter>
<counter name="CacheBuckets">519</counter>
<counter name="TreeMemTotal">1464363</counter>
<counter name="TreeMemInUse">392128</counter>
<counter name="TreeMemMax">828966</counter>
<counter name="HeapMemTotal">393216</counter>
<counter name="HeapMemInUse">132096</counter>
<counter name="HeapMemMax">132096</counter>
</counters>
</view>
<view name="_bind">
<zones>
<zone name="authors.bind" rdataclass="CH">
<serial>0</serial>
</zone>
<zone name="hostname.bind" rdataclass="CH">
<serial>0</serial>
</zone>
<zone name="version.bind" rdataclass="CH">
<serial>0</serial>
</zone>
<zone name="id.server" rdataclass="CH">
<serial>0</serial>
</zone>
</zones>
<counters type="resqtype"/>
<counters type="resstats">
<counter name="Queryv4">0</counter>
<counter name="Queryv6">0</counter>
<counter name="Responsev4">0</counter>
<counter name="Responsev6">0</counter>
<counter name="NXDOMAIN">0</counter>
<counter name="SERVFAIL">0</counter>
<counter name="FORMERR">0</counter>
<counter name="OtherError">0</counter>
<counter name="EDNS0Fail">0</counter>
<counter name="Mismatch">0</counter>
<counter name="Truncated">0</counter>
<counter name="Lame">0</counter>
<counter name="Retry">0</counter>
<counter name="QueryAbort">0</counter>
<counter name="QuerySockFail">0</counter>
<counter name="QueryCurUDP">0</counter>
<counter name="QueryCurTCP">0</counter>
<counter name="QueryTimeout">0</counter>
<counter name="GlueFetchv4">0</counter>
<counter name="GlueFetchv6">0</counter>
<counter name="GlueFetchv4Fail">0</counter>
<counter name="GlueFetchv6Fail">0</counter>
<counter name="ValAttempt">0</counter>
<counter name="ValOk">0</counter>
<counter name="ValNegOk">0</counter>
<counter name="ValFail">0</counter>
<counter name="QryRTT10">0</counter>
<counter name="QryRTT100">0</counter>
<counter name="QryRTT500">0</counter>
<counter name="QryRTT800">0</counter>
<counter name="QryRTT1600">0</counter>
<counter name="QryRTT1600+">0</counter>
<counter name="NumFetch">0</counter>
<counter name="BucketSize">31</counter>
<counter name="REFUSED">0</counter>
<counter name="SitClientOut">0</counter>
<counter name="SitOut">0</counter>
<counter name="SitIn">0</counter>
<counter name="SitClientOk">0</counter>
<counter name="BadEDNSVersion">0</counter>
<counter name="ZoneQuota">0</counter>
<counter name="ServerQuota">0</counter>
</counters>
<cache name="_bind"/>
<counters type="adbstat">
<counter name="nentries">1021</counter>
<counter name="entriescnt">0</counter>
<counter name="nnames">1021</counter>
<counter name="namescnt">0</counter>
</counters>
<counters type="cachestats">
<counter name="CacheHits">0</counter>
<counter name="CacheMisses">0</counter>
<counter name="QueryHits">0</counter>
<counter name="QueryMisses">0</counter>
<counter name="DeleteLRU">0</counter>
<counter name="DeleteTTL">0</counter>
<counter name="CacheNodes">0</counter>
<counter name="CacheBuckets">64</counter>
<counter name="TreeMemTotal">287392</counter>
<counter name="TreeMemInUse">29608</counter>
<counter name="TreeMemMax">29608</counter>
<counter name="HeapMemTotal">262144</counter>
<counter name="HeapMemInUse">1024</counter>
<counter name="HeapMemMax">1024</counter>
</counters>
</view>
</views>
</statistics>

View File

@@ -0,0 +1,168 @@
package bind

import (
"encoding/xml"
"fmt"
"net"
"net/http"
"net/url"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
)

type v2Root struct {
XMLName xml.Name
Version string `xml:"version,attr"`
Statistics v2Statistics `xml:"bind>statistics"`
}

// Omitted branches: socketmgr, taskmgr
type v2Statistics struct {
Version string `xml:"version,attr"`
Views []struct {
// Omitted branches: zones
Name string `xml:"name"`
RdTypes []v2Counter `xml:"rdtype"`
ResStats []v2Counter `xml:"resstat"`
Caches []struct {
Name string `xml:"name,attr"`
RRSets []v2Counter `xml:"rrset"`
} `xml:"cache"`
} `xml:"views>view"`
Server struct {
OpCodes []v2Counter `xml:"requests>opcode"`
RdTypes []v2Counter `xml:"queries-in>rdtype"`
NSStats []v2Counter `xml:"nsstat"`
ZoneStats []v2Counter `xml:"zonestat"`
ResStats []v2Counter `xml:"resstat"`
SockStats []v2Counter `xml:"sockstat"`
} `xml:"server"`
Memory struct {
Contexts []struct {
// Omitted nodes: references, maxinuse, blocksize, pools, hiwater, lowater
Id string `xml:"id"`
Name string `xml:"name"`
Total int64 `xml:"total"`
InUse int64 `xml:"inuse"`
} `xml:"contexts>context"`
Summary struct {
TotalUse int64
InUse int64
BlockSize int64
ContextSize int64
Lost int64
} `xml:"summary"`
} `xml:"memory"`
}

// BIND statistics v2 counter struct used throughout
type v2Counter struct {
Name string `xml:"name"`
Value int `xml:"counter"`
}

// addXMLv2Counter adds a v2Counter array to a Telegraf Accumulator, with the specified tags
func addXMLv2Counter(acc telegraf.Accumulator, commonTags map[string]string, stats []v2Counter) {
grouper := metric.NewSeriesGrouper()
ts := time.Now()
for _, c := range stats {
tags := make(map[string]string)
// Create local copy of tags since maps are reference types
for k, v := range commonTags {
tags[k] = v
}
grouper.Add("bind_counter", tags, ts, c.Name, c.Value)
}
// Add grouped metrics
for _, metric := range grouper.Metrics() {
acc.AddMetric(metric)
}
}

// readStatsXMLv2 decodes a BIND9 XML statistics version 2 document. Unlike the XML v3 statistics
// format, the v2 format does not support broken-out subsets.
func (b *Bind) readStatsXMLv2(addr *url.URL, acc telegraf.Accumulator) error {
var stats v2Root
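// Note: client is a package-level HTTP client declared elsewhere in this
// plugin package (not in this file).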
resp, err := client.Get(addr.String())
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("%s returned HTTP status: %s", addr, resp.Status)
}
if err := xml.NewDecoder(resp.Body).Decode(&stats); err != nil {
return fmt.Errorf("Unable to decode XML document: %s", err)
}
tags := map[string]string{"url": addr.Host}
host, port, _ := net.SplitHostPort(addr.Host)
tags["source"] = host
tags["port"] = port
// Opcodes
tags["type"] = "opcode"
addXMLv2Counter(acc, tags, stats.Statistics.Server.OpCodes)
// Query RDATA types
tags["type"] = "qtype"
addXMLv2Counter(acc, tags, stats.Statistics.Server.RdTypes)
// Nameserver stats
tags["type"] = "nsstat"
addXMLv2Counter(acc, tags, stats.Statistics.Server.NSStats)
// Zone stats
tags["type"] = "zonestat"
addXMLv2Counter(acc, tags, stats.Statistics.Server.ZoneStats)
// Socket statistics
tags["type"] = "sockstat"
addXMLv2Counter(acc, tags, stats.Statistics.Server.SockStats)
// Memory stats
fields := map[string]interface{}{
"total_use": stats.Statistics.Memory.Summary.TotalUse,
"in_use": stats.Statistics.Memory.Summary.InUse,
"block_size": stats.Statistics.Memory.Summary.BlockSize,
"context_size": stats.Statistics.Memory.Summary.ContextSize,
"lost": stats.Statistics.Memory.Summary.Lost,
}
acc.AddGauge("bind_memory", fields, map[string]string{"url": addr.Host, "source": host, "port": port})
// Detailed, per-context memory stats
if b.GatherMemoryContexts {
for _, c := range stats.Statistics.Memory.Contexts {
tags := map[string]string{"url": addr.Host, "id": c.Id, "name": c.Name, "source": host, "port": port}
fields := map[string]interface{}{"total": c.Total, "in_use": c.InUse}
acc.AddGauge("bind_memory_context", fields, tags)
}
}
// Detailed, per-view stats
if b.GatherViews {
for _, v := range stats.Statistics.Views {
tags := map[string]string{"url": addr.Host, "view": v.Name}
// Query RDATA types
tags["type"] = "qtype"
addXMLv2Counter(acc, tags, v.RdTypes)
// Resolver stats
tags["type"] = "resstats"
addXMLv2Counter(acc, tags, v.ResStats)
}
}
return nil
}

View File

@@ -0,0 +1,161 @@
package bind

import (
"encoding/xml"
"fmt"
"net"
"net/http"
"net/url"
"strings"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
)

// XML path: //statistics
// Omitted branches: socketmgr, taskmgr
type v3Stats struct {
Server v3Server `xml:"server"`
Views []v3View `xml:"views>view"`
Memory v3Memory `xml:"memory"`
}

// XML path: //statistics/memory
type v3Memory struct {
Contexts []struct {
// Omitted nodes: references, maxinuse, blocksize, pools, hiwater, lowater
Id string `xml:"id"`
Name string `xml:"name"`
Total int64 `xml:"total"`
InUse int64 `xml:"inuse"`
} `xml:"contexts>context"`
Summary struct {
TotalUse int64
InUse int64
BlockSize int64
ContextSize int64
Lost int64
} `xml:"summary"`
}

// XML path: //statistics/server
type v3Server struct {
CounterGroups []v3CounterGroup `xml:"counters"`
}

// XML path: //statistics/views/view
type v3View struct {
// Omitted branches: zones
Name string `xml:"name,attr"`
CounterGroups []v3CounterGroup `xml:"counters"`
Caches []struct {
Name string `xml:"name,attr"`
RRSets []struct {
Name string `xml:"name"`
Value int64 `xml:"counter"`
} `xml:"rrset"`
} `xml:"cache"`
}

// Generic XML v3 doc fragment used in multiple places
type v3CounterGroup struct {
Type string `xml:"type,attr"`
Counters []struct {
Name string `xml:"name,attr"`
Value int64 `xml:",chardata"`
} `xml:"counter"`
}

// addStatsXMLv3 walks a v3Stats struct and adds the values to the telegraf.Accumulator.
func (b *Bind) addStatsXMLv3(stats v3Stats, acc telegraf.Accumulator, hostPort string) {
grouper := metric.NewSeriesGrouper()
ts := time.Now()
host, port, _ := net.SplitHostPort(hostPort)
// Counter groups
for _, cg := range stats.Server.CounterGroups {
for _, c := range cg.Counters {
if cg.Type == "opcode" && strings.HasPrefix(c.Name, "RESERVED") {
continue
}
tags := map[string]string{"url": hostPort, "source": host, "port": port, "type": cg.Type}
grouper.Add("bind_counter", tags, ts, c.Name, c.Value)
}
}
// Memory stats
fields := map[string]interface{}{
"total_use": stats.Memory.Summary.TotalUse,
"in_use": stats.Memory.Summary.InUse,
"block_size": stats.Memory.Summary.BlockSize,
"context_size": stats.Memory.Summary.ContextSize,
"lost": stats.Memory.Summary.Lost,
}
acc.AddGauge("bind_memory", fields, map[string]string{"url": hostPort, "source": host, "port": port})
// Detailed, per-context memory stats
if b.GatherMemoryContexts {
for _, c := range stats.Memory.Contexts {
tags := map[string]string{"url": hostPort, "source": host, "port": port, "id": c.Id, "name": c.Name}
fields := map[string]interface{}{"total": c.Total, "in_use": c.InUse}
acc.AddGauge("bind_memory_context", fields, tags)
}
}
// Detailed, per-view stats
if b.GatherViews {
for _, v := range stats.Views {
for _, cg := range v.CounterGroups {
for _, c := range cg.Counters {
tags := map[string]string{
"url": hostPort,
"source": host,
"port": port,
"view": v.Name,
"type": cg.Type,
}
grouper.Add("bind_counter", tags, ts, c.Name, c.Value)
}
}
}
}
// Add grouped metrics
for _, metric := range grouper.Metrics() {
acc.AddMetric(metric)
}
}

// readStatsXMLv3 takes a base URL to probe, and requests the individual statistics documents that
// we are interested in. These individual documents have a combined size which is significantly
// smaller than if we requested everything at once (e.g. taskmgr and socketmgr can be omitted).
func (b *Bind) readStatsXMLv3(addr *url.URL, acc telegraf.Accumulator) error {
var stats v3Stats
// Progressively build up full v3Stats struct by parsing the individual HTTP responses
for _, suffix := range [...]string{"/server", "/net", "/mem"} {
scrapeUrl := addr.String() + suffix
resp, err := client.Get(scrapeUrl)
if err != nil {
return err
}
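// Note: these deferred closes run when readStatsXMLv3 returns, i.e. only
// after all endpoints have been fetched.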
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("%s returned HTTP status: %s", scrapeUrl, resp.Status)
}
if err := xml.NewDecoder(resp.Body).Decode(&stats); err != nil {
return fmt.Errorf("Unable to decode XML document: %s", err)
}
}
b.addStatsXMLv3(stats, acc, addr.Host)
return nil
}

View File

@@ -50,7 +50,7 @@ Supported Burrow version: `1.x`
# insecure_skip_verify = false
```
### Partition Status mappings
### Group/Partition Status mappings
* `OK` = 1
* `NOT_FOUND` = 2
@@ -66,9 +66,11 @@ Supported Burrow version: `1.x`
* `burrow_group` (one event per each consumer group)
- status (string, see Partition Status mappings)
- status_code (int, `1..6`, see Partition status mappings)
- parition_count (int, `number of partitions`)
- partition_count (int, `number of partitions`)
- offset (int64, `total offset of all partitions`)
- total_lag (int64, `totallag`)
- lag (int64, `maxlag.current_lag || 0`)
- timestamp (int64, `end.timestamp`)
* `burrow_partition` (one event per each topic partition)
- status (string, see Partition Status mappings)

View File

@@ -397,13 +397,11 @@ func (b *burrow) genGroupStatusMetrics(r *apiResponse, cluster, group string, ac
partitionCount = len(r.Status.Partitions)
}
// get max timestamp and offset from partitions list
// get max timestamp and total offset from partitions list
offset := int64(0)
timestamp := int64(0)
for _, partition := range r.Status.Partitions {
if partition.End.Offset > offset {
offset = partition.End.Offset
}
offset += partition.End.Offset
if partition.End.Timestamp > timestamp {
timestamp = partition.End.Timestamp
}
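With the three partition offsets used in the updated test expectation below, the group-level offset therefore becomes 431323195 + 431322962 + 428636563 = 1291282720, rather than the previous maximum of 431323195.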
@@ -434,6 +432,9 @@ func (b *burrow) genGroupStatusMetrics(r *apiResponse, cluster, group string, ac
func (b *burrow) genGroupLagMetrics(r *apiResponse, cluster, group string, acc telegraf.Accumulator) {
for _, partition := range r.Status.Partitions {
if !b.filterTopics.Match(partition.Topic) {
continue
}
acc.AddFields(
"burrow_partition",
map[string]interface{}{

View File

@@ -160,7 +160,7 @@ func TestBurrowGroup(t *testing.T) {
"partition_count": 3,
"total_lag": int64(0),
"lag": int64(0),
"offset": int64(431323195),
"offset": int64(431323195 + 431322962 + 428636563),
"timestamp": int64(1515609490008),
},
}
@@ -262,7 +262,7 @@ func TestFilterGroups(t *testing.T) {
acc := &testutil.Accumulator{}
plugin.Gather(acc)
require.Exactly(t, 4, len(acc.Metrics))
require.Exactly(t, 1, len(acc.Metrics))
require.Empty(t, acc.Errors)
}

View File

@@ -39,19 +39,19 @@ Here is a list of metrics that might be useful to monitor your cassandra cluster
- [How to monitor Cassandra performance metrics](https://www.datadoghq.com/blog/how-to-monitor-cassandra-performance-metrics)
- [Cassandra Documentation](http://docs.datastax.com/en/cassandra/3.x/cassandra/operations/monitoringCassandraTOC.html)
####measurement = javaGarbageCollector
#### measurement = javaGarbageCollector
- /java.lang:type=GarbageCollector,name=ConcurrentMarkSweep/CollectionTime
- /java.lang:type=GarbageCollector,name=ConcurrentMarkSweep/CollectionCount
- /java.lang:type=GarbageCollector,name=ParNew/CollectionTime
- /java.lang:type=GarbageCollector,name=ParNew/CollectionCount
####measurement = javaMemory
#### measurement = javaMemory
- /java.lang:type=Memory/HeapMemoryUsage
- /java.lang:type=Memory/NonHeapMemoryUsage
####measurement = cassandraCache
#### measurement = cassandraCache
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Hits
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Requests
@@ -64,11 +64,11 @@ Here is a list of metrics that might be useful to monitor your cassandra cluster
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Size
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Capacity
####measurement = cassandraClient
#### measurement = cassandraClient
- /org.apache.cassandra.metrics:type=Client,name=connectedNativeClients
####measurement = cassandraClientRequest
#### measurement = cassandraClientRequest
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=TotalLatency
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=TotalLatency
@@ -81,24 +81,24 @@ Here is a list of metrics that might be useful to monitor your cassandra cluster
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=Failures
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=Failures
####measurement = cassandraCommitLog
#### measurement = cassandraCommitLog
- /org.apache.cassandra.metrics:type=CommitLog,name=PendingTasks
- /org.apache.cassandra.metrics:type=CommitLog,name=TotalCommitLogSize
####measurement = cassandraCompaction
#### measurement = cassandraCompaction
- /org.apache.cassandra.metrics:type=Compaction,name=CompletedTasks
- /org.apache.cassandra.metrics:type=Compaction,name=PendingTasks
- /org.apache.cassandra.metrics:type=Compaction,name=TotalCompactionsCompleted
- /org.apache.cassandra.metrics:type=Compaction,name=BytesCompacted
####measurement = cassandraStorage
#### measurement = cassandraStorage
- /org.apache.cassandra.metrics:type=Storage,name=Load
- /org.apache.cassandra.metrics:type=Storage,name=Exceptions
- /org.apache.cassandra.metrics:type=Storage,name=Exceptions
####measurement = cassandraTable
#### measurement = cassandraTable
Using wildcards for "keyspace" and "scope" can create a lot of series, as metrics will be reported for every table and keyspace, including internal system tables. Specify a keyspace name and/or a table name to limit them (see the example after this list).
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=LiveDiskSpaceUsed
@@ -110,7 +110,7 @@ Using wildcards for "keyspace" and "scope" can create a lot of series as metrics
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=WriteTotalLatency
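For example, to limit collection to a single table (`my_keyspace` and `my_table` are placeholder names, not defaults shipped with the plugin):

```
/org.apache.cassandra.metrics:type=Table,keyspace=my_keyspace,scope=my_table,name=WriteTotalLatency
```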
####measurement = cassandraThreadPools
#### measurement = cassandraThreadPools
- /org.apache.cassandra.metrics:type=ThreadPools,path=internal,scope=CompactionExecutor,name=ActiveTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=internal,scope=AntiEntropyStage,name=ActiveTasks

View File

@@ -2,9 +2,11 @@
Collects performance metrics from the MON and OSD nodes in a Ceph storage cluster.
Ceph introduced a Telegraf and Influx plugin in the 13.x Mimic release. The Telegraf module sends metrics to a Telegraf instance configured with a socket_listener input. [Learn more in their docs](http://docs.ceph.com/docs/mimic/mgr/telegraf/)
*Admin Socket Stats*
This gatherer works by scanning the configured SocketDir for OSD and MON socket files. When it finds
This gatherer works by scanning the configured SocketDir for OSD, MON, MDS and RGW socket files. When it finds
a MON socket, it runs **ceph --admin-daemon $file perfcounters_dump**. For OSDs it runs **ceph --admin-daemon $file perf dump**
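A minimal standalone sketch of that discovery-and-invoke loop is below. This is not the plugin's actual code: the socket directory, the daemon-name prefixes, and the plain `exec` invocation are illustrative assumptions.

```go
package main

import (
	"fmt"
	"os/exec"
	"path/filepath"
	"strings"
)

func main() {
	// Assumed location; the plugin scans whatever SocketDir is configured.
	sockets, _ := filepath.Glob("/var/run/ceph/*.asok")
	for _, sock := range sockets {
		// MON sockets use "perfcounters_dump"; other daemon types use "perf dump".
		args := []string{"--admin-daemon", sock, "perf", "dump"}
		if strings.HasPrefix(filepath.Base(sock), "ceph-mon") {
			args = []string{"--admin-daemon", sock, "perfcounters_dump"}
		}
		out, err := exec.Command("ceph", args...).Output()
		if err != nil {
			continue // daemon not responding, or the ceph CLI is unavailable
		}
		fmt.Printf("%s returned %d bytes of JSON\n", sock, len(out))
	}
}
```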
The resulting JSON is parsed and grouped into collections, based on top-level key. Top-level keys are
@@ -60,6 +62,8 @@ the cluster. The currently supported commands are:
## prefix of MON and OSD socket files, used to determine socket type
mon_prefix = "ceph-mon"
osd_prefix = "ceph-osd"
mds_prefix = "ceph-mds"
rgw_prefix = "ceph-client"
## suffix used to identify socket files
socket_suffix = "asok"
@@ -85,67 +89,15 @@ the cluster. The currently supported commands are:
gather_cluster_stats = false
```
### Measurements & Fields:
### Metrics:
*Admin Socket Stats*
All fields are collected under the **ceph** measurement and stored as float64s. For a full list of fields, see the sample perf dumps in ceph_test.go.
*Cluster Stats*
All admin measurements will have the following tags:
* ceph\_osdmap
* epoch (float)
* full (boolean)
* nearfull (boolean)
* num\_in\_osds (float)
* num\_osds (float)
* num\_remremapped\_pgs (float)
* num\_up\_osds (float)
* ceph\_pgmap
* bytes\_avail (float)
* bytes\_total (float)
* bytes\_used (float)
* data\_bytes (float)
* num\_pgs (float)
* op\_per\_sec (float)
* read\_bytes\_sec (float)
* version (float)
* write\_bytes\_sec (float)
* recovering\_bytes\_per\_sec (float)
* recovering\_keys\_per\_sec (float)
* recovering\_objects\_per\_sec (float)
* ceph\_pgmap\_state
* count (float)
* ceph\_usage
* bytes\_used (float)
* kb\_used (float)
* max\_avail (float)
* objects (float)
* ceph\_pool\_usage
* bytes\_used (float)
* kb\_used (float)
* max\_avail (float)
* objects (float)
* ceph\_pool\_stats
* op\_per\_sec (float)
* read\_bytes\_sec (float)
* write\_bytes\_sec (float)
* recovering\_object\_per\_sec (float)
* recovering\_bytes\_per\_sec (float)
* recovering\_keys\_per\_sec (float)
### Tags:
*Admin Socket Stats*
All measurements will have the following tags:
- type: either 'osd' or 'mon' to indicate which type of node was queried
- type: either 'osd', 'mon', 'mds' or 'rgw' to indicate which type of node was queried
- id: a unique string identifier, parsed from the socket file name for the node
- collection: the top-level key under which these fields were reported. Possible values are:
- for MON nodes:
@@ -183,41 +135,252 @@ All measurements will have the following tags:
- throttle-objecter_ops
- throttle-osd_client_bytes
- throttle-osd_client_messages
- for MDS nodes:
- AsyncMessenger::Worker-0
- AsyncMessenger::Worker-1
- AsyncMessenger::Worker-2
- finisher-PurgeQueue
- mds
- mds_cache
- mds_log
- mds_mem
- mds_server
- mds_sessions
- objecter
- purge_queue
- throttle-msgr_dispatch_throttler-mds
- throttle-objecter_bytes
- throttle-objecter_ops
- throttle-write_buf_throttle
- for RGW nodes:
- AsyncMessenger::Worker-0
- AsyncMessenger::Worker-1
- AsyncMessenger::Worker-2
- cct
- finisher-radosclient
- mempool
- objecter
- rgw
- simple-throttler
- throttle-msgr_dispatch_throttler-radosclient
- throttle-objecter_bytes
- throttle-objecter_ops
- throttle-rgw_async_rados_ops
*Cluster Stats*
* ceph\_pgmap\_state has the following tags:
* state (state for which the value applies e.g. active+clean, active+remapped+backfill)
* ceph\_pool\_usage has the following tags:
* id
* name
* ceph\_pool\_stats has the following tags:
* id
* name
+ ceph_health
- fields:
- status
- overall_status
- ceph_osdmap
- fields:
- epoch (float)
- num_osds (float)
- num_up_osds (float)
- num_in_osds (float)
- full (bool)
- nearfull (bool)
- num_remapped_pgs (float)
+ ceph_pgmap
- fields:
- version (float)
- num_pgs (float)
- data_bytes (float)
- bytes_used (float)
- bytes_avail (float)
- bytes_total (float)
- read_bytes_sec (float)
- write_bytes_sec (float)
- op_per_sec (float, exists only in ceph <10)
- read_op_per_sec (float)
- write_op_per_sec (float)
- ceph_pgmap_state
- tags:
- state
- fields:
- count (float)
+ ceph_usage
- fields:
- total_bytes (float)
- total_used_bytes (float)
- total_avail_bytes (float)
- total_space (float, exists only in ceph <0.84)
- total_used (float, exists only in ceph <0.84)
- total_avail (float, exists only in ceph <0.84)
- ceph_pool_usage
- tags:
- name
- fields:
- kb_used (float)
- bytes_used (float)
- objects (float)
- percent_used (float)
- max_avail (float)
+ ceph_pool_stats
- tags:
- name
- fields:
- read_bytes_sec (float)
- write_bytes_sec (float)
- op_per_sec (float, exists only in ceph <10)
- read_op_per_sec (float)
- write_op_per_sec (float)
- recovering_objects_per_sec (float)
- recovering_bytes_per_sec (float)
- recovering_keys_per_sec (float)
### Example Output:
*Admin Socket Stats*
<pre>
telegraf --config /etc/telegraf/telegraf.conf --config-directory /etc/telegraf/telegraf.d --input-filter ceph --test
* Plugin: ceph, Collection 1
> ceph,collection=paxos, id=node-2,role=openstack,type=mon accept_timeout=0,begin=14931264,begin_bytes.avgcount=14931264,begin_bytes.sum=180309683362,begin_keys.avgcount=0,begin_keys.sum=0,begin_latency.avgcount=14931264,begin_latency.sum=9293.29589,collect=1,collect_bytes.avgcount=1,collect_bytes.sum=24,collect_keys.avgcount=1,collect_keys.sum=1,collect_latency.avgcount=1,collect_latency.sum=0.00028,collect_timeout=0,collect_uncommitted=0,commit=14931264,commit_bytes.avgcount=0,commit_bytes.sum=0,commit_keys.avgcount=0,commit_keys.sum=0,commit_latency.avgcount=0,commit_latency.sum=0,lease_ack_timeout=0,lease_timeout=0,new_pn=0,new_pn_latency.avgcount=0,new_pn_latency.sum=0,refresh=14931264,refresh_latency.avgcount=14931264,refresh_latency.sum=8706.98498,restart=4,share_state=0,share_state_bytes.avgcount=0,share_state_bytes.sum=0,share_state_keys.avgcount=0,share_state_keys.sum=0,start_leader=0,start_peon=1,store_state=14931264,store_state_bytes.avgcount=14931264,store_state_bytes.sum=353119959211,store_state_keys.avgcount=14931264,store_state_keys.sum=289807523,store_state_latency.avgcount=14931264,store_state_latency.sum=10952.835724 1462821234814535148
> ceph,collection=throttle-mon_client_bytes,id=node-2,type=mon get=1413017,get_or_fail_fail=0,get_or_fail_success=0,get_sum=71211705,max=104857600,put=1413013,put_sum=71211459,take=0,take_sum=0,val=246,wait.avgcount=0,wait.sum=0 1462821234814737219
> ceph,collection=throttle-mon_daemon_bytes,id=node-2,type=mon get=4058121,get_or_fail_fail=0,get_or_fail_success=0,get_sum=6027348117,max=419430400,put=4058121,put_sum=6027348117,take=0,take_sum=0,val=0,wait.avgcount=0,wait.sum=0 1462821234814815661
> ceph,collection=throttle-msgr_dispatch_throttler-mon,id=node-2,type=mon get=54276277,get_or_fail_fail=0,get_or_fail_success=0,get_sum=370232877040,max=104857600,put=54276277,put_sum=370232877040,take=0,take_sum=0,val=0,wait.avgcount=0,wait.sum=0 1462821234814872064
</pre>
*Cluster Stats*
<pre>
> ceph_osdmap,host=ceph-mon-0 epoch=170772,full=false,nearfull=false,num_in_osds=340,num_osds=340,num_remapped_pgs=0,num_up_osds=340 1468841037000000000
> ceph_pgmap,host=ceph-mon-0 bytes_avail=634895531270144,bytes_total=812117151809536,bytes_used=177221620539392,data_bytes=56979991615058,num_pgs=22952,op_per_sec=15869,read_bytes_sec=43956026,version=39387592,write_bytes_sec=165344818 1468841037000000000
> ceph_pgmap_state,host=ceph-mon-0,state=active+clean count=22952 1468928660000000000
> ceph_pgmap_state,host=ceph-mon-0,state=active+degraded count=16 1468928660000000000
> ceph_usage,host=ceph-mon-0 total_avail_bytes=634895514791936,total_bytes=812117151809536,total_used_bytes=177221637017600 1468841037000000000
> ceph_pool_usage,host=ceph-mon-0,id=150,name=cinder.volumes bytes_used=12648553794802,kb_used=12352103316,max_avail=154342562489244,objects=3026295 1468841037000000000
> ceph_pool_usage,host=ceph-mon-0,id=182,name=cinder.volumes.flash bytes_used=8541308223964,kb_used=8341121313,max_avail=39388593563936,objects=2075066 1468841037000000000
> ceph_pool_stats,host=ceph-mon-0,id=150,name=cinder.volumes op_per_sec=1706,read_bytes_sec=28671674,write_bytes_sec=29994541 1468841037000000000
> ceph_pool_stats,host=ceph-mon-0,id=182,name=cinder.volumes.flash op_per_sec=9748,read_bytes_sec=9605524,write_bytes_sec=45593310 1468841037000000000
</pre>
```
ceph_health,host=stefanmon1 overall_status="",status="HEALTH_WARN" 1587118504000000000
ceph_osdmap,host=stefanmon1 epoch=203,full=false,nearfull=false,num_in_osds=8,num_osds=9,num_remapped_pgs=0,num_up_osds=8 1587118504000000000
ceph_pgmap,host=stefanmon1 bytes_avail=849879302144,bytes_total=858959904768,bytes_used=9080602624,data_bytes=5055,num_pgs=504,read_bytes_sec=0,read_op_per_sec=0,version=0,write_bytes_sec=0,write_op_per_sec=0 1587118504000000000
ceph_pgmap_state,host=stefanmon1,state=active+clean count=504 1587118504000000000
ceph_usage,host=stefanmon1 total_avail_bytes=849879302144,total_bytes=858959904768,total_used_bytes=196018176 1587118505000000000
ceph_pool_usage,host=stefanmon1,name=cephfs_data bytes_used=0,kb_used=0,max_avail=285804986368,objects=0,percent_used=0 1587118505000000000
ceph_pool_stats,host=stefanmon1,name=cephfs_data read_bytes_sec=0,read_op_per_sec=0,recovering_bytes_per_sec=0,recovering_keys_per_sec=0,recovering_objects_per_sec=0,write_bytes_sec=0,write_op_per_sec=0 1587118506000000000
```
*Admin Socket Stats*
```
> ceph,collection=cct,host=stefanmon1,id=stefanmon1,type=monitor total_workers=0,unhealthy_workers=0 1587117563000000000
> ceph,collection=mempool,host=stefanmon1,id=stefanmon1,type=monitor bloom_filter_bytes=0,bloom_filter_items=0,bluefs_bytes=0,bluefs_items=0,bluestore_alloc_bytes=0,bluestore_alloc_items=0,bluestore_cache_data_bytes=0,bluestore_cache_data_items=0,bluestore_cache_onode_bytes=0,bluestore_cache_onode_items=0,bluestore_cache_other_bytes=0,bluestore_cache_other_items=0,bluestore_fsck_bytes=0,bluestore_fsck_items=0,bluestore_txc_bytes=0,bluestore_txc_items=0,bluestore_writing_bytes=0,bluestore_writing_deferred_bytes=0,bluestore_writing_deferred_items=0,bluestore_writing_items=0,buffer_anon_bytes=719152,buffer_anon_items=192,buffer_meta_bytes=352,buffer_meta_items=4,mds_co_bytes=0,mds_co_items=0,osd_bytes=0,osd_items=0,osd_mapbl_bytes=0,osd_mapbl_items=0,osd_pglog_bytes=0,osd_pglog_items=0,osdmap_bytes=15872,osdmap_items=138,osdmap_mapping_bytes=63112,osdmap_mapping_items=7626,pgmap_bytes=38680,pgmap_items=477,unittest_1_bytes=0,unittest_1_items=0,unittest_2_bytes=0,unittest_2_items=0 1587117563000000000
> ceph,collection=throttle-mon_client_bytes,host=stefanmon1,id=stefanmon1,type=monitor get=1041157,get_or_fail_fail=0,get_or_fail_success=1041157,get_started=0,get_sum=64928901,max=104857600,put=1041157,put_sum=64928901,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117563000000000
> ceph,collection=throttle-msgr_dispatch_throttler-mon,host=stefanmon1,id=stefanmon1,type=monitor get=12695426,get_or_fail_fail=0,get_or_fail_success=12695426,get_started=0,get_sum=42542216884,max=104857600,put=12695426,put_sum=42542216884,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117563000000000
> ceph,collection=finisher-mon_finisher,host=stefanmon1,id=stefanmon1,type=monitor complete_latency.avgcount=0,complete_latency.avgtime=0,complete_latency.sum=0,queue_len=0 1587117563000000000
> ceph,collection=finisher-monstore,host=stefanmon1,id=stefanmon1,type=monitor complete_latency.avgcount=1609831,complete_latency.avgtime=0.015857621,complete_latency.sum=25528.09131035,queue_len=0 1587117563000000000
> ceph,collection=mon,host=stefanmon1,id=stefanmon1,type=monitor election_call=25,election_lose=0,election_win=22,num_elections=94,num_sessions=3,session_add=174679,session_rm=439316,session_trim=137 1587117563000000000
> ceph,collection=throttle-mon_daemon_bytes,host=stefanmon1,id=stefanmon1,type=monitor get=72697,get_or_fail_fail=0,get_or_fail_success=72697,get_started=0,get_sum=32261199,max=419430400,put=72697,put_sum=32261199,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117563000000000
> ceph,collection=rocksdb,host=stefanmon1,id=stefanmon1,type=monitor compact=1,compact_queue_len=0,compact_queue_merge=1,compact_range=19126,get=62449211,get_latency.avgcount=62449211,get_latency.avgtime=0.000022216,get_latency.sum=1387.371811726,rocksdb_write_delay_time.avgcount=0,rocksdb_write_delay_time.avgtime=0,rocksdb_write_delay_time.sum=0,rocksdb_write_memtable_time.avgcount=0,rocksdb_write_memtable_time.avgtime=0,rocksdb_write_memtable_time.sum=0,rocksdb_write_pre_and_post_time.avgcount=0,rocksdb_write_pre_and_post_time.avgtime=0,rocksdb_write_pre_and_post_time.sum=0,rocksdb_write_wal_time.avgcount=0,rocksdb_write_wal_time.avgtime=0,rocksdb_write_wal_time.sum=0,submit_latency.avgcount=0,submit_latency.avgtime=0,submit_latency.sum=0,submit_sync_latency.avgcount=3219961,submit_sync_latency.avgtime=0.007532173,submit_sync_latency.sum=24253.303584224,submit_transaction=0,submit_transaction_sync=3219961 1587117563000000000
> ceph,collection=AsyncMessenger::Worker-0,host=stefanmon1,id=stefanmon1,type=monitor msgr_active_connections=148317,msgr_created_connections=162806,msgr_recv_bytes=11557888328,msgr_recv_messages=5113369,msgr_running_fast_dispatch_time=0,msgr_running_recv_time=868.377161686,msgr_running_send_time=1626.525392721,msgr_running_total_time=4222.235694322,msgr_send_bytes=91516226816,msgr_send_messages=6973706 1587117563000000000
> ceph,collection=AsyncMessenger::Worker-2,host=stefanmon1,id=stefanmon1,type=monitor msgr_active_connections=146396,msgr_created_connections=159788,msgr_recv_bytes=2162802496,msgr_recv_messages=689168,msgr_running_fast_dispatch_time=0,msgr_running_recv_time=164.148550562,msgr_running_send_time=153.462890368,msgr_running_total_time=644.188791379,msgr_send_bytes=7422484152,msgr_send_messages=749381 1587117563000000000
> ceph,collection=cluster,host=stefanmon1,id=stefanmon1,type=monitor num_bytes=5055,num_mon=3,num_mon_quorum=3,num_object=245,num_object_degraded=0,num_object_misplaced=0,num_object_unfound=0,num_osd=9,num_osd_in=8,num_osd_up=8,num_pg=504,num_pg_active=504,num_pg_active_clean=504,num_pg_peering=0,num_pool=17,osd_bytes=858959904768,osd_bytes_avail=849889787904,osd_bytes_used=9070116864,osd_epoch=203 1587117563000000000
> ceph,collection=paxos,host=stefanmon1,id=stefanmon1,type=monitor accept_timeout=1,begin=1609847,begin_bytes.avgcount=1609847,begin_bytes.sum=41408662074,begin_keys.avgcount=1609847,begin_keys.sum=4829541,begin_latency.avgcount=1609847,begin_latency.avgtime=0.007213392,begin_latency.sum=11612.457661116,collect=0,collect_bytes.avgcount=0,collect_bytes.sum=0,collect_keys.avgcount=0,collect_keys.sum=0,collect_latency.avgcount=0,collect_latency.avgtime=0,collect_latency.sum=0,collect_timeout=1,collect_uncommitted=17,commit=1609831,commit_bytes.avgcount=1609831,commit_bytes.sum=41087428442,commit_keys.avgcount=1609831,commit_keys.sum=11637931,commit_latency.avgcount=1609831,commit_latency.avgtime=0.006236333,commit_latency.sum=10039.442388355,lease_ack_timeout=0,lease_timeout=0,new_pn=33,new_pn_latency.avgcount=33,new_pn_latency.avgtime=3.844272773,new_pn_latency.sum=126.86100151,refresh=1609856,refresh_latency.avgcount=1609856,refresh_latency.avgtime=0.005900486,refresh_latency.sum=9498.932866761,restart=109,share_state=2,share_state_bytes.avgcount=2,share_state_bytes.sum=39612,share_state_keys.avgcount=2,share_state_keys.sum=2,start_leader=22,start_peon=0,store_state=14,store_state_bytes.avgcount=14,store_state_bytes.sum=51908281,store_state_keys.avgcount=14,store_state_keys.sum=7016,store_state_latency.avgcount=14,store_state_latency.avgtime=11.668377665,store_state_latency.sum=163.357287311 1587117563000000000
> ceph,collection=throttle-msgr_dispatch_throttler-mon-mgrc,host=stefanmon1,id=stefanmon1,type=monitor get=13225,get_or_fail_fail=0,get_or_fail_success=13225,get_started=0,get_sum=158700,max=104857600,put=13225,put_sum=158700,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117563000000000
> ceph,collection=AsyncMessenger::Worker-1,host=stefanmon1,id=stefanmon1,type=monitor msgr_active_connections=147680,msgr_created_connections=162374,msgr_recv_bytes=29781706740,msgr_recv_messages=7170733,msgr_running_fast_dispatch_time=0,msgr_running_recv_time=1728.559151358,msgr_running_send_time=2086.681244508,msgr_running_total_time=6084.532916585,msgr_send_bytes=94062125718,msgr_send_messages=9161564 1587117563000000000
> ceph,collection=throttle-msgr_dispatch_throttler-cluster,host=stefanosd1,id=0,type=osd get=281745,get_or_fail_fail=0,get_or_fail_success=281745,get_started=0,get_sum=446024457,max=104857600,put=281745,put_sum=446024457,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-bluestore_throttle_bytes,host=stefanosd1,id=0,type=osd get=275707,get_or_fail_fail=0,get_or_fail_success=0,get_started=275707,get_sum=185073179842,max=67108864,put=268870,put_sum=185073179842,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_front_server,host=stefanosd1,id=0,type=osd get=2606982,get_or_fail_fail=0,get_or_fail_success=2606982,get_started=0,get_sum=5224391928,max=104857600,put=2606982,put_sum=5224391928,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=rocksdb,host=stefanosd1,id=0,type=osd compact=0,compact_queue_len=0,compact_queue_merge=0,compact_range=0,get=1570,get_latency.avgcount=1570,get_latency.avgtime=0.000051233,get_latency.sum=0.080436788,rocksdb_write_delay_time.avgcount=0,rocksdb_write_delay_time.avgtime=0,rocksdb_write_delay_time.sum=0,rocksdb_write_memtable_time.avgcount=0,rocksdb_write_memtable_time.avgtime=0,rocksdb_write_memtable_time.sum=0,rocksdb_write_pre_and_post_time.avgcount=0,rocksdb_write_pre_and_post_time.avgtime=0,rocksdb_write_pre_and_post_time.sum=0,rocksdb_write_wal_time.avgcount=0,rocksdb_write_wal_time.avgtime=0,rocksdb_write_wal_time.sum=0,submit_latency.avgcount=275707,submit_latency.avgtime=0.000174936,submit_latency.sum=48.231345334,submit_sync_latency.avgcount=268870,submit_sync_latency.avgtime=0.006097313,submit_sync_latency.sum=1639.384555624,submit_transaction=275707,submit_transaction_sync=268870 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_back_server,host=stefanosd1,id=0,type=osd get=2606982,get_or_fail_fail=0,get_or_fail_success=2606982,get_started=0,get_sum=5224391928,max=104857600,put=2606982,put_sum=5224391928,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-objecter_bytes,host=stefanosd1,id=0,type=osd get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=104857600,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_back_client,host=stefanosd1,id=0,type=osd get=2610285,get_or_fail_fail=0,get_or_fail_success=2610285,get_started=0,get_sum=5231011140,max=104857600,put=2610285,put_sum=5231011140,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=AsyncMessenger::Worker-1,host=stefanosd1,id=0,type=osd msgr_active_connections=2093,msgr_created_connections=29142,msgr_recv_bytes=7214238199,msgr_recv_messages=3928206,msgr_running_fast_dispatch_time=171.289615064,msgr_running_recv_time=278.531155966,msgr_running_send_time=489.482588813,msgr_running_total_time=1134.004853662,msgr_send_bytes=9814725232,msgr_send_messages=3814927 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-client,host=stefanosd1,id=0,type=osd get=488206,get_or_fail_fail=0,get_or_fail_success=488206,get_started=0,get_sum=104085134,max=104857600,put=488206,put_sum=104085134,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=finisher-defered_finisher,host=stefanosd1,id=0,type=osd complete_latency.avgcount=0,complete_latency.avgtime=0,complete_latency.sum=0,queue_len=0 1587117698000000000
> ceph,collection=recoverystate_perf,host=stefanosd1,id=0,type=osd activating_latency.avgcount=87,activating_latency.avgtime=0.114348341,activating_latency.sum=9.948305683,active_latency.avgcount=25,active_latency.avgtime=1790.961574431,active_latency.sum=44774.039360795,backfilling_latency.avgcount=0,backfilling_latency.avgtime=0,backfilling_latency.sum=0,clean_latency.avgcount=25,clean_latency.avgtime=1790.830827794,clean_latency.sum=44770.770694867,down_latency.avgcount=0,down_latency.avgtime=0,down_latency.sum=0,getinfo_latency.avgcount=141,getinfo_latency.avgtime=0.446233476,getinfo_latency.sum=62.918920183,getlog_latency.avgcount=87,getlog_latency.avgtime=0.007708069,getlog_latency.sum=0.670602073,getmissing_latency.avgcount=87,getmissing_latency.avgtime=0.000077594,getmissing_latency.sum=0.006750701,incomplete_latency.avgcount=0,incomplete_latency.avgtime=0,incomplete_latency.sum=0,initial_latency.avgcount=166,initial_latency.avgtime=0.001313715,initial_latency.sum=0.218076764,notbackfilling_latency.avgcount=0,notbackfilling_latency.avgtime=0,notbackfilling_latency.sum=0,notrecovering_latency.avgcount=0,notrecovering_latency.avgtime=0,notrecovering_latency.sum=0,peering_latency.avgcount=141,peering_latency.avgtime=0.948324273,peering_latency.sum=133.713722563,primary_latency.avgcount=79,primary_latency.avgtime=567.706192991,primary_latency.sum=44848.78924634,recovered_latency.avgcount=87,recovered_latency.avgtime=0.000378284,recovered_latency.sum=0.032910791,recovering_latency.avgcount=2,recovering_latency.avgtime=0.338242008,recovering_latency.sum=0.676484017,replicaactive_latency.avgcount=23,replicaactive_latency.avgtime=1790.893991295,replicaactive_latency.sum=41190.561799786,repnotrecovering_latency.avgcount=25,repnotrecovering_latency.avgtime=1647.627024984,repnotrecovering_latency.sum=41190.675624616,reprecovering_latency.avgcount=2,reprecovering_latency.avgtime=0.311884638,reprecovering_latency.sum=0.623769276,repwaitbackfillreserved_latency.avgcount=0,repwaitbackfillreserved_latency.avgtime=0,repwaitbackfillreserved_latency.sum=0,repwaitrecoveryreserved_latency.avgcount=2,repwaitrecoveryreserved_latency.avgtime=0.000462873,repwaitrecoveryreserved_latency.sum=0.000925746,reset_latency.avgcount=372,reset_latency.avgtime=0.125056393,reset_latency.sum=46.520978537,start_latency.avgcount=372,start_latency.avgtime=0.000109397,start_latency.sum=0.040695881,started_latency.avgcount=206,started_latency.avgtime=418.299777245,started_latency.sum=86169.754112641,stray_latency.avgcount=231,stray_latency.avgtime=0.98203205,stray_latency.sum=226.849403565,waitactingchange_latency.avgcount=0,waitactingchange_latency.avgtime=0,waitactingchange_latency.sum=0,waitlocalbackfillreserved_latency.avgcount=0,waitlocalbackfillreserved_latency.avgtime=0,waitlocalbackfillreserved_latency.sum=0,waitlocalrecoveryreserved_latency.avgcount=2,waitlocalrecoveryreserved_latency.avgtime=0.002802377,waitlocalrecoveryreserved_latency.sum=0.005604755,waitremotebackfillreserved_latency.avgcount=0,waitremotebackfillreserved_latency.avgtime=0,waitremotebackfillreserved_latency.sum=0,waitremoterecoveryreserved_latency.avgcount=2,waitremoterecoveryreserved_latency.avgtime=0.012855439,waitremoterecoveryreserved_latency.sum=0.025710878,waitupthru_latency.avgcount=87,waitupthru_latency.avgtime=0.805727895,waitupthru_latency.sum=70.09832695 1587117698000000000
> ceph,collection=cct,host=stefanosd1,id=0,type=osd total_workers=6,unhealthy_workers=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_front_client,host=stefanosd1,id=0,type=osd get=2610285,get_or_fail_fail=0,get_or_fail_success=2610285,get_started=0,get_sum=5231011140,max=104857600,put=2610285,put_sum=5231011140,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=bluefs,host=stefanosd1,id=0,type=osd bytes_written_slow=0,bytes_written_sst=9018781,bytes_written_wal=831081573,db_total_bytes=4294967296,db_used_bytes=434110464,files_written_sst=3,files_written_wal=2,gift_bytes=0,log_bytes=134291456,log_compactions=1,logged_bytes=1101668352,max_bytes_db=1234173952,max_bytes_slow=0,max_bytes_wal=0,num_files=11,reclaim_bytes=0,slow_total_bytes=0,slow_used_bytes=0,wal_total_bytes=0,wal_used_bytes=0 1587117698000000000
> ceph,collection=mempool,host=stefanosd1,id=0,type=osd bloom_filter_bytes=0,bloom_filter_items=0,bluefs_bytes=10600,bluefs_items=458,bluestore_alloc_bytes=230288,bluestore_alloc_items=28786,bluestore_cache_data_bytes=622592,bluestore_cache_data_items=43,bluestore_cache_onode_bytes=249280,bluestore_cache_onode_items=380,bluestore_cache_other_bytes=192678,bluestore_cache_other_items=20199,bluestore_fsck_bytes=0,bluestore_fsck_items=0,bluestore_txc_bytes=8272,bluestore_txc_items=11,bluestore_writing_bytes=0,bluestore_writing_deferred_bytes=670130,bluestore_writing_deferred_items=176,bluestore_writing_items=0,buffer_anon_bytes=2412465,buffer_anon_items=297,buffer_meta_bytes=5896,buffer_meta_items=67,mds_co_bytes=0,mds_co_items=0,osd_bytes=2124800,osd_items=166,osd_mapbl_bytes=155152,osd_mapbl_items=10,osd_pglog_bytes=3214704,osd_pglog_items=6288,osdmap_bytes=710892,osdmap_items=4426,osdmap_mapping_bytes=0,osdmap_mapping_items=0,pgmap_bytes=0,pgmap_items=0,unittest_1_bytes=0,unittest_1_items=0,unittest_2_bytes=0,unittest_2_items=0 1587117698000000000
> ceph,collection=osd,host=stefanosd1,id=0,type=osd agent_evict=0,agent_flush=0,agent_skip=0,agent_wake=0,cached_crc=0,cached_crc_adjusted=0,copyfrom=0,heartbeat_to_peers=7,loadavg=11,map_message_epoch_dups=21,map_message_epochs=40,map_messages=31,messages_delayed_for_map=0,missed_crc=0,numpg=166,numpg_primary=62,numpg_removing=0,numpg_replica=104,numpg_stray=0,object_ctx_cache_hit=476529,object_ctx_cache_total=476536,op=476525,op_before_dequeue_op_lat.avgcount=755708,op_before_dequeue_op_lat.avgtime=0.000205759,op_before_dequeue_op_lat.sum=155.493843473,op_before_queue_op_lat.avgcount=755702,op_before_queue_op_lat.avgtime=0.000047877,op_before_queue_op_lat.sum=36.181069552,op_cache_hit=0,op_in_bytes=0,op_latency.avgcount=476525,op_latency.avgtime=0.000365956,op_latency.sum=174.387387878,op_out_bytes=10882,op_prepare_latency.avgcount=476527,op_prepare_latency.avgtime=0.000205307,op_prepare_latency.sum=97.834380034,op_process_latency.avgcount=476525,op_process_latency.avgtime=0.000139616,op_process_latency.sum=66.530847665,op_r=476521,op_r_latency.avgcount=476521,op_r_latency.avgtime=0.00036559,op_r_latency.sum=174.21148267,op_r_out_bytes=10882,op_r_prepare_latency.avgcount=476523,op_r_prepare_latency.avgtime=0.000205302,op_r_prepare_latency.sum=97.831473175,op_r_process_latency.avgcount=476521,op_r_process_latency.avgtime=0.000139396,op_r_process_latency.sum=66.425498624,op_rw=2,op_rw_in_bytes=0,op_rw_latency.avgcount=2,op_rw_latency.avgtime=0.048818975,op_rw_latency.sum=0.097637951,op_rw_out_bytes=0,op_rw_prepare_latency.avgcount=2,op_rw_prepare_latency.avgtime=0.000467887,op_rw_prepare_latency.sum=0.000935775,op_rw_process_latency.avgcount=2,op_rw_process_latency.avgtime=0.013741256,op_rw_process_latency.sum=0.027482512,op_w=2,op_w_in_bytes=0,op_w_latency.avgcount=2,op_w_latency.avgtime=0.039133628,op_w_latency.sum=0.078267257,op_w_prepare_latency.avgcount=2,op_w_prepare_latency.avgtime=0.000985542,op_w_prepare_latency.sum=0.001971084,op_w_process_latency.avgcount=2,op_w_process_latency.avgtime=0.038933264,op_w_process_latency.sum=0.077866529,op_wip=0,osd_map_bl_cache_hit=22,osd_map_bl_cache_miss=40,osd_map_cache_hit=4570,osd_map_cache_miss=15,osd_map_cache_miss_low=0,osd_map_cache_miss_low_avg.avgcount=0,osd_map_cache_miss_low_avg.sum=0,osd_pg_biginfo=2050,osd_pg_fastinfo=265780,osd_pg_info=274542,osd_tier_flush_lat.avgcount=0,osd_tier_flush_lat.avgtime=0,osd_tier_flush_lat.sum=0,osd_tier_promote_lat.avgcount=0,osd_tier_promote_lat.avgtime=0,osd_tier_promote_lat.sum=0,osd_tier_r_lat.avgcount=0,osd_tier_r_lat.avgtime=0,osd_tier_r_lat.sum=0,pull=0,push=2,push_out_bytes=10,recovery_bytes=10,recovery_ops=2,stat_bytes=107369988096,stat_bytes_avail=106271539200,stat_bytes_used=1098448896,subop=253554,subop_in_bytes=168644225,subop_latency.avgcount=253554,subop_latency.avgtime=0.0073036,subop_latency.sum=1851.857230388,subop_pull=0,subop_pull_latency.avgcount=0,subop_pull_latency.avgtime=0,subop_pull_latency.sum=0,subop_push=0,subop_push_in_bytes=0,subop_push_latency.avgcount=0,subop_push_latency.avgtime=0,subop_push_latency.sum=0,subop_w=253554,subop_w_in_bytes=168644225,subop_w_latency.avgcount=253554,subop_w_latency.avgtime=0.0073036,subop_w_latency.sum=1851.857230388,tier_clean=0,tier_delay=0,tier_dirty=0,tier_evict=0,tier_flush=0,tier_flush_fail=0,tier_promote=0,tier_proxy_read=0,tier_proxy_write=0,tier_try_flush=0,tier_try_flush_fail=0,tier_whiteout=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-ms_objecter,host=stefanosd1,id=0,type=osd get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=104857600,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=AsyncMessenger::Worker-2,host=stefanosd1,id=0,type=osd msgr_active_connections=2055,msgr_created_connections=27411,msgr_recv_bytes=6431950009,msgr_recv_messages=3552443,msgr_running_fast_dispatch_time=162.271664213,msgr_running_recv_time=254.307853033,msgr_running_send_time=503.037285799,msgr_running_total_time=1130.21070681,msgr_send_bytes=10865436237,msgr_send_messages=3523374 1587117698000000000
> ceph,collection=bluestore,host=stefanosd1,id=0,type=osd bluestore_allocated=24641536,bluestore_blob_split=0,bluestore_blobs=88,bluestore_buffer_bytes=622592,bluestore_buffer_hit_bytes=160578,bluestore_buffer_miss_bytes=540236,bluestore_buffers=43,bluestore_compressed=0,bluestore_compressed_allocated=0,bluestore_compressed_original=0,bluestore_extent_compress=0,bluestore_extents=88,bluestore_fragmentation_micros=1,bluestore_gc_merged=0,bluestore_onode_hits=532102,bluestore_onode_misses=388,bluestore_onode_reshard=0,bluestore_onode_shard_hits=0,bluestore_onode_shard_misses=0,bluestore_onodes=380,bluestore_read_eio=0,bluestore_reads_with_retries=0,bluestore_stored=1987856,bluestore_txc=275707,bluestore_write_big=0,bluestore_write_big_blobs=0,bluestore_write_big_bytes=0,bluestore_write_small=60,bluestore_write_small_bytes=343843,bluestore_write_small_deferred=22,bluestore_write_small_new=38,bluestore_write_small_pre_read=22,bluestore_write_small_unused=0,commit_lat.avgcount=275707,commit_lat.avgtime=0.00699778,commit_lat.sum=1929.337103334,compress_lat.avgcount=0,compress_lat.avgtime=0,compress_lat.sum=0,compress_rejected_count=0,compress_success_count=0,csum_lat.avgcount=67,csum_lat.avgtime=0.000032601,csum_lat.sum=0.002184323,decompress_lat.avgcount=0,decompress_lat.avgtime=0,decompress_lat.sum=0,deferred_write_bytes=0,deferred_write_ops=0,kv_commit_lat.avgcount=268870,kv_commit_lat.avgtime=0.006365428,kv_commit_lat.sum=1711.472749866,kv_final_lat.avgcount=268867,kv_final_lat.avgtime=0.000043227,kv_final_lat.sum=11.622427109,kv_flush_lat.avgcount=268870,kv_flush_lat.avgtime=0.000000223,kv_flush_lat.sum=0.060141588,kv_sync_lat.avgcount=268870,kv_sync_lat.avgtime=0.006365652,kv_sync_lat.sum=1711.532891454,omap_lower_bound_lat.avgcount=2,omap_lower_bound_lat.avgtime=0.000006524,omap_lower_bound_lat.sum=0.000013048,omap_next_lat.avgcount=6704,omap_next_lat.avgtime=0.000004721,omap_next_lat.sum=0.031654097,omap_seek_to_first_lat.avgcount=323,omap_seek_to_first_lat.avgtime=0.00000522,omap_seek_to_first_lat.sum=0.00168614,omap_upper_bound_lat.avgcount=4,omap_upper_bound_lat.avgtime=0.000013086,omap_upper_bound_lat.sum=0.000052344,read_lat.avgcount=227,read_lat.avgtime=0.000699457,read_lat.sum=0.158776879,read_onode_meta_lat.avgcount=311,read_onode_meta_lat.avgtime=0.000072207,read_onode_meta_lat.sum=0.022456667,read_wait_aio_lat.avgcount=84,read_wait_aio_lat.avgtime=0.001556141,read_wait_aio_lat.sum=0.130715885,state_aio_wait_lat.avgcount=275707,state_aio_wait_lat.avgtime=0.000000345,state_aio_wait_lat.sum=0.095246457,state_deferred_aio_wait_lat.avgcount=0,state_deferred_aio_wait_lat.avgtime=0,state_deferred_aio_wait_lat.sum=0,state_deferred_cleanup_lat.avgcount=0,state_deferred_cleanup_lat.avgtime=0,state_deferred_cleanup_lat.sum=0,state_deferred_queued_lat.avgcount=0,state_deferred_queued_lat.avgtime=0,state_deferred_queued_lat.sum=0,state_done_lat.avgcount=275696,state_done_lat.avgtime=0.00000286,state_done_lat.sum=0.788700007,state_finishing_lat.avgcount=275696,state_finishing_lat.avgtime=0.000000302,state_finishing_lat.sum=0.083437168,state_io_done_lat.avgcount=275707,state_io_done_lat.avgtime=0.000001041,state_io_done_lat.sum=0.287025147,state_kv_commiting_lat.avgcount=275707,state_kv_commiting_lat.avgtime=0.006424459,state_kv_commiting_lat.sum=1771.268407864,state_kv_done_lat.avgcount=275707,state_kv_done_lat.avgtime=0.000001627,state_kv_done_lat.sum=0.448805853,state_kv_queued_lat.avgcount=275707,state_kv_queued_lat.avgtime=0.000488565,state_kv_queued_lat.sum=134.7009424,state_prepare_lat.avgcount=275707,state_prepare_lat.avgtime=0.000082464,state_prepare_lat.sum=22.736065534,submit_lat.avgcount=275707,submit_lat.avgtime=0.000120236,submit_lat.sum=33.149934412,throttle_lat.avgcount=275707,throttle_lat.avgtime=0.000001571,throttle_lat.sum=0.433185935,write_pad_bytes=151773,write_penalty_read_ops=0 1587117698000000000
> ceph,collection=finisher-objecter-finisher-0,host=stefanosd1,id=0,type=osd complete_latency.avgcount=0,complete_latency.avgtime=0,complete_latency.sum=0,queue_len=0 1587117698000000000
> ceph,collection=objecter,host=stefanosd1,id=0,type=osd command_active=0,command_resend=0,command_send=0,linger_active=0,linger_ping=0,linger_resend=0,linger_send=0,map_epoch=203,map_full=0,map_inc=19,omap_del=0,omap_rd=0,omap_wr=0,op=0,op_active=0,op_laggy=0,op_pg=0,op_r=0,op_reply=0,op_resend=0,op_rmw=0,op_send=0,op_send_bytes=0,op_w=0,osd_laggy=0,osd_session_close=0,osd_session_open=0,osd_sessions=0,osdop_append=0,osdop_call=0,osdop_clonerange=0,osdop_cmpxattr=0,osdop_create=0,osdop_delete=0,osdop_getxattr=0,osdop_mapext=0,osdop_notify=0,osdop_other=0,osdop_pgls=0,osdop_pgls_filter=0,osdop_read=0,osdop_resetxattrs=0,osdop_rmxattr=0,osdop_setxattr=0,osdop_sparse_read=0,osdop_src_cmpxattr=0,osdop_stat=0,osdop_truncate=0,osdop_watch=0,osdop_write=0,osdop_writefull=0,osdop_writesame=0,osdop_zero=0,poolop_active=0,poolop_resend=0,poolop_send=0,poolstat_active=0,poolstat_resend=0,poolstat_send=0,statfs_active=0,statfs_resend=0,statfs_send=0 1587117698000000000
> ceph,collection=finisher-commit_finisher,host=stefanosd1,id=0,type=osd complete_latency.avgcount=11,complete_latency.avgtime=0.003447516,complete_latency.sum=0.037922681,queue_len=0 1587117698000000000
> ceph,collection=throttle-objecter_ops,host=stefanosd1,id=0,type=osd get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=1024,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=AsyncMessenger::Worker-0,host=stefanosd1,id=0,type=osd msgr_active_connections=2128,msgr_created_connections=33685,msgr_recv_bytes=8679123051,msgr_recv_messages=4200356,msgr_running_fast_dispatch_time=151.889337454,msgr_running_recv_time=297.632294886,msgr_running_send_time=599.20020523,msgr_running_total_time=1321.361931202,msgr_send_bytes=11716202897,msgr_send_messages=4347418 1587117698000000000
> ceph,collection=throttle-osd_client_bytes,host=stefanosd1,id=0,type=osd get=476554,get_or_fail_fail=0,get_or_fail_success=476554,get_started=0,get_sum=103413728,max=524288000,put=476587,put_sum=103413728,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-bluestore_throttle_deferred_bytes,host=stefanosd1,id=0,type=osd get=11,get_or_fail_fail=0,get_or_fail_success=11,get_started=0,get_sum=7723117,max=201326592,put=0,put_sum=0,take=0,take_sum=0,val=7723117,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-cluster,host=stefanosd1,id=1,type=osd get=860895,get_or_fail_fail=0,get_or_fail_success=860895,get_started=0,get_sum=596482256,max=104857600,put=860895,put_sum=596482256,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-objecter_ops,host=stefanosd1,id=1,type=osd get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=1024,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-objecter_bytes,host=stefanosd1,id=1,type=osd get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=104857600,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=finisher-defered_finisher,host=stefanosd1,id=1,type=osd complete_latency.avgcount=0,complete_latency.avgtime=0,complete_latency.sum=0,queue_len=0 1587117698000000000
> ceph,collection=osd,host=stefanosd1,id=1,type=osd agent_evict=0,agent_flush=0,agent_skip=0,agent_wake=0,cached_crc=0,cached_crc_adjusted=0,copyfrom=0,heartbeat_to_peers=7,loadavg=11,map_message_epoch_dups=29,map_message_epochs=50,map_messages=39,messages_delayed_for_map=0,missed_crc=0,numpg=188,numpg_primary=71,numpg_removing=0,numpg_replica=117,numpg_stray=0,object_ctx_cache_hit=1349777,object_ctx_cache_total=2934118,op=1319230,op_before_dequeue_op_lat.avgcount=3792053,op_before_dequeue_op_lat.avgtime=0.000405802,op_before_dequeue_op_lat.sum=1538.826381623,op_before_queue_op_lat.avgcount=3778690,op_before_queue_op_lat.avgtime=0.000033273,op_before_queue_op_lat.sum=125.731131596,op_cache_hit=0,op_in_bytes=0,op_latency.avgcount=1319230,op_latency.avgtime=0.002858138,op_latency.sum=3770.541581676,op_out_bytes=1789210,op_prepare_latency.avgcount=1336472,op_prepare_latency.avgtime=0.000279458,op_prepare_latency.sum=373.488913339,op_process_latency.avgcount=1319230,op_process_latency.avgtime=0.002666408,op_process_latency.sum=3517.606407526,op_r=1075394,op_r_latency.avgcount=1075394,op_r_latency.avgtime=0.000303779,op_r_latency.sum=326.682443032,op_r_out_bytes=1789210,op_r_prepare_latency.avgcount=1075394,op_r_prepare_latency.avgtime=0.000171228,op_r_prepare_latency.sum=184.138580631,op_r_process_latency.avgcount=1075394,op_r_process_latency.avgtime=0.00011609,op_r_process_latency.sum=124.842894319,op_rw=243832,op_rw_in_bytes=0,op_rw_latency.avgcount=243832,op_rw_latency.avgtime=0.014123636,op_rw_latency.sum=3443.79445124,op_rw_out_bytes=0,op_rw_prepare_latency.avgcount=261072,op_rw_prepare_latency.avgtime=0.000725265,op_rw_prepare_latency.sum=189.346543463,op_rw_process_latency.avgcount=243832,op_rw_process_latency.avgtime=0.013914089,op_rw_process_latency.sum=3392.700241086,op_w=4,op_w_in_bytes=0,op_w_latency.avgcount=4,op_w_latency.avgtime=0.016171851,op_w_latency.sum=0.064687404,op_w_prepare_latency.avgcount=6,op_w_prepare_latency.avgtime=0.00063154,op_w_prepare_latency.sum=0.003789245,op_w_process_latency.avgcount=4,op_w_process_latency.avgtime=0.01581803,op_w_process_latency.sum=0.063272121,op_wip=0,osd_map_bl_cache_hit=36,osd_map_bl_cache_miss=40,osd_map_cache_hit=5404,osd_map_cache_miss=14,osd_map_cache_miss_low=0,osd_map_cache_miss_low_avg.avgcount=0,osd_map_cache_miss_low_avg.sum=0,osd_pg_biginfo=2333,osd_pg_fastinfo=576157,osd_pg_info=591751,osd_tier_flush_lat.avgcount=0,osd_tier_flush_lat.avgtime=0,osd_tier_flush_lat.sum=0,osd_tier_promote_lat.avgcount=0,osd_tier_promote_lat.avgtime=0,osd_tier_promote_lat.sum=0,osd_tier_r_lat.avgcount=0,osd_tier_r_lat.avgtime=0,osd_tier_r_lat.sum=0,pull=0,push=22,push_out_bytes=0,recovery_bytes=0,recovery_ops=21,stat_bytes=107369988096,stat_bytes_avail=106271997952,stat_bytes_used=1097990144,subop=306946,subop_in_bytes=204236742,subop_latency.avgcount=306946,subop_latency.avgtime=0.006744881,subop_latency.sum=2070.314452989,subop_pull=0,subop_pull_latency.avgcount=0,subop_pull_latency.avgtime=0,subop_pull_latency.sum=0,subop_push=0,subop_push_in_bytes=0,subop_push_latency.avgcount=0,subop_push_latency.avgtime=0,subop_push_latency.sum=0,subop_w=306946,subop_w_in_bytes=204236742,subop_w_latency.avgcount=306946,subop_w_latency.avgtime=0.006744881,subop_w_latency.sum=2070.314452989,tier_clean=0,tier_delay=0,tier_dirty=8,tier_evict=0,tier_flush=0,tier_flush_fail=0,tier_promote=0,tier_proxy_read=0,tier_proxy_write=0,tier_try_flush=0,tier_try_flush_fail=0,tier_whiteout=0 1587117698000000000
> ceph,collection=objecter,host=stefanosd1,id=1,type=osd command_active=0,command_resend=0,command_send=0,linger_active=0,linger_ping=0,linger_resend=0,linger_send=0,map_epoch=203,map_full=0,map_inc=19,omap_del=0,omap_rd=0,omap_wr=0,op=0,op_active=0,op_laggy=0,op_pg=0,op_r=0,op_reply=0,op_resend=0,op_rmw=0,op_send=0,op_send_bytes=0,op_w=0,osd_laggy=0,osd_session_close=0,osd_session_open=0,osd_sessions=0,osdop_append=0,osdop_call=0,osdop_clonerange=0,osdop_cmpxattr=0,osdop_create=0,osdop_delete=0,osdop_getxattr=0,osdop_mapext=0,osdop_notify=0,osdop_other=0,osdop_pgls=0,osdop_pgls_filter=0,osdop_read=0,osdop_resetxattrs=0,osdop_rmxattr=0,osdop_setxattr=0,osdop_sparse_read=0,osdop_src_cmpxattr=0,osdop_stat=0,osdop_truncate=0,osdop_watch=0,osdop_write=0,osdop_writefull=0,osdop_writesame=0,osdop_zero=0,poolop_active=0,poolop_resend=0,poolop_send=0,poolstat_active=0,poolstat_resend=0,poolstat_send=0,statfs_active=0,statfs_resend=0,statfs_send=0 1587117698000000000
> ceph,collection=AsyncMessenger::Worker-0,host=stefanosd1,id=1,type=osd msgr_active_connections=1356,msgr_created_connections=12290,msgr_recv_bytes=8577187219,msgr_recv_messages=6387040,msgr_running_fast_dispatch_time=475.903632306,msgr_running_recv_time=425.937196699,msgr_running_send_time=783.676217521,msgr_running_total_time=1989.242459076,msgr_send_bytes=12583034449,msgr_send_messages=6074344 1587117698000000000
> ceph,collection=bluestore,host=stefanosd1,id=1,type=osd bluestore_allocated=24182784,bluestore_blob_split=0,bluestore_blobs=88,bluestore_buffer_bytes=614400,bluestore_buffer_hit_bytes=142047,bluestore_buffer_miss_bytes=541480,bluestore_buffers=41,bluestore_compressed=0,bluestore_compressed_allocated=0,bluestore_compressed_original=0,bluestore_extent_compress=0,bluestore_extents=88,bluestore_fragmentation_micros=1,bluestore_gc_merged=0,bluestore_onode_hits=1403948,bluestore_onode_misses=1584732,bluestore_onode_reshard=0,bluestore_onode_shard_hits=0,bluestore_onode_shard_misses=0,bluestore_onodes=459,bluestore_read_eio=0,bluestore_reads_with_retries=0,bluestore_stored=1985647,bluestore_txc=593150,bluestore_write_big=0,bluestore_write_big_blobs=0,bluestore_write_big_bytes=0,bluestore_write_small=58,bluestore_write_small_bytes=343091,bluestore_write_small_deferred=20,bluestore_write_small_new=38,bluestore_write_small_pre_read=20,bluestore_write_small_unused=0,commit_lat.avgcount=593150,commit_lat.avgtime=0.006514834,commit_lat.sum=3864.274280733,compress_lat.avgcount=0,compress_lat.avgtime=0,compress_lat.sum=0,compress_rejected_count=0,compress_success_count=0,csum_lat.avgcount=60,csum_lat.avgtime=0.000028258,csum_lat.sum=0.001695512,decompress_lat.avgcount=0,decompress_lat.avgtime=0,decompress_lat.sum=0,deferred_write_bytes=0,deferred_write_ops=0,kv_commit_lat.avgcount=578129,kv_commit_lat.avgtime=0.00570707,kv_commit_lat.sum=3299.423186928,kv_final_lat.avgcount=578124,kv_final_lat.avgtime=0.000042752,kv_final_lat.sum=24.716171934,kv_flush_lat.avgcount=578129,kv_flush_lat.avgtime=0.000000209,kv_flush_lat.sum=0.121169044,kv_sync_lat.avgcount=578129,kv_sync_lat.avgtime=0.00570728,kv_sync_lat.sum=3299.544355972,omap_lower_bound_lat.avgcount=22,omap_lower_bound_lat.avgtime=0.000005979,omap_lower_bound_lat.sum=0.000131539,omap_next_lat.avgcount=13248,omap_next_lat.avgtime=0.000004836,omap_next_lat.sum=0.064077797,omap_seek_to_first_lat.avgcount=525,omap_seek_to_first_lat.avgtime=0.000004906,omap_seek_to_first_lat.sum=0.002575786,omap_upper_bound_lat.avgcount=0,omap_upper_bound_lat.avgtime=0,omap_upper_bound_lat.sum=0,read_lat.avgcount=406,read_lat.avgtime=0.000383254,read_lat.sum=0.155601529,read_onode_meta_lat.avgcount=483,read_onode_meta_lat.avgtime=0.000008805,read_onode_meta_lat.sum=0.004252832,read_wait_aio_lat.avgcount=77,read_wait_aio_lat.avgtime=0.001907361,read_wait_aio_lat.sum=0.146866799,state_aio_wait_lat.avgcount=593150,state_aio_wait_lat.avgtime=0.000000388,state_aio_wait_lat.sum=0.230498048,state_deferred_aio_wait_lat.avgcount=0,state_deferred_aio_wait_lat.avgtime=0,state_deferred_aio_wait_lat.sum=0,state_deferred_cleanup_lat.avgcount=0,state_deferred_cleanup_lat.avgtime=0,state_deferred_cleanup_lat.sum=0,state_deferred_queued_lat.avgcount=0,state_deferred_queued_lat.avgtime=0,state_deferred_queued_lat.sum=0,state_done_lat.avgcount=593140,state_done_lat.avgtime=0.000003048,state_done_lat.sum=1.80789161,state_finishing_lat.avgcount=593140,state_finishing_lat.avgtime=0.000000325,state_finishing_lat.sum=0.192952339,state_io_done_lat.avgcount=593150,state_io_done_lat.avgtime=0.000001202,state_io_done_lat.sum=0.713333116,state_kv_commiting_lat.avgcount=593150,state_kv_commiting_lat.avgtime=0.005788541,state_kv_commiting_lat.sum=3433.473378536,state_kv_done_lat.avgcount=593150,state_kv_done_lat.avgtime=0.000001472,state_kv_done_lat.sum=0.873559611,state_kv_queued_lat.avgcount=593150,state_kv_queued_lat.avgtime=0.000634215,state_kv_queued_lat.sum=376.18491577,state_prepare_lat.avgcount=593150,state_prepare_lat.avgtime=0.000089694,state_prepare_lat.sum=53.202464675,submit_lat.avgcount=593150,submit_lat.avgtime=0.000127856,submit_lat.sum=75.83816759,throttle_lat.avgcount=593150,throttle_lat.avgtime=0.000001726,throttle_lat.sum=1.023832181,write_pad_bytes=144333,write_penalty_read_ops=0 1587117698000000000
> ceph,collection=throttle-osd_client_bytes,host=stefanosd1,id=1,type=osd get=2920772,get_or_fail_fail=0,get_or_fail_success=2920772,get_started=0,get_sum=739935873,max=524288000,put=4888498,put_sum=739935873,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_front_client,host=stefanosd1,id=1,type=osd get=2605442,get_or_fail_fail=0,get_or_fail_success=2605442,get_started=0,get_sum=5221305768,max=104857600,put=2605442,put_sum=5221305768,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=AsyncMessenger::Worker-2,host=stefanosd1,id=1,type=osd msgr_active_connections=1375,msgr_created_connections=12689,msgr_recv_bytes=6393440855,msgr_recv_messages=3260458,msgr_running_fast_dispatch_time=120.622437418,msgr_running_recv_time=225.24709441,msgr_running_send_time=499.150587343,msgr_running_total_time=1043.340296846,msgr_send_bytes=11134862571,msgr_send_messages=3450760 1587117698000000000
> ceph,collection=bluefs,host=stefanosd1,id=1,type=osd bytes_written_slow=0,bytes_written_sst=19824993,bytes_written_wal=1788507023,db_total_bytes=4294967296,db_used_bytes=522190848,files_written_sst=4,files_written_wal=2,gift_bytes=0,log_bytes=1056768,log_compactions=2,logged_bytes=1933271040,max_bytes_db=1483735040,max_bytes_slow=0,max_bytes_wal=0,num_files=12,reclaim_bytes=0,slow_total_bytes=0,slow_used_bytes=0,wal_total_bytes=0,wal_used_bytes=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_back_client,host=stefanosd1,id=1,type=osd get=2605442,get_or_fail_fail=0,get_or_fail_success=2605442,get_started=0,get_sum=5221305768,max=104857600,put=2605442,put_sum=5221305768,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-bluestore_throttle_deferred_bytes,host=stefanosd1,id=1,type=osd get=10,get_or_fail_fail=0,get_or_fail_success=10,get_started=0,get_sum=7052009,max=201326592,put=0,put_sum=0,take=0,take_sum=0,val=7052009,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=rocksdb,host=stefanosd1,id=1,type=osd compact=0,compact_queue_len=0,compact_queue_merge=0,compact_range=0,get=1586061,get_latency.avgcount=1586061,get_latency.avgtime=0.000083009,get_latency.sum=131.658296684,rocksdb_write_delay_time.avgcount=0,rocksdb_write_delay_time.avgtime=0,rocksdb_write_delay_time.sum=0,rocksdb_write_memtable_time.avgcount=0,rocksdb_write_memtable_time.avgtime=0,rocksdb_write_memtable_time.sum=0,rocksdb_write_pre_and_post_time.avgcount=0,rocksdb_write_pre_and_post_time.avgtime=0,rocksdb_write_pre_and_post_time.sum=0,rocksdb_write_wal_time.avgcount=0,rocksdb_write_wal_time.avgtime=0,rocksdb_write_wal_time.sum=0,submit_latency.avgcount=593150,submit_latency.avgtime=0.000172072,submit_latency.sum=102.064900673,submit_sync_latency.avgcount=578129,submit_sync_latency.avgtime=0.005447017,submit_sync_latency.sum=3149.078822012,submit_transaction=593150,submit_transaction_sync=578129 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_back_server,host=stefanosd1,id=1,type=osd get=2607669,get_or_fail_fail=0,get_or_fail_success=2607669,get_started=0,get_sum=5225768676,max=104857600,put=2607669,put_sum=5225768676,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=recoverystate_perf,host=stefanosd1,id=1,type=osd activating_latency.avgcount=104,activating_latency.avgtime=0.071646485,activating_latency.sum=7.451234493,active_latency.avgcount=33,active_latency.avgtime=1734.369034268,active_latency.sum=57234.178130859,backfilling_latency.avgcount=1,backfilling_latency.avgtime=2.598401698,backfilling_latency.sum=2.598401698,clean_latency.avgcount=33,clean_latency.avgtime=1734.213467342,clean_latency.sum=57229.044422292,down_latency.avgcount=0,down_latency.avgtime=0,down_latency.sum=0,getinfo_latency.avgcount=167,getinfo_latency.avgtime=0.373444627,getinfo_latency.sum=62.365252849,getlog_latency.avgcount=105,getlog_latency.avgtime=0.003575062,getlog_latency.sum=0.375381569,getmissing_latency.avgcount=104,getmissing_latency.avgtime=0.000157091,getmissing_latency.sum=0.016337565,incomplete_latency.avgcount=0,incomplete_latency.avgtime=0,incomplete_latency.sum=0,initial_latency.avgcount=188,initial_latency.avgtime=0.001833512,initial_latency.sum=0.344700343,notbackfilling_latency.avgcount=0,notbackfilling_latency.avgtime=0,notbackfilling_latency.sum=0,notrecovering_latency.avgcount=0,notrecovering_latency.avgtime=0,notrecovering_latency.sum=0,peering_latency.avgcount=167,peering_latency.avgtime=1.501818082,peering_latency.sum=250.803619796,primary_latency.avgcount=97,primary_latency.avgtime=591.344286378,primary_latency.sum=57360.395778762,recovered_latency.avgcount=104,recovered_latency.avgtime=0.000291138,recovered_latency.sum=0.030278433,recovering_latency.avgcount=2,recovering_latency.avgtime=0.142378096,recovering_latency.sum=0.284756192,replicaactive_latency.avgcount=32,replicaactive_latency.avgtime=1788.474901442,replicaactive_latency.sum=57231.196846165,repnotrecovering_latency.avgcount=34,repnotrecovering_latency.avgtime=1683.273587087,repnotrecovering_latency.sum=57231.301960987,reprecovering_latency.avgcount=2,reprecovering_latency.avgtime=0.418094818,reprecovering_latency.sum=0.836189637,repwaitbackfillreserved_latency.avgcount=0,repwaitbackfillreserved_latency.avgtime=0,repwaitbackfillreserved_latency.sum=0,repwaitrecoveryreserved_latency.avgcount=2,repwaitrecoveryreserved_latency.avgtime=0.000588413,repwaitrecoveryreserved_latency.sum=0.001176827,reset_latency.avgcount=433,reset_latency.avgtime=0.15669689,reset_latency.sum=67.849753631,start_latency.avgcount=433,start_latency.avgtime=0.000412707,start_latency.sum=0.178702508,started_latency.avgcount=245,started_latency.avgtime=468.419544137,started_latency.sum=114762.788313581,stray_latency.avgcount=266,stray_latency.avgtime=1.489291271,stray_latency.sum=396.151478238,waitactingchange_latency.avgcount=1,waitactingchange_latency.avgtime=0.982689906,waitactingchange_latency.sum=0.982689906,waitlocalbackfillreserved_latency.avgcount=1,waitlocalbackfillreserved_latency.avgtime=0.000542092,waitlocalbackfillreserved_latency.sum=0.000542092,waitlocalrecoveryreserved_latency.avgcount=2,waitlocalrecoveryreserved_latency.avgtime=0.00391669,waitlocalrecoveryreserved_latency.sum=0.007833381,waitremotebackfillreserved_latency.avgcount=1,waitremotebackfillreserved_latency.avgtime=0.003110409,waitremotebackfillreserved_latency.sum=0.003110409,waitremoterecoveryreserved_latency.avgcount=2,waitremoterecoveryreserved_latency.avgtime=0.012229338,waitremoterecoveryreserved_latency.sum=0.024458677,waitupthru_latency.avgcount=104,waitupthru_latency.avgtime=1.807608905,waitupthru_latency.sum=187.991326197 1587117698000000000
> ceph,collection=AsyncMessenger::Worker-1,host=stefanosd1,id=1,type=osd msgr_active_connections=1289,msgr_created_connections=9469,msgr_recv_bytes=8348149800,msgr_recv_messages=5048791,msgr_running_fast_dispatch_time=313.754567889,msgr_running_recv_time=372.054833029,msgr_running_send_time=694.900405016,msgr_running_total_time=1656.294769387,msgr_send_bytes=11550148208,msgr_send_messages=5175962 1587117698000000000
> ceph,collection=throttle-bluestore_throttle_bytes,host=stefanosd1,id=1,type=osd get=593150,get_or_fail_fail=0,get_or_fail_success=0,get_started=593150,get_sum=398147414260,max=67108864,put=578129,put_sum=398147414260,take=0,take_sum=0,val=0,wait.avgcount=29,wait.avgtime=0.000972655,wait.sum=0.028207005 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-ms_objecter,host=stefanosd1,id=1,type=osd get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=104857600,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=cct,host=stefanosd1,id=1,type=osd total_workers=6,unhealthy_workers=0 1587117698000000000
> ceph,collection=mempool,host=stefanosd1,id=1,type=osd bloom_filter_bytes=0,bloom_filter_items=0,bluefs_bytes=13064,bluefs_items=593,bluestore_alloc_bytes=230288,bluestore_alloc_items=28786,bluestore_cache_data_bytes=614400,bluestore_cache_data_items=41,bluestore_cache_onode_bytes=301104,bluestore_cache_onode_items=459,bluestore_cache_other_bytes=230945,bluestore_cache_other_items=26119,bluestore_fsck_bytes=0,bluestore_fsck_items=0,bluestore_txc_bytes=7520,bluestore_txc_items=10,bluestore_writing_bytes=0,bluestore_writing_deferred_bytes=657768,bluestore_writing_deferred_items=172,bluestore_writing_items=0,buffer_anon_bytes=2328515,buffer_anon_items=271,buffer_meta_bytes=5808,buffer_meta_items=66,mds_co_bytes=0,mds_co_items=0,osd_bytes=2406400,osd_items=188,osd_mapbl_bytes=139623,osd_mapbl_items=9,osd_pglog_bytes=6768784,osd_pglog_items=18179,osdmap_bytes=710892,osdmap_items=4426,osdmap_mapping_bytes=0,osdmap_mapping_items=0,pgmap_bytes=0,pgmap_items=0,unittest_1_bytes=0,unittest_1_items=0,unittest_2_bytes=0,unittest_2_items=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-client,host=stefanosd1,id=1,type=osd get=2932513,get_or_fail_fail=0,get_or_fail_success=2932513,get_started=0,get_sum=740620215,max=104857600,put=2932513,put_sum=740620215,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_front_server,host=stefanosd1,id=1,type=osd get=2607669,get_or_fail_fail=0,get_or_fail_success=2607669,get_started=0,get_sum=5225768676,max=104857600,put=2607669,put_sum=5225768676,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=finisher-commit_finisher,host=stefanosd1,id=1,type=osd complete_latency.avgcount=10,complete_latency.avgtime=0.002884646,complete_latency.sum=0.028846469,queue_len=0 1587117698000000000
> ceph,collection=finisher-objecter-finisher-0,host=stefanosd1,id=1,type=osd complete_latency.avgcount=0,complete_latency.avgtime=0,complete_latency.sum=0,queue_len=0 1587117698000000000
> ceph,collection=throttle-objecter_bytes,host=stefanosd1,id=2,type=osd get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=104857600,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=finisher-commit_finisher,host=stefanosd1,id=2,type=osd complete_latency.avgcount=11,complete_latency.avgtime=0.002714416,complete_latency.sum=0.029858583,queue_len=0 1587117698000000000
> ceph,collection=finisher-defered_finisher,host=stefanosd1,id=2,type=osd complete_latency.avgcount=0,complete_latency.avgtime=0,complete_latency.sum=0,queue_len=0 1587117698000000000
> ceph,collection=objecter,host=stefanosd1,id=2,type=osd command_active=0,command_resend=0,command_send=0,linger_active=0,linger_ping=0,linger_resend=0,linger_send=0,map_epoch=203,map_full=0,map_inc=19,omap_del=0,omap_rd=0,omap_wr=0,op=0,op_active=0,op_laggy=0,op_pg=0,op_r=0,op_reply=0,op_resend=0,op_rmw=0,op_send=0,op_send_bytes=0,op_w=0,osd_laggy=0,osd_session_close=0,osd_session_open=0,osd_sessions=0,osdop_append=0,osdop_call=0,osdop_clonerange=0,osdop_cmpxattr=0,osdop_create=0,osdop_delete=0,osdop_getxattr=0,osdop_mapext=0,osdop_notify=0,osdop_other=0,osdop_pgls=0,osdop_pgls_filter=0,osdop_read=0,osdop_resetxattrs=0,osdop_rmxattr=0,osdop_setxattr=0,osdop_sparse_read=0,osdop_src_cmpxattr=0,osdop_stat=0,osdop_truncate=0,osdop_watch=0,osdop_write=0,osdop_writefull=0,osdop_writesame=0,osdop_zero=0,poolop_active=0,poolop_resend=0,poolop_send=0,poolstat_active=0,poolstat_resend=0,poolstat_send=0,statfs_active=0,statfs_resend=0,statfs_send=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_back_client,host=stefanosd1,id=2,type=osd get=2607136,get_or_fail_fail=0,get_or_fail_success=2607136,get_started=0,get_sum=5224700544,max=104857600,put=2607136,put_sum=5224700544,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=mempool,host=stefanosd1,id=2,type=osd bloom_filter_bytes=0,bloom_filter_items=0,bluefs_bytes=11624,bluefs_items=522,bluestore_alloc_bytes=230288,bluestore_alloc_items=28786,bluestore_cache_data_bytes=614400,bluestore_cache_data_items=41,bluestore_cache_onode_bytes=228288,bluestore_cache_onode_items=348,bluestore_cache_other_bytes=174158,bluestore_cache_other_items=18527,bluestore_fsck_bytes=0,bluestore_fsck_items=0,bluestore_txc_bytes=8272,bluestore_txc_items=11,bluestore_writing_bytes=0,bluestore_writing_deferred_bytes=670130,bluestore_writing_deferred_items=176,bluestore_writing_items=0,buffer_anon_bytes=2311664,buffer_anon_items=244,buffer_meta_bytes=5456,buffer_meta_items=62,mds_co_bytes=0,mds_co_items=0,osd_bytes=1920000,osd_items=150,osd_mapbl_bytes=155152,osd_mapbl_items=10,osd_pglog_bytes=3393520,osd_pglog_items=9128,osdmap_bytes=710892,osdmap_items=4426,osdmap_mapping_bytes=0,osdmap_mapping_items=0,pgmap_bytes=0,pgmap_items=0,unittest_1_bytes=0,unittest_1_items=0,unittest_2_bytes=0,unittest_2_items=0 1587117698000000000
> ceph,collection=osd,host=stefanosd1,id=2,type=osd agent_evict=0,agent_flush=0,agent_skip=0,agent_wake=0,cached_crc=0,cached_crc_adjusted=0,copyfrom=0,heartbeat_to_peers=7,loadavg=11,map_message_epoch_dups=37,map_message_epochs=56,map_messages=37,messages_delayed_for_map=0,missed_crc=0,numpg=150,numpg_primary=59,numpg_removing=0,numpg_replica=91,numpg_stray=0,object_ctx_cache_hit=705923,object_ctx_cache_total=705951,op=690584,op_before_dequeue_op_lat.avgcount=1155697,op_before_dequeue_op_lat.avgtime=0.000217926,op_before_dequeue_op_lat.sum=251.856487141,op_before_queue_op_lat.avgcount=1148445,op_before_queue_op_lat.avgtime=0.000039696,op_before_queue_op_lat.sum=45.589516462,op_cache_hit=0,op_in_bytes=0,op_latency.avgcount=690584,op_latency.avgtime=0.002488685,op_latency.sum=1718.646504654,op_out_bytes=1026000,op_prepare_latency.avgcount=698700,op_prepare_latency.avgtime=0.000300375,op_prepare_latency.sum=209.872029659,op_process_latency.avgcount=690584,op_process_latency.avgtime=0.00230742,op_process_latency.sum=1593.46739165,op_r=548020,op_r_latency.avgcount=548020,op_r_latency.avgtime=0.000298287,op_r_latency.sum=163.467760649,op_r_out_bytes=1026000,op_r_prepare_latency.avgcount=548020,op_r_prepare_latency.avgtime=0.000186359,op_r_prepare_latency.sum=102.128629183,op_r_process_latency.avgcount=548020,op_r_process_latency.avgtime=0.00012716,op_r_process_latency.sum=69.686468884,op_rw=142562,op_rw_in_bytes=0,op_rw_latency.avgcount=142562,op_rw_latency.avgtime=0.010908597,op_rw_latency.sum=1555.151525732,op_rw_out_bytes=0,op_rw_prepare_latency.avgcount=150678,op_rw_prepare_latency.avgtime=0.000715043,op_rw_prepare_latency.sum=107.741399304,op_rw_process_latency.avgcount=142562,op_rw_process_latency.avgtime=0.01068836,op_rw_process_latency.sum=1523.754107887,op_w=2,op_w_in_bytes=0,op_w_latency.avgcount=2,op_w_latency.avgtime=0.013609136,op_w_latency.sum=0.027218273,op_w_prepare_latency.avgcount=2,op_w_prepare_latency.avgtime=0.001000586,op_w_prepare_latency.sum=0.002001172,op_w_process_latency.avgcount=2,op_w_process_latency.avgtime=0.013407439,op_w_process_latency.sum=0.026814879,op_wip=0,osd_map_bl_cache_hit=15,osd_map_bl_cache_miss=41,osd_map_cache_hit=4241,osd_map_cache_miss=14,osd_map_cache_miss_low=0,osd_map_cache_miss_low_avg.avgcount=0,osd_map_cache_miss_low_avg.sum=0,osd_pg_biginfo=1824,osd_pg_fastinfo=285998,osd_pg_info=294869,osd_tier_flush_lat.avgcount=0,osd_tier_flush_lat.avgtime=0,osd_tier_flush_lat.sum=0,osd_tier_promote_lat.avgcount=0,osd_tier_promote_lat.avgtime=0,osd_tier_promote_lat.sum=0,osd_tier_r_lat.avgcount=0,osd_tier_r_lat.avgtime=0,osd_tier_r_lat.sum=0,pull=0,push=1,push_out_bytes=0,recovery_bytes=0,recovery_ops=0,stat_bytes=107369988096,stat_bytes_avail=106271932416,stat_bytes_used=1098055680,subop=134165,subop_in_bytes=89501237,subop_latency.avgcount=134165,subop_latency.avgtime=0.007313523,subop_latency.sum=981.218888627,subop_pull=0,subop_pull_latency.avgcount=0,subop_pull_latency.avgtime=0,subop_pull_latency.sum=0,subop_push=0,subop_push_in_bytes=0,subop_push_latency.avgcount=0,subop_push_latency.avgtime=0,subop_push_latency.sum=0,subop_w=134165,subop_w_in_bytes=89501237,subop_w_latency.avgcount=134165,subop_w_latency.avgtime=0.007313523,subop_w_latency.sum=981.218888627,tier_clean=0,tier_delay=0,tier_dirty=4,tier_evict=0,tier_flush=0,tier_flush_fail=0,tier_promote=0,tier_proxy_read=0,tier_proxy_write=0,tier_try_flush=0,tier_try_flush_fail=0,tier_whiteout=0 1587117698000000000
> ceph,collection=AsyncMessenger::Worker-1,host=stefanosd1,id=2,type=osd msgr_active_connections=746,msgr_created_connections=15212,msgr_recv_bytes=8633229006,msgr_recv_messages=4284202,msgr_running_fast_dispatch_time=153.820479102,msgr_running_recv_time=282.031655658,msgr_running_send_time=585.444749736,msgr_running_total_time=1231.431789242,msgr_send_bytes=11962769351,msgr_send_messages=4440622 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-ms_objecter,host=stefanosd1,id=2,type=osd get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=104857600,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_front_client,host=stefanosd1,id=2,type=osd get=2607136,get_or_fail_fail=0,get_or_fail_success=2607136,get_started=0,get_sum=5224700544,max=104857600,put=2607136,put_sum=5224700544,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=bluefs,host=stefanosd1,id=2,type=osd bytes_written_slow=0,bytes_written_sst=9065815,bytes_written_wal=901884611,db_total_bytes=4294967296,db_used_bytes=546308096,files_written_sst=3,files_written_wal=2,gift_bytes=0,log_bytes=225726464,log_compactions=1,logged_bytes=1195945984,max_bytes_db=1234173952,max_bytes_slow=0,max_bytes_wal=0,num_files=11,reclaim_bytes=0,slow_total_bytes=0,slow_used_bytes=0,wal_total_bytes=0,wal_used_bytes=0 1587117698000000000
> ceph,collection=recoverystate_perf,host=stefanosd1,id=2,type=osd activating_latency.avgcount=88,activating_latency.avgtime=0.086149065,activating_latency.sum=7.581117751,active_latency.avgcount=29,active_latency.avgtime=1790.849396082,active_latency.sum=51934.632486379,backfilling_latency.avgcount=0,backfilling_latency.avgtime=0,backfilling_latency.sum=0,clean_latency.avgcount=29,clean_latency.avgtime=1790.754765195,clean_latency.sum=51931.888190683,down_latency.avgcount=0,down_latency.avgtime=0,down_latency.sum=0,getinfo_latency.avgcount=134,getinfo_latency.avgtime=0.427567953,getinfo_latency.sum=57.294105786,getlog_latency.avgcount=88,getlog_latency.avgtime=0.011810192,getlog_latency.sum=1.03929697,getmissing_latency.avgcount=88,getmissing_latency.avgtime=0.000104598,getmissing_latency.sum=0.009204673,incomplete_latency.avgcount=0,incomplete_latency.avgtime=0,incomplete_latency.sum=0,initial_latency.avgcount=150,initial_latency.avgtime=0.001251361,initial_latency.sum=0.187704197,notbackfilling_latency.avgcount=0,notbackfilling_latency.avgtime=0,notbackfilling_latency.sum=0,notrecovering_latency.avgcount=0,notrecovering_latency.avgtime=0,notrecovering_latency.sum=0,peering_latency.avgcount=134,peering_latency.avgtime=0.998405763,peering_latency.sum=133.786372331,primary_latency.avgcount=75,primary_latency.avgtime=693.473306562,primary_latency.sum=52010.497992212,recovered_latency.avgcount=88,recovered_latency.avgtime=0.000609715,recovered_latency.sum=0.053654964,recovering_latency.avgcount=1,recovering_latency.avgtime=0.100713031,recovering_latency.sum=0.100713031,replicaactive_latency.avgcount=21,replicaactive_latency.avgtime=1790.852354921,replicaactive_latency.sum=37607.89945336,repnotrecovering_latency.avgcount=21,repnotrecovering_latency.avgtime=1790.852315529,repnotrecovering_latency.sum=37607.898626121,reprecovering_latency.avgcount=0,reprecovering_latency.avgtime=0,reprecovering_latency.sum=0,repwaitbackfillreserved_latency.avgcount=0,repwaitbackfillreserved_latency.avgtime=0,repwaitbackfillreserved_latency.sum=0,repwaitrecoveryreserved_latency.avgcount=0,repwaitrecoveryreserved_latency.avgtime=0,repwaitrecoveryreserved_latency.sum=0,reset_latency.avgcount=346,reset_latency.avgtime=0.126826803,reset_latency.sum=43.882073917,start_latency.avgcount=346,start_latency.avgtime=0.000233277,start_latency.sum=0.080713962,started_latency.avgcount=196,started_latency.avgtime=457.885378797,started_latency.sum=89745.534244237,stray_latency.avgcount=212,stray_latency.avgtime=1.013774396,stray_latency.sum=214.920172121,waitactingchange_latency.avgcount=0,waitactingchange_latency.avgtime=0,waitactingchange_latency.sum=0,waitlocalbackfillreserved_latency.avgcount=0,waitlocalbackfillreserved_latency.avgtime=0,waitlocalbackfillreserved_latency.sum=0,waitlocalrecoveryreserved_latency.avgcount=1,waitlocalrecoveryreserved_latency.avgtime=0.001572379,waitlocalrecoveryreserved_latency.sum=0.001572379,waitremotebackfillreserved_latency.avgcount=0,waitremotebackfillreserved_latency.avgtime=0,waitremotebackfillreserved_latency.sum=0,waitremoterecoveryreserved_latency.avgcount=1,waitremoterecoveryreserved_latency.avgtime=0.012729633,waitremoterecoveryreserved_latency.sum=0.012729633,waitupthru_latency.avgcount=88,waitupthru_latency.avgtime=0.857137729,waitupthru_latency.sum=75.428120205 1587117698000000000
> ceph,collection=throttle-objecter_ops,host=stefanosd1,id=2,type=osd get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=1024,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=bluestore,host=stefanosd1,id=2,type=osd bluestore_allocated=24248320,bluestore_blob_split=0,bluestore_blobs=83,bluestore_buffer_bytes=614400,bluestore_buffer_hit_bytes=161362,bluestore_buffer_miss_bytes=534799,bluestore_buffers=41,bluestore_compressed=0,bluestore_compressed_allocated=0,bluestore_compressed_original=0,bluestore_extent_compress=0,bluestore_extents=83,bluestore_fragmentation_micros=1,bluestore_gc_merged=0,bluestore_onode_hits=723852,bluestore_onode_misses=364,bluestore_onode_reshard=0,bluestore_onode_shard_hits=0,bluestore_onode_shard_misses=0,bluestore_onodes=348,bluestore_read_eio=0,bluestore_reads_with_retries=0,bluestore_stored=1984402,bluestore_txc=295997,bluestore_write_big=0,bluestore_write_big_blobs=0,bluestore_write_big_bytes=0,bluestore_write_small=60,bluestore_write_small_bytes=343843,bluestore_write_small_deferred=22,bluestore_write_small_new=38,bluestore_write_small_pre_read=22,bluestore_write_small_unused=0,commit_lat.avgcount=295997,commit_lat.avgtime=0.006994931,commit_lat.sum=2070.478673619,compress_lat.avgcount=0,compress_lat.avgtime=0,compress_lat.sum=0,compress_rejected_count=0,compress_success_count=0,csum_lat.avgcount=47,csum_lat.avgtime=0.000034434,csum_lat.sum=0.001618423,decompress_lat.avgcount=0,decompress_lat.avgtime=0,decompress_lat.sum=0,deferred_write_bytes=0,deferred_write_ops=0,kv_commit_lat.avgcount=291889,kv_commit_lat.avgtime=0.006347015,kv_commit_lat.sum=1852.624108527,kv_final_lat.avgcount=291885,kv_final_lat.avgtime=0.00004358,kv_final_lat.sum=12.720529751,kv_flush_lat.avgcount=291889,kv_flush_lat.avgtime=0.000000211,kv_flush_lat.sum=0.061636079,kv_sync_lat.avgcount=291889,kv_sync_lat.avgtime=0.006347227,kv_sync_lat.sum=1852.685744606,omap_lower_bound_lat.avgcount=1,omap_lower_bound_lat.avgtime=0.000004482,omap_lower_bound_lat.sum=0.000004482,omap_next_lat.avgcount=6933,omap_next_lat.avgtime=0.000003956,omap_next_lat.sum=0.027427456,omap_seek_to_first_lat.avgcount=309,omap_seek_to_first_lat.avgtime=0.000005879,omap_seek_to_first_lat.sum=0.001816658,omap_upper_bound_lat.avgcount=0,omap_upper_bound_lat.avgtime=0,omap_upper_bound_lat.sum=0,read_lat.avgcount=229,read_lat.avgtime=0.000394981,read_lat.sum=0.090450704,read_onode_meta_lat.avgcount=295,read_onode_meta_lat.avgtime=0.000016832,read_onode_meta_lat.sum=0.004965516,read_wait_aio_lat.avgcount=66,read_wait_aio_lat.avgtime=0.001237841,read_wait_aio_lat.sum=0.081697561,state_aio_wait_lat.avgcount=295997,state_aio_wait_lat.avgtime=0.000000357,state_aio_wait_lat.sum=0.105827433,state_deferred_aio_wait_lat.avgcount=0,state_deferred_aio_wait_lat.avgtime=0,state_deferred_aio_wait_lat.sum=0,state_deferred_cleanup_lat.avgcount=0,state_deferred_cleanup_lat.avgtime=0,state_deferred_cleanup_lat.sum=0,state_deferred_queued_lat.avgcount=0,state_deferred_queued_lat.avgtime=0,state_deferred_queued_lat.sum=0,state_done_lat.avgcount=295986,state_done_lat.avgtime=0.000003017,state_done_lat.sum=0.893199127,state_finishing_lat.avgcount=295986,state_finishing_lat.avgtime=0.000000306,state_finishing_lat.sum=0.090792683,state_io_done_lat.avgcount=295997,state_io_done_lat.avgtime=0.000001066,state_io_done_lat.sum=0.315577655,state_kv_commiting_lat.avgcount=295997,state_kv_commiting_lat.avgtime=0.006423586,state_kv_commiting_lat.sum=1901.362268572,state_kv_done_lat.avgcount=295997,state_kv_done_lat.avgtime=0.00000155,state_kv_done_lat.sum=0.458963064,state_kv_queued_lat.avgcount=295997,state_kv_queued_lat.avgtime=0.000477234,state_kv_queued_lat.sum=141.260101773,state_prepare_lat.avgcount=295997,state_prepare_lat.avgtime=0.000091806,state_prepare_lat.sum=27.174436583,submit_lat.avgcount=295997,submit_lat.avgtime=0.000135729,submit_lat.sum=40.17557682,throttle_lat.avgcount=295997,throttle_lat.avgtime=0.000002734,throttle_lat.sum=0.809479837,write_pad_bytes=151773,write_penalty_read_ops=0 1587117698000000000
> ceph,collection=throttle-bluestore_throttle_bytes,host=stefanosd1,id=2,type=osd get=295997,get_or_fail_fail=0,get_or_fail_success=0,get_started=295997,get_sum=198686579299,max=67108864,put=291889,put_sum=198686579299,take=0,take_sum=0,val=0,wait.avgcount=83,wait.avgtime=0.003670612,wait.sum=0.304660858 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-cluster,host=stefanosd1,id=2,type=osd get=452060,get_or_fail_fail=0,get_or_fail_success=452060,get_started=0,get_sum=269934345,max=104857600,put=452060,put_sum=269934345,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-bluestore_throttle_deferred_bytes,host=stefanosd1,id=2,type=osd get=11,get_or_fail_fail=0,get_or_fail_success=11,get_started=0,get_sum=7723117,max=201326592,put=0,put_sum=0,take=0,take_sum=0,val=7723117,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_front_server,host=stefanosd1,id=2,type=osd get=2607433,get_or_fail_fail=0,get_or_fail_success=2607433,get_started=0,get_sum=5225295732,max=104857600,put=2607433,put_sum=5225295732,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=finisher-objecter-finisher-0,host=stefanosd1,id=2,type=osd complete_latency.avgcount=0,complete_latency.avgtime=0,complete_latency.sum=0,queue_len=0 1587117698000000000
> ceph,collection=cct,host=stefanosd1,id=2,type=osd total_workers=6,unhealthy_workers=0 1587117698000000000
> ceph,collection=AsyncMessenger::Worker-2,host=stefanosd1,id=2,type=osd msgr_active_connections=670,msgr_created_connections=13455,msgr_recv_bytes=6334605563,msgr_recv_messages=3287843,msgr_running_fast_dispatch_time=137.016615819,msgr_running_recv_time=240.687997039,msgr_running_send_time=471.710658466,msgr_running_total_time=1034.029109337,msgr_send_bytes=9753423475,msgr_send_messages=3439611 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-client,host=stefanosd1,id=2,type=osd get=710355,get_or_fail_fail=0,get_or_fail_success=710355,get_started=0,get_sum=166306283,max=104857600,put=710355,put_sum=166306283,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=throttle-msgr_dispatch_throttler-hb_back_server,host=stefanosd1,id=2,type=osd get=2607433,get_or_fail_fail=0,get_or_fail_success=2607433,get_started=0,get_sum=5225295732,max=104857600,put=2607433,put_sum=5225295732,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=AsyncMessenger::Worker-0,host=stefanosd1,id=2,type=osd msgr_active_connections=705,msgr_created_connections=17953,msgr_recv_bytes=7261438733,msgr_recv_messages=4496034,msgr_running_fast_dispatch_time=254.716476808,msgr_running_recv_time=272.196741555,msgr_running_send_time=571.102924903,msgr_running_total_time=1338.461077493,msgr_send_bytes=10772250508,msgr_send_messages=4192781 1587117698000000000
> ceph,collection=rocksdb,host=stefanosd1,id=2,type=osd compact=0,compact_queue_len=0,compact_queue_merge=0,compact_range=0,get=1424,get_latency.avgcount=1424,get_latency.avgtime=0.000030752,get_latency.sum=0.043792142,rocksdb_write_delay_time.avgcount=0,rocksdb_write_delay_time.avgtime=0,rocksdb_write_delay_time.sum=0,rocksdb_write_memtable_time.avgcount=0,rocksdb_write_memtable_time.avgtime=0,rocksdb_write_memtable_time.sum=0,rocksdb_write_pre_and_post_time.avgcount=0,rocksdb_write_pre_and_post_time.avgtime=0,rocksdb_write_pre_and_post_time.sum=0,rocksdb_write_wal_time.avgcount=0,rocksdb_write_wal_time.avgtime=0,rocksdb_write_wal_time.sum=0,submit_latency.avgcount=295997,submit_latency.avgtime=0.000173137,submit_latency.sum=51.248072285,submit_sync_latency.avgcount=291889,submit_sync_latency.avgtime=0.006094397,submit_sync_latency.sum=1778.887521449,submit_transaction=295997,submit_transaction_sync=291889 1587117698000000000
> ceph,collection=throttle-osd_client_bytes,host=stefanosd1,id=2,type=osd get=698701,get_or_fail_fail=0,get_or_fail_success=698701,get_started=0,get_sum=165630172,max=524288000,put=920880,put_sum=165630172,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117698000000000
> ceph,collection=mds_sessions,host=stefanmds1,id=stefanmds1,type=mds average_load=0,avg_session_uptime=0,session_add=0,session_count=0,session_remove=0,sessions_open=0,sessions_stale=0,total_load=0 1587117476000000000
> ceph,collection=mempool,host=stefanmds1,id=stefanmds1,type=mds bloom_filter_bytes=0,bloom_filter_items=0,bluefs_bytes=0,bluefs_items=0,bluestore_alloc_bytes=0,bluestore_alloc_items=0,bluestore_cache_data_bytes=0,bluestore_cache_data_items=0,bluestore_cache_onode_bytes=0,bluestore_cache_onode_items=0,bluestore_cache_other_bytes=0,bluestore_cache_other_items=0,bluestore_fsck_bytes=0,bluestore_fsck_items=0,bluestore_txc_bytes=0,bluestore_txc_items=0,bluestore_writing_bytes=0,bluestore_writing_deferred_bytes=0,bluestore_writing_deferred_items=0,bluestore_writing_items=0,buffer_anon_bytes=132069,buffer_anon_items=82,buffer_meta_bytes=0,buffer_meta_items=0,mds_co_bytes=44208,mds_co_items=154,osd_bytes=0,osd_items=0,osd_mapbl_bytes=0,osd_mapbl_items=0,osd_pglog_bytes=0,osd_pglog_items=0,osdmap_bytes=16952,osdmap_items=139,osdmap_mapping_bytes=0,osdmap_mapping_items=0,pgmap_bytes=0,pgmap_items=0,unittest_1_bytes=0,unittest_1_items=0,unittest_2_bytes=0,unittest_2_items=0 1587117476000000000
> ceph,collection=objecter,host=stefanmds1,id=stefanmds1,type=mds command_active=0,command_resend=0,command_send=0,linger_active=0,linger_ping=0,linger_resend=0,linger_send=0,map_epoch=203,map_full=0,map_inc=1,omap_del=0,omap_rd=28,omap_wr=1,op=33,op_active=0,op_laggy=0,op_pg=0,op_r=26,op_reply=33,op_resend=2,op_rmw=0,op_send=35,op_send_bytes=364,op_w=7,osd_laggy=0,osd_session_close=91462,osd_session_open=91468,osd_sessions=6,osdop_append=0,osdop_call=0,osdop_clonerange=0,osdop_cmpxattr=0,osdop_create=0,osdop_delete=5,osdop_getxattr=14,osdop_mapext=0,osdop_notify=0,osdop_other=0,osdop_pgls=0,osdop_pgls_filter=0,osdop_read=8,osdop_resetxattrs=0,osdop_rmxattr=0,osdop_setxattr=0,osdop_sparse_read=0,osdop_src_cmpxattr=0,osdop_stat=2,osdop_truncate=0,osdop_watch=0,osdop_write=0,osdop_writefull=0,osdop_writesame=0,osdop_zero=1,poolop_active=0,poolop_resend=0,poolop_send=0,poolstat_active=0,poolstat_resend=0,poolstat_send=0,statfs_active=0,statfs_resend=0,statfs_send=0 1587117476000000000
> ceph,collection=cct,host=stefanmds1,id=stefanmds1,type=mds total_workers=1,unhealthy_workers=0 1587117476000000000
> ceph,collection=mds_server,host=stefanmds1,id=stefanmds1,type=mds cap_revoke_eviction=0,dispatch_client_request=0,dispatch_server_request=0,handle_client_request=0,handle_client_session=0,handle_slave_request=0,req_create_latency.avgcount=0,req_create_latency.avgtime=0,req_create_latency.sum=0,req_getattr_latency.avgcount=0,req_getattr_latency.avgtime=0,req_getattr_latency.sum=0,req_getfilelock_latency.avgcount=0,req_getfilelock_latency.avgtime=0,req_getfilelock_latency.sum=0,req_link_latency.avgcount=0,req_link_latency.avgtime=0,req_link_latency.sum=0,req_lookup_latency.avgcount=0,req_lookup_latency.avgtime=0,req_lookup_latency.sum=0,req_lookuphash_latency.avgcount=0,req_lookuphash_latency.avgtime=0,req_lookuphash_latency.sum=0,req_lookupino_latency.avgcount=0,req_lookupino_latency.avgtime=0,req_lookupino_latency.sum=0,req_lookupname_latency.avgcount=0,req_lookupname_latency.avgtime=0,req_lookupname_latency.sum=0,req_lookupparent_latency.avgcount=0,req_lookupparent_latency.avgtime=0,req_lookupparent_latency.sum=0,req_lookupsnap_latency.avgcount=0,req_lookupsnap_latency.avgtime=0,req_lookupsnap_latency.sum=0,req_lssnap_latency.avgcount=0,req_lssnap_latency.avgtime=0,req_lssnap_latency.sum=0,req_mkdir_latency.avgcount=0,req_mkdir_latency.avgtime=0,req_mkdir_latency.sum=0,req_mknod_latency.avgcount=0,req_mknod_latency.avgtime=0,req_mknod_latency.sum=0,req_mksnap_latency.avgcount=0,req_mksnap_latency.avgtime=0,req_mksnap_latency.sum=0,req_open_latency.avgcount=0,req_open_latency.avgtime=0,req_open_latency.sum=0,req_readdir_latency.avgcount=0,req_readdir_latency.avgtime=0,req_readdir_latency.sum=0,req_rename_latency.avgcount=0,req_rename_latency.avgtime=0,req_rename_latency.sum=0,req_renamesnap_latency.avgcount=0,req_renamesnap_latency.avgtime=0,req_renamesnap_latency.sum=0,req_rmdir_latency.avgcount=0,req_rmdir_latency.avgtime=0,req_rmdir_latency.sum=0,req_rmsnap_latency.avgcount=0,req_rmsnap_latency.avgtime=0,req_rmsnap_latency.sum=0,req_rmxattr_latency.avgcount=0,req_rmxattr_latency.avgtime=0,req_rmxattr_latency.sum=0,req_setattr_latency.avgcount=0,req_setattr_latency.avgtime=0,req_setattr_latency.sum=0,req_setdirlayout_latency.avgcount=0,req_setdirlayout_latency.avgtime=0,req_setdirlayout_latency.sum=0,req_setfilelock_latency.avgcount=0,req_setfilelock_latency.avgtime=0,req_setfilelock_latency.sum=0,req_setlayout_latency.avgcount=0,req_setlayout_latency.avgtime=0,req_setlayout_latency.sum=0,req_setxattr_latency.avgcount=0,req_setxattr_latency.avgtime=0,req_setxattr_latency.sum=0,req_symlink_latency.avgcount=0,req_symlink_latency.avgtime=0,req_symlink_latency.sum=0,req_unlink_latency.avgcount=0,req_unlink_latency.avgtime=0,req_unlink_latency.sum=0 1587117476000000000
> ceph,collection=AsyncMessenger::Worker-2,host=stefanmds1,id=stefanmds1,type=mds msgr_active_connections=84,msgr_created_connections=68511,msgr_recv_bytes=238078,msgr_recv_messages=2655,msgr_running_fast_dispatch_time=0.004247777,msgr_running_recv_time=25.369012545,msgr_running_send_time=3.743427461,msgr_running_total_time=130.277111559,msgr_send_bytes=172767043,msgr_send_messages=18172 1587117476000000000
> ceph,collection=mds_log,host=stefanmds1,id=stefanmds1,type=mds ev=0,evadd=0,evex=0,evexd=0,evexg=0,evtrm=0,expos=4194304,jlat.avgcount=0,jlat.avgtime=0,jlat.sum=0,rdpos=4194304,replayed=1,seg=1,segadd=0,segex=0,segexd=0,segexg=0,segtrm=0,wrpos=0 1587117476000000000
> ceph,collection=AsyncMessenger::Worker-0,host=stefanmds1,id=stefanmds1,type=mds msgr_active_connections=595,msgr_created_connections=943825,msgr_recv_bytes=78618003,msgr_recv_messages=914080,msgr_running_fast_dispatch_time=0.001544386,msgr_running_recv_time=459.627068807,msgr_running_send_time=469.337032316,msgr_running_total_time=2744.084305898,msgr_send_bytes=61684163658,msgr_send_messages=1858008 1587117476000000000
> ceph,collection=throttle-msgr_dispatch_throttler-mds,host=stefanmds1,id=stefanmds1,type=mds get=1216458,get_or_fail_fail=0,get_or_fail_success=1216458,get_started=0,get_sum=51976882,max=104857600,put=1216458,put_sum=51976882,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117476000000000
> ceph,collection=AsyncMessenger::Worker-1,host=stefanmds1,id=stefanmds1,type=mds msgr_active_connections=226,msgr_created_connections=42679,msgr_recv_bytes=63140151,msgr_recv_messages=299727,msgr_running_fast_dispatch_time=26.316138629,msgr_running_recv_time=36.969916165,msgr_running_send_time=70.457421128,msgr_running_total_time=226.230019936,msgr_send_bytes=193154464,msgr_send_messages=310481 1587117476000000000
> ceph,collection=mds,host=stefanmds1,id=stefanmds1,type=mds caps=0,dir_commit=0,dir_fetch=12,dir_merge=0,dir_split=0,exported=0,exported_inodes=0,forward=0,imported=0,imported_inodes=0,inode_max=2147483647,inodes=10,inodes_bottom=3,inodes_expired=0,inodes_pin_tail=0,inodes_pinned=10,inodes_top=7,inodes_with_caps=0,load_cent=0,openino_backtrace_fetch=0,openino_dir_fetch=0,openino_peer_discover=0,q=0,reply=0,reply_latency.avgcount=0,reply_latency.avgtime=0,reply_latency.sum=0,request=0,subtrees=2,traverse=0,traverse_dir_fetch=0,traverse_discover=0,traverse_forward=0,traverse_hit=0,traverse_lock=0,traverse_remote_ino=0 1587117476000000000
> ceph,collection=purge_queue,host=stefanmds1,id=stefanmds1,type=mds pq_executed=0,pq_executing=0,pq_executing_ops=0 1587117476000000000
> ceph,collection=throttle-write_buf_throttle,host=stefanmds1,id=stefanmds1,type=mds get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=3758096384,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117476000000000
> ceph,collection=throttle-write_buf_throttle-0x5624e9377f40,host=stefanmds1,id=stefanmds1,type=mds get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=3758096384,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117476000000000
> ceph,collection=mds_cache,host=stefanmds1,id=stefanmds1,type=mds ireq_enqueue_scrub=0,ireq_exportdir=0,ireq_flush=0,ireq_fragmentdir=0,ireq_fragstats=0,ireq_inodestats=0,num_recovering_enqueued=0,num_recovering_prioritized=0,num_recovering_processing=0,num_strays=0,num_strays_delayed=0,num_strays_enqueuing=0,recovery_completed=0,recovery_started=0,strays_created=0,strays_enqueued=0,strays_migrated=0,strays_reintegrated=0 1587117476000000000
> ceph,collection=throttle-objecter_bytes,host=stefanmds1,id=stefanmds1,type=mds get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=104857600,put=16,put_sum=1016,take=33,take_sum=1016,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117476000000000
> ceph,collection=throttle-objecter_ops,host=stefanmds1,id=stefanmds1,type=mds get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=1024,put=33,put_sum=33,take=33,take_sum=33,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117476000000000
> ceph,collection=mds_mem,host=stefanmds1,id=stefanmds1,type=mds cap=0,cap+=0,cap-=0,dir=12,dir+=12,dir-=0,dn=10,dn+=10,dn-=0,heap=322284,ino=13,ino+=13,ino-=0,rss=76032 1587117476000000000
> ceph,collection=finisher-PurgeQueue,host=stefanmds1,id=stefanmds1,type=mds complete_latency.avgcount=4,complete_latency.avgtime=0.000176985,complete_latency.sum=0.000707941,queue_len=0 1587117476000000000
> ceph,collection=cct,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw total_workers=0,unhealthy_workers=0 1587117156000000000
> ceph,collection=throttle-objecter_bytes,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw get=791732,get_or_fail_fail=0,get_or_fail_success=791732,get_started=0,get_sum=0,max=104857600,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117156000000000
> ceph,collection=rgw,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw cache_hit=0,cache_miss=791706,failed_req=0,get=0,get_b=0,get_initial_lat.avgcount=0,get_initial_lat.avgtime=0,get_initial_lat.sum=0,keystone_token_cache_hit=0,keystone_token_cache_miss=0,pubsub_event_lost=0,pubsub_event_triggered=0,pubsub_events=0,pubsub_push_failed=0,pubsub_push_ok=0,pubsub_push_pending=0,pubsub_store_fail=0,pubsub_store_ok=0,put=0,put_b=0,put_initial_lat.avgcount=0,put_initial_lat.avgtime=0,put_initial_lat.sum=0,qactive=0,qlen=0,req=791705 1587117156000000000
> ceph,collection=throttle-msgr_dispatch_throttler-radosclient,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw get=2697988,get_or_fail_fail=0,get_or_fail_success=2697988,get_started=0,get_sum=444563051,max=104857600,put=2697988,put_sum=444563051,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117156000000000
> ceph,collection=finisher-radosclient,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw complete_latency.avgcount=2,complete_latency.avgtime=0.003530161,complete_latency.sum=0.007060323,queue_len=0 1587117156000000000
> ceph,collection=throttle-rgw_async_rados_ops,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw get=0,get_or_fail_fail=0,get_or_fail_success=0,get_started=0,get_sum=0,max=64,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117156000000000
> ceph,collection=throttle-objecter_ops,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw get=791732,get_or_fail_fail=0,get_or_fail_success=791732,get_started=0,get_sum=791732,max=24576,put=791732,put_sum=791732,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117156000000000
> ceph,collection=throttle-objecter_bytes-0x5598969981c0,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw get=1637900,get_or_fail_fail=0,get_or_fail_success=1637900,get_started=0,get_sum=0,max=104857600,put=0,put_sum=0,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117156000000000
> ceph,collection=objecter,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw command_active=0,command_resend=0,command_send=0,linger_active=8,linger_ping=1905736,linger_resend=4,linger_send=13,map_epoch=203,map_full=0,map_inc=17,omap_del=0,omap_rd=0,omap_wr=0,op=2697488,op_active=0,op_laggy=0,op_pg=0,op_r=791730,op_reply=2697476,op_resend=1,op_rmw=0,op_send=2697490,op_send_bytes=362,op_w=1905758,osd_laggy=5,osd_session_close=59558,osd_session_open=59566,osd_sessions=8,osdop_append=0,osdop_call=1,osdop_clonerange=0,osdop_cmpxattr=0,osdop_create=8,osdop_delete=0,osdop_getxattr=0,osdop_mapext=0,osdop_notify=0,osdop_other=791714,osdop_pgls=0,osdop_pgls_filter=0,osdop_read=16,osdop_resetxattrs=0,osdop_rmxattr=0,osdop_setxattr=0,osdop_sparse_read=0,osdop_src_cmpxattr=0,osdop_stat=791706,osdop_truncate=0,osdop_watch=1905750,osdop_write=0,osdop_writefull=0,osdop_writesame=0,osdop_zero=0,poolop_active=0,poolop_resend=0,poolop_send=0,poolstat_active=0,poolstat_resend=0,poolstat_send=0,statfs_active=0,statfs_resend=0,statfs_send=0 1587117156000000000
> ceph,collection=AsyncMessenger::Worker-2,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw msgr_active_connections=11,msgr_created_connections=59839,msgr_recv_bytes=342697143,msgr_recv_messages=1441603,msgr_running_fast_dispatch_time=161.807937536,msgr_running_recv_time=118.174064257,msgr_running_send_time=207.679154333,msgr_running_total_time=698.527662129,msgr_send_bytes=530785909,msgr_send_messages=1679950 1587117156000000000
> ceph,collection=mempool,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw bloom_filter_bytes=0,bloom_filter_items=0,bluefs_bytes=0,bluefs_items=0,bluestore_alloc_bytes=0,bluestore_alloc_items=0,bluestore_cache_data_bytes=0,bluestore_cache_data_items=0,bluestore_cache_onode_bytes=0,bluestore_cache_onode_items=0,bluestore_cache_other_bytes=0,bluestore_cache_other_items=0,bluestore_fsck_bytes=0,bluestore_fsck_items=0,bluestore_txc_bytes=0,bluestore_txc_items=0,bluestore_writing_bytes=0,bluestore_writing_deferred_bytes=0,bluestore_writing_deferred_items=0,bluestore_writing_items=0,buffer_anon_bytes=225471,buffer_anon_items=163,buffer_meta_bytes=0,buffer_meta_items=0,mds_co_bytes=0,mds_co_items=0,osd_bytes=0,osd_items=0,osd_mapbl_bytes=0,osd_mapbl_items=0,osd_pglog_bytes=0,osd_pglog_items=0,osdmap_bytes=33904,osdmap_items=278,osdmap_mapping_bytes=0,osdmap_mapping_items=0,pgmap_bytes=0,pgmap_items=0,unittest_1_bytes=0,unittest_1_items=0,unittest_2_bytes=0,unittest_2_items=0 1587117156000000000
> ceph,collection=throttle-msgr_dispatch_throttler-radosclient-0x559896998120,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw get=1652935,get_or_fail_fail=0,get_or_fail_success=1652935,get_started=0,get_sum=276333029,max=104857600,put=1652935,put_sum=276333029,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117156000000000
> ceph,collection=AsyncMessenger::Worker-1,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw msgr_active_connections=17,msgr_created_connections=84859,msgr_recv_bytes=211170759,msgr_recv_messages=922646,msgr_running_fast_dispatch_time=31.487443762,msgr_running_recv_time=83.190789333,msgr_running_send_time=174.670510496,msgr_running_total_time=484.22086275,msgr_send_bytes=1322113179,msgr_send_messages=1636839 1587117156000000000
> ceph,collection=finisher-radosclient-0x559896998080,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw complete_latency.avgcount=0,complete_latency.avgtime=0,complete_latency.sum=0,queue_len=0 1587117156000000000
> ceph,collection=throttle-objecter_ops-0x559896997b80,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw get=1637900,get_or_fail_fail=0,get_or_fail_success=1637900,get_started=0,get_sum=1637900,max=24576,put=1637900,put_sum=1637900,take=0,take_sum=0,val=0,wait.avgcount=0,wait.avgtime=0,wait.sum=0 1587117156000000000
> ceph,collection=AsyncMessenger::Worker-0,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw msgr_active_connections=18,msgr_created_connections=74757,msgr_recv_bytes=489001094,msgr_recv_messages=1986686,msgr_running_fast_dispatch_time=168.60950961,msgr_running_recv_time=142.903031533,msgr_running_send_time=267.911165712,msgr_running_total_time=824.885614951,msgr_send_bytes=707973504,msgr_send_messages=2463727 1587117156000000000
> ceph,collection=objecter-0x559896997720,host=stefanrgw1,id=rgw.stefanrgw1.4219.94113851143184,type=rgw command_active=0,command_resend=0,command_send=0,linger_active=0,linger_ping=0,linger_resend=0,linger_send=0,map_epoch=203,map_full=0,map_inc=8,omap_del=0,omap_rd=0,omap_wr=0,op=1637998,op_active=0,op_laggy=0,op_pg=0,op_r=1062803,op_reply=1637998,op_resend=15,op_rmw=0,op_send=1638013,op_send_bytes=63321099,op_w=575195,osd_laggy=0,osd_session_close=125555,osd_session_open=125563,osd_sessions=8,osdop_append=0,osdop_call=1637886,osdop_clonerange=0,osdop_cmpxattr=0,osdop_create=0,osdop_delete=0,osdop_getxattr=0,osdop_mapext=0,osdop_notify=0,osdop_other=112,osdop_pgls=0,osdop_pgls_filter=0,osdop_read=0,osdop_resetxattrs=0,osdop_rmxattr=0,osdop_setxattr=0,osdop_sparse_read=0,osdop_src_cmpxattr=0,osdop_stat=0,osdop_truncate=0,osdop_watch=0,osdop_write=0,osdop_writefull=0,osdop_writesame=0,osdop_zero=0,poolop_active=0,poolop_resend=0,poolop_send=0,poolstat_active=0,poolstat_resend=0,poolstat_send=0,statfs_active=0,statfs_resend=0,statfs_send=0 1587117156000000000
```

View File

@@ -18,8 +18,12 @@ const (
measurement = "ceph"
typeMon = "monitor"
typeOsd = "osd"
typeMds = "mds"
typeRgw = "rgw"
osdPrefix = "ceph-osd"
monPrefix = "ceph-mon"
mdsPrefix = "ceph-mds"
rgwPrefix = "ceph-client"
sockSuffix = "asok"
)
@@ -27,6 +31,8 @@ type Ceph struct {
CephBinary string
OsdPrefix string
MonPrefix string
MdsPrefix string
RgwPrefix string
SocketDir string
SocketSuffix string
CephUser string
@@ -36,7 +42,7 @@ type Ceph struct {
}
func (c *Ceph) Description() string {
return "Collects performance metrics from the MON and OSD nodes in a Ceph storage cluster."
return "Collects performance metrics from the MON, OSD, MDS and RGW nodes in a Ceph storage cluster."
}
var sampleConfig = `
@@ -55,6 +61,8 @@ var sampleConfig = `
## prefix of MON and OSD socket files, used to determine socket type
mon_prefix = "ceph-mon"
osd_prefix = "ceph-osd"
mds_prefix = "ceph-mds"
rgw_prefix = "ceph-client"
## suffix used to identify socket files
socket_suffix = "asok"
@@ -101,12 +109,12 @@ func (c *Ceph) gatherAdminSocketStats(acc telegraf.Accumulator) error {
for _, s := range sockets {
dump, err := perfDump(c.CephBinary, s)
if err != nil {
acc.AddError(fmt.Errorf("E! error reading from socket '%s': %v", s.socket, err))
acc.AddError(fmt.Errorf("error reading from socket '%s': %v", s.socket, err))
continue
}
data, err := parseDump(dump)
if err != nil {
acc.AddError(fmt.Errorf("E! error parsing dump from socket '%s': %v", s.socket, err))
acc.AddError(fmt.Errorf("error parsing dump from socket '%s': %v", s.socket, err))
continue
}
for tag, metrics := range data {
@@ -148,6 +156,8 @@ func init() {
CephBinary: "/usr/bin/ceph",
OsdPrefix: osdPrefix,
MonPrefix: monPrefix,
MdsPrefix: mdsPrefix,
RgwPrefix: rgwPrefix,
SocketDir: "/var/run/ceph",
SocketSuffix: sockSuffix,
CephUser: "client.admin",
@@ -157,7 +167,6 @@ func init() {
}
inputs.Add(measurement, func() telegraf.Input { return &c })
}
var perfDump = func(binary string, socket *socket) (string, error) {
@@ -166,6 +175,10 @@ var perfDump = func(binary string, socket *socket) (string, error) {
cmdArgs = append(cmdArgs, "perf", "dump")
} else if socket.sockType == typeMon {
cmdArgs = append(cmdArgs, "perfcounters_dump")
} else if socket.sockType == typeMds {
cmdArgs = append(cmdArgs, "perf", "dump")
} else if socket.sockType == typeRgw {
cmdArgs = append(cmdArgs, "perf", "dump")
} else {
return "", fmt.Errorf("ignoring unknown socket type: %s", socket.sockType)
}
@@ -200,7 +213,18 @@ var findSockets = func(c *Ceph) ([]*socket, error) {
sockPrefix = osdPrefix
}
if sockType == typeOsd || sockType == typeMon {
if strings.HasPrefix(f, c.MdsPrefix) {
sockType = typeMds
sockPrefix = mdsPrefix
}
if strings.HasPrefix(f, c.RgwPrefix) {
sockType = typeRgw
sockPrefix = rgwPrefix
}
if sockType == typeOsd || sockType == typeMon || sockType == typeMds || sockType == typeRgw {
path := filepath.Join(c.SocketDir, f)
sockets = append(sockets, &socket{parseSockId(f, sockPrefix, c.SocketSuffix), sockType, path})
}
@@ -278,7 +302,7 @@ func flatten(data interface{}) []*metric {
switch val := data.(type) {
case float64:
metrics = []*metric{&metric{make([]string, 0, 1), val}}
metrics = []*metric{{make([]string, 0, 1), val}}
case map[string]interface{}:
metrics = make([]*metric, 0, len(val))
for k, v := range val {
@@ -288,12 +312,13 @@ func flatten(data interface{}) []*metric {
}
}
default:
log.Printf("I! Ignoring unexpected type '%T' for value %v", val, val)
log.Printf("I! [inputs.ceph] ignoring unexpected type '%T' for value %v", val, val)
}
return metrics
}
// exec executes the 'ceph' command with the supplied arguments, returning JSON formatted output
func (c *Ceph) exec(command string) (string, error) {
cmdArgs := []string{"--conf", c.CephConfig, "--name", c.CephUser, "--format", "json"}
cmdArgs = append(cmdArgs, strings.Split(command, " ")...)
@@ -317,145 +342,174 @@ func (c *Ceph) exec(command string) (string, error) {
return output, nil
}
// CephStatus is used to unmarshal "ceph -s" output
type CephStatus struct {
Health struct {
Status string `json:"status"`
OverallStatus string `json:"overall_status"`
} `json:"health"`
OSDMap struct {
OSDMap struct {
Epoch float64 `json:"epoch"`
NumOSDs float64 `json:"num_osds"`
NumUpOSDs float64 `json:"num_up_osds"`
NumInOSDs float64 `json:"num_in_osds"`
Full bool `json:"full"`
NearFull bool `json:"nearfull"`
NumRemappedPGs float64 `json:"num_remapped_pgs"`
} `json:"osdmap"`
} `json:"osdmap"`
PGMap struct {
PGsByState []struct {
StateName string `json:"state_name"`
Count float64 `json:"count"`
} `json:"pgs_by_state"`
Version float64 `json:"version"`
NumPGs float64 `json:"num_pgs"`
DataBytes float64 `json:"data_bytes"`
BytesUsed float64 `json:"bytes_used"`
BytesAvail float64 `json:"bytes_avail"`
BytesTotal float64 `json:"bytes_total"`
ReadBytesSec float64 `json:"read_bytes_sec"`
WriteBytesSec float64 `json:"write_bytes_sec"`
OpPerSec *float64 `json:"op_per_sec"` // This field is no longer reported in ceph 10 and later
ReadOpPerSec float64 `json:"read_op_per_sec"`
WriteOpPerSec float64 `json:"write_op_per_sec"`
} `json:"pgmap"`
}
// decodeStatus decodes the output of 'ceph -s'
func decodeStatus(acc telegraf.Accumulator, input string) error {
data := make(map[string]interface{})
err := json.Unmarshal([]byte(input), &data)
if err != nil {
data := &CephStatus{}
if err := json.Unmarshal([]byte(input), data); err != nil {
return fmt.Errorf("failed to parse json: '%s': %v", input, err)
}
err = decodeStatusOsdmap(acc, data)
if err != nil {
return err
decoders := []func(telegraf.Accumulator, *CephStatus) error{
decodeStatusHealth,
decodeStatusOsdmap,
decodeStatusPgmap,
decodeStatusPgmapState,
}
err = decodeStatusPgmap(acc, data)
if err != nil {
return err
}
err = decodeStatusPgmapState(acc, data)
if err != nil {
return err
for _, decoder := range decoders {
if err := decoder(acc, data); err != nil {
return err
}
}
return nil
}
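// Note: every decoder in the slice above receives the same typed *CephStatus
// and emits one measurement, so supporting another section of 'ceph -s'
// output only requires a new struct field plus one more decoder function
// appended to that list.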
func decodeStatusOsdmap(acc telegraf.Accumulator, data map[string]interface{}) error {
osdmap, ok := data["osdmap"].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode osdmap", measurement)
// decodeStatusHealth decodes the health portion of the output of 'ceph status'
func decodeStatusHealth(acc telegraf.Accumulator, data *CephStatus) error {
fields := map[string]interface{}{
"status": data.Health.Status,
"overall_status": data.Health.OverallStatus,
}
fields, ok := osdmap["osdmap"].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode osdmap", measurement)
acc.AddFields("ceph_health", fields, map[string]string{})
return nil
}
// decodeStatusOsdmap decodes the OSD map portion of the output of 'ceph -s'
func decodeStatusOsdmap(acc telegraf.Accumulator, data *CephStatus) error {
fields := map[string]interface{}{
"epoch": data.OSDMap.OSDMap.Epoch,
"num_osds": data.OSDMap.OSDMap.NumOSDs,
"num_up_osds": data.OSDMap.OSDMap.NumUpOSDs,
"num_in_osds": data.OSDMap.OSDMap.NumInOSDs,
"full": data.OSDMap.OSDMap.Full,
"nearfull": data.OSDMap.OSDMap.NearFull,
"num_remapped_pgs": data.OSDMap.OSDMap.NumRemappedPGs,
}
acc.AddFields("ceph_osdmap", fields, map[string]string{})
return nil
}
func decodeStatusPgmap(acc telegraf.Accumulator, data map[string]interface{}) error {
pgmap, ok := data["pgmap"].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode pgmap", measurement)
}
fields := make(map[string]interface{})
for key, value := range pgmap {
switch value.(type) {
case float64:
fields[key] = value
}
// decodeStatusPgmap decodes the PG map portion of the output of 'ceph -s'
func decodeStatusPgmap(acc telegraf.Accumulator, data *CephStatus) error {
fields := map[string]interface{}{
"version": data.PGMap.Version,
"num_pgs": data.PGMap.NumPGs,
"data_bytes": data.PGMap.DataBytes,
"bytes_used": data.PGMap.BytesUsed,
"bytes_avail": data.PGMap.BytesAvail,
"bytes_total": data.PGMap.BytesTotal,
"read_bytes_sec": data.PGMap.ReadBytesSec,
"write_bytes_sec": data.PGMap.WriteBytesSec,
"op_per_sec": data.PGMap.OpPerSec, // This field is no longer reported in ceph 10 and later
"read_op_per_sec": data.PGMap.ReadOpPerSec,
"write_op_per_sec": data.PGMap.WriteOpPerSec,
}
acc.AddFields("ceph_pgmap", fields, map[string]string{})
return nil
}
func extractPgmapStates(data map[string]interface{}) ([]interface{}, error) {
const key = "pgs_by_state"
pgmap, ok := data["pgmap"].(map[string]interface{})
if !ok {
return nil, fmt.Errorf("WARNING %s - unable to decode pgmap", measurement)
}
s, ok := pgmap[key]
if !ok {
return nil, fmt.Errorf("WARNING %s - pgmap is missing the %s field", measurement, key)
}
states, ok := s.([]interface{})
if !ok {
return nil, fmt.Errorf("WARNING %s - pgmap[%s] is not a list", measurement, key)
}
return states, nil
}
func decodeStatusPgmapState(acc telegraf.Accumulator, data map[string]interface{}) error {
states, err := extractPgmapStates(data)
if err != nil {
return err
}
for _, state := range states {
stateMap, ok := state.(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode pg state", measurement)
}
stateName, ok := stateMap["state_name"].(string)
if !ok {
return fmt.Errorf("WARNING %s - unable to decode pg state name", measurement)
}
stateCount, ok := stateMap["count"].(float64)
if !ok {
return fmt.Errorf("WARNING %s - unable to decode pg state count", measurement)
}
// decodeStatusPgmapState decodes the PG map state portion of the output of 'ceph -s'
func decodeStatusPgmapState(acc telegraf.Accumulator, data *CephStatus) error {
for _, pgState := range data.PGMap.PGsByState {
tags := map[string]string{
"state": stateName,
"state": pgState.StateName,
}
fields := map[string]interface{}{
"count": stateCount,
"count": pgState.Count,
}
acc.AddFields("ceph_pgmap_state", fields, tags)
}
return nil
}
// CephDF is used to unmarshal 'ceph df' output
type CephDf struct {
Stats struct {
TotalSpace *float64 `json:"total_space"` // pre ceph 0.84
TotalUsed *float64 `json:"total_used"` // pre ceph 0.84
TotalAvail *float64 `json:"total_avail"` // pre ceph 0.84
TotalBytes *float64 `json:"total_bytes"`
TotalUsedBytes *float64 `json:"total_used_bytes"`
TotalAvailBytes *float64 `json:"total_avail_bytes"`
} `json:"stats"`
Pools []struct {
Name string `json:"name"`
Stats struct {
KBUsed float64 `json:"kb_used"`
BytesUsed float64 `json:"bytes_used"`
Objects float64 `json:"objects"`
PercentUsed *float64 `json:"percent_used"`
MaxAvail *float64 `json:"max_avail"`
} `json:"stats"`
} `json:"pools"`
}
// decodeDf decodes the output of 'ceph df'
func decodeDf(acc telegraf.Accumulator, input string) error {
data := make(map[string]interface{})
err := json.Unmarshal([]byte(input), &data)
if err != nil {
data := &CephDf{}
if err := json.Unmarshal([]byte(input), data); err != nil {
return fmt.Errorf("failed to parse json: '%s': %v", input, err)
}
// ceph.usage: records global utilization and number of objects
stats_fields, ok := data["stats"].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode df stats", measurement)
fields := map[string]interface{}{
"total_space": data.Stats.TotalSpace,
"total_used": data.Stats.TotalUsed,
"total_avail": data.Stats.TotalAvail,
"total_bytes": data.Stats.TotalBytes,
"total_used_bytes": data.Stats.TotalUsedBytes,
"total_avail_bytes": data.Stats.TotalAvailBytes,
}
acc.AddFields("ceph_usage", stats_fields, map[string]string{})
acc.AddFields("ceph_usage", fields, map[string]string{})
// ceph.pool.usage: records per pool utilization and number of objects
pools, ok := data["pools"].([]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode df pools", measurement)
}
for _, pool := range pools {
pool_map, ok := pool.(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode df pool", measurement)
}
pool_name, ok := pool_map["name"].(string)
if !ok {
return fmt.Errorf("WARNING %s - unable to decode df pool name", measurement)
}
fields, ok := pool_map["stats"].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode df pool stats", measurement)
}
for _, pool := range data.Pools {
tags := map[string]string{
"name": pool_name,
"name": pool.Name,
}
fields := map[string]interface{}{
"kb_used": pool.Stats.KBUsed,
"bytes_used": pool.Stats.BytesUsed,
"objects": pool.Stats.Objects,
"percent_used": pool.Stats.PercentUsed,
"max_avail": pool.Stats.MaxAvail,
}
acc.AddFields("ceph_pool_usage", fields, tags)
}
@@ -463,36 +517,44 @@ func decodeDf(acc telegraf.Accumulator, input string) error {
return nil
}
// CephOSDPoolStats is used to unmarshal 'ceph osd pool stats' output
type CephOSDPoolStats []struct {
PoolName string `json:"pool_name"`
ClientIORate struct {
ReadBytesSec float64 `json:"read_bytes_sec"`
WriteBytesSec float64 `json:"write_bytes_sec"`
OpPerSec *float64 `json:"op_per_sec"` // This field is no longer reported in ceph 10 and later
ReadOpPerSec float64 `json:"read_op_per_sec"`
WriteOpPerSec float64 `json:"write_op_per_sec"`
} `json:"client_io_rate"`
RecoveryRate struct {
RecoveringObjectsPerSec float64 `json:"recovering_objects_per_sec"`
RecoveringBytesPerSec float64 `json:"recovering_bytes_per_sec"`
RecoveringKeysPerSec float64 `json:"recovering_keys_per_sec"`
} `json:"recovery_rate"`
}
// decodeOsdPoolStats decodes the output of 'ceph osd pool stats'
func decodeOsdPoolStats(acc telegraf.Accumulator, input string) error {
data := make([]map[string]interface{}, 0)
err := json.Unmarshal([]byte(input), &data)
if err != nil {
data := CephOSDPoolStats{}
if err := json.Unmarshal([]byte(input), &data); err != nil {
return fmt.Errorf("failed to parse json: '%s': %v", input, err)
}
// ceph.pool.stats: records per pool IO and recovery throughput
for _, pool := range data {
pool_name, ok := pool["pool_name"].(string)
if !ok {
return fmt.Errorf("WARNING %s - unable to decode osd pool stats name", measurement)
}
// Note: the 'recovery' object looks broken (in hammer), so it's omitted
objects := []string{
"client_io_rate",
"recovery_rate",
}
fields := make(map[string]interface{})
for _, object := range objects {
perfdata, ok := pool[object].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode osd pool stats", measurement)
}
for key, value := range perfdata {
fields[key] = value
}
}
tags := map[string]string{
"name": pool_name,
"name": pool.PoolName,
}
fields := map[string]interface{}{
"read_bytes_sec": pool.ClientIORate.ReadBytesSec,
"write_bytes_sec": pool.ClientIORate.WriteBytesSec,
"op_per_sec": pool.ClientIORate.OpPerSec, // This field is no longer reported in ceph 10 and later
"read_op_per_sec": pool.ClientIORate.ReadOpPerSec,
"write_op_per_sec": pool.ClientIORate.WriteOpPerSec,
"recovering_objects_per_sec": pool.RecoveryRate.RecoveringObjectsPerSec,
"recovering_bytes_per_sec": pool.RecoveryRate.RecoveringBytesPerSec,
"recovering_keys_per_sec": pool.RecoveryRate.RecoveringKeysPerSec,
}
acc.AddFields("ceph_pool_stats", fields, tags)
}

File diff suppressed because it is too large

View File

@@ -81,6 +81,7 @@ func isDir(path string) (bool, error) {
}
func (g *CGroup) generateDirs(list chan<- pathInfo) {
defer close(list)
for _, dir := range g.Paths {
// getting all dirs that match the pattern 'dir'
items, err := filepath.Glob(dir)
@@ -101,10 +102,10 @@ func (g *CGroup) generateDirs(list chan<- pathInfo) {
}
}
}
close(list)
}
func (g *CGroup) generateFiles(dir string, list chan<- pathInfo) {
defer close(list)
for _, file := range g.Files {
// getting all file paths that match the pattern 'dir + file'
// path.Base makes sure that the file variable does not contain part of the path
@@ -126,7 +127,6 @@ func (g *CGroup) generateFiles(dir string, list chan<- pathInfo) {
}
}
}
close(list)
}
// ======================================================================
@@ -173,7 +173,7 @@ const valuePattern = "[\\d-]+"
var fileFormats = [...]fileFormat{
// VAL\n
fileFormat{
{
name: "Single value",
pattern: "^" + valuePattern + "\n$",
parser: func(measurement string, fields map[string]interface{}, b []byte) {
@@ -185,7 +185,7 @@ var fileFormats = [...]fileFormat{
// VAL0\n
// VAL1\n
// ...
fileFormat{
{
name: "New line separated values",
pattern: "^(" + valuePattern + "\n){2,}$",
parser: func(measurement string, fields map[string]interface{}, b []byte) {
@@ -197,7 +197,7 @@ var fileFormats = [...]fileFormat{
},
},
// VAL0 VAL1 ...\n
fileFormat{
{
name: "Space separated values",
pattern: "^(" + valuePattern + " )+\n$",
parser: func(measurement string, fields map[string]interface{}, b []byte) {
@@ -211,7 +211,7 @@ var fileFormats = [...]fileFormat{
// KEY0 VAL0\n
// KEY1 VAL1\n
// ...
fileFormat{
{
name: "New line separated key-space-value's",
pattern: "^(" + keyPattern + " " + valuePattern + "\n)+$",
parser: func(measurement string, fields map[string]interface{}, b []byte) {

View File

@@ -33,11 +33,16 @@ func (*Chrony) SampleConfig() string {
`
}
func (c *Chrony) Gather(acc telegraf.Accumulator) error {
if len(c.path) == 0 {
func (c *Chrony) Init() error {
var err error
c.path, err = exec.LookPath("chronyc")
if err != nil {
return errors.New("chronyc not found: verify that chrony is installed and that chronyc is in your PATH")
}
return nil
}
func (c *Chrony) Gather(acc telegraf.Accumulator) error {
flags := []string{}
if !c.DNSLookup {
flags = append(flags, "-n")
@@ -120,12 +125,7 @@ func processChronycOutput(out string) (map[string]interface{}, map[string]string
}
func init() {
c := Chrony{}
path, _ := exec.LookPath("chronyc")
if len(path) > 0 {
c.path = path
}
inputs.Add("chrony", func() telegraf.Input {
return &c
return &Chrony{}
})
}

View File

@@ -0,0 +1,72 @@
# Cisco GNMI Telemetry
Cisco GNMI Telemetry is an input plugin that consumes telemetry data based on the [GNMI](https://github.com/openconfig/reference/blob/master/rpc/gnmi/gnmi-specification.md) Subscribe method. TLS is supported for authentication and encryption.
It has been optimized to support GNMI telemetry as produced by Cisco IOS XR (64-bit) version 6.5.1, Cisco NX-OS 9.3 and Cisco IOS XE 16.12 and later.
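Under the hood the plugin dials each configured address over gRPC, opens a single `Subscribe` stream, sends one `SubscribeRequest` covering all configured subscriptions, and then consumes the stream of update notifications. The following standalone sketch shows that same flow with the `openconfig/gnmi` client library; it is illustrative only, not part of the plugin, and assumes a reachable device at the sample address accepting the sample credentials without TLS:
```go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/openconfig/gnmi/proto/gnmi"
	"google.golang.org/grpc"
	"google.golang.org/grpc/metadata"
)

func main() {
	// Credentials travel as gRPC metadata, exactly as the plugin sends them.
	ctx := metadata.AppendToOutgoingContext(context.Background(),
		"username", "cisco", "password", "cisco")

	// Device address taken from the sample configuration below.
	conn, err := grpc.DialContext(ctx, "10.49.234.114:57777", grpc.WithInsecure())
	if err != nil {
		log.Fatalf("failed to dial: %v", err)
	}
	defer conn.Close()

	stream, err := gnmi.NewGNMIClient(conn).Subscribe(ctx)
	if err != nil {
		log.Fatalf("failed to open subscribe stream: %v", err)
	}

	// One STREAM subscription, sampled every 10s, for the interface
	// counters path used in the sample configuration.
	req := &gnmi.SubscribeRequest{
		Request: &gnmi.SubscribeRequest_Subscribe{
			Subscribe: &gnmi.SubscriptionList{
				Mode:     gnmi.SubscriptionList_STREAM,
				Encoding: gnmi.Encoding_PROTO,
				Subscription: []*gnmi.Subscription{{
					Path: &gnmi.Path{
						Origin: "openconfig-interfaces",
						Elem: []*gnmi.PathElem{
							{Name: "interfaces"}, {Name: "interface"},
							{Name: "state"}, {Name: "counters"},
						},
					},
					Mode:           gnmi.SubscriptionMode_SAMPLE,
					SampleInterval: uint64(10 * time.Second), // nanoseconds
				}},
			},
		},
	}
	if err := stream.Send(req); err != nil {
		log.Fatalf("failed to send subscribe request: %v", err)
	}

	// Print raw update notifications; the plugin turns these into metrics.
	for {
		resp, err := stream.Recv()
		if err != nil {
			log.Fatalf("subscription aborted: %v", err)
		}
		if update, ok := resp.Response.(*gnmi.SubscribeResponse_Update); ok {
			fmt.Println(update.Update)
		}
	}
}
```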
### Configuration
```toml
[[inputs.cisco_telemetry_gnmi]]
## Address and port of the GNMI GRPC server
addresses = ["10.49.234.114:57777"]
## define credentials
username = "cisco"
password = "cisco"
## GNMI encoding requested (one of: "proto", "json", "json_ietf")
# encoding = "proto"
## redial in case of failures after
redial = "10s"
## enable client-side TLS and define CA to authenticate the device
# enable_tls = true
# tls_ca = "/etc/telegraf/ca.pem"
# insecure_skip_verify = true
## define client-side TLS certificate & key to authenticate to the device
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## GNMI subscription prefix (optional, can usually be left empty)
## See: https://github.com/openconfig/reference/blob/master/rpc/gnmi/gnmi-specification.md#222-paths
# origin = ""
# prefix = ""
# target = ""
## Define additional aliases to map telemetry encoding paths to simple measurement names
# [inputs.cisco_telemetry_gnmi.aliases]
# ifcounters = "openconfig:/interfaces/interface/state/counters"
[[inputs.cisco_telemetry_gnmi.subscription]]
## Name of the measurement that will be emitted
name = "ifcounters"
## Origin and path of the subscription
## See: https://github.com/openconfig/reference/blob/master/rpc/gnmi/gnmi-specification.md#222-paths
##
## origin usually refers to a (YANG) data model implemented by the device
## and path to a specific substructure inside it that should be subscribed to (similar to an XPath)
## YANG models can be found e.g. here: https://github.com/YangModels/yang/tree/master/vendor/cisco/xr
origin = "openconfig-interfaces"
path = "/interfaces/interface/state/counters"
## Subscription mode (one of: "target_defined", "sample", "on_change") and interval
subscription_mode = "sample"
sample_interval = "10s"
## Suppress redundant transmissions when measured values are unchanged
# suppress_redundant = false
## If suppression is enabled, send updates at least every X seconds anyway
# heartbeat_interval = "60s"
```
### Example Output
```
ifcounters,path=openconfig-interfaces:/interfaces/interface/state/counters,host=linux,name=MgmtEth0/RP0/CPU0/0,source=10.49.234.115 in-multicast-pkts=0i,out-multicast-pkts=0i,out-errors=0i,out-discards=0i,in-broadcast-pkts=0i,out-broadcast-pkts=0i,in-discards=0i,in-unknown-protos=0i,in-errors=0i,out-unicast-pkts=0i,in-octets=0i,out-octets=0i,last-clear="2019-05-22T16:53:21Z",in-unicast-pkts=0i 1559145777425000000
ifcounters,path=openconfig-interfaces:/interfaces/interface/state/counters,host=linux,name=GigabitEthernet0/0/0/0,source=10.49.234.115 out-multicast-pkts=0i,out-broadcast-pkts=0i,in-errors=0i,out-errors=0i,in-discards=0i,out-octets=0i,in-unknown-protos=0i,in-unicast-pkts=0i,in-octets=0i,in-multicast-pkts=0i,in-broadcast-pkts=0i,last-clear="2019-05-22T16:54:50Z",out-unicast-pkts=0i,out-discards=0i 1559145777425000000
```
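The `name` tag in the output above is derived from the telemetry path itself: every `[key=value]` pair on a path element becomes a tag (using the short key form where unambiguous), and each metric is additionally tagged with `source` (the device address) and `path` (the subscription prefix). Roughly, for the second line of the example:
```
path:   /interfaces/interface[name=GigabitEthernet0/0/0/0]/state/counters
tags:   name=GigabitEthernet0/0/0/0, source=10.49.234.115,
        path=openconfig-interfaces:/interfaces/interface/state/counters
fields: in-octets=0i, out-octets=0i, ...
```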

View File

@@ -0,0 +1,556 @@
package cisco_telemetry_gnmi
import (
"bytes"
"context"
"crypto/tls"
"encoding/json"
"fmt"
"io"
"math"
"net"
"path"
"strings"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
internaltls "github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/plugins/inputs"
jsonparser "github.com/influxdata/telegraf/plugins/parsers/json"
"github.com/openconfig/gnmi/proto/gnmi"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/metadata"
)
// CiscoTelemetryGNMI plugin instance
type CiscoTelemetryGNMI struct {
Addresses []string `toml:"addresses"`
Subscriptions []Subscription `toml:"subscription"`
Aliases map[string]string `toml:"aliases"`
// Optional subscription configuration
Encoding string
Origin string
Prefix string
Target string
UpdatesOnly bool `toml:"updates_only"`
// Cisco IOS XR credentials
Username string
Password string
// Redial
Redial internal.Duration
// GRPC TLS settings
EnableTLS bool `toml:"enable_tls"`
internaltls.ClientConfig
// Internal state
aliases map[string]string
acc telegraf.Accumulator
cancel context.CancelFunc
wg sync.WaitGroup
Log telegraf.Logger
}
// Subscription for a GNMI client
type Subscription struct {
Name string
Origin string
Path string
// Subscription mode and interval
SubscriptionMode string `toml:"subscription_mode"`
SampleInterval internal.Duration `toml:"sample_interval"`
// Duplicate suppression
SuppressRedundant bool `toml:"suppress_redundant"`
HeartbeatInterval internal.Duration `toml:"heartbeat_interval"`
}
// Start the GNMI subscription service
func (c *CiscoTelemetryGNMI) Start(acc telegraf.Accumulator) error {
var err error
var ctx context.Context
var tlscfg *tls.Config
var request *gnmi.SubscribeRequest
c.acc = acc
ctx, c.cancel = context.WithCancel(context.Background())
// Validate configuration
if request, err = c.newSubscribeRequest(); err != nil {
return err
} else if c.Redial.Duration.Nanoseconds() <= 0 {
return fmt.Errorf("redial duration must be positive")
}
// Parse TLS config
if c.EnableTLS {
if tlscfg, err = c.ClientConfig.TLSConfig(); err != nil {
return err
}
}
if len(c.Username) > 0 {
ctx = metadata.AppendToOutgoingContext(ctx, "username", c.Username, "password", c.Password)
}
// Invert explicit alias list and prefill subscription names
c.aliases = make(map[string]string, len(c.Subscriptions)+len(c.Aliases))
for _, subscription := range c.Subscriptions {
var gnmiLongPath, gnmiShortPath *gnmi.Path
// Build the subscription path without keys
if gnmiLongPath, err = parsePath(subscription.Origin, subscription.Path, ""); err != nil {
return err
}
if gnmiShortPath, err = parsePath("", subscription.Path, ""); err != nil {
return err
}
longPath, _ := c.handlePath(gnmiLongPath, nil, "")
shortPath, _ := c.handlePath(gnmiShortPath, nil, "")
name := subscription.Name
// If the user didn't provide a measurement name, use last path element
if len(name) == 0 {
name = path.Base(shortPath)
}
if len(name) > 0 {
c.aliases[longPath] = name
c.aliases[shortPath] = name
}
}
for alias, path := range c.Aliases {
c.aliases[path] = alias
}
// Create a goroutine for each device, dial and subscribe
c.wg.Add(len(c.Addresses))
for _, addr := range c.Addresses {
go func(address string) {
defer c.wg.Done()
for ctx.Err() == nil {
if err := c.subscribeGNMI(ctx, address, tlscfg, request); err != nil && ctx.Err() == nil {
acc.AddError(err)
}
select {
case <-ctx.Done():
case <-time.After(c.Redial.Duration):
}
}
}(addr)
}
return nil
}
// Create a new GNMI SubscribeRequest
func (c *CiscoTelemetryGNMI) newSubscribeRequest() (*gnmi.SubscribeRequest, error) {
// Create subscription objects
subscriptions := make([]*gnmi.Subscription, len(c.Subscriptions))
for i, subscription := range c.Subscriptions {
gnmiPath, err := parsePath(subscription.Origin, subscription.Path, "")
if err != nil {
return nil, err
}
mode, ok := gnmi.SubscriptionMode_value[strings.ToUpper(subscription.SubscriptionMode)]
if !ok {
return nil, fmt.Errorf("invalid subscription mode %s", subscription.SubscriptionMode)
}
subscriptions[i] = &gnmi.Subscription{
Path: gnmiPath,
Mode: gnmi.SubscriptionMode(mode),
SampleInterval: uint64(subscription.SampleInterval.Duration.Nanoseconds()),
SuppressRedundant: subscription.SuppressRedundant,
HeartbeatInterval: uint64(subscription.HeartbeatInterval.Duration.Nanoseconds()),
}
}
// Construct subscribe request
gnmiPath, err := parsePath(c.Origin, c.Prefix, c.Target)
if err != nil {
return nil, err
}
if c.Encoding != "proto" && c.Encoding != "json" && c.Encoding != "json_ietf" {
return nil, fmt.Errorf("unsupported encoding %s", c.Encoding)
}
return &gnmi.SubscribeRequest{
Request: &gnmi.SubscribeRequest_Subscribe{
Subscribe: &gnmi.SubscriptionList{
Prefix: gnmiPath,
Mode: gnmi.SubscriptionList_STREAM,
Encoding: gnmi.Encoding(gnmi.Encoding_value[strings.ToUpper(c.Encoding)]),
Subscription: subscriptions,
UpdatesOnly: c.UpdatesOnly,
},
},
}, nil
}
// SubscribeGNMI and extract telemetry data
func (c *CiscoTelemetryGNMI) subscribeGNMI(ctx context.Context, address string, tlscfg *tls.Config, request *gnmi.SubscribeRequest) error {
var opt grpc.DialOption
if tlscfg != nil {
opt = grpc.WithTransportCredentials(credentials.NewTLS(tlscfg))
} else {
opt = grpc.WithInsecure()
}
client, err := grpc.DialContext(ctx, address, opt)
if err != nil {
return fmt.Errorf("failed to dial: %v", err)
}
defer client.Close()
subscribeClient, err := gnmi.NewGNMIClient(client).Subscribe(ctx)
if err != nil {
return fmt.Errorf("failed to setup subscription: %v", err)
}
if err = subscribeClient.Send(request); err != nil {
return fmt.Errorf("failed to send subscription request: %v", err)
}
c.Log.Debugf("Connection to GNMI device %s established", address)
defer c.Log.Debugf("Connection to GNMI device %s closed", address)
for ctx.Err() == nil {
var reply *gnmi.SubscribeResponse
if reply, err = subscribeClient.Recv(); err != nil {
if err != io.EOF && ctx.Err() == nil {
return fmt.Errorf("aborted GNMI subscription: %v", err)
}
break
}
c.handleSubscribeResponse(address, reply)
}
return nil
}
// handleSubscribeResponse checks for a GNMI update message and parses the contained telemetry data
func (c *CiscoTelemetryGNMI) handleSubscribeResponse(address string, reply *gnmi.SubscribeResponse) {
// Check if response is a GNMI Update and if we have a prefix to derive the measurement name
response, ok := reply.Response.(*gnmi.SubscribeResponse_Update)
if !ok {
return
}
var prefix, prefixAliasPath string
grouper := metric.NewSeriesGrouper()
timestamp := time.Unix(0, response.Update.Timestamp)
prefixTags := make(map[string]string)
if response.Update.Prefix != nil {
prefix, prefixAliasPath = c.handlePath(response.Update.Prefix, prefixTags, "")
}
prefixTags["source"], _, _ = net.SplitHostPort(address)
prefixTags["path"] = prefix
// Parse individual Update message and create measurements
var name, lastAliasPath string
for _, update := range response.Update.Update {
// Prepare tags from prefix
tags := make(map[string]string, len(prefixTags))
for key, val := range prefixTags {
tags[key] = val
}
aliasPath, fields := c.handleTelemetryField(update, tags, prefix)
// Inherit valid alias from prefix parsing
if len(prefixAliasPath) > 0 && len(aliasPath) == 0 {
aliasPath = prefixAliasPath
}
// Lookup alias if alias-path has changed
if aliasPath != lastAliasPath {
name = prefix
if alias, ok := c.aliases[aliasPath]; ok {
name = alias
} else {
c.Log.Debugf("No measurement alias for GNMI path: %s", name)
}
}
// Group metrics
for k, v := range fields {
key := k
if len(aliasPath) < len(key) {
// This may not be an exact prefix, due to naming style
// conversion on the key.
key = key[len(aliasPath)+1:]
} else {
// Otherwise use the last path element as the field key.
key = path.Base(key)
// If there are no elements skip the item; this would be an
// invalid message.
key = strings.TrimLeft(key, "/.")
if key == "" {
c.Log.Errorf("invalid empty path: %q", k)
continue
}
}
grouper.Add(name, tags, timestamp, key, v)
}
lastAliasPath = aliasPath
}
// Add grouped measurements
for _, metric := range grouper.Metrics() {
c.acc.AddMetric(metric)
}
}
// HandleTelemetryField and add it to a measurement
func (c *CiscoTelemetryGNMI) handleTelemetryField(update *gnmi.Update, tags map[string]string, prefix string) (string, map[string]interface{}) {
path, aliasPath := c.handlePath(update.Path, tags, prefix)
var value interface{}
var jsondata []byte
// Make sure a value is actually set
if update.Val == nil || update.Val.Value == nil {
c.Log.Infof("Discarded empty or legacy type value with path: %q", path)
return aliasPath, nil
}
switch val := update.Val.Value.(type) {
case *gnmi.TypedValue_AsciiVal:
value = val.AsciiVal
case *gnmi.TypedValue_BoolVal:
value = val.BoolVal
case *gnmi.TypedValue_BytesVal:
value = val.BytesVal
case *gnmi.TypedValue_DecimalVal:
value = float64(val.DecimalVal.Digits) / math.Pow(10, float64(val.DecimalVal.Precision))
case *gnmi.TypedValue_FloatVal:
value = val.FloatVal
case *gnmi.TypedValue_IntVal:
value = val.IntVal
case *gnmi.TypedValue_StringVal:
value = val.StringVal
case *gnmi.TypedValue_UintVal:
value = val.UintVal
case *gnmi.TypedValue_JsonIetfVal:
jsondata = val.JsonIetfVal
case *gnmi.TypedValue_JsonVal:
jsondata = val.JsonVal
}
name := strings.Replace(path, "-", "_", -1)
fields := make(map[string]interface{})
if value != nil {
fields[name] = value
} else if jsondata != nil {
if err := json.Unmarshal(jsondata, &value); err != nil {
c.acc.AddError(fmt.Errorf("failed to parse JSON value: %v", err))
} else {
flattener := jsonparser.JSONFlattener{Fields: fields}
flattener.FullFlattenJSON(name, value, true, true)
}
}
return aliasPath, fields
}
// handlePath renders a GNMI path to its string form, extracting element keys into tags and returning any matching alias path
func (c *CiscoTelemetryGNMI) handlePath(path *gnmi.Path, tags map[string]string, prefix string) (string, string) {
var aliasPath string
builder := bytes.NewBufferString(prefix)
// Prefix with origin
if len(path.Origin) > 0 {
builder.WriteString(path.Origin)
builder.WriteRune(':')
}
// Parse generic keys from prefix
for _, elem := range path.Elem {
if len(elem.Name) > 0 {
builder.WriteRune('/')
builder.WriteString(elem.Name)
}
name := builder.String()
if _, exists := c.aliases[name]; exists {
aliasPath = name
}
if tags != nil {
for key, val := range elem.Key {
key = strings.Replace(key, "-", "_", -1)
// Use short-form of key if possible
if _, exists := tags[key]; exists {
tags[name+"/"+key] = val
} else {
tags[key] = val
}
}
}
}
return builder.String(), aliasPath
}
// parsePath converts an XPath-like string into a GNMI path structure
func parsePath(origin string, path string, target string) (*gnmi.Path, error) {
var err error
gnmiPath := gnmi.Path{Origin: origin, Target: target}
if len(path) > 0 && path[0] != '/' {
return nil, fmt.Errorf("path does not start with a '/': %s", path)
}
elem := &gnmi.PathElem{}
start, name, value, end := 0, -1, -1, -1
path = path + "/"
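// The scanner below walks the path one byte at a time: '[' opens a key,
// '=' separates the key from its value, ']' closes the key/value pair, and
// '/' terminates a path element. For example, "/foo/bar[shoo=woo]/z" yields
// the elements foo, bar (with key shoo=woo) and z.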
for i := 0; i < len(path); i++ {
if path[i] == '[' {
if name >= 0 {
break
}
if end < 0 {
end = i
elem.Key = make(map[string]string)
}
name = i + 1
} else if path[i] == '=' {
if name <= 0 || value >= 0 {
break
}
value = i + 1
} else if path[i] == ']' {
if name <= 0 || value <= name {
break
}
elem.Key[path[name:value-1]] = strings.Trim(path[value:i], "'\"")
name, value = -1, -1
} else if path[i] == '/' {
if name < 0 {
if end < 0 {
end = i
}
if end > start {
elem.Name = path[start:end]
gnmiPath.Elem = append(gnmiPath.Elem, elem)
gnmiPath.Element = append(gnmiPath.Element, path[start:i])
}
start, name, value, end = i+1, -1, -1, -1
elem = &gnmi.PathElem{}
}
}
}
if name >= 0 || value >= 0 {
err = fmt.Errorf("Invalid GNMI path: %s", path)
}
if err != nil {
return nil, err
}
return &gnmiPath, nil
}
// Stop listener and cleanup
func (c *CiscoTelemetryGNMI) Stop() {
c.cancel()
c.wg.Wait()
}
const sampleConfig = `
## Address and port of the GNMI GRPC server
addresses = ["10.49.234.114:57777"]
## define credentials
username = "cisco"
password = "cisco"
## GNMI encoding requested (one of: "proto", "json", "json_ietf")
# encoding = "proto"
## redial in case of failures after
redial = "10s"
## enable client-side TLS and define CA to authenticate the device
# enable_tls = true
# tls_ca = "/etc/telegraf/ca.pem"
# insecure_skip_verify = true
## define client-side TLS certificate & key to authenticate to the device
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## GNMI subscription prefix (optional, can usually be left empty)
## See: https://github.com/openconfig/reference/blob/master/rpc/gnmi/gnmi-specification.md#222-paths
# origin = ""
# prefix = ""
# target = ""
## Define additional aliases to map telemetry encoding paths to simple measurement names
#[inputs.cisco_telemetry_gnmi.aliases]
# ifcounters = "openconfig:/interfaces/interface/state/counters"
[[inputs.cisco_telemetry_gnmi.subscription]]
## Name of the measurement that will be emitted
name = "ifcounters"
## Origin and path of the subscription
## See: https://github.com/openconfig/reference/blob/master/rpc/gnmi/gnmi-specification.md#222-paths
##
## origin usually refers to a (YANG) data model implemented by the device
## and path to a specific substructure inside it that should be subscribed to (similar to an XPath)
## YANG models can be found e.g. here: https://github.com/YangModels/yang/tree/master/vendor/cisco/xr
origin = "openconfig-interfaces"
path = "/interfaces/interface/state/counters"
## Subscription mode (one of: "target_defined", "sample", "on_change") and interval
subscription_mode = "sample"
sample_interval = "10s"
## Suppress redundant transmissions when measured values are unchanged
# suppress_redundant = false
## If suppression is enabled, send updates at least every X seconds anyway
# heartbeat_interval = "60s"
`
// SampleConfig of plugin
func (c *CiscoTelemetryGNMI) SampleConfig() string {
return sampleConfig
}
// Description of plugin
func (c *CiscoTelemetryGNMI) Description() string {
return "Cisco GNMI telemetry input plugin based on GNMI telemetry data produced in IOS XR"
}
// Gather plugin measurements (unused)
func (c *CiscoTelemetryGNMI) Gather(_ telegraf.Accumulator) error {
return nil
}
func init() {
inputs.Add("cisco_telemetry_gnmi", func() telegraf.Input {
return &CiscoTelemetryGNMI{
Encoding: "proto",
Redial: internal.Duration{Duration: 10 * time.Second},
}
})
}

View File

@@ -0,0 +1,473 @@
package cisco_telemetry_gnmi
import (
"context"
"errors"
"fmt"
"net"
"sync"
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/testutil"
"github.com/openconfig/gnmi/proto/gnmi"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
"google.golang.org/grpc/metadata"
)
func TestParsePath(t *testing.T) {
path := "/foo/bar/bla[shoo=woo][shoop=/woop/]/z"
parsed, err := parsePath("theorigin", path, "thetarget")
assert.Nil(t, err)
assert.Equal(t, parsed.Origin, "theorigin")
assert.Equal(t, parsed.Target, "thetarget")
assert.Equal(t, parsed.Element, []string{"foo", "bar", "bla[shoo=woo][shoop=/woop/]", "z"})
assert.Equal(t, parsed.Elem, []*gnmi.PathElem{{Name: "foo"}, {Name: "bar"},
{Name: "bla", Key: map[string]string{"shoo": "woo", "shoop": "/woop/"}}, {Name: "z"}})
parsed, err = parsePath("", "", "")
assert.Nil(t, err)
assert.Equal(t, *parsed, gnmi.Path{})
parsed, err = parsePath("", "/foo[[", "")
assert.Nil(t, parsed)
assert.Equal(t, errors.New("Invalid GNMI path: /foo[[/"), err)
}
type MockServer struct {
SubscribeF func(gnmi.GNMI_SubscribeServer) error
GRPCServer *grpc.Server
}
func (s *MockServer) Capabilities(context.Context, *gnmi.CapabilityRequest) (*gnmi.CapabilityResponse, error) {
return nil, nil
}
func (s *MockServer) Get(context.Context, *gnmi.GetRequest) (*gnmi.GetResponse, error) {
return nil, nil
}
func (s *MockServer) Set(context.Context, *gnmi.SetRequest) (*gnmi.SetResponse, error) {
return nil, nil
}
func (s *MockServer) Subscribe(server gnmi.GNMI_SubscribeServer) error {
return s.SubscribeF(server)
}
func TestWaitError(t *testing.T) {
listener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
grpcServer := grpc.NewServer()
gnmiServer := &MockServer{
SubscribeF: func(server gnmi.GNMI_SubscribeServer) error {
return fmt.Errorf("testerror")
},
GRPCServer: grpcServer,
}
gnmi.RegisterGNMIServer(grpcServer, gnmiServer)
plugin := &CiscoTelemetryGNMI{
Log: testutil.Logger{},
Addresses: []string{listener.Addr().String()},
Encoding: "proto",
Redial: internal.Duration{Duration: 1 * time.Second},
}
var acc testutil.Accumulator
err = plugin.Start(&acc)
require.NoError(t, err)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
err := grpcServer.Serve(listener)
require.NoError(t, err)
}()
acc.WaitError(1)
plugin.Stop()
grpcServer.Stop()
wg.Wait()
require.Contains(t, acc.Errors,
errors.New("aborted GNMI subscription: rpc error: code = Unknown desc = testerror"))
}
func TestUsernamePassword(t *testing.T) {
listener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
grpcServer := grpc.NewServer()
gnmiServer := &MockServer{
SubscribeF: func(server gnmi.GNMI_SubscribeServer) error {
metadata, ok := metadata.FromIncomingContext(server.Context())
if !ok {
return errors.New("failed to get metadata")
}
username := metadata.Get("username")
if len(username) != 1 || username[0] != "theusername" {
return errors.New("wrong username")
}
password := metadata.Get("password")
if len(password) != 1 || password[0] != "thepassword" {
return errors.New("wrong password")
}
return errors.New("success")
},
GRPCServer: grpcServer,
}
gnmi.RegisterGNMIServer(grpcServer, gnmiServer)
plugin := &CiscoTelemetryGNMI{
Log: testutil.Logger{},
Addresses: []string{listener.Addr().String()},
Username: "theusername",
Password: "thepassword",
Encoding: "proto",
Redial: internal.Duration{Duration: 1 * time.Second},
}
var acc testutil.Accumulator
err = plugin.Start(&acc)
require.NoError(t, err)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
err := grpcServer.Serve(listener)
require.NoError(t, err)
}()
acc.WaitError(1)
plugin.Stop()
grpcServer.Stop()
wg.Wait()
require.Contains(t, acc.Errors,
errors.New("aborted GNMI subscription: rpc error: code = Unknown desc = success"))
}
func mockGNMINotification() *gnmi.Notification {
return &gnmi.Notification{
Timestamp: 1543236572000000000,
Prefix: &gnmi.Path{
Origin: "type",
Elem: []*gnmi.PathElem{
{
Name: "model",
Key: map[string]string{"foo": "bar"},
},
},
Target: "subscription",
},
Update: []*gnmi.Update{
{
Path: &gnmi.Path{
Elem: []*gnmi.PathElem{
{Name: "some"},
{
Name: "path",
Key: map[string]string{"name": "str", "uint64": "1234"}},
},
},
Val: &gnmi.TypedValue{Value: &gnmi.TypedValue_IntVal{IntVal: 5678}},
},
{
Path: &gnmi.Path{
Elem: []*gnmi.PathElem{
{Name: "other"},
{Name: "path"},
},
},
Val: &gnmi.TypedValue{Value: &gnmi.TypedValue_StringVal{StringVal: "foobar"}},
},
{
Path: &gnmi.Path{
Elem: []*gnmi.PathElem{
{Name: "other"},
{Name: "this"},
},
},
Val: &gnmi.TypedValue{Value: &gnmi.TypedValue_StringVal{StringVal: "that"}},
},
},
}
}
func TestNotification(t *testing.T) {
tests := []struct {
name string
plugin *CiscoTelemetryGNMI
server *MockServer
expected []telegraf.Metric
}{
{
name: "multiple metrics",
plugin: &CiscoTelemetryGNMI{
Log: testutil.Logger{},
Encoding: "proto",
Redial: internal.Duration{Duration: 1 * time.Second},
Subscriptions: []Subscription{
{
Name: "alias",
Origin: "type",
Path: "/model",
SubscriptionMode: "sample",
},
},
},
server: &MockServer{
SubscribeF: func(server gnmi.GNMI_SubscribeServer) error {
notification := mockGNMINotification()
server.Send(&gnmi.SubscribeResponse{Response: &gnmi.SubscribeResponse_Update{Update: notification}})
server.Send(&gnmi.SubscribeResponse{Response: &gnmi.SubscribeResponse_SyncResponse{SyncResponse: true}})
notification.Prefix.Elem[0].Key["foo"] = "bar2"
notification.Update[0].Path.Elem[1].Key["name"] = "str2"
notification.Update[0].Val = &gnmi.TypedValue{Value: &gnmi.TypedValue_JsonVal{JsonVal: []byte{'"', '1', '2', '3', '"'}}}
server.Send(&gnmi.SubscribeResponse{Response: &gnmi.SubscribeResponse_Update{Update: notification}})
return nil
},
},
expected: []telegraf.Metric{
testutil.MustMetric(
"alias",
map[string]string{
"path": "type:/model",
"source": "127.0.0.1",
"foo": "bar",
"name": "str",
"uint64": "1234",
},
map[string]interface{}{
"some/path": int64(5678),
},
time.Unix(0, 0),
),
testutil.MustMetric(
"alias",
map[string]string{
"path": "type:/model",
"source": "127.0.0.1",
"foo": "bar",
},
map[string]interface{}{
"other/path": "foobar",
"other/this": "that",
},
time.Unix(0, 0),
),
testutil.MustMetric(
"alias",
map[string]string{
"path": "type:/model",
"foo": "bar2",
"source": "127.0.0.1",
"name": "str2",
"uint64": "1234",
},
map[string]interface{}{
"some/path": "123",
},
time.Unix(0, 0),
),
testutil.MustMetric(
"alias",
map[string]string{
"path": "type:/model",
"source": "127.0.0.1",
"foo": "bar2",
},
map[string]interface{}{
"other/path": "foobar",
"other/this": "that",
},
time.Unix(0, 0),
),
},
},
{
name: "full path field key",
plugin: &CiscoTelemetryGNMI{
Log: testutil.Logger{},
Encoding: "proto",
Redial: internal.Duration{Duration: 1 * time.Second},
Subscriptions: []Subscription{
{
Name: "PHY_COUNTERS",
Origin: "type",
Path: "/state/port[port-id=*]/ethernet/oper-speed",
SubscriptionMode: "sample",
},
},
},
server: &MockServer{
SubscribeF: func(server gnmi.GNMI_SubscribeServer) error {
response := &gnmi.SubscribeResponse{
Response: &gnmi.SubscribeResponse_Update{
Update: &gnmi.Notification{
Timestamp: 1543236572000000000,
Prefix: &gnmi.Path{
Origin: "type",
Elem: []*gnmi.PathElem{
{
Name: "state",
},
{
Name: "port",
Key: map[string]string{"port-id": "1"},
},
{
Name: "ethernet",
},
{
Name: "oper-speed",
},
},
Target: "subscription",
},
Update: []*gnmi.Update{
{
Path: &gnmi.Path{},
Val: &gnmi.TypedValue{
Value: &gnmi.TypedValue_IntVal{IntVal: 42},
},
},
},
},
},
}
server.Send(response)
return nil
},
},
expected: []telegraf.Metric{
testutil.MustMetric(
"PHY_COUNTERS",
map[string]string{
"path": "type:/state/port/ethernet/oper-speed",
"source": "127.0.0.1",
"port_id": "1",
},
map[string]interface{}{
"oper_speed": 42,
},
time.Unix(0, 0),
),
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
listener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
tt.plugin.Addresses = []string{listener.Addr().String()}
grpcServer := grpc.NewServer()
tt.server.GRPCServer = grpcServer
gnmi.RegisterGNMIServer(grpcServer, tt.server)
var acc testutil.Accumulator
err = tt.plugin.Start(&acc)
require.NoError(t, err)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
err := grpcServer.Serve(listener)
require.NoError(t, err)
}()
acc.Wait(len(tt.expected))
tt.plugin.Stop()
grpcServer.Stop()
wg.Wait()
testutil.RequireMetricsEqual(t, tt.expected, acc.GetTelegrafMetrics(),
testutil.IgnoreTime())
})
}
}
func TestRedial(t *testing.T) {
listener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
plugin := &CiscoTelemetryGNMI{
Log: testutil.Logger{},
Addresses: []string{listener.Addr().String()},
Encoding: "proto",
Redial: internal.Duration{Duration: 10 * time.Millisecond},
}
grpcServer := grpc.NewServer()
gnmiServer := &MockServer{
SubscribeF: func(server gnmi.GNMI_SubscribeServer) error {
notification := mockGNMINotification()
server.Send(&gnmi.SubscribeResponse{Response: &gnmi.SubscribeResponse_Update{Update: notification}})
return nil
},
GRPCServer: grpcServer,
}
gnmi.RegisterGNMIServer(grpcServer, gnmiServer)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
err := grpcServer.Serve(listener)
require.NoError(t, err)
}()
var acc testutil.Accumulator
err = plugin.Start(&acc)
require.NoError(t, err)
acc.Wait(2)
grpcServer.Stop()
wg.Wait()
// Restart GNMI server at the same address
listener, err = net.Listen("tcp", listener.Addr().String())
require.NoError(t, err)
grpcServer = grpc.NewServer()
gnmiServer = &MockServer{
SubscribeF: func(server gnmi.GNMI_SubscribeServer) error {
notification := mockGNMINotification()
notification.Prefix.Elem[0].Key["foo"] = "bar2"
notification.Update[0].Path.Elem[1].Key["name"] = "str2"
notification.Update[0].Val = &gnmi.TypedValue{Value: &gnmi.TypedValue_BoolVal{BoolVal: false}}
server.Send(&gnmi.SubscribeResponse{Response: &gnmi.SubscribeResponse_Update{Update: notification}})
return nil
},
GRPCServer: grpcServer,
}
gnmi.RegisterGNMIServer(grpcServer, gnmiServer)
wg.Add(1)
go func() {
defer wg.Done()
err := grpcServer.Serve(listener)
require.NoError(t, err)
}()
acc.Wait(4)
plugin.Stop()
grpcServer.Stop()
wg.Wait()
}

View File

@@ -0,0 +1,44 @@
# Cisco model-driven telemetry (MDT)
The Cisco model-driven telemetry (MDT) input plugin consumes
telemetry data from Cisco IOS XR, IOS XE and NX-OS platforms. It supports TCP & GRPC dialout transports.
GRPC-based transport can utilize TLS for authentication and encryption.
Telemetry data is expected to be GPB-KV (self-describing-gpb) encoded.
The GRPC dialout transport is supported on various IOS XR (64-bit) 6.1.x and later, IOS XE 16.10 and later, as well as NX-OS 7.x and later platforms.
The TCP dialout transport is supported on IOS XR (32-bit and 64-bit) 6.1.x and later.
### Configuration:
```toml
[[inputs.cisco_telemetry_mdt]]
## Telemetry transport can be "tcp" or "grpc". TLS is only supported when
## using the grpc transport.
transport = "grpc"
## Address and port to host telemetry listener
service_address = ":57000"
## Enable TLS; grpc transport only.
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Enable TLS client authentication and define allowed CA certificates; grpc
## transport only.
# tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"]
## Define (for certain nested telemetry measurements with embedded tags) which fields are tags
# embedded_tags = ["Cisco-IOS-XR-qos-ma-oper:qos/interface-table/interface/input/service-policy-names/service-policy-instance/statistics/class-stats/class-name"]
## Define aliases to map telemetry encoding paths to simple measurement names
[inputs.cisco_telemetry_mdt.aliases]
ifstats = "ietf-interfaces:interfaces-state/interface/statistics"
```
### Example Output:
```
ifstats,path=ietf-interfaces:interfaces-state/interface/statistics,host=linux,name=GigabitEthernet2,source=csr1kv,subscription=101 in-unicast-pkts=27i,in-multicast-pkts=0i,discontinuity-time="2019-05-23T07:40:23.000362+00:00",in-octets=5233i,in-errors=0i,out-multicast-pkts=0i,out-discards=0i,in-broadcast-pkts=0i,in-discards=0i,in-unknown-protos=0i,out-unicast-pkts=0i,out-broadcast-pkts=0i,out-octets=0i,out-errors=0i 1559150462624000000
ifstats,path=ietf-interfaces:interfaces-state/interface/statistics,host=linux,name=GigabitEthernet1,source=csr1kv,subscription=101 in-octets=3394770806i,in-broadcast-pkts=0i,in-multicast-pkts=0i,out-broadcast-pkts=0i,in-unknown-protos=0i,out-octets=350212i,in-unicast-pkts=9477273i,in-discards=0i,out-unicast-pkts=2726i,out-discards=0i,discontinuity-time="2019-05-23T07:40:23.000363+00:00",in-errors=30i,out-multicast-pkts=0i,out-errors=0i 1559150462624000000
```

View File

@@ -0,0 +1,558 @@
package cisco_telemetry_mdt
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"net"
"path"
"strconv"
"strings"
"sync"
"time"
dialout "github.com/cisco-ie/nx-telemetry-proto/mdt_dialout"
telemetry "github.com/cisco-ie/nx-telemetry-proto/telemetry_bis"
"github.com/golang/protobuf/proto"
"github.com/influxdata/telegraf"
internaltls "github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/plugins/inputs"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials" // Register GRPC gzip decoder to support compressed telemetry
_ "google.golang.org/grpc/encoding/gzip"
"google.golang.org/grpc/peer"
)
const (
// Default maximum telemetry payload size (in bytes) to accept for TCP dialout transport
tcpMaxMsgLen uint32 = 1024 * 1024
)
// CiscoTelemetryMDT plugin for IOS XR, IOS XE and NX-OS platforms
type CiscoTelemetryMDT struct {
// Common configuration
Transport string
ServiceAddress string `toml:"service_address"`
MaxMsgSize int `toml:"max_msg_size"`
Aliases map[string]string `toml:"aliases"`
EmbeddedTags []string `toml:"embedded_tags"`
Log telegraf.Logger
// GRPC TLS settings
internaltls.ServerConfig
// Internal listener / client handle
grpcServer *grpc.Server
listener net.Listener
// Internal state
aliases map[string]string
warned map[string]struct{}
extraTags map[string]map[string]struct{}
mutex sync.Mutex
acc telegraf.Accumulator
wg sync.WaitGroup
}
// Start the Cisco MDT service
func (c *CiscoTelemetryMDT) Start(acc telegraf.Accumulator) error {
var err error
c.acc = acc
c.listener, err = net.Listen("tcp", c.ServiceAddress)
if err != nil {
return err
}
// Invert aliases list
c.warned = make(map[string]struct{})
c.aliases = make(map[string]string, len(c.Aliases))
for alias, path := range c.Aliases {
c.aliases[path] = alias
}
// Fill extra tags
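// The directory part of each embedded tag path is normalized ('-' -> '_')
// so it matches the encoding-path/field-name keys looked up in parseContentField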
c.extraTags = make(map[string]map[string]struct{})
for _, tag := range c.EmbeddedTags {
dir := strings.Replace(path.Dir(tag), "-", "_", -1)
if _, hasKey := c.extraTags[dir]; !hasKey {
c.extraTags[dir] = make(map[string]struct{})
}
c.extraTags[dir][path.Base(tag)] = struct{}{}
}
switch c.Transport {
case "tcp":
// TCP dialout server accept routine
c.wg.Add(1)
go func() {
c.acceptTCPClients()
c.wg.Done()
}()
case "grpc":
var opts []grpc.ServerOption
tlsConfig, err := c.ServerConfig.TLSConfig()
if err != nil {
c.listener.Close()
return err
} else if tlsConfig != nil {
opts = append(opts, grpc.Creds(credentials.NewTLS(tlsConfig)))
}
if c.MaxMsgSize > 0 {
opts = append(opts, grpc.MaxRecvMsgSize(c.MaxMsgSize))
}
c.grpcServer = grpc.NewServer(opts...)
dialout.RegisterGRPCMdtDialoutServer(c.grpcServer, c)
c.wg.Add(1)
go func() {
c.grpcServer.Serve(c.listener)
c.wg.Done()
}()
default:
c.listener.Close()
return fmt.Errorf("invalid Cisco MDT transport: %s", c.Transport)
}
return nil
}
// acceptTCPClients defines the TCP dialout server main routine
func (c *CiscoTelemetryMDT) acceptTCPClients() {
// Keep track of all active connections, so we can close them if necessary
var mutex sync.Mutex
clients := make(map[net.Conn]struct{})
for {
conn, err := c.listener.Accept()
if neterr, ok := err.(*net.OpError); ok && (neterr.Timeout() || neterr.Temporary()) {
continue
} else if err != nil {
break // Stop() will close the connection so Accept() will fail here
}
mutex.Lock()
clients[conn] = struct{}{}
mutex.Unlock()
// Individual client connection routine
c.wg.Add(1)
go func() {
c.Log.Debugf("Accepted Cisco MDT TCP dialout connection from %s", conn.RemoteAddr())
if err := c.handleTCPClient(conn); err != nil {
c.acc.AddError(err)
}
c.Log.Debugf("Closed Cisco MDT TCP dialout connection from %s", conn.RemoteAddr())
mutex.Lock()
delete(clients, conn)
mutex.Unlock()
conn.Close()
c.wg.Done()
}()
}
// Close all remaining client connections
mutex.Lock()
for client := range clients {
if err := client.Close(); err != nil {
c.Log.Errorf("Failed to close TCP dialout client: %v", err)
}
}
mutex.Unlock()
}
// Handle a TCP telemetry client
func (c *CiscoTelemetryMDT) handleTCPClient(conn net.Conn) error {
// TCP Dialout telemetry framing header
var hdr struct {
MsgType uint16
MsgEncap uint16
MsgHdrVersion uint16
MsgFlags uint16
MsgLen uint32
}
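// Each TCP dialout message is a fixed 12-byte big-endian header (four uint16
// fields plus a uint32 payload length) followed by MsgLen bytes of GPB payload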
var payload bytes.Buffer
for {
// Read and validate dialout telemetry header
if err := binary.Read(conn, binary.BigEndian, &hdr); err != nil {
return err
}
maxMsgSize := tcpMaxMsgLen
if c.MaxMsgSize > 0 {
maxMsgSize = uint32(c.MaxMsgSize)
}
if hdr.MsgLen > maxMsgSize {
return fmt.Errorf("dialout packet too long: %v", hdr.MsgLen)
} else if hdr.MsgFlags != 0 {
return fmt.Errorf("invalid dialout flags: %v", hdr.MsgFlags)
}
// Read and handle telemetry packet
payload.Reset()
if size, err := payload.ReadFrom(io.LimitReader(conn, int64(hdr.MsgLen))); size != int64(hdr.MsgLen) {
if err != nil {
return err
}
return fmt.Errorf("TCP dialout premature EOF")
}
c.handleTelemetry(payload.Bytes())
}
}
// MdtDialout RPC server method for grpc-dialout transport
func (c *CiscoTelemetryMDT) MdtDialout(stream dialout.GRPCMdtDialout_MdtDialoutServer) error {
peer, peerOK := peer.FromContext(stream.Context())
if peerOK {
c.Log.Debugf("Accepted Cisco MDT GRPC dialout connection from %s", peer.Addr)
}
var chunkBuffer bytes.Buffer
for {
packet, err := stream.Recv()
if err != nil {
if err != io.EOF {
c.acc.AddError(fmt.Errorf("GRPC dialout receive error: %v", err))
}
break
}
if len(packet.Data) == 0 && len(packet.Errors) != 0 {
c.acc.AddError(fmt.Errorf("GRPC dialout error: %s", packet.Errors))
break
}
// Reassemble chunked telemetry data received from NX-OS
if packet.TotalSize == 0 {
c.handleTelemetry(packet.Data)
} else if int(packet.TotalSize) <= c.MaxMsgSize {
chunkBuffer.Write(packet.Data)
if chunkBuffer.Len() >= int(packet.TotalSize) {
c.handleTelemetry(chunkBuffer.Bytes())
chunkBuffer.Reset()
}
} else {
c.acc.AddError(fmt.Errorf("dropped too large packet: %dB > %dB", packet.TotalSize, c.MaxMsgSize))
}
}
if peerOK {
c.Log.Debugf("Closed Cisco MDT GRPC dialout connection from %s", peer.Addr)
}
return nil
}
// Handle telemetry packet from any transport, decode and add as measurement
func (c *CiscoTelemetryMDT) handleTelemetry(data []byte) {
msg := &telemetry.Telemetry{}
err := proto.Unmarshal(data, msg)
if err != nil {
c.acc.AddError(fmt.Errorf("Cisco MDT failed to decode: %v", err))
return
}
grouper := metric.NewSeriesGrouper()
for _, gpbkv := range msg.DataGpbkv {
// Produce metadata tags
var tags map[string]string
// Top-level field may have measurement timestamp, if not use message timestamp
measured := gpbkv.Timestamp
if measured == 0 {
measured = msg.MsgTimestamp
}
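// MDT timestamps are in milliseconds; split into seconds and nanoseconds for time.Unix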
timestamp := time.Unix(int64(measured/1000), int64(measured%1000)*1000000)
// Find toplevel GPBKV fields "keys" and "content"
var keys, content *telemetry.TelemetryField = nil, nil
for _, field := range gpbkv.Fields {
if field.Name == "keys" {
keys = field
} else if field.Name == "content" {
content = field
}
}
if keys == nil || content == nil {
c.Log.Infof("Message from %s missing keys or content", msg.GetNodeIdStr())
continue
}
// Parse keys
tags = make(map[string]string, len(keys.Fields)+3)
tags["source"] = msg.GetNodeIdStr()
tags["subscription"] = msg.GetSubscriptionIdStr()
tags["path"] = msg.GetEncodingPath()
for _, subfield := range keys.Fields {
c.parseKeyField(tags, subfield, "")
}
// Parse values
for _, subfield := range content.Fields {
c.parseContentField(grouper, subfield, "", msg.EncodingPath, tags, timestamp)
}
}
for _, metric := range grouper.Metrics() {
c.acc.AddMetric(metric)
}
}
func decodeValue(field *telemetry.TelemetryField) interface{} {
switch val := field.ValueByType.(type) {
case *telemetry.TelemetryField_BytesValue:
return val.BytesValue
case *telemetry.TelemetryField_StringValue:
if len(val.StringValue) > 0 {
return val.StringValue
}
case *telemetry.TelemetryField_BoolValue:
return val.BoolValue
case *telemetry.TelemetryField_Uint32Value:
return val.Uint32Value
case *telemetry.TelemetryField_Uint64Value:
return val.Uint64Value
case *telemetry.TelemetryField_Sint32Value:
return val.Sint32Value
case *telemetry.TelemetryField_Sint64Value:
return val.Sint64Value
case *telemetry.TelemetryField_DoubleValue:
return val.DoubleValue
case *telemetry.TelemetryField_FloatValue:
return val.FloatValue
}
return nil
}
func decodeTag(field *telemetry.TelemetryField) string {
switch val := field.ValueByType.(type) {
case *telemetry.TelemetryField_BytesValue:
return string(val.BytesValue)
case *telemetry.TelemetryField_StringValue:
return val.StringValue
case *telemetry.TelemetryField_BoolValue:
if val.BoolValue {
return "true"
}
return "false"
case *telemetry.TelemetryField_Uint32Value:
return strconv.FormatUint(uint64(val.Uint32Value), 10)
case *telemetry.TelemetryField_Uint64Value:
return strconv.FormatUint(val.Uint64Value, 10)
case *telemetry.TelemetryField_Sint32Value:
return strconv.FormatInt(int64(val.Sint32Value), 10)
case *telemetry.TelemetryField_Sint64Value:
return strconv.FormatInt(val.Sint64Value, 10)
case *telemetry.TelemetryField_DoubleValue:
return strconv.FormatFloat(val.DoubleValue, 'f', -1, 64)
case *telemetry.TelemetryField_FloatValue:
return strconv.FormatFloat(float64(val.FloatValue), 'f', -1, 32)
default:
return ""
}
}
// Recursively parse tag fields
func (c *CiscoTelemetryMDT) parseKeyField(tags map[string]string, field *telemetry.TelemetryField, prefix string) {
localname := strings.Replace(field.Name, "-", "_", -1)
name := localname
if len(localname) == 0 {
name = prefix
} else if len(prefix) > 0 {
name = prefix + "/" + localname
}
if tag := decodeTag(field); len(name) > 0 && len(tag) > 0 {
if _, exists := tags[localname]; !exists { // Use short keys whenever possible
tags[localname] = tag
} else {
tags[name] = tag
}
}
for _, subfield := range field.Fields {
c.parseKeyField(tags, subfield, name)
}
}
func (c *CiscoTelemetryMDT) parseContentField(grouper *metric.SeriesGrouper, field *telemetry.TelemetryField, prefix string,
path string, tags map[string]string, timestamp time.Time) {
name := strings.Replace(field.Name, "-", "_", -1)
if len(name) == 0 {
name = prefix
} else if len(prefix) > 0 {
name = prefix + "/" + name
}
extraTags := c.extraTags[strings.Replace(path, "-", "_", -1)+"/"+name]
if value := decodeValue(field); value != nil {
// Do alias lookup, to shorten measurement names
measurement := path
if alias, ok := c.aliases[path]; ok {
measurement = alias
} else {
c.mutex.Lock()
if _, haveWarned := c.warned[path]; !haveWarned {
c.Log.Debugf("No measurement alias for encoding path: %s", path)
c.warned[path] = struct{}{}
}
c.mutex.Unlock()
}
grouper.Add(measurement, tags, timestamp, name, value)
return
}
if len(extraTags) > 0 {
for _, subfield := range field.Fields {
if _, isExtraTag := extraTags[subfield.Name]; isExtraTag {
tags[name+"/"+strings.Replace(subfield.Name, "-", "_", -1)] = decodeTag(subfield)
}
}
}
var nxAttributes, nxChildren, nxRows *telemetry.TelemetryField
isNXOS := !strings.ContainsRune(path, ':') // IOS-XR and IOS-XE have a colon in their encoding path, NX-OS does not
for _, subfield := range field.Fields {
if isNXOS && subfield.Name == "attributes" && len(subfield.Fields) > 0 {
nxAttributes = subfield.Fields[0]
} else if isNXOS && subfield.Name == "children" && len(subfield.Fields) > 0 {
nxChildren = subfield
} else if isNXOS && strings.HasPrefix(subfield.Name, "ROW_") {
nxRows = subfield
} else if _, isExtraTag := extraTags[subfield.Name]; !isExtraTag { // Regular telemetry decoding
c.parseContentField(grouper, subfield, name, path, tags, timestamp)
}
}
if nxAttributes == nil && nxRows == nil {
return
} else if nxRows != nil {
// NXAPI structure: https://developer.cisco.com/docs/cisco-nexus-9000-series-nx-api-cli-reference-release-9-2x/
for _, row := range nxRows.Fields {
for i, subfield := range row.Fields {
if i == 0 { // First subfield contains the index, promote it from value to tag
tags[prefix] = decodeTag(subfield)
} else {
c.parseContentField(grouper, subfield, "", path, tags, timestamp)
}
}
delete(tags, prefix)
}
return
}
// DME structure: https://developer.cisco.com/site/nxapi-dme-model-reference-api/
rn := ""
dn := false
for _, subfield := range nxAttributes.Fields {
if subfield.Name == "rn" {
rn = decodeTag(subfield)
} else if subfield.Name == "dn" {
dn = true
}
}
if len(rn) > 0 {
tags[prefix] = rn
} else if !dn { // Check for distinguished name being present
c.acc.AddError(fmt.Errorf("NX-OS decoding failed: missing dn field"))
return
}
for _, subfield := range nxAttributes.Fields {
if subfield.Name != "rn" {
c.parseContentField(grouper, subfield, "", path, tags, timestamp)
}
}
if nxChildren != nil {
// This is a nested structure, children will inherit relative name keys of parent
for _, subfield := range nxChildren.Fields {
c.parseContentField(grouper, subfield, prefix, path, tags, timestamp)
}
}
delete(tags, prefix)
}
func (c *CiscoTelemetryMDT) Address() net.Addr {
return c.listener.Addr()
}
// Stop listener and cleanup
func (c *CiscoTelemetryMDT) Stop() {
if c.grpcServer != nil {
// Stop server and terminate all running dialout routines
c.grpcServer.Stop()
}
if c.listener != nil {
c.listener.Close()
}
c.wg.Wait()
}
const sampleConfig = `
## Telemetry transport can be "tcp" or "grpc". TLS is only supported when
## using the grpc transport.
transport = "grpc"
## Address and port to host telemetry listener
service_address = ":57000"
## Enable TLS; grpc transport only.
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Enable TLS client authentication and define allowed CA certificates; grpc
## transport only.
# tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"]
## Define (for certain nested telemetry measurements with embedded tags) which fields are tags
# embedded_tags = ["Cisco-IOS-XR-qos-ma-oper:qos/interface-table/interface/input/service-policy-names/service-policy-instance/statistics/class-stats/class-name"]
## Define aliases to map telemetry encoding paths to simple measurement names
[inputs.cisco_telemetry_mdt.aliases]
ifstats = "ietf-interfaces:interfaces-state/interface/statistics"
`
// SampleConfig of plugin
func (c *CiscoTelemetryMDT) SampleConfig() string {
return sampleConfig
}
// Description of plugin
func (c *CiscoTelemetryMDT) Description() string {
return "Cisco model-driven telemetry (MDT) input plugin for IOS XR, IOS XE and NX-OS platforms"
}
// Gather plugin measurements (unused)
func (c *CiscoTelemetryMDT) Gather(_ telegraf.Accumulator) error {
return nil
}
func init() {
inputs.Add("cisco_telemetry_mdt", func() telegraf.Input {
return &CiscoTelemetryMDT{
Transport: "grpc",
ServiceAddress: "127.0.0.1:57000",
}
})
}

View File

@@ -0,0 +1,592 @@
package cisco_telemetry_mdt
import (
"context"
"encoding/binary"
"errors"
"net"
"testing"
dialout "github.com/cisco-ie/nx-telemetry-proto/mdt_dialout"
telemetry "github.com/cisco-ie/nx-telemetry-proto/telemetry_bis"
"github.com/golang/protobuf/proto"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
)
func TestHandleTelemetryTwoSimple(t *testing.T) {
c := &CiscoTelemetryMDT{Log: testutil.Logger{}, Transport: "dummy", Aliases: map[string]string{"alias": "type:model/some/path"}}
acc := &testutil.Accumulator{}
err := c.Start(acc)
// error is expected since we are passing in dummy transport
require.Error(t, err)
telemetry := &telemetry.Telemetry{
MsgTimestamp: 1543236572000,
EncodingPath: "type:model/some/path",
NodeId: &telemetry.Telemetry_NodeIdStr{NodeIdStr: "hostname"},
Subscription: &telemetry.Telemetry_SubscriptionIdStr{SubscriptionIdStr: "subscription"},
DataGpbkv: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "keys",
Fields: []*telemetry.TelemetryField{
{
Name: "name",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "str"},
},
{
Name: "uint64",
ValueByType: &telemetry.TelemetryField_Uint64Value{Uint64Value: 1234},
},
},
},
{
Name: "content",
Fields: []*telemetry.TelemetryField{
{
Name: "bool",
ValueByType: &telemetry.TelemetryField_BoolValue{BoolValue: true},
},
},
},
},
},
{
Fields: []*telemetry.TelemetryField{
{
Name: "keys",
Fields: []*telemetry.TelemetryField{
{
Name: "name",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "str2"},
},
},
},
{
Name: "content",
Fields: []*telemetry.TelemetryField{
{
Name: "bool",
ValueByType: &telemetry.TelemetryField_BoolValue{BoolValue: false},
},
},
},
},
},
},
}
data, _ := proto.Marshal(telemetry)
c.handleTelemetry(data)
require.Empty(t, acc.Errors)
tags := map[string]string{"path": "type:model/some/path", "name": "str", "uint64": "1234", "source": "hostname", "subscription": "subscription"}
fields := map[string]interface{}{"bool": true}
acc.AssertContainsTaggedFields(t, "alias", fields, tags)
tags = map[string]string{"path": "type:model/some/path", "name": "str2", "source": "hostname", "subscription": "subscription"}
fields = map[string]interface{}{"bool": false}
acc.AssertContainsTaggedFields(t, "alias", fields, tags)
}
func TestHandleTelemetrySingleNested(t *testing.T) {
c := &CiscoTelemetryMDT{Log: testutil.Logger{}, Transport: "dummy", Aliases: map[string]string{"nested": "type:model/nested/path"}}
acc := &testutil.Accumulator{}
err := c.Start(acc)
// error is expected since we are passing in dummy transport
require.Error(t, err)
telemetry := &telemetry.Telemetry{
MsgTimestamp: 1543236572000,
EncodingPath: "type:model/nested/path",
NodeId: &telemetry.Telemetry_NodeIdStr{NodeIdStr: "hostname"},
Subscription: &telemetry.Telemetry_SubscriptionIdStr{SubscriptionIdStr: "subscription"},
DataGpbkv: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "keys",
Fields: []*telemetry.TelemetryField{
{
Name: "nested",
Fields: []*telemetry.TelemetryField{
{
Name: "key",
Fields: []*telemetry.TelemetryField{
{
Name: "level",
ValueByType: &telemetry.TelemetryField_DoubleValue{DoubleValue: 3},
},
},
},
},
},
},
},
{
Name: "content",
Fields: []*telemetry.TelemetryField{
{
Name: "nested",
Fields: []*telemetry.TelemetryField{
{
Name: "value",
Fields: []*telemetry.TelemetryField{
{
Name: "foo",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "bar"},
},
},
},
},
},
},
},
},
},
},
}
data, _ := proto.Marshal(telemetry)
c.handleTelemetry(data)
require.Empty(t, acc.Errors)
tags := map[string]string{"path": "type:model/nested/path", "level": "3", "source": "hostname", "subscription": "subscription"}
fields := map[string]interface{}{"nested/value/foo": "bar"}
acc.AssertContainsTaggedFields(t, "nested", fields, tags)
}
func TestHandleEmbeddedTags(t *testing.T) {
c := &CiscoTelemetryMDT{Transport: "dummy", Aliases: map[string]string{"extra": "type:model/extra"}, EmbeddedTags: []string{"type:model/extra/list/name"}}
acc := &testutil.Accumulator{}
err := c.Start(acc)
// error is expected since we are passing in dummy transport
require.Error(t, err)
telemetry := &telemetry.Telemetry{
MsgTimestamp: 1543236572000,
EncodingPath: "type:model/extra",
NodeId: &telemetry.Telemetry_NodeIdStr{NodeIdStr: "hostname"},
Subscription: &telemetry.Telemetry_SubscriptionIdStr{SubscriptionIdStr: "subscription"},
DataGpbkv: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "keys",
Fields: []*telemetry.TelemetryField{
{
Name: "foo",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "bar"},
},
},
},
{
Name: "content",
Fields: []*telemetry.TelemetryField{
{
Name: "list",
Fields: []*telemetry.TelemetryField{
{
Name: "name",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "entry1"},
},
{
Name: "test",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "foo"},
},
},
},
{
Name: "list",
Fields: []*telemetry.TelemetryField{
{
Name: "name",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "entry2"},
},
{
Name: "test",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "bar"},
},
},
},
},
},
},
},
},
}
data, _ := proto.Marshal(telemetry)
c.handleTelemetry(data)
require.Empty(t, acc.Errors)
tags1 := map[string]string{"path": "type:model/extra", "foo": "bar", "source": "hostname", "subscription": "subscription", "list/name": "entry1"}
fields1 := map[string]interface{}{"list/test": "foo"}
tags2 := map[string]string{"path": "type:model/extra", "foo": "bar", "source": "hostname", "subscription": "subscription", "list/name": "entry2"}
fields2 := map[string]interface{}{"list/test": "bar"}
acc.AssertContainsTaggedFields(t, "extra", fields1, tags1)
acc.AssertContainsTaggedFields(t, "extra", fields2, tags2)
}
func TestHandleNXAPI(t *testing.T) {
c := &CiscoTelemetryMDT{Transport: "dummy", Aliases: map[string]string{"nxapi": "show nxapi"}}
acc := &testutil.Accumulator{}
err := c.Start(acc)
// error is expected since we are passing in dummy transport
require.Error(t, err)
telemetry := &telemetry.Telemetry{
MsgTimestamp: 1543236572000,
EncodingPath: "show nxapi",
NodeId: &telemetry.Telemetry_NodeIdStr{NodeIdStr: "hostname"},
Subscription: &telemetry.Telemetry_SubscriptionIdStr{SubscriptionIdStr: "subscription"},
DataGpbkv: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "keys",
Fields: []*telemetry.TelemetryField{
{
Name: "foo",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "bar"},
},
},
},
{
Name: "content",
Fields: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "TABLE_nxapi",
Fields: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "ROW_nxapi",
Fields: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "index",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "i1"},
},
{
Name: "value",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "foo"},
},
},
},
{
Fields: []*telemetry.TelemetryField{
{
Name: "index",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "i2"},
},
{
Name: "value",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "bar"},
},
},
},
},
},
},
},
},
},
},
},
},
},
},
},
},
}
data, _ := proto.Marshal(telemetry)
c.handleTelemetry(data)
require.Empty(t, acc.Errors)
tags1 := map[string]string{"path": "show nxapi", "foo": "bar", "TABLE_nxapi": "i1", "source": "hostname", "subscription": "subscription"}
fields1 := map[string]interface{}{"value": "foo"}
tags2 := map[string]string{"path": "show nxapi", "foo": "bar", "TABLE_nxapi": "i2", "source": "hostname", "subscription": "subscription"}
fields2 := map[string]interface{}{"value": "bar"}
acc.AssertContainsTaggedFields(t, "nxapi", fields1, tags1)
acc.AssertContainsTaggedFields(t, "nxapi", fields2, tags2)
}
func TestHandleNXDME(t *testing.T) {
c := &CiscoTelemetryMDT{Transport: "dummy", Aliases: map[string]string{"dme": "sys/dme"}}
acc := &testutil.Accumulator{}
err := c.Start(acc)
// error is expected since we are passing in dummy transport
require.Error(t, err)
telemetry := &telemetry.Telemetry{
MsgTimestamp: 1543236572000,
EncodingPath: "sys/dme",
NodeId: &telemetry.Telemetry_NodeIdStr{NodeIdStr: "hostname"},
Subscription: &telemetry.Telemetry_SubscriptionIdStr{SubscriptionIdStr: "subscription"},
DataGpbkv: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "keys",
Fields: []*telemetry.TelemetryField{
{
Name: "foo",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "bar"},
},
},
},
{
Name: "content",
Fields: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "fooEntity",
Fields: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "attributes",
Fields: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "rn",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "some-rn"},
},
{
Name: "value",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "foo"},
},
},
},
},
},
},
},
},
},
},
},
},
},
},
},
},
}
data, _ := proto.Marshal(telemetry)
c.handleTelemetry(data)
require.Empty(t, acc.Errors)
tags1 := map[string]string{"path": "sys/dme", "foo": "bar", "fooEntity": "some-rn", "source": "hostname", "subscription": "subscription"}
fields1 := map[string]interface{}{"value": "foo"}
acc.AssertContainsTaggedFields(t, "dme", fields1, tags1)
}
func TestTCPDialoutOverflow(t *testing.T) {
c := &CiscoTelemetryMDT{Log: testutil.Logger{}, Transport: "tcp", ServiceAddress: "127.0.0.1:0"}
acc := &testutil.Accumulator{}
err := c.Start(acc)
require.NoError(t, err)
hdr := struct {
MsgType uint16
MsgEncap uint16
MsgHdrVersion uint16
MsgFlags uint16
MsgLen uint32
}{MsgLen: uint32(1000000000)}
addr := c.Address()
conn, err := net.Dial(addr.Network(), addr.String())
require.NoError(t, err)
binary.Write(conn, binary.BigEndian, hdr)
conn.Read([]byte{0})
conn.Close()
c.Stop()
require.Contains(t, acc.Errors, errors.New("dialout packet too long: 1000000000"))
}
func mockTelemetryMessage() *telemetry.Telemetry {
return &telemetry.Telemetry{
MsgTimestamp: 1543236572000,
EncodingPath: "type:model/some/path",
NodeId: &telemetry.Telemetry_NodeIdStr{NodeIdStr: "hostname"},
Subscription: &telemetry.Telemetry_SubscriptionIdStr{SubscriptionIdStr: "subscription"},
DataGpbkv: []*telemetry.TelemetryField{
{
Fields: []*telemetry.TelemetryField{
{
Name: "keys",
Fields: []*telemetry.TelemetryField{
{
Name: "name",
ValueByType: &telemetry.TelemetryField_StringValue{StringValue: "str"},
},
},
},
{
Name: "content",
Fields: []*telemetry.TelemetryField{
{
Name: "value",
ValueByType: &telemetry.TelemetryField_Sint64Value{Sint64Value: -1},
},
},
},
},
},
},
}
}
func TestTCPDialoutMultiple(t *testing.T) {
c := &CiscoTelemetryMDT{Log: testutil.Logger{}, Transport: "tcp", ServiceAddress: "127.0.0.1:0", Aliases: map[string]string{
"some": "type:model/some/path", "parallel": "type:model/parallel/path", "other": "type:model/other/path"}}
acc := &testutil.Accumulator{}
err := c.Start(acc)
require.NoError(t, err)
telemetry := mockTelemetryMessage()
hdr := struct {
MsgType uint16
MsgEncap uint16
MsgHdrVersion uint16
MsgFlags uint16
MsgLen uint32
}{}
addr := c.Address()
conn, err := net.Dial(addr.Network(), addr.String())
require.NoError(t, err)
data, _ := proto.Marshal(telemetry)
hdr.MsgLen = uint32(len(data))
binary.Write(conn, binary.BigEndian, hdr)
conn.Write(data)
conn2, err := net.Dial(addr.Network(), addr.String())
require.NoError(t, err)
telemetry.EncodingPath = "type:model/parallel/path"
data, _ = proto.Marshal(telemetry)
hdr.MsgLen = uint32(len(data))
binary.Write(conn2, binary.BigEndian, hdr)
conn2.Write(data)
conn2.Write([]byte{0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0})
conn2.Read([]byte{0})
conn2.Close()
telemetry.EncodingPath = "type:model/other/path"
data, _ = proto.Marshal(telemetry)
hdr.MsgLen = uint32(len(data))
binary.Write(conn, binary.BigEndian, hdr)
conn.Write(data)
conn.Write([]byte{0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0})
conn.Read([]byte{0})
c.Stop()
conn.Close()
// We use the invalid dialout flags to let the server close the connection
require.Equal(t, acc.Errors, []error{errors.New("invalid dialout flags: 257"), errors.New("invalid dialout flags: 257")})
tags := map[string]string{"path": "type:model/some/path", "name": "str", "source": "hostname", "subscription": "subscription"}
fields := map[string]interface{}{"value": int64(-1)}
acc.AssertContainsTaggedFields(t, "some", fields, tags)
tags = map[string]string{"path": "type:model/parallel/path", "name": "str", "source": "hostname", "subscription": "subscription"}
fields = map[string]interface{}{"value": int64(-1)}
acc.AssertContainsTaggedFields(t, "parallel", fields, tags)
tags = map[string]string{"path": "type:model/other/path", "name": "str", "source": "hostname", "subscription": "subscription"}
fields = map[string]interface{}{"value": int64(-1)}
acc.AssertContainsTaggedFields(t, "other", fields, tags)
}
func TestGRPCDialoutError(t *testing.T) {
c := &CiscoTelemetryMDT{Log: testutil.Logger{}, Transport: "grpc", ServiceAddress: "127.0.0.1:0"}
acc := &testutil.Accumulator{}
err := c.Start(acc)
require.NoError(t, err)
addr := c.Address()
conn, _ := grpc.Dial(addr.String(), grpc.WithInsecure())
client := dialout.NewGRPCMdtDialoutClient(conn)
stream, _ := client.MdtDialout(context.Background())
args := &dialout.MdtDialoutArgs{Errors: "foobar"}
stream.Send(args)
// Wait for the server to close
stream.Recv()
c.Stop()
require.Equal(t, acc.Errors, []error{errors.New("GRPC dialout error: foobar")})
}
func TestGRPCDialoutMultiple(t *testing.T) {
c := &CiscoTelemetryMDT{Log: testutil.Logger{}, Transport: "grpc", ServiceAddress: "127.0.0.1:0", Aliases: map[string]string{
"some": "type:model/some/path", "parallel": "type:model/parallel/path", "other": "type:model/other/path"}}
acc := &testutil.Accumulator{}
err := c.Start(acc)
require.NoError(t, err)
telemetry := mockTelemetryMessage()
addr := c.Address()
conn, _ := grpc.Dial(addr.String(), grpc.WithInsecure(), grpc.WithBlock())
client := dialout.NewGRPCMdtDialoutClient(conn)
stream, _ := client.MdtDialout(context.TODO())
data, _ := proto.Marshal(telemetry)
args := &dialout.MdtDialoutArgs{Data: data, ReqId: 456}
stream.Send(args)
conn2, _ := grpc.Dial(addr.String(), grpc.WithInsecure(), grpc.WithBlock())
client2 := dialout.NewGRPCMdtDialoutClient(conn2)
stream2, _ := client2.MdtDialout(context.TODO())
telemetry.EncodingPath = "type:model/parallel/path"
data, _ = proto.Marshal(telemetry)
args = &dialout.MdtDialoutArgs{Data: data}
stream2.Send(args)
stream2.Send(&dialout.MdtDialoutArgs{Errors: "testclose"})
stream2.Recv()
conn2.Close()
telemetry.EncodingPath = "type:model/other/path"
data, _ = proto.Marshal(telemetry)
args = &dialout.MdtDialoutArgs{Data: data}
stream.Send(args)
stream.Send(&dialout.MdtDialoutArgs{Errors: "testclose"})
stream.Recv()
c.Stop()
conn.Close()
require.Equal(t, acc.Errors, []error{errors.New("GRPC dialout error: testclose"), errors.New("GRPC dialout error: testclose")})
tags := map[string]string{"path": "type:model/some/path", "name": "str", "source": "hostname", "subscription": "subscription"}
fields := map[string]interface{}{"value": int64(-1)}
acc.AssertContainsTaggedFields(t, "some", fields, tags)
tags = map[string]string{"path": "type:model/parallel/path", "name": "str", "source": "hostname", "subscription": "subscription"}
fields = map[string]interface{}{"value": int64(-1)}
acc.AssertContainsTaggedFields(t, "parallel", fields, tags)
tags = map[string]string{"path": "type:model/other/path", "name": "str", "source": "hostname", "subscription": "subscription"}
fields = map[string]interface{}{"value": int64(-1)}
acc.AssertContainsTaggedFields(t, "other", fields, tags)
}

View File

@@ -0,0 +1,124 @@
# ClickHouse Input Plugin
This plugin gathers statistics from [ClickHouse](https://github.com/ClickHouse/ClickHouse) servers.
### Configuration
```toml
# Read metrics from one or many ClickHouse servers
[[inputs.clickhouse]]
## Username for authorization on ClickHouse server
## example: username = "default"
username = "default"
## Password for authorization on ClickHouse server
## example: password = "super_secret"
## HTTP(s) timeout while getting metrics values
## The timeout includes connection time, any redirects, and reading the response body.
## example: timeout = 1s
# timeout = 5s
## List of servers for metrics scraping
## Metrics are scraped via the HTTP(s) ClickHouse interface
## https://clickhouse.tech/docs/en/interfaces/http/
## example: servers = ["http://127.0.0.1:8123","https://custom-server.mdb.yandexcloud.net"]
servers = ["http://127.0.0.1:8123"]
## If "auto_discovery"" is "true" plugin tries to connect to all servers available in the cluster
## with using same "user:password" described in "user" and "password" parameters
## and get this server hostname list from "system.clusters" table
## see
## - https://clickhouse.tech/docs/en/operations/system_tables/#system-clusters
## - https://clickhouse.tech/docs/en/operations/server_settings/settings/#server_settings_remote_servers
## - https://clickhouse.tech/docs/en/operations/table_engines/distributed/
## - https://clickhouse.tech/docs/en/operations/table_engines/replication/#creating-replicated-tables
## example: auto_discovery = false
# auto_discovery = true
## Filter cluster names in "system.clusters" when "auto_discovery" is "true"
## When this filter is present, a "WHERE cluster IN (...)" filter is applied.
## Use only full cluster names here; regexp and glob filters are not allowed.
## For example, given the following "/etc/clickhouse-server/config.d/remote.xml":
## <yandex>
## <remote_servers>
## <my-own-cluster>
## <shard>
## <replica><host>clickhouse-ru-1.local</host><port>9000</port></replica>
## <replica><host>clickhouse-ru-2.local</host><port>9000</port></replica>
## </shard>
## <shard>
## <replica><host>clickhouse-eu-1.local</host><port>9000</port></replica>
## <replica><host>clickhouse-eu-2.local</host><port>9000</port></replica>
## </shard>
## </my-own-cluster>
## </remote_servers>
##
## </yandex>
##
## example: cluster_include = ["my-own-cluster"]
# cluster_include = []
## Filter cluster names in "system.clusters" when "auto_discovery" is "true"
## When this filter is present, a "WHERE cluster NOT IN (...)" filter is applied.
## example: cluster_exclude = ["my-internal-not-discovered-cluster"]
# cluster_exclude = []
## Optional TLS Config
# tls_ca = "/etc/telegraf/ca.pem"
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
```
### Metrics
- clickhouse_events
- tags:
- source (ClickHouse server hostname)
- cluster (Name of the cluster [optional])
- shard_num (Shard number in the cluster [optional])
- fields:
- all rows from [system.events][]
- clickhouse_metrics
- tags:
- source (ClickHouse server hostname)
- cluster (Name of the cluster [optional])
- shard_num (Shard number in the cluster [optional])
- fields:
- all rows from [system.metrics][]
- clickhouse_asynchronous_metrics
- tags:
- source (ClickHouse server hostname)
- cluster (Name of the cluster [optional])
- shard_num (Shard number in the cluster [optional])
- fields:
- all rows from [system.asynchronous_metrics][]
- clickhouse_tables
- tags:
- source (ClickHouse server hostname)
- table
- database
- cluster (Name of the cluster [optional])
- shard_num (Shard number in the cluster [optional])
- fields:
- bytes
- parts
- rows
### Example Output
```
clickhouse_events,cluster=test_cluster_two_shards_localhost,host=kshvakov,source=localhost,shard_num=1 read_compressed_bytes=212i,arena_alloc_chunks=35i,function_execute=85i,merge_tree_data_writer_rows=3i,rw_lock_acquired_read_locks=421i,file_open=46i,io_buffer_alloc_bytes=86451985i,inserted_bytes=196i,regexp_created=3i,real_time_microseconds=116832i,query=23i,network_receive_elapsed_microseconds=268i,merge_tree_data_writer_compressed_bytes=1080i,arena_alloc_bytes=212992i,disk_write_elapsed_microseconds=556i,inserted_rows=3i,compressed_read_buffer_bytes=81i,read_buffer_from_file_descriptor_read_bytes=148i,write_buffer_from_file_descriptor_write=47i,merge_tree_data_writer_blocks=3i,soft_page_faults=896i,hard_page_faults=7i,select_query=21i,merge_tree_data_writer_uncompressed_bytes=196i,merge_tree_data_writer_blocks_already_sorted=3i,user_time_microseconds=40196i,compressed_read_buffer_blocks=5i,write_buffer_from_file_descriptor_write_bytes=3246i,io_buffer_allocs=296i,created_write_buffer_ordinary=12i,disk_read_elapsed_microseconds=59347044i,network_send_elapsed_microseconds=1538i,context_lock=1040i,insert_query=1i,system_time_microseconds=14582i,read_buffer_from_file_descriptor_read=3i 1569421000000000000
clickhouse_asynchronous_metrics,cluster=test_cluster_two_shards_localhost,host=kshvakov,source=localhost,shard_num=1 jemalloc.metadata_thp=0i,replicas_max_relative_delay=0i,jemalloc.mapped=1803177984i,jemalloc.allocated=1724839256i,jemalloc.background_thread.run_interval=0i,jemalloc.background_thread.num_threads=0i,uncompressed_cache_cells=0i,replicas_max_absolute_delay=0i,mark_cache_bytes=0i,compiled_expression_cache_count=0i,replicas_sum_queue_size=0i,number_of_tables=35i,replicas_max_merges_in_queue=0i,replicas_max_inserts_in_queue=0i,replicas_sum_merges_in_queue=0i,replicas_max_queue_size=0i,mark_cache_files=0i,jemalloc.background_thread.num_runs=0i,jemalloc.active=1726210048i,uptime=158i,jemalloc.retained=380481536i,replicas_sum_inserts_in_queue=0i,uncompressed_cache_bytes=0i,number_of_databases=2i,jemalloc.metadata=9207704i,max_part_count_for_partition=1i,jemalloc.resident=1742442496i 1569421000000000000
clickhouse_metrics,cluster=test_cluster_two_shards_localhost,host=kshvakov,source=localhost,shard_num=1 replicated_send=0i,write=0i,ephemeral_node=0i,zoo_keeper_request=0i,distributed_files_to_insert=0i,replicated_fetch=0i,background_schedule_pool_task=0i,interserver_connection=0i,leader_replica=0i,delayed_inserts=0i,global_thread_active=41i,merge=0i,readonly_replica=0i,memory_tracking_in_background_schedule_pool=0i,memory_tracking_for_merges=0i,zoo_keeper_session=0i,context_lock_wait=0i,storage_buffer_bytes=0i,background_pool_task=0i,send_external_tables=0i,zoo_keeper_watch=0i,part_mutation=0i,disk_space_reserved_for_merge=0i,distributed_send=0i,version_integer=19014003i,local_thread=0i,replicated_checks=0i,memory_tracking=0i,memory_tracking_in_background_processing_pool=0i,leader_election=0i,revision=54425i,open_file_for_read=0i,open_file_for_write=0i,storage_buffer_rows=0i,rw_lock_waiting_readers=0i,rw_lock_waiting_writers=0i,rw_lock_active_writers=0i,local_thread_active=0i,query_preempted=0i,tcp_connection=1i,http_connection=1i,read=2i,query_thread=0i,dict_cache_requests=0i,rw_lock_active_readers=1i,global_thread=43i,query=1i 1569421000000000000
clickhouse_tables,cluster=test_cluster_two_shards_localhost,database=system,host=kshvakov,source=localhost,shard_num=1,table=trace_log bytes=754i,parts=1i,rows=1i 1569421000000000000
clickhouse_tables,cluster=test_cluster_two_shards_localhost,database=default,host=kshvakov,source=localhost,shard_num=1,table=example bytes=326i,parts=2i,rows=2i 1569421000000000000
```
[system.events]: https://clickhouse.tech/docs/en/operations/system_tables/#system_tables-events
[system.metrics]: https://clickhouse.tech/docs/en/operations/system_tables/#system_tables-metrics
[system.asynchronous_metrics]: https://clickhouse.tech/docs/en/operations/system_tables/#system_tables-asynchronous_metrics

View File

@@ -0,0 +1,394 @@
package clickhouse
import (
"bytes"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs"
)
var defaultTimeout = 5 * time.Second
var sampleConfig = `
## Username for authorization on ClickHouse server
## example: username = "default"
username = "default"
## Password for authorization on ClickHouse server
## example: password = "super_secret"
## HTTP(s) timeout while getting metrics values
## The timeout includes connection time, any redirects, and reading the response body.
## example: timeout = 1s
# timeout = 5s
## List of servers for metrics scraping
## Metrics are scraped via the HTTP(s) ClickHouse interface
## https://clickhouse.tech/docs/en/interfaces/http/
## example: servers = ["http://127.0.0.1:8123","https://custom-server.mdb.yandexcloud.net"]
servers = ["http://127.0.0.1:8123"]
## If "auto_discovery"" is "true" plugin tries to connect to all servers available in the cluster
## with using same "user:password" described in "user" and "password" parameters
## and get this server hostname list from "system.clusters" table
## see
## - https://clickhouse.tech/docs/en/operations/system_tables/#system-clusters
## - https://clickhouse.tech/docs/en/operations/server_settings/settings/#server_settings_remote_servers
## - https://clickhouse.tech/docs/en/operations/table_engines/distributed/
## - https://clickhouse.tech/docs/en/operations/table_engines/replication/#creating-replicated-tables
## example: auto_discovery = false
# auto_discovery = true
## Filter cluster names in "system.clusters" when "auto_discovery" is "true"
## When this filter is present, a "WHERE cluster IN (...)" filter is applied.
## Use only full cluster names here; regexp and glob filters are not allowed.
## For example, given the following "/etc/clickhouse-server/config.d/remote.xml":
## <yandex>
## <remote_servers>
## <my-own-cluster>
## <shard>
## <replica><host>clickhouse-ru-1.local</host><port>9000</port></replica>
## <replica><host>clickhouse-ru-2.local</host><port>9000</port></replica>
## </shard>
## <shard>
## <replica><host>clickhouse-eu-1.local</host><port>9000</port></replica>
## <replica><host>clickhouse-eu-2.local</host><port>9000</port></replica>
## </shard>
## </my-own-cluster>
## </remote_servers>
##
## </yandex>
##
## example: cluster_include = ["my-own-cluster"]
# cluster_include = []
## Filter cluster names in "system.clusters" when "auto_discovery" is "true"
## When this filter is present, a "WHERE cluster NOT IN (...)" filter is applied.
## example: cluster_exclude = ["my-internal-not-discovered-cluster"]
# cluster_exclude = []
## Optional TLS Config
# tls_ca = "/etc/telegraf/ca.pem"
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
`
type connect struct {
Cluster string `json:"cluster"`
ShardNum int `json:"shard_num"`
Hostname string `json:"host_name"`
url *url.URL
}
func init() {
inputs.Add("clickhouse", func() telegraf.Input {
return &ClickHouse{
AutoDiscovery: true,
ClientConfig: tls.ClientConfig{
InsecureSkipVerify: false,
},
Timeout: internal.Duration{Duration: defaultTimeout},
}
})
}
// ClickHouse Telegraf Input Plugin
type ClickHouse struct {
Username string `toml:"username"`
Password string `toml:"password"`
Servers []string `toml:"servers"`
AutoDiscovery bool `toml:"auto_discovery"`
ClusterInclude []string `toml:"cluster_include"`
ClusterExclude []string `toml:"cluster_exclude"`
Timeout internal.Duration `toml:"timeout"`
client http.Client
tls.ClientConfig
}
// SampleConfig returns the sample config
func (*ClickHouse) SampleConfig() string {
return sampleConfig
}
// Description returns the plugin description
func (*ClickHouse) Description() string {
return "Read metrics from one or many ClickHouse servers"
}
// Start ClickHouse input service
func (ch *ClickHouse) Start(telegraf.Accumulator) error {
timeout := defaultTimeout
if ch.Timeout.Duration != 0 {
timeout = ch.Timeout.Duration
}
tlsCfg, err := ch.ClientConfig.TLSConfig()
if err != nil {
return err
}
ch.client = http.Client{
Timeout: timeout,
Transport: &http.Transport{
TLSClientConfig: tlsCfg,
Proxy: http.ProxyFromEnvironment,
},
}
return nil
}
// Gather collects data from ClickHouse servers
func (ch *ClickHouse) Gather(acc telegraf.Accumulator) (err error) {
var (
connects []connect
exists = func(host string) bool {
for _, c := range connects {
if c.Hostname == host {
return true
}
}
return false
}
)
for _, server := range ch.Servers {
u, err := url.Parse(server)
if err != nil {
return err
}
switch {
case ch.AutoDiscovery:
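// Query system.clusters on each configured server and add any hosts not
// seen yet, reusing the configured scheme and port for discovered hostnames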
var conns []connect
if err := ch.execQuery(u, "SELECT cluster, shard_num, host_name FROM system.clusters "+ch.clusterIncludeExcludeFilter(), &conns); err != nil {
acc.AddError(err)
continue
}
for _, c := range conns {
if !exists(c.Hostname) {
c.url = &url.URL{
Scheme: u.Scheme,
Host: net.JoinHostPort(c.Hostname, u.Port()),
}
connects = append(connects, c)
}
}
default:
connects = append(connects, connect{
url: u,
})
}
}
for _, conn := range connects {
if err := ch.tables(acc, &conn); err != nil {
acc.AddError(err)
}
for metric := range commonMetrics {
if err := ch.commonMetrics(acc, &conn, metric); err != nil {
acc.AddError(err)
}
}
}
return nil
}
func (ch *ClickHouse) Stop() {
ch.client.CloseIdleConnections()
}
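// clusterIncludeExcludeFilter builds the WHERE clause for the system.clusters
// query; e.g. cluster_include = ["a"] with cluster_exclude = ["b"] yields
// "WHERE cluster IN ('a') OR cluster NOT IN ('b')"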
func (ch *ClickHouse) clusterIncludeExcludeFilter() string {
if len(ch.ClusterInclude) == 0 && len(ch.ClusterExclude) == 0 {
return ""
}
var (
escape = func(in string) string {
return "'" + strings.NewReplacer(`\`, `\\`, `'`, `\'`).Replace(in) + "'"
}
makeFilter = func(expr string, args []string) string {
in := make([]string, 0, len(args))
for _, v := range args {
in = append(in, escape(v))
}
return fmt.Sprintf("cluster %s (%s)", expr, strings.Join(in, ", "))
}
includeFilter, excludeFilter string
)
if len(ch.ClusterInclude) != 0 {
includeFilter = makeFilter("IN", ch.ClusterInclude)
}
if len(ch.ClusterExclude) != 0 {
excludeFilter = makeFilter("NOT IN", ch.ClusterExclude)
}
if includeFilter != "" && excludeFilter != "" {
return "WHERE " + includeFilter + " OR " + excludeFilter
}
if includeFilter == "" && excludeFilter != "" {
return "WHERE " + excludeFilter
}
if includeFilter != "" && excludeFilter == "" {
return "WHERE " + includeFilter
}
return ""
}
func (ch *ClickHouse) commonMetrics(acc telegraf.Accumulator, conn *connect, metric string) error {
var result []struct {
Metric string `json:"metric"`
Value chUInt64 `json:"value"`
}
if err := ch.execQuery(conn.url, commonMetrics[metric], &result); err != nil {
return err
}
tags := map[string]string{
"source": conn.Hostname,
}
if len(conn.Cluster) != 0 {
tags["cluster"] = conn.Cluster
}
if conn.ShardNum != 0 {
tags["shard_num"] = strconv.Itoa(conn.ShardNum)
}
fields := make(map[string]interface{})
for _, r := range result {
fields[internal.SnakeCase(r.Metric)] = uint64(r.Value)
}
acc.AddFields("clickhouse_"+metric, fields, tags)
return nil
}
func (ch *ClickHouse) tables(acc telegraf.Accumulator, conn *connect) error {
var parts []struct {
Database string `json:"database"`
Table string `json:"table"`
Bytes chUInt64 `json:"bytes"`
Parts chUInt64 `json:"parts"`
Rows chUInt64 `json:"rows"`
}
if err := ch.execQuery(conn.url, systemParts, &parts); err != nil {
return err
}
tags := map[string]string{
"source": conn.Hostname,
}
if len(conn.Cluster) != 0 {
tags["cluster"] = conn.Cluster
}
if conn.ShardNum != 0 {
tags["shard_num"] = strconv.Itoa(conn.ShardNum)
}
for _, part := range parts {
tags["table"] = part.Table
tags["database"] = part.Database
acc.AddFields("clickhouse_tables",
map[string]interface{}{
"bytes": uint64(part.Bytes),
"parts": uint64(part.Parts),
"rows": uint64(part.Rows),
},
tags,
)
}
return nil
}
type clickhouseError struct {
StatusCode int
body []byte
}
func (e *clickhouseError) Error() string {
return fmt.Sprintf("received error code %d: %s", e.StatusCode, e.body)
}
func (ch *ClickHouse) execQuery(url *url.URL, query string, i interface{}) error {
q := url.Query()
q.Set("query", query+" FORMAT JSON")
url.RawQuery = q.Encode()
req, err := http.NewRequest("GET", url.String(), nil)
if err != nil {
return err
}
if ch.Username != "" {
req.Header.Add("X-ClickHouse-User", ch.Username)
}
if ch.Password != "" {
req.Header.Add("X-ClickHouse-Key", ch.Password)
}
resp, err := ch.client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode >= 300 {
body, _ := ioutil.ReadAll(io.LimitReader(resp.Body, 200))
return &clickhouseError{
StatusCode: resp.StatusCode,
body: body,
}
}
var response struct {
Data json.RawMessage
}
if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
return err
}
return json.Unmarshal(response.Data, i)
}
// see https://clickhouse.yandex/docs/en/operations/settings/settings/#session_settings-output_format_json_quote_64bit_integers
type chUInt64 uint64
func (i *chUInt64) UnmarshalJSON(b []byte) error {
b = bytes.TrimPrefix(b, []byte(`"`))
b = bytes.TrimSuffix(b, []byte(`"`))
v, err := strconv.ParseUint(string(b), 10, 64)
if err != nil {
return err
}
*i = chUInt64(v)
return nil
}
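// Illustrative sketch (not part of the plugin): both encodings decode to the
// same value, whichever quoting mode the server uses:
//
//	var v chUInt64
//	_ = json.Unmarshal([]byte(`"42"`), &v) // quoted form -> 42
//	_ = json.Unmarshal([]byte(`42`), &v)   // bare form   -> 42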
const (
systemEventsSQL = "SELECT event AS metric, CAST(value AS UInt64) AS value FROM system.events"
systemMetricsSQL = "SELECT metric, CAST(value AS UInt64) AS value FROM system.metrics"
systemAsyncMetricsSQL = "SELECT metric, CAST(value AS UInt64) AS value FROM system.asynchronous_metrics"
systemParts = `
SELECT
database,
table,
SUM(bytes) AS bytes,
COUNT(*) AS parts,
SUM(rows) AS rows
FROM system.parts
WHERE active = 1
GROUP BY
database, table
ORDER BY
database, table
`
)
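// commonMetrics maps a measurement suffix to the query that feeds it; for
// example, the "events" entry produces the "clickhouse_events" measurement
// (the commonMetrics method prefixes each key with "clickhouse_").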
var commonMetrics = map[string]string{
"events": systemEventsSQL,
"metrics": systemMetricsSQL,
"asynchronous_metrics": systemAsyncMetricsSQL,
}
var _ telegraf.ServiceInput = &ClickHouse{}

View File

@@ -0,0 +1,161 @@
package clickhouse
import (
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
func TestClusterIncludeExcludeFilter(t *testing.T) {
ch := ClickHouse{}
if assert.Equal(t, "", ch.clusterIncludeExcludeFilter()) {
ch.ClusterExclude = []string{"test_cluster"}
assert.Equal(t, "WHERE cluster NOT IN ('test_cluster')", ch.clusterIncludeExcludeFilter())
ch.ClusterExclude = []string{"test_cluster"}
ch.ClusterInclude = []string{"cluster"}
assert.Equal(t, "WHERE cluster IN ('cluster') OR cluster NOT IN ('test_cluster')", ch.clusterIncludeExcludeFilter())
ch.ClusterExclude = []string{}
ch.ClusterInclude = []string{"cluster1", "cluster2"}
assert.Equal(t, "WHERE cluster IN ('cluster1', 'cluster2')", ch.clusterIncludeExcludeFilter())
ch.ClusterExclude = []string{"cluster1", "cluster2"}
ch.ClusterInclude = []string{}
assert.Equal(t, "WHERE cluster NOT IN ('cluster1', 'cluster2')", ch.clusterIncludeExcludeFilter())
}
}
func TestChUInt64(t *testing.T) {
assets := map[string]uint64{
`"1"`: 1,
"1": 1,
"42": 42,
`"42"`: 42,
"18446743937525109187": 18446743937525109187,
}
for src, expected := range assets {
var v chUInt64
if err := v.UnmarshalJSON([]byte(src)); assert.NoError(t, err) {
assert.Equal(t, expected, uint64(v))
}
}
}
func TestGather(t *testing.T) {
var (
ts = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
type result struct {
Data interface{} `json:"data"`
}
enc := json.NewEncoder(w)
switch query := r.URL.Query().Get("query"); {
case strings.Contains(query, "system.parts"):
enc.Encode(result{
Data: []struct {
Database string `json:"database"`
Table string `json:"table"`
Bytes chUInt64 `json:"bytes"`
Parts chUInt64 `json:"parts"`
Rows chUInt64 `json:"rows"`
}{
{
Database: "test_database",
Table: "test_table",
Bytes: 1,
Parts: 10,
Rows: 100,
},
},
})
case strings.Contains(query, "system.events"):
enc.Encode(result{
Data: []struct {
Metric string `json:"metric"`
Value chUInt64 `json:"value"`
}{
{
Metric: "TestSystemEvent",
Value: 1000,
},
{
Metric: "TestSystemEvent2",
Value: 2000,
},
},
})
case strings.Contains(query, "system.metrics"):
enc.Encode(result{
Data: []struct {
Metric string `json:"metric"`
Value chUInt64 `json:"value"`
}{
{
Metric: "TestSystemMetric",
Value: 1000,
},
{
Metric: "TestSystemMetric2",
Value: 2000,
},
},
})
case strings.Contains(query, "system.asynchronous_metrics"):
enc.Encode(result{
Data: []struct {
Metric string `json:"metric"`
Value chUInt64 `json:"value"`
}{
{
Metric: "TestSystemAsynchronousMetric",
Value: 1000,
},
{
Metric: "TestSystemAsynchronousMetric2",
Value: 2000,
},
},
})
}
}))
ch = &ClickHouse{
Servers: []string{
ts.URL,
},
}
acc = &testutil.Accumulator{}
)
defer ts.Close()
ch.Gather(acc)
acc.AssertContainsFields(t, "clickhouse_tables",
map[string]interface{}{
"bytes": uint64(1),
"parts": uint64(10),
"rows": uint64(100),
},
)
acc.AssertContainsFields(t, "clickhouse_events",
map[string]interface{}{
"test_system_event": uint64(1000),
"test_system_event2": uint64(2000),
},
)
acc.AssertContainsFields(t, "clickhouse_metrics",
map[string]interface{}{
"test_system_metric": uint64(1000),
"test_system_metric2": uint64(2000),
},
)
acc.AssertContainsFields(t, "clickhouse_asynchronous_metrics",
map[string]interface{}{
"test_system_asynchronous_metric": uint64(1000),
"test_system_asynchronous_metric2": uint64(2000),
},
)
}

View File

@@ -0,0 +1,13 @@
-----BEGIN DH PARAMETERS-----
MIICCAKCAgEAoo1x7wI5K57P1/AkHUmVWzKNfy46b/ni/QtClomTB78Ks1FP8dzs
CQBW/pfL8yidxTialNhMRCZO1J+uPjTvd8dG8SFZzVylkF41LBNrUD+MLyh/b6Nr
8uWf3tqYCtsiqsQsnq/oU7C29wn6UjhPPVbRRDPGyJUFOgp0ebPR0L2gOc5HhXSF
Tt0fuWnvgZJBKGvyodby3p2CSheu8K6ZteVc8ZgHuanhCQA30nVN+yNQzyozlB2H
B9jxTDPJy8+/4Mui3iiNyXg6FaiI9lWdH7xgKoZlHi8BWlLz5Se9JVNYg0dPrMTz
K0itQyyTKUlK73x+1uPm6q1AJwz08EZiCXNbk58/Sf+pdwDmAO2QSRrERC73vnvc
B1+4+Kf7RS7oYpAHknKm/MFnkCJLVIq1b6kikYcIgVCYe+Z1UytSmG1QfwdgL8QQ
TVYVHBg4w07+s3/IJ1ekvNhdxpkmmevYt7GjohWu8vKkip4se+reNdo+sqLsgFKf
1IuDMD36zn9FVukvs7e3BwZCTkdosGHvHGjA7zm2DwPPO16hCvJ4mE6ULLpp2NEw
EBYWm3Tv6M/xtrF5Afyh0gAh7eL767/qsarbx6jlqs+dnh3LptqsE3WerWK54+0B
3Hr5CVfgYbeXuW2HeFb+fS6CNUWmiAsq1XRiz5p16hpeMGYN/qyF1IsCAQI=
-----END DH PARAMETERS-----

View File

@@ -0,0 +1,16 @@
version: '3'
services:
clickhouse:
image: yandex/clickhouse-server:latest
volumes:
- ./dhparam.pem:/etc/clickhouse-server/dhparam.pem
- ./tls_settings.xml:/etc/clickhouse-server/config.d/00-tls_settings.xml
- ../../../../testutil/pki/serverkey.pem:/etc/clickhouse-server/server.key
- ../../../../testutil/pki/servercert.pem:/etc/clickhouse-server/server.crt
restart: always
ports:
- 8123:8123
- 8443:8443
- 9000:9000
- 9009:9009

View File

@@ -0,0 +1,12 @@
### ClickHouse input plugin
[[inputs.clickhouse]]
timeout = 2
user = "default"
servers = ["http://127.0.0.1:8123"]
auto_discovery = true
cluster_include = []
cluster_exclude = ["test_shard_localhost"]
[[outputs.file]]
files = ["stdout"]

View File

@@ -0,0 +1,16 @@
### ClickHouse input plugin
[[inputs.clickhouse]]
timeout = 2
user = "default"
servers = ["https://127.0.0.1:8443"]
auto_discovery = true
cluster_include = []
cluster_exclude = ["test_shard_localhost"]
insecure_skip_verify = false
tls_cert = "./testutil/pki/clientcert.pem"
tls_key = "./testutil/pki/clientkey.pem"
tls_ca = "./testutil/pki/cacert.pem"
[[outputs.file]]
files = ["stdout"]

View File

@@ -0,0 +1,4 @@
<yandex>
<https_port>8443</https_port>
<tcp_port_secure>9440</tcp_port_secure>
</yandex>

View File

@@ -0,0 +1,98 @@
# Google Cloud PubSub Input Plugin
The GCP PubSub plugin ingests metrics from [Google Cloud PubSub][pubsub]
and creates metrics using one of the supported [input data formats][].
### Configuration
```toml
[[inputs.pubsub]]
## Required. Name of Google Cloud Platform (GCP) Project that owns
## the given PubSub subscription.
project = "my-project"
## Required. Name of PubSub subscription to ingest metrics from.
subscription = "my-subscription"
## Required. Data format to consume.
## Each data format has its own unique set of configuration options.
## Read more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
## Optional. Filepath for GCP credentials JSON file to authorize calls to
## PubSub APIs. If not set explicitly, Telegraf will attempt to use
## Application Default Credentials, which is preferred.
# credentials_file = "path/to/my/creds.json"
## Optional. Number of seconds to wait before attempting to restart the
## PubSub subscription receiver after an unexpected error.
## If the streaming pull for a PubSub Subscription fails (receiver),
## the agent attempts to restart receiving messages after this many seconds.
# retry_delay_seconds = 5
## Optional. Maximum byte length of a message to consume.
## Larger messages are dropped with an error. If less than 0 or unspecified,
## treated as no limit.
# max_message_len = 1000000
## Optional. Maximum messages to read from PubSub that have not been written
## to an output. Defaults to 1000.
## For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message contains 10 metrics and the output
## metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## The following are optional Subscription ReceiveSettings in PubSub.
## Read more about these values:
## https://godoc.org/cloud.google.com/go/pubsub#ReceiveSettings
## Optional. Maximum number of seconds for which a PubSub subscription
## should auto-extend the PubSub ACK deadline for each message. If less than
## 0, auto-extension is disabled.
# max_extension = 0
## Optional. Maximum number of unprocessed messages in PubSub
## (unacknowledged but not yet expired in PubSub).
## A value of 0 is treated as the default PubSub value.
## Negative values will be treated as unlimited.
# max_outstanding_messages = 0
## Optional. Maximum size in bytes of unprocessed messages in PubSub
## (unacknowledged but not yet expired in PubSub).
## A value of 0 is treated as the default PubSub value.
## Negative values will be treated as unlimited.
# max_outstanding_bytes = 0
## Optional. Max number of goroutines a PubSub Subscription receiver can spawn
## to pull messages from PubSub concurrently. This limit applies to each
## subscription separately and is treated as the PubSub default if less than
## 1. Note this setting does not limit the number of messages that can be
## processed concurrently (use "max_outstanding_messages" instead).
# max_receiver_go_routines = 0
## Optional. If true, Telegraf will attempt to base64 decode the
## PubSub message data before parsing. Many GCP services that
## output JSON to Google PubSub base64-encode the JSON payload.
# base64_data = false
```
### Multiple Subscriptions and Topics
This plugin assumes you have already created a PULL subscription for a given
PubSub topic. To learn how to do so, see [how to create a subscription][pubsub create sub].
Each plugin agent can listen to one subscription at a time, so you will
need to run multiple instances of the plugin to pull messages from multiple
subscriptions/topics.
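For example, a minimal sketch of two plugin instances pulling from two
hypothetical subscriptions (all names below are placeholders):
```toml
[[inputs.pubsub]]
  project = "my-project"
  subscription = "metrics-sub-a"
  data_format = "influx"

[[inputs.pubsub]]
  project = "my-project"
  subscription = "metrics-sub-b"
  data_format = "influx"
```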
[pubsub]: https://cloud.google.com/pubsub
[pubsub create sub]: https://cloud.google.com/pubsub/docs/admin#create_a_pull_subscription
[input data formats]: /docs/DATA_FORMATS_INPUT.md

View File

@@ -0,0 +1,368 @@
package cloud_pubsub
import (
"context"
"fmt"
"sync"
"encoding/base64"
"time"
"cloud.google.com/go/pubsub"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"golang.org/x/oauth2/google"
"google.golang.org/api/option"
)
type empty struct{}
type semaphore chan empty
const defaultMaxUndeliveredMessages = 1000
const defaultRetryDelaySeconds = 5
type PubSub struct {
sync.Mutex
CredentialsFile string `toml:"credentials_file"`
Project string `toml:"project"`
Subscription string `toml:"subscription"`
// Subscription ReceiveSettings
MaxExtension internal.Duration `toml:"max_extension"`
MaxOutstandingMessages int `toml:"max_outstanding_messages"`
MaxOutstandingBytes int `toml:"max_outstanding_bytes"`
MaxReceiverGoRoutines int `toml:"max_receiver_go_routines"`
// Agent settings
MaxMessageLen int `toml:"max_message_len"`
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
RetryReceiveDelaySeconds int `toml:"retry_delay_seconds"`
Base64Data bool `toml:"base64_data"`
Log telegraf.Logger
sub subscription
stubSub func() subscription
cancel context.CancelFunc
parser parsers.Parser
wg *sync.WaitGroup
acc telegraf.TrackingAccumulator
undelivered map[telegraf.TrackingID]message
sem semaphore
}
func (ps *PubSub) Description() string {
return "Read metrics from Google PubSub"
}
func (ps *PubSub) SampleConfig() string {
return fmt.Sprintf(sampleConfig, defaultMaxUndeliveredMessages)
}
// Gather does nothing for this service input.
func (ps *PubSub) Gather(acc telegraf.Accumulator) error {
return nil
}
// SetParser implements ParserInput interface.
func (ps *PubSub) SetParser(parser parsers.Parser) {
ps.parser = parser
}
// Start initializes the plugin and processing messages from Google PubSub.
// Two goroutines are started - one pulling for the subscription, one
// receiving delivery notifications from the accumulator.
func (ps *PubSub) Start(ac telegraf.Accumulator) error {
if ps.Subscription == "" {
return fmt.Errorf(`"subscription" is required`)
}
if ps.Project == "" {
return fmt.Errorf(`"project" is required`)
}
ps.sem = make(semaphore, ps.MaxUndeliveredMessages)
ps.acc = ac.WithTracking(ps.MaxUndeliveredMessages)
// Create top-level context with cancel that will be called on Stop().
ctx, cancel := context.WithCancel(context.Background())
ps.cancel = cancel
if ps.stubSub != nil {
ps.sub = ps.stubSub()
} else {
subRef, err := ps.getGCPSubscription(ps.Subscription)
if err != nil {
return fmt.Errorf("unable to create subscription handle: %v", err)
}
ps.sub = subRef
}
ps.wg = &sync.WaitGroup{}
// Start goroutine to handle delivery notifications from accumulator.
ps.wg.Add(1)
go func() {
defer ps.wg.Done()
ps.waitForDelivery(ctx)
}()
// Start goroutine for subscription receiver.
ps.wg.Add(1)
go func() {
defer ps.wg.Done()
ps.receiveWithRetry(ctx)
}()
return nil
}
// Stop ensures the PubSub subscriptions receivers are stopped by
// canceling the context and waits for goroutines to finish.
func (ps *PubSub) Stop() {
ps.cancel()
ps.wg.Wait()
}
// startReceiver is called within a goroutine and manages keeping a
// subscription.Receive() up and running while the plugin has not been stopped.
func (ps *PubSub) receiveWithRetry(parentCtx context.Context) {
err := ps.startReceiver(parentCtx)
for err != nil && parentCtx.Err() == nil {
ps.Log.Errorf("Receiver for subscription %s exited with error: %v", ps.sub.ID(), err)
delay := defaultRetryDelaySeconds
if ps.RetryReceiveDelaySeconds > 0 {
delay = ps.RetryReceiveDelaySeconds
}
ps.Log.Infof("Waiting %d seconds before attempting to restart receiver...", delay)
time.Sleep(time.Duration(delay) * time.Second)
err = ps.startReceiver(parentCtx)
}
}
func (ps *PubSub) startReceiver(parentCtx context.Context) error {
ps.Log.Infof("Starting receiver for subscription %s...", ps.sub.ID())
cctx, ccancel := context.WithCancel(parentCtx)
err := ps.sub.Receive(cctx, func(ctx context.Context, msg message) {
if err := ps.onMessage(ctx, msg); err != nil {
ps.acc.AddError(fmt.Errorf("unable to add message from subscription %s: %v", ps.sub.ID(), err))
}
})
if err != nil {
ps.acc.AddError(fmt.Errorf("receiver for subscription %s exited: %v", ps.sub.ID(), err))
} else {
ps.Log.Info("Subscription pull ended (no error, most likely stopped)")
}
ccancel()
return err
}
// onMessage handles parsing and adding a received message to the accumulator.
func (ps *PubSub) onMessage(ctx context.Context, msg message) error {
if ps.MaxMessageLen > 0 && len(msg.Data()) > ps.MaxMessageLen {
msg.Ack()
return fmt.Errorf("message longer than max_message_len (%d > %d)", len(msg.Data()), ps.MaxMessageLen)
}
var data []byte
if ps.Base64Data {
decoded, err := base64.StdEncoding.DecodeString(string(msg.Data()))
if err != nil {
return fmt.Errorf("unable to base64 decode message: %v", err)
}
data = decoded
} else {
data = msg.Data()
}
metrics, err := ps.parser.Parse(data)
if err != nil {
msg.Ack()
return err
}
if len(metrics) == 0 {
msg.Ack()
return nil
}
select {
case <-ctx.Done():
return ctx.Err()
case ps.sem <- empty{}:
break
}
ps.Lock()
defer ps.Unlock()
id := ps.acc.AddTrackingMetricGroup(metrics)
if ps.undelivered == nil {
ps.undelivered = make(map[telegraf.TrackingID]message)
}
ps.undelivered[id] = msg
return nil
}
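// waitForDelivery acks each message once the accumulator reports its metric
// group as delivered, releasing one semaphore slot per delivery so that
// onMessage can accept further messages.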
func (ps *PubSub) waitForDelivery(parentCtx context.Context) {
for {
select {
case <-parentCtx.Done():
return
case info := <-ps.acc.Delivered():
<-ps.sem
msg := ps.removeDelivered(info.ID())
if msg != nil {
msg.Ack()
}
}
}
}
func (ps *PubSub) removeDelivered(id telegraf.TrackingID) message {
ps.Lock()
defer ps.Unlock()
msg, ok := ps.undelivered[id]
if !ok {
return nil
}
delete(ps.undelivered, id)
return msg
}
func (ps *PubSub) getPubSubClient() (*pubsub.Client, error) {
var credsOpt option.ClientOption
if ps.CredentialsFile != "" {
credsOpt = option.WithCredentialsFile(ps.CredentialsFile)
} else {
creds, err := google.FindDefaultCredentials(context.Background(), pubsub.ScopeCloudPlatform)
if err != nil {
return nil, fmt.Errorf(
"unable to find GCP Application Default Credentials: %v."+
"Either set ADC or provide CredentialsFile config", err)
}
credsOpt = option.WithCredentials(creds)
}
client, err := pubsub.NewClient(
context.Background(),
ps.Project,
credsOpt,
option.WithScopes(pubsub.ScopeCloudPlatform),
option.WithUserAgent(internal.ProductToken()),
)
if err != nil {
return nil, fmt.Errorf("unable to generate PubSub client: %v", err)
}
return client, nil
}
func (ps *PubSub) getGCPSubscription(subId string) (subscription, error) {
client, err := ps.getPubSubClient()
if err != nil {
return nil, err
}
s := client.Subscription(subId)
s.ReceiveSettings = pubsub.ReceiveSettings{
NumGoroutines: ps.MaxReceiverGoRoutines,
MaxExtension: ps.MaxExtension.Duration,
MaxOutstandingMessages: ps.MaxOutstandingMessages,
MaxOutstandingBytes: ps.MaxOutstandingBytes,
}
return &gcpSubscription{s}, nil
}
func init() {
inputs.Add("cloud_pubsub", func() telegraf.Input {
ps := &PubSub{
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
return ps
})
}
const sampleConfig = `
## Required. Name of Google Cloud Platform (GCP) Project that owns
## the given PubSub subscription.
project = "my-project"
## Required. Name of PubSub subscription to ingest metrics from.
subscription = "my-subscription"
## Required. Data format to consume.
## Each data format has its own unique set of configuration options.
## Read more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
## Optional. Filepath for GCP credentials JSON file to authorize calls to
## PubSub APIs. If not set explicitly, Telegraf will attempt to use
## Application Default Credentials, which is preferred.
# credentials_file = "path/to/my/creds.json"
## Optional. Number of seconds to wait before attempting to restart the
## PubSub subscription receiver after an unexpected error.
## If the streaming pull for a PubSub Subscription fails (receiver),
## the agent attempts to restart receiving messages after this many seconds.
# retry_delay_seconds = 5
## Optional. Maximum byte length of a message to consume.
## Larger messages are dropped with an error. If less than 0 or unspecified,
## treated as no limit.
# max_message_len = 1000000
## Optional. Maximum messages to read from PubSub that have not been written
## to an output. Defaults to %d.
## For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message contains 10 metrics and the output
## metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## The following are optional Subscription ReceiveSettings in PubSub.
## Read more about these values:
## https://godoc.org/cloud.google.com/go/pubsub#ReceiveSettings
## Optional. Maximum number of seconds for which a PubSub subscription
## should auto-extend the PubSub ACK deadline for each message. If less than
## 0, auto-extension is disabled.
# max_extension = 0
## Optional. Maximum number of unprocessed messages in PubSub
## (unacknowledged but not yet expired in PubSub).
## A value of 0 is treated as the default PubSub value.
## Negative values will be treated as unlimited.
# max_outstanding_messages = 0
## Optional. Maximum size in bytes of unprocessed messages in PubSub
## (unacknowledged but not yet expired in PubSub).
## A value of 0 is treated as the default PubSub value.
## Negative values will be treated as unlimited.
# max_outstanding_bytes = 0
## Optional. Max number of goroutines a PubSub Subscription receiver can spawn
## to pull messages from PubSub concurrently. This limit applies to each
## subscription separately and is treated as the PubSub default if less than
## 1. Note this setting does not limit the number of messages that can be
## processed concurrently (use "max_outstanding_messages" instead).
# max_receiver_go_routines = 0
## Optional. If true, Telegraf will attempt to base64 decode the
## PubSub message data before parsing.
# base64_data = false
`

View File

@@ -0,0 +1,239 @@
package cloud_pubsub
import (
"encoding/base64"
"errors"
"testing"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
const (
msgInflux = "cpu_load_short,host=server01 value=23422.0 1422568543702900257\n"
)
// Test ingesting InfluxDB-format PubSub message
func TestRunParse(t *testing.T) {
subId := "sub-run-parse"
testParser, _ := parsers.NewInfluxParser()
sub := &stubSub{
id: subId,
messages: make(chan *testMsg, 100),
}
sub.receiver = testMessagesReceive(sub)
ps := &PubSub{
Log: testutil.Logger{},
parser: testParser,
stubSub: func() subscription { return sub },
Project: "projectIDontMatterForTests",
Subscription: subId,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
acc := &testutil.Accumulator{}
if err := ps.Start(acc); err != nil {
t.Fatalf("test PubSub failed to start: %s", err)
}
defer ps.Stop()
if ps.sub == nil {
t.Fatal("expected plugin subscription to be non-nil")
}
testTracker := &testTracker{}
msg := &testMsg{
value: msgInflux,
tracker: testTracker,
}
sub.messages <- msg
acc.Wait(1)
assert.Equal(t, acc.NFields(), 1)
metric := acc.Metrics[0]
validateTestInfluxMetric(t, metric)
}
// Test ingesting InfluxDB-format PubSub message
func TestRunBase64(t *testing.T) {
subId := "sub-run-base64"
testParser, _ := parsers.NewInfluxParser()
sub := &stubSub{
id: subId,
messages: make(chan *testMsg, 100),
}
sub.receiver = testMessagesReceive(sub)
ps := &PubSub{
Log: testutil.Logger{},
parser: testParser,
stubSub: func() subscription { return sub },
Project: "projectIDontMatterForTests",
Subscription: subId,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
Base64Data: true,
}
acc := &testutil.Accumulator{}
if err := ps.Start(acc); err != nil {
t.Fatalf("test PubSub failed to start: %s", err)
}
defer ps.Stop()
if ps.sub == nil {
t.Fatal("expected plugin subscription to be non-nil")
}
testTracker := &testTracker{}
msg := &testMsg{
value: base64.StdEncoding.EncodeToString([]byte(msgInflux)),
tracker: testTracker,
}
sub.messages <- msg
acc.Wait(1)
assert.Equal(t, acc.NFields(), 1)
metric := acc.Metrics[0]
validateTestInfluxMetric(t, metric)
}
func TestRunInvalidMessages(t *testing.T) {
subId := "sub-invalid-messages"
testParser, _ := parsers.NewInfluxParser()
sub := &stubSub{
id: subId,
messages: make(chan *testMsg, 100),
}
sub.receiver = testMessagesReceive(sub)
ps := &PubSub{
Log: testutil.Logger{},
parser: testParser,
stubSub: func() subscription { return sub },
Project: "projectIDontMatterForTests",
Subscription: subId,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
acc := &testutil.Accumulator{}
if err := ps.Start(acc); err != nil {
t.Fatalf("test PubSub failed to start: %s", err)
}
defer ps.Stop()
if ps.sub == nil {
t.Fatal("expected plugin subscription to be non-nil")
}
testTracker := &testTracker{}
msg := &testMsg{
value: "~invalidInfluxMsg~",
tracker: testTracker,
}
sub.messages <- msg
acc.WaitError(1)
// Make sure we acknowledged the message so we don't receive it again.
testTracker.WaitForAck(1)
assert.Equal(t, acc.NFields(), 0)
}
func TestRunOverlongMessages(t *testing.T) {
subId := "sub-message-too-long"
acc := &testutil.Accumulator{}
testParser, _ := parsers.NewInfluxParser()
sub := &stubSub{
id: subId,
messages: make(chan *testMsg, 100),
}
sub.receiver = testMessagesReceive(sub)
ps := &PubSub{
Log: testutil.Logger{},
parser: testParser,
stubSub: func() subscription { return sub },
Project: "projectIDontMatterForTests",
Subscription: subId,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
// Add MaxMessageLen Param
MaxMessageLen: 1,
}
if err := ps.Start(acc); err != nil {
t.Fatalf("test PubSub failed to start: %s", err)
}
defer ps.Stop()
if ps.sub == nil {
t.Fatal("expected plugin subscription to be non-nil")
}
testTracker := &testTracker{}
msg := &testMsg{
value: msgInflux,
tracker: testTracker,
}
sub.messages <- msg
acc.WaitError(1)
// Make sure we acknowledged the message so we don't receive it again.
testTracker.WaitForAck(1)
assert.Equal(t, acc.NFields(), 0)
}
func TestRunErrorInSubscriber(t *testing.T) {
subId := "sub-unexpected-error"
acc := &testutil.Accumulator{}
testParser, _ := parsers.NewInfluxParser()
sub := &stubSub{
id: subId,
messages: make(chan *testMsg, 100),
}
fakeErrStr := "a fake error"
sub.receiver = testMessagesError(sub, errors.New("a fake error"))
ps := &PubSub{
Log: testutil.Logger{},
parser: testParser,
stubSub: func() subscription { return sub },
Project: "projectIDontMatterForTests",
Subscription: subId,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
RetryReceiveDelaySeconds: 1,
}
if err := ps.Start(acc); err != nil {
t.Fatalf("test PubSub failed to start: %s", err)
}
defer ps.Stop()
if ps.sub == nil {
t.Fatal("expected plugin subscription to be non-nil")
}
acc.WaitError(1)
assert.Regexp(t, fakeErrStr, acc.Errors[0])
}
func validateTestInfluxMetric(t *testing.T, m *testutil.Metric) {
assert.Equal(t, "cpu_load_short", m.Measurement)
assert.Equal(t, "server01", m.Tags["host"])
assert.Equal(t, 23422.0, m.Fields["value"])
assert.Equal(t, int64(1422568543702900257), m.Time.UnixNano())
}

View File

@@ -0,0 +1,68 @@
package cloud_pubsub
import (
"cloud.google.com/go/pubsub"
"context"
"time"
)
type (
subscription interface {
ID() string
Receive(ctx context.Context, f func(context.Context, message)) error
}
message interface {
Ack()
Nack()
ID() string
Data() []byte
Attributes() map[string]string
PublishTime() time.Time
}
gcpSubscription struct {
sub *pubsub.Subscription
}
gcpMessage struct {
msg *pubsub.Message
}
)
func (s *gcpSubscription) ID() string {
if s.sub == nil {
return ""
}
return s.sub.ID()
}
func (s *gcpSubscription) Receive(ctx context.Context, f func(context.Context, message)) error {
return s.sub.Receive(ctx, func(cctx context.Context, m *pubsub.Message) {
f(cctx, &gcpMessage{m})
})
}
func (env *gcpMessage) Ack() {
env.msg.Ack()
}
func (env *gcpMessage) Nack() {
env.msg.Nack()
}
func (env *gcpMessage) ID() string {
return env.msg.ID
}
func (env *gcpMessage) Data() []byte {
return env.msg.Data
}
func (env *gcpMessage) Attributes() map[string]string {
return env.msg.Attributes
}
func (env *gcpMessage) PublishTime() time.Time {
return env.msg.PublishTime
}

View File

@@ -0,0 +1,119 @@
package cloud_pubsub
import (
"context"
"sync"
"time"
)
type stubSub struct {
id string
messages chan *testMsg
receiver receiveFunc
}
func (s *stubSub) ID() string {
return s.id
}
func (s *stubSub) Receive(ctx context.Context, f func(context.Context, message)) error {
return s.receiver(ctx, f)
}
type receiveFunc func(ctx context.Context, f func(context.Context, message)) error
func testMessagesError(s *stubSub, expectedErr error) receiveFunc {
return func(ctx context.Context, f func(context.Context, message)) error {
return expectedErr
}
}
func testMessagesReceive(s *stubSub) receiveFunc {
return func(ctx context.Context, f func(context.Context, message)) error {
for {
select {
case <-ctx.Done():
return ctx.Err()
case m := <-s.messages:
f(ctx, m)
}
}
}
}
type testMsg struct {
id string
value string
attributes map[string]string
publishTime time.Time
tracker *testTracker
}
func (tm *testMsg) Ack() {
tm.tracker.Ack()
}
func (tm *testMsg) Nack() {
tm.tracker.Nack()
}
func (tm *testMsg) ID() string {
return tm.id
}
func (tm *testMsg) Data() []byte {
return []byte(tm.value)
}
func (tm *testMsg) Attributes() map[string]string {
return tm.attributes
}
func (tm *testMsg) PublishTime() time.Time {
return tm.publishTime
}
type testTracker struct {
sync.Mutex
*sync.Cond
numAcks int
numNacks int
}
func (t *testTracker) WaitForAck(num int) {
t.Lock()
if t.Cond == nil {
t.Cond = sync.NewCond(&t.Mutex)
}
for t.numAcks < num {
t.Wait()
}
t.Unlock()
}
func (t *testTracker) WaitForNack(num int) {
t.Lock()
if t.Cond == nil {
t.Cond = sync.NewCond(&t.Mutex)
}
for t.numNacks < num {
t.Wait()
}
t.Unlock()
}
func (t *testTracker) Ack() {
t.Lock()
defer t.Unlock()
t.numAcks++
}
func (t *testTracker) Nack() {
t.Lock()
defer t.Unlock()
t.numNacks++
}

View File

@@ -0,0 +1,72 @@
# Google Cloud PubSub Push Input Service Plugin
The Google Cloud PubSub Push listener is a service input plugin that listens for messages sent via an HTTP POST from [Google Cloud PubSub][pubsub].
The plugin expects messages in Google's Pub/Sub JSON Format ONLY.
The intent of the plugin is to allow Telegraf to serve as an endpoint of the Google Pub/Sub 'Push' service.
Google's PubSub service will **only** send over HTTPS/TLS so this plugin must be behind a valid proxy or must be configured to use TLS.
Enable TLS by specifying the file names of a service TLS certificate and key.
Enable mutually authenticated TLS and authorize client connections by their signing certificate authority by including a list of allowed CA certificate file names in `tls_allowed_cacerts`.
### Configuration:
This is a sample configuration for the plugin.
```toml
[[inputs.cloud_pubsub_push]]
## Address and port to host HTTP listener on
service_address = ":8080"
## Application secret to verify messages originate from Cloud Pub/Sub
# token = ""
## Path to listen to.
# path = "/"
## Maximum duration before timing out read of the request
# read_timeout = "10s"
## Maximum duration before timing out write of the response. This should be set to a value
## large enough that you can send at least 'metric_batch_size' number of messages within the
## duration.
# write_timeout = "10s"
## Maximum allowed http request body size in bytes.
## 0 means to use the default of 524,288,000 bytes (500 mebibytes)
# max_body_size = "500MB"
## Whether to add the pubsub metadata, such as message attributes and subscription as a tag.
# add_meta = false
## Optional. Maximum messages to read from PubSub that have not been written
## to an output. Defaults to 1000.
## For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message contains 10 metrics and the output
## metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Set one or more allowed client CA certificate file names to
## enable mutually authenticated TLS connections
# tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"]
## Add service certificate and key
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
```
This plugin assumes you have already created a PUSH subscription for a given
PubSub topic.
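As a sketch, a PUSH subscription can be created with the Go Pub/Sub client;
the project, topic, subscription, and endpoint names below are placeholders,
not values read by this plugin:
```go
package main

import (
	"context"
	"log"

	"cloud.google.com/go/pubsub"
)

func main() {
	ctx := context.Background()
	client, err := pubsub.NewClient(ctx, "my-project")
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	// Deliver messages from "my-topic" to the Telegraf HTTP listener.
	_, err = client.CreateSubscription(ctx, "my-push-sub", pubsub.SubscriptionConfig{
		Topic:      client.Topic("my-topic"),
		PushConfig: pubsub.PushConfig{Endpoint: "https://telegraf.example.com:8080/"},
	})
	if err != nil {
		log.Fatal(err)
	}
}
```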
[pubsub]: https://cloud.google.com/pubsub
[input data formats]: /docs/DATA_FORMATS_INPUT.md

View File

@@ -0,0 +1,323 @@
package cloud_pubsub_push
import (
"context"
"crypto/subtle"
"encoding/base64"
"encoding/json"
"io/ioutil"
"net"
"net/http"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
tlsint "github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
)
// defaultMaxBodySize is the default maximum request body size, in bytes.
// if the request body is over this size, we will return an HTTP 413 error.
// 500 MB
const defaultMaxBodySize = 500 * 1024 * 1024
const defaultMaxUndeliveredMessages = 1000
type PubSubPush struct {
ServiceAddress string
Token string
Path string
ReadTimeout internal.Duration
WriteTimeout internal.Duration
MaxBodySize internal.Size
AddMeta bool
Log telegraf.Logger
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
tlsint.ServerConfig
parsers.Parser
listener net.Listener
server *http.Server
acc telegraf.TrackingAccumulator
ctx context.Context
cancel context.CancelFunc
wg *sync.WaitGroup
mu *sync.Mutex
undelivered map[telegraf.TrackingID]chan bool
sem chan struct{}
}
// Message defines the structure of a Google Pub/Sub message.
type Message struct {
Atts map[string]string `json:"attributes"`
Data string `json:"data"` // Data is base64 encoded data
}
// Payload is the received Google Pub/Sub data. (https://cloud.google.com/pubsub/docs/push)
type Payload struct {
Msg Message `json:"message"`
Subscription string `json:"subscription"`
}
const sampleConfig = `
## Address and port to host HTTP listener on
service_address = ":8080"
## Application secret to verify messages originate from Cloud Pub/Sub
# token = ""
## Path to listen to.
# path = "/"
## Maximum duration before timing out read of the request
# read_timeout = "10s"
## Maximum duration before timing out write of the response. This should be set to a value
## large enough that you can send at least 'metric_batch_size' number of messages within the
## duration.
# write_timeout = "10s"
## Maximum allowed http request body size in bytes.
## 0 means to use the default of 524,288,000 bytes (500 mebibytes)
# max_body_size = "500MB"
## Whether to add the pubsub metadata, such as message attributes and subscription as a tag.
# add_meta = false
## Optional. Maximum messages to read from PubSub that have not been written
## to an output. Defaults to 1000.
## For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message contains 10 metrics and the output
## metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Set one or more allowed client CA certificate file names to
## enable mutually authenticated TLS connections
# tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"]
## Add service certificate and key
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
`
func (p *PubSubPush) SampleConfig() string {
return sampleConfig
}
func (p *PubSubPush) Description() string {
return "Google Cloud Pub/Sub Push HTTP listener"
}
func (p *PubSubPush) Gather(_ telegraf.Accumulator) error {
return nil
}
func (p *PubSubPush) SetParser(parser parsers.Parser) {
p.Parser = parser
}
// Start starts the http listener service.
func (p *PubSubPush) Start(acc telegraf.Accumulator) error {
if p.MaxBodySize.Size == 0 {
p.MaxBodySize.Size = defaultMaxBodySize
}
if p.ReadTimeout.Duration < time.Second {
p.ReadTimeout.Duration = time.Second * 10
}
if p.WriteTimeout.Duration < time.Second {
p.WriteTimeout.Duration = time.Second * 10
}
tlsConf, err := p.ServerConfig.TLSConfig()
if err != nil {
return err
}
p.server = &http.Server{
Addr: p.ServiceAddress,
Handler: http.TimeoutHandler(p, p.WriteTimeout.Duration, "timed out processing metric"),
ReadTimeout: p.ReadTimeout.Duration,
TLSConfig: tlsConf,
}
p.ctx, p.cancel = context.WithCancel(context.Background())
p.wg = &sync.WaitGroup{}
p.acc = acc.WithTracking(p.MaxUndeliveredMessages)
p.sem = make(chan struct{}, p.MaxUndeliveredMessages)
p.undelivered = make(map[telegraf.TrackingID]chan bool)
p.mu = &sync.Mutex{}
p.wg.Add(1)
go func() {
defer p.wg.Done()
p.receiveDelivered()
}()
p.wg.Add(1)
go func() {
defer p.wg.Done()
if tlsConf != nil {
p.server.ListenAndServeTLS("", "")
} else {
p.server.ListenAndServe()
}
}()
return nil
}
// Stop cleans up all resources
func (p *PubSubPush) Stop() {
p.cancel()
p.server.Shutdown(p.ctx)
p.wg.Wait()
}
func (p *PubSubPush) ServeHTTP(res http.ResponseWriter, req *http.Request) {
if req.URL.Path == p.Path {
p.AuthenticateIfSet(p.serveWrite, res, req)
} else {
p.AuthenticateIfSet(http.NotFound, res, req)
}
}
func (p *PubSubPush) serveWrite(res http.ResponseWriter, req *http.Request) {
select {
case <-req.Context().Done():
res.WriteHeader(http.StatusServiceUnavailable)
return
case <-p.ctx.Done():
res.WriteHeader(http.StatusServiceUnavailable)
return
case p.sem <- struct{}{}:
break
}
// Check that the content length is not too large for us to handle.
if req.ContentLength > p.MaxBodySize.Size {
res.WriteHeader(http.StatusRequestEntityTooLarge)
return
}
if req.Method != http.MethodPost {
res.WriteHeader(http.StatusMethodNotAllowed)
return
}
body := http.MaxBytesReader(res, req.Body, p.MaxBodySize.Size)
bytes, err := ioutil.ReadAll(body)
if err != nil {
res.WriteHeader(http.StatusRequestEntityTooLarge)
return
}
var payload Payload
if err = json.Unmarshal(bytes, &payload); err != nil {
p.Log.Errorf("Error decoding payload %s", err.Error())
res.WriteHeader(http.StatusBadRequest)
return
}
sDec, err := base64.StdEncoding.DecodeString(payload.Msg.Data)
if err != nil {
p.Log.Errorf("Base64-decode failed %s", err.Error())
res.WriteHeader(http.StatusBadRequest)
return
}
metrics, err := p.Parse(sDec)
if err != nil {
p.Log.Debug(err.Error())
res.WriteHeader(http.StatusBadRequest)
return
}
if p.AddMeta {
for i := range metrics {
for k, v := range payload.Msg.Atts {
metrics[i].AddTag(k, v)
}
metrics[i].AddTag("subscription", payload.Subscription)
}
}
ch := make(chan bool, 1)
p.mu.Lock()
p.undelivered[p.acc.AddTrackingMetricGroup(metrics)] = ch
p.mu.Unlock()
select {
case <-req.Context().Done():
res.WriteHeader(http.StatusServiceUnavailable)
return
case success := <-ch:
if success {
res.WriteHeader(http.StatusNoContent)
} else {
res.WriteHeader(http.StatusInternalServerError)
}
}
}
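// receiveDelivered signals each pending HTTP request once its tracked metric
// group is delivered (or dropped), freeing a slot in the semaphore so new
// requests can be admitted.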
func (p *PubSubPush) receiveDelivered() {
for {
select {
case <-p.ctx.Done():
return
case info := <-p.acc.Delivered():
<-p.sem
p.mu.Lock()
ch, ok := p.undelivered[info.ID()]
if !ok {
p.mu.Unlock()
continue
}
delete(p.undelivered, info.ID())
p.mu.Unlock()
if info.Delivered() {
ch <- true
} else {
ch <- false
p.Log.Debug("Metric group failed to process")
}
}
}
}
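// AuthenticateIfSet compares the "token" form value against the configured
// Token in constant time before dispatching to the handler. A hypothetical
// push endpoint carrying the token would look like
// https://telegraf.example.com:8080/?token=SECRET (placeholder values).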
func (p *PubSubPush) AuthenticateIfSet(handler http.HandlerFunc, res http.ResponseWriter, req *http.Request) {
if p.Token != "" {
if subtle.ConstantTimeCompare([]byte(req.FormValue("token")), []byte(p.Token)) != 1 {
http.Error(res, "Unauthorized.", http.StatusUnauthorized)
return
}
}
handler(res, req)
}
func init() {
inputs.Add("cloud_pubsub_push", func() telegraf.Input {
return &PubSubPush{
ServiceAddress: ":8080",
Path: "/",
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
})
}

View File

@@ -0,0 +1,226 @@
package cloud_pubsub_push
import (
"context"
"fmt"
"io"
"net/http"
"net/http/httptest"
"strings"
"sync"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/agent"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/models"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/testutil"
)
func TestServeHTTP(t *testing.T) {
tests := []struct {
name string
method string
path string
body io.Reader
status int
maxsize int64
expected string
fail bool
full bool
}{
{
name: "bad method get",
method: "GET",
path: "/",
status: http.StatusMethodNotAllowed,
},
{
name: "post not found",
method: "POST",
path: "/allthings",
status: http.StatusNotFound,
},
{
name: "post large date",
method: "POST",
path: "/",
status: http.StatusRequestEntityTooLarge,
body: strings.NewReader(`{"message":{"attributes":{"deviceId":"myPi","deviceNumId":"2808946627307959","deviceRegistryId":"my-registry","deviceRegistryLocation":"us-central1","projectId":"conference-demos","subFolder":""},"data":"dGVzdGluZ0dvb2dsZSxzZW5zb3I9Ym1lXzI4MCB0ZW1wX2M9MjMuOTUsaHVtaWRpdHk9NjIuODMgMTUzNjk1Mjk3NDU1MzUxMDIzMQ==","messageId":"204004313210337","message_id":"204004313210337","publishTime":"2018-09-14T19:22:54.587Z","publish_time":"2018-09-14T19:22:54.587Z"},"subscription":"projects/conference-demos/subscriptions/my-subscription"}`),
},
{
name: "post valid data",
method: "POST",
path: "/",
maxsize: 500 * 1024 * 1024,
status: http.StatusNoContent,
body: strings.NewReader(`{"message":{"attributes":{"deviceId":"myPi","deviceNumId":"2808946627307959","deviceRegistryId":"my-registry","deviceRegistryLocation":"us-central1","projectId":"conference-demos","subFolder":""},"data":"dGVzdGluZ0dvb2dsZSxzZW5zb3I9Ym1lXzI4MCB0ZW1wX2M9MjMuOTUsaHVtaWRpdHk9NjIuODMgMTUzNjk1Mjk3NDU1MzUxMDIzMQ==","messageId":"204004313210337","message_id":"204004313210337","publishTime":"2018-09-14T19:22:54.587Z","publish_time":"2018-09-14T19:22:54.587Z"},"subscription":"projects/conference-demos/subscriptions/my-subscription"}`),
},
{
name: "fail write",
method: "POST",
path: "/",
maxsize: 500 * 1024 * 1024,
status: http.StatusServiceUnavailable,
body: strings.NewReader(`{"message":{"attributes":{"deviceId":"myPi","deviceNumId":"2808946627307959","deviceRegistryId":"my-registry","deviceRegistryLocation":"us-central1","projectId":"conference-demos","subFolder":""},"data":"dGVzdGluZ0dvb2dsZSxzZW5zb3I9Ym1lXzI4MCB0ZW1wX2M9MjMuOTUsaHVtaWRpdHk9NjIuODMgMTUzNjk1Mjk3NDU1MzUxMDIzMQ==","messageId":"204004313210337","message_id":"204004313210337","publishTime":"2018-09-14T19:22:54.587Z","publish_time":"2018-09-14T19:22:54.587Z"},"subscription":"projects/conference-demos/subscriptions/my-subscription"}`),
fail: true,
},
{
name: "full buffer",
method: "POST",
path: "/",
maxsize: 500 * 1024 * 1024,
status: http.StatusServiceUnavailable,
body: strings.NewReader(`{"message":{"attributes":{"deviceId":"myPi","deviceNumId":"2808946627307959","deviceRegistryId":"my-registry","deviceRegistryLocation":"us-central1","projectId":"conference-demos","subFolder":""},"data":"dGVzdGluZ0dvb2dsZSxzZW5zb3I9Ym1lXzI4MCB0ZW1wX2M9MjMuOTUsaHVtaWRpdHk9NjIuODMgMTUzNjk1Mjk3NDU1MzUxMDIzMQ==","messageId":"204004313210337","message_id":"204004313210337","publishTime":"2018-09-14T19:22:54.587Z","publish_time":"2018-09-14T19:22:54.587Z"},"subscription":"projects/conference-demos/subscriptions/my-subscription"}`),
full: true,
},
{
name: "post invalid body",
method: "POST",
path: "/",
maxsize: 500 * 1024 * 1024,
status: http.StatusBadRequest,
body: strings.NewReader(`invalid body`),
},
{
name: "post invalid data",
method: "POST",
path: "/",
maxsize: 500 * 1024 * 1024,
status: http.StatusBadRequest,
body: strings.NewReader(`{"message":{"attributes":{"deviceId":"myPi","deviceNumId":"2808946627307959","deviceRegistryId":"my-registry","deviceRegistryLocation":"us-central1","projectId":"conference-demos","subFolder":""},"data":"not base 64 encoded data","messageId":"204004313210337","message_id":"204004313210337","publishTime":"2018-09-14T19:22:54.587Z","publish_time":"2018-09-14T19:22:54.587Z"},"subscription":"projects/conference-demos/subscriptions/my-subscription"}`),
},
{
name: "post invalid data format",
method: "POST",
path: "/",
maxsize: 500 * 1024 * 1024,
status: http.StatusBadRequest,
body: strings.NewReader(`{"message":{"attributes":{"deviceId":"myPi","deviceNumId":"2808946627307959","deviceRegistryId":"my-registry","deviceRegistryLocation":"us-central1","projectId":"conference-demos","subFolder":""},"data":"bm90IHZhbGlkIGZvcm1hdHRlZCBkYXRh","messageId":"204004313210337","message_id":"204004313210337","publishTime":"2018-09-14T19:22:54.587Z","publish_time":"2018-09-14T19:22:54.587Z"},"subscription":"projects/conference-demos/subscriptions/my-subscription"}`),
},
{
name: "post invalid structured body",
method: "POST",
path: "/",
maxsize: 500 * 1024 * 1024,
status: http.StatusBadRequest,
body: strings.NewReader(`{"message":{"attributes":{"thing":1},"data":"bm90IHZhbGlkIGZvcm1hdHRlZCBkYXRh"},"subscription":"projects/conference-demos/subscriptions/my-subscription"}`),
},
}
for _, test := range tests {
wg := &sync.WaitGroup{}
req, err := http.NewRequest(test.method, test.path, test.body)
require.NoError(t, err)
rr := httptest.NewRecorder()
pubPush := &PubSubPush{
Log: testutil.Logger{},
Path: "/",
MaxBodySize: internal.Size{
Size: test.maxsize,
},
sem: make(chan struct{}, 1),
undelivered: make(map[telegraf.TrackingID]chan bool),
mu: &sync.Mutex{},
WriteTimeout: internal.Duration{Duration: time.Second * 1},
}
pubPush.ctx, pubPush.cancel = context.WithCancel(context.Background())
if test.full {
// fill buffer with fake message
pubPush.sem <- struct{}{}
}
p, _ := parsers.NewParser(&parsers.Config{
MetricName: "cloud_pubsub_push",
DataFormat: "influx",
})
pubPush.SetParser(p)
dst := make(chan telegraf.Metric, 1)
ro := models.NewRunningOutput("test", &testOutput{failWrite: test.fail}, &models.OutputConfig{}, 1, 1)
pubPush.acc = agent.NewAccumulator(&testMetricMaker{}, dst).WithTracking(1)
wg.Add(1)
go func() {
defer wg.Done()
pubPush.receiveDelivered()
}()
wg.Add(1)
go func(status int, d chan telegraf.Metric) {
defer wg.Done()
for m := range d {
ro.AddMetric(m)
ro.Write()
}
}(test.status, dst)
ctx, cancel := context.WithTimeout(req.Context(), pubPush.WriteTimeout.Duration)
req = req.WithContext(ctx)
pubPush.ServeHTTP(rr, req)
require.Equal(t, test.status, rr.Code, test.name)
if test.expected != "" {
require.Equal(t, test.expected, rr.Body.String(), test.name)
}
pubPush.cancel()
cancel()
close(dst)
wg.Wait()
}
}
type testMetricMaker struct{}
func (tm *testMetricMaker) Name() string {
return "TestPlugin"
}
func (tm *testMetricMaker) LogName() string {
return tm.Name()
}
func (tm *testMetricMaker) MakeMetric(metric telegraf.Metric) telegraf.Metric {
return metric
}
func (tm *testMetricMaker) Log() telegraf.Logger {
return models.NewLogger("test", "test", "")
}
type testOutput struct {
// if true, mock a write failure
failWrite bool
}
func (*testOutput) Connect() error {
return nil
}
func (*testOutput) Close() error {
return nil
}
func (*testOutput) Description() string {
return ""
}
func (*testOutput) SampleConfig() string {
return ""
}
func (t *testOutput) Write(metrics []telegraf.Metric) error {
if t.failWrite {
return fmt.Errorf("failed write")
}
return nil
}

View File

@@ -17,7 +17,7 @@ API endpoint. In the following order the plugin will attempt to authenticate.
```toml
[[inputs.cloudwatch]]
## Amazon Region (required)
## Amazon Region
region = "us-east-1"
## Amazon Credentials
@@ -28,12 +28,18 @@ API endpoint. In the following order the plugin will attempt to authenticate.
## 4) environment variables
## 5) shared credentials file
## 6) EC2 Instance Profile
#access_key = ""
#secret_key = ""
#token = ""
#role_arn = ""
#profile = ""
#shared_credential_file = ""
# access_key = ""
# secret_key = ""
# token = ""
# role_arn = ""
# profile = ""
# shared_credential_file = ""
## Endpoint to make request against, the correct endpoint is automatically
## determined and this option should only be set if you wish to override the
## default.
## ex: endpoint_url = "http://localhost:8000"
# endpoint_url = ""
# The minimum period for Cloudwatch metrics is 1 minute (60s). However not all
# metrics are made available to the 1 minute period. Some are collected at
@@ -48,32 +54,46 @@ API endpoint. In the following order the plugin will attempt to authenticate.
## Collection Delay (required - must account for metrics availability via CloudWatch API)
delay = "5m"
## Override global run interval (optional - defaults to global interval)
## Recomended: use metric 'interval' that is a multiple of 'period' to avoid
## Recommended: use metric 'interval' that is a multiple of 'period' to avoid
## gaps or overlap in pulled data
interval = "5m"
## Configure the TTL for the internal cache of metrics.
# cache_ttl = "1h"
## Metric Statistic Namespace (required)
namespace = "AWS/ELB"
## Maximum requests per second. Note that the global default AWS rate limit is
## 400 reqs/sec, so if you define multiple namespaces, these should add up to a
## maximum of 400. Optional - default value is 200.
## 50 reqs/sec, so if you define multiple namespaces, these should add up to a
## maximum of 50.
## See http://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/cloudwatch_limits.html
ratelimit = 200
# ratelimit = 25
## Metrics to Pull (optional)
## Timeout for http requests made by the cloudwatch client.
# timeout = "5s"
## Namespace-wide statistic filters. These allow fewer queries to be made to
## cloudwatch.
# statistic_include = [ "average", "sum", "minimum", "maximum", sample_count" ]
# statistic_exclude = []
## Metrics to Pull
## Defaults to all Metrics in Namespace if nothing is provided
## Refreshes Namespace available metrics every 1h
[[inputs.cloudwatch.metrics]]
names = ["Latency", "RequestCount"]
## Dimension filters for Metric. These are optional however all dimensions
## defined for the metric names must be specified in order to retrieve
## the metric statistics.
[[inputs.cloudwatch.metrics.dimensions]]
name = "LoadBalancerName"
value = "p-example"
#[[inputs.cloudwatch.metrics]]
# names = ["Latency", "RequestCount"]
#
# ## Statistic filters for Metric. These allow for retrieving specific
# ## statistics for an individual metric.
# # statistic_include = [ "average", "sum", "minimum", "maximum", sample_count" ]
# # statistic_exclude = []
#
# ## Dimension filters for Metric. All dimensions defined for the metric names
# ## must be specified in order to retrieve the metric statistics.
# [[inputs.cloudwatch.metrics.dimensions]]
# name = "LoadBalancerName"
# value = "p-example"
```
#### Requirements and Terminology
@@ -91,17 +111,21 @@ wildcard dimension is ignored.
Example:
```
[[inputs.cloudwatch.metrics]]
names = ["Latency"]
[[inputs.cloudwatch]]
period = "1m"
interval = "5m"
## Dimension filters for Metric (optional)
[[inputs.cloudwatch.metrics.dimensions]]
name = "LoadBalancerName"
value = "p-example"
[[inputs.cloudwatch.metrics]]
names = ["Latency"]
[[inputs.cloudwatch.metrics.dimensions]]
name = "AvailabilityZone"
value = "*"
## Dimension filters for Metric (optional)
[[inputs.cloudwatch.metrics.dimensions]]
name = "LoadBalancerName"
value = "p-example"
[[inputs.cloudwatch.metrics.dimensions]]
name = "AvailabilityZone"
value = "*"
```
If the following ELBs are available:
@@ -118,9 +142,11 @@ Then 2 metrics will be output:
If the `AvailabilityZone` wildcard dimension was omitted, then a single metric (name: `p-example`)
would be exported containing the aggregate values of the ELB across availability zones.
To maximize efficiency and savings, consider making fewer requests by increasing `interval` but keeping `period` at the duration you would like metrics to be reported. The above example will request metrics from Cloudwatch every 5 minutes but will output five metrics timestamped one minute apart.
#### Restrictions and Limitations
- CloudWatch metrics are not available instantly via the CloudWatch API. You should adjust your collection `delay` to account for this lag in metrics availability based on your [monitoring subscription level](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html)
- CloudWatch API usage incurs cost - see [GetMetricStatistics Pricing](https://aws.amazon.com/cloudwatch/pricing/)
- CloudWatch API usage incurs cost - see [GetMetricData Pricing](https://aws.amazon.com/cloudwatch/pricing/)
### Measurements & Fields:
@@ -141,7 +167,6 @@ Tag Dimension names are represented in [snake case](https://en.wikipedia.org/wik
- All measurements have the following tags:
- region (CloudWatch Region)
- unit (CloudWatch Metric Unit)
- {dimension-name} (Cloudwatch Dimension value - one for each metric dimension)
### Troubleshooting:
@@ -155,12 +180,34 @@ aws cloudwatch list-metrics --namespace AWS/EC2 --region us-east-1 --metric-name
If the expected metrics are not returned, you can try getting them manually
for a short period of time:
```
aws cloudwatch get-metric-statistics --namespace AWS/EC2 --region us-east-1 --period 300 --start-time 2018-07-01T00:00:00Z --end-time 2018-07-01T00:15:00Z --statistics Average --metric-name CPUCreditBalance --dimensions Name=InstanceId,Value=i-deadbeef
aws cloudwatch get-metric-data \
--start-time 2018-07-01T00:00:00Z \
--end-time 2018-07-01T00:15:00Z \
--metric-data-queries '[
{
"Id": "avgCPUCreditBalance",
"MetricStat": {
"Metric": {
"Namespace": "AWS/EC2",
"MetricName": "CPUCreditBalance",
"Dimensions": [
{
"Name": "InstanceId",
"Value": "i-deadbeef"
}
]
},
"Period": 300,
"Stat": "Average"
},
"Label": "avgCPUCreditBalance"
}
]'
```
### Example Output:
```
$ ./telegraf --config telegraf.conf --input-filter cloudwatch --test
> cloudwatch_aws_elb,load_balancer_name=p-example,region=us-east-1,unit=seconds latency_average=0.004810798017284538,latency_maximum=0.1100282669067383,latency_minimum=0.0006084442138671875,latency_sample_count=4029,latency_sum=19.382705211639404 1459542420000000000
> cloudwatch_aws_elb,load_balancer_name=p-example,region=us-east-1 latency_average=0.004810798017284538,latency_maximum=0.1100282669067383,latency_minimum=0.0006084442138671875,latency_sample_count=4029,latency_sum=19.382705211639404 1459542420000000000
```

View File

@@ -2,63 +2,85 @@ package cloudwatch
import (
"fmt"
"net"
"net/http"
"strconv"
"strings"
"sync"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/cloudwatch"
"github.com/influxdata/telegraf"
internalaws "github.com/influxdata/telegraf/config/aws"
"github.com/influxdata/telegraf/filter"
"github.com/influxdata/telegraf/internal"
internalaws "github.com/influxdata/telegraf/internal/config/aws"
"github.com/influxdata/telegraf/internal/limiter"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/plugins/inputs"
)
type (
// CloudWatch contains the configuration and cache for the cloudwatch plugin.
CloudWatch struct {
Region string `toml:"region"`
AccessKey string `toml:"access_key"`
SecretKey string `toml:"secret_key"`
RoleARN string `toml:"role_arn"`
Profile string `toml:"profile"`
Filename string `toml:"shared_credential_file"`
Token string `toml:"token"`
Region string `toml:"region"`
AccessKey string `toml:"access_key"`
SecretKey string `toml:"secret_key"`
RoleARN string `toml:"role_arn"`
Profile string `toml:"profile"`
CredentialPath string `toml:"shared_credential_file"`
Token string `toml:"token"`
EndpointURL string `toml:"endpoint_url"`
StatisticExclude []string `toml:"statistic_exclude"`
StatisticInclude []string `toml:"statistic_include"`
Timeout internal.Duration `toml:"timeout"`
Period internal.Duration `toml:"period"`
Delay internal.Duration `toml:"delay"`
Namespace string `toml:"namespace"`
Metrics []*Metric `toml:"metrics"`
CacheTTL internal.Duration `toml:"cache_ttl"`
RateLimit int `toml:"ratelimit"`
client cloudwatchClient
metricCache *MetricCache
Period internal.Duration `toml:"period"`
Delay internal.Duration `toml:"delay"`
Namespace string `toml:"namespace"`
Metrics []*Metric `toml:"metrics"`
CacheTTL internal.Duration `toml:"cache_ttl"`
RateLimit int `toml:"ratelimit"`
Log telegraf.Logger `toml:"-"`
client cloudwatchClient
statFilter filter.Filter
metricCache *metricCache
queryDimensions map[string]*map[string]string
windowStart time.Time
windowEnd time.Time
}
// Metric defines a simplified Cloudwatch metric.
Metric struct {
MetricNames []string `toml:"names"`
Dimensions []*Dimension `toml:"dimensions"`
StatisticExclude *[]string `toml:"statistic_exclude"`
StatisticInclude *[]string `toml:"statistic_include"`
MetricNames []string `toml:"names"`
Dimensions []*Dimension `toml:"dimensions"`
}
// Dimension defines a simplified Cloudwatch dimension (provides metric filtering).
Dimension struct {
Name string `toml:"name"`
Value string `toml:"value"`
}
MetricCache struct {
TTL time.Duration
Fetched time.Time
Metrics []*cloudwatch.Metric
// metricCache caches metrics, their filters, and generated queries.
metricCache struct {
ttl time.Duration
built time.Time
metrics []filteredMetric
queries []*cloudwatch.MetricDataQuery
}
cloudwatchClient interface {
ListMetrics(*cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error)
GetMetricStatistics(*cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error)
GetMetricData(*cloudwatch.GetMetricDataInput) (*cloudwatch.GetMetricDataOutput, error)
}
)
// SampleConfig returns the default configuration of the Cloudwatch input plugin.
func (c *CloudWatch) SampleConfig() string {
return `
## Amazon Region
@@ -72,12 +94,18 @@ func (c *CloudWatch) SampleConfig() string {
## 4) environment variables
## 5) shared credentials file
## 6) EC2 Instance Profile
#access_key = ""
#secret_key = ""
#token = ""
#role_arn = ""
#profile = ""
#shared_credential_file = ""
# access_key = ""
# secret_key = ""
# token = ""
# role_arn = ""
# profile = ""
# shared_credential_file = ""
## Endpoint to make request against, the correct endpoint is automatically
## determined and this option should only be set if you wish to override the
## default.
## ex: endpoint_url = "http://localhost:8000"
# endpoint_url = ""
# The minimum period for Cloudwatch metrics is 1 minute (60s). However not all
# metrics are made available to the 1 minute period. Some are collected at
@@ -97,45 +125,177 @@ func (c *CloudWatch) SampleConfig() string {
interval = "5m"
## Configure the TTL for the internal cache of metrics.
## Defaults to 1 hr if not specified
#cache_ttl = "10m"
# cache_ttl = "1h"
## Metric Statistic Namespace (required)
namespace = "AWS/ELB"
## Maximum requests per second. Note that the global default AWS rate limit is
## 400 reqs/sec, so if you define multiple namespaces, these should add up to a
## maximum of 400. Optional - default value is 200.
## 50 reqs/sec, so if you define multiple namespaces, these should add up to a
## maximum of 50.
## See http://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/cloudwatch_limits.html
ratelimit = 200
# ratelimit = 25
## Metrics to Pull (optional)
## Timeout for http requests made by the cloudwatch client.
# timeout = "5s"
## Namespace-wide statistic filters. These allow fewer queries to be made to
## cloudwatch.
# statistic_include = [ "average", "sum", "minimum", "maximum", sample_count" ]
# statistic_exclude = []
## Metrics to Pull
## Defaults to all Metrics in Namespace if nothing is provided
## Refreshes Namespace available metrics every 1h
#[[inputs.cloudwatch.metrics]]
# names = ["Latency", "RequestCount"]
#
# ## Dimension filters for Metric. These are optional however all dimensions
# ## defined for the metric names must be specified in order to retrieve
# ## the metric statistics.
# ## Statistic filters for Metric. These allow for retrieving specific
# ## statistics for an individual metric.
# # statistic_include = [ "average", "sum", "minimum", "maximum", "sample_count" ]
# # statistic_exclude = []
#
# ## Dimension filters for Metric. All dimensions defined for the metric names
# ## must be specified in order to retrieve the metric statistics.
# [[inputs.cloudwatch.metrics.dimensions]]
# name = "LoadBalancerName"
# value = "p-example"
`
}
// Description returns a one-sentence description on the Cloudwatch input plugin.
func (c *CloudWatch) Description() string {
return "Pull Metric Statistics from Amazon CloudWatch"
}
func SelectMetrics(c *CloudWatch) ([]*cloudwatch.Metric, error) {
var metrics []*cloudwatch.Metric
// Gather takes in an accumulator and adds the metrics that the Input
// gathers. This is called every "interval".
func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
if c.statFilter == nil {
var err error
// Set config level filter (won't change throughout life of plugin).
c.statFilter, err = filter.NewIncludeExcludeFilter(c.StatisticInclude, c.StatisticExclude)
if err != nil {
return err
}
}
if c.client == nil {
c.initializeCloudWatch()
}
filteredMetrics, err := getFilteredMetrics(c)
if err != nil {
return err
}
c.updateWindow(time.Now())
// Get all of the possible queries so we can send them in batches of up to 500.
queries, err := c.getDataQueries(filteredMetrics)
if err != nil {
return err
}
if len(queries) == 0 {
return nil
}
// Limit concurrency or we can easily exhaust user connection limit.
// See cloudwatch API request limits:
// http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_limits.html
lmtr := limiter.NewRateLimiter(c.RateLimit, time.Second)
defer lmtr.Stop()
wg := sync.WaitGroup{}
rLock := sync.Mutex{}
results := []*cloudwatch.MetricDataResult{}
// 500 is the maximum number of metric data queries a `GetMetricData` request can contain.
batchSize := 500
var batches [][]*cloudwatch.MetricDataQuery
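// Repeatedly slice off full batches; the three-index slice caps each batch's
// capacity at its length, so appending to a batch can never overwrite the
// remaining queries in the shared backing array.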
for batchSize < len(queries) {
queries, batches = queries[batchSize:], append(batches, queries[0:batchSize:batchSize])
}
batches = append(batches, queries)
for i := range batches {
wg.Add(1)
<-lmtr.C
go func(inm []*cloudwatch.MetricDataQuery) {
defer wg.Done()
result, err := c.gatherMetrics(c.getDataInputs(inm))
if err != nil {
acc.AddError(err)
return
}
rLock.Lock()
results = append(results, result...)
rLock.Unlock()
}(batches[i])
}
wg.Wait()
return c.aggregateMetrics(acc, results)
}
func (c *CloudWatch) initializeCloudWatch() {
credentialConfig := &internalaws.CredentialConfig{
Region: c.Region,
AccessKey: c.AccessKey,
SecretKey: c.SecretKey,
RoleARN: c.RoleARN,
Profile: c.Profile,
Filename: c.CredentialPath,
Token: c.Token,
EndpointURL: c.EndpointURL,
}
configProvider := credentialConfig.Credentials()
cfg := &aws.Config{
HTTPClient: &http.Client{
// use values from DefaultTransport
Transport: &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
DualStack: true,
}).DialContext,
MaxIdleConns: 100,
IdleConnTimeout: 90 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
},
Timeout: c.Timeout.Duration,
},
}
loglevel := aws.LogOff
c.client = cloudwatch.New(configProvider, cfg.WithLogLevel(loglevel))
}
type filteredMetric struct {
metrics []*cloudwatch.Metric
statFilter filter.Filter
}
// getFilteredMetrics returns metrics specified in the config file or metrics listed from Cloudwatch.
func getFilteredMetrics(c *CloudWatch) ([]filteredMetric, error) {
if c.metricCache != nil && c.metricCache.isValid() {
return c.metricCache.metrics, nil
}
fMetrics := []filteredMetric{}
// check for provided metric filter
if c.Metrics != nil {
metrics = []*cloudwatch.Metric{}
for _, m := range c.Metrics {
if !hasWilcard(m.Dimensions) {
metrics := []*cloudwatch.Metric{}
if !hasWildcard(m.Dimensions) {
dimensions := make([]*cloudwatch.Dimension, len(m.Dimensions))
for k, d := range m.Dimensions {
dimensions[k] = &cloudwatch.Dimension{
@@ -167,215 +327,283 @@ func SelectMetrics(c *CloudWatch) ([]*cloudwatch.Metric, error) {
}
}
}
if m.StatisticExclude == nil {
m.StatisticExclude = &c.StatisticExclude
}
if m.StatisticInclude == nil {
m.StatisticInclude = &c.StatisticInclude
}
statFilter, err := filter.NewIncludeExcludeFilter(*m.StatisticInclude, *m.StatisticExclude)
if err != nil {
return nil, err
}
fMetrics = append(fMetrics, filteredMetric{
metrics: metrics,
statFilter: statFilter,
})
}
} else {
var err error
metrics, err = c.fetchNamespaceMetrics()
metrics, err := c.fetchNamespaceMetrics()
if err != nil {
return nil, err
}
fMetrics = []filteredMetric{{
metrics: metrics,
statFilter: c.statFilter,
}}
}
return metrics, nil
c.metricCache = &metricCache{
metrics: fMetrics,
built: time.Now(),
ttl: c.CacheTTL.Duration,
}
return fMetrics, nil
}
func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
if c.client == nil {
c.initializeCloudWatch()
}
metrics, err := SelectMetrics(c)
if err != nil {
return err
}
now := time.Now()
// limit concurrency or we can easily exhaust user connection limit
// see cloudwatch API request limits:
// http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_limits.html
lmtr := limiter.NewRateLimiter(c.RateLimit, time.Second)
defer lmtr.Stop()
var wg sync.WaitGroup
wg.Add(len(metrics))
for _, m := range metrics {
<-lmtr.C
go func(inm *cloudwatch.Metric) {
defer wg.Done()
acc.AddError(c.gatherMetric(acc, inm, now))
}(m)
}
wg.Wait()
return nil
}
func init() {
inputs.Add("cloudwatch", func() telegraf.Input {
ttl, _ := time.ParseDuration("1hr")
return &CloudWatch{
CacheTTL: internal.Duration{Duration: ttl},
RateLimit: 200,
}
})
}
/*
* Initialize CloudWatch client
*/
func (c *CloudWatch) initializeCloudWatch() error {
credentialConfig := &internalaws.CredentialConfig{
Region: c.Region,
AccessKey: c.AccessKey,
SecretKey: c.SecretKey,
RoleARN: c.RoleARN,
Profile: c.Profile,
Filename: c.Filename,
Token: c.Token,
}
configProvider := credentialConfig.Credentials()
c.client = cloudwatch.New(configProvider)
return nil
}
/*
* Fetch available metrics for given CloudWatch Namespace
*/
// fetchNamespaceMetrics retrieves available metrics for a given CloudWatch namespace.
func (c *CloudWatch) fetchNamespaceMetrics() ([]*cloudwatch.Metric, error) {
if c.metricCache != nil && c.metricCache.IsValid() {
return c.metricCache.Metrics, nil
}
metrics := []*cloudwatch.Metric{}
var token *string
for more := true; more; {
params := &cloudwatch.ListMetricsInput{
Namespace: aws.String(c.Namespace),
Dimensions: []*cloudwatch.DimensionFilter{},
NextToken: token,
MetricName: nil,
}
params := &cloudwatch.ListMetricsInput{
Namespace: aws.String(c.Namespace),
Dimensions: []*cloudwatch.DimensionFilter{},
NextToken: token,
MetricName: nil,
}
for {
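// Page through ListMetrics, following NextToken until the listing is exhausted.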
resp, err := c.client.ListMetrics(params)
if err != nil {
return nil, err
}
metrics = append(metrics, resp.Metrics...)
if resp.NextToken == nil {
break
}
token = resp.NextToken
more = token != nil
}
c.metricCache = &MetricCache{
Metrics: metrics,
Fetched: time.Now(),
TTL: c.CacheTTL.Duration,
params.NextToken = resp.NextToken
}
return metrics, nil
}
/*
* Gather given Metric and emit any error
*/
func (c *CloudWatch) gatherMetric(
acc telegraf.Accumulator,
metric *cloudwatch.Metric,
now time.Time,
) error {
params := c.getStatisticsInput(metric, now)
resp, err := c.client.GetMetricStatistics(params)
if err != nil {
return err
func (c *CloudWatch) updateWindow(relativeTo time.Time) {
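// Windows tile back-to-back: after the first run each window starts where the
// previous one ended, so datapoints are neither skipped nor fetched twice.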
windowEnd := relativeTo.Add(-c.Delay.Duration)
if c.windowEnd.IsZero() {
// this is the first run, no window info, so just get a single period
c.windowStart = windowEnd.Add(-c.Period.Duration)
} else {
// subsequent window, start where last window left off
c.windowStart = c.windowEnd
}
for _, point := range resp.Datapoints {
tags := map[string]string{
"region": c.Region,
"unit": snakeCase(*point.Unit),
c.windowEnd = windowEnd
}
// getDataQueries gets all of the possible queries so we can maximize the request payload.
func (c *CloudWatch) getDataQueries(filteredMetrics []filteredMetric) ([]*cloudwatch.MetricDataQuery, error) {
if c.metricCache != nil && c.metricCache.queries != nil && c.metricCache.isValid() {
return c.metricCache.queries, nil
}
c.queryDimensions = map[string]*map[string]string{}
dataQueries := []*cloudwatch.MetricDataQuery{}
for i, filtered := range filteredMetrics {
for j, metric := range filtered.metrics {
id := strconv.Itoa(j) + "_" + strconv.Itoa(i)
dimension := ctod(metric.Dimensions)
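// One MetricDataQuery is emitted per statistic that passes the filter. The id
// encodes statistic, metric index, and filter index so aggregateMetrics can
// look the dimensions back up in queryDimensions.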
if filtered.statFilter.Match("average") {
c.queryDimensions["average_"+id] = dimension
dataQueries = append(dataQueries, &cloudwatch.MetricDataQuery{
Id: aws.String("average_" + id),
Label: aws.String(snakeCase(*metric.MetricName + "_average")),
MetricStat: &cloudwatch.MetricStat{
Metric: metric,
Period: aws.Int64(int64(c.Period.Duration.Seconds())),
Stat: aws.String(cloudwatch.StatisticAverage),
},
})
}
if filtered.statFilter.Match("maximum") {
c.queryDimensions["maximum_"+id] = dimension
dataQueries = append(dataQueries, &cloudwatch.MetricDataQuery{
Id: aws.String("maximum_" + id),
Label: aws.String(snakeCase(*metric.MetricName + "_maximum")),
MetricStat: &cloudwatch.MetricStat{
Metric: metric,
Period: aws.Int64(int64(c.Period.Duration.Seconds())),
Stat: aws.String(cloudwatch.StatisticMaximum),
},
})
}
if filtered.statFilter.Match("minimum") {
c.queryDimensions["minimum_"+id] = dimension
dataQueries = append(dataQueries, &cloudwatch.MetricDataQuery{
Id: aws.String("minimum_" + id),
Label: aws.String(snakeCase(*metric.MetricName + "_minimum")),
MetricStat: &cloudwatch.MetricStat{
Metric: metric,
Period: aws.Int64(int64(c.Period.Duration.Seconds())),
Stat: aws.String(cloudwatch.StatisticMinimum),
},
})
}
if filtered.statFilter.Match("sum") {
c.queryDimensions["sum_"+id] = dimension
dataQueries = append(dataQueries, &cloudwatch.MetricDataQuery{
Id: aws.String("sum_" + id),
Label: aws.String(snakeCase(*metric.MetricName + "_sum")),
MetricStat: &cloudwatch.MetricStat{
Metric: metric,
Period: aws.Int64(int64(c.Period.Duration.Seconds())),
Stat: aws.String(cloudwatch.StatisticSum),
},
})
}
if filtered.statFilter.Match("sample_count") {
c.queryDimensions["sample_count_"+id] = dimension
dataQueries = append(dataQueries, &cloudwatch.MetricDataQuery{
Id: aws.String("sample_count_" + id),
Label: aws.String(snakeCase(*metric.MetricName + "_sample_count")),
MetricStat: &cloudwatch.MetricStat{
Metric: metric,
Period: aws.Int64(int64(c.Period.Duration.Seconds())),
Stat: aws.String(cloudwatch.StatisticSampleCount),
},
})
}
}
}
if len(dataQueries) == 0 {
c.Log.Debug("no metrics found to collect")
return nil, nil
}
if c.metricCache == nil {
c.metricCache = &metricCache{
queries: dataQueries,
built: time.Now(),
ttl: c.CacheTTL.Duration,
}
} else {
c.metricCache.queries = dataQueries
}
return dataQueries, nil
}
// gatherMetrics gets metric data from Cloudwatch.
func (c *CloudWatch) gatherMetrics(
params *cloudwatch.GetMetricDataInput,
) ([]*cloudwatch.MetricDataResult, error) {
results := []*cloudwatch.MetricDataResult{}
for {
resp, err := c.client.GetMetricData(params)
if err != nil {
return nil, fmt.Errorf("failed to get metric data: %v", err)
}
for _, d := range metric.Dimensions {
tags[snakeCase(*d.Name)] = *d.Value
results = append(results, resp.MetricDataResults...)
if resp.NextToken == nil {
break
}
params.NextToken = resp.NextToken
}
// record field for each statistic
fields := map[string]interface{}{}
return results, nil
}
if point.Average != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticAverage)] = *point.Average
}
if point.Maximum != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticMaximum)] = *point.Maximum
}
if point.Minimum != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticMinimum)] = *point.Minimum
}
if point.SampleCount != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticSampleCount)] = *point.SampleCount
}
if point.Sum != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticSum)] = *point.Sum
}
func (c *CloudWatch) aggregateMetrics(
acc telegraf.Accumulator,
metricDataResults []*cloudwatch.MetricDataResult,
) error {
var (
grouper = metric.NewSeriesGrouper()
namespace = sanitizeMeasurement(c.Namespace)
)
acc.AddFields(formatMeasurement(c.Namespace), fields, tags, *point.Timestamp)
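// The series grouper folds values that share a tag set and timestamp back
// into single points before they are handed to the accumulator.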
for _, result := range metricDataResults {
tags := map[string]string{}
if dimensions, ok := c.queryDimensions[*result.Id]; ok {
tags = *dimensions
}
tags["region"] = c.Region
for i := range result.Values {
grouper.Add(namespace, tags, *result.Timestamps[i], *result.Label, *result.Values[i])
}
}
for _, metric := range grouper.Metrics() {
acc.AddMetric(metric)
}
return nil
}
/*
* Formatting helpers
*/
func formatField(metricName string, statistic string) string {
return fmt.Sprintf("%s_%s", snakeCase(metricName), snakeCase(statistic))
func init() {
inputs.Add("cloudwatch", func() telegraf.Input {
return &CloudWatch{
CacheTTL: internal.Duration{Duration: time.Hour},
RateLimit: 25,
Timeout: internal.Duration{Duration: time.Second * 5},
}
})
}
func formatMeasurement(namespace string) string {
func sanitizeMeasurement(namespace string) string {
namespace = strings.Replace(namespace, "/", "_", -1)
namespace = snakeCase(namespace)
return fmt.Sprintf("cloudwatch_%s", namespace)
return "cloudwatch_" + namespace
}
func snakeCase(s string) string {
s = internal.SnakeCase(s)
s = strings.Replace(s, " ", "_", -1)
s = strings.Replace(s, "__", "_", -1)
return s
}
/*
* Map Metric to *cloudwatch.GetMetricStatisticsInput for given timeframe
*/
func (c *CloudWatch) getStatisticsInput(metric *cloudwatch.Metric, now time.Time) *cloudwatch.GetMetricStatisticsInput {
end := now.Add(-c.Delay.Duration)
type dimension struct {
name string
value string
}
input := &cloudwatch.GetMetricStatisticsInput{
StartTime: aws.Time(end.Add(-c.Period.Duration)),
EndTime: aws.Time(end),
MetricName: metric.MetricName,
Namespace: metric.Namespace,
Period: aws.Int64(int64(c.Period.Duration.Seconds())),
Dimensions: metric.Dimensions,
Statistics: []*string{
aws.String(cloudwatch.StatisticAverage),
aws.String(cloudwatch.StatisticMaximum),
aws.String(cloudwatch.StatisticMinimum),
aws.String(cloudwatch.StatisticSum),
aws.String(cloudwatch.StatisticSampleCount)},
// ctod converts cloudwatch dimensions to regular dimensions.
func ctod(cDimensions []*cloudwatch.Dimension) *map[string]string {
dimensions := map[string]string{}
for i := range cDimensions {
dimensions[snakeCase(*cDimensions[i].Name)] = *cDimensions[i].Value
}
return input
return &dimensions
}
/*
* Check Metric Cache validity
*/
func (c *MetricCache) IsValid() bool {
return c.Metrics != nil && time.Since(c.Fetched) < c.TTL
func (c *CloudWatch) getDataInputs(dataQueries []*cloudwatch.MetricDataQuery) *cloudwatch.GetMetricDataInput {
return &cloudwatch.GetMetricDataInput{
StartTime: aws.Time(c.windowStart),
EndTime: aws.Time(c.windowEnd),
MetricDataQueries: dataQueries,
}
}
func hasWilcard(dimensions []*Dimension) bool {
// isValid checks the validity of the metric cache.
func (f *metricCache) isValid() bool {
return f.metrics != nil && time.Since(f.built) < f.ttl
}
func hasWildcard(dimensions []*Dimension) bool {
for _, d := range dimensions {
if d.Value == "" || d.Value == "*" {
return true

View File

@@ -6,46 +6,98 @@ import (
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/cloudwatch"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf/filter"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
type mockGatherCloudWatchClient struct{}
func (m *mockGatherCloudWatchClient) ListMetrics(params *cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error) {
metric := &cloudwatch.Metric{
Namespace: params.Namespace,
MetricName: aws.String("Latency"),
Dimensions: []*cloudwatch.Dimension{
&cloudwatch.Dimension{
Name: aws.String("LoadBalancerName"),
Value: aws.String("p-example"),
return &cloudwatch.ListMetricsOutput{
Metrics: []*cloudwatch.Metric{
{
Namespace: params.Namespace,
MetricName: aws.String("Latency"),
Dimensions: []*cloudwatch.Dimension{
{
Name: aws.String("LoadBalancerName"),
Value: aws.String("p-example"),
},
},
},
},
}
result := &cloudwatch.ListMetricsOutput{
Metrics: []*cloudwatch.Metric{metric},
}
return result, nil
}, nil
}
func (m *mockGatherCloudWatchClient) GetMetricStatistics(params *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) {
dataPoint := &cloudwatch.Datapoint{
Timestamp: params.EndTime,
Minimum: aws.Float64(0.1),
Maximum: aws.Float64(0.3),
Average: aws.Float64(0.2),
Sum: aws.Float64(123),
SampleCount: aws.Float64(100),
Unit: aws.String("Seconds"),
}
result := &cloudwatch.GetMetricStatisticsOutput{
Label: aws.String("Latency"),
Datapoints: []*cloudwatch.Datapoint{dataPoint},
}
return result, nil
func (m *mockGatherCloudWatchClient) GetMetricData(params *cloudwatch.GetMetricDataInput) (*cloudwatch.GetMetricDataOutput, error) {
return &cloudwatch.GetMetricDataOutput{
MetricDataResults: []*cloudwatch.MetricDataResult{
{
Id: aws.String("minimum_0_0"),
Label: aws.String("latency_minimum"),
StatusCode: aws.String("completed"),
Timestamps: []*time.Time{
params.EndTime,
},
Values: []*float64{
aws.Float64(0.1),
},
},
{
Id: aws.String("maximum_0_0"),
Label: aws.String("latency_maximum"),
StatusCode: aws.String("completed"),
Timestamps: []*time.Time{
params.EndTime,
},
Values: []*float64{
aws.Float64(0.3),
},
},
{
Id: aws.String("average_0_0"),
Label: aws.String("latency_average"),
StatusCode: aws.String("completed"),
Timestamps: []*time.Time{
params.EndTime,
},
Values: []*float64{
aws.Float64(0.2),
},
},
{
Id: aws.String("sum_0_0"),
Label: aws.String("latency_sum"),
StatusCode: aws.String("completed"),
Timestamps: []*time.Time{
params.EndTime,
},
Values: []*float64{
aws.Float64(123),
},
},
{
Id: aws.String("sample_count_0_0"),
Label: aws.String("latency_sample_count"),
StatusCode: aws.String("completed"),
Timestamps: []*time.Time{
params.EndTime,
},
Values: []*float64{
aws.Float64(100),
},
},
},
}, nil
}
func TestSnakeCase(t *testing.T) {
assert.Equal(t, "cluster_name", snakeCase("Cluster Name"))
assert.Equal(t, "broker_id", snakeCase("Broker ID"))
}
func TestGather(t *testing.T) {
@@ -64,7 +116,7 @@ func TestGather(t *testing.T) {
var acc testutil.Accumulator
c.client = &mockGatherCloudWatchClient{}
acc.GatherError(c.Gather)
assert.NoError(t, acc.GatherError(c.Gather))
fields := map[string]interface{}{}
fields["latency_minimum"] = 0.1
@@ -74,13 +126,11 @@ func TestGather(t *testing.T) {
fields["latency_sample_count"] = 100.0
tags := map[string]string{}
tags["unit"] = "seconds"
tags["region"] = "us-east-1"
tags["load_balancer_name"] = "p-example"
assert.True(t, acc.HasMeasurement("cloudwatch_aws_elb"))
acc.AssertContainsTaggedFields(t, "cloudwatch_aws_elb", fields, tags)
}
type mockSelectMetricsCloudWatchClient struct{}
@@ -100,7 +150,7 @@ func (m *mockSelectMetricsCloudWatchClient) ListMetrics(params *cloudwatch.ListM
Namespace: aws.String("AWS/ELB"),
MetricName: aws.String(m),
Dimensions: []*cloudwatch.Dimension{
&cloudwatch.Dimension{
{
Name: aws.String("LoadBalancerName"),
Value: aws.String(lb),
},
@@ -112,11 +162,11 @@ func (m *mockSelectMetricsCloudWatchClient) ListMetrics(params *cloudwatch.ListM
Namespace: aws.String("AWS/ELB"),
MetricName: aws.String(m),
Dimensions: []*cloudwatch.Dimension{
&cloudwatch.Dimension{
{
Name: aws.String("LoadBalancerName"),
Value: aws.String(lb),
},
&cloudwatch.Dimension{
{
Name: aws.String("AvailabilityZone"),
Value: aws.String(az),
},
@@ -132,7 +182,7 @@ func (m *mockSelectMetricsCloudWatchClient) ListMetrics(params *cloudwatch.ListM
return result, nil
}
func (m *mockSelectMetricsCloudWatchClient) GetMetricStatistics(params *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) {
func (m *mockSelectMetricsCloudWatchClient) GetMetricData(params *cloudwatch.GetMetricDataInput) (*cloudwatch.GetMetricDataOutput, error) {
return nil, nil
}
@@ -148,14 +198,14 @@ func TestSelectMetrics(t *testing.T) {
Period: internalDuration,
RateLimit: 200,
Metrics: []*Metric{
&Metric{
{
MetricNames: []string{"Latency", "RequestCount"},
Dimensions: []*Dimension{
&Dimension{
{
Name: "LoadBalancerName",
Value: "*",
},
&Dimension{
{
Name: "AvailabilityZone",
Value: "*",
},
@@ -164,10 +214,10 @@ func TestSelectMetrics(t *testing.T) {
},
}
c.client = &mockSelectMetricsCloudWatchClient{}
metrics, err := SelectMetrics(c)
filtered, err := getFilteredMetrics(c)
// We've asked for 2 (out of 4) metrics, across all 3 load balancers in both
// AZs. We should get 12 metrics.
assert.Equal(t, 12, len(metrics))
assert.Equal(t, 12, len(filtered[0].metrics))
assert.Nil(t, err)
}
@@ -197,23 +247,99 @@ func TestGenerateStatisticsInputParams(t *testing.T) {
now := time.Now()
params := c.getStatisticsInput(m, now)
c.updateWindow(now)
statFilter, _ := filter.NewIncludeExcludeFilter(nil, nil)
queries, _ := c.getDataQueries([]filteredMetric{{metrics: []*cloudwatch.Metric{m}, statFilter: statFilter}})
params := c.getDataInputs(queries)
assert.EqualValues(t, *params.EndTime, now.Add(-c.Delay.Duration))
assert.EqualValues(t, *params.StartTime, now.Add(-c.Period.Duration).Add(-c.Delay.Duration))
assert.Len(t, params.Dimensions, 1)
assert.Len(t, params.Statistics, 5)
assert.EqualValues(t, *params.Period, 60)
require.Len(t, params.MetricDataQueries, 5)
assert.Len(t, params.MetricDataQueries[0].MetricStat.Metric.Dimensions, 1)
assert.EqualValues(t, *params.MetricDataQueries[0].MetricStat.Period, 60)
}
func TestGenerateStatisticsInputParamsFiltered(t *testing.T) {
d := &cloudwatch.Dimension{
Name: aws.String("LoadBalancerName"),
Value: aws.String("p-example"),
}
m := &cloudwatch.Metric{
MetricName: aws.String("Latency"),
Dimensions: []*cloudwatch.Dimension{d},
}
duration, _ := time.ParseDuration("1m")
internalDuration := internal.Duration{
Duration: duration,
}
c := &CloudWatch{
Namespace: "AWS/ELB",
Delay: internalDuration,
Period: internalDuration,
}
c.initializeCloudWatch()
now := time.Now()
c.updateWindow(now)
statFilter, _ := filter.NewIncludeExcludeFilter([]string{"average", "sample_count"}, nil)
queries, _ := c.getDataQueries([]filteredMetric{{metrics: []*cloudwatch.Metric{m}, statFilter: statFilter}})
params := c.getDataInputs(queries)
assert.EqualValues(t, *params.EndTime, now.Add(-c.Delay.Duration))
assert.EqualValues(t, *params.StartTime, now.Add(-c.Period.Duration).Add(-c.Delay.Duration))
require.Len(t, params.MetricDataQueries, 2)
assert.Len(t, params.MetricDataQueries[0].MetricStat.Metric.Dimensions, 1)
assert.EqualValues(t, *params.MetricDataQueries[0].MetricStat.Period, 60)
}
func TestMetricsCacheTimeout(t *testing.T) {
cache := &MetricCache{
Metrics: []*cloudwatch.Metric{},
Fetched: time.Now(),
TTL: time.Minute,
cache := &metricCache{
metrics: []filteredMetric{},
built: time.Now(),
ttl: time.Minute,
}
assert.True(t, cache.IsValid())
cache.Fetched = time.Now().Add(-time.Minute)
assert.False(t, cache.IsValid())
assert.True(t, cache.isValid())
cache.built = time.Now().Add(-time.Minute)
assert.False(t, cache.isValid())
}
func TestUpdateWindow(t *testing.T) {
duration, _ := time.ParseDuration("1m")
internalDuration := internal.Duration{
Duration: duration,
}
c := &CloudWatch{
Namespace: "AWS/ELB",
Delay: internalDuration,
Period: internalDuration,
}
now := time.Now()
assert.True(t, c.windowEnd.IsZero())
assert.True(t, c.windowStart.IsZero())
c.updateWindow(now)
newStartTime := c.windowEnd
// initial window just has a single period
assert.EqualValues(t, c.windowEnd, now.Add(-c.Delay.Duration))
assert.EqualValues(t, c.windowStart, now.Add(-c.Delay.Duration).Add(-c.Period.Duration))
now = time.Now()
c.updateWindow(now)
// subsequent window uses previous end time as start time
assert.EqualValues(t, c.windowEnd, now.Add(-c.Delay.Duration))
assert.EqualValues(t, c.windowStart, newStartTime)
}

View File

@@ -34,7 +34,7 @@ For more information on conntrack-tools, see the
"nf_conntrack_count","nf_conntrack_max"]
## Directories to search within for the conntrack files above.
## Missing directrories will be ignored.
## Missing directories will be ignored.
dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"]
```

View File

@@ -61,7 +61,7 @@ var sampleConfig = `
"nf_conntrack_count","nf_conntrack_max"]
## Directories to search within for the conntrack files above.
## Missing directrories will be ignored.
## Missing directories will be ignored.
dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"]
`

View File

@@ -12,7 +12,7 @@ report those stats already using StatsD protocol if needed.
# Gather health check statuses from services registered in Consul
[[inputs.consul]]
## Consul server address
# address = "localhost"
# address = "localhost:8500"
## URI scheme for the Consul server, one of "http", "https"
# scheme = "http"
@@ -24,8 +24,8 @@ report those stats already using StatsD protocol if needed.
# username = ""
# password = ""
## Data centre to query the health checks from
# datacentre = ""
## Data center to query the health checks from
# datacenter = ""
## Optional TLS Config
# tls_ca = "/etc/telegraf/ca.pem"
@@ -44,7 +44,7 @@ report those stats already using StatsD protocol if needed.
- consul_health_checks
- tags:
- node (node that check/service is registred on)
- node (node that check/service is registered on)
- service_name
- check_id
- fields:

View File

@@ -16,7 +16,8 @@ type Consul struct {
Token string
Username string
Password string
Datacentre string
Datacentre string // deprecated in 1.10; use Datacenter
Datacenter string
tls.ClientConfig
TagDelimiter string
@@ -26,7 +27,7 @@ type Consul struct {
var sampleConfig = `
## Consul server address
# address = "localhost"
# address = "localhost:8500"
## URI scheme for the Consul server, one of "http", "https"
# scheme = "http"
@@ -38,8 +39,8 @@ var sampleConfig = `
# username = ""
# password = ""
## Data centre to query the health checks from
# datacentre = ""
## Data center to query the health checks from
# datacenter = ""
## Optional TLS Config
# tls_ca = "/etc/telegraf/ca.pem"
@@ -77,6 +78,10 @@ func (c *Consul) createAPIClient() (*api.Client, error) {
config.Datacenter = c.Datacentre
}
if c.Datacenter != "" {
config.Datacenter = c.Datacenter
}
if c.Token != "" {
config.Token = c.Token
}
@@ -121,12 +126,12 @@ func (c *Consul) GatherHealthCheck(acc telegraf.Accumulator, checks []*api.Healt
for _, checkTag := range check.ServiceTags {
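// Service tags of the form key<delimiter>value become key/value telegraf
// tags; bare tags map to themselves, and empty tags or values are skipped.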
if c.TagDelimiter != "" {
splittedTag := strings.SplitN(checkTag, c.TagDelimiter, 2)
if len(splittedTag) == 1 {
if len(splittedTag) == 1 && checkTag != "" {
tags[checkTag] = checkTag
} else if len(splittedTag) == 2 {
} else if len(splittedTag) == 2 && splittedTag[1] != "" {
tags[splittedTag[0]] = splittedTag[1]
}
} else {
} else if checkTag != "" {
tags[checkTag] = checkTag
}
}

View File

@@ -8,7 +8,7 @@ import (
)
var sampleChecks = []*api.HealthCheck{
&api.HealthCheck{
{
Node: "localhost",
CheckID: "foo.health123",
Name: "foo.health",

View File

@@ -12,7 +12,7 @@
## http://admin:secret@couchbase-0.example.com:8091/
##
## If no servers are specified, then localhost is used as the host.
## If no protocol is specifed, HTTP is used.
## If no protocol is specified, HTTP is used.
## If no port is specified, 8091 is used.
servers = ["http://localhost:8091"]
```
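Since the protocol and port fall back to HTTP and 8091, a shorthand entry is equivalent to the full URL; a minimal sketch (hostname illustrative):
```toml
[[inputs.couchbase]]
## Equivalent to servers = ["http://couchbase-0.example.com:8091"]
servers = ["couchbase-0.example.com"]
```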
@@ -48,16 +48,6 @@ Fields:
## Example output
```
$ telegraf --config telegraf.conf --input-filter couchbase --test
* Plugin: couchbase, Collection 1
> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.10.187:8091 memory_free=22927384576,memory_total=64424656896 1458381183695864929
> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.10.65:8091 memory_free=23520161792,memory_total=64424656896 1458381183695972112
> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.13.105:8091 memory_free=23531704320,memory_total=64424656896 1458381183695995259
> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.13.173:8091 memory_free=23628767232,memory_total=64424656896 1458381183696010870
> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.15.120:8091 memory_free=23616692224,memory_total=64424656896 1458381183696027406
> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.8.127:8091 memory_free=23431770112,memory_total=64424656896 1458381183696041040
> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.8.148:8091 memory_free=23811371008,memory_total=64424656896 1458381183696059060
> couchbase_bucket,bucket=default,cluster=https://couchbase-0.example.com/ data_used=25743360,disk_fetches=0,disk_used=31744886,item_count=0,mem_used=77729224,ops_per_sec=0,quota_percent_used=10.58976636614118 1458381183696210074
> couchbase_bucket,bucket=demoncat,cluster=https://couchbase-0.example.com/ data_used=38157584951,disk_fetches=0,disk_used=62730302441,item_count=14662532,mem_used=24015304256,ops_per_sec=1207.753207753208,quota_percent_used=79.87855353525707 1458381183696242695
> couchbase_bucket,bucket=blastro-df,cluster=https://couchbase-0.example.com/ data_used=212552491622,disk_fetches=0,disk_used=413323157621,item_count=944655680,mem_used=202421103760,ops_per_sec=1692.176692176692,quota_percent_used=68.9442170551845 1458381183696272206
couchbase_node,cluster=http://localhost:8091/,hostname=172.17.0.2:8091 memory_free=7705575424,memory_total=16558182400 1547829754000000000
couchbase_bucket,bucket=beer-sample,cluster=http://localhost:8091/ quota_percent_used=27.09285736083984,ops_per_sec=0,disk_fetches=0,item_count=7303,disk_used=21662946,data_used=9325087,mem_used=28408920 1547829754000000000
```

View File

@@ -86,7 +86,7 @@ func (r *Couchbase) gatherServer(addr string, acc telegraf.Accumulator, pool *co
}
for bucketName := range pool.BucketMap {
tags := map[string]string{"cluster": addr, "bucket": bucketName}
tags := map[string]string{"cluster": regexpURI.ReplaceAllString(addr, "${1}"), "bucket": bucketName}
bs := pool.BucketMap[bucketName].BasicStats
fields := make(map[string]interface{})
fields["quota_percent_used"] = bs["quotaPercentUsed"]

View File

@@ -1,14 +1,18 @@
# CouchDB Input Plugin
---
The CouchDB plugin gathers metrics of CouchDB using [_stats](http://docs.couchdb.org/en/1.6.1/api/server/common.html?highlight=stats#get--_stats) endpoint.
The CouchDB plugin gathers metrics from CouchDB using the [_stats] endpoint.
### Configuration:
### Configuration
```
# Sample Config:
```toml
[[inputs.couchdb]]
hosts = ["http://localhost:5984/_stats"]
## Works with CouchDB stats endpoints out of the box
## Multiple Hosts from which to read CouchDB stats:
hosts = ["http://localhost:8086/_stats"]
## Use HTTP Basic Authentication.
# basic_username = "telegraf"
# basic_password = "p@ssw0rd"
```
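The example output below and the test config in this changeset show a CouchDB 2.x node serving stats at `/_node/_local/_stats`, while 1.x uses `/_stats`; a minimal sketch monitoring one of each (hostnames illustrative):
```toml
[[inputs.couchdb]]
## CouchDB 1.x exposes stats at /_stats, 2.x at /_node/_local/_stats.
hosts = ["http://couchdb16:5984/_stats", "http://couchdb22:5984/_node/_local/_stats"]
```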
### Measurements & Fields:
@@ -62,194 +66,14 @@ httpd statistics:
### Example output:
**Post Couchdb 2.0**
```
➜ telegraf git:(master) ✗ ./telegraf --config ./config.conf --input-filter couchdb --test
* Plugin: couchdb,
Collection 1
> couchdb,server=http://localhost:5984/_stats couchdb_auth_cache_hits_current=0,
couchdb_auth_cache_hits_max=0,
couchdb_auth_cache_hits_mean=0,
couchdb_auth_cache_hits_min=0,
couchdb_auth_cache_hits_stddev=0,
couchdb_auth_cache_hits_sum=0,
couchdb_auth_cache_misses_current=0,
couchdb_auth_cache_misses_max=0,
couchdb_auth_cache_misses_mean=0,
couchdb_auth_cache_misses_min=0,
couchdb_auth_cache_misses_stddev=0,
couchdb_auth_cache_misses_sum=0,
couchdb_database_reads_current=0,
couchdb_database_reads_max=0,
couchdb_database_reads_mean=0,
couchdb_database_reads_min=0,
couchdb_database_reads_stddev=0,
couchdb_database_reads_sum=0,
couchdb_database_writes_current=1102,
couchdb_database_writes_max=131,
couchdb_database_writes_mean=0.116,
couchdb_database_writes_min=0,
couchdb_database_writes_stddev=3.536,
couchdb_database_writes_sum=1102,
couchdb_open_databases_current=1,
couchdb_open_databases_max=1,
couchdb_open_databases_mean=0,
couchdb_open_databases_min=0,
couchdb_open_databases_stddev=0.01,
couchdb_open_databases_sum=1,
couchdb_open_os_files_current=2,
couchdb_open_os_files_max=2,
couchdb_open_os_files_mean=0,
couchdb_open_os_files_min=0,
couchdb_open_os_files_stddev=0.02,
couchdb_open_os_files_sum=2,
couchdb_request_time_current=242.21,
couchdb_request_time_max=102,
couchdb_request_time_mean=5.767,
couchdb_request_time_min=1,
couchdb_request_time_stddev=17.369,
couchdb_request_time_sum=242.21,
httpd_bulk_requests_current=0,
httpd_bulk_requests_max=0,
httpd_bulk_requests_mean=0,
httpd_bulk_requests_min=0,
httpd_bulk_requests_stddev=0,
httpd_bulk_requests_sum=0,
httpd_clients_requesting_changes_current=0,
httpd_clients_requesting_changes_max=0,
httpd_clients_requesting_changes_mean=0,
httpd_clients_requesting_changes_min=0,
httpd_clients_requesting_changes_stddev=0,
httpd_clients_requesting_changes_sum=0,
httpd_request_methods_copy_current=0,
httpd_request_methods_copy_max=0,
httpd_request_methods_copy_mean=0,
httpd_request_methods_copy_min=0,
httpd_request_methods_copy_stddev=0,
httpd_request_methods_copy_sum=0,
httpd_request_methods_delete_current=0,
httpd_request_methods_delete_max=0,
httpd_request_methods_delete_mean=0,
httpd_request_methods_delete_min=0,
httpd_request_methods_delete_stddev=0,
httpd_request_methods_delete_sum=0,
httpd_request_methods_get_current=31,
httpd_request_methods_get_max=1,
httpd_request_methods_get_mean=0.003,
httpd_request_methods_get_min=0,
httpd_request_methods_get_stddev=0.057,
httpd_request_methods_get_sum=31,
httpd_request_methods_head_current=0,
httpd_request_methods_head_max=0,
httpd_request_methods_head_mean=0,
httpd_request_methods_head_min=0,
httpd_request_methods_head_stddev=0,
httpd_request_methods_head_sum=0,
httpd_request_methods_post_current=1102,
httpd_request_methods_post_max=131,
httpd_request_methods_post_mean=0.116,
httpd_request_methods_post_min=0,
httpd_request_methods_post_stddev=3.536,
httpd_request_methods_post_sum=1102,
httpd_request_methods_put_current=1,
httpd_request_methods_put_max=1,
httpd_request_methods_put_mean=0,
httpd_request_methods_put_min=0,
httpd_request_methods_put_stddev=0.01,
httpd_request_methods_put_sum=1,
httpd_requests_current=1133,
httpd_requests_max=130,
httpd_requests_mean=0.118,
httpd_requests_min=0,
httpd_requests_stddev=3.512,
httpd_requests_sum=1133,
httpd_status_codes_200_current=31,
httpd_status_codes_200_max=1,
httpd_status_codes_200_mean=0.003,
httpd_status_codes_200_min=0,
httpd_status_codes_200_stddev=0.057,
httpd_status_codes_200_sum=31,
httpd_status_codes_201_current=1103,
httpd_status_codes_201_max=130,
httpd_status_codes_201_mean=0.116,
httpd_status_codes_201_min=0,
httpd_status_codes_201_stddev=3.532,
httpd_status_codes_201_sum=1103,
httpd_status_codes_202_current=0,
httpd_status_codes_202_max=0,
httpd_status_codes_202_mean=0,
httpd_status_codes_202_min=0,
httpd_status_codes_202_stddev=0,
httpd_status_codes_202_sum=0,
httpd_status_codes_301_current=0,
httpd_status_codes_301_max=0,
httpd_status_codes_301_mean=0,
httpd_status_codes_301_min=0,
httpd_status_codes_301_stddev=0,
httpd_status_codes_301_sum=0,
httpd_status_codes_304_current=0,
httpd_status_codes_304_max=0,
httpd_status_codes_304_mean=0,
httpd_status_codes_304_min=0,
httpd_status_codes_304_stddev=0,
httpd_status_codes_304_sum=0,
httpd_status_codes_400_current=0,
httpd_status_codes_400_max=0,
httpd_status_codes_400_mean=0,
httpd_status_codes_400_min=0,
httpd_status_codes_400_stddev=0,
httpd_status_codes_400_sum=0,
httpd_status_codes_401_current=0,
httpd_status_codes_401_max=0,
httpd_status_codes_401_mean=0,
httpd_status_codes_401_min=0,
httpd_status_codes_401_stddev=0,
httpd_status_codes_401_sum=0,
httpd_status_codes_403_current=0,
httpd_status_codes_403_max=0,
httpd_status_codes_403_mean=0,
httpd_status_codes_403_min=0,
httpd_status_codes_403_stddev=0,
httpd_status_codes_403_sum=0,
httpd_status_codes_404_current=0,
httpd_status_codes_404_max=0,
httpd_status_codes_404_mean=0,
httpd_status_codes_404_min=0,
httpd_status_codes_404_stddev=0,
httpd_status_codes_404_sum=0,
httpd_status_codes_405_current=0,
httpd_status_codes_405_max=0,
httpd_status_codes_405_mean=0,
httpd_status_codes_405_min=0,
httpd_status_codes_405_stddev=0,
httpd_status_codes_405_sum=0,
httpd_status_codes_409_current=0,
httpd_status_codes_409_max=0,
httpd_status_codes_409_mean=0,
httpd_status_codes_409_min=0,
httpd_status_codes_409_stddev=0,
httpd_status_codes_409_sum=0,
httpd_status_codes_412_current=0,
httpd_status_codes_412_max=0,
httpd_status_codes_412_mean=0,
httpd_status_codes_412_min=0,
httpd_status_codes_412_stddev=0,
httpd_status_codes_412_sum=0,
httpd_status_codes_500_current=0,
httpd_status_codes_500_max=0,
httpd_status_codes_500_mean=0,
httpd_status_codes_500_min=0,
httpd_status_codes_500_stddev=0,
httpd_status_codes_500_sum=0,
httpd_temporary_view_reads_current=0,
httpd_temporary_view_reads_max=0,
httpd_temporary_view_reads_mean=0,
httpd_temporary_view_reads_min=0,
httpd_temporary_view_reads_stddev=0,
httpd_temporary_view_reads_sum=0,
httpd_view_reads_current=0,
httpd_view_reads_max=0,
httpd_view_reads_mean=0,
httpd_view_reads_min=0,
httpd_view_reads_stddev=0,
httpd_view_reads_sum=0 1454692257621938169
couchdb,server=http://couchdb22:5984/_node/_local/_stats couchdb_auth_cache_hits_value=0,httpd_request_methods_delete_value=0,couchdb_auth_cache_misses_value=0,httpd_request_methods_get_value=42,httpd_status_codes_304_value=0,httpd_status_codes_400_value=0,httpd_request_methods_head_value=0,httpd_status_codes_201_value=0,couchdb_database_reads_value=0,httpd_request_methods_copy_value=0,couchdb_request_time_max=0,httpd_status_codes_200_value=42,httpd_status_codes_301_value=0,couchdb_open_os_files_value=2,httpd_request_methods_put_value=0,httpd_request_methods_post_value=0,httpd_status_codes_202_value=0,httpd_status_codes_403_value=0,httpd_status_codes_409_value=0,couchdb_database_writes_value=0,couchdb_request_time_min=0,httpd_status_codes_412_value=0,httpd_status_codes_500_value=0,httpd_status_codes_401_value=0,httpd_status_codes_404_value=0,httpd_status_codes_405_value=0,couchdb_open_databases_value=0 1536707179000000000
```
**Pre Couchdb 2.0**
```
couchdb,server=http://couchdb16:5984/_stats couchdb_request_time_sum=96,httpd_status_codes_200_sum=37,httpd_status_codes_200_min=0,httpd_requests_mean=0.005,httpd_requests_min=0,couchdb_request_time_stddev=3.833,couchdb_request_time_min=1,httpd_request_methods_get_stddev=0.073,httpd_request_methods_get_min=0,httpd_status_codes_200_mean=0.005,httpd_status_codes_200_max=1,httpd_requests_sum=37,couchdb_request_time_current=96,httpd_request_methods_get_sum=37,httpd_request_methods_get_mean=0.005,httpd_request_methods_get_max=1,httpd_status_codes_200_stddev=0.073,couchdb_request_time_mean=2.595,couchdb_request_time_max=25,httpd_request_methods_get_current=37,httpd_status_codes_200_current=37,httpd_requests_current=37,httpd_requests_stddev=0.073,httpd_requests_max=1 1536707179000000000
```
[_stats]: http://docs.couchdb.org/en/1.6.1/api/server/common.html?highlight=stats#get--_stats

View File

@@ -3,44 +3,52 @@ package couchdb
import (
"encoding/json"
"fmt"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"net/http"
"reflect"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
// Schema:
type metaData struct {
Description string `json:"description"`
Current float64 `json:"current"`
Sum float64 `json:"sum"`
Mean float64 `json:"mean"`
Stddev float64 `json:"stddev"`
Min float64 `json:"min"`
Max float64 `json:"max"`
}
type (
metaData struct {
Current *float64 `json:"current"`
Sum *float64 `json:"sum"`
Mean *float64 `json:"mean"`
Stddev *float64 `json:"stddev"`
Min *float64 `json:"min"`
Max *float64 `json:"max"`
Value *float64 `json:"value"`
}
type Stats struct {
Couchdb struct {
AuthCacheMisses metaData `json:"auth_cache_misses"`
DatabaseWrites metaData `json:"database_writes"`
OpenDatabases metaData `json:"open_databases"`
AuthCacheHits metaData `json:"auth_cache_hits"`
RequestTime metaData `json:"request_time"`
DatabaseReads metaData `json:"database_reads"`
OpenOsFiles metaData `json:"open_os_files"`
} `json:"couchdb"`
HttpdRequestMethods struct {
oldValue struct {
Value metaData `json:"value"`
metaData
}
couchdb struct {
AuthCacheHits metaData `json:"auth_cache_hits"`
AuthCacheMisses metaData `json:"auth_cache_misses"`
DatabaseWrites metaData `json:"database_writes"`
DatabaseReads metaData `json:"database_reads"`
OpenDatabases metaData `json:"open_databases"`
OpenOsFiles metaData `json:"open_os_files"`
RequestTime oldValue `json:"request_time"`
HttpdRequestMethods httpdRequestMethods `json:"httpd_request_methods"`
HttpdStatusCodes httpdStatusCodes `json:"httpd_status_codes"`
}
httpdRequestMethods struct {
Put metaData `json:"PUT"`
Get metaData `json:"GET"`
Copy metaData `json:"COPY"`
Delete metaData `json:"DELETE"`
Post metaData `json:"POST"`
Head metaData `json:"HEAD"`
} `json:"httpd_request_methods"`
HttpdStatusCodes struct {
}
httpdStatusCodes struct {
Status200 metaData `json:"200"`
Status201 metaData `json:"201"`
Status202 metaData `json:"202"`
@@ -54,19 +62,31 @@ type Stats struct {
Status409 metaData `json:"409"`
Status412 metaData `json:"412"`
Status500 metaData `json:"500"`
} `json:"httpd_status_codes"`
Httpd struct {
ClientsRequestingChanges metaData `json:"clients_requesting_changes"`
TemporaryViewReads metaData `json:"temporary_view_reads"`
Requests metaData `json:"requests"`
BulkRequests metaData `json:"bulk_requests"`
ViewReads metaData `json:"view_reads"`
} `json:"httpd"`
}
}
type CouchDB struct {
HOSTs []string `toml:"hosts"`
}
httpd struct {
BulkRequests metaData `json:"bulk_requests"`
Requests metaData `json:"requests"`
TemporaryViewReads metaData `json:"temporary_view_reads"`
ViewReads metaData `json:"view_reads"`
ClientsRequestingChanges metaData `json:"clients_requesting_changes"`
}
Stats struct {
Couchdb couchdb `json:"couchdb"`
HttpdRequestMethods httpdRequestMethods `json:"httpd_request_methods"`
HttpdStatusCodes httpdStatusCodes `json:"httpd_status_codes"`
Httpd httpd `json:"httpd"`
}
CouchDB struct {
Hosts []string `toml:"hosts"`
BasicUsername string `toml:"basic_username"`
BasicPassword string `toml:"basic_password"`
client *http.Client
}
)
func (*CouchDB) Description() string {
return "Read CouchDB Stats from one or more servers"
@@ -75,14 +95,18 @@ func (*CouchDB) Description() string {
func (*CouchDB) SampleConfig() string {
return `
## Works with CouchDB stats endpoints out of the box
## Multiple HOSTs from which to read CouchDB stats:
## Multiple Hosts from which to read CouchDB stats:
hosts = ["http://localhost:8086/_stats"]
## Use HTTP Basic Authentication.
# basic_username = "telegraf"
# basic_password = "p@ssw0rd"
`
}
func (c *CouchDB) Gather(accumulator telegraf.Accumulator) error {
var wg sync.WaitGroup
for _, u := range c.HOSTs {
for _, u := range c.Hosts {
wg.Add(1)
go func(host string) {
defer wg.Done()
@@ -97,67 +121,135 @@ func (c *CouchDB) Gather(accumulator telegraf.Accumulator) error {
return nil
}
var tr = &http.Transport{
ResponseHeaderTimeout: time.Duration(3 * time.Second),
}
var client = &http.Client{
Transport: tr,
Timeout: time.Duration(4 * time.Second),
}
func (c *CouchDB) fetchAndInsertData(accumulator telegraf.Accumulator, host string) error {
if c.client == nil {
c.client = &http.Client{
Transport: &http.Transport{
ResponseHeaderTimeout: time.Duration(3 * time.Second),
},
Timeout: time.Duration(4 * time.Second),
}
}
response, error := client.Get(host)
req, err := http.NewRequest("GET", host, nil)
if err != nil {
return err
}
if c.BasicUsername != "" || c.BasicPassword != "" {
req.SetBasicAuth(c.BasicUsername, c.BasicPassword)
}
response, error := c.client.Do(req)
if error != nil {
return error
}
defer response.Body.Close()
var stats Stats
if response.StatusCode != 200 {
return fmt.Errorf("Failed to get stats from couchdb: HTTP responded %d", response.StatusCode)
}
stats := Stats{}
decoder := json.NewDecoder(response.Body)
decoder.Decode(&stats)
fields := map[string]interface{}{}
// for couchdb 2.0 API changes
requestTime := metaData{
Current: stats.Couchdb.RequestTime.Current,
Sum: stats.Couchdb.RequestTime.Sum,
Mean: stats.Couchdb.RequestTime.Mean,
Stddev: stats.Couchdb.RequestTime.Stddev,
Min: stats.Couchdb.RequestTime.Min,
Max: stats.Couchdb.RequestTime.Max,
}
httpdRequestMethodsPut := stats.HttpdRequestMethods.Put
httpdRequestMethodsGet := stats.HttpdRequestMethods.Get
httpdRequestMethodsCopy := stats.HttpdRequestMethods.Copy
httpdRequestMethodsDelete := stats.HttpdRequestMethods.Delete
httpdRequestMethodsPost := stats.HttpdRequestMethods.Post
httpdRequestMethodsHead := stats.HttpdRequestMethods.Head
httpdStatusCodesStatus200 := stats.HttpdStatusCodes.Status200
httpdStatusCodesStatus201 := stats.HttpdStatusCodes.Status201
httpdStatusCodesStatus202 := stats.HttpdStatusCodes.Status202
httpdStatusCodesStatus301 := stats.HttpdStatusCodes.Status301
httpdStatusCodesStatus304 := stats.HttpdStatusCodes.Status304
httpdStatusCodesStatus400 := stats.HttpdStatusCodes.Status400
httpdStatusCodesStatus401 := stats.HttpdStatusCodes.Status401
httpdStatusCodesStatus403 := stats.HttpdStatusCodes.Status403
httpdStatusCodesStatus404 := stats.HttpdStatusCodes.Status404
httpdStatusCodesStatus405 := stats.HttpdStatusCodes.Status405
httpdStatusCodesStatus409 := stats.HttpdStatusCodes.Status409
httpdStatusCodesStatus412 := stats.HttpdStatusCodes.Status412
httpdStatusCodesStatus500 := stats.HttpdStatusCodes.Status500
// Detect the CouchDB 2.x schema: 2.x nests the httpd stats under "couchdb" and reports a single "value" per counter instead of current/sum/mean/stddev/min/max.
if stats.Couchdb.HttpdRequestMethods.Get.Value != nil {
requestTime = stats.Couchdb.RequestTime.Value
httpdRequestMethodsPut = stats.Couchdb.HttpdRequestMethods.Put
httpdRequestMethodsGet = stats.Couchdb.HttpdRequestMethods.Get
httpdRequestMethodsCopy = stats.Couchdb.HttpdRequestMethods.Copy
httpdRequestMethodsDelete = stats.Couchdb.HttpdRequestMethods.Delete
httpdRequestMethodsPost = stats.Couchdb.HttpdRequestMethods.Post
httpdRequestMethodsHead = stats.Couchdb.HttpdRequestMethods.Head
httpdStatusCodesStatus200 = stats.Couchdb.HttpdStatusCodes.Status200
httpdStatusCodesStatus201 = stats.Couchdb.HttpdStatusCodes.Status201
httpdStatusCodesStatus202 = stats.Couchdb.HttpdStatusCodes.Status202
httpdStatusCodesStatus301 = stats.Couchdb.HttpdStatusCodes.Status301
httpdStatusCodesStatus304 = stats.Couchdb.HttpdStatusCodes.Status304
httpdStatusCodesStatus400 = stats.Couchdb.HttpdStatusCodes.Status400
httpdStatusCodesStatus401 = stats.Couchdb.HttpdStatusCodes.Status401
httpdStatusCodesStatus403 = stats.Couchdb.HttpdStatusCodes.Status403
httpdStatusCodesStatus404 = stats.Couchdb.HttpdStatusCodes.Status404
httpdStatusCodesStatus405 = stats.Couchdb.HttpdStatusCodes.Status405
httpdStatusCodesStatus409 = stats.Couchdb.HttpdStatusCodes.Status409
httpdStatusCodesStatus412 = stats.Couchdb.HttpdStatusCodes.Status412
httpdStatusCodesStatus500 = stats.Couchdb.HttpdStatusCodes.Status500
}
// CouchDB meta stats:
c.MapCopy(fields, c.generateFields("couchdb_auth_cache_misses", stats.Couchdb.AuthCacheMisses))
c.MapCopy(fields, c.generateFields("couchdb_database_writes", stats.Couchdb.DatabaseWrites))
c.MapCopy(fields, c.generateFields("couchdb_open_databases", stats.Couchdb.OpenDatabases))
c.MapCopy(fields, c.generateFields("couchdb_auth_cache_hits", stats.Couchdb.AuthCacheHits))
c.MapCopy(fields, c.generateFields("couchdb_request_time", stats.Couchdb.RequestTime))
c.MapCopy(fields, c.generateFields("couchdb_database_reads", stats.Couchdb.DatabaseReads))
c.MapCopy(fields, c.generateFields("couchdb_open_os_files", stats.Couchdb.OpenOsFiles))
c.generateFields(fields, "couchdb_auth_cache_misses", stats.Couchdb.AuthCacheMisses)
c.generateFields(fields, "couchdb_database_writes", stats.Couchdb.DatabaseWrites)
c.generateFields(fields, "couchdb_open_databases", stats.Couchdb.OpenDatabases)
c.generateFields(fields, "couchdb_auth_cache_hits", stats.Couchdb.AuthCacheHits)
c.generateFields(fields, "couchdb_request_time", requestTime)
c.generateFields(fields, "couchdb_database_reads", stats.Couchdb.DatabaseReads)
c.generateFields(fields, "couchdb_open_os_files", stats.Couchdb.OpenOsFiles)
// http request methods stats:
c.MapCopy(fields, c.generateFields("httpd_request_methods_put", stats.HttpdRequestMethods.Put))
c.MapCopy(fields, c.generateFields("httpd_request_methods_get", stats.HttpdRequestMethods.Get))
c.MapCopy(fields, c.generateFields("httpd_request_methods_copy", stats.HttpdRequestMethods.Copy))
c.MapCopy(fields, c.generateFields("httpd_request_methods_delete", stats.HttpdRequestMethods.Delete))
c.MapCopy(fields, c.generateFields("httpd_request_methods_post", stats.HttpdRequestMethods.Post))
c.MapCopy(fields, c.generateFields("httpd_request_methods_head", stats.HttpdRequestMethods.Head))
c.generateFields(fields, "httpd_request_methods_put", httpdRequestMethodsPut)
c.generateFields(fields, "httpd_request_methods_get", httpdRequestMethodsGet)
c.generateFields(fields, "httpd_request_methods_copy", httpdRequestMethodsCopy)
c.generateFields(fields, "httpd_request_methods_delete", httpdRequestMethodsDelete)
c.generateFields(fields, "httpd_request_methods_post", httpdRequestMethodsPost)
c.generateFields(fields, "httpd_request_methods_head", httpdRequestMethodsHead)
// status code stats:
c.MapCopy(fields, c.generateFields("httpd_status_codes_200", stats.HttpdStatusCodes.Status200))
c.MapCopy(fields, c.generateFields("httpd_status_codes_201", stats.HttpdStatusCodes.Status201))
c.MapCopy(fields, c.generateFields("httpd_status_codes_202", stats.HttpdStatusCodes.Status202))
c.MapCopy(fields, c.generateFields("httpd_status_codes_301", stats.HttpdStatusCodes.Status301))
c.MapCopy(fields, c.generateFields("httpd_status_codes_304", stats.HttpdStatusCodes.Status304))
c.MapCopy(fields, c.generateFields("httpd_status_codes_400", stats.HttpdStatusCodes.Status400))
c.MapCopy(fields, c.generateFields("httpd_status_codes_401", stats.HttpdStatusCodes.Status401))
c.MapCopy(fields, c.generateFields("httpd_status_codes_403", stats.HttpdStatusCodes.Status403))
c.MapCopy(fields, c.generateFields("httpd_status_codes_404", stats.HttpdStatusCodes.Status404))
c.MapCopy(fields, c.generateFields("httpd_status_codes_405", stats.HttpdStatusCodes.Status405))
c.MapCopy(fields, c.generateFields("httpd_status_codes_409", stats.HttpdStatusCodes.Status409))
c.MapCopy(fields, c.generateFields("httpd_status_codes_412", stats.HttpdStatusCodes.Status412))
c.MapCopy(fields, c.generateFields("httpd_status_codes_500", stats.HttpdStatusCodes.Status500))
c.generateFields(fields, "httpd_status_codes_200", httpdStatusCodesStatus200)
c.generateFields(fields, "httpd_status_codes_201", httpdStatusCodesStatus201)
c.generateFields(fields, "httpd_status_codes_202", httpdStatusCodesStatus202)
c.generateFields(fields, "httpd_status_codes_301", httpdStatusCodesStatus301)
c.generateFields(fields, "httpd_status_codes_304", httpdStatusCodesStatus304)
c.generateFields(fields, "httpd_status_codes_400", httpdStatusCodesStatus400)
c.generateFields(fields, "httpd_status_codes_401", httpdStatusCodesStatus401)
c.generateFields(fields, "httpd_status_codes_403", httpdStatusCodesStatus403)
c.generateFields(fields, "httpd_status_codes_404", httpdStatusCodesStatus404)
c.generateFields(fields, "httpd_status_codes_405", httpdStatusCodesStatus405)
c.generateFields(fields, "httpd_status_codes_409", httpdStatusCodesStatus409)
c.generateFields(fields, "httpd_status_codes_412", httpdStatusCodesStatus412)
c.generateFields(fields, "httpd_status_codes_500", httpdStatusCodesStatus500)
// httpd stats:
c.MapCopy(fields, c.generateFields("httpd_clients_requesting_changes", stats.Httpd.ClientsRequestingChanges))
c.MapCopy(fields, c.generateFields("httpd_temporary_view_reads", stats.Httpd.TemporaryViewReads))
c.MapCopy(fields, c.generateFields("httpd_requests", stats.Httpd.Requests))
c.MapCopy(fields, c.generateFields("httpd_bulk_requests", stats.Httpd.BulkRequests))
c.MapCopy(fields, c.generateFields("httpd_view_reads", stats.Httpd.ViewReads))
c.generateFields(fields, "httpd_clients_requesting_changes", stats.Httpd.ClientsRequestingChanges)
c.generateFields(fields, "httpd_temporary_view_reads", stats.Httpd.TemporaryViewReads)
c.generateFields(fields, "httpd_requests", stats.Httpd.Requests)
c.generateFields(fields, "httpd_bulk_requests", stats.Httpd.BulkRequests)
c.generateFields(fields, "httpd_view_reads", stats.Httpd.ViewReads)
tags := map[string]string{
"server": host,
@@ -166,34 +258,39 @@ func (c *CouchDB) fetchAndInsertData(accumulator telegraf.Accumulator, host stri
return nil
}
func (*CouchDB) MapCopy(dst, src interface{}) {
dv, sv := reflect.ValueOf(dst), reflect.ValueOf(src)
for _, k := range sv.MapKeys() {
dv.SetMapIndex(k, sv.MapIndex(k))
func (c *CouchDB) generateFields(fields map[string]interface{}, prefix string, obj metaData) {
if obj.Value != nil {
fields[prefix+"_value"] = *obj.Value
}
}
func (*CouchDB) safeCheck(value interface{}) interface{} {
if value == nil {
return 0.0
if obj.Current != nil {
fields[prefix+"_current"] = *obj.Current
}
return value
}
func (c *CouchDB) generateFields(prefix string, obj metaData) map[string]interface{} {
fields := map[string]interface{}{
prefix + "_current": c.safeCheck(obj.Current),
prefix + "_sum": c.safeCheck(obj.Sum),
prefix + "_mean": c.safeCheck(obj.Mean),
prefix + "_stddev": c.safeCheck(obj.Stddev),
prefix + "_min": c.safeCheck(obj.Min),
prefix + "_max": c.safeCheck(obj.Max),
if obj.Sum != nil {
fields[prefix+"_sum"] = *obj.Sum
}
if obj.Mean != nil {
fields[prefix+"_mean"] = *obj.Mean
}
if obj.Stddev != nil {
fields[prefix+"_stddev"] = *obj.Stddev
}
if obj.Min != nil {
fields[prefix+"_min"] = *obj.Min
}
if obj.Max != nil {
fields[prefix+"_max"] = *obj.Max
}
return fields
}
func init() {
inputs.Add("couchdb", func() telegraf.Input {
return &CouchDB{}
return &CouchDB{
client: &http.Client{
Transport: &http.Transport{
ResponseHeaderTimeout: time.Duration(3 * time.Second),
},
Timeout: time.Duration(4 * time.Second),
},
}
})
}
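The rewritten `generateFields` switches from `safeCheck`'s nil-to-`0.0` coercion to pointer guards, so a statistic that is absent from the CouchDB payload simply produces no field instead of a misleading zero. A minimal self-contained sketch of the pattern (with the `metaData` field set trimmed for brevity):

```go
package main

import "fmt"

// metaData mirrors the CouchDB stats shape: pointer fields distinguish
// "value absent in the JSON payload" (nil) from a legitimate zero.
type metaData struct {
	Current *float64 `json:"current"`
	Sum     *float64 `json:"sum"`
	Mean    *float64 `json:"mean"`
}

// generateFields emits only the sub-values that were actually present,
// instead of coercing missing ones to 0.0 as the old safeCheck did.
func generateFields(fields map[string]interface{}, prefix string, obj metaData) {
	if obj.Current != nil {
		fields[prefix+"_current"] = *obj.Current
	}
	if obj.Sum != nil {
		fields[prefix+"_sum"] = *obj.Sum
	}
	if obj.Mean != nil {
		fields[prefix+"_mean"] = *obj.Mean
	}
}

func main() {
	sum := 42.0
	fields := map[string]interface{}{}
	generateFields(fields, "httpd_requests", metaData{Sum: &sum})
	fmt.Println(fields) // map[httpd_requests_sum:42]
}
```

The `init` change likewise pins both a response-header timeout and an overall client timeout, so a stalled CouchDB endpoint can no longer hold up a collection interval indefinitely.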

View File

@@ -1,12 +1,13 @@
package couchdb_test
import (
"github.com/influxdata/telegraf/plugins/inputs/couchdb"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
"net/http"
"net/http/httptest"
"testing"
"github.com/influxdata/telegraf/plugins/inputs/couchdb"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
func TestBasic(t *testing.T) {
@@ -312,7 +313,7 @@ func TestBasic(t *testing.T) {
defer fakeServer.Close()
plugin := &couchdb.CouchDB{
HOSTs: []string{fakeServer.URL + "/_stats"},
Hosts: []string{fakeServer.URL + "/_stats"},
}
var acc testutil.Accumulator

View File

@@ -0,0 +1,9 @@
[agent]
interval="1s"
flush_interval="1s"
[[inputs.couchdb]]
hosts = ["http://couchdb16:5984/_stats", "http://couchdb22:5984/_node/_local/_stats"]
[[outputs.file]]
files = ["stdout"]

View File

@@ -0,0 +1,67 @@
# CPU Input Plugin
The `cpu` plugin gathers metrics on the system CPUs.
### Configuration
```toml
[[inputs.cpu]]
## Whether to report per-cpu stats or not
percpu = true
## Whether to report total system cpu stats or not
totalcpu = true
## If true, collect raw CPU time metrics.
collect_cpu_time = false
## If true, compute and report the sum of all non-idle CPU states.
report_active = false
```
### Metrics
On Linux, consult `man proc` for details on the meanings of these values.
- cpu
- tags:
- cpu (CPU ID or `cpu-total`)
- fields:
- time_user (float)
- time_system (float)
- time_idle (float)
- time_active (float)
- time_nice (float)
- time_iowait (float)
- time_irq (float)
- time_softirq (float)
- time_steal (float)
- time_guest (float)
- time_guest_nice (float)
- usage_user (float, percent)
- usage_system (float, percent)
- usage_idle (float, percent)
- usage_active (float, percent)
- usage_nice (float, percent)
- usage_iowait (float, percent)
- usage_irq (float, percent)
- usage_softirq (float, percent)
- usage_steal (float, percent)
- usage_guest (float, percent)
- usage_guest_nice (float, percent)
### Troubleshooting
On Linux systems, CPU times are read from `/proc/stat`.
Percentages are computed from the difference between the two most recent samples.
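Roughly, each usage percentage is the growth of one cumulative counter divided by the growth of the total over the sampling interval. A minimal sketch of that computation, with a trimmed-down set of CPU states (the real plugin also tracks nice, irq, softirq, steal, guest and guest_nice via gopsutil):

```go
package main

import "fmt"

// sample holds cumulative CPU times (seconds since boot), as read from
// /proc/stat; only a few states are shown here for brevity.
type sample struct {
	User, System, Idle, Iowait float64
}

func total(s sample) float64 {
	return s.User + s.System + s.Idle + s.Iowait
}

func main() {
	prev := sample{User: 100, System: 50, Idle: 800, Iowait: 10}
	cur := sample{User: 104, System: 51, Idle: 840, Iowait: 10}

	dt := total(cur) - total(prev) // elapsed CPU time between the two samples
	fmt.Printf("usage_user=%.2f%%\n", (cur.User-prev.User)/dt*100)
	fmt.Printf("usage_idle=%.2f%%\n", (cur.Idle-prev.Idle)/dt*100)
	// usage_active is everything that is not idle:
	fmt.Printf("usage_active=%.2f%%\n", (dt-(cur.Idle-prev.Idle))/dt*100)
}
```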
### Example Output
```
cpu,cpu=cpu0,host=loaner time_active=202224.15999999992,time_guest=30250.35,time_guest_nice=0,time_idle=1527035.04,time_iowait=1352,time_irq=0,time_nice=169.28,time_softirq=6281.4,time_steal=0,time_system=40097.14,time_user=154324.34 1568760922000000000
cpu,cpu=cpu0,host=loaner usage_active=31.249999981810106,usage_guest=2.083333333080696,usage_guest_nice=0,usage_idle=68.7500000181899,usage_iowait=0,usage_irq=0,usage_nice=0,usage_softirq=0,usage_steal=0,usage_system=4.166666666161392,usage_user=25.000000002273737 1568760922000000000
cpu,cpu=cpu1,host=loaner time_active=201890.02000000002,time_guest=30508.41,time_guest_nice=0,time_idle=264641.18,time_iowait=210.44,time_irq=0,time_nice=181.75,time_softirq=4537.88,time_steal=0,time_system=39480.7,time_user=157479.25 1568760922000000000
cpu,cpu=cpu1,host=loaner usage_active=12.500000010610771,usage_guest=2.0833333328280585,usage_guest_nice=0,usage_idle=87.49999998938922,usage_iowait=0,usage_irq=0,usage_nice=0,usage_softirq=2.0833333332070145,usage_steal=0,usage_system=4.166666665656117,usage_user=4.166666666414029 1568760922000000000
cpu,cpu=cpu2,host=loaner time_active=201382.78999999998,time_guest=30325.8,time_guest_nice=0,time_idle=264686.63,time_iowait=202.77,time_irq=0,time_nice=162.81,time_softirq=3378.34,time_steal=0,time_system=39270.59,time_user=158368.28 1568760922000000000
cpu,cpu=cpu2,host=loaner usage_active=15.999999993480742,usage_guest=1.9999999999126885,usage_guest_nice=0,usage_idle=84.00000000651926,usage_iowait=0,usage_irq=0,usage_nice=0,usage_softirq=2.0000000002764864,usage_steal=0,usage_system=3.999999999825377,usage_user=7.999999998923158 1568760922000000000
cpu,cpu=cpu3,host=loaner time_active=198953.51000000007,time_guest=30344.43,time_guest_nice=0,time_idle=265504.09,time_iowait=187.64,time_irq=0,time_nice=197.47,time_softirq=2301.47,time_steal=0,time_system=39313.73,time_user=156953.2 1568760922000000000
cpu,cpu=cpu3,host=loaner usage_active=10.41666667424579,usage_guest=0,usage_guest_nice=0,usage_idle=89.58333332575421,usage_iowait=0,usage_irq=0,usage_nice=0,usage_softirq=0,usage_steal=0,usage_system=4.166666666666667,usage_user=6.249999998484175 1568760922000000000
cpu,cpu=cpu-total,host=loaner time_active=804450.5299999998,time_guest=121429,time_guest_nice=0,time_idle=2321866.96,time_iowait=1952.86,time_irq=0,time_nice=711.32,time_softirq=16499.1,time_steal=0,time_system=158162.17,time_user=627125.08 1568760922000000000
cpu,cpu=cpu-total,host=loaner usage_active=17.616580305880305,usage_guest=1.036269430422946,usage_guest_nice=0,usage_idle=82.3834196941197,usage_iowait=0,usage_irq=0,usage_nice=0,usage_softirq=1.0362694300459534,usage_steal=0,usage_system=4.145077721691784,usage_user=11.398963731636465 1568760922000000000
```

View File

@@ -1,4 +1,4 @@
package system
package cpu
import (
"fmt"
@@ -6,11 +6,12 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/inputs/system"
"github.com/shirou/gopsutil/cpu"
)
type CPUStats struct {
ps PS
ps system.PS
lastStats map[string]cpu.TimesStat
PerCPU bool `toml:"percpu"`
@@ -19,7 +20,7 @@ type CPUStats struct {
ReportActive bool `toml:"report_active"`
}
func NewCPUStats(ps PS) *CPUStats {
func NewCPUStats(ps system.PS) *CPUStats {
return &CPUStats{
ps: ps,
CollectCPUTime: true,
@@ -146,7 +147,7 @@ func init() {
return &CPUStats{
PerCPU: true,
TotalCPU: true,
ps: newSystemPS(),
ps: system.NewSystemPS(),
}
})
}
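The package move also makes the plugin's dependency on the `system` package explicit: `CPUStats` receives its stat source through the `system.PS` interface, which is what lets the tests below substitute `system.MockPS`. A sketch of that injection pattern, with a simplified hypothetical signature standing in for the real `CPUTimes` (which returns `[]cpu.TimesStat`):

```go
package main

import "fmt"

// PS loosely mirrors the system.PS interface: an abstraction over the
// gopsutil calls that tests can replace with a mock.
type PS interface {
	CPUTimes(perCPU, totalCPU bool) ([]float64, error) // simplified signature
}

type realPS struct{}

func (realPS) CPUTimes(perCPU, totalCPU bool) ([]float64, error) {
	return []float64{1.0}, nil // the real implementation calls gopsutil here
}

type CPUStats struct {
	ps PS
}

func NewCPUStats(ps PS) *CPUStats { return &CPUStats{ps: ps} }

func main() {
	cs := NewCPUStats(realPS{}) // tests would pass a mock instead
	times, _ := cs.ps.CPUTimes(true, true)
	fmt.Println(times)
}
```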

View File

@@ -1,9 +1,10 @@
package system
package cpu
import (
"fmt"
"testing"
"github.com/influxdata/telegraf/plugins/inputs/system"
"github.com/influxdata/telegraf/testutil"
"github.com/shirou/gopsutil/cpu"
"github.com/stretchr/testify/assert"
@@ -11,7 +12,7 @@ import (
)
func TestCPUStats(t *testing.T) {
var mps MockPS
var mps system.MockPS
defer mps.AssertExpectations(t)
var acc testutil.Accumulator
@@ -54,7 +55,7 @@ func TestCPUStats(t *testing.T) {
err := cs.Gather(&acc)
require.NoError(t, err)
// Computed values are checked with delta > 0 because of floating point arithmatic
// Computed values are checked with delta > 0 because of floating point arithmetic
// imprecision
assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 8.8, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_system", 8.2, 0, cputags)
@@ -68,7 +69,7 @@ func TestCPUStats(t *testing.T) {
assertContainsTaggedFloat(t, &acc, "cpu", "time_guest", 3.1, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_guest_nice", 0.324, 0, cputags)
mps2 := MockPS{}
mps2 := system.MockPS{}
mps2.On("CPUTimes").Return([]cpu.TimesStat{cts2}, nil)
cs.ps = &mps2
@@ -101,7 +102,7 @@ func TestCPUStats(t *testing.T) {
assertContainsTaggedFloat(t, &acc, "cpu", "usage_guest_nice", 2.2, 0.0005, cputags)
}
// Asserts that a given accumulator contains a measurment of type float64 with
// Asserts that a given accumulator contains a measurement of type float64 with
// specific tags within a certain distance of a given expected value. Asserts a failure
// if the measurement is of the wrong type, or if no matching measurements are found
//
@@ -112,7 +113,7 @@ func TestCPUStats(t *testing.T) {
// expectedValue float64 : Value to search for within the measurement
// delta float64 : Maximum acceptable distance of an accumulated value
// from the expectedValue parameter. Useful when
// floating-point arithmatic imprecision makes looking
// floating-point arithmetic imprecision makes looking
// for an exact match impractical
// tags map[string]string : Tag set the found measurement must have. Set to nil to
// ignore the tag set.
@@ -153,8 +154,8 @@ func assertContainsTaggedFloat(
// TestCPUCountIncrease tests that no errors are encountered if the number of
// CPUs increases, as reported with LXC.
func TestCPUCountIncrease(t *testing.T) {
var mps MockPS
var mps2 MockPS
var mps system.MockPS
var mps2 system.MockPS
var acc testutil.Accumulator
var err error
@@ -162,7 +163,7 @@ func TestCPUCountIncrease(t *testing.T) {
mps.On("CPUTimes").Return(
[]cpu.TimesStat{
cpu.TimesStat{
{
CPU: "cpu0",
},
}, nil)
@@ -172,10 +173,10 @@ func TestCPUCountIncrease(t *testing.T) {
mps2.On("CPUTimes").Return(
[]cpu.TimesStat{
cpu.TimesStat{
{
CPU: "cpu0",
},
cpu.TimesStat{
{
CPU: "cpu1",
},
}, nil)
@@ -188,7 +189,7 @@ func TestCPUCountIncrease(t *testing.T) {
// TestCPUTimesDecrease tests that telegraf continues to work after the
// CPU times decrease, which seems to occur when a Linux system is suspended.
func TestCPUTimesDecrease(t *testing.T) {
var mps MockPS
var mps system.MockPS
defer mps.AssertExpectations(t)
var acc testutil.Accumulator
@@ -224,13 +225,13 @@ func TestCPUTimesDecrease(t *testing.T) {
err := cs.Gather(&acc)
require.NoError(t, err)
// Computed values are checked with delta > 0 because of floating point arithmatic
// Computed values are checked with delta > 0 because of floating point arithmetic
// imprecision
assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 18, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 80, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 2, 0, cputags)
mps2 := MockPS{}
mps2 := system.MockPS{}
mps2.On("CPUTimes").Return([]cpu.TimesStat{cts2}, nil)
cs.ps = &mps2
@@ -238,7 +239,7 @@ func TestCPUTimesDecrease(t *testing.T) {
err = cs.Gather(&acc)
require.Error(t, err)
mps3 := MockPS{}
mps3 := system.MockPS{}
mps3.On("CPUTimes").Return([]cpu.TimesStat{cts3}, nil)
cs.ps = &mps3
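The delta-based assertions in these tests exist because the computed percentages carry floating-point noise, so an exact comparison would fail intermittently. The same idea with testify's stock helper, as a sketch (it would live in a `_test.go` file and run under `go test`):

```go
package example_test

import (
	"testing"

	"github.com/stretchr/testify/require"
)

func TestUsagePercent(t *testing.T) {
	// An exact comparison on a computed percentage fails on floating-point
	// noise, so assert within a small delta instead.
	got := 25.000000002273737 // e.g. usage_user from the example output above
	require.InDelta(t, 25.0, got, 0.0005)
}
```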

View File

@@ -115,8 +115,8 @@ func TestGetSummary(t *testing.T) {
expectedValue: &Summary{
Cluster: "a",
Slaves: []Slave{
Slave{ID: "a"},
Slave{ID: "b"},
{ID: "a"},
{ID: "b"},
},
},
expectedError: nil,

View File

@@ -385,8 +385,8 @@ func TestGatherFilterNode(t *testing.T) {
return &Summary{
Cluster: "a",
Slaves: []Slave{
Slave{ID: "x"},
Slave{ID: "y"},
{ID: "x"},
{ID: "y"},
},
}, nil
},
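This hunk and the previous one apply the `gofmt -s` composite-literal simplification: inside a `[]Slave` literal the element type is implied, so repeating it on every element is redundant. A small self-contained illustration:

```go
package main

import "fmt"

type Slave struct{ ID string }

func main() {
	// Both slices are identical; gofmt -s rewrites the first form
	// into the second because the element type is implied.
	verbose := []Slave{Slave{ID: "a"}, Slave{ID: "b"}}
	concise := []Slave{{ID: "a"}, {ID: "b"}}
	fmt.Println(verbose, concise)
}
```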

View File

@@ -9,14 +9,13 @@ https://en.wikipedia.org/wiki/Df_(Unix) for more details.
### Configuration:
```toml
# Read metrics about disk usage by mount point
[[inputs.disk]]
## By default stats will be gathered for all mount points.
## Setting mount_points will restrict the stats to only the specified mount points.
# mount_points = ["/"]
## Ignore mount points by filesystem type.
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"]
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
```
#### Docker container
@@ -49,6 +48,22 @@ docker run -v /:/hostfs:ro -e HOST_MOUNT_PREFIX=/hostfs -e HOST_PROC=/hostfs/pro
- inodes_total (integer, files)
- inodes_used (integer, files)
### Troubleshooting
On Linux, the list of disks is taken from the `/proc/self/mounts` file and a
[statfs] call is made on the second column. If any expected filesystems are
missing, ensure that the `telegraf` user can read these files:
```
$ sudo -u telegraf cat /proc/self/mounts | grep sda2
/dev/sda2 /home ext4 rw,relatime,data=ordered 0 0
$ sudo -u telegraf stat /home
```
If the default permissions are not sufficient, POSIX ACLs can be used to grant additional access:
```
sudo setfacl -R -m u:telegraf:X /var/lib/docker/volumes/
```
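For reference, the discovery path described above can be reproduced outside telegraf. This is not the plugin's actual code (which goes through gopsutil), just a sketch of the same mechanism using `golang.org/x/sys/unix`:

```go
package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"

	"golang.org/x/sys/unix"
)

func main() {
	// The disk plugin's mount list ultimately comes from this file.
	f, err := os.Open("/proc/self/mounts")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		fields := strings.Fields(sc.Text())
		if len(fields) < 3 {
			continue
		}
		mountPoint, fsType := fields[1], fields[2] // statfs targets the second column
		var st unix.Statfs_t
		if err := unix.Statfs(mountPoint, &st); err != nil {
			// Permission errors here are exactly what the ACL advice above fixes.
			fmt.Printf("%s (%s): %v\n", mountPoint, fsType, err)
			continue
		}
		free := st.Bavail * uint64(st.Bsize)
		fmt.Printf("%s (%s): %d bytes free\n", mountPoint, fsType, free)
	}
}
```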
### Example Output:
```
@@ -58,4 +73,4 @@ disk,fstype=autofs,mode=rw,path=/net free=0i,inodes_free=0i,inodes_total=0i,inod
disk,fstype=autofs,mode=rw,path=/home free=0i,inodes_free=0i,inodes_total=0i,inodes_used=0i,total=0i,used=0i,used_percent=0 1453832006274169688
```
[statfs]: http://man7.org/linux/man-pages/man2/statfs.2.html

Some files were not shown because too many files have changed in this diff.