Add a new input plugin for InfiniBand card/port statistics (#6631)
This commit is contained in:
committed by
Daniel Nelson
parent
93f149f126
commit
182104f95e
@@ -56,6 +56,7 @@ import (
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/http_response"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/httpjson"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/icinga2"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/infiniband"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb_listener"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/internal"
|
||||
|
||||
29
plugins/inputs/infiniband/README.md
Normal file
29
plugins/inputs/infiniband/README.md
Normal file
@@ -0,0 +1,29 @@
|
||||
# InfiniBand Input Plugin
|
||||
|
||||
This plugin gathers statistics for all InfiniBand devices and ports on the system. These are the counters that can be found in `/sys/class/infiniband/<dev>/ports/<port>/counters/`.
|
||||
|
||||
### Configuration
|
||||
|
||||
This section contains the default TOML to configure the plugin. You can
|
||||
generate it using `telegraf --usage infiniband`.
|
||||
|
||||
```toml
|
||||
[[inputs.infiniband]]
|
||||
```
|
||||
|
||||
There are no configuration options for this plugin.
|
||||
|
||||
### Metrics
|
||||
|
||||
You can find more information about the counters that are gathered here:
|
||||
https://community.mellanox.com/s/article/understanding-mlx5-linux-counters-and-status-parameters
|
||||
|
||||
There is a simple mapping from counter -> counter value. All counter values are 64-bit integers. A separate measurement is made for each port.
|
||||
Each measurement is tagged with the device and port that it relates to. These are strings.
|
||||
|
||||
|
||||
### Example Output
|
||||
|
||||
```
|
||||
infiniband,device=mlx5_0,port=1 VL15_dropped=0i,excessive_buffer_overrun_errors=0i,link_downed=0i,link_error_recovery=0i,local_link_integrity_errors=0i,multicast_rcv_packets=0i,multicast_xmit_packets=0i,port_rcv_constraint_errors=0i,port_rcv_data=237159415345822i,port_rcv_errors=0i,port_rcv_packets=801977655075i,port_rcv_remote_physical_errors=0i,port_rcv_switch_relay_errors=0i,port_xmit_constraint_errors=0i,port_xmit_data=238334949937759i,port_xmit_discards=0i,port_xmit_packets=803162651391i,port_xmit_wait=4294967295i,symbol_error=0i,unicast_rcv_packets=801977655075i,unicast_xmit_packets=803162651391i 1573125558000000000
|
||||
```
|
||||
22
plugins/inputs/infiniband/infiniband.go
Normal file
22
plugins/inputs/infiniband/infiniband.go
Normal file
@@ -0,0 +1,22 @@
|
||||
package infiniband
|
||||
|
||||
import (
|
||||
"github.com/influxdata/telegraf"
|
||||
)
|
||||
|
||||
// Stores the configuration values for the infiniband plugin - as there are no
|
||||
// config values, this is intentionally empty
|
||||
type Infiniband struct {
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
}
|
||||
|
||||
// InfinibandConfig is the sample configuration returned by SampleConfig. It is
// the empty string.
var InfinibandConfig = ""
|
||||
|
||||
func (_ *Infiniband) SampleConfig() string {
|
||||
return InfinibandConfig
|
||||
}
|
||||
|
||||
func (_ *Infiniband) Description() string {
|
||||
return "Gets counters from all InfiniBand cards and ports installed"
|
||||
}
|
||||
59
plugins/inputs/infiniband/infiniband_linux.go
Normal file
59
plugins/inputs/infiniband/infiniband_linux.go
Normal file
@@ -0,0 +1,59 @@
|
||||
// +build linux
|
||||
|
||||
package infiniband
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/Mellanox/rdmamap"
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// Gather statistics from our infiniband cards
|
||||
func (_ *Infiniband) Gather(acc telegraf.Accumulator) error {
|
||||
|
||||
rdmaDevices := rdmamap.GetRdmaDeviceList()
|
||||
|
||||
if len(rdmaDevices) == 0 {
|
||||
return fmt.Errorf("no InfiniBand devices found in /sys/class/infiniband/")
|
||||
}
|
||||
|
||||
for _, dev := range rdmaDevices {
|
||||
devicePorts := rdmamap.GetPorts(dev)
|
||||
for _, port := range devicePorts {
|
||||
portInt, err := strconv.Atoi(port)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats, err := rdmamap.GetRdmaSysfsStats(dev, portInt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
addStats(dev, port, stats, acc)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add the statistics to the accumulator
|
||||
func addStats(dev string, port string, stats []rdmamap.RdmaStatEntry, acc telegraf.Accumulator) {
|
||||
|
||||
// Allow users to filter by card and port
|
||||
tags := map[string]string{"device": dev, "port": port}
|
||||
fields := make(map[string]interface{})
|
||||
|
||||
for _, entry := range stats {
|
||||
fields[entry.Name] = entry.Value
|
||||
}
|
||||
|
||||
acc.AddFields("infiniband", fields, tags)
|
||||
}
|
||||
|
||||
// Initialise plugin
|
||||
func init() {
|
||||
inputs.Add("infiniband", func() telegraf.Input { return &Infiniband{} })
|
||||
}
|
||||
23
plugins/inputs/infiniband/infiniband_notlinux.go
Normal file
23
plugins/inputs/infiniband/infiniband_notlinux.go
Normal file
@@ -0,0 +1,23 @@
|
||||
// +build !linux
|
||||
|
||||
package infiniband
|
||||
|
||||
import (
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
func (i *Infiniband) Init() error {
|
||||
i.Log.Warn("Current platform is not supported")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (_ *Infiniband) Gather(acc telegraf.Accumulator) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("infiniband", func() telegraf.Input {
|
||||
return &Infiniband{}
|
||||
})
|
||||
}
|
||||
134
plugins/inputs/infiniband/infiniband_test.go
Normal file
134
plugins/inputs/infiniband/infiniband_test.go
Normal file
@@ -0,0 +1,134 @@
|
||||
// +build linux
|
||||
|
||||
package infiniband
|
||||
|
||||
import (
|
||||
"github.com/Mellanox/rdmamap"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestInfiniband(t *testing.T) {
|
||||
fields := map[string]interface{}{
|
||||
"excessive_buffer_overrun_errors": uint64(0),
|
||||
"link_downed": uint64(0),
|
||||
"link_error_recovery": uint64(0),
|
||||
"local_link_integrity_errors": uint64(0),
|
||||
"multicast_rcv_packets": uint64(0),
|
||||
"multicast_xmit_packets": uint64(0),
|
||||
"port_rcv_constraint_errors": uint64(0),
|
||||
"port_rcv_data": uint64(237159415345822),
|
||||
"port_rcv_errors": uint64(0),
|
||||
"port_rcv_packets": uint64(801977655075),
|
||||
"port_rcv_remote_physical_errors": uint64(0),
|
||||
"port_rcv_switch_relay_errors": uint64(0),
|
||||
"port_xmit_constraint_errors": uint64(0),
|
||||
"port_xmit_data": uint64(238334949937759),
|
||||
"port_xmit_discards": uint64(0),
|
||||
"port_xmit_packets": uint64(803162651391),
|
||||
"port_xmit_wait": uint64(4294967295),
|
||||
"symbol_error": uint64(0),
|
||||
"unicast_rcv_packets": uint64(801977655075),
|
||||
"unicast_xmit_packets": uint64(803162651391),
|
||||
"VL15_dropped": uint64(0),
|
||||
}
|
||||
|
||||
tags := map[string]string{
|
||||
"device": "m1x5_0",
|
||||
"port": "1",
|
||||
}
|
||||
|
||||
sample_rdmastats_entries := []rdmamap.RdmaStatEntry{
|
||||
{
|
||||
Name: "excessive_buffer_overrun_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "link_downed",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "link_error_recovery",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "local_link_integrity_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "multicast_rcv_packets",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "multicast_xmit_packets",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_constraint_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_data",
|
||||
Value: uint64(237159415345822),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_packets",
|
||||
Value: uint64(801977655075),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_remote_physical_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_switch_relay_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_xmit_constraint_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_xmit_data",
|
||||
Value: uint64(238334949937759),
|
||||
},
|
||||
{
|
||||
Name: "port_xmit_discards",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_xmit_packets",
|
||||
Value: uint64(803162651391),
|
||||
},
|
||||
{
|
||||
Name: "port_xmit_wait",
|
||||
Value: uint64(4294967295),
|
||||
},
|
||||
{
|
||||
Name: "symbol_error",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "unicast_rcv_packets",
|
||||
Value: uint64(801977655075),
|
||||
},
|
||||
{
|
||||
Name: "unicast_xmit_packets",
|
||||
Value: uint64(803162651391),
|
||||
},
|
||||
{
|
||||
Name: "VL15_dropped",
|
||||
Value: uint64(0),
|
||||
},
|
||||
}
|
||||
|
||||
var acc testutil.Accumulator
|
||||
|
||||
addStats("m1x5_0", "1", sample_rdmastats_entries, &acc)
|
||||
|
||||
acc.AssertContainsTaggedFields(t, "infiniband", fields, tags)
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user