Add a new input plugin for InfiniBand card/port statistics (#6631)

This commit is contained in:
Will Furnell 2020-01-16 20:51:33 +00:00 committed by Daniel Nelson
parent 93f149f126
commit 182104f95e
8 changed files with 285 additions and 1 deletions

14
Gopkg.lock generated
View File

@ -80,6 +80,14 @@
revision = "3492b2aff5036c67228ab3c7dba3577c871db200" revision = "3492b2aff5036c67228ab3c7dba3577c871db200"
version = "v13.3.0" version = "v13.3.0"
[[projects]]
branch = "master"
digest = "1:005d83d9daaea4e3fc7b2eedf28f68ebf87df7d331a874e5d7d14f643467e7d9"
name = "github.com/Mellanox/rdmamap"
packages = ["."]
pruneopts = ""
revision = "7c3c4763a6ee6a4d624fe133135dc3a7c483111c"
[[projects]] [[projects]]
digest = "1:298712a3ee36b59c3ca91f4183bd75d174d5eaa8b4aed5072831f126e2e752f6" digest = "1:298712a3ee36b59c3ca91f4183bd75d174d5eaa8b4aed5072831f126e2e752f6"
name = "github.com/Microsoft/ApplicationInsights-Go" name = "github.com/Microsoft/ApplicationInsights-Go"
@ -1197,7 +1205,10 @@
[[projects]] [[projects]]
digest = "1:026b6ceaabbacaa147e94a63579efc3d3c73e00c73b67fa5c43ab46191ed04eb" digest = "1:026b6ceaabbacaa147e94a63579efc3d3c73e00c73b67fa5c43ab46191ed04eb"
name = "github.com/vishvananda/netlink" name = "github.com/vishvananda/netlink"
packages = ["nl"] packages = [
".",
"nl",
]
pruneopts = "" pruneopts = ""
revision = "b2de5d10e38ecce8607e6b438b6d174f389a004e" revision = "b2de5d10e38ecce8607e6b438b6d174f389a004e"
@ -1712,6 +1723,7 @@
"github.com/Azure/azure-storage-queue-go/azqueue", "github.com/Azure/azure-storage-queue-go/azqueue",
"github.com/Azure/go-autorest/autorest", "github.com/Azure/go-autorest/autorest",
"github.com/Azure/go-autorest/autorest/azure/auth", "github.com/Azure/go-autorest/autorest/azure/auth",
"github.com/Mellanox/rdmamap",
"github.com/Microsoft/ApplicationInsights-Go/appinsights", "github.com/Microsoft/ApplicationInsights-Go/appinsights",
"github.com/Shopify/sarama", "github.com/Shopify/sarama",
"github.com/StackExchange/wmi", "github.com/StackExchange/wmi",

View File

@ -297,6 +297,10 @@
branch = "master" branch = "master"
name = "github.com/cisco-ie/nx-telemetry-proto" name = "github.com/cisco-ie/nx-telemetry-proto"
[[constraint]]
branch = "master"
name = "github.com/Mellanox/rdmamap"
[[constraint]] [[constraint]]
name = "gopkg.in/ldap.v3" name = "gopkg.in/ldap.v3"
version = "3.1.0" version = "3.1.0"

View File

@ -56,6 +56,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/http_response" _ "github.com/influxdata/telegraf/plugins/inputs/http_response"
_ "github.com/influxdata/telegraf/plugins/inputs/httpjson" _ "github.com/influxdata/telegraf/plugins/inputs/httpjson"
_ "github.com/influxdata/telegraf/plugins/inputs/icinga2" _ "github.com/influxdata/telegraf/plugins/inputs/icinga2"
_ "github.com/influxdata/telegraf/plugins/inputs/infiniband"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb" _ "github.com/influxdata/telegraf/plugins/inputs/influxdb"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb_listener" _ "github.com/influxdata/telegraf/plugins/inputs/influxdb_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/internal" _ "github.com/influxdata/telegraf/plugins/inputs/internal"

View File

@ -0,0 +1,29 @@
# InfiniBand Input Plugin
This plugin gathers statistics for all InfiniBand devices and ports on the system. These are the counters that can be found in `/sys/class/infiniband/<dev>/ports/<port>/counters/`.
### Configuration
This section contains the default TOML to configure the plugin. You can
generate it using `telegraf --usage infiniband`.
```toml
[[inputs.infiniband]]
```
There are no configuration options for this plugin.
### Metrics
You can find more information about the counters that are gathered here:
https://community.mellanox.com/s/article/understanding-mlx5-linux-counters-and-status-parameters
There is a simple mapping from counter -> counter value. All counter values are 64-bit integers. A separate measurement is made for each port.
Each measurement is tagged with the device and port that it relates to. These are strings.
### Example Output
```
infiniband,device=mlx5_0,port=1 VL15_dropped=0i,excessive_buffer_overrun_errors=0i,link_downed=0i,link_error_recovery=0i,local_link_integrity_errors=0i,multicast_rcv_packets=0i,multicast_xmit_packets=0i,port_rcv_constraint_errors=0i,port_rcv_data=237159415345822i,port_rcv_errors=0i,port_rcv_packets=801977655075i,port_rcv_remote_physical_errors=0i,port_rcv_switch_relay_errors=0i,port_xmit_constraint_errors=0i,port_xmit_data=238334949937759i,port_xmit_discards=0i,port_xmit_packets=803162651391i,port_xmit_wait=4294967295i,symbol_error=0i,unicast_rcv_packets=801977655075i,unicast_xmit_packets=803162651391i 1573125558000000000
```

View File

@ -0,0 +1,22 @@
package infiniband
import (
"github.com/influxdata/telegraf"
)
// Infiniband is the plugin type registered under the "infiniband" input name.
// It stores the configuration values for the plugin - as there are no config
// values, the only field it carries is the logger.
type Infiniband struct {
// Log is excluded from the TOML configuration by its "-" tag.
Log telegraf.Logger `toml:"-"`
}
// InfinibandConfig is the sample configuration for the plugin, returned by
// SampleConfig. It is an empty string.
var InfinibandConfig = ``
// SampleConfig returns the plugin's sample configuration, i.e. the current
// value of the package-level InfinibandConfig string.
func (*Infiniband) SampleConfig() string {
	sample := InfinibandConfig
	return sample
}
// Description returns a one-line summary of what the plugin collects.
func (*Infiniband) Description() string {
	const summary = "Gets counters from all InfiniBand cards and ports installed"
	return summary
}

View File

@ -0,0 +1,59 @@
// +build linux
package infiniband
import (
"fmt"
"github.com/Mellanox/rdmamap"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"strconv"
)
// Gather collects the counters of every InfiniBand device and port reported
// by rdmamap and adds one "infiniband" measurement per port to acc.
//
// It returns an error when no devices are found. It also stops at the first
// port whose number cannot be parsed or whose counters cannot be read;
// measurements already added for earlier ports are kept. Those errors wrap the
// underlying cause and name the device and port they relate to.
func (*Infiniband) Gather(acc telegraf.Accumulator) error {
	rdmaDevices := rdmamap.GetRdmaDeviceList()
	if len(rdmaDevices) == 0 {
		return fmt.Errorf("no InfiniBand devices found in /sys/class/infiniband/")
	}

	for _, dev := range rdmaDevices {
		for _, port := range rdmamap.GetPorts(dev) {
			// Ports are listed as strings, but the stats lookup takes an
			// integer port number.
			portInt, err := strconv.Atoi(port)
			if err != nil {
				return fmt.Errorf("parsing port %q of InfiniBand device %q: %w", port, dev, err)
			}

			stats, err := rdmamap.GetRdmaSysfsStats(dev, portInt)
			if err != nil {
				return fmt.Errorf("reading counters for InfiniBand device %q port %d: %w", dev, portInt, err)
			}

			addStats(dev, port, stats, acc)
		}
	}

	return nil
}
// addStats turns one port's counter entries into a single "infiniband"
// measurement whose fields are keyed by counter name, and hands it to acc
// tagged with the device and port it was read from.
func addStats(dev string, port string, stats []rdmamap.RdmaStatEntry, acc telegraf.Accumulator) {
	counters := map[string]interface{}{}
	for idx := range stats {
		counters[stats[idx].Name] = stats[idx].Value
	}

	// Tagging by device and port allows users to filter by card and port.
	labels := map[string]string{
		"device": dev,
		"port":   port,
	}

	acc.AddFields("infiniband", counters, labels)
}
// init registers the plugin under the "infiniband" input name with a factory
// that returns a fresh, zero-valued Infiniband.
func init() {
	inputs.Add("infiniband", func() telegraf.Input {
		return new(Infiniband)
	})
}

View File

@ -0,0 +1,23 @@
// +build !linux
package infiniband
import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
// Init logs a warning that the current platform is not supported. It always
// returns nil.
func (i *Infiniband) Init() error {
	const unsupported = "Current platform is not supported"
	i.Log.Warn(unsupported)
	return nil
}
// Gather is a no-op in this build (everything except linux): it adds nothing
// to acc and always returns nil.
func (*Infiniband) Gather(acc telegraf.Accumulator) error {
	var noErr error
	return noErr
}
func init() {
inputs.Add("infiniband", func() telegraf.Input {
return &Infiniband{}
})
}

View File

@ -0,0 +1,134 @@
// +build linux
package infiniband
import (
"github.com/Mellanox/rdmamap"
"github.com/influxdata/telegraf/testutil"
"testing"
)
// TestInfiniband checks that addStats emits a single "infiniband" measurement
// whose fields map every counter name to its value and whose tags carry the
// device and port.
//
// The sample counters are listed once; both the rdmamap entries fed to
// addStats and the expected field map are derived from that list so the two
// cannot drift apart.
func TestInfiniband(t *testing.T) {
	const (
		device = "mlx5_0"
		port   = "1"
	)

	counters := []struct {
		name  string
		value uint64
	}{
		{"excessive_buffer_overrun_errors", 0},
		{"link_downed", 0},
		{"link_error_recovery", 0},
		{"local_link_integrity_errors", 0},
		{"multicast_rcv_packets", 0},
		{"multicast_xmit_packets", 0},
		{"port_rcv_constraint_errors", 0},
		{"port_rcv_data", 237159415345822},
		{"port_rcv_errors", 0},
		{"port_rcv_packets", 801977655075},
		{"port_rcv_remote_physical_errors", 0},
		{"port_rcv_switch_relay_errors", 0},
		{"port_xmit_constraint_errors", 0},
		{"port_xmit_data", 238334949937759},
		{"port_xmit_discards", 0},
		{"port_xmit_packets", 803162651391},
		{"port_xmit_wait", 4294967295},
		{"symbol_error", 0},
		{"unicast_rcv_packets", 801977655075},
		{"unicast_xmit_packets", 803162651391},
		{"VL15_dropped", 0},
	}

	sampleEntries := make([]rdmamap.RdmaStatEntry, 0, len(counters))
	fields := make(map[string]interface{}, len(counters))
	for _, c := range counters {
		sampleEntries = append(sampleEntries, rdmamap.RdmaStatEntry{
			Name:  c.name,
			Value: c.value,
		})
		// Stored as uint64, the type the counter values are expected to
		// arrive with in the accumulator.
		fields[c.name] = c.value
	}

	tags := map[string]string{
		"device": device,
		"port":   port,
	}

	var acc testutil.Accumulator

	addStats(device, port, sampleEntries, &acc)

	acc.AssertContainsTaggedFields(t, "infiniband", fields, tags)
}