508 lines
13 KiB
Go
508 lines
13 KiB
Go
package azuremonitor
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/tls"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"log"
|
|
"net/http"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/Azure/go-autorest/autorest/adal"
|
|
"github.com/Azure/go-autorest/autorest/azure"
|
|
"github.com/influxdata/telegraf"
|
|
"github.com/influxdata/telegraf/plugins/outputs"
|
|
)
|
|
|
|
// AzureMonitor allows publishing of metrics to the Azure Monitor custom metrics service
|
|
type AzureMonitor struct {
|
|
ResourceID string `toml:"resourceId"`
|
|
Region string `toml:"region"`
|
|
HTTPPostTimeout int `toml:"httpPostTimeout"`
|
|
AzureSubscriptionID string `toml:"azureSubscription"`
|
|
AzureTenantID string `toml:"azureTenant"`
|
|
AzureClientID string `toml:"azureClientId"`
|
|
AzureClientSecret string `toml:"azureClientSecret"`
|
|
|
|
useMsi bool
|
|
metadataService *AzureInstanceMetadata
|
|
instanceMetadata *VirtualMachineMetadata
|
|
msiToken *MsiToken
|
|
msiResource string
|
|
bearerToken string
|
|
expiryWatermark time.Duration
|
|
|
|
oauthConfig *adal.OAuthConfig
|
|
adalToken adal.OAuthTokenProvider
|
|
|
|
client *http.Client
|
|
|
|
cache map[uint64]azureMonitorMetric
|
|
period time.Duration
|
|
delay time.Duration
|
|
periodStart time.Time
|
|
periodEnd time.Time
|
|
|
|
metrics chan telegraf.Metric
|
|
shutdown chan struct{}
|
|
}
|
|
|
|
type azureMonitorMetric struct {
|
|
Time time.Time `json:"time"`
|
|
Data *azureMonitorData `json:"data"`
|
|
}
|
|
|
|
type azureMonitorData struct {
|
|
BaseData *azureMonitorBaseData `json:"baseData"`
|
|
}
|
|
|
|
type azureMonitorBaseData struct {
|
|
Metric string `json:"metric"`
|
|
Namespace string `json:"namespace"`
|
|
DimensionNames []string `json:"dimNames"`
|
|
Series []*azureMonitorSeries `json:"series"`
|
|
}
|
|
|
|
type azureMonitorSeries struct {
|
|
DimensionValues []string `json:"dimValues"`
|
|
Min float64 `json:"min"`
|
|
Max float64 `json:"max"`
|
|
Sum float64 `json:"sum"`
|
|
Count float64 `json:"count"`
|
|
}
|
|
|
|
var sampleConfig = `
|
|
## The resource ID against which metric will be logged. If not
|
|
## specified, the plugin will attempt to retrieve the resource ID
|
|
## of the VM via the instance metadata service (optional if running
|
|
## on an Azure VM with MSI)
|
|
resourceId = "/subscriptions/3e9c2afc-52b3-4137-9bba-02b6eb204331/resourceGroups/someresourcegroup-rg/providers/Microsoft.Compute/virtualMachines/somevmname"
|
|
## Azure region to publish metrics against. Defaults to eastus
|
|
region = "useast"
|
|
## Maximum duration to wait for HTTP post (in seconds). Defaults to 15
|
|
httpPostTimeout = 15
|
|
## Whether or not to use managed service identity (defaults to true).
|
|
useManagedServiceIdentity = true
|
|
|
|
## Leave this section blank to use Managed Service Identity.
|
|
## TODO
|
|
azureSubscription = "TODO"
|
|
## TODO
|
|
azureTenant = "TODO"
|
|
## TODO
|
|
azureClientId = "TODO"
|
|
## TODO
|
|
azureClientSecret = "TODO"
|
|
`
|
|
|
|
const (
|
|
azureMonitorDefaultRegion = "eastus"
|
|
)
|
|
|
|
// Connect initializes the plugin and validates connectivity
|
|
func (a *AzureMonitor) Connect() error {
|
|
// Set defaults
|
|
|
|
// If no direct AD values provided, fall back to MSI
|
|
if a.AzureSubscriptionID == "" && a.AzureTenantID == "" && a.AzureClientID == "" && a.AzureClientSecret == "" {
|
|
a.useMsi = true
|
|
} else if a.AzureSubscriptionID == "" || a.AzureTenantID == "" || a.AzureClientID == "" || a.AzureClientSecret == "" {
|
|
return fmt.Errorf("Must provide values for azureSubscription, azureTenant, azureClient and azureClientSecret, or leave all blank to default to MSI")
|
|
}
|
|
|
|
if a.useMsi == false {
|
|
// If using direct AD authentication create the AD access client
|
|
oauthConfig, err := adal.NewOAuthConfig(azure.PublicCloud.ActiveDirectoryEndpoint, a.AzureTenantID)
|
|
if err != nil {
|
|
return fmt.Errorf("Could not initialize AD client: %s", err)
|
|
}
|
|
a.oauthConfig = oauthConfig
|
|
|
|
}
|
|
|
|
if a.HTTPPostTimeout == 0 {
|
|
a.HTTPPostTimeout = 10
|
|
}
|
|
|
|
a.metadataService = &AzureInstanceMetadata{}
|
|
|
|
// For the metrics API the MSI resource has to be https://ingestion.monitor.azure.com
|
|
a.msiResource = "https://monitoring.azure.com/"
|
|
|
|
// Validate the resource identifier
|
|
if a.ResourceID == "" {
|
|
metadata, err := a.metadataService.GetInstanceMetadata()
|
|
if err != nil {
|
|
return fmt.Errorf("No resource id specified, and Azure Instance metadata service not available. If not running on an Azure VM, provide a value for resourceId")
|
|
}
|
|
a.ResourceID = metadata.AzureResourceID
|
|
|
|
if a.Region == "" {
|
|
a.Region = metadata.Compute.Location
|
|
}
|
|
}
|
|
|
|
if a.Region == "" {
|
|
a.Region = azureMonitorDefaultRegion
|
|
}
|
|
|
|
// Validate credentials
|
|
err := a.validateCredentials()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
a.reset()
|
|
go a.run()
|
|
|
|
return nil
|
|
}
|
|
|
|
// Description provides a description of the plugin
|
|
func (a *AzureMonitor) Description() string {
|
|
return "Configuration for sending aggregate metrics to Azure Monitor"
|
|
}
|
|
|
|
// SampleConfig provides a sample configuration for the plugin
|
|
func (a *AzureMonitor) SampleConfig() string {
|
|
return sampleConfig
|
|
}
|
|
|
|
// Close shuts down an any active connections
|
|
func (a *AzureMonitor) Close() error {
|
|
// Close connection to the URL here
|
|
close(a.shutdown)
|
|
return nil
|
|
}
|
|
|
|
// Write writes metrics to the remote endpoint
|
|
func (a *AzureMonitor) Write(metrics []telegraf.Metric) error {
|
|
log.Printf("metrics collected: %+v", metrics)
|
|
|
|
// Assemble stats on incoming metrics
|
|
for _, metric := range metrics {
|
|
select {
|
|
case a.metrics <- metric:
|
|
default:
|
|
log.Printf("metrics buffer is full")
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (a *AzureMonitor) validateCredentials() error {
|
|
// Use managed service identity
|
|
if a.useMsi {
|
|
// Check expiry on the token
|
|
if a.msiToken != nil {
|
|
expiryDuration := a.msiToken.ExpiresInDuration()
|
|
if expiryDuration > a.expiryWatermark {
|
|
return nil
|
|
}
|
|
|
|
// Token is about to expire
|
|
log.Printf("Bearer token expiring in %s; acquiring new token\n", expiryDuration.String())
|
|
a.msiToken = nil
|
|
}
|
|
|
|
// No token, acquire an MSI token
|
|
if a.msiToken == nil {
|
|
msiToken, err := a.metadataService.GetMsiToken(a.AzureClientID, a.msiResource)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
log.Printf("Bearer token acquired; expiring in %s\n", msiToken.ExpiresInDuration().String())
|
|
a.msiToken = msiToken
|
|
a.bearerToken = msiToken.AccessToken
|
|
}
|
|
// Otherwise directory acquire a token
|
|
} else {
|
|
adToken, err := adal.NewServicePrincipalToken(
|
|
*(a.oauthConfig), a.AzureClientID, a.AzureClientSecret,
|
|
azure.PublicCloud.ActiveDirectoryEndpoint)
|
|
if err != nil {
|
|
return fmt.Errorf("Could not acquire ADAL token: %s", err)
|
|
}
|
|
a.adalToken = adToken
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (a *AzureMonitor) add(metric telegraf.Metric) {
|
|
id := metric.HashID()
|
|
if azm, ok := a.cache[id]; !ok {
|
|
// hit an uncached metric, create caches for first time:
|
|
var dimensionNames []string
|
|
var dimensionValues []string
|
|
for i, tag := range metric.TagList() {
|
|
// Azure custom metrics service supports up to 10 dimensions
|
|
if i > 9 {
|
|
continue
|
|
}
|
|
dimensionNames = append(dimensionNames, tag.Key)
|
|
dimensionValues = append(dimensionValues, tag.Value)
|
|
}
|
|
// Field keys are stored as the last dimension
|
|
dimensionNames = append(dimensionNames, "field")
|
|
|
|
var seriesList []*azureMonitorSeries
|
|
// Store each field as a separate series with field key as a new dimension
|
|
for _, field := range metric.FieldList() {
|
|
azmseries := newAzureMonitorSeries(field, dimensionValues)
|
|
seriesList = append(seriesList, azmseries)
|
|
}
|
|
|
|
if len(seriesList) < 1 {
|
|
log.Printf("no valid fields for metric: %s", metric)
|
|
return
|
|
}
|
|
|
|
a.cache[id] = azureMonitorMetric{
|
|
Time: metric.Time(),
|
|
Data: &azureMonitorData{
|
|
BaseData: &azureMonitorBaseData{
|
|
Metric: metric.Name(),
|
|
Namespace: "default",
|
|
DimensionNames: dimensionNames,
|
|
Series: seriesList,
|
|
},
|
|
},
|
|
}
|
|
} else {
|
|
for _, f := range metric.FieldList() {
|
|
fv, ok := convert(f.Value)
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
tmp, ok := azm.findSeriesWithField(f.Key)
|
|
if !ok {
|
|
// hit an uncached field of a cached metric
|
|
var dimensionValues []string
|
|
for i, tag := range metric.TagList() {
|
|
// Azure custom metrics service supports up to 10 dimensions
|
|
if i > 9 {
|
|
continue
|
|
}
|
|
dimensionValues = append(dimensionValues, tag.Value)
|
|
}
|
|
azm.Data.BaseData.Series = append(azm.Data.BaseData.Series, newAzureMonitorSeries(f, dimensionValues))
|
|
continue
|
|
}
|
|
|
|
//counter compute
|
|
n := tmp.Count + 1
|
|
tmp.Count = n
|
|
//max/min compute
|
|
if fv < tmp.Min {
|
|
tmp.Min = fv
|
|
} else if fv > tmp.Max {
|
|
tmp.Max = fv
|
|
}
|
|
//sum compute
|
|
tmp.Sum += fv
|
|
//store final data
|
|
a.cache[id].Data.BaseData.Series = append(a.cache[id].Data.BaseData.Series, tmp)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (b *azureMonitorMetric) findSeriesWithField(f string) (*azureMonitorSeries, bool) {
|
|
if len(b.Data.BaseData.Series) > 0 {
|
|
for _, s := range b.Data.BaseData.Series {
|
|
if f == s.DimensionValues[len(s.DimensionValues)-1] {
|
|
return s, true
|
|
}
|
|
}
|
|
}
|
|
return nil, false
|
|
}
|
|
|
|
func newAzureMonitorSeries(f *telegraf.Field, dv []string) *azureMonitorSeries {
|
|
fv, ok := convert(f.Value)
|
|
if !ok {
|
|
log.Printf("unable to convert field %s (type %T) to float type: %v", f.Key, fv, fv)
|
|
return nil
|
|
}
|
|
return &azureMonitorSeries{
|
|
DimensionValues: append(append([]string{}, dv...), f.Key),
|
|
Min: fv,
|
|
Max: fv,
|
|
Sum: fv,
|
|
Count: 1,
|
|
}
|
|
}
|
|
|
|
func (a *AzureMonitor) reset() {
|
|
a.cache = make(map[uint64]azureMonitorMetric)
|
|
}
|
|
|
|
func convert(in interface{}) (float64, bool) {
|
|
switch v := in.(type) {
|
|
case int:
|
|
return float64(v), true
|
|
case int8:
|
|
return float64(v), true
|
|
case int16:
|
|
return float64(v), true
|
|
case int32:
|
|
return float64(v), true
|
|
case int64:
|
|
return float64(v), true
|
|
case uint:
|
|
return float64(v), true
|
|
case uint8:
|
|
return float64(v), true
|
|
case uint16:
|
|
return float64(v), true
|
|
case uint32:
|
|
return float64(v), true
|
|
case uint64:
|
|
return float64(v), true
|
|
case float32:
|
|
return float64(v), true
|
|
case float64:
|
|
return v, true
|
|
case string:
|
|
f, err := strconv.ParseFloat(v, 64)
|
|
if err != nil {
|
|
log.Printf("converted string: %s to %v", v, f)
|
|
return 0, false
|
|
}
|
|
return f, true
|
|
default:
|
|
log.Printf("did not convert %T: %s", v, v)
|
|
return 0, false
|
|
}
|
|
}
|
|
|
|
func (a *AzureMonitor) push() {
|
|
var body []byte
|
|
for _, metric := range a.cache {
|
|
jsonBytes, err := json.Marshal(&metric)
|
|
log.Printf("marshalled point %s", jsonBytes)
|
|
if err != nil {
|
|
log.Printf("Error marshalling metrics %s", err)
|
|
return
|
|
}
|
|
body = append(body, jsonBytes...)
|
|
body = append(body, '\n')
|
|
}
|
|
|
|
log.Printf("Publishing metrics %s", body)
|
|
_, err := a.postData(&body)
|
|
if err != nil {
|
|
log.Printf("Error publishing metrics %s", err)
|
|
return
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func (a *AzureMonitor) postData(msg *[]byte) (*http.Request, error) {
|
|
metricsEndpoint := fmt.Sprintf("https://%s.monitoring.azure.com%s/metrics",
|
|
a.Region, a.ResourceID)
|
|
|
|
req, err := http.NewRequest("POST", metricsEndpoint, bytes.NewBuffer(*msg))
|
|
if err != nil {
|
|
log.Printf("Error creating HTTP request")
|
|
return nil, err
|
|
}
|
|
|
|
req.Header.Set("Authorization", "Bearer "+a.bearerToken)
|
|
req.Header.Set("Content-Type", "application/x-ndjson")
|
|
|
|
tr := &http.Transport{
|
|
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
|
}
|
|
client := http.Client{
|
|
Transport: tr,
|
|
// TODO - fix this
|
|
//Timeout: time.Duration(s.HTTPPostTimeout * time.Second),
|
|
Timeout: time.Duration(10 * time.Second),
|
|
}
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return req, err
|
|
}
|
|
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode >= 300 || resp.StatusCode < 200 {
|
|
var reply []byte
|
|
reply, err = ioutil.ReadAll(resp.Body)
|
|
|
|
if err != nil {
|
|
reply = nil
|
|
}
|
|
return req, fmt.Errorf("Post Error. HTTP response code:%d message:%s reply:\n%s",
|
|
resp.StatusCode, resp.Status, reply)
|
|
}
|
|
return req, nil
|
|
}
|
|
|
|
func (a *AzureMonitor) run() {
|
|
// The start of the period is truncated to the nearest minute.
|
|
//
|
|
// Every metric then gets it's timestamp checked and is dropped if it
|
|
// is not within:
|
|
//
|
|
// start < t < end + truncation + delay
|
|
//
|
|
// So if we start at now = 00:00.2 with a 10s period and 0.3s delay:
|
|
// now = 00:00.2
|
|
// start = 00:00
|
|
// truncation = 00:00.2
|
|
// end = 00:10
|
|
// 1st interval: 00:00 - 00:10.5
|
|
// 2nd interval: 00:10 - 00:20.5
|
|
// etc.
|
|
//
|
|
now := time.Now()
|
|
a.periodStart = now.Truncate(time.Minute)
|
|
truncation := now.Sub(a.periodStart)
|
|
a.periodEnd = a.periodStart.Add(a.period)
|
|
time.Sleep(a.delay)
|
|
periodT := time.NewTicker(a.period)
|
|
defer periodT.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-a.shutdown:
|
|
if len(a.metrics) > 0 {
|
|
// wait until metrics are flushed before exiting
|
|
continue
|
|
}
|
|
return
|
|
case m := <-a.metrics:
|
|
if m.Time().Before(a.periodStart) ||
|
|
m.Time().After(a.periodEnd.Add(truncation).Add(a.delay)) {
|
|
// the metric is outside the current aggregation period, so
|
|
// skip it.
|
|
continue
|
|
}
|
|
a.add(m)
|
|
case <-periodT.C:
|
|
a.periodStart = a.periodEnd
|
|
a.periodEnd = a.periodStart.Add(a.period)
|
|
a.push()
|
|
a.reset()
|
|
}
|
|
}
|
|
}
|
|
|
|
func init() {
|
|
outputs.Add("azuremonitor", func() telegraf.Output {
|
|
return &AzureMonitor{
|
|
period: time.Minute,
|
|
delay: time.Second * 5,
|
|
metrics: make(chan telegraf.Metric, 100),
|
|
shutdown: make(chan struct{}),
|
|
}
|
|
})
|
|
}
|