2018-03-28 02:15:52 +00:00
package azuremonitor
import (
"bytes"
2018-05-03 09:11:31 +00:00
"encoding/binary"
2018-03-28 02:15:52 +00:00
"encoding/json"
"fmt"
2018-05-03 09:11:31 +00:00
"hash/fnv"
2018-03-28 02:15:52 +00:00
"io/ioutil"
"log"
"net/http"
2018-05-03 09:11:31 +00:00
"regexp"
2018-03-28 02:15:52 +00:00
"time"
"github.com/Azure/go-autorest/autorest/adal"
"github.com/Azure/go-autorest/autorest/azure"
"github.com/influxdata/telegraf"
2018-04-29 07:31:24 +00:00
"github.com/influxdata/telegraf/internal"
2018-05-03 09:11:31 +00:00
"github.com/influxdata/telegraf/metric"
2018-04-11 23:50:48 +00:00
"github.com/influxdata/telegraf/plugins/outputs"
2018-03-28 02:15:52 +00:00
)
2018-05-03 09:11:31 +00:00
var _ telegraf . AggregatingOutput = ( * AzureMonitor ) ( nil )
var _ telegraf . Output = ( * AzureMonitor ) ( nil )
2018-03-28 02:15:52 +00:00
// AzureMonitor allows publishing of metrics to the Azure Monitor custom metrics service
type AzureMonitor struct {
2018-05-03 09:11:31 +00:00
useMsi bool ` toml:"use_managed_service_identity" `
2018-04-29 07:31:24 +00:00
ResourceID string ` toml:"resource_id" `
Region string ` toml:"region" `
Timeout internal . Duration ` toml:"Timeout" `
AzureSubscriptionID string ` toml:"azure_subscription" `
AzureTenantID string ` toml:"azure_tenant" `
AzureClientID string ` toml:"azure_client_id" `
AzureClientSecret string ` toml:"azure_client_secret" `
StringAsDimension bool ` toml:"string_as_dimension" `
2018-05-03 09:11:31 +00:00
url string
msiToken * msiToken
2018-03-28 02:15:52 +00:00
oauthConfig * adal . OAuthConfig
adalToken adal . OAuthTokenProvider
2018-04-11 23:50:48 +00:00
client * http . Client
2018-05-03 09:11:31 +00:00
cache map [ time . Time ] map [ uint64 ] * aggregate
2018-04-11 23:50:48 +00:00
}
2018-05-03 09:11:31 +00:00
type aggregate struct {
telegraf . Metric
updated bool
2018-04-11 23:50:48 +00:00
}
2018-05-03 09:11:31 +00:00
const (
defaultRegion string = "eastus"
2018-04-11 23:50:48 +00:00
2018-05-03 09:11:31 +00:00
defaultMSIResource string = "https://monitoring.azure.com/"
2018-04-11 23:50:48 +00:00
2018-05-03 09:11:31 +00:00
urlTemplate string = "https://%s.monitoring.azure.com%s/metrics"
)
2018-03-28 02:15:52 +00:00
var sampleConfig = `
2018-04-29 07:31:24 +00:00
# # The resource ID against which metric will be logged . If not
# # specified , the plugin will attempt to retrieve the resource ID
# # of the VM via the instance metadata service ( optional if running
# # on an Azure VM with MSI )
# resource_id = "/subscriptions/<subscription-id>/resourceGroups/<resource-group>/providers/Microsoft.Compute/virtualMachines/<vm-name>"
# # Azure region to publish metrics against . Defaults to eastus .
# # Leave blank to automatically query the region via MSI .
# region = "useast"
# # Write HTTP timeout , formatted as a string . If not provided , will default
# # to 5 s . 0 s means no timeout ( not recommended ) .
# timeout = "5s"
# # Whether or not to use managed service identity .
# use_managed_service_identity = true
# # Fill in the following values if using Active Directory Service
# # Principal or User Principal for authentication .
# # Subscription ID
# azure_subscription = ""
# # Tenant ID
# azure_tenant = ""
# # Client ID
# azure_client_id = ""
# # Client secrete
# azure_client_secret = ""
2018-03-28 02:15:52 +00:00
`
2018-05-03 09:11:31 +00:00
// Description provides a description of the plugin
func ( a * AzureMonitor ) Description ( ) string {
return "Configuration for sending aggregate metrics to Azure Monitor"
}
2018-04-29 07:31:24 +00:00
2018-05-03 09:11:31 +00:00
// SampleConfig provides a sample configuration for the plugin
func ( a * AzureMonitor ) SampleConfig ( ) string {
return sampleConfig
}
2018-03-28 02:15:52 +00:00
// Connect initializes the plugin and validates connectivity
2018-04-11 23:50:48 +00:00
func ( a * AzureMonitor ) Connect ( ) error {
2018-05-03 09:11:31 +00:00
a . client = & http . Client {
Transport : & http . Transport {
Proxy : http . ProxyFromEnvironment ,
} ,
Timeout : a . Timeout . Duration ,
}
2018-03-28 02:15:52 +00:00
// If no direct AD values provided, fall back to MSI
2018-04-11 23:50:48 +00:00
if a . AzureSubscriptionID == "" && a . AzureTenantID == "" && a . AzureClientID == "" && a . AzureClientSecret == "" {
a . useMsi = true
} else if a . AzureSubscriptionID == "" || a . AzureTenantID == "" || a . AzureClientID == "" || a . AzureClientSecret == "" {
2018-03-28 02:15:52 +00:00
return fmt . Errorf ( "Must provide values for azureSubscription, azureTenant, azureClient and azureClientSecret, or leave all blank to default to MSI" )
}
2018-04-29 07:31:24 +00:00
if ! a . useMsi {
2018-03-28 02:15:52 +00:00
// If using direct AD authentication create the AD access client
2018-04-11 23:50:48 +00:00
oauthConfig , err := adal . NewOAuthConfig ( azure . PublicCloud . ActiveDirectoryEndpoint , a . AzureTenantID )
2018-03-28 02:15:52 +00:00
if err != nil {
return fmt . Errorf ( "Could not initialize AD client: %s" , err )
}
2018-04-11 23:50:48 +00:00
a . oauthConfig = oauthConfig
2018-03-28 02:15:52 +00:00
}
// Validate the resource identifier
2018-05-03 09:11:31 +00:00
metadata , err := a . GetInstanceMetadata ( )
2018-04-29 07:31:24 +00:00
if err != nil {
return fmt . Errorf ( "No resource id specified, and Azure Instance metadata service not available. If not running on an Azure VM, provide a value for resourceId" )
2018-03-28 02:15:52 +00:00
}
2018-04-29 07:31:24 +00:00
a . ResourceID = metadata . AzureResourceID
2018-03-28 02:15:52 +00:00
2018-04-11 23:50:48 +00:00
if a . Region == "" {
2018-04-29 07:31:24 +00:00
a . Region = metadata . Compute . Location
2018-03-28 02:15:52 +00:00
}
2018-05-03 09:11:31 +00:00
a . url := fmt . Sprintf ( urlTemplate , a . Region , a . ResourceID )
2018-03-28 02:15:52 +00:00
// Validate credentials
2018-04-29 07:31:24 +00:00
err = a . validateCredentials ( )
2018-03-28 02:15:52 +00:00
if err != nil {
return err
}
2018-05-03 09:11:31 +00:00
a . Reset ( )
2018-04-11 23:50:48 +00:00
2018-03-28 02:15:52 +00:00
return nil
}
2018-04-11 23:50:48 +00:00
func ( a * AzureMonitor ) validateCredentials ( ) error {
if a . useMsi {
2018-03-28 02:15:52 +00:00
// Check expiry on the token
2018-05-03 09:11:31 +00:00
if a . msiToken == nil || a . msiToken . ExpiresInDuration ( ) < time . Minute {
msiToken , err := a . getMsiToken ( a . AzureClientID , defaultMSIResource )
2018-03-28 02:15:52 +00:00
if err != nil {
return err
}
2018-04-11 23:50:48 +00:00
a . msiToken = msiToken
2018-03-28 02:15:52 +00:00
}
2018-05-03 09:11:31 +00:00
return nil
2018-03-28 02:15:52 +00:00
}
2018-05-03 09:11:31 +00:00
adToken , err := adal . NewServicePrincipalToken (
* ( a . oauthConfig ) , a . AzureClientID , a . AzureClientSecret ,
azure . PublicCloud . ActiveDirectoryEndpoint )
if err != nil {
return fmt . Errorf ( "Could not acquire ADAL token: %s" , err )
}
a . adalToken = adToken
2018-03-28 02:15:52 +00:00
return nil
}
2018-05-03 09:11:31 +00:00
// Close shuts down an any active connections
func ( a * AzureMonitor ) Close ( ) error {
a . client = nil
return nil
2018-04-29 07:31:24 +00:00
}
2018-05-03 09:11:31 +00:00
type azureMonitorMetric struct {
Time time . Time ` json:"time" `
Data * azureMonitorData ` json:"data" `
2018-04-29 07:31:24 +00:00
}
2018-05-03 09:11:31 +00:00
type azureMonitorData struct {
BaseData * azureMonitorBaseData ` json:"baseData" `
}
type azureMonitorBaseData struct {
Metric string ` json:"metric" `
Namespace string ` json:"namespace" `
DimensionNames [ ] string ` json:"dimNames" `
Series [ ] * azureMonitorSeries ` json:"series" `
}
type azureMonitorSeries struct {
DimensionValues [ ] string ` json:"dimValues" `
Min float64 ` json:"min" `
Max float64 ` json:"max" `
Sum float64 ` json:"sum" `
Count int64 ` json:"count" `
2018-04-29 07:31:24 +00:00
}
// Write writes metrics to the remote endpoint
func ( a * AzureMonitor ) Write ( metrics [ ] telegraf . Metric ) error {
2018-05-03 09:11:31 +00:00
azmetrics := make ( map [ uint64 ] * azureMonitorMetric , len ( metrics ) )
for _ , m := range metrics {
id := hashIDWithTagKeysOnly ( m )
if azm , ok := azmetrics [ id ] ; ! ok {
azmetrics [ id ] = translate ( m )
} else {
azmetrics [ id ] . Data . BaseData . Series = append (
azm . Data . BaseData . Series ,
translate ( m ) . Data . BaseData . Series ... ,
)
2018-03-28 02:15:52 +00:00
}
2018-04-29 07:31:24 +00:00
}
2018-05-03 09:11:31 +00:00
var body [ ] byte
for _ , m := range azmetrics {
// Azure Monitor accepts new batches of points in new-line delimited
// JSON, following RFC 4288 (see https://github.com/ndjson/ndjson-spec).
jsonBytes , err := json . Marshal ( & m )
if err != nil {
return err
}
body = append ( body , jsonBytes ... )
body = append ( body , '\n' )
}
2018-03-28 02:15:52 +00:00
2018-05-03 09:11:31 +00:00
if err := a . validateCredentials ( ) ; err != nil {
return fmt . Errorf ( "Error authenticating: %v" , err )
}
req , err := http . NewRequest ( "POST" , a . url , bytes . NewBuffer ( body ) )
if err != nil {
return err
}
req . Header . Set ( "Authorization" , "Bearer " + a . msiToken . AccessToken )
req . Header . Set ( "Content-Type" , "application/x-ndjson" )
resp , err := a . client . Do ( req )
if err != nil {
return err
}
defer resp . Body . Close ( )
if resp . StatusCode >= 300 || resp . StatusCode < 200 {
reply , err := ioutil . ReadAll ( resp . Body )
if err != nil {
reply = nil
2018-03-28 02:15:52 +00:00
}
2018-05-03 09:11:31 +00:00
return fmt . Errorf ( "Post Error. HTTP response code:%d message:%s reply:\n%s" ,
resp . StatusCode , resp . Status , reply )
2018-04-29 07:31:24 +00:00
}
2018-05-03 09:11:31 +00:00
return nil
2018-04-29 07:31:24 +00:00
}
2018-03-28 02:15:52 +00:00
2018-05-03 09:11:31 +00:00
func hashIDWithTagKeysOnly ( m telegraf . Metric ) uint64 {
h := fnv . New64a ( )
h . Write ( [ ] byte ( m . Name ( ) ) )
h . Write ( [ ] byte ( "\n" ) )
for _ , tag := range m . TagList ( ) {
h . Write ( [ ] byte ( tag . Key ) )
h . Write ( [ ] byte ( "\n" ) )
}
b := make ( [ ] byte , binary . MaxVarintLen64 )
n := binary . PutUvarint ( b , uint64 ( m . Time ( ) . UnixNano ( ) ) )
h . Write ( b [ : n ] )
h . Write ( [ ] byte ( "\n" ) )
return h . Sum64 ( )
2018-04-29 07:31:24 +00:00
}
2018-05-03 09:11:31 +00:00
func translate ( m telegraf . Metric ) * azureMonitorMetric {
2018-04-29 07:31:24 +00:00
var dimensionNames [ ] string
var dimensionValues [ ] string
2018-05-03 09:11:31 +00:00
for i , tag := range m . TagList ( ) {
2018-04-29 07:31:24 +00:00
// Azure custom metrics service supports up to 10 dimensions
if i > 10 {
2018-05-03 09:11:31 +00:00
log . Printf ( "W! [outputs.azuremonitor] metric [%s] exceeds 10 dimensions" , m . Name ( ) )
2018-04-29 07:31:24 +00:00
continue
2018-03-28 02:15:52 +00:00
}
2018-04-29 07:31:24 +00:00
dimensionNames = append ( dimensionNames , tag . Key )
dimensionValues = append ( dimensionValues , tag . Value )
}
2018-05-03 09:11:31 +00:00
min , _ := m . GetField ( "min" )
max , _ := m . GetField ( "max" )
sum , _ := m . GetField ( "sum" )
count , _ := m . GetField ( "count" )
return & azureMonitorMetric {
Time : m . Time ( ) ,
Data : & azureMonitorData {
BaseData : & azureMonitorBaseData {
Metric : m . Name ( ) ,
Namespace : "default" ,
DimensionNames : dimensionNames ,
Series : [ ] * azureMonitorSeries {
& azureMonitorSeries {
DimensionValues : dimensionValues ,
Min : min . ( float64 ) ,
Max : max . ( float64 ) ,
Sum : sum . ( float64 ) ,
Count : count . ( int64 ) ,
} ,
} ,
} ,
} ,
}
}
// Add will append a metric to the output aggregate
func ( a * AzureMonitor ) Add ( m telegraf . Metric ) {
// Azure Monitor only supports aggregates 30 minutes into the past
// and 4 minutes into the future. Future metrics are dropped when pushed.
t := m . Time ( )
tbucket := time . Date ( t . Year ( ) , t . Month ( ) , t . Day ( ) , t . Hour ( ) , t . Minute ( ) , 0 , 0 , t . Location ( ) )
if tbucket . Before ( time . Now ( ) . Add ( - time . Minute * 30 ) ) {
log . Printf ( "W! attempted to aggregate metric over 30 minutes old: %v, %v" , t , tbucket )
return
}
// Azure Monitor doesn't have a string value type, so convert string
// fields to dimensions (a.k.a. tags) if enabled.
2018-04-29 07:31:24 +00:00
if a . StringAsDimension {
2018-05-03 09:11:31 +00:00
for fk , fv := range m . Fields ( ) {
if v , ok := fv . ( string ) ; ok {
m . AddTag ( fk , v )
2018-04-11 23:50:48 +00:00
}
2018-04-29 07:31:24 +00:00
}
}
2018-05-03 09:11:31 +00:00
for _ , f := range m . FieldList ( ) {
2018-04-29 07:31:24 +00:00
fv , ok := convert ( f . Value )
if ! ok {
continue
}
2018-03-28 02:15:52 +00:00
2018-05-03 09:11:31 +00:00
// Azure Monitor does not support fields so the field
// name is appended to the metric name.
name := m . Name ( ) + "_" + sanitize ( f . Key )
id := hashIDWithField ( m . HashID ( ) , f . Key )
2018-04-11 23:50:48 +00:00
2018-05-03 09:11:31 +00:00
_ , ok = a . cache [ tbucket ]
if ! ok {
// Time bucket does not exist and needs to be created.
a . cache [ tbucket ] = make ( map [ uint64 ] * aggregate )
2018-04-11 23:50:48 +00:00
}
2018-03-28 02:15:52 +00:00
2018-05-03 09:11:31 +00:00
nf := make ( map [ string ] interface { } , 4 )
nf [ "min" ] = fv
nf [ "max" ] = fv
nf [ "sum" ] = fv
nf [ "count" ] = 1
// Fetch existing aggregate
agg , ok := a . cache [ tbucket ] [ id ]
if ok {
aggfields := agg . Fields ( )
if fv > aggfields [ "min" ] . ( float64 ) {
nf [ "min" ] = aggfields [ "min" ]
}
if fv < aggfields [ "max" ] . ( float64 ) {
nf [ "max" ] = aggfields [ "max" ]
}
nf [ "sum" ] = fv + aggfields [ "sum" ] . ( float64 )
nf [ "count" ] = aggfields [ "count" ] . ( int64 ) + 1
2018-04-11 23:50:48 +00:00
}
2018-03-28 02:15:52 +00:00
2018-05-03 09:11:31 +00:00
na , _ := metric . New ( name , m . Tags ( ) , nf , tbucket )
a . cache [ tbucket ] [ id ] = & aggregate { na , true }
2018-04-11 23:50:48 +00:00
}
}
func convert ( in interface { } ) ( float64 , bool ) {
switch v := in . ( type ) {
2018-03-28 02:15:52 +00:00
case int64 :
2018-04-11 23:50:48 +00:00
return float64 ( v ) , true
case uint64 :
return float64 ( v ) , true
2018-03-28 02:15:52 +00:00
case float64 :
2018-04-11 23:50:48 +00:00
return v , true
2018-04-29 07:31:24 +00:00
case bool :
if v {
return 1 , true
}
2018-05-03 09:11:31 +00:00
return 0 , true
2018-03-28 02:15:52 +00:00
default :
2018-04-11 23:50:48 +00:00
return 0 , false
2018-03-28 02:15:52 +00:00
}
}
2018-05-03 09:11:31 +00:00
var invalidNameCharRE = regexp . MustCompile ( ` [^a-zA-Z0-9_] ` )
2018-04-11 23:50:48 +00:00
2018-05-03 09:11:31 +00:00
func sanitize ( value string ) string {
return invalidNameCharRE . ReplaceAllString ( value , "_" )
2018-04-11 23:50:48 +00:00
}
2018-05-03 09:11:31 +00:00
func hashIDWithField ( id uint64 , fk string ) uint64 {
h := fnv . New64a ( )
b := make ( [ ] byte , binary . MaxVarintLen64 )
n := binary . PutUvarint ( b , id )
h . Write ( b [ : n ] )
h . Write ( [ ] byte ( "\n" ) )
h . Write ( [ ] byte ( fk ) )
h . Write ( [ ] byte ( "\n" ) )
return h . Sum64 ( )
}
2018-03-28 02:15:52 +00:00
2018-05-03 09:11:31 +00:00
// Push sends metrics to the output metric buffer
func ( a * AzureMonitor ) Push ( ) [ ] telegraf . Metric {
var metrics [ ] telegraf . Metric
for tbucket , aggs := range a . cache {
// Do not send metrics early
if tbucket . After ( time . Now ( ) . Add ( - time . Minute ) ) {
continue
}
for _ , agg := range aggs {
// Only send aggregates that have had an update since
// the last push.
if ! agg . updated {
continue
}
metrics = append ( metrics , agg . Metric )
}
2018-03-28 02:15:52 +00:00
}
2018-05-03 09:11:31 +00:00
return metrics
}
2018-03-28 02:15:52 +00:00
2018-05-03 09:11:31 +00:00
// Reset clears the cache of aggregate metrics
func ( a * AzureMonitor ) Reset ( ) {
for tbucket := range a . cache {
// Remove aggregates older than 30 minutes
if tbucket . Before ( time . Now ( ) . Add ( - time . Minute * 30 ) ) {
delete ( a . cache , tbucket )
continue
}
for id := range a . cache [ tbucket ] {
a . cache [ tbucket ] [ id ] . updated = false
2018-03-28 02:15:52 +00:00
}
}
}
2018-04-11 23:50:48 +00:00
func init ( ) {
outputs . Add ( "azuremonitor" , func ( ) telegraf . Output {
return & AzureMonitor {
2018-05-03 09:11:31 +00:00
StringAsDimension : false ,
2018-04-29 07:31:24 +00:00
Timeout : internal . Duration { Duration : time . Second * 5 } ,
Region : defaultRegion ,
2018-05-03 09:11:31 +00:00
cache : make ( map [ time . Time ] map [ uint64 ] * aggregate , 36 ) ,
2018-04-11 23:50:48 +00:00
}
} )
}