2018-03-28 02:15:52 +00:00
package azuremonitor
import (
"bytes"
"crypto/tls"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"strconv"
"time"
"github.com/Azure/go-autorest/autorest/adal"
"github.com/Azure/go-autorest/autorest/azure"
"github.com/influxdata/telegraf"
2018-04-11 23:50:48 +00:00
"github.com/influxdata/telegraf/plugins/outputs"
2018-03-28 02:15:52 +00:00
)
// AzureMonitor allows publishing of metrics to the Azure Monitor custom metrics service
type AzureMonitor struct {
ResourceID string ` toml:"resourceId" `
Region string ` toml:"region" `
HTTPPostTimeout int ` toml:"httpPostTimeout" `
AzureSubscriptionID string ` toml:"azureSubscription" `
AzureTenantID string ` toml:"azureTenant" `
AzureClientID string ` toml:"azureClientId" `
AzureClientSecret string ` toml:"azureClientSecret" `
useMsi bool
metadataService * AzureInstanceMetadata
instanceMetadata * VirtualMachineMetadata
msiToken * MsiToken
msiResource string
bearerToken string
expiryWatermark time . Duration
oauthConfig * adal . OAuthConfig
adalToken adal . OAuthTokenProvider
2018-04-11 23:50:48 +00:00
client * http . Client
cache map [ uint64 ] azureMonitorMetric
period time . Duration
delay time . Duration
periodStart time . Time
periodEnd time . Time
metrics chan telegraf . Metric
shutdown chan struct { }
}
// azureMonitorMetric is the value stored in AzureMonitor.cache: one metric
// together with its aggregated series. push marshals each cached value to a
// single line of JSON.
type azureMonitorMetric struct {
	// Time is taken from the first telegraf metric that created the entry.
	Time time.Time         ` json:"time" `
	Data *azureMonitorData ` json:"data" `
}
// azureMonitorData is the "data" envelope of a metric document; it only wraps
// the base data.
type azureMonitorData struct {
	BaseData *azureMonitorBaseData ` json:"baseData" `
}
// azureMonitorBaseData names a metric and lists its aggregated series.
type azureMonitorBaseData struct {
	// Metric is the telegraf metric name.
	Metric string ` json:"metric" `
	// Namespace is always set to "default" by add.
	Namespace string ` json:"namespace" `
	// DimensionNames holds the tag keys followed by the literal "field".
	DimensionNames [ ] string ` json:"dimNames" `
	// Series holds one entry per field of the metric.
	Series [ ] * azureMonitorSeries ` json:"series" `
}
// azureMonitorSeries is the running aggregate of one field of a metric over
// the current aggregation period.
type azureMonitorSeries struct {
	// DimensionValues holds the tag values followed by the field key, which
	// is always the last element.
	DimensionValues []string ` json:"dimValues" `
	Min             float64  ` json:"min" `
	Max             float64  ` json:"max" `
	Sum             float64  ` json:"sum" `
	// Count is the number of samples folded into Min, Max and Sum.
	Count float64 ` json:"count" `
}
// sampleConfig is the example TOML configuration returned by SampleConfig.
// Every key shown corresponds to a toml tag on AzureMonitor. The service
// principal keys are commented out so that the sample, used as is, selects
// Managed Service Identity.
var sampleConfig = `
  ## The resource ID against which metric will be logged. If not
  ## specified, the plugin will attempt to retrieve the resource ID
  ## of the VM via the instance metadata service (optional if running
  ## on an Azure VM with MSI)
  resourceId = "/subscriptions/3e9c2afc-52b3-4137-9bba-02b6eb204331/resourceGroups/someresourcegroup-rg/providers/Microsoft.Compute/virtualMachines/somevmname"
  ## Azure region to publish metrics against. Defaults to eastus
  region = "eastus"
  ## Maximum duration to wait for HTTP post (in seconds). Defaults to 10
  httpPostTimeout = 10

  ## Service principal credentials. Leave all four unset to use
  ## Managed Service Identity; setting only some of them is an error.
  ## Azure subscription ID
  # azureSubscription = ""
  ## Azure Active Directory tenant ID
  # azureTenant = ""
  ## Client (application) ID of the service principal
  # azureClientId = ""
  ## Client secret of the service principal
  # azureClientSecret = ""
`
// azureMonitorDefaultRegion is the region Connect falls back to when no region
// is configured and none was obtained from the metadata service.
const azureMonitorDefaultRegion = "eastus"
// Connect initializes the plugin and validates connectivity
2018-04-11 23:50:48 +00:00
func ( a * AzureMonitor ) Connect ( ) error {
2018-03-28 02:15:52 +00:00
// Set defaults
// If no direct AD values provided, fall back to MSI
2018-04-11 23:50:48 +00:00
if a . AzureSubscriptionID == "" && a . AzureTenantID == "" && a . AzureClientID == "" && a . AzureClientSecret == "" {
a . useMsi = true
} else if a . AzureSubscriptionID == "" || a . AzureTenantID == "" || a . AzureClientID == "" || a . AzureClientSecret == "" {
2018-03-28 02:15:52 +00:00
return fmt . Errorf ( "Must provide values for azureSubscription, azureTenant, azureClient and azureClientSecret, or leave all blank to default to MSI" )
}
2018-04-11 23:50:48 +00:00
if a . useMsi == false {
2018-03-28 02:15:52 +00:00
// If using direct AD authentication create the AD access client
2018-04-11 23:50:48 +00:00
oauthConfig , err := adal . NewOAuthConfig ( azure . PublicCloud . ActiveDirectoryEndpoint , a . AzureTenantID )
2018-03-28 02:15:52 +00:00
if err != nil {
return fmt . Errorf ( "Could not initialize AD client: %s" , err )
}
2018-04-11 23:50:48 +00:00
a . oauthConfig = oauthConfig
2018-03-28 02:15:52 +00:00
}
2018-04-11 23:50:48 +00:00
if a . HTTPPostTimeout == 0 {
a . HTTPPostTimeout = 10
2018-03-28 02:15:52 +00:00
}
2018-04-11 23:50:48 +00:00
a . metadataService = & AzureInstanceMetadata { }
2018-03-28 02:15:52 +00:00
// For the metrics API the MSI resource has to be https://ingestion.monitor.azure.com
2018-04-11 23:50:48 +00:00
a . msiResource = "https://monitoring.azure.com/"
2018-03-28 02:15:52 +00:00
// Validate the resource identifier
2018-04-11 23:50:48 +00:00
if a . ResourceID == "" {
metadata , err := a . metadataService . GetInstanceMetadata ( )
2018-03-28 02:15:52 +00:00
if err != nil {
return fmt . Errorf ( "No resource id specified, and Azure Instance metadata service not available. If not running on an Azure VM, provide a value for resourceId" )
}
2018-04-11 23:50:48 +00:00
a . ResourceID = metadata . AzureResourceID
2018-03-28 02:15:52 +00:00
2018-04-11 23:50:48 +00:00
if a . Region == "" {
a . Region = metadata . Compute . Location
2018-03-28 02:15:52 +00:00
}
}
2018-04-11 23:50:48 +00:00
if a . Region == "" {
a . Region = azureMonitorDefaultRegion
2018-03-28 02:15:52 +00:00
}
// Validate credentials
2018-04-11 23:50:48 +00:00
err := a . validateCredentials ( )
2018-03-28 02:15:52 +00:00
if err != nil {
return err
}
2018-04-11 23:50:48 +00:00
a . reset ( )
go a . run ( )
2018-03-28 02:15:52 +00:00
return nil
}
2018-04-11 23:50:48 +00:00
// Description returns the one-line summary of what this output plugin does.
func (a *AzureMonitor) Description() string {
	const summary = "Configuration for sending aggregate metrics to Azure Monitor"
	return summary
}
// SampleConfig returns the example configuration text held in sampleConfig.
func (a *AzureMonitor) SampleConfig() string {
	config := sampleConfig
	return config
}
2018-03-28 02:15:52 +00:00
// Close shuts down an any active connections
2018-04-11 23:50:48 +00:00
func ( a * AzureMonitor ) Close ( ) error {
2018-03-28 02:15:52 +00:00
// Close connection to the URL here
2018-04-11 23:50:48 +00:00
close ( a . shutdown )
2018-03-28 02:15:52 +00:00
return nil
}
// Write writes metrics to the remote endpoint
2018-04-11 23:50:48 +00:00
func ( a * AzureMonitor ) Write ( metrics [ ] telegraf . Metric ) error {
log . Printf ( "metrics collected: %+v" , metrics )
2018-03-28 02:15:52 +00:00
2018-04-11 23:50:48 +00:00
// Assemble stats on incoming metrics
for _ , metric := range metrics {
select {
case a . metrics <- metric :
default :
log . Printf ( "metrics buffer is full" )
2018-03-28 02:15:52 +00:00
}
}
return nil
}
2018-04-11 23:50:48 +00:00
func ( a * AzureMonitor ) validateCredentials ( ) error {
2018-03-28 02:15:52 +00:00
// Use managed service identity
2018-04-11 23:50:48 +00:00
if a . useMsi {
2018-03-28 02:15:52 +00:00
// Check expiry on the token
2018-04-11 23:50:48 +00:00
if a . msiToken != nil {
expiryDuration := a . msiToken . ExpiresInDuration ( )
if expiryDuration > a . expiryWatermark {
2018-03-28 02:15:52 +00:00
return nil
}
// Token is about to expire
log . Printf ( "Bearer token expiring in %s; acquiring new token\n" , expiryDuration . String ( ) )
2018-04-11 23:50:48 +00:00
a . msiToken = nil
2018-03-28 02:15:52 +00:00
}
// No token, acquire an MSI token
2018-04-11 23:50:48 +00:00
if a . msiToken == nil {
msiToken , err := a . metadataService . GetMsiToken ( a . AzureClientID , a . msiResource )
2018-03-28 02:15:52 +00:00
if err != nil {
return err
}
log . Printf ( "Bearer token acquired; expiring in %s\n" , msiToken . ExpiresInDuration ( ) . String ( ) )
2018-04-11 23:50:48 +00:00
a . msiToken = msiToken
a . bearerToken = msiToken . AccessToken
2018-03-28 02:15:52 +00:00
}
// Otherwise directory acquire a token
} else {
adToken , err := adal . NewServicePrincipalToken (
2018-04-11 23:50:48 +00:00
* ( a . oauthConfig ) , a . AzureClientID , a . AzureClientSecret ,
2018-03-28 02:15:52 +00:00
azure . PublicCloud . ActiveDirectoryEndpoint )
if err != nil {
return fmt . Errorf ( "Could not acquire ADAL token: %s" , err )
}
2018-04-11 23:50:48 +00:00
a . adalToken = adToken
2018-03-28 02:15:52 +00:00
}
return nil
}
2018-04-11 23:50:48 +00:00
func ( a * AzureMonitor ) add ( metric telegraf . Metric ) {
id := metric . HashID ( )
if azm , ok := a . cache [ id ] ; ! ok {
// hit an uncached metric, create caches for first time:
2018-03-28 02:15:52 +00:00
var dimensionNames [ ] string
var dimensionValues [ ] string
2018-04-11 23:50:48 +00:00
for i , tag := range metric . TagList ( ) {
// Azure custom metrics service supports up to 10 dimensions
if i > 9 {
continue
}
dimensionNames = append ( dimensionNames , tag . Key )
dimensionValues = append ( dimensionValues , tag . Value )
2018-03-28 02:15:52 +00:00
}
2018-04-11 23:50:48 +00:00
// Field keys are stored as the last dimension
dimensionNames = append ( dimensionNames , "field" )
var seriesList [ ] * azureMonitorSeries
// Store each field as a separate series with field key as a new dimension
for _ , field := range metric . FieldList ( ) {
azmseries := newAzureMonitorSeries ( field , dimensionValues )
seriesList = append ( seriesList , azmseries )
2018-03-28 02:15:52 +00:00
}
2018-04-11 23:50:48 +00:00
if len ( seriesList ) < 1 {
log . Printf ( "no valid fields for metric: %s" , metric )
return
2018-03-28 02:15:52 +00:00
}
2018-04-11 23:50:48 +00:00
a . cache [ id ] = azureMonitorMetric {
2018-03-28 02:15:52 +00:00
Time : metric . Time ( ) ,
2018-04-11 23:50:48 +00:00
Data : & azureMonitorData {
BaseData : & azureMonitorBaseData {
2018-03-28 02:15:52 +00:00
Metric : metric . Name ( ) ,
Namespace : "default" ,
DimensionNames : dimensionNames ,
2018-04-11 23:50:48 +00:00
Series : seriesList ,
2018-03-28 02:15:52 +00:00
} ,
} ,
}
2018-04-11 23:50:48 +00:00
} else {
for _ , f := range metric . FieldList ( ) {
fv , ok := convert ( f . Value )
if ! ok {
continue
}
2018-03-28 02:15:52 +00:00
2018-04-11 23:50:48 +00:00
tmp , ok := azm . findSeriesWithField ( f . Key )
if ! ok {
// hit an uncached field of a cached metric
var dimensionValues [ ] string
for i , tag := range metric . TagList ( ) {
// Azure custom metrics service supports up to 10 dimensions
if i > 9 {
continue
}
dimensionValues = append ( dimensionValues , tag . Value )
}
azm . Data . BaseData . Series = append ( azm . Data . BaseData . Series , newAzureMonitorSeries ( f , dimensionValues ) )
continue
}
//counter compute
n := tmp . Count + 1
tmp . Count = n
//max/min compute
if fv < tmp . Min {
tmp . Min = fv
} else if fv > tmp . Max {
tmp . Max = fv
}
//sum compute
tmp . Sum += fv
//store final data
a . cache [ id ] . Data . BaseData . Series = append ( a . cache [ id ] . Data . BaseData . Series , tmp )
}
2018-03-28 02:15:52 +00:00
}
}
2018-04-11 23:50:48 +00:00
func ( b * azureMonitorMetric ) findSeriesWithField ( f string ) ( * azureMonitorSeries , bool ) {
if len ( b . Data . BaseData . Series ) > 0 {
for _ , s := range b . Data . BaseData . Series {
if f == s . DimensionValues [ len ( s . DimensionValues ) - 1 ] {
return s , true
}
}
}
return nil , false
2018-03-28 02:15:52 +00:00
}
2018-04-11 23:50:48 +00:00
// newAzureMonitorSeries starts a series for field f with a single sample: min,
// max and sum are the field's value and count is 1. The dimension values are a
// copy of dv with the field key appended as the last element.
//
// It returns nil when the field value cannot be converted to float64.
func newAzureMonitorSeries(f *telegraf.Field, dv []string) *azureMonitorSeries {
	fv, ok := convert(f.Value)
	if !ok {
		// Report the offending value itself, not the zero conversion result.
		log.Printf("unable to convert field %s (type %T) to float type: %v", f.Key, f.Value, f.Value)
		return nil
	}

	// Copy dv so that series created from the same slice do not share a
	// backing array.
	dimensionValues := make([]string, 0, len(dv)+1)
	dimensionValues = append(dimensionValues, dv...)
	dimensionValues = append(dimensionValues, f.Key)

	return &azureMonitorSeries{
		DimensionValues: dimensionValues,
		Min:             fv,
		Max:             fv,
		Sum:             fv,
		Count:           1,
	}
}
// reset discards all cached aggregates by replacing the cache with a new,
// empty map.
func (a *AzureMonitor) reset() {
	a.cache = map[uint64]azureMonitorMetric{}
}
2018-03-28 02:15:52 +00:00
2018-04-11 23:50:48 +00:00
// convert turns a field value into a float64.
//
// All signed and unsigned integer types and both float types are converted
// directly; strings are parsed with strconv.ParseFloat. The second result is
// false when the value has an unsupported type, when a string does not parse,
// or when the result is NaN or infinite. Such values cannot be encoded as JSON
// and would make the aggregated min/max/sum meaningless.
func convert(in interface{}) (float64, bool) {
	// Largest finite float64 (the value of math.MaxFloat64).
	const maxFinite = 1.79769313486231570814527423731704356798070e+308

	var f float64
	switch v := in.(type) {
	case int:
		f = float64(v)
	case int8:
		f = float64(v)
	case int16:
		f = float64(v)
	case int32:
		f = float64(v)
	case int64:
		f = float64(v)
	case uint:
		f = float64(v)
	case uint8:
		f = float64(v)
	case uint16:
		f = float64(v)
	case uint32:
		f = float64(v)
	case uint64:
		f = float64(v)
	case float32:
		f = float64(v)
	case float64:
		f = v
	case string:
		parsed, err := strconv.ParseFloat(v, 64)
		if err != nil {
			log.Printf("could not convert string %q to float: %v", v, err)
			return 0, false
		}
		f = parsed
	default:
		log.Printf("did not convert %T: %v", v, v)
		return 0, false
	}

	// f != f is true only for NaN; the range check catches +Inf and -Inf.
	if f != f || f > maxFinite || f < -maxFinite {
		log.Printf("did not convert non-finite value %v", f)
		return 0, false
	}
	return f, true
}
2018-04-11 23:50:48 +00:00
// push sends the aggregates of the finished period to Azure Monitor.
//
// Every cached metric is marshalled to one line of JSON, and the lines are
// posted together in a single request. Nothing is sent when the cache is
// empty. A metric that fails to marshal is logged and skipped so that it does
// not discard the rest of the batch. Credentials are re-validated first, so an
// expiring token is renewed before it is used. Failures are logged, not
// returned.
func (a *AzureMonitor) push() {
	if len(a.cache) == 0 {
		return
	}

	if err := a.validateCredentials(); err != nil {
		log.Printf("Error refreshing credentials %s", err)
		return
	}

	var body []byte
	for _, metric := range a.cache {
		jsonBytes, err := json.Marshal(&metric)
		if err != nil {
			log.Printf("Error marshalling metrics %s", err)
			continue
		}
		body = append(body, jsonBytes...)
		body = append(body, '\n')
	}
	if len(body) == 0 {
		return
	}

	log.Printf("Publishing metrics %s", body)
	if _, err := a.postData(&body); err != nil {
		log.Printf("Error publishing metrics %s", err)
	}
}
func ( a * AzureMonitor ) postData ( msg * [ ] byte ) ( * http . Request , error ) {
2018-03-28 02:15:52 +00:00
metricsEndpoint := fmt . Sprintf ( "https://%s.monitoring.azure.com%s/metrics" ,
2018-04-11 23:50:48 +00:00
a . Region , a . ResourceID )
2018-03-28 02:15:52 +00:00
req , err := http . NewRequest ( "POST" , metricsEndpoint , bytes . NewBuffer ( * msg ) )
if err != nil {
log . Printf ( "Error creating HTTP request" )
return nil , err
}
2018-04-11 23:50:48 +00:00
req . Header . Set ( "Authorization" , "Bearer " + a . bearerToken )
req . Header . Set ( "Content-Type" , "application/x-ndjson" )
2018-03-28 02:15:52 +00:00
tr := & http . Transport {
TLSClientConfig : & tls . Config { InsecureSkipVerify : true } ,
}
client := http . Client {
Transport : tr ,
// TODO - fix this
//Timeout: time.Duration(s.HTTPPostTimeout * time.Second),
Timeout : time . Duration ( 10 * time . Second ) ,
}
resp , err := client . Do ( req )
if err != nil {
return req , err
}
defer resp . Body . Close ( )
if resp . StatusCode >= 300 || resp . StatusCode < 200 {
var reply [ ] byte
reply , err = ioutil . ReadAll ( resp . Body )
if err != nil {
reply = nil
}
return req , fmt . Errorf ( "Post Error. HTTP response code:%d message:%s reply:\n%s" ,
resp . StatusCode , resp . Status , reply )
}
return req , nil
}
2018-04-11 23:50:48 +00:00
// run is the aggregation loop started by Connect. It consumes metrics from the
// metrics channel, folds the ones that belong to the current period into the
// cache, and on every tick of the period pushes the cache and resets it.
//
// The first period starts at the current time truncated to the minute. A
// metric is accepted only when its timestamp is not before the period start
// and not after
//
//	period end + truncation + delay
//
// where truncation is the time that was cut off when the start was truncated.
// For example, starting at 00:00.2 with a 10s period and a 0.3s delay:
//
//	start        = 00:00
//	truncation   = 00:00.2
//	end          = 00:10
//	1st interval = 00:00 - 00:10.5
//	2nd interval = 00:10 - 00:20.5
//
// After the shutdown channel is closed the loop exits as soon as the metrics
// channel is empty.
func (a *AzureMonitor) run() {
	startedAt := time.Now()
	a.periodStart = startedAt.Truncate(time.Minute)
	offset := startedAt.Sub(a.periodStart)
	a.periodEnd = a.periodStart.Add(a.period)
	time.Sleep(a.delay)

	ticker := time.NewTicker(a.period)
	defer ticker.Stop()

	for {
		select {
		case <-a.shutdown:
			if len(a.metrics) == 0 {
				return
			}
			// Metrics are still buffered: keep looping until they have
			// been consumed.

		case metric := <-a.metrics:
			outside := metric.Time().Before(a.periodStart) ||
				metric.Time().After(a.periodEnd.Add(offset).Add(a.delay))
			if !outside {
				a.add(metric)
			}
			// Metrics outside the current aggregation period are skipped.

		case <-ticker.C:
			a.periodStart = a.periodEnd
			a.periodEnd = a.periodStart.Add(a.period)
			a.push()
			a.reset()
		}
	}
}
func init ( ) {
outputs . Add ( "azuremonitor" , func ( ) telegraf . Output {
return & AzureMonitor {
period : time . Minute ,
delay : time . Second * 5 ,
metrics : make ( chan telegraf . Metric , 100 ) ,
shutdown : make ( chan struct { } ) ,
}
} )
}