telegraf/plugins/inputs/cloud_pubsub_push/pubsub_push.go

324 lines
8.0 KiB
Go

package cloud_pubsub_push
import (
"context"
"crypto/subtle"
"encoding/base64"
"encoding/json"
"io/ioutil"
"net"
"net/http"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
tlsint "github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
)
// defaultMaxBodySize is the default maximum request body size, in bytes.
// if the request body is over this size, we will return an HTTP 413 error.
// 500 MB
const defaultMaxBodySize = 500 * 1024 * 1024
const defaultMaxUndeliveredMessages = 1000
type PubSubPush struct {
ServiceAddress string
Token string
Path string
ReadTimeout internal.Duration
WriteTimeout internal.Duration
MaxBodySize internal.Size
AddMeta bool
Log telegraf.Logger
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
tlsint.ServerConfig
parsers.Parser
listener net.Listener
server *http.Server
acc telegraf.TrackingAccumulator
ctx context.Context
cancel context.CancelFunc
wg *sync.WaitGroup
mu *sync.Mutex
undelivered map[telegraf.TrackingID]chan bool
sem chan struct{}
}
// Message defines the structure of a Google Pub/Sub message.
type Message struct {
Atts map[string]string `json:"attributes"`
Data string `json:"data"` // Data is base64 encoded data
}
// Payload is the received Google Pub/Sub data. (https://cloud.google.com/pubsub/docs/push)
type Payload struct {
Msg Message `json:"message"`
Subscription string `json:"subscription"`
}
const sampleConfig = `
## Address and port to host HTTP listener on
service_address = ":8080"
## Application secret to verify messages originate from Cloud Pub/Sub
# token = ""
## Path to listen to.
# path = "/"
## Maximum duration before timing out read of the request
# read_timeout = "10s"
## Maximum duration before timing out write of the response. This should be set to a value
## large enough that you can send at least 'metric_batch_size' number of messages within the
## duration.
# write_timeout = "10s"
## Maximum allowed http request body size in bytes.
## 0 means to use the default of 524,288,00 bytes (500 mebibytes)
# max_body_size = "500MB"
## Whether to add the pubsub metadata, such as message attributes and subscription as a tag.
# add_meta = false
## Optional. Maximum messages to read from PubSub that have not been written
## to an output. Defaults to 1000.
## For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message contains 10 metrics and the output
## metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Set one or more allowed client CA certificate file names to
## enable mutually authenticated TLS connections
# tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"]
## Add service certificate and key
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
`
func (p *PubSubPush) SampleConfig() string {
return sampleConfig
}
func (p *PubSubPush) Description() string {
return "Google Cloud Pub/Sub Push HTTP listener"
}
func (p *PubSubPush) Gather(_ telegraf.Accumulator) error {
return nil
}
func (p *PubSubPush) SetParser(parser parsers.Parser) {
p.Parser = parser
}
// Start starts the http listener service.
func (p *PubSubPush) Start(acc telegraf.Accumulator) error {
if p.MaxBodySize.Size == 0 {
p.MaxBodySize.Size = defaultMaxBodySize
}
if p.ReadTimeout.Duration < time.Second {
p.ReadTimeout.Duration = time.Second * 10
}
if p.WriteTimeout.Duration < time.Second {
p.WriteTimeout.Duration = time.Second * 10
}
tlsConf, err := p.ServerConfig.TLSConfig()
if err != nil {
return err
}
p.server = &http.Server{
Addr: p.ServiceAddress,
Handler: http.TimeoutHandler(p, p.WriteTimeout.Duration, "timed out processing metric"),
ReadTimeout: p.ReadTimeout.Duration,
TLSConfig: tlsConf,
}
p.ctx, p.cancel = context.WithCancel(context.Background())
p.wg = &sync.WaitGroup{}
p.acc = acc.WithTracking(p.MaxUndeliveredMessages)
p.sem = make(chan struct{}, p.MaxUndeliveredMessages)
p.undelivered = make(map[telegraf.TrackingID]chan bool)
p.mu = &sync.Mutex{}
p.wg.Add(1)
go func() {
defer p.wg.Done()
p.receiveDelivered()
}()
p.wg.Add(1)
go func() {
defer p.wg.Done()
if tlsConf != nil {
p.server.ListenAndServeTLS("", "")
} else {
p.server.ListenAndServe()
}
}()
return nil
}
// Stop cleans up all resources
func (p *PubSubPush) Stop() {
p.cancel()
p.server.Shutdown(p.ctx)
p.wg.Wait()
}
func (p *PubSubPush) ServeHTTP(res http.ResponseWriter, req *http.Request) {
if req.URL.Path == p.Path {
p.AuthenticateIfSet(p.serveWrite, res, req)
} else {
p.AuthenticateIfSet(http.NotFound, res, req)
}
}
func (p *PubSubPush) serveWrite(res http.ResponseWriter, req *http.Request) {
select {
case <-req.Context().Done():
res.WriteHeader(http.StatusServiceUnavailable)
return
case <-p.ctx.Done():
res.WriteHeader(http.StatusServiceUnavailable)
return
case p.sem <- struct{}{}:
break
}
// Check that the content length is not too large for us to handle.
if req.ContentLength > p.MaxBodySize.Size {
res.WriteHeader(http.StatusRequestEntityTooLarge)
return
}
if req.Method != http.MethodPost {
res.WriteHeader(http.StatusMethodNotAllowed)
return
}
body := http.MaxBytesReader(res, req.Body, p.MaxBodySize.Size)
bytes, err := ioutil.ReadAll(body)
if err != nil {
res.WriteHeader(http.StatusRequestEntityTooLarge)
return
}
var payload Payload
if err = json.Unmarshal(bytes, &payload); err != nil {
p.Log.Errorf("Error decoding payload %s", err.Error())
res.WriteHeader(http.StatusBadRequest)
return
}
sDec, err := base64.StdEncoding.DecodeString(payload.Msg.Data)
if err != nil {
p.Log.Errorf("Base64-decode failed %s", err.Error())
res.WriteHeader(http.StatusBadRequest)
return
}
metrics, err := p.Parse(sDec)
if err != nil {
p.Log.Debug(err.Error())
res.WriteHeader(http.StatusBadRequest)
return
}
if p.AddMeta {
for i := range metrics {
for k, v := range payload.Msg.Atts {
metrics[i].AddTag(k, v)
}
metrics[i].AddTag("subscription", payload.Subscription)
}
}
ch := make(chan bool, 1)
p.mu.Lock()
p.undelivered[p.acc.AddTrackingMetricGroup(metrics)] = ch
p.mu.Unlock()
select {
case <-req.Context().Done():
res.WriteHeader(http.StatusServiceUnavailable)
return
case success := <-ch:
if success {
res.WriteHeader(http.StatusNoContent)
} else {
res.WriteHeader(http.StatusInternalServerError)
}
}
}
func (p *PubSubPush) receiveDelivered() {
for {
select {
case <-p.ctx.Done():
return
case info := <-p.acc.Delivered():
<-p.sem
p.mu.Lock()
ch, ok := p.undelivered[info.ID()]
if !ok {
p.mu.Unlock()
continue
}
delete(p.undelivered, info.ID())
p.mu.Unlock()
if info.Delivered() {
ch <- true
} else {
ch <- false
p.Log.Debug("Metric group failed to process")
}
}
}
}
func (p *PubSubPush) AuthenticateIfSet(handler http.HandlerFunc, res http.ResponseWriter, req *http.Request) {
if p.Token != "" {
if subtle.ConstantTimeCompare([]byte(req.FormValue("token")), []byte(p.Token)) != 1 {
http.Error(res, "Unauthorized.", http.StatusUnauthorized)
return
}
}
handler(res, req)
}
func init() {
inputs.Add("cloud_pubsub_push", func() telegraf.Input {
return &PubSubPush{
ServiceAddress: ":8080",
Path: "/",
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
})
}