telegraf/plugins/inputs/clickhouse/clickhouse.go

391 lines
10 KiB
Go

package clickhouse
import (
"bytes"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs"
)
// defaultTimeout is the HTTP client timeout used when the configuration does
// not set one.
var defaultTimeout = 5 * time.Second

// sampleConfig is the example configuration returned by SampleConfig.
var sampleConfig = `
## Username for authorization on ClickHouse server
## example: user = "default"
user = "default"
## Password for authorization on ClickHouse server
## example: password = "super_secret"
## HTTP(s) timeout while getting metrics values
## The timeout includes connection time, any redirects, and reading the response body.
## example: timeout = 1s
# timeout = 5s
## List of servers for metrics scraping
## metrics scrape via HTTP(s) clickhouse interface
## https://clickhouse.tech/docs/en/interfaces/http/
## example: servers = ["http://127.0.0.1:8123","https://custom-server.mdb.yandexcloud.net"]
servers = ["http://127.0.0.1:8123"]
## If "auto_discovery" is "true" plugin tries to connect to all servers available in the cluster
## with using same "user:password" described in "user" and "password" parameters
## and get this server hostname list from "system.clusters" table
## see
## - https://clickhouse.tech/docs/en/operations/system_tables/#system-clusters
## - https://clickhouse.tech/docs/en/operations/server_settings/settings/#server_settings_remote_servers
## - https://clickhouse.tech/docs/en/operations/table_engines/distributed/
## - https://clickhouse.tech/docs/en/operations/table_engines/replication/#creating-replicated-tables
## example: auto_discovery = false
# auto_discovery = true
## Filter cluster names in "system.clusters" when "auto_discovery" is "true"
## when this filter present then "WHERE cluster IN (...)" filter will apply
## please use only full cluster names here, regexp and glob filters is not allowed
## for "/etc/clickhouse-server/config.d/remote.xml"
## <yandex>
## <remote_servers>
## <my-own-cluster>
## <shard>
## <replica><host>clickhouse-ru-1.local</host><port>9000</port></replica>
## <replica><host>clickhouse-ru-2.local</host><port>9000</port></replica>
## </shard>
## <shard>
## <replica><host>clickhouse-eu-1.local</host><port>9000</port></replica>
## <replica><host>clickhouse-eu-2.local</host><port>9000</port></replica>
## </shard>
## </my-own-cluster>
## </remote_servers>
##
## </yandex>
##
## example: cluster_include = ["my-own-cluster"]
# cluster_include = []
## Filter cluster names in "system.clusters" when "auto_discovery" is "true"
## when this filter present then "WHERE cluster NOT IN (...)" filter will apply
## example: cluster_exclude = ["my-internal-not-discovered-cluster"]
# cluster_exclude = []
## Optional TLS Config
# tls_ca = "/etc/telegraf/ca.pem"
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
`
// connect describes a single ClickHouse endpoint to scrape. The exported
// fields carry JSON tags matching the columns selected from system.clusters
// (cluster, shard_num, host_name); url is the address queries are sent to and
// is never filled by JSON decoding.
type connect struct {
	Cluster  string `json:"cluster"`
	ShardNum int    `json:"shard_num"`
	Hostname string `json:"host_name"`

	url *url.URL
}
func init() {
inputs.Add("clickhouse", func() telegraf.Input {
return &ClickHouse{
AutoDiscovery: true,
ClientConfig: tls.ClientConfig{
InsecureSkipVerify: false,
},
Timeout: internal.Duration{Duration: defaultTimeout},
}
})
}
// ClickHouse is the Telegraf input plugin that reads metrics from one or many
// ClickHouse servers.
type ClickHouse struct {
	// User and Password are sent as X-ClickHouse-User / X-ClickHouse-Key
	// request headers when they are non-empty.
	User     string `toml:"user"`
	Password string `toml:"password"`

	// Servers holds the URLs that are queried directly and, with
	// AutoDiscovery enabled, used to look up further hosts in system.clusters.
	Servers []string `toml:"servers"`

	// AutoDiscovery switches on the system.clusters lookup; ClusterInclude
	// and ClusterExclude restrict that lookup by cluster name.
	AutoDiscovery  bool     `toml:"auto_discovery"`
	ClusterInclude []string `toml:"cluster_include"`
	ClusterExclude []string `toml:"cluster_exclude"`

	// Timeout is the HTTP client timeout; zero selects defaultTimeout.
	Timeout internal.Duration `toml:"timeout"`

	// client is built in Start and used for every query.
	client http.Client

	tls.ClientConfig
}
// SampleConfig returns the example configuration text for this plugin.
func (ch *ClickHouse) SampleConfig() string {
	return sampleConfig
}
// Description returns a one-line summary of what the plugin does.
func (ch *ClickHouse) Description() string {
	const summary = "Read metrics from one or many ClickHouse servers"
	return summary
}
// Start builds the HTTP client used for all subsequent queries. It returns an
// error only when the TLS settings cannot be turned into a TLS configuration.
// The accumulator argument is unused.
func (ch *ClickHouse) Start(telegraf.Accumulator) error {
	tlsCfg, err := ch.ClientConfig.TLSConfig()
	if err != nil {
		return err
	}

	// A zero timeout means "not configured": fall back to the default.
	timeout := ch.Timeout.Duration
	if timeout == 0 {
		timeout = defaultTimeout
	}

	transport := &http.Transport{
		Proxy:           http.ProxyFromEnvironment,
		TLSClientConfig: tlsCfg,
	}
	ch.client = http.Client{
		Transport: transport,
		Timeout:   timeout,
	}
	return nil
}
// Gather collects table statistics and the common metric sets from every
// ClickHouse endpoint and reports them through acc.
//
// With AutoDiscovery enabled each configured server is asked for the hosts
// listed in system.clusters (restricted by the include/exclude filter); every
// distinct host name is then scraped using the scheme and port of the server
// that reported it. Without AutoDiscovery the configured servers are scraped
// directly, and the host part of each server URL is used as the "source" tag.
//
// A server URL that cannot be parsed aborts the gather and is returned as the
// error. Query failures are reported via acc.AddError and do not stop the
// remaining servers or metrics from being collected.
func (ch *ClickHouse) Gather(acc telegraf.Accumulator) error {
	var connects []connect

	// known reports whether a host name has already been scheduled.
	known := func(host string) bool {
		for i := range connects {
			if connects[i].Hostname == host {
				return true
			}
		}
		return false
	}

	for _, server := range ch.Servers {
		u, err := url.Parse(server)
		if err != nil {
			return err
		}

		if !ch.AutoDiscovery {
			// Record the host name so the "source" tag is not left empty.
			connects = append(connects, connect{
				Hostname: u.Hostname(),
				url:      u,
			})
			continue
		}

		var discovered []connect
		query := "SELECT cluster, shard_num, host_name FROM system.clusters " + ch.clusterIncludeExcludeFilter()
		if err := ch.execQuery(u, query, &discovered); err != nil {
			acc.AddError(err)
			continue
		}
		for _, c := range discovered {
			if known(c.Hostname) {
				continue
			}
			c.url = &url.URL{
				Scheme: u.Scheme,
				Host:   net.JoinHostPort(c.Hostname, u.Port()),
			}
			connects = append(connects, c)
		}
	}

	for i := range connects {
		conn := &connects[i]
		if err := ch.tables(acc, conn); err != nil {
			acc.AddError(err)
		}
		for metric := range commonMetrics {
			if err := ch.commonMetrics(acc, conn, metric); err != nil {
				acc.AddError(err)
			}
		}
	}
	return nil
}
// clusterIncludeExcludeFilter renders the WHERE clause appended to the
// system.clusters query. It returns "" when neither ClusterInclude nor
// ClusterExclude is set; otherwise it returns "WHERE cluster IN (...)",
// "WHERE cluster NOT IN (...)" or both joined with " OR ". Cluster names are
// single-quoted with backslashes and single quotes backslash-escaped.
//
// NOTE(review): with both lists set the two conditions are OR-ed, so a cluster
// matches when it is included or merely not excluded - confirm that this,
// rather than AND, is the intended semantics.
func (ch *ClickHouse) clusterIncludeExcludeFilter() string {
	if len(ch.ClusterInclude) == 0 && len(ch.ClusterExclude) == 0 {
		return ""
	}

	quoter := strings.NewReplacer(`\`, `\\`, `'`, `\'`)

	// condition renders `cluster <op> ('a', 'b', ...)` for the given names.
	condition := func(op string, names []string) string {
		quoted := make([]string, len(names))
		for idx, name := range names {
			quoted[idx] = "'" + quoter.Replace(name) + "'"
		}
		return fmt.Sprintf("cluster %s (%s)", op, strings.Join(quoted, ", "))
	}

	// At least one list is non-empty here, so at least one condition is added.
	conditions := make([]string, 0, 2)
	if len(ch.ClusterInclude) != 0 {
		conditions = append(conditions, condition("IN", ch.ClusterInclude))
	}
	if len(ch.ClusterExclude) != 0 {
		conditions = append(conditions, condition("NOT IN", ch.ClusterExclude))
	}
	return "WHERE " + strings.Join(conditions, " OR ")
}
// commonMetrics runs the query stored under metric in the package-level
// commonMetrics map against conn and emits all returned metric/value rows as
// fields of a single "clickhouse_<metric>" measurement. Field names are the
// snake-cased metric names. The measurement is tagged with the connection's
// host name as "source", plus "cluster" and "shard_num" when those are set.
// A query error is returned and nothing is emitted.
func (ch *ClickHouse) commonMetrics(acc telegraf.Accumulator, conn *connect, metric string) error {
	var rows []struct {
		Metric string   `json:"metric"`
		Value  chUInt64 `json:"value"`
	}
	if err := ch.execQuery(conn.url, commonMetrics[metric], &rows); err != nil {
		return err
	}

	fields := make(map[string]interface{})
	for i := range rows {
		fields[internal.SnakeCase(rows[i].Metric)] = uint64(rows[i].Value)
	}

	tags := map[string]string{"source": conn.Hostname}
	if conn.Cluster != "" {
		tags["cluster"] = conn.Cluster
	}
	if conn.ShardNum != 0 {
		tags["shard_num"] = strconv.Itoa(conn.ShardNum)
	}

	acc.AddFields("clickhouse_"+metric, fields, tags)
	return nil
}
// tables runs the systemParts query against conn and emits one
// "clickhouse_tables" measurement per returned database/table row with the
// fields "bytes", "parts" and "rows". Each measurement is tagged with
// "source", "table" and "database", plus "cluster" and "shard_num" when those
// are set on conn. A query error is returned and nothing is emitted.
func (ch *ClickHouse) tables(acc telegraf.Accumulator, conn *connect) error {
	var rows []struct {
		Database string   `json:"database"`
		Table    string   `json:"table"`
		Bytes    chUInt64 `json:"bytes"`
		Parts    chUInt64 `json:"parts"`
		Rows     chUInt64 `json:"rows"`
	}
	if err := ch.execQuery(conn.url, systemParts, &rows); err != nil {
		return err
	}

	tags := map[string]string{"source": conn.Hostname}
	if conn.Cluster != "" {
		tags["cluster"] = conn.Cluster
	}
	if conn.ShardNum != 0 {
		tags["shard_num"] = strconv.Itoa(conn.ShardNum)
	}

	// The same tags map is reused for every row; only the table and database
	// entries are overwritten before each AddFields call.
	for i := range rows {
		row := &rows[i]
		tags["table"] = row.Table
		tags["database"] = row.Database

		fields := map[string]interface{}{
			"bytes": uint64(row.Bytes),
			"parts": uint64(row.Parts),
			"rows":  uint64(row.Rows),
		}
		acc.AddFields("clickhouse_tables", fields, tags)
	}
	return nil
}
// clickhouseError carries the HTTP status code and body text of a response
// that is reported as an error.
type clickhouseError struct {
	StatusCode int
	body       []byte
}

// Error implements the error interface, formatting the status code followed by
// the body text.
func (e *clickhouseError) Error() string {
	const format = "received error code %d: %s"
	return fmt.Sprintf(format, e.StatusCode, string(e.body))
}
// execQuery sends query (with " FORMAT JSON" appended) as a GET request to the
// ClickHouse HTTP interface at url and decodes the "data" member of the JSON
// response into i, which must be a pointer suitable for json.Unmarshal.
//
// User and Password, when non-empty, are sent in the X-ClickHouse-User and
// X-ClickHouse-Key headers. Any query parameters already present on url are
// kept; the caller's URL itself is not modified.
//
// It returns the request-construction or transport error, a *clickhouseError
// (holding the status code and up to 200 bytes of the body) for responses with
// status >= 300, or the JSON decoding error.
func (ch *ClickHouse) execQuery(url *url.URL, query string, i interface{}) error {
	// Work on a copy so the query string does not leak into the caller's URL.
	target := *url
	params := target.Query()
	params.Set("query", query+" FORMAT JSON")
	target.RawQuery = params.Encode()

	req, err := http.NewRequest(http.MethodGet, target.String(), nil)
	if err != nil {
		return err
	}
	if ch.User != "" {
		req.Header.Add("X-ClickHouse-User", ch.User)
	}
	if ch.Password != "" {
		req.Header.Add("X-ClickHouse-Key", ch.Password)
	}

	resp, err := ch.client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 300 {
		// Best effort: a read failure only shortens the reported body text.
		body, _ := ioutil.ReadAll(io.LimitReader(resp.Body, 200))
		return &clickhouseError{
			StatusCode: resp.StatusCode,
			body:       body,
		}
	}

	// Only the "data" member is of interest; its decoding is deferred so it
	// can be unmarshalled into the caller-supplied target.
	var response struct {
		Data json.RawMessage
	}
	if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
		return err
	}
	return json.Unmarshal(response.Data, i)
}
// chUInt64 is a uint64 that can be decoded from JSON whether the number is
// written bare or wrapped in double quotes. For the ClickHouse setting that
// controls quoting of 64-bit integers see
// https://clickhouse.yandex/docs/en/operations/settings/settings/#session_settings-output_format_json_quote_64bit_integers
type chUInt64 uint64

// UnmarshalJSON implements json.Unmarshaler. One leading and one trailing
// double quote are removed if present and the remainder is parsed as a base-10
// unsigned 64-bit integer. On a parse error the receiver is left unchanged.
func (i *chUInt64) UnmarshalJSON(b []byte) error {
	quote := []byte(`"`)
	digits := bytes.TrimSuffix(bytes.TrimPrefix(b, quote), quote)

	parsed, err := strconv.ParseUint(string(digits), 10, 64)
	if err != nil {
		return err
	}
	*i = chUInt64(parsed)
	return nil
}
// Queries against the system tables that expose name/value pairs. Each one
// yields the columns "metric" and "value", with value cast to UInt64.
const (
	systemEventsSQL       = "SELECT event AS metric, CAST(value AS UInt64) AS value FROM system.events"
	systemMetricsSQL      = "SELECT metric, CAST(value AS UInt64) AS value FROM system.metrics"
	systemAsyncMetricsSQL = "SELECT metric, CAST(value AS UInt64) AS value FROM system.asynchronous_metrics"
)

// systemParts sums bytes and rows and counts the active parts in system.parts,
// grouped and ordered by database and table.
const systemParts = `
SELECT
database,
table,
SUM(bytes) AS bytes,
COUNT(*) AS parts,
SUM(rows) AS rows
FROM system.parts
WHERE active = 1
GROUP BY
database, table
ORDER BY
database, table
`

// commonMetrics maps a measurement suffix to the query that produces the
// values for that measurement.
var commonMetrics = map[string]string{
	"asynchronous_metrics": systemAsyncMetricsSQL,
	"events":               systemEventsSQL,
	"metrics":              systemMetricsSQL,
}
// Compile-time check that *ClickHouse satisfies telegraf.ServiceInput.
var _ telegraf.ServiceInput = (*ClickHouse)(nil)