Major Logging Overhaul

In this commit: - centralize the logging output handler. - set global Info/Debug/Error log levels based on the config file or flags. - remove per-plugin debug arg handling. - add an I!, D!, or E! prefix to every log message. - add a configuration option to specify where to send logs. Closes #1786
2016-09-30 22:37:56 +01:00
parent 78ced6bc30
commit c7834209d2
52 changed files with 363 additions and 269 deletions
--- a/etc/telegraf.conf
+++ b/etc/telegraf.conf
@@ -30,12 +30,15 @@
  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
  round_interval = true

-  ## Telegraf will send metrics to outputs in batches of at
-  ## most metric_batch_size metrics.
+  ## Telegraf will send metrics to outputs in batches of at most
+  ## metric_batch_size metrics.
+  ## This controls the size of writes that Telegraf sends to output plugins.
  metric_batch_size = 1000
+
  ## For failed writes, telegraf will cache metric_buffer_limit metrics for each
  ## output, and will flush this buffer on a successful write. Oldest metrics
  ## are dropped first when this buffer fills.
+  ## This buffer fills only when writes to the output plugin(s) fail.
  metric_buffer_limit = 10000

  ## Collection jitter is used to jitter the collection by a random amount.
@@ -57,10 +60,15 @@
  ## Precision will NOT be used for service inputs, such as logparser and statsd.
  ## Valid values are "ns", "us" (or "µs"), "ms", "s".
  precision = ""
-  ## Run telegraf in debug mode
+
+  ## Logging configuration:
+  ## Run telegraf with debug log messages.
  debug = false
-  ## Run telegraf in quiet mode
+  ## Run telegraf in quiet mode (error log messages only).
  quiet = false
+  ## Specify the log file name. The empty string means to log to stdout.
+  logfile = ""
+
  ## Override default hostname, if empty use os.Hostname()
  hostname = ""
  ## If set to true, do no set the "host" tag in the telegraf agent.
@@ -1064,8 +1072,6 @@
 #   #   "tasks",
 #   #   "messages",
 #   # ]
-#   ## Include mesos tasks statistics, default is false
-#   # slave_tasks = true


 # # Read metrics from one or many MongoDB servers
@@ -1442,25 +1448,29 @@
 # # Retrieves SNMP values from remote agents
 # [[inputs.snmp]]
 #   agents = [ "127.0.0.1:161" ]
+#   ## Timeout for each SNMP query.
 #   timeout = "5s"
+#   ## Number of retries to attempt within timeout.
+#   retries = 3
+#   ## SNMP version, values can be 1, 2, or 3
 #   version = 2
 #
-#   # SNMPv1 & SNMPv2 parameters
+#   ## SNMP community string.
 #   community = "public"
 #
-#   # SNMPv2 & SNMPv3 parameters
-#   max_repetitions = 50
+#   ## The GETBULK max-repetitions parameter
+#   max_repetitions = 10
 #
-#   # SNMPv3 parameters
+#   ## SNMPv3 auth parameters
 #   #sec_name = "myuser"
-#   #auth_protocol = "md5"         # Values: "MD5", "SHA", ""
-#   #auth_password = "password123"
-#   #sec_level = "authNoPriv"      # Values: "noAuthNoPriv", "authNoPriv", "authPriv"
+#   #auth_protocol = "md5"      # Values: "MD5", "SHA", ""
+#   #auth_password = "pass"
+#   #sec_level = "authNoPriv"   # Values: "noAuthNoPriv", "authNoPriv", "authPriv"
 #   #context_name = ""
-#   #priv_protocol = ""            # Values: "DES", "AES", ""
+#   #priv_protocol = ""         # Values: "DES", "AES", ""
 #   #priv_password = ""
 #
-#   # measurement name
+#   ## measurement name
 #   name = "system"
 #   [[inputs.snmp.field]]
 #     name = "hostname"
@@ -1475,7 +1485,7 @@
 #     oid = "HOST-RESOURCES-MIB::hrMemorySize"
 #
 #   [[inputs.snmp.table]]
-#     # measurement name
+#     ## measurement name
 #     name = "remote_servers"
 #     inherit_tags = [ "hostname" ]
 #     [[inputs.snmp.table.field]]
@@ -1490,7 +1500,7 @@
 #       oid = ".1.0.0.0.1.2"
 #
 #   [[inputs.snmp.table]]
-#     # auto populate table's fields using the MIB
+#     ## auto populate table's fields using the MIB
 #     oid = "HOST-RESOURCES-MIB::hrNetworkTable"


--- a/etc/telegraf_windows.conf
+++ b/etc/telegraf_windows.conf
@@ -42,10 +42,14 @@
  ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
  flush_jitter = "0s"

+  ## Logging configuration:
  ## Run telegraf in debug mode
  debug = false
  ## Run telegraf in quiet mode
  quiet = false
+  ## Specify the log file name. The empty string means to log to stdout.
+  logfile = "/Program Files/Telegraf/telegraf.log"
+
  ## Override default hostname, if empty use os.Hostname()
  hostname = ""

@@ -85,7 +89,7 @@
 # Windows Performance Counters plugin.
 # These are the recommended method of monitoring system metrics on windows,
 # as the regular system plugins (inputs.cpu, inputs.mem, etc.) rely on WMI,
-# which utilizes a lot of system resources.
+# which utilizes more system resources.
 #
 # See more configuration examples at:
 #   https://github.com/influxdata/telegraf/tree/master/plugins/inputs/win_perf_counters
@@ -95,70 +99,104 @@
    # Processor usage, alternative to native, reports on a per core.
    ObjectName = "Processor"
    Instances = ["*"]
-    Counters = ["% Idle Time", "% Interrupt Time", "% Privileged Time", "% User Time", "% Processor Time"]
+    Counters = [
+      "% Idle Time",
+      "% Interrupt Time",
+      "% Privileged Time",
+      "% User Time",
+      "% Processor Time",
+    ]
    Measurement = "win_cpu"
-    #IncludeTotal=false #Set to true to include _Total instance when querying for all (*).
+    # Set to true to include _Total instance when querying for all (*).
+    #IncludeTotal=false

  [[inputs.win_perf_counters.object]]
    # Disk times and queues
    ObjectName = "LogicalDisk"
    Instances = ["*"]
-    Counters = ["% Idle Time", "% Disk Time","% Disk Read Time", "% Disk Write Time", "% User Time", "Current Disk Queue Length"]
+    Counters = [
+      "% Idle Time",
+      "% Disk Time","% Disk Read Time",
+      "% Disk Write Time",
+      "% User Time",
+      "Current Disk Queue Length",
+    ]
    Measurement = "win_disk"
-    #IncludeTotal=false #Set to true to include _Total instance when querying for all (*).
+    # Set to true to include _Total instance when querying for all (*).
+    #IncludeTotal=false

  [[inputs.win_perf_counters.object]]
    ObjectName = "System"
-    Counters = ["Context Switches/sec","System Calls/sec"]
+    Counters = [
+      "Context Switches/sec",
+      "System Calls/sec",
+    ]
    Instances = ["------"]
    Measurement = "win_system"
-    #IncludeTotal=false #Set to true to include _Total instance when querying for all (*).
+    # Set to true to include _Total instance when querying for all (*).
+    #IncludeTotal=false

  [[inputs.win_perf_counters.object]]
-    # Example query where the Instance portion must be removed to get data back, such as from the Memory object.
+    # Example query where the Instance portion must be removed to get data back,
+    # such as from the Memory object.
    ObjectName = "Memory"
-    Counters = ["Available Bytes","Cache Faults/sec","Demand Zero Faults/sec","Page Faults/sec","Pages/sec","Transition Faults/sec","Pool Nonpaged Bytes","Pool Paged Bytes"]
-    Instances = ["------"] # Use 6 x - to remove the Instance bit from the query.
+    Counters = [
+      "Available Bytes",
+      "Cache Faults/sec",
+      "Demand Zero Faults/sec",
+      "Page Faults/sec",
+      "Pages/sec",
+      "Transition Faults/sec",
+      "Pool Nonpaged Bytes",
+      "Pool Paged Bytes",
+    ]
+    # Use 6 x - to remove the Instance bit from the query.
+    Instances = ["------"]
    Measurement = "win_mem"
-    #IncludeTotal=false #Set to true to include _Total instance when querying for all (*).
+    # Set to true to include _Total instance when querying for all (*).
+    #IncludeTotal=false


 # Windows system plugins using WMI (disabled by default, using
 # win_perf_counters over WMI is recommended)

-# Read metrics about cpu usage
-#[[inputs.cpu]]
-  ## Whether to report per-cpu stats or not
-  #percpu = true
-  ## Whether to report total system cpu stats or not
-  #totalcpu = true
-  ## Comment this line if you want the raw CPU time metrics
-  #fielddrop = ["time_*"]
+# # Read metrics about cpu usage
+# [[inputs.cpu]]
+#   ## Whether to report per-cpu stats or not
+#   percpu = true
+#   ## Whether to report total system cpu stats or not
+#   totalcpu = true
+#   ## Comment this line if you want the raw CPU time metrics
+#   fielddrop = ["time_*"]

-# Read metrics about disk usage by mount point
-#[[inputs.disk]]
-  ## By default, telegraf gather stats for all mountpoints.
-  ## Setting mountpoints will restrict the stats to the specified mountpoints.
-  ## mount_points=["/"]

-  ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
-  ## present on /run, /var/run, /dev/shm or /dev).
-  #ignore_fs = ["tmpfs", "devtmpfs"]
+# # Read metrics about disk usage by mount point
+# [[inputs.disk]]
+#   ## By default, telegraf gathers stats for all mountpoints.
+#   ## Setting mountpoints will restrict the stats to the specified mountpoints.
+#   ## mount_points=["/"]
+#
+#   ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
+#   ## present on /run, /var/run, /dev/shm or /dev).
+#   # ignore_fs = ["tmpfs", "devtmpfs"]

-# Read metrics about disk IO by device
-#[[inputs.diskio]]
-  ## By default, telegraf will gather stats for all devices including
-  ## disk partitions.
-  ## Setting devices will restrict the stats to the specified devices.
-  ## devices = ["sda", "sdb"]
-  ## Uncomment the following line if you do not need disk serial numbers.
-  ## skip_serial_number = true

-# Read metrics about memory usage
-#[[inputs.mem]]
-  # no configuration
+# # Read metrics about disk IO by device
+# [[inputs.diskio]]
+#   ## By default, telegraf will gather stats for all devices including
+#   ## disk partitions.
+#   ## Setting devices will restrict the stats to the specified devices.
+#   ## devices = ["sda", "sdb"]
+#   ## Uncomment the following line if you do not need disk serial numbers.
+#   ## skip_serial_number = true

-# Read metrics about swap memory usage
-#[[inputs.swap]]
-  # no configuration
+
+# # Read metrics about memory usage
+# [[inputs.mem]]
+#   # no configuration
+
+
+# # Read metrics about swap memory usage
+# [[inputs.swap]]
+#   # no configuration