Improve startup error reporting when ran as Windows service (#4291)

This commit is contained in:
Vlasta Hajek 2019-08-28 23:34:44 +02:00 committed by Daniel Nelson
parent 8c6ddcd7fa
commit 7ec54d4be9
8 changed files with 295 additions and 33 deletions

View File

@ -150,8 +150,6 @@ func runAgent(ctx context.Context,
inputFilters []string, inputFilters []string,
outputFilters []string, outputFilters []string,
) error { ) error {
// Setup default logging. This may need to change after reading the config
// file, but we can configure it to use our logger implementation now.
log.Printf("I! Starting Telegraf %s", version) log.Printf("I! Starting Telegraf %s", version)
// If no other options are specified, load the config file and run. // If no other options are specified, load the config file and run.
@ -195,6 +193,7 @@ func runAgent(ctx context.Context,
logConfig := logger.LogConfig{ logConfig := logger.LogConfig{
Debug: ag.Config.Agent.Debug || *fDebug, Debug: ag.Config.Agent.Debug || *fDebug,
Quiet: ag.Config.Agent.Quiet || *fQuiet, Quiet: ag.Config.Agent.Quiet || *fQuiet,
LogTarget: ag.Config.Agent.LogTarget,
Logfile: ag.Config.Agent.Logfile, Logfile: ag.Config.Agent.Logfile,
RotationInterval: ag.Config.Agent.LogfileRotationInterval, RotationInterval: ag.Config.Agent.LogfileRotationInterval,
RotationMaxSize: ag.Config.Agent.LogfileRotationMaxSize, RotationMaxSize: ag.Config.Agent.LogfileRotationMaxSize,
@ -429,18 +428,28 @@ func main() {
// may not have an interactive session, e.g. installing from Ansible. // may not have an interactive session, e.g. installing from Ansible.
if *fService != "" { if *fService != "" {
if *fConfig != "" { if *fConfig != "" {
(*svcConfig).Arguments = []string{"--config", *fConfig} svcConfig.Arguments = []string{"--config", *fConfig}
} }
if *fConfigDirectory != "" { if *fConfigDirectory != "" {
(*svcConfig).Arguments = append((*svcConfig).Arguments, "--config-directory", *fConfigDirectory) svcConfig.Arguments = append(svcConfig.Arguments, "--config-directory", *fConfigDirectory)
} }
//set servicename to service cmd line, to have a custom name after relaunch as a service
svcConfig.Arguments = append(svcConfig.Arguments, "--service-name", *fServiceName)
err := service.Control(s, *fService) err := service.Control(s, *fService)
if err != nil { if err != nil {
log.Fatal("E! " + err.Error()) log.Fatal("E! " + err.Error())
} }
os.Exit(0) os.Exit(0)
} else { } else {
winlogger, err := s.Logger(nil)
if err == nil {
//When in service mode, register eventlog target andd setup default logging to eventlog
logger.RegisterEventLogger(winlogger)
logger.SetupLogging(logger.LogConfig{LogTarget: logger.LogTargetEventlog})
}
err = s.Run() err = s.Run()
if err != nil { if err != nil {
log.Println("E! " + err.Error()) log.Println("E! " + err.Error())
} }

View File

@ -141,8 +141,12 @@ The agent table configures Telegraf and the defaults used across all plugins.
- **quiet**: - **quiet**:
Log only error level messages. Log only error level messages.
- **logtarget**:
Log target - `file`, `stderr` or `eventlog` (Windows only).
The empty string means to log to stderr.
- **logfile**: - **logfile**:
Log file name, the empty string means to log to stderr. Log file name.
- **logfile_rotation_interval**: - **logfile_rotation_interval**:
The logfile will be rotated after the time interval specified. When set to The logfile will be rotated after the time interval specified. When set to

View File

@ -56,8 +56,13 @@ You can install multiple telegraf instances with --service-name flag:
> C:\"Program Files"\Telegraf\telegraf.exe --service uninstall --service-name telegraf-1 > C:\"Program Files"\Telegraf\telegraf.exe --service uninstall --service-name telegraf-1
``` ```
Troubleshooting common error #1067 ## Troubleshooting
When Telegraf runs as a Windows service, Telegraf logs messages to Windows events log before configuration file with logging settings is loaded.
Check event log for an error reported by `telegraf` service in case of Telegraf service reports failure on its start: Event Viewer->Windows Logs->Application
**Troubleshooting common error #1067**
When installing as service in Windows, always double check to specify full path of the config file, otherwise windows service will fail to start When installing as service in Windows, always double check to specify full path of the config file, otherwise windows service will fail to start
--config C:\"Program Files"\Telegraf\telegraf.conf --config "C:\Program Files\Telegraf\telegraf.conf"

View File

@ -146,7 +146,10 @@ type AgentConfig struct {
// Quiet is the option for running in quiet mode // Quiet is the option for running in quiet mode
Quiet bool `toml:"quiet"` Quiet bool `toml:"quiet"`
// Log file name, the empty string means to log to stderr. // Log target - file, stderr, eventlog (Windows only). The empty string means to log to stderr.
LogTarget string `toml:"logtarget"`
// Log file name .
Logfile string `toml:"logfile"` Logfile string `toml:"logfile"`
// The file will be rotated after the time interval specified. When set // The file will be rotated after the time interval specified. When set

49
logger/event_logger.go Normal file
View File

@ -0,0 +1,49 @@
package logger
import (
"io"
"strings"
"github.com/influxdata/wlog"
"github.com/kardianos/service"
)
const (
LogTargetEventlog = "eventlog"
)
type eventLogger struct {
logger service.Logger
}
func (t *eventLogger) Write(b []byte) (n int, err error) {
loc := prefixRegex.FindIndex(b)
n = len(b)
if loc == nil {
err = t.logger.Info(b)
} else if n > 2 { //skip empty log messages
line := strings.Trim(string(b[loc[1]:]), " \t\r\n")
switch rune(b[loc[0]]) {
case 'I':
err = t.logger.Info(line)
case 'W':
err = t.logger.Warning(line)
case 'E':
err = t.logger.Error(line)
}
}
return
}
type eventLoggerCreator struct {
serviceLogger service.Logger
}
func (e *eventLoggerCreator) CreateLogger(config LogConfig) (io.Writer, error) {
return wlog.NewWriter(&eventLogger{logger: e.serviceLogger}), nil
}
func RegisterEventLogger(serviceLogger service.Logger) {
registerLogger(LogTargetEventlog, &eventLoggerCreator{serviceLogger: serviceLogger})
}

100
logger/event_logger_test.go Normal file
View File

@ -0,0 +1,100 @@
//+build windows
package logger
import (
"bytes"
"encoding/xml"
"log"
"os/exec"
"testing"
"time"
"github.com/kardianos/service"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type Levels int
const (
Info Levels = iota + 1
Warning
Error
)
type Event struct {
Message string `xml:"EventData>Data"`
Level Levels `xml:"System>EventID"`
}
func getEventLog(t *testing.T, since time.Time) []Event {
timeStr := since.UTC().Format(time.RFC3339)
cmd := exec.Command("wevtutil", "qe", "Application", "/rd:true", "/q:Event[System[TimeCreated[@SystemTime >= '"+timeStr+"'] and Provider[@Name='Telegraf']]]")
var out bytes.Buffer
cmd.Stdout = &out
err := cmd.Run()
require.NoError(t, err)
xmlStr := "<events>" + out.String() + "</events>"
var events struct {
Events []Event `xml:"Event"`
}
err = xml.Unmarshal([]byte(xmlStr), &events)
require.NoError(t, err)
return events.Events
}
func TestEventLog(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
prepareLogger(t)
config := LogConfig{
LogTarget: LogTargetEventlog,
Logfile: "",
}
SetupLogging(config)
now := time.Now()
log.Println("I! Info message")
log.Println("W! Warn message")
log.Println("E! Err message")
events := getEventLog(t, now)
assert.Len(t, events, 3)
assert.Contains(t, events, Event{Message: "Info message", Level: Info})
assert.Contains(t, events, Event{Message: "Warn message", Level: Warning})
assert.Contains(t, events, Event{Message: "Err message", Level: Error})
}
func TestRestrictedEventLog(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
prepareLogger(t)
config := LogConfig{
LogTarget: LogTargetEventlog,
Quiet: true,
}
SetupLogging(config)
//separate previous log messages by small delay
time.Sleep(time.Second)
now := time.Now()
log.Println("I! Info message")
log.Println("W! Warning message")
log.Println("E! Error message")
events := getEventLog(t, now)
assert.Len(t, events, 1)
assert.Contains(t, events, Event{Message: "Error message", Level: Error})
}
func prepareLogger(t *testing.T) {
svc, err := service.New(nil, &service.Config{Name: "Telegraf"})
require.NoError(t, err)
svcLogger, err := svc.SystemLogger(nil)
require.NoError(t, err)
require.NotNil(t, svcLogger)
registerLogger(LogTargetEventlog, &eventLoggerCreator{serviceLogger: svcLogger})
}

View File

@ -2,6 +2,7 @@ package logger
import ( import (
"errors" "errors"
"io" "io"
"log" "log"
"os" "os"
@ -15,13 +16,10 @@ import (
var prefixRegex = regexp.MustCompile("^[DIWE]!") var prefixRegex = regexp.MustCompile("^[DIWE]!")
// newTelegrafWriter returns a logging-wrapped writer. const (
func newTelegrafWriter(w io.Writer) io.Writer { LogTargetFile = "file"
return &telegrafLog{ LogTargetStderr = "stderr"
writer: wlog.NewWriter(w), )
internalWriter: w,
}
}
// LogConfig contains the log configuration settings // LogConfig contains the log configuration settings
type LogConfig struct { type LogConfig struct {
@ -29,6 +27,8 @@ type LogConfig struct {
Debug bool Debug bool
//will set the log level to ERROR //will set the log level to ERROR
Quiet bool Quiet bool
//stderr, stdout, file or eventlog (Windows only)
LogTarget string
// will direct the logging output to a file. Empty string is // will direct the logging output to a file. Empty string is
// interpreted as stderr. If there is an error opening the file the // interpreted as stderr. If there is an error opening the file the
// logger will fallback to stderr // logger will fallback to stderr
@ -41,6 +41,19 @@ type LogConfig struct {
RotationMaxArchives int RotationMaxArchives int
} }
type LoggerCreator interface {
CreateLogger(config LogConfig) (io.Writer, error)
}
var loggerRegistry map[string]LoggerCreator
func registerLogger(name string, loggerCreator LoggerCreator) {
if loggerRegistry == nil {
loggerRegistry = make(map[string]LoggerCreator)
}
loggerRegistry[name] = loggerCreator
}
type telegrafLog struct { type telegrafLog struct {
writer io.Writer writer io.Writer
internalWriter io.Writer internalWriter io.Writer
@ -57,11 +70,25 @@ func (t *telegrafLog) Write(b []byte) (n int, err error) {
} }
func (t *telegrafLog) Close() error { func (t *telegrafLog) Close() error {
closer, isCloser := t.internalWriter.(io.Closer) var stdErrWriter io.Writer
if !isCloser { stdErrWriter = os.Stderr
return errors.New("the underlying writer cannot be closed") // avoid closing stderr
if t.internalWriter != stdErrWriter {
closer, isCloser := t.internalWriter.(io.Closer)
if !isCloser {
return errors.New("the underlying writer cannot be closed")
}
return closer.Close()
}
return nil
}
// newTelegrafWriter returns a logging-wrapped writer.
func newTelegrafWriter(w io.Writer) io.Writer {
return &telegrafLog{
writer: wlog.NewWriter(w),
internalWriter: w,
} }
return closer.Close()
} }
// SetupLogging configures the logging output. // SetupLogging configures the logging output.
@ -69,6 +96,39 @@ func SetupLogging(config LogConfig) {
newLogWriter(config) newLogWriter(config)
} }
type telegrafLogCreator struct {
}
func (t *telegrafLogCreator) CreateLogger(config LogConfig) (io.Writer, error) {
var writer, defaultWriter io.Writer
defaultWriter = os.Stderr
switch config.LogTarget {
case LogTargetFile:
if config.Logfile != "" {
var err error
if writer, err = rotate.NewFileWriter(config.Logfile, config.RotationInterval.Duration, config.RotationMaxSize.Size, config.RotationMaxArchives); err != nil {
log.Printf("E! Unable to open %s (%s), using stderr", config.Logfile, err)
writer = defaultWriter
}
} else {
log.Print("E! Empty logfile, using stderr")
writer = defaultWriter
}
case LogTargetStderr, "":
writer = defaultWriter
default:
log.Printf("E! Unsupported logtarget: %s, using stderr", config.LogTarget)
writer = defaultWriter
}
return newTelegrafWriter(writer), nil
}
// Keep track what is actually set as a log output, because log package doesn't provide a getter.
// It allows closing previous writer if re-set and have possibility to test what is actually set
var actualLogger io.Writer
func newLogWriter(config LogConfig) io.Writer { func newLogWriter(config LogConfig) io.Writer {
log.SetFlags(0) log.SetFlags(0)
if config.Debug { if config.Debug {
@ -77,19 +137,29 @@ func newLogWriter(config LogConfig) io.Writer {
if config.Quiet { if config.Quiet {
wlog.SetLevel(wlog.ERROR) wlog.SetLevel(wlog.ERROR)
} }
if !config.Debug && !config.Quiet {
var writer io.Writer wlog.SetLevel(wlog.INFO)
if config.Logfile != "" { }
var err error var logWriter io.Writer
if writer, err = rotate.NewFileWriter(config.Logfile, config.RotationInterval.Duration, config.RotationMaxSize.Size, config.RotationMaxArchives); err != nil { if logCreator, ok := loggerRegistry[config.LogTarget]; ok {
log.Printf("E! Unable to open %s (%s), using stderr", config.Logfile, err) logWriter, _ = logCreator.CreateLogger(config)
writer = os.Stderr }
} if logWriter == nil {
} else { logWriter, _ = (&telegrafLogCreator{}).CreateLogger(config)
writer = os.Stderr
} }
telegrafLog := newTelegrafWriter(writer) if closer, isCloser := actualLogger.(io.Closer); isCloser {
log.SetOutput(telegrafLog) closer.Close()
return telegrafLog }
log.SetOutput(logWriter)
actualLogger = logWriter
return logWriter
}
func init() {
tlc := &telegrafLogCreator{}
registerLogger("", tlc)
registerLogger(LogTargetStderr, tlc)
registerLogger(LogTargetFile, tlc)
} }

View File

@ -85,7 +85,7 @@ func TestWriteToTruncatedFile(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, f[19:], []byte("Z I! TEST\n")) assert.Equal(t, f[19:], []byte("Z I! TEST\n"))
tmpf, err := os.OpenFile(tmpfile.Name(), os.O_TRUNC, 0644) tmpf, err := os.OpenFile(tmpfile.Name(), os.O_RDWR|os.O_TRUNC, 0644)
assert.NoError(t, err) assert.NoError(t, err)
assert.NoError(t, tmpf.Close()) assert.NoError(t, tmpf.Close())
@ -100,6 +100,7 @@ func TestWriteToFileInRotation(t *testing.T) {
tempDir, err := ioutil.TempDir("", "LogRotation") tempDir, err := ioutil.TempDir("", "LogRotation")
require.NoError(t, err) require.NoError(t, err)
config := createBasicLogConfig(filepath.Join(tempDir, "test.log")) config := createBasicLogConfig(filepath.Join(tempDir, "test.log"))
config.LogTarget = LogTargetFile
config.RotationMaxSize = internal.Size{Size: int64(30)} config.RotationMaxSize = internal.Size{Size: int64(30)}
writer := newLogWriter(config) writer := newLogWriter(config)
// Close the writer here, otherwise the temp folder cannot be deleted because the current log file is in use. // Close the writer here, otherwise the temp folder cannot be deleted because the current log file is in use.
@ -113,6 +114,26 @@ func TestWriteToFileInRotation(t *testing.T) {
assert.Equal(t, 2, len(files)) assert.Equal(t, 2, len(files))
} }
func TestLogTargetSettings(t *testing.T) {
config := LogConfig{
LogTarget: "",
Quiet: true,
}
SetupLogging(config)
logger, isTelegrafLogger := actualLogger.(*telegrafLog)
assert.True(t, isTelegrafLogger)
assert.Equal(t, logger.internalWriter, os.Stderr)
config = LogConfig{
LogTarget: "stderr",
Quiet: true,
}
SetupLogging(config)
logger, isTelegrafLogger = actualLogger.(*telegrafLog)
assert.True(t, isTelegrafLogger)
assert.Equal(t, logger.internalWriter, os.Stderr)
}
func BenchmarkTelegrafLogWrite(b *testing.B) { func BenchmarkTelegrafLogWrite(b *testing.B) {
var msg = []byte("test") var msg = []byte("test")
var buf bytes.Buffer var buf bytes.Buffer
@ -126,6 +147,7 @@ func BenchmarkTelegrafLogWrite(b *testing.B) {
func createBasicLogConfig(filename string) LogConfig { func createBasicLogConfig(filename string) LogConfig {
return LogConfig{ return LogConfig{
Logfile: filename, Logfile: filename,
LogTarget: LogTargetFile,
RotationMaxArchives: -1, RotationMaxArchives: -1,
} }
} }