Improve startup error reporting when ran as Windows service (#4291)

This commit is contained in:
Vlasta Hajek 2019-08-28 23:34:44 +02:00 committed by Daniel Nelson
parent 8c6ddcd7fa
commit 7ec54d4be9
8 changed files with 295 additions and 33 deletions

View File

@ -150,8 +150,6 @@ func runAgent(ctx context.Context,
inputFilters []string,
outputFilters []string,
) error {
// Setup default logging. This may need to change after reading the config
// file, but we can configure it to use our logger implementation now.
log.Printf("I! Starting Telegraf %s", version)
// If no other options are specified, load the config file and run.
@ -195,6 +193,7 @@ func runAgent(ctx context.Context,
logConfig := logger.LogConfig{
Debug: ag.Config.Agent.Debug || *fDebug,
Quiet: ag.Config.Agent.Quiet || *fQuiet,
LogTarget: ag.Config.Agent.LogTarget,
Logfile: ag.Config.Agent.Logfile,
RotationInterval: ag.Config.Agent.LogfileRotationInterval,
RotationMaxSize: ag.Config.Agent.LogfileRotationMaxSize,
@ -429,18 +428,28 @@ func main() {
// may not have an interactive session, e.g. installing from Ansible.
if *fService != "" {
if *fConfig != "" {
(*svcConfig).Arguments = []string{"--config", *fConfig}
svcConfig.Arguments = []string{"--config", *fConfig}
}
if *fConfigDirectory != "" {
(*svcConfig).Arguments = append((*svcConfig).Arguments, "--config-directory", *fConfigDirectory)
svcConfig.Arguments = append(svcConfig.Arguments, "--config-directory", *fConfigDirectory)
}
//set servicename to service cmd line, to have a custom name after relaunch as a service
svcConfig.Arguments = append(svcConfig.Arguments, "--service-name", *fServiceName)
err := service.Control(s, *fService)
if err != nil {
log.Fatal("E! " + err.Error())
}
os.Exit(0)
} else {
winlogger, err := s.Logger(nil)
if err == nil {
//When in service mode, register eventlog target andd setup default logging to eventlog
logger.RegisterEventLogger(winlogger)
logger.SetupLogging(logger.LogConfig{LogTarget: logger.LogTargetEventlog})
}
err = s.Run()
if err != nil {
log.Println("E! " + err.Error())
}

View File

@ -141,8 +141,12 @@ The agent table configures Telegraf and the defaults used across all plugins.
- **quiet**:
Log only error level messages.
- **logtarget**:
Log target - `file`, `stderr` or `eventlog` (Windows only).
The empty string means to log to stderr.
- **logfile**:
Log file name, the empty string means to log to stderr.
Log file name.
- **logfile_rotation_interval**:
The logfile will be rotated after the time interval specified. When set to

View File

@ -56,8 +56,13 @@ You can install multiple telegraf instances with --service-name flag:
> C:\"Program Files"\Telegraf\telegraf.exe --service uninstall --service-name telegraf-1
```
Troubleshooting common error #1067
## Troubleshooting
When Telegraf runs as a Windows service, Telegraf logs messages to Windows events log before configuration file with logging settings is loaded.
Check event log for an error reported by `telegraf` service in case of Telegraf service reports failure on its start: Event Viewer->Windows Logs->Application
**Troubleshooting common error #1067**
When installing as service in Windows, always double check to specify full path of the config file, otherwise windows service will fail to start
--config C:\"Program Files"\Telegraf\telegraf.conf
--config "C:\Program Files\Telegraf\telegraf.conf"

View File

@ -146,7 +146,10 @@ type AgentConfig struct {
// Quiet is the option for running in quiet mode
Quiet bool `toml:"quiet"`
// Log file name, the empty string means to log to stderr.
// Log target - file, stderr, eventlog (Windows only). The empty string means to log to stderr.
LogTarget string `toml:"logtarget"`
// Log file name .
Logfile string `toml:"logfile"`
// The file will be rotated after the time interval specified. When set

49
logger/event_logger.go Normal file
View File

@ -0,0 +1,49 @@
package logger
import (
"io"
"strings"
"github.com/influxdata/wlog"
"github.com/kardianos/service"
)
const (
LogTargetEventlog = "eventlog"
)
type eventLogger struct {
logger service.Logger
}
func (t *eventLogger) Write(b []byte) (n int, err error) {
loc := prefixRegex.FindIndex(b)
n = len(b)
if loc == nil {
err = t.logger.Info(b)
} else if n > 2 { //skip empty log messages
line := strings.Trim(string(b[loc[1]:]), " \t\r\n")
switch rune(b[loc[0]]) {
case 'I':
err = t.logger.Info(line)
case 'W':
err = t.logger.Warning(line)
case 'E':
err = t.logger.Error(line)
}
}
return
}
type eventLoggerCreator struct {
serviceLogger service.Logger
}
func (e *eventLoggerCreator) CreateLogger(config LogConfig) (io.Writer, error) {
return wlog.NewWriter(&eventLogger{logger: e.serviceLogger}), nil
}
func RegisterEventLogger(serviceLogger service.Logger) {
registerLogger(LogTargetEventlog, &eventLoggerCreator{serviceLogger: serviceLogger})
}

100
logger/event_logger_test.go Normal file
View File

@ -0,0 +1,100 @@
//+build windows
package logger
import (
"bytes"
"encoding/xml"
"log"
"os/exec"
"testing"
"time"
"github.com/kardianos/service"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type Levels int
const (
Info Levels = iota + 1
Warning
Error
)
type Event struct {
Message string `xml:"EventData>Data"`
Level Levels `xml:"System>EventID"`
}
func getEventLog(t *testing.T, since time.Time) []Event {
timeStr := since.UTC().Format(time.RFC3339)
cmd := exec.Command("wevtutil", "qe", "Application", "/rd:true", "/q:Event[System[TimeCreated[@SystemTime >= '"+timeStr+"'] and Provider[@Name='Telegraf']]]")
var out bytes.Buffer
cmd.Stdout = &out
err := cmd.Run()
require.NoError(t, err)
xmlStr := "<events>" + out.String() + "</events>"
var events struct {
Events []Event `xml:"Event"`
}
err = xml.Unmarshal([]byte(xmlStr), &events)
require.NoError(t, err)
return events.Events
}
func TestEventLog(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
prepareLogger(t)
config := LogConfig{
LogTarget: LogTargetEventlog,
Logfile: "",
}
SetupLogging(config)
now := time.Now()
log.Println("I! Info message")
log.Println("W! Warn message")
log.Println("E! Err message")
events := getEventLog(t, now)
assert.Len(t, events, 3)
assert.Contains(t, events, Event{Message: "Info message", Level: Info})
assert.Contains(t, events, Event{Message: "Warn message", Level: Warning})
assert.Contains(t, events, Event{Message: "Err message", Level: Error})
}
func TestRestrictedEventLog(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
prepareLogger(t)
config := LogConfig{
LogTarget: LogTargetEventlog,
Quiet: true,
}
SetupLogging(config)
//separate previous log messages by small delay
time.Sleep(time.Second)
now := time.Now()
log.Println("I! Info message")
log.Println("W! Warning message")
log.Println("E! Error message")
events := getEventLog(t, now)
assert.Len(t, events, 1)
assert.Contains(t, events, Event{Message: "Error message", Level: Error})
}
func prepareLogger(t *testing.T) {
svc, err := service.New(nil, &service.Config{Name: "Telegraf"})
require.NoError(t, err)
svcLogger, err := svc.SystemLogger(nil)
require.NoError(t, err)
require.NotNil(t, svcLogger)
registerLogger(LogTargetEventlog, &eventLoggerCreator{serviceLogger: svcLogger})
}

View File

@ -2,6 +2,7 @@ package logger
import (
"errors"
"io"
"log"
"os"
@ -15,13 +16,10 @@ import (
var prefixRegex = regexp.MustCompile("^[DIWE]!")
// newTelegrafWriter returns a logging-wrapped writer.
func newTelegrafWriter(w io.Writer) io.Writer {
return &telegrafLog{
writer: wlog.NewWriter(w),
internalWriter: w,
}
}
const (
LogTargetFile = "file"
LogTargetStderr = "stderr"
)
// LogConfig contains the log configuration settings
type LogConfig struct {
@ -29,6 +27,8 @@ type LogConfig struct {
Debug bool
//will set the log level to ERROR
Quiet bool
//stderr, stdout, file or eventlog (Windows only)
LogTarget string
// will direct the logging output to a file. Empty string is
// interpreted as stderr. If there is an error opening the file the
// logger will fallback to stderr
@ -41,6 +41,19 @@ type LogConfig struct {
RotationMaxArchives int
}
type LoggerCreator interface {
CreateLogger(config LogConfig) (io.Writer, error)
}
var loggerRegistry map[string]LoggerCreator
func registerLogger(name string, loggerCreator LoggerCreator) {
if loggerRegistry == nil {
loggerRegistry = make(map[string]LoggerCreator)
}
loggerRegistry[name] = loggerCreator
}
type telegrafLog struct {
writer io.Writer
internalWriter io.Writer
@ -57,11 +70,25 @@ func (t *telegrafLog) Write(b []byte) (n int, err error) {
}
func (t *telegrafLog) Close() error {
closer, isCloser := t.internalWriter.(io.Closer)
if !isCloser {
return errors.New("the underlying writer cannot be closed")
var stdErrWriter io.Writer
stdErrWriter = os.Stderr
// avoid closing stderr
if t.internalWriter != stdErrWriter {
closer, isCloser := t.internalWriter.(io.Closer)
if !isCloser {
return errors.New("the underlying writer cannot be closed")
}
return closer.Close()
}
return nil
}
// newTelegrafWriter returns a logging-wrapped writer.
func newTelegrafWriter(w io.Writer) io.Writer {
return &telegrafLog{
writer: wlog.NewWriter(w),
internalWriter: w,
}
return closer.Close()
}
// SetupLogging configures the logging output.
@ -69,6 +96,39 @@ func SetupLogging(config LogConfig) {
newLogWriter(config)
}
type telegrafLogCreator struct {
}
func (t *telegrafLogCreator) CreateLogger(config LogConfig) (io.Writer, error) {
var writer, defaultWriter io.Writer
defaultWriter = os.Stderr
switch config.LogTarget {
case LogTargetFile:
if config.Logfile != "" {
var err error
if writer, err = rotate.NewFileWriter(config.Logfile, config.RotationInterval.Duration, config.RotationMaxSize.Size, config.RotationMaxArchives); err != nil {
log.Printf("E! Unable to open %s (%s), using stderr", config.Logfile, err)
writer = defaultWriter
}
} else {
log.Print("E! Empty logfile, using stderr")
writer = defaultWriter
}
case LogTargetStderr, "":
writer = defaultWriter
default:
log.Printf("E! Unsupported logtarget: %s, using stderr", config.LogTarget)
writer = defaultWriter
}
return newTelegrafWriter(writer), nil
}
// Keep track what is actually set as a log output, because log package doesn't provide a getter.
// It allows closing previous writer if re-set and have possibility to test what is actually set
var actualLogger io.Writer
func newLogWriter(config LogConfig) io.Writer {
log.SetFlags(0)
if config.Debug {
@ -77,19 +137,29 @@ func newLogWriter(config LogConfig) io.Writer {
if config.Quiet {
wlog.SetLevel(wlog.ERROR)
}
var writer io.Writer
if config.Logfile != "" {
var err error
if writer, err = rotate.NewFileWriter(config.Logfile, config.RotationInterval.Duration, config.RotationMaxSize.Size, config.RotationMaxArchives); err != nil {
log.Printf("E! Unable to open %s (%s), using stderr", config.Logfile, err)
writer = os.Stderr
}
} else {
writer = os.Stderr
if !config.Debug && !config.Quiet {
wlog.SetLevel(wlog.INFO)
}
var logWriter io.Writer
if logCreator, ok := loggerRegistry[config.LogTarget]; ok {
logWriter, _ = logCreator.CreateLogger(config)
}
if logWriter == nil {
logWriter, _ = (&telegrafLogCreator{}).CreateLogger(config)
}
telegrafLog := newTelegrafWriter(writer)
log.SetOutput(telegrafLog)
return telegrafLog
if closer, isCloser := actualLogger.(io.Closer); isCloser {
closer.Close()
}
log.SetOutput(logWriter)
actualLogger = logWriter
return logWriter
}
func init() {
tlc := &telegrafLogCreator{}
registerLogger("", tlc)
registerLogger(LogTargetStderr, tlc)
registerLogger(LogTargetFile, tlc)
}

View File

@ -85,7 +85,7 @@ func TestWriteToTruncatedFile(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, f[19:], []byte("Z I! TEST\n"))
tmpf, err := os.OpenFile(tmpfile.Name(), os.O_TRUNC, 0644)
tmpf, err := os.OpenFile(tmpfile.Name(), os.O_RDWR|os.O_TRUNC, 0644)
assert.NoError(t, err)
assert.NoError(t, tmpf.Close())
@ -100,6 +100,7 @@ func TestWriteToFileInRotation(t *testing.T) {
tempDir, err := ioutil.TempDir("", "LogRotation")
require.NoError(t, err)
config := createBasicLogConfig(filepath.Join(tempDir, "test.log"))
config.LogTarget = LogTargetFile
config.RotationMaxSize = internal.Size{Size: int64(30)}
writer := newLogWriter(config)
// Close the writer here, otherwise the temp folder cannot be deleted because the current log file is in use.
@ -113,6 +114,26 @@ func TestWriteToFileInRotation(t *testing.T) {
assert.Equal(t, 2, len(files))
}
func TestLogTargetSettings(t *testing.T) {
config := LogConfig{
LogTarget: "",
Quiet: true,
}
SetupLogging(config)
logger, isTelegrafLogger := actualLogger.(*telegrafLog)
assert.True(t, isTelegrafLogger)
assert.Equal(t, logger.internalWriter, os.Stderr)
config = LogConfig{
LogTarget: "stderr",
Quiet: true,
}
SetupLogging(config)
logger, isTelegrafLogger = actualLogger.(*telegrafLog)
assert.True(t, isTelegrafLogger)
assert.Equal(t, logger.internalWriter, os.Stderr)
}
func BenchmarkTelegrafLogWrite(b *testing.B) {
var msg = []byte("test")
var buf bytes.Buffer
@ -126,6 +147,7 @@ func BenchmarkTelegrafLogWrite(b *testing.B) {
func createBasicLogConfig(filename string) LogConfig {
return LogConfig{
Logfile: filename,
LogTarget: LogTargetFile,
RotationMaxArchives: -1,
}
}