Improve startup error reporting when ran as Windows service (#4291)
This commit is contained in:
parent
8c6ddcd7fa
commit
7ec54d4be9
|
@ -150,8 +150,6 @@ func runAgent(ctx context.Context,
|
|||
inputFilters []string,
|
||||
outputFilters []string,
|
||||
) error {
|
||||
// Setup default logging. This may need to change after reading the config
|
||||
// file, but we can configure it to use our logger implementation now.
|
||||
log.Printf("I! Starting Telegraf %s", version)
|
||||
|
||||
// If no other options are specified, load the config file and run.
|
||||
|
@ -195,6 +193,7 @@ func runAgent(ctx context.Context,
|
|||
logConfig := logger.LogConfig{
|
||||
Debug: ag.Config.Agent.Debug || *fDebug,
|
||||
Quiet: ag.Config.Agent.Quiet || *fQuiet,
|
||||
LogTarget: ag.Config.Agent.LogTarget,
|
||||
Logfile: ag.Config.Agent.Logfile,
|
||||
RotationInterval: ag.Config.Agent.LogfileRotationInterval,
|
||||
RotationMaxSize: ag.Config.Agent.LogfileRotationMaxSize,
|
||||
|
@ -429,18 +428,28 @@ func main() {
|
|||
// may not have an interactive session, e.g. installing from Ansible.
|
||||
if *fService != "" {
|
||||
if *fConfig != "" {
|
||||
(*svcConfig).Arguments = []string{"--config", *fConfig}
|
||||
svcConfig.Arguments = []string{"--config", *fConfig}
|
||||
}
|
||||
if *fConfigDirectory != "" {
|
||||
(*svcConfig).Arguments = append((*svcConfig).Arguments, "--config-directory", *fConfigDirectory)
|
||||
svcConfig.Arguments = append(svcConfig.Arguments, "--config-directory", *fConfigDirectory)
|
||||
}
|
||||
//set servicename to service cmd line, to have a custom name after relaunch as a service
|
||||
svcConfig.Arguments = append(svcConfig.Arguments, "--service-name", *fServiceName)
|
||||
|
||||
err := service.Control(s, *fService)
|
||||
if err != nil {
|
||||
log.Fatal("E! " + err.Error())
|
||||
}
|
||||
os.Exit(0)
|
||||
} else {
|
||||
winlogger, err := s.Logger(nil)
|
||||
if err == nil {
|
||||
//When in service mode, register eventlog target andd setup default logging to eventlog
|
||||
logger.RegisterEventLogger(winlogger)
|
||||
logger.SetupLogging(logger.LogConfig{LogTarget: logger.LogTargetEventlog})
|
||||
}
|
||||
err = s.Run()
|
||||
|
||||
if err != nil {
|
||||
log.Println("E! " + err.Error())
|
||||
}
|
||||
|
|
|
@ -141,8 +141,12 @@ The agent table configures Telegraf and the defaults used across all plugins.
|
|||
- **quiet**:
|
||||
Log only error level messages.
|
||||
|
||||
- **logtarget**:
|
||||
Log target - `file`, `stderr` or `eventlog` (Windows only).
|
||||
The empty string means to log to stderr.
|
||||
|
||||
- **logfile**:
|
||||
Log file name, the empty string means to log to stderr.
|
||||
Log file name.
|
||||
|
||||
- **logfile_rotation_interval**:
|
||||
The logfile will be rotated after the time interval specified. When set to
|
||||
|
|
|
@ -56,8 +56,13 @@ You can install multiple telegraf instances with --service-name flag:
|
|||
> C:\"Program Files"\Telegraf\telegraf.exe --service uninstall --service-name telegraf-1
|
||||
```
|
||||
|
||||
Troubleshooting common error #1067
|
||||
## Troubleshooting
|
||||
|
||||
When Telegraf runs as a Windows service, Telegraf logs messages to Windows events log before configuration file with logging settings is loaded.
|
||||
Check event log for an error reported by `telegraf` service in case of Telegraf service reports failure on its start: Event Viewer->Windows Logs->Application
|
||||
|
||||
**Troubleshooting common error #1067**
|
||||
|
||||
When installing as service in Windows, always double check to specify full path of the config file, otherwise windows service will fail to start
|
||||
|
||||
--config C:\"Program Files"\Telegraf\telegraf.conf
|
||||
--config "C:\Program Files\Telegraf\telegraf.conf"
|
||||
|
|
|
@ -146,7 +146,10 @@ type AgentConfig struct {
|
|||
// Quiet is the option for running in quiet mode
|
||||
Quiet bool `toml:"quiet"`
|
||||
|
||||
// Log file name, the empty string means to log to stderr.
|
||||
// Log target - file, stderr, eventlog (Windows only). The empty string means to log to stderr.
|
||||
LogTarget string `toml:"logtarget"`
|
||||
|
||||
// Log file name .
|
||||
Logfile string `toml:"logfile"`
|
||||
|
||||
// The file will be rotated after the time interval specified. When set
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
package logger
|
||||
|
||||
import (
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/influxdata/wlog"
|
||||
"github.com/kardianos/service"
|
||||
)
|
||||
|
||||
const (
|
||||
LogTargetEventlog = "eventlog"
|
||||
)
|
||||
|
||||
type eventLogger struct {
|
||||
logger service.Logger
|
||||
}
|
||||
|
||||
func (t *eventLogger) Write(b []byte) (n int, err error) {
|
||||
loc := prefixRegex.FindIndex(b)
|
||||
n = len(b)
|
||||
if loc == nil {
|
||||
err = t.logger.Info(b)
|
||||
} else if n > 2 { //skip empty log messages
|
||||
line := strings.Trim(string(b[loc[1]:]), " \t\r\n")
|
||||
switch rune(b[loc[0]]) {
|
||||
case 'I':
|
||||
err = t.logger.Info(line)
|
||||
case 'W':
|
||||
err = t.logger.Warning(line)
|
||||
case 'E':
|
||||
err = t.logger.Error(line)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
type eventLoggerCreator struct {
|
||||
serviceLogger service.Logger
|
||||
}
|
||||
|
||||
func (e *eventLoggerCreator) CreateLogger(config LogConfig) (io.Writer, error) {
|
||||
return wlog.NewWriter(&eventLogger{logger: e.serviceLogger}), nil
|
||||
}
|
||||
|
||||
func RegisterEventLogger(serviceLogger service.Logger) {
|
||||
registerLogger(LogTargetEventlog, &eventLoggerCreator{serviceLogger: serviceLogger})
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
//+build windows
|
||||
|
||||
package logger
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"log"
|
||||
"os/exec"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/kardianos/service"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type Levels int
|
||||
|
||||
const (
|
||||
Info Levels = iota + 1
|
||||
Warning
|
||||
Error
|
||||
)
|
||||
|
||||
type Event struct {
|
||||
Message string `xml:"EventData>Data"`
|
||||
Level Levels `xml:"System>EventID"`
|
||||
}
|
||||
|
||||
func getEventLog(t *testing.T, since time.Time) []Event {
|
||||
timeStr := since.UTC().Format(time.RFC3339)
|
||||
cmd := exec.Command("wevtutil", "qe", "Application", "/rd:true", "/q:Event[System[TimeCreated[@SystemTime >= '"+timeStr+"'] and Provider[@Name='Telegraf']]]")
|
||||
var out bytes.Buffer
|
||||
cmd.Stdout = &out
|
||||
err := cmd.Run()
|
||||
require.NoError(t, err)
|
||||
xmlStr := "<events>" + out.String() + "</events>"
|
||||
var events struct {
|
||||
Events []Event `xml:"Event"`
|
||||
}
|
||||
err = xml.Unmarshal([]byte(xmlStr), &events)
|
||||
require.NoError(t, err)
|
||||
return events.Events
|
||||
}
|
||||
|
||||
func TestEventLog(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("Skipping integration test in short mode")
|
||||
}
|
||||
prepareLogger(t)
|
||||
|
||||
config := LogConfig{
|
||||
LogTarget: LogTargetEventlog,
|
||||
Logfile: "",
|
||||
}
|
||||
|
||||
SetupLogging(config)
|
||||
now := time.Now()
|
||||
log.Println("I! Info message")
|
||||
log.Println("W! Warn message")
|
||||
log.Println("E! Err message")
|
||||
events := getEventLog(t, now)
|
||||
assert.Len(t, events, 3)
|
||||
assert.Contains(t, events, Event{Message: "Info message", Level: Info})
|
||||
assert.Contains(t, events, Event{Message: "Warn message", Level: Warning})
|
||||
assert.Contains(t, events, Event{Message: "Err message", Level: Error})
|
||||
}
|
||||
|
||||
func TestRestrictedEventLog(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("Skipping integration test in short mode")
|
||||
}
|
||||
prepareLogger(t)
|
||||
|
||||
config := LogConfig{
|
||||
LogTarget: LogTargetEventlog,
|
||||
Quiet: true,
|
||||
}
|
||||
|
||||
SetupLogging(config)
|
||||
//separate previous log messages by small delay
|
||||
time.Sleep(time.Second)
|
||||
now := time.Now()
|
||||
log.Println("I! Info message")
|
||||
log.Println("W! Warning message")
|
||||
log.Println("E! Error message")
|
||||
events := getEventLog(t, now)
|
||||
assert.Len(t, events, 1)
|
||||
assert.Contains(t, events, Event{Message: "Error message", Level: Error})
|
||||
}
|
||||
|
||||
func prepareLogger(t *testing.T) {
|
||||
svc, err := service.New(nil, &service.Config{Name: "Telegraf"})
|
||||
require.NoError(t, err)
|
||||
svcLogger, err := svc.SystemLogger(nil)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, svcLogger)
|
||||
registerLogger(LogTargetEventlog, &eventLoggerCreator{serviceLogger: svcLogger})
|
||||
}
|
108
logger/logger.go
108
logger/logger.go
|
@ -2,6 +2,7 @@ package logger
|
|||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
|
@ -15,13 +16,10 @@ import (
|
|||
|
||||
var prefixRegex = regexp.MustCompile("^[DIWE]!")
|
||||
|
||||
// newTelegrafWriter returns a logging-wrapped writer.
|
||||
func newTelegrafWriter(w io.Writer) io.Writer {
|
||||
return &telegrafLog{
|
||||
writer: wlog.NewWriter(w),
|
||||
internalWriter: w,
|
||||
}
|
||||
}
|
||||
const (
|
||||
LogTargetFile = "file"
|
||||
LogTargetStderr = "stderr"
|
||||
)
|
||||
|
||||
// LogConfig contains the log configuration settings
|
||||
type LogConfig struct {
|
||||
|
@ -29,6 +27,8 @@ type LogConfig struct {
|
|||
Debug bool
|
||||
//will set the log level to ERROR
|
||||
Quiet bool
|
||||
//stderr, stdout, file or eventlog (Windows only)
|
||||
LogTarget string
|
||||
// will direct the logging output to a file. Empty string is
|
||||
// interpreted as stderr. If there is an error opening the file the
|
||||
// logger will fallback to stderr
|
||||
|
@ -41,6 +41,19 @@ type LogConfig struct {
|
|||
RotationMaxArchives int
|
||||
}
|
||||
|
||||
type LoggerCreator interface {
|
||||
CreateLogger(config LogConfig) (io.Writer, error)
|
||||
}
|
||||
|
||||
var loggerRegistry map[string]LoggerCreator
|
||||
|
||||
func registerLogger(name string, loggerCreator LoggerCreator) {
|
||||
if loggerRegistry == nil {
|
||||
loggerRegistry = make(map[string]LoggerCreator)
|
||||
}
|
||||
loggerRegistry[name] = loggerCreator
|
||||
}
|
||||
|
||||
type telegrafLog struct {
|
||||
writer io.Writer
|
||||
internalWriter io.Writer
|
||||
|
@ -57,11 +70,25 @@ func (t *telegrafLog) Write(b []byte) (n int, err error) {
|
|||
}
|
||||
|
||||
func (t *telegrafLog) Close() error {
|
||||
var stdErrWriter io.Writer
|
||||
stdErrWriter = os.Stderr
|
||||
// avoid closing stderr
|
||||
if t.internalWriter != stdErrWriter {
|
||||
closer, isCloser := t.internalWriter.(io.Closer)
|
||||
if !isCloser {
|
||||
return errors.New("the underlying writer cannot be closed")
|
||||
}
|
||||
return closer.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// newTelegrafWriter returns a logging-wrapped writer.
|
||||
func newTelegrafWriter(w io.Writer) io.Writer {
|
||||
return &telegrafLog{
|
||||
writer: wlog.NewWriter(w),
|
||||
internalWriter: w,
|
||||
}
|
||||
}
|
||||
|
||||
// SetupLogging configures the logging output.
|
||||
|
@ -69,6 +96,39 @@ func SetupLogging(config LogConfig) {
|
|||
newLogWriter(config)
|
||||
}
|
||||
|
||||
type telegrafLogCreator struct {
|
||||
}
|
||||
|
||||
func (t *telegrafLogCreator) CreateLogger(config LogConfig) (io.Writer, error) {
|
||||
var writer, defaultWriter io.Writer
|
||||
defaultWriter = os.Stderr
|
||||
|
||||
switch config.LogTarget {
|
||||
case LogTargetFile:
|
||||
if config.Logfile != "" {
|
||||
var err error
|
||||
if writer, err = rotate.NewFileWriter(config.Logfile, config.RotationInterval.Duration, config.RotationMaxSize.Size, config.RotationMaxArchives); err != nil {
|
||||
log.Printf("E! Unable to open %s (%s), using stderr", config.Logfile, err)
|
||||
writer = defaultWriter
|
||||
}
|
||||
} else {
|
||||
log.Print("E! Empty logfile, using stderr")
|
||||
writer = defaultWriter
|
||||
}
|
||||
case LogTargetStderr, "":
|
||||
writer = defaultWriter
|
||||
default:
|
||||
log.Printf("E! Unsupported logtarget: %s, using stderr", config.LogTarget)
|
||||
writer = defaultWriter
|
||||
}
|
||||
|
||||
return newTelegrafWriter(writer), nil
|
||||
}
|
||||
|
||||
// Keep track what is actually set as a log output, because log package doesn't provide a getter.
|
||||
// It allows closing previous writer if re-set and have possibility to test what is actually set
|
||||
var actualLogger io.Writer
|
||||
|
||||
func newLogWriter(config LogConfig) io.Writer {
|
||||
log.SetFlags(0)
|
||||
if config.Debug {
|
||||
|
@ -77,19 +137,29 @@ func newLogWriter(config LogConfig) io.Writer {
|
|||
if config.Quiet {
|
||||
wlog.SetLevel(wlog.ERROR)
|
||||
}
|
||||
|
||||
var writer io.Writer
|
||||
if config.Logfile != "" {
|
||||
var err error
|
||||
if writer, err = rotate.NewFileWriter(config.Logfile, config.RotationInterval.Duration, config.RotationMaxSize.Size, config.RotationMaxArchives); err != nil {
|
||||
log.Printf("E! Unable to open %s (%s), using stderr", config.Logfile, err)
|
||||
writer = os.Stderr
|
||||
if !config.Debug && !config.Quiet {
|
||||
wlog.SetLevel(wlog.INFO)
|
||||
}
|
||||
} else {
|
||||
writer = os.Stderr
|
||||
var logWriter io.Writer
|
||||
if logCreator, ok := loggerRegistry[config.LogTarget]; ok {
|
||||
logWriter, _ = logCreator.CreateLogger(config)
|
||||
}
|
||||
if logWriter == nil {
|
||||
logWriter, _ = (&telegrafLogCreator{}).CreateLogger(config)
|
||||
}
|
||||
|
||||
telegrafLog := newTelegrafWriter(writer)
|
||||
log.SetOutput(telegrafLog)
|
||||
return telegrafLog
|
||||
if closer, isCloser := actualLogger.(io.Closer); isCloser {
|
||||
closer.Close()
|
||||
}
|
||||
log.SetOutput(logWriter)
|
||||
actualLogger = logWriter
|
||||
|
||||
return logWriter
|
||||
}
|
||||
|
||||
func init() {
|
||||
tlc := &telegrafLogCreator{}
|
||||
registerLogger("", tlc)
|
||||
registerLogger(LogTargetStderr, tlc)
|
||||
registerLogger(LogTargetFile, tlc)
|
||||
}
|
||||
|
|
|
@ -85,7 +85,7 @@ func TestWriteToTruncatedFile(t *testing.T) {
|
|||
assert.NoError(t, err)
|
||||
assert.Equal(t, f[19:], []byte("Z I! TEST\n"))
|
||||
|
||||
tmpf, err := os.OpenFile(tmpfile.Name(), os.O_TRUNC, 0644)
|
||||
tmpf, err := os.OpenFile(tmpfile.Name(), os.O_RDWR|os.O_TRUNC, 0644)
|
||||
assert.NoError(t, err)
|
||||
assert.NoError(t, tmpf.Close())
|
||||
|
||||
|
@ -100,6 +100,7 @@ func TestWriteToFileInRotation(t *testing.T) {
|
|||
tempDir, err := ioutil.TempDir("", "LogRotation")
|
||||
require.NoError(t, err)
|
||||
config := createBasicLogConfig(filepath.Join(tempDir, "test.log"))
|
||||
config.LogTarget = LogTargetFile
|
||||
config.RotationMaxSize = internal.Size{Size: int64(30)}
|
||||
writer := newLogWriter(config)
|
||||
// Close the writer here, otherwise the temp folder cannot be deleted because the current log file is in use.
|
||||
|
@ -113,6 +114,26 @@ func TestWriteToFileInRotation(t *testing.T) {
|
|||
assert.Equal(t, 2, len(files))
|
||||
}
|
||||
|
||||
func TestLogTargetSettings(t *testing.T) {
|
||||
config := LogConfig{
|
||||
LogTarget: "",
|
||||
Quiet: true,
|
||||
}
|
||||
SetupLogging(config)
|
||||
logger, isTelegrafLogger := actualLogger.(*telegrafLog)
|
||||
assert.True(t, isTelegrafLogger)
|
||||
assert.Equal(t, logger.internalWriter, os.Stderr)
|
||||
|
||||
config = LogConfig{
|
||||
LogTarget: "stderr",
|
||||
Quiet: true,
|
||||
}
|
||||
SetupLogging(config)
|
||||
logger, isTelegrafLogger = actualLogger.(*telegrafLog)
|
||||
assert.True(t, isTelegrafLogger)
|
||||
assert.Equal(t, logger.internalWriter, os.Stderr)
|
||||
}
|
||||
|
||||
func BenchmarkTelegrafLogWrite(b *testing.B) {
|
||||
var msg = []byte("test")
|
||||
var buf bytes.Buffer
|
||||
|
@ -126,6 +147,7 @@ func BenchmarkTelegrafLogWrite(b *testing.B) {
|
|||
func createBasicLogConfig(filename string) LogConfig {
|
||||
return LogConfig{
|
||||
Logfile: filename,
|
||||
LogTarget: LogTargetFile,
|
||||
RotationMaxArchives: -1,
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue