From bcf7516a23fc45987fe5460c6563fde674faad75 Mon Sep 17 00:00:00 2001 From: javicrespo Date: Fri, 3 May 2019 19:25:28 +0200 Subject: [PATCH] Add in process log rotation (#5778) --- cmd/telegraf/telegraf.go | 17 ++- docs/CONFIGURATION.md | 8 ++ internal/config/config.go | 20 +++ internal/rotate/file_writer.go | 183 ++++++++++++++++++++++++++++ internal/rotate/file_writer_test.go | 115 +++++++++++++++++ logger/logger.go | 66 +++++++--- logger/logger_test.go | 51 ++++++-- 7 files changed, 428 insertions(+), 32 deletions(-) create mode 100644 internal/rotate/file_writer.go create mode 100644 internal/rotate/file_writer_test.go diff --git a/cmd/telegraf/telegraf.go b/cmd/telegraf/telegraf.go index 55ff58473d793..dcc8f29facc6c 100644 --- a/cmd/telegraf/telegraf.go +++ b/cmd/telegraf/telegraf.go @@ -115,7 +115,7 @@ func runAgent(ctx context.Context, ) error { // Setup default logging. This may need to change after reading the config // file, but we can configure it to use our logger implementation now. - logger.SetupLogging(false, false, "") + logger.SetupLogging(logger.LogConfig{}) log.Printf("I! Starting Telegraf %s", version) // If no other options are specified, load the config file and run. @@ -156,11 +156,16 @@ func runAgent(ctx context.Context, } // Setup logging as configured. - logger.SetupLogging( - ag.Config.Agent.Debug || *fDebug, - ag.Config.Agent.Quiet || *fQuiet, - ag.Config.Agent.Logfile, - ) + logConfig := logger.LogConfig{ + Debug: ag.Config.Agent.Debug || *fDebug, + Quiet: ag.Config.Agent.Quiet || *fQuiet, + Logfile: ag.Config.Agent.Logfile, + RotationInterval: ag.Config.Agent.LogfileRotationInterval, + RotationMaxSize: ag.Config.Agent.LogfileRotationMaxSize, + RotationMaxArchives: ag.Config.Agent.LogfileRotationMaxArchives, + } + + logger.SetupLogging(logConfig) if *fTest { return ag.Test(ctx) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index edb3341456fe2..0baf2e033c054 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -141,6 +141,14 @@ The agent table configures Telegraf and the defaults used across all plugins. Run telegraf in quiet mode (error log messages only). - **logfile**: Specify the log file name. The empty string means to log to stderr. +- **logfile_rotation_interval**: + Log file rotation time [interval][], e.g. "1d" means logs will rotated every day. Default is 0 => no rotation based on time. +- **logfile_rotation_max_size**: + The log file max [size][]. Log files will be rotated when they exceed this size. Default is 0 => no rotation based on file size. +- **logfile_rotation_max_archives**: + Maximum number of archives (rotated) files to keep. Older log files are deleted first. + This setting is only applicable if `logfile_rotation_interval` and/or `logfile_rotation_max_size` settings have been specified (otherwise there is no rotation) + Default is 0 => all rotated files are deleted. Use -1 to keep all archives. - **hostname**: Override default hostname, if empty use os.Hostname() diff --git a/internal/config/config.go b/internal/config/config.go index 263481ebdff4c..fd73657df81fd 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -145,6 +145,15 @@ type AgentConfig struct { // Logfile specifies the file to send logs to Logfile string + // The log file rotation interval + LogfileRotationInterval internal.Duration + + // The log file max size. Logs will rotated when they exceed this size. + LogfileRotationMaxSize internal.Size + + // The max number of log archives to keep + LogfileRotationMaxArchives int + // Quiet is the option for running in quiet mode Quiet bool Hostname string @@ -273,6 +282,17 @@ var agentConfig = ` quiet = false ## Specify the log file name. The empty string means to log to stderr. logfile = "" + ## Rotation settings, only applicable when log file name is specified. + ## Log file rotation time interval, e.g. "1d" means logs will rotated every day. Default is 0 => no rotation based on time. + # logfile_rotation_interval = "1d" + ## The log file max size. Log files will be rotated when they exceed this size. Default is 0 => no rotation based on file size. + # logfile_rotation_max_size = "10 MB" + ## Maximum number of archives (rotated) files to keep. Older log files are deleted first. + ## This setting is only applicable if logfile_rotation_interval and/or logfile_rotation_max_size settings have been specified (otherwise there is no rotation) + ## Default is 0 => all rotated files are deleted. + ## Use -1 to keep all archives. + ## Analogous to logrotate "rotate" setting http://man7.org/linux/man-pages/man8/logrotate.8.html + # logfile_rotation_max_archives = 0 ## Override default hostname, if empty use os.Hostname() hostname = "" diff --git a/internal/rotate/file_writer.go b/internal/rotate/file_writer.go new file mode 100644 index 0000000000000..fe8c2fd71c22b --- /dev/null +++ b/internal/rotate/file_writer.go @@ -0,0 +1,183 @@ +package rotate + +// Rotating things +import ( + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "sync" + "time" +) + +// FilePerm defines the permissions that Writer will use for all +// the files it creates. +const ( + FilePerm = os.FileMode(0644) + DateFormat = "2006-01-02" +) + +// FileWriter implements the io.Writer interface and writes to the +// filename specified. +// Will rotate at the specified interval and/or when the current file size exceeds maxSizeInBytes +// At rotation time, current file is renamed and a new file is created. +// If the number of archives exceeds maxArchives, older files are deleted. +type FileWriter struct { + filename string + filenameRotationTemplate string + current *os.File + interval time.Duration + maxSizeInBytes int64 + maxArchives int + expireTime time.Time + bytesWritten int64 + sync.Mutex +} + +// NewFileWriter creates a new file writer. +func NewFileWriter(filename string, interval time.Duration, maxSizeInBytes int64, maxArchives int) (io.WriteCloser, error) { + if interval == 0 && maxSizeInBytes <= 0 { + // No rotation needed so a basic io.Writer will do the trick + return openFile(filename) + } + + w := &FileWriter{ + filename: filename, + interval: interval, + maxSizeInBytes: maxSizeInBytes, + maxArchives: maxArchives, + filenameRotationTemplate: getFilenameRotationTemplate(filename), + } + + if err := w.openCurrent(); err != nil { + return nil, err + } + + return w, nil +} + +func openFile(filename string) (*os.File, error) { + return os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_APPEND, FilePerm) +} + +func getFilenameRotationTemplate(filename string) string { + // Extract the file extension + fileExt := filepath.Ext(filename) + // Remove the file extension from the filename (if any) + stem := strings.TrimSuffix(filename, fileExt) + return stem + ".%s-%s" + fileExt +} + +// Write writes p to the current file, then checks to see if +// rotation is necessary. +func (w *FileWriter) Write(p []byte) (n int, err error) { + w.Lock() + defer w.Unlock() + if n, err = w.current.Write(p); err != nil { + return 0, err + } + w.bytesWritten += int64(n) + + if err = w.rotateIfNeeded(); err != nil { + return 0, err + } + + return n, nil +} + +// Close closes the current file. Writer is unusable after this +// is called. +func (w *FileWriter) Close() (err error) { + w.Lock() + defer w.Unlock() + + // Rotate before closing + if err = w.rotate(); err != nil { + return err + } + + if err = w.current.Close(); err != nil { + return err + } + w.current = nil + return nil +} + +func (w *FileWriter) openCurrent() (err error) { + // In case ModTime() fails, we use time.Now() + w.expireTime = time.Now().Add(w.interval) + w.bytesWritten = 0 + w.current, err = openFile(w.filename) + + if err != nil { + return err + } + + // Goal here is to rotate old pre-existing files. + // For that we use fileInfo.ModTime, instead of time.Now(). + // Example: telegraf is restarted every 23 hours and + // the rotation interval is set to 24 hours. + // With time.now() as a reference we'd never rotate the file. + if fileInfo, err := w.current.Stat(); err == nil { + w.expireTime = fileInfo.ModTime().Add(w.interval) + } + return nil +} + +func (w *FileWriter) rotateIfNeeded() error { + if (w.interval > 0 && time.Now().After(w.expireTime)) || + (w.maxSizeInBytes > 0 && w.bytesWritten >= w.maxSizeInBytes) { + if err := w.rotate(); err != nil { + //Ignore rotation errors and keep the log open + fmt.Printf("unable to rotate the file '%s', %s", w.filename, err.Error()) + } + return w.openCurrent() + } + return nil +} + +func (w *FileWriter) rotate() (err error) { + if err = w.current.Close(); err != nil { + return err + } + + // Use year-month-date for readability, unix time to make the file name unique with second precision + now := time.Now() + rotatedFilename := fmt.Sprintf(w.filenameRotationTemplate, now.Format(DateFormat), strconv.FormatInt(now.Unix(), 10)) + if err = os.Rename(w.filename, rotatedFilename); err != nil { + return err + } + + if err = w.purgeArchivesIfNeeded(); err != nil { + return err + } + + return nil +} + +func (w *FileWriter) purgeArchivesIfNeeded() (err error) { + if w.maxArchives == -1 { + //Skip archiving + return nil + } + + var matches []string + if matches, err = filepath.Glob(fmt.Sprintf(w.filenameRotationTemplate, "*", "*")); err != nil { + return err + } + + //if there are more archives than the configured maximum, then purge older files + if len(matches) > w.maxArchives { + //sort files alphanumerically to delete older files first + sort.Strings(matches) + for _, filename := range matches[:len(matches)-w.maxArchives] { + if err = os.Remove(filename); err != nil { + return err + } + } + } + return nil +} diff --git a/internal/rotate/file_writer_test.go b/internal/rotate/file_writer_test.go new file mode 100644 index 0000000000000..88ba94b9d8132 --- /dev/null +++ b/internal/rotate/file_writer_test.go @@ -0,0 +1,115 @@ +package rotate + +import ( + "io/ioutil" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFileWriter_NoRotation(t *testing.T) { + tempDir, err := ioutil.TempDir("", "RotationNo") + require.NoError(t, err) + writer, err := NewFileWriter(filepath.Join(tempDir, "test"), 0, 0, 0) + require.NoError(t, err) + defer func() { writer.Close(); os.RemoveAll(tempDir) }() + + _, err = writer.Write([]byte("Hello World")) + require.NoError(t, err) + _, err = writer.Write([]byte("Hello World 2")) + require.NoError(t, err) + files, _ := ioutil.ReadDir(tempDir) + assert.Equal(t, 1, len(files)) +} + +func TestFileWriter_TimeRotation(t *testing.T) { + tempDir, err := ioutil.TempDir("", "RotationTime") + require.NoError(t, err) + interval, _ := time.ParseDuration("1s") + writer, err := NewFileWriter(filepath.Join(tempDir, "test"), interval, 0, -1) + require.NoError(t, err) + defer func() { writer.Close(); os.RemoveAll(tempDir) }() + + _, err = writer.Write([]byte("Hello World")) + require.NoError(t, err) + time.Sleep(1 * time.Second) + _, err = writer.Write([]byte("Hello World 2")) + require.NoError(t, err) + files, _ := ioutil.ReadDir(tempDir) + assert.Equal(t, 2, len(files)) +} + +func TestFileWriter_SizeRotation(t *testing.T) { + tempDir, err := ioutil.TempDir("", "RotationSize") + require.NoError(t, err) + maxSize := int64(9) + writer, err := NewFileWriter(filepath.Join(tempDir, "test.log"), 0, maxSize, -1) + require.NoError(t, err) + defer func() { writer.Close(); os.RemoveAll(tempDir) }() + + _, err = writer.Write([]byte("Hello World")) + require.NoError(t, err) + _, err = writer.Write([]byte("World 2")) + require.NoError(t, err) + files, _ := ioutil.ReadDir(tempDir) + assert.Equal(t, 2, len(files)) +} + +func TestFileWriter_DeleteArchives(t *testing.T) { + tempDir, err := ioutil.TempDir("", "RotationDeleteArchives") + require.NoError(t, err) + maxSize := int64(5) + writer, err := NewFileWriter(filepath.Join(tempDir, "test.log"), 0, maxSize, 2) + require.NoError(t, err) + defer func() { writer.Close(); os.RemoveAll(tempDir) }() + + _, err = writer.Write([]byte("First file")) + require.NoError(t, err) + // File names include the date with second precision + // So, to force rotation with different file names + // we need to wait + time.Sleep(1 * time.Second) + _, err = writer.Write([]byte("Second file")) + require.NoError(t, err) + time.Sleep(1 * time.Second) + _, err = writer.Write([]byte("Third file")) + require.NoError(t, err) + + files, _ := ioutil.ReadDir(tempDir) + assert.Equal(t, 3, len(files)) + + for _, tempFile := range files { + var bytes []byte + var err error + path := filepath.Join(tempDir, tempFile.Name()) + if bytes, err = ioutil.ReadFile(path); err != nil { + t.Error(err.Error()) + return + } + contents := string(bytes) + + if contents != "" && contents != "Second file" && contents != "Third file" { + t.Error("Should have deleted the eldest log file") + return + } + } +} + +func TestFileWriter_CloseRotates(t *testing.T) { + tempDir, err := ioutil.TempDir("", "RotationClose") + require.NoError(t, err) + defer os.RemoveAll(tempDir) + maxSize := int64(9) + writer, err := NewFileWriter(filepath.Join(tempDir, "test.log"), 0, maxSize, -1) + require.NoError(t, err) + + writer.Close() + + files, _ := ioutil.ReadDir(tempDir) + assert.Equal(t, 1, len(files)) + assert.Regexp(t, "^test\\.[^\\.]+\\.log$", files[0].Name()) +} diff --git a/logger/logger.go b/logger/logger.go index 6250dedd62529..a7b32b6e0232b 100644 --- a/logger/logger.go +++ b/logger/logger.go @@ -1,12 +1,15 @@ package logger import ( + "errors" "io" "log" "os" "regexp" "time" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/internal/rotate" "github.com/influxdata/wlog" ) @@ -15,12 +18,32 @@ var prefixRegex = regexp.MustCompile("^[DIWE]!") // newTelegrafWriter returns a logging-wrapped writer. func newTelegrafWriter(w io.Writer) io.Writer { return &telegrafLog{ - writer: wlog.NewWriter(w), + writer: wlog.NewWriter(w), + internalWriter: w, } } +// LogConfig contains the log configuration settings +type LogConfig struct { + // will set the log level to DEBUG + Debug bool + //will set the log level to ERROR + Quiet bool + // will direct the logging output to a file. Empty string is + // interpreted as stderr. If there is an error opening the file the + // logger will fallback to stderr + Logfile string + // will rotate when current file at the specified time interval + RotationInterval internal.Duration + // will rotate when current file size exceeds this parameter. + RotationMaxSize internal.Size + // maximum rotated files to keep (older ones will be deleted) + RotationMaxArchives int +} + type telegrafLog struct { - writer io.Writer + writer io.Writer + internalWriter io.Writer } func (t *telegrafLog) Write(b []byte) (n int, err error) { @@ -33,31 +56,40 @@ func (t *telegrafLog) Write(b []byte) (n int, err error) { return t.writer.Write(line) } +func (t *telegrafLog) Close() error { + closer, isCloser := t.internalWriter.(io.Closer) + if !isCloser { + return errors.New("the underlying writer cannot be closed") + } + return closer.Close() +} + // SetupLogging configures the logging output. -// debug will set the log level to DEBUG -// quiet will set the log level to ERROR -// logfile will direct the logging output to a file. Empty string is -// interpreted as stderr. If there is an error opening the file the -// logger will fallback to stderr. -func SetupLogging(debug, quiet bool, logfile string) { +func SetupLogging(config LogConfig) { + newLogWriter(config) +} + +func newLogWriter(config LogConfig) io.Writer { log.SetFlags(0) - if debug { + if config.Debug { wlog.SetLevel(wlog.DEBUG) } - if quiet { + if config.Quiet { wlog.SetLevel(wlog.ERROR) } - var oFile *os.File - if logfile != "" { + var writer io.Writer + if config.Logfile != "" { var err error - if oFile, err = os.OpenFile(logfile, os.O_CREATE|os.O_APPEND|os.O_WRONLY, os.ModeAppend|0644); err != nil { - log.Printf("E! Unable to open %s (%s), using stderr", logfile, err) - oFile = os.Stderr + if writer, err = rotate.NewFileWriter(config.Logfile, config.RotationInterval.Duration, config.RotationMaxSize.Size, config.RotationMaxArchives); err != nil { + log.Printf("E! Unable to open %s (%s), using stderr", config.Logfile, err) + writer = os.Stderr } } else { - oFile = os.Stderr + writer = os.Stderr } - log.SetOutput(newTelegrafWriter(oFile)) + telegrafLog := newTelegrafWriter(writer) + log.SetOutput(telegrafLog) + return telegrafLog } diff --git a/logger/logger_test.go b/logger/logger_test.go index a721cbba7beaa..504e9a4bbdc30 100644 --- a/logger/logger_test.go +++ b/logger/logger_test.go @@ -2,12 +2,16 @@ package logger import ( "bytes" + "io" "io/ioutil" "log" "os" + "path/filepath" "testing" + "github.com/influxdata/telegraf/internal" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestWriteLogToFile(t *testing.T) { @@ -15,7 +19,8 @@ func TestWriteLogToFile(t *testing.T) { assert.NoError(t, err) defer func() { os.Remove(tmpfile.Name()) }() - SetupLogging(false, false, tmpfile.Name()) + config := createBasicLogConfig(tmpfile.Name()) + SetupLogging(config) log.Printf("I! TEST") log.Printf("D! TEST") // <- should be ignored @@ -28,8 +33,9 @@ func TestDebugWriteLogToFile(t *testing.T) { tmpfile, err := ioutil.TempFile("", "") assert.NoError(t, err) defer func() { os.Remove(tmpfile.Name()) }() - - SetupLogging(true, false, tmpfile.Name()) + config := createBasicLogConfig(tmpfile.Name()) + config.Debug = true + SetupLogging(config) log.Printf("D! TEST") f, err := ioutil.ReadFile(tmpfile.Name()) @@ -41,8 +47,9 @@ func TestErrorWriteLogToFile(t *testing.T) { tmpfile, err := ioutil.TempFile("", "") assert.NoError(t, err) defer func() { os.Remove(tmpfile.Name()) }() - - SetupLogging(false, true, tmpfile.Name()) + config := createBasicLogConfig(tmpfile.Name()) + config.Quiet = true + SetupLogging(config) log.Printf("E! TEST") log.Printf("I! TEST") // <- should be ignored @@ -55,8 +62,9 @@ func TestAddDefaultLogLevel(t *testing.T) { tmpfile, err := ioutil.TempFile("", "") assert.NoError(t, err) defer func() { os.Remove(tmpfile.Name()) }() - - SetupLogging(true, false, tmpfile.Name()) + config := createBasicLogConfig(tmpfile.Name()) + config.Debug = true + SetupLogging(config) log.Printf("TEST") f, err := ioutil.ReadFile(tmpfile.Name()) @@ -68,8 +76,9 @@ func TestWriteToTruncatedFile(t *testing.T) { tmpfile, err := ioutil.TempFile("", "") assert.NoError(t, err) defer func() { os.Remove(tmpfile.Name()) }() - - SetupLogging(true, false, tmpfile.Name()) + config := createBasicLogConfig(tmpfile.Name()) + config.Debug = true + SetupLogging(config) log.Printf("TEST") f, err := ioutil.ReadFile(tmpfile.Name()) @@ -87,6 +96,23 @@ func TestWriteToTruncatedFile(t *testing.T) { assert.Equal(t, f[19:], []byte("Z I! SHOULD BE FIRST\n")) } +func TestWriteToFileInRotation(t *testing.T) { + tempDir, err := ioutil.TempDir("", "LogRotation") + require.NoError(t, err) + config := createBasicLogConfig(filepath.Join(tempDir, "test.log")) + config.RotationMaxSize = internal.Size{Size: int64(30)} + writer := newLogWriter(config) + // Close the writer here, otherwise the temp folder cannot be deleted because the current log file is in use. + closer, isCloser := writer.(io.Closer) + assert.True(t, isCloser) + defer func() { closer.Close(); os.RemoveAll(tempDir) }() + + log.Printf("I! TEST 1") // Writes 31 bytes, will rotate + log.Printf("I! TEST") // Writes 29 byes, no rotation expected + files, _ := ioutil.ReadDir(tempDir) + assert.Equal(t, 2, len(files)) +} + func BenchmarkTelegrafLogWrite(b *testing.B) { var msg = []byte("test") var buf bytes.Buffer @@ -96,3 +122,10 @@ func BenchmarkTelegrafLogWrite(b *testing.B) { w.Write(msg) } } + +func createBasicLogConfig(filename string) LogConfig { + return LogConfig{ + Logfile: filename, + RotationMaxArchives: -1, + } +}