From 584e8acd99409aaeb1b165b478763734ec832f96 Mon Sep 17 00:00:00 2001 From: Marc Date: Mon, 12 Aug 2019 23:00:29 +0800 Subject: [PATCH 1/3] Made the SMART timeout a config option --- plugins/inputs/smart/README.md | 3 ++ plugins/inputs/smart/smart.go | 26 ++++++++++----- plugins/inputs/smart/smart_test.go | 51 ++++++++++++++++-------------- 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/plugins/inputs/smart/README.md b/plugins/inputs/smart/README.md index b677bf7bdb88e..c5f4c24a4c9ff 100644 --- a/plugins/inputs/smart/README.md +++ b/plugins/inputs/smart/README.md @@ -60,6 +60,9 @@ smartctl -s on ## done and all found will be included except for the ## excluded in excludes. # devices = [ "/dev/ada0 -d atacam" ] + + ## Timeout for the smartctl command to complete. + # timeout = "5s" ``` ### Permissions diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index 3e6620c8c9d2e..c934513dae79f 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -119,6 +119,7 @@ type Smart struct { Excludes []string Devices []string UseSudo bool + Timeout internal.Duration } var sampleConfig = ` @@ -151,8 +152,17 @@ var sampleConfig = ` ## done and all found will be included except for the ## excluded in excludes. # devices = [ "/dev/ada0 -d atacam" ] + + ## Timeout for the smartctl command to complete. + # timeout = "5s" ` +func NewSmart() *Smart { + return &Smart{ + Timeout: internal.Duration{Duration: time.Second * 5}, + } +} + func (m *Smart) SampleConfig() string { return sampleConfig } @@ -180,17 +190,17 @@ func (m *Smart) Gather(acc telegraf.Accumulator) error { } // Wrap with sudo -var runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { +var runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { cmd := exec.Command(command, args...) if sudo { cmd = exec.Command("sudo", append([]string{"-n", command}, args...)...) 
} - return internal.CombinedOutputTimeout(cmd, time.Second*5) + return internal.CombinedOutputTimeout(cmd, timeout.Duration) } // Scan for S.M.A.R.T. devices func (m *Smart) scan() ([]string, error) { - out, err := runCmd(m.UseSudo, m.Path, "--scan") + out, err := runCmd(m.Timeout, m.UseSudo, m.Path, "--scan") if err != nil { return []string{}, fmt.Errorf("failed to run command '%s --scan': %s - %s", m.Path, err, string(out)) } @@ -226,7 +236,7 @@ func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) { wg.Add(len(devices)) for _, device := range devices { - go gatherDisk(acc, m.UseSudo, m.Attributes, m.Path, m.Nocheck, device, &wg) + go gatherDisk(acc, m.Timeout, m.UseSudo, m.Attributes, m.Path, m.Nocheck, device, &wg) } wg.Wait() @@ -243,12 +253,12 @@ func exitStatus(err error) (int, error) { return 0, err } -func gatherDisk(acc telegraf.Accumulator, usesudo, collectAttributes bool, smartctl, nocheck, device string, wg *sync.WaitGroup) { +func gatherDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo, collectAttributes bool, smartctl, nocheck, device string, wg *sync.WaitGroup) { defer wg.Done() // smartctl 5.41 & 5.42 have are broken regarding handling of --nocheck/-n args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", nocheck, "--format=brief"} args = append(args, strings.Split(device, " ")...) - out, e := runCmd(usesudo, smartctl, args...) + out, e := runCmd(timeout, usesudo, smartctl, args...) 
outStr := string(out) // Ignore all exit statuses except if it is a command line parse error @@ -436,7 +446,7 @@ func parseTemperature(fields, deviceFields map[string]interface{}, str string) e } func init() { - m := Smart{} + m := NewSmart() path, _ := exec.LookPath("smartctl") if len(path) > 0 { m.Path = path @@ -444,6 +454,6 @@ func init() { m.Nocheck = "standby" inputs.Add("smart", func() telegraf.Input { - return &m + return m }) } diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index 0b030366d7f31..a08284e9b78f9 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -7,19 +7,22 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/testutil" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestGatherAttributes(t *testing.T) { - s := &Smart{ - Path: "smartctl", - Attributes: true, - } + s := NewSmart() + s.Path = "smartctl" + s.Attributes = true + + assert.Equal(t, time.Second*5, s.Timeout.Duration) + var acc testutil.Accumulator - runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { if len(args) > 0 { if args[0] == "--scan" { return []byte(mockScanData), nil @@ -326,10 +329,12 @@ func TestGatherAttributes(t *testing.T) { } func TestGatherNoAttributes(t *testing.T) { - s := &Smart{ - Path: "smartctl", - Attributes: false, - } + s := NewSmart() + s.Path = "smartctl" + s.Attributes = false + + assert.Equal(t, time.Second*5, s.Timeout.Duration) + // overwriting exec commands with mock commands var acc testutil.Accumulator @@ -374,7 +379,7 @@ func TestExcludedDev(t *testing.T) { } func TestGatherSATAInfo(t *testing.T) { - runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { + runCmd = func(timeout internal.Duration, sudo bool, command string, 
args ...string) ([]byte, error) { return []byte(hgstSATAInfoData), nil } @@ -384,13 +389,13 @@ func TestGatherSATAInfo(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) assert.Equal(t, 101, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(20), acc.NMetrics(), "Wrong number of metrics gathered") } func TestGatherSATAInfo65(t *testing.T) { - runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { return []byte(hgstSATAInfoData65), nil } @@ -400,13 +405,13 @@ func TestGatherSATAInfo65(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) assert.Equal(t, 91, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(18), acc.NMetrics(), "Wrong number of metrics gathered") } func TestGatherHgstSAS(t *testing.T) { - runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { return []byte(hgstSASInfoData), nil } @@ -416,13 +421,13 @@ func TestGatherHgstSAS(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) assert.Equal(t, 6, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(4), acc.NMetrics(), "Wrong number of metrics gathered") } func TestGatherHtSAS(t *testing.T) { - runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { return []byte(htSASInfoData), nil } @@ -432,13 +437,13 @@ func TestGatherHtSAS(t *testing.T) { 
) wg.Add(1) - gatherDisk(acc, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) assert.Equal(t, 5, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(3), acc.NMetrics(), "Wrong number of metrics gathered") } func TestGatherSSD(t *testing.T) { - runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { return []byte(ssdInfoData), nil } @@ -448,13 +453,13 @@ func TestGatherSSD(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered") } func TestGatherSSDRaid(t *testing.T) { - runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { return []byte(ssdRaidInfoData), nil } @@ -464,13 +469,13 @@ func TestGatherSSDRaid(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered") } func TestGatherNvme(t *testing.T) { - runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { return []byte(nvmeInfoData), nil } @@ -480,7 +485,7 @@ func TestGatherNvme(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) expected 
:= []telegraf.Metric{ testutil.MustMetric("smart_device", From 64a613da03caf704d56694d108df772de9937350 Mon Sep 17 00:00:00 2001 From: Marc Date: Mon, 12 Aug 2019 23:03:58 +0800 Subject: [PATCH 2/3] Update the SMART timeout to 30s This provides an out-of-the-box experience with modern HDDs. --- plugins/inputs/smart/README.md | 2 +- plugins/inputs/smart/smart.go | 4 ++-- plugins/inputs/smart/smart_test.go | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/plugins/inputs/smart/README.md b/plugins/inputs/smart/README.md index c5f4c24a4c9ff..47320aeac2ebf 100644 --- a/plugins/inputs/smart/README.md +++ b/plugins/inputs/smart/README.md @@ -62,7 +62,7 @@ smartctl -s on # devices = [ "/dev/ada0 -d atacam" ] ## Timeout for the smartctl command to complete. 
- # timeout = "5s" + # timeout = "30s" ` func NewSmart() *Smart { return &Smart{ - Timeout: internal.Duration{Duration: time.Second * 5}, + Timeout: internal.Duration{Duration: time.Second * 30}, } } diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index a08284e9b78f9..d66a31fea0797 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -18,7 +18,7 @@ func TestGatherAttributes(t *testing.T) { s.Path = "smartctl" s.Attributes = true - assert.Equal(t, time.Second*5, s.Timeout.Duration) + assert.Equal(t, time.Second*30, s.Timeout.Duration) var acc testutil.Accumulator @@ -333,7 +333,7 @@ func TestGatherNoAttributes(t *testing.T) { s.Path = "smartctl" s.Attributes = false - assert.Equal(t, time.Second*5, s.Timeout.Duration) + assert.Equal(t, time.Second*30, s.Timeout.Duration) // overwriting exec commands with mock commands var acc testutil.Accumulator @@ -389,7 +389,7 @@ func TestGatherSATAInfo(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) assert.Equal(t, 101, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(20), acc.NMetrics(), "Wrong number of metrics gathered") } @@ -405,7 +405,7 @@ func TestGatherSATAInfo65(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) assert.Equal(t, 91, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(18), acc.NMetrics(), "Wrong number of metrics gathered") } @@ -421,7 +421,7 @@ func TestGatherHgstSAS(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", 
"", "", wg) assert.Equal(t, 6, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(4), acc.NMetrics(), "Wrong number of metrics gathered") } @@ -437,7 +437,7 @@ func TestGatherHtSAS(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) assert.Equal(t, 5, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(3), acc.NMetrics(), "Wrong number of metrics gathered") } @@ -453,7 +453,7 @@ func TestGatherSSD(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered") } @@ -469,7 +469,7 @@ func TestGatherSSDRaid(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered") } @@ -485,7 +485,7 @@ func TestGatherNvme(t *testing.T) { ) wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 5}, true, true, "", "", "", wg) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) expected := []telegraf.Metric{ testutil.MustMetric("smart_device", From 220806c731d58071d3b4ccb360846f4e785e49ab Mon Sep 17 00:00:00 2001 From: Marc Date: Tue, 13 Aug 2019 20:58:52 +0800 Subject: [PATCH 3/3] Fixed plugin instantiation --- plugins/inputs/smart/smart.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git 
a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index 52d2cd57e5d57..b17f979d3fdb5 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -446,14 +446,13 @@ func parseTemperature(fields, deviceFields map[string]interface{}, str string) e } func init() { - m := NewSmart() - path, _ := exec.LookPath("smartctl") - if len(path) > 0 { - m.Path = path - } - m.Nocheck = "standby" - inputs.Add("smart", func() telegraf.Input { + m := NewSmart() + path, _ := exec.LookPath("smartctl") + if len(path) > 0 { + m.Path = path + } + m.Nocheck = "standby" return m }) }