Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Made the SMART timeout a parameter, and set the default to 30s #6241

Merged
merged 3 commits into from
Aug 13, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions plugins/inputs/smart/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ smartctl -s on <device>
## done and all found will be included except for the
## excluded in excludes.
# devices = [ "/dev/ada0 -d atacam" ]

## Timeout for the smartctl command to complete.
# timeout = "30s"
```

### Permissions
Expand Down
26 changes: 18 additions & 8 deletions plugins/inputs/smart/smart.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ type Smart struct {
Excludes []string
Devices []string
UseSudo bool
Timeout internal.Duration
}

var sampleConfig = `
Expand Down Expand Up @@ -151,8 +152,17 @@ var sampleConfig = `
## done and all found will be included except for the
## excluded in excludes.
# devices = [ "/dev/ada0 -d atacam" ]

## Timeout for the smartctl command to complete.
# timeout = "30s"
`

func NewSmart() *Smart {
return &Smart{
Timeout: internal.Duration{Duration: time.Second * 30},
}
}

func (m *Smart) SampleConfig() string {
return sampleConfig
}
Expand Down Expand Up @@ -180,17 +190,17 @@ func (m *Smart) Gather(acc telegraf.Accumulator) error {
}

// Wrap with sudo
var runCmd = func(sudo bool, command string, args ...string) ([]byte, error) {
var runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
cmd := exec.Command(command, args...)
if sudo {
cmd = exec.Command("sudo", append([]string{"-n", command}, args...)...)
}
return internal.CombinedOutputTimeout(cmd, time.Second*5)
return internal.CombinedOutputTimeout(cmd, timeout.Duration)
}

// Scan for S.M.A.R.T. devices
func (m *Smart) scan() ([]string, error) {
out, err := runCmd(m.UseSudo, m.Path, "--scan")
out, err := runCmd(m.Timeout, m.UseSudo, m.Path, "--scan")
if err != nil {
return []string{}, fmt.Errorf("failed to run command '%s --scan': %s - %s", m.Path, err, string(out))
}
Expand Down Expand Up @@ -226,7 +236,7 @@ func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) {
wg.Add(len(devices))

for _, device := range devices {
go gatherDisk(acc, m.UseSudo, m.Attributes, m.Path, m.Nocheck, device, &wg)
go gatherDisk(acc, m.Timeout, m.UseSudo, m.Attributes, m.Path, m.Nocheck, device, &wg)
}

wg.Wait()
Expand All @@ -243,12 +253,12 @@ func exitStatus(err error) (int, error) {
return 0, err
}

func gatherDisk(acc telegraf.Accumulator, usesudo, collectAttributes bool, smartctl, nocheck, device string, wg *sync.WaitGroup) {
func gatherDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo, collectAttributes bool, smartctl, nocheck, device string, wg *sync.WaitGroup) {
defer wg.Done()
// smartctl 5.41 & 5.42 have are broken regarding handling of --nocheck/-n
args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", nocheck, "--format=brief"}
args = append(args, strings.Split(device, " ")...)
out, e := runCmd(usesudo, smartctl, args...)
out, e := runCmd(timeout, usesudo, smartctl, args...)
outStr := string(out)

// Ignore all exit statuses except if it is a command line parse error
Expand Down Expand Up @@ -436,14 +446,14 @@ func parseTemperature(fields, deviceFields map[string]interface{}, str string) e
}

func init() {
m := Smart{}
m := NewSmart()
path, _ := exec.LookPath("smartctl")
if len(path) > 0 {
m.Path = path
}
m.Nocheck = "standby"

inputs.Add("smart", func() telegraf.Input {
return &m
return m
marcv81 marked this conversation as resolved.
Show resolved Hide resolved
})
}
51 changes: 28 additions & 23 deletions plugins/inputs/smart/smart_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,22 @@ import (
"time"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestGatherAttributes(t *testing.T) {
s := &Smart{
Path: "smartctl",
Attributes: true,
}
s := NewSmart()
s.Path = "smartctl"
s.Attributes = true

assert.Equal(t, time.Second*30, s.Timeout.Duration)

var acc testutil.Accumulator

runCmd = func(sudo bool, command string, args ...string) ([]byte, error) {
runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
if len(args) > 0 {
if args[0] == "--scan" {
return []byte(mockScanData), nil
Expand Down Expand Up @@ -326,10 +329,12 @@ func TestGatherAttributes(t *testing.T) {
}

func TestGatherNoAttributes(t *testing.T) {
s := &Smart{
Path: "smartctl",
Attributes: false,
}
s := NewSmart()
s.Path = "smartctl"
s.Attributes = false

assert.Equal(t, time.Second*30, s.Timeout.Duration)

// overwriting exec commands with mock commands
var acc testutil.Accumulator

Expand Down Expand Up @@ -374,7 +379,7 @@ func TestExcludedDev(t *testing.T) {
}

func TestGatherSATAInfo(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) {
runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(hgstSATAInfoData), nil
}

Expand All @@ -384,13 +389,13 @@ func TestGatherSATAInfo(t *testing.T) {
)

wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg)
gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 101, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(20), acc.NMetrics(), "Wrong number of metrics gathered")
}

func TestGatherSATAInfo65(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) {
runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(hgstSATAInfoData65), nil
}

Expand All @@ -400,13 +405,13 @@ func TestGatherSATAInfo65(t *testing.T) {
)

wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg)
gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 91, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(18), acc.NMetrics(), "Wrong number of metrics gathered")
}

func TestGatherHgstSAS(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) {
runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(hgstSASInfoData), nil
}

Expand All @@ -416,13 +421,13 @@ func TestGatherHgstSAS(t *testing.T) {
)

wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg)
gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 6, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(4), acc.NMetrics(), "Wrong number of metrics gathered")
}

func TestGatherHtSAS(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) {
runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(htSASInfoData), nil
}

Expand All @@ -432,13 +437,13 @@ func TestGatherHtSAS(t *testing.T) {
)

wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg)
gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 5, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(3), acc.NMetrics(), "Wrong number of metrics gathered")
}

func TestGatherSSD(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) {
runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(ssdInfoData), nil
}

Expand All @@ -448,13 +453,13 @@ func TestGatherSSD(t *testing.T) {
)

wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg)
gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered")
}

func TestGatherSSDRaid(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) {
runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(ssdRaidInfoData), nil
}

Expand All @@ -464,13 +469,13 @@ func TestGatherSSDRaid(t *testing.T) {
)

wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg)
gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered")
}

func TestGatherNvme(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) {
runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(nvmeInfoData), nil
}

Expand All @@ -480,7 +485,7 @@ func TestGatherNvme(t *testing.T) {
)

wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg)
gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)

expected := []telegraf.Metric{
testutil.MustMetric("smart_device",
Expand Down