Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmd/scollector: When WatchedProc processes die, clean them up. #1962

Merged
merged 1 commit into from
Nov 18, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 34 additions & 20 deletions cmd/scollector/collectors/processes_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"sort"
"strconv"
"strings"
"time"

"bosun.org/cmd/scollector/conf"
"bosun.org/metadata"
Expand Down Expand Up @@ -64,37 +65,38 @@ func linuxProcMonitor(w *WatchedProc, md *opentsdb.MultiDataPoint) error {
var totalCPU int64
var totalVirtualMem int64
var totalRSSMem int64
for pid, id := range w.Processes {
for proc, id := range w.Processes {
pid := proc.Pid
file_status, e := os.Stat("/proc/" + pid)
if e != nil {
w.Remove(pid)
w.Remove(proc)
continue
}
processCount++
stats_file, e := ioutil.ReadFile("/proc/" + pid + "/stat")
if e != nil {
w.Remove(pid)
w.Remove(proc)
continue
}
io_file, e := ioutil.ReadFile("/proc/" + pid + "/io")
if e != nil {
w.Remove(pid)
w.Remove(proc)
continue
}
limits, e := ioutil.ReadFile("/proc/" + pid + "/limits")
if e != nil {
w.Remove(pid)
w.Remove(proc)
continue
}
fd_dir, e := os.Open("/proc/" + pid + "/fd")
if e != nil {
w.Remove(pid)
w.Remove(proc)
continue
}
fds, e := fd_dir.Readdirnames(0)
fd_dir.Close()
if e != nil {
w.Remove(pid)
w.Remove(proc)
continue
}
stats := strings.Fields(string(stats_file))
Expand Down Expand Up @@ -220,22 +222,23 @@ func getLinuxProccesses() ([]*Process, error) {
return nil, err
}
sort.Sort(byModTime(files))
var pids []string
var pidFiles []os.FileInfo
for _, f := range files {
if _, err := strconv.Atoi(f.Name()); err == nil && f.IsDir() {
pids = append(pids, f.Name())
pidFiles = append(pidFiles, f)
}
}
var lps []*Process
for _, pid := range pids {
cl, err := getLinuxCmdline(pid)
for _, pidFile := range pidFiles {
cl, err := getLinuxCmdline(pidFile.Name())
if err != nil || cl == nil {
//Continue because the pid might not exist any more
continue
}
lp := &Process{
Pid: pid,
Pid: pidFile.Name(),
Command: cl[0],
Started: pidFile.ModTime(),
}
if len(cl) > 1 {
lp.Arguments = strings.Join(cl[1:], "")
Expand Down Expand Up @@ -276,6 +279,7 @@ type Process struct {
Pid string
Command string
Arguments string
Started time.Time
}

// NewWatchedProc takes a configuration block [[Process]] from conf
Expand All @@ -290,7 +294,7 @@ func NewWatchedProc(params conf.ProcessParams) (*WatchedProc, error) {
Command: regexp.MustCompile(params.Command),
Name: params.Name,
IncludeCount: params.IncludeCount,
Processes: make(map[string]int),
Processes: make(map[Process]int),
ArgMatch: regexp.MustCompile(params.Args),
idPool: new(idPool),
}, nil
Expand All @@ -300,15 +304,19 @@ type WatchedProc struct {
Command *regexp.Regexp
Name string
IncludeCount bool
Processes map[string]int
Processes map[Process]int
ArgMatch *regexp.Regexp
*idPool
}

// Check finds all matching processes and assigns them a new unique id.
// Check finds all matching processes and assigns them a new unique id. If
// WatchedProc has processes that no longer exist, it removes them from
// WatchedProc.Processes.
func (w *WatchedProc) Check(procs []*Process) {
procFound := make(map[Process]bool)
for _, l := range procs {
if _, ok := w.Processes[l.Pid]; ok {
if _, ok := w.Processes[*l]; ok {
procFound[*l] = true
continue
}
if !w.Command.MatchString(l.Command) {
Expand All @@ -317,13 +325,19 @@ func (w *WatchedProc) Check(procs []*Process) {
if !w.ArgMatch.MatchString(l.Arguments) {
continue
}
w.Processes[l.Pid] = w.get()
w.Processes[*l] = w.get()
procFound[*l] = true
}
for proc, _ := range w.Processes {
if !procFound[proc] {
w.Remove(proc)
}
}
}

func (w *WatchedProc) Remove(pid string) {
w.put(w.Processes[pid])
delete(w.Processes, pid)
func (w *WatchedProc) Remove(proc Process) {
w.put(w.Processes[proc])
delete(w.Processes, proc)
}

type idPool struct {
Expand Down
22 changes: 18 additions & 4 deletions cmd/scollector/collectors/systemd_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package collectors

import (
"fmt"
"os"
"os/exec"
"regexp"
"strconv"
"strings"

"bosun.org/cmd/scollector/conf"
Expand Down Expand Up @@ -100,29 +102,41 @@ func watchSystemdServiceProc(md *opentsdb.MultiDataPoint, conn *dbus.Conn, unit
return err
}

mainPID := mainPIDProp.Value.Value().(uint32)
mainPID := mainPIDProp.Value.Value().(int)
// MainPID is 0 if there is no running service.
if mainPID == 0 {
return nil
}
pidStr := strconv.Itoa(mainPID)

cmdline, err := getLinuxCmdline(fmt.Sprint(mainPID))
cmdline, err := getLinuxCmdline(pidStr)
if err != nil {
return err
}
if cmdline == nil {
return nil
}

pidFile, err := os.Stat("/proc/" + pidStr)
if err != nil {
return err
}

proc := Process{
Pid: pidStr,
Command: cmdline[0],
Started: pidFile.ModTime(),
}

wp := WatchedProc{
Command: regexp.MustCompile("^" + regexp.QuoteMeta(cmdline[0]) + "$"),
Name: strings.TrimSuffix(unit.Name, ".service"),
Processes: make(map[string]int),
Processes: make(map[Process]int),
ArgMatch: regexp.MustCompile(""),
idPool: new(idPool)}

// Since we only have one PID per service (at the moment), this is always set to 1
wp.Processes[fmt.Sprint(mainPID)] = wp.get()
wp.Processes[proc] = wp.get()

if e := linuxProcMonitor(&wp, md); e != nil {
return e
Expand Down