Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

discovery: add rails service name detector #30091

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,8 @@ func (s *discovery) getServiceInfo(proc *process.Process) (*serviceInfo, error)
}

contextMap := make(usm.DetectorContextMap)
contextMap[usm.ServiceProc] = proc

fs := usm.NewSubDirFS(root)
ctx := usm.NewDetectionContext(cmdline, env, fs)
ctx.Pid = int(proc.Pid)
Expand Down
104 changes: 104 additions & 0 deletions pkg/collector/corechecks/servicediscovery/usm/ruby.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

package usm

import (
"errors"
"fmt"
"io"
"io/fs"
"regexp"
"strings"

"github.com/shirou/gopsutil/v3/process"

"github.com/DataDog/datadog-agent/pkg/util/log"
)

var (
moduleRegexp = regexp.MustCompile(`module\s+([A-Z][a-zA-Z0-9_]*)`)
matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
)

type railsDetector struct {
ctx DetectionContext
}

func newRailsDetector(ctx DetectionContext) detector {
return &railsDetector{ctx}
}

// detect checks if the service is a Rails application by looking for a
// `config/application.rb` file generated by `rails new` when a new rails
// project is created. This file should contain a `module` declaration with the
// application name.
func (r railsDetector) detect(_ []string) (ServiceMetadata, bool) {
var proc *process.Process

if procEntry, ok := r.ctx.ContextMap[ServiceProc]; ok {
if p, ok := procEntry.(*process.Process); ok {
proc = p
} else {
log.Errorf("could not get process object in rails detector: got type %T", procEntry)
}
}

cwd, err := proc.Cwd()
if err != nil {
log.Debugf("could not get cwd of process: %s", err)
return ServiceMetadata{}, false
}

absFile := abs("config/application.rb", cwd)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it necessary to call "abs"? the first parameter is hard-coded and it is not absolute path.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The hardcoded part is considered relative to the cwd. We use abs to make it a absolute path by concatenating it to the cwd.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant you can path.Join(cwd, "config/application.rb") without calling abs(), but is up to you.

if _, err := fs.Stat(r.ctx.fs, absFile); err != nil {
return ServiceMetadata{}, false
}

name, err := r.findRailsApplicationName(absFile)
if err != nil {
log.Debugf("could not find ruby application name: %s", err)
return ServiceMetadata{}, false
}

return NewServiceMetadata(railsUnderscore(name)), true
}

// findRailsApplicationName scans the `config/application.rb` file to find the
// Rails application name.
func (r railsDetector) findRailsApplicationName(filename string) (string, error) {
file, err := r.ctx.fs.Open(filename)
if err != nil {
return "", fmt.Errorf("could not open application.rb: %w", err)
}
defer file.Close()

reader, err := SizeVerifiedReader(file)
if err != nil {
return "", fmt.Errorf("skipping application.rb (%q): %w", filename, err)
}

bytes, err := io.ReadAll(reader)
yuri-lipnesh marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return "", fmt.Errorf("unable to read application.rb (%q): %w", filename, err)
}

matches := moduleRegexp.FindSubmatch(bytes)
if len(matches) < 2 {
// No match found
return "", errors.New("could not find Ruby module name")
}

return string(matches[1]), nil
}

// railsUnderscore converts a PascalCasedWord to a snake_cased_word.
// It keeps uppercase acronyms together when converting (e.g. "HTTPServer" -> "http_server").
func railsUnderscore(pascalCasedWord string) string {
snake := matchFirstCap.ReplaceAllString(pascalCasedWord, "${1}_${2}")
snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
Comment on lines +101 to +102
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

operations on strings are expensive, especially when regex is involved
can't we make a single call to ReplaceAllString?

Copy link
Member Author

@Yumasi Yumasi Oct 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked a bit into this, and AFAICT, doing it in one call to ReplaceAllString would require lookahead (to find the next word first upper case letter, without actually matching it), which Go's regexp does not support.
So if we want to avoid doing that it looks like we need to make our own Pascal->Snake case converter.

return strings.ToLower(snake)
}
48 changes: 48 additions & 0 deletions pkg/collector/corechecks/servicediscovery/usm/ruby_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

//go:build linux

package usm

import (
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestGenerateNameFromRailsApplicationRb(t *testing.T) {
tests := []struct {
name string
path string
expected string
}{
{
name: "name is found",
path: "./testdata/application.rb",
expected: "rails_hello",
},
{
name: "name not found",
path: "./testdata/application_invalid.rb",
expected: "",
},
Comment on lines +24 to +33
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as my other comment suggested, let's work on the test cases to gain full coverage of the function

}
full, err := filepath.Abs("testdata/root")
require.NoError(t, err)
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
instance := &railsDetector{ctx: DetectionContext{
fs: NewSubDirFS(full),
contextMap: make(DetectorContextMap),
}}
value, ok := instance.findRailsApplicationName(tt.path)
assert.Equal(t, len(tt.expected) > 0, ok)
assert.Equal(t, tt.expected, railsUnderscore(value))
})
}
}
5 changes: 4 additions & 1 deletion pkg/collector/corechecks/servicediscovery/usm/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ const (
NodePackageJSONPath = iota
// ServiceSubFS The SubdirFS instance package.json path is valid in.
ServiceSubFS = iota
// The pointer to the Process instance of the service
ServiceProc = iota
)

const (
Expand Down Expand Up @@ -188,8 +190,9 @@ var languageDetectors = map[language.Language]detectorCreatorFn{
// Map executables that usually have additional process context of what's
// running, to context detectors
var executableDetectors = map[string]detectorCreatorFn{
"sudo": newSimpleDetector,
"gunicorn": newGunicornDetector,
"puma": newRailsDetector,
"sudo": newSimpleDetector,
}

func serviceNameInjected(envs envs.Variables) bool {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
require_relative "boot"

require "rails"
# Pick the frameworks you want:
require "active_model/railtie"
# require "active_job/railtie"
require "active_record/railtie"
# require "active_storage/engine"
require "action_controller/railtie"
# require "action_mailer/railtie"
# require "action_mailbox/engine"
# require "action_text/engine"
require "action_view/railtie"
# require "action_cable/engine"
require "sprockets/railtie"
require "rails/test_unit/railtie"

# Require the gems listed in Gemfile, including any gems
# you've limited to :test, :development, or :production.
Bundler.require(*Rails.groups)

module RailsHello
class Application < Rails::Application
# Initialize configuration defaults for originally generated Rails version.
config.load_defaults 6.1

# Configuration for the application, engines, and railties goes here.
#
# These settings can be overridden in specific environments using the files
# in config/environments, which are processed later.
#
# config.time_zone = "Central Time (US & Canada)"
# config.eager_load_paths << Rails.root.join("extras")

# Don't generate system test files.
config.generators.system_tests = nil
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class SomeRubyClass
end
17 changes: 16 additions & 1 deletion test/new-e2e/tests/discovery/linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,13 @@ type linuxTestSuite struct {
e2e.BaseSuite[environments.Host]
}

var services = []string{"python-svc", "python-instrumented", "node-json-server", "node-instrumented"}
var services = []string{
"python-svc",
"python-instrumented",
"node-json-server",
"node-instrumented",
"rails-svc",
}

func TestLinuxTestSuite(t *testing.T) {
agentParams := []func(*agentparams.Params) error{
Expand Down Expand Up @@ -125,6 +131,15 @@ func (s *linuxTestSuite) TestServiceDiscoveryCheck() {
assert.NotZero(c, found.Payload.RSSMemory)
}

found = foundMap["rails_hello"]
if assert.NotNil(c, found) {
assert.Equal(c, "rails_hello", found.Payload.ServiceName)
assert.Equal(c, "rails_hello", found.Payload.GeneratedServiceName)
assert.Empty(c, found.Payload.DDService)
assert.Empty(c, found.Payload.ServiceNameSource)
assert.NotZero(c, found.Payload.RSSMemory)
}

assert.Contains(c, foundMap, "json-server")
}, 3*time.Minute, 10*time.Second)
}
Expand Down
89 changes: 60 additions & 29 deletions test/new-e2e/tests/discovery/testdata/provision/provision.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,47 @@

set -e

install_systemd_unit() {
while [[ $# -ge 2 ]]; do
case $1 in
--workdir)
shift
workdir="WorkingDirectory=$1"
shift
;;
*)
break
;;
esac
done

name=$1
command=$2
port=$3
extraenv=$4

cat > "/etc/systemd/system/${name}.service" <<- EOM
[Unit]
Description=${name}
After=network.target
StartLimitIntervalSec=0

[Service]
Type=simple
Restart=always
RestartSec=1
User=root
ExecStart=${command}
Environment="PORT=${port}"
Environment="NODE_VERSION=20"
Environment="${extraenv}"
${workdir}

[Install]
WantedBy=multi-user.target
EOM
}

apt-get update
apt-get install -y \
ca-certificates \
Expand All @@ -28,43 +69,33 @@ nvm install 20 || nvm install 20 || nvm install 20
npm install json-server || npm install json-server
npm install /home/ubuntu/e2e-test/node/instrumented

# Install our own services
install_systemd_unit () {
name=$1
command=$2
port=$3
extraenv=$4

cat > "/etc/systemd/system/${name}.service" <<- EOM
[Unit]
Description=${name}
After=network.target
StartLimitIntervalSec=0

[Service]
Type=simple
Restart=always
RestartSec=1
User=root
ExecStart=${command}
Environment="PORT=${port}"
Environment="NODE_VERSION=20"
Environment="${extraenv}"
# Install Ruby
## Packages
apt-get install -y \
ruby \
ruby-dev \
ruby-rails \
sqlite3 \

[Install]
WantedBy=multi-user.target
EOM
}
## Create new Rails project
pushd /home/ubuntu
rails new rails-hello --minimal
bundle install --gemfile=/home/ubuntu/rails-hello/Gemfile
popd

# Node
# Install our own services
## Node
install_systemd_unit "node-json-server" "$NVM_DIR/nvm-exec npx json-server --port 8084 /home/ubuntu/e2e-test/node/json-server/db.json" "8084" ""
install_systemd_unit "node-instrumented" "$NVM_DIR/nvm-exec node /home/ubuntu/e2e-test/node/instrumented/server.js" "8085" ""

# Python
## Python
install_systemd_unit "python-svc" "/usr/bin/python3 /home/ubuntu/e2e-test/python/server.py" "8082" "DD_SERVICE=python-svc-dd"
install_systemd_unit "python-instrumented" "/usr/bin/python3 /home/ubuntu/e2e-test/python/instrumented.py" "8083" ""

## Ruby
install_systemd_unit --workdir "/home/ubuntu/rails-hello" "rails-svc" "rails server" "7777" ""

systemctl daemon-reload

# leave them stopped
## leave them stopped
systemctl stop python-svc
Loading