Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split install and enroll on test framework #4482

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions magefile.go
Original file line number Diff line number Diff line change
Expand Up @@ -2270,6 +2270,7 @@ func createTestRunner(matrix bool, singleTest string, goTestFlags string, batche
DiagnosticsDir: diagDir,
StateDir: ".integration-cache",
Platforms: testPlatforms(),
Packages: testPackages(),
Groups: testGroups(),
Matrix: matrix,
SingleTest: singleTest,
Expand Down Expand Up @@ -2365,6 +2366,23 @@ func testPlatforms() []string {
return platforms
}

func testPackages() []string {
packagesStr, defined := os.LookupEnv("TEST_PACKAGES")
if !defined {
return nil
}

var packages []string
for _, p := range strings.Split(packagesStr, ",") {
if p == "tar.gz" {
p = "targz"
}
packages = append(packages, p)
}

return packages
}

func testGroups() []string {
groupsStr := os.Getenv("TEST_GROUPS")
if groupsStr == "" {
Expand Down
244 changes: 143 additions & 101 deletions pkg/testing/fixture_install.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,37 +34,42 @@ import (
// ErrNotInstalled is returned in cases where Agent isn't installed
var ErrNotInstalled = errors.New("Elastic Agent is not installed") //nolint:stylecheck // Elastic Agent is a proper noun

// CmdOpts creates vectors of command arguments for different agent commands
type CmdOpts interface {
toCmdArgs() []string
}

// EnrollOpts specifies the options for the enroll command
type EnrollOpts struct {
URL string // --url
DelayEnroll bool // --delay-enroll
EnrollmentToken string // --enrollment-token
Insecure bool // --insecure
ProxyURL string // --proxy-url
URL string // --url
}

func (e EnrollOpts) toCmdArgs() []string {
var args []string
if e.URL != "" {
args = append(args, "--url", e.URL)

if e.DelayEnroll {
args = append(args, "--delay-enroll")
}
if e.EnrollmentToken != "" {
args = append(args, "--enrollment-token", e.EnrollmentToken)
}
if e.Insecure {
args = append(args, "--insecure")
}
if e.ProxyURL != "" {
args = append(args, "--proxy-url="+e.ProxyURL)
}
if e.URL != "" {
args = append(args, "--url", e.URL)
}

return args
}

// InstallOpts specifies the options for the install command
type InstallOpts struct {
BasePath string // --base-path
Force bool // --force
Insecure bool // --insecure
NonInteractive bool // --non-interactive
ProxyURL string // --proxy-url
DelayEnroll bool // --delay-enroll

// Unprivileged by default installs the Elastic Agent as `--unprivileged` unless
// the platform being tested doesn't currently support it, or it's explicitly set
// to false.
Expand All @@ -82,26 +87,24 @@ func (i InstallOpts) IsUnprivileged(operatingSystem string) bool {
}

func (i InstallOpts) toCmdArgs(operatingSystem string) ([]string, error) {
// BasePath string // --base-path
// Force bool // --force
// NonInteractive bool // --non-interactive
// // Unprivileged by default installs the Elastic Agent as `--unprivileged` unless
// // the platform being tested doesn't currently support it, or it's explicitly set
// // to false.
// Unprivileged *bool // --unprivileged

var args []string
if i.BasePath != "" {
args = append(args, "--base-path", i.BasePath)
}
if i.Force {
args = append(args, "--force")
}
if i.Insecure {
args = append(args, "--insecure")
}
if i.NonInteractive {
args = append(args, "--non-interactive")
}
if i.ProxyURL != "" {
args = append(args, "--proxy-url="+i.ProxyURL)
}
if i.DelayEnroll {
args = append(args, "--delay-enroll")
}

unprivileged := i.IsUnprivileged(operatingSystem)
if unprivileged {
if operatingSystem != "linux" {
Expand Down Expand Up @@ -130,8 +133,10 @@ func NewBool(value bool) *bool {
func (f *Fixture) Install(ctx context.Context, installOpts *InstallOpts, opts ...process.CmdOption) ([]byte, error) {
f.t.Logf("[test %s] Inside fixture install function", f.t.Name())

// check for running agents before installing, but proceed anyway
assert.Empty(f.t, getElasticAgentProcesses(f.t), "there should be no running agent at beginning of Install()")
// check for running agents before installing, there should be no agent
// process running. However, even if there is one, let's keep the test going
// and add a failure so we can investigate what happened.
// assert.Empty(f.t, getElasticAgentProcesses(f.t), "there should be no running agent at beginning of Install()")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this commented now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as I commented in PR, it's still in draft, sorry I forgot to set it as draft when opening the PR


switch f.packageFormat {
case "targz", "zip":
Expand All @@ -158,10 +163,18 @@ func (f *Fixture) installNoPkgManager(ctx context.Context, installOpts *InstallO
installOpts = &InstallOpts{}
}

// split install and enroll so if the enrollment fails, the agent won't
// uninstall itself. We will collect either a diagnostics or the agent
// directory if any failure occurs.

enrollOnlyOpts := installOpts.EnrollOpts
installOnlyOpts := installOpts
installOnlyOpts.EnrollOpts = EnrollOpts{}

installArgs := []string{"install"}
installOptsArgs, err := installOpts.toCmdArgs(f.operatingSystem)
installOptsArgs, err := installOnlyOpts.toCmdArgs(f.operatingSystem)
if err != nil {
return nil, err
return nil, fmt.Errorf("could not generate install args: %w", err)
}
installArgs = append(installArgs, installOptsArgs...)
out, err := f.Exec(ctx, installArgs, opts...)
Expand Down Expand Up @@ -193,98 +206,127 @@ func (f *Fixture) installNoPkgManager(ctx context.Context, installOpts *InstallO
f.setClient(c)

f.t.Cleanup(func() {
if f.t.Failed() {
procs := getProcesses(f.t, `.*`)
dir, err := findProjectRoot(f.caller)
if err != nil {
f.t.Logf("failed to dump process; failed to find project root: %s", err)
return
}
f.t.Logf("[test %s] Inside fixture cleanup function", f.t.Name())

// Sub-test names are separated by "/" characters which are not valid filenames on Linux.
sanitizedTestName := strings.ReplaceAll(f.t.Name(), "/", "-")
f.cleanUpCollectDiagnostics()

filePath := filepath.Join(dir, "build", "diagnostics", fmt.Sprintf("TEST-%s-%s-%s-ProcessDump.json", sanitizedTestName, f.operatingSystem, f.architecture))
f.t.Logf("Dumping running processes in %s", filePath)
file, err := os.OpenFile(filePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
if err != nil {
f.t.Logf("failed to dump process; failed to create output file %s root: %s", file.Name(), err)
return
}
defer func(file *os.File) {
err := file.Close()
if err != nil {
f.t.Logf("error closing file %s: %s", file.Name(), err)
}
}(file)
err = json.NewEncoder(file).Encode(procs)
if err != nil {
f.t.Logf("error serializing processes: %s", err)
}
if f.cleanUpKeepInstalled() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This returns a boolean which is not documented, I cannot understand from the function name what this boolean is.

return
}
})

f.t.Cleanup(func() {
if f.cleanUpUninstall() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same about boolean here.

return
}

f.dumpProcessOnFailure()

// check for running agents after uninstall had a chance to run
assert.Empty(f.t, getElasticAgentProcesses(f.t), "there should be no running agent at the end of the test")
})

f.t.Cleanup(func() {
f.t.Logf("[test %s] Inside fixture cleanup function", f.t.Name())

if !f.installed {
f.t.Logf("skipping uninstall; agent not installed (fixture.installed is false)")
// not installed; no need to clean up or collect diagnostics
return
emptyEnrollOpts := EnrollOpts{}
if enrollOnlyOpts != emptyEnrollOpts {
enrollArgs := []string{"enroll"}
enrollArgs = append(enrollArgs, enrollOnlyOpts.toCmdArgs()...)
out, err = f.Exec(ctx, enrollArgs, opts...)
if err != nil {
return out, fmt.Errorf("error running agent enroll command: %w", err)
}
}

// diagnostics is collected when either the environment variable
// AGENT_COLLECT_DIAG=true or the test is marked failed
collect := collectDiagFlag()
failed := f.t.Failed()
if collect || failed {
if collect {
f.t.Logf("collecting diagnostics; AGENT_COLLECT_DIAG=true")
} else if failed {
f.t.Logf("collecting diagnostics; test failed")
}
f.collectDiagnostics()
return out, nil
}

func (f *Fixture) cleanUpUninstall() bool {
if !f.installed {
f.t.Logf("skipping uninstall; agent not installed (fixture.installed is false)")
// not installed; no need to clean up or collect diagnostics
return true
}

// uninstall agent
// 5 minute timeout, to ensure that it at least doesn't get stuck.
// original context is not used as it could have a timeout on the context
// for the install and we don't want that context to prevent the uninstall
uninstallCtx, uninstallCancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer uninstallCancel()
out, err := f.Uninstall(uninstallCtx, &UninstallOpts{Force: true, UninstallToken: f.uninstallToken})
f.setClient(nil)
if err != nil &&
(errors.Is(err, ErrNotInstalled) ||
strings.Contains(
err.Error(),
"elastic-agent: no such file or directory")) {
Comment on lines +255 to +259
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we're already refactoring, please assign each condition to a variable, this is very hard to read.

f.t.Logf("fixture.Install Cleanup: agent was already uninstalled, skipping uninstall")
// Agent fixture has already been uninstalled, perhaps by
// an explicit call to fixture.Uninstall, so nothing needs
// to be done here.
return true
}
require.NoErrorf(f.t, err, "uninstalling agent failed. Output: %q", out)
return false
}

func (f *Fixture) cleanUpKeepInstalled() bool {
// environment variable AGENT_KEEP_INSTALLED=true will skip the uninstallation
// useful to debug the issue with the Elastic Agent
if f.t.Failed() && keepInstalledFlag() {
f.t.Logf("skipping uninstall; test failed and AGENT_KEEP_INSTALLED=true")
return true
}

if keepInstalledFlag() {
f.t.Logf("ignoring AGENT_KEEP_INSTALLED=true as test succeeded, " +
"keeping the agent installed will jeopardise other tests")
}
return false
}

func (f *Fixture) cleanUpCollectDiagnostics() {
// diagnostics is collected when either the environment variable
// AGENT_COLLECT_DIAG=true or the test is marked failed
collect := collectDiagFlag()
failed := f.t.Failed()
if collect || failed {
if collect {
f.t.Logf("collecting diagnostics; AGENT_COLLECT_DIAG=true")
} else if failed {
f.t.Logf("collecting diagnostics; test failed")
}
f.collectDiagnostics()
}
}

// environment variable AGENT_KEEP_INSTALLED=true will skip the uninstallation
// useful to debug the issue with the Elastic Agent
if f.t.Failed() && keepInstalledFlag() {
f.t.Logf("skipping uninstall; test failed and AGENT_KEEP_INSTALLED=true")
func (f *Fixture) dumpProcessOnFailure() {
if f.t.Failed() {
procs := getProcesses(f.t, `.*`)
dir, err := findProjectRoot(f.caller)
if err != nil {
f.t.Logf("failed to dump process; failed to find project root: %s", err)
return
}

if keepInstalledFlag() {
f.t.Logf("ignoring AGENT_KEEP_INSTALLED=true as test succeeded, " +
"keeping the agent installed will jeopardise other tests")
}
// Sub-test names are separated by "/" characters which are not valid filenames on Linux.
sanitizedTestName := strings.ReplaceAll(f.t.Name(), "/", "-")

// 5 minute timeout, to ensure that it at least doesn't get stuck.
// original context is not used as it could have a timeout on the context
// for the install and we don't want that context to prevent the uninstall
uninstallCtx, uninstallCancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer uninstallCancel()
out, err := f.Uninstall(uninstallCtx, &UninstallOpts{Force: true, UninstallToken: f.uninstallToken})
f.setClient(nil)
if err != nil &&
(errors.Is(err, ErrNotInstalled) ||
strings.Contains(
err.Error(),
"elastic-agent: no such file or directory")) {
f.t.Logf("fixture.Install Cleanup: agent was already uninstalled, skipping uninstall")
// Agent fixture has already been uninstalled, perhaps by
// an explicit call to fixture.Uninstall, so nothing needs
// to be done here.
filePath := filepath.Join(dir, "build", "diagnostics", fmt.Sprintf("TEST-%s-%s-%s-ProcessDump.json", sanitizedTestName, f.operatingSystem, f.architecture))
f.t.Logf("Dumping running processes in %s", filePath)
file, err := os.OpenFile(filePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
if err != nil {
f.t.Logf("failed to dump process; failed to create output file %s root: %s", file.Name(), err)
return
}
require.NoErrorf(f.t, err, "uninstalling agent failed. Output: %q", out)
})

return out, nil
defer func(file *os.File) {
err := file.Close()
if err != nil {
f.t.Logf("error closing file %s: %s", file.Name(), err)
}
}(file)
err = json.NewEncoder(file).Encode(procs)
if err != nil {
f.t.Logf("error serializing processes: %s", err)
}
}
}

type runningProcess struct {
Expand Down Expand Up @@ -370,7 +412,7 @@ func getProcesses(t *gotesting.T, regex string) []runningProcess {
// - an error if any.
func (f *Fixture) installDeb(ctx context.Context, installOpts *InstallOpts, opts []process.CmdOption) ([]byte, error) {
f.t.Logf("[test %s] Inside fixture installDeb function", f.t.Name())
//Prepare so that the f.srcPackage string is populated
// Prepare so that the f.srcPackage string is populated
err := f.EnsurePrepared(ctx)
if err != nil {
return nil, fmt.Errorf("failed to prepare: %w", err)
Expand Down
5 changes: 5 additions & 0 deletions pkg/testing/runner/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ type Config struct {
// defined in this list.
Platforms []string

// Packages filters the tests to only run on the provided list
// of platforms even if the tests supports more than what is
// defined in this list.
Packages []string

// BinaryName is the name of the binary package under test, i.e, elastic-agent, metricbeat, etc
// this is used to copy the .tar.gz to the remote host
BinaryName string
Expand Down
Loading
Loading