Skip to content

Commit

Permalink
Fix git binary handling and add a smoke test (#3379)
Browse files Browse the repository at this point in the history
* Fix git binary handling and add a smoke test

* hide stdout

* add failure case to smoke test

* run again with deadlock fix

* Add logic to drain reader in the event of an error

* add tests

* be picky

* set author identity

* suppress linter

---------

Co-authored-by: Ahrav Dutta <[email protected]>
  • Loading branch information
dustin-decker and ahrav authored Oct 7, 2024
1 parent 57802ab commit 59c615a
Show file tree
Hide file tree
Showing 5 changed files with 263 additions and 31 deletions.
37 changes: 35 additions & 2 deletions .github/workflows/smoke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,38 @@ jobs:
- name: Smoke
run: |
set -e
go run . git https:/dustin-decker/secretsandstuff.git
go run . github --repo https:/dustin-decker/secretsandstuff.git
go run . git https:/dustin-decker/secretsandstuff.git > /dev/null
go run . github --repo https:/dustin-decker/secretsandstuff.git > /dev/null
zombies:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: "1.23"
- name: Checkout code
uses: actions/checkout@v4
- name: Run trufflehog
run: |
set -e
go run . git --no-verification file://. > /dev/null
# This case previously had a deadlock issue and left zombies after trufflehog exited #3379
go run . git --no-verification https:/git-test-fixtures/binary.git > /dev/null
- name: Check for running git processes and zombies
run: |
if pgrep -x "git" > /dev/null
then
echo "Git processes are still running"
exit 1
else
echo "No git processes found"
fi
if ps -A -ostat,ppid | grep -e '[zZ]' > /dev/null
then
echo "Zombie processes found"
exit 1
else
echo "No zombie processes found"
fi
3 changes: 3 additions & 0 deletions pkg/handlers/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package handlers
import (
"context"
"errors"
"io"
"time"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
Expand Down Expand Up @@ -77,6 +78,8 @@ func (h *defaultHandler) handleNonArchiveContent(ctx logContext.Context, reader
if common.SkipFile(mimeExt) || common.IsBinary(mimeExt) {
ctx.Logger().V(5).Info("skipping file", "ext", mimeExt)
h.metrics.incFilesSkipped()
// Make sure we consume the reader to avoid potentially blocking indefinitely.
_, _ = io.Copy(io.Discard, reader)
return nil
}

Expand Down
16 changes: 15 additions & 1 deletion pkg/handlers/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,21 @@ func HandleFile(
}
return fmt.Errorf("error creating custom reader: %w", err)
}
defer rdr.Close()
defer func() {
// Ensure all data is read to prevent broken pipe.
_, copyErr := io.Copy(io.Discard, rdr)
if copyErr != nil {
err = fmt.Errorf("error discarding remaining data: %w", copyErr)
}
closeErr := rdr.Close()
if closeErr != nil {
if err != nil {
err = fmt.Errorf("%v; error closing reader: %w", err, closeErr)
} else {
err = fmt.Errorf("error closing reader: %w", closeErr)
}
}
}()

ctx = logContext.WithValues(ctx, "mime", rdr.mime.String())

Expand Down
205 changes: 205 additions & 0 deletions pkg/handlers/handlers_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
package handlers

import (
"archive/zip"
"bytes"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -475,3 +479,204 @@ func TestHandleZipCommandStdoutPipe(t *testing.T) {

assert.Equal(t, wantCount, count)
}

func TestHandleGitCatFile(t *testing.T) {
tests := []struct {
name string
fileName string
fileSize int
supportedType bool
expectedChunks int
}{
{
name: "LargeBlob",
fileName: "largefile.bin",
fileSize: 50 * 1024 * 1024, // 50 MB
supportedType: true,
expectedChunks: 5120,
},
{
name: "UnsupportedType",
fileName: "unsupported.so",
fileSize: 1024 * 1024, // 1 MB
supportedType: false,
expectedChunks: 0,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Set up a temporary git repository with the specified file.
var gitDir string
if tt.supportedType {
gitDir = setupTempGitRepo(t, tt.fileName, tt.fileSize)
} else {
gitDir = setupTempGitRepoWithUnsupportedFile(t, tt.fileName, tt.fileSize)
}
defer os.RemoveAll(gitDir)

cmd := exec.Command("git", "-C", gitDir, "rev-parse", "HEAD")
hashBytes, err := cmd.Output()
assert.NoError(t, err, "Failed to get commit hash")
commitHash := strings.TrimSpace(string(hashBytes))

// Create a pipe to simulate the git cat-file stdout.
cmd = exec.Command("git", "-C", gitDir, "cat-file", "blob", fmt.Sprintf("%s:%s", commitHash, tt.fileName))

var stderr bytes.Buffer
cmd.Stderr = &stderr

stdout, err := cmd.StdoutPipe()
assert.NoError(t, err, "Failed to create stdout pipe")

err = cmd.Start()
assert.NoError(t, err, "Failed to start git cat-file command")

ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()

chunkCh := make(chan *sources.Chunk, 1000)

go func() {
defer close(chunkCh)
err := HandleFile(ctx, stdout, &sources.Chunk{}, sources.ChanReporter{Ch: chunkCh}, WithSkipArchives(false))
assert.NoError(t, err, "HandleFile should not return an error")
}()

err = cmd.Wait()
assert.NoError(t, err, "git cat-file command should complete without error")

count := 0
for range chunkCh {
count++
}

assert.Equal(t, tt.expectedChunks, count, "Number of chunks should match the expected value")
})
}
}

func setupTempGitRepoWithUnsupportedFile(t *testing.T, fileName string, fileSize int) string {
t.Helper()
return setupTempGitRepoCommon(t, fileName, fileSize, true)
}

func setupTempGitRepo(t *testing.T, archiveName string, fileSize int) string {
t.Helper()
return setupTempGitRepoCommon(t, archiveName, fileSize, false)
}

func setupTempGitRepoCommon(t *testing.T, fileName string, fileSize int, isUnsupported bool) string {
t.Helper()

tempDir := t.TempDir()

cmd := exec.Command("git", "init", tempDir)
var initStderr bytes.Buffer
cmd.Stderr = &initStderr
err := cmd.Run()
if err != nil {
t.Fatalf("Failed to initialize git repository: %v, stderr: %s", err, initStderr.String())
}

cmds := [][]string{
{"git", "-C", tempDir, "config", "user.name", "Test User"},
{"git", "-C", tempDir, "config", "user.email", "[email protected]"},
}

for _, cmdArgs := range cmds {
cmd := exec.Command(cmdArgs[0], cmdArgs[1:]...) //nolint:gosec
var cmdStderr bytes.Buffer
cmd.Stderr = &cmdStderr
err := cmd.Run()
if err != nil {
t.Fatalf("Failed to set git config: %v, stderr: %s", err, cmdStderr.String())
}
}

filePath := filepath.Join(tempDir, fileName)

// Create the file with appropriate content.
f, err := os.Create(filePath)
if err != nil {
t.Fatalf("Failed to create file: %v", err)
}
defer f.Close()

if isUnsupported {
// Write ELF header for unsupported file.
// https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
elfHeader := []byte{
0x7f, 'E', 'L', 'F', // ELF magic number
2, // 64-bit format
1, // Little endian
1, // Current version of ELF
0, // Target OS ABI
0, // ABI Version
0, 0, 0, 0, 0, 0, 0, // 7 bytes of padding
3, 0, // Relocatable file
0x3e, 0, // AMD x86-64 architecture
1, 0, 0, 0, // ELF version
0, 0, 0, 0, 0, 0, 0, 0, // Entry point
0, 0, 0, 0, 0, 0, 0, 0, // Program header offset
0, 0, 0, 0, 0, 0, 0, 0, // Section header offset
}
_, err = f.Write(elfHeader)
if err != nil {
t.Fatalf("Failed to write ELF header: %v", err)
}
} else {
// Write ZIP content for supported file.
zipWriter := zip.NewWriter(f)
header := &zip.FileHeader{
Name: "largefile.txt",
Method: zip.Store, // No compression
}
zipFileWriter, err := zipWriter.CreateHeader(header)
if err != nil {
t.Fatalf("Failed to create file in ZIP archive: %v", err)
}

dataChunk := bytes.Repeat([]byte("A"), 1024) // 1KB chunk
totalWritten := 0
for totalWritten < fileSize {
remaining := fileSize - totalWritten
if remaining < len(dataChunk) {
_, err = zipFileWriter.Write(dataChunk[:remaining])
if err != nil {
t.Fatalf("Failed to write to inner file in ZIP archive: %v", err)
}
totalWritten += remaining
} else {
_, err = zipFileWriter.Write(dataChunk)
if err != nil {
t.Fatalf("Failed to write to inner file in ZIP archive: %v", err)
}
totalWritten += len(dataChunk)
}
}

if err := zipWriter.Close(); err != nil {
t.Fatalf("Failed to close ZIP writer: %v", err)
}
}

// Add and commit the file to Git.
cmd = exec.Command("git", "-C", tempDir, "add", fileName)
var addStderr bytes.Buffer
cmd.Stderr = &addStderr
err = cmd.Run()
if err != nil {
t.Fatalf("Failed to add file to git: %v, stderr: %s", err, addStderr.String())
}

cmd = exec.Command("git", "-C", tempDir, "commit", "-m", "Add file")
var commitStderr bytes.Buffer
cmd.Stderr = &commitStderr
err = cmd.Run()
if err != nil {
t.Fatalf("Failed to commit file to git: %v, stderr: %s", err, commitStderr.String())
}

return tempDir
}
33 changes: 5 additions & 28 deletions pkg/sources/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -1241,44 +1241,21 @@ func (s *Git) handleBinary(ctx context.Context, gitDir string, reporter sources.
}

cmd := exec.Command("git", "-C", gitDir, "cat-file", "blob", commitHash.String()+":"+path)
stdout, catCmd, err := s.executeCatFileCmd(cmd)
if err != nil {
return err
}
// Wait must be called after closing the pipe (defer is a stack, so first defer is executed last)
defer func() {
_ = catCmd.Wait()
}()
defer stdout.Close()

err = handlers.HandleFile(ctx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(s.skipArchives))

// Always call Wait() to ensure the process is properly cleaned up
waitErr := cmd.Wait()

// If there was an error in HandleFile, return that error
if err != nil {
return err
}

// If Wait() resulted in an error, return that error
return waitErr
}

func (s *Git) executeCatFileCmd(cmd *exec.Cmd) (io.ReadCloser, *exec.Cmd, error) {
var stderr bytes.Buffer
cmd.Stderr = &stderr

stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, nil, fmt.Errorf("error running git cat-file: %w\n%s", err, stderr.Bytes())
return fmt.Errorf("error running git cat-file: %w\n%s", err, stderr.Bytes())
}

if err := cmd.Start(); err != nil {
return nil, nil, fmt.Errorf("error starting git cat-file: %w\n%s", err, stderr.Bytes())
return fmt.Errorf("error starting git cat-file: %w\n%s", err, stderr.Bytes())
}

return stdout, cmd, nil
defer func() { _ = cmd.Wait() }()

return handlers.HandleFile(ctx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(s.skipArchives))
}

func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) error {
Expand Down

0 comments on commit 59c615a

Please sign in to comment.