trufflesecurity · ahrav · Sep 27, 2024 · Sep 26, 2024 · Sep 26, 2024 · Sep 26, 2024
@@ -0,0 +1,14 @@
+package hasher
+
+import "golang.org/x/crypto/blake2b"
+
+// Blaker2bHasher is a Hasher backed by the BLAKE2b-256 digest from
+// golang.org/x/crypto/blake2b. It embeds baseHasher and inherits its Hash method.
+type Blaker2bHasher struct {
+	baseHasher
+}
+
+// NewBlaker2bHasher returns a Blaker2bHasher that is ready for use.
+func NewBlaker2bHasher() *Blaker2bHasher {
+	// The error is discarded on purpose: per the blake2b package documentation,
+	// New256 only rejects keys that are too long, and no key is supplied here.
+	digest, _ := blake2b.New256(nil)
+
+	hasher := new(Blaker2bHasher)
+	hasher.hash = digest
+	return hasher
+}
diff --git a/pkg/hasher/fnv.go b/pkg/hasher/fnv.go
@@ -0,0 +1,13 @@
+package hasher
+
+import "hash/fnv"
+
+// FNVHasher is a Hasher backed by the 64-bit FNV-1a digest from hash/fnv.
+// It embeds baseHasher and inherits its Hash method.
+type FNVHasher struct {
+	baseHasher
+}
+
+// NewFNVHasher returns an FNVHasher that is ready for use.
+func NewFNVHasher() *FNVHasher {
+	hasher := new(FNVHasher)
+	hasher.hash = fnv.New64a()
+	return hasher
+}
@@ -0,0 +1,54 @@
+// Package hasher provides a generic interface and base implementation for hashing data.
+package hasher
+
+import (
+ "fmt"
+ "hash"
+)
+
+// Hasher is the common interface for hashing a byte slice.
+// Concurrency safety is not part of the contract: an implementation may or may
+// not be safe for concurrent use, so consult the documentation of the concrete
+// type before sharing an instance between goroutines.
+type Hasher interface {
+ // Hash returns the digest of data.
+ // It returns an error if the input data is too large.
+ // Calling it repeatedly with the same input yields the same output,
+ // assuming the underlying hash function is deterministic.
+ Hash(data []byte) ([]byte, error)
+}
+
+// baseHasher provides a base implementation for the Hasher interface.
+// It uses the hash.Hash interface from the standard library to perform the actual hashing.
+// This implementation is not safe for concurrent use. Each goroutine/worker should
+// use its own instance of baseHasher for concurrent operations.
+// Implementations that require concurrent access should wrap baseHasher with a mutex. (e.g., MutexHasher)
+type baseHasher struct{ hash hash.Hash }
+
+// InputTooLargeError reports that the data handed to a hasher was larger than
+// the permitted maximum. It records both the offending size and the limit.
+type InputTooLargeError struct {
+	inputSize, maxSize int
+}
+
+// Error implements the error interface, rendering the actual size against the limit.
+func (e *InputTooLargeError) Error() string {
+	const format = "input data exceeds the maximum allowed size: %d > %d"
+	return fmt.Sprintf(format, e.inputSize, e.maxSize)
+}
+
+// maxInputSize is the largest input, in bytes, that Hash accepts (16 KiB).
+const maxInputSize = 16 * 1024
+
+// Hash computes the digest of data with the underlying hash.Hash.
+//
+// It returns an *InputTooLargeError, and a nil digest, when len(data) exceeds
+// maxInputSize. Empty or nil input is not an error; it is hashed like any
+// other input.
+//
+// The underlying hash is reset before every computation so that earlier calls
+// cannot influence the result. Because that state is shared between calls,
+// the method is not safe for concurrent use.
+func (b *baseHasher) Hash(data []byte) ([]byte, error) {
+	if size := len(data); size > maxInputSize {
+		return nil, &InputTooLargeError{inputSize: size, maxSize: maxInputSize}
+	}
+
+	b.hash.Reset()
+	// The hash.Hash contract states that Write never returns an error, so the
+	// result is discarded deliberately.
+	// (https://cs.opensource.google/go/go/+/refs/tags/go1.23.1:src/hash/hash.go;l=27-28)
+	_, _ = b.hash.Write(data) //nolint:errcheck // see comment above
+	return b.hash.Sum(nil), nil
+}
@@ -0,0 +1,249 @@
+package hasher
+
+import (
+ "bytes"
+ "encoding/hex"
+ "errors"
+ "fmt"
+ "sync"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// TestHasherHash is a table-driven check of Hash against known digests and
+// against the input size limit. Every case builds its own hasher, so the
+// parallel subtests never share an instance.
+func TestHasherHash(t *testing.T) {
+	testCases := []struct {
+		name        string
+		hasher      Hasher
+		input       []byte
+		expectedHex string
+		expectError error
+	}{
+		{
+			name:        "FNV-64a with 'Hello, World!'",
+			hasher:      NewFNVHasher(),
+			input:       []byte("Hello, World!"),
+			expectedHex: "6ef05bd7cc857c54",
+		},
+		{
+			// Empty input must hash successfully; FNV-1a then yields its offset basis.
+			name:        "FNV-64a with empty input",
+			hasher:      NewFNVHasher(),
+			input:       nil,
+			expectedHex: "cbf29ce484222325",
+		},
+		{
+			name:        "SHA-256 with 'Hello, World!'",
+			hasher:      NewSHA256Hasher(),
+			input:       []byte("Hello, World!"),
+			expectedHex: "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f",
+		},
+		{
+			// Well-known SHA-256 digest of the empty message.
+			name:        "SHA-256 with empty input",
+			hasher:      NewSHA256Hasher(),
+			input:       []byte{},
+			expectedHex: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
+		},
+		{
+			name:        "SHA-256 input at max size",
+			hasher:      NewSHA256Hasher(),
+			input:       bytes.Repeat([]byte("a"), maxInputSize),
+			expectedHex: "f3336bea752b5a28743033dd2c844a4a63fba08871aaee2586a2bf2d69be83a2",
+		},
+		{
+			name:        "FNV-64a input exceeds max size",
+			hasher:      NewFNVHasher(),
+			input:       bytes.Repeat([]byte("a"), maxInputSize+1),
+			expectError: &InputTooLargeError{},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			got, err := tc.hasher.Hash(tc.input)
+			checkError(t, err, tc.expectError, len(tc.input))
+
+			// Skip the digest comparison when an error was expected, or when an
+			// unexpected one has already been reported by checkError.
+			if tc.expectError != nil || err != nil {
+				return
+			}
+
+			expected, err := hex.DecodeString(tc.expectedHex)
+			if err != nil {
+				t.Fatalf("invalid expected hex string '%s': %v", tc.expectedHex, err)
+			}
+
+			if !bytes.Equal(got, expected) {
+				t.Errorf("hash mismatch.\nGot: %x\nExpected: %x", got, expected)
+			}
+		})
+	}
+}
+
+// checkError asserts that err matches the expectation described by expectError.
+//
+//   - expectError == nil: err must be nil.
+//   - expectError != nil: err must be non-nil. If expectError is an
+//     *InputTooLargeError, err must be one too, and its recorded sizes must
+//     equal inputSize and maxInputSize.
+//
+// Failures are reported on t; the helper never aborts the calling test.
+func checkError(t *testing.T, err, expectError error, inputSize int) {
+	t.Helper()
+
+	if expectError == nil {
+		assert.NoError(t, err)
+		return
+	}
+
+	// Whatever kind of error is expected, a nil err is always a failure.
+	// Without this check an expectation of any other error type would pass vacuously.
+	if !assert.Error(t, err) {
+		return
+	}
+
+	var wantTooLarge *InputTooLargeError
+	if !errors.As(expectError, &wantTooLarge) {
+		return
+	}
+
+	var gotTooLarge *InputTooLargeError
+	if assert.ErrorAs(t, err, &gotTooLarge) {
+		assert.Equal(t, inputSize, gotTooLarge.inputSize)
+		assert.Equal(t, maxInputSize, gotTooLarge.maxSize)
+	}
+}
+
+// TestBaseHasherHashIdempotency hashes the same maximum-size input twice with a
+// single FNV hasher and expects both digests to be identical.
+func TestBaseHasherHashIdempotency(t *testing.T) {
+	t.Parallel()
+
+	payload := bytes.Repeat([]byte("a"), maxInputSize)
+	h := NewFNVHasher()
+
+	first, firstErr := h.Hash(payload)
+	assert.NoError(t, firstErr, "unexpected error on first hash")
+
+	second, secondErr := h.Hash(payload)
+	assert.NoError(t, secondErr, "unexpected error on second hash")
+
+	if bytes.Equal(first, second) {
+		return
+	}
+	t.Errorf("hash results are not identical.\nFirst: %x\nSecond: %x", first, second)
+}
+
+// Load parameters for TestMutexHasherConcurrentHash.
+const (
+	// numIterations is how many hashes each goroutine computes.
+	numIterations = 10_000
+	// numGoroutines is how many goroutines hash concurrently.
+	numGoroutines = 512
+)
+
+// TestMutexHasherConcurrentHash verifies that MutexHasher is thread-safe
+// and produces consistent hash results when used concurrently.
+//
+// numGoroutines goroutines each hash the same input numIterations times through
+// one shared MutexHasher and compare every digest with a reference value.
+// A goroutine stops at its first failure, so at most one error per goroutine
+// is reported.
+func TestMutexHasherConcurrentHash(t *testing.T) {
+	t.Parallel()
+
+	mutexHasher := NewMutexHasher(NewSHA256Hasher())
+	input := []byte("Concurrent Hashing Test")
+
+	// Compute the expected hash once for comparison. Without it every later
+	// comparison would be meaningless, so give up early on failure.
+	expectedHash, err := mutexHasher.Hash(input)
+	if !assert.NoError(t, err, "unexpected error computing expected hash") {
+		return
+	}
+
+	// One buffered slot per goroutine is enough because each goroutine sends at
+	// most one error before returning. Sizing the buffer for every iteration
+	// (numGoroutines*numIterations) would reserve roughly 80 MB on 64-bit
+	// platforms for a channel that normally stays empty.
+	errs := make(chan error, numGoroutines)
+
+	var wg sync.WaitGroup
+	wg.Add(numGoroutines)
+
+	// Launch multiple goroutines to perform hashing concurrently.
+	for i := range numGoroutines {
+		go func(goroutineID int) {
+			defer wg.Done()
+			for j := range numIterations {
+				hash, err := mutexHasher.Hash(input)
+				if err != nil {
+					errs <- fmt.Errorf("goroutine %d: hash error: %w", goroutineID, err)
+					return
+				}
+				if !bytes.Equal(hash, expectedHash) {
+					errs <- fmt.Errorf("goroutine %d: hash mismatch on iteration %d", goroutineID, j)
+					return
+				}
+			}
+		}(i)
+	}
+
+	wg.Wait()
+	close(errs)
+
+	for err := range errs {
+		t.Error(err)
+	}
+}
+
+// sampleData is the fixed input hashed by every benchmark in this file.
+var sampleData = []byte("The quick brown fox jumps over the lazy dog")
+
+// benchmarkSharedHasher measures Hash on a single MutexHasher that wraps inner
+// and is shared by all goroutines started by b.RunParallel.
+func benchmarkSharedHasher(b *testing.B, inner Hasher) {
+	mutexHasher := NewMutexHasher(inner)
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			// A plain error check keeps the assertion library out of the timed loop.
+			if _, err := mutexHasher.Hash(sampleData); err != nil {
+				b.Error(err)
+			}
+		}
+	})
+}
+
+// benchmarkPerGoroutineHasher measures Hash when every goroutine started by
+// b.RunParallel builds its own hasher with newHasher, so no synchronization is
+// needed. The type parameter lets the concrete constructors be passed directly.
+func benchmarkPerGoroutineHasher[H Hasher](b *testing.B, newHasher func() H) {
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	b.RunParallel(func(pb *testing.PB) {
+		// Each goroutine maintains its own Hasher instance.
+		hasher := newHasher()
+		for pb.Next() {
+			if _, err := hasher.Hash(sampleData); err != nil {
+				b.Error(err)
+			}
+		}
+	})
+}
+
+// BenchmarkHasherWithMutex_SHA256 benchmarks hashing using a single SHA-256 Hasher instance
+// protected by a sync.Mutex across multiple goroutines.
+func BenchmarkHasherWithMutex_SHA256(b *testing.B) {
+	benchmarkSharedHasher(b, NewSHA256Hasher())
+}
+
+// BenchmarkHasherPerGoroutine_SHA256 benchmarks hashing using separate SHA-256 Hasher instances
+// for each goroutine, eliminating the need for synchronization.
+func BenchmarkHasherPerGoroutine_SHA256(b *testing.B) {
+	benchmarkPerGoroutineHasher(b, NewSHA256Hasher)
+}
+
+// BenchmarkHasherWithMutex_FNV benchmarks hashing using a single FNV-64a Hasher instance
+// protected by a sync.Mutex across multiple goroutines.
+func BenchmarkHasherWithMutex_FNV(b *testing.B) {
+	benchmarkSharedHasher(b, NewFNVHasher())
+}
+
+// BenchmarkHasherPerGoroutine_FNV benchmarks hashing using separate FNV-64a Hasher instances
+// for each goroutine, eliminating the need for synchronization.
+func BenchmarkHasherPerGoroutine_FNV(b *testing.B) {
+	benchmarkPerGoroutineHasher(b, NewFNVHasher)
+}
+
+// BenchmarkHasherWithMutex_Blake2b benchmarks hashing using a single Blake2b Hasher instance
+// protected by a sync.Mutex across multiple goroutines.
+func BenchmarkHasherWithMutex_Blake2b(b *testing.B) {
+	benchmarkSharedHasher(b, NewBlaker2bHasher())
+}
+
+// BenchmarkHasherPerGoroutine_Blake2b benchmarks hashing using separate Blake2b Hasher instances
+// for each goroutine, eliminating the need for synchronization.
+func BenchmarkHasherPerGoroutine_Blake2b(b *testing.B) {
+	benchmarkPerGoroutineHasher(b, NewBlaker2bHasher)
+}
diff --git a/pkg/hasher/mutex.go b/pkg/hasher/mutex.go
@@ -0,0 +1,24 @@
+package hasher
+
+import (
+ "sync"
+)
+
+// MutexHasher makes any Hasher safe for concurrent use by serializing calls
+// to it with a sync.Mutex.
+type MutexHasher struct {
+	// mu guards every call into hasher.
+	mu sync.Mutex
+	// hasher is the wrapped implementation that performs the hashing.
+	hasher Hasher
+}
+
+// NewMutexHasher returns a MutexHasher that guards the given Hasher.
+func NewMutexHasher(hasher Hasher) *MutexHasher {
+	wrapped := new(MutexHasher)
+	wrapped.hasher = hasher
+	return wrapped
+}
+
+// Hash forwards data to the wrapped Hasher while holding the mutex, so that
+// only one call reaches the wrapped Hasher at a time. The digest and error of
+// the wrapped Hasher are returned unchanged.
+func (m *MutexHasher) Hash(data []byte) ([]byte, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	sum, err := m.hasher.Hash(data)
+	return sum, err
+}
diff --git a/pkg/hasher/sha256.go b/pkg/hasher/sha256.go
@@ -0,0 +1,13 @@
+package hasher
+
+import "crypto/sha256"
+
+// SHA256Hasher is a Hasher backed by the SHA-256 digest from crypto/sha256.
+// It embeds baseHasher and inherits its Hash method.
+type SHA256Hasher struct {
+	baseHasher
+}
+
+// NewSHA256Hasher returns a SHA256Hasher that is ready for use.
+func NewSHA256Hasher() *SHA256Hasher {
+	hasher := new(SHA256Hasher)
+	hasher.hash = sha256.New()
+	return hasher
+}