Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat] - Add Generic Hasher Interface with Blake2b Implementation #3337

Merged
merged 8 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions pkg/hasher/blake2b.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package hasher

import "golang.org/x/crypto/blake2b"

// Blaker2bHasher is a Hasher backed by the Blake2b algorithm.
// It embeds baseHasher and therefore exposes the Hash method baseHasher provides.
type Blaker2bHasher struct {
	baseHasher
}

// NewBlaker2bHasher returns a Blaker2bHasher whose underlying hash is created
// with blake2b.New256 and no key.
func NewBlaker2bHasher() *Blaker2bHasher {
	// The error is intentionally discarded: New256 is called with a nil key,
	// and per the blake2b documentation only an invalid (oversized) key is rejected.
	digest, _ := blake2b.New256(nil)

	hasher := new(Blaker2bHasher)
	hasher.hash = digest
	return hasher
}
13 changes: 13 additions & 0 deletions pkg/hasher/fnv.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package hasher

import "hash/fnv"

// FNVHasher is a Hasher backed by the FNV algorithm.
// It embeds baseHasher and therefore exposes the Hash method baseHasher provides.
type FNVHasher struct {
	baseHasher
}

// NewFNVHasher returns an FNVHasher whose underlying hash is fnv.New64a.
func NewFNVHasher() *FNVHasher {
	hasher := new(FNVHasher)
	hasher.hash = fnv.New64a()
	return hasher
}
54 changes: 54 additions & 0 deletions pkg/hasher/hasher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Package hasher provides a generic interface and base implementation for hashing data.
package hasher

import (
"fmt"
"hash"
)

// Hasher is the generic contract for anything that can hash a byte slice.
// Safety for concurrent use is left to each implementation: it is allowed but
// not required, so callers should consult the documentation of the concrete
// type they use for its concurrency guarantees.
type Hasher interface {
	// Hash returns the hashed form of data.
	// An error is returned when data is too large.
	// Hash is idempotent: provided the underlying hash function is
	// deterministic, repeated calls with the same input yield the same output.
	Hash(data []byte) ([]byte, error)
}

// baseHasher provides a base implementation for the Hasher interface.
// It delegates the actual hashing to a hash.Hash from the standard library.
// This implementation is not safe for concurrent use: each goroutine/worker
// should use its own instance, or access should be serialized by wrapping it
// with a mutex (e.g., MutexHasher).
type baseHasher struct{ hash hash.Hash }

// InputTooLargeError is returned when the input data exceeds the maximum allowed size.
type InputTooLargeError struct {
	inputSize int // length, in bytes, of the rejected input
	maxSize   int // limit that inputSize exceeded
}

// Error implements the error interface, reporting both the offending input
// size and the limit it exceeded.
func (e *InputTooLargeError) Error() string {
	return fmt.Sprintf("input data exceeds the maximum allowed size: %d > %d", e.inputSize, e.maxSize)
}

// maxInputSize is the largest input, in bytes, that baseHasher.Hash accepts.
const maxInputSize = 1 << 14 // 16 KiB

// Hash computes the hash of the given data.
//
// It returns an *InputTooLargeError (and a nil result) when data is longer
// than maxInputSize. Inputs of exactly maxInputSize bytes, as well as empty
// inputs, are hashed normally; no other error is ever returned.
//
// The underlying hash is reset before each computation so that previous
// hashing operations do not affect the result.
func (b *baseHasher) Hash(data []byte) ([]byte, error) {
	if len(data) > maxInputSize {
		return nil, &InputTooLargeError{inputSize: len(data), maxSize: maxInputSize}
	}

	b.hash.Reset()

	// The hash.Hash interface documents that Write never returns an error,
	// so the result is deliberately discarded.
	// (https://cs.opensource.google/go/go/+/refs/tags/go1.23.1:src/hash/hash.go;l=27-28)
	_, _ = b.hash.Write(data) //nolint:errcheck

	return b.hash.Sum(nil), nil
}
249 changes: 249 additions & 0 deletions pkg/hasher/hasher_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
package hasher

import (
"bytes"
"encoding/hex"
"errors"
"fmt"
"sync"
"testing"

"github.com/stretchr/testify/assert"
)

// TestHasherHash is a table-driven test of Hash. Each case either compares the
// digest against a known hex value or, when expectError is set, verifies via
// checkError that the input is rejected.
func TestHasherHash(t *testing.T) {
	testCases := []struct {
		name        string
		hasher      Hasher
		input       []byte
		expectedHex string
		expectError error
	}{
		{
			name:        "FNV-64a with 'Hello, World!'",
			hasher:      NewFNVHasher(),
			input:       []byte("Hello, World!"),
			expectedHex: "6ef05bd7cc857c54",
		},
		{
			name:        "SHA-256 with 'Hello, World!'",
			hasher:      NewSHA256Hasher(),
			input:       []byte("Hello, World!"),
			expectedHex: "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f",
		},
		{
			name:        "SHA-256 input at max size",
			hasher:      NewSHA256Hasher(),
			input:       bytes.Repeat([]byte("a"), maxInputSize),
			expectedHex: "f3336bea752b5a28743033dd2c844a4a63fba08871aaee2586a2bf2d69be83a2",
		},
		{
			name:        "FNV-64a input exceeds max size",
			hasher:      NewFNVHasher(),
			input:       bytes.Repeat([]byte("a"), maxInputSize+1),
			expectError: &InputTooLargeError{},
		},
	}

	for _, tc := range testCases {
		// Each case owns its hasher instance, so the parallel subtests do not
		// share hashing state.
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			got, err := tc.hasher.Hash(tc.input)
			checkError(t, err, tc.expectError, len(tc.input))

			// There is no digest to compare when an error was expected, or
			// when an unexpected one has already been reported by checkError.
			if tc.expectError != nil || err != nil {
				return
			}

			expected, err := hex.DecodeString(tc.expectedHex)
			if err != nil {
				t.Fatalf("invalid expected hex string '%s': %v", tc.expectedHex, err)
			}

			if !bytes.Equal(got, expected) {
				t.Errorf("hash mismatch.\nGot: %x\nExpected: %x", got, expected)
			}
		})
	}
}

func checkError(t *testing.T, err, expectError error, inputSize int) {
t.Helper()

if expectError != nil {
var inputTooLargeError *InputTooLargeError
if errors.As(expectError, &inputTooLargeError) {
var inputTooLargeErr *InputTooLargeError
if assert.ErrorAs(t, err, &inputTooLargeErr) {
assert.Equal(t, inputSize, inputTooLargeErr.inputSize)
assert.Equal(t, maxInputSize, inputTooLargeErr.maxSize)
}
}
} else {
assert.NoError(t, err)
}
}

// TestBaseHasherHashIdempotency hashes the same maxInputSize-byte input twice
// with one FNV hasher and expects both digests to be identical.
func TestBaseHasherHashIdempotency(t *testing.T) {
	t.Parallel()

	var (
		h       = NewFNVHasher()
		payload = bytes.Repeat([]byte("a"), maxInputSize)
	)

	first, firstErr := h.Hash(payload)
	assert.NoError(t, firstErr, "unexpected error on first hash")

	second, secondErr := h.Hash(payload)
	assert.NoError(t, secondErr, "unexpected error on second hash")

	if bytes.Equal(first, second) {
		return
	}
	t.Errorf("hash results are not identical.\nFirst: %x\nSecond: %x", first, second)
}

// numGoroutines is the number of goroutines started by the concurrent hashing test.
const numGoroutines = 512

// numIterations is the number of Hash calls each of those goroutines performs.
const numIterations = 10000

// TestMutexHasherConcurrentHash hammers a single MutexHasher from
// numGoroutines goroutines, numIterations calls each, and checks that every
// call succeeds and yields the digest computed up front.
func TestMutexHasherConcurrentHash(t *testing.T) {
	t.Parallel()

	shared := NewMutexHasher(NewSHA256Hasher())
	payload := []byte("Concurrent Hashing Test")

	// Reference digest every concurrent result is compared against.
	want, err := shared.Hash(payload)
	assert.NoError(t, err, "unexpected error computing expected hash")

	// Sized for the worst case (every call fails) so that a worker never
	// blocks on send while the main goroutine is still waiting.
	failures := make(chan error, numGoroutines*numIterations)

	var wg sync.WaitGroup
	for id := 0; id < numGoroutines; id++ {
		wg.Add(1)
		go func(goroutineID int) {
			defer wg.Done()
			for iter := 0; iter < numIterations; iter++ {
				got, hashErr := shared.Hash(payload)
				switch {
				case hashErr != nil:
					failures <- fmt.Errorf("goroutine %d: hash error: %v", goroutineID, hashErr)
				case !bytes.Equal(got, want):
					failures <- fmt.Errorf("goroutine %d: hash mismatch on iteration %d", goroutineID, iter)
				}
			}
		}(id)
	}

	// All senders have finished once Wait returns, so closing is safe and
	// lets the range below terminate.
	wg.Wait()
	close(failures)

	for failure := range failures {
		t.Error(failure)
	}
}

// sampleData is the fixed input that the benchmarks below hash on every iteration.
var sampleData = []byte("The quick brown fox jumps over the lazy dog")

// BenchmarkHasherWithMutex_SHA256 measures parallel hashing of sampleData
// through one SHA-256 Hasher shared by all benchmark goroutines via a MutexHasher.
func BenchmarkHasherWithMutex_SHA256(b *testing.B) {
	shared := NewMutexHasher(NewSHA256Hasher())

	hashLoop := func(pb *testing.PB) {
		for pb.Next() {
			_, hashErr := shared.Hash(sampleData)
			assert.NoError(b, hashErr)
		}
	}

	b.ReportAllocs()
	b.ResetTimer()
	b.RunParallel(hashLoop)
}

// BenchmarkHasherPerGoroutine_SHA256 measures parallel hashing of sampleData
// where every benchmark goroutine creates and uses its own SHA-256 Hasher, so
// no synchronization is involved.
func BenchmarkHasherPerGoroutine_SHA256(b *testing.B) {
	hashLoop := func(pb *testing.PB) {
		// Created inside the RunParallel body: one instance per goroutine.
		own := NewSHA256Hasher()
		for pb.Next() {
			_, hashErr := own.Hash(sampleData)
			assert.NoError(b, hashErr)
		}
	}

	b.ReportAllocs()
	b.ResetTimer()
	b.RunParallel(hashLoop)
}

// BenchmarkHasherWithMutex_FNV measures parallel hashing of sampleData through
// one FNV-64a Hasher shared by all benchmark goroutines via a MutexHasher.
func BenchmarkHasherWithMutex_FNV(b *testing.B) {
	shared := NewMutexHasher(NewFNVHasher())

	hashLoop := func(pb *testing.PB) {
		for pb.Next() {
			_, hashErr := shared.Hash(sampleData)
			assert.NoError(b, hashErr)
		}
	}

	b.ReportAllocs()
	b.ResetTimer()
	b.RunParallel(hashLoop)
}

// BenchmarkHasherPerGoroutine_FNV measures parallel hashing of sampleData
// where every benchmark goroutine creates and uses its own FNV-64a Hasher, so
// no synchronization is involved.
func BenchmarkHasherPerGoroutine_FNV(b *testing.B) {
	hashLoop := func(pb *testing.PB) {
		// Created inside the RunParallel body: one instance per goroutine.
		own := NewFNVHasher()
		for pb.Next() {
			_, hashErr := own.Hash(sampleData)
			assert.NoError(b, hashErr)
		}
	}

	b.ReportAllocs()
	b.ResetTimer()
	b.RunParallel(hashLoop)
}

// BenchmarkHasherWithMutex_Blake2b measures parallel hashing of sampleData
// through one Blake2b Hasher shared by all benchmark goroutines via a MutexHasher.
func BenchmarkHasherWithMutex_Blake2b(b *testing.B) {
	shared := NewMutexHasher(NewBlaker2bHasher())

	hashLoop := func(pb *testing.PB) {
		for pb.Next() {
			_, hashErr := shared.Hash(sampleData)
			assert.NoError(b, hashErr)
		}
	}

	b.ReportAllocs()
	b.ResetTimer()
	b.RunParallel(hashLoop)
}

// BenchmarkHasherPerGoroutine_Blake2b measures parallel hashing of sampleData
// where every benchmark goroutine creates and uses its own Blake2b Hasher, so
// no synchronization is involved.
func BenchmarkHasherPerGoroutine_Blake2b(b *testing.B) {
	hashLoop := func(pb *testing.PB) {
		// Created inside the RunParallel body: one instance per goroutine.
		own := NewBlaker2bHasher()
		for pb.Next() {
			_, hashErr := own.Hash(sampleData)
			assert.NoError(b, hashErr)
		}
	}

	b.ReportAllocs()
	b.ResetTimer()
	b.RunParallel(hashLoop)
}
24 changes: 24 additions & 0 deletions pkg/hasher/mutex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package hasher

import (
"sync"
)

// MutexHasher wraps a Hasher with a sync.Mutex to ensure thread-safe access.
// This implementation is safe for concurrent use.
type MutexHasher struct {
	hasher Hasher     // wrapped Hasher; its Hash is only called while mu is held
	mu     sync.Mutex // serializes calls to hasher.Hash
}

// NewMutexHasher returns a MutexHasher that guards the given Hasher.
// The mutex is left at its zero value, i.e. unlocked.
func NewMutexHasher(hasher Hasher) *MutexHasher {
	wrapped := new(MutexHasher)
	wrapped.hasher = hasher
	return wrapped
}

// Hash forwards data to the wrapped Hasher while holding the mutex, so only
// one call into the wrapped Hasher is in flight at a time. The wrapped
// Hasher's result and error are returned unchanged.
func (m *MutexHasher) Hash(data []byte) ([]byte, error) {
	m.mu.Lock()
	// Deferred so the lock is released even if the wrapped Hasher panics.
	defer m.mu.Unlock()

	sum, err := m.hasher.Hash(data)
	return sum, err
}
13 changes: 13 additions & 0 deletions pkg/hasher/sha256.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package hasher

import "crypto/sha256"

// SHA256Hasher is a Hasher backed by the SHA-256 algorithm.
// It embeds baseHasher and therefore exposes the Hash method baseHasher provides.
type SHA256Hasher struct {
	baseHasher
}

// NewSHA256Hasher returns a SHA256Hasher whose underlying hash is sha256.New.
func NewSHA256Hasher() *SHA256Hasher {
	hasher := new(SHA256Hasher)
	hasher.hash = sha256.New()
	return hasher
}
Loading