diff --git a/errors.go b/errors.go
index 4033acda..961ab788 100644
--- a/errors.go
+++ b/errors.go
@@ -434,3 +434,33 @@ func (e UnreachableError) Error() string {
 func NewUnreachableError() *UnreachableError {
 	return &UnreachableError{Stack: debug.Stack()}
 }
+
+// CollisionLimitError is a fatal error returned when a noncryptographic hash collision
+// would exceed the collision limit (per digest per map) we enforce at the first level.
+type CollisionLimitError struct {
+	collisionLimitPerDigest uint32 // limit <= 255 is recommended, larger values are useful for tests
+}
+
+// NewCollisionLimitError constructs a CollisionLimitError.
+func NewCollisionLimitError(collisionLimitPerDigest uint32) error {
+	return NewFatalError(&CollisionLimitError{collisionLimitPerDigest: collisionLimitPerDigest})
+}
+
+func (e *CollisionLimitError) Error() string {
+	return fmt.Sprintf("collision limit per digest %d already reached", e.collisionLimitPerDigest)
+}
+
+// MapElementCountError is a fatal error returned when the element count is unexpected.
+// It is an implementation error.
+type MapElementCountError struct {
+	msg string
+}
+
+// NewMapElementCountError constructs a MapElementCountError.
+func NewMapElementCountError(msg string) error {
+	return NewFatalError(&MapElementCountError{msg: msg})
+}
+
+func (e *MapElementCountError) Error() string {
+	return e.msg
+}
diff --git a/map.go b/map.go
index 3c8f13eb..252a647b 100644
--- a/map.go
+++ b/map.go
@@ -75,6 +75,13 @@ const (
 	typicalRandomConstant = uint64(0x1BD11BDAA9FC1A22) // DO NOT MODIFY
 )
 
+// MaxCollisionLimitPerDigest is the noncryptographic hash collision limit
+// (per digest per map) we enforce at the first level. In the same map,
+// an unintentional collision for the same digest should be rare, and
+// several collisions should be extremely rare. The default limit should
+// be high enough to ignore accidental collisions while mitigating attacks.
+var MaxCollisionLimitPerDigest = uint32(255)
+
 type MapKey Storable
 
 type MapValue Storable
@@ -123,6 +130,8 @@ type element interface {
 
 	Size() uint32
 
+	Count(storage SlabStorage) (uint32, error)
+
 	PopIterate(SlabStorage, MapPopIterationFunc) error
 }
 
@@ -635,6 +644,10 @@ func (e *singleElement) Size() uint32 {
 	return e.size
 }
 
+func (e *singleElement) Count(_ SlabStorage) (uint32, error) {
+	return 1, nil
+}
+
 func (e *singleElement) PopIterate(_ SlabStorage, fn MapPopIterationFunc) error {
 	fn(e.key, e.value)
 	return nil
@@ -787,6 +800,10 @@ func (e *inlineCollisionGroup) Elements(_ SlabStorage) (elements, error) {
 	return e.elements, nil
 }
 
+func (e *inlineCollisionGroup) Count(_ SlabStorage) (uint32, error) {
+	return e.elements.Count(), nil
+}
+
 func (e *inlineCollisionGroup) PopIterate(storage SlabStorage, fn MapPopIterationFunc) error {
 	return e.elements.PopIterate(storage, fn)
 }
@@ -946,6 +963,14 @@ func (e *externalCollisionGroup) Elements(storage SlabStorage) (elements, error)
 	return dataSlab.elements, nil
 }
 
+func (e *externalCollisionGroup) Count(storage SlabStorage) (uint32, error) {
+	elements, err := e.Elements(storage)
+	if err != nil {
+		return 0, err
+	}
+	return elements.Count(), nil
+}
+
 func (e *externalCollisionGroup) PopIterate(storage SlabStorage, fn MapPopIterationFunc) error {
 	elements, err := e.Elements(storage)
 	if err != nil {
@@ -1248,11 +1273,48 @@ func (e *hkeyElements) Set(storage SlabStorage, address Address, b DigesterBuild
 		}
 	}
 
-	// Has matching hkey
+	// hkey digest has a collision.
 	if equalIndex != -1 {
-
+		// The new element has the same digest as the existing elem.
+		// elem is the existing element before the new element is inserted.
 		elem := e.elems[equalIndex]
 
+		// Enforce MaxCollisionLimitPerDigest at the first level (noncryptographic hash).
+		if e.level == 0 {
+
+			// Before the new element with the colliding digest is inserted,
+			// the existing elem is a single element or a collision group.
+			// elem.Count() returns 1 for a single element,
+			// and returns > 1 for a collision group.
+			elementCount, err := elem.Count(storage)
+			if err != nil {
+				return nil, err
+			}
+			if elementCount == 0 {
+				return nil, NewMapElementCountError("expect element count > 0, got element count == 0")
+			}
+
+			// collisionCount is elementCount-1 because:
+			// - if elem is a single element, collision count is 0 (no collision yet)
+			// - if elem is a collision group, collision count is one less than the number
+			//   of elements in the collision group.
+			collisionCount := elementCount - 1
+
+			// Check if the existing collision count has reached MaxCollisionLimitPerDigest.
+			if collisionCount >= MaxCollisionLimitPerDigest {
+				// Enforce the collision limit on inserts and ignore updates.
+				_, err = elem.Get(storage, digester, level, hkey, comparator, key)
+				if err != nil {
+					var knfe *KeyNotFoundError
+					if errors.As(err, &knfe) {
+						// Don't allow any more collisions for a digest that
+						// has already reached MaxCollisionLimitPerDigest.
+						return nil, NewCollisionLimitError(MaxCollisionLimitPerDigest)
+					}
+				}
+			}
+		}
+
 		oldElemSize := elem.Size()
 
 		elem, existingValue, err := elem.Set(storage, address, b, digester, level, hkey, comparator, hip, key, value)
diff --git a/map_test.go b/map_test.go
index bb811429..7a809ff9 100644
--- a/map_test.go
+++ b/map_test.go
@@ -21,6 +21,7 @@ package atree
 import (
 	"errors"
 	"fmt"
+	"math"
 	"math/rand"
 	"reflect"
 	"sort"
@@ -368,14 +369,20 @@ func TestMapSetAndGet(t *testing.T) {
 
 	t.Run("unique keys with hash collision", func(t *testing.T) {
 
-		SetThreshold(256)
-		defer SetThreshold(1024)
-
 		const (
 			mapSize       = 1024
 			keyStringSize = 16
 		)
 
+		SetThreshold(256)
+		defer SetThreshold(1024)
+
+		savedMaxCollisionLimitPerDigest := MaxCollisionLimitPerDigest
+		MaxCollisionLimitPerDigest = uint32(math.Ceil(float64(mapSize) / 10))
+		defer func() {
+			MaxCollisionLimitPerDigest = savedMaxCollisionLimitPerDigest
+		}()
+
 		r := newRand(t)
 
 		digesterBuilder := &mockDigesterBuilder{}
@@ -410,14 +417,20 @@ func TestMapSetAndGet(t *testing.T) {
 	})
 
 	t.Run("replicate keys with hash collision", func(t *testing.T) {
-		SetThreshold(256)
-		defer SetThreshold(1024)
-
 		const (
 			mapSize       = 1024
 			keyStringSize = 16
 		)
 
+		SetThreshold(256)
+		defer SetThreshold(1024)
+
+		savedMaxCollisionLimitPerDigest := MaxCollisionLimitPerDigest
+		MaxCollisionLimitPerDigest = uint32(math.Ceil(float64(mapSize) / 10))
+		defer func() {
+			MaxCollisionLimitPerDigest = savedMaxCollisionLimitPerDigest
+		}()
+
 		r := newRand(t)
 
 		digesterBuilder := &mockDigesterBuilder{}
@@ -430,7 +443,7 @@ func TestMapSetAndGet(t *testing.T) {
 			i++
 
 			digests := []Digest{
-				Digest(1 % 10),
+				Digest(i % 10),
 			}
 			digesterBuilder.On("Digest", k).Return(mockDigester{digests})
 		}
@@ -3391,10 +3404,16 @@ func TestMapFromBatchData(t *testing.T) {
 
 	t.Run("collision", func(t *testing.T) {
 
+		const mapSize = 1024
+
 		SetThreshold(512)
 		defer SetThreshold(1024)
 
-		const mapSize = 1024
+		savedMaxCollisionLimitPerDigest := MaxCollisionLimitPerDigest
+		defer func() {
+			MaxCollisionLimitPerDigest = savedMaxCollisionLimitPerDigest
+		}()
+		MaxCollisionLimitPerDigest = mapSize / 2
 
 		typeInfo := testTypeInfo{42}
 
@@ -3863,3 +3882,150 @@ func TestMapSlabDump(t *testing.T) {
 		require.Equal(t, want, dumps)
 	})
 }
+
+func TestMaxCollisionLimitPerDigest(t *testing.T) {
+	savedMaxCollisionLimitPerDigest := MaxCollisionLimitPerDigest
+	defer func() {
+		MaxCollisionLimitPerDigest = savedMaxCollisionLimitPerDigest
+	}()
+
+	t.Run("collision limit 0", func(t *testing.T) {
+		const mapSize = 1024
+
+		SetThreshold(256)
+		defer SetThreshold(1024)
+
+		// Set the noncryptographic hash collision limit to 0,
+		// meaning no collisions are allowed at the first level.
+		MaxCollisionLimitPerDigest = uint32(0)
+
+		digesterBuilder := &mockDigesterBuilder{}
+		keyValues := make(map[Value]Value, mapSize)
+		for i := uint64(0); i < mapSize; i++ {
+			k := Uint64Value(i)
+			v := Uint64Value(i)
+			keyValues[k] = v
+
+			digests := []Digest{Digest(i)}
+			digesterBuilder.On("Digest", k).Return(mockDigester{digests})
+		}
+
+		typeInfo := testTypeInfo{42}
+		address := Address{1, 2, 3, 4, 5, 6, 7, 8}
+		storage := newTestPersistentStorage(t)
+
+		m, err := NewMap(storage, address, digesterBuilder, typeInfo)
+		require.NoError(t, err)
+
+		// Insert elements within the collision limit
+		for k, v := range keyValues {
+			existingStorable, err := m.Set(compare, hashInputProvider, k, v)
+			require.NoError(t, err)
+			require.Nil(t, existingStorable)
+		}
+
+		verifyMap(t, storage, typeInfo, address, m, keyValues, nil, false)
+
+		// Insert elements exceeding the collision limit
+		collisionKeyValues := make(map[Value]Value, mapSize)
+		for i := uint64(0); i < mapSize; i++ {
+			k := Uint64Value(mapSize + i)
+			v := Uint64Value(mapSize + i)
+			collisionKeyValues[k] = v
+
+			digests := []Digest{Digest(i)}
+			digesterBuilder.On("Digest", k).Return(mockDigester{digests})
+		}
+
+		for k, v := range collisionKeyValues {
+			existingStorable, err := m.Set(compare, hashInputProvider, k, v)
+			var collisionLimitError *CollisionLimitError
+			require.ErrorAs(t, err, &collisionLimitError)
+			require.Nil(t, existingStorable)
+		}
+
+		// Verify that no new elements exceeding the collision limit were inserted
+		verifyMap(t, storage, typeInfo, address, m, keyValues, nil, false)
+
+		// Update elements within the collision limit
+		for k := range keyValues {
+			v := Uint64Value(0)
+			keyValues[k] = v
+			existingStorable, err := m.Set(compare, hashInputProvider, k, v)
+			require.NoError(t, err)
+			require.NotNil(t, existingStorable)
+		}
+
+		verifyMap(t, storage, typeInfo, address, m, keyValues, nil, false)
+	})
+
+	t.Run("collision limit > 0", func(t *testing.T) {
+		const mapSize = 1024
+
+		SetThreshold(256)
+		defer SetThreshold(1024)
+
+		// Set the noncryptographic hash collision limit to 7,
+		// meaning at most 8 elements per collision group per digest at the first level.
+		MaxCollisionLimitPerDigest = uint32(7)
+
+		digesterBuilder := &mockDigesterBuilder{}
+		keyValues := make(map[Value]Value, mapSize)
+		for i := uint64(0); i < mapSize; i++ {
+			k := Uint64Value(i)
+			v := Uint64Value(i)
+			keyValues[k] = v
+
+			digests := []Digest{Digest(i % 128)}
+			digesterBuilder.On("Digest", k).Return(mockDigester{digests})
+		}
+
+		typeInfo := testTypeInfo{42}
+		address := Address{1, 2, 3, 4, 5, 6, 7, 8}
+		storage := newTestPersistentStorage(t)
+
+		m, err := NewMap(storage, address, digesterBuilder, typeInfo)
+		require.NoError(t, err)
+
+		// Insert elements within the collision limit
+		for k, v := range keyValues {
+			existingStorable, err := m.Set(compare, hashInputProvider, k, v)
+			require.NoError(t, err)
+			require.Nil(t, existingStorable)
+		}
+
+		verifyMap(t, storage, typeInfo, address, m, keyValues, nil, false)
+
+		// Insert elements exceeding the collision limit
+		collisionKeyValues := make(map[Value]Value, mapSize)
+		for i := uint64(0); i < mapSize; i++ {
+			k := Uint64Value(mapSize + i)
+			v := Uint64Value(mapSize + i)
+			collisionKeyValues[k] = v
+
+			digests := []Digest{Digest(i % 128)}
+			digesterBuilder.On("Digest", k).Return(mockDigester{digests})
+		}
+
+		for k, v := range collisionKeyValues {
+			existingStorable, err := m.Set(compare, hashInputProvider, k, v)
+			var collisionLimitError *CollisionLimitError
+			require.ErrorAs(t, err, &collisionLimitError)
+			require.Nil(t, existingStorable)
+		}
+
+		// Verify that no new elements exceeding the collision limit were inserted
+		verifyMap(t, storage, typeInfo, address, m, keyValues, nil, false)
+
+		// Update elements within the collision limit
+		for k := range keyValues {
+			v := Uint64Value(0)
+			keyValues[k] = v
+			existingStorable, err := m.Set(compare, hashInputProvider, k, v)
+			require.NoError(t, err)
+			require.NotNil(t, existingStorable)
+		}
+
+		verifyMap(t, storage, typeInfo, address, m, keyValues, nil, false)
+	})
+}
diff --git a/mapcollision_bench_test.go b/mapcollision_bench_test.go
new file mode 100644
index 00000000..fcc2cbf8
--- /dev/null
+++ b/mapcollision_bench_test.go
@@ -0,0 +1,151 @@
+/*
+ * Atree - Scalable Arrays and Ordered Maps
+ *
+ * Copyright 2022 Dapper Labs, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package atree + +import ( + "encoding/binary" + "fmt" + "testing" + + "github.com/stretchr/testify/require" + "github.com/zeebo/blake3" +) + +type collisionDigesterBuilder struct { + digest uint64 + collisionCount uint32 + maxCollisionCount uint32 +} + +var _ DigesterBuilder = &collisionDigesterBuilder{} + +func NewCollisionDigesterBuilder(maxCollisionLimitPerDigest uint32) DigesterBuilder { + return &collisionDigesterBuilder{ + maxCollisionCount: maxCollisionLimitPerDigest + 1, + } +} + +func (db *collisionDigesterBuilder) Digest(hip HashInputProvider, value Value) (Digester, error) { + + if db.collisionCount < db.maxCollisionCount { + db.collisionCount++ + } else { + db.digest++ + db.collisionCount = 0 + } + firstLevelHash := db.digest + + var scratch [32]byte + msg, err := hip(value, scratch[:]) + if err != nil { + return nil, err + } + + return &collisionDigester{ + firstLevelHash: firstLevelHash, + msg: msg, + }, nil +} + +func (db *collisionDigesterBuilder) SetSeed(k1 uint64, k2 uint64) { +} + +type collisionDigester struct { + firstLevelHash uint64 + blake3Hash [4]uint64 + msg []byte +} + +var _ Digester = &collisionDigester{} + +func (d *collisionDigester) Digest(level uint) (Digest, error) { + if level >= d.Levels() { + return Digest(0), fmt.Errorf("invalid digest level %d", level) + } + + switch level { + case 0: + return Digest(d.firstLevelHash), nil + default: + if d.blake3Hash == emptyBlake3Hash { + sum := blake3.Sum256(d.msg) + d.blake3Hash[0] = binary.BigEndian.Uint64(sum[:]) + d.blake3Hash[1] = binary.BigEndian.Uint64(sum[8:]) + d.blake3Hash[2] = binary.BigEndian.Uint64(sum[16:]) + d.blake3Hash[3] = binary.BigEndian.Uint64(sum[24:]) + } + return Digest(d.blake3Hash[level-1]), nil + } +} + +func (d *collisionDigester) DigestPrefix(level uint) ([]Digest, error) { + return nil, nil +} + +func (d *collisionDigester) Levels() uint { + return 4 +} + +func (d *collisionDigester) Reset() { +} + +func BenchmarkCollisionPerDigest(b *testing.B) { + + savedMaxCollisionLimitPerDigest := MaxCollisionLimitPerDigest + defer func() { + MaxCollisionLimitPerDigest = savedMaxCollisionLimitPerDigest + }() + + const mapCount = 1_000_000 + + collisionPerDigests := []uint32{0, 10, 255, 500, 1_000, 2_000, 5_000, 10_000} + + for _, collisionPerDigest := range collisionPerDigests { + + name := fmt.Sprintf("%d elements %d collision per digest", mapCount, collisionPerDigest) + + b.Run(name, func(b *testing.B) { + + MaxCollisionLimitPerDigest = collisionPerDigest + + digesterBuilder := NewCollisionDigesterBuilder(collisionPerDigest) + keyValues := make(map[Value]Value, mapCount) + for i := uint64(0); i < mapCount; i++ { + k := Uint64Value(i) + v := Uint64Value(i) + keyValues[k] = v + } + + typeInfo := testTypeInfo{42} + address := Address{1, 2, 3, 4, 5, 6, 7, 8} + storage := newTestPersistentStorage(b) + + m, err := NewMap(storage, address, digesterBuilder, typeInfo) + require.NoError(b, err) + + b.StartTimer() + + for i := 0; i < b.N; i++ { + for k, v := range keyValues { + _, _ = m.Set(compare, hashInputProvider, k, v) + } + } + }) + } +}